I solved the problem this way; maybe it will be useful to someone. The script takes the first two category links from the menu on termo-door.ru, walks the pagination in each category, collects the product links, then parses the name, price, and image from every product page and writes everything to pae.csv:
# -*- coding: utf-8 -*-
import csv

import requests
import fake_useragent
from bs4 import BeautifulSoup

# parsed products accumulate here across all pages
bl = []


def get_request(url, header):
    r = requests.get(url, headers=header)
    return r.text


def get_content(html, header):
    """Yield product page URLs from the first two menu categories."""
    soup = BeautifulSoup(html, 'lxml')
    menu_links = soup.find('ul', class_='menu-m mobile_menu_fixed').find_all('a')[0:2]
    all_category = ['https://termo-door.ru' + a.get('href') for a in menu_links]
    for category_url in all_category:
        soup = BeautifulSoup(get_request(category_url, header), 'lxml')
        pagination = soup.find('ul', class_='c-pagination')
        if pagination is not None:
            # the next-to-last pagination link holds the number of the last page
            page_count = int(pagination.find_all('a')[-2].text)
            for page in range(1, page_count + 1):
                page_soup = BeautifulSoup(get_request(f'{category_url}?page={page}', header), 'lxml')
                for item in page_soup.find_all('div', class_='name'):
                    yield 'https://termo-door.ru' + item.find('a').get('href')
        else:
            # category without pagination: product cards sit directly in the grid
            blocks = soup.find_all('div', class_='adaptive col-lg-5 col-md-4 col-sm-12 col-xs-12 fly-to-cart flexdiscount-product-wrap hover-active')
            for item in blocks:
                yield 'https://termo-door.ru' + item.find('a').get('href')


def get_content_parser(html):
    soup = BeautifulSoup(html, 'lxml')
    # placeholders survive when an element is missing on a page
    name = 'No name'
    price = 'No price'
    img = 'No photo'
    try:
        name = soup.find('h1', class_='category-name').text.strip()
    except AttributeError:
        pass
    try:
        price = soup.find('span', class_='price nowrap').text.strip()
    except AttributeError:
        pass
    try:
        img = 'https://termo-door.ru' + soup.find('img', id='product-image').get('src')
    except AttributeError:
        pass
    bl.append({'name': name, 'price': price, 'img': img})
    return bl


def save_content(bl):
    # utf-8-sig adds a BOM so Excel recognizes the Cyrillic headers
    with open('pae.csv', 'w', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(('Название', 'Цена', 'Фото'))
        for item in bl:
            writer.writerow([item['name'], item['price'], item['img']])


def main():
    url = 'https://termo-door.ru/'
    header = {'user-agent': fake_useragent.UserAgent().random}
    for link in get_content(get_request(url, header), header):
        get_content_parser(get_request(link, header))
    # write the file once, after every product page has been parsed
    save_content(bl)


if __name__ == '__main__':
    main()
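
One optional hardening, not part of the original solution: the script fires dozens of sequential requests, so on long runs it can help to reuse a single requests.Session with automatic retries and a short pause between requests. A minimal sketch; make_session, the delay, and the retry numbers are my own assumptions, not something the site or the code above requires:

import time

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


def make_session(user_agent):
    # hypothetical helper: one keep-alive session instead of a new connection per request
    session = requests.Session()
    session.headers.update({'user-agent': user_agent})
    retry = Retry(total=3, backoff_factor=1, status_forcelist=[429, 500, 502, 503])
    session.mount('https://', HTTPAdapter(max_retries=retry))
    return session


def get_request(url, session, delay=0.5):
    time.sleep(delay)  # small pause between requests; the value is an assumed politeness setting
    r = session.get(url)
    r.raise_for_status()  # fail loudly instead of parsing an error page
    return r.text

With this variant get_request takes the session instead of a headers dict; the rest of the script stays the same.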