import requests
from bs4 import BeautifulSoup
import csv
# Listing page that parse() fetches; the page number is passed as a query parameter.
URL = 'https://au.ru/user/Gruz600/shop/computers/components/motherboard/'
# Headers sent with every request made by get_html().
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36', 'accept': '*/*'}
# Prefix prepended to each lot's href in get_content().
# NOTE(review): presumably the site emits protocol-relative hrefs ("//au.ru/...") — confirm.
HOST = 'https:'
# Suffix appended to the price text in get_content().
RUB = ' руб'
# Path of the CSV file written by parse() via save_file().
FILE = 'motherboard.csv'
def get_html(url, params=None, timeout=30):
    """Perform a GET request using the module-level HEADERS.

    Args:
        url: Address to request.
        params: Optional mapping of query-string parameters.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response object returned by ``requests.get``; the caller is
        expected to inspect ``status_code`` itself.

    Raises:
        requests.exceptions.RequestException: On connection failure or when
            the timeout expires.
    """
    # requests applies no timeout by default, so without this a stalled
    # server would block the script indefinitely.
    return requests.get(url, headers=HEADERS, params=params, timeout=timeout)
def get_pages_count(html):
    """Return the number of listing pages advertised by the pager.

    Args:
        html: Markup of a listing page.

    Returns:
        The number shown on the last numeric pager link, or 1 when the page
        has no pager (or the pager contains no numeric links).
    """
    soup = BeautifulSoup(html, 'html.parser')

    # Each lookup may return None (e.g. a single-page listing renders no
    # pager), so resolve the chain step by step instead of chaining .find().
    wrapper = soup.find('div', class_='au-pager__wrapper')
    pages = wrapper.find(class_='au-pager__pages') if wrapper is not None else None
    if pages is None:
        return 1

    # Walk backwards so the last numeric link wins; non-numeric links are
    # skipped instead of crashing int().
    for link in reversed(pages.find_all('a')):
        label = link.get_text().strip()
        if label.isdecimal():
            return int(label)
    return 1
def get_content(html):
    """Extract the lots listed on one page.

    Args:
        html: Markup of a listing page.

    Returns:
        A list of dicts with the keys ``'link'``, ``'title'`` and ``'price'``
        (all strings), one per ``au-lot-list-card`` element found. A field
        whose element is missing from the card is an empty string; a missing
        price becomes the 'Цену уточняйте' placeholder.
    """
    soup = BeautifulSoup(html, 'html.parser')
    lots = []
    for card in soup.find_all('div', class_='au-lot-list-card'):
        price_tag = card.find('span', class_='au-price__value')
        if price_tag is not None:
            # Drop the non-breaking spaces inside the price text.
            price = price_tag.get_text().replace('\xa0', '') + RUB
        else:
            # The placeholder is not an amount, so it gets no currency suffix.
            price = 'Цену уточняйте'

        # Tolerate cards that lack a link or title element instead of
        # aborting the whole page with AttributeError/TypeError.
        link_tag = card.find('a', class_='au-lot-list-card-title-link')
        href = link_tag.get('href') if link_tag is not None else None
        title_tag = card.find('div', class_='au-lot-list-card-title')

        lots.append({
            'link': HOST + href if href else '',
            # Trim surrounding markup whitespace so it does not leak into the CSV.
            'title': title_tag.get_text().strip() if title_tag is not None else '',
            'price': price,
        })
    return lots
def save_file(items, path):
    """Write the collected lots to a semicolon-delimited CSV file.

    Args:
        items: Iterable of dicts with the keys ``'link'``, ``'title'`` and
            ``'price'``.
        path: Destination file path; an existing file is overwritten.

    Raises:
        OSError: If the file cannot be opened for writing.
        KeyError: If an item lacks one of the expected keys.
    """
    # An explicit encoding keeps the Cyrillic header and titles from depending
    # on the platform locale (which can raise UnicodeEncodeError). The BOM
    # written by 'utf-8-sig' lets spreadsheet applications detect UTF-8.
    with open(path, 'w', newline='', encoding='utf-8-sig') as file:
        writer = csv.writer(file, delimiter=';')
        writer.writerow(['Ссылка', 'Название', 'Цена'])
        writer.writerows(
            [item['link'], item['title'], item['price']] for item in items
        )
def parse():
    """Scrape every page of the listing at URL and save the lots to FILE.

    Fetches URL once to learn the page count, then requests each page with a
    ``page`` query parameter, collects the lots and writes them to FILE.
    Prints progress and the total number of lots. Prints 'Error' and returns
    without writing anything when the initial request does not answer
    HTTP 200.
    """
    first = get_html(URL)
    if first.status_code != 200:
        print('Error')
        return

    motherboard = []
    pages_count = get_pages_count(first.text)
    for page in range(1, pages_count + 1):
        print(f'Парсинг страницы {page} из {pages_count}...')
        response = get_html(URL, params={'page': page})
        # Only the first request used to be checked; an error page fetched
        # here must not be parsed as if it were a listing.
        if response.status_code != 200:
            print(f'Ошибка загрузки страницы {page}: HTTP {response.status_code}')
            continue
        motherboard.extend(get_content(response.text))

    save_file(motherboard, FILE)
    print(f'Количество лотов: {len(motherboard)}')
# Run the scraper when this file is executed.
# NOTE(review): no `if __name__ == '__main__':` guard is visible here, so
# importing this module would also start the scrape — confirm that is intended.
parse()