Я делал парсер Wildberries, и тут возникли вот такие ошибки:
DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): www.wildberries.ru:443
DEBUG:urllib3.connectionpool:https://www.wildberries.ru:443 "GET /catalog/zhenshchinam/odezhda/bluzki-i-rubashki:443 HTTP/1.1" 200 50570
С чем это связано и как решить?
import logging
import collections
import requests
import bs4
# Configure the root logger at DEBUG. Because the root logger is affected,
# DEBUG records emitted by third-party libraries are printed as well -- e.g.
# the "urllib3.connectionpool" lines produced while requests opens a
# connection. Those records are diagnostics, not errors.
logging.basicConfig(level=logging.DEBUG)
# Logger used by this script for its own messages.
logger = logging.getLogger('wildberries')
# Immutable record describing one parsed product card.
# The factory must be *called*: the previous chained assignment
# (`ParseResult = collections.namedtuple = {...}`) bound a set literal to
# both names, which replaced the stdlib factory and left ParseResult as a set.
ParseResult = collections.namedtuple(
    'ParseResult',
    (
        'brand_name',
        'goods_name',
        'url',
    ),
)
class Client:
    """Fetch one Wildberries catalog page and log the product links on it.

    Instance attributes (set in ``__init__``):
        session: ``requests.Session`` reused for every request.
        result: list created empty; nothing in this class appends to it.
    """

    # Catalog section to scrape (women's blouses and shirts). The port is not
    # part of the path: with an https:// scheme 443 is already implied, and a
    # ":443" suffix after the last path segment is sent to the server as part
    # of the requested path.
    CATALOG_URL = 'https://www.wildberries.ru/catalog/zhenshchinam/odezhda/bluzki-i-rubashki'

    # Seconds to wait for the server; without a timeout a stalled connection
    # would block the script indefinitely.
    REQUEST_TIMEOUT = 10

    # CSS selector matching one product card in the listing.
    CARD_SELECTOR = 'div.product-card.j-card-item.j-good-for-listing-event'

    def __init__(self):
        """Create the HTTP session with browser-like request headers."""
        self.session = requests.Session()
        # update() adds our headers on top of the defaults requests sets,
        # instead of replacing the whole header mapping with a plain dict.
        self.session.headers.update({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.114 Safari/537.36 Edg/103.0.1264.62',
            'Accept-Language': 'ru',
        })
        self.result = []

    def load_page(self):
        """Download the catalog page and return its HTML as text.

        Raises:
            requests.HTTPError: if the server answers with an error status
                (raised by ``raise_for_status``).
        """
        res = self.session.get(url=self.CATALOG_URL, timeout=self.REQUEST_TIMEOUT)
        res.raise_for_status()
        return res.text

    def parse_page(self, text: str):
        """Parse the HTML in *text* and process every product card found."""
        soup = bs4.BeautifulSoup(text, 'lxml')
        cards = soup.select(self.CARD_SELECTOR)
        if not cards:
            # Make the "nothing happened" case visible instead of silent.
            # NOTE(review): an empty match may mean the markup changed or the
            # listing is filled in by JavaScript after load -- verify.
            logger.warning('no product cards found on the page')
        for block in cards:
            self.parse_block(block=block)

    def parse_block(self, block):
        """Log the link found inside one product card element.

        Logs an error and returns early when the card has no link element or
        the link has an empty ``href``.
        """
        # href is an attribute of <a>; the price <div> selected previously
        # does not carry it, so every card ended in 'no href'.
        # NOTE(review): assumes the first anchor with href inside the card is
        # the product link -- confirm against the live markup.
        url_block = block.select_one('a[href]')
        if not url_block:
            logger.error('no url_block')
            return
        url = url_block.get('href')
        if not url:
            logger.error('no href')
            return
        logger.info('%s', url)

    def run(self):
        """Download the catalog page, then parse it."""
        text = self.load_page()
        self.parse_page(text=text)
# Script entry point: build the scraper and run one fetch-and-parse pass.
if __name__ == '__main__':
    Client().run()