from bs4 import BeautifulSoup
import requests
def parse():
URL = 'https://www.olx.pt/tecnologia-e-informatica/'
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}
response = requests.get(URL,
headers=HEADERS)
soup = BeautifulSoup(response.content, 'html.parser')
items = soup.findAll('div', class_='offer-wrapper')
comps = []
for item in items:
olx_ship = item.find('span', class_='promo-label promo-label--ctt inlblk rel')
if olx_ship:
pass
else:
pass
try:
comps.append(
{
#получение названия
'title': item.find('a', class_='marginright5 link linkWithHash detailsLink').get_text(strip=True),
#получения цены
'price': item.find('p', class_='price').get_text(strip=True),
# получения ссылки
'link': item.find('a', class_='marginright5 link linkWithHash detailsLink').get('href'),
# получения города
'city': item.find('small', class_='breadcrumb x-normal').find_next('span').get_text(strip=True)
})
except:
pass
for user in comps:
try:
r = requests.get(user["link"], headers=HEADERS)
soup_2 = BeautifulSoup(r.text, 'html.parser')
name = soup_2.find('a', class_='user-offers').get('href')
comps.append({'user': name})
except:
pass
for comp in comps:
print(f'{comp["title"]} -> Price: {comp["price"]} -> Link: {comp["link"]} -> City: {comp["city"]}')
# -> Price: {comp["price"]} -> Link: {comp["link"]} -> City: {comp["city"]}
parse()
Вывод:
Dell K17A Thunderbolt USB-C Docking Station -> Price: 45 € -> Link:
https://www.olx.pt/anuncio/dell-k17a-thunderbolt-u... -> City: Negociável
Traceback (most recent call last):
File "E:\софт\parser portugal\main.py", line 53, in
parse()
File "E:\софт\parser portugal\main.py", line 48, in parse
print(f'{comp["title"]} -> Price: {comp["price"]} -> Link: {comp["link"]} -> City: {comp["city"]}')
KeyError: 'title'
Process finished with exit code 1
и как видим что последний товар совпадает и дальше ошибка
и при парсинге ссылок профиля на странице товара их не находит или если находит, то всем одинаковые