Всем привет, разбираюсь в парсерах на python. У меня перестал работать парсер для сайта realt.by.
Пример ссылки:
https://realt.by/sale-flats/object/2562548/
import pandas
import pandas as pd
import requests
import PySimpleGUI as sg
from bs4 import BeautifulSoup
from time import sleep
import urllib.request
def is_valid(url):
try:
urllib.request.urlopen(url)
return True
except Exception:
return False
layout = [
[sg.Text('Ссылка на объявление'), sg.InputText(key='link')],
[sg.Button('Применить'), sg.Button('Отмена')],
[sg.ProgressBar(max_value=10, orientation='h', size=(30,15), key='-PROG-')]
]
window = sg.Window('Добавить объявление', layout)
while True: # The Event Loop
event, values = window.read()
# print(event, values) #debug
if event in (None, 'Exit', 'Отмена'):
break
if event == 'Применить':
curr = 0
window["-PROG-"].update_bar(curr + 1)
curr += 1
sql_img = []
sql_link = []
sql_title = []
sql_address = []
sql_rajon = []
sql_metro = []
sql_rooms = []
sql_area = []
sql_floor = []
sql_price = []
sql_comment = []
sql_status = []
allAparts = []
def intTryParse(value):
try:
int(value)
return True
except ValueError:
return False
window["-PROG-"].update_bar(curr + 1)
curr += 1
url = values['link']
if not is_valid(url):
sg.popup("Некорректная ссылка")
else:
data = requests.get(url)
soup = BeautifulSoup(data.text, features="html.parser")
img = soup.find('div', attrs={"class": "swiper-wrapper"}).findAll('img', class_="blur-sm scale-105")[1].get('src')
window["-PROG-"].update_bar(curr + 1)
curr += 1
region = soup.find('ul', class_="w-full mb-0.5 -my-1").findAll('li')[4].find('a',
class_="focus:outline-none sm:focus:shadow-10bottom transition-colors cursor-pointer text-info-500 hover:text-info-600 active:text-info").text
if str(region).split(' ')[1] != "район":
region = soup.find('ul', class_="w-full mb-0.5 -my-1").findAll('li')[3].find('a',
class_="focus:outline-none sm:focus:shadow-10bottom transition-colors cursor-pointer text-info-500 hover:text-info-600 active:text-info").text
window["-PROG-"].update_bar(curr + 1)
curr += 1
title = soup.find('h1',
class_='order-1 mb-0.5 md:-order-2 md:mb-4 block w-full !inline-block lg:text-h1Lg text-h1 font-raleway font-bold flex items-center').text
address = soup.findAll('a',
class_="focus:outline-none sm:focus:shadow-10bottom transition-colors cursor-pointer inline md:inline-block mr-4 text-basic hover:text-info-500 active:text-info")
window["-PROG-"].update_bar(curr + 1)
curr += 1
address = address[0].text + " " + address[1].text
try:
metro = soup.find('li', class_="align-top inline-flex mr-4 last:mr-0").find('a',
class_='focus:outline-none sm:focus:shadow-10bottom transition-colors cursor-pointer inline md:inline-block mr-4 text-basic hover:text-info-500 active:text-info').text.strip()
except AttributeError:
metro = "Нету"
window["-PROG-"].update_bar(curr + 1)
curr += 1
info = soup.find('ul', class_='w-full -my-1').findAll('p')
rooms = info[0].text.strip()
window["-PROG-"].update_bar(curr + 1)
curr += 1
area = info[1].text.strip()
floor = info[5].text.strip()
if intTryParse(rooms):
area = info[2].text.strip()
window["-PROG-"].update_bar(curr + 1)
curr += 1
if intTryParse(floor):
floor = info[6].text.strip()
comment = soup.find('section', class_="bg-white flex flex-wrap md:p-6 my-4 rounded-md").text
price = soup.find('h2',
class_='w-full sm:w-auto sm:inline-block sm:mr-1.5 lg:text-h2Lg text-h2 font-raleway font-bold flex items-center').text.strip()
status = soup.find('div', class_="md:w-full text-info-500 md:items-center relative flex flex-wrap w-1/2 mb-6").find('span', class_="text-subhead md:text-body text-basic w-full").text
window["-PROG-"].update_bar(curr + 1)
curr += 1
sql_img.append(img)
sql_link.append(url)
sql_title.append(title)
sql_address.append(address)
sql_rajon.append(region)
sql_metro.append(metro)
sql_rooms.append(rooms)
sql_area.append(area)
sql_floor.append(floor)
sql_price.append(price)
sql_comment.append(comment)
sql_status.append(status)
allAparts.append([img, url, title, address, region, metro, rooms, area, floor, price, comment, status])
df_aparts = pandas.DataFrame(
{'img': sql_img, 'link': sql_link, 'title': sql_title, 'address': sql_address, 'rajon': sql_rajon,
'metro': sql_metro, 'rooms': sql_rooms, 'area': sql_area, 'floor': sql_floor, 'price': sql_price,
'comment': sql_comment, 'status': sql_status})
window["-PROG-"].update_bar(curr + 1)
curr += 1
# To excel
writer = pd.ExcelWriter(r"C:\Users\Kisliy\source\repos\RealEstateAgency\RealEstateAgency\bin\Debug\output.xlsx")
df_aparts.to_excel(writer)
writer.close()
break
Получаю такую ошибку:
Traceback (most recent call last):
File "C:\Users\karat\PycharmProjects\testproj\parser.py", line 70, in
img = soup.find('div', attrs={"class": "swiper-wrapper"}).findAll('img', class_="blur-sm scale-105")[1].get('src')
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
AttributeError: 'NoneType' object has no attribute 'findAll'