Вот такой вот код:
import csv
import time
from tqdm import tqdm
from requests import Session
from bs4 import BeautifulSoup
from proxy_randomizer import RegisteredProviders
from headers import headers
s = Session()
rp = RegisteredProviders()
rp.parse_providers()
prox = rp.get_random_proxy()
prox_list = {
'http': {prox},
}
with open('useful_links.txt', 'r') as f:
lines = [line.strip() for line in f.readlines()]
companys = []
time.sleep(1.5)
for line in lines:
q = s.get(line, headers=headers, proxies=prox_list)
result = q.content
soup = BeautifulSoup(result, 'lxml')
try:
name = soup.find('h1', {'class': 'product-title'}).text
except AttributeError:
name = ''
try:
price_strong = soup.find('li', {'class': 'price-current'}).find('strong').text
except AttributeError:
price_strong = ''
try:
price_sup = soup.find('li', {'class': 'price-current'}).find('sup').text
except AttributeError:
price_sup = ''
price = price_strong + price_sup
try:
discription = soup.find('div', {'class': 'product-bullets'}).text
except AttributeError:
discription = ''
try:
img = soup.find('div', {'class': 'swiper-zoom-container'}).find('img').get('src')
except AttributeError:
img = ''
try:
p = s.get(img, headers=headers, proxies=prox_list)
try:
with open(f'/home/anatolii/PycharmProjects/pythonProject/img/{name}.jpg', 'wb') as file:
file.write(p.content)
except FileNotFoundError:
pass
pass
except AttributeError:
continue
photo_path = f'/home/anatolii/PycharmProjects/pythonProject/img/{name}.jpg'
company = {
'name': name,
'price': price,
'discription': discription,
'photo_path': photo_path,
}
companys.append(company)
with open('companys.csv', 'w', newline='') as file:
writer = csv.writer(file, delimiter=',', lineterminator='\r')
writer.writerow(['Name', 'Price', 'Discription', 'Photo_path'])
for company in tqdm(companys):
writer.writerow(
[company['name'], company['price'], company['discription'], company['photo_path']])
Выдает такую ошибку:
raceback (most recent call last):
File "/home/anatolii/PycharmProjects/pythonProject/newegg_pars.py", line 51, in <module>
p = s.get(img, headers=headers, proxies=prox_list)
requests.exceptions.MissingSchema: Invalid URL '': No scheme supplied. Perhaps you meant http://?
Какой УРЛ неправильный? Как обработать эту ошибку? Возникает либо сразу, либо отработав какое-то время.