import requests
from bs4 import BeautifulSoup
import csv
URL = 'https://www.olx.kz/transport/moto/alma-ata/'
HEADERS = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:93.0) Gecko/20100101 Firefox/93.0',
'accept': '*/*'}
FILE_CSV = 'motocycles.csv'
def get_html(url,params= None):
r = requests.get(url, headers= HEADERS,params = params )
return r
def get_page(html):
soup = BeautifulSoup(html, 'html.parser')
pagenation = soup.find_all('span', class_='item')
if pagenation:
return int(pagenation[-1].get_text())
else:
return 1
def get_content(html):
soup = BeautifulSoup(html,'html.parser')
items = soup.find_all('tr',class_= 'wrap')
moto = []
for item in items:
moto.append({
'title': item.find('h3', class_='lheight22 margintop5').get_text(strip = True),
'price': item.find('p', class_='price').get_text(strip=True),
'link': item.find('a', class_='link').get('href'),
'Adress-time': item.find('td', class_='bottom-cell').get_text(strip = True),
})
return moto
def save_csv(items, path):
with open (path,'w', newline='') as file:
writer = csv.writer(file, delimiter=';')
writer.writerow(['Марка', 'Цена', 'Ссылка', 'Адрес-Время'])
for item in items:
writer.writerow([item['title'], item['price'],item['link'],item['Adress-time']])
def pars():
html = get_html(URL)
if html.status_code == 200:
motos_while = []
#moto = get_content(html.text)
pages = get_page(html.text)
for page in range(1,pages + 1 ):
print(f'Парсинг страницы{page} из {pages}')
html = get_html(URL,params ={'page': page})
motos_while.extend(get_content(html.text))
save_csv(motos_while, FILE_CSV)
print(motos_while)
else:
print('error')
pars()
Выдает вот такую ошибку
Traceback (most recent call last):
File "C:\Users\AlexK\PycharmProjects\numpy\main.py", line 66, in
pars()
File "C:\Users\AlexK\PycharmProjects\numpy\main.py", line 60, in pars
save_csv(motos_while, FILE_CSV)
File "C:\Users\AlexK\PycharmProjects\numpy\main.py", line 48, in save_csv
writer.writerow([item['title'], item['price'],item['link'],item['Adress-time']])
File "C:\Users\AlexK\AppData\Local\Programs\Python\Python39\lib\encodings\cp1251.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u0493' in position 20: character maps to
Понятия не имею почему.