#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib.request
from bs4 import BeautifulSoup
import csv
#url0 = 'http://journals.indexcopernicus.com/masterlist.php?page=%s' %(page)
# Scrape the Index Copernicus master list (filtered by the query string in
# ``url1``) page by page, collect one dict per journal row in ``p``, and
# write the title/ISSN pairs to ``books.csv``.

# Accumulates one dict per journal row across all result pages.
p = []
# Query-string suffix appended to every page URL (filter parameters).
url1 = '&2&1&1&cntr%5B%5D=UKR&icv_from=0&icv_to=176'

# Fetch the first page only to discover how many result pages exist.
url0 = 'http://journals.indexcopernicus.com/masterlist.php?page=%s' % (str(1))
# Close the HTTP response deterministically instead of leaking the socket.
with urllib.request.urlopen(url0 + url1) as response:
    html = response.read()
soup = BeautifulSoup(html, "lxml")
table = soup.find('table')
# The page count is read from the link text of the 9th <li> of the <ul>
# inside the 10th table row, with the first and last characters stripped.
# NOTE(review): presumably that text is a bracketed number in the pager,
# e.g. "[12]" - confirm against the live page markup.
pager_items = table.find_all('tr')[9].find('td').find('div').find('ul').find_all('li')
page_all = int(pager_items[8].a.text[1:-1])

for page in range(page_all):
    # Pages are numbered from 1 on the site, ``range`` counts from 0.
    url0 = 'http://journals.indexcopernicus.com/masterlist.php?page=%s' % (str(page + 1))
    with urllib.request.urlopen(url0 + url1) as response:
        html = response.read()
    soup = BeautifulSoup(html, "lxml")
    table = soup.find('table')
    # Skip the first 12 rows of the table.
    # NOTE(review): presumably header/pager rows - confirm.
    tags = table.find_all('tr')[12:]
    for i in tags:
        c = i.find_all('td')
        # NOTE(review): in BeautifulSoup ``c[2].alt`` looks up a child tag
        # named <alt>, not an ``alt`` attribute, so 'Country' is probably
        # always None - confirm the intended source (e.g. an <img alt=...>).
        p.append({'title': c[0].a.text, 'ISSN': c[1].a.text, 'Country': c[2].alt, 'Area': c[3].text, 'ICV2013': c[4].text, 'ICV2014': c[5].text})
    print('Парсинг %i/%i' % (page + 1, page_all))

# An explicit UTF-8 encoding is required: without it ``open`` uses the
# locale code page (cp1251 in the reported traceback), which cannot encode
# characters such as U+2011 and raises UnicodeEncodeError.
# ``newline=''`` is what the csv module expects; without it extra blank
# lines appear between rows on Windows.
with open('books.csv', 'w', encoding='utf-8', newline='') as csvfile:
    writer = csv.writer(csvfile)
    for i in p:
        writer.writerow([i['title'], i['ISSN']])
Парсер должен считать таблицу и записать её в CSV, но на последнем этапе выдаёт ошибку:
Traceback (most recent call last):
File "C:/Users/User/Desktop/1.py", line 33, in <module>
writer.writerow([i['title'],i['ISSN']])
File "C:\Users\User\AppData\Local\Programs\Python\Python35-32\lib\encodings\cp1251.py", line 19, in encode
return codecs.charmap_encode(input,self.errors,encoding_table)[0]
UnicodeEncodeError: 'charmap' codec can't encode character '\u2011' in position 82: character maps to <undefined>
Что делать?