Hi, I'd be grateful to anyone who can help. The gist: there is a list of, say, 5 links (ar_deco below), and for each of them a loop has to run 3 methods of a class: 1. create a folder named after the art movement (if it doesn't exist yet) and, inside it, a folder with the artist's name; 2. create a .log file with links to the paintings via
logging.basicConfig(filename=f'{path}/{school}/{artist_name}/list_of_all_works_of_{artist_name}.log', level=logging.INFO, format=FORMAT)
3. download the paintings whose links were written to that file.
Like this:
for i in ar_deco:
    w = Wikiart()
    w.create_folder(i)
    w.get_list_of_all_works(i)
    w.download_images(i)
    del w
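For reference, a minimal variant of the same loop with per-link error handling (a sketch, not the original code), so that a failing link is reported and the run continues with the remaining ones:

# Sketch only: same loop as above, but one broken link no longer aborts the whole run.
for link in ar_deco:
    w = Wikiart()
    try:
        w.create_folder(link)
        w.get_list_of_all_works(link)
        w.download_images(link)
    except Exception as e:
        # report which link failed and move on to the next one
        print(f'{link}: {e}')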
With 1 link in ar_deco the original loop works. With more than one, it raises an error on the second link:
Traceback (most recent call last):
  File "G:\Desktop\py\wikiart\wikiart.py", line 285, in <module>
    w.download_images(i)
  File "G:\Desktop\py\wikiart\wikiart.py", line 227, in download_images
    f = open(f'{path}/{school}/{artist_name}/list_of_all_works_of_{artist_name}.log', 'r').readlines()
FileNotFoundError: [Errno 2] No such file or directory: 'G:/Desktop/py/wikiart/Экспрессионизм/Erin Hanson/list_of_all_works_of_Erin Hanson.log'
i.e. the methods w.create_folder(i) and w.download_images(i) are executed, but w.get_list_of_all_works(i) is not.
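Possibly relevant, as a standalone sketch rather than the original script ('first.log' and 'second.log' are made-up names): logging.basicConfig only configures the root logger the first time it is called, and later calls with a different filename are silently ignored unless force=True (Python 3.8+) is passed:

import logging

# first call: attaches a FileHandler for first.log to the root logger
logging.basicConfig(filename='first.log', level=logging.INFO, format='%(message)s')
logging.info('goes to first.log')

# second call: silently ignored, because the root logger already has a handler
logging.basicConfig(filename='second.log', level=logging.INFO, format='%(message)s')
logging.info('still goes to first.log; second.log is never created')

# with force=True (Python 3.8+) the old handlers are removed and second.log is used
logging.basicConfig(filename='second.log', level=logging.INFO, format='%(message)s', force=True)
logging.info('now goes to second.log')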
The code itself:
import requests
from bs4 import BeautifulSoup as bs
import re, os, sys
import logging
from wget import download

ar_deco = [
    "https://www.wikiart.org/ru/francois-pompon/all-works/text-list",
    "https://www.wikiart.org/ru/aleksandra-ekster/all-works/text-list"
]

FORMAT = '%(message)s'
path = os.path.abspath(os.path.dirname(sys.argv[0])).replace('\\', '/')
BASE_URL = 'https://wikiart.org'
headers = {
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"
}


class Wikiart:

    def get_soup(self, url):
        session = requests.Session()
        r = session.get(url, headers=headers)
        soup = bs(r.content, 'html.parser')
        return soup

    def get_artist_name(self, url):
        soup = self.get_soup(url)
        artist_name = soup.find('a', class_='artist-href').text.replace(':', '')
        return artist_name.strip()

    def get_school(self, url):
        soup = self.get_soup(url)
        school = soup.find('div', class_='wiki-breadcrumbs-links').find_all('a')[2].text.strip()
        return school.strip()

    def create_folder(self, url):
        # create <path>/<school>/<artist_name>; if the artist folder already exists, stop the whole script
        artist_name = self.get_artist_name(url)
        school = self.get_school(url)
        if os.path.exists(f'{path}/{school}/{artist_name}'):
            print(f'Exists: {path}/{school}/{artist_name}')
            sys.exit()
        if not os.path.exists(f'{path}/{school}'):
            os.mkdir(f'{path}/{school}')
            print(f'Created: {path}/{school}')
        if not os.path.exists(f'{path}/{school}/{artist_name}'):
            os.mkdir(f'{path}/{school}/{artist_name}')
            print(f'Created: {path}/{school}/{artist_name}')

    def get_list_of_all_works(self, url):
        # write one painting URL per line into the artist's .log file via the logging module
        artist_name = self.get_artist_name(url)
        school = self.get_school(url)
        logging.basicConfig(
            filename=f'{path}/{school}/{artist_name}/list_of_all_works_of_{artist_name}.log',
            level=logging.INFO,
            format=FORMAT
        )
        soup = self.get_soup(url)
        arts = soup.find_all('li', class_='painting-list-text-row')
        for link in arts:
            img = BASE_URL + link.a['href']
            title = link.text.replace(', ?', '')
            logging.info(img)
        return

    def download_images(self, url):
        # read the .log file back and download every painting listed in it
        artist_name = self.get_artist_name(url)
        school = self.get_school(url)
        f = open(f'{path}/{school}/{artist_name}/list_of_all_works_of_{artist_name}.log', 'r').readlines()
        num_of_lines = sum(1 for _ in f)
        n = 0
        forbidden_symbols = ('*,<>:\'\\"/\|?=')  # defined but never used
        try:
            for _ in f:
                soup = self.get_soup(_.strip())
                try:
                    img = soup.find('img', itemprop='image')['src']
                except:
                    pass
                try:
                    title = soup.find('div', class_='wiki-breadcrumbs wiki-breadcrumbs-artwork'). \
                        find_all('a')[5].text. \
                        replace('"', '_')
                except:
                    pass
                session = requests.Session()
                try:
                    img_r_ = session.get(img)
                except Exception as e:
                    print(e)
                    continue
                con = img_r_.content
                file_name = f'{path}/{school}/{artist_name}/{title}_{n}.jpg'
                try:
                    outf = open(file_name, "wb")
                    outf.write(con)
                    outf.close()
                except:
                    pass
                print(f'{img} : {title} ({n} from {num_of_lines})')
                n += 1
        except Exception as e:
            raise(e)
            pass


for i in ar_deco:
    w = Wikiart()
    w.create_folder(i)
    w.get_list_of_all_works(i)
    w.download_images(i)
    del w