С таким подходом скоро в книжных магазинах начнут предлагать почитать Толстого вместо Донцовой.
import os
import urllib, urllib2
from BeautifulSoup import BeautifulSoup
def get_soup(address):
    """Fetch ``address`` and return its body parsed as a ``BeautifulSoup`` tree.

    The response is closed as soon as the body has been read (or reading
    failed), so the underlying connection is not left open. Any error raised
    by ``urllib2.urlopen`` or ``read`` propagates to the caller unchanged.
    """
    response = urllib2.urlopen(address)
    try:
        html = response.read()
    finally:
        # Closed explicitly with try/finally: the Python 2 urllib2 response
        # object cannot be used in a ``with`` statement.
        response.close()
    return BeautifulSoup(html)
def get_pic_urls(start_pic):
    """Return the ``href`` of each matching ``<a>`` on one toplist page.

    ``start_pic`` is interpolated into the toplist URL as an integer
    (presumably the offset of the first picture on the page -- TODO confirm).
    The anchors collected are those matched by
    ``findAll("a", "thdraggable thlink")``, presumably the thumbnail links.
    """
    toplist_url = "http://wallbase.cc/toplist/%d/213/eqeq/0x0/0/110/60/0" % start_pic
    hrefs = []
    for anchor in get_soup(toplist_url).findAll("a", "thdraggable thlink"):
        hrefs.append(anchor["href"])
    return hrefs
# Number of toplist pages to walk through.
number_of_pages = 20
# Step between consecutive ``start_pic`` values; presumably the number of
# wallpapers shown per toplist page -- TODO confirm against the site.
pics_per_page = 60

# For every link on every toplist page: open the wallpaper page, locate the
# full-size image inside the "bigwall" container and save it into the current
# directory under the last path component of its URL.
for page_number in xrange(number_of_pages):
    for link in get_pic_urls(page_number * pics_per_page):
        pic_soup = get_soup(link)

        # find() yields None for a missing element, so a page without the
        # expected markup is skipped instead of aborting the whole run.
        div = pic_soup.find("div", id="bigwall")
        img = div.img if div is not None else None
        pic_address = img.get("src") if img is not None else None
        if not pic_address:
            print("Skipped %s: no image found" % link)
            continue

        filename = pic_address.split('/')[-1].strip()
        if os.path.exists(filename):
            # Already downloaded on an earlier run.
            continue

        # Download under a temporary name and rename on success. Otherwise an
        # interrupted transfer would leave a truncated file that the
        # os.path.exists() check above treats as complete on the next run.
        partial = filename + ".part"
        try:
            urllib.urlretrieve(pic_address, partial)
            os.rename(partial, filename)
        finally:
            # Still present only if the download or rename failed.
            if os.path.exists(partial):
                os.remove(partial)
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print("Downloaded %s" % filename)