import sys

import requests
from bs4 import BeautifulSoup
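
# A small Avito scraper (a sketch of the original's intent): walk the catalog
# pages of one section, append every ad link to file_links_avito.txt, and
# stop once site_end (the newest link saved on a previous run) is reached.
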
def get_html(url):
    """Download a page and return its HTML as text."""
    result = requests.get(url)
    return result.text

def get_total_pages(html):
    """Read the page count from the last link in the pagination block."""
    soup = BeautifulSoup(html, 'lxml')
    pagination = soup.find('div', class_='pagination-pages clearfix')
    # The last pagination link looks like '...?p=N'; take N.
    # Note: this assumes a pagination block exists, i.e. more than one page.
    last_page = pagination.find_all('a', class_='pagination-page')[-1]
    pages = last_page.get('href').split('=')[1]
    return pages

def get_links(html, url_site, site_end):
    """Append all ad links from one catalog page to file_links_avito.txt.

    Exits the whole script as soon as site_end, the last link saved on a
    previous run, comes up again.
    """
    soup = BeautifulSoup(html, 'lxml')
    div = soup.find('div', class_='catalog-list js-catalog-list clearfix')
    # Query the links once instead of re-running find_all on every iteration.
    links = div.find_all('a', class_='item-description-title-link')
    with open('file_links_avito.txt', 'a') as file:
        for j, link in enumerate(links):
            site = url_site + link.get('href')
            if site == site_end:
                # Reached the newest already-saved ad: nothing new past here.
                sys.exit()
            file.write('{}\n'.format(site))
            print('{}. {}'.format(j, site))

def main():
    # The last ad saved on a previous run; scraping stops when it reappears.
    site_end = 'https://www.avito.ru/vologda/komnaty/komnata_12_m_v_1-k_35_et._1559836158'
    url = 'https://www.avito.ru/vologodskaya_oblast/komnaty/prodam?p=1'
    url_site = 'https://www.avito.ru'
    base_url = 'https://www.avito.ru/vologodskaya_oblast/komnaty/prodam?p='
    for i in range(1, int(get_total_pages(get_html(url))) + 1):
        get_links(get_html(base_url + str(i)), url_site, site_end)
        print('Page {}'.format(i))
    input('Finished!\nPress [Enter] to exit')

if __name__ == '__main__':
    main()