import sqlite3

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
def url_generator():
    """Build the list of category page URLs to scrape (pages 2-25)."""
    home_links = []
    for page in range(2, 26):
        home_links.append(f"https://www.olx.ua/elektronika/?search%5Bad_homepage_to%3Afrom%5D=2020-08-25&page={page}")
return home_links
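# A lazy variant (sketch, not used below): yielding URLs one at a time avoids
# building the whole list up front. The name url_iter is ours, not from the
# original script.
def url_iter():
    for page in range(2, 26):
        yield f"https://www.olx.ua/elektronika/?search%5Bad_homepage_to%3Afrom%5D=2020-08-25&page={page}"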
# Slower approach: drives a real Chrome browser through Selenium.
def get_links_by_selenium():
    """Collect ad links by driving a real Chrome browser."""
    list_links = []
    driver = webdriver.Chrome(
        service=Service(r'C:\Users\tester\Documents\mypythonproject\myparrsers\chromedriver.exe'))
    # Open one page first so the cookie banner can be dismissed once.
    driver.get(
        "https://www.olx.ua/elektronika/?search%5Bad_homepage_to%3Afrom%5D=2020-08-25&page=2")
    driver.find_element(
        By.XPATH, '//button[@class="cookie-close abs cookiesBarClose"]').click()
    for url in url_generator():
        driver.get(url)
        for link in driver.find_elements(By.XPATH, '//a[@class="marginright5 link linkWithHash detailsLink"]'):
            list_links.append(link.get_attribute('href'))
    driver.quit()
    return list_links
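# A headless sketch (assumes Selenium 4.6+, which fetches a matching
# chromedriver automatically): the same scrape without opening a visible
# browser window. get_links_headless is our name, not part of the original.
def get_links_headless():
    options = webdriver.ChromeOptions()
    options.add_argument('--headless=new')  # run Chrome without a UI
    driver = webdriver.Chrome(options=options)
    list_links = []
    try:
        for url in url_generator():
            driver.get(url)
            for link in driver.find_elements(By.XPATH, '//a[@class="marginright5 link linkWithHash detailsLink"]'):
                list_links.append(link.get_attribute('href'))
    finally:
        driver.quit()
    return list_links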
# Faster approach: plain HTTP requests parsed with BeautifulSoup.
def get_links_by_beautifulsoup():
    """Collect ad links with plain HTTP requests (much faster than Selenium)."""
    list_links = []
    for url in url_generator():
        html = requests.get(url, timeout=10).text
        soup = BeautifulSoup(html, 'lxml')
        for link in soup.find_all('a', {'class': 'marginright5 link linkWithHash detailsLink'}):
            list_links.append(link['href'])
        print(len(list_links))  # running total after each page
    return list_links
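# A further speed-up sketch (our addition): one requests.Session reuses the
# TCP connection across all page fetches instead of opening a new connection
# per requests.get() call.
def get_links_with_session():
    list_links = []
    with requests.Session() as session:
        for url in url_generator():
            soup = BeautifulSoup(session.get(url, timeout=10).text, 'lxml')
            for link in soup.find_all('a', {'class': 'marginright5 link linkWithHash detailsLink'}):
                list_links.append(link['href'])
    return list_links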
def get_content_from_page():
    """Visit each ad page and yield (name, price, link) for ads that still show a price."""
    count = 0
    for link in get_links_by_beautifulsoup():
        page_html = requests.get(link, timeout=10).text
        page_soup = BeautifulSoup(page_html, 'lxml')
        try:
            price = page_soup.find('strong', {'class': 'pricelabel__value arranged'}).text
            name = page_soup.find('h1').text.strip()
        except AttributeError:
            # The ad was removed or the markup changed: skip it.
            pass
        else:
            count += 1
            print(f'{count}.) {name} | {price} | {link}')
            yield name, price, link
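# Politeness sketch (our addition; the one-second default is an arbitrary
# assumption): because get_content_from_page() is a generator, sleeping
# between consumed items also spaces out the underlying HTTP requests.
def get_content_throttled(delay=1.0):
    import time
    for item in get_content_from_page():
        yield item
        time.sleep(delay)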
def save_content_in_db():
    """Stream scraped rows into a local SQLite database."""
    db = sqlite3.connect('links.db')
    sql = db.cursor()
    # Create the table on first run so the script works against an empty database.
    sql.execute('CREATE TABLE IF NOT EXISTS commodity (name TEXT, price TEXT, link TEXT)')
    for name, price, link in get_content_from_page():
        sql.execute('INSERT INTO commodity (name, price, link) VALUES (?, ?, ?)',
                    (name, price, link))
        db.commit()  # commit per row so progress survives an interruption
    db.close()
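# Batched alternative (sketch, not the original approach): collect every row
# first, then insert them in a single executemany() call; the connection
# context manager commits the whole transaction on success.
def save_content_in_db_batched():
    rows = list(get_content_from_page())
    db = sqlite3.connect('links.db')
    with db:
        db.execute('CREATE TABLE IF NOT EXISTS commodity (name TEXT, price TEXT, link TEXT)')
        db.executemany('INSERT INTO commodity (name, price, link) VALUES (?, ?, ?)', rows)
    db.close()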
def main():
save_content_in_db()
print('finished')
if __name__ == '__main__':
main()