Есть код, который работает так: сначала получает все ссылки, а потом начинает парсить. Как сделать, чтобы он брал одну ссылку и сразу начинал её парсить? Если на аккаунте 1000 страниц, по которым надо перейти, он потратит огромное время на заполнение списка "urls" и только потом начнёт собирать информацию. Как это можно переделать?
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.common.proxy import *
import time
import random
# Common prefix of the paginated "my courses" URLs; the page number is appended.
PAGE_URL_PREFIX = "https://www.udemy.com/home/my-courses/learning/?p="


def read_credentials(path):
    """Return a ``{login: password}`` dict read from ``login:password`` lines.

    Lines without a ``:`` separator (including blank lines) are skipped.
    Only the first ``:`` splits a line, so a password may contain colons.
    A login that appears several times keeps its last password.
    """
    credentials = {}
    with open(path, 'r') as source:
        for raw_line in source:
            login, separator, password = raw_line.strip().partition(':')
            if separator:
                credentials[login] = password
    return credentials


def read_proxies(path):
    """Return the non-empty lines of *path* with surrounding whitespace removed."""
    with open(path) as source:
        return [line.strip() for line in source if line.strip()]


def parse_pagination(driver):
    """Return the URLs of listing pages 2..N for the currently open account.

    N is read from the link text of the second-to-last ``<li>`` of the
    pagination bar (presumably the last ``<li>`` is the "next" control --
    TODO confirm against the live markup).

    Building the list is pure string work, so it does not sleep: pausing
    two seconds per URL here was what delayed the start of scraping on
    accounts with many pages.

    Raises whatever the driver raises when the pagination bar is absent,
    and ``ValueError`` when the link text is not a number.
    """
    pager = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
    last_page_item = pager.find_elements_by_css_selector("li")[-2]
    page_count = int(last_page_item.find_element_by_css_selector("a").text)
    return [PAGE_URL_PREFIX + str(page) for page in range(2, page_count + 1)]


def parse_list(driver, out=None):
    """Print the course titles shown on the currently loaded page.

    Waits up to 10 seconds for ``div.card-wrapper`` to become visible, then
    reads the title of every ``div.card.card--learning`` inside it.  Each
    title is printed and, when *out* is given, also written to it on its own
    line.  Returns the list of titles found.
    """
    wrapper = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper")))
    names = []
    for card in wrapper.find_elements_by_css_selector("div.card.card--learning"):
        name = card.find_element_by_css_selector(
            "a.card--learning__details > div > strong").text
        print(name)
        if out is not None:
            out.write(name + "\n")
        names.append(name)
    return names


def fill_login_form(driver, login, password):
    """Open the site, open the login form and type the credentials."""
    driver.get('https://www.udemy.com')
    print("GOT URL\n")
    time.sleep(5)
    driver.find_element_by_xpath("//button[@data-purpose='header-login']").click()
    print("OPEN LOGIN FORM\n")
    time.sleep(5)
    # The form fields are reached by mouse offsets rather than by locator;
    # each move_by_offset is relative to the previous pointer position.
    webdriver.ActionChains(driver).move_by_offset(570, 295).click().send_keys(login).perform()
    print("PRINT MAIL\n")
    time.sleep(5)
    webdriver.ActionChains(driver).move_by_offset(100, 65).click().send_keys(password).perform()
    print("PRINT PASSWORD\n")
    time.sleep(5)


def open_my_courses(driver):
    """Submit the login form and open the "my courses" page."""
    webdriver.ActionChains(driver).move_by_offset(0, 60).click().perform()
    print("AUTORIZATION\n")
    time.sleep(6)
    driver.find_element_by_xpath("//a[@data-purpose='my-courses']").click()
    print("GO TO URL\n")
    time.sleep(5)


def scrape_courses(driver, out):
    """Write the course total and every course title of the account to *out*.

    Both steps are best-effort: a failure is reported on stdout and the
    remaining work for that step is skipped, it never aborts the account.
    """
    try:
        label = driver.find_element_by_xpath("//div[@class='pager-label']").text
        total = label.split(" ")[-2]
        out.write("TOTAL COURSES: ")
        out.write(total + "\n")
    except Exception as exc:
        print("TOTAL COURSES NOT FOUND: " + repr(exc))

    try:
        # The first page is already open; every further page is scraped
        # immediately after it has been loaded.
        parse_list(driver, out)
        for url in parse_pagination(driver):
            driver.get(url)
            time.sleep(2)
            parse_list(driver, out)
    except Exception as exc:
        print("COURSE LIST STOPPED: " + repr(exc))


def scrape_account(login, password, proxy_address, out):
    """Log in to one account through *proxy_address* and record its courses."""
    out.write("\n" + "USERNAME: ")
    out.write(login + "\n")
    out.write("PASSWORD: ")
    out.write(password + "\n")
    proxy_settings = Proxy({
        'proxyType': ProxyType.MANUAL,
        'httpProxy': proxy_address,
        'httpsProxy': proxy_address,
        'ftpProxy': proxy_address,
        'sslProxy': proxy_address,
        'noProxy': ''
    })
    print(proxy_address + "\n")
    driver = webdriver.Firefox(proxy=proxy_settings)
    try:
        fill_login_form(driver, login, password)
        try:
            open_my_courses(driver)
        except Exception:
            out.write("LOGIN OR PASSWORD IS INCORRECTLY PROVIDED" + "\n")
            time.sleep(1)
            return
        scrape_courses(driver, out)
        time.sleep(2)
    finally:
        # quit() ends the whole browser session, so no browser or driver
        # process is left behind, even when a step above raised.
        driver.quit()


def main():
    """Scrape every account listed in ``input_1.txt`` into ``udemy_titles``."""
    credentials = read_credentials("input_1.txt")
    proxies = read_proxies('proxy.txt')
    with open('udemy_titles', 'a', encoding='utf8') as out:
        for login, password in credentials.items():
            scrape_account(login, password, random.choice(proxies), out)


if __name__ == "__main__":
    main()
Пытался переделать функции так:
def url_parse(driver, on_page=None, delay=2):
    """Open listing pages 2..N one after another and return their URLs.

    N is read from the link text of the second-to-last ``<li>`` of the
    pagination bar of the page that is open when the function is called.

    Parameters:
        driver: the Selenium driver showing the first listing page.
        on_page: optional callable invoked as ``on_page(driver)`` right
            after each page has been loaded, e.g. ``parse_list``; this is
            how a page gets scraped before the next one is requested.
        delay: seconds to wait after each page load (default 2).

    Returns:
        The list of URLs that were opened, in order; empty when there is
        only one page.
    """
    ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
    li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
    count_page = int(li_pagination.find_element_by_css_selector("a").text)
    urls = []
    # count_page + 1 so that the last page is included in the range.
    for page in range(2, count_page + 1):
        url = "https://www.udemy.com/home/my-courses/learning/?p=" + str(page)
        # driver.get() returns None, so the URL itself is what gets recorded.
        driver.get(url)
        time.sleep(delay)
        if on_page is not None:
            on_page(driver)
        urls.append(url)
    return urls
def parse_list(driver):
    """Print the title of every course card on the currently loaded page.

    Waits up to 10 seconds for the ``div.card-wrapper`` container to become
    visible, then prints the text of the title element of each
    ``div.card.card--learning`` found inside it.
    """
    wrapper_is_visible = EC.visibility_of_element_located(
        (By.CSS_SELECTOR, "div.card-wrapper"))
    wrapper = WebDriverWait(driver, 10).until(wrapper_is_visible)
    title_selector = "a.card--learning__details > div > strong"
    for card in wrapper.find_elements_by_css_selector("div.card.card--learning"):
        # Titles are printed one by one as they are read.
        print(card.find_element_by_css_selector(title_selector).text)
А затем вызывать функции вот так:
# Scrape the first page, which is already open, and read the number of pages
# from its pagination bar before navigating away from it.
parse_list(driver)
ul_pagination = driver.find_element_by_css_selector("ul.pagination.pagination-expanded")
li_pagination = ul_pagination.find_elements_by_css_selector("li")[-2]
count_page = int(li_pagination.find_element_by_css_selector("a").text)
# Load each further page exactly once and scrape it straight away.
# count_page + 1 keeps the last page in the range.
for number_page in range(2, count_page + 1):
    driver.get("https://www.udemy.com/home/my-courses/learning/?p=" + str(number_page))
    time.sleep(2)
    parse_list(driver)