import time

from selenium import webdriver
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import TimeoutException as TE
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
def get_url(driver, url='https://www.avvo.com/topics/landlord-tenant-law/advice?order=recency&page=1&search_topic_advice_search[content_type]=Q%26A&search_topic_advice_search[query]=security+deposit&search_topic_advice_search[state]=CA', delay=3):
    """Navigate the browser to a page and pause so it can finish loading.

    Args:
        driver: Browser driver; only its ``get`` method is used.
        url: Address to open. Defaults to the Avvo landlord-tenant advice
            listing that this script crawls.
        delay: Seconds to sleep after navigation. Defaults to 3.
    """
    driver.get(url)
    # Fixed pause rather than an explicit wait: gives the page time to render.
    time.sleep(delay)
def get_content(driver):
    """Collect the question and the first lawyer answer from the open post.

    Expands the collapsed question text and the collapsed answers when the
    corresponding controls are present, then reads the question title, the
    question body, the lawyer name, the date shown in the answer metadata and
    the answer text. The values are printed and returned.

    Args:
        driver: Selenium WebDriver currently showing a question page.

    Returns:
        A 5-tuple of strings: (question title, question body, lawyer name,
        date text, answer text).

    Raises:
        selenium.common.exceptions.NoSuchElementException: If one of the
            required elements is missing from the page.
    """
    # The original selectors were pinned to one answer id (answer-9088247),
    # so they matched a single post only. Matching on the id prefix keeps the
    # same DOM path for any post.
    # NOTE(review): assumes every answer uses the "answer-<id>" /
    # "answer-body-<id>" id pattern seen in the original selector - confirm.
    answer_root = (
        "//div[@class='col-xs-12 gtm-context']"
        "/div[@id='answers_container']"
        "/div[@class='card qa-lawyer-card qa-answer v-borderless']"
        "/div[@itemscope='itemscope']"
        "/div[starts-with(@id, 'answer-')]"
    )

    # Best effort: expand the full question text if the page offers it.
    try:
        driver.find_element(
            By.XPATH,
            "//div[@class='col-xs-12']/div[@id='qa-body-display']/p[@class='a button btn btn-link u-vertical-padding-0']/span[@class='icon-chevron-down-after-blue']",
        ).click()
        time.sleep(2)
    except WebDriverException:
        pass  # No expander on this post, or it is not clickable.

    # Best effort: expand every collapsed answer body.
    try:
        more_buttons = driver.find_elements(
            By.XPATH,
            answer_root + "/div[@class='row answer-body']/div[@class='col-xs-12']/button[@class='btn btn-link u-vertical-padding-0']",
        )
        for more_button in more_buttons:
            more_button.click()
            time.sleep(2)
    except WebDriverException:
        pass  # A button could not be clicked; continue with what is visible.

    date_question_post = driver.find_element(
        By.XPATH,
        answer_root + "/div[@class='row answer-metadata']/div[@class='col-xs-12']/ul[@class='inline-list']/li[@class='text-muted small hidden-xs']",
    )
    question_short = driver.find_element(By.XPATH, "//h1[@itemprop='name']")
    question_long = driver.find_element(By.XPATH, "//p[@itemprop='text']")
    name_lawyer = driver.find_element(By.XPATH, "//span[@itemprop='name']")
    lawyer_answer = driver.find_element(
        By.XPATH,
        answer_root + "/div[@class='row answer-body']/div[@class='col-xs-12']/div[starts-with(@id, 'answer-body-')]",
    )

    name_data_question_post = date_question_post.text
    name_short = question_short.text
    name_long = question_long.text
    name_of_lawyer = name_lawyer.text
    name_of_lawyer_answer = lawyer_answer.text

    print(name_short, "\n" ,name_long, "\n" ,name_data_question_post ,"\nLawyer name: ", name_of_lawyer, "\nLawyer answer:", name_of_lawyer_answer)
    return (name_short, name_long, name_of_lawyer, name_data_question_post, name_of_lawyer_answer)
def page_pagination(driver):
    """Walk through every post linked from the current listing page.

    Collects the link targets of all ``a.block-link`` anchors inside the post
    list, opens each one in turn and scrapes it with ``get_content``. When at
    least one post was visited, the browser is sent back to the listing with
    ``get_url`` afterwards.

    Args:
        driver: Selenium WebDriver currently showing the advice listing.
    """
    post_list = driver.find_element(
        By.XPATH,
        "//div[@class='col-xs-12 advice-content']/div[@class='js-documents-list gtm-context']/div[@class='v-topic-page-card-list']",
    )
    # Leading "." keeps the search inside the post list; a bare "//" would
    # search the whole document even when called on an element.
    anchors = post_list.find_elements(By.XPATH, ".//a[@class='block-link']")

    # Read all hrefs before navigating: once the browser leaves the listing,
    # the anchor elements become stale and can no longer be queried.
    post_urls = [anchor.get_attribute("href") for anchor in anchors]

    for post_url in post_urls:
        if not post_url:
            continue  # Anchor without a target; nothing to open.
        driver.get(post_url)
        time.sleep(2)
        get_content(driver)
        time.sleep(2)

    if post_urls:
        # Leave the browser on the listing page, as callers found it.
        get_url(driver)
        time.sleep(2)
def main():
    """Start Firefox, crawl the Avvo listing and always shut the browser down."""
    driver = webdriver.Firefox()
    try:
        get_url(driver)
        page_pagination(driver)
    finally:
        # quit() ends the whole WebDriver session (browser and driver
        # process), even when the crawl raised; close() only closes a window.
        driver.quit()
# Run the Avvo crawler only when this file is executed as a script.
if __name__ == "__main__":
    main()
# NOTE(review): the file's indentation was lost, so it is unclear whether this
# pause belonged inside the guard above; it is kept at module level - confirm.
time.sleep(3)
def parse_list(driver):
    """Print and record the title of every course card on the current page.

    Waits up to 10 seconds for ``div.card-wrapper`` to become visible, then
    reads the ``strong`` title inside each ``div.card.card--learning`` card.
    Each title is printed and written, one per line, to the module-level file
    object ``f``, which must already be open for writing.

    Args:
        driver: Selenium WebDriver showing a course-list page.

    Raises:
        selenium.common.exceptions.TimeoutException: If the card wrapper does
            not become visible within 10 seconds.
    """
    wrapper = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper")))
    # find_element(s)(By..., ...) replaces the find_element(s)_by_* helpers,
    # which no longer exist in current Selenium releases.
    cards = wrapper.find_elements(By.CSS_SELECTOR, "div.card.card--learning")
    for card in cards:
        title = card.find_element(
            By.CSS_SELECTOR, "a.card--learning__details > div > strong").text
        print(title)
        f.write(title + "\n")
def parse_pagination(driver):
    """Return the URLs of course-list pages 2..N.

    N is read from the link text of the second-to-last ``li`` in the
    ``ul.pagination.pagination-expanded`` bar.

    The previous body assigned the ``None`` result of ``driver.get`` to
    ``urls`` and returned that, although the caller iterates over the result.
    This version only builds the list and leaves navigation to the caller.

    Args:
        driver: Selenium WebDriver showing the first course-list page.

    Returns:
        A list of page URLs; empty when the bar reports fewer than two pages.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the pagination
            bar or its link is missing.
        IndexError: If the bar has fewer than two ``li`` items.
        ValueError: If the link text is not an integer.
    """
    pagination = driver.find_element(
        By.CSS_SELECTOR, "ul.pagination.pagination-expanded")
    # Second-to-last item holds the highest page number
    # (presumably the last item is a "next" control - confirm).
    last_page_item = pagination.find_elements(By.CSS_SELECTOR, "li")[-2]
    count_page = int(last_page_item.find_element(By.CSS_SELECTOR, "a").text)
    return [
        "https://www.udemy.com/home/my-courses/learning/?p=" + str(page)
        for page in range(2, count_page + 1)
    ]
def parse_list(driver):
    """Print and record the title of every course card on the current page.

    Waits up to 10 seconds for ``div.card-wrapper`` to become visible, then
    reads the ``strong`` title inside each ``div.card.card--learning`` card.
    Each title is printed and written, one per line, to the module-level file
    object ``f``, which must already be open for writing.

    Args:
        driver: Selenium WebDriver showing a course-list page.

    Raises:
        selenium.common.exceptions.TimeoutException: If the card wrapper does
            not become visible within 10 seconds.
    """
    wrapper = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.CSS_SELECTOR, "div.card-wrapper")))
    # find_element(s)(By..., ...) replaces the find_element(s)_by_* helpers,
    # which no longer exist in current Selenium releases.
    cards = wrapper.find_elements(By.CSS_SELECTOR, "div.card.card--learning")
    for card in cards:
        title = card.find_element(
            By.CSS_SELECTOR, "a.card--learning__details > div > strong").text
        print(title)
        f.write(title + "\n")
# Scrape the course titles from the page that is currently open, then from
# every further page reported by the pagination bar.
parse_list(driver)
urls = parse_pagination(driver)
# parse_pagination can hand back None instead of a list; treat that as
# "no further pages" rather than failing on iteration.
for url in urls or []:
    driver.get(url)
    time.sleep(2)  # Give the page time to load before reading it.
    parse_list(driver)
# Load "mail:password" pairs from input_1.txt, one pair per line.
# NOTE(review): indentation was lost in this file; this assumes the loops
# ended before the function definitions that follow, so after this block
# mail_1 / pasw_1 hold the last pair in the file - confirm intent.
maill = []
pasww = []
# The context manager closes the handle; the previous code left it open.
with open('input_1.txt', 'r') as file:
    for i_1 in file:
        # Split on the first ":" only, so a password containing ":" is kept
        # whole instead of being cut at the second colon.
        mail, separator, password = i_1.strip().partition(":")
        if not separator:
            continue  # Blank or malformed line: no pair to record.
        maill.append(mail)
        pasww.append(password)

# The file can only be read once, so the former four-pass outer loop did
# nothing after its first pass and has been dropped.
if maill:
    mail_1 = maill[-1]
    pasw_1 = pasww[-1]
def parse_pagination(driver):
    """Return the URLs of course-list pages 2..N, or ``[]`` if unavailable.

    N is read from the link text of the second-to-last ``li`` in the
    ``ul.pagination.pagination-expanded`` bar. When the bar is missing or
    cannot be read, an empty list is returned so callers can iterate over the
    result directly (the previous body returned ``None`` in that case).

    Args:
        driver: Selenium WebDriver showing the first course-list page.

    Returns:
        A list of page URLs, possibly empty.
    """
    try:
        pagination = driver.find_element(
            By.CSS_SELECTOR, "ul.pagination.pagination-expanded")
        # Second-to-last item holds the highest page number
        # (presumably the last item is a "next" control - confirm).
        last_page_item = pagination.find_elements(By.CSS_SELECTOR, "li")[-2]
        count_page = int(
            last_page_item.find_element(By.CSS_SELECTOR, "a").text)
    except (WebDriverException, IndexError, ValueError):
        # No pagination bar (single page) or unexpected markup.
        return []
    # Building strings needs no pause; the former sleep here only slowed
    # the run.
    return [
        "https://www.udemy.com/home/my-courses/learning/?p=" + str(page)
        for page in range(2, count_page + 1)
    ]
def parse_list(driver):
    """Print and record the title of every course card on the current page.

    Waits up to 10 seconds for ``div.card-wrapper`` to become visible, then
    reads the ``strong`` title inside each ``div.card.card--learning`` card.
    Each title is printed and written, one per line, to the module-level file
    object ``f``, which must already be open for writing.

    Browser-side failures (wrapper never visible, title element missing) are
    tolerated: the function stops and returns, keeping whatever was written
    so far. Other errors are no longer swallowed.

    Args:
        driver: Selenium WebDriver showing a course-list page.
    """
    try:
        wrapper = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.card-wrapper")))
        cards = wrapper.find_elements(
            By.CSS_SELECTOR, "div.card.card--learning")
        for card in cards:
            title = card.find_element(
                By.CSS_SELECTOR,
                "a.card--learning__details > div > strong").text
            print(title)
            f.write(title + "\n")
    except WebDriverException:
        # Best effort: a page without course cards yields no titles.
        pass
# Scrape the current course page and every further page; a failure here is
# reported instead of being silently discarded, and does not stop the script.
try:
    parse_list(driver)
    urls = parse_pagination(driver)
    # parse_pagination can hand back None; treat that as "no further pages".
    for url in urls or []:
        driver.get(url)
        parse_list(driver)
except Exception as error:
    # Script-level boundary: keep going, but leave a trace of what went wrong.
    print("Course scraping failed:", error)
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
# Output file for the scraped data and input file with "mail:password" lines.
# Both handles stay open here; they are closed at the end of the script.
f = open('udemy_titles','w', encoding='utf8')
file = open('input_1.txt', 'r')

maill = []
pasww = []
for i_1 in file:
    # Split on the first ":" only, so a password containing ":" is kept
    # whole instead of being cut at the second colon.
    mail, separator, password = i_1.strip().partition(":")
    if not separator:
        continue  # Blank or malformed line: no pair to record.
    maill.append(mail)
    pasww.append(password)

# NOTE(review): indentation was lost in this file; this assumes the loop
# ended before the function definitions that follow, so every pair is
# recorded here and mail_1 / pasw_1 end up as the last pair - confirm intent.
# NOTE(review): passwords are written to the output file in plain text.
for mail_1, pasw_1 in zip(maill, pasww):
    f.write("USERNAME: ")
    f.write(mail_1 + "\n")
    f.write("PASSWORD: ")
    f.write(pasw_1 + "\n")
def parse_pagination(driver):
    """Return the URLs of course-list pages 2..N, or ``[]`` if unavailable.

    N is read from the link text of the second-to-last ``li`` in the
    ``ul.pagination.pagination-expanded`` bar. When the bar is missing or
    cannot be read, an empty list is returned so callers can iterate over the
    result directly (the previous body returned ``None`` in that case).

    Args:
        driver: Selenium WebDriver showing the first course-list page.

    Returns:
        A list of page URLs, possibly empty.
    """
    try:
        pagination = driver.find_element(
            By.CSS_SELECTOR, "ul.pagination.pagination-expanded")
        # Second-to-last item holds the highest page number
        # (presumably the last item is a "next" control - confirm).
        last_page_item = pagination.find_elements(By.CSS_SELECTOR, "li")[-2]
        count_page = int(
            last_page_item.find_element(By.CSS_SELECTOR, "a").text)
    except (WebDriverException, IndexError, ValueError):
        # No pagination bar (single page) or unexpected markup.
        return []
    # Building strings needs no pause; the former sleep here only slowed
    # the run.
    return [
        "https://www.udemy.com/home/my-courses/learning/?p=" + str(page)
        for page in range(2, count_page + 1)
    ]
def parse_list(driver):
    """Print and record the title of every course card on the current page.

    Waits up to 10 seconds for ``div.card-wrapper`` to become visible, then
    reads the ``strong`` title inside each ``div.card.card--learning`` card.
    Each title is printed and written, one per line, to the module-level file
    object ``f``, which must already be open for writing.

    Browser-side failures (wrapper never visible, title element missing) are
    tolerated: the function stops and returns, keeping whatever was written
    so far. Other errors are no longer swallowed.

    Args:
        driver: Selenium WebDriver showing a course-list page.
    """
    try:
        wrapper = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(
                (By.CSS_SELECTOR, "div.card-wrapper")))
        cards = wrapper.find_elements(
            By.CSS_SELECTOR, "div.card.card--learning")
        for card in cards:
            title = card.find_element(
                By.CSS_SELECTOR,
                "a.card--learning__details > div > strong").text
            print(title)
            f.write(title + "\n")
    except WebDriverException:
        # Best effort: a page without course cards yields no titles.
        pass
# Log in to Udemy with mail_1 / pasw_1, open "My courses" and write the
# course count and course titles to the output file f.
driver = webdriver.Firefox()
try:
    driver.get('https://www.udemy.com')
    print("\nGOT URL\n")
    time.sleep(5)

    driver.find_element(By.XPATH, "//button[@data-purpose='header-login']").click()
    print("OPEN LOGIN FORM\n")
    time.sleep(5)

    # NOTE(review): the login form is filled by clicking at pixel offsets.
    # move_by_offset is relative to the current pointer position, so the three
    # moves accumulate and depend on window size and page layout - confirm
    # these still hit the mail field, password field and submit button.
    webdriver.ActionChains(driver).move_by_offset(570, 295).click().send_keys(mail_1).perform()
    print("PRINT MAIL\n")
    time.sleep(5)
    webdriver.ActionChains(driver).move_by_offset(100, 65).click().send_keys(pasw_1).perform()
    print("PRINT PASSWORD\n")
    time.sleep(5)
    webdriver.ActionChains(driver).move_by_offset(0, 60).click().perform()
    print("AUTORIZATION\n")
    time.sleep(5)

    driver.find_element(By.XPATH, "//a[@data-purpose='my-courses']").click()
    print("GO TO URL\n")
    time.sleep(5)

    # Best effort: the second-to-last word of the pager label is recorded as
    # the total number of courses; nothing is written when it is unavailable.
    try:
        pager_text = driver.find_element(By.XPATH, "//div[@class='pager-label']").text
        total_courses = pager_text.split(" ")[-2]
    except (WebDriverException, IndexError):
        pass
    else:
        f.write("TOTAL COURSES: " )
        f.write(total_courses + "\n")

    # Titles from the first page, then from every further page.
    try:
        parse_list(driver)
        urls = parse_pagination(driver)
        # parse_pagination can hand back None; treat as "no further pages".
        for url in urls or []:
            driver.get(url)
            parse_list(driver)
    except Exception as error:
        # Script-level boundary: report instead of silently discarding.
        print("Course scraping failed:", error)
finally:
    # Always release the files and end the browser session, even when the
    # login steps above raised. quit() also stops the driver process.
    f.close()
    file.close()
    driver.quit()
time.sleep(2)