Надо спарсить с сайта вопросы и ответы на них, но чтобы до них добраться надо перейти в обсуждения данного вопроса, как реализовать?
Вот сам сайт с нужным разделом:
https://www.avvo.com/topics/landlord-tenant-law/ad... Пытаюсь делать так: 1) Перехожу по урлу 2) Захожу в 1-й пост, собираю нужную инфу, выхожу 3) Пытаюсь зайти на некст (не выходит) + ко всему на втором этапе выбивает ошибку "selenium.common.exceptions.StaleElementReferenceException: Message: The element reference of is stale; either the element is no longer attached to the DOM, it is not in the current frame context, or the document has been refreshed" Сам код:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException as TE
import time
driver = webdriver.Firefox()
url = "https://www.avvo.com/topics/landlord-tenant-law/advice?order=recency&page=1&search_topic_advice_search[content_type]=Q%26A&search_topic_advice_search[query]=security+deposit&search_topic_advice_search[state]=CA"
def get_url(driver, url):
driver.get(url)
time.sleep(3)
def page_pagination(driver):
divs_pagination = driver.find_elements_by_xpath("//div[@class='col-xs-12 advice-content']/div[@class='js-documents-list gtm-context']/div[@class='v-topic-page-card-list']/div[@class='card topic-advice-question-card']")
for div_pagination in divs_pagination:
div_pagination.click()
time.sleep(2)
def get_content(driver):
# divs = driver.find_elements_by_xpath("//div[@class='col-xs-12 advice-content']/div[@class='js-documents-list gtm-context']/div[@class='v-topic-page-card-list']/div[@class='card topic-advice-question-card']")
# for div in divs:
# div = driver.find_element_by_xpath("//div[@class='card topic-advice-question-card']/div[@class='row']/div[@class='col-xs-12 u-margin-top-half']/a[@class='block-link']").click()
# time.sleep(3)
try:
span = driver.find_element_by_xpath("//div[@class='col-xs-12']/div[@id='qa-body-display']/p[@class='a button btn btn-link u-vertical-padding-0']/span[@class='icon-chevron-down-after-blue']").click()
time.sleep(2)
except:
pass
try:
mores_lawyer = driver.find_elements_by_xpath("//div[@class='col-xs-12 gtm-context']/div[@id='answers_container']/div[@class='card qa-lawyer-card qa-answer v-borderless']/div[@itemscope='itemscope']/div[@id='answer-9088247']/div[@class='row answer-body']/div[@class='col-xs-12']/button[@class='btn btn-link u-vertical-padding-0']")
for more_lawyer in mores_lawyer:
more_lawyer.click()
time.sleep(2)
except:
pass
date_question_post = driver.find_element_by_xpath("//div[@class='col-xs-12 gtm-context']/div[@id='answers_container']/div[@class='card qa-lawyer-card qa-answer v-borderless']/div[@itemscope='itemscope']/div[@id='answer-9088247']/div[@class='row answer-metadata']/div[@class='col-xs-12']/ul[@class='inline-list']/li[@class='text-muted small hidden-xs']")
question_short = driver.find_element_by_xpath("//h1[@itemprop='name']")
question_long = driver.find_element_by_xpath("//p[@itemprop='text']")
name_lawyer = driver.find_element_by_xpath("//span[@itemprop='name']")
lawyer_answer = driver.find_element_by_xpath("//div[@class='col-xs-12 gtm-context']/div[@id='answers_container']/div[@class='card qa-lawyer-card qa-answer v-borderless']/div[@itemscope='itemscope']/div[@id='answer-9088247']/div[@class='row answer-body']/div[@class='col-xs-12']/div[@id='answer-body-9088247']")
name_data_question_post = date_question_post.text
name_short = question_short.text
name_long = question_long.text
name_of_lawyer = name_lawyer.text
name_of_lawyer_answer = lawyer_answer.text
print(name_short, "\n" ,name_long, "\n" ,name_data_question_post ,"\nLawyer name: ", name_of_lawyer, "\nLawyer answer:", name_of_lawyer_answer)
return (name_short, name_long, name_of_lawyer, name_data_question_post, name_of_lawyer_answer)
get_url(driver, url)
urls = page_pagination(driver)
for url in urls:
get_content(driver)
get_url(driver, url)
driver.close()
Больше идей, как реализовать нету