class Parser:
    """Log in to the profi.ru back office with Firefox and track order cards.

    The class drives a Selenium Firefox session, parses the rendered page
    with BeautifulSoup and stores the parsed order cards as JSON files.
    ``get_first_start_parser`` takes the initial snapshot;
    ``get_update_aplications`` polls the page for cards that are not yet in
    the snapshot and triggers a notification for them.

    Relies on names provided by the surrounding module: ``webdriver``,
    ``bs``, ``re``, ``json``, ``time``, ``asyncio``, ``url_leads`` and
    ``personal_actions``.
    """

    _LOGIN_URL = "https://profi.ru/backoffice/n.php"
    _LOGIN_INPUT_XPATH = (
        "/html/body/div[1]/div/div[2]/div/div[2]/div/div/form"
        "/div/div/div[1]/label/span/input"
    )
    _PASSWORD_INPUT_XPATH = (
        "/html/body/div[1]/div/div[2]/div/div[2]/div/div/form"
        "/div/div/div[2]/label/span/input"
    )
    _SUBMIT_XPATH = "/html/body/div[1]/div/div[2]/div/div[2]/div/div/form/a"

    # NOTE(review): the initial snapshot is written to the working directory,
    # while the polling loop reads/writes the copy under ``parser_sprint/``.
    # The paths are kept exactly as they were; confirm this is intentional.
    _INITIAL_SNAPSHOT_PATH = "first_list_applications.json"
    _KNOWN_PATH = "parser_sprint/first_list_applications.json"
    _FRESH_PATH = "parser_sprint/fresh_list_applications.json"

    # CSS-class patterns of the card elements, compiled once instead of on
    # every card of every polling cycle.
    _CARD_RE = re.compile("OrderSnippetContainerStyles__Container")
    _LINK_RE = re.compile("SnippetBodyStyles__Container")
    _TITLE_RE = re.compile("SubjectAndPriceStyles")
    _PRICE_RE = re.compile("SubjectAndPriceStyles__PriceValue")
    _SNIPPET_RE = re.compile("SnippetBodyStyles__MainInfo")
    _DATE_RE = re.compile("Date__DateText")
    _LOCATION_RE = re.compile("LocationAndScheduleStyles__TextContainer")
    _SCHEDULE_RE = re.compile("LocationAndScheduleStyles__ScheduleText")
    _CLIENT_NAME_RE = re.compile("StatusAndClientInfoStyles__Name")

    def __init__(self, login, password, user_id, user_agent):
        """Store the credentials and start a Firefox session.

        Args:
            login: Account login typed into the login form.
            password: Account password typed into the login form.
            user_id: Identifier passed to ``personal_actions.send_ntf``.
            user_agent: Value for Firefox's ``general.useragent.override``.
        """
        self.login = login
        self.user_id = user_id
        self.password = password
        self.user_agent = user_agent

        options = webdriver.FirefoxOptions()
        # Pass the real value: the previous "{self.user_agent}" literal had
        # no f-prefix, so the placeholder text itself became the User-Agent.
        options.set_preference("general.useragent.override", self.user_agent)
        # The Firefox preference is spelled "dom.webdriver.enabled"; the
        # former "dom.webdriver.enable" key is not a known preference.
        options.set_preference("dom.webdriver.enabled", False)
        self.driver = webdriver.Firefox(options=options)

    def close_driver(self):
        """End the browser session.

        ``quit()`` closes every window of the session, so a preceding
        ``close()`` call is not needed.
        """
        self.driver.quit()

    @staticmethod
    def _text_or(card, tag, pattern, default, *, flatten=False):
        """Return the stripped text of the first matching node, or *default*.

        Args:
            card: Object exposing ``find(tag, class_=...)``.
            tag: Tag name to look for.
            pattern: Value passed as ``class_`` to ``find``.
            default: Returned when no node matches.
            flatten: When true, newlines in the text are replaced by spaces.
        """
        node = card.find(tag, class_=pattern)
        if node is None:
            return default
        text = node.text.strip()
        return text.replace("\n", " ") if flatten else text

    def _find_cards(self):
        """Parse the current page source and return all order-card nodes."""
        soup = bs(self.driver.page_source, "lxml")
        return soup.find_all(class_=self._CARD_RE)

    def _card_link(self, card):
        """Return ``(link_node, application_id)`` or ``None`` for unusable cards.

        A card is unusable when it has no link node or the link has no ``id``
        attribute; such cards cannot be keyed in the JSON snapshot.
        """
        link = card.find("a", class_=self._LINK_RE)
        if link is None:
            return None
        application_id = link.get("id")
        if application_id is None:
            return None
        return link, application_id

    def _build_record(self, card, link, application_id, *, date_default):
        """Build the JSON record stored for one order card.

        Args:
            card: The card node.
            link: The card's link node (source of ``href``).
            application_id: The ``id`` attribute of *link*.
            date_default: Placeholder used when the card shows no date. The
                two public methods historically use differently-cased
                placeholders, so the caller supplies it.
        """
        text_or = self._text_or
        return {
            "title": text_or(card, "h3", self._TITLE_RE, "Нет заголовка"),
            "price": text_or(card, "span", self._PRICE_RE, "Без цен"),
            "desc": text_or(
                card, "p", self._SNIPPET_RE, "Нет описания", flatten=True
            ),
            "location": text_or(
                card, "span", self._LOCATION_RE, "Адрес не указан"
            ),
            "time": text_or(
                card, "span", self._SCHEDULE_RE, "Дата не указана",
                flatten=True,
            ),
            # Previously an unguarded lookup: a card without a client name
            # aborted the whole run. A placeholder is used instead.
            "name": text_or(
                card, "span", self._CLIENT_NAME_RE, "Имя не указано"
            ),
            # A link without href contributes an empty path instead of
            # raising on ``str + None``.
            "url": url_leads + (link.get("href") or ""),
            "id": application_id,
            "date": text_or(card, "span", self._DATE_RE, date_default),
        }

    @staticmethod
    def _load_json(path, *, missing_ok=False):
        """Load a JSON file; return ``{}`` for a missing file if *missing_ok*."""
        try:
            with open(path) as file:
                return json.load(file)
        except FileNotFoundError:
            if missing_ok:
                return {}
            raise

    @staticmethod
    def _dump_json(data, path):
        """Write *data* to *path* as indented, non-ASCII-escaped JSON."""
        with open(path, "w") as file:
            json.dump(data, file, indent=4, ensure_ascii=False)

    def get_first_start_parser(self):
        """Log in, scrape the cards currently shown and save the snapshot.

        The snapshot is written to ``first_list_applications.json``. Any
        exception raised along the way is printed and not propagated.
        """
        try:
            self.driver.get(self._LOGIN_URL)
            # ``find_element("xpath", ...)`` is used because the
            # ``find_element_by_xpath`` shortcut no longer exists in
            # Selenium 4.3+, while this form works on Selenium 3 and 4.
            login_input = self.driver.find_element(
                "xpath", self._LOGIN_INPUT_XPATH
            )
            login_input.send_keys(self.login)
            time.sleep(0.1)
            password_input = self.driver.find_element(
                "xpath", self._PASSWORD_INPUT_XPATH
            )
            password_input.send_keys(self.password)
            self.driver.find_element("xpath", self._SUBMIT_XPATH).click()
            # Fixed pause before reading the page source after submitting.
            time.sleep(15)

            first_list_applications = {}
            for card in self._find_cards():
                located = self._card_link(card)
                if located is None:
                    continue
                link, application_id = located
                first_list_applications[application_id] = self._build_record(
                    card, link, application_id,
                    date_default="время не указано",
                )

            self._dump_json(
                first_list_applications, self._INITIAL_SNAPSHOT_PATH
            )
            print("список сформирован")
        except Exception as ex:
            print(ex)

    async def get_update_aplications(self):
        """Poll the page forever and report cards missing from the snapshot.

        Each cycle reloads both JSON files, parses the current page, adds
        every card whose id is not yet known to both dictionaries, and then
        refreshes the browser page.

        NOTE(review): the original nesting of the file writes and the
        ``send_ntf`` call could not be recovered from the source. Here they
        run once per cycle, and only when at least one new card was found.
        """
        while True:
            await asyncio.sleep(5)
            known = self._load_json(self._KNOWN_PATH)
            # Nothing in this class creates the "fresh" file up front, so a
            # missing file is treated as "no pending applications".
            fresh = self._load_json(self._FRESH_PATH, missing_ok=True)

            found_new = False
            for card in self._find_cards():
                located = self._card_link(card)
                if located is None:
                    continue
                link, application_id = located
                if application_id in known:
                    continue
                record = self._build_record(
                    card, link, application_id,
                    date_default="Время не указано",
                )
                known[application_id] = record
                fresh[application_id] = record
                found_new = True

            if found_new:
                self._dump_json(known, self._KNOWN_PATH)
                self._dump_json(fresh, self._FRESH_PATH)
                await personal_actions.send_ntf(self.user_id)

            # ``asyncio.sleep`` instead of ``time.sleep``: a blocking sleep
            # inside a coroutine stalls every other task on the event loop.
            await asyncio.sleep(30)
            self.driver.refresh()