from bs4 import BeautifulSoup
import telebot
import random
from time import sleep
import selenium
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
# Telegram bot client; get_content() uses it to deliver the scraped report.
# NOTE(review): "ТОКЕН" looks like a placeholder - supply the real bot token
# before running.
bot = telebot.TeleBot("ТОКЕН", parse_mode=None)
# Launch options for the module-wide Firefox instance created below.
options = Options()
options.add_argument('--headless')
# NOTE(review): the next two switches look like Chromium flags - confirm that
# Firefox/geckodriver actually honours them.
options.add_argument('--disable-gpu')
options.add_argument('--hide-scrollbars')
# The browser is started as soon as this module is executed; get_html() and
# parse_comfy() rely on this shared `driver`. The geckodriver binary is looked
# up relative to the current working directory.
# NOTE(review): `executable_path` is deprecated in Selenium 4 - confirm the
# Selenium version this script is pinned to.
driver = webdriver.Firefox(executable_path="./geckodriver.exe", options=options)
def get_html(url, params=None):
    """Open *url* in the shared browser and return the resulting page source.

    Args:
        url: Address of the page to load.
        params: Accepted for interface compatibility; not used by this function.

    Returns:
        The HTML of the loaded page as reported by ``driver.page_source``.
    """
    driver.get(url)
    return driver.page_source
def _tag_text(tag, default, strip=True):
    """Return the text of *tag*, or *default* when the lookup found nothing."""
    if tag is None:
        return default
    return tag.get_text(strip=strip)


def get_content(html, html_comment, id):
    """Extract product and first-review details, then send and print a report.

    Args:
        html: HTML source of the product page.
        html_comment: HTML source of the product's reviews page.
        id: Chat identifier handed to ``bot.send_message``. The name shadows
            the builtin but is kept so existing callers keep working.

    Returns:
        None. The report is sent through ``bot`` and echoed to stdout.

    Missing page elements leave the corresponding field empty instead of
    raising, so a partially rendered page still produces a report.
    """
    msg_attrs = {"class": "feedback-item__msg"}

    soup = BeautifulSoup(html, "html.parser")
    soup_comment = BeautifulSoup(html_comment, "html.parser")

    # Product header: if several header blocks exist, the last one found wins.
    title_w = ""
    price = ""
    for item in soup.find_all("div", class_="product-card-header-right"):
        title_w = _tag_text(
            item.find('h1', {"class": "product-card-header-right__name"}), title_w
        )
        price = _tag_text(
            item.find('div', {"class": "price-box__content-i"}), price
        )

    # Rating label: keep the last label that is actually present.
    rating_w = ""
    for item in soup_comment.find_all("div", class_="feedback-info"):
        rating_w = _tag_text(
            item.find('span', {"class": "feedback-info__label"}), rating_w, strip=False
        )

    comments_name_w = ""
    comments_text_w = ""
    comment_perew_w = ""
    comment_minus_w = ""

    # Only the first review on the page is reported.
    review = soup_comment.find(
        "div",
        class_="feedback-item__content feedback-item__content--block-content js-feedback-content",
    )
    if review is not None:
        comments_name_w = _tag_text(
            review.find('span', {"class": "feedback-item__username"}), ""
        ).replace("Купив у Comfy", "")

        # The review body is the first message paragraph; the following two
        # message paragraphs are taken as the pluses and minuses candidates.
        # find_next() searches the rest of the document, so the marker checks
        # below decide whether a candidate is really a plus/minus section.
        text_tag = review.find('p', msg_attrs)
        comments_text_w = _tag_text(text_tag, "")
        plus_tag = text_tag.find_next('p', msg_attrs) if text_tag is not None else None
        comment_perew_w = _tag_text(plus_tag, "")
        minus_tag = plus_tag.find_next('p', msg_attrs) if plus_tag is not None else None
        comment_minus_w = _tag_text(minus_tag, "")

    # Build the report once and reuse it for both the message and the console.
    lines = [
        "Product name: " + title_w.replace("\n", ""),
        "Product price: " + price,
        "Product rating: " + rating_w,
        "Product commentor name: " + comments_name_w.replace("\n", ""),
        "Product comment text: " + comments_text_w.replace('\n', ""),
    ]
    if "Переваги" in comment_perew_w:
        lines.append("Product plusses text: " + comment_perew_w.replace('\n', ""))
    if "Недоліки" in comment_minus_w:
        lines.append("Product minusses text: " + comment_minus_w.replace('\n', ""))

    bot.send_message(id, "\n".join(lines))
    for line in lines:
        print(line)
def parse_comfy(URL, id):
    """Scrape one Comfy product and its reviews page and report the result.

    Args:
        URL: Address of the product page. The reviews page address is derived
            from it by replacing ".html" with "-otzyvy.html".
        id: Chat identifier forwarded to ``get_content`` for the bot message.

    Returns:
        None.

    The shared browser is shut down when this function finishes, so it can be
    called only once per ``driver`` instance.
    """
    try:
        html = get_html(URL)
        html_comment = get_html(URL.replace(".html", "-otzyvy.html"))
        get_content(html, html_comment, id)
    finally:
        # Close the browser even if loading, parsing or sending failed, so no
        # headless Firefox process is left behind.
        driver.quit()
if __name__ == '__main__':
    import os

    # parse_comfy() needs the destination chat id as its second argument.
    # It is read from the environment so that no chat id is hard-coded here.
    chat_id = os.environ.get("TELEGRAM_CHAT_ID")
    if not chat_id:
        raise SystemExit(
            "Set TELEGRAM_CHAT_ID to the chat that should receive the report"
        )
    parse_comfy("https://comfy.ua/ua/smartfon-apple-iphone-11-64gb-black.html", chat_id)
    # Keep the bot running after the one-off report has been sent.
    bot.polling()
# NOTE(review): this repeats the options/driver setup performed earlier in this
# file. Executing it launches another Firefox instance and rebinds `options`
# and `driver` - confirm whether the duplication is intentional.
options = Options()
options.add_argument('--headless')
# NOTE(review): the next two switches look like Chromium flags - confirm that
# Firefox/geckodriver actually honours them.
options.add_argument('--disable-gpu')
options.add_argument('--hide-scrollbars')
# The geckodriver binary is looked up relative to the current working directory.
# NOTE(review): `executable_path` is deprecated in Selenium 4 - confirm the
# Selenium version this script is pinned to.
driver = webdriver.Firefox(executable_path="./geckodriver.exe", options=options)
def get_html(url, params=None):
    """Open *url* in the shared browser and return the resulting page source.

    Args:
        url: Address of the page to load.
        params: Accepted for interface compatibility; not used by this function.

    Returns:
        The HTML of the loaded page as reported by ``driver.page_source``.
    """
    # NOTE(review): a function with this name is also defined earlier in this file.
    driver.get(url)
    return driver.page_source
def _tag_text(tag, default, strip=True):
    """Return the text of *tag*, or *default* when the lookup found nothing."""
    if tag is None:
        return default
    return tag.get_text(strip=strip)


def get_content(html, html_comment, id):
    """Extract product and first-review details, then send and print a report.

    Args:
        html: HTML source of the product page.
        html_comment: HTML source of the product's reviews page.
        id: Chat identifier handed to ``bot.send_message``. The name shadows
            the builtin but is kept so existing callers keep working.

    Returns:
        None. The report is sent through ``bot`` and echoed to stdout.

    Missing page elements leave the corresponding field empty instead of
    raising, so a partially rendered page still produces a report.
    """
    # NOTE(review): a function with this name is also defined earlier in this file.
    msg_attrs = {"class": "feedback-item__msg"}

    soup = BeautifulSoup(html, "html.parser")
    soup_comment = BeautifulSoup(html_comment, "html.parser")

    # Product header: if several header blocks exist, the last one found wins.
    title_w = ""
    price = ""
    for item in soup.find_all("div", class_="product-card-header-right"):
        title_w = _tag_text(
            item.find('h1', {"class": "product-card-header-right__name"}), title_w
        )
        price = _tag_text(
            item.find('div', {"class": "price-box__content-i"}), price
        )

    # Rating label: keep the last label that is actually present.
    rating_w = ""
    for item in soup_comment.find_all("div", class_="feedback-info"):
        rating_w = _tag_text(
            item.find('span', {"class": "feedback-info__label"}), rating_w, strip=False
        )

    comments_name_w = ""
    comments_text_w = ""
    comment_perew_w = ""
    comment_minus_w = ""

    # Only the first review on the page is reported.
    review = soup_comment.find(
        "div",
        class_="feedback-item__content feedback-item__content--block-content js-feedback-content",
    )
    if review is not None:
        comments_name_w = _tag_text(
            review.find('span', {"class": "feedback-item__username"}), ""
        ).replace("Купив у Comfy", "")

        # The review body is the first message paragraph; the following two
        # message paragraphs are taken as the pluses and minuses candidates.
        # find_next() searches the rest of the document, so the marker checks
        # below decide whether a candidate is really a plus/minus section.
        text_tag = review.find('p', msg_attrs)
        comments_text_w = _tag_text(text_tag, "")
        plus_tag = text_tag.find_next('p', msg_attrs) if text_tag is not None else None
        comment_perew_w = _tag_text(plus_tag, "")
        minus_tag = plus_tag.find_next('p', msg_attrs) if plus_tag is not None else None
        comment_minus_w = _tag_text(minus_tag, "")

    # Build the report once and reuse it for both the message and the console.
    lines = [
        "Product name: " + title_w.replace("\n", ""),
        "Product price: " + price,
        "Product rating: " + rating_w,
        "Product commentor name: " + comments_name_w.replace("\n", ""),
        "Product comment text: " + comments_text_w.replace('\n', ""),
    ]
    if "Переваги" in comment_perew_w:
        lines.append("Product plusses text: " + comment_perew_w.replace('\n', ""))
    if "Недоліки" in comment_minus_w:
        lines.append("Product minusses text: " + comment_minus_w.replace('\n', ""))

    bot.send_message(id, "\n".join(lines))
    for line in lines:
        print(line)
def parse_comfy(URL, id):
    """Scrape one Comfy product and its reviews page and report the result.

    Args:
        URL: Address of the product page. The reviews page address is derived
            from it by replacing ".html" with "-otzyvy.html".
        id: Chat identifier forwarded to ``get_content`` for the bot message.

    Returns:
        None.

    The shared browser is shut down when this function finishes, so it can be
    called only once per ``driver`` instance.
    """
    # NOTE(review): a function with this name is also defined earlier in this file.
    try:
        html = get_html(URL)
        html_comment = get_html(URL.replace(".html", "-otzyvy.html"))
        get_content(html, html_comment, id)
    finally:
        # Close the browser even if loading, parsing or sending failed, so no
        # headless Firefox process is left behind.
        driver.quit()