Пытаюсь сделать простенький GUI для парсера сайта. Вот есть код дизайна:
# -*- coding: utf-8 -*-
# Form implementation generated from reading ui file 'gui.ui'
#
# Created by: PyQt5 UI code generator 5.11.2
#
# WARNING! All changes made in this file will be lost!
from PyQt5 import QtCore, QtGui, QtWidgets
class Ui_MainWindow(object):
def setupUi(self, MainWindow):
MainWindow.setObjectName("MainWindow")
MainWindow.resize(638, 350)
sizePolicy = QtWidgets.QSizePolicy(QtWidgets.QSizePolicy.Fixed, QtWidgets.QSizePolicy.Fixed)
sizePolicy.setHorizontalStretch(0)
sizePolicy.setVerticalStretch(0)
sizePolicy.setHeightForWidth(MainWindow.sizePolicy().hasHeightForWidth())
MainWindow.setSizePolicy(sizePolicy)
MainWindow.setMinimumSize(QtCore.QSize(638, 350))
MainWindow.setMaximumSize(QtCore.QSize(638, 350))
self.centralwidget = QtWidgets.QWidget(MainWindow)
self.centralwidget.setObjectName("centralwidget")
self.start_btn = QtWidgets.QPushButton(self.centralwidget)
self.start_btn.setGeometry(QtCore.QRect(10, 100, 191, 71))
font = QtGui.QFont()
font.setPointSize(20)
self.start_btn.setFont(font)
self.start_btn.setObjectName("start_btn")
self.open_btn = QtWidgets.QPushButton(self.centralwidget)
self.open_btn.setGeometry(QtCore.QRect(10, 10, 191, 71))
font = QtGui.QFont()
font.setPointSize(15)
self.open_btn.setFont(font)
self.open_btn.setObjectName("open_btn")
self.listWidget = QtWidgets.QListWidget(self.centralwidget)
self.listWidget.setGeometry(QtCore.QRect(220, 10, 411, 311))
self.listWidget.setFrameShape(QtWidgets.QFrame.StyledPanel)
self.listWidget.setEditTriggers(QtWidgets.QAbstractItemView.DoubleClicked|QtWidgets.QAbstractItemView.SelectedClicked)
self.listWidget.setAlternatingRowColors(True)
self.listWidget.setObjectName("listWidget")
self.progressBar = QtWidgets.QProgressBar(self.centralwidget)
self.progressBar.setEnabled(False)
self.progressBar.setGeometry(QtCore.QRect(10, 280, 191, 41))
self.progressBar.setProperty("value", 0)
self.progressBar.setObjectName("progressBar")
self.pushButton = QtWidgets.QPushButton(self.centralwidget)
self.pushButton.setEnabled(False)
self.pushButton.setGeometry(QtCore.QRect(10, 180, 191, 71))
font = QtGui.QFont()
font.setPointSize(20)
self.pushButton.setFont(font)
self.pushButton.setObjectName("pushButton")
MainWindow.setCentralWidget(self.centralwidget)
self.statusbar = QtWidgets.QStatusBar(MainWindow)
self.statusbar.setObjectName("statusbar")
MainWindow.setStatusBar(self.statusbar)
self.retranslateUi(MainWindow)
QtCore.QMetaObject.connectSlotsByName(MainWindow)
def retranslateUi(self, MainWindow):
_translate = QtCore.QCoreApplication.translate
MainWindow.setWindowTitle(_translate("MainWindow", "firmenabc scraper"))
self.start_btn.setText(_translate("MainWindow", "Start"))
self.open_btn.setText(_translate("MainWindow", "Open File"))
self.pushButton.setText(_translate("MainWindow", "Stop"))
А вот код парсера:
import requests, csv, sys, os
import gui
from bs4 import BeautifulSoup as bs
from PyQt5 import QtWidgets
from PyQt5.QtCore import Qt
from multiprocessing import Pool
class ScrapeApp(QtWidgets.QMainWindow, gui.Ui_MainWindow):
def __init__(self):
# Это здесь нужно для доступа к переменным, методам
# и т.д. в файле gui_table.py
super().__init__()
self.setupUi(self) # Это нужно для инициализации нашего дизайна
self.open_btn.clicked.connect(self.open_file)
self.start_btn.clicked.connect(self.scrape)
self.start_btn.clicked.connect(self.stop)
self.statusBar().showMessage('Ready')
self.completed = 0
def open_file(self):
filename = QtWidgets.QFileDialog.getOpenFileName(self, 'Open .txt file')[0]
if filename and filename.split('.')[-1] == 'txt':
with open(filename, 'rb') as file:
content = file.read().splitlines()
self.statusbar.showMessage('File opened successfully!')
self.listWidget.clear()
for word in content:
item = QtWidgets.QListWidgetItem(word.decode('iso-8859-14'))
item.setFlags(Qt.ItemIsEnabled | Qt.ItemIsSelectable | Qt.ItemIsEditable)
self.listWidget.addItem(item)
else:
self.statusbar.showMessage('Wrong file extension. Please, choose .txt file')
def stop(self):
self.stop()
def scrape(self):
self.progressBar.setEnabled(True)
self.start_btn.setDisabled(True)
self.pushButton.setEnabled(True)
while self.completed <= 100:
if self.listWidget.count() == 0:
self.open_file()
self.statusbar.showMessage('Started scraping process...')
with open('result.csv', 'w', newline='') as file:
writer = csv.writer(file)
header = ['Name', 'Street', 'Zip Code', 'City', 'Region', 'Email', 'Phone', 'Website']
writer.writerow(header)
rows = []
for index in range(self.listWidget.count()):
rows.append(self.listWidget.item(index).text())
print('rows:', rows)
for keyword in rows:
soup = get_soup(keyword, '')
max_pages = soup.find('ol', {'class': 'pagination'}).find_all('li')[-1].get_text().strip()
print('max pages:', max_pages)
links = get_links(soup)
with Pool(10) as p:
p.map(make_all, links)
if int(max_pages) > 1:
si = 50
for page in range(2, int(max_pages)):
new_soup = get_soup(keyword, si)
new_links = get_links(new_soup)
with Pool(10) as pp:
pp.map(make_all, new_links)
si += 50
self.completed += 100 / len(rows)
self.progressBar.setValue(self.completed)
self.statusbar.showMessage('Scraping process finished successfully!')
self.stop()
def get_soup(word, si):
r = requests.get('https://www.firmenabc.at/result.aspx?what={}&where=&exact=false&inTitleOnly=false&l=&si={}'
'0&iid=&sid=-1&did=&cc='.format(word, si))
soup = bs(r.text, 'lxml')
return soup
def get_links(sp): # extract companies links from current page's soup
links = [a.get('href') for a in sp.find_all('a', {'itemprop': 'url'})]
return links
def scrape_items(link): # extract required data of each company
soup = bs(requests.get(link).text, 'lxml')
name = soup.find('div', {'itemprop': 'name'}).get_text().strip()
street = soup.find('span', {'itemprop': 'streetAddress'}).get_text().strip()
zip_code = soup.find('span', {'itemprop': 'postalCode'}).get_text().strip()
city = soup.find('span', {'itemprop': 'addressLocality'}).get_text().strip()
try:
region = soup.find('meta', {'itemprop': 'addressRegion'}).get('content')
except AttributeError:
try:
region = soup.find('meta', {'property': 'og:region'}).get('content')
except AttributeError:
try:
region = soup.find('meta', {'itemprop': 'og:region'}).get('content')
except AttributeError:
region = city
try:
email = soup.find('meta', {'property': 'og:email'}).get('content')
except AttributeError:
try:
email = soup.find('a', {'itemprop': 'email'}).get_text().strip()
except AttributeError:
email = ''
try:
phone = soup.find('meta', {'property': 'og:phonenumber'}).get('content')
except AttributeError:
try:
phone = soup.find('span', {'itemprop': 'telephone'}).get_text().strip()
except AttributeError:
phone = ''
try:
website = soup.find('div', {'class': 'contact-data'}).find('a', {'itemprop': 'url'}).get('href')
except AttributeError:
website = ''
value = [name, street, zip_code, city, region, email, phone, website]
print(value)
return value
def save(lst):
with open('result.csv', 'a', newline='') as file:
writer = csv.writer(file)
writer.writerow(lst)
def make_all(link):
data = scrape_items(link)
save(data)
def main():
app = QtWidgets.QApplication(sys.argv)
window = ScrapeApp()
window.show()
app.exec_()
if __name__ == '__main__':
main()
Собственно вопрос в чем - когда нажимаю на Старт, основное окно блокируется и я не могу нажимать на кнопку Стоп. Как сделать ее активной? Подскажите где что почитать, пожалуйста. Ну, и если пример покажете - буду тоже благодарен.... Ну и следующий вопрос - как остановить выполнение парсера кнопкой Стоп?
P.S.: на вход программа получает простой текстовый файл с ключевыми словами, типа
"
tischlerei
möbel
bau
garten
"