Есть скрипт:
from concurrent.futures import ThreadPoolExecutor, Future, TimeoutError
from urllib.parse import urlparse
from threading import RLock
import threading
import requests
import urllib3
import time
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
lock = threading.Lock()
with open('lists/0.txt') as lines:
domains = iter(['http://' + line.strip() for line in lines])
db = set()
def worker(domain):
try:
r = requests.head(domain, verify=False, allow_redirects=True, timeout=3)
parsed_uri = urlparse(r.url)
if parsed_uri.netloc.startswith('www.'):
parsed_domain = '{uri.scheme}://{}'.format(parsed_uri.netloc[4:], uri=parsed_uri)
else:
parsed_domain = '{uri.scheme}://{uri.netloc}'.format(uri=parsed_uri)
tmp_domain = '{uri.netloc}'.format(uri=parsed_uri)
r.raise_for_status()
except (requests.exceptions.ConnectionError, requests.exceptions.HTTPError, requests.exceptions.TooManyRedirects, requests.exceptions.Timeout):
if tmp_domain not in db:
with lock:
with open('error.txt', 'a') as f:
f.write(parsed_domain + '\n')
db.add(tmp_domain)
return
except:
if tmp_domain not in db:
with lock:
with open('unknow.txt', 'a') as f:
f.write(parsed_domain + '\n')
db.add(tmp_domain)
return
if tmp_domain not in db:
with lock:
with open('good.txt', 'a') as f:
f.write(parsed_domain + '\n')
db.add(tmp_domain)
def task_queue(task, iterator, concurrency=10):
def submit():
try:
with lock:
obj = next(iterator)
except StopIteration:
return
stats['delayed'] += 1
future = executor.submit(task, obj)
future.add_done_callback(upload_done)
def upload_done(future):
with io_lock:
submit()
stats['delayed'] -= 1
stats['done'] += 1
io_lock = RLock()
executor = ThreadPoolExecutor(concurrency)
stats = {'done': 0, 'delayed': 0}
for _ in range(concurrency):
submit()
return stats
stats = task_queue(worker, domains, concurrency=150)
while True:
#print('\rdone {done}, in work: {delayed} '.format(**stats))
if stats['delayed'] == 0:
break
time.sleep(0.2)
Код работает, но мне кажется он пропускает поситивные срабатывания. Подскажите, пожалуйста, как исправить.