import sys
# NOTE(review): fragment quoted out of context — `await` is only valid inside
# an async function (or a REPL with top-level await enabled).
text = await response.text()
sys.getsizeof(text) # size in bytes (shallow: counts only the str object itself)
start_time = time.time()
# NOTE(review): snippet as pasted — the two lines below lost their indentation
# under the `async with`, and `async with` itself needs an enclosing async def.
async with session.get(url, headers=headers) as response:
response_time = time.time() - start_time  # wall-clock time until the response arrived
date = response.headers.get("DATE")
ulimit -n 65535

— на всякий случай, если планируете запускать настолько много тасков: команда поднимает лимит открытых файловых дескрипторов.

from domainlist import site_pages, real_site, site_pages_mini, site_pages_react, site_pages_all, site_pages_random_3000, site_pages_random_1000, site_pages_random_2000, list_5000
from memory_profiler import memory_usage
import random
import aiohttp
import asyncio
import time
# Global concurrency cap: at most 100 requests may be in flight at once.
sem = asyncio.Semaphore(100)
async def fetch(url, session):
    """Fetch *url* with *session*; return the response body, or None on any error.

    Logs the (pre-redirect) status code on success and the exception on failure,
    so a single bad URL never aborts the whole crawl.
    """
    headers = {
        "Accept": "application/json, text/plain, */*",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
        "Accept-Language": "zh-CN,zh;q=0.9",
        "accept-encoding": "gzip, deflate",
    }
    try:
        # NOTE: timeout=0.1 is a 100 ms total timeout — most of the "errors"
        # printed below will be timeouts unless that aggressiveness is intended.
        async with session.get(url, timeout=0.1, headers=headers) as response:
            date = response.headers.get("DATE")
            # If the server redirected, report the status of the first hop.
            code_status = response.history[0].status if response.history else response.status
            print('%s -> Status Code: %s' % (url, code_status))
            return await response.read()
    except Exception as e:  # broad by design: log-and-skip per URL
        # Fixed typo in the log message ("exeption") and made the None
        # return explicit; the trailing `pass` after print was dead code.
        print("{} : exception {} / class {}".format(url, e, e.__class__))
        return None
async def bound_fetch(url, session):
    """Fetch *url* while holding the module-level semaphore, capping concurrency."""
    await sem.acquire()
    try:
        await fetch(url, session)
    finally:
        sem.release()
async def run():
    """Crawl every URL in real_site concurrently through one shared session."""
    urls = real_site
    async with aiohttp.ClientSession() as session:
        # One task per URL; bound_fetch enforces the semaphore limit.
        tasks = [asyncio.ensure_future(bound_fetch(u, session)) for u in urls]
        await asyncio.gather(*tasks)
# Drive the crawl to completion and report total wall-clock time.
# run_until_complete wraps a bare coroutine in a task itself, so the explicit
# ensure_future() step is unnecessary.
loop = asyncio.get_event_loop()
global_start_time = time.time()
loop.run_until_complete(run())
print("--- %s seconds ---" % (time.time() - global_start_time))
# Second variant quoted from an answer — same global-semaphore pattern.
sem = asyncio.Semaphore(100)
# NOTE(review): incomplete sketch as pasted — the lines below lost their
# indentation, session.get() is missing its URL argument, and `...` is a
# placeholder body. Not runnable as written.
async def bound_fetch(url, session):
async with sem:
async with session.get():
...
async def main():
    """Fetch every URL in check_site concurrently through one shared session.

    Assumes module-level `check_site` (list of URLs) and `sem` exist —
    TODO confirm against the caller.
    """
    tasks = []  # was missing: `tasks` was appended to without ever being created
    async with aiohttp.ClientSession() as session:
        for url in check_site:
            # bound_fetch takes (url, session); the extra `sem` argument was a
            # bug — the semaphore is module-level and entered inside bound_fetch.
            task = asyncio.ensure_future(bound_fetch(url, session))
            tasks.append(task)
        responses = asyncio.gather(*tasks)
        await responses
Правильно ли я понимаю, что bound_fetch возвращает date? В вызове bound_fetch(sem, url, session) — sem это Semaphore? Имеется в виду такая конструкция:

    async with sem:
        async with session.get():
            ...
При str(a) в консоли выводится:

'\r\n      Чернобыль / Chernobyl\r\n    '

а при print(a) — пустота.
>>> import requests
>>> from bs4 import BeautifulSoup
>>> r = requests.get('https://myseries.ru/series?page=1')
>>> r.status_code
200
>>> soup = BeautifulSoup(r.text, 'html.parser')
>>> inf = soup.find('a', class_='series-link')
>>> inf.text
'\r\n Чернобыль / Chernobyl\r\n '
>>> print(inf.text)
Чернобыль / Chernobyl
>>>
>>> import requests
>>> from bs4 import BeautifulSoup
>>> r = requests.get('https://myseries.ru/series?page=1')
>>> r.status_code
200
>>> soup = BeautifulSoup(r.text, 'lxml')
>>> inf = soup.find('a', class_='series-link')
>>> inf.text
'\r\n Чернобыль / Chernobyl\r\n '
>>> print(inf.text)
Чернобыль / Chernobyl
>>>
print(inf.text)
Замените `for i in items:` на `for i in items[:-1]:`.
# Parse each non-empty comma-separated chunk into a list of ints.
for row in items:
    if row != '':
        numbers.append([int(value) for value in row.split(',')])
# Example line (Selenium): navigate the browser to python.org —
driver.get("http://www.python.org")
— а на деле вы пытаетесь зайти на Яндекс. Ай-ай-ай.