import asyncio
import time
from concurrent.futures import ProcessPoolExecutor
from grab import Grab
import random
import psycopg2
# Connect the main process to the database
connection = psycopg2.connect(
    database="<....>",
    user="<....>",
    password="<....>",
    host="127.0.0.1",
    port="5432",
)
# Cursor for the queries issued by the main process
c = connection.cursor()
# Load the settings row (id=1); all of its columns are fetched
c.execute("SELECT * FROM <....> WHERE id=1;")
data = c.fetchall()
# Remember when the script started so the total run time can be reported
start_time = time.time()
def operation(link):
    """Open ``link`` with Grab while holding a dedicated database connection.

    A fresh connection is opened on every call and is always closed before
    returning, even if setting up the cursor or Grab fails.

    Args:
        link: URL passed to ``Grab.go``.

    Returns:
        None. A link that fails to load is logged and skipped (best effort).
    """
    import logging

    # open a new connection to the database
    conn = psycopg2.connect(
        database="<....>",
        user="<....>",
        password="<....>",
        host="127.0.0.1",
        port="5432",
    )
    try:
        # cursor for per-link queries (not used in the code shown here)
        curs = conn.cursor()
        # init grab framework
        g = Grab()
        # try to find some elements on the page
        try:
            # open link
            g.go(link)
        except Exception as exc:
            # Stay best-effort, but do not swallow KeyboardInterrupt/SystemExit
            # and leave a trace of what went wrong.
            logging.getLogger(__name__).warning(
                "failed to open %s: %s", link, exc
            )
    finally:
        # Release the connection on every path so failures cannot leak it.
        conn.close()
async def main(item):
    """Run ``operation(item)`` in the executor ``p`` and wait for it to finish.

    Written as a native coroutine: the generator-based ``@asyncio.coroutine``
    decorator was removed in Python 3.11.

    Args:
        item: Value handed to ``operation`` as its only argument.
    """
    await loop.run_in_executor(p, operation, item)
# Run the scheduling code only when executed as a script: worker processes
# started with the "spawn" method re-import this module and must not build
# pools or schedule tasks of their own.
if __name__ == "__main__":
    # Create the loop explicitly instead of relying on get_event_loop() to
    # create one implicitly (deprecated in recent Python versions).
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        try:
            # Select all urls which need to be processed
            c.execute("SELECT url FROM <....> ORDER BY id;")
            urls = [row[0] for row in c.fetchall()]
        finally:
            # Close main connection to the database, even if the query failed
            connection.close()

        # Pool size comes from column 13 of the settings row
        # (200 according to the original comment).
        # NOTE(review): every ProcessPoolExecutor worker is a separate OS
        # process, so a pool this large multiplies memory use. If operation()
        # mostly waits on the network, a thread pool or a much smaller pool
        # is probably a better fit - confirm before changing.
        with ProcessPoolExecutor(data[0][13]) as p:
            # Wrap the coroutines in Tasks: asyncio.wait() no longer accepts
            # bare coroutine objects.
            tasks = [loop.create_task(main(url)) for url in urls]
            # asyncio.wait() raises ValueError for an empty collection
            if tasks:
                loop.run_until_complete(asyncio.wait(tasks))
    finally:
        loop.close()
    # Report total run time
    print("--- %s seconds ---" % (time.time() - start_time))
Собственно, вопрос в названии топика. Уже не знаю, что сделать для оптимизации (4 GB RAM загружены до предела, CPU — 90%+). Сервер просто ложится от такой нагрузки. Что можно предпринять?