Ответы пользователя по тегу Парсинг
  • Скрипт на Python работает медленно, как ускорить?

    1HAWK1
    @1HAWK1
    import requests
    import pandas as pd
    from concurrent.futures import ThreadPoolExecutor, as_completed
    
    # Desktop-Chrome User-Agent string sent with every request.
    _USER_AGENT = (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.77 Safari/537.36"
    )

    # HTTP headers passed to requests via the ``headers=`` keyword.
    headers = {"User-Agent": _USER_AGENT}

    # NOTE(review): these key names are not keyword arguments understood by
    # ``requests``; they look like Google Apps Script ``UrlFetchApp`` options.
    # Confirm whether this dict is still needed.
    options = dict(muteHttpExceptions=True, followRedirects=False)
    
    def check_url(url, timeout=10):
        """Ask Google for ``site:<url>`` and report whether anything was found.

        Args:
            url: Address to look up; it is sent as the ``q=site:<url>`` query.
            timeout: Seconds to wait for Google before giving up on this URL.

        Returns:
            A ``(url, status)`` tuple. ``status`` is "Не в индексе" when the
            result page contains the "ничего не найдено" phrase, "В индексе"
            when it does not, or an error description when the request failed
            or Google answered with a non-200 status.
        """
        try:
            # The module-level ``options`` dict is deliberately NOT passed:
            # the second positional argument of ``requests.get`` is ``params``,
            # so passing it there only appended junk to the query string.
            # ``params=`` also percent-encodes the URL being checked, so
            # characters such as ``&`` or ``#`` cannot break the search query.
            response = requests.get(
                'https://www.google.ru/search',
                params={'q': f'site:{url}'},
                headers=headers,
                timeout=timeout,  # without it a stalled socket blocks forever
            )
        except requests.RequestException as exc:
            return url, f"Ошибка запроса: {exc}"

        # A blocked or failed reply does not contain the "nothing found"
        # phrase either, so it must not be reported as "indexed".
        if response.status_code != 200:
            return url, f"Ошибка HTTP {response.status_code}"

        if "ничего не найдено" in response.text:
            return url, "Не в индексе"
        return url, "В индексе"
    
    def main():
        """Check every URL from ``all-website-url.txt`` and save the statuses.

        Reads one URL per line, runs ``check_url`` for each of them in a thread
        pool, prints every result as soon as it is ready and finally writes all
        results to ``./googlecheckindex.xlsx`` with the columns "URL" and
        "Статус". Rows appear in completion order, not in input order.
        """
        with open('all-website-url.txt', encoding="utf-8") as f:
            stripped_lines = (line.strip() for line in f)
            # Blank lines would otherwise be submitted as empty URLs.
            urls = [url for url in stripped_lines if url]

        results = []

        with ThreadPoolExecutor() as executor:
            # Map each future back to its URL so a failure can still be
            # attributed to the right row.
            futures = {executor.submit(check_url, url): url for url in urls}
            for future in as_completed(futures):
                try:
                    url, status = future.result()
                except Exception as exc:
                    # One failed lookup must not abort the run and discard
                    # the results already collected; record it instead.
                    url, status = futures[future], f"Ошибка: {exc}"
                results.append({"URL": url, "Статус": status})
                print(f"{url}: {status}")

        # Explicit columns keep the header row even when there are no results.
        df = pd.DataFrame(results, columns=["URL", "Статус"])
        df.to_excel('./googlecheckindex.xlsx', index=False)
    
    # Start the check only when the file is executed as a script, not when
    # it is imported as a module.
    if __name__ == "__main__":
        main()
    Ответ написан