import requests
import pandas as pd
from concurrent.futures import ThreadPoolExecutor, as_completed
# HTTP headers sent with the search request; the User-Agent identifies the
# client as a desktop Chrome browser.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/70.0.3538.77 Safari/537.36"
    ),
}

# Request options.
# NOTE(review): these key names look like Google Apps Script UrlFetchApp
# options rather than `requests` keyword arguments -- confirm how they are
# meant to be applied before relying on them.
options = dict(
    muteHttpExceptions=True,
    followRedirects=False,
)
def check_url(url, timeout=15.0):
    """Report whether Google returns results for a ``site:`` query on *url*.

    Args:
        url: Address to look up; it is placed after ``site:`` in the search
            query.
        timeout: Seconds to wait for Google before giving up on the request.

    Returns:
        A ``(url, status)`` tuple where ``status`` is:

        * ``"Не в индексе"`` if the result page contains the phrase
          "ничего не найдено";
        * ``"В индексе"`` if a successful response does not contain it;
        * ``"Ошибка запроса: ..."`` if the request failed, timed out or
          came back with an HTTP error status, since such a response says
          nothing about whether the URL is indexed.
    """
    try:
        # Let requests build the query string so characters such as "&", "#"
        # or spaces inside *url* are percent-encoded instead of corrupting
        # the query.
        #
        # The module-level ``options`` dict is deliberately not forwarded:
        # its keys are not requests arguments, and passing it positionally
        # made requests send them to Google as extra query parameters.
        response = requests.get(
            "https://www.google.ru/search",
            params={"q": f"site:{url}"},
            headers=headers,
            timeout=timeout,
        )
        # An error page (rate limit, server error) never contains the
        # "nothing found" phrase and would otherwise be counted as indexed.
        response.raise_for_status()
    except requests.RequestException as exc:
        return url, f"Ошибка запроса: {exc}"

    if "ничего не найдено" in response.text:
        return url, "Не в индексе"
    return url, "В индексе"
def main():
    """Check every URL listed in ``all-website-url.txt`` and save the results.

    Reads one URL per line (blank lines are ignored), runs ``check_url`` for
    each of them in a thread pool, prints every result as soon as it is
    available and finally writes all results to ``./googlecheckindex.xlsx``
    with the columns "URL" and "Статус". Rows appear in completion order,
    not in input order.
    """
    with open('all-website-url.txt', encoding="utf-8") as f:
        # Drop blank lines (e.g. a trailing newline at the end of the file)
        # so that no empty "site:" query is sent.
        urls = [url for url in map(str.strip, f) if url]

    results = []
    with ThreadPoolExecutor() as executor:
        # Remember which URL each future belongs to so a failed check can
        # still be reported against the right URL.
        pending = {executor.submit(check_url, url): url for url in urls}
        for future in as_completed(pending):
            try:
                url, status = future.result()
            except Exception as exc:
                # Batch boundary: record the failure for this URL instead of
                # aborting the run and losing the results collected so far.
                url, status = pending[future], f"Ошибка: {exc}"
            results.append({"URL": url, "Статус": status})
            print(f"{url}: {status}")

    # Explicit columns keep the header row even when there are no results.
    df = pd.DataFrame(results, columns=["URL", "Статус"])
    df.to_excel('./googlecheckindex.xlsx', index=False)
# Run the check only when the file is executed as a script, not on import.
if __name__ == "__main__":
    main()