@Harbik

Парсинг результатов DuckDuckGo,google,bing?

При запуске код должен искать заметки из блокнота в разных поисковых системах, но сейчас он ищет только в Google и дает неверные результаты.


Результаты после поиска в google:
https://accounts.google. com/ServiceLogin?hl=de&continue=wait
https://www.google. com/search?q%3Dinurl:%2522/view/index.shtml%2522%250A&gae=cb- eomcrt
https://policies.google. com/technologies/cookies?hl=de&utm_source=ucb


import requests
from bs4 import BeautifulSoup
import sqlite3

# Open the file with search queries
with open('search_queries.txt', 'r') as file:
    queries = file.readlines()

# Create or connect to the SQLite database
conn = sqlite3.connect('unique_links.db')
cursor = conn.cursor()

# Create a table if it doesn't exist
cursor.execute('''CREATE TABLE IF NOT EXISTS links (url TEXT UNIQUE)''')
conn.commit()

# Function to perform a search query and collect unique links
def search_and_collect_links(query, search_engine):
    if search_engine == 'duckduckgo':
        search_url = f'https://duckduckgo.com/html/?q={query}'
    elif search_engine == 'google':
        search_url = f'https://www.google.com/search?q={query}'
    elif search_engine == 'bing':
        search_url = f'https://www.bing.com/search?q={query}'

    response = requests.get(search_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract links from the search results
    if search_engine == 'google':
        links = [a['href'] for a in soup.find_all('a', href=True) if a['href'].startswith("http")]
    else:
        links = [a['href'] for a in soup.find_all('a', class_='result__url')]

    # Filter unique links and save them to the database
    for link in links:
        cursor.execute('INSERT OR IGNORE INTO links (url) VALUES (?)', (link,))
        conn.commit()

# Iterate through all search queries
for query in queries:
    for search_engine in ['duckduckgo', 'google', 'bing']:
        search_and_collect_links(query, search_engine)

# Create the 'unique_links.txt' file and save unique links to it
with open('unique_links.txt', 'w') as link_file:
    cursor.execute('SELECT url FROM links')
    unique_links = cursor.fetchall()
    for link in unique_links:
        link_file.write(link[0] + '\n')

# Close the connection to the database
conn.close()
  • Вопрос задан
  • 57 просмотров
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Войти через центр авторизации
Похожие вопросы