Задать вопрос
@Decrement

Ошибка в коде парсера Ozon, что не так?

У меня есть следующий код:

import time
import json

from selenium import webdriver
from selenium_stealth import stealth
from bs4 import BeautifulSoup

from curl_cffi import requests

def init_webdriver():
    """Start Chrome, apply ``selenium_stealth.stealth`` to it and maximize it.

    The stealth call receives fixed language, vendor, platform and WebGL
    settings; the same values are used on every run.

    Returns:
        The configured ``webdriver.Chrome`` instance.
    """
    stealth_settings = {
        "languages": ["en-US", "en"],
        "vendor": "Google Inc.",
        "platform": "Win32",
        "webgl_vendor": "Intel Inc.",
        "renderer": "Intel Iris OpenGL Engine",
        "fix_hairline": True,
    }

    browser = webdriver.Chrome()
    stealth(browser, **stealth_settings)
    browser.maximize_window()
    return browser

def scrolldown(driver, deep):
    """Scroll the current page down in ``deep`` steps of 500 px each.

    Args:
        driver: Object exposing ``execute_script`` (a Selenium WebDriver here).
        deep: Number of scroll steps to perform.

    A 0.1 s pause follows every step.
    """
    scroll_step_js = 'window.scrollBy(0, 500)'
    pause_seconds = 0.1

    for _step in range(deep):
        driver.execute_script(scroll_step_js)
        time.sleep(pause_seconds)

def get_product_info(product_url):
    """Fetch one product's data from Ozon's composer JSON API.

    Args:
        product_url: Product path as taken from a search-card ``href``; it is
            appended verbatim to the API's ``url=`` query parameter.

    Returns:
        A 7-tuple ``(product_id, full_name, description, price, rating,
        rating_counter, image_url)``. Both branches return the same shape so
        callers can always unpack seven values. For age-restricted products
        (first layout component is ``userAdultModal``) the description is a
        fixed placeholder and the last four items are ``None``.

    Raises:
        KeyError, IndexError, json.JSONDecodeError: If the response does not
            have the structure this function reads.
    """
    api_url = "https://www.ozon.ru/api/composer-api.bx/page/json/v2?url=" + product_url

    # Context manager closes the HTTP session once the response is decoded.
    with requests.Session() as session:
        raw_data = session.get(api_url)
        json_data = json.loads(raw_data.content.decode())

    full_name = json_data["seo"]["title"]

    if json_data["layout"][0]["component"] == "userAdultModal":
        # The id is the last word of the title with its first and last
        # characters stripped (presumably surrounding brackets - confirm).
        product_id = full_name.split()[-1][1:-1]
        print(product_id, full_name)
        return (product_id, full_name, "Товар для лиц старше 18 лет",
                None, None, None, None)

    # The embedded JSON-LD string is parsed once and reused for every field.
    product = json.loads(json_data["seo"]["script"][0]["innerHTML"])
    offers = product["offers"]

    description = product["description"]
    image_url = product["image"]
    price = offers["price"] + " " + offers["priceCurrency"]

    # The original indexed the raw innerHTML *string* with "ratingValue",
    # which always raises TypeError. schema.org Product markup nests rating
    # data under "aggregateRating"; fall back to the top level if that key is
    # absent. NOTE(review): confirm against a live response. Products without
    # reviews yield None for both fields.
    rating_source = product.get("aggregateRating") or product
    rating = rating_source.get("ratingValue")
    rating_counter = rating_source.get("reviewCount")

    product_id = product["sku"]

    return (product_id, full_name, description, price, rating, rating_counter, image_url)


def get_searchpage_cards(driver, url, all_cards=None):
    """Collect product cards from an Ozon search page and every following page.

    Pages are walked iteratively by following the link whose markup contains
    "Дальше" until no such link exists or a page URL repeats.

    Args:
        driver: Selenium WebDriver used to load and scroll each page.
        url: URL of the first search-results page.
        all_cards: Optional list to append results to. A fresh list is created
            when omitted, so results never leak between calls.

    Returns:
        List of ``{product_id: {...card fields...}}`` dicts, one per card,
        in page order.

    Raises:
        RuntimeError: If the search-results container cannot be located in
            the loaded page.
    """
    from urllib.parse import urljoin  # local import keeps this block self-contained

    collected = [] if all_cards is None else all_cards
    visited = set()  # guards against a "next" link that loops back
    page_url = url

    while page_url and page_url not in visited:
        visited.add(page_url)
        driver.get(page_url)
        scrolldown(driver, 20)
        search_page_html = BeautifulSoup(driver.page_source, "html.parser")

        # Each lookup may return None; check step by step so a missing element
        # produces a descriptive error instead of
        # "'NoneType' object has no attribute 'find'".
        layout = search_page_html.find("div", {"id": "layoutPage"})
        content = layout.find("div") if layout is not None else None
        container = None
        if content is not None:
            container = content.find("div", {"class": "widget-search-result-container"})
        grid = container.find("div") if container is not None else None
        if grid is None:
            raise RuntimeError(
                f"Search results container not found on {page_url}: "
                "the page layout changed, the page did not finish loading, "
                "or the request was blocked"
            )

        for card in grid.findChildren(recursive=False):
            link = card.find("a", href=True)
            if link is None:
                # Child without a link cannot be resolved to a product; skip it.
                continue
            card_url = link["href"]

            name_tag = card.find("span", {"class": "tsBody500Medium"})
            has_name = name_tag is not None and len(name_tag.contents) > 0
            card_name = name_tag.contents[0] if has_name else None

            # urljoin avoids the double slash that plain concatenation gives
            # for a root-relative href.
            product_url = urljoin("https://ozon.ru/", card_url)

            # get_product_info may return fewer than seven items for
            # age-restricted products; pad with None so unpacking never fails.
            info = tuple(get_product_info(card_url))
            info += (None,) * (7 - len(info))
            product_id, full_name, description, price, rating, rating_counter, image_url = info

            collected.append({product_id: {"short_name": card_name,
                                           "full_name": full_name,
                                           "description": description,
                                           "url": product_url,
                                           "rating": rating,
                                           "rating_counter": rating_counter,
                                           "price": price,
                                           "image_url": image_url
                                           }
                              })
            print(product_id, "- DONE")

        next_links = [a for a in content.find_all("a", href=True) if "Дальше" in str(a)]
        page_url = urljoin("https://www.ozon.ru", next_links[0]["href"]) if next_links else None

    return collected


if __name__ == "__main__":
    # Script entry point: run each search query and gather every scraped card.
    url_ozon = "https://www.ozon.ru"

    search_list = ["Шарф", "Шапка", "Кепка"]
    end_list = list()

    driver = init_webdriver()
    # try/finally guarantees the browser is closed even if scraping fails.
    try:
        for search_tag in search_list:
            url_search = f"{url_ozon}/search/?text={search_tag}&from_global=true"

            search_cards = get_searchpage_cards(driver, url_search)
            print("Я успешно нашёл", len(search_cards), "по поиску", search_tag)
            # Keep the scraped cards; appending the tag itself threw them away.
            end_list.extend(search_cards)
        print(end_list)
    finally:
        driver.quit()


При его запуске выдаёт ошибку:
68842aa4addf4744562947.jpeg

Дело в том, что раньше всё работало, и я не могу найти, что именно поменялось, однако теперь парсер не может найти элемент "div".
  • Вопрос задан
  • 95 просмотров
Подписаться 1 Простой 3 комментария
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Похожие вопросы