@lexansk

Как спарсить скидки с hoff.ru?

Пытаюсь разбираться с парсингом json, пока не очень успешно. Перепробовал разные варианты и пришел к схеме записи в файл. Все бы ничего, но это прекрасно работает, когда страница одна, а когда несколько, склеить их не получается. В итоге решил сразу парсить и писать результат в csv. По моей логике программа должна работать, но вываливается с ошибкой:
NameError: name 'id_' is not defined
в функции files_writer(item)
Подозреваю, что дело в вызове функции из функции(никогда такого не делал): files_writer(get_items(url))

# -*- coding: utf-8 -*-
import requests
import json
import csv
import datetime


headers = {
    'authority': 'hoff.ru',
    'method': 'GET',
    'path': '/vue/catalog/section/?category_id=5779&limit=30&offset=0&showCount=true&type=product_list&sort=discount_desc',
    'scheme': 'https',
    'cache-control': 'max-age=0',
    "accept": "application/json, text/plain, */*",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
}
cookie = {
    'cookie': '_ym_uid=1590116755735318163; _ym_d=1647941356; __exponea_etc__=73620569-e14e-4d32-946d-d47e0931a75e; amlitude_samp=770132357; rrpvid=690663027500351; tmr_lvidTS=1590116754659; tmr_lvid=e845d934c1c4ec58c8ea2efeb98d1141; _userGUID=0:l11xqhwo:2GZO1tDwEW0tAfmTS6iP3oUKso7J7hea; rcuid=5e8a84cd92b43f00011a5ed4; adspire_uid=AS.591982479.1647941362; flocktory-uuid=4b874de9-d31f-4e86-85a6-5fa61be620e2-5; BITRIX_SM_USER_ID=6154415; BITRIX_SM_SOUND_LOGIN_PLAYED=Y; uxs_uid=9cbc2bd0-a9c2-11ec-9141-7d7017611ec4; current_location_id=3922; current_city=814; cted=modId%3Deb7999f8%3Bclient_id%3D241883441.1647941357%3Bya_client_id%3D1590116755735318163%7CmodId%3Dnunlkcp4%3Bclient_id%3D241883441.1647941357%3Bya_client_id%3D1590116755735318163; _ct_site_id=30991; _ct=1000000000267955568; _ct_client_global_id=912fd753-be6b-57f4-b0d8-b7d35bad8bb6; _cc_id=40d66771cf78de15881afcf3bba38660; current_location_data=a%3A4%3A%7Bs%3A5%3A%22chain%22%3Ba%3A2%3A%7Bi%3A0%3Bi%3A68%3Bi%3A1%3Bi%3A3922%3B%7Ds%3A4%3A%22name%22%3Bs%3A22%3A%22%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA%22%3Bs%3A9%3A%22full_name%22%3Bs%3A57%3A%22%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA%D0%B0%D1%8F%20%D0%BE%D0%B1%D0%BB%2C%20%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA%22%3Bs%3A11%3A%22location_id%22%3Bi%3A3922%3B%7D; AdRiver=complete; _gaexp=GAX1.2.HgGfS9F4SKelSRUlVEYKNQ.19201.1; rrlevt=1653892685372; DIGI_CARTID=52398536566; advcake_track_id=a7ef2273-0949-78fb-9409-dd981f60db90; advcake_session_id=298d2304-bf97-db60-fc85-ee861c45ad88; PHPSESSID=kfut57bsee446prh5d2n2njh2q; BITRIX_SM_SALE_UID=1501334645; _gcl_au=1.1.1720181463.1655776048; _ct_ids=nunlkcp4%3A30991%3A389139721; _ct_session_id=389139721; _gpVisits={"isFirstVisitDomain":true,"todayD":"Tue%20Jun%2028%202022","idContainer":"1000248A"}; amp_1fb6bd=N81CqoXf_HdeRwSMgAhgwL.NjE1NDQxNQ==..1g6lcgrp5.1g6lcl2g8.0.0.0; _ga=GA1.2.241883441.1647941357; tmr_reqNum=3423; _ga_RQLWT53ZT9=GS1.1.1656427671.30.1.1656428301.60; _ga_S2WZY9J7P9=GS1.1.1656427671.30.1.1656428301.60; _ga_444YM4BF0J=GS1.1.1656427671.30.1.1656428301.60; BITRIX_SM_ab_test_multi=%7B%22aa06%22%3A%7B%22ID%22%3A%227710124%22%2C%22NAME%22%3A%22aa06%22%2C%22GROUP%22%3A%22A%22%7D%2C%22aa07%22%3A%7B%22ID%22%3A%227710127%22%2C%22NAME%22%3A%22aa07%22%2C%22GROUP%22%3A%22B%22%7D%2C%22aa08%22%3A%7B%22ID%22%3A%227710128%22%2C%22NAME%22%3A%22aa08%22%2C%22GROUP%22%3A%22A%22%7D%2C%22aa09%22%3A%7B%22ID%22%3A%227710129%22%2C%22NAME%22%3A%22aa09%22%2C%22GROUP%22%3A%22B%22%7D%2C%22aa10%22%3A%7B%22ID%22%3A%227710131%22%2C%22NAME%22%3A%22aa10%22%2C%22GROUP%22%3A%22B%22%7D%2C%22reg_bk_and_yz%22%3A%7B%22ID%22%3A%228122416%22%2C%22NAME%22%3A%22reg_bk_and_yz%22%2C%22GROUP%22%3A%22B%22%7D%2C%22checkbox%22%3A%7B%22ID%22%3A%227971955%22%2C%22NAME%22%3A%22checkbox%22%2C%22GROUP%22%3A%22A%22%7D%2C%22cartpopap%22%3A%7B%22ID%22%3A%228270671%22%2C%22NAME%22%3A%22cartpopap%22%2C%22GROUP%22%3A%22B%22%7D%2C%22KS%22%3A%7B%22ID%22%3A%228116774%22%2C%22NAME%22%3A%22KS%22%2C%22GROUP%22%3A%22%22%7D%2C%223d%22%3A%7B%22ID%22%3A%228123834%22%2C%22NAME%22%3A%223d%22%2C%22GROUP%22%3A%22%22%7D%2C%22ar%22%3A%7B%22ID%22%3A%228123836%22%2C%22NAME%22%3A%22ar%22%2C%22GROUP%22%3A%22%22%7D%2C%22kt_left%22%3A%7B%22ID%22%3A%228107330%22%2C%22NAME%22%3A%22kt_left%22%2C%22GROUP%22%3A%22B%22%7D%2C%22rr_basket%22%3A%7B%22ID%22%3A%228362213%22%2C%22NAME%22%3A%22rr_basket%22%2C%22GROUP%22%3A%22%22%7D%2C%22services%22%3A%7B%22ID%22%3A%228362535%22%2C%22NAME%22%3A%22services%22%2C%22GROUP%22%3A%22B%22%7D%2C%22credit_new_widget%22%3A%7B%22ID%22%3A%228411623%22%2C%22NAME%22%3A%22credit_new_widget%22%2C%22GROUP%22%3A%22A%22%7D%2C%22dates%22%3A%7B%22ID%22%3A%227895663%22%2C%22NAME%22%3A%22dates%22%2C%22GROUP%22%3A%22%22%7D%2C%22anyquery%22%3A%7B%22ID%22%3A%228402103%22%2C%22NAME%22%3A%22anyquery%22%2C%22GROUP%22%3A%22A%22%7D%2C%22rr_popup%22%3A%7B%22ID%22%3A%228413423%22%2C%22NAME%22%3A%22rr_popup%22%2C%22GROUP%22%3A%22A%22%7D%2C%22alter_sort%22%3A%7B%22ID%22%3A%228377079%22%2C%22NAME%22%3A%22alter_sort%22%2C%22GROUP%22%3A%22%22%7D%2C%22card_registration%22%3A%7B%22ID%22%3A%228415737%22%2C%22NAME%22%3A%22card_registration%22%2C%22GROUP%22%3A%22%22%7D%7D; iwaf_http_cookie_291e829ea12795e7e56f937263071616=c1271521a9ab3fdc67b3365fac08f060; iwaf_js_cookie_291e829ea12795e7e56f937263071616=0a14339f8b56b67efec237330b17c6f9'
}
cur_time = datetime.datetime.now().strftime('%d_%m_%Y')
temp_url = 'https://hoff.ru/vue/catalog/section/?category_id=5779&limit=30&offset=0&showCount=true&type=product_list&sort=discount_desc'

def writeCsvHeader():
    with open(f'{cur_time}_results.csv', 'w', encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(
            (
                "ID",
                "Name",
                "Discount",
                "New_price",
                "Old_price",
                "URL"
            )
        )
def get_last_item(temp_url):
    request = requests.get(url=temp_url, headers=headers, cookies=cookie)

    with open("temp.json", "w") as file:
        json.dump(request.json(), file, indent=4)
    with open('temp.json') as file:
        last_item = json.load(file)['data']['total_count']

        return last_item

def get_items(url):

    request = requests.get(url=url, headers=headers, cookies=cookie)

    with open("r.json", "w") as file:
        json.dump(request.json(), file, indent=4)

    with open('r.json') as file:
        data = json.load(file)['data']
        items = data['items']



    for item in items:
        id_ = item['id']
        name = item['name']
        old_price = item['prices']['old']
        new_price = item['prices']['new']
        discount = item['discount']
        page_url = 'https://hoff.ru/' + item['detail_page_url']

    return item



def files_writer(item):

    with open(f'{cur_time}_results.csv', 'a', encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(
            (
                id_,
                name,
                discount,
                new_price,
                old_price,
                page_url
            )
        )







def main():
    writeCsvHeader()  # Write csv file
    print(f'[INFO] Заголовок файла {cur_time}_results.csv записан успешно...')
    last_item = get_last_item(temp_url) # get last item
    print(f'[INFO] Найдено {last_item} товаров в категории...')
    for x in range(0, last_item, 30):
        print(f'[WORKING] Обрабатываем {int(x/30)+1} страницу из {int(last_item/30+1)}. Выполнено {int(((x/30)*100)/(last_item/30))}%')
        url = f'https://hoff.ru/vue/catalog/section/?category_id=5779&limit=30&offset={x}&showCount=true&type=product_list&sort=discount_desc'
        files_writer(get_items(url))

if __name__ == "__main__":
    main()
  • Вопрос задан
  • 186 просмотров
Пригласить эксперта
Ответы на вопрос 2
tumbler
@tumbler Куратор тега Python
бекенд-разработчик на python
Ошибка намекает, что Вы переменную id_ не определили, а код в функции files_writer - что не её одну. Похоже, тут проблема с базовыми знаниями python, которые стоит подтянуть с помощью обучающих материалов. А ответ на вопрос "как спарсить скидки" - выучить язык для начала.
Ответ написан
Комментировать
@lexansk Автор вопроса
Спасибо за ответ. Как я и говорил выше, я только вначале обучения. И мне интереснее учиться на полезных примерах. Исходя из ваших замечаний, я модифицировал код:
# -*- coding: utf-8 -*-
import requests
import json
import csv
import datetime

headers = {
    'authority': 'hoff.ru',
    'method': 'GET',
    'path': '/vue/catalog/section/?category_id=5779&limit=30&offset=0&showCount=true&type=product_list&sort=discount_desc',
    'scheme': 'https',
    'cache-control': 'max-age=0',
    "accept": "application/json, text/plain, */*",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36"
}
cookie = {
    'cookie': '_ym_uid=1590116755735318163; _ym_d=1647941356; __exponea_etc__=73620569-e14e-4d32-946d-d47e0931a75e; amlitude_samp=770132357; rrpvid=690663027500351; tmr_lvidTS=1590116754659; tmr_lvid=e845d934c1c4ec58c8ea2efeb98d1141; _userGUID=0:l11xqhwo:2GZO1tDwEW0tAfmTS6iP3oUKso7J7hea; rcuid=5e8a84cd92b43f00011a5ed4; adspire_uid=AS.591982479.1647941362; flocktory-uuid=4b874de9-d31f-4e86-85a6-5fa61be620e2-5; BITRIX_SM_USER_ID=6154415; BITRIX_SM_SOUND_LOGIN_PLAYED=Y; uxs_uid=9cbc2bd0-a9c2-11ec-9141-7d7017611ec4; current_location_id=3922; current_city=814; cted=modId%3Deb7999f8%3Bclient_id%3D241883441.1647941357%3Bya_client_id%3D1590116755735318163%7CmodId%3Dnunlkcp4%3Bclient_id%3D241883441.1647941357%3Bya_client_id%3D1590116755735318163; _ct_site_id=30991; _ct=1000000000267955568; _ct_client_global_id=912fd753-be6b-57f4-b0d8-b7d35bad8bb6; _cc_id=40d66771cf78de15881afcf3bba38660; current_location_data=a%3A4%3A%7Bs%3A5%3A%22chain%22%3Ba%3A2%3A%7Bi%3A0%3Bi%3A68%3Bi%3A1%3Bi%3A3922%3B%7Ds%3A4%3A%22name%22%3Bs%3A22%3A%22%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA%22%3Bs%3A9%3A%22full_name%22%3Bs%3A57%3A%22%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA%D0%B0%D1%8F%20%D0%BE%D0%B1%D0%BB%2C%20%D0%9D%D0%BE%D0%B2%D0%BE%D1%81%D0%B8%D0%B1%D0%B8%D1%80%D1%81%D0%BA%22%3Bs%3A11%3A%22location_id%22%3Bi%3A3922%3B%7D; AdRiver=complete; _gaexp=GAX1.2.HgGfS9F4SKelSRUlVEYKNQ.19201.1; rrlevt=1653892685372; DIGI_CARTID=52398536566; advcake_track_id=a7ef2273-0949-78fb-9409-dd981f60db90; advcake_session_id=298d2304-bf97-db60-fc85-ee861c45ad88; PHPSESSID=kfut57bsee446prh5d2n2njh2q; BITRIX_SM_SALE_UID=1501334645; _gcl_au=1.1.1720181463.1655776048; _ct_ids=nunlkcp4%3A30991%3A389139721; _ct_session_id=389139721; _gpVisits={"isFirstVisitDomain":true,"todayD":"Tue%20Jun%2028%202022","idContainer":"1000248A"}; amp_1fb6bd=N81CqoXf_HdeRwSMgAhgwL.NjE1NDQxNQ==..1g6lcgrp5.1g6lcl2g8.0.0.0; _ga=GA1.2.241883441.1647941357; tmr_reqNum=3423; _ga_RQLWT53ZT9=GS1.1.1656427671.30.1.1656428301.60; _ga_S2WZY9J7P9=GS1.1.1656427671.30.1.1656428301.60; _ga_444YM4BF0J=GS1.1.1656427671.30.1.1656428301.60; BITRIX_SM_ab_test_multi=%7B%22aa06%22%3A%7B%22ID%22%3A%227710124%22%2C%22NAME%22%3A%22aa06%22%2C%22GROUP%22%3A%22A%22%7D%2C%22aa07%22%3A%7B%22ID%22%3A%227710127%22%2C%22NAME%22%3A%22aa07%22%2C%22GROUP%22%3A%22B%22%7D%2C%22aa08%22%3A%7B%22ID%22%3A%227710128%22%2C%22NAME%22%3A%22aa08%22%2C%22GROUP%22%3A%22A%22%7D%2C%22aa09%22%3A%7B%22ID%22%3A%227710129%22%2C%22NAME%22%3A%22aa09%22%2C%22GROUP%22%3A%22B%22%7D%2C%22aa10%22%3A%7B%22ID%22%3A%227710131%22%2C%22NAME%22%3A%22aa10%22%2C%22GROUP%22%3A%22B%22%7D%2C%22reg_bk_and_yz%22%3A%7B%22ID%22%3A%228122416%22%2C%22NAME%22%3A%22reg_bk_and_yz%22%2C%22GROUP%22%3A%22B%22%7D%2C%22checkbox%22%3A%7B%22ID%22%3A%227971955%22%2C%22NAME%22%3A%22checkbox%22%2C%22GROUP%22%3A%22A%22%7D%2C%22cartpopap%22%3A%7B%22ID%22%3A%228421477%22%2C%22NAME%22%3A%22cartpopap%22%2C%22GROUP%22%3A%22A%22%7D%2C%22KS%22%3A%7B%22ID%22%3A%228116774%22%2C%22NAME%22%3A%22KS%22%2C%22GROUP%22%3A%22%22%7D%2C%223d%22%3A%7B%22ID%22%3A%228123834%22%2C%22NAME%22%3A%223d%22%2C%22GROUP%22%3A%22%22%7D%2C%22ar%22%3A%7B%22ID%22%3A%228123836%22%2C%22NAME%22%3A%22ar%22%2C%22GROUP%22%3A%22%22%7D%2C%22kt_left%22%3A%7B%22ID%22%3A%228107330%22%2C%22NAME%22%3A%22kt_left%22%2C%22GROUP%22%3A%22B%22%7D%2C%22rr_basket%22%3A%7B%22ID%22%3A%228362213%22%2C%22NAME%22%3A%22rr_basket%22%2C%22GROUP%22%3A%22%22%7D%2C%22services%22%3A%7B%22ID%22%3A%228362535%22%2C%22NAME%22%3A%22services%22%2C%22GROUP%22%3A%22B%22%7D%2C%22credit_new_widget%22%3A%7B%22ID%22%3A%228411623%22%2C%22NAME%22%3A%22credit_new_widget%22%2C%22GROUP%22%3A%22A%22%7D%2C%22dates%22%3A%7B%22ID%22%3A%227895663%22%2C%22NAME%22%3A%22dates%22%2C%22GROUP%22%3A%22%22%7D%2C%22anyquery%22%3A%7B%22ID%22%3A%228402103%22%2C%22NAME%22%3A%22anyquery%22%2C%22GROUP%22%3A%22A%22%7D%2C%22rr_popup%22%3A%7B%22ID%22%3A%228413423%22%2C%22NAME%22%3A%22rr_popup%22%2C%22GROUP%22%3A%22A%22%7D%2C%22alter_sort%22%3A%7B%22ID%22%3A%228423533%22%2C%22NAME%22%3A%22alter_sort%22%2C%22GROUP%22%3A%22%22%7D%2C%22card_registration%22%3A%7B%22ID%22%3A%228415737%22%2C%22NAME%22%3A%22card_registration%22%2C%22GROUP%22%3A%22%22%7D%2C%22aaaa%22%3A%7B%22ID%22%3A%228423545%22%2C%22NAME%22%3A%22aaaa%22%2C%22GROUP%22%3A%22C%22%7D%7D; iwaf_http_cookie_291e829ea12795e7e56f937263071616=52f2a3083df4ce3da8a64a9e15f26666; iwaf_js_cookie_291e829ea12795e7e56f937263071616=bab9e8a9446586d217f21571c1b5e078'
}
cur_time = datetime.datetime.now().strftime('%d_%m_%Y')
temp_url = 'https://hoff.ru/vue/catalog/section/?category_id=5779&limit=30&offset=0&showCount=true&type=product_list&sort=discount_desc'

def writeCsvHeader():
    with open(f'{cur_time}_results.csv', 'w', encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(
            (
                "ID",
                "Name",
                "Discount",
                "New_price",
                "Old_price",
                "URL"
            )
        )

def get_last_item(temp_url):
    request = requests.get(url=temp_url, headers=headers, cookies=cookie)

    with open("temp.json", "w") as file:
        json.dump(request.json(), file, indent=4)
    with open('temp.json') as file:
        last_item = json.load(file)['data']['total_count']

        return last_item

def get_items(url):

    request = requests.get(url=url, headers=headers, cookies=cookie)

    with open("r.json", "w") as file:
        json.dump(request.json(), file, indent=4)

    with open('r.json') as file:
        data = json.load(file)['data']
        items = data['items']

    for item in items:
        id_ = item['id']
        name = item['name']
        old_price = item['prices']['old']
        new_price = item['prices']['new']
        discount = item['discount']
        page_url = 'https://hoff.ru/' + item['detail_page_url']



    return id_, name, old_price, new_price, discount, page_url

def files_writer(id_, name, old_price, new_price, discount, page_url):

    with open(f'{cur_time}_results.csv', 'a', encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(
            (
                id_,
                name,
                discount,
                new_price,
                old_price,
                page_url
            )
        )

def main():
    writeCsvHeader()  # Write csv file
    print(f'[INFO] Заголовок файла {cur_time}_results.csv записан успешно...')
    last_item = get_last_item(temp_url) # get last item
    print(f'[INFO] Найдено {last_item} товаров в категории...')
    for x in range(0, last_item, 30):
        print(f'[WORKING] Обрабатываем {int(x/30)+1} страницу из {int(last_item/30+1)}. Выполнено {int(((x/30)*100)/(last_item/30))}%')
        url = f'https://hoff.ru/vue/catalog/section/?category_id=5779&limit=30&offset={x}&showCount=true&type=product_list&sort=discount_desc'
        files_writer(get_items(url))

if __name__ == "__main__":
    main()

Но он выдает следующую ошибку:
Traceback (most recent call last):
File "D:/Py/pythonProject/hoff_parser.py", line 116, in
main()
File "D:/Py/pythonProject/hoff_parser.py", line 104, in main
files_writer(get_items(url))
TypeError: files_writer() missing 5 required positional arguments: 'name', 'old_price', 'new_price', 'discount', and 'page_url'

Process finished with exit code 1


Насколько я понимаю, дело в конструкции files_writer(get_items(url))
Результатом выполнения функции get_item(url) как раз и должны являться эти аргументы, которые являются входными для files_writer.
Ответ написан
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Войти через центр авторизации
Похожие вопросы