Приветствую! Подскажите, с чем может быть связано следующее: когда я запускаю парсер на сервере, он блокируется (срабатывает Google-капча), а когда запускаю тот же самый код с теми же самыми параметрами на своём компьютере, всё работает.
Вот пример кода:
import requests
import ssl
from requests.adapters import HTTPAdapter
from urllib3.poolmanager import PoolManager
from urllib3.util import ssl_
import html
from bs4 import BeautifulSoup
# OpenSSL cipher string (colon-separated suite names, in the listed order)
# used when building the SSL context for HTTPS connections.
CIPHERS = ":".join(
    (
        "ECDHE-RSA-AES256-GCM-SHA384",
        "ECDHE-ECDSA-AES256-GCM-SHA384",
        "ECDHE-RSA-AES256-SHA384",
        "ECDHE-ECDSA-AES256-SHA384",
        "ECDHE-RSA-AES128-GCM-SHA256",
        "ECDHE-RSA-AES128-SHA256",
        "AES256-SHA",
    )
)
class TlsAdapter(HTTPAdapter):
    """Transport adapter that pins the TLS cipher list and SSL option flags.

    Connection pools created by this adapter use an SSL context built from
    the module-level ``CIPHERS`` string and the ``ssl.OP_*`` bit mask given
    at construction time. The same context is used for direct connections
    and for connections made through a proxy.
    """

    def __init__(self, ssl_options=0, **kwargs):
        """Store *ssl_options* and forward *kwargs* to ``HTTPAdapter``.

        Args:
            ssl_options: Bit mask of ``ssl.OP_*`` flags passed as ``options``
                when the SSL context is created.
            **kwargs: Forwarded unchanged to ``HTTPAdapter.__init__``.
        """
        # Keep this assignment first: the parent initializer is expected to
        # call init_poolmanager(), which reads self.ssl_options.
        self.ssl_options = ssl_options
        super().__init__(**kwargs)

    def _build_ssl_context(self):
        """Create the SSL context shared by direct and proxied pools."""
        return ssl_.create_urllib3_context(
            ciphers=CIPHERS,
            cert_reqs=ssl.CERT_REQUIRED,
            options=self.ssl_options,
        )

    def init_poolmanager(self, *pool_args, **pool_kwargs):
        """Create the pool manager for direct connections with the custom context."""
        self.poolmanager = PoolManager(
            *pool_args,
            ssl_context=self._build_ssl_context(),
            **pool_kwargs
        )

    def proxy_manager_for(self, proxy, **proxy_kwargs):
        """Return the proxy manager for *proxy*, using the custom SSL context.

        Without this override, HTTPS requests sent through a proxy would use
        a default SSL context, so the cipher list and option flags would
        silently not apply to them. A caller-supplied ``ssl_context`` wins.
        """
        proxy_kwargs.setdefault("ssl_context", self._build_ssl_context())
        return super().proxy_manager_for(proxy, **proxy_kwargs)
def except_error():
    """Report that a captcha has to be solved and call ``get_cian_antibot()``.

    This is an extension point: captcha handling can be added here.
    ``get_cian_antibot`` is not defined in this snippet and must be provided
    elsewhere.
    """
    print('Требуется пройти капчу')
    # The returned value is not used anywhere yet.
    get_cian_antibot()
    # Terminating the process here (sys.exit(1)) is deliberately disabled.
# Everything is meant to run inside a single requests session.
#
# requests selects a proxy by the scheme of the *target* URL. The original
# mapping only had an 'http' entry, so the HTTPS API call never went through
# the proxy and left from the machine's own IP address. Both schemes are
# mapped now, and the proxy URL carries an explicit scheme.
# The address is a placeholder: different proxies are used in practice.
_PROXY_URL = 'http://185.170.215.228:80'
proxies = {'http': _PROXY_URL, 'https': _PROXY_URL}
# One session for all requests; its HTTPS traffic goes through TlsAdapter
# with TLS 1.0 and TLS 1.1 disabled.
s = requests.Session()
adapter = TlsAdapter(ssl_options=ssl.OP_NO_TLSv1 | ssl.OP_NO_TLSv1_1)
s.mount("https://", adapter)
# Mobile User-Agent strings (not referenced anywhere in this snippet).
phone_agents = [
    # Safari on iPhone, iOS 13.2.3
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
    # Chrome 58 on Android 7.1 (Mi A1)
    'Mozilla/5.0 (Linux; Android 7.1; Mi A1 Build/N2G47H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.83 Mobile Safari/537.36',
    # Stock browser on Android 4.4.2 (SCH-I535)
    'Mozilla/5.0 (Linux; U; Android 4.4.2; en-us; SCH-I535 Build/KOT49H) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30',
]
# Hard-coded cookie jar sent with the search request. Values are kept
# verbatim and in their original order; they look like a capture from a
# logged-in browser session (presumably - verify against the source).
cookies = {
    # --- identifiers and consent ---
    '_CIAN_GK': '587a175d-43f5-4788-99d4-e38bf5a45566',
    '_ym_uid': '1664268595755252386',
    'uxs_uid': '5bd1b5e0-3e41-11ed-9422-79599aaf127a',
    'adrcid': 'A5dZwUr0RszDlIqZWUhUxog',
    '_cc_id': '46634f6dc119f28322305b66a5bb6528',
    'cookie_agreement_accepted': '1',
    'afUserId': 'ba0f74e0-bb48-442c-b6fc-08211bc0f48d-p',
    'cian_ruid': '35293226',
    'uxfb_usertype': 'realtor',
    'tmr_lvid': '258cb98c73d5b1f1e9d217c0ba1242ed',
    'tmr_lvidTS': '1666520839080',
    '_hjSessionUser_2021803': 'eyJpZCI6ImFmZWYwZjBjLWQ0OTQtNWZhZC05NTdjLTE4ZTQzMWQyZTA3OCIsImNyZWF0ZWQiOjE2Njk1Nzg4ODE2NTYsImV4aXN0aW5nIjp0cnVlfQ==',
    'uxmainpagesearch': '1kazansearchuser',
    '_gpVisits': '{"isFirstVisitDomain":true,"todayD":"Tue%20Jan%2024%202023","idContainer":"10002511"}',
    'gnezdo_uid': 'XV9kdWM6uo54yAaKHeUvAg==',
    'userLoginAsAgent': '1',
    'rrpvid': '451557884237786',
    'rcuid': '641028961e2435f8abe1fa8d',
    '_gcl_au': '1.1.1302193987.1679923996',
    '_ym_d': '1680072971',
    'pview': '49',
    # --- region and session state ---
    'forever_region_id': '2',
    'forever_region_name': '%D0%A1%D0%B0%D0%BD%D0%BA%D1%82-%D0%9F%D0%B5%D1%82%D0%B5%D1%80%D0%B1%D1%83%D1%80%D0%B3',
    'distance_calculating_onboarding_counter': '3',
    'AF_SYNC': '1685543634614',
    'sopr_utm': '%7B%22utm_source%22%3A+%22yandex%22%2C+%22utm_medium%22%3A+%22organic%22%7D',
    'session_region_id': '1',
    'session_main_town_region_id': '1',
    'login_mro_popup': '1',
    'sopr_session': '0ec96ad5f68348aa',
    '_gid': 'GA1.2.646287869.1685784700',
    '_ym_isad': '2',
    '_ym_visorc': 'b',
    'number_banner_appearances': '1',
    '_ga': 'GA1.2.1743996369.1664268595',
    # --- bot-protection related values ---
    '__cf_bm': 'ydDltSkevciOiO3yoQUvC8GmmtQBJrxe91W910L2wk8-1685786966-0-AQdSyOXaElhYCu2NlPhzWL+NQuBky5iwTzqkd1cB62u/98tvBCOvCqrhDIu6OZcNFblzxBr36Rfi9M70vd9y4gQ=',
    '_ga_3369S417EL': 'GS1.1.1685784695.12.1.1685786967.60.0.0',
    'viewpageTimer': '1222.0230000000001',
    # NOTE(review): the base64 field of this value decodes to
    # {"remote_ip": "188.225.122.246"}. The cookie therefore appears to be
    # issued for one specific client IP; replaying it from another address
    # (e.g. a server) may be what triggers the captcha - confirm.
    'anti_bot': '"2|1:0|10:1685945784|8:anti_bot|44:eyJyZW1vdGVfaXAiOiAiMTg4LjIyNS4xMjIuMjQ2In0=|4cf038d04ad62b05c450e9e979c8d5656a77a0ee6b41ef9c1bd92277533427e8"',
}
# Request headers imitating desktop Microsoft Edge 113 on Windows (see the
# sec-ch-ua* and user-agent values) making a same-site CORS call from
# www.cian.ru to api.cian.ru.
headers = {
    'authority': 'api.cian.ru',
    'accept': '*/*',
    'accept-language': 'ru,en;q=0.9,en-GB;q=0.8,en-US;q=0.7',
    'content-type': 'application/json',
    # No 'cookie' header here: cookies are passed separately through the
    # `cookies` mapping when the request is sent.
    'origin': 'https://www.cian.ru',
    'referer': 'https://www.cian.ru/',
    # Client hints
    'sec-ch-ua': '"Microsoft Edge";v="113", "Chromium";v="113", "Not-A.Brand";v="24"',
    'sec-ch-ua-mobile': '?0',
    'sec-ch-ua-platform': '"Windows"',
    # Fetch metadata
    'sec-fetch-dest': 'empty',
    'sec-fetch-mode': 'cors',
    'sec-fetch-site': 'same-site',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57',
}
# JSON body of the search request. Each filter is a {'type', 'value'} pair.
json_data = {
    'jsonQuery': {
        '_type': 'flatrent',
        'sort': {'type': 'term', 'value': 'creation_date_desc'},
        'engine_version': {'type': 'term', 'value': 2},
        # -1 stands for Moscow and the Moscow region.
        'region': {'type': 'terms', 'value': [-1]},
        'is_by_homeowner': {'type': 'term', 'value': True},
        'for_day': {'type': 'term', 'value': '!1'},
        'room': {'type': 'terms', 'value': [1, 2, 3, 4, 5, 6, 9]},
    },
}
# Send the search query through the shared session.
response = s.post(
    'https://api.cian.ru/search-offers/v2/search-offers-desktop/',
    cookies=cookies,
    headers=headers,
    json=json_data,
    proxies=proxies,
    # Without a timeout requests may wait forever on a stalled connection.
    timeout=30,
)
# Fail early with a clear HTTP error instead of a confusing JSON error later.
response.raise_for_status()
try:
    res = response.json()
except ValueError as err:
    # A non-JSON body (for example an HTML captcha page) ends up here.
    raise RuntimeError(
        'Expected JSON from the search API but got HTTP {} with content type {!r}; '
        'the request was probably answered with a captcha/anti-bot page'.format(
            response.status_code, response.headers.get('content-type')
        )
    ) from err
def _leading_short_names(offer, limit=2):
    """Return ``shortName`` of the first *limit* address parts of *offer*.

    A missing or empty ``geo``/``address`` yields an empty list, so offers
    with fewer address parts than expected no longer raise ``IndexError``.
    """
    address = (offer.get("geo") or {}).get("address") or []
    return [part.get("shortName") for part in address[:limit]]


# Cities to keep; an empty list disables filtering.
saved_cities = ['Москва']
items = list(res["data"]["offersSerialized"])

if not saved_cities:
    # No filter configured: keep every offer.
    items_filtr = items
else:
    # Keep an offer when either of its first two address parts is a saved city.
    items_filtr = [
        candidate
        for candidate in items
        if any(name in saved_cities for name in _leading_short_names(candidate))
    ]

for item in items_filtr:
    first_names = _leading_short_names(item, limit=1)
    offer = {
        "url": item["fullUrl"],
        # None when the offer carries no address information.
        "geo": first_names[0] if first_names else None,
    }
    print(offer)
В чем может быть проблема?