Сообщество IT-специалистов
Ответы на любые вопросы об IT
Профессиональное развитие в IT
Удаленная работа для IT-специалистов
<script> window.__REDIAL_PROPS__ = [null,null,null,null,null,{"req":{"filters":{"category":{"id":"2"},"enum............... ] </script>
[null,null,null,null,null,{"req":{"filters":{"category":{"id":"2"},"enum............... ]
[
]
window\.__REDIAL_PROPS__\s=\s(\[.*\])
"""Fetch a leboncoin.fr search page and extract its embedded REDIAL props."""

import copy
import datetime
import json
import os
import re
import time

import requests
from bs4 import BeautifulSoup

found_articles = []
l = '/var/www/html/'

# DOTALL lets ".*" cross newlines, so an array literal that is spread over
# several lines inside the <script> tag is still captured as a whole.
_REDIAL_PROPS_RE = re.compile(
    r"window\.__REDIAL_PROPS__\s*=\s*(\[.*\])",
    re.DOTALL,
)


def _find_redial_props(script_texts):
    """Return the first ``[...]`` text assigned to ``window.__REDIAL_PROPS__``.

    Args:
        script_texts: Iterable of strings, each the markup of one
            ``<script>`` element.

    Returns:
        The captured bracketed text (from the first ``[`` after the
        assignment to the last ``]`` in that script), or ``None`` when no
        script contains the assignment.
    """
    for text in script_texts:
        match = _REDIAL_PROPS_RE.search(text)
        if match is not None:
            return match.group(1)
    return None


def get_articles(location, min_price):
    """Download the search page and print/return the REDIAL props text.

    Args:
        location: Value placed in the ``locations`` query parameter
            (``main`` passes ``'d_67'``).
        min_price: Lower price bound, inserted as ``price=min-<min_price>``.

    Returns:
        The raw ``[...]`` text assigned to ``window.__REDIAL_PROPS__`` in
        the page, or ``None`` if no ``<script>`` tag contains it.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    url = (
        'https://www.leboncoin.fr/recherche/?category=2&locations='
        f'{location}&price=min-{min_price}'
    )
    user_agent = {
        'User-Agent': "Mozilla/5.0 (X11; Linux x86_64; rv:60.0) "
                      "Gecko/20100101 Firefox/60.0"
    }

    # Send the request; without a timeout requests may block indefinitely.
    contents = requests.get(url, headers=user_agent, timeout=30)
    soup = BeautifulSoup(contents.text, 'html5lib')

    # Scan every <script> tag instead of relying on a fixed position in the
    # page, which breaks as soon as the site adds or removes a script.
    props = _find_redial_props(str(tag) for tag in soup.find_all("script"))

    if props is None:
        print('matches: ', None)
    else:
        print('matches: ', props)
    return props


def main():
    """Run a sample search (location ``d_67``, minimum price 1000)."""
    get_articles('d_67', 1000)


if __name__ == "__main__":
    main()
# Prints the search result object itself; presumably `matches` is the value
# returned by re.search in get_articles (None when nothing matched) — confirm.
print('matches: ', matches)
# Prints only capture group 1 of the match.
# NOTE(review): raises AttributeError if `matches` is None — confirm a match exists first.
print('matches: ', matches.group(1))