import requests
from bs4 import BeautifulSoup
import lxml
from selenium import webdriver
import time
# Browser-like request headers passed to the plain `requests` call.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/121.0.0.0 Safari/537.36 Edg/121.0.0.0'
    ),
}

# Proxy mapping in the shape `requests` expects (scheme -> proxy URL);
# only the 'https' scheme is configured.
proxies = {'https': 'http://146.247.105.71:4827'}
def get_location(url):
    """Fetch ``url`` two ways and print the HTML obtained by each.

    The page is first downloaded with ``requests`` and parsed with
    BeautifulSoup, then loaded again in a Selenium-driven Chrome instance
    so the two results can be compared on stdout.

    Args:
        url: Address of the page to download.

    Returns:
        None. Both HTML dumps are printed.

    Raises:
        requests.exceptions.RequestException: If the plain HTTP request
            fails or does not answer within the timeout.
    """
    # Without a timeout a dead proxy would block this call indefinitely.
    response = requests.get(url, headers=headers, proxies=proxies, timeout=10)
    soup = BeautifulSoup(response.text, 'lxml')
    print(soup, '\n\n\nlox\n\n\n')

    options = webdriver.ChromeOptions()
    options.add_argument('--proxy-server=146.247.105.71:4827')
    driver = webdriver.Chrome(options=options)
    try:
        driver.get(url)
        # Pause BEFORE reading the source: sleeping after the read (as the
        # code did previously) cannot influence the HTML that was captured.
        time.sleep(5)
        page_source = driver.page_source
    finally:
        # Always shut the browser down, even if navigation fails, so no
        # Chrome/chromedriver process is left behind.
        driver.quit()
    print(page_source)
def main():
    """Run ``get_location`` against the Skiddle festival dates page."""
    festivals_url = 'https://www.skiddle.com/festivals/dates.html'
    get_location(url=festivals_url)


# Only start scraping when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Пытался забрать HTML простым реквестом — получалось не очень.
Попробовал через Selenium — получил то же самое.
import requests
import socks
import socket
# Target page and the browser-like headers sent with the request.
url = 'https://www.skiddle.com/festivals/dates.html'
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/58.0.3029.110 Safari/537.3'
    ),
}

# Register the SOCKS5 proxy as the PySocks default and replace the
# module-level socket class with the PySocks one.
socks.set_default_proxy(socks.SOCKS5, "45.56.219.55", 52759)
socket.socket = socks.socksocket

# Fetch the page and print its body; each failure category gets its own
# message, ordered from most to least specific.
try:
    response = requests.get(url, headers=headers, timeout=10)
    response.raise_for_status()
    print(response.text)
except requests.exceptions.HTTPError as http_error:
    print("Http Error:", http_error)
except requests.exceptions.ConnectionError as connection_error:
    print("Connecting Error:", connection_error)
except requests.exceptions.Timeout as timeout_error:
    print("Timeout Error:", timeout_error)
except requests.exceptions.RequestException as request_error:
    print("Other Error", request_error)
<!DOCTYPE html>
<!--[if lt IE 7]><html class='ie ie6 lte9 lte8 lte7 no-js'> <![endif]-->
<!--[if IE 7]><html class='ie ie7 lte9 lte8 lte7 no-js'> <![endif]-->
<!--[if IE 8]><html class='ie ie8 lte9 lte8 no-js'> <![endif]-->
<!--[if IE 9]><html class='ie ie9 lte9 no-js'> <![endif]-->
<!--[if gt IE 9]><html class='ie no-js'><![endif]-->
<!--[if !IE]><!--> <html class='no-ie no-js' lang='en'><!--<![endif]-->
<head><!-- Basic Page Needs
================================================== -->
<title>A-Z of Future Festivals</title>
<link rel="manifest" href="/manifest.json">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><!-- DNS Prefetching
================================================== -->
<link rel="preconnect" href="https://d31fr2pwly4c4s.cloudfront.net"/>
<link rel="preconnect" href="https://d1plawd8huk6hh.cloudfront.net"/>
<link rel="preconnect" href="https://www.google-analytics.com" />
<link rel="preload" href="https://d1plawd8huk6hh.cloudfront.net/css-responsive2/fonts/BuenosAires/BuenosAiresWeb1-Regular_gdi.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://d1plawd8huk6hh.cloudfront.net/css-responsive2/fonts/BuenosAires/BuenosAiresWeb1-Bold_gdi.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="dns-prefetch" href="https://d31fr2pwly4c4s.cloudfront.net"/>
<link rel="dns-prefetch" href="https://d1plawd8huk6hh.cloudfront.net"/>
<link rel="dns-prefetch" href="https://www.google-analytics.com" />
<link rel="dns-prefetch" href="//connect.facebook.net" />
<link rel="dns-prefetch" href="//www.googleadservices.com" />
<link rel="dns-prefetch" href="//www.stay22.com" />
<link rel="dns-prefetch" href="//api.stay22.com" />
<link rel="dns-prefetch" href="//skiddle.imgix.net" />
<link rel="dns-prefetch" href="//skiddleartists.imgix.net" />
<!-- CSS
================================================== -->
<meta property="fb:app_id" content="102483650494"/>
<meta http-equiv="x-dns-prefetch-control" content="on"/>
<meta name="insight-app-sec-validation" content="5574b896-289f-48c0-aebb-98730b831151">
<!-- Mobile Specific Metas
================================================== -->
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
...