Пытался забрать HTML простым запросом — получалось не очень.
Попробовал через Selenium — получил то же самое.
Это не SPA и не AJAX, защиты нет, а значит, requests вполне справится. Ниже рабочий пример, который печатает весь HTML страницы (запросы с российских IP блокируются (403), поэтому использовал рабочий SOCKS5-прокси):
import requests
import socks
import socket
# Fetch a page through a SOCKS5 proxy and print its HTML.
#
# NOTE: replacing socket.socket swaps the socket class for the whole
# process, so every connection opened after this point (not only the
# request below) goes through the proxy.
socks.set_default_proxy(socks.SOCKS5, "45.56.219.55", 52759)
socket.socket = socks.socksocket

url = 'https://www.skiddle.com/festivals/dates.html'
# Browser-like User-Agent sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

try:
    # Only the calls that can raise stay inside the try body.
    response = requests.get(url, headers=headers, timeout=10)
    # Convert 4xx/5xx status codes (e.g. 403) into HTTPError.
    response.raise_for_status()
except requests.exceptions.HTTPError as errh:
    print("Http Error:", errh)
except requests.exceptions.ConnectionError as errc:
    print("Connecting Error:", errc)
except requests.exceptions.Timeout as errt:
    print("Timeout Error:", errt)
except requests.exceptions.RequestException as err:
    # Base class of all requests exceptions; must come last.
    print("Other Error", err)
else:
    # Print the body exactly once, and only when the request succeeded.
    # An unconditional print after the handlers would raise NameError
    # when requests.get() itself failed, because `response` would never
    # have been assigned.
    print(response.text)
<!DOCTYPE html>
<!--[if lt IE 7]><html class='ie ie6 lte9 lte8 lte7 no-js'> <![endif]-->
<!--[if IE 7]><html class='ie ie7 lte9 lte8 lte7 no-js'> <![endif]-->
<!--[if IE 8]><html class='ie ie8 lte9 lte8 no-js'> <![endif]-->
<!--[if IE 9]><html class='ie ie9 lte9 no-js'> <![endif]-->
<!--[if gt IE 9]><html class='ie no-js'><![endif]-->
<!--[if !IE]><!--> <html class='no-ie no-js' lang='en'><!--<![endif]-->
<head><!-- Basic Page Needs
================================================== -->
<title>A-Z of Future Festivals</title>
<link rel="manifest" href="/manifest.json">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" /><!-- DNS Prefetching
================================================== -->
<link rel="preconnect" href="https://d31fr2pwly4c4s.cloudfront.net"/>
<link rel="preconnect" href="https://d1plawd8huk6hh.cloudfront.net"/>
<link rel="preconnect" href="https://www.google-analytics.com" />
<link rel="preload" href="https://d1plawd8huk6hh.cloudfront.net/css-responsive2/fonts/BuenosAires/BuenosAiresWeb1-Regular_gdi.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="preload" href="https://d1plawd8huk6hh.cloudfront.net/css-responsive2/fonts/BuenosAires/BuenosAiresWeb1-Bold_gdi.woff2" as="font" type="font/woff2" crossorigin="anonymous">
<link rel="dns-prefetch" href="https://d31fr2pwly4c4s.cloudfront.net"/>
<link rel="dns-prefetch" href="https://d1plawd8huk6hh.cloudfront.net"/>
<link rel="dns-prefetch" href="https://www.google-analytics.com" />
<link rel="dns-prefetch" href="//connect.facebook.net" />
<link rel="dns-prefetch" href="//www.googleadservices.com" />
<link rel="dns-prefetch" href="//www.stay22.com" />
<link rel="dns-prefetch" href="//api.stay22.com" />
<link rel="dns-prefetch" href="//skiddle.imgix.net" />
<link rel="dns-prefetch" href="//skiddleartists.imgix.net" />
<!-- CSS
================================================== -->
<meta property="fb:app_id" content="102483650494"/>
<meta http-equiv="x-dns-prefetch-control" content="on"/>
<meta name="insight-app-sec-validation" content="5574b896-289f-48c0-aebb-98730b831151">
<!-- Mobile Specific Metas
================================================== -->
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
...