Мой код возвращает часть правильно с одного сайта, а с другого не декодидует
AccuWeather:
1°
Дощ і сніг
Rp_5:
1 °C
Почему?
import html
from urllib.request import urlopen, Request
ACCU_URL = 'https://www.accuweather.com/uk/ua/lviv/324561/weather-forecast/324561'
ACCU_TAGS = ('<span class="large-temp">' , '<span class="cond">')
RP5_URL = 'http://rp5.ua/%D0%9F%D0%BE%D0%B3%D0%BE%D0%B4%D0%B0_%D1%83_%D0%9B%D1%8C%D0%B2%D0%BE%D0%B2%D1%96,_%D0%9B%D1%8C%D0%B2%D1%96%D0%B2%D1%81%D1%8C%D0%BA%D0%B0_%D0%BE%D0%B1%D0%BB%D0%B0%D1%81%D1%82%D1%8C'
#WINFO_CONTAINER_TAG = '<div id="ArchTemp">'
#RP5_TAGS = '<span class="t_0" style="display: block;">'
RP5_TAGS = ('<div id="ArchTemp">',
'<span class="t_0" style="display: block;">')
def get_request_headers ():
return {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv: 61.0)'}
def get_page_source ( url ):
request = Request ( url , headers=get_request_headers ( ) )
page_source = urlopen( request ).read( )
return page_source.decode ( 'utf-8' )
def get_tag_content ( page_content , tag ):
tag_index = page_content.find ( tag )
tag_size = len ( tag )
value_start = tag_index + tag_size
content = ''
for c in page_content[ value_start: ]: # type: object
if c != '<':
content += c
else:
break
return content
def get_weather_info ( page_content , tags ):
return tuple ( [ get_tag_content ( page_content , tag ) for tag in tags ] )
def produce_output ( provider_name , temp , condition ):
print (f'{provider_name}: \n' )
print (f'{html.unescape(temp)} \n' )
print (f'{(condition)} \n' )
def main ():
weather_sites = {"AccuWeather": (ACCU_URL , ACCU_TAGS),
"Rp_5": (RP5_URL , RP5_TAGS)
}
for name in weather_sites:
url , tags = weather_sites[ name ]
content = get_page_source ( url)
temp , condition = get_weather_info ( content , tags )
produce_output ( name , temp , condition )
if __name__ == '__main__':
main ( )