import urllib.parse
import requests
from lxml import html
# Download the Lenta.ru front page and print the target of every link on it.
# A timeout is passed explicitly: requests applies none by default, so a
# stalled server would otherwise block the script indefinitely.
response = requests.get('http://lenta.ru/', timeout=10)
parsed_body = html.fromstring(response.text)
for y in parsed_body.xpath("//a"):
    url = y.get("href")
    # Anchors without an href attribute yield None; there is nothing to print.
    if url is None:
        continue
    print(url)
def geturl(href, baseurl):
    """Resolve a link target against the URL of the page it was found on.

    Args:
        href: Link target as written in the page. It may be an absolute
            URL, a root-relative path (``/path``) or a path relative to
            the page (``path``).
        baseurl: Absolute URL of the page that contains the link.

    Returns:
        The absolute URL that ``href`` refers to.
    """
    # Links that already carry one of the known schemes are returned as-is,
    # without any normalisation.
    if href.startswith(('https://', 'http://', 'ftp://')):
        return href
    # urljoin keeps the scheme and host of baseurl for root-relative links
    # and replaces the last path segment of baseurl for page-relative ones,
    # inserting the '/' separator where it is needed.
    return urllib.parse.urljoin(baseurl, href)