Something like this? It may be a bit over-engineered and there is no status-code check (add one yourself if you need it; see the sketch after the code), but it works.
# site_base.txt
https://xakep.ru
https://habr.com
https://vc.ru
# [file_name].py
import os

import requests


def append_filename(url: str, filename: str) -> tuple:
    """Return a (domain, full_url) pair for the given site URL and file name."""
    domain = url.split('//')[-1]
    return domain, url + '/' + filename


def save_file_to_disk(filename: str):
    # Create the output directory on first run
    if not os.path.exists('files'):
        os.mkdir('files')
    with open('site_base.txt', 'r', encoding='UTF-8') as site_base:
        # splitlines() plus the filter drops the empty entry that a trailing
        # newline in site_base.txt would otherwise produce
        sites_list = [line for line in site_base.read().splitlines() if line]
    urls_list = [append_filename(url, filename) for url in sites_list]
    for domain, requested_url in urls_list:
        try:
            response = requests.get(requested_url)
            saved_filename = domain + '_' + filename
            file_path = os.path.join('files', saved_filename)
            # 'w' instead of 'a' so a rerun overwrites the file
            # rather than appending a duplicate copy of the content
            with open(file_path, 'w', encoding='UTF-8') as robots:
                robots.write(response.text)
        except requests.exceptions.RequestException as e:
            print(e)


save_file_to_disk('robots.txt')
save_file_to_disk('sitemap.xml')
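If you do want the status-code check mentioned at the top, a minimal sketch is to call requests' raise_for_status() right after the get(). Since requests.exceptions.HTTPError is a subclass of RequestException, the existing except clause catches it with no other changes. The helper name fetch_text and the timeout value are my own additions, not part of the original answer:

import requests

def fetch_text(url: str) -> str:
    """Fetch url and return the response body, failing loudly on HTTP errors."""
    # timeout is an assumption here, not in the original code
    response = requests.get(url, timeout=10)
    # raises requests.exceptions.HTTPError on any 4xx/5xx status,
    # which the existing RequestException handler already catches
    response.raise_for_status()
    return response.text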