Решил скачать разного рода документы из сайта Российского Центра Научной Информатики. Отчёты, формы, образцы и т.д. В основном из раздела "Конкурсы". Ввожу код:
import time
from urllib.parse import quote, unquote
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry
import requests
from bs4 import BeautifulSoup
def main():
base_url = 'https://rfbr.ru'
download_links = set()
response = requests.get(base_url+'/rffi')
parse_text = BeautifulSoup(response.text, 'html.parser')
links = set([x.get('href') for x in parse_text.find_all(href=re.compile('^/rffi/'))])
for link in links:
time.sleep(1)
response = requests.get(base_url+link)
parse_text = BeautifulSoup(response.text, 'html.parser')
download_links.update(set([x.get('href') for x in parse_text.find_all(href=re.compile('^/getimage/'))]))
for link in download_links:
time.sleep(1)
file_name = unquote(link).split('/')[-1]
response = requests.get(base_url+quote(link))
with open(file_name, 'wb') as f:
f.write(response.content)
if __name__ == '__main__':
main()
И тут же возникает ошибка
Traceback (most recent call last):
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connection.py", line 174, in _new_conn
conn = connection.create_connection(
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\util\connection.py", line 72, in create_connection
for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\socket.py", line 962, in getaddrinfo
for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
socket.gaierror: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connection.py", line 363, in connect
self.sock = conn = self._new_conn()
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connection.py", line 186, in _new_conn
raise NewConnectionError(
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPSConnection object at 0x0000019854B46E90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\adapters.py", line 489, in send
resp = conn.urlopen(
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\connectionpool.py", line 787, in urlopen
retries = retries.increment(
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\urllib3\util\retry.py", line 592, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='rfbr.ru', port=443): Max retries exceeded with url: /rffi (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000019854B46E90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "C:\Download\rbfi-parser\main2.py", line 30, in <module>
main()
File "C:\Download\rbfi-parser\main2.py", line 12, in main
response = requests.get(base_url+'/rffi')
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\api.py", line 73, in get
return request("get", url, params=params, **kwargs)
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\api.py", line 59, in request
return session.request(method=method, url=url, **kwargs)
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\sessions.py", line 587, in request
resp = self.send(prep, **send_kwargs)
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\sessions.py", line 701, in send
r = adapter.send(request, **kwargs)
File "C:\Users\Руслан\AppData\Local\Programs\Python\Python311\Lib\site-packages\requests\adapters.py", line 565, in send
raise ConnectionError(e, request=request)
requests.exceptions.ConnectionError: HTTPSConnectionPool(host='rfbr.ru', port=443): Max retries exceeded with url: /rffi (Caused by NewConnectionError('<urllib3.connection.HTTPSConnection object at 0x0000019854B46E90>: Failed to establish a new connection: [Errno 11001] getaddrinfo failed'))
Как решить?