def inner_page(link, license_check, sample):
    """Download an article page and return its paragraphs wrapped in CDATA.

    The page is fetched with the module-level ``headers`` and a 7 second
    timeout.  Paragraphs are taken from ``div.single_entry``; when that
    container holds a ``div.news-hide`` block, only the paragraphs of that
    block are used, verbatim.  Otherwise each paragraph is normalised:

    * a paragraph containing an ``<img>`` with a ``data-src`` attribute is
      replaced by ``<p><img src="..."></p>`` built from that attribute;
    * a paragraph whose markup starts with ``<p><a ...</a></p>`` is
      replaced by an empty ``<p></p>``;
    * anything else is kept as is.

    Args:
        link: URL of the page to download.
        license_check: Not used by this function.
        sample: Not used by this function.

    Returns:
        ``'<![CDATA[...]]>'`` with the concatenated paragraph markup, or the
        string ``'Недоступно'`` ("unavailable") when the request fails or
        times out, the status code is not 200, or the page has no
        ``div.single_entry`` container.
    """
    # Single request, inside the try: the previous extra call before the
    # try doubled the traffic and let connection errors escape the handler.
    try:
        inner_r = requests.get(link, headers=headers, timeout=7)
    except (requests.ConnectionError, requests.Timeout):
        # A read timeout is not a ConnectionError, so it is listed explicitly.
        return 'Недоступно'
    if inner_r.status_code != 200:
        return 'Недоступно'

    inner_r.encoding = 'utf8'
    inner_soup = BeautifulSoup(inner_r.text, 'html.parser')

    page_content = inner_soup.find('div', class_='single_entry')
    if page_content is None:
        # find() returns None when nothing matches; without the expected
        # container there is no content to extract.
        return 'Недоступно'

    check_inner_page = page_content.find('div', 'news-hide')
    if check_inner_page is not None:
        st = ''.join(str(p) for p in check_inner_page.find_all('p'))
        return f'<![CDATA[{st}]]>'

    link_only = re.compile(r'<p><a.*?</a></p>')
    list_content = []
    for paragraph in page_content.find_all('p'):
        img = paragraph.find('img')
        # An <img> without data-src falls through to the generic handling
        # instead of raising KeyError.
        img_src = img.get('data-src') if img is not None else None
        if img_src is not None:
            list_content.append(f'''<p><img src="{img_src}"></p>''')
        elif link_only.match(str(paragraph)):
            list_content.append('<p></p>')
        else:
            list_content.append(str(paragraph))
    st = ''.join(list_content)
    return f'<![CDATA[{st}]]>'
def inner_page(link, license_check, sample):
    """Download an article page and return its paragraphs wrapped in CDATA.

    The page is requested with the module-level ``headers`` and a 7 second
    timeout.  Content is read from ``div.single_entry``.  If that container
    has a nested ``div.news-hide`` block, the ``<p>`` elements of that block
    are emitted unchanged.  Otherwise every ``<p>`` of the container is
    processed as follows:

    * if it contains an ``<img>`` carrying ``data-src``, it becomes
      ``<p><img src="..."></p>`` using that attribute value;
    * else if its markup starts with ``<p><a ...</a></p>``, it becomes an
      empty ``<p></p>``;
    * else it is emitted unchanged.

    Args:
        link: URL of the page to download.
        license_check: Not used by this function.
        sample: Not used by this function.

    Returns:
        ``'<![CDATA[...]]>'`` containing the joined paragraph markup, or the
        string ``'Недоступно'`` ("unavailable") when the request fails or
        times out, the response status is not 200, or ``div.single_entry``
        is missing from the page.
    """
    # Exactly one request, guarded by the handler.  The earlier unguarded
    # duplicate call fetched every page twice and raised before the try.
    try:
        inner_r = requests.get(link, headers=headers, timeout=7)
    except (requests.ConnectionError, requests.Timeout):
        # Read timeouts are not ConnectionError subclasses; handle both.
        return 'Недоступно'
    if inner_r.status_code != 200:
        return 'Недоступно'

    inner_r.encoding = 'utf8'
    inner_soup = BeautifulSoup(inner_r.text, 'html.parser')

    page_content = inner_soup.find('div', class_='single_entry')
    if page_content is None:
        # Previously this surfaced as an AttributeError raised from inside
        # the fallback branch; report the page as unavailable instead.
        return 'Недоступно'

    # Explicit None checks replace the bare ``except:`` clauses that were
    # used for control flow and would also have hidden unrelated errors.
    check_inner_page = page_content.find('div', 'news-hide')
    if check_inner_page is not None:
        list_content = [str(tag) for tag in check_inner_page.find_all('p')]
    else:
        pattern = '<p><a.*?</a></p>'
        list_content = []
        for tag in page_content.find_all('p'):
            image = tag.find('img')
            if image is not None and 'data-src' in image.attrs:
                list_content.append(
                    f'<p><img src="{image.attrs["data-src"]}"></p>'
                )
            elif re.match(pattern, str(tag)) is not None:
                list_content.append('<p></p>')
            else:
                # Also reached for an <img> without data-src, matching the
                # former exception-driven fallback.
                list_content.append(str(tag))

    st = ''.join(list_content)
    data_body = f'<![CDATA[{st}]]>'
    return data_body
sting_page = check_inner_page.find_all('p')
for i in sting_page:
img = i.find('img').attrs['data-src']