import requests
from bs4 import BeautifulSoup
import re
import os
from urllib.request import urlopen
import json
from urllib.parse import unquote


class Parser:
    def __init__(self, href) -> None:
        self.url = "https://fasie.ru"
        self.href = href
        self.source = self.url + self.href['href']
        self.name = self.href.text
        try:
            r = requests.get(self.source, timeout=20)
        except requests.RequestException:
            print(f'err. conn: {self.source} in "def __init__"')
            raise
        soup = BeautifulSoup(r.text, "html.parser")
        self.section = soup.find('div', {'class': 'tabs'}).find_all('section')

    def get_description(self):
        lines = re.findall(r"[^\n\t\r]+", self.section[2].text)
        return '\n'.join(map(str.strip, lines))

    def get_program(self):
        lines = re.findall(r"[^\n\t\r]+", self.section[0].text)
        return '\n'.join(map(str.strip, lines))

    def get_contact(self):
        contacts = []
        pattern = r"(\+?[\d\(\) -]+)\s\(?доб\.\s?\d{3}\)?"
        pattern_email = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,7}\b"
        if self.section[-1].find('tr'):
            # Contacts laid out as a table: one person per <tr>.
            for row in self.section[-1].find_all('tr'):
                d = {}
                d['name'] = row.find('h4').text.strip().replace('\xa0', ' ').split('\r', 1)[0]
                tmp = re.search(pattern, row.text)
                d['tel'] = tmp[1].strip() if tmp else ''
                tmp = re.search(pattern_email, row.text)
                d['email'] = tmp[0] if tmp else ''
                if any(d.values()):
                    contacts.append(d)
        elif self.section[-1].find('b'):
            # Contacts laid out as bold names with phone numbers and e-mail links.
            names = [i.text for i in self.section[-1].find_all('b') if i.text.strip()]
            tels = re.findall(pattern, self.section[-1].text)
            emails = [i.text for i in self.section[-1].find_all('a') if i.text]
            for name, tel, email in zip(names, tels, emails):
                d = {}
                d['name'] = name.strip().replace('\xa0', ' ')
                d['tel'] = re.sub(r'\s\(?доб\.\s?\d{3}\)?', '', tel.strip())
                d['email'] = email.strip()
                contacts.append(d)
        else:
            # Fallback: plain paragraphs with an e-mail link inside.
            for p in self.section[-1].find_all('p', recursive=False):
                if p.find('a'):
                    d = {}
                    d['name'] = ''
                    tmp = re.search(pattern, p.text)
                    d['tel'] = tmp[1].strip() if tmp else ''
                    d['email'] = p.find('a').text
                    contacts.append(d)
        return contacts

    def get_documents(self):
        docs = []
        for a in self.section[1].find_all('a'):
            if a['href']:
                a['href'] = a['href'].replace(self.url, '')
                name = unquote(a['href'])
                d = {}
                d['source'] = self.url + '/' + a['href'].lstrip('/')
                d['path'] = f"./{self.href['href'].strip('/')}/{name}"
                d['name'] = name.rsplit('/', 1)[-1]
                d['extension'] = name.rsplit('.', 1)[-1]
                try:
                    r = requests.get(d['source'], timeout=20)
                except requests.RequestException:
                    print(f"err. conn: {d['source']}")
                    continue
                if r.status_code == 200:
                    try:
                        os.makedirs(os.path.dirname(d['path']), exist_ok=True)
                    except OSError:
                        print(f"Error creating folder\nTag: {a}\nname: {d['path']}")
                        raise
                    try:
                        with open(d['path'], 'wb') as f:
                            f.write(r.content)
                    except OSError:
                        print(f"Error writing file\nTag: {a}")
                        raise
                    d['size'] = len(r.content)
                    docs.append(d)
                else:
                    print(f"{d['source']} no response")
        return docs

    def run(self):
        return {
            'source': self.source,
            'name': self.name,
            'description': self.get_description(),
            'programs': self.get_program(),
            'contacts': self.get_contact(),
            'documents': self.get_documents(),
        }


def main():
    url = "https://fasie.ru"
    page = urlopen(url)
    html = page.read().decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")
    div = soup.find_all('div', class_='wrap')
    programms_list = div[1].find('ul', class_='').find_all('ul', class_='')[1]
    hrefs = programms_list.find_all('a')
    data = []
    for i in hrefs:
        p = Parser(i)
        data.append(p.run())
    with open('output.json', 'w', encoding="utf-8") as f:
        f.write(json.dumps(data, indent=2, ensure_ascii=False))


main()
import requests
from bs4 import BeautifulSoup
import re
import os
from urllib.request import urlopen
import json


class Parser:
    def __init__(self, href) -> None:
        self.url = "https://fasie.ru"
        self.href = href
        self.source = self.url + self.href['href']
        self.name = self.href.text
        try:
            r = requests.get(self.source, timeout=20)
        except requests.RequestException:
            print(f'err. conn: {self.source} in "def __init__"')
            raise
        soup = BeautifulSoup(r.text, "html.parser")
        self.section = soup.find('div', {'class': 'tabs'}).find_all('section')

    def get_description(self):
        lines = re.findall(r"[^\n\t\r]+", self.section[2].text)
        return '\n'.join(map(str.strip, lines))

    def get_program(self):
        lines = re.findall(r"[^\n\t\r]+", self.section[0].text)
        return '\n'.join(map(str.strip, lines))

    def get_contact(self):
        contacts = []
        pattern = r"(\+?[\d\(\) -]+)\s\(?доб\.\s?\d{3}\)?"
        if self.section[-1].find('tr'):
            # Contacts laid out as a table: one person per <tr>.
            for row in self.section[-1].find_all('tr'):
                d = {}
                d['name'] = row.find('h4').text.strip().replace('\xa0', ' ')
                d['tel'] = ''
                tmp = re.search(pattern, row.text)
                if tmp:
                    d['tel'] = tmp[1].strip()
                d['email'] = row.find('a').text
                contacts.append(d)
        elif self.section[-1].find('b'):
            # Contacts laid out as bold names with phone numbers and e-mail links.
            names = [i.text for i in self.section[-1].find_all('b') if i.text.strip()]
            tels = re.findall(pattern, self.section[-1].text)
            emails = self.section[-1].find_all('a')
            for name, tel, email in zip(names, tels, emails):
                d = {}
                d['name'] = name.strip().replace('\xa0', ' ')
                d['tel'] = tel.strip()
                d['email'] = email.text
                contacts.append(d)
        else:
            # Fallback: plain paragraphs with an e-mail link inside.
            for p in self.section[-1].find_all('p', recursive=False):
                if p.find('a'):
                    d = {}
                    d['name'] = ''
                    d['tel'] = ''
                    tmp = re.search(pattern, p.text)
                    if tmp:
                        d['tel'] = tmp[1].strip()
                    d['email'] = p.find('a').text
                    contacts.append(d)
        return contacts

    def get_documents(self):
        docs = []
        for a in self.section[1].find_all('a'):
            if a['href']:
                d = {}
                d['source'] = self.url + '/' + a['href'].lstrip('/')
                d['path'] = '.' + self.href['href'] + '/'.join(a['href'].replace('%20', '_').rsplit('/', 2)[-2:])
                d['name'] = d['path'].rsplit('/', 1)[-1]
                d['extension'] = d['name'].rsplit('.', 1)[-1]
                try:
                    r = requests.get(d['source'], timeout=20)
                except requests.RequestException:
                    print(f"err. conn: {d['source']}")
                    continue
                if r.status_code == 200:
                    os.makedirs(os.path.dirname(d['path']), exist_ok=True)
                    try:
                        with open(d['path'], 'wb') as f:
                            f.write(r.content)
                    except OSError:
                        print(f"Error writing file\nTag: {a}")
                        raise
                    d['size'] = len(r.content)
                    docs.append(d)
                else:
                    print(f"{d['source']} no response")
        return docs

    def run(self):
        return {
            'source': self.source,
            'name': self.name,
            'description': self.get_description(),
            'programs': self.get_program(),
            'contacts': self.get_contact(),
            'documents': self.get_documents(),
        }


def main():
    url = "https://fasie.ru"
    page = urlopen(url)
    html = page.read().decode("utf-8")
    soup = BeautifulSoup(html, "html.parser")
    div = soup.find_all('div', class_='wrap')
    programms_list = div[1].find('ul', class_='').find_all('ul', class_='')[1]
    hrefs = programms_list.find_all('a')
    data = []
    for i in hrefs:
        p = Parser(i)
        data.append(p.run())
    with open('output.json', 'w', encoding="utf-8") as f:
        f.write(json.dumps(data, indent=2, ensure_ascii=False))


main()
def automat(n):
    # Append one bit: 1 if the number has more one bits than zero bits, else 0.
    n = n << 1 ^ (n.bit_count() > n.bit_length() - n.bit_count())
    bit_l = n.bit_length()              # total bit length
    del_bit_l = 2 + bit_l % 2           # length of the middle chunk to drop
    l_r = (bit_l - del_bit_l) // 2      # length of the left and right parts
    l_bit = (n >> (bit_l - l_r)) << l_r # left part, shifted back into place
    r_bit = n & 2 ** l_r - 1            # right part
    return l_bit | r_bit


cnt = 0
for n in range(1, 1000):
    if 50 <= automat(n) <= 100:
        cnt += 1
print(cnt)
Even if you change the code of the first function, the interpreter will still see the second one; that is not what causes your error, though, it's just a side note. The problem is in the input data or in the remove_unreachable method. You need to study the input data carefully, along with how the CFG class is used.
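For reference, a minimal sketch of the kind of reachability cleanup the note refers to. The CFG and remove_unreachable names are taken from the post above; the adjacency-set layout, the add_edge helper, and the entry node are assumptions, since the actual class is not shown.

# Illustrative only: a toy CFG with a remove_unreachable step.
# The real class in the question may store nodes and edges differently.
class CFG:
    def __init__(self, entry):
        self.entry = entry
        self.edges = {entry: set()}   # node -> set of successor nodes

    def add_edge(self, src, dst):
        self.edges.setdefault(src, set()).add(dst)
        self.edges.setdefault(dst, set())

    def remove_unreachable(self):
        # Depth-first walk from the entry node; any node never visited
        # is unreachable and gets dropped from the graph.
        seen = set()
        stack = [self.entry]
        while stack:
            node = stack.pop()
            if node in seen:
                continue
            seen.add(node)
            stack.extend(self.edges.get(node, ()))
        self.edges = {n: s & seen for n, s in self.edges.items() if n in seen}


cfg = CFG('entry')
cfg.add_edge('entry', 'a')
cfg.add_edge('a', 'exit')
cfg.add_edge('dead', 'exit')      # 'dead' is never reached from 'entry'
cfg.remove_unreachable()
print(sorted(cfg.edges))          # ['a', 'entry', 'exit'] - 'dead' is gone

If the real remove_unreachable behaves differently on a toy graph like this, the bug is in the method; if it behaves the same, look at how the input data is turned into the graph.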