pip install python-magic
>>> import magic
>>> magic.from_file("testdata/test.pdf")
'PDF document, version 1.2'
# Pass at least the first 2048 bytes to from_buffer(); a shorter buffer can be misidentified.
>>> magic.from_buffer(open("testdata/test.pdf", "rb").read(2048))
'PDF document, version 1.2'
>>> magic.from_file("testdata/test.pdf", mime=True)
'application/pdf'
import requests
# Prefix of the paginated catalogue URL; the page number and a trailing slash
# are appended to it for each request.
base_url = "https://www.ursus.ru/catalogue/zashchita_ot_padeniy_s_vysoty/page-"
pages = 8

# Pages are numbered 1..pages inclusive.
for i in range(1, pages + 1):
    # Build the URL once so the address that is logged is the address that
    # is requested (both end in "/").
    page_url = f"{base_url}{i}/"
    print(f"Page: {i}")
    print(page_url)
    requests.get(page_url)
# User-Agent header sent with the request.
# The value is built from adjacent string literals rather than a backslash
# continuation inside one literal, so the two halves are separated by exactly
# one space regardless of how the source lines are indented.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36"
    )
}
# NOTE(review): `url` is not assigned in this snippet; it must already be defined.
data = requests.get(url, headers=headers)
import requests
from bs4 import BeautifulSoup
# Listing page whose photo gallery is inspected below.
url = "https://realt.by/sale-flats/object/2562548/"

# User-Agent header sent with the request. Adjacent string literals keep
# exactly one space between the two halves of the value.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Mobile Safari/537.36"
    )
}
data = requests.get(url, headers=headers)
soup = BeautifulSoup(data.text, features="html.parser")

# Take the `src` of the second <img> carrying the "blur-sm scale-105" class
# string inside the first <div class="swiper-wrapper">.
# find_all is the current name of the legacy findAll alias.
img = (
    soup.find("div", attrs={"class": "swiper-wrapper"})
    .find_all("img", class_="blur-sm scale-105")[1]
    .get("src")
)
print(img)
https://static.realt.by/thumb/c/600x400/6f57b1d409f96f2b1ede7f082f120b50/ja/e/site15nf8eja/7c30f38145.jpg
from bs4 import BeautifulSoup
import requests
# Desktop Chrome User-Agent string sent with the request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.77 Safari/537.36"
}
estrade_url = 'https://www.instagram.com/estrade.pmk/'

# Download the page, parse the raw response bytes and print the re-indented markup.
page = requests.get(estrade_url, headers=headers)
soup = BeautifulSoup(page.content, features='html.parser')
print(soup.prettify())
# Site root; each href collected below is appended to it.
main_url = "https://www.bundestag.de"

# Request the listing 20 entries at a time (offsets 0, 20, ..., 720).
for i in range(0, 740, 20):
    url = f"https://www.bundestag.de/ajax/filterlist/de/abgeordnete/862712-862712?limit=20&noFilterSet=true&offset={i}"
    q = requests.get(url)
    result = q.content
    soup = BeautifulSoup(result, 'lxml')
    persons = soup.find_all(class_="bt-open-in-overlay")
    # NOTE(review): persons_url_list is not created in this snippet; it must already exist.
    for person in persons:
        persons_url_list.append(main_url + person.get('href'))
# NOTE(review): `html` is not assigned in this snippet; it must already hold the markup.
# The parser is named explicitly: without it Beautiful Soup picks whichever
# parser happens to be installed (and warns), so the tree can differ between machines.
soup = BeautifulSoup(html, 'html.parser')

# Locate the icon tags by their exact class string.
author_text = soup.find('i', {'class': 'icon icon-user'})
email_text = soup.find('i', {'class': 'icon icon-support'})
phone_text = soup.find('i', {'class': 'icon icon-phone'})

# Print the node that comes immediately after each icon tag in document order.
# next_element is the current name of the legacy `.next` alias.
print(author_text.next_element)
print(email_text.next_element)
print(phone_text.next_element)
Пыльнев Анатолий
tollik36@mail.ru
89055663563
from bs4 import BeautifulSoup
# Sample markup: one table cell holding author, e-mail and phone, each
# preceded by an icon tag.
html="""
<tr>
<td>
<br/><br/>
<i class="icon icon-user" data-selector=".icon" title="Автор"></i> Барышева Олеся<br/>
<i class="icon icon-support" data-selector=".icon" title="E-mail"></i> olesya052019@bk.ru<br/>
<i class="icon icon-phone" data-selector=".icon" title="Телефон"></i> 89188565504<br/>
</td>
</tr>
"""
# The parser is named explicitly: without it Beautiful Soup picks whichever
# parser happens to be installed (and warns), so the tree can differ between machines.
soup = BeautifulSoup(html, 'html.parser')

# Take the first <td> and print its text split on whitespace.
my_text = soup.find('td')
print(my_text.get_text().split())
['Барышева', 'Олеся', 'olesya052019@bk.ru', '89188565504']
Барышева Олеся
olesya052019@bk.ru
89188565504
from bs4 import BeautifulSoup
# Sample markup: three table rows, each with one cell holding author, e-mail
# and phone, each preceded by an icon tag.
html="""
<tr>
<td>
<br/><br/>
<i class="icon icon-user" data-selector=".icon" title="Автор"></i> Барышева Олеся<br/>
<i class="icon icon-support" data-selector=".icon" title="E-mail"></i> olesya052019@bk.ru<br/>
<i class="icon icon-phone" data-selector=".icon" title="Телефон"></i> 89188565504<br/>
</td>
</tr>
<tr>
<td>
<br/><br/>
<i class="icon icon-user" data-selector=".icon" title="Автор"></i> Иван Иванович<br/>
<i class="icon icon-support" data-selector=".icon" title="E-mail"></i> obi_van_ia9@bk.ru<br/>
<i class="icon icon-phone" data-selector=".icon" title="Телефон"></i> 232321113312<br/>
</td>
</tr>
<tr>
<td>
<br/><br/>
<i class="icon icon-user" data-selector=".icon" title="Автор"></i> Темный лорд<br/>
<i class="icon icon-support" data-selector=".icon" title="E-mail"></i> pirojok51@mail.ru<br/>
<i class="icon icon-phone" data-selector=".icon" title="Телефон"></i> 80002111122<br/>
</td>
</tr>
"""
# The parser is named explicitly: without it Beautiful Soup picks whichever
# parser happens to be installed (and warns), so the tree can differ between machines.
soup = BeautifulSoup(html, 'html.parser')

# Collect every <td> (find_all is the current name of the legacy findAll alias)
# and print each cell's text split on whitespace.
my_text = soup.find_all('td')
for text in my_text:
    print(text.get_text().split())
['Барышева', 'Олеся', 'olesya052019@bk.ru', '89188565504']
['Иван', 'Иванович', 'obi_van_ia9@bk.ru', '232321113312']
['Темный', 'лорд', 'pirojok51@mail.ru', '80002111122']
C:\Users\david\Desktop>test.py www.google.com
#
# ARIN WHOIS data and services are subject to the Terms of Use
# available at: https://www.arin.net/resources/registry/whois/tou/
#
# If you see inaccuracies in the results, please report at
# https://www.arin.net/resources/registry/whois/inaccuracy_reporting/
#
# Copyright 1997-2021, American Registry for Internet Numbers, Ltd.
#
NetRange: 216.58.192.0 - 216.58.223.255
CIDR: 216.58.192.0/19
NetName: GOOGLE
NetHandle: NET-216-58-192-0-1
Parent: NET216 (NET-216-0-0-0-0)
NetType: Direct Allocation
OriginAS: AS15169
Organization: Google LLC (GOGL)
RegDate: 2012-01-27
Updated: 2012-01-27
Ref: https://rdap.arin.net/registry/ip/216.58.192.0
OrgName: Google LLC
OrgId: GOGL
Address: 1600 Amphitheatre Parkway
City: Mountain View
StateProv: CA
PostalCode: 94043
Country: US
RegDate: 2000-03-30
Updated: 2019-10-31
Comment: Please note that the recommended way to file abuse complaints are located in the following links.
Comment:
Comment: To report abuse and illegal activity: https://www.google.com/contact/
Comment:
Comment: For legal requests: http://support.google.com/legal
Comment:
Comment: Regards,
Comment: The Google Team
Ref: https://rdap.arin.net/registry/entity/GOGL
OrgAbuseHandle: ABUSE5250-ARIN
OrgAbuseName: Abuse
OrgAbusePhone: +1-650-253-0000
OrgAbuseEmail: network-abuse@google.com
OrgAbuseRef: https://rdap.arin.net/registry/entity/ABUSE5250-ARIN
OrgTechHandle: ZG39-ARIN
OrgTechName: Google LLC
OrgTechPhone: +1-650-253-0000
OrgTechEmail: arin-contact@google.com
OrgTechRef: https://rdap.arin.net/registry/entity/ZG39-ARIN
#
# ARIN WHOIS data and services are subject to the Terms of Use
# available at: https://www.arin.net/resources/registry/whois/tou/
#
# If you see inaccuracies in the results, please report at
# https://www.arin.net/resources/registry/whois/inaccuracy_reporting/
#
# Copyright 1997-2021, American Registry for Internet Numbers, Ltd.
Как сделать, чтобы Python мониторил папку (например, раз в 10 секунд)?
from datetime import date
# Current local date, rendered as zero-padded day.month.year.
today = date.today()
d1 = f"{today:%d.%m.%Y}"
print(f"d1 = {d1}")
# Ask a yes/no question and echo the reply when, ignoring letter case,
# it is one of the two recognised answers.
answer = input("Тебе больше 18 лет? ")
if answer.upper() in ('ДА', 'НЕТ'):
    print(answer)
"Да" "да" "ДА"
from datetime import datetime
import locale
# Switch every locale category to Russian so that strftime('%B') yields the
# Russian month name.
# Locale names are platform-specific: "Russian" is the Windows-style name used
# originally, while POSIX systems use spellings such as "ru_RU.UTF-8". The
# original name is tried first so behaviour where it already worked is unchanged.
# (The original note warned that "de_DE" did not work in the author's environment.)
for _locale_name in ("Russian", "ru_RU.UTF-8", "ru_RU"):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
    except locale.Error:
        continue  # this spelling is not available here; try the next one
    break
else:
    # Same exception type the single setlocale() call raised on failure.
    raise locale.Error("no Russian locale is available on this system")

a = datetime.today()
print('Сейчас у вас', a.strftime('%B'), 'месяц.')