Решил я переустановить Python на машине с Debian 10.
Была введена лишь одна команда:
sudo apt purge -y python3.9-minimal
$ python
Python 3.10.2 (main, Mar 8 2022, 23:56:15) [GCC 10.2.1 20210110] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> import requests
>>> from bs4 import BeautifulSoup
>>> URL = 'https://pixabay.com/ru/'
>>> response = requests.get(URL)
>>> soup = BeautifulSoup(response.text, 'lxml')
>>> soup.title
<title>Attention Required! | Cloudflare</title>
>>>
Функция find_all работает некорректно,
хотя исходные данные верные.
from bs4 import BeautifulSoup
import requests
def parser():
    """Download an Avito search-results page and print its prettified HTML.

    The request is sent with a desktop-browser User-Agent header; the
    response body is parsed with BeautifulSoup's ``lxml`` backend and the
    re-indented markup is written to stdout. Nothing is returned.
    """
    search_url = 'https://www.avito.ru/voronezh/tovary_dlya_kompyutera/komplektuyuschie/materinskie_platy-ASgBAgICAkTGB~pm7gnOZw?cd=1&q=%D0%BC%D0%B0%D1%82%D0%B5%D1%80%D0%B8%D0%BD%D1%81%D0%BA%D0%B0%D1%8F+%D0%BF%D0%BB%D0%B0%D1%82%D0%B0+%D1%81+%D0%BF%D1%80%D0%BE%D1%86%D0%B5%D1%81%D1%81%D0%BE%D1%80%D0%BE%D0%BC'
    request_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.0.5005.124 Safari/537.36 Edg/102.0.1245.41',
    }
    page = requests.get(search_url, headers=request_headers)
    # Parse the raw bytes (not .text) so the parser does its own decoding.
    markup = BeautifulSoup(page.content, "lxml").prettify()
    print(markup)
parser()
Охватывает ли он всё, что дают вышеназванные линтеры?
$ python3
Python 3.9.2 (default, Feb 28 2021, 17:03:44)
[GCC 10.2.1 20210110] on linux
Type "help", "copyright", "credits" or "license" for more information.
>>> words = 'как дела'
>>> phrase = 'у тебя дела как'
>>> [word for word in words.split(' ') if word in phrase.split(' ')] == words.split(' ')
True
>>> phrase = 'у тебя дела'
>>> [word for word in words.split(' ') if word in phrase.split(' ')] == words.split(' ')
False
>>>
from autocorrect import Speller
# Build a spell corrector for the 'ru' language code and run it over a
# sample sentence that contains two deliberately misspelled words.
spell = Speller('ru')
text = 'Проверкка текста на ашибки.'
# Calling the Speller instance on a string gives back the corrected string.
print(spell(text))
Проверка текста на ошибки.
import language_tool_python
# Create a LanguageTool checker for the 'ru-RU' language code and run it
# over a sample sentence that contains two deliberately misspelled words.
tool = language_tool_python.LanguageTool('ru-RU')
text = 'Проверкка текста на ашибки.'
# The result of check() is printed as-is; presumably a collection of the
# issues found in the text (confirm against the library docs).
matches = tool.check(text)
print(matches)
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
# Open a profile page in a visible (non-headless) Chromium window, grab the
# rendered HTML and print the text of one <div class="value"> element.
options = Options()
options.headless = False
options.binary_location = '/usr/bin/chromium'
driver = webdriver.Chrome(options=options)
URL = 'https://tracker.gg/valorant/profile/riot/mrsnowman%23siuu/overview'
try:
    driver.get(URL)
    # Fixed pause before reading the DOM; presumably gives the page's
    # scripts time to render the statistics (TODO: confirm, or replace with
    # an explicit wait).
    time.sleep(5)
    html = driver.page_source
finally:
    # Always shut the browser down, even when navigation or the page read
    # fails, so that no Chromium process is left running.
    driver.quit()
soup = BeautifulSoup(html, 'lxml')
# NOTE(review): `set=True` is forwarded to BeautifulSoup as an attribute
# filter, i.e. it presumably requires the <div> to carry an attribute
# named "set" -- confirm this is what was intended.
agent = soup.find('div', class_='value', set=True).text.strip()
print(agent)
import csv
import unicodedata
import requests
from anti_useragent import UserAgent
from bs4 import BeautifulSoup
# Request headers shared by every HTTP call in this script. The User-Agent
# value is read from the generator's `chrome` attribute (presumably a
# Chrome browser UA string -- confirm against anti_useragent docs).
ua = UserAgent()
headers = {'User-Agent': ua.chrome}
def get_html(url):
    """Return the body text of a GET request to *url*.

    The request is sent with the module-level ``headers`` mapping.
    """
    return requests.get(url, headers=headers).text
def get_content(html):
    """Extract listing cards from *html* as a list of dicts.

    Every ``<a class="css-yo21t ewrty961">`` element becomes one dict with
    the keys ``'Title'``, ``'Price'`` and ``'About'``, each holding the
    stripped text of the matching child ``<div>``. The price text is
    additionally passed through NFKD Unicode normalization.
    """
    document = BeautifulSoup(html, 'lxml')

    def div_text(card, css_class):
        # Stripped text of the first <div> inside `card` with this class.
        return card.find('div', class_=css_class).text.strip()

    return [
        {
            'Title': div_text(card, 'css-17lk78h e3f4v4l2'),
            'Price': unicodedata.normalize(
                "NFKD", div_text(card, 'css-1dv8s3l eyvqki91')
            ),
            'About': div_text(card, 'css-1fe6w6s e162wx9x0'),
        }
        for card in document.find_all('a', class_='css-yo21t ewrty961')
    ]
def content_to_csv(content):
    """Write *content* to ``autodrom.csv`` in the current directory.

    *content* is an iterable of dicts keyed by ``'Title'``, ``'Price'`` and
    ``'About'``. The file is written as UTF-8 using the csv ``unix``
    dialect, with a header row first. An ``IOError`` raised while opening
    or writing the file is reported on stdout instead of propagating.
    """
    result_file = 'autodrom.csv'
    try:
        with open(result_file, 'w', encoding='utf-8', newline='') as csv_file:
            writer = csv.DictWriter(
                csv_file,
                fieldnames=['Title', 'Price', 'About'],
                dialect='unix',
            )
            writer.writeheader()
            writer.writerows(content)
    except IOError as err:
        print(f'Error writing the file {result_file}: {err}')
def main():
    """Fetch the Audi listing page, parse it and save the rows as CSV."""
    url = 'https://auto.drom.ru/audi/'
    page_html = get_html(url)
    cars = get_content(page_html)
    content_to_csv(cars)
if __name__ == '__main__':
main()
Вот пример моего кода.
Вот JSON:
import json
DOC = """\
{
"name": "Bitcoin",
"tickers": [
{
"base": "BTC",
"bid_ask_spread_percentage": 0.010034,
"coin_id": "bitcoin",
"converted_last": {
"btc": 1.000832,
"eth": 14.633225,
"usd": 29784
},
"converted_volume": {
"btc": 48398,
"eth": 707624,
"usd": 1440259765
},
"is_anomaly": false,
"is_stale": false,
"last": 29668.22,
"last_fetch_at": "2022-05-15T10:13:49+00:00",
"last_traded_at": "2022-05-15T10:13:49+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "binance",
"name": "Binance"
},
"target": "USDT",
"target_coin_id": "tether",
"timestamp": "2022-05-15T10:13:49+00:00",
"token_info_url": null,
"trade_url": "https://www.binance.com/en/trade/BTC_USDT?ref=37754157",
"trust_score": "green",
"volume": 48357.32095661529
},
{
"base": "WBTC",
"bid_ask_spread_percentage": 0.010002,
"coin_id": "wrapped-bitcoin",
"converted_last": {
"btc": 1,
"eth": 14.603248,
"usd": 29671
},
"converted_volume": {
"btc": 137.98,
"eth": 2015,
"usd": 4093977
},
"is_anomaly": false,
"is_stale": false,
"last": 0.9997,
"last_fetch_at": "2022-05-15T10:23:54+00:00",
"last_traded_at": "2022-05-15T10:19:38+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "ftx_spot",
"name": "FTX"
},
"target": "BTC",
"target_coin_id": "bitcoin",
"timestamp": "2022-05-15T10:19:38+00:00",
"token_info_url": null,
"trade_url": "https://ftx.com/trade/WBTC/BTC",
"trust_score": "green",
"volume": 138.02190083024908
},
{
"base": "BTC",
"bid_ask_spread_percentage": 0.016816,
"coin_id": "bitcoin",
"converted_last": {
"btc": 1.000886,
"eth": 14.620339,
"usd": 29721
},
"converted_volume": {
"btc": 13166,
"eth": 192320,
"usd": 390963508
},
"is_anomaly": false,
"is_stale": false,
"last": 29652.24,
"last_fetch_at": "2022-05-15T10:22:07+00:00",
"last_traded_at": "2022-05-15T10:22:07+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "digifinex",
"name": "Digifinex"
},
"target": "USDT",
"target_coin_id": "tether",
"timestamp": "2022-05-15T10:22:07+00:00",
"token_info_url": null,
"trade_url": "https://www.digifinex.com/en-ww/trade/USDT/BTC",
"trust_score": "green",
"volume": 13154.30795151
},
{
"base": "ETH",
"bid_ask_spread_percentage": 0.011459,
"coin_id": "ethereum",
"converted_last": {
"btc": 1,
"eth": 14.603248,
"usd": 29671
},
"converted_volume": {
"btc": 3337,
"eth": 48727,
"usd": 99002386
},
"is_anomaly": false,
"is_stale": false,
"last": 0.068503,
"last_fetch_at": "2022-05-15T10:23:58+00:00",
"last_traded_at": "2022-05-15T10:23:58+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "whitebit",
"name": "WhiteBIT"
},
"target": "BTC",
"target_coin_id": "bitcoin",
"timestamp": "2022-05-15T10:23:58+00:00",
"token_info_url": null,
"trade_url": "https://whitebit.com/trade/ETH_BTC",
"trust_score": "green",
"volume": 48708.902
},
{
"base": "BTC",
"bid_ask_spread_percentage": 0.01338,
"coin_id": "bitcoin",
"converted_last": {
"btc": 0.99724658,
"eth": 14.563039,
"usd": 29589
},
"converted_volume": {
"btc": 2150,
"eth": 31403,
"usd": 63803915
},
"is_anomaly": false,
"is_stale": false,
"last": 29589,
"last_fetch_at": "2022-05-15T10:23:57+00:00",
"last_traded_at": "2022-05-15T10:23:57+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "ftx_us",
"name": "FTX.US"
},
"target": "USD",
"timestamp": "2022-05-15T10:23:57+00:00",
"token_info_url": null,
"trade_url": "https://ftx.us/trade/BTC/USD",
"trust_score": "green",
"volume": 2156.3390012132886
},
{
"base": "BTC",
"bid_ask_spread_percentage": 0.013498,
"coin_id": "bitcoin",
"converted_last": {
"btc": 0.99998279,
"eth": 14.602997,
"usd": 29670
},
"converted_volume": {
"btc": 192.67,
"eth": 2814,
"usd": 5716642
},
"is_anomaly": false,
"is_stale": false,
"last": 29634,
"last_fetch_at": "2022-05-15T10:23:58+00:00",
"last_traded_at": "2022-05-15T10:23:58+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "ftx_us",
"name": "FTX.US"
},
"target": "USDT",
"target_coin_id": "tether",
"timestamp": "2022-05-15T10:23:58+00:00",
"token_info_url": null,
"trade_url": "https://ftx.us/trade/BTC/USDT",
"trust_score": "green",
"volume": 192.672936569481
},
{
"base": "BTC",
"bid_ask_spread_percentage": 0.033738,
"coin_id": "bitcoin",
"converted_last": {
"btc": 0.99687626,
"eth": 14.579921,
"usd": 29634
},
"converted_volume": {
"btc": 3782,
"eth": 55319,
"usd": 112436637
},
"is_anomaly": false,
"is_stale": false,
"last": 29634.0638176,
"last_fetch_at": "2022-05-15T10:18:21+00:00",
"last_traded_at": "2022-05-15T10:18:21+00:00",
"market": {
"has_trading_incentive": false,
"identifier": "bitfinex",
"name": "Bitfinex"
},
"target": "USD",
"timestamp": "2022-05-15T10:18:21+00:00",
"token_info_url": null,
"trade_url": "https://www.bitfinex.com/t/BTCUSD",
"trust_score": "green",
"volume": 3794.16867832
}
]
}\
"""
# Decode the JSON document and collect the market name of every ticker,
# in the order the tickers appear.
data = json.loads(DOC)
market_name = [ticker['market']['name'] for ticker in data['tickers']]
import requests
from bs4 import BeautifulSoup
# Fetch the room list page and print the title and first price entry of
# every room card found under <ul id="roomsList">.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:100.0) Gecko/20100101 Firefox/100.0",
}
URL = "https://hamptonvolgograd.ru/nomera/"
response = requests.get(URL, headers=headers)
soup = BeautifulSoup(response.text, "lxml")
rooms_container = soup.find("ul", id="roomsList")
roomlist = rooms_container.select("li > span > div")
for hostel in roomlist:
    title = hostel.find("span", class_="h1").text.strip()
    # Only the first <li> of the price list is used.
    price_list = hostel.find("ul", class_="roomPrice")
    price = price_list.find("li").text.strip()
    print(title, price, sep="\n")
import tkinter as tk
# Create the root Tk window and set its title text.
window = tk.Tk()
window.title('Hello World!')
# Enter the Tk event loop; this call blocks until the window is closed.
window.mainloop()