$ chromedriver --version
ChromeDriver 99.0.4844.51 (d537ec02474b5afe23684e7963d538896c63ac77-refs/branch-heads/4844@{#875})
$ google-chrome-stable --version
Google Chrome 99.0.4844.51
import time

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup

# Scrape the summary panels from the Nanopool ETH dashboard.
# The page builds its content with JavaScript, so the wait must happen
# BEFORE grabbing the page source — the original captured the source
# first and slept afterwards, which made the wait useless.
URL = 'https://eth.nanopool.org/'

options = Options()
options.headless = True

driver = webdriver.Chrome(options=options)
try:
    driver.get(URL)
    time.sleep(5)  # crude wait for JS rendering; WebDriverWait would be more robust
    html = driver.page_source
finally:
    driver.quit()  # always release the browser process, even on error

soup = BeautifulSoup(html, 'lxml')
for panel in soup.find_all('div', class_='panel panel-primary fixed-height-panel'):
    elem_title = panel.find('div', class_='panel-title').text
    elem_value = panel.find('div', class_='panel-body').text
    print(f'{elem_title}: {elem_value}')
from bs4 import BeautifulSoup

# Sample document: only the <p class="1"> entries should be extracted.
DOC = '''
<html>
<body>
<div>
<p class="1">
<label>Значение, которое нужно спарсить</label>
</div>
<div>
<p class="2">
<label>Значение, которое мне не нужно</label>
</div>
<div>
<p class="1">
<label>Ещё одно значение, которое нужно спарсить</label>
</div>
</body>
</html>
'''

soup = BeautifulSoup(DOC, 'lxml')
# The <p> tags are unclosed, so climb to the enclosing <div> and take
# its <label> — that works regardless of how the parser repairs the markup.
wanted = soup.find_all('p', class_='1')
for paragraph in wanted:
    container = paragraph.find_parent('div')
    print(container.find('label').text)
import sys
from bs4 import BeautifulSoup

DOC = 'test.html'

# Read the saved page; abort with a message if it cannot be opened.
try:
    with open(DOC, encoding='utf-8') as f:
        page = f.read()
except IOError as err:
    print(f'Error reading the file {DOC}: {err}')
    sys.exit()

soup = BeautifulSoup(page, 'lxml')

# Walk every table row, skipping header/empty rows that have no <td> cells,
# and print: "<first cell>: <middle cells...> Ср: <last cell>".
for tr in soup.find_all('tr'):
    cells = tr.find_all('td')
    if not cells:
        continue
    values = [cell.text.strip() for cell in cells]
    values = [v for v in values if v]
    print(f'{values[0]}:', *values[1:-1], f'Ср: {values[-1]}')
import json
from bs4 import BeautifulSoup

# The server answered with raw JSON that the browser wrapped in a <pre>
# tag when saving the page; pull the tag text out and parse it as JSON.
DOC = '''
<html>
<head></head>
<body>
<pre style="word-wrap: break-word; white-space: pre-wrap;">
[
{"id":1,"name":"Склад 407","quantity":"0.0"},
{"id":9,"name":"Литейка","quantity":"0.0"},
{"id":16,"name":"СЛК - 407 каб.","quantity":"0.0"},
{"id":18,"name":"СЛК - 521 каб. МКШ-96","quantity":"0.0"},
{"id":19,"name":"СЛК - 524 каб.","quantity":"0.0"},
{"id":20,"name":"СЛК - литейка","quantity":"0.0"},
{"id":26,"name":"СЛК - Сборка Lifepad","quantity":"3.0"}
]
</pre>
</body>
</html>
'''

soup = BeautifulSoup(DOC, 'lxml')
payload = json.loads(soup.find('pre').text)
for warehouse in payload:
    print(f"{warehouse['name']} - {warehouse['quantity']}")
import pandas as pd

CSV = 'test.csv'

# read_csv accepts a path directly (UTF-8 is its default text encoding),
# so there is no need to open a file handle by hand.
df = pd.read_csv(CSV)
row = df[df['user_id'] == 100289]  # select the record(s) for this user id
1252 Latin 1
1250 Latin 2: Eastern Europe
1251 Cyrillic
1253 Greek
1254 Turkish
1255 Hebrew
1256 Arabic
1257 Windows Baltic
1258 Vietnamese
Mac Roman Macintosh Character Set (US Roman)
Python 2.5 to 2.7 support (with experimental Python3 support)
title = "TOML Example"
[owner]
name = "Tom Preston-Werner"
dob = 1979-05-27T07:32:00-08:00
[database]
enabled = true
ports = [ 8000, 8001, 8002 ]
data = [ ["delta", "phi"], [3.14] ]
temp_targets = { cpu = 79.5, case = 72.0 }
[servers]
[servers.alpha]
ip = "10.0.0.1"
role = "frontend"
import toml

# Load a TOML document, read and patch one value, then write it back out.
data = toml.load('data.toml')
print(data['owner']['name'])

data['owner']['name'] = 'John Doe'

with open('new.toml', 'w', encoding='utf-8') as f:
    # toml.dump writes to the file and also returns the serialized text.
    new_data = toml.dump(data, f)
print(new_data)
import requests
from bs4 import BeautifulSoup

URL = 'https://proproprogs.ru/django/model-mtv-marshrutizaciya-funkcii-predstavleniya'

# Download the article and remove the embedded video block from its body.
response = requests.get(URL)
response.raise_for_status()  # fail loudly on an HTTP error instead of parsing an error page

# Name the parser explicitly ('lxml', as in the rest of this file):
# omitting it triggers bs4's GuessedAtParserWarning and can produce a
# different tree on machines with different parsers installed.
soup = BeautifulSoup(response.text, 'lxml')

data = soup.find('div', class_='content-text')
if data is not None:
    # The video container may be absent on some pages; calling
    # decompose() on None would raise AttributeError.
    noneed = data.find('div', class_='video_container')
    if noneed is not None:
        noneed.decompose()