Вообщем, парсил раньше, все было норм
Начинаю парсить другие данные с того же веб-сайта, просто пишет None. Хотя они там есть.
Мой код:
Кодimport json
import time
import os
from PIL import Image
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("--disable-images")
options.add_argument("--disable-webgl")
options.add_argument("--enable-javascript")
options.add_experimental_option("excludeSwitches", ["enable-logging"])
options.add_argument("--enable-chrome-browser-cloud-management")
with open('./cookies.json', 'r') as f:
cookies = json.load(f)
def get_source(url):
driver = webdriver.Chrome(options=options)
driver.get('https://excalibur-craft.ru/index.php?do=profile&name=' + url)
for cookie in cookies:
driver.add_cookie(cookie)
driver.refresh()
time.sleep(0.50)
source_code = driver.page_source
soup = BeautifulSoup(source_code, 'html.parser')
forum_link_element = soup.select_one('a#forum-tab')
forum_link = forum_link_element['href'] if forum_link_element else None
exp = soup.select_one('html > body > div:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(5) > div:nth-of-type(2) > p')
clan = soup.select_one('html > body > div:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(6) > div > label > a')
status = soup.select_one('html > body > div:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(4) > div:nth-of-type(2) > p')
online_on_month = soup.select_one('html > body > div:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(2) > div:nth-of-type(2) > p')
online_on_all = soup.select_one('html > body > div:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(3) > div:nth-of-type(2) > p')
registration_date = soup.select_one('html > body > div:nth-of-type(2) > div:nth-of-type(1) > div > div:nth-of-type(1) > div > div:nth-of-type(2) > div:nth-of-type(2) > div:nth-of-type(1) > div:nth-of-type(1) > div:nth-of-type(2) > p')
if exp and status and online_on_month and online_on_all and registration_date:
driver.get(forum_link)
time.sleep(1)
driver.get(forum_link)
time.sleep(1)
rank = soup.select_one('html > body > main > div > div > div > div > div > div > section > div:nth-of-type(1) > div > div > h3')
reputation = soup.select_one('html > body > main > div > div > div > div > div > div > section > div:nth-of-type(1) > div > a:nth-of-type(2) > p')
publications = soup.select_one('html > body > main > div > div > div > div > div > div > section > div:nth-of-type(1) > ul > li:nth-of-type(1) > h4')
if clan:
return (f"Опыт: {exp}\n"
f"Клан: {clan}\n"
f"Статус в игре: {status}\n"
f"Онлайн за месяц: {online_on_month}\n"
f"Онлайн за все время: {online_on_all}\n"
f"Дата регистрации: {registration_date}\n"
f"Кол-во публикаций: {publications}"
f"Репутация: {reputation}"
f"Ранг: {rank}")
else:
return (f"Опыт: {exp}\n"
f"Статус в игре: {status}\n"
f"Онлайн за месяц: {online_on_month}\n"
f"Онлайн за все время: {online_on_all}\n"
f"Дата регистрации: {registration_date}\n")
else:
return 1337
print(get_source("KirillSafe"))
Всякая другая шняга для поиска тех нужных данных(в коде они отмечены как rank, reputation, publications):
spoiler
XPATH - /html/body/main/div/div/div/div/div/div/section/div[1]/div/a[2]/p
/html/body/main/div/div/div/div/div/div/section/div[1]/div/div/h3
/html/body/main/div/div/div/div/header/div[3]/ul/li[1]
Укажите на ошибку :(