@juniorpie

Почему скачиваются миниатюры изображений вместо их полного размера?

import asyncio
import io
import re
import zipfile
import logging
import aiohttp
from bs4 import BeautifulSoup
from aiogram import types
import PIL.Image
from config import VALID_URL_PATTERN

async def validate_url(url: str) -> bool:
    """Return True if *url* matches the configured pattern of supported listing sites."""
    return bool(re.match(VALID_URL_PATTERN, url))

async def download_images(url: str) -> list | None:
    """Fetch the listing page and extract full-size image URLs.

    Returns a list of absolute (http/https) image URLs, or None if the page
    could not be fetched within 3 attempts.

    Why thumbnails were downloaded before: only ``img['src']`` was read, and
    listing sites typically put a small preview there while the full-size
    variants live in ``srcset`` or lazy-load attributes. We now pick the
    largest available candidate per <img> tag.
    """
    async with aiohttp.ClientSession() as session:
        for attempt in range(3):
            # Crude throttle before every request to be polite to the site.
            await asyncio.sleep(5)
            async with session.get(url) as response:
                logging.info(f"Fetching page: {url} - Status: {response.status}")
                if response.status == 429:
                    # Back off and RETRY. The original code fell through here
                    # and parsed the 429 error page as if it were the listing.
                    logging.warning("Received 429, retrying...")
                    await asyncio.sleep(10)
                    continue
                if response.status != 200:
                    logging.error(f"Failed to fetch page: {url} - Status: {response.status}")
                    return None

                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')

                image_urls = []
                for img in soup.find_all('img'):
                    candidate = _best_image_source(img)
                    if candidate and candidate.startswith('http'):
                        image_urls.append(candidate)

                logging.info(f"Found image URLs: {image_urls}")
                return image_urls
    # All 3 attempts were rate-limited (429).
    return None


def _best_image_source(img) -> str | None:
    """Pick the highest-resolution URL advertised by an <img> tag.

    Preference order: the widest ``srcset``/``data-srcset`` candidate,
    then common lazy-load attributes, then plain ``src``.
    """
    srcset = img.get('srcset') or img.get('data-srcset')
    if srcset:
        # srcset format: "url1 640w, url2 1280w" -- parse each candidate's
        # width descriptor and keep the widest one.
        best_url, best_width = None, -1
        for candidate in srcset.split(','):
            parts = candidate.strip().split()
            if not parts:
                continue
            width = 0
            if len(parts) > 1 and parts[1].endswith('w'):
                try:
                    width = int(parts[1][:-1])
                except ValueError:
                    width = 0
            if width > best_width:
                best_url, best_width = parts[0], width
        if best_url:
            return best_url
    for attr in ('data-src', 'data-original', 'src'):
        value = img.get(attr)
        if value:
            return value
    return None

async def fetch_images(url: str) -> list | None:
    """Download every image linked from the listing at *url*.

    Returns a list of ``(BytesIO, filename)`` pairs for the images that were
    fetched successfully, or None if the page itself yielded no image URLs.
    (Original annotation claimed ``list`` but the function also returns None.)
    """
    image_urls = await download_images(url)
    if not image_urls:
        return None

    images = []
    for idx, img_url in enumerate(image_urls):
        logging.info(f"Attempting to fetch image: {img_url}")
        img_data = await request_image(img_url, idx)
        # Skip images that failed to download or decode; keep the rest.
        if img_data is not None:
            images.append(img_data)

    return images

async def request_image(img_url: str, idx: int) -> tuple[io.BytesIO, str] | None:
    """Download one image and normalize it to PNG.

    Returns ``(png_bytes_io, filename)`` where filename is ``image_{idx+1}.png``,
    or None on HTTP failure or if the payload is not a decodable image.
    (Original annotation claimed ``tuple`` but the function also returns None.)
    """
    async with aiohttp.ClientSession() as session:
        # Some hosts reject requests without a browser-like User-Agent.
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        }
        async with session.get(img_url, headers=headers) as img_response:
            logging.info(f"Image fetch status for {img_url}: {img_response.status}")
            if img_response.status != 200:
                logging.error(f"Failed to fetch image {img_url}: {img_response.status}")
                return None

            image_data = await img_response.read()
            try:
                # Re-encode to PNG so every archive entry has a uniform,
                # lossless format regardless of the source (JPEG/WebP/...).
                image = PIL.Image.open(io.BytesIO(image_data))
                img_byte_arr = io.BytesIO()
                image.save(img_byte_arr, format='PNG')
                img_byte_arr.seek(0)
                img_name = f"image_{idx + 1}.png"
                return img_byte_arr, img_name
            except Exception as e:
                # Pillow raises various exception types on corrupt/unsupported
                # data; treat any of them as "not an image" and skip it.
                logging.error(f"Error opening image {img_url}: {e}")
                return None

async def create_zip_file(images: list) -> io.BytesIO:
    """Pack ``(BytesIO, filename)`` pairs into an in-memory ZIP archive.

    The returned buffer is rewound to position 0, ready to be sent.
    """
    archive_buffer = io.BytesIO()
    with zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
        for payload, filename in images:
            archive.writestr(filename, payload.getvalue())
    archive_buffer.seek(0)
    return archive_buffer

async def handle_url(message: types.Message) -> io.BytesIO | None:
    """Handle an incoming Telegram message containing a listing URL.

    Validates the URL, downloads all images from the listing, and returns
    an in-memory ZIP with them; returns None (after notifying the user)
    on invalid URL or when no image could be downloaded.
    """
    url = message.text.strip()
    logging.info(f"Received URL: {url}")

    if not await validate_url(url):
        await message.answer("⛔️ Не корректная ссылка!\nСсылка должна вести к конкретному объявлению.")
        return None

    await message.answer("⏳ Поиск объявления\nпожалуйста подождите")

    images = await fetch_images(url)
    # `not images` also covers the empty-list case (every individual image
    # fetch failed) -- the original `is None` check would have produced and
    # sent an empty ZIP archive.
    if not images:
        await message.answer("❌ Не удалось скачать изображения.")
        return None

    # Build the ZIP entirely in memory -- nothing touches the filesystem.
    zip_io = await create_zip_file(images)
    return zip_io


# Config.py -- module-level constant imported at the top of this file.
# Matches a listing URL on one of the whitelisted classified-ad domains:
# optional "www.", the domain, then at least two path segments (i.e. a
# concrete advertisement, not the site root), plus an optional query string.
# NOTE(review): re.match anchors only at the start, so trailing garbage
# after the second path segment is still accepted -- confirm if intended.
VALID_URL_PATTERN = r'https?://(www\.)?(avito\.ru|cian\.ru|domclick\.ru|yula\.ru|sutki\.ru|farpost\.ru|realty\.ya\.ru)/[^/]+/[^/]+(\?.*)?'
  • Вопрос задан
  • 162 просмотра
Пригласить эксперта
Ваш ответ на вопрос

Войдите, чтобы написать ответ

Похожие вопросы