import asyncio
import io
import logging
import re
import zipfile

import aiohttp
import PIL.Image
from aiogram import types
from bs4 import BeautifulSoup

from config import VALID_URL_PATTERN


async def validate_url(url: str) -> bool:
    """Return True if the URL matches one of the supported classifieds sites."""
    return re.match(VALID_URL_PATTERN, url) is not None


async def download_images(url: str) -> list | None:
    """Fetch the listing page and return the image URLs found on it."""
    async with aiohttp.ClientSession() as session:
        for attempt in range(3):
            # Throttle before every attempt to stay under the site's rate limits.
            await asyncio.sleep(5)
            async with session.get(url) as response:
                logging.info(f"Fetching page: {url} - Status: {response.status}")
                if response.status == 429:
                    logging.warning("Received 429, retrying...")
                    await asyncio.sleep(10)
                    continue
                if response.status != 200:
                    logging.error(f"Failed to fetch page: {url} - Status: {response.status}")
                    return None
                html = await response.text()
                soup = BeautifulSoup(html, 'html.parser')
                image_tags = soup.find_all('img')
                image_urls = [
                    img.get('src')
                    for img in image_tags
                    if img.get('src') and img.get('src').startswith('http')
                ]
                logging.info(f"Found image URLs: {image_urls}")
                return image_urls
    # Every attempt was rate limited.
    return None


async def fetch_images(url: str) -> list | None:
    """Download every image referenced on the listing page."""
    image_urls = await download_images(url)
    if not image_urls:
        return None
    images = []
    for idx, img_url in enumerate(image_urls):
        logging.info(f"Attempting to fetch image: {img_url}")
        img_data = await request_image(img_url, idx)
        if img_data is not None:
            images.append(img_data)
    return images


async def request_image(img_url: str, idx: int) -> tuple | None:
    """Download one image and return it as an (in-memory PNG buffer, file name) pair."""
    headers = {
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        )
    }
    async with aiohttp.ClientSession() as session:
        async with session.get(img_url, headers=headers) as img_response:
            logging.info(f"Image fetch status for {img_url}: {img_response.status}")
            if img_response.status != 200:
                logging.error(f"Failed to fetch image {img_url}: {img_response.status}")
                return None
            image_data = await img_response.read()
    try:
        # Re-encode the raw bytes as PNG so every archive entry has a uniform format.
        image = PIL.Image.open(io.BytesIO(image_data))
        img_byte_arr = io.BytesIO()
        image.save(img_byte_arr, format='PNG')
        img_byte_arr.seek(0)
        img_name = f"image_{idx + 1}.png"
        return img_byte_arr, img_name
    except Exception as e:
        logging.error(f"Error opening image {img_url}: {e}")
        return None


async def create_zip_file(images: list) -> io.BytesIO:
    """Pack the (buffer, file name) pairs into an in-memory ZIP archive."""
    zip_io = io.BytesIO()
    with zipfile.ZipFile(zip_io, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for img_data, img_name in images:
            zip_file.writestr(img_name, img_data.getvalue())
    zip_io.seek(0)
    return zip_io


async def handle_url(message: types.Message) -> io.BytesIO | None:
    """Validate the URL in the message, download its images and return them as a ZIP."""
    url = message.text.strip()
    logging.info(f"Received URL: {url}")
    if not await validate_url(url):
        await message.answer("⛔️ Invalid link!\nThe link must point to a specific listing.")
        return None
    await message.answer("⏳ Searching for the listing,\nplease wait")
    images = await fetch_images(url)
    if not images:
        await message.answer("❌ Failed to download the images.")
        return None
    # Build the ZIP archive in memory
    zip_io = await create_zip_file(images)
    return zip_io
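

# Usage sketch (an assumption, not part of the original handler code): a minimal
# example of how handle_url could be wired into a bot and the resulting ZIP sent
# back to the user. It assumes aiogram 3.x (Router, F, BufferedInputFile);
# BOT_TOKEN below is a placeholder, not a value defined anywhere in this project.

from aiogram import Bot, Dispatcher, F, Router
from aiogram.types import BufferedInputFile

BOT_TOKEN = "123456:replace-with-your-bot-token"  # placeholder

router = Router()


@router.message(F.text)
async def on_listing_url(message: types.Message) -> None:
    zip_io = await handle_url(message)
    if zip_io is None:
        return  # handle_url has already replied with an error message
    # Wrap the in-memory buffer so Telegram receives it as a regular document.
    await message.answer_document(
        BufferedInputFile(zip_io.getvalue(), filename="images.zip")
    )


async def main() -> None:
    logging.basicConfig(level=logging.INFO)
    bot = Bot(token=BOT_TOKEN)
    dp = Dispatcher()
    dp.include_router(router)
    await dp.start_polling(bot)


if __name__ == "__main__":
    asyncio.run(main())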


# config.py
VALID_URL_PATTERN = r'https?://(www\.)?(avito\.ru|cian\.ru|domclick\.ru|yula\.ru|sutki\.ru|farpost\.ru|realty\.ya\.ru)/[^/]+/[^/]+(\?.*)?'