import pytesseract
from PIL import Image
import os
# Full path to the Tesseract executable used by pytesseract.
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'
# Tesseract resolves language data as <TESSDATA_PREFIX>/<lang>.traineddata,
# so the variable must point at the "tessdata" directory itself, not at the
# installation root. With the root, lang='rus' fails with
# "Error opening data file ...Tesseract-OCR/rus.traineddata".
# NOTE: rus.traineddata must also be present in this directory.
os.environ['TESSDATA_PREFIX'] = r'C:\Program Files\Tesseract-OCR\tessdata'
# Folder with the input images (video frames) and the text file that
# receives the recognized text.
image_folder = r"D:\Новая папка\PythonProject1\.venv\frames\input_video1"
output_file = r"D:\Новая папка\PythonProject1\output.txt"
# Run OCR over every image in image_folder and append the recognized text
# of each image to output_file (UTF-8). Per-image failures are reported and
# skipped so that one bad frame does not abort the whole run.
extracted_count = 0  # number of images that actually produced text
with open(output_file, 'w', encoding='utf-8') as f:
    # os.listdir() returns entries in arbitrary order; sort so the text is
    # written in a deterministic (file-name) order.
    for filename in sorted(os.listdir(image_folder)):
        if not filename.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        img_path = os.path.join(image_folder, filename)
        # The file may have been removed since the directory was listed.
        if not os.path.exists(img_path):
            print(f"Файл не найден: {img_path}")
            continue
        try:
            # The context manager closes the image file handle even when
            # OCR raises.
            with Image.open(img_path) as img:
                text = pytesseract.image_to_string(img, lang='rus')
            if text.strip():  # only write non-empty results
                f.write(text + '\n')
                extracted_count += 1
                # Report success only when text was really extracted.
                print(f"Текст извлечен из {filename}")
            else:
                print(f"Текст не извлечен из {filename}")
        except pytesseract.pytesseract.TesseractError as e:
            print(f"Ошибка Tesseract при обработке {filename}: {e}")
        except Exception as e:
            # Best-effort boundary: report and move on to the next image.
            print(f"Ошибка при обработке {filename}: {e}")
# Do not claim success when every image failed or was empty.
if extracted_count:
    print("Текст успешно извлечен и сохранен в", output_file)
else:
    print("Текст не был извлечен ни из одного изображения")
Ошибка Tesseract при обработке output_0028.png: (1, 'Error opening data file C:\\Program Files\\Tesseract-OCR/rus.traineddata Please make sure the TESSDATA_PREFIX environment variable is set to your "tessdata" directory. Failed loading language \'rus\' Tesseract couldn\'t load any languages! Could not initialize tesseract.')
[03.02.2025 / 16:31:55] Task " is completed. Result: 31/35. User is: 1369
[03.02.2025 / 14:14:47] Task " is completed. Result: 14/17. User is: 1322
[31.01.2025 / 16:27:18] Task " is completed. Result: 22/25. User is: 1322
[31.01.2025 / 16:01:32] Task " is completed. Result: 9/9. User is: 1369
[30.01.2025 / 15:52:53] Task " is completed. Result: 45/58. User is: 1369
[29.01.2025 / 15:12:49] Task " is completed. Result: 40/44. User is: 1369