Ужасно глупо звучит, но такое "решение для организации учёта PDF-документов" умещается в 100 строк на Python, и его даже нейросеть может реализовать:
#!/usr/bin/env python3
"""
pdf_report.py
This script recursively scans a given directory for PDF files and generates a report
listing each PDF's file path, size (in bytes), and page count. The report is printed
as a table to stdout or, when --xlsx is given, saved to an XLSX file instead.
Usage:
python pdf_report.py /path/to/folder [--xlsx output.xlsx]
Dependencies:
- PyPDF2
- tabulate
- openpyxl
Install dependencies:
pip install PyPDF2 tabulate openpyxl
"""
import os
import argparse
from PyPDF2 import PdfReader
from tabulate import tabulate
from openpyxl import Workbook
def get_pdf_info(path):
    """Return ``(size_in_bytes, page_count)`` for the PDF file at *path*.

    The two values are collected independently, so a file that exists but
    cannot be parsed as a PDF still reports its size.

    Args:
        path: Filesystem path of the PDF file.

    Returns:
        A ``(size, pages)`` tuple. Both are ``None`` when the file size
        cannot be read; only ``pages`` is ``None`` when the size is known
        but the page count cannot be determined.
    """
    try:
        size = os.path.getsize(path)
    except (OSError, ValueError):
        # The file cannot even be stat'ed (missing, unreadable, invalid
        # path), so there is nothing meaningful to report about it.
        return None, None

    try:
        pages = len(PdfReader(path).pages)
    except Exception:
        # NOTE(review): kept broad on purpose, as in the original -- the
        # parser presumably raises several unrelated exception types on
        # damaged or encrypted files; confirm before narrowing.
        pages = None
    return size, pages
def scan_directory(root_dir):
    """Recursively collect size and page count for every PDF under *root_dir*.

    A file counts as a PDF when its name ends with ``.pdf``, compared
    case-insensitively. Directories and files are visited in sorted order so
    that the resulting report is reproducible between runs.

    Args:
        root_dir: Directory at which the recursive scan starts.

    Returns:
        A list of dicts, one per PDF found, with the keys ``'path'``,
        ``'size_bytes'`` and ``'page_count'``. The last two hold whatever
        ``get_pdf_info`` returned for the file and may be ``None``.
    """
    report = []
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # os.walk yields entries in arbitrary order; sorting the directory
        # list in place also fixes the order in which subtrees are visited.
        dirnames.sort()
        for filename in sorted(filenames):
            if not filename.lower().endswith('.pdf'):
                continue
            full_path = os.path.join(dirpath, filename)
            size, pages = get_pdf_info(full_path)
            report.append({
                'path': full_path,
                'size_bytes': size,
                'page_count': pages,
            })
    return report
def print_report(report):
    """Print *report* to stdout as a table in tabulate's ``github`` format.

    Args:
        report: Iterable of dicts with the keys ``'path'``, ``'size_bytes'``
            and ``'page_count'``; each dict becomes one table row.
    """
    column_titles = ['File Path', 'Size (bytes)', 'Pages']
    table_rows = []
    for entry in report:
        table_rows.append(
            (entry['path'], entry['size_bytes'], entry['page_count'])
        )
    rendered = tabulate(table_rows, headers=column_titles, tablefmt='github')
    print(rendered)
def save_xlsx(report, xlsx_path):
    """Write *report* into a new XLSX workbook and save it at *xlsx_path*.

    The workbook's active sheet is titled "PDF Report" and receives a header
    row followed by one row per report entry.

    Args:
        report: Iterable of dicts with the keys ``'path'``, ``'size_bytes'``
            and ``'page_count'``.
        xlsx_path: Destination path handed to ``Workbook.save``.
    """
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "PDF Report"

    # Header first, then the data rows, all in one table.
    table = [['File Path', 'Size (bytes)', 'Pages']]
    table.extend(
        [entry['path'], entry['size_bytes'], entry['page_count']]
        for entry in report
    )
    for row in table:
        sheet.append(row)

    workbook.save(xlsx_path)
def main():
    """Parse the command line, scan the directory and emit the report.

    With ``--xlsx PATH`` the report is saved to that XLSX file and a
    confirmation line is printed; otherwise the report is printed as a table
    to stdout. If the positional argument is not an existing directory, the
    program exits through ``parser.error`` with a usage message instead of
    silently producing an empty report.
    """
    parser = argparse.ArgumentParser(description='Generate a PDF report for a directory.')
    parser.add_argument('directory', help='Path to the directory to scan')
    parser.add_argument('--xlsx', dest='xlsx_output', help='Optional XLSX output path')
    args = parser.parse_args()

    # os.walk yields nothing for a missing or non-directory path, which would
    # otherwise look like "no PDFs found" rather than a usage mistake.
    if not os.path.isdir(args.directory):
        parser.error(f"not a directory: {args.directory}")

    report = scan_directory(args.directory)
    if args.xlsx_output:
        save_xlsx(report, args.xlsx_output)
        print(f"Report saved to XLSX: {args.xlsx_output}")
    else:
        print_report(report)


if __name__ == '__main__':
    main()
А если нужен гайд, то берите любую книжку по программированию и Python и учитесь)