from pdf2image import convert_from_path
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams, LTTextBox
from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
from pdfminer.pdfpage import PDFPage
from PIL import Image, ImageColor, ImageDraw
# Locate a piece of text in a PDF, render the page it sits on to a PNG next to
# the PDF, and outline the surrounding area with a red frame.

fille = 'D:\\Python\\TG\\SharagaBot\\na_19_11_1_korpus.pdf'
find = "З-3-9Б-19В"

# Resolution used when rasterising the page.
RENDER_DPI = 576
# PDF coordinates are expressed in points, 72 per inch; RENDER_DPI / 72 is the
# points-to-pixels factor (8.0 at 576 DPI).
POINTS_PER_INCH = 72
# Frame geometry in PDF points, measured from the anchor point (left edge of
# the matched text box, bottom edge of the box flipped to a top-left origin).
# NOTE(review): the offsets look tuned for this particular document layout;
# confirm before reusing the script on other files.
FRAME_LEFT = 15
FRAME_UP = 30
FRAME_RIGHT = 100
FRAME_DOWN = 150
# Outline thickness of the frame, in pixels.
FRAME_WIDTH = 8


def _find_text_box(pdf_path: str, needle: str):
    """Return the location of the first text box that contains *needle*.

    Pages are scanned in order. Spaces are removed from each text box before
    the comparison, so *needle* must be written without spaces.

    Returns:
        ``(page_number, page_height, bbox)`` for the first hit, where
        ``page_number`` is 1-based, ``page_height`` is ``page.mediabox[3]``
        and ``bbox`` is the pdfminer ``(x0, y0, x1, y1)`` box of the text;
        ``None`` when no text box contains *needle*.
    """
    # The context manager closes the PDF even if parsing raises.
    with open(pdf_path, 'rb') as fp:
        rsrcmgr = PDFResourceManager()
        device = PDFPageAggregator(rsrcmgr, laparams=LAParams())
        interpreter = PDFPageInterpreter(rsrcmgr, device)
        for page_number, page in enumerate(PDFPage.get_pages(fp), start=1):
            print(f'Processing pars {pdf_path} ...')
            interpreter.process_page(page)
            for lobj in device.get_result():
                if not isinstance(lobj, LTTextBox):
                    continue
                if needle in lobj.get_text().replace(' ', ''):
                    print(lobj.bbox)
                    # mediabox[3] is used as the page height.
                    # NOTE(review): this assumes the media box starts at
                    # y = 0 -- confirm for PDFs with a shifted origin.
                    return page_number, page.mediabox[3], lobj.bbox
    return None


def _save_highlighted_page(pdf_path: str, page_number: int,
                           page_height: float, bbox, png_path: str) -> None:
    """Render page *page_number* of *pdf_path* and save it with a red frame.

    The frame is positioned relative to *bbox* (pdfminer coordinates, origin
    at the bottom-left) after flipping the y axis to the image's top-left
    origin and scaling points to pixels. The picture is shown with the
    default viewer and written to *png_path*.
    """
    scale = RENDER_DPI / POINTS_PER_INCH
    anchor_x = bbox[0]
    anchor_y = page_height - bbox[1]
    frame = [
        scale * (anchor_x - FRAME_LEFT),
        scale * (anchor_y - FRAME_UP),
        scale * (anchor_x + FRAME_RIGHT),
        scale * (anchor_y + FRAME_DOWN),
    ]
    # Only the page holding the match is rasterised, so exactly one image is
    # expected; the frame coordinates are meaningless on any other page.
    rendered = convert_from_path(
        pdf_path, RENDER_DPI, first_page=page_number, last_page=page_number
    )
    for picture in rendered:
        draw = ImageDraw.Draw(picture)
        draw.rectangle(frame, outline=ImageColor.getrgb("red"),
                       width=FRAME_WIDTH)
        picture.show()
        picture.save(png_path)
        print(f"Saving ressult as {png_path} ...")


def highlight_text_in_pdf(pdf_path: str, needle: str):
    """Find *needle* in *pdf_path* and save a PNG with the hit framed in red.

    Only the first match is highlighted because the result is a single PNG
    whose path is *pdf_path* with ``.pdf`` replaced by ``.png``.

    Returns:
        The path of the written PNG, or ``None`` when *needle* was not found
        (a message is printed in that case).
    """
    hit = _find_text_box(pdf_path, needle)
    if hit is None:
        print("Sorry... I didnt find anything :(")
        return None
    page_number, page_height, bbox = hit
    # Kept in its own variable so the PDF path is never overwritten.
    png_path = pdf_path.replace(".pdf", ".png")
    _save_highlighted_page(pdf_path, page_number, page_height, bbox, png_path)
    return png_path


# The job runs once, at module level, whenever the file is executed.
highlight_text_in_pdf(fille, find)