Добрый день!
Пишу программу для распознавания картинок.
Столкнулся с проблемой: чем больше шаблонов, тем больше времени тратится на распознавание.
Попробовал использовать потоки, но, кажется, это не особо правильное решение.
Пожалуйста, подскажите, как ускорить работу скрипта, а именно перебор шаблонов из списка:
from pdf2image import convert_from_path
import template_search
import final_config
import connect_motya
import threading
import pytesseract
import cv2
import numpy as np
from PIL import Image
import os
import re
import conf_find
from time import time
# NOTE(review): indentation was lost in this paste; the statements following
# each `for` header below presumably form that loop's body.

# Two values read from the project configuration and passed to
# connect_motya.list_folder().
date_r_1 = final_config.date_r_list[0]
date_r_2 = final_config.date_r_list[1]
# In the code shown here, folder_r is only used for the count printed below.
folder_r = connect_motya.list_folder(date_r_1, date_r_2)
print('Кол-во папок: ' + str(len(folder_r)))
# The folders that are actually iterated are hard-coded, not taken from folder_r.
folder_result = ['ggg/']
print(folder_result)
for i in folder_result:
folder_s = i
#print(folder_s)
# Enter the folder (relative to the current working directory) to list it.
os.chdir('./' + folder_s)
# `path` is not referenced again in the code shown here.
path = os.getcwd()
q = os.listdir('.')
# Order the directory entries by name length (shortest names first).
q = sorted(q, key=len)
#print(q)
# Switch to the fixed project directory; path_img below is relative to it.
os.chdir('C:/Users/SorokaMI/PycharmProjects/motya/dgr')
# Output prefix for saved crops; search_1/search_2 append 'res_' + file name to it.
path_img = './dgr_img'+'_'+folder_s.replace('/pdf/', '')
# os.makedirs() without exist_ok raises FileExistsError if the directory already exists.
os.makedirs(path_img)
print('Файлов в папке: ' + str(len(q)))
print('')
# Running total of processed files, printed after each file.
count =0
for file in q:
# Start of the per-file timing that is reported at the end of the iteration.
start_time = time()
JPG_file = 'C:/Users/SorokaMI/PycharmProjects/motya/' + folder_s + '/' + file
print(file + ' размер: ' + str(os.path.getsize(JPG_file)) + ' байт')
# Collected by the search_1 / search_2 worker threads respectively.
result_list = []
result_list2 = []
def search_1(ppp, JPG_file, path_img, file):
    """Look for template ``ppp`` of set 1 on a page and OCR the matched strip.

    The page is compared with ``template/1/<ppp>.jpg`` using
    ``cv2.TM_CCOEFF_NORMED``. If any location scores at or above
    ``conf_find.threshold``, the best-scoring match is cropped (extended
    200 px to the left), rotated 90 degrees counter-clockwise, binarised and
    passed to Tesseract. The trailing 11 digits of the recognised text are
    appended to the enclosing script's ``result_list``.

    Args:
        ppp: Template number; also the file stem of the template image.
        JPG_file: Path of the page image to search.
        path_img: Path prefix under which the rotated crop is saved.
        file: Page file name, used to build the saved crop's name.

    Returns:
        None. Results are reported through ``result_list``.
    """
    img_rgb = cv2.imread(JPG_file)
    temp_path = 'C:/Users/SorokaMI/PycharmProjects/motya/dgr/template/1/' + str(ppp) + '.jpg'
    template = cv2.imread(temp_path, 0)
    if img_rgb is None or template is None:
        # cv2.imread() returns None instead of raising when a file is missing
        # or unreadable; skip this template rather than crash the thread.
        return

    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    loc = np.where(res >= conf_find.threshold)

    # Hits that fall into the same 100-px grid cell count as one match.
    sensitivity = 100
    f = {(round(x / sensitivity), round(y / sensitivity)) for x, y in zip(*loc[::-1])}
    found_count = len(f)
    if not found_count:
        return

    print('Найдено совпадений по шаблону ' + str(ppp) + ': ' + str(found_count))
    # Only the location of the best score is needed.
    _, _, _, top_left = cv2.minMaxLoc(res)
    bottom_right = (top_left[0] + w, top_left[1] + h)
    print(JPG_file)
    with Image.open(JPG_file) as im:
        print((top_left[0], top_left[1], bottom_right[0], bottom_right[1]))
        # convert() forces the pixel data to load before the file is closed.
        cropped2 = im.crop(
            (top_left[0] - 200, top_left[1], bottom_right[0], bottom_right[1])
        ).convert('RGB')

    # Hand the crop to OpenCV in memory. All worker threads share one output
    # file name, so reading the crop back from disk could pick up another
    # thread's crop.
    a1 = cv2.cvtColor(np.array(cropped2), cv2.COLOR_RGB2BGR)
    # Use the public cv2 constant; the nested ``cv2.cv2`` module is a packaging
    # detail that is not available in every opencv-python build.
    a1 = cv2.rotate(a1, cv2.ROTATE_90_COUNTERCLOCKWISE)
    # NOTE(review): several threads may still write this same file; it is not
    # read back in this function.
    path_img_crop = path_img + 'res_' + file
    cv2.imwrite(path_img_crop, a1)

    # Pre-processing for OCR: upscale, grayscale, blur, Otsu binarisation.
    a1 = cv2.resize(a1, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
    a1 = cv2.cvtColor(a1, cv2.COLOR_BGR2GRAY)
    # With a 1x1 kernel dilate/erode leave the image unchanged; kept so the
    # kernel size can be tuned.
    kernel = np.ones((1, 1), np.uint8)
    a1 = cv2.dilate(a1, kernel, iterations=1)
    a1 = cv2.erode(a1, kernel, iterations=1)
    a1 = cv2.GaussianBlur(a1, (5, 5), 0)
    a1 = cv2.threshold(a1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    text = pytesseract.image_to_string(a1)

    # Keep digits only; the number is the trailing 11 digits. Fewer than 11
    # digits yields the all-zero placeholder, which is what the original
    # slice-and-length check amounted to.
    result = re.sub(r"\D", "", text)
    r2 = result[-11:] if len(result) >= 11 else '00000000000'
    result_list.append(r2)
# Template numbers are 1..N, where N is the number of entries in the
# template/1 directory.
qs1 = os.listdir(path='C:/Users/SorokaMI/PycharmProjects/motya/dgr/template/1/')
list_1 = [i for i in range(1, len(qs1) + 1)]
# Start one search_1 worker thread per template number.
threads = []
for ppp in list_1:
t = threading.Thread(target=search_1, args=(ppp, JPG_file, path_img, file,))
threads.append(t)
t.start()
# Wait for every worker before inspecting result_list.
for t in threads:
# print(t)
t.join()
# print('Worker threads finished!')
# print(result_list)
# No template of set 1 produced a result.
# NOTE(review): presumably the search_2 code that follows is the body of this `if`.
if len(result_list) == 0:
def search_2(ppp, JPG_file, path_img, file):
    """Look for template ``ppp`` of set 7 on a page and OCR the matched strip.

    The page is compared with ``template/7/<ppp>.jpg`` using
    ``cv2.TM_CCOEFF_NORMED``. If any location scores at or above
    ``conf_find.threshold``, the best-scoring match is cropped (extended
    200 px to the right), rotated 90 degrees clockwise, binarised and passed
    to Tesseract. The trailing 11 digits of the recognised text are appended
    to the enclosing script's ``result_list2``.

    Args:
        ppp: Template number; also the file stem of the template image.
        JPG_file: Path of the page image to search.
        path_img: Path prefix under which the rotated crop is saved.
        file: Page file name, used to build the saved crop's name.

    Returns:
        None. Results are reported through ``result_list2``.
    """
    img_rgb = cv2.imread(JPG_file)
    temp_path = 'C:/Users/SorokaMI/PycharmProjects/motya/dgr/template/7/' + str(ppp) + '.jpg'
    template = cv2.imread(temp_path, 0)
    if img_rgb is None or template is None:
        # cv2.imread() returns None instead of raising when a file is missing
        # or unreadable; skip this template rather than crash the thread.
        return

    img_gray = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY)
    w, h = template.shape[::-1]
    res = cv2.matchTemplate(img_gray, template, cv2.TM_CCOEFF_NORMED)
    loc = np.where(res >= conf_find.threshold)

    # Hits that fall into the same 100-px grid cell count as one match.
    sensitivity = 100
    f = {(round(x / sensitivity), round(y / sensitivity)) for x, y in zip(*loc[::-1])}
    if not f:
        return

    # Only the location of the best score is needed.
    _, _, _, top_left = cv2.minMaxLoc(res)
    bottom_right = (top_left[0] + w, top_left[1] + h)
    with Image.open(JPG_file) as im:
        print((top_left[0], top_left[1], bottom_right[0], bottom_right[1]))
        # convert() forces the pixel data to load before the file is closed.
        cropped2 = im.crop(
            (top_left[0], top_left[1], bottom_right[0] + 200, bottom_right[1])
        ).convert('RGB')

    # Hand the crop to OpenCV in memory. All worker threads share one output
    # file name, so reading the crop back from disk could pick up another
    # thread's crop.
    a1 = cv2.cvtColor(np.array(cropped2), cv2.COLOR_RGB2BGR)
    # Use the public cv2 constant; the nested ``cv2.cv2`` module is a packaging
    # detail that is not available in every opencv-python build.
    a1 = cv2.rotate(a1, cv2.ROTATE_90_CLOCKWISE)
    # NOTE(review): several threads may still write this same file; it is not
    # read back in this function.
    path_img_crop = path_img + 'res_' + file
    cv2.imwrite(path_img_crop, a1)

    # Pre-processing for OCR: upscale, grayscale, blur, Otsu binarisation.
    a1 = cv2.resize(a1, None, fx=1.5, fy=1.5, interpolation=cv2.INTER_CUBIC)
    a1 = cv2.cvtColor(a1, cv2.COLOR_BGR2GRAY)
    # With a 1x1 kernel dilate/erode leave the image unchanged; kept so the
    # kernel size can be tuned.
    kernel = np.ones((1, 1), np.uint8)
    a1 = cv2.dilate(a1, kernel, iterations=1)
    a1 = cv2.erode(a1, kernel, iterations=1)
    a1 = cv2.GaussianBlur(a1, (5, 5), 0)
    a1 = cv2.threshold(a1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
    text = pytesseract.image_to_string(a1)

    # Keep digits only; the number is the trailing 11 digits. Fewer than 11
    # digits yields the all-zero placeholder, which is what the original
    # slice-and-length check amounted to.
    result = re.sub(r"\D", "", text)
    r2 = result[-11:] if len(result) >= 11 else '00000000000'
    result_list2.append(r2)
# Template numbers are 1..N, where N is the number of entries in the
# template/7 directory.
qs2 = os.listdir(path='C:/Users/SorokaMI/PycharmProjects/motya/dgr/template/7/')
list_2 = [i for i in range(1, len(qs2) + 1)]
# Start one search_2 worker thread per template number.
threads2 = []
for ppp in list_2:
t = threading.Thread(target=search_2, args=(ppp, JPG_file, path_img, file,))
threads2.append(t)
t.start()
# Wait for every worker before inspecting result_list2.
for t in threads2:
# print(t)
t.join()
# print('Worker threads finished!')
# print(result_list2)
# Set 7 produced no result either: report the file as unrecognised.
if len(result_list2) == 0:
print('файл: ' + file + '\n' + 'Документ не распознан')
count += 1
print('Обработано: ' + str(count))
# A set-7 template matched: record the first result for this file and the
# entry that follows it in q.
else:
# NOTE(review): q[ind] raises IndexError when `file` is the last entry of q.
ind = q.index(file) + 1
print('файлы: ' + file + ' и ' + q[ind] + '\n' + 'СНИЛС: ' + result_list2[0])
connect_motya.insert_draft_results(result_list2[0], folder_s, file, q[ind],str(os.path.getsize(JPG_file)))
# Removes the paired entry from the list being iterated, presumably so the
# loop does not visit it as a separate file.
q.pop(ind)
count += 2
print('Обработано: ' + str(count))
# NOTE(review): presumably pairs with the earlier `if len(result_list) == 0:`
# check, i.e. a set-1 template matched; same recording steps using result_list.
else:
# NOTE(review): q[ind] raises IndexError when `file` is the last entry of q.
ind = q.index(file) + 1
print('файлы: ' + file + ' и ' + q[ind] + '\n' + 'СНИЛС: ' + result_list[0])
connect_motya.insert_draft_results(result_list[0], folder_s, file, q[ind],str(os.path.getsize(JPG_file)))
q.pop(ind)
count += 2
print('Обработано: ' + str(count))
# Report the elapsed time measured from start_time.
end_time = time()
time_taken = str(round(end_time - start_time)) # time_taken is in seconds
print('Время распознования файла: ' + time_taken + ' секунд')
print('')