import os
import time
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
# Benchmark script: loads a saved Keras model and reports the average
# model.predict() latency per batch over the images found under `data_path`.

# Load the model to benchmark.
model = keras.models.load_model('MobileNet.h5')

# Rescale pixel values from [0, 255] to [0, 1] before feeding the model.
datagen = ImageDataGenerator(rescale=1./255)

# Location of the evaluation images and the number of images per batch.
data_path = 'test'
batch_size = 1

# Stream batches from the directory; shuffle=False keeps the order stable
# between runs so measurements are comparable.
data_generator = datagen.flow_from_directory(
    data_path,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False,
)

# Number of batches needed to cover the whole dataset once.
num_batches = len(data_generator)
if num_batches == 0:
    # Without this guard the average below would divide by zero.
    raise SystemExit(f"No images found under {data_path!r}; nothing to benchmark.")

# Warm-up: the first predict() call pays one-time setup costs that would
# otherwise skew the average. Rewind the generator afterwards so the timed
# loop still sees every batch exactly once.
warmup_batch, _ = next(data_generator)
model.predict(warmup_batch, verbose=0)
data_generator.reset()

# Time only the predict() call. Loading and preprocessing the images is kept
# outside the timed region so disk I/O does not count as inference time.
total_time = 0.0
for _ in range(num_batches):
    x_batch, _ = next(data_generator)
    # perf_counter() is a monotonic high-resolution clock meant for intervals.
    start_time = time.perf_counter()
    # verbose=0 suppresses the per-call progress bar inside the timed region.
    prediction = model.predict(x_batch, verbose=0)
    end_time = time.perf_counter()
    total_time += end_time - start_time

# Average inference time per batch.
avg_time_per_batch = total_time / num_batches
print(f"Average inference time per batch: {avg_time_per_batch} seconds")
В каких случаях лучше использовать квантование динамического диапазона (меньший размер модели, но более медленный inference), а в каких — квантование float16 (больший размер модели, но более быстрый inference)?