import time
import tensorflow as tf
import numpy as np
# Check available GPUs
from tensorflow.python.client import device_lib

# Load the Facenet512 model; pinning the load to the CPU keeps the variable
# creation on the host (device placement of later ops is set separately).
with tf.device("/cpu:0"):
    model = tf.keras.models.load_model('my_model.h5')

# Example usage: compute the embedding of a single face image.
image_path = 'faces/Quazar/photo_5395339502534381014_y.jpg'
img = tf.keras.preprocessing.image.load_img(image_path, target_size=(160, 160))
img_array = tf.keras.preprocessing.image.img_to_array(img)
# Add a leading batch dimension: (160, 160, 3) -> (1, 160, 160, 3).
img_array = np.expand_dims(img_array, axis=0)

with tf.device("/gpu:0"):
    # Time only the preprocessing + forward pass (the earlier duplicate
    # start_time assignment was dead — it was immediately overwritten here).
    start_time = time.time()
    embedding = model.predict(tf.keras.applications.imagenet_utils.preprocess_input(img_array))
    embedding_info_1 = {'img_path': image_path, 'embedding': embedding.flatten()}
    fin = time.time() - start_time

print(fin)
print("Embedding for the first image:", embedding_info_1['embedding'])
Почему данный код на CPU выполняется быстрее, чем на GPU?
GPU — RTX 3060: 2.27 с
CPU — Intel i7-12700: 0.7 с
Я думал, что, может быть, GPU неправильно настроен, но при этом другой код на GPU выполняется быстрее.
import numpy as np
import tensorflow as tf

# Load MNIST and reshape to the NHWC layout Conv2D expects: (N, 28, 28, 1).
# (The original block contained 13 bare attribute expressions such as
# ``tf.keras.layers.Dense`` — harmless no-ops, removed.)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
img_rows, img_cols = 28, 28
x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1)
x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1)
input_shape = (img_rows, img_cols, 1)

# Standardize both splits with the *training-set* mean/std so no test-set
# statistics leak into preprocessing.
x_test = x_test.astype('float32')
x_train = x_train.astype('float32')
mean = np.mean(x_train)
std = np.std(x_train)
x_test = (x_test - mean) / std
x_train = (x_train - mean) / std

# One-hot encode labels for categorical_crossentropy.
num_classes = 10
y_train = tf.keras.utils.to_categorical(y_train, num_classes)
y_test = tf.keras.utils.to_categorical(y_test, num_classes)
print("counts of x_train : {}, y_train : {}, x_test : {}, y_test : {}".format(
    len(x_train), len(y_train), len(x_test), len(y_test)))
import time

# Hyperparameters.
num_filter = 32      # base number of convolution filters
num_dense = 512      # width of the fully-connected layer
drop_dense = 0.7     # dropout rate after the dense layer
ac = 'relu'          # activation used throughout
learningrate = 0.001

# Build a small VGG-style CNN for 28x28x1 MNIST digits.
with tf.device("/cpu:0"):
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.Conv2D(num_filter, (3, 3), activation=ac,
                                     input_shape=(28, 28, 1), padding='same'))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.Conv2D(num_filter, (3, 3), activation=ac, padding='same'))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))  # 28x28 -> 14x14x32
    model.add(tf.keras.layers.Conv2D(2 * num_filter, (3, 3), activation=ac, padding='same'))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.Conv2D(2 * num_filter, (3, 3), activation=ac, padding='same'))
    model.add(tf.keras.layers.BatchNormalization(axis=-1))
    model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))  # 14x14 -> 7x7x64 = 3136 features
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(num_dense, activation=ac))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.Dropout(drop_dense))
    model.add(tf.keras.layers.Dense(10, activation='softmax'))
    # Use the learningrate variable instead of the duplicated literal 1e-3,
    # so changing the hyperparameter above actually takes effect.
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'],
                  optimizer=tf.keras.optimizers.Adam(learning_rate=learningrate))
cpu_list = []
batch_sizes = []

# Benchmark one training epoch for each batch size 8, 16, ..., 512 on the CPU.
# BUG FIX: the original wrote ``tf.keras = 8*2**i``, which rebinds the
# ``tf.keras`` module attribute to an int and breaks every subsequent
# ``tf.keras.*`` access in the process; use a plain local variable instead.
with tf.device("/cpu:0"):
    for i in range(7):
        batch_size = 8 * 2 ** i
        print("batch size " + str(batch_size))
        t1 = time.time()
        model.fit(x_train, y_train, batch_size=batch_size, epochs=1,
                  validation_data=(x_test, y_test))
        t2 = time.time()
        cpu_list.append(int(t2 - t1))
        batch_sizes.append(batch_size)

# Average epoch time across all batch sizes (lower is better).
cpu_score = sum(cpu_list) / len(cpu_list)
print(cpu_score)
Измеренные результаты:
cpu_score = 48
gpu_score = 17
(чем меньше, тем лучше)