When using the GPU with the following code:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import *
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical
from pickle import *
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    # Restrict TensorFlow to allocate at most 8000 MB of memory on the first GPU
    try:
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=8000)])
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be set before GPUs have been initialized
        print(e)
#preprocessing database
f=open('aneks.sql')
f=f.read()
f=f[221:]
f=f.lower()
f=f.split('\n')
aneks=''
for i in f:
    el=i[25:]
    while len(el)>10 and not el[0].isalpha():
        el=el[1:]
    el=el[:-3]
    el=el.replace('\\n','')
    aneks+=' '+el
max_words_count=10000
aneks=aneks.replace('\ufeff','')
aneks=aneks.replace('"','')
tokenizer=Tokenizer(num_words=max_words_count,lower=True,split=' ',filters="\n\t±§!@#$%^&*()_+=-;.,:№{}[]'|~`<>/?",char_level=False)
tokenizer.fit_on_texts([aneks])
dist=list(tokenizer.word_counts.items())
data=tokenizer.texts_to_sequences([aneks])
res=np.array(data[0])
inp_words=7
n=res.shape[0]-inp_words
#generating X and Y arrays
X=np.array([res[i:i+inp_words] for i in range(n)])
Y=to_categorical(res[inp_words:], num_classes=max_words_count)
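#building and compiling the model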
model=keras.Sequential()
model.add(Embedding(max_words_count, 128, input_length=inp_words))
model.add(LSTM(256,activation='tanh', return_sequences=True))
model.add(LSTM(64,activation='tanh'))
model.add(Dense(max_words_count,activation='softmax'))
model.summary()
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
#model=keras.models.load_model('aneks')
with tf.device("/gpu:0"):
    history=model.fit(X,Y,batch_size=16,epochs=10,validation_split=0.1)
plt.plot(history.history['accuracy'],color='blue')
plt.plot(history.history['val_accuracy'],color='orange')
plt.grid(True)
plt.show()
model.save('aneks')
#model=keras.models.load_model('aneks')
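#text generation: repeatedly predict the next word from a sliding window of inp_words tokens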
def build_phrase(phrase,words_num):
    res=phrase
    data=tokenizer.texts_to_sequences([phrase])[0]
    for i in range(words_num):
        x=data[i:i+inp_words]
        inp=np.expand_dims(x,axis=0)
        pred=model.predict(inp)
        indx=pred.argmax(axis=1)[0]
        data.append(indx)
        res+=' '+tokenizer.index_word[indx]
    return res
while True:
    a=input()
    print(build_phrase(a,20))
The following error occurs:
Traceback (most recent call last):
File "/Users/mzalik/mambaforge/envs/mlp/main.py", line 67, in <module>
history=model.fit(X,Y,batch_size=16,epochs=10,validation_split=0.1)
File "/Users/mzalik/mambaforge/envs/mlp/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 67, in error_handler
raise e.with_traceback(filtered_tb) from None
File "/Users/mzalik/mambaforge/envs/mlp/lib/python3.8/site-packages/tensorflow/python/framework/constant_op.py", line 102, in convert_to_eager_tensor
return ops.EagerTensor(value, ctx.device_name, dtype)
tensorflow.python.framework.errors_impl.InternalError: Failed copying input tensor from /job:localhost/replica:0/task:0/device:CPU:0 to /job:localhost/replica:0/task:0/device:GPU:0 in order to run _EagerConst: Dst tensor is not initialized.
The training set is fairly large; if I shrink it drastically (to roughly 1,000 samples out of 46,000) the error does not occur, and it also does not occur when I use /cpu:0 in model.fit(). Is there a way to train on the GPU instead of the CPU without reducing the training set?
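For clarity, the /cpu:0 workaround mentioned above is just the same training call pinned to the CPU (a minimal sketch mirroring the fit() call from the code above):

with tf.device("/cpu:0"):
    history=model.fit(X,Y,batch_size=16,epochs=10,validation_split=0.1)

For scale, assuming to_categorical's default float32 dtype, the one-hot Y array alone is about 46,000 × 10,000 values, i.e. roughly 1.8 GB, before X and the model itself are counted.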
I'm using a MacBook Air M1 (8 GB RAM / 512 GB SSD); I installed tensorflow following this guide:
https://caffeinedev.medium.com/how-to-install-tens...