from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import skipgrams
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense
# Load the corpus, skipping blank lines. The `with` block closes the file
# handle as soon as the lines have been read.
with open('alice_in_wonderland.txt') as corpus_file:
    sentences = [line.strip() for line in corpus_file if line != '\n']

# Fit the vocabulary, then encode every sentence as a list of word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
corpus = tokenizer.texts_to_sequences(sentences)

# Vocabulary size. The +1 accounts for index 0, which the Keras Tokenizer
# reserves and never assigns to a word.
V = len(tokenizer.word_index) + 1
dim = 200  # width of the hidden (embedding) layer
window_size = 5

model = Sequential()
# input_shape must be an iterable (Keras calls tuple() on it), so a
# one-dimensional input of width V is written as the 1-tuple (V,).
# Passing the bare int V raises "TypeError: 'int' object is not iterable".
model.add(Dense(dim, input_shape=(V,)))
model.add(Dense(V, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.summary()
def generate_data(corpus, window_size, V):
    """Yield one-hot encoded skip-gram training pairs, one batch per sentence.

    Args:
        corpus: Iterable of sentences, each a sequence of integer word indices.
        window_size: Window size forwarded to ``skipgrams``.
        V: Vocabulary size; used both as the ``skipgrams`` vocabulary size
            and as the number of classes for the one-hot encoding.

    Yields:
        Tuples ``(X, y)`` where ``X`` is the ``to_categorical`` encoding of
        the first word of every couple and ``y`` the encoding of the second.
        Sentences for which ``skipgrams`` returns no couples are skipped.
    """
    for words in corpus:
        # negative_samples=0 requests no negative sampling, so the returned
        # labels carry no extra information and are discarded.
        couples, _ = skipgrams(words, V, window_size, negative_samples=0, shuffle=True)
        if not couples:
            continue
        # Split the list of (first, second) pairs into two parallel tuples.
        first_words, second_words = zip(*couples)
        X = np_utils.to_categorical(first_words, V)
        y = np_utils.to_categorical(second_words, V)
        yield X, y
# Train for 10 passes over the corpus, one train_on_batch call per sentence.
for epoch in range(10):
    # Running sum of the per-batch losses, reported once per epoch.
    loss = 0.
    for x, y in generate_data(corpus, window_size, V):
        loss += model.train_on_batch(x, y)
    print(epoch, loss)
Выдает вот такую ошибку:
Traceback (most recent call last):
File "prep.py", line 18, in <module>
model.add(Dense(200, input_shape = V))
File "C:\Users\adels\AppData\Local\Programs\Python\Python36\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
return func(*args, **kwargs)
File "C:\Users\adels\AppData\Local\Programs\Python\Python36\lib\site-packages\keras\layers\core.py", line 839, in __init__
super(Dense, self).__init__(**kwargs)
File "C:\Users\adels\AppData\Local\Programs\Python\Python36\lib\site-packages\keras\engine\base_layer.py", line 147, in __init__
batch_size,) + tuple(kwargs['input_shape'])
TypeError: 'int' object is not iterable