from gensim.models import Word2Vec
from sklearn.model_selection import train_test_split
from keras.preprocessing.text import Tokenizer
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Activation, Conv1D, Dense, Dropout, GlobalMaxPooling1D

# Convert each message into a padded sequence of word indices
# (vocabulary capped at the 7229 most frequent words, sequences padded/cut to 75 tokens).
token = Tokenizer(num_words=7229)
token.fit_on_texts(df.Message)
text = token.texts_to_sequences(df.Message)
text = sequence.pad_sequences(text, maxlen=75)

# Train Word2Vec on the whitespace-tokenized messages
# (gensim < 4.0 API: `size` and wv.get_keras_embedding were removed in 4.0).
mes = [message.split() for message in df['Message']]
model = Word2Vec(mes, size=300, window=3, min_count=1, workers=16)

# y is the binary label vector (defined elsewhere from df).
x_train, x_test, y_train, y_test = train_test_split(text, y, test_size=0.2, stratify=y)

kmodel = Sequential()
# Embedding layer initialized from the Word2Vec vectors; with train_embeddings=True
# the vectors are fine-tuned together with the rest of the network. Note that the
# Tokenizer's word indices are not guaranteed to match the Word2Vec vocabulary
# order, so the two indexings should be aligned for the lookup to be meaningful.
kmodel.add(model.wv.get_keras_embedding(train_embeddings=True))
kmodel.add(Dropout(0.2))
kmodel.add(Conv1D(50, 3, padding='valid', activation='relu', strides=1))
kmodel.add(GlobalMaxPooling1D())
kmodel.add(Dense(250))
kmodel.add(Dropout(0.2))
kmodel.add(Activation('relu'))
kmodel.add(Dense(1))
kmodel.add(Activation('sigmoid'))

kmodel.compile(loss='binary_crossentropy',
               optimizer='adam',
               metrics=['accuracy'])
kmodel.fit(x_train, y_train,
           batch_size=32,
           epochs=5,
           validation_data=(x_test, y_test))
Setting train_embeddings=True noticeably increases accuracy, as well as the training time.
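To quantify that trade-off, one can build the same network twice, with frozen and with trainable vectors, and compare validation accuracy. A minimal sketch, assuming the same df, y, and gensim < 4.0 setup as above; build_model is a hypothetical helper, not part of the original code:

def build_model(trainable):
    # Same architecture as above; only the embedding trainability differs.
    m = Sequential()
    m.add(model.wv.get_keras_embedding(train_embeddings=trainable))
    m.add(Dropout(0.2))
    m.add(Conv1D(50, 3, padding='valid', activation='relu', strides=1))
    m.add(GlobalMaxPooling1D())
    m.add(Dense(250))
    m.add(Dropout(0.2))
    m.add(Activation('relu'))
    m.add(Dense(1))
    m.add(Activation('sigmoid'))
    m.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return m

for trainable in (False, True):
    m = build_model(trainable)
    hist = m.fit(x_train, y_train, batch_size=32, epochs=5,
                 validation_data=(x_test, y_test), verbose=0)
    # The history key is 'val_acc' on older Keras, 'val_accuracy' on newer versions.
    print(trainable, max(hist.history['val_acc']))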