您好,提前感谢您的任何回答。我正在为一个多类分类问题构建神经网络(NN)。我已经通过 Word2vec 对数据进行了预训练,为每个样本生成了一个 300 维的数值向量。我有 6 个类别,数据共 15000 行(每行 300 维)。我的第一个问题是:
隐藏层的单元数(units)应该取多少?这是可以从理论上推导出来的吗?另外,我在训练集上能达到 0.85-0.90 的准确率,但验证集上的准确率始终很低:0.22-0.25。我不知道该怎么办,因为我在这方面还是新手。我的代码如下:
import numpy as np
import pandas as pd
import keras
from sklearn.preprocessing import Imputer
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import Dense,Dropout
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
from keras import regularizers
# Load the word2vec feature table: columns 0-299 are read as the feature
# matrix and column 300 as the target.
FEATURE_COLUMNS = slice(0, 300)
LABEL_COLUMN = 300

dataset = pd.read_csv('word2vec.csv')
X = dataset.iloc[:, FEATURE_COLUMNS].values
y = dataset.iloc[:, LABEL_COLUMN].values

# Replace missing feature values with the per-column mean (axis=0).
# NOTE(review): `sklearn.preprocessing.Imputer` was removed in scikit-learn
# 0.22; newer installs need `sklearn.impute.SimpleImputer` instead -- confirm
# the scikit-learn version this script is pinned to.
# NOTE(review): the means are computed over all rows, before any train/test
# split is made further down the script.
imputer = Imputer(missing_values='NaN', strategy='mean', axis=0).fit(X[:, FEATURE_COLUMNS])
X[:, FEATURE_COLUMNS] = imputer.transform(X[:, FEATURE_COLUMNS])
# Train a small fully-connected softmax classifier on a random 80/20 split
# of (X, y) and print a short description of the resulting model.
counter = 0  # number of completed training runs
for iterations in range(1):
    # Hold out 20% of the rows as a test set; the split is re-drawn on
    # every pass of the loop.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    classifier = Sequential()
    # Hidden layer: 70 ReLU units over the 300 input features, with an L2
    # penalty on the weights and an L1 penalty on the activations.
    classifier.add(Dense(units=70, kernel_initializer='uniform',
                         kernel_regularizer=regularizers.l2(0.0001),
                         activity_regularizer=regularizers.l1(0.001),
                         activation='relu', input_dim=300))
    classifier.add(Dropout(0.3))
    # Optional second hidden layer, currently disabled:
    # classifier.add(Dense(units=70, kernel_initializer='uniform', activation='relu'))
    # Output layer: one softmax unit per class (6 classes).
    classifier.add(Dense(units=6, kernel_initializer='uniform', activation='softmax'))

    # NOTE(review): 'sparse_categorical_crossentropy' expects integer class
    # ids; this assumes y already holds values 0..5 -- TODO confirm against
    # the label column of the CSV.
    classifier.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # A further 10% of the training rows is set aside by Keras for
    # validation; `history` keeps the per-epoch metrics for later plotting.
    history = classifier.fit(X_train, y_train, validation_split=0.1,
                             batch_size=10, epochs=100, shuffle=True)

    # Per-class probabilities for the held-out test rows (not used further
    # in this part of the script).
    y_pred = classifier.predict(X_test)
    # y_pred = (y_pred > 0.1666)
    classifier.summary()
    counter += 1

print(classifier.layers)
# NOTE(review): the model built above consists of Dense/Dropout layers only,
# not recurrent ones; the message is kept verbatim to preserve the output.
print("Running RNN with Dropout Layer")
print("Number of layers used: "+str(len(classifier.layers)))
# Plot the per-epoch training and validation curves recorded in `history`.

# list all data in history
print(history.history.keys())

# The accuracy metric is recorded as 'acc'/'val_acc' by older Keras releases
# and as 'accuracy'/'val_accuracy' by Keras 2.3+ / tf.keras 2.x, so pick
# whichever key is actually present instead of hard-coding one of them.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'

# One figure per metric: accuracy first, then loss. The second curve in each
# figure comes from the 'val_*' series, labelled 'test' in the legend.
for metric, label in ((acc_key, 'accuracy'), ('loss', 'loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('model ' + label)
    plt.ylabel(label)
    plt.xlabel('epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()