In the code below, I use a sequence-to-sequence approach as the prediction model for anomaly detection. The dataset I am using is ADFA-LD. Training is done only on normal sequences. After predicting the next sequence, I want to add a classifier that detects abnormal sequences (the test data contains both normal and abnormal sequences). For this I use a One-Class SVM as the classifier.
I don't know why, but sometimes the program runs fine and sometimes it raises this error:
ValueError: setting an array element with a sequence
at the line: fraud_pred = oneclass.predict(np.array(X_test))
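For reference, NumPy produces exactly this message when it is asked to build a numeric array out of rows of unequal length, which might also explain why the failure is intermittent. A minimal sketch of that failure mode (the values are illustrative only, not taken from my data):

import numpy as np

# Rows of equal length build a regular (2, 3) float array without complaint.
np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]], dtype=float)

# Rows of unequal length cannot form a rectangular array and raise
# "ValueError: setting an array element with a sequence."
np.array([[1.0, 2.0, 3.0], [4.0, 5.0]], dtype=float)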
My code:
# Train - Test Split
X = lines[['eng', 'Class1']]
y = lines[['fr', 'Class2']]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
# function that loads the data in batches (batch_size)
# generator used to feed the neural network during training
def generate_batch(X=X_train, y=y_train, batch_size=128):
    ''' Generate a batch of data '''
    # ... (batch-building body elided; it yields
    # ([encoder_input, decoder_input], decoder_target) pairs, as consumed below)
encoder_inputs = Input(shape=(None,))
en_x = Embedding(num_encoder_tokens, embedding_size, mask_zero=True)(encoder_inputs)  # converts each word into a fixed-size vector
encoder = GRU(256, return_state=True)
encoder_outputs, state_h = encoder(en_x)
encoder_states = [state_h]  # single hidden state for GRU (used below and by the inference encoder)
decoder_inputs = Input(shape=(None,))
dex = Embedding(num_decoder_tokens, embedding_size, mask_zero=True)  # converts each word into a fixed-size vector
final_dex = dex(decoder_inputs)
decoder_gru = GRU(256, return_sequences=True)
decoder_gru2 = GRU(256, return_sequences=True, return_state=True)
decoder_outputs = decoder_gru(final_dex, initial_state=encoder_states)
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
start = time.time()
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['acc'])
train_samples = len(X_train)
val_samples = len(X_test)
batch_size = 128
epochs = 1
mod_train = model.fit_generator(generator=generate_batch(X_train, y_train, batch_size=batch_size),
                                steps_per_epoch=train_samples // batch_size,
                                epochs=epochs,
                                validation_data=generate_batch(X_test, y_test, batch_size=batch_size),
                                validation_steps=val_samples // batch_size)
df_history = pd.DataFrame(mod_train.history)
print(df_history)
#####################################
# prediction (inference) models
# define inference encoder
encoder_model = Model(encoder_inputs, encoder_states)
# define inference decoder
decoder_state_input_h = Input(shape=(256,))
decoder_states_inputs = [decoder_state_input_h]
final_dex2 = dex(decoder_inputs)
print(final_dex2)
decoder_outputs2, state_h2 = decoder_gru2(final_dex2, initial_state=decoder_states_inputs)  # note: decoder_gru2 is not the GRU layer trained above
decoder_states2 = [state_h2]
decoder_outputs2 = decoder_dense(decoder_outputs2)
decoder_model = Model([decoder_inputs] + decoder_states_inputs,
                      [decoder_outputs2] + decoder_states2)
def decode_sequence(input_seq):
    # Encode the input as state vectors.
    states_value = encoder_model.predict(input_seq)
    print(len(states_value), states_value.shape)
    # Generate empty target sequence of length 1.
    target_seq = np.zeros((1, 1))
    # Populate the first word of target sequence with the start word.
    target_seq[0, 0] = target_token_index['0000']
    stop_condition = False
    decoded_sentence = ''
    while not stop_condition:
        output_tokens, h = decoder_model.predict([target_seq] + [states_value])
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_char = reverse_target_char_index[sampled_token_index]
        decoded_sentence += ' ' + sampled_char
        # Exit condition: either hit max length or find the stop token.
        if (sampled_char == '1111' or len(decoded_sentence.split(' ')) > max_len_fr - 2):  # stopping criterion (end of the output sentence)
            stop_condition = True
        # Update the target sequence (of length 1).
        target_seq = np.zeros((1, 1))
        target_seq[0] = sampled_token_index
        # Update states
        states_value = h
    return decoded_sentence
y_true = []
NN = []
test_gen = generate_batch(X_train, y_train, batch_size=1)
k = 0
while k < 300:
    (input_seq, actual_output), _ = next(test_gen)
    decoded_sentence = decode_sequence(input_seq)
    exp = map(float, str(decoded_sentence).split())
    NN.append(list(exp))
    k += 1
#seq_preditN = np.transpose(NN)
seq_preditN = NN  # predicted (normal) sequences
"""------Classification--- """
seq_preditA=[]
path1="abnormal.csv"
abnor_data=load_data(path1)
abnor_data.insert(1, 'Class1',1)
abnor_data.insert(3, 'Class2',1)
X_attck=abnor_data[['eng','Class1']]
y_attck=abnor_data[['fr','Class2']]
#X_trainA, X_testA, y_trainA, y_testA = train_test_split(X, y, test_size=0.2)
test_gen = generate_batch(X_attck, y_attck, batch_size=1)
k = 0
while k < 300:
    (input_seq, actual_output), _ = next(test_gen)
    decoded_sentence = decode_sequence(input_seq)
    print('Input sequence:', X_attck[k:k+1].values[0])
    print('Actual next sequence:', y_attck.fr[k:k+1].values[0])
    print('Predicted next sequence:', decoded_sentence)
    exp = map(float, str(decoded_sentence).split())
    seq_preditA.append(list(exp))
    k += 1
#seq_preditN = seq_preditN[0:300]
#seq_preditA = np.transpose(seq_preditA)
X_test = seq_preditA + seq_preditN  # abnormal predictions followed by normal ones
X_test = np.transpose(X_test)
#X_test = np.concatenate((seq_preditA, seq_preditN), axis=0)
print("X_test")
print(X_test)
print("number of test samples:", len(X_test))
oneclass = svm.OneClassSVM(kernel='linear', gamma=0.001, nu=0.95)  # gamma has no effect with a linear kernel
y = np.array([np.array(item) for item in seq_preditN])
oneclass.fit(np.array(y))  # train on the normal predictions only
fraud_pred = oneclass.predict(np.array(X_test))  # <-- the line that intermittently raises the ValueError
unique, counts = np.unique(fraud_pred, return_counts=True)
print(np.asarray((unique, counts)).T)
fraud_pred = pd.DataFrame(fraud_pred)
print("fraud predictions:", fraud_pred)
y1 = y_attck.Class2[0:300]
y2 = y_test.Class2[0:300]
Y_test = y1.append(y2)  # ground-truth labels: abnormal first, then normal
print(Y_test)
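If the cause is indeed ragged prediction lists (decode_sequence stops either at the '1111' token or after max_len_fr - 2 words, so the decoded sequences can differ in length), one workaround I am considering is to pad or truncate every decoded sequence to a fixed length before fitting and predicting. A minimal sketch, assuming zero-padding is acceptable for these features; to_fixed_length is a hypothetical helper, not part of the code above:

import numpy as np
from sklearn import svm

def to_fixed_length(seqs, length, pad_value=0.0):
    """Pad (with pad_value) or truncate each sequence to `length` so the
    result is a rectangular (n_samples, length) float array."""
    out = np.full((len(seqs), length), pad_value, dtype=float)
    for i, s in enumerate(seqs):
        s = s[:length]
        out[i, :len(s)] = s
    return out

max_len = max(len(s) for s in seq_preditN + seq_preditA)
X_normal = to_fixed_length(seq_preditN, max_len)             # normal predictions only
X_all = to_fixed_length(seq_preditA + seq_preditN, max_len)  # abnormal + normal, rows = samples

oneclass = svm.OneClassSVM(kernel='linear', nu=0.95)
oneclass.fit(X_normal)                # one-class training on normal data
fraud_pred = oneclass.predict(X_all)  # +1 = inlier (normal), -1 = outlier (abnormal)

Note that this keeps rows as samples, so the np.transpose above would no longer be needed.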
I would be grateful if anyone could help me.