纯 TensorFlow 实现的 MLP 效果比 Keras(TF 后端)差

数据挖掘 tensorflow keras
2022-03-04 16:23:08

我刚接触这个领域,所以先在 Keras(使用 TensorFlow 后端)中尝试了一些模型。最近我开始改用纯 TensorFlow 的写法,却得不到好的结果。这很奇怪:我在 Keras 中用的就是 TF 后端,本以为两者的结果会相近。因此几乎可以肯定是我的实现中有错误,但我找不出错在哪里。

我想实现一个 7 层的 MLP,使用 Adam 优化器,输出层为线性。为方便比较,我去掉了模型中所有的正则化,因此预期模型会过拟合——Keras 模型确实如此,但 TF 模型却没有。如果有人能指出 TensorFlow 实现中的问题,我将不胜感激。

这是 GitHub 仓库:https://github.com/makalaia/Tensorflow-Benchmark

Keras 代码:

import keras
import numpy as np
import time
import matplotlib.pyplot as plt

from keras.layers import Dense
from keras.models import Sequential
from pandas import read_csv


def calculate_rmse(real, predict):
    """Return the root-mean-square error between targets and predictions.

    The squared differences are summed over every element and divided by
    ``len(real)`` (the number of rows) before the square root is taken.

    Args:
        real: Ground-truth values; any array-like (ndarray, list, tuple).
        predict: Predicted values; array-like, broadcastable against
            ``real``.

    Returns:
        The RMSE as a NumPy floating-point scalar.
    """
    # Coerce to ndarrays so plain Python sequences are accepted too; a bare
    # ``list - list`` would raise TypeError.
    real = np.asarray(real)
    predict = np.asarray(predict)
    m = len(real)
    return np.sqrt(np.sum(np.power(real - predict, 2)) / m)


# --- Data -------------------------------------------------------------------
# The last `test_size` rows are held out and passed to fit() as validation data.
test_size = 150
df = read_csv('data/mastigadin.csv', header=None)
first_column = list(df)[0]
df.set_index(first_column, inplace=True)  # first CSV column becomes the index

# Last column is the target; every column before it is a feature.
x_total = df.iloc[:, :-1].values
y_total = df.iloc[:, -1:].values
x_train, x_test = x_total[:-test_size, :], x_total[-test_size:, :]
y_train, y_test = y_total[:-test_size, :], y_total[-test_size:, :]

tempo = time.time()

# --- Network ----------------------------------------------------------------
epochs = 200
batch_size = 64
n_units = 256
optimizer = keras.optimizers.Adam()
model = Sequential()
# The first Dense layer is given no `activation` argument.
model.add(Dense(n_units, input_shape=(x_train.shape[1],)))
# Six further hidden layers, each with a ReLU activation.
for _ in range(6):
    model.add(Dense(n_units, activation='relu'))
# Single-unit output layer, again without an `activation` argument.
model.add(Dense(1))

# --- Training ---------------------------------------------------------------
model.compile(loss='mean_squared_error', optimizer=optimizer)
model.fit(
    x_train,
    y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(x_test, y_test),
    verbose=2,
)
print('TIME: ' + str(time.time() - tempo))

# --- Evaluation -------------------------------------------------------------
y_trained = model.predict(x_train)
y_tested = model.predict(x_test)

error_train = calculate_rmse(y_train, y_trained)
print('TRAIN: RMSE - ' + str(error_train))
error_test = calculate_rmse(y_test, y_tested)
print('\nVAL:   RMSE - ' + str(error_test))

# --- Plot -------------------------------------------------------------------
# Test predictions are drawn at the x-positions of the held-out rows so they
# line up with the tail of the real series.
test_positions = range(len(y_train), len(y_total))
plt.plot(y_total, label='REAL DATA')
plt.plot(y_trained, label='TRAINED DATA')
plt.plot(test_positions, y_tested, label='TEST DATA')
plt.legend()
plt.title('KERAS')
plt.show()

TensorFlow 代码:

import numpy as np
import tensorflow as tf
import time
import matplotlib.pyplot as plt
from pandas import read_csv


def calculate_rmse(real, predict):
    """Return the root-mean-square error between targets and predictions.

    The squared differences are summed over every element and divided by
    ``len(real)`` (the number of rows) before the square root is taken.

    Args:
        real: Ground-truth values; any array-like (ndarray, list, tuple).
        predict: Predicted values; array-like, broadcastable against
            ``real``.

    Returns:
        The RMSE as a NumPy floating-point scalar.
    """
    # Coerce to ndarrays so plain Python sequences are accepted too; a bare
    # ``list - list`` would raise TypeError.
    real = np.asarray(real)
    predict = np.asarray(predict)
    m = len(real)
    return np.sqrt(np.sum(np.power(real - predict, 2)) / m)


# --- Data -------------------------------------------------------------------
# The last `test_size` rows are held out as the test split.
test_size = 150
df = read_csv('data/mastigadin.csv', header=None)
df.set_index(list(df)[0], inplace=True)  # first CSV column becomes the index

# Last column is the target; every column before it is a feature.
y_total = df.iloc[:, -1:].values
x_total = df.iloc[:, :-1].values
y_train = y_total[:-test_size, :]
x_train = x_total[:-test_size, :]
y_test = y_total[-test_size:, :]
x_test = x_total[-test_size:, :]
n_samples = x_train.shape[0]

tempo = time.time()
epochs = 200
batch_size = 64

# Take the input width from the data rather than hard-coding it, so the
# placeholder and first weight matrix always match the CSV's feature count.
n_input = x_train.shape[1]
n_output = 1
n_hidden = 256

# tf Graph input: batches of feature rows (X) and their targets (Y); the
# leading dimension is left as None so any batch size can be fed.
X = tf.placeholder("float", [None, n_input])
Y = tf.placeholder("float", [None, n_output])

# Store layers weight & bias
# Hidden-layer weight matrices h1..h7 are created with tf.get_variable and no
# `initializer` argument; the output matrix 'out' is instead built from
# tf.random_normal samples.
# NOTE(review): the TF 1.x docs say get_variable falls back to the variable
# scope's initializer and, if that is unset, to glorot_uniform — confirm for
# the TF version in use.
weights = {
    'h1': tf.get_variable('h1', shape=[n_input, n_hidden]),
    'h2': tf.get_variable('h2', shape=[n_hidden, n_hidden]),
    'h3': tf.get_variable('h3', shape=[n_hidden, n_hidden]),
    'h4': tf.get_variable('h4', shape=[n_hidden, n_hidden]),
    'h5': tf.get_variable('h5', shape=[n_hidden, n_hidden]),
    'h6': tf.get_variable('h6', shape=[n_hidden, n_hidden]),
    'h7': tf.get_variable('h7', shape=[n_hidden, n_hidden]),
    'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}
# Every bias vector, including the output bias, starts from tf.random_normal
# samples.
# NOTE(review): Keras Dense layers presumably default to zero biases, so this
# is a likely source of divergence from the Keras model — confirm.
biases = {
    'b1': tf.Variable(tf.random_normal([n_hidden])),
    'b2': tf.Variable(tf.random_normal([n_hidden])),
    'b3': tf.Variable(tf.random_normal([n_hidden])),
    'b4': tf.Variable(tf.random_normal([n_hidden])),
    'b5': tf.Variable(tf.random_normal([n_hidden])),
    'b6': tf.Variable(tf.random_normal([n_hidden])),
    'b7': tf.Variable(tf.random_normal([n_hidden])),
    'out': tf.Variable(tf.random_normal([n_output]))
}


# Create model
def multilayer_perceptron(x):
    """Build the forward pass: seven ReLU hidden layers and a linear output.

    Reads the module-level ``weights`` and ``biases`` dictionaries.

    Args:
        x: Input tensor fed to the first hidden layer.

    Returns:
        The output-layer tensor; no activation is applied to it.
    """
    # (weight key, bias key) for each hidden layer, in application order.
    layer_keys = (
        ('h1', 'b1'),
        ('h2', 'b2'),
        ('h3', 'b3'),
        ('h4', 'b4'),
        ('h5', 'b5'),
        ('h6', 'b6'),
        ('h7', 'b7'),
    )
    hidden = x
    for w_key, b_key in layer_keys:
        pre_activation = tf.add(tf.matmul(hidden, weights[w_key]), biases[b_key])
        hidden = tf.nn.relu(pre_activation)
    return tf.matmul(hidden, weights['out']) + biases['out']


# Forward pass on the input placeholder.
pred = multilayer_perceptron(X)

# Mean-squared-error objective, minimised by Adam with default arguments.
cost = tf.reduce_mean(tf.squared_difference(pred, Y))
optimizer = tf.train.AdamOptimizer()
train_op = optimizer.minimize(cost)

# Op that initialises the graph's global variables.
init = tf.global_variables_initializer()

display_step = 1
# Number of full batches per epoch; rows left over after the last full batch
# are never fed to the optimiser, and batches are visited in file order.
total_batch = n_samples // batch_size
with tf.Session() as sess:
    sess.run(init)

    for epoch in range(epochs):
        avg_cost = 0.
        tp = time.time()
        for start in range(0, total_batch * batch_size, batch_size):
            stop = start + batch_size
            feed = {X: x_train[start:stop], Y: y_train[start:stop]}
            # One optimisation step; also fetch the batch loss for logging.
            _, c = sess.run([train_op, cost], feed_dict=feed)
            # Running mean of the per-batch loss over this epoch.
            avg_cost += c / total_batch
        # Per-epoch log line (every `display_step` epochs).
        if epoch % display_step == 0:
            print("Epoch:", '%04d' % (epoch + 1), "cost={:.9f}".format(avg_cost), "TIME: %.2f" % (time.time() - tp))
    print('TIME: ' + str(time.time() - tempo))

    # Predictions for both splits, taken while the session is still open.
    y_trained = sess.run(pred, feed_dict={X: x_train})
    y_tested = sess.run(pred, feed_dict={X: x_test})

    error_train = calculate_rmse(y_train, y_trained)
    print('TRAIN: RMSE - ' + str(error_train))
    error_test = calculate_rmse(y_test, y_tested)
    print('\nVAL:   RMSE - ' + str(error_test))

# Test predictions are drawn at the x-positions of the held-out rows so they
# line up with the tail of the real series.
test_positions = range(len(y_train), len(y_total))
plt.plot(y_total, label='REAL DATA')
plt.plot(y_trained, label='TRAINED DATA')
plt.plot(test_positions, y_tested, label='TEST DATA')
plt.legend()
plt.title('TENSORFLOW')
plt.show()
1个回答

下面这部分有严重的问题,会导致结果很差:

# Store layers weight & bias
# h1..h7 are created with tf.get_variable and no `initializer` argument;
# 'out' is built from tf.random_normal samples.
# NOTE(review): the TF 1.x docs say get_variable without an initializer falls
# back to the scope default and then to glorot_uniform — confirm for the TF
# version in use.
weights = {
    'h1': tf.get_variable('h1', shape=[n_input, n_hidden]),
    'h2': tf.get_variable('h2', shape=[n_hidden, n_hidden]),
    'h3': tf.get_variable('h3', shape=[n_hidden, n_hidden]),
    'h4': tf.get_variable('h4', shape=[n_hidden, n_hidden]),
    'h5': tf.get_variable('h5', shape=[n_hidden, n_hidden]),
    'h6': tf.get_variable('h6', shape=[n_hidden, n_hidden]),
    'h7': tf.get_variable('h7', shape=[n_hidden, n_hidden]),
    'out': tf.Variable(tf.random_normal([n_hidden, n_output]))
}

问题是初始化。您的隐藏层根本没有初始化。输出层可能以错误的比例初始化。为了匹配 Keras,你的初始化器应该是这样的:

tf.random_normal([n_in, n_out]) * math.sqrt(2.0 / (n_in + n_out))

或者您可以使用内置的 Xavier 初始化器:

tf.contrib.layers.xavier_initializer()

此外,您可能需要去掉偏置(bias)的随机初始化器。