I am new to ANNs. I am trying out several "simple" algorithms to get a feel for what ANNs can (and cannot) be used for, and how. I have played with Conv2d and got it to recognize images successfully. Now I am looking at trend-line analysis. I have already trained a network that solves a linear equation. Now I want to see whether one can be trained to solve the equation f(a, b) = a + b² (the formula the test data in the code below is generated from).
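For example, f(4, 7) = 4 + 7² = 53, which is what the comments in the prediction block at the bottom of the code assume.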
No matter which parameters I change, or how many Dense layers I use, I get high values for both loss and validation loss, and the predictions are wrong.
Is it possible to solve this equation at all, and if so, with what kind of network? If it is not possible, why not? I am not trying to solve a real-world problem; I am trying to build understanding of, and intuition for, ANNs.
Please see the code I tried below.
#region Imports
from __future__ import absolute_import, division, print_function, unicode_literals
import math
import numpy as np
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
#endregion
#region Constants
learningRate = 0.01
epochs = 1000
batch_size = None
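# Note: with batch_size=None, model.fit falls back to Keras' default batch size of 32.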
trainingValidationFactor = 0.75
nrOfSamples = 100
activation = None
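# Note: activation=None makes every Dense layer below purely linear (a(x) = x).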
#endregion
#region Function definitions
def CreateNetwork(inputDimension):
    model = Sequential()
    model.add(Dense(2, input_dim=inputDimension, activation=activation))
    model.add(Dense(64, use_bias=True, activation=activation))
    model.add(Dense(32, use_bias=True, activation=activation))
    model.add(Dense(1))
    adam = optimizers.Adam(learning_rate=learningRate)
    # sgd = optimizers.SGD(lr=learningRate, decay=1e-6, momentum=0.9, nesterov=True)
    # adamax = optimizers.Adamax(learning_rate=learningRate)
    model.compile(loss='mse', optimizer=adam)
    return model

def SplitDataForValidation(factor, data, labels):
    upperBoundary = int(len(data) * factor)
    trainingData = data[:upperBoundary]
    trainingLabels = labels[:upperBoundary]
    validationData = data[upperBoundary:]
    validationLabels = labels[upperBoundary:]
    return ((trainingData, trainingLabels), (validationData, validationLabels))

def Train(network, training, validation):
    trainingData, trainingLabels = training
    history = network.fit(
        trainingData,
        trainingLabels,
        validation_data=validation,
        epochs=epochs,
        batch_size=batch_size,
    )
    return history

def subtractMean(data):
    # Mutates data in place; returns the mean that was subtracted.
    mean = np.mean(data)
    data -= mean
    return mean

def rescale(data):
    # Mutates data in place; returns the scale factor that was applied.
    maxValue = np.amax(data)
    factor = 1 / maxValue
    data *= factor
    return factor

def Normalize(data, labels):
    dataScaleFactor = rescale(data)
    dataMean = subtractMean(data)
    labels *= dataScaleFactor
    labelsMean = np.mean(labels)
    labels -= labelsMean
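# Note: Normalize transforms data and labels in place; the scale factor and
# means it computes stay local to the function and are not returned.
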
def Randomize(data, labels):
    # Shuffle data and labels in unison by restoring the RNG
    # to the same state before each shuffle.
    rng_state = np.random.get_state()
    np.random.shuffle(data)
    np.random.set_state(rng_state)
    np.random.shuffle(labels)

def CreateTestData(nrOfSamples):
    data = np.zeros(shape=(nrOfSamples, 2))
    labels = np.zeros(nrOfSamples)
    for i in range(nrOfSamples):
        for j in range(2):
            randomInt = np.random.randint(1, 5)
            data[i, j] = (randomInt * i) + 10
        labels[i] = data[i, 0] + math.pow(data[i, 1], 2)
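        # Example: for i = 3 with random draws 2 and 4, data[3] = [16, 22]
        # and labels[3] = 16 + 22**2 = 500.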
    Randomize(data, labels)
    return (data, labels)
#endregion
allData, allLabels = CreateTestData(nrOfSamples)
Normalize(allData, allLabels)
training, validation = SplitDataForValidation(trainingValidationFactor, allData, allLabels)
inputDimension = np.size(allData, 1)
network = CreateNetwork(inputDimension)
history = Train(network, training, validation)
prediction = network.predict(np.array([
    [2, 2],      # should be 2 + 2**2 = 6
    [4, 7],      # should be 4 + 7**2 = 53
    [23, 56],    # should be 23 + 56**2 = 3159
    [128, 256],  # should be 128 + 256**2 = 65664
], dtype=float))
print(prediction)
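One more thing I am unsure about: the prediction inputs above never go through the same transform as the training data. Below is a minimal sketch of what I suspect the prediction step would have to look like. It assumes a hypothetical variant of Normalize that returns the parameters it computes (dataScaleFactor, dataMean, labelsMean); my version above keeps them local.
# Hypothetical: suppose Normalize were changed to return its parameters:
#     dataScaleFactor, dataMean, labelsMean = Normalize(allData, allLabels)
rawInputs = np.array([[2, 2], [4, 7], [23, 56], [128, 256]], dtype=float)
scaledInputs = rawInputs * dataScaleFactor - dataMean  # same transform as the training data
scaledPrediction = network.predict(scaledInputs)
prediction = (scaledPrediction + labelsMean) / dataScaleFactor  # map back to the original label scale
print(prediction)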