我正在参照这个笔记本(至少数据处理部分是照着做的),并且已经得到了一个训练好的模型。
我想做的是真正的未来预测,因为 LSTM(如果是有状态的)能够为未见过的时间步生成数据。
我尝试参照这篇文章来做,因为这是我能找到的唯一一篇讲这个概念的文章。
这是原始的(默认)函数:
def load_data(filename, sequence_length):
    """Build normalized sliding windows from a CSV and split them into train/test sets.

    The CSV is read as floats, zeros are forward-filled from the previous row,
    and the rows are cut into overlapping windows of ``sequence_length``
    consecutive rows. Each window is normalized relative to its own first row
    (``value / first_value - 1``), so the first time step of every window is 0.

    The first 90% of the windows are shuffled and used for training; the
    remaining 10% are kept in chronological order for testing. In every window
    the last time step is the target and the preceding ``sequence_length - 1``
    steps are the model input.

    Args:
        filename: Path (or file-like object) handed to ``pandas.read_csv``.
        sequence_length: Number of consecutive rows per window, target included.

    Returns:
        A tuple ``(X_train, Y_train, X_test, Y_test, Y_daybefore,
        unnormalized_bases, window_size)`` where

        * ``X_train`` / ``X_test`` have shape
          ``(n_windows, sequence_length - 1, n_features)``;
        * ``Y_train`` / ``Y_test`` hold column 7 of the last time step;
        * ``Y_daybefore`` holds column 7 of the second-to-last time step of
          each test window;
        * ``unnormalized_bases`` holds the raw column-7 value of the first row
          of every window from index 2400 onwards, shape ``(k, 1)``;
        * ``window_size`` is ``sequence_length - 1``.
    """
    # Column used as the prediction target.
    # NOTE(review): presumably the price column -- confirm against the CSV.
    target_col = 7

    # np.array(...) takes a private, writable copy: DataFrame.values can be a
    # read-only view (pandas Copy-on-Write), and the fill below writes in place.
    raw_data = np.array(pd.read_csv(filename, dtype=float).values)
    print(raw_data)

    # Forward-fill zeros with the value from the previous row. Rows are handled
    # in order so a run of zeros inherits the last non-zero value. Row 0 has no
    # predecessor and is left untouched (indexing row -1 would silently pull
    # values from the *last* row of the file).
    for row in range(1, raw_data.shape[0]):
        zero_mask = raw_data[row] == 0
        if zero_mask.any():
            raw_data[row, zero_mask] = raw_data[row - 1, zero_mask]

    # Stack the windows into a 3D array: (n_windows, sequence_length, n_features).
    d0 = np.array([
        raw_data[first:first + sequence_length]
        for first in range(len(raw_data) - sequence_length)
    ])

    # Normalize every window by its own first row; step 0 stays at 0.
    dr = np.zeros_like(d0)
    dr[:, 1:, :] = d0[:, 1:, :] / d0[:, 0:1, :] - 1

    # Raw base values, needed later to undo the normalization when plotting.
    # NOTE(review): 2400 is a fixed offset and is not derived from the
    # train/test split below -- confirm it matches the data set in use.
    start = 2400
    unnormalized_bases = d0[start:, 0:1, target_col]

    # First 90% of the windows for training, last 10% for testing.
    split_line = int(round(0.9 * dr.shape[0]))

    # The shuffle is in place on a view of ``dr``; that is harmless here
    # because only the untouched tail ``dr[split_line:]`` is read afterwards.
    training_data = dr[:split_line]
    np.random.shuffle(training_data)

    X_train = training_data[:, :-1]
    Y_train = training_data[:, -1, target_col]

    # Index from the end of the window so the test targets line up with
    # X_test for any sequence_length, exactly like the training targets.
    X_test = dr[split_line:, :-1]
    Y_test = dr[split_line:, -1, target_col]
    Y_daybefore = dr[split_line:, -2, target_col]

    # The last step of each window is reserved as the y value.
    window_size = sequence_length - 1

    return X_train, Y_train, X_test, Y_test, Y_daybefore, unnormalized_bases, window_size
下面是我根据那篇文章尝试重构后的版本:
def load_data_future(filename, sequence_length):
    """Build normalized sliding windows from a CSV, using all of them for training.

    The CSV is read as floats, zeros are forward-filled from the previous row,
    and the rows are cut into overlapping windows of ``sequence_length``
    consecutive rows. Each window is normalized relative to its own first row
    (``value / first_value - 1``), so the first time step of every window is 0.

    Every window goes into the (shuffled) training set; no test split is held
    back. The un-shuffled windows are returned as well, in chronological
    order, so the most recent windows can seed predictions.

    Args:
        filename: Path (or file-like object) handed to ``pandas.read_csv``.
        sequence_length: Number of consecutive rows per window.

    Returns:
        A tuple ``(X_train, Y_train, Y_daybefore, unnormalized_bases,
        window_size, entire_data)`` where

        * ``X_train`` has shape ``(n_windows, sequence_length - 1, n_features)``;
        * ``Y_train`` is time step 1 of every shuffled window, shape
          ``(n_windows, n_features)``;
        * ``Y_daybefore`` is always empty because no windows are left after
          the split;
        * ``unnormalized_bases`` holds the raw column-7 value of the first row
          of every window from index 2400 onwards, shape ``(k, 1)``;
        * ``window_size`` is ``sequence_length - 1``;
        * ``entire_data`` is the full normalized window array in original order.
    """
    # Column used as the prediction target.
    # NOTE(review): presumably the price column -- confirm against the CSV.
    target_col = 7

    # np.array(...) takes a private, writable copy: DataFrame.values can be a
    # read-only view (pandas Copy-on-Write), and the fill below writes in place.
    raw_data = np.array(pd.read_csv(filename, dtype=float).values)
    print(raw_data)

    # Forward-fill zeros with the value from the previous row. Rows are handled
    # in order so a run of zeros inherits the last non-zero value. Row 0 has no
    # predecessor and is left untouched (indexing row -1 would silently pull
    # values from the *last* row of the file).
    for row in range(1, raw_data.shape[0]):
        zero_mask = raw_data[row] == 0
        if zero_mask.any():
            raw_data[row, zero_mask] = raw_data[row - 1, zero_mask]

    # Stack the windows into a 3D array: (n_windows, sequence_length, n_features).
    d0 = np.array([
        raw_data[first:first + sequence_length]
        for first in range(len(raw_data) - sequence_length)
    ])

    # Normalize every window by its own first row; step 0 stays at 0.
    dr = np.zeros_like(d0)
    dr[:, 1:, :] = d0[:, 1:, :] / d0[:, 0:1, :] - 1

    # Raw base values, needed later to undo the normalization when plotting.
    # NOTE(review): 2400 is a fixed offset -- confirm it matches the data set.
    start = 2400
    unnormalized_bases = d0[start:, 0:1, target_col]

    # All windows are used for training.
    split_line = dr.shape[0]

    # Shuffle a COPY. np.random.shuffle works in place, and a plain slice of
    # ``dr`` is a view, so shuffling it would also scramble ``entire_data``
    # and destroy the time ordering that forecasting depends on.
    training_data = dr[:split_line].copy()
    np.random.shuffle(training_data)

    X_train = training_data[:, :-1]
    # NOTE(review): this takes time step 1 (all features) of each window, a
    # step that is also part of X_train. If the goal is next-step prediction,
    # the target is more likely training_data[:, -1, target_col] (as in
    # load_data) or the shifted sequence training_data[:, 1:]. Left unchanged
    # so the returned shape stays the same -- confirm the intent.
    Y_train = training_data[:, 1]

    # Nothing lies past split_line, so this is always empty. Indexed from the
    # end of the window so short windows do not raise IndexError.
    Y_daybefore = dr[split_line:, -2, target_col]

    window_size = sequence_length - 1
    entire_data = dr

    return X_train, Y_train, Y_daybefore, unnormalized_bases, window_size, entire_data
我究竟做错了什么?我能看出数据的格式是样本、输出以及每个特征的时间步长,但不知道该如何处理它来实现我的目标。
哪怕只是在评论里给出一些大方向上的提示,也会很有帮助!