当我调用 Keras'fit_generator()并传入我创建的自定义生成器类时,我在 spew 中看到“Epoch 1/1”,仅此而已。它就挂在那里,并且永远不会调用生成器。我知道这一点,因为我将打印语句放入getitem从未打印过的语句中。
此数据生成器是Shervine Amidi 的教程示例的修改版本,该示例生成器继承自 Keras 序列对象:
class DataGenerator(keras.utils.Sequence):
'Generates data for Keras'
def __init__(self, batchID,
batch_size = 32,
dim = (32,32,32)):
self.dim = dim
self.batch_size = batch_size
self.datafile_IDs = []
self.labelfile_IDs = []
self.batchID = batchID
self.DataDir = "data/"
self.BatchDir = ""
DataDir = self.DataDir
BatchDir = DataDir + batchID + "/"
self.BatchDir = BatchDir
path = BatchDir + "datafilenames_" + batchID + ".pkl"
fd = open(path, "rb")
self.datafile_IDs = pkl.load(fd)
fd.close()
path = BatchDir + "labelfilenames_" + batchID + ".pkl"
fd = open(path, "rb")
self.labelfile_IDs = pkl.load(fd)
fd.close()
def __len__(self):
'Denotes the number of batches per epoch'
return int(
np.floor(len(self.datafile_IDs) / self.batch_size))
def __getitem__(self, index):
'Generate one batch of data'
datafn = self.datafile_IDs[index]
labelfn = self.labelfile_IDs[index]
print("In getitem: index = %d, datafn = %s, labelfn = %s" % (
index, datafn, labelfn))
batch_size = self.batch_size
# Initialize data arrays for this batch
X = np.empty((self.batch_size, *self.dim))
y = np.empty((self.batch_size), dtype=int)
BatchDir = self.BatchDir
# Load data
datafn = BatchDir + datafn
X = np.load(datafn)
# Load label
labelfn = BatchDir + labelfn
y = np.load(labelfn)
return X, y
genbatchfiles(df_short, batchID = "short", batch_size = 20)
params = {'batchID': "short", 'batch_size': 20, 'dim': (100, 10088)}
dg = DataGenerator(**params)
time_series_length, input_dim, output_dim = 100, 10088, 1
model = Sequential()
model.add(LSTM(20, input_shape=(time_series_length, input_dim)))
model.add(Dense(output_dim, activation='relu'))
model.compile(loss='mean_squared_error',
optimizer='sgd',
metrics=['accuracy'])
model.fit_generator(generator = dg,
steps_per_epoch = 5,
use_multiprocessing = True,
workers = 6,
verbose = 2)
根据下面回复的人的请求,我正在为用于genbatchfiles生成输入数据的函数添加代码:
def genbatchfiles(df, batchID, batch_size = 5):
DataDir = "data/"
BatchDir = DataDir + batchID + "/"
# If directory does not already exist
# for this batch, create it.
if not os.path.isdir(BatchDir):
mkdir(BatchDir)
# Column "sigccm" (signal cross correlation matrix) holds
# a series each of whose elements is a 100 X 10088 array.
# Column "Attack" holds labels with values 1's and 0's that
# indicate whether a cell phone spoofing signal is present
sigccm = df["sigccm"]
attack = df["Attack"]
idcnt = 0
datafiles = []
labelfiles = []
# number of records to process
nrecs = len(sigccm)
for i in range(0, nrecs, batch_size):
if i + batch_size > nrecs:
upperbound = nrecs
else:
upperbound = i + batch_size
x = np.stack(sigccm[i : upperbound])
y = np.stack(attack[i : upperbound])
fnm = 'data_{:s}{:02d}.npy'.format(batchID, idcnt)
datafiles.append(fnm)
np.save(BatchDir + fnm, x)
fnm = 'labels_{:s}{:02d}.npy'.format(batchID, idcnt)
labelfiles.append(fnm)
np.save(BatchDir + fnm, y)
idcnt = idcnt + 1
path = BatchDir + "datafilenames_" + batchID + ".pkl"
fd = open(path, "wb")
pkl.dump(datafiles, fd)
fd.close()
path = BatchDir + "labelfilenames_" + batchID + ".pkl"
fd = open(path, "wb")
pkl.dump(labelfiles, fd)
fd.close()
以下是如何生成一些与我输入 Keras LSTM 的数据具有相同形状的虚假数据的方法:
x = np.random.rand(150, 100, 10088).tolist()
df = pd.DataFrame({"sigccm" : x})
bools = np.round(np.random.rand(150), decimals=0)
attack = pd.Series(bools)
df["Attack"] = attack
其次是:
genbatchfiles(df, batchID = "phony", batch_size = 20)
params = {'batchID': "phony", 'batch_size': 20, 'dim': (100, 10088)}
dg = DataGenerator(**params)
其次是 LSTM 代码。
当我将这些虚假数据输入 LSTM 时,Keras 挂在生成器中的方式与挂在真实数据中的方式完全相同,因此任何希望这样做的人都应该能够重现我看到的错误。
警告:即使这个玩具数据集也很大。每条记录大约 1 兆字节,而我上面生成的虚假数据有 150 条记录,因此它包含大约 150MB。