我需要了解以下代码中的标签拆分是如何发生的:
import keras
import librosa
import librosa.feature
import librosa.display
import glob
import numpy as np
from keras.models import Sequential
from keras.layers import Dense , Activation
from keras.utils.np_utils import to_categorical
def extract_features_song(f):
    """Load one audio file and return its MFCCs as a flat, normalised vector.

    Args:
        f: Path of the audio file; passed straight to ``librosa.load``.

    Returns:
        A 1-D array holding at most the first 25000 values of the flattened
        MFCC matrix, divided by the matrix's largest absolute value so that
        every entry lies in [-1, 1]. A clip that yields fewer than 25000
        coefficients produces a correspondingly shorter vector.
    """
    y, sr = librosa.load(f)
    # get mfcc -- keyword arguments are required by librosa 0.10+, where the
    # signal can no longer be passed positionally.
    mfcc = librosa.feature.mfcc(y=y, sr=sr)
    # make value between 1 -1; skip the division for an all-zero matrix so
    # the result is zeros rather than NaNs.
    peak = np.amax(np.absolute(mfcc))
    if peak > 0:
        mfcc = mfcc / peak
    return mfcc.flatten()[:25000]
def extrac_features_and_labels():
    """Build the feature matrix and one-hot genre labels for the dataset.

    For each of the ten genres, every file matching ``genres/<genre>/*.au``
    is run through ``extract_features_song`` and tagged with its genre name.

    Returns:
        A tuple ``(features, onehot_labels)``: the per-song feature vectors
        combined with ``np.stack``, and the one-hot label array produced by
        ``to_categorical`` with one column per distinct genre found.
    """
    genre_names = ('blues', 'classical', 'country', 'disco', 'hiphop',
                   'jazz', 'metal', 'pop', 'reggae', 'rock')
    feature_rows = []
    genre_per_row = []
    for genre in genre_names:
        paths = glob.glob('genres/' + genre + '/*.au')
        print('prcoessing %d songs in %s genre' % (len(paths), genre))
        feature_rows.extend(extract_features_song(path) for path in paths)
        genre_per_row.extend([genre] * len(paths))

    # one hot encoding: map each genre name to an integer id (its position in
    # the sorted unique names), then expand the ids into indicator columns.
    unique_genres, genre_ids = np.unique(genre_per_row, return_inverse=True)
    genre_ids = genre_ids.astype(np.int32, copy=False)
    onehot_labels = to_categorical(genre_ids, len(unique_genres))
    return np.stack(feature_rows), onehot_labels
# Load every song's feature vector and its one-hot genre label.
features, labels = extrac_features_and_labels()
print(np.shape(features))
print(np.shape(labels))

training_split = 0.8

# column_stack takes ONE sequence of arrays. It places the label columns to
# the right of the feature columns, so row i is [features[i], labels[i]].
alldata = np.column_stack((features, labels))

# shuffle permutes along the first axis only: whole rows change position, but
# the contents of each row are untouched. Every feature vector therefore stays
# glued to its own label, and the labels are still the trailing columns.
np.random.shuffle(alldata)

# Convert to int AFTER multiplying; a float is not a valid slice index.
splitidx = int(len(alldata) * training_split)
train, test = alldata[:splitidx, :], alldata[splitidx:, :]
print(np.shape(train))
print(np.shape(test))

# the concerned part: begin
# The first n_feature_cols columns are the inputs; everything after them is
# the one-hot label that column_stack appended.
n_feature_cols = features.shape[1]
train_input = train[:, :n_feature_cols]
train_labels = train[:, n_feature_cols:]
test_input = test[:, :n_feature_cols]
test_labels = test[:, n_feature_cols:]
# the concerned part: end
print(np.shape(train_input))
print(np.shape(train_labels))
输出如下:
(1000, 25000)
(1000, 10)
(800, 25010)
(200, 25010)
(800, 25000)
(800, 10)
现在,当他(代码讲师)把两个数组堆叠在一起并打乱之后,他无法确定每一行的最后十个元素就是标签,对吗?如果是这样,他是如何做到的?这样做难道不会导致错误吗?