在 sklearn 中,在使用 train_test_split 拆分数据之前和之后对数据集进行预处理,有区别吗?
换句话说,这两种方法是否等效?
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
# Approach 1: standardize AFTER splitting.
# The scaler is fit on the training rows only; that same fitted scaler is then
# applied to both the training and the test subset.
X_train, X_test, y_train, y_test = train_test_split(data, target)
sc = StandardScaler()
sc.fit(X_train)  # fit() returns the estimator itself, so `sc` is the fitted scaler
X_train_std, X_test_std = sc.transform(X_train), sc.transform(X_test)
# Approach 2: standardize BEFORE splitting.
# The scaler is fit on every row of `data`, so rows that later land in the test
# subset also contribute to the scaling statistics.
# NOTE: this rebinds X_train / X_test / y_train / y_test, and the call passes no
# random_state, so the split is drawn independently of any earlier split.
data_std = StandardScaler().fit(data).transform(data)
X_train, X_test, y_train, y_test = train_test_split(data_std, target)