Below is my own implementation of a least-squares regression algorithm in MATLAB. Could you please take a look and tell me whether it makes sense and whether it actually does what it is supposed to do?
Edit: Please note the commented-out commands.
function [MSE_train_mean,MSE_train_std,MSE_test_mean,MSE_test_std,w_star] = perform_lsr(X,Y,Z)
% 604 days for the training set (29/05/2009 - 18/10/2011)
% 145 days for the evaluation set (19/10/2011 - 16/05/2012)
Xtran = X(1:end-145,:);
Ytran = Y(1:end-145,:);
Ztran = Z(1:end-145,:);
Xeval = X(end-144:end,:);
Yeval = Y(end-144:end,:);
Zeval = Z(end-144:end,:);
% Set number of runs.
runs = 20;
% Set number of folds.
folds = 5;
% Initialize auxiliary matrices.
MSE_train = zeros(runs,folds);
MSE_test = zeros(runs,folds);
w_star = zeros(runs,folds,size(Xtran,2));
% Perform runs.
for r=1:runs
% Split original dataset into training and test set.
split_assignments = cross_val_split(folds,size(Xtran,1));
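% split_assignments has one 0/1 column per fold: rows marked 1 in column f
% are used as the test set of fold f, rows marked 0 as its training set
% (that is how it is indexed below).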
% Perform folds.
for f=1:folds
% Assign explanatory variables (x).
x_train = Xtran((split_assignments(:,f)==0),:);
x_test = Xtran((split_assignments(:,f)==1),:);
% Assign response variable (y).
y_train = Ytran((split_assignments(:,f)==0),:);
y_test = Ytran((split_assignments(:,f)==1),:);
% Retrieve the number of observations in each set.
l_train_n = size(x_train,1);
l_test_n = size(x_test,1);
% Estimate parameters (w0,w1,...) of the 1st order model from training set.
w_star(r,f,:) = (x_train' * x_train) \ (x_train' * y_train);
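% (The backslash operator solves the normal equations (x'*x)*w = x'*y, i.e. it
% computes the closed-form ordinary least squares estimate of the weights.)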
w_star_temp = squeeze(w_star(r,f,:)); % flatten the 1-by-1-by-m slice into a column vector
% Apply the learned weights on both training and test sets and compute the
% corresponding MSEs.
MSE_train(r,f) = (1 / l_train_n) * (w_star_temp' * (x_train') * x_train * w_star_temp ...
    - 2 * y_train' * x_train * w_star_temp + y_train' * y_train);
MSE_test(r,f) = (1 / l_test_n) * (w_star_temp' * (x_test') * x_test * w_star_temp ...
    - 2 * y_test' * x_test * w_star_temp + y_test' * y_test);
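% (Both expressions above expand (1/n)*||y - X*w||^2, so an equivalent and
% perhaps clearer form would be mean((y_train - x_train*w_star_temp).^2).)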
end
end
% % Plot both training and test sets' MSEs as a function of runs and folds.
% figure, mesh(MSE_train), title('Training Set''s MSEs vs Runs and Folds');
% figure, mesh(MSE_test), title('Testing Set''s MSEs vs Runs and Folds');
%
% % Plot both training and test sets' mean of MSEs accompanied by their
% % corresponding std of MSEs.
% figure, errorbar(1:runs,mean(MSE_train,2),std(MSE_train,0,2));
% figure, errorbar(1:runs,mean(MSE_test,2),std(MSE_test,0,2));
% Average over folds and then over runs.
MSE_train_mean = mean(mean(MSE_train,2),1);
MSE_train_std = std(mean(MSE_train,2),0,1);
MSE_test_mean = mean(mean(MSE_test,2),1);
MSE_test_std = std(mean(MSE_test,2),0,1);
w_star_temp = mean(mean(w_star,2),1);
w_star = w_star_temp(:);
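% w_star is now a single column vector: the weights averaged over all runs and
% folds. The commented-out residual checks below apply this averaged model to
% the full training period and to the held-out evaluation period.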
% % Calculate in-sample residuals.
% y_hat_tran = Xtran * w_star;
% e_tran = Ytran - y_hat_tran;
% % Check that they sum up to zero.
% fprintf('In-sample residuals sum up to %g.\n',sum(e_tran));
% % Finally, plot them.
% figure, scatter(1:size(e_tran,1),e_tran);
%
% % Calculate out-of-sample residuals.
% y_hat_eval = Xeval * w_star;
% e_eval = Yeval - y_hat_eval;
% % Check that they sum up to zero.
% fprintf('Out-of-sample residuals sum up to %g.\n',sum(e_eval));
% % Finally, plot them.
% figure, scatter(1:size(e_eval,1),e_eval);
% Print summary statistics.
fprintf('Training set: over %i runs and %i folds, the MSE mean and standard deviation are %.10f and %.10f, respectively.\n',runs,folds,MSE_train_mean,MSE_train_std);
fprintf('Testing set: over %i runs and %i folds, the MSE mean and standard deviation are %.10f and %.10f, respectively.\n',runs,folds,MSE_test_mean,MSE_test_std);
end
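For reference, this is roughly how I call the function. The data below are only a synthetic stand-in for my real X, Y and Z matrices (daily observations), so treat the sizes and the noise level as placeholders; it also assumes cross_val_split is on the path.

% Synthetic stand-in data: 604 training days + 145 evaluation days = 749 rows.
n = 749;
m = 5;                              % arbitrary number of explanatory variables
X = [ones(n,1) randn(n,m-1)];       % explanatory variables with an intercept column
w_true = randn(m,1);
Y = X * w_true + 0.1 * randn(n,1);  % response variable
Z = randn(n,1);                     % Z is split inside perform_lsr but not otherwise used
[mse_train_mean,mse_train_std,mse_test_mean,mse_test_std,w_hat] = perform_lsr(X,Y,Z);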