Reproducing the linear discriminant analysis (LDA) projection plot

r  machine-learning  classification  discriminant-analysis
2022-03-22 12:23:41

I am struggling with the projected points in linear discriminant analysis (LDA). Many books on multivariate statistical methods illustrate the idea of LDA with the figure below.

Figure 1

The problem is the following. First the decision boundary is drawn, then a perpendicular line is added, and finally the projections of the data points are plotted on that line. I would like to know how to add the projected points to the perpendicular line.

Any suggestions/pointers?

2 Answers

The discriminant axis (the one onto which the points are projected in Figure 1) is given by the first eigenvector of $\mathbf{W}^{-1}\mathbf{B}$, where $\mathbf{W}$ and $\mathbf{B}$ are the within-class and between-class scatter matrices (Sw and Sb in the code below). If there are only two classes, this eigenvector is proportional to $\mathbf{W}^{-1}(\mathbf{m}_1 - \mathbf{m}_2)$, where the $\mathbf{m}_i$ are the class centroids. Normalise this vector (or the eigenvector you obtained) to get a unit axis vector $\mathbf{v}$; that is enough to draw the axis.

To project the (centred) data points $\mathbf{X}$ onto this axis, you simply compute $\mathbf{X}\mathbf{v}\mathbf{v}^\top$; here $\mathbf{v}\mathbf{v}^\top$ is a linear projector onto $\mathbf{v}$.
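A minimal base-R sketch of these two steps, using a small made-up two-class data set (the names X, g, W, v below are illustrative, not taken from the answers): compute the axis via the two-class shortcut $\mathbf{W}^{-1}(\mathbf{m}_1 - \mathbf{m}_2)$, normalise it, and project the centred points with $\mathbf{v}\mathbf{v}^\top$.

# Illustrative sketch with made-up data; two-class shortcut for the discriminant axis
set.seed(1)
X <- rbind(matrix(rnorm(20, mean = 0), ncol = 2),   # class 1
           matrix(rnorm(20, mean = 2), ncol = 2))   # class 2
g <- rep(1:2, each = 10)

m1 <- colMeans(X[g == 1, ])                          # class centroids
m2 <- colMeans(X[g == 2, ])
W  <- crossprod(scale(X[g == 1, ], scale = FALSE)) + # within-class scatter matrix
      crossprod(scale(X[g == 2, ], scale = FALSE))

v <- solve(W, m1 - m2)                               # axis direction, W^{-1}(m1 - m2)
v <- v / sqrt(sum(v^2))                              # normalise to a unit vector

Xc <- scale(X, scale = FALSE)                        # centre the data
P  <- Xc %*% v %*% t(v)                              # v v' projects each row onto the axis
P  <- sweep(P, 2, colMeans(X), "+")                  # shift back to original coordinates

Drawing a segment from each row of X to the corresponding row of P gives the dashed projection lines seen in the figure.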

Here is the data sample from your dropbox together with the LDA projection:

LDA projection

Here is the MATLAB code that produces this figure (as requested):

% # data taken from your example
X = [-0.9437    -0.0433; -2.4165    -0.5211; -2.0249    -1.0120; ...
    -3.7482 0.2826; -3.3314 0.1653; -3.1927 0.0043; -2.2233 -0.8607; ...
    -3.1965 0.7736; -2.5039 0.2960; -4.4509 -0.3555];
G = [1 1 1 1 1 2 2 2 2 2];

% # overall mean
mu = mean(X);

% # loop over groups
for g=1:max(G)
    mus(g,:) = mean(X(G==g,:)); % # class means
    Ng(g) = length(find(G==g)); % # number of points per group
end

Sw = zeros(size(X,2)); % # within-class scatter matrix
Sb = zeros(size(X,2)); % # between-class scatter matrix
for g=1:max(G)
    Xg = bsxfun(@minus, X(G==g,:), mus(g,:)); % # centred group data
    Sw = Sw + transpose(Xg)*Xg;
    Sb = Sb + Ng(g)*(transpose(mus(g,:) - mu)*(mus(g,:) - mu));
end

St = transpose(bsxfun(@minus,X,mu)) * bsxfun(@minus,X,mu); % # total scatter matrix
assert(sum(sum((St-Sw-Sb).^2)) < 1e-10, 'Error: Sw + Sb ~= St')

% # LDA
[U,S] = eig(Sw\Sb);

% # projecting data points onto the first discriminant axis
Xcentred = bsxfun(@minus, X, mu);
Xprojected = Xcentred * U(:,1)*transpose(U(:,1));
Xprojected = bsxfun(@plus, Xprojected, mu);

% # preparing the figure
colors = [1 0 0; 0 0 1];
figure
hold on
axis([-5 0 -2.5 2.5])
axis square

% # plot discriminant axis
plot(mu(1) + U(1,1)*[-2 2], mu(2) + U(2,1)*[-2 2], 'k')
% # plot projection lines for each data point
for i=1:size(X,1)
    plot([X(i,1) Xprojected(i,1)], [X(i,2) Xprojected(i,2)], 'k--')
end
% # plot projected points
scatter(Xprojected(:,1), Xprojected(:,2), [], colors(G, :))
% # plot original points
scatter(X(:,1), X(:,2), [], colors(G, :), 'filled')

And here is "my" solution. Many thanks to @ttnphns and @amoeba!

set.seed(2014)
library(MASS)
library(DiscriMiner) # For scatter matrices
library(ggplot2)
library(grid)
# Generate multivariate data
mu1 <- c(2, -3)
mu2 <- c(2, 5)
rho <- 0.6
s1 <- 1
s2 <- 3
Sigma <- matrix(c(s1^2, rho * s1 * s2, rho * s1 * s2, s2^2), byrow = TRUE, nrow = 2)
n <- 50
# Multivariate normal sampling
X1 <- mvrnorm(n, mu = mu1, Sigma = Sigma)
X2 <- mvrnorm(n, mu = mu2, Sigma = Sigma)
X <- rbind(X1, X2)
# Center data
Z <- scale(X, scale = FALSE)
# Class variable
y <- rep(c(0, 1), each = n)

# Scatter matrices
B <- betweenCov(variables = X, group = y)
W <- withinCov(variables = X, group = y)

# Eigenvectors
ev <- eigen(solve(W) %*% B)$vectors
slope <- ev[2,1] / ev[1,1]  # slope of the first discriminant axis
intercept <- 0              # data are centred, so the axis passes through the origin

# Create projections on 1st discriminant
P <- Z %*% ev[,1] %*% t(ev[,1])

# ggplot2 requires a data frame
my.df <- data.frame(Z1 = Z[, 1], Z2 = Z[, 2], P1 = P[, 1], P2 = P[, 2])

plt <- ggplot(data = my.df, aes(Z1, Z2))
plt <- plt + geom_segment(aes(xend = P1, yend = P2), size = 0.2, color = "gray")
plt <- plt + geom_point(aes(color = factor(y)))
plt <- plt + geom_point(aes(x = P1, y = P2, colour = factor(y)))
plt <- plt + scale_colour_brewer(palette = "Set1")
plt <- plt + geom_abline(intercept = intercept, slope = slope, size = 0.2)
plt <- plt + coord_fixed()
plt <- plt + xlab(expression(X[1])) + ylab(expression(X[2]))
plt <- plt + theme_bw()
plt <- plt + theme(axis.title.x = element_text(size = 8),
                   axis.text.x  = element_text(size = 8),
                   axis.title.y = element_text(size = 8),
                   axis.text.y  = element_text(size = 8),
                   legend.position = "none")
plt

[Resulting plot]