事实上,它们确实给出了相同的结果。我不确定你是如何比较它们的,但这里有一个例子:
# Build a reproducible example data set
set.seed(123)
# one nominal column: three levels, ten rows each
nom <- factor(rep(c("a", "b", "c"), each = 10))
# seventeen numeric columns of standard-normal draws, filled column by column
vars <- matrix(rnorm(30 * 17), nrow = 30, ncol = 17)
df <- data.frame(nom, vars)
# Load both packages up front, then compute the Gower dissimilarities
library(cluster)
library(StatMatch)
daisy.mat <- as.matrix(daisy(df, metric = "gower"))
gower.mat <- gower.dist(df)
# print the first three rows of each matrix to compare the values by eye
head(daisy.mat, n = 3)
head(gower.mat, n = 3)
# identical() demands exact equality; see what it reports for the two matrices
identical(daisy.mat, gower.mat)
> identical(daisy.mat, gower.mat)
[1] FALSE
# This is because there are extremely small differences
# in the numbers returned by the different functions
max(abs(daisy.mat - gower.mat))
> max(abs(daisy.mat - gower.mat))
[1] 5.551115e-17
# all.equal() compares numbers within a tolerance instead of exactly,
# so differences at the level of floating-point rounding are ignored.
# FALSE is spelled out because the shorthand F is an ordinary variable
# that can be reassigned.
all.equal(daisy.mat, gower.mat, check.attributes = FALSE)
> all.equal(daisy.mat, gower.mat, check.attributes = F)
[1] TRUE
既然我知道您正在向 daisy 函数添加一个额外的组件,那么仍然有一个解决方案。答案就在 gower.dist 的文档中。关键在文档的第一部分:类型(mode)为 logical 的列将被视为二元非对称变量。所以您要确保数据中各列的类型是合适的。
set.seed(123)
# create nominal variable
nom <- factor(rep(letters[1:3], each = 10))
# create binary variables: five columns of 0/1 draws (still numeric here)
bin <- replicate(5, sample(c(0, 1), 30, replace = TRUE))
# create numeric variables
vars <- replicate(9, rnorm(30))
df <- data.frame(nom, bin, vars)
# The 0/1 columns are stored as numeric, not logical,
# so they still have to be converted.
str(df)
> str(df)
'data.frame': 30 obs. of 15 variables:
$ nom : Factor w/ 3 levels "a","b","c": 1 1 1 1 1 1 1 1 1 1 ...
$ X1 : num 0 1 0 1 1 0 1 1 1 0 ...
$ X2 : num 1 1 1 1 0 0 1 0 0 0 ...
$ X3 : num 1 0 0 0 1 0 1 1 1 0 ...
$ X4 : num 0 1 0 1 0 0 1 0 0 1 ...
$ X5 : num 1 0 0 0 0 1 0 0 0 1 ...
$ X1.1: num 1.026 -0.285 -1.221 0.181 -0.139 ...
$ X2.1: num -0.045 -0.785 -1.668 -0.38 0.919 ...
$ X3.1: num 1.13 -1.46 0.74 1.91 -1.44 ...
$ X4.1: num 0.298 0.637 -0.484 0.517 0.369 ...
$ X5.1: num 1.997 0.601 -1.251 -0.611 -1.185 ...
$ X6 : num 0.0597 -0.7046 -0.7172 0.8847 -1.0156 ...
$ X7 : num -0.0886 1.0808 0.6308 -0.1136 -1.5329 ...
$ X8 : num 0.134 0.221 1.641 -0.219 0.168 ...
$ X9 : num 0.704 -0.106 -1.259 1.684 0.911 ...
# make columns logical
# x == 1 is already a logical vector, and lapply() returns one element per
# column, so each 0/1 column is replaced by a logical column.
df[2:6] <- lapply(df[2:6], function(x) x == 1)
# now the columns are the correct types
> str(df)
'data.frame': 30 obs. of 15 variables:
$ nom : Factor w/ 3 levels "a","b","c": 1 1 1 1 1 1 1 1 1 1 ...
$ X1 : logi FALSE TRUE FALSE TRUE TRUE FALSE ...
$ X2 : logi TRUE TRUE TRUE TRUE FALSE FALSE ...
$ X3 : logi TRUE FALSE FALSE FALSE TRUE FALSE ...
$ X4 : logi FALSE TRUE FALSE TRUE FALSE FALSE ...
$ X5 : logi TRUE FALSE FALSE FALSE FALSE TRUE ...
$ X1.1: num 1.026 -0.285 -1.221 0.181 -0.139 ...
$ X2.1: num -0.045 -0.785 -1.668 -0.38 0.919 ...
$ X3.1: num 1.13 -1.46 0.74 1.91 -1.44 ...
$ X4.1: num 0.298 0.637 -0.484 0.517 0.369 ...
$ X5.1: num 1.997 0.601 -1.251 -0.611 -1.185 ...
$ X6 : num 0.0597 -0.7046 -0.7172 0.8847 -1.0156 ...
$ X7 : num -0.0886 1.0808 0.6308 -0.1136 -1.5329 ...
$ X8 : num 0.134 0.221 1.641 -0.219 0.168 ...
$ X9 : num 0.704 -0.106 -1.259 1.684 0.911 ...
# now you can do your calls
# columns 2 to 6 are the logical columns; they are passed to daisy() as
# the "asymm" entry of its type list
daisy.mat <- as.matrix(daisy(df, metric = "gower", type = list(asymm = 2:6)))
gower.mat <- gower.dist(df)
# and you can see that the results are the same
# (daisy.mat is already a matrix, so no second as.matrix() is needed)
all.equal(daisy.mat, gower.mat, check.attributes = FALSE)
[1] TRUE