The Maximum Mean Discrepancy (MMD), a measure of discrepancy between samples, is employed as a test statistic for the two-sample hypothesis test of equal distributions. The kernel matrix \(K\) must be a symmetric, positive semidefinite square matrix.
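In terms of the kernel \(k\) (whose evaluations are the entries of \(K\)), the biased estimate of the squared MMD for samples \(\{x_i\}_{i=1}^{m}\) and \(\{y_j\}_{j=1}^{n}\) given by Gretton et al. (2012) is

\[
\widehat{\mathrm{MMD}}_b^2 = \frac{1}{m^2}\sum_{i,i'=1}^{m} k(x_i, x_{i'}) + \frac{1}{n^2}\sum_{j,j'=1}^{n} k(y_j, y_{j'}) - \frac{2}{mn}\sum_{i=1}^{m}\sum_{j=1}^{n} k(x_i, y_j),
\]

while the unbiased estimate averages the two within-sample terms over distinct pairs only; the method argument chooses between the biased ("b") and unbiased ("u") versions, as in the Type I error example below.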
mmd2test(K, label, method = c("b", "u"), mc.iter = 999)

The function returns a (list) object of S3 class htest containing:

statistic: a test statistic.
p.value: \(p\)-value under \(H_0\).
alternative: alternative hypothesis.
method: name of the test.
data.name: name(s) of the provided kernel matrix.
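Since the result is a standard htest object, its components can be accessed by name. The following is a minimal, self-contained sketch assuming the usual htest element names listed above; the data construction simply mirrors the example further below.

## sketch: inspecting the returned htest object (standard component names assumed)
x   <- matrix(rnorm(40, mean =  1), ncol = 2)   # 20 observations around mean  1
y   <- matrix(rnorm(40, mean = -1), ncol = 2)   # 20 observations around mean -1
K   <- exp(-as.matrix(dist(rbind(x, y)))^2)     # Gaussian kernel matrix
lab <- rep(1:2, each = 20)                      # group labels
out <- mmd2test(K, lab)
out$statistic                                   # the MMD test statistic
out$p.value                                     # p-value under the null of equal distributions
out$alternative                                 # alternative hypothesis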
Gretton A, Borgwardt KM, Rasch MJ, Schölkopf B, Smola A (2012). “A Kernel Two-Sample Test.” J. Mach. Learn. Res., 13, 723–773. ISSN 1532-4435.
## small test for CRAN submission
dat1 <- matrix(rnorm(60, mean= 1), ncol=2) # group 1 : 30 obs of mean 1
dat2 <- matrix(rnorm(50, mean=-1), ncol=2) # group 2 : 25 obs of mean -1
dmat <- as.matrix(dist(rbind(dat1, dat2))) # Euclidean distance matrix
kmat <- exp(-(dmat^2)) # build a Gaussian kernel matrix
lab <- c(rep(1,30), rep(2,25)) # corresponding label
mmd2test(kmat, lab) # run the test
#>
#> Kernel Two-sample Test with Maximum Mean Discrepancy
#>
#> data: kmat
#> MMD = 0.43367, p-value = 0.001
#> alternative hypothesis: two distributions are not equal
#>
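## The p-value above is consistent with a Monte Carlo null distribution built
## from mc.iter = 999 resamples. Below is a minimal sketch of that idea using a
## simple label permutation; this is an assumption about how mc.iter is used,
## not the package's verified internals, and 'mmd2_stat' is an illustrative
## helper (not an exported function). It reuses 'kmat' and 'lab' from above.
mmd2_stat <- function(K, g){                      # biased squared MMD from a kernel matrix
  a <- (g == g[1])                                # indicator of the first group
  mean(K[a, a]) + mean(K[!a, !a]) - 2*mean(K[a, !a])
}
obs   <- mmd2_stat(kmat, lab)                     # observed statistic
nperm <- 999                                      # analogous to mc.iter = 999
permd <- replicate(nperm, mmd2_stat(kmat, sample(lab)))  # statistics under permuted labels
(sum(permd >= obs) + 1)/(nperm + 1)               # Monte Carlo p-value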
if (FALSE) { # \dontrun{
## WARNING: computationally heavy.
# Let's compute empirical Type 1 error at alpha=0.05
niter = 496
pvals1 = rep(0,niter)
pvals2 = rep(0,niter)
for (i in 1:niter){
  dat = matrix(rnorm(200), ncol=2)
  lab = c(rep(1,50), rep(2,50))
  lbd = 0.1
  kmat = exp(-lbd*(as.matrix(dist(dat))^2))
  pvals1[i] = mmd2test(kmat, lab, method="b")$p.value
  pvals2[i] = mmd2test(kmat, lab, method="u")$p.value
  print(paste("iteration ", i, " complete..", sep=""))
}
# Visualize the above at multiple significance levels
alphas = seq(from=0.001, to=0.999, length.out=100)
errors1 = rep(0,100)
errors2 = rep(0,100)
for (i in 1:100){
  errors1[i] = sum(pvals1 <= alphas[i])/niter
  errors2[i] = sum(pvals2 <= alphas[i])/niter
}
opar <- par(no.readonly=TRUE)
par(mfrow=c(1,2), pty="s")
plot(alphas, errors1, "b", main="Biased Estimator Error",
     xlab="alpha", ylab="error", cex=0.5)
abline(a=0, b=1, lwd=1.5, col="red")
plot(alphas, errors2, "b", main="Unbiased Estimator Error",
     xlab="alpha", ylab="error", cex=0.5)
abline(a=0, b=1, lwd=1.5, col="blue")
par(opar)
} # }