What is Directional Median Analysis?
It is a new unsupervised data exploration method concept, fitting directions explaining structure of the majority of data objects, regardless of the distribution and variance of whole data set. Theory and algorithm is explained in (10.1016/j.chemolab.2016.03.005).
Below you can find source code snippet in R for computing DMA. It centers data with l1median_BFGS, so it requires pcaPP package to be loaded. The returned object is analogous to prcomp() result, therefore standard commands such as summary(), biplot() etc. can be used.
dma <- function(x, center = l1median_BFGS(x)$par, scale = FALSE, tol = 1e-06, maxiter = 200) { x <- as.matrix(x) x <- scale(x, center = center, scale = scale) cen <- attr(x, "scaled:center") sc <- attr(x, "scaled:scale") k <- min(dim(x)) X <- x P <- matrix(NA, ncol(x), k) for (i in 1:nrow(x)) { cp <- sqrt(crossprod(x[i, ])) if (cp != 0) x[i, ] <- x[i, ]/cp } for (i in 1:k) { w <- rep(1, nrow(x)) t <- 10 it <- 0 while ((t > tol) && (it < maxiter)) { s <- svd(diag(w) %*% x) r <- sqrt(apply((x %*% s$v[, -1])^2, 1, sum)) r[r < 0.01] <- 0.01 r[r > 0.98] <- 0.98 ow <- w w <- r * sqrt(1 - r^2) w <- 1/w w <- w/sqrt(crossprod(w)) t <- sum((ow - w)^2) it <- it + 1 } P[, i] = s$v[, 1] x <- x - x %*% tcrossprod(P[, i]) } dimnames(P) <- list(colnames(x), paste0("DM", seq_len(ncol(P)))) T <- X %*% P r <- list(x = T, sdev = apply(T, 2, sd), rotation = P, center = if (is.null(cen)) FALSE else cen, scale = if (is.null(sc)) FALSE else sc) class(r) <- "prcomp" return(r) }
The MATLAB snippet below does not center the data, so user must manually compute L1Median and center the data before calling this function. Scores are returned as T, loadings as P and S contain standard deviations.
function [T,P,S] = dma(x,tol,maxiter) if nargin<3 maxiter = 100; end if nargin<2 tol = 1e-6; end k = min(size(x)); X = x; P = repmat(NaN, size(x,2), k); for i=1:size(x,1) cp = sqrt(x(i,:)*x(i,:)'); if (cp ~= 0) x(i,:) = x(i,:)./cp; end end for i=1:k w = repmat(1, 1, size(x,1)); t = 10; it = 0; while ((t > tol) && (it < maxiter)) [U,S,V] = svd(diag(w)*x); r = sqrt(sum((x*V(:,2:end)).^2,2)); r(r < 0.01) = 0.01; r(r > 0.98) = 0.98; ow = w; w = r .* sqrt(1 - r.^2); w = (1./w)'; w = w./sqrt(w*w'); t = sum((ow - w).^2); it = it + 1; end P(:,i) = V(:,1); x = x - x*P(:,i)*P(:,i)'; end T = X*P; S = std(P); end