Directional median analysis

Below you can find R and Matlab code snippets for DMA.

It is a new unsupervised data exploration method concept, fitting directions by explaining structure of the majority of data objects, regardless of the distribution and variance of whole data set. Theory and algorithm is explained in (10.1016/j.chemolab.2016.03.005).

R snippet centers data with l1median_BFGS, so it requires pcaPP package to be loaded. The returned object is analogous to prcomp() result, therefore standard commands such as summary(), biplot() etc. can be used.

dma <- function(x, center = l1median_BFGS(x)$par, scale = FALSE,
                tol = 1e-06, maxiter = 200) {
    x <- as.matrix(x)
    x <- scale(x, center = center, scale = scale)
    cen <- attr(x, "scaled:center")
    sc <- attr(x, "scaled:scale")
    k <- min(dim(x))
    X <- x
    P <- matrix(NA, ncol(x), k)
    for (i in 1:nrow(x)) {
        cp <- sqrt(crossprod(x[i, ]))
        if (cp != 0)
            x[i, ] <- x[i, ]/cp
    }
    for (i in 1:k) {
        w <- rep(1, nrow(x))
        t <- 10
        it <- 0
        while ((t > tol) && (it < maxiter)) {
            s <- svd(diag(w) %*% x)
            r <- sqrt(apply((x %*% s$v[, -1])^2, 1, sum))
            r[r < 0.01] <- 0.01
            r[r > 0.98] <- 0.98
            ow <- w
            w <- r * sqrt(1 - r^2)
            w <- 1/w
            w <- w/sqrt(crossprod(w))
            t <- sum((ow - w)^2)
            it <- it + 1
        }
        P[, i] = s$v[, 1]
        x <- x - x %*% tcrossprod(P[, i])
    }
    dimnames(P) <- list(colnames(x), paste0("DM", seq_len(ncol(P))))
    T <- X %*% P
    r <- list(x = T, sdev = apply(T, 2, sd), rotation = P,
              center = if (is.null(cen)) FALSE else cen,
              scale = if (is.null(sc)) FALSE else sc)
    class(r) <- "prcomp"
    return(r)
}

The MATLAB snippet below does not center the data, so user must manually compute L1Median and center the data before calling this function. Scores are returned as T, loadings as P and S contain standard deviations.

function [T,P,S] = dma(x,tol,maxiter)
    if nargin<3
        maxiter = 100;
    end
    if nargin<2
        tol = 1e-6;
    end
    k = min(size(x));
    X = x;
    P = repmat(NaN, size(x,2), k);
    for i=1:size(x,1)
        cp = sqrt(x(i,:)*x(i,:)');
        if (cp ~= 0)
            x(i,:) = x(i,:)./cp;
        end
    end
    for i=1:k
        w = repmat(1, 1, size(x,1));
        t = 10;
        it = 0;
        while ((t > tol) && (it < maxiter))
            [U,S,V] = svd(diag(w)*x);
            r = sqrt(sum((x*V(:,2:end)).^2,2));
            r(r < 0.01) = 0.01;
            r(r > 0.98) = 0.98;
            ow = w;
            w = r .* sqrt(1 - r.^2);
            w = (1./w)';
            w = w./sqrt(w*w');
            t = sum((ow - w).^2);
            it = it + 1;
        end
        P(:,i) = V(:,1);
        x = x - x*P(:,i)*P(:,i)';
    end
    T = X*P;
    S = std(P);
end