# Source code for mgcpy.independence_tests.utils.mdmr_functions

import numpy as np

DTYPE = np.float64
ITYPE = np.int32


def check_rank(X):
    """
    Check whether the data matrix ``X`` is rank deficient.

    :param X: is interpreted as:

        - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
    :type X: 2D `numpy.array`

    :rtype: None

    :raise: Raises ``ValueError`` (a subclass of ``Exception``, so existing
        ``except Exception`` handlers still work) if ``X`` is rank deficient.
    """
    rank = np.linalg.matrix_rank(X)
    # Full rank means rank == min(n, d); anything less is deficient.
    if rank < min(X.shape[0], X.shape[1]):
        raise ValueError("matrix is rank deficient (rank %i vs cols %i)"
                         % (rank, X.shape[1]))
def hatify(X):
    """
    Compute the projection ("hat") matrix ``X (X'X)^{-1} X'`` of ``X``.

    :param X: is interpreted as:

        - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
    :type X: 2D `numpy.array`

    :return: the ``[n*n]`` hat matrix of the input data matrix.
    :rtype: 2D `numpy.array`
    """
    gram_inverse = np.linalg.inv(X.T @ X)
    return X @ gram_inverse @ X.T
def gower_center(Y):
    """
    Compute Gower's centered similarity matrix of a distance matrix.

    :param Y: is interpreted as:

        - a ``[n*n]`` distance matrix
    :type Y: 2D `numpy.array`

    :return: the Gower-centered similarity matrix of the input matrix.
    :rtype: 2D `numpy.array`
    """
    n = Y.shape[0]
    # A = -0.5 * Y^2, double-centered by C = I - (1/n) * 1 1'
    similarities = -0.5 * Y ** 2
    centering = np.eye(n, n) - np.full((n, n), 1.0 / n)
    return centering.dot(similarities).dot(centering)
def gower_center_many(Ys):
    """
    Gower center each matrix in the input, a centering process discussed
    in detail in Gower (1966).

    :param Ys: is interpreted as:

        - an array whose columns are flattened ``[n*n]`` distance matrices
    :type Ys: 2D `numpy.array`

    Note: in practice this function is currently only run on one matrix,
    so ``Ys`` has a single column.

    :return: the Gower-centered similarity matrices, one flattened per column.
    :rtype: 2D `numpy.array`
    """
    n = int(np.sqrt(Ys.shape[0]))
    Gs = np.zeros_like(Ys)
    # The centering matrix C = I - (1/n) * 1 1' is identical for every
    # column, so it is built once outside the loop.
    centering = np.eye(n, n) - np.full((n, n), 1.0 / n)
    for col in range(Ys.shape[1]):
        distances = Ys[:, col].reshape(n, n)
        similarities = -0.5 * distances ** 2
        Gs[:, col] = centering.dot(similarities).dot(centering).flatten()
    return Gs
def gen_H2_perms(X, predictors, permutation_indexes):
    """
    Return H2 for each permutation of the rows of ``X``, where H2 is the
    hat matrix minus the hat matrix of the untested (covariate) columns.

    :param X: is interpreted as:

        - a ``[n*(d+1)]`` data matrix: ``n`` samples in ``d`` dimensions,
          with a column of ones placed before the matrix
    :type X: 2D `numpy.array`

    :param predictors: is interpreted as:

        - the indices of the columns of ``X`` used as predictors
    :type predictors: 1D `numpy.array`

    :param permutation_indexes: is interpreted as:

        - a ``[(p+1)*n]`` matrix, where ``p`` is the number of permutations;
          each row is one permutation of the row indices of ``X``
    :type permutation_indexes: 2D `numpy.array`

    :return: an ``[n^2*(p+1)]`` array holding the flattened H2 matrix of
        each permutation, one per column
    :rtype: 2D `numpy.array`
    """
    def hat(M):
        # projection matrix M (M'M)^{-1} M'
        return M.dot(np.linalg.inv(M.T.dot(M))).dot(M.T)

    permutations, observations = permutation_indexes.shape
    covariates = [c for c in range(X.shape[1]) if c not in predictors]
    H2_permutations = np.zeros((observations ** 2, permutations))
    for p in range(permutations):
        permuted = X[permutation_indexes[p]]
        H2 = hat(permuted) - hat(permuted[:, covariates])
        H2_permutations[:, p] = H2.flatten()
    return H2_permutations
def gen_IH_perms(X, predictors, permutation_indexes):
    """
    Return I - H for each permutation of the rows of ``X``, where H is the
    hat matrix and I is the identity matrix.

    :param X: is interpreted as:

        - a ``[n*(d+1)]`` data matrix: ``n`` samples in ``d`` dimensions,
          with a column of ones placed before the matrix
    :type X: 2D `numpy.array`

    :param predictors: is interpreted as:

        - the indices of the columns of ``X`` used as predictors
          (unused here; kept for signature symmetry with ``gen_H2_perms``)
    :type predictors: 1D `numpy.array`

    :param permutation_indexes: is interpreted as:

        - a ``[(p+1)*n]`` matrix, where ``p`` is the number of permutations;
          each row is one permutation of the row indices of ``X``
    :type permutation_indexes: 2D `numpy.array`

    :return: an ``[n^2*(p+1)]`` array holding the flattened I - H matrix of
        each permutation, one per column
    :rtype: 2D `numpy.array`
    """
    def hat(M):
        # projection matrix M (M'M)^{-1} M'
        return M.dot(np.linalg.inv(M.T.dot(M))).dot(M.T)

    permutations, observations = permutation_indexes.shape
    identity = np.eye(observations, observations)
    IH_permutations = np.zeros((observations ** 2, permutations))
    for p in range(permutations):
        residual_maker = identity - hat(X[permutation_indexes[p, :]])
        IH_permutations[:, p] = residual_maker.flatten()
    return IH_permutations
def calc_ftest(Hs, IHs, Gs, m2, nm):
    """
    Calculate the pseudo-F statistic for each permutation.

    :param Hs: is interpreted as:

        - an ``[n^2*(p+1)]`` array with the flattened H2 matrix of each
          permutation, one per column
    :type Hs: 2D `numpy.array`

    :param IHs: is interpreted as:

        - an ``[n^2*(p+1)]`` array with the flattened I - H matrix of each
          permutation, one per column
    :type IHs: 2D `numpy.array`

    :param Gs: is interpreted as:

        - an ``[n^2*a]`` array of flattened Gower-centered distance
          matrices, where ``a`` is 1 in practice
    :type Gs: 2D `numpy.array`

    :param m2: the number of predictors minus the number of tests (1)
    :type m2: `float`

    :param nm: the number of subjects minus the number of predictors
    :type nm: `float`

    :return: a ``[(p+1)*a]`` array of F statistics, one per permutation
    :rtype: 2D `numpy.array`
    """
    # F = (explained SS / m2) / (residual SS / nm)
    numerator = Hs.T.dot(Gs) / m2
    denominator = IHs.T.dot(Gs) / nm
    return numerator / denominator
def fperms_to_pvals(F_perms):
    """
    Calculate permutation p-values from the permutation test statistics.

    Row 0 holds the observed statistic for each test; the remaining ``p``
    rows hold one statistic per permutation.  The p-value is the fraction
    of permuted statistics at least as large as the observed one.
    NOTE(review): this uses j/p rather than the (j+1)/(p+1) convention —
    confirm that is intended.

    :param F_perms: is interpreted as:

        - a ``[(p+1)*t]`` array of F statistics, one column per test
    :type F_perms: 2D `numpy.array`

    :return: the permutation p-value of the F statistic for each test
    :rtype: 1D `numpy.array`
    """
    permutations = F_perms.shape[0] - 1
    exceedances = (F_perms[1:] >= F_perms[0]).sum(axis=0)
    return exceedances.astype('float') / permutations