Source code for mgcpy.independence_tests.utils.mdmr_functions

import numpy as np

DTYPE = np.float64
ITYPE = np.int32


[docs]def check_rank(X):
    """
    This function checks if X is rank deficient.

    :param matrix_X: is interpreted as:

        - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
    :type matrix_X: 2D `numpy.array`

    :rtype: None

    :raise: Raises Exception if X matrix is rank deficient.
    """
    rank = np.linalg.matrix_rank(X)
    if rank < min(X.shape[0], X.shape[1]):
        raise Exception("matrix is rank deficient (rank %i vs cols %i)" % (rank, X.shape[1]))


[docs]def hatify(X):
    """
    Calculates the "hat" matrix.

    :param X: is interpreted as:

        - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
    :type X: 2D `numpy.array`

    :return: returns the hat matrix of the data matrix input.
    :rtype: 2D `numpy.array`
    """
    return X.dot(np.linalg.inv(X.T.dot(X))).dot(X.T)


[docs]def gower_center(Y):
    """
    Computes Gower's centered similarity matrix.

    :param Y: is interpreted as:

        - a ``[n*n]`` distance matrix
    :type Y: 2D `numpy.array`

    :return: returns the gower centered similarity matrix of the input matrix.
    :rtype: 2D `numpy.array`
    """
    n = Y.shape[0]
    I = np.eye(n, n)
    uno = np.ones((n, 1))

    A = -0.5 * (Y ** 2)
    C = I - (1.0 / n) * uno.dot(uno.T)
    G = C.dot(A).dot(C)

    return G


[docs]def gower_center_many(Ys):
    """
    Gower centers each matrix in the input, which is a special centering process disucssed in detail in Gower (1966).

    :param Ys: is interpreted as:

        - an array of ``[n^2*1]`` distance matrices
    :type Ys: 2D `numpy.array`
        Note: in practice this function is only run on one matrix currently.
        Due to this, Ys will just be a 1D `numpy.array`

    :return: returns the gower centered similarity matrix of the all input matrices.
    :rtype: 2D `numpy.array`
    """
    observations = int(np.sqrt(Ys.shape[0]))
    tests = Ys.shape[1]
    Gs = np.zeros_like(Ys)

    for i in range(tests):
        #        print(type(observations))
        D = Ys[:, i].reshape(observations, observations)
        Gs[:, i] = gower_center(D).flatten()

    return Gs


[docs]def gen_H2_perms(X, predictors, permutation_indexes):
    """
    Return H2 for each permutation of X indices, where H2 is the hat matrix
    minus the hat matrix of the untested columns.

    :param X: is interpreted as:

        - a ``[n*d+1]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
        and a column of ones placed before the matrix
    :type X: 2D `numpy.array`

    :param predictors: is interpreted as:

        - a ``[1*d]`` array with the number of each variable in X used as a predictor
    :type predictors: 1D `numpy.array`

    :param permutation_indexes: is interpreted as:

        - a ``[p+1*n]`` matrix where p is the number of permutations given in the main code.
        This matrix has p permutations of indexes of the X data.
    :type permutation_indexes: 2D `numpy.array`

    :return: a ``[p+1*n^2]`` array of the flattened H2 matrices for each permutation
    :rtype: 2D `numpy.array`
    """
    permutations, observations = permutation_indexes.shape
    variables = X.shape[1]

    covariates = [i for i in range(variables) if i not in predictors]
    H2_permutations = np.zeros((observations ** 2, permutations))
    for i in range(permutations):
        perm_X = X[permutation_indexes[i]]
        H2 = hatify(perm_X) - hatify(perm_X[:, covariates])
        H2_permutations[:, i] = H2.flatten()

    return H2_permutations


[docs]def gen_IH_perms(X, predictors, permutation_indexes):
    """
    Return I-H where H is the hat matrix and I is the identity matrix.

    The function calculates this correctly for multiple predictor tests.

    :param X: is interpreted as:

        - a ``[n*d+1]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions
        and a column of ones placed before the matrix
    :type X: 2D `numpy.array`

    :param predictors: is interpreted as:

        - a ``[1*d]`` array with the number of each variable in X used as a predictor
    :type predictors: 1D `numpy.array`

    :param permutation_indexes: is interpreted as:

        - a ``[p+1*n]`` matrix where p is the number of permutations given in the main code.
        This matrix has p permutations of indexes of the X data.
    :type permutation_indexes: 2D `numpy.array`

    :return: a ``[p+1*n^2]`` array of the flattened arrays of the IH matrix for each permutation
    :rtype: 2D `numpy.array`
    """
    permutations, observations = permutation_indexes.shape
    I = np.eye(observations, observations)

    IH_permutations = np.zeros((observations ** 2, permutations))
    for i in range(permutations):
        IH = I - hatify(X[permutation_indexes[i, :]])
        IH_permutations[:, i] = IH.flatten()

    return IH_permutations


[docs]def calc_ftest(Hs, IHs, Gs, m2, nm):
    """
    This function calculates the pseudo-F statistic.

    :param Hs: is interpreted as:

        - a ``[p+1*n^2]`` array with the flattened H2 matrix for each permutation
    :type Hs: 2D `numpy.array`

    :param IHs: is interpreted as:

        - a ``[p+1*n^2]`` array with the flattened IH matrix for each permutation
    :type IHs: 2D `numpy.array`

    :param Gs: is interpreted as:

        - a [n^2*a] array with the gower centered distance matrix where a is in practice 1
    :type Gs: 2D `numpy.array`

    :param m2: is interpreted as:

        - a float equal to the number of predictors minus the number of tests (which will be 1)
    :type m2: `float`

    :param nm: is interpreted as:

        - a float equal to the number of subjects minus the number of predictors
    :type nm: `float`

    :return: a ``[p+1*1]`` array of F statistics for each permutation
    :rtype: 1D `numpy.array`
    """
    N = Hs.T.dot(Gs)
    D = IHs.T.dot(Gs)
    F = (N / m2) / (D / nm)
    return F


[docs]def fperms_to_pvals(F_perms):
    """
    This function calculates the permutation p-value from the test statistics of all permutations.

    :param F_perms: is interpreted as:

        - a ``[p+1*1]`` array of F statistics for each permutation
    :type F_perms: 1D `numpy.array`

    :return: a float which is the permutation p-value of the F-statistic
    :rtype: `float`
    """
    permutations, tests = F_perms.shape
    permutations -= 1
    pvals = np.zeros(tests)
    for i in range(tests):
        j = (F_perms[1:, i] >= F_perms[0, i]).sum().astype('float')
        pvals[i] = (j) / (permutations)
    return pvals