Source code for mgcpy.independence_tests.mdmr

"""
    **Main MDMR Independence Test Module**
"""

import numpy as np
import scipy.spatial as scp
from mgcpy.independence_tests.abstract_class import IndependenceTest
from mgcpy.independence_tests.utils.compute_distance_matrix import \
    compute_distance
from mgcpy.independence_tests.utils.mdmr_functions import (calc_ftest,
                                                           check_rank,
                                                           fperms_to_pvals,
                                                           gen_H2_perms,
                                                           gen_IH_perms,
                                                           gower_center_many)


[docs]class MDMR(IndependenceTest): def __init__(self, compute_distance_matrix=None): ''' :param compute_distance_matrix: a function to compute the pairwise distance matrix, given a data matrix :type compute_distance_matrix: ``FunctionType`` or ``callable()`` ''' IndependenceTest.__init__(self, compute_distance_matrix) self.which_test = "mdmr"
[docs] def get_name(self): return self.which_test
[docs] def test_statistic(self, matrix_X, matrix_Y, permutations=0, individual=0, disttype='cityblock'): """ Computes MDMR Pseudo-F statistic between two datasets. - It first takes the distance matrix of Y (by ) - Next it regresses X into a portion due to Y and a portion due to residual - The p-value is for the null hypothesis that the variable of X is not correlated with Y's distance matrix :param data_matrix_X: (optional, default picked from class attr) is interpreted as: - a ``[n*d]`` data matrix, a matrix with n samples in d dimensions :type data_matrix_X: 2D `numpy.array` :param data_matrix_Y: (optional, default picked from class attr) is interpreted as: - a ``[n*d]`` data matrix, a matrix with n samples in d dimensions :type data_matrix_Y: 2D `numpy.array` :parameter 'individual': -integer, `0` or `1` with value `0` tests the entire X matrix (default) with value `1` tests the entire X matrix and then each predictor variable individually :return: with individual = `0`, returns 1 values, with individual = `1` returns 2 values, containing: -the test statistic of the entire X matrix -for individual = 1, an array with the variable of X in the first column, the test statistic in the second, and the permutation p-value in the third (which here will always be 1) :rtype: list """ X = matrix_X Y = matrix_Y # calculate distance matrix of Y D, _ = compute_distance(Y, np.identity(1), self.compute_distance_matrix) a = D.shape[0]**2 D = D.reshape((a, 1)) predictors = np.arange(X.shape[1]) predsingle = X.shape[1] check_rank(X) # check number of subjects compatible subjects = X.shape[0] if subjects != np.sqrt(D.shape[0]): raise Exception("# of subjects incompatible between X and D") X = np.hstack((np.ones((X.shape[0], 1)), X)) predictors = np.array(predictors) predictors += 1 # Gower Center the distance matrix of Y Gs = gower_center_many(D) m2 = float(X.shape[1] - predictors.shape[0]) nm = float(subjects - X.shape[1]) # form permutation indexes permutation_indexes = np.zeros((permutations + 1, subjects), dtype=np.int) permutation_indexes[0, :] = range(subjects) for i in range(1, permutations + 1): permutation_indexes[i, :] = np.random.permutation(subjects) H2perms = gen_H2_perms(X, predictors, permutation_indexes) IHperms = gen_IH_perms(X, predictors, permutation_indexes) # Calculate test statistic F_perms = calc_ftest(H2perms, IHperms, Gs, m2, nm) # Calculate p-value p_vals = None if permutations > 0: p_vals = fperms_to_pvals(F_perms) F_permtotal = F_perms[0, :] self.test_statistic_ = F_permtotal if individual == 0: return self.test_statistic_, self.test_statistic_metadata_ # code for individual test if individual == 1: results = np.zeros((predsingle, 3)) for predictors in range(1, predsingle+1): predictors = np.array([predictors]) Gs = gower_center_many(D) m2 = float(X.shape[1] - predictors.shape[0]) nm = float(subjects - X.shape[1]) permutation_indexes = np.zeros((permutations + 1, subjects), dtype=np.int) permutation_indexes[0, :] = range(subjects) for i in range(1, permutations + 1): permutation_indexes[i, :] = np.random.permutation(subjects) H2perms = gen_H2_perms(X, predictors, permutation_indexes) IHperms = gen_IH_perms(X, predictors, permutation_indexes) F_perms = calc_ftest(H2perms, IHperms, Gs, m2, nm) p_vals = None if permutations > 0: p_vals = fperms_to_pvals(F_perms) results[predictors-1, 0] = predictors results[predictors-1, 1] = F_perms[0, :] results[predictors-1, 2] = p_vals return F_permtotal, results
[docs] def p_value(self, matrix_X, matrix_Y, replication_factor=1000): """ Tests independence between two datasets using MGC and permutation test. :param matrix_X: is interpreted as: - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions :type matrix_X: 2D `numpy.array` :param matrix_Y: is interpreted as: - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions :type matrix_Y: 2D `numpy.array` :param replication_factor: specifies the number of replications to use for the permutation test. Defaults to ``1000``. :type replication_factor: integer :return: returns a list of two items,that contains: - :p_value: P-value of MGC - :p_value_metadata: :rtype: list """ return super(MDMR, self).p_value(matrix_X, matrix_Y)
[docs] def ind_p_value(self, matrix_X, matrix_Y, permutations=1000, individual=1, disttype='cityblock'): """ Individual predictor variable p-values calculation :param matrix_X: is interpreted as: - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions :type matrix_X: 2D `numpy.array` :param matrix_Y: is interpreted as: - a ``[n*d]`` data matrix, a matrix with ``n`` samples in ``d`` dimensions :type matrix_Y: 2D `numpy.array` """ results = self.test_statistic(matrix_X, matrix_Y, permutations, individual)[1] return results