Source code for mgcpy.benchmarks.hypothesis_tests.three_sample_test.power

import numpy as np
import math
from mgcpy.hypothesis_tests.transforms import k_sample_transform


[docs]def generate_three_two_d_gaussians(epsilon, num_samples, type=1): ''' Three 2D Gaussians: - Type 1: all with same mean and covariance. mean = [0, 0] and cov = I - Type 2: two with same mean and covariance. mean = [0, 0] and cov = I; thrid: mean = [0, epsilon], cov = I - Type 3: means 1, 2, and 3 should form an equvilateral triangle on a circle with center (0, 0) and radius `epsilon` in 2d plane with cov = I. ''' # default mean zeros mean_one, mean_two, mean_three = [0, epsilon], [0, epsilon], [0, epsilon] if type == 2: mean_one, mean_two, mean_three = [0, 0], [0, 0], [0, epsilon] elif type == 3: mean_one, mean_two, mean_three = [0, (np.sqrt(3)/3)*epsilon], [-epsilon/2, -(np.sqrt(3)/6)*epsilon], [epsilon/2, -(np.sqrt(3)/6)*epsilon] cov = [[1, 0], [0, 1]] # identity matrix one = np.random.multivariate_normal(mean_one, cov, num_samples) two = np.random.multivariate_normal(mean_two, cov, num_samples) three = np.random.multivariate_normal(mean_three, cov, num_samples) return one, two, three
[docs]def power_given_epsilon(independence_test, simulation_type, epsilon, repeats=1000, alpha=.05, additional_params={}): # test statistics under the null, used to estimate the cutoff value under the null distribution test_stats_null = np.zeros(repeats) # test statistic under the alternative test_stats_alternative = np.zeros(repeats) # direct p values on permutation (now, only for fast_mgc) p_values = np.zeros(repeats) for rep in range(repeats): matrix_X, matrix_Y, matrix_Z = generate_three_two_d_gaussians(epsilon, 100, simulation_type) data = np.concatenate([matrix_X, matrix_Y, matrix_Z], axis=0) labels = np.concatenate([np.repeat(1, matrix_X.shape[0]), np.repeat(2, matrix_Y.shape[0]), np.repeat(3, matrix_Z.shape[0])], axis=0).reshape(-1, 1) matrix_U, matrix_V = k_sample_transform(data, labels, is_y_categorical=True) # permutation test if additional_params and additional_params["is_fast"]: p_values[rep], _ = independence_test.p_value(matrix_U, matrix_V, **additional_params) else: permuted_V = np.random.permutation(matrix_V) test_stats_null[rep], _ = independence_test.test_statistic( matrix_U, permuted_V, **additional_params) test_stats_alternative[rep], _ = independence_test.test_statistic( matrix_U, matrix_V, **additional_params) if additional_params and additional_params["is_fast"]: empirical_power = np.where(p_values <= alpha)[0].shape[0] / repeats else: # the cutoff is determined so that 1-alpha of the test statistics under the null distribution # is less than the cutoff cutoff = np.sort(test_stats_null)[math.ceil(repeats*(1-alpha))] # the proportion of test statistics under the alternative which is no less than the cutoff (in which case # the null is rejected) is the empirical power empirical_power = np.where(test_stats_alternative >= cutoff)[0].shape[0] / repeats return empirical_power