Source code for mgcpy.benchmarks.simulations

import numpy as np


[docs]def gen_coeffs(num_dim): """ Helper function for generating a linear simulation. :param num_dim: number of dimensions for the simulation :return: a vector of coefficients """ coeff_vec = np.array([1 / (x+1) for x in range(num_dim)]) return coeff_vec.reshape(-1, 1)
[docs]def gen_x_unif(num_samp, num_dim, low=-1, high=1): """ Helper function for generating n samples from d-dimensional vector :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :return: uniformly distributed simulated data matrix """ uniform_vec = np.array(np.random.uniform(low=low, high=high, size=num_samp * num_dim)) data_mat = uniform_vec.reshape(num_samp, num_dim) return data_mat
[docs]def linear_sim(num_samp, num_dim, noise=1, indep=False, low=-1, high=1): """ Function for generating a linear simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 y = (np.dot(a=x, b=coeffs) + kappa*noise*gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def exp_sim(num_samp, num_dim, noise=10, indep=False, low=0, high=3): """ Function for generating an exponential simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 10 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to 0 :param high: the upper limit of the data matrix, defaults to 3 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 y = (np.exp(np.dot(a=x, b=coeffs)) + kappa*noise*gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def cub_sim(num_samp, num_dim, noise=80, indep=False, low=-1, high=1, cub_coeff=np.array([-12, 48, 128]), scale=1/3): """ Function for generating a cubic simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 80 :param indep: whether to sample x and y independently, defaults to False :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :param cub_coeff: coefficients of the cubic function where each value corresponds to the respective order coefficientj, defaults to [-12, 48, 128] :param scale: scaling center of the cubic, defaults to 1/3 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 x_coeffs = np.dot(a=x, b=coeffs) y = ((cub_coeff[2] * (x_coeffs-scale)**3) + (cub_coeff[1] * (x_coeffs-scale)**2) + (cub_coeff[0] * (x_coeffs-scale)) + kappa * noise * gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def joint_sim(num_samp, num_dim, noise=0.5): """ Function for generating a joint-normal simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 80 :return: the data matrix and a response array """ gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim > 1): kappa = 1 else: kappa = 0 rho = 1 / (2*num_dim) sig = np.diag(np.ones(shape=(2*num_dim))) sig[num_dim: (2*num_dim), 0: num_dim] = rho sig[0: num_dim, num_dim: (2*num_dim)] = rho samp = (np.random.multivariate_normal(cov=sig, mean=np.zeros(2*num_dim), size=num_samp)) if num_dim == 1: y = samp[:, (num_dim):(2*num_dim)] + kappa*noise*gauss_noise x = samp[:, 0:num_dim] else: y = samp[:, (num_dim+1):(2*num_dim)] + kappa*noise*gauss_noise x = samp[:, 0:num_dim] return x, y
[docs]def step_sim(num_samp, num_dim, noise=1, indep=False, low=-1, high=1): """ Function for generating a joint-normal simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim > 1): kappa = 1 else: kappa = 0 x_coeff = np.dot(a=x, b=coeffs) x_coeff = 1 * (x_coeff > 0) y = (x_coeff + kappa*noise*gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def quad_sim(num_samp, num_dim, noise=1, indep=False, low=-1, high=1): """ Function for generating a quadratic simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 y = ((np.dot(a=x, b=coeffs)**2) + kappa*noise*gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def w_sim(num_samp, num_dim, noise=1, indep=False, low=-1, high=1): """ Function for generating a w-shaped simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) u = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 gauss_noise = np.random.normal(loc=0, scale=1, size=(x.shape[0], 1)) y = (4 * ((np.dot(a=x, b=coeffs)**2 - 0.5)**2 + np.dot(a=u, b=coeffs)/500) + kappa*noise*gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def spiral_sim(num_samp, num_dim, noise=0.4, low=0, high=5): """ Function for generating a spiral simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 0.4 :param low: the lower limit of the data matrix, defaults to 0 :param high: the upper limit of the data matrix, defaults to 5 :return: the data matrix and a response array """ uniform_dist = gen_x_unif(num_samp, num_dim=1, low=low, high=high) the_x = np.array(np.cos(np.pi * uniform_dist)).reshape(num_samp, 1) y = uniform_dist * np.sin(np.pi * uniform_dist) x = np.zeros(shape=(num_samp, num_dim)) if num_dim > 1: for i in range(num_dim - 1): x[:, i] = np.squeeze((y * np.power(the_x, i))) x[:, num_dim-1] = np.squeeze(uniform_dist * the_x) gauss_noise = np.random.normal(loc=0, scale=1, size=(x.shape[0], 1)) y = y + noise*num_dim*gauss_noise return x, y
[docs]def ubern_sim(num_samp, num_dim, noise=0.5, bern_prob=0.5): """ Function for generating an uncorrelated bernoulli simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 0.5 :param bern_prob: the bernoulli probability, defaults to 0.5 :return: the data matrix and a response array """ if num_dim > 1: kappa = 1 else: kappa = 0 binom_dist = np.random.binomial(1, p=bern_prob, size=(num_samp, 1)) sig = np.diag(np.ones(shape=num_dim) * num_dim) gauss_noise1 = (np.random.multivariate_normal( cov=sig, mean=np.zeros(num_dim), size=num_samp )) x = (np.array(np.random.binomial(1, size=num_samp * num_dim, p=bern_prob)).reshape(num_samp, num_dim) + noise*gauss_noise1) coeffs = gen_coeffs(num_dim) y = np.empty(shape=(num_samp, 1)) y[:] = np.nan gauss_noise2 = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) for i in range(num_samp): y[i] = (np.dot((2*binom_dist[i]-1) * coeffs.T, x[i, :]) + kappa*noise*gauss_noise2[i]) return x, y
[docs]def log_sim(num_samp, num_dim, noise=3, indep=False, base=2): """ Function for generating a logarithmic simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param base: the base of the log, defaults to 2 :return: the data matrix and a response array """ sig = np.diag(np.ones(shape=(num_dim))) x = (np.random.multivariate_normal(cov=sig, mean=np.zeros(num_dim), size=num_samp)) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 y = (base * np.divide(np.log(np.abs(x)), np.log(base) + kappa*noise*gauss_noise)) if indep: x = (np.random.multivariate_normal(cov=sig, mean=np.zeros(num_dim), size=num_samp)) return x, y
[docs]def root_sim(num_samp, num_dim, noise=0.25, indep=False, low=-1, high=1, n_root=4): """ Function for generating an nth root simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :param n_root: the root of the simulation, defaults to 4 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 y = (np.power(np.abs(np.dot(a=x, b=coeffs.reshape(num_dim, 1))), 1/n_root) + kappa*noise*gauss_noise) if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) return x, y
[docs]def sin_sim(num_samp, num_dim, noise=1, indep=False, low=-1, high=1, period=4*np.pi): """ Function for generating a sinusoid simulation. Note: For producing 4*pi and 16*pi simulations, change the ``period`` to the respective value. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 1 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :param period: the period of the sine wave, defaults to 4*pi :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) if num_dim > 1 or noise > 0: sig = np.diag(np.ones(shape=(num_dim))) v = (np.random.multivariate_normal(cov=sig, mean=np.zeros(num_dim), size=num_samp)) x = x + 0.02*num_dim*v gauss_noise = np.random.normal(loc=0, scale=1, size=(num_samp, 1)) if (num_dim == 1): kappa = 1 else: kappa = 0 y = np.sin(x*period) + kappa*noise*gauss_noise if indep: x = gen_x_unif(num_samp, num_dim, low=low, high=high) if num_dim > 1: sig = np.diag(np.ones(shape=(num_dim))) v = (np.random.multivariate_normal(cov=sig, mean=np.zeros(num_dim), size=num_samp)) x = x + 0.02*num_dim*v return x, y
[docs]def square_sim(num_samp, num_dim, noise=1, indep=False, low=-1, high=1, period=-np.pi/8): """ Function for generating a square or diamond simulation. Note: For producing square or diamond simulations, change the ``period`` to -pi/8 or -pi/4. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 0.05 :param indep: whether to sample x and y independently, defaults to false :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :param period: the period of the sine and cosine square equation, defaults to 4*pi :return: the data matrix and a response array """ u = gen_x_unif(num_samp, num_dim, low=low, high=high) v = gen_x_unif(num_samp, num_dim, low=low, high=high) sig = np.diag(np.ones(shape=(num_dim))) gauss_noise = (np.random.multivariate_normal(cov=sig, mean=np.zeros(num_dim), size=num_samp)) x = u*np.cos(period) + v*np.sin(period) + 0.05*num_dim*gauss_noise y = -u*np.sin(period) + v*np.cos(period) if indep: u = gen_x_unif(num_samp, num_dim, low=low, high=high) v = gen_x_unif(num_samp, num_dim, low=low, high=high) sig = np.diag(np.ones(shape=(num_dim))) gauss_noise = (np.random.multivariate_normal(cov=sig, mean=np.zeros( num_dim), size=num_samp)) x = u*np.cos(period) + v*np.sin(period) + 0.05*num_dim*gauss_noise return x, y
[docs]def two_parab_sim(num_samp, num_dim, noise=2, low=-1, high=1, prob=0.5): """ Function for generating a two parabolas simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 2 :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :param prob: the binomial probability, defaults to 0.5 :return: the data matrix and a response array """ x = gen_x_unif(num_samp, num_dim, low=low, high=high) coeffs = gen_coeffs(num_dim) u = np.random.binomial(1, p=prob, size=(num_samp, 1)) gauss_noise = gen_x_unif(num_samp, num_dim, low=0, high=1) if (num_dim == 1): kappa = 1 else: kappa = 0 y = (np.power(np.dot(x, coeffs.reshape(num_dim, 1)), 2) + noise*kappa*gauss_noise) * (u - 0.5) return x, y
[docs]def circle_sim(num_samp, num_dim, noise=0.4, low=-1, high=1, radius=1): """ Function for generating a circle or ellipse simulation. Note: For producing circle or ellipse simulations, change the ``radius`` to 1 or 5. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param noise: noise level of the simulation, defaults to 0.4 :param low: the lower limit of the data matrix, defaults to -1 :param high: the upper limit of the data matrix, defaults to 1 :param radius: the radius of the circle or ellipse, defaults to 1 :return: the data matrix and a response array """ if num_dim > 1: kappa = 1 else: kappa = 0 x = gen_x_unif(num_samp, num_dim, low=low, high=high) rx = radius * np.ones((num_samp, num_dim)) z = gen_x_unif(num_samp, num_dim, low=low, high=high) sig = np.diag(np.ones(shape=(num_dim))) gauss_noise = (np.random.multivariate_normal(cov=sig, mean=np.zeros(num_dim), size=num_samp)) ry = np.ones((num_samp, num_dim)) x[:, 0] = np.cos(z[:, 0].reshape((num_samp)) * np.pi) for i in range(num_dim - 1): x[:, i+1] = (x[:, i].reshape((num_samp)) * np.cos(z[:, i+1].reshape((num_samp)) * np.pi)) x[:, i] = (x[:, i].reshape((num_samp)) * np.sin(z[:, i+1].reshape((num_samp)) * np.pi)) x = rx * x + noise*rx*gauss_noise y = ry * np.sin(z[:, 0].reshape((num_samp, 1)) * np.pi) return x, y
[docs]def multi_noise_sim(num_samp, num_dim): """ Function for generating a multiplicative noise simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :return: the data matrix and a response array """ sig = np.diag(np.ones(shape=(num_dim))) u = np.random.multivariate_normal( cov=sig, mean=np.zeros(num_dim), size=num_samp) x = np.random.multivariate_normal( cov=sig, mean=np.zeros(num_dim), size=num_samp) y = u * x return x, y
[docs]def multi_indep_sim(num_samp, num_dim, prob=0.5, sep1=3, sep2=2): """ Function for generating a multimodal independence simulation. :param num_samp: number of samples for the simulation :param num_dim: number of dimensions for the simulation :param prob: the binomial probability, defaults to 0.5 :param sep1: determines the size and separation of clusters, defaults to 3 :param sep2: determines the size and separation of clusters, defaults to 2 :return: the data matrix and a response array """ sig = np.diag(np.ones(shape=(num_dim))) u = np.random.multivariate_normal( cov=sig, mean=np.zeros(num_dim), size=num_samp) v = np.random.multivariate_normal( cov=sig, mean=np.zeros(num_dim), size=num_samp) u_2 = np.random.binomial(1, p=prob, size=(num_samp, 1)) v_2 = np.random.binomial(1, p=prob, size=(num_samp, 1)) x = u/sep1 + sep2*u_2 - 1 y = v/sep1 + sep2*v_2 - 1 return x, y