Source code for copulas.univariate.gaussian_kde

"""GaussianKDE module."""

import numpy as np
from scipy.special import ndtr
from scipy.stats import gaussian_kde

from copulas import EPSILON, random_state, store_args, validate_random_state
from copulas.optimize import bisect, chandrupatla
from copulas.univariate.base import BoundedType, ParametricType, ScipyModel


[docs]class GaussianKDE(ScipyModel): """A wrapper for gaussian Kernel density estimation. It was implemented in scipy.stats toolbox. gaussian_kde is slower than statsmodels but allows more flexibility. When a sample_size is provided the fit method will sample the data, and mask the real information. Also, ensure the number of entries will be always the value of sample_size. Args: sample_size(int): amount of parameters to sample """ PARAMETRIC = ParametricType.NON_PARAMETRIC BOUNDED = BoundedType.UNBOUNDED MODEL_CLASS = gaussian_kde @store_args def __init__(self, sample_size=None, random_state=None, bw_method=None, weights=None): self.random_state = validate_random_state(random_state) self._sample_size = sample_size self.bw_method = bw_method self.weights = weights def _get_model(self): dataset = self._params['dataset'] self._sample_size = self._sample_size or len(dataset) return gaussian_kde(dataset, bw_method=self.bw_method, weights=self.weights) def _get_bounds(self): X = self._params['dataset'] lower = np.min(X) - (5 * np.std(X)) upper = np.max(X) + (5 * np.std(X)) return lower, upper
[docs] def probability_density(self, X): """Compute the probability density for each point in X. Arguments: X (numpy.ndarray): Values for which the probability density will be computed. It must have shape (n, 1). Returns: numpy.ndarray: Probability density values for points in X. Raises: NotFittedError: if the model is not fitted. """ self.check_fit() return self._model.evaluate(X)
@random_state def sample(self, n_samples=1): """Sample values from this model. Argument: n_samples (int): Number of values to sample Returns: numpy.ndarray: Array of shape (n_samples, 1) with values randomly sampled from this model distribution. Raises: NotFittedError: if the model is not fitted. """ self.check_fit() return self._model.resample(size=n_samples)[0]
[docs] def cumulative_distribution(self, X): """Compute the cumulative distribution value for each point in X. Arguments: X (numpy.ndarray): Values for which the cumulative distribution will be computed. It must have shape (n, 1). Returns: numpy.ndarray: Cumulative distribution values for points in X. Raises: NotFittedError: if the model is not fitted. """ self.check_fit() X = np.array(X) stdev = np.sqrt(self._model.covariance[0, 0]) lower = ndtr((self._get_bounds()[0] - self._model.dataset) / stdev)[0] uppers = ndtr((X[:, None] - self._model.dataset) / stdev) return (uppers - lower).dot(self._model.weights)
[docs] def percent_point(self, U, method='chandrupatla'): """Compute the inverse cumulative distribution value for each point in U. Arguments: U (numpy.ndarray): Values for which the cumulative distribution will be computed. It must have shape (n, 1) and values must be in [0,1]. method (str): Whether to use the `chandrupatla` or `bisect` solver. Returns: numpy.ndarray: Inverse cumulative distribution values for points in U. Raises: NotFittedError: if the model is not fitted. """ self.check_fit() if len(U.shape) > 1: raise ValueError(f'Expected 1d array, got {(U, )}.') if np.any(U > 1.0) or np.any(U < 0.0): raise ValueError('Expected values in range [0.0, 1.0].') is_one = U >= 1.0 - EPSILON is_zero = U <= EPSILON is_valid = ~(is_zero | is_one) lower, upper = self._get_bounds() def _f(X): return self.cumulative_distribution(X) - U[is_valid] X = np.zeros(U.shape) X[is_one] = float('inf') X[is_zero] = float('-inf') if is_valid.any(): lower = np.full(U[is_valid].shape, lower) upper = np.full(U[is_valid].shape, upper) if method == 'bisect': X[is_valid] = bisect(_f, lower, upper) else: X[is_valid] = chandrupatla(_f, lower, upper) return X
def _fit_constant(self, X): sample_size = self._sample_size or len(X) constant = np.unique(X)[0] self._params = { 'dataset': [constant] * sample_size, } def _fit(self, X): if self._sample_size: X = gaussian_kde(X, bw_method=self.bw_method, weights=self.weights).resample(self._sample_size) self._params = { 'dataset': X.tolist() } self._model = self._get_model() def _is_constant(self): return len(np.unique(self._params['dataset'])) == 1 def _extract_constant(self): return self._params['dataset'][0] def _set_params(self, params): """Set the parameters of this univariate. Args: params (dict): Parameters to recreate this instance. """ self._params = params.copy() if self._is_constant(): constant = self._extract_constant() self._set_constant_value(constant) else: self._model = self._get_model()