Source code for pesummary.utils.pdf

# Licensed under an MIT style license -- see LICENSE.md

from scipy.stats._distn_infrastructure import rv_continuous, rv_sample
from scipy.interpolate import interp1d, interp2d
import numpy as np
from pesummary.utils.array import Array

__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]


[docs] class InterpolatedPDF(rv_continuous): """Subclass of the scipy.stats._distn_infrastructure.rv_continous class. This class handles interpolated PDFs Attributes ---------- interpolant: func the interpolant to use when evaluate probabilities Methods ------- pdf: Evaluate the interpolant for a given input and return the PDF at that input """ def __new__(cls, *args, **kwargs): return super(InterpolatedPDF, cls).__new__(cls) def __init__(self, *args, interpolant=None, dimensionality=1, **kwargs): self.interpolant = interpolant self.dimensionality = dimensionality if self.dimensionality > 2: import warnings warnings.warn( "The .rvs() method will currently only work for dimensionalities " "< 3" ) super(InterpolatedPDF, self).__init__(*args, **kwargs) def _pdf(self, x): return self.interpolant(x) def rvs(self, size=None, N=10**6): if size is None: return if self.dimensionality > 1: raise ValueError("method only valid for one dimensional data") _xrange = np.linspace( self.interpolant.x[0], self.interpolant.x[-1], N ) args = _xrange # if self.dimensionality > 1: # _yrange = np.linspace( # self.interpolant.y[0], self.interpolant.y[-1], N # ) # args = [args, _yrange] probs = self.interpolant(args).flatten() inds = np.random.choice( np.arange(N**self.dimensionality), p=probs / np.sum(probs), size=size ) # if self.dimensionality > 1: # row = inds // 10 # column = inds % 10 # return np.array([_xrange[row], _yrange[columns]]).T return args[inds]
[docs] class DiscretePDF(rv_sample): """Subclass of the scipy.stats._distn_infrastructure.rv_sample class. This class handles discrete probabilities. Parameters ---------- args: list, optional 2d list of length 2. First element integers, second element probabilities corresponding to integers. values: list, optional 2d list of length 2. First element integers, second element probabilities corresponding to integers. **kwargs: dict, optional all additional kwargs passed to the scipy.stats._distn_infrastructure.rv_sample class Attributes ---------- x: np.ndarray array of integers with corresponding probabilities probs: np.ndarray array of probabilities corresponding to the array of integers Methods ------- interpolate: interpolate the discrete probabilities and return a continuous InterpolatedPDF object percentile: calculate a percentile from the discrete PDF write: write the discrete PDF to file using the pesummary.io.write module Examples -------- >>> from pesummary.utils.pdf import DiscretePDF >>> numbers = [1, 2, 3, 4] >>> distribution = [0.1, 0.2, 0.3, 0.4] >>> probs = DiscretePDF(numbers, distribution) >>> print(probs.x) [1, 2, 3, 4] >>> probs = DiscretePDF(values=[numbers, distribution]) >>> print(probs.x) [1, 2, 3, 4] """ def __new__(cls, *args, **kwargs): return super(DiscretePDF, cls).__new__(cls) def __init__(self, *args, **kwargs): if len(args): try: self.x, self.probs = args except ValueError: self.x, self.probs = list(args)[0] kwargs["values"] = [self.x, self.probs] args = () else: self._values = kwargs.get("values", None) self.x = self._values[0] if self._values is not None else None self.probs = self._values[1] if self._values is not None else None super(DiscretePDF, self).__init__(*args, **kwargs) def _pmf(self, x): _x = np.atleast_1d(x) if any(value not in self.x for value in _x): raise ValueError( "Unable to compute PMF for some of the provided values as " "provided probabilities are discrete. Either choose a value " "from {} or interpolate the data with " "`.interpolate().pdf({}).`".format(np.array(self.x), np.array(x)) ) return super(DiscretePDF, self)._pmf(x) def rvs(self, *args, **kwargs): return Array(super(DiscretePDF, self).rvs(*args, **kwargs)) def interpolate(self, interpolant=interp1d, include_bounds=True): """Interpolate the discrete pdf and return an InterpolatedPDF object Parameters ---------- interpolant: func function to use to interpolate the discrete pdf include_bounds: Bool, optional if True, pass the upper and lower bounds to InterpolatedPDF """ kwargs = {} if include_bounds: kwargs.update({"a": self.x[0], "b": self.x[-1]}) return InterpolatedPDF( interpolant=interp1d(self.x, self.probs), **kwargs ) def percentile(self, p): """Calculate a percentile from the discrete PDF Parameters ---------- p: float, list percentile/list of percentiles to calculate """ from pesummary.utils.credible_interval import credible_interval return credible_interval(self.x, p, weights=self.probs) def write(self, *args, **kwargs): """Write the discrete PDF to file using the pesummary.io.write module Parameters ---------- *args: tuple all args passed to pesummary.io.write function **kwargs: dict, optional all kwargs passed to the pesummary.io.write function """ from pesummary.io import write if "dataset_name" not in kwargs.keys(): kwargs["dataset_name"] = "discrete_pdf" write( ["x", "PDF"], np.array([self.x, self.probs]).T, *args, **kwargs )
[docs] class DiscretePDF2D(object): """Class to handle 2D discrete probabilities. Parameters ---------- args: list, optional 2d list of length 3. First element integers for the x axis, second element inters for the y axis and third element, the 2d joint probability density. Attributes ---------- x: np.ndarray array of integers for the x axis y: np.ndarray array of integers for the y axis probs: np.ndarray 2D array of probabilities for the x y join probability density Methods ------- marginalize: marginalize the 2D joint probability distribution to obtain the discrete probability density for x and y. Returns the probabilities as as a DiscretePDF2Dplus1D object level_from_confidence: return the level to use for plt.contour for a given confidence. minimum_encompassing_contour_level: return the minimum contour level that encompasses a specific point Examples -------- >>> from pesummary.utils.pdf import DiscretePDF2D >>> x = [1., 2., 3.] >>> y = [0.1, 0.2, 0.3] >>> probs = [ ... [1./9, 1./9, 1./9], ... [1./9, 1./9, 1./9], ... [1./9, 1./9, 1./9] ... ] >>> pdf = DiscretePDF2D(x, y, probs) >>> pdf.x [1.0, 2.0, 3.0] >>> pdf.y [0.1, 0.2, 0.3] >>> pdf.probs array([[0.11111111, 0.11111111, 0.11111111], [0.11111111, 0.11111111, 0.11111111], [0.11111111, 0.11111111, 0.11111111]]) """ def __init__(self, x, y, probability, **kwargs): self.x = x self.y = y self.dx = np.mean(np.diff(x)) self.dy = np.mean(np.diff(y)) self.probs = np.array(probability) if not self.probs.ndim == 2: raise ValueError("Please provide a 2d array of probabilities") if not np.isclose(np.sum(self.probs), 1.): raise ValueError("Probabilities do not sum to 1") def marginalize(self): """Marginalize the 2d probability distribution and return a DiscretePDF2Dplus1D object containing the probability distribution for x, y and xy """ probs_x = np.sum(self.probs, axis=0) * self.dy probs_x /= np.sum(probs_x) probs_y = np.sum(self.probs, axis=1) * self.dx probs_y /= np.sum(probs_y) return DiscretePDF2Dplus1D( self.x, self.y, [probs_x, probs_y, self.probs] ) def interpolate(self, interpolant=interp2d): """Interpolate the discrete pdf and return an InterpolatedPDF object Parameters ---------- interpolant: func function to use to interpolate the discrete pdf include_bounds: Bool, optional if True, pass the upper and lower bounds to InterpolatedPDF """ return InterpolatedPDF( interpolant=interp2d(self.x, self.y, self.probs), dimensionality=2 ) def sort(self, descending=True): """Flatten and sort the stored probabilities Parameters ---------- descending: Bool, optional if True, sort the probabilities in descending order """ _sorted = np.sort(self.probs.flatten()) if descending: return _sorted[::-1] return _sorted def cdf(self, normalize=True): """Return the cumulative distribution function Parameters ---------- normalize: Bool, optional if True, normalize the cumulative distribution function. Default True """ cumsum = np.cumsum(self.sort()) if normalize: cumsum /= np.sum(self.probs) return cumsum def level_from_confidence(self, confidence): """Return the level to use for plt.contour for a given confidence. Confidence must be less than 1. Parameters ---------- confidence: float/list float/list of confidences """ confidence = np.atleast_1d(confidence) if any(_c > 1 for _c in confidence): raise ValueError("confidence must be less than 1") _sorted = self.sort() idx = interp1d( self.cdf(), np.arange(len(_sorted)), bounds_error=False, fill_value=len(_sorted) )(confidence) level = interp1d( np.arange(len(_sorted)), _sorted, bounds_error=False, fill_value=0. )(idx) try: return sorted(level) except TypeError: return level def minimum_encompassing_contour_level(self, x, y, interpolate=False): """Return the minimum encompassing contour level that encompasses a specific point Parameters ---------- point: tuple the point you wish to find the minimum encompassing contour for """ _sorted = self.sort() if interpolate: _interp = self.interpolate() _idx = _interp.interpolant(x, y) else: _x = min( range(len(self.x)), key=lambda i: abs(self.x[i] - x) ) _y = min( range(len(self.y)), key=lambda i: abs(self.y[i] - y) ) _idx = [self.probs[_x, _y]] idx = interp1d( _sorted[::-1], np.arange(len(_sorted))[::-1], bounds_error=False, fill_value=len(_sorted) )(_idx) level = interp1d( np.arange(len(_sorted)), self.cdf(), bounds_error=False, fill_value=1. )(idx) return level def write(self, *args, include_1d=False, **kwargs): """Write the discrete PDF to file using the pesummary.io.write module Parameters ---------- *args: tuple all args passed to pesummary.io.write function include_1d: Bool, optional if True, save the 1D marginalized as well as the 2D PDF to file **kwargs: dict, optional all kwargs passed to the pesummary.io.write function """ from pesummary.io import write if not include_1d: if "dataset_name" not in kwargs.keys(): kwargs["dataset_name"] = "discrete_pdf" X, Y = np.meshgrid(self.x, self.y) write( ["x", "y", "PDF"], np.array([X.ravel(), Y.ravel(), self.probs.flatten()]).T, *args, **kwargs ) else: _pdf = self.marginalize() _pdf.write(*args, **kwargs)
[docs] class DiscretePDF2Dplus1D(object): """Class to handle 2D discrete probabilities alongside 1D discrete probability distributions. Parameters ---------- args: list, optional 3d list of length 3. First element integers for the x axis, second element inters for the y axis and third element, a list containing the probability distribution for x, y and the 2d join probability distribution xy. Attributes ---------- x: np.ndarray array of integers for the x axis y: np.ndarray array of integers for the y axis probs: np.ndarray 3D array of probabilities for the x axis, y axis and the xy joint probability density probs_x: DiscretePDF the probability density function for the x axis stored as a DiscretePDF object probs_y: DiscretePDF the probability density function for the y axis stored as a DiscretePDF object probs_xy: DiscretePDF2D the joint probability density function for the x and y axes stored as DiscretePDF2D object Methods ------- write: write the discrete PDF to file using the pesummary.io.write module """ def __init__(self, x, y, probabilities, **kwargs): self.x = x self.y = y self.probs = [np.array(_p) for _p in probabilities] if len(self.probs) != 3: raise ValueError( "Please provide a tuple of length 3. Probabilities for x " "y and xy" ) if not any(_p.ndim == 2 for _p in self.probs): raise ValueError("Please provide the probabilities for xy") if not len([_p for _p in self.probs if _p.ndim == 1]) == 2: raise ValueError( "2 probabilities array must be 1 dimensional and 1 2 dimensional" ) _x = 0. for num, _p in enumerate(self.probs): if _p.ndim == 1 and _x == 0: self.probs_x = DiscretePDF(self.x, _p) self.probs[num] = self.probs_x _x = 1. elif _p.ndim == 1: self.probs_y = DiscretePDF(self.y, _p) self.probs[num] = self.probs_y else: self.probs_xy = DiscretePDF2D(self.x, self.y, _p) self.probs[num] = self.probs_xy def write(self, *args, **kwargs): """Write the 1D and 2D discrete PDF to file using the pesummary.io.write module Parameters ---------- *args: tuple all args passed to pesummary.io.write function **kwargs: dict, optional all kwargs passed to the pesummary.io.write function """ if "filename" in kwargs.keys() and not isinstance(kwargs["filename"], dict): raise ValueError( "Please provide filenames as a dictionary with keys '1d' and " "'2d'" ) elif "filename" in kwargs.keys(): if not all(k in ["1d", "2d"] for k in kwargs["filename"].keys()): raise ValueError("Filename must be keyed by '1d' and/or '2d'") else: _format = "dat" if "file_format" not in kwargs.keys() else kwargs[ "file_format" ] _default = "pesummary_{}_pdf.%s" % (_format) kwargs["filename"] = { "1d": _default.format("1d"), "2d": _default.format("2d") } _filenames = kwargs.pop("filename") self.probs_x.write(*args, filename=_filenames["1d"], **kwargs) self.probs_xy.write(*args, filename=_filenames["2d"], **kwargs)