Coverage for pesummary/utils/credible_interval.py: 50.7%
67 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
1# Licensed under an MIT style license -- see LICENSE.md
3import numpy as np
4from scipy.stats import gaussian_kde
5import scipy.integrate as integrate
8def weighted_credible_interval(samples, percentile, weights):
9 """Compute the credible interval from a set of weighted samples.
11 Parameters
12 ----------
13 samples: np.ndarray
14 array of samples you wish to calculate the credible interval for
15 percentile: float, list
16 either a float, or a list of floats, giving the percentile you wish to
17 use
18 weights: np.ndarray
19 array of weights of length samples
20 """
21 float_type = isinstance(percentile, (float, int, np.number))
22 percentile = np.array(percentile).astype(float)
23 weights = np.asarray(weights)
24 if percentile.ndim < 1:
25 percentile = np.array([percentile])
26 ind_sorted = np.argsort(samples)
27 sorted_data = samples[ind_sorted]
28 sorted_weights = weights[ind_sorted]
29 Sn = 100 * sorted_weights.cumsum() / sorted_weights.sum()
30 data = np.zeros_like(percentile)
31 for num, p in enumerate(percentile):
32 inds = np.argwhere(Sn >= p)[0]
33 data[num] = np.interp(percentile, Sn[inds], sorted_data[inds])[0]
35 # conserve input type
36 if float_type:
37 return float(data[0])
38 return data
40def credible_interval(samples, percentile, weights=None):
41 """Compute the credible interval for a set of samples.
43 Parameters
44 ----------
45 samples: np.ndarray
46 array of samples you wish to calculate the credible interval for
47 percentile: float, list
48 percentile(s) you wish to compute.
49 weights: np.ndarray, optional
50 array of weights of length samples
51 """
52 if weights is None:
53 return np.percentile(samples, percentile)
54 return weighted_credible_interval(samples, percentile, weights)
57def two_sided_credible_interval(samples, percentile, weights=None):
58 """Compute the 2-sided credible interval from a set of samples.
60 Parameters
61 ----------
62 samples: np.ndarray
63 array of samples you wish to calculate the credible interval for
64 percentile: float, list
65 percentile(s) you wish to compute. If a single value if provided,
66 the upper bound is defined as 50 + percentile / 2 and the lower
67 bound is defined as 50 - percentile / 2. If a list of 2 values
68 if provided, the first is assumed to be the lower bound and
69 the second is assumed to be the upper bound
70 weights: np.ndarray, optional
71 array of weights of length samples
72 """
73 percentile = np.array(percentile).astype(float)
74 if percentile.ndim and len(percentile) > 2:
75 raise ValueError("Please provide a single percentile to compute")
76 elif percentile.ndim and len(percentile) == 1:
77 percentile = np.array(percentile[0]).astype(float)
78 if not percentile.ndim:
79 percentile = [50 - percentile / 2, 50 + percentile / 2]
80 return credible_interval(samples, percentile, weights=weights)
82def hpd_two_sided_credible_interval(
83 samples, percentile, weights=None, xlow=None, xhigh=None, xN=1000,
84 kde=gaussian_kde, kde_kwargs={}, x=None, pdf=None
85):
86 """Compute the highest posterior density (HPD) 2-sided credible interval
87 from a set of samples. Code adapted from BNS_plots.ipynb located here:
88 https://git.ligo.org/publications/O2/cbc-catalog/-/blob/master/event_plots
90 Parameters
91 ----------
92 samples: np.ndarray
93 array of samples you wish to calculate the credible interval for
94 percentile: float
95 the percentile you wish to use
96 weights: np.ndarray, optional
97 array of weights of length samples. These weights are added to
98 `kde_kwargs` and passed to kde. Only used if pdf=None
99 xlow: float, optional
100 minimum value to evaluate the KDE. If not provided, and x=None, xlow is
101 set to the minimum sample
102 xhigh: float, optional
103 maximum value to evaluate the KDE. If not provided, and x=None, xhigh is
104 set to the maximum sample
105 xN: float, optional
106 number of points to evaluate the KDE between xlow and xhigh. Only used
107 if x=None. Default 1000
108 kde: func, optional
109 function to use to generate the KDE. Default scipy.stats.gaussian_kde
110 kde_kwargs: dict, optional
111 kwargs to pass to kde. Default {}
112 x: np.ndarray, optional
113 array of points to evaluate the KDE. If not provided,
114 x=np.linspace(xlow, xhigh, xN). Default None.
115 pdf: np.ndarray, optional
116 the PDF to use when calculating the 2-sided credible interval. If
117 provided, kde and kde_kwargs are not used. The array of points used to
118 evaluate the PDF must be provided. Default None
119 """
120 percentile = np.array(percentile).astype(float)
121 if percentile.ndim >= 1:
122 raise ValueError(
123 "Unable to pass more than one percentile when using the highest"
124 "posterior density method"
125 )
126 if xlow is None and x is None:
127 xlow = np.min(samples)
128 elif xlow is None:
129 xlow = np.min(x)
130 if xhigh is None and x is None:
131 xhigh = np.max(samples)
132 elif xhigh is None:
133 xhigh = np.max(x)
134 if pdf is not None and x is None:
135 raise ValueError(
136 "Please provide the array of points used to evaluate the PDF"
137 )
138 if x is None:
139 x = np.linspace(xlow, xhigh, xN)
140 if pdf is None:
141 if weights is not None:
142 kde_kwargs.update({"weights": weights})
143 pdf = kde(samples, **kde_kwargs)(x)
145 credible_interval = []
146 ilow = 0
147 ihigh = len(x) - 1
148 xlow, xhigh = x[ilow], x[ihigh]
149 area = 1.0
150 while area > (percentile / 100.0):
151 xlow, xhigh = x[ilow], x[ihigh]
152 x_interval = x[ilow:ihigh + 1]
153 pdf_interval = pdf[ilow:ihigh + 1]
154 if pdf[ilow] > pdf[ihigh]:
155 ihigh -= 1
156 else:
157 ilow += 1
158 area = integrate.simpson(pdf_interval, x=x_interval)
159 return np.array([xlow, xhigh]), area