Coverage for pesummary/utils/credible_interval.py: 76.1%

67 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-02 08:42 +0000

1# Licensed under an MIT style license -- see LICENSE.md 

2 

3import numpy as np 

4from scipy.stats import gaussian_kde 

5import scipy.integrate as integrate 

6 

7 

8def weighted_credible_interval(samples, percentile, weights): 

9 """Compute the credible interval from a set of weighted samples. 

10 

11 Parameters 

12 ---------- 

13 samples: np.ndarray 

14 array of samples you wish to calculate the credible interval for 

15 percentile: float, list 

16 either a float, or a list of floats, giving the percentile you wish to 

17 use 

18 weights: np.ndarray 

19 array of weights of length samples 

20 """ 

21 float_type = isinstance(percentile, (float, int, np.number)) 

22 percentile = np.array(percentile).astype(float) 

23 weights = np.asarray(weights) 

24 if percentile.ndim < 1: 

25 percentile = np.array([percentile]) 

26 ind_sorted = np.argsort(samples) 

27 sorted_data = samples[ind_sorted] 

28 sorted_weights = weights[ind_sorted] 

29 Sn = 100 * sorted_weights.cumsum() / sorted_weights.sum() 

30 data = np.zeros_like(percentile) 

31 for num, p in enumerate(percentile): 

32 inds = np.argwhere(Sn >= p)[0] 

33 data[num] = np.interp(percentile, Sn[inds], sorted_data[inds])[0] 

34 

35 # conserve input type 

36 if float_type: 

37 return float(data[0]) 

38 return data 

39 

40def credible_interval(samples, percentile, weights=None): 

41 """Compute the credible interval for a set of samples. 

42 

43 Parameters 

44 ---------- 

45 samples: np.ndarray 

46 array of samples you wish to calculate the credible interval for 

47 percentile: float, list 

48 percentile(s) you wish to compute. 

49 weights: np.ndarray, optional 

50 array of weights of length samples 

51 """ 

52 if weights is None: 

53 return np.percentile(samples, percentile) 

54 return weighted_credible_interval(samples, percentile, weights) 

55 

56 

57def two_sided_credible_interval(samples, percentile, weights=None): 

58 """Compute the 2-sided credible interval from a set of samples. 

59 

60 Parameters 

61 ---------- 

62 samples: np.ndarray 

63 array of samples you wish to calculate the credible interval for 

64 percentile: float, list 

65 percentile(s) you wish to compute. If a single value if provided, 

66 the upper bound is defined as 50 + percentile / 2 and the lower 

67 bound is defined as 50 - percentile / 2. If a list of 2 values 

68 if provided, the first is assumed to be the lower bound and 

69 the second is assumed to be the upper bound 

70 weights: np.ndarray, optional 

71 array of weights of length samples 

72 """ 

73 percentile = np.array(percentile).astype(float) 

74 if percentile.ndim and len(percentile) > 2: 

75 raise ValueError("Please provide a single percentile to compute") 

76 elif percentile.ndim and len(percentile) == 1: 

77 percentile = np.array(percentile[0]).astype(float) 

78 if not percentile.ndim: 

79 percentile = [50 - percentile / 2, 50 + percentile / 2] 

80 return credible_interval(samples, percentile, weights=weights) 

81 

82def hpd_two_sided_credible_interval( 

83 samples, percentile, weights=None, xlow=None, xhigh=None, xN=1000, 

84 kde=gaussian_kde, kde_kwargs={}, x=None, pdf=None 

85): 

86 """Compute the highest posterior density (HPD) 2-sided credible interval 

87 from a set of samples. Code adapted from BNS_plots.ipynb located here: 

88 https://git.ligo.org/publications/O2/cbc-catalog/-/blob/master/event_plots 

89 

90 Parameters 

91 ---------- 

92 samples: np.ndarray 

93 array of samples you wish to calculate the credible interval for 

94 percentile: float 

95 the percentile you wish to use 

96 weights: np.ndarray, optional 

97 array of weights of length samples. These weights are added to 

98 `kde_kwargs` and passed to kde. Only used if pdf=None 

99 xlow: float, optional 

100 minimum value to evaluate the KDE. If not provided, and x=None, xlow is 

101 set to the minimum sample 

102 xhigh: float, optional 

103 maximum value to evaluate the KDE. If not provided, and x=None, xhigh is 

104 set to the maximum sample 

105 xN: float, optional 

106 number of points to evaluate the KDE between xlow and xhigh. Only used 

107 if x=None. Default 1000 

108 kde: func, optional 

109 function to use to generate the KDE. Default scipy.stats.gaussian_kde 

110 kde_kwargs: dict, optional 

111 kwargs to pass to kde. Default {} 

112 x: np.ndarray, optional 

113 array of points to evaluate the KDE. If not provided, 

114 x=np.linspace(xlow, xhigh, xN). Default None. 

115 pdf: np.ndarray, optional 

116 the PDF to use when calculating the 2-sided credible interval. If 

117 provided, kde and kde_kwargs are not used. The array of points used to 

118 evaluate the PDF must be provided. Default None 

119 """ 

120 percentile = np.array(percentile).astype(float) 

121 if percentile.ndim >= 1: 

122 raise ValueError( 

123 "Unable to pass more than one percentile when using the highest" 

124 "posterior density method" 

125 ) 

126 if xlow is None and x is None: 

127 xlow = np.min(samples) 

128 elif xlow is None: 

129 xlow = np.min(x) 

130 if xhigh is None and x is None: 

131 xhigh = np.max(samples) 

132 elif xhigh is None: 

133 xhigh = np.max(x) 

134 if pdf is not None and x is None: 

135 raise ValueError( 

136 "Please provide the array of points used to evaluate the PDF" 

137 ) 

138 if x is None: 

139 x = np.linspace(xlow, xhigh, xN) 

140 if pdf is None: 

141 if weights is not None: 

142 kde_kwargs.update({"weights": weights}) 

143 pdf = kde(samples, **kde_kwargs)(x) 

144 

145 credible_interval = [] 

146 ilow = 0 

147 ihigh = len(x) - 1 

148 xlow, xhigh = x[ilow], x[ihigh] 

149 area = 1.0 

150 while area > (percentile / 100.0): 

151 xlow, xhigh = x[ilow], x[ihigh] 

152 x_interval = x[ilow:ihigh + 1] 

153 pdf_interval = pdf[ilow:ihigh + 1] 

154 if pdf[ilow] > pdf[ihigh]: 

155 ihigh -= 1 

156 else: 

157 ilow += 1 

158 area = integrate.simps(pdf_interval, x_interval) 

159 return np.array([xlow, xhigh]), area