Coverage for pesummary/core/file/formats/hdf5.py: 79.1%

86 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-02 08:42 +0000

1# Licensed under an MIT style license -- see LICENSE.md 

2 

3import h5py 

4import numpy as np 

5from pesummary.core.file.formats.base_read import Read 

6from pesummary.utils.dict import load_recursively, paths_to_key 

7 

8__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"] 

9 

10 

def read_hdf5(path, **kwargs):
    """Read the parameters and samples stored in a .hdf5 file

    The deepdish based reader is attempted first. If it raises any
    Exception, the file is read with the h5py based reader instead.

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    kwargs: dict
        all kwargs are forwarded unchanged to _read_hdf5_with_deepdish and,
        if that fails, to _read_hdf5_with_h5py
    """
    try:
        contents = _read_hdf5_with_deepdish(path, **kwargs)
    except Exception:
        # any failure of the deepdish reader triggers the h5py fallback
        contents = _read_hdf5_with_h5py(path, **kwargs)
    return contents

25 

26 

def _read_hdf5_with_deepdish(path, remove_params=None, path_to_samples=None):
    """Read the parameters and samples stored in a .hdf5 file with deepdish

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    path_to_samples: str, optional
        location of the samples within the file. If None, the file is
        searched for a 'posterior' key and then a 'posterior_samples' key

    Returns
    -------
    parameters: list
        names of the parameters that were read. 'logL' is renamed to
        'log_likelihood'
    data: 2d list
        the real part of every sample cast to float. Each row is a sample
        and each column corresponds to the parameter at the same index in
        parameters

    Raises
    ------
    ValueError
        if path_to_samples is None and neither search key could be resolved
    """
    import deepdish

    contents = deepdish.io.load(path)
    if path_to_samples is None:
        try:
            located, = paths_to_key("posterior", contents)
            path_to_samples = located[0]
        except ValueError:
            try:
                located, = paths_to_key("posterior_samples", contents)
                path_to_samples = located[0]
            except ValueError:
                raise ValueError(
                    "Unable to find a 'posterior' or 'posterior_samples' group "
                    "in the file '{}'".format(path)
                )
    posterior, = load_recursively(path_to_samples, contents)

    parameters = list(posterior.keys())
    if remove_params is not None:
        for unwanted in remove_params:
            if unwanted in parameters:
                parameters.remove(unwanted)

    # the first remaining parameter fixes the number of rows in the table
    n_rows = len(posterior[parameters[0]])
    table = np.zeros([n_rows, len(parameters)])
    for column, name in enumerate(parameters):
        for row, value in enumerate(posterior[name]):
            table[row][column] = float(np.real(value))
    data = table.tolist()

    parameters = [
        "log_likelihood" if name == "logL" else name for name in parameters
    ]
    return parameters, data

68 

69 

def _read_hdf5_with_h5py(
    path, remove_params=None, path_to_samples=None,
    return_posterior_dataset=False
):
    """Grab the parameters and samples in a .hdf5 file with h5py

    Three layouts are handled for the object found at path_to_samples:
    a group holding one dataset per parameter, a group holding a
    'parameter_names' dataset alongside a 'samples' dataset, and a single
    structured dataset whose field names are the parameters.

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table. This is not
        applied when the samples are stored as a structured dataset
    path_to_samples: str, optional
        path to the group/dataset containing the samples within the file.
        If None, it is guessed with Read.guess_path_to_samples
    return_posterior_dataset: Bool, optional
        if True, the h5py object found at path_to_samples is returned as a
        third element. The file is deliberately left open in this case so
        that the returned object remains usable

    Returns
    -------
    parameters: list
        names of the parameters that were read
    samples: np.ndarray or 2d list
        table of samples. A 2d list is returned when the samples are stored
        as a structured dataset
    posterior: h5py object
        only returned if return_posterior_dataset is True

    Raises
    ------
    ValueError
        if the object at path_to_samples is neither a group nor a dataset
    """
    import h5py

    if path_to_samples is None:
        path_to_samples = Read.guess_path_to_samples(path)

    def _retain(names):
        # return a new list of names with anything in remove_params dropped
        if remove_params is None:
            return list(names)
        return [i for i in names if i not in remove_params]

    f = h5py.File(path, 'r')
    leave_open = False
    try:
        posterior = f[path_to_samples]
        is_group = isinstance(posterior, h5py.Group)
        if is_group and "parameter_names" not in posterior.keys():
            # one dataset per parameter; a group can only be indexed by name
            parameters = _retain(posterior.keys())
            samples = np.array([posterior[i] for i in parameters]).T
            already_stored = (
                "loglr" in parameters or "log_likelihood" in parameters
            )
            loglr_available = (
                "likelihood_stats" in f.keys()
                and "loglr" in f["likelihood_stats"]
            )
            if loglr_available and not already_stored:
                # samples is a numpy array, so the likelihood is added as an
                # extra column rather than appended row by row
                loglr = np.array(f["likelihood_stats/loglr"], dtype=float)
                samples = np.column_stack([samples, loglr[:len(samples)]])
                parameters.append("log_likelihood")
        elif is_group:
            original_parameters = [
                i.decode("utf-8") if isinstance(i, bytes) else i for i in
                posterior["parameter_names"]
            ]
            parameters = _retain(original_parameters)
            samples = np.array(posterior["samples"])
            # keep the columns in step with the retained parameter names.
            # Only done when the table has one column per stored name,
            # otherwise the table is returned untouched
            one_column_per_name = (
                samples.ndim == 2
                and samples.shape[1] == len(original_parameters)
            )
            if len(parameters) != len(original_parameters) and one_column_per_name:
                columns = [
                    num for num, i in enumerate(original_parameters)
                    if i not in remove_params
                ]
                samples = samples[:, columns]
        elif isinstance(posterior, h5py.Dataset):
            # NOTE(review): remove_params is ignored for structured datasets
            # and the view assumes every field is a float -- confirm
            parameters = list(posterior.dtype.names)
            samples = np.array(posterior).view(
                (float, len(parameters))
            ).tolist()
        else:
            raise ValueError(
                "Unable to read samples from '{}' in the file '{}' as it is "
                "neither a group nor a dataset".format(path_to_samples, path)
            )
        if return_posterior_dataset:
            # the returned object is only valid while the file is open
            leave_open = True
            return parameters, samples, posterior
        return parameters, samples
    finally:
        if not leave_open:
            f.close()

134 

135 

def _write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    dataset_name="posterior_samples", **kwargs
):
    """Save a single set of samples as a structured dataset in a hdf5 file

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the hdf5 file
    label: str, optional
        The label of the analysis. This is passed to check_filename along
        with the default filename 'pesummary_{}.h5'
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        passed to check_filename; presumably allows an existing file of the
        same name to be overwritten
    dataset_name: str, optional
        name of the dataset to store a set of samples. Default posterior_samples
    kwargs: dict
        accepted for interface compatibility and otherwise unused
    """
    from pesummary.utils.samples_dict import SamplesDict
    from pesummary.utils.utils import check_filename

    target = check_filename(
        default_filename="pesummary_{}.h5", outdir=outdir, label=label,
        filename=filename, overwrite=overwrite
    )
    # transpose so that each parameter maps onto its own column of samples
    table = SamplesDict(parameters, np.array(samples).T)
    structured = table.to_structured_array()
    with h5py.File(target, "w") as handle:
        handle.create_dataset(dataset_name, data=structured)

172 

173 

def write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    **kwargs
):
    """Save a set of samples to a hdf5 file

    The writing is delegated to _multi_analysis_write, which is handed
    _write_hdf5 as the writer together with file_format="hdf5".

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the hdf5 file
    label: str, optional
        The label of the analysis. This is used in the filename if a
        filename is not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    kwargs: dict
        all additional kwargs are forwarded to _multi_analysis_write
    """
    from pesummary.io.write import _multi_analysis_write

    _multi_analysis_write(
        _write_hdf5,
        parameters,
        samples,
        outdir=outdir,
        label=label,
        filename=filename,
        overwrite=overwrite,
        file_format="hdf5",
        **kwargs
    )