Coverage for pesummary/core/file/formats/hdf5.py: 58.1%
86 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
1# Licensed under an MIT style license -- see LICENSE.md
3import h5py
4import numpy as np
5from pesummary.core.file.formats.base_read import Read
6from pesummary.utils.dict import load_recursively, paths_to_key
8__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]
def read_hdf5(path, **kwargs):
    """Grab the parameters and samples in a .hdf5 file

    The file is first read with deepdish; if that fails for any reason the
    plain h5py reader is used as a fallback.

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    kwargs: dict
        all kwargs passed to _read_hdf5_with_deepdish or _read_hdf5_with_h5py functions
    """
    # try the deepdish loader first; any failure (missing dependency,
    # unexpected file layout, ...) triggers the h5py fallback
    try:
        return _read_hdf5_with_deepdish(path, **kwargs)
    except Exception:
        pass
    return _read_hdf5_with_h5py(path, **kwargs)
def _read_hdf5_with_deepdish(path, remove_params=None, path_to_samples=None):
    """Grab the parameters and samples in a .hdf5 file with deepdish

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    path_to_samples: str, optional
        path to the group containing the posterior samples. If not provided,
        the file is searched for a 'posterior' or 'posterior_samples' group

    Returns
    -------
    tuple
        (parameters, data): the list of parameter names and a 2d list of
        samples, where rows are samples and columns correspond to parameters

    Raises
    ------
    ValueError
        if neither a 'posterior' nor a 'posterior_samples' group can be found
    """
    import deepdish

    f = deepdish.io.load(path)
    if path_to_samples is None:
        # look for the posterior table under either conventional name,
        # preferring 'posterior'
        for group in ("posterior", "posterior_samples"):
            try:
                path_to_samples, = paths_to_key(group, f)
                path_to_samples = path_to_samples[0]
                break
            except ValueError:
                continue
        else:
            raise ValueError(
                "Unable to find a 'posterior' or 'posterior_samples' group "
                "in the file '{}'".format(path)
            )
    reduced_f, = load_recursively(path_to_samples, f)
    parameters = list(reduced_f.keys())
    if remove_params is not None:
        parameters = [p for p in parameters if p not in remove_params]
    # build the (n_samples, n_parameters) table in one vectorised step
    # rather than filling the array element-by-element in a python loop;
    # complex values are reduced to their real part as before
    data = np.real(
        np.array([reduced_f[par] for par in parameters])
    ).astype(float).T.tolist()
    # standardise the log likelihood column name
    parameters = [
        "log_likelihood" if par == "logL" else par for par in parameters
    ]
    return parameters, data
def _read_hdf5_with_h5py(
    path, remove_params=None, path_to_samples=None,
    return_posterior_dataset=False
):
    """Grab the parameters and samples in a .hdf5 file with h5py

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    path_to_samples: str, optional
        path to the group/dataset containing the posterior samples. If not
        provided, the path is guessed with Read.guess_path_to_samples
    return_posterior_dataset: Bool, optional
        if True, also return the open h5py object at path_to_samples. The
        file handle is deliberately left open in this case so the returned
        dataset stays usable; the caller owns the handle. Default False
    """
    # h5py is already imported at module level; only copy is needed locally
    import copy

    if path_to_samples is None:
        path_to_samples = Read.guess_path_to_samples(path)

    f = h5py.File(path, 'r')
    c1 = isinstance(f[path_to_samples], h5py._hl.group.Group)
    if c1 and "parameter_names" not in f[path_to_samples].keys():
        # layout: one dataset per parameter inside the group
        original_parameters = [i for i in f[path_to_samples].keys()]
        if remove_params is not None:
            parameters = [
                i for i in original_parameters if i not in remove_params
            ]
        else:
            parameters = copy.deepcopy(original_parameters)
        try:
            samples = np.array([
                f[path_to_samples][original_parameters.index(i)] for i in
                parameters
            ]).T
        except (AttributeError, KeyError, TypeError):
            # groups cannot be indexed by integer position; fall back to
            # indexing each parameter dataset by name
            samples = np.array([f[path_to_samples][i] for i in parameters]).T
        # NOTE(review): 'or' means this is True whenever at least one of the
        # two names is absent, so a file that already has 'log_likelihood'
        # but no 'loglr' still gets a second likelihood column appended --
        # looks like 'and' was intended; confirm before changing
        cond1 = "loglr" not in parameters or "log_likelihood" not in \
            parameters
        cond2 = "likelihood_stats" in f.keys() and "loglr" in \
            f["likelihood_stats"]
        if cond1 and cond2:
            parameters.append("log_likelihood")
            # bugfix: 'samples' is a numpy array and its rows have no
            # 'append' method (the old per-row append always raised
            # AttributeError); stack the loglr column on instead
            loglr = np.array(f["likelihood_stats/loglr"], dtype=float)
            samples = np.column_stack([samples, loglr])
    elif c1:
        # layout: a 'parameter_names' dataset plus a 2d 'samples' dataset
        original_parameters = [
            i.decode("utf-8") if isinstance(i, bytes) else i for i in
            f[path_to_samples]["parameter_names"]
        ]
        if remove_params is not None:
            parameters = [
                i for i in original_parameters if i not in remove_params
            ]
        else:
            parameters = copy.deepcopy(original_parameters)
        samples = np.array(f[path_to_samples]["samples"])
    elif isinstance(f[path_to_samples], h5py._hl.dataset.Dataset):
        # layout: a single structured-array dataset
        parameters = list(f[path_to_samples].dtype.names)
        samples = np.array(f[path_to_samples]).view((float, len(parameters))).tolist()
    if return_posterior_dataset:
        return parameters, samples, f[path_to_samples]
    f.close()
    return parameters, samples
def _write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    dataset_name="posterior_samples", **kwargs
):
    """Write a set of samples to a hdf5 file

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the dat file
    label: str, optional
        The label of the analysis. This is used in the filename if a filename
        if not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    dataset_name: str, optional
        name of the dataset to store a set of samples. Default posterior_samples
    """
    from pesummary.utils.samples_dict import SamplesDict
    from pesummary.utils.utils import check_filename

    # resolve the output filename (default pattern uses the analysis label)
    filename = check_filename(
        default_filename="pesummary_{}.h5", outdir=outdir, label=label,
        filename=filename, overwrite=overwrite
    )
    # columns of 'samples' correspond to parameters, so transpose before
    # building the structured array
    structured = SamplesDict(parameters, np.array(samples).T).to_structured_array()
    with h5py.File(filename, "w") as hdf_file:
        hdf_file.create_dataset(dataset_name, data=structured)
def write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    **kwargs
):
    """Write a set of samples to a hdf5 file

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the dat file
    label: str, optional
        The label of the analysis. This is used in the filename if a filename
        if not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    """
    from pesummary.io.write import _multi_analysis_write

    # delegate to the multi-analysis writer, which calls _write_hdf5 once
    # per analysis
    _multi_analysis_write(
        _write_hdf5, parameters, samples, file_format="hdf5", outdir=outdir,
        label=label, filename=filename, overwrite=overwrite, **kwargs
    )