Coverage for pesummary/core/file/formats/pesummary.py: 80.5%
344 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
1# Licensed under an MIT style license -- see LICENSE.md
3from glob import glob
4import os
5import h5py
6import json
7import numpy as np
9from pesummary.core.file.formats.base_read import MultiAnalysisRead
10from pesummary.utils.samples_dict import (
11 MCMCSamplesDict, MultiAnalysisSamplesDict, SamplesDict, Array
12)
13from pesummary.utils.dict import load_recursively
14from pesummary.utils.decorators import deprecation
16__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]
def write_pesummary(
    *args, cls=None, outdir="./", label=None, config=None, injection_data=None,
    file_kwargs=None, file_versions=None, mcmc_samples=False, hdf5=True, **kwargs
):
    """Write a set of samples to a pesummary file

    Parameters
    ----------
    args: tuple
        either a 2d tuple containing the parameters as first argument and samples
        as the second argument, or a SamplesDict, MCMCSamplesDict or
        MultiAnalysisSamplesDict object containing the samples
    cls: class, optional
        PESummary metafile class to use. Default
        pesummary.core.file.meta_file._MetaFile
    outdir: str, optional
        directory to write the file to
    label: str, optional
        The label of the analysis. Default 'dataset'. Ignored when a
        MultiAnalysisSamplesDict is provided, in which case the labels are
        taken from its keys
    config: dict, optional
        configuration file that you wish to save to file
    injection_data: dict, optional
        dictionary containing the injection values that you wish to save to file keyed
        by parameter
    file_kwargs: dict, optional
        any kwargs that you wish to save to file
    file_versions: dict, optional
        version of the data you wish to save to file
    mcmc_samples: Bool, optional
        if True, the set of samples provided are from multiple MCMC chains.
        Overridden when a SamplesDict or MCMCSamplesDict is provided
    hdf5: Bool, optional
        if True, save the pesummary file in hdf5 format, otherwise json
    kwargs: dict
        all other kwargs are passed to the pesummary.core.file.meta_file._MetaFile class

    Raises
    ------
    ValueError
        if label is not a string, or if the samples provided as an array are
        not 2 dimensional and mcmc_samples is False
    """
    from pesummary.core.file.meta_file import _MetaFile

    if cls is None:
        cls = _MetaFile

    if label is not None and not isinstance(label, str):
        raise ValueError("label must be a string")
    labels = ["dataset" if label is None else label]

    first = args[0]
    if isinstance(first, MultiAnalysisSamplesDict):
        # already keyed by label, so it is used directly
        labels = list(first.keys())
        samples = first
    else:
        if isinstance(first, (SamplesDict, MCMCSamplesDict)):
            _samples = first
            # the type of the object decides, not the mcmc_samples kwarg
            mcmc_samples = not isinstance(first, SamplesDict)
        else:
            _parameters, _samples = args
            _samples = np.array(_samples).T
            if mcmc_samples:
                _samples = MCMCSamplesDict(_parameters, _samples)
            elif len(_samples.shape) != 2:
                raise ValueError(
                    "samples must be a 2 dimensional array. If you wish to save more "
                    "than one analysis to file, please provide the samples as a "
                    "pesummary.utils.samples_dict.MultiAnalysisSamplesDict object. If "
                    "you wish to save mcmc chains to file, please add the "
                    "mcmc_samples=True argument"
                )
            else:
                _samples = SamplesDict(_parameters, _samples)
        # single analysis: key the samples by the one label explicitly rather
        # than relying on a NameError to detect the multi-analysis case
        samples = {labels[0]: _samples}

    # Every per-analysis input below must end up keyed by label. A dictionary
    # which is not already keyed by all labels is shared between the labels.
    if file_kwargs is None:
        file_kwargs = {
            _label: {"sampler": {}, "meta_data": {}} for _label in labels
        }
    elif not all(_label in file_kwargs.keys() for _label in labels):
        file_kwargs = {_label: file_kwargs for _label in labels}

    if file_versions is None or isinstance(file_versions, str):
        file_versions = {
            _label: "No version information found" for _label in labels
        }
    elif not all(_label in file_versions.keys() for _label in labels):
        file_versions = {_label: file_versions for _label in labels}

    if injection_data is None:
        # no injection: store nan for every parameter of every analysis
        injection_data = {
            _label: {param: float("nan") for param in samples[_label].keys()}
            for _label in samples.keys()
        }
    elif not all(_label in injection_data.keys() for _label in labels):
        injection_data = {_label: injection_data for _label in labels}

    if config is None:
        config = [None for _ in labels]
    elif isinstance(config, dict):
        config = [config]

    obj = cls(
        samples, labels, config, injection_data, file_versions, file_kwargs,
        mcmc_samples=mcmc_samples, outdir=outdir, hdf5=hdf5, **kwargs
    )
    obj.make_dictionary()
    if hdf5:
        obj.save_to_hdf5(
            obj.data, obj.labels, obj.samples, obj.meta_file,
            mcmc_samples=mcmc_samples
        )
    else:
        obj.save_to_json(obj.data, obj.meta_file)
class PESummary(MultiAnalysisRead):
    """This class handles the existing posterior_samples.h5 file

    Parameters
    ----------
    path_to_results_file: str
        path to the results file you wish to load
    remove_nan_likelihood_samples: Bool, optional
        if True, remove samples which have log_likelihood='nan'. Default True

    Attributes
    ----------
    parameters: nd list
        list of parameters stored in the result file for each analysis stored
        in the result file
    samples: 3d list
        list of samples stored in the result file for each analysis stored
        in the result file
    samples_dict: nested dict
        nested dictionary of samples stored in the result file keyed by their
        respective label
    input_version: str
        version of the result file passed.
    extra_kwargs: list
        list of dictionaries containing kwargs that were extracted from each
        analysis
    injection_parameters: list
        list of dictionaries of injection parameters for each analysis
    injection_dict: dict
        dictionary containing the injection parameters keyed by their respective
        label
    prior: dict
        dictionary of prior samples stored in the result file
    config: dict
        dictionary containing the configuration file stored in the result file
    labels: list
        list of analyses stored in the result file
    weights: dict
        dictionary of weights for each sample for each analysis
    pe_algorithm: dict
        name of the algorithm used to generate each analysis
    preferred: str
        name of the preferred analysis in the result file

    Methods
    -------
    samples_dict_for_label: dict
        dictionary of samples for a specific analysis
    reduced_samples_dict: dict
        dictionary of samples for one or more analyses
    to_dat:
        save the posterior samples to a .dat file
    to_bilby:
        convert the posterior samples to a bilby.core.result.Result object
    to_latex_table:
        convert the posterior samples to a latex table
    generate_latex_macros:
        generate a set of latex macros for the stored posterior samples
    write_config_to_file:
        write the config file stored in the result file to file
    """
    def __init__(self, path_to_results_file, **kwargs):
        super().__init__(path_to_results_file, **kwargs)
        self.load(self._grab_data_from_pesummary_file, **self.load_kwargs)

    @property
    def load_kwargs(self):
        """Extra kwargs passed to `self.load`. Empty for this class; subclasses
        may override (e.g. to swap the dictionary parser)
        """
        return dict()

    @property
    def pe_algorithm(self):
        """Dictionary keyed by label giving the 'pe_algorithm' entry of the
        'sampler' kwargs for each analysis, or None when it is not stored
        """
        _algorithm = {label: None for label in self.labels}
        for num, _kwargs in enumerate(self.extra_kwargs):
            try:
                _algorithm[self.labels[num]] = _kwargs["sampler"]["pe_algorithm"]
            except KeyError:
                # not recorded for this analysis; leave as None
                pass
        return _algorithm

    @property
    def preferred(self):
        """Label of the first analysis whose 'other' kwargs flag it as
        preferred. If no analysis is flagged and the file only contains a
        single analysis, that analysis is returned. Otherwise None
        """
        import ast

        for num, _kwargs in enumerate(self.extra_kwargs):
            other = _kwargs["other"] if "other" in _kwargs.keys() else {}
            # the flag is parsed with literal_eval, so it is expected to be a
            # string such as 'True' / 'False'
            if "preferred" in other.keys() and ast.literal_eval(other["preferred"]):
                return self.labels[num]
        if len(self.labels) == 1:
            return self.labels[0]
        return None

    @classmethod
    def load_file(cls, path, **kwargs):
        """Load a result file. If `path` is a directory containing a
        'home.html' file, the first file (in sorted order) matching
        'samples/posterior_samples*' inside that directory is loaded instead

        Parameters
        ----------
        path: str
            path to a result file, or to a directory as described above
        kwargs: dict
            all kwargs are passed to the parent class load_file method

        Raises
        ------
        FileNotFoundError
            if `path` is a directory that does not contain 'home.html' or has
            no file matching 'samples/posterior_samples*'
        """
        if os.path.isdir(path):
            # glob returns entries prefixed with the directory, so compare
            # base names when looking for 'home.html'
            contents = {
                os.path.basename(entry)
                for entry in glob(os.path.join(path, "*"))
            }
            candidates = []
            if "home.html" in contents:
                candidates = sorted(
                    glob(os.path.join(path, "samples", "posterior_samples*"))
                )
            if not candidates:
                raise FileNotFoundError(
                    "Unable to find a file called 'posterior_samples' in "
                    "the directory %s" % (path + "/samples"))
            path = candidates[0]
        return super().load_file(path, **kwargs)

    @staticmethod
    def _grab_data_from_pesummary_file(path, **kwargs):
        """Dispatch to the hdf5 or json reader based on the file extension

        Parameters
        ----------
        path: str
            path to the result file
        kwargs: dict
            all kwargs are passed to the selected reader

        Raises
        ------
        KeyError
            if the extension is not one of 'h5', 'hdf5' or 'json'
        """
        func_map = {"h5": PESummary._grab_data_from_hdf5_file,
                    "hdf5": PESummary._grab_data_from_hdf5_file,
                    "json": PESummary._grab_data_from_json_file}
        extension = MultiAnalysisRead.extension_from_path(path)
        return func_map[extension](path, **kwargs)

    @staticmethod
    def _convert_hdf5_to_dict(dictionary, path="/"):
        """Recursively convert an open hdf5 file into a nested dictionary

        Datasets are converted to numpy arrays. When a dataset has attributes
        they are stored alongside it under the key '{name}_attrs'. Groups are
        converted to nested dictionaries.

        Parameters
        ----------
        dictionary: h5py.File
            open hdf5 file (or any object indexable by hdf5 path)
        path: str, optional
            path of the group to convert. Default '/'
        """
        mydict = {}
        for key, item in dictionary[path].items():
            if isinstance(item, h5py.Dataset):
                _attrs = dict(item.attrs)
                if len(_attrs):
                    mydict["{}_attrs".format(key)] = _attrs
                mydict[key] = np.array(item)
            elif isinstance(item, h5py.Group):
                mydict[key] = PESummary._convert_hdf5_to_dict(
                    dictionary, path=path + key + "/")
        return mydict

    @staticmethod
    def _grab_data_from_hdf5_file(path, **kwargs):
        """Read an hdf5 result file and parse its contents

        Parameters
        ----------
        path: str
            path to the hdf5 file
        grab_data_from_dictionary: func, optional
            function used to parse the dictionary read from file. Default
            PESummary._grab_data_from_dictionary
        """
        function = kwargs.get(
            "grab_data_from_dictionary", PESummary._grab_data_from_dictionary)
        # the context manager closes the file even if parsing raises
        with h5py.File(path, 'r') as f:
            data = PESummary._convert_hdf5_to_dict(f)
            existing_data = function(data)
        return existing_data

    @staticmethod
    def _grab_data_from_json_file(path, **kwargs):
        """Read a json result file and parse its contents

        Parameters
        ----------
        path: str
            path to the json file
        grab_data_from_dictionary: func, optional
            function used to parse the dictionary read from file. Default
            PESummary._grab_data_from_dictionary
        """
        function = kwargs.get(
            "grab_data_from_dictionary", PESummary._grab_data_from_dictionary)
        with open(path) as f:
            data = json.load(f)
        return function(data)

    @staticmethod
    def _parse_injection_value(_value):
        """Convert a stored injection value to a python object

        Lists/arrays are reduced to their first element, bytes are decoded and
        the strings 'nan' / 'none' (any case) become np.nan / None. Any other
        value is returned unchanged.
        """
        if isinstance(_value, (list, np.ndarray)):
            _value = _value[0]
        if isinstance(_value, bytes):
            _value = _value.decode("utf-8")
        if isinstance(_value, str):
            lowered = _value.lower()
            if lowered == "nan":
                _value = np.nan
            elif lowered == "none":
                _value = None
        return _value

    @staticmethod
    def _grab_data_from_dictionary(dictionary, ignore=()):
        """Extract the data for every analysis stored in a dictionary read
        from a pesummary result file

        Parameters
        ----------
        dictionary: dict
            dictionary read from file. Every top level key other than
            'version', 'history' and those listed in `ignore` is treated as
            an analysis label
        ignore: list, optional
            top level keys which should not be treated as an analysis

        Returns
        -------
        dict
            dictionary with keys 'parameters', 'samples', 'injection',
            'version', 'kwargs' (lists with one entry per label), 'weights',
            'config', 'description' (dicts keyed by label), 'prior' (dict
            keyed by prior type, then label), 'labels', 'mcmc_samples' and
            'history'
        """
        labels = list(dictionary.keys())
        if "version" in labels:
            labels.remove("version")

        history_dict = None
        if "history" in labels:
            history_dict = dictionary["history"]
            labels.remove("history")

        for _ignore in ignore:
            if _ignore in labels:
                labels.remove(_ignore)

        parameter_list, sample_list, inj_list, ver_list = [], [], [], []
        meta_data_list, weights_list = [], []
        description_dict, prior_dict, config_dict = {}, {}, {}
        mcmc_samples = False
        for label in labels:
            data, = load_recursively(label, dictionary)
            if "mcmc_chains" in data.keys():
                # one structured array per chain; parameters are read from
                # the first chain
                mcmc_samples = True
                dataset = data["mcmc_chains"]
                chains = list(dataset.keys())
                parameters = list(dataset[chains[0]].dtype.names)
                samples = [
                    [np.array(j.tolist()) for j in dataset[chain]]
                    for chain in chains
                ]
            else:
                posterior_samples = data["posterior_samples"]
                if isinstance(posterior_samples, (h5py.Dataset, np.ndarray)):
                    # structured array: field names are the parameters
                    parameters = list(posterior_samples.dtype.names)
                    samples = [np.array(j.tolist()) for j in posterior_samples]
                else:
                    # mapping with separate parameter names and samples
                    parameters = posterior_samples["parameter_names"].copy()
                    samples = list(posterior_samples["samples"])
                if isinstance(parameters[0], bytes):
                    parameters = [
                        parameter.decode("utf-8") for parameter in parameters
                    ]
            parameter_list.append(parameters)

            if "injection_data" in data.keys():
                _injection_data = data["injection_data"]
                if isinstance(_injection_data, (h5py.Group, dict)):
                    inj_parameters = parameters
                    inj = _injection_data["injection_values"].copy()
                else:
                    # Keep the injection field names separate from the
                    # posterior parameters; the weights lookup below must
                    # index into the posterior parameters
                    inj_parameters = list(_injection_data.dtype.names)
                    inj = np.array(_injection_data.tolist())
                inj_list.append({
                    parameter: PESummary._parse_injection_value(value)
                    for parameter, value in zip(inj_parameters, inj)
                })
            else:
                inj_list.append({parameter: np.nan for parameter in parameters})
            sample_list.append(samples)

            config_dict[label] = (
                data["config_file"] if "config_file" in data.keys() else None
            )

            if "meta_data" in data.keys():
                _meta_data = data["meta_data"]
                if "sampler" not in _meta_data.keys():
                    _meta_data["sampler"] = {"nsamples": len(samples)}
                if "meta_data" not in _meta_data.keys():
                    _meta_data["meta_data"] = {}
                meta_data_list.append(_meta_data)
            else:
                meta_data_list.append({"sampler": {}, "meta_data": {}})

            if "weights" in parameters or b"weights" in parameters:
                ind = (
                    parameters.index("weights") if "weights" in parameters
                    else parameters.index(b"weights")
                )
                weights_list.append(Array([sample[ind] for sample in samples]))
            else:
                weights_list.append(None)

            ver_list.append(
                data["version"] if "version" in data.keys()
                else "No version information found"
            )
            description_dict[label] = (
                data["description"] if "description" in data.keys()
                else "No description found"
            )
            prior_dict[label] = data["priors"] if "priors" in data.keys() else dict()

        # priors are stored per label; callers expect them keyed by prior
        # type first and label second
        reversed_prior_dict = {}
        for label in labels:
            for key, item in prior_dict[label].items():
                reversed_prior_dict.setdefault(key, {})[label] = item

        return {
            "parameters": parameter_list,
            "samples": sample_list,
            "injection": inj_list,
            "version": ver_list,
            "kwargs": meta_data_list,
            "weights": dict(zip(labels, weights_list)),
            "labels": labels,
            "config": config_dict,
            "prior": reversed_prior_dict,
            "mcmc_samples": mcmc_samples,
            "history": history_dict,
            "description": description_dict
        }

    @property
    def injection_dict(self):
        """Dictionary of injection parameters keyed by analysis label"""
        return {
            label: self.injection_parameters[num] for num, label in
            enumerate(self.labels)
        }

    @deprecation(
        "The 'write_config_to_file' method may not be supported in future "
        "releases. Please use the 'write' method with kwarg 'file_format='ini''"
    )
    def write_config_to_file(self, label, outdir="./", filename=None, **kwargs):
        """Write the config file stored as a dictionary to file

        Parameters
        ----------
        label: str
            the label for the dictionary that you would like to write to file
        outdir: str, optional
            path indicating where you would like to configuration file to be
            saved. Default is current working directory
        filename: str, optional
            name of the file you wish to write the config data to. Default
            '{label}_config.ini'
        kwargs: dict, optional
            all additional kwargs are passed to the 'write' method

        Returns
        -------
        filename: str
            the `filename` argument exactly as it was passed in (None when no
            filename was provided)
        """
        PESummary.write(
            self, _config=True, labels=[label], outdir=outdir, overwrite=True,
            filenames={label: filename}, **kwargs
        )
        return filename

    def _labels_for_write(self, labels):
        """Check the input labels and raise an exception if the label does not exist
        in the file

        Parameters
        ----------
        labels: list
            list of labels that you wish to check. If 'all', every label
            stored in the file is returned

        Raises
        ------
        ValueError
            if one of the labels is not present in the file
        """
        if labels == "all":
            return list(self.labels)
        for label in labels:
            if label not in self.labels:
                raise ValueError(
                    "The label {} is not present in the file".format(label)
                )
        return labels

    @staticmethod
    def write(
        self, package="core", labels="all", cls_properties=None, filenames=None,
        _return=False, _config=False, **kwargs
    ):
        """Save the data to file

        This is a staticmethod which takes the instance explicitly as its
        first argument, i.e. it is called as `PESummary.write(obj, ...)`.

        Parameters
        ----------
        package: str, optional
            package you wish to use when writing the data
        labels: list, optional
            optional list of analyses to save to file
        cls_properties: dict, optional
            optional dictionary of class properties you wish to pass as kwargs to the
            write function. Keys are the properties name and value is the property
        filenames: dict, optional
            dictionary of filenames keyed by analysis label
        _return: Bool, optional
            if True, return a dictionary keyed by label containing the output
            of pesummary.io.write for each analysis. Default False
        _config: Bool, optional
            if True, write the stored config for each analysis (file format
            'ini') rather than the samples. Default False
        kwargs: dict, optional
            all additional kwargs are passed to the pesummary.io.write function

        Raises
        ------
        ValueError
            if a 'filename' kwarg is provided, or if a label is not present
            in the file
        """
        from pesummary.io import write

        if kwargs.get("filename", None) is not None:
            raise ValueError(
                "filename is not a valid kwarg for the PESummary class. If you wish "
                "to provide a filename, please provide one for each analysis in the "
                "form of a dictionary with kwargs 'filenames'"
            )
        labels = self._labels_for_write(labels)
        _files = {}
        for label in labels:
            ind = self.labels.index(label)
            if cls_properties is not None:
                for prop in cls_properties:
                    # a property may be keyed by label or ordered like
                    # self.labels; fall back to None when neither works
                    try:
                        kwargs[prop] = {label: cls_properties[prop][label]}
                    except (KeyError, TypeError):
                        try:
                            kwargs[prop] = cls_properties[prop][ind]
                        except (KeyError, TypeError):
                            kwargs[prop] = None

            # reduce the stored priors to those relevant for this label
            priors = getattr(self, "priors", {label: None})
            if "analytic" in priors.keys() and label in priors["analytic"].keys():
                kwargs.update({"analytic_priors": priors["analytic"][label]})
            if not len(priors):
                priors = {}
            elif label in priors.keys() and priors[label] is None:
                priors = None
            elif all(label in value.keys() for value in priors.values()):
                priors = {key: item[label] for key, item in priors.items()}
            elif "samples" in priors.keys() and label in priors["samples"].keys():
                priors = {"samples": {label: priors["samples"][label]}}
            elif label not in priors.keys():
                priors = {}
            else:
                priors = priors[label]

            if filenames is None:
                filename = None
            elif isinstance(filenames, dict):
                filename = filenames[label]
            else:
                filename = filenames

            if _config or kwargs.get("file_format", "dat") == "ini":
                kwargs["file_format"] = "ini"
                _files[label] = write(
                    getattr(self, "config", {label: None})[label],
                    filename=filename, **kwargs
                )
            else:
                _files[label] = write(
                    self.parameters[ind], self.samples[ind], package=package,
                    file_versions=self.input_version[ind], label=label,
                    file_kwargs=self.extra_kwargs[ind], priors=priors,
                    config=getattr(self, "config", {label: None})[label],
                    injection_data=getattr(self, "injection_dict", {label: None}),
                    filename=filename, **kwargs
                )
        if _return:
            return _files

    def to_bilby(self, labels="all", **kwargs):
        """Convert a PESummary metafile to a bilby results object

        Parameters
        ----------
        labels: list, optional
            optional list of analyses to save to file
        kwargs: dict, optional
            all additional kwargs are passed to the pesummary.io.write function

        Returns
        -------
        dict
            output of pesummary.io.write for each analysis, keyed by label
        """
        return PESummary.write(
            self, labels=labels, package="core", file_format="bilby",
            _return=True, **kwargs
        )

    def to_dat(self, labels="all", **kwargs):
        """Convert the samples stored in a PESummary metafile to a .dat file

        Parameters
        ----------
        labels: list, optional
            optional list of analyses to save to file
        kwargs: dict, optional
            all additional kwargs are passed to the pesummary.io.write function
        """
        return PESummary.write(
            self, labels=labels, package="core", file_format="dat", **kwargs
        )
class PESummaryDeprecated(PESummary):
    """Reader for the out-of-date pesummary file layout, in which the top
    level keys of the file are the data products ('posterior_samples',
    'injection_data', 'meta_data', ...) and each of those is keyed by
    analysis label. Instantiating this class emits a deprecation warning
    """
    @deprecation(
        "This file format is out-of-date and may not be supported in future "
        "releases."
    )
    def __init__(self, path_to_results_file, **kwargs):
        super(PESummaryDeprecated, self).__init__(path_to_results_file, **kwargs)

    @property
    def load_kwargs(self):
        """kwargs passed to `self.load`; selects the parser for the old
        file layout
        """
        parser = PESummaryDeprecated._grab_data_from_dictionary
        return {"grab_data_from_dictionary": parser}

    @staticmethod
    def _grab_data_from_dictionary(dictionary):
        """Extract the data for every analysis from a dictionary read from a
        result file written in the old layout

        Parameters
        ----------
        dictionary: dict
            dictionary read from file. Must contain 'posterior_samples' keyed
            by analysis label

        Returns
        -------
        dict
            dictionary with keys 'parameters', 'samples', 'injection',
            'version', 'kwargs', 'weights', 'labels', 'config' and 'prior'
        """
        def _clean_injection(entry):
            # first element of lists/arrays, decoded bytes, and the strings
            # 'nan' / 'none' mapped to np.nan / None
            if isinstance(entry, (list, np.ndarray)):
                entry = entry[0]
            if isinstance(entry, bytes):
                entry = entry.decode("utf-8")
            if isinstance(entry, str):
                if entry.lower() == "nan":
                    entry = np.nan
                elif entry.lower() == "none":
                    entry = None
            return entry

        top_level = dictionary.keys()
        labels = list(dictionary["posterior_samples"].keys())

        parameter_list, sample_list, inj_list, ver_list = [], [], [], []
        meta_data_list, weights_list = [], []
        for label in labels:
            stored = dictionary["posterior_samples"][label]
            if isinstance(stored, (h5py._hl.dataset.Dataset, np.ndarray)):
                # structured array: field names are the parameters
                parameters = list(stored.dtype.names)
                samples = [np.array(row).tolist() for row in stored]
            else:
                parameters = stored["parameter_names"].copy()
                samples = [np.array(row).tolist() for row in stored["samples"]]
            if isinstance(parameters[0], bytes):
                parameters = [name.decode("utf-8") for name in parameters]
            parameter_list.append(parameters)

            # nothing is appended for this label when the file holds no
            # injection data
            if "injection_data" in top_level:
                inj = dictionary["injection_data"][label]["injection_values"].copy()
                inj_list.append({
                    name: _clean_injection(value)
                    for name, value in zip(parameters, inj)
                })
            sample_list.append(samples)

            config = None
            if "config_file" in top_level:
                config, = load_recursively("config_file", dictionary)

            if "meta_data" in top_level:
                data, = load_recursively("meta_data", dictionary)
                meta_data_list.append(data[label])
            else:
                meta_data_list.append({"sampler": {}, "meta_data": {}})

            if "weights" in parameters:
                column = parameters.index("weights")
            elif b"weights" in parameters:
                column = parameters.index(b"weights")
            else:
                column = None
            if column is None:
                weights_list.append(None)
            else:
                weights_list.append(Array([row[column] for row in samples]))

            if "version" in top_level:
                version, = load_recursively("version", dictionary)
            else:
                version = {
                    key: "No version information found"
                    for key in labels + ["pesummary"]
                }

            if "priors" in top_level:
                priors, = load_recursively("priors", dictionary)
            else:
                priors = dict()

        # Collect one version per entry, decoding bytes. The 'pesummary'
        # entry is decoded in place and is not added to the list
        for key in list(version.keys()):
            if key == "pesummary":
                if isinstance(version["pesummary"], bytes):
                    version["pesummary"] = version["pesummary"].decode("utf-8")
                continue
            entry = version[key]
            ver_list.append(
                entry.decode("utf-8") if isinstance(entry, bytes) else entry
            )

        return {
            "parameters": parameter_list,
            "samples": sample_list,
            "injection": inj_list,
            "version": ver_list,
            "kwargs": meta_data_list,
            "weights": dict(zip(labels, weights_list)),
            "labels": labels,
            "config": config,
            "prior": priors
        }