Coverage for pesummary/core/file/formats/pesummary.py: 80.5%

1# Licensed under an MIT style license -- see LICENSE.md

3from glob import glob

4import os

5import h5py

6import json

7import numpy as np

9from pesummary.core.file.formats.base_read import MultiAnalysisRead

10from pesummary.utils.samples_dict import (

11 MCMCSamplesDict, MultiAnalysisSamplesDict, SamplesDict, Array

12)

13from pesummary.utils.dict import load_recursively

14from pesummary.utils.decorators import deprecation

16__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]

def write_pesummary(
    *args, cls=None, outdir="./", label=None, config=None, injection_data=None,
    file_kwargs=None, file_versions=None, mcmc_samples=False, hdf5=True, **kwargs
):
    """Write a set of samples to a pesummary file

    Parameters
    ----------
    args: tuple
        either a 2d tuple containing the parameters as first argument and samples
        as the second argument, or a SamplesDict object containing the samples
    cls: class, optional
        PESummary metafile class to use. Default
        pesummary.core.file.meta_file._MetaFile
    outdir: str, optional
        directory to write the file to
    label: str, optional
        The label of the analysis. This is used in the filename if a filename
        is not specified. Ignored when a MultiAnalysisSamplesDict is provided,
        as the labels are then taken from its keys
    config: dict, optional
        configuration file that you wish to save to file
    injection_data: dict, optional
        dictionary containing the injection values that you wish to save to file keyed
        by parameter
    file_kwargs: dict, optional
        any kwargs that you wish to save to file
    file_versions: dict, optional
        version of the data you wish to save to file
    mcmc_samples: Bool, optional
        if True, the set of samples provided are from multiple MCMC chains
    hdf5: Bool, optional
        if True, save the pesummary file in hdf5 format. Otherwise the file is
        saved with the json writer
    kwargs: dict
        all other kwargs are passed to the pesummary.core.file.meta_file._MetaFile class

    Raises
    ------
    ValueError
        if no samples are provided, if label is not a string, or if the
        samples are not 2 dimensional and mcmc_samples is False
    """
    if not args:
        raise ValueError(
            "Please provide the samples to write, either as a (parameters, "
            "samples) pair or as a single samples dictionary object"
        )
    if label is None:
        labels = ["dataset"]
    elif not isinstance(label, str):
        raise ValueError("label must be a string")
    else:
        labels = [label]

    if cls is None:
        # only pay for the import when the default class is actually needed
        from pesummary.core.file.meta_file import _MetaFile
        cls = _MetaFile

    first = args[0]
    if isinstance(first, MultiAnalysisSamplesDict):
        # already keyed by analysis label, so it is used as is
        labels = list(first.keys())
        samples = first
    else:
        if isinstance(first, (SamplesDict, MCMCSamplesDict)):
            _samples = first
            mcmc_samples = not isinstance(first, SamplesDict)
        else:
            _parameters, _samples = args
            _samples = np.array(_samples).T
            if mcmc_samples:
                _samples = MCMCSamplesDict(_parameters, _samples)
            elif len(_samples.shape) != 2:
                raise ValueError(
                    "samples must be a 2 dimensional array. If you wish to save more "
                    "than one analysis to file, please provide the samples as a "
                    "pesummary.utils.samples_dict.MultiAnalysisSamplesDict object. If "
                    "you wish to save mcmc chains to file, please add the "
                    "mcmc_samples=True argument"
                )
            else:
                _samples = SamplesDict(_parameters, _samples)
        samples = {labels[0]: _samples}

    if file_kwargs is None:
        file_kwargs = {
            _label: {"sampler": {}, "meta_data": {}} for _label in labels
        }
    elif not all(_label in file_kwargs.keys() for _label in labels):
        # a single set of kwargs was given: share it between every analysis
        file_kwargs = {_label: file_kwargs for _label in labels}

    # NOTE: a plain string is replaced by the placeholder below rather than
    # being stored as the version
    if file_versions is None or isinstance(file_versions, str):
        file_versions = {
            _label: "No version information found" for _label in labels
        }
    elif not all(_label in file_versions.keys() for _label in labels):
        file_versions = {_label: file_versions for _label in labels}

    if injection_data is None:
        injection_data = {
            _label: {
                param: float("nan") for param in samples[_label].keys()
            } for _label in samples.keys()
        }
    elif not all(_label in injection_data.keys() for _label in labels):
        injection_data = {_label: injection_data for _label in labels}

    if config is None:
        config = [None for _label in labels]
    elif isinstance(config, dict):
        config = [config]
    obj = cls(
        samples, labels, config, injection_data, file_versions, file_kwargs,
        mcmc_samples=mcmc_samples, outdir=outdir, hdf5=hdf5, **kwargs
    )
    obj.make_dictionary()
    if not hdf5:
        obj.save_to_json(obj.data, obj.meta_file)
    else:
        obj.save_to_hdf5(
            obj.data, obj.labels, obj.samples, obj.meta_file,
            mcmc_samples=mcmc_samples
        )

133

134

135class PESummary(MultiAnalysisRead):

136 """This class handles the existing posterior_samples.h5 file

137

138 Parameters

139 ----------

140 path_to_results_file: str

141 path to the results file you wish to load

142 remove_nan_likelihood_samples: Bool, optional

143 if True, remove samples which have log_likelihood='nan'. Default True

144

145 Attributes

146 ----------

147 parameters: nd list

148 list of parameters stored in the result file for each analysis stored

149 in the result file

150 samples: 3d list

151 list of samples stored in the result file for each analysis stored

152 in the result file

153 samples_dict: nested dict

154 nested dictionary of samples stored in the result file keyed by their

155 respective label

156 input_version: str

157 version of the result file passed.

158 extra_kwargs: list

159 list of dictionaries containing kwargs that were extracted from each

160 analysis

161 injection_parameters: list

162 list of dictionaries of injection parameters for each analysis

163 injection_dict: dict

164 dictionary containing the injection parameters keyed by their respective

165 label

166 prior: dict

167 dictionary of prior samples stored in the result file

168 config: dict

169 dictionary containing the configuration file stored in the result file

170 labels: list

171 list of analyses stored in the result file

172 weights: dict

173 dictionary of weights for each sample for each analysis

174 pe_algorithm: dict

175 name of the algorithm used to generate each analysis

176 preferred: str

177 name of the preferred analysis in the result file

178

179 Methods

180 -------

181 samples_dict_for_label: dict

182 dictionary of samples for a specific analysis

183 reduced_samples_dict: dict

184 dictionary of samples for one or more analyses

185 to_dat:

186 save the posterior samples to a .dat file

187 to_bilby:

188 convert the posterior samples to a bilby.core.result.Result object

189 to_latex_table:

190 convert the posterior samples to a latex table

191 generate_latex_macros:

192 generate a set of latex macros for the stored posterior samples

193 write_config_to_file:

194 write the config file stored in the result file to file

195 """

def __init__(self, path_to_results_file, **kwargs):
    """Initialise the parent reader and load the contents of the file

    Parameters
    ----------
    path_to_results_file: str
        path to the results file you wish to load
    kwargs: dict, optional
        all kwargs are passed to the parent class
    """
    super(PESummary, self).__init__(path_to_results_file, **kwargs)
    reader = self._grab_data_from_pesummary_file
    self.load(reader, **self.load_kwargs)

199

200 @property

201 def load_kwargs(self):

202 return dict()

203

204 @property

205 def pe_algorithm(self):

206 _algorithm = {label: None for label in self.labels}

207 for num, _kwargs in enumerate(self.extra_kwargs):

208 _label = self.labels[num]

209 try:

210 _algorithm[_label] = _kwargs["sampler"]["pe_algorithm"]

211 except KeyError:

212 pass

213 return _algorithm

214

215 @property

216 def preferred(self):

217 _preferred = None

218 for num, _kwargs in enumerate(self.extra_kwargs):

219 if "other" in _kwargs.keys() and "preferred" in _kwargs["other"].keys():

220 import ast

221 if ast.literal_eval(_kwargs["other"]["preferred"]):

222 _preferred = self.labels[num]

223 break

224 if _preferred is None and len(self.labels) == 1:

225 _preferred = self.labels[0]

226 return _preferred

227

228 @classmethod

229 def load_file(cls, path, **kwargs):

230 if os.path.isdir(path):

231 files = glob(path + "/*")

232 if "home.html" in files:

233 path = glob(path + "/samples/posterior_samples*")[0]

234 else:

235 raise FileNotFoundError(

236 "Unable to find a file called 'posterior_samples' in "

237 "the directory %s" % (path + "/samples"))

238 return super(PESummary, cls).load_file(path, **kwargs)

239

@staticmethod
def _grab_data_from_pesummary_file(path, **kwargs):
    """Read a result file with the reader that matches its extension

    Parameters
    ----------
    path: str
        path to the result file. The extension must be 'h5', 'hdf5' or 'json'
    kwargs: dict, optional
        all kwargs are passed to the selected reader
    """
    extension = MultiAnalysisRead.extension_from_path(path)
    readers = dict.fromkeys(
        ("h5", "hdf5"), PESummary._grab_data_from_hdf5_file
    )
    readers["json"] = PESummary._grab_data_from_json_file
    reader = readers[extension]
    return reader(path, **kwargs)

248

@staticmethod
def _convert_hdf5_to_dict(dictionary, path="/"):
    """Recursively copy the contents of an open hdf5 file into a dictionary

    Parameters
    ----------
    dictionary: h5py.File
        open hdf5 file
    path: str, optional
        path of the group inside the file to convert. Default '/'

    Returns
    -------
    dict
        datasets are stored as numpy arrays and groups as nested
        dictionaries. Non-empty dataset attributes are stored under the key
        '{name}_attrs'
    """
    converted = {}
    for name, node in dictionary[path].items():
        if isinstance(node, h5py._hl.dataset.Dataset):
            attributes = dict(node.attrs)
            if len(attributes):
                converted["{}_attrs".format(name)] = attributes
            converted[name] = np.array(node)
            continue
        if isinstance(node, h5py._hl.group.Group):
            child_path = path + name + "/"
            converted[name] = PESummary._convert_hdf5_to_dict(
                dictionary, path=child_path)
    return converted

264

@staticmethod
def _grab_data_from_hdf5_file(path, **kwargs):
    """Read the data stored in a hdf5 result file

    Parameters
    ----------
    path: str
        path to the hdf5 result file
    kwargs: dict, optional
        'grab_data_from_dictionary' may be provided to choose the function
        used to extract the data from the file contents. Default
        PESummary._grab_data_from_dictionary

    Returns
    -------
    output of the 'grab_data_from_dictionary' function
    """
    function = kwargs.get(
        "grab_data_from_dictionary", PESummary._grab_data_from_dictionary)
    # the context manager closes the file even if the conversion or the
    # extraction below raises
    with h5py.File(path, 'r') as f:
        data = PESummary._convert_hdf5_to_dict(f)
        return function(data)

276

277 @staticmethod

278 def _grab_data_from_json_file(path, **kwargs):

279 function = kwargs.get(

280 "grab_data_from_dictionary", PESummary._grab_data_from_dictionary)

281 with open(path) as f:

282 data = json.load(f)

283 return function(data)

284

@staticmethod
def _grab_data_from_dictionary(dictionary, ignore=None):
    """Extract the data for every analysis stored in a PESummary dictionary

    Parameters
    ----------
    dictionary: dict
        contents of the result file. Every top level key other than
        'version' and 'history' is treated as the label of an analysis
    ignore: list, optional
        top level keys which should not be treated as analyses. Default None

    Returns
    -------
    dict
        dictionary with keys 'parameters', 'samples', 'injection', 'version',
        'kwargs', 'weights', 'labels', 'config', 'prior', 'mcmc_samples',
        'history' and 'description'
    """
    def parse_injection_value(_value):
        """Return a single injection value, converting the strings 'nan' and
        'none' to np.nan and None respectively
        """
        if isinstance(_value, (list, np.ndarray)):
            _value = _value[0]
        if isinstance(_value, bytes):
            _value = _value.decode("utf-8")
        if isinstance(_value, str):
            if _value.lower() == "nan":
                _value = np.nan
            elif _value.lower() == "none":
                _value = None
        return _value

    history_dict = None
    if "history" in dictionary.keys():
        history_dict = dictionary["history"]

    not_analyses = ["version", "history"]
    if ignore is not None:
        not_analyses.extend(ignore)
    labels = [
        label for label in dictionary.keys() if label not in not_analyses
    ]

    parameter_list, sample_list, inj_list, ver_list = [], [], [], []
    meta_data_list, weights_list = [], []
    description_dict, prior_dict, config_dict = {}, {}, {}
    mcmc_samples = False
    for label in labels:
        data, = load_recursively(label, dictionary)
        if "mcmc_chains" in data.keys():
            mcmc_samples = True
            dataset = data["mcmc_chains"]
            chains = list(dataset.keys())
            parameters = list(dataset[chains[0]].dtype.names)
            samples = [
                [np.array(j.tolist()) for j in dataset[chain]] for chain
                in chains
            ]
        else:
            posterior_samples = data["posterior_samples"]
            new_format = (h5py._hl.dataset.Dataset, np.ndarray)
            if isinstance(posterior_samples, new_format):
                parameters = list(posterior_samples.dtype.names)
                samples = [np.array(j.tolist()) for j in posterior_samples]
            else:
                parameters = posterior_samples["parameter_names"].copy()
                samples = list(posterior_samples["samples"])
        # len() rather than truthiness as the names may be a numpy array
        if len(parameters) and isinstance(parameters[0], bytes):
            parameters = [
                parameter.decode("utf-8") for parameter in parameters
            ]
        parameter_list.append(parameters)

        if "injection_data" in data.keys():
            old_format = (h5py._hl.group.Group, dict)
            _injection_data = data["injection_data"]
            if isinstance(_injection_data, old_format):
                injection_parameters = parameters
                inj = _injection_data["injection_values"].copy()
            else:
                # keep the injection names separate from 'parameters' so that
                # the weights lookup below still indexes the posterior samples
                injection_parameters = list(_injection_data.dtype.names)
                inj = np.array(_injection_data.tolist())
            inj_list.append({
                parameter: parse_injection_value(value)
                for parameter, value in zip(injection_parameters, inj)
            })
        else:
            inj_list.append({
                parameter: np.nan for parameter in parameters
            })
        sample_list.append(samples)

        config_dict[label] = (
            data["config_file"] if "config_file" in data.keys() else None
        )

        if "meta_data" in data.keys():
            _meta_data = data["meta_data"]
            if "sampler" not in _meta_data.keys():
                _meta_data["sampler"] = {"nsamples": len(samples)}
            if "meta_data" not in _meta_data.keys():
                _meta_data["meta_data"] = {}
            meta_data_list.append(_meta_data)
        else:
            meta_data_list.append({"sampler": {}, "meta_data": {}})

        if "weights" in parameters or b"weights" in parameters:
            ind = (
                parameters.index("weights") if "weights" in parameters
                else parameters.index(b"weights")
            )
            weights_list.append(Array([sample[ind] for sample in samples]))
        else:
            weights_list.append(None)

        ver_list.append(
            data["version"] if "version" in data.keys()
            else "No version information found"
        )
        description_dict[label] = (
            data["description"] if "description" in data.keys()
            else "No description found"
        )
        prior_dict[label] = (
            data["priors"] if "priors" in data.keys() else dict()
        )

    # priors are stored as {label: {key: item}} but returned as
    # {key: {label: item}}
    reversed_prior_dict = {}
    for label in labels:
        for key, item in prior_dict[label].items():
            reversed_prior_dict.setdefault(key, {})[label] = item
    return {
        "parameters": parameter_list,
        "samples": sample_list,
        "injection": inj_list,
        "version": ver_list,
        "kwargs": meta_data_list,
        "weights": {i: j for i, j in zip(labels, weights_list)},
        "labels": labels,
        "config": config_dict,
        "prior": reversed_prior_dict,
        "mcmc_samples": mcmc_samples,
        "history": history_dict,
        "description": description_dict
    }

423

424 @property

425 def injection_dict(self):

426 return {

427 label: self.injection_parameters[num] for num, label in

428 enumerate(self.labels)

429 }

430

@deprecation(
    "The 'write_config_to_file' method may not be supported in future "
    "releases. Please use the 'write' method with kwarg 'file_format='ini''"
)
def write_config_to_file(self, label, outdir="./", filename=None, **kwargs):
    """Write the stored config for a single analysis to file

    Parameters
    ----------
    label: str
        the label of the analysis whose config you would like to write
    outdir: str, optional
        directory where you would like the configuration file to be saved.
        Default is the current working directory
    filename: str, optional
        name of the file you wish to write the config data to. Default
        '{label}_config.ini'
    kwargs: dict, optional
        all additional kwargs are passed to PESummary.write

    Returns
    -------
    filename: str
        the filename that was provided
    """
    per_label_filenames = {label: filename}
    PESummary.write(
        self, _config=True, labels=[label], outdir=outdir, overwrite=True,
        filenames=per_label_filenames, **kwargs
    )
    return filename

454

455 def _labels_for_write(self, labels):

456 """Check the input labels and raise an exception if the label does not exist

457 in the file

458

459 Parameters

460 ----------

461 labels: list

462 list of labels that you wish to check

463 """

464 if labels == "all":

465 labels = list(self.labels)

466 elif not all(label in self.labels for label in labels):

467 for label in labels:

468 if label not in self.labels:

469 raise ValueError(

470 "The label {} is not present in the file".format(label)

471 )

472 return labels

473

@staticmethod
def write(
    self, package="core", labels="all", cls_properties=None, filenames=None,
    _return=False, _config=False, **kwargs
):
    """Save the data to file

    Parameters
    ----------
    self: PESummary
        the object to write. This is a staticmethod, so the object must be
        passed explicitly
    package: str, optional
        package you wish to use when writing the data
    labels: list, optional
        optional list of analyses to save to file
    cls_properties: dict, optional
        optional dictionary of class properties you wish to pass as kwargs to the
        write function. Keys are the properties name and value is the property
    filenames: dict, optional
        dictionary of filenames keyed by analysis label
    _return: Bool, optional
        if True, return the output of the write function for each analysis
        as a dictionary keyed by label. Default False
    _config: Bool, optional
        if True, write the stored config rather than the samples. Default
        False
    kwargs: dict, optional
        all additional kwargs are passed to the pesummary.io.write function

    Raises
    ------
    ValueError
        if a 'filename' kwarg is provided, or if a label is not in the file
    """
    from pesummary.io import write

    if kwargs.get("filename", None) is not None:
        raise ValueError(
            "filename is not a valid kwarg for the PESummary class. If you wish "
            "to provide a filename, please provide one for each analysis in the "
            "form of a dictionary with kwargs 'filenames'"
        )
    # 'filename' can only be None here; drop it so that it does not clash
    # with the per-analysis filename passed explicitly below
    kwargs.pop("filename", None)
    labels = self._labels_for_write(labels)
    _files = {}
    for label in labels:
        ind = self.labels.index(label)
        # every analysis gets its own copy so that options set for one
        # analysis (e.g. analytic_priors) are not inherited by the next
        label_kwargs = dict(kwargs)
        if cls_properties is not None:
            for prop in cls_properties:
                # properties may be keyed by label or ordered like the labels
                try:
                    label_kwargs[prop] = {label: cls_properties[prop][label]}
                except (KeyError, TypeError):
                    try:
                        label_kwargs[prop] = cls_properties[prop][ind]
                    except (KeyError, TypeError):
                        label_kwargs[prop] = None
        priors = getattr(self, "priors", {label: None})
        if "analytic" in priors.keys() and label in priors["analytic"].keys():
            label_kwargs.update(
                {"analytic_priors": priors["analytic"][label]}
            )
        if not len(priors):
            priors = {}
        elif label in priors.keys() and priors[label] is None:
            priors = None
        elif all(label in value.keys() for value in priors.values()):
            priors = {key: item[label] for key, item in priors.items()}
        elif "samples" in priors.keys() and label in priors["samples"].keys():
            priors = {"samples": {label: priors["samples"][label]}}
        elif label not in priors.keys():
            priors = {}
        else:
            priors = priors[label]

        if filenames is None:
            filename = None
        elif isinstance(filenames, dict):
            filename = filenames[label]
        else:
            filename = filenames

        if _config or label_kwargs.get("file_format", "dat") == "ini":
            label_kwargs["file_format"] = "ini"
            _files[label] = write(
                getattr(self, "config", {label: None})[label],
                filename=filename, **label_kwargs
            )
        else:
            _files[label] = write(
                self.parameters[ind], self.samples[ind], package=package,
                file_versions=self.input_version[ind], label=label,
                file_kwargs=self.extra_kwargs[ind], priors=priors,
                config=getattr(self, "config", {label: None})[label],
                injection_data=getattr(self, "injection_dict", {label: None}),
                filename=filename, **label_kwargs
            )
    if _return:
        return _files

555

def to_bilby(self, labels="all", **kwargs):
    """Convert a PESummary metafile to a bilby results object

    Parameters
    ----------
    labels: list, optional
        optional list of analyses to convert
    kwargs: dict, optional
        all additional kwargs are passed to the pesummary.io.write function

    Returns
    -------
    dict
        output of the write function for each analysis, keyed by label
    """
    bilby_options = dict(
        labels=labels, package="core", file_format="bilby", _return=True
    )
    return PESummary.write(self, **bilby_options, **kwargs)

570

def to_dat(self, labels="all", **kwargs):
    """Save the samples stored in a PESummary metafile to a .dat file

    Parameters
    ----------
    labels: list, optional
        optional list of analyses to save to file
    kwargs: dict, optional
        all additional kwargs are passed to the pesummary.io.write function
    """
    dat_options = dict(labels=labels, package="core", file_format="dat")
    return PESummary.write(self, **dat_options, **kwargs)

584

585

class PESummaryDeprecated(PESummary):
    """This class handles result files written in the deprecated layout,
    where the top level keys of the file are the data products
    ('posterior_samples', 'injection_data', 'meta_data', 'version', 'priors',
    'config_file') and each data product is keyed by analysis label

    Parameters
    ----------
    path_to_results_file: str
        path to the results file you wish to load
    """
    @deprecation(
        "This file format is out-of-date and may not be supported in future "
        "releases."
    )
    def __init__(self, path_to_results_file, **kwargs):
        super(PESummaryDeprecated, self).__init__(path_to_results_file, **kwargs)

    @property
    def load_kwargs(self):
        """Use the deprecated-layout parser when loading the file"""
        return {
            "grab_data_from_dictionary": PESummaryDeprecated._grab_data_from_dictionary
        }

    @staticmethod
    def _grab_data_from_dictionary(dictionary):
        """Extract the data for every analysis stored in a dictionary written
        in the deprecated layout

        Parameters
        ----------
        dictionary: dict
            contents of the result file. Must contain the key
            'posterior_samples', keyed by analysis label

        Returns
        -------
        dict
            dictionary with keys 'parameters', 'samples', 'injection',
            'version', 'kwargs', 'weights', 'labels', 'config' and 'prior'
        """
        def parse_injection_value(_value):
            """Return a single injection value, converting the strings 'nan'
            and 'none' to np.nan and None respectively
            """
            if isinstance(_value, (list, np.ndarray)):
                _value = _value[0]
            if isinstance(_value, bytes):
                _value = _value.decode("utf-8")
            if isinstance(_value, str):
                if _value.lower() == "nan":
                    _value = np.nan
                elif _value.lower() == "none":
                    _value = None
            return _value

        labels = list(dictionary["posterior_samples"].keys())

        # these entries describe the whole file, so they are loaded once
        # here rather than per analysis. This also keeps them defined when
        # the file holds no analyses
        config = None
        if "config_file" in dictionary.keys():
            config, = load_recursively("config_file", dictionary)
        meta_data = None
        if "meta_data" in dictionary.keys():
            meta_data, = load_recursively("meta_data", dictionary)
        if "version" in dictionary.keys():
            version, = load_recursively("version", dictionary)
        else:
            version = {
                label: "No version information found"
                for label in labels + ["pesummary"]
            }
        if "priors" in dictionary.keys():
            priors, = load_recursively("priors", dictionary)
        else:
            priors = dict()

        parameter_list, sample_list, inj_list, ver_list = [], [], [], []
        meta_data_list, weights_list = [], []
        for label in labels:
            posterior_samples = dictionary["posterior_samples"][label]
            if isinstance(posterior_samples, (h5py._hl.dataset.Dataset, np.ndarray)):
                parameters = list(posterior_samples.dtype.names)
                samples = [np.array(j).tolist() for j in posterior_samples]
            else:
                parameters = posterior_samples["parameter_names"].copy()
                samples = [
                    np.array(j).tolist() for j in posterior_samples["samples"]
                ]
            # len() rather than truthiness as the names may be a numpy array
            if len(parameters) and isinstance(parameters[0], bytes):
                parameters = [
                    parameter.decode("utf-8") for parameter in parameters
                ]
            parameter_list.append(parameters)
            if "injection_data" in dictionary.keys():
                inj = dictionary["injection_data"][label]["injection_values"].copy()
                inj_list.append({
                    parameter: parse_injection_value(value)
                    for parameter, value in zip(parameters, inj)
                })
            sample_list.append(samples)
            if meta_data is not None:
                meta_data_list.append(meta_data[label])
            else:
                meta_data_list.append({"sampler": {}, "meta_data": {}})
            if "weights" in parameters or b"weights" in parameters:
                ind = (
                    parameters.index("weights") if "weights" in parameters
                    else parameters.index(b"weights")
                )
                weights_list.append(Array([sample[ind] for sample in samples]))
            else:
                weights_list.append(None)

        # the 'pesummary' entry is not an analysis, so it is decoded in
        # place but left out of the returned version list
        for key in list(version.keys()):
            if key != "pesummary" and isinstance(version[key], bytes):
                ver_list.append(version[key].decode("utf-8"))
            elif key != "pesummary":
                ver_list.append(version[key])
            elif isinstance(version["pesummary"], bytes):
                version["pesummary"] = version["pesummary"].decode("utf-8")
        return {
            "parameters": parameter_list,
            "samples": sample_list,
            "injection": inj_list,
            "version": ver_list,
            "kwargs": meta_data_list,
            "weights": {i: j for i, j in zip(labels, weights_list)},
            "labels": labels,
            "config": config,
            "prior": priors
        }