Coverage for pesummary/gw/file/formats/lalinference.py: 48.9%

237 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-02 08:42 +0000

1# Licensed under an MIT style license -- see LICENSE.md 

2 

3import os 

4 

5import h5py 

6import numpy as np 

7 

8from pesummary.gw.file.formats.base_read import GWRead, GWSingleAnalysisRead 

9from pesummary.utils.utils import logger 

10from pesummary.utils.decorators import open_config 

11from pesummary import conf 

12 

13__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"] 

14SAMPLER_KWARGS = { 

15 "log_bayes_factor": conf.log_bayes_factor, 

16 "bayes_factor": conf.bayes_factor, 

17 "log_evidence": conf.log_evidence, 

18 "evidence": conf.evidence, 

19 "log_prior_volume": conf.log_prior_volume, 

20 "sampleRate": "sample_rate", 

21 "segmentLength": "segment_length" 

22} 

23 

24META_DATA = { 

25 "flow": "f_low", 

26 "f_low": "f_low", 

27 "fref": "f_ref", 

28 "f_ref": "f_ref", 

29 "LAL_PNORDER": "pn_order", 

30 "LAL_APPROXIMANT": "approximant", 

31 "number_of_live_points": "number_of_live_points", 

32 "segmentLength": "segment_length", 

33 "segmentStart": "segment_start", 

34 "sampleRate": "sample_rate", 

35} 

36 

37 

38def open_lalinference(path): 

39 """Grab the parameters and samples in a lalinference file 

40 

41 Parameters 

42 ---------- 

43 path: str 

44 path to the result file you wish to read in 

45 """ 

46 from pesummary.core.file.formats.hdf5 import _read_hdf5_with_h5py 

47 lalinference_names, samples, _dataset = _read_hdf5_with_h5py( 

48 path, return_posterior_dataset=True 

49 ) 

50 if "logdistance" in lalinference_names: 

51 lalinference_names.append("luminosity_distance") 

52 for num, i in enumerate(samples): 

53 samples[num].append( 

54 np.exp(i[lalinference_names.index("logdistance")])) 

55 if "costheta_jn" in lalinference_names: 

56 lalinference_names.append("theta_jn") 

57 for num, i in enumerate(samples): 

58 samples[num].append( 

59 np.arccos(i[lalinference_names.index("costheta_jn")])) 

60 extra_kwargs = LALInference.grab_extra_kwargs(path) 

61 extra_kwargs["sampler"]["nsamples"] = len(samples) 

62 extra_kwargs["sampler"]["pe_algorithm"] = "lalinference" 

63 try: 

64 version = _dataset.attrs["VERSION"].decode("utf-8") 

65 except Exception as e: 

66 version = None 

67 return { 

68 "parameters": lalinference_names, 

69 "samples": samples, 

70 "injection": None, 

71 "version": version, 

72 "kwargs": extra_kwargs 

73 } 

74 

75 

76class LALInference(GWSingleAnalysisRead): 

77 """PESummary wrapper of `lalinference` 

78 (https://git.ligo.org/lscsoft/lalsuite/lalinference). 

79 

80 Parameters 

81 ---------- 

82 path_to_results_file: str 

83 path to the results file you wish to load in with `LALInference` 

84 remove_nan_likelihood_samples: Bool, optional 

85 if True, remove samples which have log_likelihood='nan'. Default True 

86 

87 Attributes 

88 ---------- 

89 parameters: list 

90 list of parameters stored in the result file 

91 converted_parameters: list 

92 list of parameters that have been derived from the sampled distributions 

93 samples: 2d list 

94 list of samples stored in the result file 

95 samples_dict: dict 

96 dictionary of samples stored in the result file keyed by parameters 

97 input_version: str 

98 version of the result file passed. 

99 extra_kwargs: dict 

100 dictionary of kwargs that were extracted from the result file 

101 converted_parameters: list 

102 list of parameters that have been added 

103 pe_algorithm: str 

104 name of the algorithm used to generate the posterior samples 

105 

106 Methods 

107 ------- 

108 to_dat: 

109 save the posterior samples to a .dat file 

110 to_latex_table: 

111 convert the posterior samples to a latex table 

112 generate_latex_macros: 

113 generate a set of latex macros for the stored posterior samples 

114 to_lalinference: 

115 convert the posterior samples to a lalinference result file 

116 generate_all_posterior_samples: 

117 generate all posterior distributions that may be derived from 

118 sampled distributions 

119 """ 

120 def __init__(self, path_to_results_file, injection_file=None, **kwargs): 

121 super(LALInference, self).__init__(path_to_results_file, **kwargs) 

122 self.load(self._grab_data_from_lalinference_file) 

123 

124 @classmethod 

125 def load_file(cls, path, injection_file=None, **kwargs): 

126 if injection_file and not os.path.isfile(injection_file): 

127 raise IOError("%s does not exist" % (path)) 

128 return super(LALInference, cls).load_file( 

129 path, injection_file=injection_file, **kwargs 

130 ) 

131 

132 @staticmethod 

133 def guess_path_to_sampler(path): 

134 """Guess the path to the sampler group in a LALInference results file 

135 

136 Parameters 

137 ---------- 

138 path: str 

139 path to the LALInference results file 

140 """ 

141 def _find_name(name): 

142 c1 = "lalinference_nest" in name or "lalinference_mcmc" in name 

143 c2 = "lalinference_nest/" not in name and "lalinference_mcmc/" not in name 

144 if c1 and c2: 

145 return name 

146 

147 f = h5py.File(path, 'r') 

148 _path = f.visit(_find_name) 

149 f.close() 

150 return _path 

151 

152 @staticmethod 

153 def _parameters_in_lalinference_file(path): 

154 """Return the parameter names stored in the LALInference results file 

155 

156 Parameters 

157 ---------- 

158 """ 

159 f = h5py.File(path, 'r') 

160 path_to_samples = GWRead.guess_path_to_samples(path) 

161 parameters = list(f[path_to_samples].dtype.names) 

162 f.close() 

163 return parameters 

164 

165 @staticmethod 

166 def _samples_in_lalinference_file(path): 

167 """ 

168 """ 

169 f = h5py.File(path, 'r') 

170 path_to_samples = GWRead.guess_path_to_samples(path) 

171 samples = [list(i) for i in f[path_to_samples]] 

172 return samples 

173 

174 @property 

175 def calibration_spline_posterior(self): 

176 if not any("_spcal_amp" in i for i in self.parameters): 

177 return super(LALInference, self).calibration_spline_posterior 

178 keys_amp = np.sort( 

179 [param for param in self.parameters if "_spcal_amp" in param] 

180 ) 

181 keys_phase = np.sort( 

182 [param for param in self.parameters if "_spcal_phase" in param] 

183 ) 

184 IFOs = np.unique( 

185 [ 

186 param.split("_")[0] for param in self.parameters if 

187 "_spcal_" in param 

188 ] 

189 ) 

190 log_frequencies = {ifo: [] for ifo in IFOs} 

191 for key, value in self.extra_kwargs["other"].items(): 

192 if "_spcal_logfreq" in key: 

193 cond = ( 

194 key.replace("logfreq", "freq") not in 

195 self.extra_kwargs["other"].keys() 

196 ) 

197 if cond: 

198 log_frequencies[key.split("_")[0]].append(float(value)) 

199 elif "_spcal_freq" in key: 

200 log_frequencies[key.split("_")[0]].append(np.log(float(value))) 

201 amp_params = {ifo: [] for ifo in IFOs} 

202 phase_params = {ifo: [] for ifo in IFOs} 

203 zipped = zip( 

204 [keys_amp, keys_phase], [amp_params, phase_params] 

205 ) 

206 _samples = self.samples_dict 

207 for keys, dictionary in zipped: 

208 for key in keys: 

209 ifo = key.split("_")[0] 

210 ind = self.parameters.index(key) 

211 dictionary[ifo].append(_samples[key]) 

212 return log_frequencies, amp_params, phase_params 

213 

214 @staticmethod 

215 def grab_extra_kwargs(path): 

216 """Grab any additional information stored in the lalinference file 

217 """ 

218 kwargs = {"sampler": {}, "meta_data": {}, "other": {}} 

219 path_to_samples = GWRead.guess_path_to_samples(path) 

220 path_to_sampler = LALInference.guess_path_to_sampler(path) 

221 f = h5py.File(path, 'r') 

222 

223 attributes = dict(f[path_to_sampler].attrs.items()) 

224 for kwarg, item in attributes.items(): 

225 if kwarg in list(SAMPLER_KWARGS.keys()) and kwarg == "evidence": 

226 kwargs["sampler"][conf.log_evidence] = np.round(np.log(item), 2) 

227 elif kwarg in list(SAMPLER_KWARGS.keys()) and kwarg == "bayes_factor": 

228 kwargs["sampler"][conf.log_bayes_factor] = np.round( 

229 np.log(item), 2 

230 ) 

231 elif kwarg in list(SAMPLER_KWARGS.keys()): 

232 kwargs["sampler"][SAMPLER_KWARGS[kwarg]] = np.round(item, 2) 

233 else: 

234 kwargs["other"][kwarg] = item 

235 

236 attributes = dict(f[path_to_samples].attrs.items()) 

237 for kwarg, item in attributes.items(): 

238 if kwarg in list(META_DATA.keys()) and kwarg == "LAL_APPROXIMANT": 

239 try: 

240 from lalsimulation import GetStringFromApproximant 

241 

242 kwargs["meta_data"]["approximant"] = \ 

243 GetStringFromApproximant( 

244 int(attributes["LAL_APPROXIMANT"]) 

245 ) 

246 except Exception: 

247 kwargs["meta_data"]["approximant"] = \ 

248 int(attributes["LAL_APPROXIMANT"]) 

249 elif kwarg in list(META_DATA.keys()): 

250 kwargs["meta_data"][META_DATA[kwarg]] = item 

251 else: 

252 kwargs["other"][kwarg] = item 

253 f.close() 

254 return kwargs 

255 

256 @staticmethod 

257 def _grab_data_from_lalinference_file(path): 

258 """ 

259 """ 

260 return open_lalinference(path) 

261 

262 def add_fixed_parameters_from_config_file(self, config_file): 

263 """Search the conifiguration file and add fixed parameters to the 

264 list of parameters and samples 

265 

266 Parameters 

267 ---------- 

268 config_file: str 

269 path to the configuration file 

270 """ 

271 self._add_fixed_parameters_from_config_file( 

272 config_file, self._add_fixed_parameters) 

273 

274 def add_marginalized_parameters_from_config_file(self, config_file): 

275 """Search the configuration file and add the marginalized parameters 

276 to the list of parameters and samples 

277 

278 Parameters 

279 ---------- 

280 config_file: str 

281 path to the configuration file 

282 """ 

283 self._add_marginalized_parameters_from_config_file( 

284 config_file, self._add_marginalized_parameters) 

285 

286 @staticmethod 

287 @open_config(index=2) 

288 def _add_fixed_parameters(parameters, samples, config_file): 

289 """Open a LALInference configuration file and add the fixed parameters 

290 to the list of parameters and samples 

291 

292 Parameters 

293 ---------- 

294 parameters: list 

295 list of existing parameters 

296 samples: list 

297 list of existing samples 

298 config_file: str 

299 path to the configuration file 

300 """ 

301 from pesummary.gw.file.standard_names import standard_names 

302 

303 config = config_file 

304 if not config.error: 

305 fixed_data = None 

306 if "engine" in config.sections(): 

307 fixed_data = { 

308 key.split("fix-")[1]: item for key, item in 

309 config.items("engine") if "fix" in key} 

310 if fixed_data is not None: 

311 for i in fixed_data.keys(): 

312 fixed_parameter = i 

313 fixed_value = fixed_data[i] 

314 try: 

315 param = standard_names[fixed_parameter] 

316 if param in parameters: 

317 pass 

318 else: 

319 parameters.append(param) 

320 for num in range(len(samples)): 

321 samples[num].append(float(fixed_value)) 

322 except Exception: 

323 if fixed_parameter == "logdistance": 

324 if "luminosity_distance" not in parameters: 

325 parameters.append(standard_names["distance"]) 

326 for num in range(len(samples)): 

327 samples[num].append(float(fixed_value)) 

328 if fixed_parameter == "costheta_jn": 

329 if "theta_jn" not in parameters: 

330 parameters.append(standard_names["theta_jn"]) 

331 for num in range(len(samples)): 

332 samples[num].append(float(fixed_value)) 

333 return parameters, samples 

334 

335 @staticmethod 

336 @open_config(index=2) 

337 def _add_marginalized_parameters(parameters, samples, config_file): 

338 """Open a LALInference configuration file and add the marginalized 

339 parameters to the list of parameters and samples 

340 

341 Parameters 

342 ---------- 

343 parameters: list 

344 list of existing parameters 

345 samples: list 

346 list of existing samples 

347 config_file: str 

348 path to the configuration file 

349 """ 

350 config = config_file 

351 if not config.error: 

352 fixed_data = None 

353 if "engine" in config.sections(): 

354 marg_par = { 

355 key.split("marg")[1]: item for key, item in 

356 config.items("engine") if "marg" in key} 

357 for i in marg_par.keys(): 

358 if "time" in i and "geocent_time" not in parameters: 

359 if "marginalized_geocent_time" in parameters: 

360 ind = parameters.index("marginalized_geocent_time") 

361 parameters.remove(parameters[ind]) 

362 parameters.append("geocent_time") 

363 for num, j in enumerate(samples): 

364 samples[num].append(float(j[ind])) 

365 del j[ind] 

366 else: 

367 logger.warning("You have marginalized over time and " 

368 "there are no time samples. Manually " 

369 "setting time to 100000s") 

370 parameters.append("geocent_time") 

371 for num, j in enumerate(samples): 

372 samples[num].append(float(100000)) 

373 if "phi" in i and "phase" not in parameters: 

374 if "marginalized_phase" in parameters: 

375 ind = parameters.index("marginalized_phase") 

376 parameters.remove(parameters[ind]) 

377 parameters.append("phase") 

378 for num, j in enumerate(samples): 

379 samples[num].append(float(j[ind])) 

380 del j[ind] 

381 else: 

382 logger.warning("You have marginalized over phase and " 

383 "there are no phase samples. Manually " 

384 "setting the phase to be 0") 

385 parameters.append("phase") 

386 for num, j in enumerate(samples): 

387 samples[num].append(float(0)) 

388 if "dist" in i and "luminosity_distance" not in parameters: 

389 if "marginalized_distance" in parameters: 

390 ind = parameters.index("marginalized_distance") 

391 parameters.remove(parameters[ind]) 

392 parameters.append("luminosity_distance") 

393 for num, j in enumerate(samples): 

394 samples[num].append(float(j[ind])) 

395 del j[ind] 

396 else: 

397 logger.warning("You have marginalized over distance and " 

398 "there are no distance samples. Manually " 

399 "setting distance to 100Mpc") 

400 parameters.append("luminosity_distance") 

401 for num, j in enumerate(samples): 

402 samples[num].append(float(100.0)) 

403 return parameters, samples 

404 return parameters, samples 

405 

406 

407def _write_lalinference( 

408 parameters, samples, outdir="./", label=None, filename=None, overwrite=False, 

409 sampler="lalinference_nest", dat=False, **kwargs 

410): 

411 """Write a set of samples in LALInference file format 

412 

413 Parameters 

414 ---------- 

415 parameters: list 

416 list of parameters 

417 samples: 2d list 

418 list of samples. Columns correspond to a given parameter 

419 outdir: str 

420 The directory where you would like to write the lalinference file 

421 label: str 

422 The label of the analysis. This is used in the filename if a filename 

423 if not specified 

424 filename: str 

425 The name of the file that you wish to write 

426 overwrite: Bool 

427 If True, an existing file of the same name will be overwritten 

428 sampler: str 

429 The sampler which you wish to store in the result file. This may either 

430 be 'lalinference_nest' or 'lalinference_mcmc' 

431 dat: Bool 

432 If True, a LALInference dat file is produced 

433 """ 

434 from pesummary.gw.file.standard_names import lalinference_map 

435 from pesummary.utils.samples_dict import SamplesDict 

436 import copy 

437 

438 _samples = copy.deepcopy(samples) 

439 _parameters = copy.deepcopy(parameters) 

440 _samples = SamplesDict(_parameters, np.array(_samples).T.tolist()) 

441 if not filename and not label: 

442 from time import time 

443 

444 label = round(time()) 

445 if not filename: 

446 extension = "dat" if dat else "hdf5" 

447 filename = "lalinference_{}.{}".format(label, extension) 

448 

449 if os.path.isfile(os.path.join(outdir, filename)) and not overwrite: 

450 raise FileExistsError( 

451 "The file '{}' already exists in the directory {}".format( 

452 filename, outdir 

453 ) 

454 ) 

455 reverse_map = {item: key for key, item in lalinference_map.items()} 

456 no_key = [] 

457 for param in _parameters: 

458 if param in reverse_map.keys() and reverse_map[param] in _parameters: 

459 logger.warning( 

460 "The LALInference name for '{}' is '{}'. '{}' already found " 

461 "in the posterior table. Keeping both entries".format( 

462 param, reverse_map[param], reverse_map[param] 

463 ) 

464 ) 

465 elif param in reverse_map.keys(): 

466 _samples[reverse_map[param]] = _samples.pop(param) 

467 elif param not in lalinference_map.keys(): 

468 no_key.append(param) 

469 if len(no_key): 

470 logger.info( 

471 "Unable to find a LALInference name for the parameters: {}. " 

472 "Keeping the PESummary name.".format(", ".join(no_key)) 

473 ) 

474 lalinference_samples = _samples.to_structured_array() 

475 if dat: 

476 np.savetxt( 

477 os.path.join(outdir, filename), lalinference_samples, 

478 delimiter="\t", comments="", 

479 header="\t".join(lalinference_samples.dtype.names) 

480 ) 

481 else: 

482 with h5py.File(os.path.join(outdir, filename), "w") as f: 

483 lalinference = f.create_group("lalinference") 

484 sampler = lalinference.create_group(sampler) 

485 samples = sampler.create_dataset( 

486 "posterior_samples", data=lalinference_samples 

487 ) 

488 

489 

490def write_lalinference( 

491 parameters, samples, outdir="./", label=None, filename=None, overwrite=False, 

492 sampler="lalinference_nest", dat=False, **kwargs 

493): 

494 """Write a set of samples in LALInference file format 

495 

496 Parameters 

497 ---------- 

498 parameters: list 

499 list of parameters 

500 samples: 2d list 

501 list of samples. Columns correspond to a given parameter 

502 outdir: str 

503 The directory where you would like to write the lalinference file 

504 label: str 

505 The label of the analysis. This is used in the filename if a filename 

506 if not specified 

507 filename: str 

508 The name of the file that you wish to write 

509 overwrite: Bool 

510 If True, an existing file of the same name will be overwritten 

511 sampler: str 

512 The sampler which you wish to store in the result file. This may either 

513 be 'lalinference_nest' or 'lalinference_mcmc' 

514 dat: Bool 

515 If True, a LALInference dat file is produced 

516 """ 

517 from pesummary.io.write import _multi_analysis_write 

518 

519 _multi_analysis_write( 

520 _write_lalinference, parameters, samples, outdir=outdir, label=label, 

521 filename=filename, overwrite=overwrite, sampler=sampler, dat=dat, 

522 file_format="lalinference", **kwargs 

523 )