Coverage for pesummary/core/file/formats/hdf5.py: 58.1%

1# Licensed under an MIT style license -- see LICENSE.md

3import h5py

4import numpy as np

5from pesummary.core.file.formats.base_read import Read

6from pesummary.utils.dict import load_recursively, paths_to_key

8__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]

def read_hdf5(path, **kwargs):
    """Extract the parameters and samples stored in a .hdf5 file

    The file is first read with deepdish. If that attempt raises any
    exception, the file is read with h5py instead.

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    kwargs: dict
        all kwargs are passed to the _read_hdf5_with_deepdish function and,
        if that fails, to the _read_hdf5_with_h5py function
    """
    try:
        contents = _read_hdf5_with_deepdish(path, **kwargs)
    except Exception:
        # deliberate best-effort fallback: any deepdish failure triggers the
        # h5py reader
        contents = _read_hdf5_with_h5py(path, **kwargs)
    return contents

def _read_hdf5_with_deepdish(path, remove_params=None, path_to_samples=None):
    """Extract the parameters and samples stored in a .hdf5 file with deepdish

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    path_to_samples: str, optional
        key passed to load_recursively to select the group which stores the
        posterior samples. If None, the file is searched for a single
        'posterior' group and, failing that, a single 'posterior_samples'
        group

    Returns
    -------
    parameters: list
        names of the stored parameters, with 'logL' renamed to
        'log_likelihood'
    data: 2d list
        the samples, one row per sample and one column per parameter
    """
    import deepdish

    contents = deepdish.io.load(path)
    if path_to_samples is None:
        for group_name in ("posterior", "posterior_samples"):
            try:
                # the single-element unpacking raises a ValueError unless
                # exactly one path to the group is found
                located, = paths_to_key(group_name, contents)
                path_to_samples = located[0]
                break
            except ValueError:
                continue
        else:
            raise ValueError(
                "Unable to find a 'posterior' or 'posterior_samples' group "
                "in the file '{}'".format(path)
            )
    posterior_group, = load_recursively(path_to_samples, contents)
    parameters = list(posterior_group.keys())
    if remove_params is not None:
        for unwanted in remove_params:
            if unwanted in parameters:
                parameters.remove(unwanted)
    n_rows = len(posterior_group[parameters[0]])
    table = np.zeros([n_rows, len(parameters)])
    for column, name in enumerate(parameters):
        for row, value in enumerate(posterior_group[name]):
            # only the real part of each stored value is kept
            table[row][column] = float(np.real(value))
    renamed = [
        "log_likelihood" if name == "logL" else name for name in parameters
    ]
    return renamed, table.tolist()

def _read_hdf5_with_h5py(
    path, remove_params=None, path_to_samples=None,
    return_posterior_dataset=False
):
    """Grab the parameters and samples in a .hdf5 file with h5py

    Three layouts are handled for the object stored at `path_to_samples`:
    a group with one dataset per parameter, a group holding
    'parameter_names' and 'samples' datasets, and a single structured
    dataset whose field names are the parameters.

    Parameters
    ----------
    path: str
        path to the result file you wish to read in
    remove_params: list, optional
        parameters you wish to remove from the posterior table
    path_to_samples: str, optional
        path to the group or dataset within the file which stores the
        posterior samples. If None, Read.guess_path_to_samples is used
    return_posterior_dataset: Bool, optional
        if True, the h5py object stored at `path_to_samples` is returned as
        a third element. The file is deliberately left open in this case so
        that the returned object stays readable. Default False

    Returns
    -------
    parameters: list
        names of the parameters
    samples: np.ndarray or 2d list
        the samples. Columns correspond to a given parameter
    posterior_dataset: h5py object
        only returned when return_posterior_dataset is True

    Raises
    ------
    ValueError
        if the object stored at `path_to_samples` is neither a group nor a
        dataset
    """
    import h5py

    if path_to_samples is None:
        path_to_samples = Read.guess_path_to_samples(path)

    f = h5py.File(path, 'r')
    # only set once we know the caller is handed an object that needs the
    # file to remain open
    keep_open = False
    try:
        posterior = f[path_to_samples]
        is_group = isinstance(posterior, h5py.Group)
        if is_group and "parameter_names" not in posterior:
            original_parameters = list(posterior.keys())
            if remove_params is not None:
                parameters = [
                    i for i in original_parameters if i not in remove_params
                ]
            else:
                parameters = list(original_parameters)
            # h5py groups are only indexable by name, so every dataset is
            # looked up through its parameter name
            samples = np.array([posterior[i] for i in parameters]).T
            has_likelihood = (
                "loglr" in parameters or "log_likelihood" in parameters
            )
            stats_available = (
                "likelihood_stats" in f.keys()
                and "loglr" in f["likelihood_stats"]
            )
            # only merge the stored log likelihood ratio when the posterior
            # table has no likelihood column yet, to avoid a duplicate
            # 'log_likelihood' entry
            if not has_likelihood and stats_available:
                loglr = np.array(f["likelihood_stats/loglr"], dtype=float)
                if parameters:
                    # numpy rows have no append method, so the extra column
                    # is stacked onto the array instead
                    samples = np.column_stack(
                        (samples, loglr[:len(samples)])
                    )
                else:
                    samples = loglr.reshape(-1, 1)
                parameters.append("log_likelihood")
        elif is_group:
            original_parameters = [
                i.decode("utf-8") if isinstance(i, bytes) else i for i in
                posterior["parameter_names"]
            ]
            if remove_params is not None:
                parameters = [
                    i for i in original_parameters if i not in remove_params
                ]
            else:
                parameters = list(original_parameters)
            # NOTE(review): remove_params only filters the names here; the
            # 'samples' dataset is returned in full -- confirm whether the
            # matching columns should be dropped as well
            samples = np.array(posterior["samples"])
        elif isinstance(posterior, h5py.Dataset):
            # NOTE(review): remove_params is not applied in this branch and
            # the view presumably requires every field to be a float --
            # confirm against the files this is used for
            parameters = list(posterior.dtype.names)
            samples = np.array(posterior).view(
                (float, len(parameters))
            ).tolist()
        else:
            raise ValueError(
                "Unable to read the samples stored in '{}' in the file "
                "'{}'".format(path_to_samples, path)
            )
        if return_posterior_dataset:
            keep_open = True
            return parameters, samples, posterior
        return parameters, samples
    finally:
        # close the file on success and on failure alike, unless an open
        # h5py object is being handed back to the caller
        if not keep_open:
            f.close()

134

135

def _write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    dataset_name="posterior_samples", **kwargs
):
    """Save a single set of samples as a structured dataset in a hdf5 file

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the hdf5 file to
    label: str, optional
        The label of the analysis. This is used in the filename if a
        filename is not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    dataset_name: str, optional
        name of the dataset to store a set of samples. Default
        posterior_samples
    kwargs: dict, optional
        accepted but not used by this function
    """
    from pesummary.utils.samples_dict import SamplesDict
    from pesummary.utils.utils import check_filename

    output_path = check_filename(
        default_filename="pesummary_{}.h5", outdir=outdir, label=label,
        filename=filename, overwrite=overwrite
    )
    # the samples are transposed before being handed to SamplesDict
    transposed = np.array(samples).T
    structured = SamplesDict(parameters, transposed).to_structured_array()
    with h5py.File(output_path, "w") as handle:
        handle.create_dataset(dataset_name, data=structured)

172

173

def write_hdf5(
    parameters, samples, outdir="./", label=None, filename=None, overwrite=False,
    **kwargs
):
    """Save a set of samples to a hdf5 file

    The work is delegated to _multi_analysis_write, which is given
    _write_hdf5 as the function that writes the file.

    Parameters
    ----------
    parameters: list
        list of parameters
    samples: 2d list
        list of samples. Columns correspond to a given parameter
    outdir: str, optional
        directory to write the hdf5 file to
    label: str, optional
        The label of the analysis. This is used in the filename if a
        filename is not specified
    filename: str, optional
        The name of the file that you wish to write
    overwrite: Bool, optional
        If True, an existing file of the same name will be overwritten
    kwargs: dict, optional
        all additional kwargs are passed to _multi_analysis_write
    """
    from pesummary.io.write import _multi_analysis_write

    fixed_options = dict(
        outdir=outdir, label=label, filename=filename, overwrite=overwrite,
        file_format="hdf5"
    )
    _multi_analysis_write(
        _write_hdf5, parameters, samples, **fixed_options, **kwargs
    )