Coverage for pesummary/gw/fetch.py: 64.6%

79 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2025-11-05 13:38 +0000

1# Licensed under an MIT style license -- see LICENSE.md 

2 

3from pathlib import Path 

4 

5from pesummary.core.fetch import ( 

6 download_and_read_file, _download_authenticated_file 

7) 

8from pesummary.utils.utils import logger 

9from pesummary.utils.decorators import deprecation 

10from gwosc.api import fetch_event_json 

11import numpy as np 

12 

13__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"] 

14 

15 

def fetch(url, download_kwargs=None, **kwargs):
    """Download and read files from LIGO authenticated URLs

    Parameters
    ----------
    url: str
        url you wish to download
    download_kwargs: dict, optional
        optional kwargs passed to _download_authenticated_file. When no
        "idp" entry is supplied, "LIGO" is used. The dictionary provided by
        the caller is never modified. Default None
    **kwargs: dict, optional
        additional kwargs passed to pesummary.io.read function

    Returns
    -------
    the output of download_and_read_file for the requested url
    """
    # Work on a private copy: writing the "idp" entry into the argument itself
    # would leak it into the caller's dictionary (or into a default shared
    # between calls).
    download_kwargs = dict(download_kwargs or {})
    download_kwargs.setdefault("idp", "LIGO")
    return download_and_read_file(
        url, download_kwargs=download_kwargs,
        _function=_download_authenticated_file, **kwargs
    )

34 

35 

def _DCC_url(
    event, type="posterior", catalog=None, sampling_rate=16384, format="gwf",
    duration=32, IFO="L1", version=None, download_latest_file=True
):
    """Return the url of the posterior samples or strain data released for a
    given event

    Parameters
    ----------
    event: str
        name of the event you wish to return the data url for
    type: str, optional
        type of data you wish to query. Must be either "posterior" or
        "strain". Default "posterior"
    catalog: str, optional
        Name of catalog that hosts the event. Default None
    sampling_rate: int, optional
        sampling rate of strain data you wish to download. Only used when
        type="strain". Default 16384
    format: str, optional
        format of strain data you wish to download. Only used when
        type="strain". Default "gwf"
    duration: int, optional
        duration of strain data you wish to download. Only used when
        type="strain". Default 32
    IFO: str, optional
        detector strain data you wish to download. Only used when type="strain".
        Default 'L1'
    version: str, optional
        version of the file to download. Default None
    download_latest_file: bool, optional
        if True, download the latest file if multiple are available. If False
        a ValueError is raised as no unique file can be found

    Returns
    -------
    url: str
        the single url selected for the requested data

    Raises
    ------
    ValueError
        if type is not recognised, if the query does not return exactly one
        event, or if several distinct urls are found and
        download_latest_file is False
    RuntimeError
        if no url matching the request can be found
    """
    if type not in ["posterior", "strain"]:
        raise ValueError(
            "Unknown data type: '{}'. Must be either 'posterior' or "
            "'strain'.".format(type)
        )

    # Query the event metadata once and reuse the response for the fallback
    # below, rather than issuing the same request a second time.
    events = fetch_event_json(
        event, catalog=catalog, version=version
    )["events"]
    # Single-element unpacking: raises ValueError unless exactly one event
    # is returned.
    entry, = events.values()
    try:
        first = list(entry)[0]
    except TypeError:
        # entry cannot be iterated, so it is used as the record itself
        data = entry
    else:
        # When iterating the entry yields strings (e.g. the keys of a dict),
        # the entry itself is the record; otherwise use its first element.
        data = entry if isinstance(first, str) else first

    if type == "posterior":
        # "_pe_" in key implies "_pe" in key.lower(), so one test suffices
        url = [
            item["data_url"] for key, item in data["parameters"].items()
            if "_pe" in key.lower()
        ]
    else:
        url = [
            _strain["url"] for _strain in data["strain"]
            if _strain["sampling_rate"] == sampling_rate
            and _strain["format"] == format
            and _strain["duration"] == duration
            and _strain["detector"] == IFO
        ]

    if not url:
        raise RuntimeError("Failed to find data URL for {}".format(event))
    # np.unique drops duplicated urls and returns them sorted
    url = np.unique(url)
    if len(url) == 1:
        return url[0]

    msg = "Multiple URLs found for {}: {}".format(event, ", ".join(url))
    if not download_latest_file:
        raise ValueError(msg)
    msg += ". Fetching most recent based on Unique IDs"
    if all("zenodo" in _ for _ in url) or all("dcc" in _ for _ in url):
        # All urls come from the same host: compare the integer ID embedded
        # in the path (6th "/"-separated field for zenodo urls, 7th
        # otherwise) and keep the largest one.
        ids = [
            int(_.split("/")[5]) if "zenodo" in _ else
            int(_.split("/")[6]) for _ in url
        ]
        ind = np.argmax(ids)
    else:
        # Mixed hosts: default to the first zenodo entry if one is
        # available, otherwise to the first url in the list.
        ind = next((num for num, _ in enumerate(url) if "zenodo" in _), 0)
    url = url[ind]
    msg += ": {}".format(url)
    logger.warning(msg)
    return url

136 

137 

@deprecation(
    "The 'fetch_open_data' function has changed its name to "
    "'fetch_open_samples' and 'fetch_open_data' may not be supported in future "
    "releases. Please update"
)
def fetch_open_data(event, **kwargs):
    """Deprecated alias of fetch_open_samples: download and read publicly
    available gravitational wave posterior samples

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    **kwargs: dict, optional
        all additional kwargs passed to fetch_open_samples
    """
    # simply delegate to the renamed function
    samples = fetch_open_samples(event, **kwargs)
    return samples

153 

154 

def _fetch_open_data(
    event, type="posterior", catalog=None, version=None, sampling_rate=16384,
    format="gwf", duration=32, IFO="L1", download_latest_file=True, **kwargs
):
    """Locate, download and read publicly available gravitational wave data

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    type: str, optional
        type of data you wish to download. Default "posterior"
    catalog: str, optional
        Name of catalog that hosts the event. Default None
    version: str, optional
        Version of the file to download. Default None
    sampling_rate: int, optional
        sampling rate of strain data you wish to download. Only used when
        type="strain". Default 16384
    format: str, optional
        format of strain data you wish to download. Only used when
        type="strain". Default "gwf"
    duration: int, optional
        duration of strain data you wish to download. Only used when
        type="strain". Default 32
    IFO: str, optional
        detector strain data you wish to download. Only used when type="strain".
        Default 'L1'
    download_latest_file: bool, optional
        if True, download the latest file if multiple are available. If False
        a ValueError is raised as no unique file can be found
    **kwargs: dict, optional
        all additional kwargs passed to download_and_read_file

    Raises
    ------
    ValueError
        if no URL could be found for the requested event
    """
    # gather everything needed to look the URL up
    query = dict(
        type=type, catalog=catalog, sampling_rate=sampling_rate,
        format=format, duration=duration, IFO=IFO, version=version,
        download_latest_file=download_latest_file,
    )
    try:
        url = _DCC_url(event, **query)
    except RuntimeError:
        # no URL was found: point the user at the manual alternative
        raise ValueError(
            "Unknown URL for {}. If the URL is known, please run "
            "download_and_read_file(URL)".format(event)
        )
    if type == "strain":
        # the detector name is forwarded alongside the other read kwargs
        kwargs["IFO"] = IFO
    return download_and_read_file(url, **kwargs)

201 

202 

def fetch_open_samples(event, **kwargs):
    """Download and read publicly available gravitational wave posterior
    samples

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    **kwargs: dict, optional
        all additional kwargs passed to _fetch_open_data

    Returns
    -------
    the output of _fetch_open_data when the file is read or a tarball is
    unpacked, otherwise the path to the downloaded file as a str
    """
    result = _fetch_open_data(event, type="posterior", **kwargs)

    # only a bare download (no reading, no unpacking) needs post-processing
    bare_download = (
        not kwargs.get("read_file", True)
        and not kwargs.get("unpack", False)
    )
    if not bare_download:
        return result

    # Zenodo may return a file without a suffix, in which case one is added
    # see https://git.ligo.org/gwosc/client/-/issues/95
    path = Path(result)
    if path.suffix:
        return str(path)
    renamed = path.rename(path.with_suffix(".h5"))
    return str(renamed)

230 

231 

def fetch_open_strain(event, format="gwf", **kwargs):
    """Download and read publicly available gravitational wave strain data

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    format: str, optional
        format of strain data you wish to download. Default "gwf"
    **kwargs: dict, optional
        all additional kwargs passed to _fetch_open_data
    """
    # Forward the format requested by the caller; it was previously replaced
    # by a hard-coded "gwf", so the argument had no effect.
    return _fetch_open_data(event, type="strain", format=format, **kwargs)