Coverage for pesummary/gw/fetch.py: 64.6%
79 statements
« prev ^ index » next coverage.py v7.4.4, created at 2025-11-05 13:38 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2025-11-05 13:38 +0000
1# Licensed under an MIT style license -- see LICENSE.md
3from pathlib import Path
5from pesummary.core.fetch import (
6 download_and_read_file, _download_authenticated_file
7)
8from pesummary.utils.utils import logger
9from pesummary.utils.decorators import deprecation
10from gwosc.api import fetch_event_json
11import numpy as np
13__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]
def fetch(url, download_kwargs=None, **kwargs):
    """Download and read files from LIGO authenticated URLs

    Parameters
    ----------
    url: str
        url you wish to download
    download_kwargs: dict, optional
        optional kwargs passed to _download_authenticated_file. If no "idp"
        entry is provided, "LIGO" is used. The dictionary supplied by the
        caller is never modified. Default None
    **kwargs: dict, optional
        additional kwargs passed to pesummary.io.read function

    Returns
    -------
    The output of download_and_read_file for the requested url
    """
    # work on a copy: inserting the default identity provider must not leak
    # into the caller's dictionary, nor into a default shared between calls
    download_kwargs = dict(download_kwargs) if download_kwargs else {}
    download_kwargs.setdefault("idp", "LIGO")
    return download_and_read_file(
        url, download_kwargs=download_kwargs,
        _function=_download_authenticated_file, **kwargs
    )
36def _DCC_url(
37 event, type="posterior", catalog=None, sampling_rate=16384, format="gwf",
38 duration=32, IFO="L1", version=None, download_latest_file=True
39):
40 """Return the url for posterior samples stored on the DCC for a given event
42 Parameters
43 ----------
44 event: str
45 name of the event you wish to return posterior samples for
46 type: str, optional
47 type of data you wish to query. Default "posterior"
48 catalog: str, optional
49 Name of catalog that hosts the event. Default None
50 sampling_rate: int, optional
51 sampling rate of strain data you wish to download. Only used when
52 type="strain". Default 16384
53 format: str, optional
54 format of strain data you wish to download. Only used when
55 type="strain". Default "gwf"
56 duration: int, optional
57 duration of strain data you wish to download. Only used when
58 type="strain". Default 32
59 IFO: str, optional
60 detector strain data you wish to download. Only used when type="strain".
61 Default 'L1'
62 version: str, optional
63 version of the file to download. Default None
64 download_latest_file: bool, optional
65 if True, download the latest file if multiple are available. If False
66 a ValueError is raised as no unique file can be found
67 """
68 if type not in ["posterior", "strain"]:
69 raise ValueError(
70 "Unknown data type: '{}'. Must be either 'posterior' or "
71 "'strain'.".format(type)
72 )
73 try:
74 data, = fetch_event_json(
75 event, catalog=catalog, version=version
76 )["events"].values()
77 data = list(data)[0]
78 if isinstance(data, str):
79 raise TypeError
80 except TypeError:
81 data = list(
82 fetch_event_json(
83 event, catalog=catalog, version=version
84 )["events"].values()
85 )[0]
86 url = []
87 if type == "posterior":
88 for key, item in data["parameters"].items():
89 if ("_pe_" in key) or "_pe" in key.lower():
90 url.append(item["data_url"])
91 elif type == "strain":
92 strain = data["strain"]
93 for _strain in strain:
94 cond = (
95 _strain["sampling_rate"] == sampling_rate
96 and _strain["format"] == format
97 and _strain["duration"] == duration
98 and _strain["detector"] == IFO
99 )
100 if cond:
101 url.append(_strain["url"])
103 if not len(url):
104 url = None
105 else:
106 url = np.unique(url)
107 if len(url) == 1:
108 url = url[0]
109 if url is None:
110 raise RuntimeError("Failed to find data URL for {}".format(event))
111 elif isinstance(url, np.ndarray):
112 msg = "Multiple URLs found for {}: {}".format(event, ", ".join(url))
113 if download_latest_file:
114 msg += ". Fetching most recent based on Unique IDs"
115 if all("zenodo" in _ for _ in url) or all("dcc" in _ for _ in url):
116 ids = [
117 int(_.split("/")[5]) if "zenodo" in _ else
118 int(_.split("/")[6]) for _ in url
119 ]
120 ind = np.argmax(ids)
121 else:
122 for num, _ in enumerate(url):
123 # if zenodo entry available, default to that version
124 if "zenodo" in _:
125 ind = num
126 break
127 else:
128 # otherwise default to the first in the list
129 ind = 0
130 url = url[ind]
131 msg += ": {}".format(url)
132 logger.warning(msg)
133 else:
134 raise ValueError(msg)
135 return url
@deprecation(
    "The 'fetch_open_data' function has changed its name to "
    "'fetch_open_samples' and 'fetch_open_data' may not be supported in future "
    "releases. Please update"
)
def fetch_open_data(event, **kwargs):
    """Deprecated alias of fetch_open_samples: download and read publicly
    available gravitational wave posterior samples

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    **kwargs: dict, optional
        all additional kwargs passed to fetch_open_samples
    """
    samples = fetch_open_samples(event, **kwargs)
    return samples
def _fetch_open_data(
    event, type="posterior", catalog=None, version=None, sampling_rate=16384,
    format="gwf", duration=32, IFO="L1", download_latest_file=True, **kwargs
):
    """Download and read publicly available gravitational wave data

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    type: str, optional
        type of data you wish to download. Default "posterior"
    catalog: str, optional
        Name of catalog that hosts the event. Default None
    version: str, optional
        Version of the file to download. Default None
    sampling_rate: int, optional
        sampling rate of strain data you wish to download. Only used when
        type="strain". Default 16384
    format: str, optional
        format of strain data you wish to download. Only used when
        type="strain". Default "gwf"
    duration: int, optional
        duration of strain data you wish to download. Only used when
        type="strain". Default 32
    IFO: str, optional
        detector strain data you wish to download. Only used when type="strain".
        Default 'L1'
    download_latest_file: bool, optional
        if True, download the latest file if multiple are available. If False
        a ValueError is raised as no unique file can be found
    **kwargs: dict, optional
        all additional kwargs passed to download_and_read_file

    Raises
    ------
    ValueError
        if _DCC_url fails with a RuntimeError, i.e. no url could be found
    """
    # everything needed to locate the file is forwarded to _DCC_url
    query = dict(
        type=type, catalog=catalog, sampling_rate=sampling_rate,
        format=format, duration=duration, IFO=IFO, version=version,
        download_latest_file=download_latest_file
    )
    try:
        location = _DCC_url(event, **query)
    except RuntimeError:
        raise ValueError(
            "Unknown URL for {}. If the URL is known, please run "
            "download_and_read_file(URL)".format(event)
        )
    if type == "strain":
        # the detector is also passed on to download_and_read_file
        kwargs["IFO"] = IFO
    return download_and_read_file(location, **kwargs)
def fetch_open_samples(event, **kwargs):
    """Download and read publicly available gravitational wave posterior
    samples

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    **kwargs: dict, optional
        all additional kwargs passed to _fetch_open_data. When "read_file" is
        False and "unpack" is not set, the output of _fetch_open_data is
        treated as a file path and returned as a string, with a ".h5" suffix
        added if it has none
    """
    # fetch posterior data
    result = _fetch_open_data(event, type="posterior", **kwargs)

    # if asked to read the data, or unpack a tarball, just return it now
    if kwargs.get("read_file", True) or kwargs.get("unpack", False):
        return result

    path = Path(result)
    if path.suffix:
        return str(path)
    # Zenodo returned a file without a suffix, so we need to add one
    # see https://git.ligo.org/gwosc/client/-/issues/95
    renamed = path.rename(path.with_suffix(".h5"))
    return str(renamed)
def fetch_open_strain(event, format="gwf", **kwargs):
    """Download and read publicly available gravitational wave strain data

    Parameters
    ----------
    event: str
        name of the gravitational wave event you wish to download data for
    format: str, optional
        format of strain data you wish to download. Default "gwf"
    **kwargs: dict, optional
        all additional kwargs passed to _fetch_open_data
    """
    # forward the requested format rather than always asking for "gwf";
    # `format` is a named parameter so it can never also appear in kwargs
    return _fetch_open_data(event, type="strain", format=format, **kwargs)