Coverage for pesummary/core/fetch.py: 69.8%
96 statements
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
« prev ^ index » next coverage.py v7.4.4, created at 2024-12-09 22:34 +0000
1# Licensed under an MIT style license -- see LICENSE.md
3import os
4import sys
5import shutil
6from pathlib import Path
7from astropy.utils.console import ProgressBarOrSpinner
8from astropy.utils.data import download_file, conf, _tempfilestodel
9from pesummary.io import read
10from pesummary.utils.utils import make_dir, CACHE_DIR
11from tempfile import NamedTemporaryFile
12import tarfile
# Module metadata.
__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"]

# Location used for downloaded data, inside the pesummary cache directory.
# make_dir is called at import time (presumably creating the directory when
# it does not exist yet -- see pesummary.utils.utils.make_dir).
download_dir = os.path.join(CACHE_DIR, "data")
make_dir(download_dir)

# ciecplib is optional: it is only needed by _download_authenticated_file.
# CIECPLIB records whether the import succeeded.
try:
    import ciecplib
except ImportError:
    CIECPLIB = False
else:
    CIECPLIB = True
def _unpack_and_extract(path_to_file, filename, path=None):
    """Unpack a tarball next to where it is stored

    Parameters
    ----------
    path_to_file: str or pathlib.Path
        path to the tarball you wish to unpack
    filename: str
        name of the tarball. Only used when `path` is None, where the
        returned path is the tarball's directory joined with the stem of
        `filename`
    path: str, optional
        if provided, extract only the first member of the tarball whose name
        contains `path`. If None, extract every member. Default None

    Returns
    -------
    pathlib.Path
        if `path` is None, the tarball's directory joined with
        `Path(filename).stem`; otherwise the path to the extracted member

    Raises
    ------
    ValueError
        if `path_to_file` is not a tarball, or if no member of the tarball
        contains `path` in its name
    """
    path_to_file = Path(path_to_file)
    if not tarfile.is_tarfile(path_to_file):
        raise ValueError("unable to unpack file")
    outdir = path_to_file.parent
    # open the archive in a context manager so that the file handle is
    # always released, including when one of the errors below is raised
    with tarfile.open(path_to_file, 'r') as tar:
        _files = tar.getnames()
        if path is None:
            print("Extracting all files from {}".format(path_to_file))
            # NOTE(review): member names of a downloaded archive are trusted
            # here; consider passing an extraction filter (filter="data")
            # once the minimum supported Python version provides it
            tar.extractall(path=outdir)
            return outdir / Path(filename).stem
        # first member whose name contains the requested path (substring
        # match, in archive order)
        _path = next((_file for _file in _files if path in _file), None)
        if _path is None:
            raise ValueError(
                "Unable to find a file called '{}' in tarball. The list of "
                "available files are: {}".format(path, ", ".join(_files))
            )
        tar.extract(_path, path=outdir)
    unpacked_file = outdir / _path
    if conf.delete_temporary_downloads_at_exit:
        # register the extracted file in astropy's list of temporary files
        _tempfilestodel.append(unpacked_file)
    return unpacked_file
def _scp_file(path):
    """Secure copy a file from a server

    Parameters
    ----------
    path: str
        file you wish to download. Should be of the form
        '{username}@{servername}:{path_to_file}'.

    Returns
    -------
    str
        path to the local temporary file that the remote file was copied to

    Raises
    ------
    subprocess.CalledProcessError
        if the `scp` command exits with a non-zero status
    OSError
        if the `scp` executable cannot be launched
    """
    import subprocess

    prefix = "pesummary-download-%s-" % (os.getpid())
    # only reserve a unique file name here; delete=False keeps the file on
    # disk after the handle is closed so that scp can write to it
    with NamedTemporaryFile(prefix=prefix, delete=False) as f:
        local = f.name
    try:
        # pass the arguments as a list (no shell) so that `path` is handed to
        # scp verbatim, and fail loudly if the copy does not succeed
        subprocess.run(["scp", path, local], check=True)
    except (OSError, subprocess.CalledProcessError):
        # do not leave an empty temporary file behind when the copy fails
        if os.path.exists(local):
            os.remove(local)
        raise
    return local
def _download_authenticated_file(
    url, unpack=False, path=None, block_size=2**16, **kwargs
):
    """Downloads a URL from an authenticated site

    Parameters
    ----------
    url: str
        url you wish to download
    unpack: Bool, optional
        not used by this function. Default False
    path: str, optional
        not used by this function. Default None
    block_size: int, optional
        number of bytes requested per chunk when streaming the response to
        disk. Default 2**16
    **kwargs: dict, optional
        additional kwargs passed to ciecplib.Session

    Returns
    -------
    str
        path to the temporary file that the response was written to

    Raises
    ------
    ImportError
        if ciecplib is not installed
    requests.HTTPError
        if the server responds with an error status (assumes the response
        returned by ciecplib.Session.get is a requests.Response)
    """
    if not CIECPLIB:
        raise ImportError(
            "Please install 'ciecplib' in order to download authenticated urls"
        )

    prefix = "pesummary-download-%s-" % (os.getpid())
    with ciecplib.Session(**kwargs) as sess:
        response = sess.get(url, stream=True)
        # stop here on an HTTP error rather than saving the error page as if
        # it were the requested file
        response.raise_for_status()
        content_length = response.headers.get('content-length')
        # an unknown size is passed as None so that a spinner is displayed
        # instead of a progress bar with a total of 0
        size = int(content_length) if content_length is not None else None
        dlmsg = "Downloading {}".format(url)
        bytes_read = 0
        with ProgressBarOrSpinner(size, dlmsg, file=sys.stdout) as p:
            # delete=False: the file must outlive this function
            with NamedTemporaryFile(prefix=prefix, delete=False) as f:
                for data in response.iter_content(block_size):
                    bytes_read += len(data)
                    p.update(bytes_read)
                    f.write(data)

    if conf.delete_temporary_downloads_at_exit:
        # register the download in astropy's list of temporary files
        _tempfilestodel.append(f.name)
    return f.name
def _download_file(url, unpack=False, path=None, **kwargs):
    """Download a URL with astropy.utils.data.download_file

    Parameters
    ----------
    url: str
        url you wish to download
    unpack: Bool, optional
        not used by this function. Default False
    path: str, optional
        not used by this function. Default None
    **kwargs: dict, optional
        additional kwargs passed to astropy.utils.data.download_file, e.g.
        to request caching

    Returns
    -------
    the value returned by astropy.utils.data.download_file
    """
    local_path = download_file(url, **kwargs)
    return local_path
def download_and_read_file(
    url, download_kwargs=None, read_file=True, delete_on_exit=True,
    outdir=None, unpack=False, path=None, _function=_download_file,
    **kwargs
):
    """Downloads a URL and reads the file with pesummary.io.read function

    Parameters
    ----------
    url: str
        url you wish to download
    download_kwargs: dict, optional
        optional kwargs passed to `_function`. If it contains a truthy
        'cache' entry, the downloaded file is copied rather than moved to
        its final location. Default None, meaning no kwargs
    read_file: Bool, optional
        if True, read the downloaded file and return the opened object.
        if False, return the path to the downloaded file. Default True
    delete_on_exit: Bool, optional
        if True, delete the file on exit. Default True
    outdir: str, optional
        save the file to outdir. Default the directory that `_function`
        (or the unpacking step) left the file in
    unpack: Bool, optional
        if True, treat the download as a tarball and unpack it with
        _unpack_and_extract. Default False
    path: str, optional
        only used when unpack is True: name of the file to extract from the
        tarball. If None, all files are extracted. Default None
    _function: func, optional
        function used to fetch `url`. It is called as
        `_function(url, **download_kwargs)` and must return the path to the
        local file. Default _download_file
    **kwargs: dict, optional
        additional kwargs passed to pesummary.io.read function

    Returns
    -------
    the object returned by pesummary.io.read if read_file is True, otherwise
    the path (pathlib.Path) to the downloaded file
    """
    # a fresh dict per call instead of a shared mutable default argument
    if download_kwargs is None:
        download_kwargs = {}
    # NOTE: this changes astropy's configuration item for the rest of the
    # session; it is not restored when this function returns
    conf.delete_temporary_downloads_at_exit = delete_on_exit
    local = _function(url, **download_kwargs)
    # zenodo adds /content to the end of the filename. This causes problems
    # later down the line
    if "zenodo" in url and Path(url).name == "content":
        url = Path(url).parent
    filename = Path(url).name
    if unpack:
        local = _unpack_and_extract(local, path=path, filename=filename)
        filename = Path(local).name
    if os.path.isdir(local):
        filename = Path(filename).stem
    if outdir is None:
        outdir = Path(local).parent
    # NOTE(review): `filename` is a bare name here, so this check is made
    # relative to the current working directory -- confirm this is intended
    if os.path.isdir(filename):
        new_name = Path(outdir)
    else:
        if not os.path.isfile(Path(outdir) / filename):
            new_name = Path(outdir) / filename
        else:
            # a file with this name already exists in outdir: prepend the
            # name of a throwaway temporary file to avoid overwriting it
            new_name = Path(outdir) / (
                Path(NamedTemporaryFile().name).name + "_" + filename
            )
    if download_kwargs.get("cache", None):
        # user asked for cache, so copy it from there
        shutil.copyfile(local, new_name)
    else:  # otherwise move it from wherever it is
        shutil.move(local, new_name)
    if not read_file:
        if conf.delete_temporary_downloads_at_exit:
            # register the final location in astropy's list of temporary files
            _tempfilestodel.append(new_name)
        return new_name
    data = read(new_name, **kwargs)
    if conf.delete_temporary_downloads_at_exit:
        # put the file back where it was downloaded to once it has been read
        shutil.move(new_name, local)
    return data
def scp_and_read_file(path, **kwargs):
    """Fetch a file with scp and read it with the pesummary.io.read function

    This is download_and_read_file with _scp_file used to fetch the file.

    Parameters
    ----------
    path: str
        file you wish to download. Should be of the form
        '{username}@{servername}:{path_to_file}'.
    **kwargs: dict, optional
        all kwargs passed to download_and_read_file

    Returns
    -------
    the value returned by download_and_read_file
    """
    contents = download_and_read_file(path, _function=_scp_file, **kwargs)
    return contents