Coverage for pesummary/core/fetch.py: 69.8%

96 statements  

« prev     ^ index     » next       coverage.py v7.4.4, created at 2024-05-02 08:42 +0000

1# Licensed under an MIT style license -- see LICENSE.md 

2 

3import os 

4import sys 

5import shutil 

6from pathlib import Path 

7from astropy.utils.console import ProgressBarOrSpinner 

8from astropy.utils.data import download_file, conf, _tempfilestodel 

9from pesummary.io import read 

10from pesummary.utils.utils import make_dir, CACHE_DIR 

11from tempfile import NamedTemporaryFile 

12import tarfile 

13 

14__author__ = ["Charlie Hoy <charlie.hoy@ligo.org>"] 

15 

# Location used for downloaded data: a "data" sub-directory of the
# pesummary cache directory. make_dir is called on it at import time.
download_dir = os.path.join(CACHE_DIR, "data")
make_dir(download_dir)

# ciecplib is an optional dependency which is only needed for downloading
# from authenticated urls. Record whether or not it could be imported.
try:
    import ciecplib
except ImportError:
    CIECPLIB = False
else:
    CIECPLIB = True

24 

25 

def _unpack_and_extract(path_to_file, filename, path=None):
    """Unpack a tarball and return the location of the extracted data

    Parameters
    ----------
    path_to_file: str
        path to the tarball you wish to unpack. Everything is extracted into
        the directory which contains the tarball
    filename: str
        name of the tarball. Only used when `path` is None, in which case
        the stem of `filename` is appended to the extraction directory to
        form the returned location
    path: str, optional
        name (or part of the name) of a single member you wish to extract.
        The first member whose name contains `path` is extracted. If None,
        all members are extracted. Default None

    Returns
    -------
    unpacked: pathlib.Path
        if `path` is None, the extraction directory joined with the stem of
        `filename`. Otherwise the path to the extracted member

    Raises
    ------
    ValueError
        if `path_to_file` is not a tarball, or if no member of the tarball
        has a name containing `path`
    """
    path_to_file = Path(path_to_file)
    if not tarfile.is_tarfile(path_to_file):
        raise ValueError("unable to unpack file")
    outdir = path_to_file.parent
    # NOTE(review): members are extracted exactly as named in the archive,
    # so the tarball is assumed to come from a trusted source.
    # The context manager guarantees the tarball is closed on every return
    # and raise path below.
    with tarfile.open(path_to_file, 'r') as tar:
        _files = tar.getnames()
        if path is None:
            print("Extracting all files from {}".format(path_to_file))
            tar.extractall(path=outdir)
            return outdir / Path(filename).stem
        # first member whose name contains the requested path, if any
        _path = next((_file for _file in _files if path in _file), None)
        if _path is None:
            raise ValueError(
                "Unable to find a file called '{}' in tarball. The list of "
                "available files are: {}".format(path, ", ".join(_files))
            )
        tar.extract(_path, path=outdir)
    unpacked_file = outdir / _path
    if conf.delete_temporary_downloads_at_exit:
        # register the extracted file in astropy's list of temporary files
        _tempfilestodel.append(unpacked_file)
    return unpacked_file

50 

51 

def _scp_file(path):
    """Secure copy a file from a server to a local temporary file

    Parameters
    ----------
    path: str
        file you wish to download. Should be of the form
        '{username}@{servername}:{path_to_file}'.

    Returns
    -------
    local: str
        path to the local temporary file that the data was copied to

    Raises
    ------
    subprocess.CalledProcessError
        if the scp command exits with a non-zero status
    OSError
        if the scp executable could not be run
    """
    import subprocess

    pid = os.getpid()
    prefix = "pesummary-download-%s-" % (pid)
    # Only reserve a unique filename here. The handle is closed before scp
    # writes to the file.
    with NamedTemporaryFile(prefix=prefix, delete=False) as f:
        local = f.name
    try:
        # Pass the arguments as a list (no shell) so that `path` can never
        # be interpreted as shell syntax, and fail loudly if the copy fails
        # rather than handing back an empty file.
        subprocess.run(["scp", path, local], check=True)
    except Exception:
        # do not leave the empty temporary file behind
        if os.path.exists(local):
            os.remove(local)
        raise
    return local

68 

69 

def _download_authenticated_file(
    url, unpack=False, path=None, block_size=2**16, **kwargs
):
    """Downloads a URL from an authenticated site to a temporary file

    Parameters
    ----------
    url: str
        url you wish to download
    unpack: Bool, optional
        not used by this function. Default False
    path: str, optional
        not used by this function. Default None
    block_size: int, optional
        chunk size passed to response.iter_content when streaming the
        download to disk. Default 2**16
    **kwargs: dict, optional
        additional kwargs passed to ciecplib.Session

    Returns
    -------
    local: str
        path to the temporary file containing the downloaded data

    Raises
    ------
    ImportError
        if ciecplib is not installed
    """
    if not CIECPLIB:
        raise ImportError(
            "Please install 'ciecplib' in order to download authenticated urls"
        )

    prefix = "pesummary-download-%s-" % (os.getpid())
    with ciecplib.Session(**kwargs) as sess:
        response = sess.get(url, stream=True)
        # Fail early on an HTTP error status rather than saving the error
        # page as if it were the requested file.
        # NOTE(review): assumes the response is a requests-style Response
        # (it is already used via .headers and .iter_content) -- confirm
        response.raise_for_status()
        # a missing content-length header is treated as an unknown size (0)
        size = int(response.headers.get('content-length', 0))
        dlmsg = "Downloading {}".format(url)
        bytes_read = 0
        with ProgressBarOrSpinner(size, dlmsg, file=sys.stdout) as p:
            with NamedTemporaryFile(prefix=prefix, delete=False) as f:
                for data in response.iter_content(block_size):
                    bytes_read += len(data)
                    p.update(bytes_read)
                    f.write(data)

    if conf.delete_temporary_downloads_at_exit:
        # register the download in astropy's list of temporary files
        _tempfilestodel.append(f.name)
    return f.name

104 

105 

def _download_file(url, unpack=False, path=None, **kwargs):
    """Download a URL with astropy.utils.data.download_file

    Parameters
    ----------
    url: str
        url you wish to download
    unpack: Bool, optional
        accepted for a consistent signature but not used by this function.
        Default False
    path: str, optional
        accepted for a consistent signature but not used by this function.
        Default None
    **kwargs: dict, optional
        additional kwargs passed to astropy.utils.data.download_file

    Returns
    -------
    local_path:
        whatever astropy.utils.data.download_file returns for `url`
    """
    local_path = download_file(url, **kwargs)
    return local_path

119 

120 

def download_and_read_file(
    url, download_kwargs=None, read_file=True, delete_on_exit=True,
    outdir=None, unpack=False, path=None, _function=_download_file,
    **kwargs
):
    """Downloads a URL and reads the file with pesummary.io.read function

    Parameters
    ----------
    url: str
        url you wish to download
    download_kwargs: dict, optional
        optional kwargs passed to `_function`. Default None, which is
        treated as an empty dictionary
    read_file: Bool, optional
        if True, read the downloaded file and return the opened object.
        if False, return the path to the downloaded file. Default True
    delete_on_exit: Bool, optional
        if True, delete the file on exit. Default True
    outdir: str, optional
        save the file to outdir. Default the directory containing the
        downloaded file
    unpack: Bool, optional
        if True, pass the downloaded file to _unpack_and_extract.
        Default False
    path: str, optional
        passed to _unpack_and_extract when `unpack` is True. Default None
    _function: func, optional
        function used to fetch `url`. It is called as
        `_function(url, **download_kwargs)` and its return value is used as
        the path to the local file. Default _download_file
    **kwargs: dict, optional
        additional kwargs passed to pesummary.io.read function

    Returns
    -------
    data:
        if `read_file` is True, the object returned by pesummary.io.read.
        Otherwise the path to the downloaded file
    """
    # None replaces a mutable `{}` default, which would be shared by calls
    if download_kwargs is None:
        download_kwargs = {}
    # this sets a module-wide astropy configuration item
    conf.delete_temporary_downloads_at_exit = delete_on_exit
    local = _function(url, **download_kwargs)
    # zenodo adds /content to the end of the filename. This causes problems
    # later down the line
    if "zenodo" in url and Path(url).name == "content":
        url = Path(url).parent
    filename = Path(url).name
    if unpack:
        local = _unpack_and_extract(local, path=path, filename=filename)
        filename = Path(local).name
    if os.path.isdir(local):
        filename = Path(filename).stem
    if outdir is None:
        outdir = Path(local).parent
    # NOTE(review): `filename` is a bare name here, so this check is made
    # relative to the current working directory -- confirm whether `local`
    # was intended
    if os.path.isdir(filename):
        new_name = Path(outdir)
    else:
        if not os.path.isfile(Path(outdir) / filename):
            new_name = Path(outdir) / filename
        else:
            # a file with this name already exists in outdir, so prepend a
            # unique temporary-file name to avoid overwriting it
            new_name = Path(outdir) / (
                Path(NamedTemporaryFile().name).name + "_" + filename
            )
    if download_kwargs.get("cache", None):
        # user asked for cache, so copy it from there
        shutil.copyfile(local, new_name)
    else:  # otherwise move it from wherever it is
        shutil.move(local, new_name)
    if not read_file:
        if conf.delete_temporary_downloads_at_exit:
            # register the file in astropy's list of temporary files
            _tempfilestodel.append(new_name)
        return new_name
    data = read(new_name, **kwargs)
    if conf.delete_temporary_downloads_at_exit:
        # once read, put the file back where it was originally downloaded to
        shutil.move(new_name, local)
    return data

181 

182 

def scp_and_read_file(path, **kwargs):
    """Fetch a file over scp and read it with the pesummary.io.read function

    This is download_and_read_file with _scp_file used to fetch the file.

    Parameters
    ----------
    path: str
        file you wish to download. Should be of the form
        '{username}@{servername}:{path_to_file}'.
    **kwargs: dict, optional
        all kwargs passed to download_and_read_file

    Returns
    -------
    result:
        whatever download_and_read_file returns
    """
    result = download_and_read_file(path, **kwargs, _function=_scp_file)
    return result