17from __future__
import print_function
19"""Modules extending the Cache file functionality from LAL
25from functools
import total_ordering
26from urllib.parse
import (
31import igwn_segments
as segments
33from ..
import git_version
34from ..
import CacheImport
35from ..
import LIGOTimeGPS
37__author__ =
"Duncan Macleod <duncan.macleod@ligo.org>"
38__version__ = git_version.id
39__date__ = git_version.date
41__all__ = [
'CacheEntry',
'lalcache_from_gluecache']
44 """Convert a glue.lal.Cache object to a lal.Cache object.
45 Writes cache to temporary file and reads to Cache.
48 LAL cache object
from GLUE to convert
49 type cache glue.lal.Cache
51 @returns a lal.Cache object representing the same data
53 with tempfile.NamedTemporaryFile(delete=
False, mode=
"w")
as t:
56 e.segment = type(e.segment)(int(e.segment[0]), int(e.segment[1]))
58 frcache = CacheImport(t.name)
71 A Python object representing one line in a LAL cache file.
73 The LAL cache format
is defined elsewhere,
and what follows
is meant
74 only to be informative,
not an official specification. Each line
in a
75 LAL cache identifies a single file,
and the line consists of five
76 columns of white-space delimited text.
78 The first column,
"observatory", generally stores the name of an
79 observatory site
or one
or more instruments (preferably delimited by
80 ",", but often there
is no delimiter between instrument names
in which
81 case they should be 2 characters each).
83 The second column,
"description", stores a short string tag that
is
84 usually all capitals
with "_" separating components,
in the style of
85 the description part of the LIGO-Virgo frame filename format.
87 The third
and fourth columns store the start time
and duration
in GPS
88 seconds of the interval spanned by the file identified by the cache
89 line. When the file does
not start on an integer second
or its
90 duration
is not an integer number of seconds, the conventions of the
91 LIGO-Virgo frame filename format apply.
93 The fifth (last) column stores the file
's URL.
95 The values for these columns are stored
in the .observatory,
96 .description, .segment
and .url attributes of instances of this
class,
97 respectively. The .segment attribute stores an igwn_segments.segment
98 object describing the interval spanned by the file. Any of these
99 attributes
except the URL
is allowed to be
None.
101 Example (parse a string):
103 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
109 Example (one-liners to read
and write a cache file):
112 >>> filename =
"874000000-20000.cache"
114 >>> inname = os.path.join(os.environ.get(
"LAL_TEST_SRCDIR",
"."), filename)
116 >>> cache = list(map(CacheEntry, open(inname)))
118 >>> print(*cache, sep =
"\\n", file = open(filename +
".new",
"w"))
120 Example (extract segmentlist dictionary
from LAL cache):
122 >>>
import igwn_segments
as segments
123 >>> seglists = segments.segmentlistdict()
124 >>>
for cacheentry
in cache:
125 ... seglists |= cacheentry.segmentlistdict
128 NOTE: the CacheEntry type defines a comparison operation
and a
129 .__hash__() implementation, both of which disregard the URL. That
is,
130 if two CacheEntry objects differ only by URL
and otherwise have same
131 metadata, they are considered to be redundant copies of the same data.
132 For example, uniquification
with a set() will retain only one redundant
133 copy, selected at random.
135 >>> x =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
136 >>> y =
CacheEntry(
"H1 S5 815901601 576.5 gsiftp://data.server.org/bigpileofdata/H1-815901601-576.xml")
142 NOTE: this
is a pure Python object providing an alternative
143 representation of the contents of a LAL cache file to the C
144 implementation
in the LAL library proper. The two are
not
149 igwn_segments.utils.fromlalcache()
153 _regex = re.compile(
r"\A\s*(?P<obs>\S+)\s+(?P<dsc>\S+)\s+(?P<strt>\S+)\s+(?P<dur>\S+)\s+(?P<url>\S+)\s*\Z")
154 _url_regex = re.compile(
r"\A((.*/)*(?P<obs>[^/]+)-(?P<dsc>[^/]+)-(?P<strt>[^/]+)-(?P<dur>[^/\.]+)\.[^/]+)\Z")
156 def __init__(self, *args, **kwargs):
158 Initialize a CacheEntry object. The arguments can take two forms:
159 a single string argument, which is interpreted
and parsed
as a line
160 from a LAL cache file,
or four arguments used to explicitly
161 initialize the observatory, description, segment
and URL
in that
162 order. When parsing a single line of text
from a LAL cache, an
163 optional key-word argument
"coltype" can be provided to set the
164 type the start
and durations are parsed
as. The default
is
169 >>> c =
CacheEntry(
"H1",
"S5", segments.segment(815901601, 815902177.5),
"file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
171 [815901601 ... 815902177.5)
173 H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml
174 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
176 [815901601 ... 815902177.5)
177 >>> print(
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml", coltype = float).segment)
178 [815901601.0 ... 815902177.5)
181 alternative initialization mechanism.
185 match = self.
_regex.search(args[0])
187 match = match.groupdict()
188 except AttributeError:
189 raise ValueError(
"could not convert %s to CacheEntry" % repr(args[0]))
193 start = str(match[
"strt"])
194 duration = str(match[
"dur"])
195 coltype = kwargs.pop(
"coltype", LIGOTimeGPS)
196 if start ==
"-" and duration ==
"-":
200 start = coltype(start)
201 self.
segment = segments.segment(start, start + coltype(duration))
204 raise TypeError(
"unrecognized keyword arguments: %s" %
", ".join(kwargs))
209 raise TypeError(
"invalid arguments: %s" %
", ".join(kwargs))
212 raise TypeError(
"invalid arguments: %s" % args)
223 Convert the CacheEntry to a string in the format of a line
in a LAL
224 cache. Used to write the CacheEntry to a file.
228 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
230 'H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml'
234 duration = str(abs(self.
segment))
242 Compare two CacheEntry objects by observatory, then description,
243 then segment. CacheEntry objects that have different URLs but for
244 which all other metadata are the same are considered to be
245 equivalent. If two entries differ only by their URL, they are
246 considered to be redundant copies of the same data,
and by
247 comparing them
as equal the Python sort operation (which
is a
248 stable sort) will preserve their relative order. By preserving the
249 order of redundant copies, we allow the preference
for the order
in
250 which redundant copies are to be attempted to be conveyed by their
251 order
in the list,
and preserved.
253 if not isinstance(other, CacheEntry):
254 raise TypeError(
"can only compare CacheEntry to CacheEntry")
259 Compare two CacheEntry objects by observatory, then description,
260 then segment. CacheEntry objects that have different URLs but for
261 which all other metadata are the same are considered to be
262 equivalent. If two entries differ only by their URL, they are
263 considered to be redundant copies of the same data,
and by
264 comparing them
as equal the Python sort operation (which
is a
265 stable sort) will preserve their relative order. By preserving the
266 order of redundant copies, we allow the preference
for the order
in
267 which redundant copies are to be attempted to be conveyed by their
268 order
in the list,
and preserved.
270 if not isinstance(other, CacheEntry):
271 raise TypeError(
"can only compare CacheEntry to CacheEntry")
276 CacheEntry objects are hashed by the tuple (observatory,
277 description, segment), i.e., the URL is disregarded.
283 Return the path component of the URL. This makes CacheEntry
284 objects usable as path-like objects
in functions that accept
285 path-like objects, such
as open().
289 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
290 >>> os.path.basename(c)
291 'H1-815901601-576.xml'
293 if self.scheme
not in (
"",
"file"):
295 f
"cannot use {type(self).__name__} as path-like object with "
296 f
"scheme='{self.scheme}'"
298 raise ValueError(msg)
304 The cache entry's URL. The URL is constructed from the values of
305 the scheme, host, and path attributes. Assigning a value to the
306 URL attribute causes the value to be parsed
and the scheme, host
307 and path attributes updated.
309 return urlunparse((self.scheme, self.host, self.
path,
None,
None,
None))
313 self.scheme, self.host, self.
path = urlparse(url)[:3]
318 A segmentlistdict object describing the instruments and time
319 spanned by this CacheEntry. A new object
is constructed each time
320 this attribute
is accessed (segments are immutable so there
is no
321 reason to
try to share a reference to the CacheEntry
's internal
322 segment; modifications of one would not be reflected
in the other
327 >>> c =
CacheEntry(
"H1 S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1-815901601-576.xml")
328 >>> c.segmentlistdict[
'H1']
329 [
segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
331 The \
"observatory\" column of the cache entry, which is frequently
332 used to store instrument names, is parsed into instrument names
for
333 the dictionary keys using the same rules
as
334 igwn_ligolw.lsctables.instrumentsproperty.get().
338 >>> c =
CacheEntry(
"H1H2, S5 815901601 576.5 file://localhost/home/kipp/tmp/1/H1H2-815901601-576.xml")
339 >>> c.segmentlistdict[
'H1H2']
340 [
segment(LIGOTimeGPS(815901601, 0), LIGOTimeGPS(815902177, 500000000))]
343 instruments = (
None,)
345 instruments = {obs
for obs
in map(str.strip, self.
observatory.split(
","))
if obs}
346 return segments.segmentlistdict((instrument, segments.segmentlist(self.
segment is not None and [self.
segment]
or []))
for instrument
in instruments)
351 Parse a URL in the style of T050017-00 into a CacheEntry. The
352 T050017-00 file name format
is, essentially,
354 observatory-description-start-duration.extension
358 >>> c = CacheEntry.from_T050017(
"file://localhost/data/node144/frames/S5/strain-L2/LLO/L-L1_RDS_C03_L2-8365/L-L1_RDS_C03_L2-836562330-83.gwf")
363 >>> os.path.basename(c.path)
364 'L-L1_RDS_C03_L2-836562330-83.gwf'
368 raise ValueError(
"could not convert %s to CacheEntry" % repr(url))
369 observatory = match.group(
"obs")
370 description = match.group(
"dsc")
372 start = str(match.group(
"strt"))
373 duration = str(match.group(
"dur"))
374 if start ==
"-" and duration ==
"-":
378 segment = segments.segment(coltype(start), coltype(start) + coltype(duration))
379 return cls(observatory, description, segment, url)
static size_t hash(const char *s)
A Python object representing one line in a LAL cache file.
def __fspath__(self)
Return the path component of the URL.
def __lt__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
def segmentlistdict(self)
A segmentlistdict object describing the instruments and time spanned by this CacheEntry.
def __init__(self, *args, **kwargs)
Initialize a CacheEntry object.
def from_T050017(cls, url, coltype=LIGOTimeGPS)
Parse a URL in the style of T050017-00 into a CacheEntry.
def __str__(self)
Convert the CacheEntry to a string in the format of a line in a LAL cache.
def __eq__(self, other)
Compare two CacheEntry objects by observatory, then description, then segment.
def url(self)
The cache entry's URL.
def __hash__(self)
CacheEntry objects are hashed by the tuple (observatory, description, segment), i....
def lalcache_from_gluecache(cache)
Convert a glue.lal.Cache object to a lal.Cache object.