LALBurst 2.0.7.1-eeff03c
lalburst_cut.py
Go to the documentation of this file.
1##python
2#
3# Copyright (C) 2006,2013 Kipp Cannon
4#
5# This program is free software; you can redistribute it and/or modify it
6# under the terms of the GNU General Public License as published by the
7# Free Software Foundation; either version 2 of the License, or (at your
8# option) any later version.
9#
10# This program is distributed in the hope that it will be useful, but
11# WITHOUT ANY WARRANTY; without even the implied warranty of
12# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
13# Public License for more details.
14#
15# You should have received a copy of the GNU General Public License along
16# with this program; if not, write to the Free Software Foundation, Inc.,
17# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
19
20#
21# =============================================================================
22#
23# Preamble
24#
25# =============================================================================
26#
27
28
29from optparse import OptionParser
30import sys
31
32
33from igwn_ligolw import ligolw
34from igwn_ligolw import lsctables
35from igwn_ligolw import utils as ligolw_utils
36from igwn_ligolw.utils import segments as ligolw_segments
37from igwn_ligolw.utils import process as ligolw_process
38from igwn_ligolw.utils import search_summary as ligolw_search_summary
39from lalburst import git_version
40import igwn_segments as segments
41
42
43__author__ = "Kipp Cannon <kipp.cannon@ligo.org>"
44__version__ = "git id %s" % git_version.id
45__date__ = git_version.date
46
47
48process_program_name = "lalburst_cut"
49
50
51#
52# =============================================================================
53#
54# Command Line
55#
56# =============================================================================
57#
58
59
61 parser = OptionParser(
62 version = "Name: %%prog\n%s" % git_version.verbose_msg,
63 usage = "%prog [options] [file ...]",
64 description = "Removes sngl_burst events from XML files according to a variety of criteria. Files named on the command line are read one-by-one, and over-written with the new files. If no files are named on the command line, input is read from stdin and written to stdout. Note that, for the most part, this program does not understand coincidence information, and so if an injection or burst event is removed that participates in a coincidence, this program simply deletes the entire coincidence as well (before applying the --coinc-only cut)."
65 )
66 parser.add_option("--coinc-only", action = "store_true", help = "Discard events that are not participating in a coincident event.")
67 parser.add_option("--comment", metavar = "text", help = "Set the comment string to be recorded in the process table for this job (default = None).")
68 parser.add_option("--inj-made-only", action = "store_true", help = "Discard injections outside the search summary out segments.")
69 parser.add_option("--min-amplitude", metavar = "value", type = "float", help = "Discard events below the given amplitude.")
70 parser.add_option("--max-amplitude", metavar = "value", type = "float", help = "Discard events above the given amplitude.")
71 parser.add_option("--min-bandwidth", metavar = "Hz", type = "float", help = "Discard events narrower than the given bandwidth.")
72 parser.add_option("--max-bandwidth", metavar = "Hz", type = "float", help = "Discard events wider than the given bandwidth.")
73 parser.add_option("--min-central-freq", metavar = "Hz", type = "float", help = "Discard events with central frequency lower than that given.")
74 parser.add_option("--max-central-freq", metavar = "Hz", type = "float", help = "Discard events with central frequency higher than that given.")
75 parser.add_option("--min-confidence", metavar = "value", type = "float", help = "Discard events below the given confidence.")
76 parser.add_option("--max-confidence", metavar = "value", type = "float", help = "Discard events above the given confidence.")
77 parser.add_option("--min-duration", metavar = "seconds", type = "float", help = "Discard events shorter than the given duration.")
78 parser.add_option("--max-duration", metavar = "seconds", type = "float", help = "Discard events longer than the given duration.")
79 parser.add_option("--min-fhigh", metavar = "Hz", type = "float", help = "Discard events with highest frequency below the given frequency.")
80 parser.add_option("--max-fhigh", metavar = "Hz", type = "float", help = "Discard events with highest frequency above the given frequency.")
81 parser.add_option("--min-flow", metavar = "Hz", type = "float", help = "Discard events with lowest frequency below the given frequency.")
82 parser.add_option("--max-flow", metavar = "Hz", type = "float", help = "Discard events with loest frequency above the given frequency.")
83 parser.add_option("--min-hrss", metavar = "value", type = "float", help = "Discard events with h_rss below the given value.")
84 parser.add_option("--max-hrss", metavar = "value", type = "float", help = "Discard events with h_rss above the given value.")
85 parser.add_option("--cut-instrument", metavar = "name", action = "append", default = [], help = "Discard events from given instrument.")
86 parser.add_option("--min-peak-time", metavar = "seconds", help = "Discard events with peak time before the given GPS time.")
87 parser.add_option("--max-peak-time", metavar = "seconds", help = "Discard events with peak time after the given GPS time.")
88 parser.add_option("--min-start-time", metavar = "seconds", help = "Discard events starting before the given GPS time.")
89 parser.add_option("--max-start-time", metavar = "seconds", help = "Discard events starting after the given GPS time.")
90 parser.add_option("--min-stop-time", metavar = "seconds", help = "Discard events ending before the given GPS time.")
91 parser.add_option("--max-stop-time", metavar = "seconds", help = "Discard events ending after the given GPS time.")
92 parser.add_option("--min-snr", metavar = "value", type = "float", help = "Discard events below the given SNR.")
93 parser.add_option("--max-snr", metavar = "value", type = "float", help = "Discard events above the given SNR.")
94 parser.add_option("--program", metavar = "name", help = "Process events generated by the given program.")
95 parser.add_option("--veto-file", metavar = "filename", help = "Veto events using the veto segment list extracted from this XML file. The file must contain segment tables, and the veto list will be constructed from the segments named \"sngl_burst_veto\".")
96 parser.add_option("-v", "--verbose", action = "store_true", help = "Be verbose.")
97 options, filenames = parser.parse_args()
98
99 paramdict = options.__dict__.copy()
100
101 if options.inj_made_only and not options.program:
102 raise ValueError("must set --program when --inj-made-only is set")
103 options.cut_instrument = set(options.cut_instrument)
104
105 if options.min_peak_time is not None:
106 options.min_peak_time = lsctables.LIGOTimeGPS(options.min_peak_time)
107 if options.max_peak_time is not None:
108 options.max_peak_time = lsctables.LIGOTimeGPS(options.max_peak_time)
109 if options.min_start_time is not None:
110 options.min_start_time = lsctables.LIGOTimeGPS(options.min_start_time)
111 if options.max_start_time is not None:
112 options.max_start_time = lsctables.LIGOTimeGPS(options.max_start_time)
113 if options.min_stop_time is not None:
114 options.min_stop_time = lsctables.LIGOTimeGPS(options.min_stop_time)
115 if options.max_stop_time is not None:
116 options.max_stop_time = lsctables.LIGOTimeGPS(options.max_stop_time)
117
118 return options, paramdict, (filenames or [None])
119
120
121#
122# =============================================================================
123#
124# Input
125#
126# =============================================================================
127#
128
129
130#
131# Content handler
132#
133
134
135#
136# =============================================================================
137#
138# Preparation
139#
140# =============================================================================
141#
142
143
144def load_veto_segments(filename, verbose = False, contenthandler = None):
145 return ligolw_segments.segmenttable_get_by_name(ligolw_utils.load_filename(filename, verbose = verbose, contenthandler = contenthandler), "sngl_burst_veto").coalesce()
146
147
148class DocContents(object):
149 def __init__(self, xmldoc, program = None):
150 #
151 # Find the out segments
152 #
153
154 self.outsegs = ligolw_search_summary.segmentlistdict_fromsearchsummary(xmldoc, program).coalesce()
155
156 #
157 # Find the sngl_burst table
158 #
159
160 self.snglbursttable = lsctables.SnglBurstTable.get_table(xmldoc)
161
162 #
163 # Get the list of process IDs we care about
164 #
165
166 self.process_ids = set(self.snglbursttable.getColumnByName("process_id"))
167 if program is not None:
168 self.process_ids &= lsctables.ProcessTable.get_table(xmldoc).get_ids_by_program(program)
169
170 #
171 # Find the sim_burst table, or make a fake one
172 #
173
174 try:
175 self.simbursttable = lsctables.SimBurstTable.get_table(xmldoc)
176 except:
177 self.simbursttable = []
178
179 #
180 # Find the coinc tables, or make fake ones
181 #
182
183 try:
184 self.coinctable = lsctables.CoincTable.get_table(xmldoc)
185 self.coincmaptable = lsctables.CoincMapTable.get_table(xmldoc)
186 self.multibursttable = lsctables.MultiBurstTable.get_table(xmldoc)
187 except:
188 self.coinctable = []
189 self.coincmaptable = []
190 self.multibursttable = []
191
192
193#
194# =============================================================================
195#
196# Cuts
197#
198# =============================================================================
199#
200
201
202def remove_events_by_segment(contents, veto_segments):
203 ids = set()
204 for i in xrange(len(contents.snglbursttable) - 1, -1, -1):
205 burst = contents.snglbursttable[i]
206 if burst.process_id in contents.process_ids and burst.ifo in veto_segments and veto_segments[burst.ifo].intersects_segment(burst.period):
207 ids.add(burst.event_id)
208 del contents.snglbursttable[i]
209 return ids
210
211
212def remove_events_by_parameters(contents, testfunc):
213 ids = set()
214 for i in xrange(len(contents.snglbursttable) - 1, -1, -1):
215 burst = contents.snglbursttable[i]
216 if burst.process_id in contents.process_ids and not testfunc(burst):
217 ids.add(burst.event_id)
218 del contents.snglbursttable[i]
219 return ids
220
221
223 coinc_burst_ids = set(row.event_id for row in contents.coincmaptable.getColumnByName("event_id") if row.table_name == "sngl_burst")
224 for i in xrange(len(contents.snglbursttable) - 1, -1, -1):
225 burst = contents.snglbursttable[i]
226 if burst.process_id in contents.process_ids and burst.event_id not in coinc_burst_ids:
227 del contents.snglbursttable[i]
228
229
231 ids = set()
232 for i in xrange(len(contents.simbursttable) - 1, -1, -1):
233 sim = contents.simbursttable[i]
234 if True not in (sim.time_at_instrument(instrument) in seglist for instrument, seglist in contents.outsegs.iteritems()):
235 ids.add(sim.simulation_id)
236 del contents.simbursttable[i]
237 return ids
238
239
240def clean_coinc_tables(contents, removed_ids):
241 # FIXME FIXME FIXME: this is broken since the conversion to
242 # integer IDs. need to include table name constraints in these
243 # tests.
244
245 # remove dangling coinc_event_map rows
246 removed_coinc_ids = set()
247 for i in xrange(len(contents.coincmaptable) - 1, -1, -1):
248 if contents.coincmaptable[i].event_id in removed_ids:
249 removed_coinc_ids.add(contents.coincmaptable[i].coinc_event_id)
250 del contents.coincmaptable[i]
251
252 # remove broken coinc_event rows
253 for i in xrange(len(contents.coinctable) - 1, -1, -1):
254 if contents.coinctable[i].coinc_event_id in removed_coinc_ids:
255 del contents.coinctable[i]
256
257 # remove dangling coinc_event_map rows
258 for i in xrange(len(contents.coincmaptable) - 1, -1, -1):
259 if contents.coincmaptable[i].coinc_event_id in removed_coinc_ids:
260 del contents.coincmaptable[i]
261
262 # remove dangling multi_burst rows
263 for i in xrange(len(contents.multibursttable) - 1, -1, -1):
264 if contents.multibursttable[i].coinc_event_id in removed_coinc_ids:
265 del contents.multibursttable[i]
266
267 # recurse (e.g., removes injection coincs that point to the
268 # coinc_events that got deleted)
269 if removed_coinc_ids:
270 clean_coinc_tables(contents, removed_coinc_ids)
271
272
273#
274# =============================================================================
275#
276# Library API
277#
278# =============================================================================
279#
280
281
282def apply_filters(contents, burst_test_func, veto_segments, del_non_coincs = False, del_skipped_injections = False, verbose = False):
283 removed_ids = set()
284 if veto_segments:
285 if verbose:
286 print("applying veto segment list ...", file=sys.stderr)
287 removed_ids |= remove_events_by_segment(contents, veto_segments)
288 if verbose:
289 print("filtering sngl_burst rows by parameters ...", file=sys.stderr)
290 removed_ids |= remove_events_by_parameters(contents, burst_test_func)
291 if del_skipped_injections:
292 if verbose:
293 print("removing injections that weren't performed ...", file=sys.stderr)
295 if verbose:
296 print("removing broken coincidences ...", file=sys.stderr)
297 clean_coinc_tables(contents, removed_ids)
298 if del_non_coincs:
299 if verbose:
300 print("removing non-coincident events ...", file=sys.stderr)
302
303
304def ligolw_bucut(xmldoc, burst_test_func, veto_segments = segments.segmentlistdict(), del_non_coincs = False, del_skipped_injections = False, program = None, comment = None, verbose = False):
305 process = ligolw_process.register_to_xmldoc(xmldoc, process_program_name, paramdict, version = __version__, cvs_repository = "lscsoft", cvs_entry_time = __date__, comment = comment)
306
307 contents = DocContents(xmldoc, program)
308
309 apply_filters(contents, burst_test_func, veto_segments, del_non_coincs = del_non_coincs, del_skipped_injections = del_skipped_injections, verbose = verbose)
310
311 seg = contents.outsegs.extent_all()
312 ligolw_search_summary.append_search_summary(xmldoc, process, inseg = seg, outseg = seg, nevents = len(contents.snglbursttable))
313
314 process.set_end_time_now()
315
316 return xmldoc
317
318
319#
320# =============================================================================
321#
322# Main
323#
324# =============================================================================
325#
326
327
328#
329# Parse command line.
330#
331
332
333options, paramdict, filenames = parse_command_line()
334
335
336#
337# Define sngl_burst test function.
338#
339
340
342 def add_test(func, t):
343 return lambda arg: t(arg) and func(arg)
344
345 keep_this_sngl_burst = lambda burst: True
346
347 if options.min_amplitude is not None:
348 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.amplitude >= options.min_amplitude)
349 if options.max_amplitude is not None:
350 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.amplitude <= options.max_amplitude)
351 if options.min_bandwidth is not None:
352 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.bandwidth >= options.min_bandwidth)
353 if options.max_bandwidth is not None:
354 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.bandwidth <= options.max_bandwidth)
355 if options.min_central_freq is not None:
356 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.central_freq >= options.min_central_freq)
357 if options.max_central_freq is not None:
358 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.central_freq <= options.max_central_freq)
359 if options.min_confidence is not None:
360 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.confidence >= options.min_confidence)
361 if options.max_confidence is not None:
362 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.confidence <= options.max_confidence)
363 if options.min_duration is not None:
364 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.duration >= options.min_duration)
365 if options.max_duration is not None:
366 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.duration <= options.max_duration)
367 if options.min_fhigh is not None:
368 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.fhigh >= options.min_fhigh)
369 if options.max_fhigh is not None:
370 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.fhigh <= options.max_fhigh)
371 if options.min_flow is not None:
372 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.flow >= options.min_flow)
373 if options.max_flow is not None:
374 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.flow <= options.max_flow)
375 if options.min_hrss is not None:
376 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.hrss >= options.min_hrss)
377 if options.max_hrss is not None:
378 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.hrss <= options.max_hrss)
379 if options.min_snr is not None:
380 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.snr >= options.min_snr)
381 if options.max_snr is not None:
382 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.snr <= options.max_snr)
383 if options.cut_instrument:
384 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.ifo not in options.cut_instrument)
385 if options.min_peak_time is not None:
386 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.peak >= options.min_peak_time)
387 if options.max_peak_time is not None:
388 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.peak <= options.max_peak_time)
389 if options.min_start_time is not None:
390 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.start >= options.min_start_time)
391 if options.max_start_time is not None:
392 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.start <= options.max_start_time)
393 if options.min_stop_time is not None:
394 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.stop >= options.min_stop_time)
395 if options.max_stop_time is not None:
396 keep_this_sngl_burst = add_test(keep_this_sngl_burst, lambda burst: burst.stop <= options.max_stop_time)
397
398 return keep_this_sngl_burst
399
400
401keep_this_sngl_burst = make_keep_this_sngl_burst(options)
402
403
404
405#
406# Get veto segment information.
407#
408
409
410if options.veto_file:
411 veto_segments = load_veto_segments(options.veto_file, verbose = options.verbose)
412else:
413 veto_segments = segments.segmentlistdict()
414
415
416#
417# Do the work.
418#
419
420
421for filename in filenames:
422 xmldoc = ligolw_utils.load_filename(filename, verbose = options.verbose)
423 xmldoc = ligolw_bucut(xmldoc, keep_this_sngl_burst, veto_segments = veto_segments, del_non_coincs = options.coinc_only, del_skipped_injections = options.inj_made_only, program = options.program, comment = options.comment, verbose = options.verbose)
424 ligolw_utils.write_filename(xmldoc, filename, verbose = options.verbose)
425 xmldoc.unlink()
def __init__(self, xmldoc, program=None)
def apply_filters(contents, burst_test_func, veto_segments, del_non_coincs=False, del_skipped_injections=False, verbose=False)
def remove_non_coincidences(contents)
def remove_events_by_parameters(contents, testfunc)
def clean_coinc_tables(contents, removed_ids)
def load_veto_segments(filename, verbose=False, contenthandler=None)
def ligolw_bucut(xmldoc, burst_test_func, veto_segments=segments.segmentlistdict(), del_non_coincs=False, del_skipped_injections=False, program=None, comment=None, verbose=False)
def remove_events_by_segment(contents, veto_segments)
def parse_command_line()
Definition: lalburst_cut.py:60
def remove_skipped_injections(contents)
def make_keep_this_sngl_burst(options)