import multiprocessing as mp
import time as TIME
import numpy as np
import h5py
from ligo.segments import segment, segmentlist
from gwpy.segments import SegmentList, DataQualityFlag, DataQualityDict
from .. import exceptions
from .. import features
from .. import hookimpl
from .. import plots
from .. import utils
from .. import io
from ..io.reporters.hdf5 import HDF5Reporter
from ..series import SeriesInfo
from . import SupervisedClassifier, ClassifierModel, DEFAULT_DT
DEFAULT_NUM_PROC = 1
# how long we sleep between checking that mp.map_async results have finished
DEFAULT_MULTIPROCESSING_SLEEP = 0.001
DEFAULT_MULTIPROCESSING_CHUNKSIZE = None
[docs]class Vetolist(ClassifierModel):
"""
a representation of the OVL and DOVL vetolist
this is stored as the internal model within OVL and DOVL
*WRITE ME*
decribe protections of attributes and their meanings
_default_thresholds
_default_windows
_table_dtypes
_repr_head
_repr_body
"""
_default_thresholds = [15, 20, 25, 30, 35, 45, 55, 100, 500, 1000]
_default_windows = [0.500, 0.100, 0.050, 0.025, 0.010, 0.005, 0.001]
_table_dtypes = [
("channel", "|S75"),
("threshold", "float"),
("window", "float"),
("num_glitch", "float"),
("num_clean", "float"), # like livetime
("num_aux", "float"),
("num_vetod_exp", "float"),
("num_vetod_gch", "float"),
("num_vetod_cln", "float"), # like dsec
("poisson_signif", "float"),
("use_percentage", "float"),
("eff", "float"),
("fap", "float"),
("eff_fap", "float"),
("cnum_vetod_gch", "float"),
("cnum_vetod_cln", "float"),
("rank", "float"),
]
_repr_body = " ".join(
"%-40s" if "|S" in dtype[1] else "%15.9f" for dtype in _table_dtypes
) # NOTE: this may be fragile...
_repr_head = (
" ".join(
"%-40s" if dtype[0] == "channel" else "%15s" for dtype in _table_dtypes
)
) % tuple(
dtype[0] for dtype in _table_dtypes
) # NOTE: this may be fragile...
def __init__(
self,
start,
end,
segs=None,
model_id=None,
generate_id=False,
channels=None,
thresholds=None,
windows=None,
significance=None,
**kwargs,
):
ClassifierModel.__init__(
self, start, end, segs=segs, model_id=model_id, generate_id=generate_id
)
if not channels:
channels = []
if not thresholds:
thresholds = self._default_thresholds
if not windows:
windows = self._default_windows
self._channels = channels
self._thresholds = thresholds
self._windows = windows
self.significance = significance
# set up table
self.init_table()
# select how features are extracted
self.selector = features.Selector(
self.relevant_channels,
bounds={
self.significance: (self.minimum_threshold, np.inf),
**kwargs.get("bounds", {}),
},
)
@property
def channels(self):
return self._channels
@channels.setter
def channels(self, channels):
for chan in channels:
assert isinstance(chan, str), "channels must be strings"
self._channels = channels
@property
def relevant_channels(self):
return set(channel.decode("utf-8") for channel in self._table["channel"])
@property
def thresholds(self):
return self._thresholds
@thresholds.setter
def thresholds(self, thresholds):
for thr in thresholds:
assert isinstance(
thr, (int, float)
), "thresholds must be integers or floats"
self._thresholds = thresholds
@property
def relevant_thresholds(self):
return set(self._table["threshold"])
@property
def minimum_threshold(self):
return np.min(self["threshold"]) if len(self["threshold"]) else np.infty
@property
def windows(self):
return self._windows
@windows.setter
def windows(self, windows):
for win in windows:
assert isinstance(win, (int, float)), "windows must be integers or floats"
self._windows = windows
@property
def relevant_windows(self):
return set(self._table["window"])
def __iter__(self):
"""
returns a generator that will iterate over the internal table
"""
for line in self._table:
yield line
def __getitem__(self, index):
return self._table[index]
def __len__(self):
return len(self._table)
def __repr__(self):
return str(self._table)
def __str__(self):
"""
makes a nice ASCII formatted table instead of just printing the
structured array "as is", which is what __repr__ does
"""
s = [self._repr_head]
for configuration in self:
s.append(self._repr_body % tuple(configuration))
return "\n".join(s)
[docs] def dump(self, path, nickname=None, **kwargs):
"""
write this object into path should contain all info, not just the
configuration order Uses hdf5 format, and asserts that the path must end
in ".hdf5"
we can add attributes to this file via the kwargs specified in this
function's signature
WARNING: we may want to add default attrs to the hdf5 file, like the
code's version and the git repo location, etc. we also may want to add
information about how this file was produced, but that may be optional
and can be specified via kwargs
NOTE: we'll likely use an instance of HDF5Reporter rather than this
method
"""
assert path.endswith(".hdf5"), "path=%s must end with .hdf5" % (path)
if nickname is None:
nickname = "vetolist"
with h5py.File(path, "w") as file_obj:
dataset = file_obj.create_dataset(nickname, data=self._table)
dataset.attrs.update(kwargs)
[docs] def load(self, path, nickname=None, verbose=True):
"""
load the data from file into memory
will load all info, not just hte configuration order
Uses hdf5 format, and asserts that the path must end in ".hdf5"
if verbose: print attrs from dataset
NOTE:
we'll likely use an instance of HDF5Reporter rather than this method
"""
assert path.endswith(".hdf5"), "path=%s must end with .hdf5" % (path)
with h5py.File(path, "r") as file_obj:
if nickname is None:
keys = list(file_obj.keys())
assert (
len(keys) == 1
), "exactly 1 key required in the HDF5 file unless a nickname is chosen"
# expect a single key and just use that this makes it a bit more
# independent of who wrote this to disk
dataset = file_obj[keys[0]]
else:
dataset = file_obj[nickname]
# extract the numpy array from the data set
self._table = dataset[...]
# update which channels are present
self.channels = set(
channel.decode("utf-8") for channel in self.table["channel"]
)
self.windows = set(self.table["window"])
self.thresholds = set(self.table["threshold"])
if verbose:
for k, v in dataset.attrs.items():
print("%s\t%s" % (k, v))
[docs] def init_table(self):
"""
instantiate the table used to store data internally
"""
self._table = []
for channel in self.channels:
for threshold in self.thresholds:
for window in self.windows:
self._table.append(
(channel, threshold, window)
+ (np.nan,) * (len(self._table_dtypes) - 3)
)
# NOTE: we don't delegate to set_table because we don't need to check
# the table for consistency...
self._table = np.array(self._table, dtype=self._table_dtypes)
@property
def table(self):
return self._table
@table.setter
def table(self, table):
"""
WARNING: do not use this unless you really know what you're doing!
"""
assert isinstance(table, np.ndarray), "table is not a numpy.ndarray!"
assert len(table.dtype) == len(
self._table_dtypes
), "new table has the wrong number of columns!"
for a, b in zip(table.dtype.names, self._table_dtypes):
assert a == b[0], "ordering of new table is inconsistent with Vetolist!"
assert table.dtype[a] == b[1], (
"types of new table are inconsistent with Vetolist! "
"%s (new) vs %s (old)" % (table.dtype[a], b[1])
)
channels = self.channels
for channel in set(table["channel"]):
assert (
channel.decode("utf-8") in channels
), "new table has inconsistent channels with this Vetolist!"
thresholds = self.thresholds
for threshold in set(table["threshold"]):
assert (
threshold in thresholds
), "new table has inconsistent thresholds with this Vetolist!"
windows = self.windows
for window in set(table["window"]):
assert (
window in windows
), "new table has inconsistent windows with this Vetolist!"
self._table = table
[docs] def metric2rank(self, value, metric="eff_fap", **kwargs):
"""
map our metric value into a rank we define a somewhat ad-hoc monotonic
function with a separate scale for each metric
"""
if metric == "use_percentage":
return self._use_percentage_to_rank(value)
elif metric == "poisson_signif":
return self._poisson_signif_to_rank(value)
elif metric == "eff_fap":
return self._eff_fap_to_rank(value)
else:
raise ValueError("metric=%s not understood" % metric)
@staticmethod
def _use_percentage_to_rank(val):
"""
map use_percentage metric to a rank between 0 and 1.
"""
return val
@staticmethod
def _poisson_signif_to_rank(val):
"""
map poisson_signif metric to a rank between 0 and 1.
"""
score = 0.009 * np.power(val, 1.5)
return score / (1 + score)
@staticmethod
def _eff_fap_to_rank(val):
"""
map eff_fap metric to a rank between 0 and 1.
"""
score = 0.02 * val
return score / (1 + score)
[docs] def reorder(self, metric="eff_fap", metric_weights=None, **kwargs):
"""
re-order the vetolist according to this metric
we always sort so that bigger numbers show up first
"""
# assign ranks
if metric == "combined":
metric_names = ("eff_fap", "use_percentage", "poisson_signif")
if metric_weights:
weights = [metric_weights[metric] for metric in metric_names]
else:
# use equal weights
weights = [1 / 3.0, 1 / 3.0, 1 / 3.0]
metrics = np.array(
[self.metric2rank(self.table[m], metric=m) for m in metric_names]
)
self.table["rank"] = np.average(metrics, axis=0, weights=weights)
else:
self.table["rank"] = self.metric2rank(self.table[metric], metric=metric)
# reorder - sort by metric, then by threshold, then by window size
# NOTE: the old implementation of OVL did not re-order based on windows;
# it only used the thresholds and then the metric. this can produce
# slightly different orderings, but they probably will be statistically
# equivalent regarding performance
order = [
(ind, config["window"], config["threshold"], config["rank"])
for ind, config in enumerate(self)
]
order.sort(key=lambda x: (x[3], x[2], -x[1]), reverse=True)
self.table = self.table[[veto[0] for veto in order]]
[docs] def prune(self, minimum, metric="eff_fap", **kwargs):
"""
remove all configurations with metric<minimum
WARNING:
* this will cause data to be forgotten in an unrecoverable way!
* only use this if you are certain you know what you're doing!
"""
self.table = self.table[self.table[metric] >= minimum]
[docs] def redundancies(self, dataset, time, significance, verbose=False, Verbose=False):
"""
computes the intersection and overlap of vetosegments for each possible
configuration. This should contain all information necessary to
determine which channels are redundant.
we only return information based on the current model, which may have
trained itself down to a subset of the total possible configurations
returns table, headers table is a matrix of the livetime of the
intersections of veto segments from each configuration pair headers is a
list of the (channel, threshold, window) tuples, with the same order as
the columns in table
"""
verbose |= Verbose
# set up the data product
N = len(self)
table = np.empty((N, N), dtype=float)
headers = self[["channel", "threshold", "window"]]
if verbose:
print("caching triggers within dataset")
if not dataset.is_configured():
dataset.configure(self.selector)
dataset.load_data()
# iterate over configurations
if verbose:
print("computing aux segments")
auxsegs = []
for config in self:
# compute segments once for each configuration to avoid repeating
# work in the nested loop
segs = self._configuration2segments(config, dataset, time, significance)
auxsegs.append(dataset.segs & segs)
if verbose:
print("computing overlap between veto configurations")
tmp = "%d - %d"
for i, config1 in enumerate(self):
if Verbose:
print(tmp % (i, i))
segs1 = auxsegs[i]
# livetime of segments intersected with themselves
table[i, i] = utils.livetime(segs1)
# iterate over the rest of the configurations and fill in the
# intersections
for j, config2 in enumerate(self[i + 1 :], 1):
if Verbose:
print(tmp % (i, j))
segs2 = auxsegs[i + j]
table[i, i + j] = table[i + j, i] = utils.livetime(segs1 & segs2)
# return data
return table, headers
[docs] def nonredundant_segments(self, dataset, time, significance):
"""
Checks which configurations are redundant and then returns
the indices of the non-redundant configurations, and their segments
"""
intersect_table, _ = self.redundancies(dataset, time, significance)
segments = self.segments(dataset, time, significance)
keep_inds = []
for i, veto_segs in enumerate(segments):
# live time is element (i,i) of intersect_table
# check if veto was active
if intersect_table[i, i] == 0:
continue
# check if redundant with any previous veto
# redundant if same channel, and segments are a subset of other vetoes
keep = True
for j, other_segs in enumerate(segments[:i]):
# check if channels are the same
if self.table[i]["channel"] != self.table[j]["channel"]:
continue
# veto i is redundant with veto j if livetime
# of their intersection equals i's livetime
if intersect_table[i, i] == intersect_table[i, j]:
keep = False
break
if keep:
keep_inds.append(i)
return np.array(keep_inds), [segments[i] for i in keep_inds]
[docs] def save_as_data_quality_flags(
self, out_path, dataset, time, significance, remove_redundant=True
):
"""
Saves the veto list as a set of data quality flags in an hdf5 file
"""
if remove_redundant:
inds, configuration_segments = self.nonredundant_segments(
dataset, time, significance
)
if len(inds) > 0:
configurations = self[inds]
else:
configurations = np.array([])
else:
configurations = self[:]
configuration_segments = self.segments(dataset, time, significance)
veto_dict = DataQualityDict()
lookup_dict = {}
pairs = zip(configurations, configuration_segments)
for i, (configuration, aux_segs) in enumerate(pairs):
channel = configuration["channel"].decode("utf-8")
threshold = configuration["threshold"]
window = configuration["window"]
name = f"{channel}_{threshold}_{window}:1"
active = SegmentList(aux_segs)
known = dataset.segs
dq_flag = DataQualityFlag(
name=name, active=active, known=known, isgood=False
)
veto_dict[name] = dq_flag
lookup_dict[name] = configuration
veto_dict.write(out_path)
with h5py.File(out_path, "r+") as f:
for name, grp in f.items():
configuration = lookup_dict[name]
grp.attrs["channel"] = configuration["channel"].decode("utf-8")
grp.attrs["threshold"] = configuration["threshold"]
grp.attrs["window"] = configuration["window"]
grp.attrs["rank"] = configuration["rank"]
grp.attrs["eff"] = configuration["eff"]
grp.attrs["fap"] = configuration["fap"]
grp.attrs["use_percentage"] = configuration["use_percentage"]
grp.attrs["poisson_signif"] = configuration["poisson_signif"]
[docs] def feature_importance(self):
"""
should essentially be an ordered list of veto configurations
although this is really a structured numpy array
"""
return self.table[["channel"], ["threshold"], ["window"]]
[docs] def feature_importance_table(
self, time, significance, dataset=None, map2str=True, **kwargs
):
"""should return (columns, data) compatible with the DQR's
json.format_table (see use in idq/reports.py"""
cols = [
"channel",
"threshold",
"window",
"poisson_signif",
"use_percentage",
"eff_fap",
"rank",
"cnum_vetod_gch",
"cnum_vetod_cln",
]
# needed because numpy might break without this...
data = self.table[cols].copy()
ngch = self.table["num_glitch"]
if len(ngch): # the first element will be non-zero
data["cnum_vetod_gch"] /= ngch[0]
cols[-2] = "efficiency"
ncln = self.table["num_clean"]
if len(ncln): # the first element will be non-zero
data["cnum_vetod_cln"] /= ncln[0]
cols[-1] = "false alarm probability"
if dataset:
if not dataset.is_configured():
dataset.configure(self.selector)
dataset.load_data()
configuration_inds, configuration_segs = self.nonredundant_segments(
dataset, time, significance
)
if len(configuration_inds) > 0:
data = data[configuration_inds]
else:
data = np.array([], dtype=self._table_dtypes)
data = [list(_) for _ in data]
cols = cols[:-2]
if map2str:
data = [[_[0]] + ["%.3f" % __ for __ in _[1:-2]] for _ in data]
return cols, data
[docs] def segments(self, dataset, time, significance, **kwargs):
"""
generate veto segments for all configurations in the vetolist returns a
list of segment lists in the order corresponding to the configurations
within self.table
"""
return [
self._configuration2segments(configuration, dataset, time, significance)
for configuration in self
]
def _configuration2times(
self, configuration, dataset, time, significance, **kwargs
):
"""
get the times above threshold for this configuration
"""
channel = configuration["channel"].decode("utf-8") # extract parameters
threshold = configuration["threshold"]
try:
trgs = dataset.features[channel].filter(
f"{significance} >= {threshold}", f"{significance} <= {np.infty}"
)
# get times above threshold, we already filtered by threshold within
# call to triggers
return trgs[time]
except KeyError:
# no triggers
return None
def _configuration2segments(
self, configuration, dataset, time, significance, **kwargs
):
"""
generate veto segments for this configuration
"""
window = configuration["window"]
aux_times = self._configuration2times(
configuration, dataset, time, significance
)
if aux_times is None:
return segmentlist()
else:
return utils.times2segments(aux_times, window / 2.0)
[docs] def timeseries(
self,
dataset,
time,
significance,
metric="eff_fap",
dt=DEFAULT_DT,
segs=None,
**kwargs,
):
"""
generate timeseries for each segment in dataset.segments separately
should be useful wihtin OVL.evaluate, vetolist2segments, etc
iterates from the end of the table to the front, setting the values of
the timeseries to the metric of the corresponding configuration at each
step if more than one configuration is active at this time, the final
timeseries will have the value corresponding to the configuration that's
closer to the front of the list
-> this makes sense if you assume the configurations are ordered by
decending metric, which should be the case
we extract the 'rank' column, which is set via calls to self.reorder.
NOTE:
* monotonic decrease in metric as you move down the table is not
strictly guaranteed and will depend on whether the list has converged.
* it is also possible that the list will never converge because of
cycles (2 configurations flip back and forth forever with additional
iterations) This may be a weakness of the algorithm!
"""
if segs is None:
segs = dataset.segs
if not dataset.is_configured():
dataset.configure(self.selector)
dataset.load_data()
ranks = [
(np.zeros_like(t), t) for t in [np.arange(s, e, dt) for s, e in segs]
] # set up zero'd arrays for the ranks
for configuration in self[::-1]: # work from the bottom to the top
aux_segs = self._configuration2segments(
configuration, dataset, time, significance
)
for rank, t in ranks:
rank[utils.times_in_segments(t, aux_segs)] = configuration["rank"]
return ranks
# --- OVL configuration performance helper functions
# used when estimating the poisson significance
_gammln_cof = [
76.18009173,
-86.50532033,
24.01409822,
-1.231739516e0,
0.120858003e-2,
-0.536382e-5,
]
_gammln_stp = 2.50662827465
def _gammpln(a, x):
"""
lower incomplete gamma function
"""
if a == 0:
return 0.0 # np.log(1.)
elif x < 0.0:
raise ValueError("x<0")
elif a < 0.0:
raise ValueError("a<0")
elif x == 0.0:
if a == 0.0:
return 0.0 # np.log(1.)
else:
return -np.infty # np.log(0.)
elif x < a + 1.0:
return _gserln(a, x)[0]
else:
return np.log(1.0 - _gcf(a, x)[0])
def _gserln(a, x, itmax=10000, eps=3.0e-7):
"""
Series approx'n to the incomplete gamma function.
"""
gln = _gammln(a)
if x < 0.0:
raise ValueError("x<0")
elif x == 0.0:
return 0.0, gln
logx = np.log(x)
ap = a
s = 1.0 / a
delta = s
n = 1
while n <= itmax:
ap += 1.0
delta *= x / ap
s += delta
if abs(delta) < abs(s) * eps:
return np.log(s) - x + a * logx - gln, gln
n += 1
raise exceptions.MaxIter(itmax)
def _gammln(xx):
"""
Logarithm of the gamma function.
"""
x = xx - 1.0
tmp = x + 5.5
tmp = (x + 0.5) * np.log(tmp) - tmp
ser = 1.0
for j in range(6):
x += 1.0
ser += _gammln_cof[j] / x
return tmp + np.log(_gammln_stp * ser)
def _gcf(a, x, itmax=1000, eps=3.0e-7):
"""
Continued fraction approx'n of the incomplete gamma function.
"""
logx = np.log(x)
gln = _gammln(a)
gold = 0.0
a0 = 1.0
a1 = x
b0 = 0.0
b1 = 1.0
fac = 1.0
n = 1
while n <= itmax:
an = n
ana = an - a
a0 = (a1 + a0 * ana) * fac
b0 = (b1 + b0 * ana) * fac
anf = an * fac
a1 = x * a0 + anf * a1
b1 = x * b0 + anf * b1
if a1 != 0.0:
fac = 1.0 / a1
g = b1 * fac
if abs((g - gold) / g) < eps:
return (g * np.exp(-x + a * logx - gln), gln)
gold = g
n += 1
raise exceptions.MaxIter(itmax)
# defined here so that we can pickle this via multiprocessing
def _compute_dovl_configuration_performance_initializer(
local_dataset, local_model, local_time, local_significance, local_FAPtype
):
global dataset, model, time, significance, FAPtype
dataset = local_dataset
model = local_model
time = local_time
significance = local_significance
FAPtype = local_FAPtype
def _compute_dovl_configuration_performance_wrapper(cind):
_, statistics = _compute_dovl_configuration_performance(
dataset, cind, model, time, significance, FAPtype
)
return statistics
def _compute_dovl_configuration_performance(
dataset, ind, model, time, significance, FAPtype
):
"""
computes this configuration's performance a convenient wrapper that is used
within _recalculate and to avoid repeating code
assumes both segments and gch_times are not empty (i.e.: there is data to
veto)
"""
num_glitch, num_clean = dataset.vectors2num()
# figure out what is vetoed by this configuration
num_vetod_gch = 0.0
num_vetod_cln = 0.0
num_aux = 0.0
contained = []
configuration = model[ind]
seg = (
np.array([-0.5, 0.5]) * configuration["window"]
) # will be used repeatedly within the loop
for vector in dataset:
# we reference the classifier data stored in each FeatureVector, which
# is a bit of a cheat but should work... NOTE: if the dataset spans a
# much larger amount of time than [vector.time - window/2, vector.time +
# window/2] then the following is a substantial speed up compared to
# generating a full list of segments repeatedly for each vector the
# query to dataset may still be expensive (lots of I/O), so there's room
# for improvement here
aux_times = model._configuration2times(
configuration,
dataset,
time,
significance,
)
if aux_times is None:
continue
# count how many loud things remain nearby
naux = len(aux_times[utils.times_in_segments(aux_times, [seg + vector.time])])
if naux: # at least one loud thing close by
contained.append(vector)
num_aux += naux
num_vetod_gch += vector.label
num_vetod_cln += 1 - vector.label
# convert datatable to dataset
if contained:
dset = features.Dataset.from_datatable(
features.DataTable(rows=contained, names=vector.colnames),
start=dataset.start,
end=dataset.end,
segs=dataset.segs,
)
else:
dset = features.Dataset(start=dataset.start, end=dataset.end, segs=dataset.segs)
# compute ranking metrics
# we assume num_glitch, num_clean > 0 or else this function would not be called
eff = num_vetod_gch / num_glitch
if FAPtype == "DOVL":
fap = (num_vetod_cln + num_vetod_gch) / (num_clean + num_glitch)
elif FAPtype == "DOVLfap":
fap = num_vetod_cln / num_clean
else:
raise ValueError("FAPtype=%s not understood" % FAPtype)
num_vetod_exp = num_glitch * fap
poisson_signif = -_gammpln(num_vetod_gch, num_vetod_exp)
# nothing vetod, so we give it a low ranking
use_percentage = min(num_vetod_gch / num_aux, 1) if num_aux > 0 else 0.0
# handle this carefully so we don't throw things out that have fap=0 but eff>0
eff_fap = eff / fap if fap > 0 else np.infty if eff > 0 else 0.0
# return
statistics = (
num_vetod_exp,
num_vetod_gch,
num_aux,
num_vetod_cln,
poisson_signif,
use_percentage,
eff,
fap,
eff_fap,
)
return dset, statistics
# --- OVL classes themselves
[docs]class DOVL(SupervisedClassifier):
"""
Discrete OVL: a modified version of OVL that trains based on discrete
samples to estimate the deadtime. We note that there is an extension of this
that estimates the False Alarm Probability instead of the deadtime
(DOVLfap), but still uses discrete samples. The actual implementation of the
OVL algorithm itself is stored in a subclass (:class:idq.classifiers.OVL)
because DOVL has standard training signatures while OVL does not
*WRITE ME* describe what we "overwrite" from OVL (although we don't really
have convenient access to these...) train (a trivial delegation, and we
overwrite it simply because we want the signature to have a clearer variable
name) _recalculate
"""
_flavor = "dovl"
_required_kwargs = [
"incremental",
"minima",
"num_recalculate",
"metric",
"safe_channels_path",
"time",
"significance",
]
_allowed_metrics = "eff_fap use_percentage poisson_signif combined".split()
_default_incremental = 1000
_default_minima = {
"poisson_signif": 10,
"eff_fap": 3,
}
def __init__(self, *args, **kwargs):
SupervisedClassifier.__init__(self, *args, **kwargs)
assert (
self.kwargs["metric"] in self._allowed_metrics
), "metric=%s not allowed" % (self.kwargs["metric"])
assert isinstance(self.kwargs["minima"], dict), "minima must be a dictionary"
for k in self.kwargs["minima"].keys():
assert k in self._allowed_metrics, "minima's key=%s not allowed" % (k)
# record the column names for "time" and "significance"
self._time = self.kwargs.pop("time")
self._significance = self.kwargs.pop("significance")
@property
def time(self):
return self._time
@property
def significance(self):
return self._significance
[docs] def evaluate(self, dataset):
"""
sets the ranks for these feature vectors
modifies the objects in place!
"""
if not self.is_trained:
raise exceptions.UntrainedError(
"%s does not have an internal model" % self.flavor
)
verbose = self.kwargs.get("verbose", False)
if verbose:
template = (
"--- %6d / "
+ ("%6d" % len(dataset))
+ " --- gps = %.6f --- label = %s ---"
)
# load/cache triggers
model = self.model
if not dataset.is_configured():
dataset.configure(model.selector)
dataset.load_data()
# check required columns are available
self._check_columns(dataset.features)
# generate veto configuration
veto_segs = [
model._configuration2segments(c, dataset, self._time, self._significance)
for c in model
]
# evaluate one feature vector at a time
for ind, feature_vector in enumerate(dataset):
if verbose:
print(template % (ind + 1, feature_vector.gps, feature_vector.label))
# iterate over veto configurations
for c_idx, configuration in enumerate(model):
# check if configuration is active
if utils.time_in_segments(feature_vector.time, veto_segs[c_idx]):
feature_vector["rank"] = configuration["rank"]
if verbose:
print(model._repr_body % tuple(configuration))
break
else:
# we found nothing active, so set rank to zero
feature_vector["rank"] = 0
if verbose:
print("not found")
feature_vector["hash"] = self.model.hash
return dataset
[docs] def timeseries(self, info, dataset_factory, dt=DEFAULT_DT, segs=None, set_ok=None):
"""
delegates to Vetolist.timeseries
returns ranks
"""
if not self.is_trained:
raise exceptions.UntrainedError(
"%s does not have an internal model" % self.flavor
)
dataset = dataset_factory.unlabeled(dt=dt, segs=segs)
ranks = self.model.timeseries(
dataset, self._time, self._significance, dt=dt, segs=segs, **self.kwargs
)
return [
SeriesInfo.from_ranks(
info, t[0], dt, rank, self.model, self.calibration_map, set_ok=set_ok
)
for rank, t in ranks
]
[docs] def redundancies(self, dataset):
"""
computes the intersection and overlap of vetosegments for each possible
configuration. This should contain all information necessary to
determine which channels are redundant.
we only return information based on the current model, which may have
trained itself down to a subset of the total possible configurations
returns table, headers table is a matrix of the livetime of the
intersections of veto segments from each configuration pair headers is a
list of the (channel, threshold, window) tuples, with the same order as
the columns in table
"""
self._check_columns(dataset.features)
if not self.is_trained: # instantiate the Vetolist if needed
self.model = Vetolist(
dataset.start,
dataset.end,
segs=dataset.segs,
model_id=self._model_id,
channels=io.path2channels(self.kwargs["safe_channels_path"]),
significance=self.significance,
**self.kwargs,
)
return self.model.redundancies(dataset, self._time, self._significance)
[docs] def feature_importance(self):
"""
delegates to Vetolist.feature_importance
"""
if not self.is_trained:
raise exceptions.UntrainedError(
"%s does not have an internal model" % self.flavor
)
return self.model.feature_importance()
[docs] def feature_importance_table(self, dataset=None, **kwargs):
"""delegate to Vetolist.feature_importance_table"""
return self.model.feature_importance_table(
self._time,
self._significance,
dataset=dataset,
**kwargs,
)
def _check_columns(self, features):
"""
assert that required_columns are present in features
"""
for column in [self._time, self._significance]:
for table in features.values():
assert column in table.colnames, "column=%s required" % column
[docs] def train(
self, dataset
): # NOTE: the input signature here is the same as for other classifiers
"""
Instantiates a Vetolist and trains using the data within a dataset.
Algorithmic parameters include:
* channels
* thresholds
* windows
* num_recalculate
* incremental
* minima (key, value pairs)
and are specified through self.kwargs, set during instantiation.
NOTE: We do not explicitly call `self._check_columns(datachunk)`
and instead assume the user has already done this when constructing a dataset.
"""
# set up how we'll report models
reporter = HDF5Reporter(self.rootdir, dataset.start, dataset.end)
vetolistnickname = self.nickname + "Vetolist"
model = Vetolist(
dataset.start,
dataset.end,
segs=dataset.segs,
model_id=self._model_id,
channels=io.path2channels(self.kwargs["safe_channels_path"]),
significance=self.significance,
**self.kwargs,
)
self.model = model # set up the internal model
verbose = self.kwargs.get("verbose", False)
# cache data so we get all I/O out of the way once
if verbose:
print("loading triggers within dataset")
# configure feature selection and load
if not dataset.is_configured():
dataset.configure(model.selector)
dataset.load_data()
# evaluate the first round without hierarchical removal
# that's why we do all the jazz with incremental...
if verbose:
print("------------------- INITIAL RECALCULATE -----------------------")
to = TIME.time()
incremental = self.kwargs["incremental"]
self.kwargs["incremental"] = 0
self._recalculate(dataset, **self.kwargs) # actually compute stuff
self.kwargs["incremental"] = incremental
if verbose:
print("-------------------- %.6f sec" % (TIME.time() - to))
# write first iteration to disk
reporter.report(vetolistnickname + "Initial", model.table)
# re-order the list and prune under-performing configurations
for metric, minimum in self.kwargs["minima"].items():
model.prune(minimum, metric=metric)
model.reorder(**self.kwargs)
# iterate for num_recalculate
template = vetolistnickname + "Iter%d"
for _ in range(self.kwargs["num_recalculate"]):
if verbose:
print(
"------------------- RECALCULATE : %d -----------------------" % _
)
to = TIME.time()
self._recalculate(dataset, **self.kwargs) # calculate stuff
if verbose:
print("------------------- %.6f sec" % (TIME.time() - to))
# write iteration to disk
reporter.report(template % (_), model.table)
# re-order and prune
for metric, minimum in self.kwargs["minima"].items():
model.prune(minimum, metric=metric)
model.reorder(**self.kwargs)
# perform a final iteration and then write first iteration to disk
if verbose:
print("------------------- FINAL RECALCULATE -----------------------")
to = TIME.time()
self._recalculate(dataset, **self.kwargs)
if verbose:
print("------------------- %.6f sec" % (TIME.time() - to))
# re-order and write to disk
# NOTE: we do NOT prune at this point
model.reorder(**self.kwargs)
reporter.report(vetolistnickname + "Final", model.table)
# return self.model for syntactic consistency with parent
return self.model
[docs] def calibrate(self, dataset, bounded=True, **kwargs):
return super().calibrate(dataset, bounded=bounded, **kwargs)
def _recalculate(
self,
dataset,
incremental=None,
minima=None,
multiprocessing_sleep=DEFAULT_MULTIPROCESSING_SLEEP,
multiprocessing_chunksize=DEFAULT_MULTIPROCESSING_CHUNKSIZE,
**kwargs,
):
"""
an implementation of the recalculate algorithm that uses clean samples
instead of fractional deadtime Can we simply replace a "deadtime
estimate" function call here and otherwise use the same logic as OVL
itself? that may be complicated by
NOTE: we expect each feature in the dataset to contain : columns =
['time', 'significance'] that is checked within train once instead of
being checked here repeatedly
"""
# make a copy, sharing references to FeatureVectors do this so we can
# pop crap from working_dataset without affecting dataset (shared
# reference)
working_dataset = dataset.copy()
# do this because specifying in kwargs turns out to be difficult
# syntactically
if incremental is None:
incremental = self._default_incremental
if minima is None:
minima = self._default_minima
num_glitch, num_clean = dataset.vectors2num()
cnum_vetod_gch = 0.0
cnum_vetod_cln = 0.0
model = self.model
verbose = kwargs.get("verbose", False)
if verbose:
print(model._repr_head)
_repr_body = model._repr_body + " %.6f sec"
cind = 0
len_model = len(model)
num_proc = self.kwargs.get("num_proc", DEFAULT_NUM_PROC)
while (cind < len_model) and (
(incremental > 0) or (num_proc == 1)
): # process serially
configuration = model[cind]
if num_glitch and num_clean: # there's something to veto!
if verbose:
to = TIME.time()
# compute statistics
contained, statistics = _compute_dovl_configuration_performance(
working_dataset,
cind,
model,
self._time,
self._significance,
self.__class__.__name__,
)
(
num_vetod_exp,
num_vetod_gch,
num_aux,
num_vetod_cln,
poisson_signif,
use_percentage,
eff,
fap,
eff_fap,
) = statistics
# update configuration in place
configuration["num_glitch"] = num_glitch
configuration["num_clean"] = num_clean
configuration["num_aux"] = num_aux
configuration["num_vetod_exp"] = num_vetod_exp
configuration["num_vetod_gch"] = num_vetod_gch
configuration["num_vetod_cln"] = num_vetod_cln
configuration["poisson_signif"] = poisson_signif
configuration["use_percentage"] = use_percentage
configuration["eff"] = eff
configuration["fap"] = fap
configuration["eff_fap"] = eff_fap
# figure out if we'll keep this configuration moving forward
if incremental:
keep = True
for metric, threshold in minima.items():
keep *= configuration[metric] >= threshold
if keep:
incremental -= 1
num_clean -= num_vetod_cln
cnum_vetod_cln += num_vetod_cln
num_glitch -= num_vetod_gch
cnum_vetod_gch += num_vetod_gch
working_dataset -= (
contained # retain only the vectors that were not flagged
)
# NOTE: modifies dataset in place!
# finish updating configuration
configuration["cnum_vetod_gch"] = cnum_vetod_gch
configuration["cnum_vetod_cln"] = cnum_vetod_cln
if verbose:
print(_repr_body % (tuple(configuration) + (TIME.time() - to,)))
else:
# we're missing data of some kind, so we should just skip the
# rest of the configurations -> zero their contributions
for name in model._table.dtype.names:
if name not in [
"channel",
"threshold",
"window",
"num_glitch",
"num_clean",
"cnum_vetod_gch",
"cnum_vetod_cln",
]:
model[cind:][name] = 0
model[cind:]["num_glitch"] = num_glitch
model[cind:]["num_clean"] = num_clean
model[cind:]["cnum_vetod_gch"] = cnum_vetod_gch
model[cind:]["cnum_vetod_cln"] = cnum_vetod_cln
break
cind += 1
if (cind < len_model) and num_glitch and num_clean:
# we did not finish the list, so there could be something to do
if verbose:
print(
"processing remaining configurations in parallel with %d processes"
% num_proc
)
pool = mp.Pool(
num_proc,
initializer=_compute_dovl_configuration_performance_initializer,
initargs=(
working_dataset,
model,
self._time,
self._significance,
self.__class__.__name__,
),
)
if verbose:
to = TIME.time()
# set the things that will be the same for all remaning configurations
model[cind:]["num_glitch"] = num_glitch
model[cind:]["num_clean"] = num_clean
model[cind:]["cnum_vetod_gch"] = cnum_vetod_gch
model[cind:]["cnum_vetod_cln"] = cnum_vetod_cln
# process the rest of the channel's performances in parallel
result = pool.map_async(
_compute_dovl_configuration_performance_wrapper,
range(cind, len(model)),
chunksize=multiprocessing_chunksize,
)
while not result.ready():
TIME.sleep(multiprocessing_sleep)
if not result.successful():
raise RuntimeError("call to multiprocessing.pool.map_async failed")
for ind, statistics in enumerate(result.get()):
configuration = model[cind + ind]
(
num_vetod_exp,
num_vetod_gch,
num_aux,
num_vetod_cln,
poisson_signif,
use_percentage,
eff,
fap,
eff_fap,
) = statistics
# update configuration in place
configuration["num_aux"] = num_aux
configuration["num_vetod_exp"] = num_vetod_exp
configuration["num_vetod_gch"] = num_vetod_gch
configuration["num_vetod_cln"] = num_vetod_cln
configuration["poisson_signif"] = poisson_signif
configuration["use_percentage"] = use_percentage
configuration["eff"] = eff
configuration["fap"] = fap
configuration["eff_fap"] = eff_fap
if verbose:
print(_repr_body % (tuple(configuration) + (TIME.time() - to,)))
[docs]class DOVLfap(DOVL):
"""
an extension of DOVL that estimates the FAP in a slightly different way.
DOVL uses an approximation of the deadtime (comparable to what OVL does)
fap=(num_vetod_gch+num_vetod_cln)/(num_gch+num_cln)
DOVLfap uses an approximation close to the FAP
fap=num_vetod_cln/num_clean
while the FAP might appear more correct, the deadtime is likely to be more
numerically stable no infinities and thresholds on poisson_signif may prune
over-trained configureations
this is controlled based on a conditional within
_compute_dovl_configuration_performance
"""
_flavor = "dovlfap"
# --- classic OVL rather than the discrete equivalent
def _compute_ovl_configuration_performance_initializer(
local_segs,
local_gch_times,
local_model,
local_dataset,
local_time,
local_significance,
):
"""should NOT be used outside of multiprocessing"""
global segs, gch_times, model, dataset, time, significance
segs = local_segs
gch_times = local_gch_times
model = local_model
dataset = local_dataset
time = local_time
significance = local_significance
def _compute_ovl_configuration_performance_wrapper(cind):
"""should NOT be used outside of multiprocessing"""
_, _, statistics = _compute_ovl_configuration_performance(
segs, gch_times, cind, model, dataset, time, significance
)
return statistics
def _compute_ovl_configuration_performance(
segs, gch_times, cind, model, dataset, time, significance
):
"""
computes this configuration's performance
a convenient wrapper that is used within _recalculate to avoid repeating code
assumes both segments and gch_times are not empty (i.e.: there is data to veto)
"""
configuration = model[cind]
window = configuration["window"]
# NOTE: we do not filter by segs because that's all handled within
# _compute_configuration_performance
aux_segs = model._configuration2segments(configuration, dataset, time, significance)
aux_segs &= segs
num_glitch = len(gch_times)
livetime = utils.livetime(segs)
# NOTE: the old implementation of OVL did something like this num_aux =
# np.sum(utils.times_in_segments(aux_times, segs)) # filter by segments to
# get the count which could lead to the case where there was non-zero
# livetime in the aux_segs while num_aux=0 instead, we now define num_aux as
# the "effective number of triggers required to obtain the same amount of
# aux livetime this could lead to slightly different performance compared to
# the old implementation
num_aux = utils.livetime(aux_segs) / window
# figure out what is removed by this configuration
# and which times are contained in the segments
contained = utils.times_in_segments(gch_times, aux_segs)
num_vetod_gch = 1.0 * np.sum(contained)
aux_livetime = utils.livetime(aux_segs)
# compute ranking metrics
eff = num_vetod_gch / num_glitch
fap = aux_livetime / livetime
num_vetod_exp = num_glitch * fap # (num_glitch/livetime)*aux_livetime
poisson_signif = -_gammpln(num_vetod_gch, num_vetod_exp)
# nothing vetod, so we give it a low ranking
use_percentage = min(num_vetod_gch / num_aux, 1) if num_aux > 0 else 0.0
# nothing vetod, so we give it a low ranking
# note, we cannot have eff>0 with fap==0 within OVL, unlike within DOVL
# return
eff_fap = eff / fap if fap > 0 else 0.0
statistics = (
num_vetod_exp,
num_vetod_gch,
num_aux,
aux_livetime,
poisson_signif,
use_percentage,
eff,
fap,
eff_fap,
)
return contained, aux_segs, statistics
[docs]class OVL(DOVL):
"""
a wrapper for the Ordered Veto List (OVL) algorithm published in Essick et
al, CQG 30, 15 (2013) (DOI: 10.1088/0264-9381/30/15/155010) This algorithm
estimates False Alarm Probability based on measures of the deadtime
associated with segments generated around auxiliary events.
*WRITE ME* set this up so it takes in a path to an output directory and then
writes the Vetolist, data objects into that directory with appropriate names
(extract start, end from data objects)
describe inheritence for the extra attributes not declared within
SupervisedClassifier _allowed_metrics _default_incremental
_default_minima _gammln_cof _gammln_stp and the associated methods
_recalculate redundancies _check_columns _gcf _gammln _gserln _gammpln
"""
_flavor = "ovl"
[docs] def train(self, dataset):
"""
Instantiates a Vetolist and trains using the data within a dataset.
Algorithmic parameters include:
* channels
* thresholds
* windows
* num_recalculate
* incremental
* minima (key, value pairs)
and are specified through self.kwargs, set during instantiation.
"""
# sanity check training data
num_glitch, _ = dataset.vectors2num()
if num_glitch == 0:
raise ValueError(
"training OVL-based classifiers not allowed with zero target times. "
"this may indicate an issue with data discovery or not requesting "
"enough time for training."
)
# set up how we'll report models
reporter = HDF5Reporter(self.rootdir, dataset.start, dataset.end)
vetolistnickname = self.nickname + "Vetolist"
model = Vetolist(
dataset.start,
dataset.end,
segs=dataset.segs,
model_id=self._model_id,
channels=io.path2channels(self.kwargs["safe_channels_path"]),
significance=self.significance,
**self.kwargs,
)
self.model = model # set up the internal model
verbose = self.kwargs.get("verbose", False)
if verbose:
print("loading triggers within dataset")
# configure feature selection and load
if not dataset.is_configured():
dataset.configure(model.selector)
dataset.load_data()
# check required columns are available
self._check_columns(dataset.features)
# evaluate the first round without hierarchical removal
# that's why we do all the jazz with incremental...
if verbose:
print("------------------- INITIAL RECALCULATE -----------------------")
to = TIME.time()
incremental = self.kwargs["incremental"]
self.kwargs["incremental"] = 0
self._recalculate(dataset, **self.kwargs) # actually compute stuff
self.kwargs["incremental"] = incremental
if verbose:
print("-------------------- %.6f sec" % (TIME.time() - to))
# write first iteration to disk
reporter.report(vetolistnickname + "Initial", model.table)
# re-order the list and prune under-performing configurations
for metric, minimum in self.kwargs["minima"].items():
model.prune(minimum, metric=metric)
model.reorder(**self.kwargs)
# iterate for num_recalculate
template = vetolistnickname + "Iter%d"
for _ in range(self.kwargs["num_recalculate"]):
if verbose:
print(
"------------------- RECALCULATE : %d -----------------------" % _
)
to = TIME.time()
self._recalculate(dataset, **self.kwargs) # calculate stuff
if verbose:
print("------------------- %.6f sec" % (TIME.time() - to))
# write iteration to disk
reporter.report(template % (_), model.table)
# re-order and prune
for metric, minimum in self.kwargs["minima"].items():
model.prune(minimum, metric=metric)
model.reorder(**self.kwargs)
# iterate once more and write final iteration to disk
if verbose:
print("------------------- FINAL RECALCULATE -----------------------")
to = TIME.time()
self._recalculate(dataset, **self.kwargs) # actually compute stuff
if verbose:
print("-------------------- %.6f sec" % (TIME.time() - to))
# NOTE: we do NOT prune at this point!
model.reorder(**self.kwargs)
reporter.report(vetolistnickname + "Final", model.table)
# return self.model for syntactic consistency with parent
return self.model
def _recalculate(
self,
dataset,
incremental=None,
minima=None,
memory_light=False,
multiprocessing_sleep=DEFAULT_MULTIPROCESSING_SLEEP,
multiprocessing_chunksize=DEFAULT_MULTIPROCESSING_CHUNKSIZE,
**kwargs,
):
"""
the main learning calculation associated with OVL calculate the
performance of a set of ordered veto configurations on data does NOT
re-order or prune the list for you; you must do that by hand assumes
self.model already points to a Vetolist object
NOTE: we expect the dataset to contain : columns = [self._time,
self._significance] that is checked within train once instead of being
checked here repeatedly
"""
# do this because specifying in kwargs turns out to be difficult syntactically
if incremental is None:
incremental = self._default_incremental
if minima is None:
minima = self._default_minima
gch, _ = dataset.vectors2classes()
gch_times = np.array(gch.times) # make a copy for internal consumption
num_glitch = len(gch_times)
cnum_vetod_gch = 0
# list of segments representing remaining time, will be built up
# throughout the loop for hierarchical application of configurations
segs = segmentlist(
dataset.segs
) # make a copy so we don't update a shared reference accidentally
livetime = utils.livetime(segs)
cveto_livetime = 0
model = self.model
verbose = kwargs.get("verbose", False)
if verbose:
print(model._repr_head)
_repr_body = model._repr_body + " %.6f sec"
cind = 0
len_model = len(model)
num_proc = self.kwargs.get("num_proc", DEFAULT_NUM_PROC)
while (cind < len_model) and ((incremental > 0) or (num_proc == 1)):
# process serially, iterate over vetolist
configuration = model[cind]
if livetime and num_glitch:
if verbose:
to = TIME.time()
# compute statistics
(
contained,
aux_segs,
statistics,
) = _compute_ovl_configuration_performance(
segs,
gch_times,
cind,
model,
dataset,
self._time,
self._significance,
)
(
num_vetod_exp,
num_vetod_gch,
num_aux,
aux_livetime,
poisson_signif,
use_percentage,
eff,
fap,
eff_fap,
) = statistics
# update configuration in place
configuration["num_glitch"] = num_glitch
configuration["num_clean"] = livetime
configuration["num_aux"] = num_aux
configuration["num_vetod_exp"] = num_vetod_exp
configuration["num_vetod_gch"] = num_vetod_gch
configuration["num_vetod_cln"] = aux_livetime
configuration["poisson_signif"] = poisson_signif
configuration["use_percentage"] = use_percentage
configuration["eff"] = eff
configuration["fap"] = fap
configuration["eff_fap"] = eff_fap
# figure out if we'll keep this configuration moving forward
if incremental:
keep = True
for metric, threshold in minima.items():
keep *= configuration[metric] >= threshold
if keep:
incremental -= 1
livetime -= aux_livetime
cveto_livetime += aux_livetime
num_glitch -= num_vetod_gch
cnum_vetod_gch += num_vetod_gch
segs -= aux_segs # remove the vetod segments
gch_times = gch_times[
np.logical_not(contained)
] # retain only the triggers that were not flagged
# finish updating configuration
configuration["cnum_vetod_gch"] = cnum_vetod_gch
configuration["cnum_vetod_cln"] = cveto_livetime
# pop this channel to free up memory
if memory_light:
dataset.features.pop(
configuration["channel"]
) # garbage collector should gobble up the returned value
if verbose:
print(_repr_body % (tuple(configuration) + (TIME.time() - to,)))
else:
# we're missing data of some kind, so we should just skip the
# rest of the configurations -> zero their contributions
for name in model._table.dtype.names:
if name not in [
"channel",
"threshold",
"window",
"num_glitch",
"num_clean",
"cnum_vetod_gch",
"cnum_vetod_cln",
]:
model[cind:][name] = 0
model[cind:]["num_glitch"] = num_glitch
model[cind:]["num_clean"] = livetime
model[cind:]["cnum_vetod_gch"] = cnum_vetod_gch
model[cind:]["cnum_vetod_cln"] = cveto_livetime
break
cind += 1 # increment the counter
if (cind < len_model) and num_glitch and livetime:
# we did not finish the list before incremental ran out and we want
# to run on more than one core
if verbose:
print(
"processing remaining configurations in parallel with %d processes"
% num_proc
)
pool = mp.Pool(
num_proc,
initializer=_compute_ovl_configuration_performance_initializer,
initargs=(
segs,
gch_times,
model,
dataset,
self._time,
self._significance,
),
)
if verbose:
to = TIME.time()
# set the things that will be the same for all remaning configurations
model[cind:]["num_glitch"] = num_glitch
model[cind:]["num_clean"] = livetime
model[cind:]["cnum_vetod_gch"] = cnum_vetod_gch
model[cind:]["cnum_vetod_cln"] = cveto_livetime
# process the rest of the channel's performances in parallel
result = pool.map_async(
_compute_ovl_configuration_performance_wrapper, range(cind, len(model))
)
while not result.ready():
TIME.sleep(multiprocessing_sleep)
if not result.successful():
raise RuntimeError("call to multiprocessing.pool.map_async failed")
for ind, statistics in enumerate(result.get()):
configuration = model[cind + ind]
(
num_vetod_exp,
num_vetod_gch,
num_aux,
aux_livetime,
poisson_signif,
use_percentage,
eff,
fap,
eff_fap,
) = statistics
# update configuration in place
configuration["num_aux"] = num_aux
configuration["num_vetod_exp"] = num_vetod_exp
configuration["num_vetod_gch"] = num_vetod_gch
configuration["num_vetod_cln"] = aux_livetime
configuration["poisson_signif"] = poisson_signif
configuration["use_percentage"] = use_percentage
configuration["eff"] = eff
configuration["fap"] = fap
configuration["eff_fap"] = eff_fap
# pop this channel to free up memory
if memory_light:
dataset.features.pop(
configuration["channel"]
) # garbage collector should gobble up the returned value
if verbose:
print(_repr_body % (tuple(configuration) + (TIME.time() - to,)))
@hookimpl
def get_classifiers():
return {
"ovl": OVL,
"dovl": DOVL,
"dovl-fap": DOVLfap,
}