import numpy as np
import pluggy
from ligo.segments import segment, segmentlist
from .. import calibration
from .. import exceptions
from .. import names
from .. import utils
# the default spacing between successive samples for all calls to
# SupervisedClassifier.timeseries
DEFAULT_DT = 1.0 / 256  # sec (i.e., a 256 Hz sample rate)
class ClassifierModel(object):
    """
    a parent class that defines the basic attributes all trained models must
    have in order to track data provenance. each classifier will likely extend
    this class for its own purposes
    """
def __init__(self, start, end, segs=None, model_id=None, generate_id=False):
self._start = start
self._end = end
if segs is None:
segs = segmentlist([segment(start, end)])
self._segs = segs
if generate_id:
self._model_id = utils.generate_unique_id()
else:
self._model_id = model_id
self._hash = None
@property
def start(self):
return self._start
@start.setter
def start(self, new):
self._start = new
self._end = max(self._end, new)
self._segs &= segmentlist([segment(self._start, self._end)])
@property
def end(self):
return self._end
@end.setter
def end(self, new):
self._start = min(self._start, new)
self._end = new
self._segs &= segmentlist([segment(self._start, self._end)])
@property
def segs(self):
return self._segs
@segs.setter
def segs(self, new):
self._segs = segmentlist(new)
if len(self._segs):
self._start = min(self._start, self._segs[0][0])
self._end = max(self._end, self._segs[-1][1])
@property
def model_id(self):
try:
return self._model_id
except AttributeError:
try:
# classifiers with v0.5 metadata
return f"run{self._run_id}"
except AttributeError:
# pre v0.5 classifiers without any metadata
return None
@property
def hash(self):
"""the identifier used to locate this model."""
if self._hash is not None:
return self._hash
else:
return names.times_id2hash(self._start, self._end, self.model_id)
@hash.setter
def hash(self, new):
self._hash = new
    def feature_importance_table(self, dataset, **kwargs):
"""should return (columns, data) compatible with the DQR's
json.format_table (see use in idq/reports.py)"""
raise NotImplementedError
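
# A minimal usage sketch of ClassifierModel (illustrative only; the GPS times
# are arbitrary). note that the start/end setters only ever *intersect*
# self.segs with the new span, so shrinking the span clips segs:
#
#     model = ClassifierModel(1000000000, 1000000100, generate_id=True)
#     model.end = 1000000050  # shrinking the span clips segs
#     model.segs              # -> [segment(1000000000, 1000000050)]
#     model.hash              # derived from start, end, and model_id
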
# -------------------------------------------------
# Supervised Classification Objects
# -------------------------------------------------
class SupervisedClassifier(object):
    """
    a parent class for classifiers. Children should override methods as
    necessary. This classifier supports everything required syntactically
    for the pipeline to function, but assigns random ranks to all events.
    """
_flavor = "supervised_classifier"
_required_kwargs = []
# set during instantiation, but "declared" here for clarity
kwargs = dict()
def __init__(self, nickname, rootdir=".", model_id=None, **kwargs):
self._nickname = nickname
# stored in case the object wants to write intermediate data products to
# disk, etc
self.rootdir = rootdir
self._model_id = model_id
for kwarg in self._required_kwargs:
assert kwarg in kwargs, "kwarg=%s required" % kwarg
self.kwargs = kwargs
# set up calibration map
self._calibration_map = None
# set up model
self._model = None
@property
def nickname(self):
"""
this is a "private" variable because I don't ever want a user to muck
with this once it is set upon instantiation
"""
return self._nickname
@property
def flavor(self):
"""
this is a "private" variable because I don't ever want a user to muck
with this. I also want each child to have to declare this for
        themselves. this should be considered like a "type", but a string may
        be easier to deal with than a Type object
"""
return self._flavor
@property
def is_trained(self):
return self._model is not None
@property
def is_calibrated(self):
return self._calibration_map is not None
@property
def model(self):
if not self.is_trained:
raise exceptions.UntrainedError
return self._model
@model.setter
def model(self, model):
"""
update the model with a stored description.
WARNING:
you should only use this if you really know what you're doing!
"""
self._model = model
self._model_id = model.model_id
@property
def calibration_map(self):
if not self.is_calibrated:
raise exceptions.UncalibratedError
return self._calibration_map
@calibration_map.setter
def calibration_map(self, calibration_map):
"""
update the calibration_map with a stored description
WARNING:
you should only use this if you really know what you're doing!
"""
self._calibration_map = calibration_map
    def calibrate(self, dataset, **kwargs):
        """
        calibrate this algorithm based on the dataset of feature vectors.
        requires all FeatureVectors in the dataset to have been evaluated.
        this should update self._calibration_map
        """
assert dataset.is_evaluated(), "dataset has not yet been evaluated!"
if not self.is_calibrated: # no previous calibration map
if self.kwargs.get(
"discrete_calibration", False
): # use a discrete calibration map
self.calibration_map = calibration.DiscreteCalibrationMap(
dataset, model_id=self._model_id, **kwargs
)
else:
self.calibration_map = calibration.CalibrationMap(
dataset, model_id=self._model_id, **kwargs
)
self.calibration_map.optimize(**kwargs)
else:
# add observations incrementally and rely on calibration_map to
# auto_optimize as needed
self.calibration_map.add_and_flush(dataset, **kwargs)
return self.calibration_map
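    # For example, passing discrete_calibration=True at instantiation selects
    # the DiscreteCalibrationMap branch on the first call to calibrate (a
    # sketch; `dataset` stands in for a real, already-evaluated DataSet):
    #
    #     clf = SupervisedClassifier("demo", discrete_calibration=True)
    #     clf.train(dataset)
    #     clf.evaluate(dataset)
    #     clf.calibrate(dataset)  # -> calibration.DiscreteCalibrationMap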
    def train(self, dataset):
        """
        This classifier does NOT use data to make predictions. Instead, it
        supports this method for syntactic completeness. Note: it only stores
        a provenance-tracking placeholder model so the classifier registers as
        trained
        """
        # the model does not describe a real fit; it only records provenance
        # (start, end, segs, model_id) so we can identify that this classifier
        # has been trained
self.model = ClassifierModel(
dataset.start, dataset.end, dataset.segs, model_id=self._model_id
)
return self.model
    def evaluate(self, dataset):
        """
        This classifier assigns random ranks to all events, independent of any
        training data. assigns a rank and the model's hash to each
        FeatureVector in the dataset and returns the dataset
        WARNING: this needs to be highly efficient if we're to use it to build
        time-series!
        """
if not self.is_trained:
            # raise early for clarity; accessing self.model.hash below would
            # raise the same UntrainedError via the model property
raise exceptions.UntrainedError(
"%s does not have an internal model" % self.flavor
)
for rank, feature_vector in zip(np.random.rand(len(dataset)), dataset):
feature_vector.rank = rank
feature_vector.hash = self.model.hash
return dataset
    def timeseries(self, info, dataset_factory, dt=DEFAULT_DT, segs=None, set_ok=None):
        """
        returns a list of (ranks, start, dt) tuples, one per contiguous
        segment, where ranks is a 1D array of random ranks sampled every dt
        seconds
        """
if segs is None:
segs = dataset_factory.classifier_data.segs
return [
(np.random.random(len(t)), t[0], dt) for t in utils.segs2times(segs, dt)
]
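    # For instance, a single 4-second segment sampled at DEFAULT_DT would come
    # back as a one-element list (values illustrative; the exact number of
    # samples depends on how utils.segs2times handles segment endpoints):
    #
    #     [(ranks, t0, 1.0 / 256)]  with len(ranks) ~ 4 * 256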
    def feature_importance(self):
        """
        returns a ranked list of the important features within the trained
        model. raises an UntrainedError if we do not have a trained model
        stored internally
        """
if not self.is_trained:
raise exceptions.UntrainedError(
"%s does not have an internal model" % self.flavor
)
return [] # place-holder. We don't use any features for this random Classifier
    def feature_importance_table(self, *args, **kwargs):
        """should return (columns, data) compatible with the DQR's
        json.format_table (see use in idq/reports.py)"""
if not self.is_trained:
raise exceptions.UntrainedError(
"%s does not have an internal model" % self.flavor
)
raise NotImplementedError
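
# A rough end-to-end sketch of the intended workflow (`dataset`, `info`, and
# `factory` are hypothetical stand-ins for real iDQ objects):
#
#     clf = SupervisedClassifier("random", rootdir="/tmp", model_id="demo")
#     clf.train(dataset)             # stores a provenance-only model
#     clf.evaluate(dataset)          # assigns a random rank to each vector
#     clf.calibrate(dataset)         # builds/updates the calibration map
#     clf.timeseries(info, factory)  # -> [(ranks, start, dt), ...]
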
class IncrementalSupervisedClassifier(SupervisedClassifier):
    """
    an extension of SupervisedClassifier that is meant to re-train itself
    incrementally instead of via a series of batch jobs (each starting from
    scratch). should be able to inherit much of the functionality from
    SupervisedClassifier
    """
_flavor = "incremental_suprvised_classifier"
    def train(self, dataset):
        """
        this should incrementally update the internal model. otherwise, the
        classifier's behavior should be the same as SupervisedClassifier
        """
        # should point to an incrementally-updated model; as in the parent
        # class, we store a provenance-only placeholder so the classifier
        # registers as trained (assigning a bare True would break the model
        # setter, which reads model.model_id)
        self.model = ClassifierModel(
            dataset.start, dataset.end, dataset.segs, model_id=self._model_id
        )
        return self.model
hookspec = pluggy.HookspecMarker("iDQ")
@hookspec
def get_classifiers():
    """
    This hook is used to return SupervisedClassifiers in the form:
        {"type[:specifier]": Classifier}
    where the optional specifier refers to a particular flavor of that
    classifier
    """
@hookspec
def get_incremental_classifiers():
    """
    This hook is used to return IncrementalSupervisedClassifiers in the form:
        {"type[:specifier]": Classifier}
    where the optional specifier refers to a particular flavor of that
    classifier
    """