#!/usr/bin/env python3
"""
Defines what is a Benchmark.
"""
import logging
import re
import warnings
from abc import abstractmethod
from dataclasses import dataclass
from enum import Enum, auto
from pathlib import Path
from matplotlib.figure import FigureBase
from .inputdataset import InputDataset
from .store import MetricsStore, ResultStore
__all__ = ["Benchmark", "BenchmarkResult", "ComparisonStatus", "MissingInputError"]
class ComparisonStatus(str, Enum):
    """Status of a benchmark check.

    NOTE(review): with a ``str`` mixin, ``auto()`` produces the stringified
    integer counter ("1".."4"), not the member name. If name-valued members
    were intended, ``enum.StrEnum`` (Python 3.11+) would be needed — confirm
    before changing, since any stored/serialized values would differ.
    """

    PASSED = auto()  #: benchmark passed
    WARNING = auto()  #: benchmark passed, but only barely, should check
    FAILED = auto()  #: benchmark failed
    OTHER = auto()  #: other failure, or couldn't complete benchmark
@dataclass
class BenchmarkResult:
    """Output of a benchmark check; each comparison yields one or more of these."""

    benchmark_name: str  # friendly name of the Benchmark that produced this result
    reference_dataset_name: str  # name of the dataset used as the reference
    test_dataset_name: str  # name of the dataset compared against the reference
    status: ComparisonStatus  # outcome of the comparison
    comment: str  # human-readable explanation of the status
    plots: list[FigureBase] | None  # supporting figures, if any were produced
class Benchmark:
    """Defines what a "scientist" should define for a given benchmark.

    A benchmark is defined by two functions:

    1. ``generate_metrics()``, which will be used to transform event data into
       one or more ``Metric`` stored in a ``MetricsStore`` corresponding to a
       single set of processed events.
    2. ``compare_to_reference()``, which compares one or more MetricsStores to
       a reference MetricsStore. This function is run after there are multiple
       MetricsStores available that have been processed with
       ``generate_metrics``.

    NOTE(review): this class uses ``@abstractmethod`` but does not inherit
    from ``abc.ABC``, so instantiating an incomplete subclass is not blocked
    at construction time — confirm whether that is intentional before adding
    the ABC base (doing so would raise ``TypeError`` for existing direct
    instantiations).
    """

    def __init__(self):
        super().__init__()
        # One logger per concrete benchmark class, named after the subclass.
        self._log = logging.getLogger(self.__class__.__name__)

    @property
    @abstractmethod
    def required_inputs(self) -> set[str]:
        """Return the set of required keys in the InputDataset that this Benchmark needs.

        These are checked before generating metrics.
        """
        return set()

    @abstractmethod
    def generate_metrics(self, metric_store: MetricsStore) -> dict | None:
        """Produce metrics for this benchmark for later comparison.

        Called once per benchmark per ``MetricsStore``, i.e. for one set of
        input events, defines how to transform the events into metrics stored
        in a ``MetricsStore`` that can later be compared.

        Parameters
        ----------
        metric_store: MetricsStore
            Where to store the metrics generated by this ``Benchmark``. It
            must be initialized with an
            `datapipe_testbench.inputdataset.InputDataset` containing the
            inputs to use for the transformation into metrics.
        """

    @abstractmethod
    def compare_to_reference(
        self, metric_store_list: list[MetricsStore], result_store: ResultStore
    ) -> dict | None:
        """Perform the comparison study on metrics associated with a set of experiments.

        This function takes a list of MetricStores that have been previously
        filled by `datapipe_testbench.benchmark.Benchmark.generate_metrics()`,
        generates plots and comparison results, and writes the output to a
        ResultsStore. If there is more than one MetricStore to compare, the
        first one is considered the _reference_.

        Parameters
        ----------
        metric_store_list: list[MetricsStore]
            The list of MetricStores to compare, the first of which is the
            _reference_.
        result_store: ResultStore
            Where the results of the comparison study are stored.
        """
        # TODO: some subclasses return a list of BenchmarkResults here, check how used

    @abstractmethod
    def make_report(self, result_store: ResultStore):
        """Make a report for the benchmark using the results of the comparisons.

        The result_store must already contain the outputs of
        ``compare_to_reference()``.

        Parameters
        ----------
        result_store : ResultStore
            Contains the input comparisons and is where the report will be
            stored.
        """

    def _compare_metrics_in_stores(self, metric_store_list, result_store) -> dict:
        """Compare metrics loaded from stores.

        Helper function for use within ``compare_to_reference()`` when
        possible.

        Parameters
        ----------
        metric_store_list : list[MetricsStore]
            The stores containing the metrics to compare. The first store in
            the list is considered the reference.
        result_store : ResultStore
            The store into which comparison results will be saved.

        Returns
        -------
        dict[str, BenchmarkResult]
            Mapping of benchmark results. Note there may be more of these than
            there are MetricsStores if the benchmark is applied for example to
            multiple telescope types; there would be one per telescope type
            per MetricsStore.
        """
        # Go through outputs() (which falls back to the deprecated
        # self.output_names) so this helper also works for subclasses that
        # implement outputs() as the deprecation message requests.
        output_map = self.outputs()

        # Load each named metric from every store; index 0 is the reference.
        metrics = {name: [] for name in output_map}
        for name, loaded in metrics.items():
            for ds in metric_store_list:
                loaded.append(ds.retrieve_data(output_map[name]))
                # Tag each metric with its store of origin for plot legends.
                loaded[-1].label = ds.name

        comparisons = dict()
        for name, loaded in metrics.items():
            ref, others = loaded[0], loaded[1:]
            compared = ref.compare(others)
            # Use the class logger instead of print() so output respects the
            # application's logging configuration.
            self._log.info("Ref metric %s, plots %s", ref.name, compared.plots.keys())
            self._log.info("Output saved in %s", compared.name)
            # NOTE(review): self.metric_path is expected to be provided by the
            # subclass — it is not defined in this base class; confirm.
            compared.store(result_store, self.metric_path)
            comparisons[name] = compared
        return comparisons

    @property
    def name(self):
        """Return friendly name of this benchmark, derived from the class name."""
        # Split the CamelCase class name into words, e.g.
        # "MuonRingBenchmark" -> "Muon Ring Benchmark".
        parts = re.sub(
            "([A-Z][a-z]+)", r" \1", re.sub("([A-Z]+)", r" \1", self.__class__.__name__)
        ).split()
        if parts[-1] != "Benchmark":
            parts.append("Benchmark")
        return " ".join(parts)

    @abstractmethod
    def outputs(self) -> dict[str, Path]:
        """Return mapping of key to Path of all outputs that this Benchmark generates."""
        # Legacy fallback: old subclasses declared self.output_names instead
        # of implementing outputs(). If neither is provided, the getattr below
        # raises AttributeError (pre-existing behavior, kept as-is).
        if hasattr(self, "output_names"):
            warnings.warn(
                "This benchmark should implement outputs() and not self.output_names",
                category=DeprecationWarning,
            )
        return getattr(self, "output_names")

    def missing_outputs(self, metric_store: MetricsStore):
        """Return the list of output names not yet present in *metric_store*."""
        return [
            name
            for name, path in self.outputs().items()
            if not metric_store.data_exists(path)
        ]