# Source code for ctao_datamodel.models.dataproducts.ident

"""Defines how data products are classified."""

import datetime
import re
import uuid
from enum import StrEnum, auto, nonmember
from typing import ClassVar, Self

from pydantic import AliasChoices, model_serializer, model_validator

from ..._core import AstroField, ModelBase, StrFlag, doc, enum_doc
from ..common import SiteID

# Public API of this module. ``FacilityName`` is included because it is the
# type (and default) of the public ``InstanceIdentifier.facility_name`` field.
__all__ = [
    "DataDivision",
    "DataProcessingCategory",
    "DataLevel",
    "DataAssociation",
    "DataType",
    "FacilityName",
    "InstanceIdentifier",
    "ProductType",
    "ProcessingSublevel",
    "ArrayElement",
    "ArrayElementClass",
    "ACADADataSource",
]

# Namespace string attached to the models and enums below through their
# ``_namespace`` attribute.
_NAMESPACE = "CTAO.DataProducts"


class DataLevel(StrEnum):
    """CTAO Data Level, see Top-Level Data Model."""

    # nonmember() keeps this a plain class attribute instead of letting the
    # enum machinery turn it into an enum member.
    _namespace = nonmember(_NAMESPACE)

    R0 = "R0"  #: Device-specific raw data
    R1 = "R1"  #: Standardized raw data
    DL0 = "DL0"  #: Archived raw data
    DL1 = "DL1"  #: Calibrated instrument data
    DL2 = "DL2"  #: Reconstructed air-shower data
    DL3 = "DL3"  #: Science-ready data
    DL4 = "DL4"  #: Binned science data
    DL5 = "DL5"  #: Advanced science data
    DL6 = "DL6"  #: Catalog and high-level data
    SIM = "SIM"  #: Simulated shower data
    # Note: the value is mixed-case ("SciOps"), unlike the other members.
    SCIOPS = "SciOps"  #: Science Operations data


class DataDivision(StrEnum):
    """
    Primary data type.

    See the CTAO Top-level Data Model Specification for more info. If more than
    one are contained in the same data product, the most important one should be
    chosen, e.g. EVENT if all three are present.
    """

    # nonmember() keeps this a plain class attribute instead of letting the
    # enum machinery turn it into an enum member.
    _namespace = nonmember(_NAMESPACE)

    #: For R0,R1,DL0-DL3, Data linked to Cherenkov triggers or air-showers
    EVENT = "Event"

    #: DL0-DL3, Time series data with rates much slower than EVENT
    MONITORING = "Monitoring"

    #: DL0-DL3, data associated with e.g. EVENT or MONITORING that is not a
    #: time-series or event list.
    SERVICE = "Service"

    #: For DL4-DL6, data associated with astrophysical models
    MODEL = "Model"

    #: For DL4-DL6, data generated from projecting and binning EVENT or related data
    BINNED = "Binned"

    #: Tabular data containing model fits and other metadata for a list of sources or targets
    CATALOG = "Catalog"


class DataAssociation(StrEnum):
    """The main associated instrument or analysis part."""

    # nonmember() keeps this a plain class attribute instead of letting the
    # enum machinery turn it into an enum member.
    _namespace = nonmember(_NAMESPACE)

    ARRAY = "Array"  #: associated with a CTAO Array
    SUBARRAY = "Subarray"  #: associated with a CTAO Subarray
    TELESCOPE = "Telescope"  #: associated with a single Cherenkov telescope
    REGION = "Region"  #: associated with a region on the sky sphere
    TARGET = "Target"  #: associated with a scientific target


class DataType(StrEnum):
    """
    The specific type of the product.

    This is used to disambiguate products with different data models within the
    same DataLevel, DataDivision, and DataAssociation. If two data products have
    different contents or do not share the same instance identifiers, they
    should have different DataTypes.

    This can also be used to set the IVOA ``data_product_type``, but may need some
    remapping to match IVOA terminology.
    """

    # nonmember() keeps this a plain class attribute instead of letting the
    # enum machinery turn it into an enum member.
    _namespace = nonmember(_NAMESPACE)

    ALL_SKY_CAMERA = "AllSkyCamera"  #: Image data from an All-Sky-Camera
    #: Spatio-spectral cube of estimated residual background
    BACKGROUND_MAP = "BackgroundMap"
    #: Product containing just the Background IRF component
    BACKGROUND_IRF_COMPONENT = "BackgroundIRFComponent"
    AIRSHOWER = "AirShower"  #: AirShower events
    AIRSHOWER_SIM = "AirShowerSim"  #: AirShower simulation
    CALIBRATION = "Calibration"  #: Calibration
    CALIBRATION_SIM = "CalibrationSim"  #: Calibration simulation
    MUON_CANDIDATE = "MuonCandidate"  #: Muon candidate events
    CEILOMETER = "Ceilometer"  #: Data from a Ceilometer
    #: Contemporary atmosphere profile
    CONTEMPORARY_ATMOSPHERE = "ContemporaryAtmosphere"
    COUNTS_MAP = "CountsMap"  #: Spatio-spectral cube of detected shower counts.
    EDISP_MAP = "EDispMap"  #: Spatio-spectral cube of projected EDisp IRF component
    EXPOSURE_MAP = "ExposureMap"  #: Spatio-spectral cube of effective-area * time
    EXCESS_MAP = "ExcessMap"  #: Spatio-spectral cube of excess counts above background
    #: Binary Spatio-spectral cube where 1 means excluded
    EXCLUSION_MAP = "ExclusionMap"
    FIT_SKY_MODEL = "FitSkyModel"  #: Astrophysical model fit parameters
    FLUX_MAP = "FluxMap"  #: Spatio-spectral cube of flux
    FRAM = "Fram"  #: FRAM information
    GRB_CATALOG = "GRBCatalog"  #: Table of GRB detections and parameters
    GRID_IRF = "GridIRF"  #: IRF for a given observation grid point
    LIDAR = "Lidar"  #: LIDAR information
    LIGHTCURVE = "LightCurve"  #: Flux vs Time
    LONG_TERM_SCHEDULE = "LongTermSchedule"  #: Long-term schedule
    MEDIUM_TERM_SCHEDULE = "MediumTermSchedule"  #: Medium-term schedule
    OBSERVATION = "Observation"  #: Observation bundle data
    OBSERVATION_SIM = "ObservationSim"  #: Simulated Observation bundle data
    OBSERVATION_CATALOG = "ObservationCatalog"  #: Table of ObservationBlock information
    PHASE_MAP = "PhaseMap"  #: Phase-folded spatio-spectral cube
    POINTING = "Pointing"  #: Pointing data
    PSF_MAP = "PSFMap"  #: Spatio-spectral cube of projected PSF IRF component
    RECO_MODEL = "RecoModel"  #: Reconstruction Model
    # NOTE(review): the member *name* below is misspelled (SIGNFICANCE, missing
    # an "I"); the value is spelled correctly. Renaming would break callers
    # that reference the name, so it is left as-is here.
    #: Spatio-spectral cube of detection significance
    SIGNFICANCE_MAP = "SignificanceMap"
    SKYMODEL = "SkyModel"  #: Astrophysical model definition
    SOURCE_CATALOG = "SourceCatalog"  #: Catalog (table) of sources
    SPECTRUM = "Spectrum"  #: Flux vs Energy
    TAILORED_IRF = "TailoredIRF"  #: IRF tailored to an observation
    TELESCOPE_TRIGGERS = "TelescopeTriggers"  #: Telescope trigger
    TRIGGER = "Trigger"  #: Subarray Trigger
    #: Ring sampler baseline coefficients
    LST_DRS4_BASELINE_CALIBRATION = "LSTDRS4BaselineCalibration"
    #: Ring sampler correction coefficients
    LST_DRS4_SAMPLING_CALIBRATION = "LSTDRS4SamplingCalibration"
    #: Ring sampler time-lapse coefficients
    LST_DRS4_TIMELAPSE_CALIBRATION = "LSTDRS4TimelapseCalibration"
    #: NectarCam calibration coefficients
    NECTARCAM_CALIBRATION = "NectarCamCalibration"
    WEATHER = "Weather"  #: Weather station data


class ProductType(ModelBase):
    """
    Describes all possible types of CTAO data product.

    This can also be used as a hierarchy for defining DataSets containing data
    of the same type within a higher-level collection like a data release.

    The string form is ``level/division/association/type``; see ``__str__``
    and ``from_str``.
    """

    _namespace: ClassVar[str] = _NAMESPACE

    level: DataLevel = AstroField(
        description=enum_doc(DataLevel)
        + "This should be the primary, aor highest level if more than one is"
        " included in"
        " the data product.",
        fits_keyword="DATALEVL",
    )
    division: DataDivision = AstroField(
        description=enum_doc(DataDivision), fits_keyword="DATADIV"
    )
    association: DataAssociation = AstroField(
        description=enum_doc(DataAssociation), fits_keyword="DATAASSO"
    )
    type: DataType = AstroField(description=enum_doc(DataType), fits_keyword="DATATYPE")

    def __str__(self) -> str:
        """Return standard string representation of the ProductType."""
        return f"{self.level}/{self.division}/{self.association}/{self.type}"

    @classmethod
    def from_str(cls, typestr: str) -> Self:
        """
        Construct a ProductType from its string representation.

        ``typestr`` must consist of exactly four ``/``-separated parts, in the
        order produced by ``__str__``: level, division, association, type.

        Raises ``ValueError`` if the string does not have exactly four parts.
        """
        parts = typestr.split("/")
        if len(parts) != 4:
            # Fail with an explicit message instead of an opaque
            # tuple-unpacking error.
            raise ValueError(
                "Expected a product type of the form "
                f"'level/division/association/type', got {typestr!r}"
            )
        level, division, association, type_ = parts
        return cls(level=level, division=division, association=association, type=type_)

    @model_validator(mode="after")
    def _check_division(self) -> Self:
        """
        Perform consistency checks.

        Raises ``ValueError`` when a product with level R0 through DL3 has the
        division BINNED or MODEL.
        """
        low_levels = (
            DataLevel.R0,
            DataLevel.R1,
            DataLevel.DL0,
            DataLevel.DL1,
            DataLevel.DL2,
            DataLevel.DL3,
        )
        high_level_divisions = (DataDivision.BINNED, DataDivision.MODEL)
        if self.level in low_levels and self.division in high_level_divisions:
            raise ValueError(
                f"R0-DL3 data products should not have division='{self.division}'"
            )
        return self


class DataProcessingCategory(StrEnum):
    """
    Which data processing pipeline category produced this product.

    A=realtime, B=next-day, C=final/publication-quality.
    """

    _namespace = nonmember(_NAMESPACE)

    A = "A"  #: Real-time
    B = "B"  #: Next-day
    C = "C"  #: Final or reprocessed


class ProcessingSublevel(StrFlag):
    """Used to distinguish partially filled DL1 or DL2 products."""

    _namespace = nonmember(_NAMESPACE)

    IMAGES = auto()  #: DL1 Images
    PARAMETERS = auto()  #: DL1 parameters
    GEOMETRY = auto()  #: DL2 shower geometry parameters
    ENERGY = auto()  #: DL2 shower energy parameters
    GAMMANESS = auto()  #: DL2 shower gammaness parameter


class FacilityName(StrEnum):
    """Name of observatory."""

    _namespace = nonmember(_NAMESPACE)

    #: Use for observed data
    CTAO = "CTAO"
    #: Use for simulated data or data challenges
    SIMULATED_CTAO = "Simulated-CTAO"


class ArrayElement(StrEnum):
    """Class of array element, for use in the ArrayElementID."""

    _namespace = nonmember(_NAMESPACE)

    TELESCOPE = "TEL"
    LIDAR = "LIDAR"
    SUBARRAY = "SUB"
    AUX = "AUX"


class ACADADataSource(ModelBase):
    """ACADA Data Source, see ACADA-DPPS ICD."""

    _namespace: ClassVar[str] = _NAMESPACE

    component: str = AstroField(
        description=(
            "ACADA data source name: the component, or instrument, "
            "generating the data. Normally will be the string of the ACS "
            "or OPC UA component instance name."
        ),
        examples=["SDH", "SWAT"],
    )
    id: int = AstroField(
        description="Instance identifier of the DataSource component. "
    )

    def __str__(self) -> str:
        """Return string rep of DataSource, e.g. SDH001."""
        return f"{self.component}{self.id:03d}"

    @model_validator(mode="before")
    @classmethod
    def _parse_from_string(cls, value):
        """
        Return correct dict from a single-string representation.

        Instances of this class and non-string inputs are passed through
        unchanged. A string must be upper-case ASCII letters followed by
        digits (e.g. ``"SDH001"``); anything else raises ``ValueError``.
        """
        if isinstance(value, cls):
            return value

        # A compact string like "SDH001": letters are the component, digits the id.
        if isinstance(value, str):
            match = re.fullmatch(r"([A-Z]+)(\d+)", value)
            if not match:
                raise ValueError(f"Invalid ACADADataSource string: {value}")
            component_str, id_str = match.groups()
            # The pattern only admits A-Z, so no case normalisation is needed.
            return {"component": component_str, "id": int(id_str)}

        # Otherwise, let pydantic complain
        return value

    @model_serializer
    def _serialize(self) -> str:
        """Serialize as the compact string form given by ``__str__``."""
        return str(self)


class ArrayElementClass(StrEnum):
    """Classification of Array Element, following ACADA-DPPS ICD."""

    # NOTE(review): unlike the other enums in this module, no ``_namespace``
    # is set here -- confirm whether that is intentional.

    TEL = "TEL"  #: AE is a telescope
    ACE = "ACE"  #: AE is an array-common-element instrument


class InstanceIdentifier(ModelBase):
    """
    Keys that uniquely identify an instance of an observed data product.

    Which of these is required depends on the ProductType.
    """

    _namespace: ClassVar[str] = _NAMESPACE

    id: uuid.UUID = AstroField(
        description=(
            "A locally-generated unique ID for this data product instance. This is used"
            " to trace the data product and link it to others. It should be unique even"
            " when same data product is produced multiple times. A UUID4 (purely"
            " random) should be used to avoid id collisions and for cybersecurity"
            " (other UUID versions encode local IP info, for example)."
        ),
        default_factory=lambda: uuid.uuid4(),
        fits_keyword="DATAID",
    )
    obs_id: int | None = AstroField(
        description=(
            "Unique identifier of the observation block, in a format defined in the "
            "Common Data Model Specification."
        ),
        ivoa_keyword="obs_id",
        fits_keyword="OBS_ID",
        default=None,
    )
    # TODO: look up in VODF notes how we phrased this
    event_type_group: str | None = AstroField(
        description=(
            "For data products that are associated with a specific group of event "
            "types, this defines the name of that group. "
            "Note that for data products that mix different event type groups, "
            "this is not necessary."
        ),
        default=None,
        ivoa_keyword="event_type",
        fits_keyword="EVTYPE",
    )
    ae_id: int | None = AstroField(
        description=(
            "ID of a CTAO array element. See common data model. ``tel_id`` may be used "
            "as an alias in the case where the array element is a telescope."
        ),
        default=None,
        fits_keyword="AE_ID",
        validation_alias=AliasChoices("ae_id", "tel_id"),
    )
    ae_class: ArrayElementClass | None = AstroField(
        enum_doc(ArrayElementClass), default=None, fits_keyword="AE_CLASS"
    )
    subarray_id: int | None = AstroField(
        "Subarray id, for data products from a subarray.",
        default=None,
        fits_keyword="SUB_ID",
    )
    chunk_id: int | None = AstroField(
        description=(
            "For files that are split into multiple pieces (chunks) to keep the file"
            " size under a limit, this describes the chunk number (from 0) of a"
            " particular data product."
        ),
        default=None,
        fits_keyword="CHUNK_ID",
    )
    batch_id: int | None = AstroField(
        description=(
            "Batch identifier for intermediate data products that are grouped/merged in"
            " batches. "
        ),
        default=None,
        fits_keyword="BATCH_ID",
    )
    calibration_service_id: int | None = AstroField(
        description=(
            "ID assigned during data acquisition to link "
            "calibration products used when producing a data product."
        ),
        default=None,
    )
    observing_night: datetime.date | None = AstroField(
        description=(
            "Date associated with the start of data taking. It shall be created"
            " according to the date of the beginning of the observation night prior to"
            " the injection of data to DPPS. The date of the beginning of the"
            " observation night formally starts at 12:00 local civil time, is valid for"
            " the next 24 hours, and ends the next day at 12:00 local civil time."
        ),
        fits_keyword="OBSNIGHT",
        default=None,
    )
    sublevel_id: ProcessingSublevel | None = AstroField(
        description=enum_doc(ProcessingSublevel),
        default=None,
    )
    target_id: str | None = AstroField(
        description="Name of the target or ROI covered by this instance.",
        default=None,
    )
    region_id: str | None = AstroField(
        description="Identifier for region-of-interest used for this instance.",
        default=None,
    )
    observing_period_id: str | None = AstroField(
        description="ID of the observing period.",
        default=None,
    )
    lunar_cycle_id: int | None = AstroField(
        description=(
            "ID of the lunar cycle associated with this instance. Combined with the"
            " observing_period_id, this gives a unique ID of the 'period' in which this"
            " data product was observed."
        ),
        default=None,
    )
    facility_name: FacilityName = AstroField(
        description=(
            "Observatory or facility used to collect the data. If the data are"
            " simulated, it is recommended to explicitly use the word 'simulated' in"
            " the name."
        ),
        default=FacilityName.CTAO,
        fits_keyword="TELESCOP",
        ivoa_keyword="facility_name",
    )
    site_id: SiteID | None = AstroField(
        description="CTAO site associated with this instance.",
        default=None,
        fits_keyword="INSTRUME",
        ivoa_keyword="instrument_name",
    )
    particle_pdgid: int | None = AstroField(
        description=(
            "For simulated data products associated with a single point in simulation"
            " phase space, the primary particle ID in PDGID format, e.g. photon=22,"
            " electron=11."
        ),
        default=None,
    )
    category: DataProcessingCategory | None = AstroField(
        doc(DataProcessingCategory), default=None
    )
    data_source: ACADADataSource | None = AstroField(
        description=doc(ACADADataSource), default=None, fits_keyword="DATASRC"
    )
    assembly_name: str | None = AstroField(
        "For monitoring time-series, describes the assembly (group of monnitoring"
        " points) associated with the data product.",
        default=None,
    )

    @model_validator(mode="after")
    def _check_lunar_cycle_id(self) -> Self:
        """
        Ensure if we have a lunar_cycle_id, we also have an observing_period_id.

        Raises ``ValueError`` when ``lunar_cycle_id`` is set but
        ``observing_period_id`` is missing or empty.
        """
        # Compare against None explicitly: a lunar_cycle_id of 0 is falsy but
        # is still a value that has been set.
        if self.lunar_cycle_id is not None and not self.observing_period_id:
            raise ValueError("A `lunar_cycle_id` requires an `observing_period_id`.")
        return self

    # Disabled identifiers for simulated data products, kept for reference.
    #
    # zenith_id: int | None = AstroField(
    #     description=(
    #         "For simulated data products associated with a single point in simulation phase space, "
    #         "the zenith angle of simulated pointing, to nearest integer."
    #     ),
    #     unit="deg",
    #     ge=0,
    #     le=90,
    #     default=None,
    # )
    # azimuth_id: int | None = AstroField(
    #     description=(
    #         "For simulated data products associated with a single point in simulation phase space, "
    #         "the azimuth of simulated telescope pointing, to nearest integer."
    #     ),
    #     unit="deg",
    #     ge=0,
    #     le=360,
    #     default=None,
    # )
    # nsb_id: int | None = AstroField(
    #     description=(
    #         "For simulated data products associated with a single point in simulation phase space, "
    #         "the NSB level as multiple of the nominal dark value, to nearest integer."
    #     ),
    #     default=None,
    # )
    # source_cone_bin_id: int | None = AstroField(
    #     description=(
    #         "For simulated data products associated with a single point in simulation phase space, "
    #         "the view cone angle of simulated source (0=pointlike) to nearest integer."
    #     ),
    #     unit="deg",
    #     ge=0,
    #     le=180,
    #     default=None,
    # )
    # divergence_bin_id: float | None = AstroField(
    #     description=(
    #         "For simulated data products associated with a single point in simulation phase space, "
    #         "the divergence angle of telescopes."
    #     ),
    #     unit="deg",
    #     default=None,
    # )