"""Defines how data products are classified."""
import datetime
import re
import uuid
from enum import StrEnum, auto, nonmember
from typing import ClassVar, Self
from pydantic import AliasChoices, model_serializer, model_validator
from ..._core import AstroField, ModelBase, StrFlag, doc, enum_doc
from ..common import SiteID
# Public API of this module. ``FacilityName`` is listed because it is the type
# of the public ``InstanceIdentifier.facility_name`` field, so users need to be
# able to import it alongside the other classification enums.
__all__ = [
    "DataDivision",
    "DataProcessingCategory",
    "DataLevel",
    "DataAssociation",
    "DataType",
    "InstanceIdentifier",
    "ProductType",
    "ProcessingSublevel",
    "ArrayElement",
    "ArrayElementClass",
    "ACADADataSource",
    "FacilityName",
]

# Namespace string that the models and enums below expose through their
# ``_namespace`` attribute.
_NAMESPACE = "CTAO.DataProducts"
class DataLevel(StrEnum):
    """CTAO Data Level, see Top-Level Data Model."""

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    R0 = "R0"  #: Device-specific raw data
    R1 = "R1"  #: Standardized raw data
    DL0 = "DL0"  #: Archived raw data
    DL1 = "DL1"  #: Calibrated instrument data
    DL2 = "DL2"  #: Reconstructed air-shower data
    DL3 = "DL3"  #: Science-ready data
    DL4 = "DL4"  #: Binned science data
    DL5 = "DL5"  #: Advanced science data
    DL6 = "DL6"  #: Catalog and high-level data
    SIM = "SIM"  #: Simulated shower data
    # Note: unlike the other members, the value here is mixed-case.
    SCIOPS = "SciOps"  #: Science Operations data
class DataDivision(StrEnum):
    """
    Primary data type.

    See the CTAO Top-level Data Model Specification for more info. If more than
    one are contained in the same data product, the most important one should be
    chosen, e.g. EVENT if all three are present.
    """

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    #: For R0,R1,DL0-DL3, Data linked to Cherenkov triggers or air-showers
    EVENT = "Event"
    #: DL0-DL3, Time series data with rates much slower than EVENT
    MONITORING = "Monitoring"
    #: DL0-DL3, data associated with e.g. EVENT or MONITORING that is not a
    #: time-series or event list.
    SERVICE = "Service"
    #: For DL4-DL6, data associated with astrophysical models
    MODEL = "Model"
    #: For DL4-DL6, data generated from projecting and binning EVENT or related data
    BINNED = "Binned"
    #: Tabular data containing model fits and other metadata for a list of sources or targets
    CATALOG = "Catalog"
class DataAssociation(StrEnum):
    """The main associated instrument or analysis part."""

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    ARRAY = "Array"  #: Associated with a CTAO Array
    SUBARRAY = "Subarray"  #: Associated with a CTAO Subarray
    TELESCOPE = "Telescope"  #: Associated with a single Cherenkov telescope
    REGION = "Region"  #: Associated with a region on the sky sphere
    TARGET = "Target"  #: Associated with a scientific target
class DataType(StrEnum):
    """
    The specific type of the product.

    This is used to disambiguate products with different data models within the
    same DataLevel, DataDivision, and DataAssociation. If two data products have
    different contents or do not share the same instance identifiers, they
    should have different DataTypes.

    This can also be used to set the IVOA ``data_product_type``, but may need some
    remapping to match IVOA terminology.
    """

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    ALL_SKY_CAMERA = "AllSkyCamera"  #: Image data from an All-Sky-Camera
    #: Spatio-spectral cube of estimated residual background
    BACKGROUND_MAP = "BackgroundMap"
    #: Product containing just the Background IRF component
    BACKGROUND_IRF_COMPONENT = "BackgroundIRFComponent"
    AIRSHOWER = "AirShower"  #: AirShower events
    AIRSHOWER_SIM = "AirShowerSim"  #: AirShower simulation
    CALIBRATION = "Calibration"  #: Calibration
    CALIBRATION_SIM = "CalibrationSim"  #: Calibration simulation
    MUON_CANDIDATE = "MuonCandidate"  #: Muon candidate events
    CEILOMETER = "Ceilometer"  #: Data from a Ceilometer
    #: Contemporary atmosphere profile
    CONTEMPORARY_ATMOSPHERE = "ContemporaryAtmosphere"
    COUNTS_MAP = "CountsMap"  #: Spatio-spectral cube of detected shower counts.
    EDISP_MAP = "EDispMap"  #: Spatio-spectral cube of projected EDisp IRF component
    EXPOSURE_MAP = "ExposureMap"  #: Spatio-spectral cube of effective-area * time
    EXCESS_MAP = "ExcessMap"  #: Spatio-spectral cube of excess counts above background
    #: Binary Spatio-spectral cube where 1 means excluded
    EXCLUSION_MAP = "ExclusionMap"
    FIT_SKY_MODEL = "FitSkyModel"  #: Astrophysical model fit parameters
    FLUX_MAP = "FluxMap"  #: Spatio-spectral cube of flux
    FRAM = "Fram"  #: FRAM information
    GRB_CATALOG = "GRBCatalog"  #: Table of GRB detections and parameters
    GRID_IRF = "GridIRF"  #: IRF for a given observation grid point
    LIDAR = "Lidar"  #: LIDAR information
    LIGHTCURVE = "LightCurve"  #: Flux vs Time
    LONG_TERM_SCHEDULE = "LongTermSchedule"  #: Long-term schedule
    MEDIUM_TERM_SCHEDULE = "MediumTermSchedule"  #: Medium-term schedule
    OBSERVATION = "Observation"  #: Observation bundle data
    OBSERVATION_SIM = "ObservationSim"  #: Simulated Observation bundle data
    OBSERVATION_CATALOG = "ObservationCatalog"  #: Table of ObservationBlock information
    PHASE_MAP = "PhaseMap"  #: Phase-folded spatio-spectral cube
    POINTING = "Pointing"  #: Pointing data
    PSF_MAP = "PSFMap"  #: Spatio-spectral cube of projected PSF IRF component
    RECO_MODEL = "RecoModel"  #: Reconstruction Model
    # NOTE(review): the member *name* below is misspelled ("SIGNFICANCE"), while
    # its value is spelled correctly. The name is public API, so it is left
    # untouched here; confirm whether a rename/deprecation is wanted.
    #: Spatio-spectral cube of detection significance
    SIGNFICANCE_MAP = "SignificanceMap"
    SKYMODEL = "SkyModel"  #: Astrophysical model definition
    # NOTE(review): the original comment here was a truncated copy of the map
    # descriptions ("Spatio-spectral cube of"); confirm the intended wording.
    SOURCE_CATALOG = "SourceCatalog"  #: Catalog of sources
    SPECTRUM = "Spectrum"  #: Flux vs Energy
    TAILORED_IRF = "TailoredIRF"  #: IRF tailored to an observation
    TELESCOPE_TRIGGERS = "TelescopeTriggers"  #: Telescope trigger
    TRIGGER = "Trigger"  #: Subarray Trigger
    #: Ring sampler baseline coefficients
    LST_DRS4_BASELINE_CALIBRATION = "LSTDRS4BaselineCalibration"
    #: Ring sampler correction coefficients
    LST_DRS4_SAMPLING_CALIBRATION = "LSTDRS4SamplingCalibration"
    #: Ring sampler time-lapse coefficients
    LST_DRS4_TIMELAPSE_CALIBRATION = "LSTDRS4TimelapseCalibration"
    #: NectarCam calibration coefficients
    NECTARCAM_CALIBRATION = "NectarCamCalibration"
    WEATHER = "Weather"  #: Weather station data
class ProductType(ModelBase):
    """
    Describes all possible types of CTAO data product.

    This can also be used as a hierarchy for defining DataSets containing data
    of the same type within a higher-level collection like a data release.
    """

    _namespace: ClassVar[str] = _NAMESPACE

    level: DataLevel = AstroField(
        description=enum_doc(DataLevel)
        + "This should be the primary, or highest level if more than one is"
        " included in"
        " the data product.",
        fits_keyword="DATALEVL",
    )
    division: DataDivision = AstroField(
        description=enum_doc(DataDivision), fits_keyword="DATADIV"
    )
    association: DataAssociation = AstroField(
        description=enum_doc(DataAssociation), fits_keyword="DATAASSO"
    )
    type: DataType = AstroField(description=enum_doc(DataType), fits_keyword="DATATYPE")

    def __str__(self) -> str:
        """Return the ``level/division/association/type`` string form."""
        return f"{self.level}/{self.division}/{self.association}/{self.type}"

    @classmethod
    def from_str(cls, typestr: str) -> Self:
        """
        Construct a ProductType from its string representation.

        Parameters
        ----------
        typestr : str
            String of the form ``level/division/association/type``, i.e. the
            format produced by ``str()`` on an instance.

        Raises
        ------
        ValueError
            If ``typestr`` does not consist of exactly four ``/``-separated
            fields.
        """
        fields = typestr.split("/")
        # Check the field count explicitly so the caller gets a message naming
        # the offending string rather than a bare tuple-unpacking error.
        if len(fields) != 4:
            raise ValueError(
                f"Invalid ProductType string '{typestr}': expected 4 "
                "'/'-separated fields (level/division/association/type), "
                f"got {len(fields)}."
            )
        level, division, association, type_ = fields
        return cls(level=level, division=division, association=association, type=type_)

    @model_validator(mode="after")
    def _check_division(self) -> Self:
        """
        Perform consistency checks between ``level`` and ``division``.

        Raises
        ------
        ValueError
            If an R0-DL3 product declares the BINNED or MODEL division.
        """
        low_levels = (
            DataLevel.R0,
            DataLevel.R1,
            DataLevel.DL0,
            DataLevel.DL1,
            DataLevel.DL2,
            DataLevel.DL3,
        )
        high_level_only_divisions = (DataDivision.BINNED, DataDivision.MODEL)

        if self.level not in low_levels:
            return self
        if self.division in high_level_only_divisions:
            raise ValueError(
                f"R0-DL3 data products should not have division='{self.division}'"
            )
        return self
class DataProcessingCategory(StrEnum):
    """
    Which data processing pipeline category produced this product.

    A=realtime, B=next-day, C=final/publication-quality.
    """

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    A = "A"  #: Real-time
    B = "B"  #: Next-day
    C = "C"  #: Final or reprocessed
class ProcessingSublevel(StrFlag):
    """Used to distinguish partially filled DL1 or DL2 products."""

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming a flag member.
    _namespace = nonmember(_NAMESPACE)

    # Values are assigned by ``auto()`` in declaration order, so the order of
    # the members below determines their values.
    IMAGES = auto()  #: DL1 Images
    PARAMETERS = auto()  #: DL1 parameters
    GEOMETRY = auto()  #: DL2 shower geometry parameters
    ENERGY = auto()  #: DL2 shower energy parameters
    GAMMANESS = auto()  #: DL2 shower gammaness parameter
class FacilityName(StrEnum):
    """Name of observatory."""

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    #: Use for observed data
    CTAO = "CTAO"
    #: Use for simulated data or data challenges
    SIMULATED_CTAO = "Simulated-CTAO"
class ArrayElement(StrEnum):
    """Class of array element, for use in the ArrayElementID."""

    # Wrapped in ``nonmember`` so the namespace stays a plain class attribute
    # instead of becoming an enum member.
    _namespace = nonmember(_NAMESPACE)

    # NOTE(review): the member values are short upper-case codes; presumably
    # these are the prefixes used when building an ArrayElementID string --
    # confirm against the code that consumes this enum.
    TELESCOPE = "TEL"
    LIDAR = "LIDAR"
    SUBARRAY = "SUB"
    AUX = "AUX"
class ACADADataSource(ModelBase):
    """ACADA Data Source, see ACADA-DPPS ICD."""

    # The compact string form is the component name followed by the instance
    # id zero-padded to three digits (e.g. ``SDH001``). That string is what
    # the model serializes to, and it is also accepted as validation input.

    _namespace: ClassVar = _NAMESPACE

    component: str = AstroField(
        description=(
            "ACADA data source name: the component, or instrument, generating "
            "the data. Normally will be the string of the ACS or OPC UA "
            "component instance name."
        ),
        examples=["SDH", "SWAT"],
    )
    id: int = AstroField(
        description="Instance identifier of the DataSource component. "
    )

    def __str__(self) -> str:
        """Render as component name plus zero-padded id, e.g. SDH001."""
        return f"{self.component}{self.id:03d}"

    @model_validator(mode="before")
    @classmethod
    def _parse_from_string(cls, value):
        """
        Expand the compact string form into a field dictionary.

        Anything that is not a string (existing instances, dicts, ...) is
        handed back untouched so that regular validation deals with it.

        Raises
        ------
        ValueError
            If a string is given that is not upper-case letters followed by
            digits.
        """
        if not isinstance(value, str):
            return value

        parsed = re.fullmatch(r"([A-Z]+)(\d+)", value)
        if parsed is None:
            raise ValueError(f"Invalid ACADADataSource string: {value}")

        # The pattern only admits A-Z, so the name is already upper-case.
        component_name, instance_digits = parsed.groups()
        return {"component": component_name, "id": int(instance_digits)}

    @model_serializer
    def _serialize(self) -> str:
        """Serialize the model as its compact string form."""
        return str(self)
class ArrayElementClass(StrEnum):
    """Classification of Array Element, following ACADA-DPPS ICD."""

    # NOTE(review): unlike the other enums in this module, this one does not
    # define ``_namespace`` -- confirm whether that is intentional.
    TEL = "TEL"  #: AE is a telescope
    ACE = "ACE"  #: AE is an array-common-element instrument
class InstanceIdentifier(ModelBase):
    """
    Keys that uniquely identify an instance of an observed data product.

    Which of these is required depends on the ProductType.
    """

    # Every key except ``id`` (auto-generated) and ``facility_name`` (defaults
    # to CTAO) is optional and defaults to ``None``.

    _namespace: ClassVar = _NAMESPACE

    id: uuid.UUID = AstroField(
        description=(
            "A locally-generated unique ID for this data product instance. This is used"
            " to trace the data product and link it to others. It should be unique even"
            " when same data product is produced multiple times. A UUID4 (purely"
            " random) should be used to avoid id collisions and for cybersecurity"
            " (other UUID versions encode local IP info, for example)."
        ),
        default_factory=lambda: uuid.uuid4(),
        fits_keyword="DATAID",
    )
    obs_id: int | None = AstroField(
        description=(
            "Unique identifier of the observation block, in a format defined in the "
            "Common Data Model Specification."
        ),
        ivoa_keyword="obs_id",
        fits_keyword="OBS_ID",
        default=None,
    )
    # TODO: look up in VODF notes how we phrased this
    event_type_group: str | None = AstroField(
        description=(
            "For data products that are associated with a specific group of event "
            "types, this defines the name of that group. "
            "Note that for data products that mix different event type groups, "
            "this is not necessary."
        ),
        default=None,
        ivoa_keyword="event_type",
        fits_keyword="EVTYPE",
    )
    ae_id: int | None = AstroField(
        description=(
            "ID of a CTAO array element. See common data model. ``tel_id`` may be used "
            "as an alias in the case where the array element is a telescope."
        ),
        default=None,
        fits_keyword="AE_ID",
        # Accept either name on input.
        validation_alias=AliasChoices("ae_id", "tel_id"),
    )
    ae_class: ArrayElementClass | None = AstroField(
        enum_doc(ArrayElementClass), default=None, fits_keyword="AE_CLASS"
    )
    subarray_id: int | None = AstroField(
        "Subarray id, for data products from a subarray.",
        default=None,
        fits_keyword="SUB_ID",
    )
    chunk_id: int | None = AstroField(
        description=(
            "For files that are split into multiple pieces (chunks) to keep the file"
            " size under a limit, this describes the chunk number (from 0) of a"
            " particular data product."
        ),
        default=None,
        fits_keyword="CHUNK_ID",
    )
    batch_id: int | None = AstroField(
        description=(
            "Batch identifier for intermediate data products that are grouped/merged in"
            " batches. "
        ),
        default=None,
        fits_keyword="BATCH_ID",
    )
    calibration_service_id: int | None = AstroField(
        description=(
            "ID assigned during data acquisition to link "
            "calibration products used when producing a data product."
        ),
        default=None,
    )
    observing_night: datetime.date | None = AstroField(
        description=(
            "Date associated with the start of data taking. It shall be created"
            " according to the date of the beginning of the observation night prior to"
            " the injection of data to DPPS. The date of the beginning of the"
            " observation night formally starts at 12:00 local civil time, is valid for"
            " the next 24 hours, and ends the next day at 12:00 local civil time."
        ),
        fits_keyword="OBSNIGHT",
        default=None,
    )
    sublevel_id: ProcessingSublevel | None = AstroField(
        description=enum_doc(ProcessingSublevel),
        default=None,
    )
    target_id: str | None = AstroField(
        description="Name of the target or ROI covered by this instance.",
        default=None,
    )
    region_id: str | None = AstroField(
        description="Identifier for region-of-interest used for this instance.",
        default=None,
    )
    observing_period_id: str | None = AstroField(
        description="ID of the observing period.",
        default=None,
    )
    lunar_cycle_id: int | None = AstroField(
        description=(
            "ID of the lunar cycle associated with this instance. Combined with the"
            " observing_period_id, this gives a unique ID of the 'period' in which this"
            " data product was observed."
        ),
        default=None,
    )
    facility_name: FacilityName = AstroField(
        description=(
            "Observatory or facility used to collect the data. If the data are"
            " simulated, it is recommended to explicitly use the word 'simulated' in"
            " the name."
        ),
        default=FacilityName.CTAO,
        fits_keyword="TELESCOP",
        ivoa_keyword="facility_name",
    )
    site_id: SiteID | None = AstroField(
        description="CTAO site associated with this instance.",
        default=None,
        fits_keyword="INSTRUME",
        ivoa_keyword="instrument_name",
    )
    particle_pdgid: int | None = AstroField(
        description=(
            "For simulated data products associated with a single point in simulation"
            " phase space, the primary particle ID in PDGID format, e.g. photon=22,"
            " electron=11."
        ),
        default=None,
    )
    category: DataProcessingCategory | None = AstroField(
        doc(DataProcessingCategory), default=None
    )
    data_source: ACADADataSource | None = AstroField(
        description=doc(ACADADataSource), default=None, fits_keyword="DATASRC"
    )
    assembly_name: str | None = AstroField(
        "For monitoring time-series, describes the assembly (group of monitoring"
        " points) associated with the data product.",
        default=None,
    )

    @model_validator(mode="after")
    def _check_lunar_cycle_id(self) -> Self:
        """
        Ensure if we have a lunar_cycle_id, we also have an observing_period_id.

        Raises
        ------
        ValueError
            If ``lunar_cycle_id`` is set while ``observing_period_id`` is
            missing or empty.
        """
        # Compare against None rather than relying on truthiness: a cycle id of
        # 0 is a set value and must not bypass the check.
        if self.lunar_cycle_id is not None and not self.observing_period_id:
            raise ValueError("A `lunar_cycle_id` requires an `observing_period_id`.")
        return self
# zenith_id: int | None = AstroField(
# description=(
# "For simulated data products associated with a single point in simulation phase space,"
# "the zenith angle of simulated pointing, to nearest integer."
# ),
# unit="deg",
# ge=0,
# le=90,
# default=None,
# )
# azimuth_id: int | None = AstroField(
# description=(
# "For simulated data products associated with a single point in simulation phase space,"
# "the azimuth of simulated telescope pointing, to nearest integer."
# ),
# unit="deg",
# ge=0,
# le=360,
# default=None,
# )
# nsb_id: int | None = AstroField(
# description=(
# "For simulated data products associated with a single point in simulation phase space,"
# "the NSB level as multiple of the nominal dark value, to nearest integer."
# ),
# default=None,
# )
# source_cone_bin_id: int | None = AstroField(
# description=(
# "For simulated data products associated with a single point in simulation phase space,"
# "the view cone angle of simulated source (0=pointlike) to nearest integer."
# ),
# unit="deg",
# ge=0,
# le=180,
# default=None,
# )
# divergence_bin_id: float | None = AstroField(
# description=(
# "For simulated data products associated with a single point in simulation phase space,"
# "the divergence angle of telescopes."
# ),
# unit="deg",
# default=None,
# )