Source code for astro.pipeline.models

"""Pipeline configuration models."""

from __future__ import annotations

import re
from dataclasses import dataclass
from enum import StrEnum

import pandera.polars as pa

# Safe-name pattern: the first character is an ASCII letter or digit; any
# further characters are ASCII letters, digits, '.', '_' or '-'.
# NOTE: in Python, ``$`` also matches just before a trailing newline, so use
# ``fullmatch`` (not ``match``) when a name ending in a newline must be rejected.
_SAFE_NAME_PATTERN = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]*$")



[docs]
class ExecutionMode(StrEnum):
    """Ingest concurrency mode."""

    SERIAL = "serial"
    PARALLEL = "parallel"




[docs]
class StepExecutionMode(StrEnum):
    """Run-step scheduling mode within a single pipeline run."""

    SERIAL = "serial"
    PARALLEL = "parallel"




[docs]
@dataclass(frozen=True)
class IngestFileSpec:
    """Expected source file and Pandera schema for CLI ingest.

    Instances are immutable (frozen dataclass) and are validated when they
    are constructed.

    Attributes:
        name: Non-empty identifier. Must start with an ASCII letter or digit
            and otherwise contain only ASCII letters, digits, '.', '_' or '-'.
        source_pattern: Non-empty pattern for the source file. Only
            non-emptiness is checked here; the pattern syntax is not validated.
        schema: Pandera ``DataFrameSchema`` associated with the file.
        encoding: Non-empty encoding name. Defaults to ``"utf-8"``.
        has_header: Whether the source has a header. Defaults to ``True``.
        column_names: Column names; must be provided and non-empty when
            ``has_header`` is ``False``.

    Raises:
        ValueError: If any of the constraints above is violated.
    """

    name: str
    source_pattern: str
    schema: pa.DataFrameSchema
    encoding: str = "utf-8"
    has_header: bool = True
    column_names: tuple[str, ...] | None = None

    def __post_init__(self) -> None:
        """Validate the field values, raising ``ValueError`` on the first violation."""
        # The empty-name check comes first so it gets its own, clearer message
        # (the pattern below would reject an empty name as well).
        if not self.name:
            raise ValueError("Ingest file name must not be empty.")
        # Use fullmatch rather than match: the pattern's trailing ``$`` also
        # matches just before a final newline, so ``match`` would wrongly
        # accept a name that ends with a newline character.
        if not _SAFE_NAME_PATTERN.fullmatch(self.name):
            raise ValueError(
                "Ingest file name must be alphanumeric and may contain '.', '_', or '-'."
            )
        if not self.source_pattern:
            raise ValueError("Ingest source_pattern must not be empty.")
        if not self.encoding:
            raise ValueError("Ingest encoding must not be empty.")
        # Without a header there is nothing to derive column names from, so
        # they must be supplied; None and an empty tuple are both rejected.
        if not self.has_header and not self.column_names:
            raise ValueError("Headerless ingest requires column_names.")