Source code for loom.etl.declarative.source._specs

"""Normalized source spec types — one frozen dataclass per source kind.

Internal module — import from :mod:`loom.etl.declarative.source`.

Each spec carries exactly the fields required by the compiler and backends.
Specs are produced by the builder ``_to_spec()`` methods and consumed by the
compiler and executor; they are never constructed directly in user code.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from enum import StrEnum
from typing import Any

from loom.etl.declarative._format import Format
from loom.etl.declarative._read_options import ReadOptions
from loom.etl.declarative.expr._refs import TableRef
from loom.etl.schema._schema import ColumnSchema, LoomType


[docs] class SourceKind(StrEnum): """Physical kind of an ETL source.""" TABLE = "table" FILE = "file" TEMP = "temp"
@dataclass(frozen=True) class JsonColumnSpec: """Declares that a string column contains JSON to be decoded at read time. Produced by :meth:`~loom.etl.declarative.source.FromTable.parse_json` and :meth:`~loom.etl.declarative.source.FromFile.parse_json`. Consumed by backend readers — never constructed directly in user code. Args: column: Name of the string column that holds the JSON payload. loom_type: Target :data:`~loom.etl._schema.LoomType` to decode into. """ column: str loom_type: LoomType
[docs] @dataclass(frozen=True) class TableSourceSpec: """Normalized internal representation of a Delta table ETL source. Produced by :meth:`~loom.etl.declarative.source.FromTable._to_spec`. Consumed by the compiler and executor — never exposed in user code. Args: alias: Name matching the ``execute()`` parameter. table_ref: Logical table reference. predicates: Compiled predicate nodes from ``.where()``. columns: Column names to project at scan time. When non-empty, only these columns are read from storage — all other columns are discarded before the frame reaches ``execute()``. The projection is pushed down to the Parquet row-group scanner, reducing I/O. schema: Optional user-declared schema applied at read time via ``with_columns(cast(...))``; casts each declared column to its :class:`~loom.etl._schema.LoomDtype`. Extra columns in the source pass through untouched. json_columns: JSON decode specs applied at read time. """ alias: str table_ref: TableRef predicates: tuple[Any, ...] = field(default_factory=tuple) columns: tuple[str, ...] = field(default_factory=tuple) schema: tuple[ColumnSchema, ...] = field(default_factory=tuple) json_columns: tuple[JsonColumnSpec, ...] = field(default_factory=tuple) @property def kind(self) -> SourceKind: """Physical kind — always :attr:`SourceKind.TABLE`.""" return SourceKind.TABLE @property def format(self) -> Format: """I/O format — always :attr:`Format.DELTA` for table sources.""" return Format.DELTA
[docs] @dataclass(frozen=True) class FileSourceSpec: """Normalized internal representation of a file-based ETL source. Produced by :meth:`~loom.etl.declarative.source.FromFile._to_spec`. Consumed by the compiler and executor — never exposed in user code. Args: alias: Name matching the ``execute()`` parameter. path: Literal file path/template, or logical alias when ``is_alias=True``. format: I/O format (CSV, JSON, XLSX, Parquet). is_alias: When ``True``, *path* is a logical alias resolved via :class:`~loom.etl.storage.FileLocator` at runtime. Set automatically by :meth:`~loom.etl.FromFile.alias`. read_options: Format-specific read options set via ``.with_options()``. columns: Column names to project at scan time. When non-empty, only these columns are loaded from the file. schema: Optional user-declared schema applied at read time via column-level casts. json_columns: JSON decode specs applied at read time. """ alias: str path: str format: Format is_alias: bool = False read_options: ReadOptions | None = None columns: tuple[str, ...] = field(default_factory=tuple) schema: tuple[ColumnSchema, ...] = field(default_factory=tuple) json_columns: tuple[JsonColumnSpec, ...] = field(default_factory=tuple) @property def kind(self) -> SourceKind: """Physical kind — always :attr:`SourceKind.FILE`.""" return SourceKind.FILE
[docs] @dataclass(frozen=True) class TempSourceSpec: """Normalized internal representation of an intermediate (temp) ETL source. Produced by :meth:`~loom.etl.declarative.source.FromTemp._to_spec`. Consumed by the executor to retrieve data from :class:`~loom.etl.checkpoint.CheckpointStore`. Args: alias: Name matching the ``execute()`` parameter. temp_name: Logical intermediate name matching :class:`~loom.etl.IntoTemp`. """ alias: str temp_name: str @property def kind(self) -> SourceKind: """Physical kind — always :attr:`SourceKind.TEMP`.""" return SourceKind.TEMP
# Type alias — the union of all typed source spec variants. SourceSpec = TableSourceSpec | FileSourceSpec | TempSourceSpec