Source code for loom.etl.declarative._read_options

"""Per-source read options for file-based ETL sources.

Declared on :class:`~loom.etl.FromFile` via ``.with_options()``::

    sources = Sources(
        report=FromFile("s3://raw/export.csv", format=Format.CSV)
            .with_options(CsvReadOptions(separator=";", has_header=False)),
        events=FromFile("s3://raw/events.json", format=Format.JSON)
            .with_options(JsonReadOptions(infer_schema_length=None)),
    )

Options are format-specific and forwarded to the backend reader at execution
time.  They are part of :class:`~loom.etl._source.SourceSpec` and therefore
visible to the compiler.

All option classes are immutable frozen dataclasses.  They accept only the
subset of reader options that is useful at the ETL declaration level — full
backend-specific parameter bags are intentionally not exposed.
"""

from __future__ import annotations

from dataclasses import dataclass, field


[docs] @dataclass(frozen=True) class CsvReadOptions: """Read options for CSV / TSV file sources. All fields default to the most common CSV convention. Args: separator: Column delimiter character. Defaults to ``","``. has_header: Whether the first row is a header. Defaults to ``True``. null_values: Strings that should be interpreted as ``null``. encoding: File encoding. Defaults to ``"utf8"``. infer_schema_length: Number of rows used to infer column types. ``None`` scans the whole file. Defaults to ``100``. skip_rows: Number of rows to skip before reading. Useful for files with metadata preambles. """ separator: str = "," has_header: bool = True null_values: tuple[str, ...] = field(default_factory=tuple) encoding: str = "utf8" infer_schema_length: int | None = 100 skip_rows: int = 0
[docs] @dataclass(frozen=True) class JsonReadOptions: """Read options for newline-delimited JSON (NDJSON) file sources. Args: infer_schema_length: Number of rows used to infer column types. ``None`` reads the whole file for inference. Defaults to ``100``. """ infer_schema_length: int | None = 100
[docs] @dataclass(frozen=True) class ExcelReadOptions: """Read options for Excel (``.xlsx``) file sources. Args: sheet_name: Sheet to read by name. ``None`` reads the first sheet (default). has_header: Whether the first row is a header. Defaults to ``True``. """ sheet_name: str | None = None has_header: bool = True
[docs] @dataclass(frozen=True) class ParquetReadOptions: """Read options for Parquet file sources. Parquet is self-describing — schema and types are embedded in the file metadata. Use :meth:`~loom.etl.FromFile.with_schema` when you need to override or enforce a specific type mapping. """
ReadOptions = CsvReadOptions | JsonReadOptions | ExcelReadOptions | ParquetReadOptions """Union of all supported per-source file read option types."""