"""Per-target write options for file-based ETL targets.
Declared on :class:`~loom.etl.IntoFile` via ``.with_options()``::
target = IntoFile("s3://exports/report.csv", format=Format.CSV)
.with_options(CsvWriteOptions(separator=";"))
target = IntoFile("s3://exports/data.parquet", format=Format.PARQUET)
.with_options(ParquetWriteOptions(compression="zstd"))
Options are format-specific and forwarded to the backend writer at execution
time. They are part of :class:`~loom.etl._target.TargetSpec` and therefore
visible to the compiler.
Each class exposes only the parameters that carry **semantic meaning at the
framework level** (dialect contract, storage codec). Everything else is
forwarded verbatim via the ``kwargs`` escape hatch as a tuple of
``(key, value)`` pairs::
ParquetWriteOptions(
compression="zstd",
kwargs=(("statistics", True), ("row_group_size", 100_000)),
)
CsvWriteOptions(
separator=";",
kwargs=(("datetime_format", "%Y-%m-%d"), ("null_value", "N/A")),
)

For Polars targets, ``kwargs`` maps to keyword arguments of
``polars.DataFrame.write_csv`` / ``write_parquet`` / ``write_ndjson``.
For Spark targets, each pair becomes a ``DataFrameWriter.option(key, value)``
call, so values are always coerced to ``str`` by the Spark writer.
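
As a rough sketch, that Spark-side forwarding amounts to a loop like the
following (illustrative only, not the framework's actual writer code;
``df`` is the outgoing DataFrame and ``opts`` the options instance)::

    writer = df.write.format("csv")
    for key, value in opts.kwargs:
        writer = writer.option(key, value)  # Spark stores each value as str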

For Delta table targets, per-write options are controlled via the storage
locator (the ``writer:`` key in YAML), which supports ``WriterProperties``
forwarded verbatim to delta-rs. Use the locator for Delta compression
settings, not this module.
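
For reference, those locator properties feed a delta-rs call along these
lines (a sketch, assuming the ``deltalake`` Python package; the exact wiring
lives in the storage layer, not in this module)::

    from deltalake import WriterProperties, write_deltalake

    write_deltalake(
        "s3://exports/events",
        df,
        writer_properties=WriterProperties(compression="ZSTD"),
    )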
"""
from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Literal

@dataclass(frozen=True)
class CsvWriteOptions:
    """Write options for CSV / TSV file targets.

    Args:
        separator: Column delimiter character. Defaults to ``","``.
        has_header: Whether to write a header row. Defaults to ``True``.
        kwargs: Backend-specific keyword arguments forwarded verbatim.
            For Polars: mapped to ``polars.DataFrame.write_csv()``.
            For Spark: applied via ``DataFrameWriter.option(k, v)``.

    Example::

        CsvWriteOptions(
            separator=";",
            kwargs=(("datetime_format", "%Y-%m-%d"), ("null_value", "N/A")),
        )
    """

    separator: str = ","
    has_header: bool = True
    kwargs: tuple[tuple[str, Any], ...] = ()

@dataclass(frozen=True)
class ParquetWriteOptions:
    """Write options for Parquet file targets.

    Args:
        compression: Parquet compression codec. Defaults to ``"zstd"``.
        kwargs: Backend-specific keyword arguments forwarded verbatim.
            For Polars: mapped to ``polars.DataFrame.write_parquet()``.
            For Spark: applied via ``DataFrameWriter.option(k, v)``.

    Example::

        ParquetWriteOptions(
            compression="zstd",
            kwargs=(("statistics", True), ("row_group_size", 100_000)),
        )
    """

    compression: Literal[
        "lz4", "uncompressed", "snappy", "gzip", "brotli", "zstd"
    ] = "zstd"
    kwargs: tuple[tuple[str, Any], ...] = ()

@dataclass(frozen=True)
class JsonWriteOptions:
    """Write options for JSON (NDJSON) file targets.

    Args:
        kwargs: Backend-specific keyword arguments forwarded verbatim.
            For Polars: mapped to ``polars.DataFrame.write_ndjson()``.
            For Spark: applied via ``DataFrameWriter.option(k, v)``.

    Example::

        JsonWriteOptions(kwargs=(("compression", "gzip"),))
    """

    kwargs: tuple[tuple[str, Any], ...] = ()

WriteOptions = CsvWriteOptions | ParquetWriteOptions | JsonWriteOptions
"""Union of all supported per-target file write option types."""