Source code for loom.etl.declarative._write_options

"""Per-target write options for file-based ETL targets.

Declared on :class:`~loom.etl.IntoFile` via ``.with_options()``::

    target = IntoFile("s3://exports/report.csv", format=Format.CSV)
        .with_options(CsvWriteOptions(separator=";"))

    target = IntoFile("s3://exports/data.parquet", format=Format.PARQUET)
        .with_options(ParquetWriteOptions(compression="zstd"))

Options are format-specific and forwarded to the backend writer at execution
time.  They are part of :class:`~loom.etl._target.TargetSpec` and therefore
visible to the compiler.

Each class exposes only the parameters that carry **semantic meaning at the
framework level** (dialect contract, storage codec).  Everything else is
forwarded verbatim via the ``kwargs`` escape hatch as a tuple of
``(key, value)`` pairs::

    ParquetWriteOptions(
        compression="zstd",
        kwargs=(("statistics", True), ("row_group_size", 100_000)),
    )

    CsvWriteOptions(
        separator=";",
        kwargs=(("datetime_format", "%Y-%m-%d"), ("null_value", "N/A")),
    )

For Polars targets, ``kwargs`` maps to keyword arguments of
``polars.DataFrame.write_csv`` / ``write_parquet`` / ``write_ndjson``.
For Spark targets, each pair becomes a ``DataFrameWriter.option(key, value)``
call, so values are always coerced to ``str`` by the Spark writer.
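
At execution time the forwarding is conceptually equivalent to the
following sketch (``df`` and ``opts`` are illustrative names, not part of
this module)::

    # Polars: pairs become keyword arguments of the write_* method.
    df.write_csv(
        "report.csv",
        separator=opts.separator,
        include_header=opts.has_header,
        **dict(opts.kwargs),
    )

    # Spark: each pair becomes an .option() call; Spark stringifies values.
    writer = df.write.format("csv")
    for key, value in opts.kwargs:
        writer = writer.option(key, value)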

For Delta table targets, per-write options are controlled via the storage
locator (the ``writer:`` key in YAML), which supports ``WriterProperties``
forwarded verbatim to delta-rs.  Use the locator, not this module, for
Delta compression settings.
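
As an illustration only (the ``WriterProperties`` import path and keyword
are assumed from the delta-rs Python bindings, not defined here), a
locator's ``writer:`` mapping corresponds to constructing::

    from deltalake import WriterProperties

    props = WriterProperties(compression="ZSTD")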
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Literal


@dataclass(frozen=True)
class CsvWriteOptions:
    """Write options for CSV / TSV file targets.

    Args:
        separator: Column delimiter character. Defaults to ``","``.
        has_header: Whether to write a header row. Defaults to ``True``.
        kwargs: Backend-specific keyword arguments forwarded verbatim.
            For Polars: mapped to ``polars.DataFrame.write_csv()``.
            For Spark: applied via ``DataFrameWriter.option(k, v)``.

    Example::

        CsvWriteOptions(
            separator=";",
            kwargs=(("datetime_format", "%Y-%m-%d"), ("null_value", "N/A")),
        )
    """

    separator: str = ","
    has_header: bool = True
    kwargs: tuple[tuple[str, Any], ...] = ()


@dataclass(frozen=True)
class ParquetWriteOptions:
    """Write options for Parquet file targets.

    Args:
        compression: Parquet compression codec. Defaults to ``"zstd"``.
        kwargs: Backend-specific keyword arguments forwarded verbatim.
            For Polars: mapped to ``polars.DataFrame.write_parquet()``.
            For Spark: applied via ``DataFrameWriter.option(k, v)``.

    Example::

        ParquetWriteOptions(
            compression="zstd",
            kwargs=(("statistics", True), ("row_group_size", 100_000)),
        )
    """

    compression: Literal[
        "lz4", "uncompressed", "snappy", "gzip", "brotli", "zstd"
    ] = "zstd"
    kwargs: tuple[tuple[str, Any], ...] = ()


@dataclass(frozen=True)
class JsonWriteOptions:
    """Write options for JSON (NDJSON) file targets.

    Args:
        kwargs: Backend-specific keyword arguments forwarded verbatim.
            For Polars: mapped to ``polars.DataFrame.write_ndjson()``.
            For Spark: applied via ``DataFrameWriter.option(k, v)``.

    Example::

        JsonWriteOptions(kwargs=(("compression", "gzip"),))
    """

    kwargs: tuple[tuple[str, Any], ...] = ()


WriteOptions = CsvWriteOptions | ParquetWriteOptions | JsonWriteOptions
"""Union of all supported per-target file write option types."""
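
# How a backend narrows ``WriteOptions`` is backend-specific; a minimal
# dispatch sketch (illustration only, not part of this module's API):
#
#     def apply(options: WriteOptions) -> None:
#         match options:
#             case CsvWriteOptions():
#                 ...  # configure the CSV writer
#             case ParquetWriteOptions():
#                 ...  # configure the Parquet writer
#             case JsonWriteOptions():
#                 ...  # configure the NDJSON writer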