dlt.common.libs.deltalake
ensure_delta_compatible_arrow_schema
def ensure_delta_compatible_arrow_schema(
schema: pa.Schema,
partition_by: Optional[Union[List[str], str]] = None) -> pa.Schema
Returns Arrow schema compatible with Delta table format.
Casts schema to replace data types not supported by Delta.
ensure_delta_compatible_arrow_data
def ensure_delta_compatible_arrow_data(
data: Union[pa.Table, pa.RecordBatchReader],
partition_by: Optional[Union[List[str], str]] = None
) -> Union[pa.Table, pa.RecordBatchReader]
Returns Arrow data compatible with Delta table format.
Casts data schema to replace data types not supported by Delta.
get_delta_write_mode
def get_delta_write_mode(write_disposition: TWriteDisposition) -> str
Translates dlt write disposition to Delta write mode.
write_delta_table
def write_delta_table(
table_or_uri: Union[str, Path, DeltaTable],
data: Union[pa.Table, pa.RecordBatchReader],
write_disposition: TWriteDisposition,
partition_by: Optional[Union[List[str], str]] = None,
storage_options: Optional[Dict[str, str]] = None,
configuration: Optional[Mapping[str, Optional[str]]] = None) -> None
Writes in-memory Arrow data to on-disk Delta table.
Thin wrapper around deltalake.write_deltalake.
merge_delta_table
def merge_delta_table(table: DeltaTable, data: Union[pa.Table,
pa.RecordBatchReader],
schema: TTableSchema, load_table_name: str,
streamed_exec: bool) -> None
Merges in-memory Arrow data into on-disk Delta table.
get_delta_tables
def get_delta_tables(
pipeline: Pipeline,
*tables: str,
schema_name: str = None,
include_dlt_tables: bool = False) -> Dict[str, DeltaTable]
Returns Delta tables in pipeline.default_schema (default) or schema_name as deltalake.DeltaTable objects.
Returned object is a dictionary with table names as keys and DeltaTable objects as values.
Optionally filters the dictionary by the table names passed as *tables.
Raises ValueError if a table name passed in *tables is not found. In that case, try selecting
a different schema via the schema_name argument.
deltalake_storage_options
def deltalake_storage_options(
config: FilesystemConfiguration) -> Dict[str, str]
Returns dict that can be passed as storage_options in deltalake library.
evolve_delta_table_schema
def evolve_delta_table_schema(delta_table: DeltaTable,
arrow_schema: pa.Schema) -> DeltaTable
Evolves delta_table schema if different from arrow_schema.
Fields are compared by name only; actual types and nullability are ignored. This is how schemas are evolved for other destinations. Existing columns are never modified. Variant columns are created.
Adds to delta_table the column(s) that are present in arrow_schema but missing from delta_table.