dlt.dataset.dataset
Dataset Objects
class Dataset()
Access to dataframes and arrow tables in the destination dataset via dbapi
ibis
def ibis(read_only: bool = False) -> IbisBackend
Get an ibis backend for the dataset.
This creates a connection to the destination.
The read_only
flag is currently only supported for the duckdb destination.
schema
@property
def schema() -> dlt.Schema
dlt schema associated with the dataset.
If not provided at dataset initialization, it is fetched from the destination. Falls back to local dlt pipeline metadata.
tables
@property
def tables() -> list[str]
List of table names found in the dataset.
This only includes "completed tables". In other words, during the lifetime of a pipeline.run()
execution, tables may exist on the destination, but will only appear on the dataset once
pipeline.run()
is done.
_ipython_key_completions_
def _ipython_key_completions_() -> list[str]
Provide table names as completion suggestion in interactive environments.
sqlglot_schema
@property
def sqlglot_schema() -> SQLGlotSchema
SQLGlot schema of the dataset derived from the dlt schema.
destination_dialect
@property
def destination_dialect() -> TSqlGlotDialect
SQLGlot dialect of the dataset destination.
This is the target dialect when transpiling SQL queries.
dataset_name
@property
def dataset_name() -> str
Name of the dataset
is_same_physical_destination
def is_same_physical_destination(other: dlt.Dataset) -> bool
Returns True if the other dataset is on the same physical destination. This is helpful if we want to run SQL queries without extracting the data.
query
def query(query: Union[str, sge.Select, ir.Expr],
query_dialect: Optional[TSqlGlotDialect] = None,
*,
_execute_raw_query: bool = False) -> dlt.Relation
Create a dlt.Relation
from an SQL query, SQLGlot expression or Ibis expression.
Arguments:
query
Union[str, sge.Select, ir.Expr] - The query that defines the relation.
query_dialect
Optional[TSqlGlotDialect] - The dialect of the query. If specified, it will be used to transpile the query to the destination's dialect. Otherwise, the query is assumed to be in the destination's dialect (accessible via Dataset.destination_dialect
)
Returns:
dlt.Relation
- The relation for the query
__call__
def __call__(query: Union[str, sge.Select, ir.Expr],
query_dialect: Optional[TSqlGlotDialect] = None,
*,
_execute_raw_query: bool = False) -> dlt.Relation
Convenience method to proxy Dataset.query()
. See this method for details.
table
def table(
table_name: str,
table_type: Literal["relation", "ibis"] = "relation"
) -> Union[dlt.Relation, ir.Table]
Get a dlt.Relation
associated with a table from the dataset.
row_counts
def row_counts(*,
data_tables: bool = True,
dlt_tables: bool = False,
table_names: Optional[list[str]] = None,
load_id: Optional[str] = None) -> dlt.Relation
Create a dlt.Relation
with the query to get the row counts of all tables in the dataset.
Arguments:
data_tables
bool - Whether to include data tables. Defaults to True.
dlt_tables
bool - Whether to include dlt tables. Defaults to False.
table_names
Optional[list[str]] - The names of the tables to include. Defaults to None. Will override data_tables and dlt_tables if set.
load_id
Optional[str] - If set, only count rows associated with a given load id. Will exclude tables that do not have a load id.
Returns:
dlt.Relation
- Relation for the query that computes the requested row count.
__getitem__
def __getitem__(table_name: str) -> dlt.Relation
Get a dlt.Relation
for a table via dictionary notation.
This proxies Dataset.table()
.
__getattr__
def __getattr__(name: str) -> Any
Get a dlt.Relation
for a table via attribute notation.
This proxies Dataset.table()
.
__enter__
def __enter__() -> Self
Context manager to keep the connection to the destination open between queries
__exit__
def __exit__(exc_type: Type[BaseException], exc_val: BaseException,
exc_tb: TracebackType) -> None
Context manager to keep the connection to the destination open between queries
is_same_physical_destination
def is_same_physical_destination(dataset1: dlt.Dataset,
dataset2: dlt.Dataset) -> bool
Check if both datasets are at the same physical destination.
This is done by comparing the fingerprints of both destination configs. There are potential false negatives if two different configs give access to the same destination.