extract.utils
get_data_item_format
def get_data_item_format(items: TDataItems) -> TDataItemFormat
Detect the format of the data item from `items`. Reverts to `object` for empty lists.
Returns:
The data file format.
resolve_column_value
def resolve_column_value(column_hint: TTableHintTemplate[TColumnNames],
item: TDataItem) -> Union[Any, List[Any]]
Extract values from the data item given a column hint. Returns either a single value or list of values when hint is a composite.
ensure_table_schema_columns
def ensure_table_schema_columns(
columns: TAnySchemaColumns) -> TTableSchemaColumns
Convert supported column schema types to a column dict which can be used in resource schema.
Arguments:
- `columns` - A dict of column schemas, a list of column schemas, or a pydantic model
ensure_table_schema_columns_hint
def ensure_table_schema_columns_hint(
columns: TTableHintTemplate[TAnySchemaColumns]
) -> TTableHintTemplate[TTableSchemaColumns]
Convert a column schema hint to a hint returning `TTableSchemaColumns`. A callable hint is wrapped in another function which converts the original result.
reset_pipe_state
def reset_pipe_state(pipe: SupportsPipe,
source_state_: Optional[DictStrAny] = None) -> None
Resets the resource state for a pipe and all its parent pipes.
simulate_func_call
def simulate_func_call(
f: Union[Any, AnyFun], args_to_skip: int, *args: Any, **kwargs: Any
) -> Tuple[inspect.Signature, inspect.Signature, inspect.BoundArguments]
Simulates a call to a resource or transformer function before it is wrapped for later execution in the pipe.
Returns a tuple with the signature of `f`, a modified signature in the case of transformers, and the bound arguments.
wrap_iterator
def wrap_iterator(gen: Iterator[TDataItems]) -> Iterator[TDataItems]
Wraps an iterator into a generator
wrap_async_iterator
def wrap_async_iterator(
gen: AsyncIterator[TDataItems]
) -> Generator[Awaitable[TDataItems], None, None]
Wraps an async iterator into a generator of awaitables
wrap_parallel_iterator
def wrap_parallel_iterator(f: TAnyFunOrGenerator) -> TAnyFunOrGenerator
Wraps a generator for parallel extraction
wrap_compat_transformer
def wrap_compat_transformer(name: str, f: AnyFun, sig: inspect.Signature,
*args: Any, **kwargs: Any) -> AnyFun
Creates a compatible wrapper over a transformer function. A pure transformer function expects a data item as its first argument and one keyword argument called `meta`.
wrap_resource_gen
def wrap_resource_gen(name: str, f: AnyFun, sig: inspect.Signature, *args: Any,
**kwargs: Any) -> AnyFun
Wraps a generator or generator function so it is evaluated on extraction