`graflo.architecture.contract.ingestion`¶

Declarative ingestion contract: resources, transforms, and ingestion model.

`DressConfig` ¶

Bases: ConfigBaseModel

Output dressing specification for pivoted transforms.

When a transform function returns a single scalar (e.g. round_str returns 6.43), DressConfig describes how to package that scalar together with the input field name into a dict.

Attributes:

Name	Type	Description
`key`	`str`	Output field that receives the input field name (e.g. "Open").
`value`	`str`	Output field that receives the function result (e.g. 6.43).

Source code in graflo/architecture/contract/ingestion/transform.py

class DressConfig(ConfigBaseModel):
    """Describe how a scalar transform result is packaged into a dict.

    Some transform functions return one scalar (e.g. ``round_str`` returning
    ``6.43``). A DressConfig names the two output fields used to pair that
    scalar with the name of the input field it was computed from.

    Attributes:
        key: Output field that receives the **input field name** (e.g. "Open").
        value: Output field that receives the **function result** (e.g. 6.43).
    """

    key: str = Field(
        description="Output field name for the input key.",
    )
    value: str = Field(
        description="Output field name for the function result.",
    )

`EdgeInferSpec` ¶

Bases: ConfigBaseModel

Selector for controlling inferred edge emission.

Source code in graflo/architecture/contract/ingestion/resource.py

class EdgeInferSpec(ConfigBaseModel):
    """Selector used to allow or deny individual inferred edges.

    A selector names a (source, target) vertex pair and, optionally, a
    relation. Without a relation it matches every relation for that pair.
    """

    source: str = PydanticField(..., description="Edge source vertex name.")
    target: str = PydanticField(..., description="Edge target vertex name.")
    relation: str | None = PydanticField(
        default=None,
        description=(
            "Optional relation discriminator. If omitted, selector applies to all relations "
            "for (source, target)."
        ),
    )

    @property
    def edge_id(self) -> EdgeId:
        """Return this selector as a ``(source, target, relation)`` tuple."""
        return (self.source, self.target, self.relation)

    def matches(self, edge_id: EdgeId) -> bool:
        """Tell whether ``edge_id`` is selected by this spec.

        Args:
            edge_id: A ``(source, target, relation)`` triple.

        Returns:
            True when source and target are equal to this selector's and the
            selector either has no relation or has the same relation.
        """
        other_source, other_target, other_relation = edge_id
        if self.source != other_source or self.target != other_target:
            return False
        # A selector without a relation acts as a wildcard over relations.
        return self.relation is None or self.relation == other_relation

`IngestionModel` ¶

Bases: ConfigBaseModel

Ingestion model (C): resources and transform registry.

Source code in graflo/architecture/contract/ingestion/model.py

class IngestionModel(ConfigBaseModel):
    """Ingestion model (C): declared resources plus the named-transform registry.

    The public fields hold the declarative configuration. Private attributes
    cache name-keyed lookups (``_resources``, ``_transforms``) and the
    per-resource runtimes built by :meth:`finish_init` (``_runtimes``).
    """

    edges_on_duplicate: Literal["ignore", "upsert"] = PydanticField(
        default="ignore",
        description=(
            "How batch edge writes tolerate an already-matching edge. Passed through to "
            ":meth:`~graflo.db.conn.Connection.insert_edges_batch` where the target backend "
            "supports it."
        ),
    )
    resources: list[ResourceConfig] = PydanticField(
        default_factory=list,
        description="List of resource definitions (data pipelines mapping to vertices/edges).",
    )
    transforms: list[ProtoTransform] = PydanticField(
        default_factory=list,
        description="List of named transforms available to resources.",
    )

    _resources: dict[str, ResourceConfig] = PrivateAttr()
    _runtimes: dict[str, ResourceRuntime] = PrivateAttr(default_factory=dict)
    _transforms: dict[str, ProtoTransform] = PrivateAttr(default_factory=dict)
    _combined_edge_derivation: EdgeDerivationRegistry = PrivateAttr(
        default_factory=EdgeDerivationRegistry
    )

    @model_validator(mode="after")
    def _init_model(self) -> IngestionModel:
        """Populate the name lookup maps once field validation has finished."""
        self._rebuild_config_state()
        return self

    def _rebuild_resource_map(self) -> None:
        """Refresh ``_resources``, rejecting duplicated resource names.

        Raises:
            ValueError: For the first resource name (in declaration order)
                that is used more than once.
        """
        usage = Counter(resource.name for resource in self.resources)
        clash = next(
            ((name, times) for name, times in usage.items() if times > 1), None
        )
        if clash is not None:
            k, v = clash
            raise ValueError(f"resource name {k} used {v} times")
        by_name = {resource.name: resource for resource in self.resources}
        object.__setattr__(self, "_resources", by_name)

    def _rebuild_transform_map(self) -> None:
        """Refresh ``_transforms``, requiring unique, non-empty transform names.

        Raises:
            ValueError: If any transform has no name, or if a name repeats.
        """
        missing_names = [
            position
            for position, transform in enumerate(self.transforms)
            if not transform.name
        ]
        if missing_names:
            raise ValueError(
                "All ingestion transforms must define a non-empty name. "
                f"Missing at indexes: {missing_names}"
            )

        usage = Counter(t.name for t in self.transforms if t.name is not None)
        duplicates = sorted(name for name, times in usage.items() if times > 1)
        if duplicates:
            raise ValueError(f"Duplicate ingestion transform names found: {duplicates}")

        registry = {t.name: t for t in self.transforms if t.name is not None}
        object.__setattr__(self, "_transforms", registry)

    def finish_init(
        self,
        core_schema: CoreSchema,
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
        allowed_vertex_names: set[str] | None = None,
        target_db_flavor: DBType | None = None,
    ) -> None:
        """Create one ``ResourceRuntime`` per declared resource.

        The lookup maps are rebuilt first, then every resource config is bound
        to the vertex/edge configuration of ``core_schema`` and to the named
        transforms. The resulting name-keyed dict replaces ``_runtimes``.

        Args:
            core_schema: Provides ``vertex_config`` and ``edge_config``.
            strict_references: Forwarded to each ``ResourceRuntime``.
            dynamic_edge_feedback: Forwarded to each ``ResourceRuntime``.
            allowed_vertex_names: Forwarded to each ``ResourceRuntime``.
            target_db_flavor: Forwarded to each ``ResourceRuntime``.
        """
        self._rebuild_config_state()
        built: dict[str, ResourceRuntime] = {
            resource.name: ResourceRuntime(
                resource,
                vertex_config=core_schema.vertex_config,
                edge_config=core_schema.edge_config,
                transforms=self._transforms,
                strict_references=strict_references,
                dynamic_edge_feedback=dynamic_edge_feedback,
                allowed_vertex_names=allowed_vertex_names,
                target_db_flavor=target_db_flavor,
            )
            for resource in self.resources
        }
        object.__setattr__(self, "_runtimes", built)

    def _rebuild_config_state(self) -> None:
        """Rebuild the transform map, then the resource map."""
        self._rebuild_transform_map()
        self._rebuild_resource_map()

    def fetch_resource(self, name: str | None = None) -> ResourceRuntime:
        """Return a built runtime, by name or the first one when unnamed.

        Args:
            name: Resource name; ``None`` selects the first built runtime.

        Raises:
            ValueError: If ``name`` has no runtime, or nothing is declared.
            RuntimeError: If resources are declared but no runtime was built.
        """
        if name is None:
            if self._runtimes:
                return next(iter(self._runtimes.values()))
            if self.resources:
                raise RuntimeError(
                    "IngestionModel resources exist but runtimes were not built; "
                    "call finish_init() first."
                )
            raise ValueError("Empty resource container :(")

        found = self._runtimes.get(name)
        if found is None:
            raise ValueError(f"Resource {name} not found")
        return found

    def fetch_resource_config(self, name: str) -> ResourceConfig:
        """Return the declarative config registered under ``name``.

        Raises:
            ValueError: If no resource config has that name.
        """
        found = self._resources.get(name)
        if found is not None:
            return found
        raise ValueError(f"Resource {name} not found")

    def prune_to_graph(
        self, core_schema: CoreSchema, disconnected: set[str] | None = None
    ) -> None:
        """Remove resources whose pipelines depend on disconnected vertices.

        Args:
            core_schema: Used to derive the disconnected set when none is given
                (vertices in ``vertex_config.vertex_set`` that are absent from
                ``edge_config.vertices``).
            disconnected: Explicit set of vertex names to prune against.
        """
        orphaned = (
            disconnected
            if disconnected is not None
            else core_schema.vertex_config.vertex_set - core_schema.edge_config.vertices
        )
        if not orphaned:
            return

        def _touches_orphan(wrapper: ActorWrapper) -> bool:
            return bool(wrapper.actor.references_vertices() & orphaned)

        doomed: list[ResourceConfig] = []
        for candidate in self.resources:
            # NOTE(review): this tree is built locally from the pipeline; whether
            # pruning it also updates ``candidate.pipeline`` is not visible here.
            tree = ActorWrapper(*candidate.pipeline)
            keep = not _touches_orphan(tree)
            if keep:
                tree.remove_descendants_if(_touches_orphan)
                keep = any(
                    actor.references_vertices() for actor in tree.collect_actors()
                )
            if not keep:
                doomed.append(candidate)

        if not doomed:
            return
        for gone in doomed:
            self.resources.remove(gone)
            self._resources.pop(gone.name, None)
            self._runtimes.pop(gone.name, None)
        self._rebuild_config_state()

`fetch_resource(name=None)` ¶

Fetch an initialized runtime resource by name.

Source code in graflo/architecture/contract/ingestion/model.py

def fetch_resource(self, name: str | None = None) -> ResourceRuntime:
    """Return a built runtime, by name or the first one when unnamed.

    Args:
        name: Resource name; ``None`` selects the first built runtime.

    Raises:
        ValueError: If ``name`` has no runtime, or nothing is declared.
        RuntimeError: If resources are declared but no runtime was built.
    """
    if name is None:
        if self._runtimes:
            return next(iter(self._runtimes.values()))
        if self.resources:
            raise RuntimeError(
                "IngestionModel resources exist but runtimes were not built; "
                "call finish_init() first."
            )
        raise ValueError("Empty resource container :(")

    found = self._runtimes.get(name)
    if found is None:
        raise ValueError(f"Resource {name} not found")
    return found

`fetch_resource_config(name)` ¶

Fetch declarative resource config by name.

Source code in graflo/architecture/contract/ingestion/model.py

def fetch_resource_config(self, name: str) -> ResourceConfig:
    """Return the declarative config registered under ``name``.

    Raises:
        ValueError: If no resource config has that name.
    """
    found = self._resources.get(name)
    if found is not None:
        return found
    raise ValueError(f"Resource {name} not found")

`finish_init(core_schema, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None)` ¶

Build per-resource runtimes against graph model and transform library.

Source code in graflo/architecture/contract/ingestion/model.py

def finish_init(
    self,
    core_schema: CoreSchema,
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
    allowed_vertex_names: set[str] | None = None,
    target_db_flavor: DBType | None = None,
) -> None:
    """Create one ``ResourceRuntime`` per declared resource.

    The lookup maps are rebuilt first, then every resource config is bound to
    the vertex/edge configuration of ``core_schema`` and to the named
    transforms. The resulting name-keyed dict replaces ``_runtimes``.

    Args:
        core_schema: Provides ``vertex_config`` and ``edge_config``.
        strict_references: Forwarded to each ``ResourceRuntime``.
        dynamic_edge_feedback: Forwarded to each ``ResourceRuntime``.
        allowed_vertex_names: Forwarded to each ``ResourceRuntime``.
        target_db_flavor: Forwarded to each ``ResourceRuntime``.
    """
    self._rebuild_config_state()
    built: dict[str, ResourceRuntime] = {
        resource.name: ResourceRuntime(
            resource,
            vertex_config=core_schema.vertex_config,
            edge_config=core_schema.edge_config,
            transforms=self._transforms,
            strict_references=strict_references,
            dynamic_edge_feedback=dynamic_edge_feedback,
            allowed_vertex_names=allowed_vertex_names,
            target_db_flavor=target_db_flavor,
        )
        for resource in self.resources
    }
    object.__setattr__(self, "_runtimes", built)

`prune_to_graph(core_schema, disconnected=None)` ¶

Drop resource actors that reference disconnected vertices.

Source code in graflo/architecture/contract/ingestion/model.py

def prune_to_graph(
    self, core_schema: CoreSchema, disconnected: set[str] | None = None
) -> None:
    """Remove resources whose pipelines depend on disconnected vertices.

    Args:
        core_schema: Used to derive the disconnected set when none is given
            (vertices in ``vertex_config.vertex_set`` that are absent from
            ``edge_config.vertices``).
        disconnected: Explicit set of vertex names to prune against.
    """
    orphaned = (
        disconnected
        if disconnected is not None
        else core_schema.vertex_config.vertex_set - core_schema.edge_config.vertices
    )
    if not orphaned:
        return

    def _touches_orphan(wrapper: ActorWrapper) -> bool:
        return bool(wrapper.actor.references_vertices() & orphaned)

    doomed: list[ResourceConfig] = []
    for candidate in self.resources:
        tree = ActorWrapper(*candidate.pipeline)
        # A root that touches an orphan condemns the whole resource; otherwise
        # prune matching descendants and keep the resource only if some actor
        # still references a vertex.
        keep = not _touches_orphan(tree)
        if keep:
            tree.remove_descendants_if(_touches_orphan)
            keep = any(actor.references_vertices() for actor in tree.collect_actors())
        if not keep:
            doomed.append(candidate)

    if not doomed:
        return
    for gone in doomed:
        self.resources.remove(gone)
        self._resources.pop(gone.name, None)
        self._runtimes.pop(gone.name, None)
    self._rebuild_config_state()

`KeySelectionConfig` ¶

Bases: ConfigBaseModel

Selection of document keys for key-target transforms.

Source code in graflo/architecture/contract/ingestion/transform.py

class KeySelectionConfig(ConfigBaseModel):
    """Which document keys a key-target transform operates on.

    ``mode`` picks the strategy and ``names`` lists the keys that the
    ``include`` / ``exclude`` strategies refer to.
    """

    mode: Literal["all", "include", "exclude"] = Field(
        default="all",
        description=(
            "How keys are selected for target='keys': all=all keys, "
            "include=only specified keys, exclude=all except specified keys."
        ),
    )
    names: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Keys used by include/exclude modes.",
    )

    @model_validator(mode="before")
    @classmethod
    def _normalize_names(cls, data: Any) -> Any:
        """Coerce ``names`` to a tuple before field validation.

        A missing or ``None`` value becomes ``()``, a single string becomes a
        one-element tuple and a list becomes a tuple. Other values, and
        non-dict payloads, are passed through untouched.
        """
        if not isinstance(data, dict):
            return data
        payload = dict(data)  # copy so the caller's mapping is not mutated
        raw = payload.get("names")
        if raw is None:
            payload["names"] = ()
        elif isinstance(raw, str):
            payload["names"] = (raw,)
        elif isinstance(raw, list):
            payload["names"] = tuple(raw)
        return payload

    @model_validator(mode="after")
    def _validate_mode_names(self) -> Self:
        """Check that ``names`` is given exactly when the mode needs it.

        Raises:
            ValueError: If names are given with mode ``all``, or are missing
                with mode ``include`` / ``exclude``.
        """
        has_names = bool(self.names)
        if has_names and self.mode == "all":
            raise ValueError("keys.names must be empty when keys.mode='all'.")
        if not has_names and self.mode in {"include", "exclude"}:
            raise ValueError(
                "keys.names must be provided when keys.mode is include/exclude."
            )
        return self

`ProtoTransform` ¶

Bases: ConfigBaseModel

Base class for transform definitions.

This class provides the foundation for data transformations, supporting both functional transformations and declarative mappings.

Attributes:

Name	Type	Description
`name`	`str \| None`	Optional name of the transform
`module`	`str \| None`	Optional module containing the transform function
`params`	`dict[str, Any]`	Dictionary of transform parameters
`foo`	`str \| None`	Optional name of the transform function
`input`	`tuple[str, ...]`	Tuple of input field names
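`output`	`tuple[str, ...]`	Tuple of output field names
`input_groups`	`tuple[tuple[str, ...], ...]`	Explicit groups of input fields for repeated tuple-style value calls
`output_groups`	`tuple[tuple[str, ...], ...]`	Explicit output field groups aligned with input_groups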
`dress`	`DressConfig \| None`	Optional pivot dressing for scalar functional results
`target`	`Literal['values', 'keys']`	Whether to transform field values or document keys
`keys`	`KeySelectionConfig`	Key selection when target is keys
`_foo`	`Any`	Internal reference to the transform function

Source code in graflo/architecture/contract/ingestion/transform.py

class ProtoTransform(ConfigBaseModel):
    """Base class for transform definitions.

    This class provides the foundation for data transformations, supporting both
    functional transformations and declarative mappings.

    Attributes:
        name: Optional name of the transform
        module: Optional module containing the transform function
        params: Dictionary of transform parameters
        foo: Optional name of the transform function
        input: Tuple of input field names
        output: Tuple of output field names
        input_groups: Explicit groups of input fields for grouped value calls
        output_groups: Output field groups aligned with ``input_groups``
        dress: Optional pivot dressing for scalar functional results
        target: Whether to transform field values or document keys
        keys: Key selection when target is keys
        _foo: Internal reference to the transform function
    """

    name: str | None = Field(
        default=None,
        description="Optional name for this transform (e.g. for reference in ingestion_model.transforms).",
    )
    module: str | None = Field(
        default=None,
        description="Python module path containing the transform function (e.g. my_package.transforms).",
    )
    params: dict[str, Any] = Field(
        default_factory=dict,
        description="Extra parameters passed to the transform function at runtime.",
    )
    foo: str | None = Field(
        default=None,
        description="Name of the callable in module to use as the transform function.",
    )
    input: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Input field names passed to the transform function.",
    )
    output: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Output field names produced by the transform (defaults to input if unset).",
    )
    input_groups: tuple[tuple[str, ...], ...] = Field(
        default_factory=tuple,
        description=(
            "Explicit groups of input fields for repeated tuple-style value calls."
        ),
    )
    output_groups: tuple[tuple[str, ...], ...] = Field(
        default_factory=tuple,
        description=(
            "Explicit output field groups aligned with input_groups for grouped value calls."
        ),
    )
    dress: DressConfig | None = Field(
        default=None,
        description=(
            "Dressing spec for pivoted output. Applies to ingestion_model.transforms "
            "entries and to inline transform steps. "
            "dress.key receives the input field name, dress.value receives the "
            "function result. E.g. dress={key: name, value: value} with "
            "input=(Open,) produces {name: 'Open', value: <result>}."
        ),
    )
    target: Literal["values", "keys"] = Field(
        default="values",
        description=(
            "Transform target. values=apply function to input values; "
            "keys=apply function to selected document keys."
        ),
    )
    keys: KeySelectionConfig = Field(
        default_factory=KeySelectionConfig,
        description="Key selection for key-target transforms.",
    )

    _foo: Any = PrivateAttr(default=None)

    @model_validator(mode="before")
    @classmethod
    def _normalize_input_output(cls, data: Any) -> Any:
        """Normalize raw input before field validation.

        Works on a copy of a dict payload: rejects list/tuple ``dress``,
        converts ``input``/``output`` and ``input_groups``/``output_groups``
        to tuple form (``None`` becomes ``()``), then normalizes ``keys``.
        Non-dict payloads are returned unchanged.

        Raises:
            ValueError: If ``dress`` is given as a list or tuple.
        """
        if not isinstance(data, dict):
            return data
        data = dict(data)
        if "dress" in data and isinstance(data["dress"], (list, tuple)):
            raise ValueError(
                "List-style `dress` is no longer supported. "
                "Use a dict: dress={key: ..., value: ...}."
            )
        for key in ("input", "output"):
            if key in data:
                if data[key] is not None:
                    data[key] = _tuple_it(data[key])
                else:
                    data[key] = ()
        for key in ("input_groups", "output_groups"):
            if key in data:
                if data[key] is None:
                    data[key] = ()
                else:
                    data[key] = _tuple_groups_it(data[key])
        _normalize_keys_in_dict(data)
        return data

    @model_validator(mode="after")
    def _init_foo_and_output(self) -> Self:
        """Resolve the transform callable and settle the ``output`` field names.

        When both ``module`` and ``foo`` are set, the module is imported and
        the attribute named ``foo`` is stored in ``_foo``. ``output`` is then
        forced to ``(dress.key, dress.value)`` when ``dress`` is set, or
        defaults to ``input`` when no output was declared.

        Raises:
            TypeError: If ``module`` cannot be imported.
            ValueError: If ``foo`` cannot be read from the module, or if
                ``dress`` is combined with ``target='keys'``.
        """
        if self.module is not None and self.foo is not None:
            try:
                _module = importlib.import_module(self.module)
            except Exception as e:
                # Chain the cause so the original import failure stays visible.
                raise TypeError(
                    f"Provided module {self.module} is not valid: {e}"
                ) from e
            try:
                object.__setattr__(self, "_foo", getattr(_module, self.foo))
            except Exception as e:
                raise ValueError(
                    f"Could not instantiate transform function. Exception: {e}"
                ) from e
        if self.dress is not None:
            if self.target == "keys":
                raise ValueError("target='keys' is not compatible with dress.")
            object.__setattr__(self, "output", (self.dress.key, self.dress.value))
        elif not self.output and self.input:
            object.__setattr__(self, "output", self.input)
        return self

    @classmethod
    def get_fields_members(cls) -> list[str]:
        """Get list of field members (public model fields)."""
        return list(cls.model_fields.keys())

    def apply(self, *args: Any, **kwargs: Any) -> Any:
        """Apply the raw transform function to the given arguments.

        This is the core function invocation without any input extraction or
        output dressing — purely ``self._foo(*args, **kwargs, **self.params)``.

        Raises:
            TransformException: If no transform function has been set.
        """
        if self._foo is None:
            raise TransformException("No transform function set")
        return self._foo(*args, **kwargs, **self.params)

    def __lt__(self, other: object) -> bool:
        """Compare transforms for ordering.

        A transform without a resolved function orders before one that has a
        function; every other pairing compares as not-less-than.

        Args:
            other: Other transform to compare with

        Returns:
            bool: True if this transform should be ordered before other
        """
        if not isinstance(other, ProtoTransform):
            return NotImplemented
        if self._foo is None and other._foo is not None:
            return True
        return False

`__lt__(other)` ¶

Compare transforms for ordering.

Parameters:

Name	Type	Description	Default
`other`	`object`	Other transform to compare with	required

Returns:

Name	Type	Description
`bool`	`bool`	True if this transform should be ordered before other

Source code in graflo/architecture/contract/ingestion/transform.py

def __lt__(self, other: object) -> bool:
    """Order transforms without a resolved function before those with one.

    Args:
        other: Other transform to compare with

    Returns:
        bool: True only when this transform has no function and ``other``
        has one; ``NotImplemented`` for non-ProtoTransform operands.
    """
    if not isinstance(other, ProtoTransform):
        return NotImplemented
    return self._foo is None and other._foo is not None

`apply(*args, **kwargs)` ¶

Apply the raw transform function to the given arguments.

This is the core function invocation without any input extraction or output dressing — purely self._foo(*args, **kwargs, **self.params).

Raises:

Type	Description
`TransformException`	If no transform function has been set.

Source code in graflo/architecture/contract/ingestion/transform.py

def apply(self, *args: Any, **kwargs: Any) -> Any:
    """Call the resolved transform function directly.

    No input extraction or output dressing happens here: positional and
    keyword arguments are forwarded as given, with ``self.params`` added as
    extra keyword arguments.

    Raises:
        TransformException: If no transform function has been set.
    """
    func = self._foo
    if func is None:
        raise TransformException("No transform function set")
    return func(*args, **kwargs, **self.params)

`get_fields_members()` `classmethod` ¶

Get list of field members (public model fields).

Source code in graflo/architecture/contract/ingestion/transform.py

@classmethod
def get_fields_members(cls) -> list[str]:
    """Return the names of the public model fields, in declaration order."""
    return [*cls.model_fields.keys()]

`ResourceConfig` ¶

Bases: ConfigBaseModel

Declarative resource definition (serializable contract).

Source code in graflo/architecture/contract/ingestion/resource.py

class ResourceConfig(ConfigBaseModel):
    """Serializable, declarative description of one ingestion resource.

    Unknown keys are rejected (``extra="forbid"``). The ``pipeline`` field can
    also be supplied under the alias ``apply``.
    """

    model_config = {"extra": "forbid"}

    name: str = PydanticField(
        ...,
        description="Name of the resource (e.g. table or file identifier).",
    )
    pipeline: list[dict[str, Any]] = PydanticField(
        ...,
        description="Pipeline of actor steps to apply in sequence (vertex, edge, transform, descend). "
        'Each step is a dict, e.g. {"vertex": "user"} or {"edge": {"from": "a", "to": "b"}}.',
        validation_alias=AliasChoices("pipeline", "apply"),
    )
    encoding: EncodingType = PydanticField(
        default=EncodingType.UTF_8,
        description="Character encoding for input/output (e.g. utf-8, ISO-8859-1).",
    )
    merge_collections: list[str] = PydanticField(
        default_factory=list,
        description="List of collection names to merge when writing to the graph.",
    )
    extra_weights: list[ResourceExtraWeightEntry] = PydanticField(
        default_factory=list,
        description="Additional edge attribute / vertex-weight enrichment for this resource.",
    )
    types: dict[str, str] = PydanticField(
        default_factory=dict,
        description='Field name to Python type expression for casting (e.g. {"amount": "float"}).',
    )
    infer_edges: bool = PydanticField(
        default=True,
        description=(
            "If True, infer edges from current vertex population. "
            "If False, emit only edges explicitly declared as edge actors in the pipeline."
        ),
    )
    infer_edge_only: list[EdgeInferSpec] = PydanticField(
        default_factory=list,
        description=(
            "Optional allow-list for inferred edges. Applies only to inferred (greedy) edges, "
            "not explicit edge actors."
        ),
    )
    infer_edge_except: list[EdgeInferSpec] = PydanticField(
        default_factory=list,
        description=(
            "Optional deny-list for inferred edges. Applies only to inferred (greedy) edges, "
            "not explicit edge actors."
        ),
    )
    drop_trivial_input_fields: bool = PydanticField(
        default=False,
        description=(
            "If True, remove top-level input keys whose value is None or the empty string before "
            "the actor pipeline runs."
        ),
    )
    fail_fast: bool = PydanticField(
        default=False,
        description=(
            "If True, a transform step fails when required input keys are missing in the "
            "current document (rename: all source keys must be present; call: all input keys). "
            "If False (default), rename applies only to keys present in the document and "
            "functional transforms skip the step when inputs are missing."
        ),
    )
    tolerate_transform_errors: bool = PydanticField(
        default=True,
        description=(
            "If True, a failing transform step sets its declared output fields to None, "
            "records the error, and continues the pipeline."
        ),
    )

    @model_validator(mode="after")
    def _validate_policy(self) -> ResourceConfig:
        """Reject configs that set both the allow-list and the deny-list.

        Raises:
            ValueError: If ``infer_edge_only`` and ``infer_edge_except`` are
                both non-empty.
        """
        both_given = bool(self.infer_edge_only) and bool(self.infer_edge_except)
        if both_given:
            raise ValueError(
                "Resource infer_edge_only and infer_edge_except are mutually exclusive."
            )
        return self

    def collect_vertex_names(self) -> set[str]:
        """Gather every vertex name this resource mentions.

        Sources are the pipeline, ``merge_collections``, both endpoints of
        every inferred-edge selector, and the edge endpoints plus named vertex
        weights of every ``extra_weights`` entry.
        """
        found = collect_vertex_names_from_pipeline(self.pipeline)
        found.update(self.merge_collections)
        for selector in (*self.infer_edge_only, *self.infer_edge_except):
            found.update((selector.source, selector.target))
        for entry in self.extra_weights:
            found.update((entry.edge.source, entry.edge.target))
            found.update(
                weight.name
                for weight in entry.vertex_weights
                if weight.name is not None
            )
        return found

    def pipeline_actor_count(self) -> int:
        """Return the actor count of the pipeline, without schema binding."""
        # Imported here rather than at module level, as in the original.
        from graflo.architecture.pipeline.runtime.actor import ActorWrapper

        root = ActorWrapper(*self.pipeline)
        return root.count()

`collect_vertex_names()` ¶

Vertex types referenced by this resource (pipeline and related config).

Source code in graflo/architecture/contract/ingestion/resource.py

def collect_vertex_names(self) -> set[str]:
    """Gather every vertex name this resource mentions.

    Sources are the pipeline, ``merge_collections``, both endpoints of every
    inferred-edge selector, and the edge endpoints plus named vertex weights
    of every ``extra_weights`` entry.
    """
    found = collect_vertex_names_from_pipeline(self.pipeline)
    found.update(self.merge_collections)
    for selector in (*self.infer_edge_only, *self.infer_edge_except):
        found.update((selector.source, selector.target))
    for entry in self.extra_weights:
        found.update((entry.edge.source, entry.edge.target))
        found.update(
            weight.name for weight in entry.vertex_weights if weight.name is not None
        )
    return found

`pipeline_actor_count()` ¶

Count actors in the pipeline without binding schema context.

Source code in graflo/architecture/contract/ingestion/resource.py

def pipeline_actor_count(self) -> int:
    """Return the actor count of the pipeline, without schema binding."""
    # Imported here rather than at module level, as in the original.
    from graflo.architecture.pipeline.runtime.actor import ActorWrapper

    root = ActorWrapper(*self.pipeline)
    return root.count()

`ResourceExtraWeightEntry` ¶

Bases: ConfigBaseModel

Schema edge plus optional vertex-derived weight rules for DB enrichment.

Source code in graflo/architecture/contract/ingestion/resource.py

class ResourceExtraWeightEntry(ConfigBaseModel):
    """Schema edge plus optional vertex-derived weight rules for DB enrichment.

    Attributes:
        edge: The edge this entry applies to.
        vertex_weights: Weight rules attached to the edge (may be empty).
    """

    edge: Edge
    vertex_weights: list[Weight] = PydanticField(default_factory=list)

    @model_validator(mode="before")
    @classmethod
    def _from_yaml(cls, data: Any) -> Any:
        """Normalize the accepted input shapes to ``{"edge", "vertex_weights"}``.

        Accepted shapes:

        * ``None`` — returned unchanged.
        * an ``Edge`` instance — wrapped with no vertex weights.
        * a dict with an ``"edge"`` key holding a dict or an ``Edge`` instance,
          plus optional ``"vertex_weights"``; any other key is rejected.
        * a flat dict — every key except ``"vertex_weights"`` is validated as
          the ``Edge`` itself.

        ``vertex_weights`` may be a list or a single item; a falsy value is
        treated as an empty list.

        Raises:
            TypeError: If ``data`` is neither ``None``, a dict nor an ``Edge``.
            ValueError: If a nested ``"edge"`` entry comes with unexpected keys.
        """
        if data is None:
            return data
        if isinstance(data, Edge):
            return {"edge": data, "vertex_weights": []}
        if not isinstance(data, dict):
            raise TypeError(
                f"extra_weights item must be dict or Edge, got {type(data)}"
            )
        d = dict(data)  # copy so the caller's mapping is not mutated
        vw_raw = d.pop("vertex_weights", None) or []
        if not isinstance(vw_raw, list):
            vw_raw = [vw_raw]
        v_w = [Weight.model_validate(x) for x in vw_raw]
        if "edge" in d and isinstance(d["edge"], (dict, Edge)):
            edge_raw = d.pop("edge")
            # An Edge instance arrives here on keyword construction
            # (``edge=Edge(...)``); it must not be re-validated as a flat dict.
            if isinstance(edge_raw, Edge):
                edge = edge_raw
            else:
                edge = Edge.model_validate(dict(edge_raw))
            if d:
                raise ValueError(
                    f"extra_weights entry has unexpected keys with 'edge': {sorted(d)}"
                )
            return {"edge": edge, "vertex_weights": v_w}
        edge = Edge.model_validate(d)
        return {"edge": edge, "vertex_weights": v_w}

`ResourceRuntime` ¶

Fully initialized resource executor for document casting.

Source code in graflo/architecture/contract/runtime/resource.py

class ResourceRuntime:
    """Fully initialized resource executor for document casting."""

    def __init__(
        self,
        config: ResourceConfig,
        vertex_config: VertexConfig,
        edge_config: EdgeConfig,
        transforms: dict[str, ProtoTransform],
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
        allowed_vertex_names: set[str] | None = None,
        target_db_flavor: DBType | None = None,
    ) -> None:
        """Build the actor tree for ``config`` and initialise it against the schema.

        Args:
            config: Resource definition; supplies the pipeline, type casters,
                edge-inference selectors and extra weights used here.
            vertex_config: Schema vertex configuration. Used unfiltered for
                reference validation and extra-weight initialisation; a
                filtered version is kept for the runtime.
            edge_config: Schema edge configuration. The runtime works on its
                own re-validated copy; the passed object is only written to
                when ``dynamic_edge_feedback`` is true.
            transforms: Named transforms forwarded to the actor init context.
            strict_references: Forwarded to the actor init context.
            dynamic_edge_feedback: When true, edges found in the runtime's
                edge config after actor initialisation but absent from
                ``edge_config`` are copied back into ``edge_config``.
            allowed_vertex_names: Optional vertex-name restriction, forwarded
                to vertex filtering and to the actor init context.
            target_db_flavor: Forwarded to the actor init context.

        Raises:
            ValueError: If the resource references vertices missing from
                ``vertex_config`` or edge selectors that match no known edge.
        """
        self.config = config
        self._type_casters = resolve_type_casters(config.types)
        self._root = ActorWrapper(*config.pipeline)
        self._executor = ActorExecutor(self._root)

        # Narrow the vertex config to this resource and take a private copy
        # of the edge config.
        runtime_vertex_config, local_edge_config = self._filter_vertex_edge_configs(
            vertex_config,
            edge_config,
            allowed_vertex_names=allowed_vertex_names,
        )
        self._vertex_config = runtime_vertex_config
        self._edge_config = local_edge_config

        # Vertex references are checked against the full (unfiltered) vertex
        # config; edge selectors against the local edge config.
        self._validate_vertex_references(vertex_config)
        self._validate_infer_edge_spec_targets(self._edge_config)

        edge_derivation_registry = EdgeDerivationRegistry()
        self._edge_derivation_registry = edge_derivation_registry

        infer_edge_except = self._build_infer_except()
        init_ctx = self._build_init_context(
            transforms=transforms,
            edge_derivation=edge_derivation_registry,
            infer_edge_except=infer_edge_except,
            strict_references=strict_references,
            allowed_vertex_names=allowed_vertex_names,
            target_db_flavor=target_db_flavor,
        )
        logger.debug("total resource actor count : %s", self._root.count())
        self._root.finish_init(init_ctx=init_ctx)

        if dynamic_edge_feedback:
            # Push edges that appeared in the local copy back to the caller's config.
            self._propagate_dynamic_edges(edge_config, vertex_config=vertex_config)

        logger.debug("total resource actor count (after init): %s", self._root.count())
        self._init_extra_weights(vertex_config)

    @property
    def name(self) -> str:
        """Name taken from the underlying resource config."""
        return self.config.name

    @property
    def vertex_config(self) -> VertexConfig:
        """Vertex configuration kept by this runtime (filtered in ``__init__``)."""
        return self._vertex_config

    @property
    def edge_config(self) -> EdgeConfig:
        """This runtime's own edge configuration (a copy made in ``__init__``)."""
        return self._edge_config

    @property
    def root(self) -> ActorWrapper:
        """Root actor wrapper built from the resource pipeline."""
        return self._root

    @property
    def type_casters(self) -> dict[str, Callable[..., Any]]:
        """Type casters resolved from ``config.types``."""
        return self._type_casters

    def collect_vertex_names(self) -> set[str]:
        """Delegate to ``config.collect_vertex_names()``."""
        return self.config.collect_vertex_names()

    def count(self) -> int:
        """Return the actor count reported by the root wrapper."""
        return self._root.count()

    @staticmethod
    def edge_ids_from_pipeline(pipeline: list[dict[str, Any]]) -> set[EdgeId]:
        """Return ``(source, target, None)`` for each EdgeActor found in *pipeline*.

        The pipeline is wrapped in a throwaway ``ActorWrapper``; actors that
        are not ``EdgeActor`` instances, or whose ``edge`` is ``None``, are
        skipped. The relation slot of every id is always ``None``.
        """
        edge_ids: set[EdgeId] = set()
        for actor in ActorWrapper(*pipeline).collect_actors():
            if not isinstance(actor, EdgeActor):
                continue
            if actor.edge is None:
                continue
            edge_ids.add((actor.edge.source, actor.edge.target, None))
        return edge_ids

    def _filter_vertex_edge_configs(
        self,
        vertex_config: VertexConfig,
        edge_config: EdgeConfig,
        *,
        allowed_vertex_names: set[str] | None,
    ) -> tuple[VertexConfig, EdgeConfig]:
        """Return the vertex config narrowed to this resource and a copy of the edge config.

        The edge config is rebuilt from its dict form (defaults included), so
        the returned object is a separate instance from ``edge_config``.
        """
        resource_vertices = self.collect_vertex_names()
        scoped_vertex_config = filter_vertex_config_for_resource(
            vertex_config,
            resource_vertex_names=resource_vertices,
            allowed_vertex_names=allowed_vertex_names,
        )
        edge_payload = edge_config.to_dict(skip_defaults=False)
        private_edge_config = EdgeConfig.model_validate(edge_payload)
        return scoped_vertex_config, private_edge_config

    def _validate_vertex_references(self, vertex_config: VertexConfig) -> None:
        """Ensure every vertex named by edge selectors or pipeline edges is declared.

        Collects source/target names from ``infer_edge_only``,
        ``infer_edge_except`` and the pipeline's edge actors, then compares
        them with ``vertex_config.vertex_set``.

        Raises:
            ValueError: If any referenced vertex is not declared; the message
                lists the missing names in sorted order.
        """
        declared = set(vertex_config.vertex_set)
        referenced: set[str] = set()

        selectors = (*self.config.infer_edge_only, *self.config.infer_edge_except)
        for selector in selectors:
            referenced.update((selector.source, selector.target))
        for source, target, _ in self.edge_ids_from_pipeline(self.config.pipeline):
            referenced.update((source, target))

        missing_vertices = sorted(referenced - declared)
        if not missing_vertices:
            return
        raise ValueError(
            "Resource dynamic edge references undefined vertices: "
            f"{missing_vertices}. "
            "Declare these vertices in vertex_config before using dynamic/inferred edges."
        )

    def _validate_infer_edge_spec_targets(self, edge_config: EdgeConfig) -> None:
        known_edge_ids = {edge_id for edge_id, _ in edge_config.items()}

        def _validate_list(field_name: str, specs: list[EdgeInferSpec]) -> None:
            unknown: list[EdgeId] = []
            for spec in specs:
                if not any(spec.matches(edge_id) for edge_id in known_edge_ids):
                    unknown.append(spec.edge_id)
            if unknown:
                raise ValueError(
                    f"Resource {field_name} contains unknown edge selectors: {unknown}"
                )

        _validate_list("infer_edge_only", self.config.infer_edge_only)
        _validate_list("infer_edge_except", self.config.infer_edge_except)

    def _build_infer_except(self) -> set[EdgeId]:
        infer_edge_except = {spec.edge_id for spec in self.config.infer_edge_except}
        if not self.config.infer_edge_only:
            infer_edge_except |= self.edge_ids_from_pipeline(self.config.pipeline)
        return infer_edge_except

    def _build_init_context(
        self,
        *,
        transforms: dict[str, ProtoTransform],
        edge_derivation: EdgeDerivationRegistry,
        infer_edge_except: set[EdgeId],
        strict_references: bool,
        allowed_vertex_names: set[str] | None,
        target_db_flavor: DBType | None,
    ) -> ActorInitContext:
        """Assemble the ``ActorInitContext`` handed to the actor tree.

        Combines this runtime's vertex/edge configs, the caller-supplied
        arguments and the inference/error-handling switches read from the
        resource config.
        """
        resource = self.config
        only_edge_ids = {spec.edge_id for spec in resource.infer_edge_only}
        return ActorInitContext(
            vertex_config=self._vertex_config,
            edge_config=self._edge_config,
            transforms=transforms,
            edge_derivation=edge_derivation,
            allowed_vertex_names=allowed_vertex_names,
            infer_edges=resource.infer_edges,
            infer_edge_only=only_edge_ids,
            infer_edge_except=infer_edge_except,
            strict_references=strict_references,
            fail_fast=resource.fail_fast,
            tolerate_transform_errors=resource.tolerate_transform_errors,
            target_db_flavor=target_db_flavor,
        )

    def _propagate_dynamic_edges(
        self,
        edge_config: EdgeConfig,
        *,
        vertex_config: VertexConfig,
    ) -> None:
        baseline_edge_ids = {edge_id for edge_id, _ in edge_config.items()}
        for edge_id, edge in self._edge_config.items():
            if edge_id in baseline_edge_ids:
                continue
            edge_config.update_edges(
                edge.model_copy(deep=True), vertex_config=vertex_config
            )

    def _init_extra_weights(self, vertex_config: VertexConfig) -> None:
        reg = self._edge_derivation_registry
        for entry in self.config.extra_weights:
            entry.edge.finish_init(vertex_config)
            if reg is not None and entry.vertex_weights:
                reg.merge_vertex_weights(entry.edge.edge_id, entry.vertex_weights)

    def cast_document(self, doc: dict) -> ResourceCastResult:
        """Run one document through the pipeline.

        Returns the assembled entities together with the transform failures
        recorded on the extraction context. The input ``doc`` is copied (or
        replaced by its stripped version) before processing.
        """
        if self.config.drop_trivial_input_fields:
            work_doc: dict[str, Any] = strip_trivial_top_level_fields(doc)
        else:
            work_doc = dict(doc)

        casters = self._type_casters
        if casters:
            # Return value is ignored; presumably casts ``work_doc`` in place.
            apply_type_casters(work_doc, casters)

        executor = self._executor
        extraction_ctx = executor.extract(work_doc)
        assembled = executor.assemble_result(extraction_ctx)
        failures = list(extraction_ctx.transform_failures)
        return ResourceCastResult(
            entities=assembled.entities,
            transform_failures=failures,
        )

    def __call__(self, doc: dict) -> defaultdict[GraphEntity, list]:
        """Cast *doc* and return only the entities, dropping transform failures.

        Shorthand for ``self.cast_document(doc).entities``.
        """
        return self.cast_document(doc).entities

`cast_document(doc)` ¶

Process a document and return entities plus any tolerated transform failures.

Source code in graflo/architecture/contract/runtime/resource.py

def cast_document(self, doc: dict) -> ResourceCastResult:
    """Run one document through the pipeline.

    Returns the assembled entities plus the transform failures recorded on
    the extraction context. ``doc`` itself is not processed directly: a copy
    (or its stripped version) is used.
    """
    if self.config.drop_trivial_input_fields:
        work_doc: dict[str, Any] = strip_trivial_top_level_fields(doc)
    else:
        work_doc = dict(doc)

    casters = self._type_casters
    if casters:
        # Return value is ignored; presumably casts ``work_doc`` in place.
        apply_type_casters(work_doc, casters)

    executor = self._executor
    extraction_ctx = executor.extract(work_doc)
    assembled = executor.assemble_result(extraction_ctx)
    failures = list(extraction_ctx.transform_failures)
    return ResourceCastResult(
        entities=assembled.entities,
        transform_failures=failures,
    )

`edge_ids_from_pipeline(pipeline)` `staticmethod` ¶

Collect (source, target, None) for every static EdgeActor in pipeline.

Source code in graflo/architecture/contract/runtime/resource.py

@staticmethod
def edge_ids_from_pipeline(pipeline: list[dict[str, Any]]) -> set[EdgeId]:
    """Return ``(source, target, None)`` for each EdgeActor found in *pipeline*.

    Actors that are not ``EdgeActor`` instances, or whose ``edge`` is
    ``None``, are skipped. The relation slot of every id is always ``None``.
    """
    edge_ids: set[EdgeId] = set()
    for actor in ActorWrapper(*pipeline).collect_actors():
        if not isinstance(actor, EdgeActor):
            continue
        if actor.edge is None:
            continue
        edge_ids.add((actor.edge.source, actor.edge.target, None))
    return edge_ids

`Transform` ¶

Bases: ProtoTransform

Concrete transform implementation.

Wraps a ProtoTransform with input extraction, output dressing, field mapping, and transform composition.

Attributes:

Name	Type	Description
`fields`	`tuple[str, ...]`	Tuple of fields to transform
`rename`	`dict[str, str]`	Dictionary mapping input fields to output fields
`functional_transform`	`bool`	Whether this is a functional transform

Source code in graflo/architecture/contract/ingestion/transform.py

class Transform(ProtoTransform):
    """Concrete transform implementation.

    Wraps a ProtoTransform with input extraction, output dressing, field
    mapping, and transform composition.

    Attributes:
        fields: Tuple of fields to transform
        rename: Dictionary mapping input fields to output fields
        functional_transform: Whether this is a functional transform
    """

    fields: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Field names for declarative transform (used to derive input when input unset).",
    )
    rename: dict[str, str] = Field(
        default_factory=dict,
        description="Mapping of input_key -> output_key for pure field renaming (no function).",
    )
    strategy: Literal["single", "each", "all"] = Field(
        default="single",
        description=(
            "Functional call strategy. "
            "single: call function once with all input values. "
            "each: call function once per input field (unary). "
            "all: pass full document as a single argument."
        ),
    )
    passthrough_group_output: bool = Field(
        default=True,
        description=(
            "When grouped mode omits outputs, map function results back to input group keys."
        ),
    )

    functional_transform: bool = Field(
        default=False,
        description="True when a callable (module.foo) is set; False for pure map/dress transforms.",
    )

    @model_validator(mode="before")
    @classmethod
    def _normalize_fields(cls, data: Any) -> Any:
        """Pre-validation clean-up of raw transform input.

        Non-dict input is returned untouched. For dict input, a non-``None``
        ``fields`` value is converted with ``_tuple_it`` and the legacy
        ``switch`` key is rejected.

        Raises:
            ValueError: If the input contains a ``switch`` key.
        """
        if not isinstance(data, dict):
            return data
        payload = dict(data)  # shallow copy; the caller's dict stays untouched
        raw_fields = payload.get("fields")
        if raw_fields is not None:
            payload["fields"] = _tuple_it(raw_fields)
        if "switch" in payload:
            raise ValueError(
                "Legacy `switch` is no longer supported. Use `input` + `dress`."
            )
        return payload

    @model_validator(mode="after")
    def _init_derived(self) -> Self:
        """Fill in derived state after field validation, then validate the result.

        Sets ``functional_transform`` from whether a callable is attached,
        replaces ``input``/``output`` with their effective values, and runs
        ``_validate_configuration`` on the updated instance.

        Raises:
            ValueError: Propagated from ``_validate_configuration``.
        """
        # Remember whether the user supplied a rename map themselves.
        explicit_map = bool(self.rename)
        # Attributes are written with object.__setattr__, i.e. without going
        # through any __setattr__ override defined on the model.
        object.__setattr__(self, "functional_transform", self._foo is not None)
        # The derived map is computed but not stored; ``rename`` is left as given.
        next_input, next_output, _next_map = self._derive_effective_io_and_map()
        object.__setattr__(self, "input", next_input)
        object.__setattr__(self, "output", next_output)
        self._validate_configuration(explicit_map=explicit_map)
        return self

    def _derive_grouped_default_output(self) -> tuple[str, ...]:
        """Return default output names for grouped mode, or ``self.output``.

        A default is derived only when ``input_groups`` is set, neither
        ``output`` nor ``output_groups`` is given, ``passthrough_group_output``
        is enabled and every group holds exactly one field; the result is then
        the tuple of those single field names.
        """
        has_explicit_outputs = bool(self.output or self.output_groups)
        if not self.input_groups or has_explicit_outputs:
            return self.output
        if not self.passthrough_group_output:
            return self.output
        if any(len(group) != 1 for group in self.input_groups):
            return self.output
        derived = tuple(group[0] for group in self.input_groups)
        return derived if derived else self.output

    def _derive_effective_io_and_map(
        self,
    ) -> tuple[tuple[str, ...], tuple[str, ...], dict[str, str]]:
        """Resolve the effective ``(input, output, map)`` triple.

        Precedence, in order:

        1. ``fields`` supplies the input when ``input`` is empty.
        2. A non-empty ``rename`` fills whichever of input/output is still
           empty (keys become input, values become output).
        3. ``dress`` forces the output to ``(dress.key, dress.value)``;
           otherwise an empty output mirrors the input.
        4. With no rename map, equal-length input/output are zipped into one.
        """
        effective_input = self.input
        effective_output = self._derive_grouped_default_output()
        effective_map = dict(self.rename)

        if self.fields and not effective_input:
            effective_input = self.fields

        if effective_map:
            # Decide both fills from the values as they stand before filling.
            fill_input = not effective_input
            fill_output = not effective_output
            if fill_input:
                effective_input = tuple(effective_map)
            if fill_output:
                effective_output = tuple(effective_map.values())

        if self.dress is not None:
            effective_output = (self.dress.key, self.dress.value)
        elif effective_input and not effective_output:
            effective_output = effective_input

        can_zip = (
            not effective_map
            and effective_input
            and effective_output
            and len(effective_input) == len(effective_output)
        )
        if can_zip:
            effective_map = dict(zip(effective_input, effective_output))

        return effective_input, effective_output, effective_map

    def _init_io_from_map(self, force_init: bool = False) -> None:
        """Backwards-compatible shim; prefer sync_io_from_map().

        Populates ``input``/``output`` from the keys/values of ``rename``.
        With ``force_init`` (or when both are empty) both are overwritten;
        otherwise only the empty one is filled, input taking priority.
        Does nothing when ``rename`` is empty.
        """
        rename = self.rename
        if not rename:
            return
        sources = tuple(rename.keys())
        targets = tuple(rename.values())

        overwrite_both = force_init or (not self.input and not self.output)
        if overwrite_both:
            object.__setattr__(self, "input", sources)
            object.__setattr__(self, "output", targets)
        elif not self.input:
            object.__setattr__(self, "input", sources)
        elif not self.output:
            object.__setattr__(self, "output", targets)

    def _validate_configuration(self, *, explicit_map: bool) -> None:
        """Validate that the transform has enough information to operate.

        Called from ``_init_derived`` after ``input``/``output`` have been
        replaced by their effective values, so the checks below see the
        derived input/output rather than the raw user input.

        Args:
            explicit_map: Whether ``rename`` was non-empty when captured at
                the start of ``_init_derived``.

        Raises:
            ValueError: On the first rule that is violated; rules are checked
                in the order written below.
        """
        # target='keys' has its own, exclusive rule set and returns early.
        if self.target == "keys":
            if self.input_groups or self.output_groups:
                raise ValueError(
                    "target='keys' does not accept input_groups/output_groups."
                )
            if self._foo is None:
                raise ValueError("target='keys' requires a functional transform.")
            if self.rename:
                raise ValueError("target='keys' cannot be combined with map.")
            if self.input or self.output or self.fields:
                raise ValueError(
                    "target='keys' does not accept input/output/fields; use keys selector."
                )
            if self.dress is not None:
                raise ValueError("target='keys' is not compatible with dress.")
            if self.strategy != "single":
                raise ValueError(
                    "target='keys' uses implicit per-key execution and does not accept strategy."
                )
            return

        # Reject only user-specified map+function conflict. A derived map
        # (from input/output defaults) is valid for functional transforms.
        if explicit_map and self.rename and self._foo is not None:
            raise ValueError("map and functional transform cannot be used together.")
        if self.dress is not None:
            if len(self.input) != 1:
                raise ValueError("dress requires exactly one input field.")
        if self.strategy != "single" and self._foo is None:
            raise ValueError("strategy applies only to functional transforms.")
        # Grouped mode: input_groups excludes most other input/output options.
        if self.input_groups:
            if self._foo is None:
                raise ValueError(
                    "input_groups requires a functional transform (module + foo)."
                )
            if self.strategy != "single":
                raise ValueError(
                    "input_groups mode is explicit grouped execution and does not accept strategy."
                )
            if self.input or self.fields:
                raise ValueError("input_groups cannot be combined with input/fields.")
            if self.rename:
                raise ValueError("input_groups cannot be combined with map.")
            if self.dress is not None:
                raise ValueError("input_groups is not compatible with dress.")
            if self.output_groups and self.output:
                raise ValueError(
                    "Provide either output or output_groups for input_groups mode, not both."
                )
            if self.output_groups and len(self.output_groups) != len(self.input_groups):
                raise ValueError(
                    "output_groups must have same number of groups as input_groups."
                )
            if self.output and len(self.output) != len(self.input_groups):
                raise ValueError(
                    "When using input_groups with scalar outputs, output length must match number of input_groups."
                )
        elif self.output_groups:
            raise ValueError("output_groups requires input_groups.")
        # A function needs input unless it takes the whole document
        # (strategy='all') or runs in grouped mode.
        if self._foo is not None and not self.input:
            if self.strategy != "all" and not self.input_groups:
                raise ValueError(
                    "Functional transforms require `input` (string or list of field names)."
                )
        if self.strategy == "all":
            if self.input or self.fields:
                raise ValueError("strategy='all' does not accept input/fields.")
            if self.dress is not None:
                raise ValueError("strategy='all' is not compatible with dress.")
        if self.strategy == "each":
            if not self.input:
                raise ValueError("strategy='each' requires one or more input fields.")
            if self.output and len(self.input) != len(self.output):
                raise ValueError(
                    "strategy='each' requires output length to match input length."
                )
        # Pure mapping (no function, no dress) is positional: lengths must agree.
        if (
            self._foo is None
            and self.dress is None
            and self.input
            and self.output
            and len(self.input) != len(self.output)
        ):
            raise ValueError(
                "Non-functional transforms require input/output to have the same length."
            )
        # Nothing at all to work with: no I/O, no groups, no name, and not a
        # whole-document function.
        if (
            not self.input
            and not self.output
            and not self.input_groups
            and not self.output_groups
            and not self.name
            and not (self._foo is not None and self.strategy == "all")
        ):
            raise ValueError(
                "Either input/output, fields, map or name must be provided in "
                "Transform constructor."
            )

    def _refresh_derived(self) -> None:
        """Re-run derived input/output after mutating attributes (merge_from).

        NOTE(review): as written this method changes nothing: both guards
        return, and there is no statement after them. Presumably a derived-map
        refresh used to follow; confirm whether it should be restored or the
        method removed.
        """
        if self.rename or not self.input or not self.output:
            return
        if len(self.input) != len(self.output):
            return

    def __call__(self, *nargs: Any, **kwargs: Any) -> dict[str, Any] | Any:
        """Execute the transform.

        Dispatch order:

        1. ``target == "keys"``: rename document keys (needs a dict document).
        2. ``input_groups``: call the function once per group (needs a dict).
        3. Pure mapping (no function): rename present keys, or pick the
           ``input`` fields / pass positional values through.
        4. Functional: call ``apply`` according to ``strategy``.

        For cases 3 (without ``rename``) and 4 the raw values are packaged by
        ``_dress_as_dict`` when ``output`` is set and returned as-is otherwise.

        Args:
            *nargs: Either a single document dict as first argument, or the
                positional input values themselves.
            **kwargs: Forwarded to the underlying function call.

        Returns:
            A dict of output fields, or the raw result when no ``output`` is
            configured.

        Raises:
            TransformException: If a keys or grouped transform is not given a
                document dict.
        """
        if self.target == "keys":
            input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
            if input_doc is None:
                raise TransformException(
                    "target='keys' requires a document dictionary."
                )
            return self._transform_keys(input_doc, **kwargs)

        if self.input_groups:
            input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
            if input_doc is None:
                raise TransformException(
                    "input_groups transforms require a document dictionary."
                )
            return self._transform_input_groups(input_doc, **kwargs)

        if self.is_mapping:
            # NOTE(review): indexes nargs[0] unguarded, so a call with no
            # positional arguments raises IndexError here.
            input_doc = nargs[0]
            if isinstance(input_doc, dict):
                if self.rename:
                    # Rename only the source keys actually present in the document.
                    present = {
                        self.rename[src]: input_doc[src]
                        for src in self.rename
                        if src in input_doc
                    }
                    return present
                output_values = [input_doc[k] for k in self.input]
            else:
                output_values = list(nargs)
            if self.dress is not None and len(output_values) == 1:
                # Non-functional dress shorthand: keep scalar value.
                output_values = output_values[0]
        else:
            if self.strategy == "all":
                # Whole-document call: a dict is passed as one argument.
                if nargs and isinstance(nargs[0], dict):
                    output_values = self.apply(nargs[0], **kwargs)
                else:
                    output_values = self.apply(*nargs, **kwargs)
            elif self.strategy == "each":
                # One unary call per input field (or per positional value).
                if nargs and isinstance(input_doc := nargs[0], dict):
                    output_values = [
                        self.apply(input_doc[k], **kwargs) for k in self.input
                    ]
                else:
                    output_values = [self.apply(value, **kwargs) for value in nargs]
            else:
                # "single": one call with all input values as positional arguments.
                if nargs and isinstance(input_doc := nargs[0], dict):
                    new_args = [input_doc[k] for k in self.input]
                    output_values = self.apply(*new_args, **kwargs)
                else:
                    output_values = self.apply(*nargs, **kwargs)

        if self.output:
            r = self._dress_as_dict(output_values)
        else:
            r = output_values
        return r

    def _apply_grouped_result(
        self,
        out: dict[str, Any],
        result: Any,
        input_group: tuple[str, ...],
        output_group: tuple[str, ...] | None,
        *,
        group_index: int,
    ) -> None:
        """Store one group's function result into *out* under the right keys.

        Key selection, in priority order: the explicit ``output_group``; the
        scalar ``self.output[group_index]``; otherwise the input group's own
        field names (list/tuple results positionally, scalar results only for
        single-field groups).

        Raises:
            TransformException: If the number of produced values does not fit
                the target keys, or a key is already present in *out*.
        """
        result_is_sequence = isinstance(result, (list, tuple))

        if output_group is not None:
            values = list(result) if result_is_sequence else [result]
            if len(values) != len(output_group):
                raise TransformException(
                    f"input_groups[{group_index}] produced {len(values)} values, "
                    f"but output_groups[{group_index}] expects {len(output_group)}."
                )
            pairs = zip(output_group, values)
        elif self.output:
            pairs = ((self.output[group_index], result),)
        elif result_is_sequence:
            values = list(result)
            if len(values) != len(input_group):
                raise TransformException(
                    f"input_groups[{group_index}] has {len(input_group)} fields, "
                    f"but transform returned {len(values)} values. "
                    "Provide output/output_groups explicitly to resolve mapping."
                )
            pairs = zip(input_group, values)
        else:
            if len(input_group) != 1:
                raise TransformException(
                    f"input_groups[{group_index}] has {len(input_group)} fields "
                    "but transform returned a scalar. "
                    "Provide output/output_groups explicitly for scalar group results."
                )
            pairs = ((input_group[0], result),)

        for key, value in pairs:
            if key in out:
                raise TransformException(
                    f"Grouped transform produced duplicate output key '{key}'."
                )
            out[key] = value

    def _transform_input_groups(
        self, doc: dict[str, Any], **kwargs: Any
    ) -> dict[str, Any]:
        """Apply the function once per input group and merge the results.

        Each group's field values are read from *doc* and passed positionally
        to ``apply``; the result is stored through ``_apply_grouped_result``.
        """
        merged: dict[str, Any] = {}
        for position, group_fields in enumerate(self.input_groups):
            group_values = [doc[field_name] for field_name in group_fields]
            group_result = self.apply(*group_values, **kwargs)
            if self.output_groups:
                target_fields = self.output_groups[position]
            else:
                target_fields = None
            self._apply_grouped_result(
                merged,
                group_result,
                group_fields,
                target_fields,
                group_index=position,
            )
        return merged

    @property
    def is_mapping(self) -> bool:
        """True when the transform is pure mapping (no function).

        That is, when no callable is attached (``self._foo is None``).
        """
        return self._foo is None

    def planned_output_field_names(
        self, doc: dict[str, Any] | None = None
    ) -> tuple[str, ...]:
        """Return output field names this transform would write on success.

        Args:
            doc: Optional document. Required to resolve names for
                ``target == "keys"``; for rename maps it narrows the result to
                sources present in the document.

        Returns:
            The planned output names, or ``()`` when they cannot be determined.
        """
        if self.target == "keys":
            return () if doc is None else tuple(sorted(self._selected_keys(doc)))

        if self.input_groups:
            if self.output_groups:
                flattened = [name for group in self.output_groups for name in group]
                # dict.fromkeys drops duplicates while keeping first-seen order.
                return tuple(dict.fromkeys(flattened))
            if self.output:
                return self.output
            if any(len(group) != 1 for group in self.input_groups):
                return ()
            return tuple(group[0] for group in self.input_groups)

        if self.dress is not None:
            return (self.dress.key, self.dress.value)

        if self.rename:
            if doc is None:
                return tuple(self.rename.values())
            return tuple(dst for src, dst in self.rename.items() if src in doc)

        return self.output if self.output else ()

    def _dress_as_dict(self, transform_result: Any) -> dict[str, Any]:
        """Package a raw transform result as an output dict.

        With ``dress`` set, the result is pivoted: ``dress.key`` receives the
        name of the first input field and ``dress.value`` the result. A list
        or tuple result is paired positionally with ``output``. Any other
        result is stored under the last ``output`` name.
        """
        dress = self.dress
        if dress is not None:
            return {
                dress.key: self.input[0],
                dress.value: transform_result,
            }
        if isinstance(transform_result, (list, tuple)):
            return dict(zip(self.output, transform_result))
        return {self.output[-1]: transform_result}

    def _selected_keys(self, doc: dict[str, Any]) -> set[str]:
        """Return the keys the key-transform applies to.

        ``all`` selects every key of *doc*; ``include`` returns the configured
        names as given; any other mode returns the keys of *doc* that are not
        among the configured names.
        """
        mode = self.keys.mode
        if mode == "all":
            return set(doc)
        configured = set(self.keys.names)
        if mode == "include":
            return configured
        return {key for key in doc if key not in configured}

    def _transform_keys(self, doc: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
        """Return a copy of *doc* with the selected keys passed through the function.

        Unselected keys are kept as they are; values are never changed.

        Raises:
            TransformException: If a resulting key is not a ``str`` or two
                entries end up with the same key.
        """
        targeted = self._selected_keys(doc)
        renamed: dict[str, Any] = {}
        for old_key, value in doc.items():
            if old_key in targeted:
                new_key = self.apply(old_key, **kwargs)
            else:
                new_key = old_key
            if not isinstance(new_key, str):
                raise TransformException(
                    "Key transform functions must return str values."
                )
            if new_key in renamed:
                raise TransformException(
                    f"Key transform collision detected for key '{new_key}'."
                )
            renamed[new_key] = value
        return renamed

    @property
    def is_dummy(self) -> bool:
        """Check if this is a dummy transform.

        A dummy transform has a name but neither a rename map nor a callable.

        Returns:
            bool: True if this is a dummy transform
        """
        if self.name is None or self.rename:
            return False
        return self._foo is None

    def merge_from(self, t: Transform) -> Transform:
        """Return a deep copy of *t* with this transform's settings layered on top.

        ``input`` and ``output`` of the copy are replaced when set on ``self``;
        ``params`` are merged key by key, with values from ``self`` winning.
        Neither ``self`` nor *t* is modified. This is copy-and-merge, unlike
        the in-place ``ConfigBaseModel.update``.

        Args:
            t: Transform to merge from

        Returns:
            Transform: New transform with merged configuration
        """
        merged = deepcopy(t)
        for attr_name in ("input", "output"):
            own_value = getattr(self, attr_name)
            if own_value:
                setattr(merged, attr_name, own_value)
        if self.params:
            merged.params = {**merged.params, **self.params}
        merged._refresh_derived()
        return merged

    def get_barebone(
        self, other: Transform | None
    ) -> tuple[Transform | None, Transform | None]:
        """Resolve this transform against an optional library transform.

        Three outcomes:

        * ``self`` has a function: ``(None, self)``, so ``self`` is what gets
          stored in the library.
        * ``self`` has none but *other* does: ``(new, None)``, where ``new``
          is built from *other*'s non-default settings overridden by those of
          ``self`` (minus ``foo``/``module``).
        * otherwise: ``(None, None)``.

        Args:
            other: Optional transform to use as base

        Returns:
            tuple[Transform | None, Transform | None]: Updated self transform
            and transform to store in library
        """
        own_settings = self.to_dict(exclude_defaults=True)
        if self.foo is not None:
            # self will be the lib transform
            return None, self
        if other is None or other.foo is None:
            return None, None

        # init self from other: the function reference comes from ``other``.
        for inherited_key in ("foo", "module"):
            own_settings.pop(inherited_key, None)
        combined = other.to_dict(exclude_defaults=True)
        combined.update(own_settings)
        return Transform(**combined), None

`is_dummy` `property` ¶

Check if this is a dummy transform.

Returns:

Name	Type	Description
`bool`	`bool`	True if this is a dummy transform

`is_mapping` `property` ¶

True when the transform is pure mapping (no function).

`__call__(*nargs, **kwargs)` ¶

Execute the transform.

Parameters:

Name	Type	Description	Default
`*nargs`	`Any`	Positional arguments for the transform	`()`
`**kwargs`	`Any`	Keyword arguments for the transform	`{}`

Returns:

Name	Type	Description
`dict`	`dict[str, Any] \| Any`	Transformed data

Source code in graflo/architecture/contract/ingestion/transform.py

def __call__(self, *nargs: Any, **kwargs: Any) -> dict[str, Any] | Any:
    """Run the transform on a document or on positional values.

    A dict passed as the first positional argument is treated as the input
    document; otherwise the positional arguments are used as values directly.

    Args:
        *nargs: Positional arguments for the transform
        **kwargs: Keyword arguments forwarded to the underlying calls

    Returns:
        dict: Transformed data. When ``output`` is not set, the raw result is
        returned instead of a dict.

    Raises:
        TransformException: If ``target == "keys"`` or ``input_groups`` is set
            and the first positional argument is not a dict.
    """
    # The document, if the caller supplied one as the first positional argument.
    doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None

    if self.target == "keys":
        if doc is None:
            raise TransformException(
                "target='keys' requires a document dictionary."
            )
        return self._transform_keys(doc, **kwargs)

    if self.input_groups:
        if doc is None:
            raise TransformException(
                "input_groups transforms require a document dictionary."
            )
        return self._transform_input_groups(doc, **kwargs)

    if self.is_mapping:
        # Indexed directly: a mapping transform called without positional
        # arguments raises IndexError here.
        first = nargs[0]
        if isinstance(first, dict):
            if self.rename:
                # Renames are returned as-is and only cover keys present in
                # the document; no output dressing is applied.
                return {
                    dst: first[src]
                    for src, dst in self.rename.items()
                    if src in first
                }
            values = [first[name] for name in self.input]
        else:
            values = list(nargs)
        if self.dress is not None and len(values) == 1:
            # Non-functional dress shorthand: keep scalar value.
            values = values[0]
    else:
        mode = self.strategy
        if mode == "each":
            # Apply the function once per input value; the generator keeps
            # lookup and application interleaved per field.
            sources = nargs if doc is None else (doc[name] for name in self.input)
            values = [self.apply(item, **kwargs) for item in sources]
        elif doc is None:
            # No document: positional values go to the function unchanged.
            values = self.apply(*nargs, **kwargs)
        elif mode == "all":
            # The function receives the whole document.
            values = self.apply(doc, **kwargs)
        else:
            # The function receives the selected input fields as positional args.
            values = self.apply(*(doc[name] for name in self.input), **kwargs)

    return self._dress_as_dict(values) if self.output else values

`get_barebone(other)` ¶

Get the barebone transform configuration.

Parameters:

Name	Type	Description	Default
`other`	`Transform \| None`	Optional transform to use as base	required

Returns:

Type	Description
`tuple[Transform \| None, Transform \| None]`	Updated self transform and transform to store in library

Source code in graflo/architecture/contract/ingestion/transform.py

def get_barebone(
    self, other: Transform | None
) -> tuple[Transform | None, Transform | None]:
    """Split this transform into a resolved instance and a library entry.

    Args:
        other: Optional transform to inherit configuration from.

    Returns:
        tuple[Transform | None, Transform | None]: ``(resolved, library)``.
        ``(None, self)`` when this transform has ``foo`` set,
        ``(Transform, None)`` when ``foo`` comes from ``other``, and
        ``(None, None)`` when neither side provides ``foo``.
    """
    # Only explicitly-set (non-default) fields take part in the merge below.
    overrides = self.to_dict(exclude_defaults=True)

    if self.foo is not None:
        # This transform carries its own function: it is the library entry.
        return None, self

    if other is None or other.foo is None:
        # Nothing to inherit a function from.
        return None, None

    # The function reference comes from ``other``; every other field that is
    # explicitly set on self overrides the inherited value.
    for inherited in ("foo", "module"):
        overrides.pop(inherited, None)
    merged = other.to_dict(exclude_defaults=True)
    merged.update(overrides)
    return Transform(**merged), None

`merge_from(t)` ¶

Merge another transform's configuration into a copy of it.

Returns a new Transform with values from self overriding t where set. Does not override ConfigBaseModel.update (in-place); use this for copy-and-merge semantics.

Parameters:

Name	Type	Description	Default
`t`	`Transform`	Transform to merge from	required

Returns:

Name	Type	Description
`Transform`	`Transform`	New transform with merged configuration

Source code in graflo/architecture/contract/ingestion/transform.py

def merge_from(self, t: Transform) -> Transform:
    """Overlay this transform's settings onto a deep copy of ``t``.

    ``t`` itself is left untouched. On the copy, ``input`` and ``output`` are
    replaced when this transform has non-empty values for them, and ``params``
    entries from this transform win over the copied ones. This is a
    copy-and-merge operation and does not override the in-place
    ConfigBaseModel.update.

    Args:
        t: Transform to merge from

    Returns:
        Transform: New transform with merged configuration
    """
    merged = deepcopy(t)

    # Empty values on self mean "not set" and keep the base value.
    for attr in ("input", "output"):
        override = getattr(self, attr)
        if override:
            setattr(merged, attr, override)

    if self.params:
        combined = dict(merged.params)
        combined.update(self.params)
        merged.params = combined

    # Recompute whatever the copy derives from the fields changed above.
    merged._refresh_derived()
    return merged

`planned_output_field_names(doc=None)` ¶

Return output field names this transform would write on success.

Source code in graflo/architecture/contract/ingestion/transform.py

def planned_output_field_names(
    self, doc: dict[str, Any] | None = None
) -> tuple[str, ...]:
    """List the output field names a successful run would write.

    Args:
        doc: Optional input document. Needed to resolve names for
            ``target == "keys"`` and to restrict renames to keys that are
            actually present.

    Returns:
        The planned output field names, or an empty tuple when they cannot be
        determined from the configuration (and ``doc``) alone.
    """
    if self.target == "keys":
        # Key transforms depend entirely on the document's keys.
        return () if doc is None else tuple(sorted(self._selected_keys(doc)))

    if self.input_groups:
        if self.output_groups:
            # Flatten the groups, dropping repeats but keeping first-seen order.
            flattened = (name for group in self.output_groups for name in group)
            return tuple(dict.fromkeys(flattened))
        if self.output:
            return self.output
        # Without explicit outputs, names are only known when every group
        # holds exactly one field.
        if any(len(group) != 1 for group in self.input_groups):
            return ()
        return tuple(group[0] for group in self.input_groups)

    if self.dress is not None:
        return (self.dress.key, self.dress.value)

    if self.rename:
        if doc is None:
            return tuple(self.rename.values())
        return tuple(dst for src, dst in self.rename.items() if src in doc)

    return self.output or ()

`TransformException` ¶

Bases: Exception

Base exception for transform-related errors.

Source code in graflo/architecture/contract/ingestion/transform.py

class TransformException(Exception):
    """Common base class for errors raised by transform handling."""

`build_resource_runtime(config, vertex_config, edge_config, transforms=None, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None)` ¶

Construct a fully initialized :class:ResourceRuntime from declarative config.

Source code in graflo/architecture/contract/runtime/resource.py

def build_resource_runtime(
    config: ResourceConfig,
    vertex_config: VertexConfig,
    edge_config: EdgeConfig,
    transforms: dict[str, ProtoTransform] | None = None,
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
    allowed_vertex_names: set[str] | None = None,
    target_db_flavor: DBType | None = None,
) -> ResourceRuntime:
    """Build a :class:`ResourceRuntime` from declarative configuration.

    Args:
        config: Declarative resource configuration.
        vertex_config: Vertex configuration handed to the runtime.
        edge_config: Edge configuration handed to the runtime.
        transforms: Named transform library; ``None`` (or an empty mapping)
            is passed on as a fresh empty dict.
        strict_references: Forwarded unchanged to :class:`ResourceRuntime`.
        dynamic_edge_feedback: Forwarded unchanged to :class:`ResourceRuntime`.
        allowed_vertex_names: Forwarded unchanged to :class:`ResourceRuntime`.
        target_db_flavor: Forwarded unchanged to :class:`ResourceRuntime`.

    Returns:
        The constructed :class:`ResourceRuntime`.
    """
    library = transforms or {}
    runtime_options = {
        "strict_references": strict_references,
        "dynamic_edge_feedback": dynamic_edge_feedback,
        "allowed_vertex_names": allowed_vertex_names,
        "target_db_flavor": target_db_flavor,
    }
    return ResourceRuntime(
        config, vertex_config, edge_config, library, **runtime_options
    )

`collect_vertex_names_from_pipeline(steps)` ¶

Collect vertex names referenced by pipeline steps (including nested descend).

Source code in graflo/architecture/contract/ingestion/resource.py

def collect_vertex_names_from_pipeline(steps: list[Any]) -> set[str]:
    """Gather the vertex names that a pipeline's steps refer to.

    Non-dict steps are ignored. Each dict step is normalized via
    ``normalize_actor_step`` and inspected by its ``type``; ``descend`` steps
    are followed recursively through their nested ``pipeline``.

    Args:
        steps: Raw pipeline steps.

    Returns:
        The set of string vertex names found in the steps.
    """
    collected: set[str] = set()
    for raw_step in steps:
        if not isinstance(raw_step, dict):
            continue
        # Normalize a copy so the caller's step dict is not handed over directly.
        spec = normalize_actor_step(dict(raw_step))
        kind = spec.get("type")

        if kind == "vertex":
            vertex = spec.get("vertex")
            if isinstance(vertex, str):
                collected.add(vertex)
        elif kind == "vertex_router":
            # type_map contributes its values, vertex_from_map its keys.
            type_map = spec.get("type_map")
            if isinstance(type_map, dict):
                collected.update(
                    value for value in type_map.values() if isinstance(value, str)
                )
            vertex_from_map = spec.get("vertex_from_map")
            if isinstance(vertex_from_map, dict):
                collected.update(
                    key for key in vertex_from_map if isinstance(key, str)
                )
        elif kind == "edge":
            # Endpoints may be spelled source/target or from/to.
            endpoints = (
                spec.get("source") or spec.get("from"),
                spec.get("target") or spec.get("to"),
            )
            collected.update(
                endpoint for endpoint in endpoints if isinstance(endpoint, str)
            )
            weights = spec.get("vertex_weights")
            if isinstance(weights, list):
                collected.update(
                    weight["name"]
                    for weight in weights
                    if isinstance(weight, dict)
                    and isinstance(weight.get("name"), str)
                )
        elif kind == "descend":
            nested = spec.get("pipeline")
            if isinstance(nested, list):
                collected.update(collect_vertex_names_from_pipeline(nested))
    return collected

`filter_vertex_config_for_resource(vertex_config, *, resource_vertex_names, allowed_vertex_names)` ¶

Derive a filtered VertexConfig for runtime actor execution.

Source code in graflo/architecture/contract/runtime/resource.py

def filter_vertex_config_for_resource(
    vertex_config: VertexConfig,
    *,
    resource_vertex_names: set[str],
    allowed_vertex_names: set[str] | None,
) -> VertexConfig:
    """Restrict a VertexConfig to the vertices relevant for one resource.

    Args:
        vertex_config: Full vertex configuration.
        resource_vertex_names: Vertex names used by the resource; when empty,
            ``allowed_vertex_names`` alone decides what is kept.
        allowed_vertex_names: Optional allowlist; when given together with
            resource names, only names in both sets are kept.

    Returns:
        ``vertex_config`` itself when neither set constrains anything,
        otherwise a new VertexConfig holding only the kept vertices and their
        ``force_types`` entries.
    """
    # No constraint from either side: hand back the original object.
    if not resource_vertex_names and allowed_vertex_names is None:
        return vertex_config

    if resource_vertex_names:
        keep = set(resource_vertex_names)
        if allowed_vertex_names is not None:
            keep = keep & allowed_vertex_names
    else:
        keep = set(allowed_vertex_names)

    return VertexConfig(
        vertices=[
            vertex for vertex in vertex_config.vertices if vertex.name in keep
        ],
        force_types={
            vertex_name: forced
            for vertex_name, forced in vertex_config.force_types.items()
            if vertex_name in keep
        },
        identity_from_all_properties=vertex_config.identity_from_all_properties,
    )

`resolve_type_caster(name)` ¶

Resolve a type caster by name from a strict allowlist.

Source code in graflo/util/casting.py

def resolve_type_caster(name: str) -> Callable[..., Any] | None:
    """Look up a type caster by name, restricted to ``SAFE_TYPE_CASTERS``.

    Args:
        name: Caster name, either a key of ``SAFE_TYPE_CASTERS`` or a
            ``builtins.``-prefixed form of such a key.

    Returns:
        The allowlisted caster, or ``None`` when ``name`` is not a string or
        does not resolve to an allowlisted entry.
    """
    if not isinstance(name, str):
        return None

    direct = SAFE_TYPE_CASTERS.get(name)
    if direct is not None:
        return direct

    # Accept the qualified spelling "builtins.<attr>" for allowlisted names.
    module_name, dot, attr_name = name.partition(".")
    if not dot or module_name != "builtins":
        return None
    # The attribute must be a real builtin callable and an allowlist key.
    if callable(getattr(builtins, attr_name, None)) and attr_name in SAFE_TYPE_CASTERS:
        return SAFE_TYPE_CASTERS[attr_name]
    return None

`strip_trivial_top_level_fields(doc)` ¶

Return a shallow copy of doc without None or empty-string values.

Source code in graflo/architecture/contract/runtime/resource.py

def strip_trivial_top_level_fields(doc: dict[str, Any]) -> dict[str, Any]:
    """Copy *doc*, leaving out top-level entries that are None or ``""``.

    Only top-level values are inspected; nested containers are kept as they
    are, and other falsy values such as ``0`` or ``[]`` are retained.
    """
    kept: dict[str, Any] = {}
    for field_name, value in doc.items():
        if value is None:
            continue
        if value != "":
            kept[field_name] = value
    return kept

graflo.architecture.contract.ingestion¶

DressConfig ¶

EdgeInferSpec ¶

IngestionModel ¶

fetch_resource(name=None) ¶

fetch_resource_config(name) ¶

finish_init(core_schema, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None) ¶

prune_to_graph(core_schema, disconnected=None) ¶

KeySelectionConfig ¶

ProtoTransform ¶

__lt__(other) ¶

apply(*args, **kwargs) ¶

get_fields_members() classmethod ¶

ResourceConfig ¶

collect_vertex_names() ¶

pipeline_actor_count() ¶

ResourceExtraWeightEntry ¶

ResourceRuntime ¶

cast_document(doc) ¶

edge_ids_from_pipeline(pipeline) staticmethod ¶

Transform ¶

is_dummy property ¶

is_mapping property ¶

__call__(*nargs, **kwargs) ¶

get_barebone(other) ¶

merge_from(t) ¶

planned_output_field_names(doc=None) ¶

TransformException ¶

build_resource_runtime(config, vertex_config, edge_config, transforms=None, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None) ¶

collect_vertex_names_from_pipeline(steps) ¶

filter_vertex_config_for_resource(vertex_config, *, resource_vertex_names, allowed_vertex_names) ¶

resolve_type_caster(name) ¶

strip_trivial_top_level_fields(doc) ¶

`graflo.architecture.contract.ingestion`¶

`DressConfig` ¶

`EdgeInferSpec` ¶

`IngestionModel` ¶

`fetch_resource(name=None)` ¶

`fetch_resource_config(name)` ¶

`finish_init(core_schema, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None)` ¶

`prune_to_graph(core_schema, disconnected=None)` ¶

`KeySelectionConfig` ¶

`ProtoTransform` ¶

`__lt__(other)` ¶

`apply(*args, **kwargs)` ¶

`get_fields_members()` `classmethod` ¶

`ResourceConfig` ¶

`collect_vertex_names()` ¶

`pipeline_actor_count()` ¶

`ResourceExtraWeightEntry` ¶

`ResourceRuntime` ¶

`cast_document(doc)` ¶

`edge_ids_from_pipeline(pipeline)` `staticmethod` ¶

`Transform` ¶

`is_dummy` `property` ¶

`is_mapping` `property` ¶

`__call__(*nargs, **kwargs)` ¶

`get_barebone(other)` ¶

`merge_from(t)` ¶

`planned_output_field_names(doc=None)` ¶

`TransformException` ¶

`build_resource_runtime(config, vertex_config, edge_config, transforms=None, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None)` ¶

`collect_vertex_names_from_pipeline(steps)` ¶

`filter_vertex_config_for_resource(vertex_config, *, resource_vertex_names, allowed_vertex_names)` ¶

`resolve_type_caster(name)` ¶

`strip_trivial_top_level_fields(doc)` ¶