Skip to content

graflo.architecture.contract.ingestion

Declarative ingestion contract: resources, transforms, and ingestion model.

DressConfig

Bases: ConfigBaseModel

Output dressing specification for pivoted transforms.

When a transform function returns a single scalar (e.g. round_str returns 6.43), DressConfig describes how to package that scalar together with the input field name into a dict.

Attributes:

Name Type Description
key str

Output field that receives the input field name (e.g. "Open").

value str

Output field that receives the function result (e.g. 6.43).

Source code in graflo/architecture/contract/ingestion/transform.py
class DressConfig(ConfigBaseModel):
    """Output dressing specification for pivoted transforms.

    When a transform function returns a single scalar (e.g. ``round_str``
    returns ``6.43``), DressConfig describes how to package that scalar together
    with the input field name into a two-entry dict: one entry carries the
    name of the input field, the other carries the function result.

    Attributes:
        key: Name of the output field that receives the **input field name**
            (e.g. "Open").
        value: Name of the output field that receives the **function result**
            (e.g. 6.43).
    """

    # Both fields are required: neither declares a default.
    # Name of the output field that will hold the input field's name.
    key: str = Field(description="Output field name for the input key.")
    # Name of the output field that will hold the scalar returned by the function.
    value: str = Field(description="Output field name for the function result.")

EdgeInferSpec

Bases: ConfigBaseModel

Selector for controlling inferred edge emission.

Source code in graflo/architecture/contract/ingestion/resource.py
class EdgeInferSpec(ConfigBaseModel):
    """Selector for controlling inferred edge emission.

    A selector names a ``(source, target)`` vertex pair and, optionally, a
    relation. Leaving ``relation`` unset makes the selector a wildcard over
    every relation between the two vertices.
    """

    source: str = PydanticField(..., description="Edge source vertex name.")
    target: str = PydanticField(..., description="Edge target vertex name.")
    relation: str | None = PydanticField(
        default=None,
        description=(
            "Optional relation discriminator. If omitted, selector applies to all relations "
            "for (source, target)."
        ),
    )

    @property
    def edge_id(self) -> EdgeId:
        """Return this selector as a ``(source, target, relation)`` triple."""
        return (self.source, self.target, self.relation)

    def matches(self, edge_id: EdgeId) -> bool:
        """Tell whether ``edge_id`` is selected by this spec.

        Args:
            edge_id: ``(source, target, relation)`` triple to test.

        Returns:
            True when source and target are equal to this selector's and the
            relation is either unconstrained (``None`` on the selector) or equal.
        """
        other_source, other_target, other_relation = edge_id
        # Endpoints must agree before the relation is even considered.
        if self.source != other_source or self.target != other_target:
            return False
        if self.relation is None:
            # Wildcard selector: any relation between the endpoints matches.
            return True
        return self.relation == other_relation

IngestionModel

Bases: ConfigBaseModel

Ingestion model (C): resources and transform registry.

Source code in graflo/architecture/contract/ingestion/model.py
class IngestionModel(ConfigBaseModel):
    """Ingestion model (C): resources and transform registry.

    Holds the declarative resource list and the named transform library, plus
    private lookup maps derived from them. Runtime executors are only created
    by :meth:`finish_init`.
    """

    edges_on_duplicate: Literal["ignore", "upsert"] = PydanticField(
        default="ignore",
        description=(
            "How batch edge writes tolerate an already-matching edge. Passed through to "
            ":meth:`~graflo.db.conn.Connection.insert_edges_batch` where the target backend "
            "supports it."
        ),
    )
    resources: list[ResourceConfig] = PydanticField(
        default_factory=list,
        description="List of resource definitions (data pipelines mapping to vertices/edges).",
    )
    transforms: list[ProtoTransform] = PydanticField(
        default_factory=list,
        description="List of named transforms available to resources.",
    )

    # Derived state: rebuilt from ``resources`` / ``transforms`` on demand.
    _resources: dict[str, ResourceConfig] = PrivateAttr()
    _runtimes: dict[str, ResourceRuntime] = PrivateAttr(default_factory=dict)
    _transforms: dict[str, ProtoTransform] = PrivateAttr(default_factory=dict)
    _combined_edge_derivation: EdgeDerivationRegistry = PrivateAttr(
        default_factory=EdgeDerivationRegistry
    )

    @model_validator(mode="after")
    def _init_model(self) -> IngestionModel:
        """Populate the name lookup maps once field validation has finished."""
        self._rebuild_config_state()
        return self

    def _rebuild_config_state(self) -> None:
        """Refresh both lookup maps (transforms first, then resources)."""
        self._rebuild_transform_map()
        self._rebuild_resource_map()

    def _rebuild_resource_map(self) -> None:
        """Check that resource names are unique, then rebuild ``_resources``.

        Raises:
            ValueError: On the first resource name that occurs more than once.
        """
        occurrences = Counter(cfg.name for cfg in self.resources)
        clash = next(
            ((name, count) for name, count in occurrences.items() if count > 1),
            None,
        )
        if clash is not None:
            k, v = clash
            raise ValueError(f"resource name {k} used {v} times")
        by_name = {cfg.name: cfg for cfg in self.resources}
        object.__setattr__(self, "_resources", by_name)

    def _rebuild_transform_map(self) -> None:
        """Check transform names, then rebuild the ``_transforms`` lookup.

        Raises:
            ValueError: If any transform has an empty/missing name, or if a
                name is shared by several transforms.
        """
        missing_names = [
            position
            for position, transform in enumerate(self.transforms)
            if not transform.name
        ]
        if missing_names:
            raise ValueError(
                "All ingestion transforms must define a non-empty name. "
                f"Missing at indexes: {missing_names}"
            )

        named = [(t.name, t) for t in self.transforms if t.name is not None]
        tally = Counter(name for name, _ in named)
        duplicates = sorted(name for name, count in tally.items() if count > 1)
        if duplicates:
            raise ValueError(f"Duplicate ingestion transform names found: {duplicates}")

        object.__setattr__(self, "_transforms", dict(named))

    def finish_init(
        self,
        core_schema: CoreSchema,
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
        allowed_vertex_names: set[str] | None = None,
        target_db_flavor: DBType | None = None,
    ) -> None:
        """Create one ``ResourceRuntime`` per declared resource.

        The lookup maps are refreshed first, then every resource config is
        bound to the vertex/edge configuration of ``core_schema`` and to the
        transform library. The resulting mapping replaces ``_runtimes``.

        Args:
            core_schema: Supplies ``vertex_config`` and ``edge_config``.
            strict_references: Forwarded to each ``ResourceRuntime``.
            dynamic_edge_feedback: Forwarded to each ``ResourceRuntime``.
            allowed_vertex_names: Forwarded to each ``ResourceRuntime``.
            target_db_flavor: Forwarded to each ``ResourceRuntime``.
        """
        self._rebuild_config_state()
        built: dict[str, ResourceRuntime] = {
            cfg.name: ResourceRuntime(
                cfg,
                vertex_config=core_schema.vertex_config,
                edge_config=core_schema.edge_config,
                transforms=self._transforms,
                strict_references=strict_references,
                dynamic_edge_feedback=dynamic_edge_feedback,
                allowed_vertex_names=allowed_vertex_names,
                target_db_flavor=target_db_flavor,
            )
            for cfg in self.resources
        }
        object.__setattr__(self, "_runtimes", built)

    def fetch_resource(self, name: str | None = None) -> ResourceRuntime:
        """Return a built runtime, by name or the first one if ``name`` is None.

        Raises:
            ValueError: If ``name`` is unknown, or if no resources exist at all.
            RuntimeError: If resources are declared but no runtime was built.
        """
        if name is None:
            if self._runtimes:
                # No name requested: hand back the first runtime in the map.
                return next(iter(self._runtimes.values()))
            if self.resources:
                raise RuntimeError(
                    "IngestionModel resources exist but runtimes were not built; "
                    "call finish_init() first."
                )
            raise ValueError("Empty resource container :(")

        found = self._runtimes.get(name)
        if found is None:
            raise ValueError(f"Resource {name} not found")
        return found

    def fetch_resource_config(self, name: str) -> ResourceConfig:
        """Return the declarative config registered under ``name``.

        Raises:
            ValueError: If no resource with that name is registered.
        """
        found = self._resources.get(name)
        if found is not None:
            return found
        raise ValueError(f"Resource {name} not found")

    def prune_to_graph(
        self, core_schema: CoreSchema, disconnected: set[str] | None = None
    ) -> None:
        """Drop resources tied to disconnected vertices.

        A resource is removed when its root actor references a disconnected
        vertex, or when no actor references any vertex once descendants that
        mention disconnected vertices have been removed from its actor tree.

        Args:
            core_schema: Used to compute the default ``disconnected`` set as
                ``vertex_config.vertex_set - edge_config.vertices``.
            disconnected: Explicit set of disconnected vertex names; computed
                from ``core_schema`` when ``None``.
        """
        if disconnected is None:
            disconnected = (
                core_schema.vertex_config.vertex_set - core_schema.edge_config.vertices
            )
        if not disconnected:
            return

        def _mentions_disconnected(wrapper: ActorWrapper) -> bool:
            return bool(wrapper.actor.references_vertices() & disconnected)

        def _should_drop(cfg: ResourceConfig) -> bool:
            tree = ActorWrapper(*cfg.pipeline)
            if _mentions_disconnected(tree):
                return True
            tree.remove_descendants_if(_mentions_disconnected)
            # Nothing left that touches a vertex: the resource is useless.
            return not any(a.references_vertices() for a in tree.collect_actors())

        doomed: list[ResourceConfig] = [
            cfg for cfg in self.resources if _should_drop(cfg)
        ]
        if not doomed:
            return

        for cfg in doomed:
            self.resources.remove(cfg)
            self._resources.pop(cfg.name, None)
            self._runtimes.pop(cfg.name, None)
        self._rebuild_config_state()

fetch_resource(name=None)

Fetch an initialized runtime resource by name; when no name is given, the first initialized runtime is returned.

Source code in graflo/architecture/contract/ingestion/model.py
def fetch_resource(self, name: str | None = None) -> ResourceRuntime:
    """Return a built runtime, by name or the first one if ``name`` is None.

    Raises:
        ValueError: If ``name`` is unknown, or if no resources exist at all.
        RuntimeError: If resources are declared but no runtime was built.
    """
    if name is None:
        if self._runtimes:
            # No name requested: hand back the first runtime in the map.
            return next(iter(self._runtimes.values()))
        if self.resources:
            raise RuntimeError(
                "IngestionModel resources exist but runtimes were not built; "
                "call finish_init() first."
            )
        raise ValueError("Empty resource container :(")

    found = self._runtimes.get(name)
    if found is None:
        raise ValueError(f"Resource {name} not found")
    return found

fetch_resource_config(name)

Fetch declarative resource config by name.

Source code in graflo/architecture/contract/ingestion/model.py
def fetch_resource_config(self, name: str) -> ResourceConfig:
    """Return the declarative config registered under ``name``.

    Raises:
        ValueError: If no resource with that name is registered.
    """
    found = self._resources.get(name)
    if found is not None:
        return found
    raise ValueError(f"Resource {name} not found")

finish_init(core_schema, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None)

Build per-resource runtimes against graph model and transform library.

Source code in graflo/architecture/contract/ingestion/model.py
def finish_init(
    self,
    core_schema: CoreSchema,
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
    allowed_vertex_names: set[str] | None = None,
    target_db_flavor: DBType | None = None,
) -> None:
    """Create one ``ResourceRuntime`` per declared resource.

    The lookup maps are refreshed first, then every resource config is bound
    to the vertex/edge configuration of ``core_schema`` and to the transform
    library. The resulting mapping replaces ``_runtimes``.

    Args:
        core_schema: Supplies ``vertex_config`` and ``edge_config``.
        strict_references: Forwarded to each ``ResourceRuntime``.
        dynamic_edge_feedback: Forwarded to each ``ResourceRuntime``.
        allowed_vertex_names: Forwarded to each ``ResourceRuntime``.
        target_db_flavor: Forwarded to each ``ResourceRuntime``.
    """
    self._rebuild_config_state()
    built: dict[str, ResourceRuntime] = {
        cfg.name: ResourceRuntime(
            cfg,
            vertex_config=core_schema.vertex_config,
            edge_config=core_schema.edge_config,
            transforms=self._transforms,
            strict_references=strict_references,
            dynamic_edge_feedback=dynamic_edge_feedback,
            allowed_vertex_names=allowed_vertex_names,
            target_db_flavor=target_db_flavor,
        )
        for cfg in self.resources
    }
    object.__setattr__(self, "_runtimes", built)

prune_to_graph(core_schema, disconnected=None)

Drop resources whose pipeline references disconnected vertices, or whose pipeline no longer references any vertex once such actors are removed.

Source code in graflo/architecture/contract/ingestion/model.py
def prune_to_graph(
    self, core_schema: CoreSchema, disconnected: set[str] | None = None
) -> None:
    """Drop resources tied to disconnected vertices.

    A resource is removed when its root actor references a disconnected
    vertex, or when no actor references any vertex once descendants that
    mention disconnected vertices have been removed from its actor tree.

    Args:
        core_schema: Used to compute the default ``disconnected`` set as
            ``vertex_config.vertex_set - edge_config.vertices``.
        disconnected: Explicit set of disconnected vertex names; computed
            from ``core_schema`` when ``None``.
    """
    if disconnected is None:
        disconnected = (
            core_schema.vertex_config.vertex_set - core_schema.edge_config.vertices
        )
    if not disconnected:
        return

    def _mentions_disconnected(wrapper: ActorWrapper) -> bool:
        return bool(wrapper.actor.references_vertices() & disconnected)

    def _should_drop(cfg: ResourceConfig) -> bool:
        tree = ActorWrapper(*cfg.pipeline)
        if _mentions_disconnected(tree):
            return True
        tree.remove_descendants_if(_mentions_disconnected)
        # Nothing left that touches a vertex: the resource is useless.
        return not any(a.references_vertices() for a in tree.collect_actors())

    doomed: list[ResourceConfig] = [
        cfg for cfg in self.resources if _should_drop(cfg)
    ]
    if not doomed:
        return

    for cfg in doomed:
        self.resources.remove(cfg)
        self._resources.pop(cfg.name, None)
        self._runtimes.pop(cfg.name, None)
    self._rebuild_config_state()

KeySelectionConfig

Bases: ConfigBaseModel

Selection of document keys for key-target transforms.

Source code in graflo/architecture/contract/ingestion/transform.py
class KeySelectionConfig(ConfigBaseModel):
    """Selection of document keys for key-target transforms.

    ``mode`` chooses between every key, an allow-list or a deny-list, and
    ``names`` carries the keys for the latter two modes.
    """

    mode: Literal["all", "include", "exclude"] = Field(
        default="all",
        description=(
            "How keys are selected for target='keys': all=all keys, "
            "include=only specified keys, exclude=all except specified keys."
        ),
    )
    names: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Keys used by include/exclude modes.",
    )

    @model_validator(mode="before")
    @classmethod
    def _normalize_names(cls, data: Any) -> Any:
        """Coerce ``names`` to a tuple before field validation.

        A single string becomes a one-element tuple, a list becomes a tuple,
        and a missing/``None`` value becomes the empty tuple. Non-dict input
        is passed through untouched; dict input is copied, never mutated.
        """
        if isinstance(data, dict):
            normalized = dict(data)
            raw = normalized.get("names")
            if raw is None:
                normalized["names"] = ()
            elif isinstance(raw, str):
                # Wrap the string itself; tuple(raw) would split it into characters.
                normalized["names"] = (raw,)
            elif isinstance(raw, list):
                normalized["names"] = tuple(raw)
            return normalized
        return data

    @model_validator(mode="after")
    def _validate_mode_names(self) -> Self:
        """Enforce that ``names`` is set exactly when the mode needs it.

        Raises:
            ValueError: If names are given with ``mode='all'``, or omitted
                with ``mode='include'`` / ``mode='exclude'``.
        """
        if self.mode == "all":
            if self.names:
                raise ValueError("keys.names must be empty when keys.mode='all'.")
        elif self.mode in ("include", "exclude") and not self.names:
            raise ValueError(
                "keys.names must be provided when keys.mode is include/exclude."
            )
        return self

ProtoTransform

Bases: ConfigBaseModel

Base class for transform definitions.

This class provides the foundation for data transformations, supporting both functional transformations and declarative mappings.

Attributes:

Name Type Description
name str | None

Optional name of the transform

module str | None

Optional module containing the transform function

params dict[str, Any]

Dictionary of transform parameters

foo str | None

Optional name of the transform function

input tuple[str, ...]

Tuple of input field names

output tuple[str, ...]

Tuple of output field names

dress DressConfig | None

Optional pivot dressing for scalar functional results

target Literal['values', 'keys']

Whether to transform field values or document keys

keys KeySelectionConfig

Key selection when target is keys

_foo Any

Internal reference to the transform function

Source code in graflo/architecture/contract/ingestion/transform.py
class ProtoTransform(ConfigBaseModel):
    """Base class for transform definitions.

    This class provides the foundation for data transformations, supporting both
    functional transformations and declarative mappings.

    Attributes:
        name: Optional name of the transform
        module: Optional module containing the transform function
        params: Dictionary of transform parameters
        foo: Optional name of the transform function
        input: Tuple of input field names
        output: Tuple of output field names
        input_groups: Explicit groups of input fields for grouped value calls
        output_groups: Output field groups aligned with ``input_groups``
        dress: Optional pivot dressing for scalar functional results
        target: Whether to transform field values or document keys
        keys: Key selection when target is keys
        _foo: Internal reference to the transform function
    """

    name: str | None = Field(
        default=None,
        description="Optional name for this transform (e.g. for reference in ingestion_model.transforms).",
    )
    module: str | None = Field(
        default=None,
        description="Python module path containing the transform function (e.g. my_package.transforms).",
    )
    params: dict[str, Any] = Field(
        default_factory=dict,
        description="Extra parameters passed to the transform function at runtime.",
    )
    foo: str | None = Field(
        default=None,
        description="Name of the callable in module to use as the transform function.",
    )
    input: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Input field names passed to the transform function.",
    )
    output: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Output field names produced by the transform (defaults to input if unset).",
    )
    input_groups: tuple[tuple[str, ...], ...] = Field(
        default_factory=tuple,
        description=(
            "Explicit groups of input fields for repeated tuple-style value calls."
        ),
    )
    output_groups: tuple[tuple[str, ...], ...] = Field(
        default_factory=tuple,
        description=(
            "Explicit output field groups aligned with input_groups for grouped value calls."
        ),
    )
    dress: DressConfig | None = Field(
        default=None,
        description=(
            "Dressing spec for pivoted output. Applies to ingestion_model.transforms "
            "entries and to inline transform steps. "
            "dress.key receives the input field name, dress.value receives the "
            "function result. E.g. dress={key: name, value: value} with "
            "input=(Open,) produces {name: 'Open', value: <result>}."
        ),
    )
    target: Literal["values", "keys"] = Field(
        default="values",
        description=(
            "Transform target. values=apply function to input values; "
            "keys=apply function to selected document keys."
        ),
    )
    keys: KeySelectionConfig = Field(
        default_factory=KeySelectionConfig,
        description="Key selection for key-target transforms.",
    )

    # Resolved callable; stays None when module/foo are not both provided.
    _foo: Any = PrivateAttr(default=None)

    @model_validator(mode="before")
    @classmethod
    def _normalize_input_output(cls, data: Any) -> Any:
        """Normalize raw input before field validation.

        Works on a shallow copy of ``data`` (non-dict input is returned
        unchanged). ``input`` / ``output`` and the ``*_groups`` fields are
        converted to tuples via the module helpers, with ``None`` mapped to
        the empty tuple.

        Raises:
            ValueError: If ``dress`` is given as a list or tuple.
        """
        if not isinstance(data, dict):
            return data
        data = dict(data)
        if "dress" in data and isinstance(data["dress"], (list, tuple)):
            raise ValueError(
                "List-style `dress` is no longer supported. "
                "Use a dict: dress={key: ..., value: ...}."
            )
        for key in ("input", "output"):
            if key in data:
                data[key] = _tuple_it(data[key]) if data[key] is not None else ()
        for key in ("input_groups", "output_groups"):
            if key in data:
                data[key] = (
                    _tuple_groups_it(data[key]) if data[key] is not None else ()
                )
        _normalize_keys_in_dict(data)
        return data

    @model_validator(mode="after")
    def _init_foo_and_output(self) -> Self:
        """Resolve the transform callable and derive the default ``output``.

        When both ``module`` and ``foo`` are set, the module is imported and
        the attribute named ``foo`` is stored in ``_foo``. ``output`` is then
        forced to ``(dress.key, dress.value)`` when ``dress`` is set, and
        otherwise falls back to ``input`` when left empty.

        Raises:
            TypeError: If ``module`` cannot be imported.
            ValueError: If ``foo`` cannot be read from the module, or if
                ``dress`` is combined with ``target='keys'``.
        """
        if self.module is not None and self.foo is not None:
            try:
                _module = importlib.import_module(self.module)
            except Exception as e:
                # Chain the cause so the original import traceback is preserved.
                raise TypeError(
                    f"Provided module {self.module} is not valid: {e}"
                ) from e
            try:
                object.__setattr__(self, "_foo", getattr(_module, self.foo))
            except Exception as e:
                raise ValueError(
                    f"Could not instantiate transform function. Exception: {e}"
                ) from e
        if self.dress is not None:
            if self.target == "keys":
                raise ValueError("target='keys' is not compatible with dress.")
            object.__setattr__(self, "output", (self.dress.key, self.dress.value))
        elif not self.output and self.input:
            object.__setattr__(self, "output", self.input)
        return self

    @classmethod
    def get_fields_members(cls) -> list[str]:
        """Get list of field members (public model fields)."""
        return list(cls.model_fields.keys())

    def apply(self, *args: Any, **kwargs: Any) -> Any:
        """Apply the raw transform function to the given arguments.

        This is the core function invocation without any input extraction or
        output dressing — purely ``self._foo(*args, **kwargs, **self.params)``.

        Raises:
            TransformException: If no transform function has been set.
        """
        if self._foo is None:
            raise TransformException("No transform function set")
        return self._foo(*args, **kwargs, **self.params)

    def __lt__(self, other: object) -> bool:
        """Compare transforms for ordering.

        A transform without a resolved function sorts before one that has a
        function; every other combination compares as not-less-than.

        Args:
            other: Other transform to compare with

        Returns:
            bool: True if this transform should be ordered before other
        """
        if not isinstance(other, ProtoTransform):
            return NotImplemented
        return self._foo is None and other._foo is not None

__lt__(other)

Compare transforms for ordering.

Parameters:

Name Type Description Default
other object

Other transform to compare with

required

Returns:

Name Type Description
bool bool

True if this transform should be ordered before other

Source code in graflo/architecture/contract/ingestion/transform.py
def __lt__(self, other: object) -> bool:
    """Order transforms so that function-less ones come first.

    Args:
        other: Object to compare against.

    Returns:
        bool: True only when this transform has no resolved function while
        ``other`` has one; ``NotImplemented`` for non-``ProtoTransform``
        operands.
    """
    if isinstance(other, ProtoTransform):
        return self._foo is None and other._foo is not None
    return NotImplemented

apply(*args, **kwargs)

Apply the raw transform function to the given arguments.

This is the core function invocation without any input extraction or output dressing — purely self._foo(*args, **kwargs, **self.params).

Raises:

Type Description
TransformException

If no transform function has been set.

Source code in graflo/architecture/contract/ingestion/transform.py
def apply(self, *args: Any, **kwargs: Any) -> Any:
    """Invoke the underlying transform function directly.

    No input extraction or output dressing happens here: positional and
    keyword arguments are forwarded as given, with ``self.params`` merged in
    as additional keyword arguments.

    Raises:
        TransformException: If no transform function has been set.
    """
    if self._foo is not None:
        return self._foo(*args, **kwargs, **self.params)
    raise TransformException("No transform function set")

get_fields_members() classmethod

Get list of field members (public model fields).

Source code in graflo/architecture/contract/ingestion/transform.py
@classmethod
def get_fields_members(cls) -> list[str]:
    """Return the public model field names, in ``cls.model_fields`` order."""
    return [*cls.model_fields]

ResourceConfig

Bases: ConfigBaseModel

Declarative resource definition (serializable contract).

Source code in graflo/architecture/contract/ingestion/resource.py
class ResourceConfig(ConfigBaseModel):
    """Declarative resource definition (serializable contract).

    Describes one ingestion resource: its actor pipeline, input handling
    options, edge-inference policy and error-tolerance switches. Unknown keys
    are rejected (``extra="forbid"``).
    """

    model_config = {"extra": "forbid"}

    # --- identity and pipeline -------------------------------------------
    name: str = PydanticField(
        ...,
        description="Name of the resource (e.g. table or file identifier).",
    )
    pipeline: list[dict[str, Any]] = PydanticField(
        ...,
        description="Pipeline of actor steps to apply in sequence (vertex, edge, transform, descend). "
        'Each step is a dict, e.g. {"vertex": "user"} or {"edge": {"from": "a", "to": "b"}}.',
        validation_alias=AliasChoices("pipeline", "apply"),
    )

    # --- input / output handling -----------------------------------------
    encoding: EncodingType = PydanticField(
        default=EncodingType.UTF_8,
        description="Character encoding for input/output (e.g. utf-8, ISO-8859-1).",
    )
    merge_collections: list[str] = PydanticField(
        default_factory=list,
        description="List of collection names to merge when writing to the graph.",
    )
    extra_weights: list[ResourceExtraWeightEntry] = PydanticField(
        default_factory=list,
        description="Additional edge attribute / vertex-weight enrichment for this resource.",
    )
    types: dict[str, str] = PydanticField(
        default_factory=dict,
        description='Field name to Python type expression for casting (e.g. {"amount": "float"}).',
    )

    # --- edge inference policy -------------------------------------------
    infer_edges: bool = PydanticField(
        default=True,
        description=(
            "If True, infer edges from current vertex population. "
            "If False, emit only edges explicitly declared as edge actors in the pipeline."
        ),
    )
    infer_edge_only: list[EdgeInferSpec] = PydanticField(
        default_factory=list,
        description=(
            "Optional allow-list for inferred edges. Applies only to inferred (greedy) edges, "
            "not explicit edge actors."
        ),
    )
    infer_edge_except: list[EdgeInferSpec] = PydanticField(
        default_factory=list,
        description=(
            "Optional deny-list for inferred edges. Applies only to inferred (greedy) edges, "
            "not explicit edge actors."
        ),
    )

    # --- robustness switches ---------------------------------------------
    drop_trivial_input_fields: bool = PydanticField(
        default=False,
        description=(
            "If True, remove top-level input keys whose value is None or the empty string before "
            "the actor pipeline runs."
        ),
    )
    fail_fast: bool = PydanticField(
        default=False,
        description=(
            "If True, a transform step fails when required input keys are missing in the "
            "current document (rename: all source keys must be present; call: all input keys). "
            "If False (default), rename applies only to keys present in the document and "
            "functional transforms skip the step when inputs are missing."
        ),
    )
    tolerate_transform_errors: bool = PydanticField(
        default=True,
        description=(
            "If True, a failing transform step sets its declared output fields to None, "
            "records the error, and continues the pipeline."
        ),
    )

    @model_validator(mode="after")
    def _validate_policy(self) -> ResourceConfig:
        """Reject configs that set both the allow-list and the deny-list.

        Raises:
            ValueError: If ``infer_edge_only`` and ``infer_edge_except`` are
                both non-empty.
        """
        allow_and_deny = bool(self.infer_edge_only) and bool(self.infer_edge_except)
        if allow_and_deny:
            raise ValueError(
                "Resource infer_edge_only and infer_edge_except are mutually exclusive."
            )
        return self

    def collect_vertex_names(self) -> set[str]:
        """Gather every vertex name this resource refers to.

        Starts from the names found in the pipeline, then adds merge
        collections, the endpoints of both edge-inference selector lists, and
        the edge endpoints plus named vertex weights of ``extra_weights``.
        """
        names = collect_vertex_names_from_pipeline(self.pipeline)
        names.update(self.merge_collections)
        for selector in (*self.infer_edge_only, *self.infer_edge_except):
            names.update((selector.source, selector.target))
        for entry in self.extra_weights:
            names.update((entry.edge.source, entry.edge.target))
            names.update(
                weight.name
                for weight in entry.vertex_weights
                if weight.name is not None
            )
        return names

    def pipeline_actor_count(self) -> int:
        """Return the actor count of the pipeline, without binding a schema."""
        # Imported at call time, as in the original listing.
        from graflo.architecture.pipeline.runtime.actor import ActorWrapper

        root = ActorWrapper(*self.pipeline)
        return root.count()

collect_vertex_names()

Vertex types referenced by this resource (pipeline and related config).

Source code in graflo/architecture/contract/ingestion/resource.py
def collect_vertex_names(self) -> set[str]:
    """Gather every vertex name this resource refers to.

    Starts from the names found in the pipeline, then adds merge collections,
    the endpoints of both edge-inference selector lists, and the edge
    endpoints plus named vertex weights of ``extra_weights``.
    """
    names = collect_vertex_names_from_pipeline(self.pipeline)
    names.update(self.merge_collections)
    for selector in (*self.infer_edge_only, *self.infer_edge_except):
        names.update((selector.source, selector.target))
    for entry in self.extra_weights:
        names.update((entry.edge.source, entry.edge.target))
        names.update(
            weight.name for weight in entry.vertex_weights if weight.name is not None
        )
    return names

pipeline_actor_count()

Count actors in the pipeline without binding schema context.

Source code in graflo/architecture/contract/ingestion/resource.py
def pipeline_actor_count(self) -> int:
    """Return the actor count of the pipeline, without binding a schema."""
    # Imported at call time, as in the original listing.
    from graflo.architecture.pipeline.runtime.actor import ActorWrapper

    root = ActorWrapper(*self.pipeline)
    return root.count()

ResourceExtraWeightEntry

Bases: ConfigBaseModel

Schema edge plus optional vertex-derived weight rules for DB enrichment.

Source code in graflo/architecture/contract/ingestion/resource.py
class ResourceExtraWeightEntry(ConfigBaseModel):
    """Schema edge plus optional vertex-derived weight rules for DB enrichment.

    Attributes:
        edge: The schema edge this entry enriches.
        vertex_weights: Weight rules attached to the edge (may be empty).
    """

    edge: Edge
    vertex_weights: list[Weight] = PydanticField(default_factory=list)

    @model_validator(mode="before")
    @classmethod
    def _from_yaml(cls, data: Any) -> Any:
        """Normalize the accepted input shapes to ``{"edge", "vertex_weights"}``.

        Accepted shapes:
            * ``None`` - passed through unchanged.
            * an ``Edge`` instance - used as the edge, with no vertex weights.
            * a mapping with an ``"edge"`` entry (a dict or an ``Edge``) and an
              optional ``"vertex_weights"`` entry; no other keys are allowed.
            * a flat mapping of edge fields with an optional
              ``"vertex_weights"`` entry; everything else is validated as the
              edge.

        ``vertex_weights`` may be a single item or a list; a missing or empty
        value yields an empty list.

        Raises:
            TypeError: If ``data`` is neither ``None``, an ``Edge`` nor a dict.
            ValueError: If an ``"edge"`` entry is combined with unexpected keys.
        """
        if data is None:
            return data
        if isinstance(data, Edge):
            return {"edge": data, "vertex_weights": []}
        if not isinstance(data, dict):
            raise TypeError(
                f"extra_weights item must be dict or Edge, got {type(data)}"
            )
        d = dict(data)  # work on a copy; the caller's mapping is not mutated
        vw_raw = d.pop("vertex_weights", None) or []
        if not isinstance(vw_raw, list):
            vw_raw = [vw_raw]
        v_w = [Weight.model_validate(x) for x in vw_raw]
        # An already-built Edge under "edge" (e.g. keyword construction) is
        # accepted as-is instead of being re-validated as if the wrapping
        # mapping were the edge's own field dict.
        if "edge" in d and isinstance(d["edge"], (dict, Edge)):
            raw_edge = d.pop("edge")
            if isinstance(raw_edge, Edge):
                edge = raw_edge
            else:
                edge = Edge.model_validate(dict(raw_edge))
            if d:
                raise ValueError(
                    f"extra_weights entry has unexpected keys with 'edge': {sorted(d)}"
                )
            return {"edge": edge, "vertex_weights": v_w}
        edge = Edge.model_validate(d)
        return {"edge": edge, "vertex_weights": v_w}

ResourceRuntime

Fully initialized resource executor for document casting.

Source code in graflo/architecture/contract/runtime/resource.py
class ResourceRuntime:
    """Executable form of a resource configuration, used to cast documents.

    Construction builds the actor tree from the configured pipeline, checks
    vertex and edge references, initializes the actors and finally registers
    the resource's extra weights.
    """

    def __init__(
        self,
        config: ResourceConfig,
        vertex_config: VertexConfig,
        edge_config: EdgeConfig,
        transforms: dict[str, ProtoTransform],
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
        allowed_vertex_names: set[str] | None = None,
        target_db_flavor: DBType | None = None,
    ) -> None:
        """Build, validate and initialize the runtime for ``config``.

        Args:
            config: Declarative resource definition (pipeline, types, flags).
            vertex_config: Schema vertex configuration.
            edge_config: Schema edge configuration.
            transforms: Named transforms passed on to the actor init context.
            strict_references: Forwarded to the actor init context.
            dynamic_edge_feedback: When true, edges present in the runtime's
                local edge config but absent from ``edge_config`` are copied
                back into ``edge_config`` after actor initialization.
            allowed_vertex_names: Optional vertex-name restriction used when
                filtering the vertex config and forwarded to the actors.
            target_db_flavor: Forwarded to the actor init context.
        """
        self.config = config
        self._type_casters = resolve_type_casters(config.types)
        self._root = ActorWrapper(*config.pipeline)
        self._executor = ActorExecutor(self._root)

        self._vertex_config, self._edge_config = self._filter_vertex_edge_configs(
            vertex_config,
            edge_config,
            allowed_vertex_names=allowed_vertex_names,
        )

        # Vertex references are checked against the unfiltered schema config,
        # edge selectors against the runtime-local edge config.
        self._validate_vertex_references(vertex_config)
        self._validate_infer_edge_spec_targets(self._edge_config)

        self._edge_derivation_registry = EdgeDerivationRegistry()

        init_ctx = self._build_init_context(
            transforms=transforms,
            edge_derivation=self._edge_derivation_registry,
            infer_edge_except=self._build_infer_except(),
            strict_references=strict_references,
            allowed_vertex_names=allowed_vertex_names,
            target_db_flavor=target_db_flavor,
        )
        logger.debug("total resource actor count : %s", self._root.count())
        self._root.finish_init(init_ctx=init_ctx)

        if dynamic_edge_feedback:
            self._propagate_dynamic_edges(edge_config, vertex_config=vertex_config)

        logger.debug("total resource actor count (after init): %s", self._root.count())
        self._init_extra_weights(vertex_config)

    @property
    def name(self) -> str:
        """Name taken from the resource config."""
        return self.config.name

    @property
    def vertex_config(self) -> VertexConfig:
        """Vertex config after filtering for this resource."""
        return self._vertex_config

    @property
    def edge_config(self) -> EdgeConfig:
        """Runtime-local copy of the edge config."""
        return self._edge_config

    @property
    def root(self) -> ActorWrapper:
        """Root actor wrapper built from the pipeline."""
        return self._root

    @property
    def type_casters(self) -> dict[str, Callable[..., Any]]:
        """Type casters resolved from ``config.types``."""
        return self._type_casters

    def collect_vertex_names(self) -> set[str]:
        """Delegate to ``config.collect_vertex_names()``."""
        return self.config.collect_vertex_names()

    def count(self) -> int:
        """Return the actor count reported by the root wrapper."""
        return self._root.count()

    @staticmethod
    def edge_ids_from_pipeline(pipeline: list[dict[str, Any]]) -> set[EdgeId]:
        """Return ``(source, target, None)`` for each EdgeActor in *pipeline* that has an edge."""
        edge_ids: set[EdgeId] = set()
        for actor in ActorWrapper(*pipeline).collect_actors():
            # Only edge actors that actually carry an edge contribute an id.
            if not isinstance(actor, EdgeActor) or actor.edge is None:
                continue
            edge_ids.add((actor.edge.source, actor.edge.target, None))
        return edge_ids

    def _filter_vertex_edge_configs(
        self,
        vertex_config: VertexConfig,
        edge_config: EdgeConfig,
        *,
        allowed_vertex_names: set[str] | None,
    ) -> tuple[VertexConfig, EdgeConfig]:
        """Return the resource-scoped vertex config and a separate edge config instance.

        The edge config is rebuilt from its own dict form, so the returned
        object is distinct from the one passed in.
        """
        scoped_vertices = filter_vertex_config_for_resource(
            vertex_config,
            resource_vertex_names=self.collect_vertex_names(),
            allowed_vertex_names=allowed_vertex_names,
        )
        edge_payload = edge_config.to_dict(skip_defaults=False)
        return scoped_vertices, EdgeConfig.model_validate(edge_payload)

    def _validate_vertex_references(self, vertex_config: VertexConfig) -> None:
        """Raise ``ValueError`` if edge selectors or pipeline edges name unknown vertices."""
        known = set(vertex_config.vertex_set)
        referenced: set[str] = set()

        # Selector lists first, then the edges declared in the pipeline.
        for spec in (*self.config.infer_edge_only, *self.config.infer_edge_except):
            referenced.update((spec.source, spec.target))
        for source, target, _relation in self.edge_ids_from_pipeline(
            self.config.pipeline
        ):
            referenced.update((source, target))

        missing = sorted(referenced - known)
        if not missing:
            return
        raise ValueError(
            "Resource dynamic edge references undefined vertices: "
            f"{missing}. "
            "Declare these vertices in vertex_config before using dynamic/inferred edges."
        )

    def _validate_infer_edge_spec_targets(self, edge_config: EdgeConfig) -> None:
        """Raise ``ValueError`` if a selector matches no edge id in *edge_config*.

        ``infer_edge_only`` is checked before ``infer_edge_except``; the first
        list containing unmatched selectors triggers the error.
        """
        known_edge_ids = {eid for eid, _edge in edge_config.items()}

        selector_lists = (
            ("infer_edge_only", self.config.infer_edge_only),
            ("infer_edge_except", self.config.infer_edge_except),
        )
        for field_name, specs in selector_lists:
            unknown: list[EdgeId] = [
                spec.edge_id
                for spec in specs
                if not any(spec.matches(eid) for eid in known_edge_ids)
            ]
            if unknown:
                raise ValueError(
                    f"Resource {field_name} contains unknown edge selectors: {unknown}"
                )

    def _build_infer_except(self) -> set[EdgeId]:
        """Return edge ids to exclude from inference.

        Always contains the configured ``infer_edge_except`` ids; when no
        ``infer_edge_only`` list is configured, the ids of edges declared in
        the pipeline are added as well.
        """
        excluded = {spec.edge_id for spec in self.config.infer_edge_except}
        if self.config.infer_edge_only:
            return excluded
        return excluded | self.edge_ids_from_pipeline(self.config.pipeline)

    def _build_init_context(
        self,
        *,
        transforms: dict[str, ProtoTransform],
        edge_derivation: EdgeDerivationRegistry,
        infer_edge_except: set[EdgeId],
        strict_references: bool,
        allowed_vertex_names: set[str] | None,
        target_db_flavor: DBType | None,
    ) -> ActorInitContext:
        """Assemble the ``ActorInitContext`` handed to ``finish_init``."""
        cfg = self.config
        only_ids = {spec.edge_id for spec in cfg.infer_edge_only}
        return ActorInitContext(
            vertex_config=self._vertex_config,
            edge_config=self._edge_config,
            transforms=transforms,
            edge_derivation=edge_derivation,
            allowed_vertex_names=allowed_vertex_names,
            infer_edges=cfg.infer_edges,
            infer_edge_only=only_ids,
            infer_edge_except=infer_edge_except,
            strict_references=strict_references,
            fail_fast=cfg.fail_fast,
            tolerate_transform_errors=cfg.tolerate_transform_errors,
            target_db_flavor=target_db_flavor,
        )

    def _propagate_dynamic_edges(
        self,
        edge_config: EdgeConfig,
        *,
        vertex_config: VertexConfig,
    ) -> None:
        """Copy edges that exist only in the local edge config into *edge_config*."""
        # Snapshot taken once, before any update, so it reflects the caller's
        # config as it was on entry.
        already_present = {eid for eid, _edge in edge_config.items()}
        for edge_id, edge in self._edge_config.items():
            if edge_id not in already_present:
                edge_config.update_edges(
                    edge.model_copy(deep=True), vertex_config=vertex_config
                )

    def _init_extra_weights(self, vertex_config: VertexConfig) -> None:
        """Finish-init each extra-weight edge and register its vertex weights."""
        registry = self._edge_derivation_registry
        for entry in self.config.extra_weights:
            entry.edge.finish_init(vertex_config)
            if registry is None or not entry.vertex_weights:
                continue
            registry.merge_vertex_weights(entry.edge.edge_id, entry.vertex_weights)

    def cast_document(self, doc: dict) -> ResourceCastResult:
        """Cast *doc* and return its entities together with tolerated transform failures."""
        work_doc: dict[str, Any]
        if self.config.drop_trivial_input_fields:
            work_doc = strip_trivial_top_level_fields(doc)
        else:
            # Shallow copy; the caller's dict object itself is not passed on.
            work_doc = dict(doc)

        if self._type_casters:
            apply_type_casters(work_doc, self._type_casters)

        extraction_ctx = self._executor.extract(work_doc)
        assembled = self._executor.assemble_result(extraction_ctx)
        return ResourceCastResult(
            entities=assembled.entities,
            transform_failures=list(extraction_ctx.transform_failures),
        )

    def __call__(self, doc: dict) -> defaultdict[GraphEntity, list]:
        """Shorthand for ``cast_document(doc).entities``."""
        return self.cast_document(doc).entities

cast_document(doc)

Process a document and return entities plus any tolerated transform failures.

Source code in graflo/architecture/contract/runtime/resource.py
def cast_document(self, doc: dict) -> ResourceCastResult:
    """Cast *doc* and return its entities together with tolerated transform failures."""
    work_doc: dict[str, Any]
    if self.config.drop_trivial_input_fields:
        work_doc = strip_trivial_top_level_fields(doc)
    else:
        # Shallow copy; the caller's dict object itself is not passed on.
        work_doc = dict(doc)

    if self._type_casters:
        apply_type_casters(work_doc, self._type_casters)

    extraction_ctx = self._executor.extract(work_doc)
    assembled = self._executor.assemble_result(extraction_ctx)
    return ResourceCastResult(
        entities=assembled.entities,
        transform_failures=list(extraction_ctx.transform_failures),
    )

edge_ids_from_pipeline(pipeline) staticmethod

Collect (source, target, None) for every static EdgeActor in pipeline.

Source code in graflo/architecture/contract/runtime/resource.py
@staticmethod
def edge_ids_from_pipeline(pipeline: list[dict[str, Any]]) -> set[EdgeId]:
    """Return ``(source, target, None)`` for each EdgeActor in *pipeline* that has an edge."""
    edge_ids: set[EdgeId] = set()
    for actor in ActorWrapper(*pipeline).collect_actors():
        # Only edge actors that actually carry an edge contribute an id.
        if not isinstance(actor, EdgeActor) or actor.edge is None:
            continue
        edge_ids.add((actor.edge.source, actor.edge.target, None))
    return edge_ids

Transform

Bases: ProtoTransform

Concrete transform implementation.

Wraps a ProtoTransform with input extraction, output dressing, field mapping, and transform composition.

Attributes:

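| Name | Type | Description |
| --- | --- | --- |
| fields | tuple[str, ...] | Field names to transform; used as the input when `input` is not set. |
| rename | dict[str, str] | Mapping of input field names to output field names (pure renaming, no function). |
| functional_transform | bool | True when a callable is configured; False for pure map/dress transforms. |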

Source code in graflo/architecture/contract/ingestion/transform.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
class Transform(ProtoTransform):
    """Concrete transform implementation.

    Wraps a ProtoTransform with input extraction, output dressing, field
    mapping, and transform composition.

    Several attributes used below (``input``, ``output``, ``dress``,
    ``target``, ``keys``, ``input_groups``, ``output_groups``, ``params``,
    ``name``, ``_foo``, ``apply``) are not declared in this class; they are
    presumably inherited from ``ProtoTransform`` -- confirm against the base
    class.

    Attributes:
        fields: Tuple of fields to transform; used as input when ``input`` is
            not set.
        rename: Dictionary mapping input fields to output fields
        strategy: Functional call strategy: ``"single"`` (one call with all
            input values), ``"each"`` (one call per input field) or ``"all"``
            (full document passed as a single argument).
        passthrough_group_output: In grouped mode without explicit outputs,
            map function results back to the input group keys.
        functional_transform: Whether this is a functional transform
    """

    fields: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Field names for declarative transform (used to derive input when input unset).",
    )
    rename: dict[str, str] = Field(
        default_factory=dict,
        description="Mapping of input_key -> output_key for pure field renaming (no function).",
    )
    strategy: Literal["single", "each", "all"] = Field(
        default="single",
        description=(
            "Functional call strategy. "
            "single: call function once with all input values. "
            "each: call function once per input field (unary). "
            "all: pass full document as a single argument."
        ),
    )
    passthrough_group_output: bool = Field(
        default=True,
        description=(
            "When grouped mode omits outputs, map function results back to input group keys."
        ),
    )

    # Overwritten in ``_init_derived`` from whether ``_foo`` is set.
    functional_transform: bool = Field(
        default=False,
        description="True when a callable (module.foo) is set; False for pure map/dress transforms.",
    )

    @model_validator(mode="before")
    @classmethod
    def _normalize_fields(cls, data: Any) -> Any:
        """Pre-validation hook: normalize ``fields`` and reject legacy ``switch``.

        Non-dict input is returned untouched. For dict input a shallow copy is
        made, a non-None ``fields`` value is passed through ``_tuple_it``, and
        the presence of a ``switch`` key raises ``ValueError``.
        """
        if not isinstance(data, dict):
            return data
        # Copy so the caller's dict is not modified.
        data = dict(data)
        if "fields" in data and data["fields"] is not None:
            data["fields"] = _tuple_it(data["fields"])
        if "switch" in data:
            raise ValueError(
                "Legacy `switch` is no longer supported. Use `input` + `dress`."
            )
        return data

    @model_validator(mode="after")
    def _init_derived(self) -> Self:
        """Post-validation hook: derive effective input/output, then validate.

        Sets ``functional_transform`` from whether ``_foo`` is set, stores the
        derived ``input``/``output`` and runs ``_validate_configuration``.
        The derived map is computed but not stored.
        """
        # Captured before derivation; ``rename`` itself is never reassigned here.
        explicit_map = bool(self.rename)
        # Attributes are written with object.__setattr__ -- presumably to
        # bypass the model's own assignment handling; confirm.
        object.__setattr__(self, "functional_transform", self._foo is not None)
        next_input, next_output, _next_map = self._derive_effective_io_and_map()
        object.__setattr__(self, "input", next_input)
        object.__setattr__(self, "output", next_output)
        self._validate_configuration(explicit_map=explicit_map)
        return self

    def _derive_grouped_default_output(self) -> tuple[str, ...]:
        """Return the default output for grouped mode.

        When ``input_groups`` is set, neither ``output`` nor ``output_groups``
        is given, ``passthrough_group_output`` is true and every group holds
        exactly one field, the result is the tuple of those field names.
        In every other case ``self.output`` is returned unchanged.
        """
        if not self.input_groups or self.output or self.output_groups:
            return self.output
        if not self.passthrough_group_output:
            return self.output
        scalar_names: list[str] = []
        for group in self.input_groups:
            # A multi-field group has no single name to pass through.
            if len(group) != 1:
                return self.output
            scalar_names.append(group[0])
        return tuple(scalar_names) if scalar_names else self.output

    def _derive_effective_io_and_map(
        self,
    ) -> tuple[tuple[str, ...], tuple[str, ...], dict[str, str]]:
        """Compute effective input/output/map once using explicit precedence.

        Returns:
            ``(input, output, map)``. Nothing is stored on ``self``.
        """
        next_input = self.input
        next_output = self._derive_grouped_default_output()
        next_map = dict(self.rename)

        # 1. ``fields`` fills in a missing input.
        if self.fields and not next_input:
            next_input = self.fields

        # 2. An explicit rename map fills in whichever of input/output is
        #    still missing (keys -> input, values -> output).
        if next_map:
            if not next_input and not next_output:
                next_input = tuple(next_map.keys())
                next_output = tuple(next_map.values())
            elif not next_input:
                next_input = tuple(next_map.keys())
            elif not next_output:
                next_output = tuple(next_map.values())

        # 3. ``dress`` always dictates the output names; otherwise a missing
        #    output defaults to the input names.
        if self.dress is not None:
            next_output = (self.dress.key, self.dress.value)
        elif not next_output and next_input:
            next_output = next_input

        # 4. Without an explicit map, pair input and output positionally when
        #    their lengths agree.
        if (
            not next_map
            and next_input
            and next_output
            and len(next_input) == len(next_output)
        ):
            next_map = {src: dst for src, dst in zip(next_input, next_output)}

        return next_input, next_output, next_map

    def _init_io_from_map(self, force_init: bool = False) -> None:
        """Backwards-compatible shim; prefer sync_io_from_map().

        Fills ``input``/``output`` from the keys/values of ``rename``. With
        ``force_init`` (or when both are empty) both are overwritten;
        otherwise only the missing one is filled. Does nothing when
        ``rename`` is empty.

        NOTE(review): ``sync_io_from_map`` is not defined on this class --
        confirm it exists on the base class.
        """
        if not self.rename:
            return
        map_input = tuple(self.rename.keys())
        map_output = tuple(self.rename.values())
        if force_init or (not self.input and not self.output):
            object.__setattr__(self, "input", map_input)
            object.__setattr__(self, "output", map_output)
            return
        if not self.input:
            object.__setattr__(self, "input", map_input)
        elif not self.output:
            object.__setattr__(self, "output", map_output)

    def _validate_configuration(self, *, explicit_map: bool) -> None:
        """Validate that the transform has enough information to operate.

        Args:
            explicit_map: True when ``rename`` was non-empty at the start of
                ``_init_derived``.

        Raises:
            ValueError: On any incompatible or insufficient combination of
                options; the first failing check wins.
        """
        # target='keys' has its own, exclusive rule set and returns early.
        if self.target == "keys":
            if self.input_groups or self.output_groups:
                raise ValueError(
                    "target='keys' does not accept input_groups/output_groups."
                )
            if self._foo is None:
                raise ValueError("target='keys' requires a functional transform.")
            if self.rename:
                raise ValueError("target='keys' cannot be combined with map.")
            if self.input or self.output or self.fields:
                raise ValueError(
                    "target='keys' does not accept input/output/fields; use keys selector."
                )
            if self.dress is not None:
                raise ValueError("target='keys' is not compatible with dress.")
            if self.strategy != "single":
                raise ValueError(
                    "target='keys' uses implicit per-key execution and does not accept strategy."
                )
            return

        # Reject only user-specified map+function conflict. A derived map
        # (from input/output defaults) is valid for functional transforms.
        if explicit_map and self.rename and self._foo is not None:
            raise ValueError("map and functional transform cannot be used together.")
        if self.dress is not None:
            if len(self.input) != 1:
                raise ValueError("dress requires exactly one input field.")
        if self.strategy != "single" and self._foo is None:
            raise ValueError("strategy applies only to functional transforms.")
        # Grouped mode: functional only, default strategy, and mutually
        # exclusive with input/fields/map/dress.
        if self.input_groups:
            if self._foo is None:
                raise ValueError(
                    "input_groups requires a functional transform (module + foo)."
                )
            if self.strategy != "single":
                raise ValueError(
                    "input_groups mode is explicit grouped execution and does not accept strategy."
                )
            if self.input or self.fields:
                raise ValueError("input_groups cannot be combined with input/fields.")
            if self.rename:
                raise ValueError("input_groups cannot be combined with map.")
            if self.dress is not None:
                raise ValueError("input_groups is not compatible with dress.")
            if self.output_groups and self.output:
                raise ValueError(
                    "Provide either output or output_groups for input_groups mode, not both."
                )
            if self.output_groups and len(self.output_groups) != len(self.input_groups):
                raise ValueError(
                    "output_groups must have same number of groups as input_groups."
                )
            if self.output and len(self.output) != len(self.input_groups):
                raise ValueError(
                    "When using input_groups with scalar outputs, output length must match number of input_groups."
                )
        elif self.output_groups:
            raise ValueError("output_groups requires input_groups.")
        # A function needs input unless it receives the whole document
        # (strategy 'all') or works on input_groups.
        if self._foo is not None and not self.input:
            if self.strategy != "all" and not self.input_groups:
                raise ValueError(
                    "Functional transforms require `input` (string or list of field names)."
                )
        if self.strategy == "all":
            if self.input or self.fields:
                raise ValueError("strategy='all' does not accept input/fields.")
            if self.dress is not None:
                raise ValueError("strategy='all' is not compatible with dress.")
        if self.strategy == "each":
            if not self.input:
                raise ValueError("strategy='each' requires one or more input fields.")
            if self.output and len(self.input) != len(self.output):
                raise ValueError(
                    "strategy='each' requires output length to match input length."
                )
        # Pure mapping (no function, no dress) pairs fields positionally.
        if (
            self._foo is None
            and self.dress is None
            and self.input
            and self.output
            and len(self.input) != len(self.output)
        ):
            raise ValueError(
                "Non-functional transforms require input/output to have the same length."
            )
        # Nothing at all was specified (a functional strategy='all' transform
        # is exempt because it needs no field names).
        if (
            not self.input
            and not self.output
            and not self.input_groups
            and not self.output_groups
            and not self.name
            and not (self._foo is not None and self.strategy == "all")
        ):
            raise ValueError(
                "Either input/output, fields, map or name must be provided in "
                "Transform constructor."
            )

    def _refresh_derived(self) -> None:
        """Re-run derived input/output after mutating attributes (merge_from).

        NOTE(review): as written, every path through this method returns
        without changing any state, so it currently has no effect.
        """
        if self.rename or not self.input or not self.output:
            return
        if len(self.input) != len(self.output):
            return

    def __call__(self, *nargs: Any, **kwargs: Any) -> dict[str, Any] | Any:
        """Execute the transform.

        Dispatch order: ``target == "keys"``, then ``input_groups``, then pure
        mapping (no function), then functional execution according to
        ``strategy``. When the first positional argument is a dict it is
        treated as the input document and values are read from it by field
        name; otherwise the positional arguments are used as values directly.

        Args:
            *nargs: Positional arguments for the transform
            **kwargs: Keyword arguments for the transform

        Returns:
            dict: Transformed data. When ``output`` is empty the raw
            value(s) are returned instead of a dict.

        Raises:
            TransformException: If ``target == "keys"`` or ``input_groups`` is
                used without a document dict as first argument.
        """
        if self.target == "keys":
            input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
            if input_doc is None:
                raise TransformException(
                    "target='keys' requires a document dictionary."
                )
            return self._transform_keys(input_doc, **kwargs)

        if self.input_groups:
            input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
            if input_doc is None:
                raise TransformException(
                    "input_groups transforms require a document dictionary."
                )
            return self._transform_input_groups(input_doc, **kwargs)

        if self.is_mapping:
            # NOTE(review): indexes nargs[0] without checking that any
            # positional argument was passed.
            input_doc = nargs[0]
            if isinstance(input_doc, dict):
                if self.rename:
                    # Rename only the source keys that are present; missing
                    # ones are skipped rather than raising.
                    present = {
                        self.rename[src]: input_doc[src]
                        for src in self.rename
                        if src in input_doc
                    }
                    return present
                output_values = [input_doc[k] for k in self.input]
            else:
                output_values = list(nargs)
            if self.dress is not None and len(output_values) == 1:
                # Non-functional dress shorthand: keep scalar value.
                output_values = output_values[0]
        else:
            if self.strategy == "all":
                # The whole document (or the raw arguments) goes to the function.
                if nargs and isinstance(nargs[0], dict):
                    output_values = self.apply(nargs[0], **kwargs)
                else:
                    output_values = self.apply(*nargs, **kwargs)
            elif self.strategy == "each":
                # One function call per input value.
                if nargs and isinstance(input_doc := nargs[0], dict):
                    output_values = [
                        self.apply(input_doc[k], **kwargs) for k in self.input
                    ]
                else:
                    output_values = [self.apply(value, **kwargs) for value in nargs]
            else:
                # strategy == "single": one call with all input values.
                if nargs and isinstance(input_doc := nargs[0], dict):
                    new_args = [input_doc[k] for k in self.input]
                    output_values = self.apply(*new_args, **kwargs)
                else:
                    output_values = self.apply(*nargs, **kwargs)

        if self.output:
            r = self._dress_as_dict(output_values)
        else:
            r = output_values
        return r

    def _apply_grouped_result(
        self,
        out: dict[str, Any],
        result: Any,
        input_group: tuple[str, ...],
        output_group: tuple[str, ...] | None,
        *,
        group_index: int,
    ) -> None:
        """Write one group's function result into ``out`` (mutated in place).

        Output keys are chosen in this order: the explicit ``output_group``,
        else ``self.output[group_index]``, else the names in ``input_group``.

        Args:
            out: Accumulator dict shared across groups.
            result: Value returned by the function for this group.
            input_group: Field names that were read for this group.
            output_group: Explicit output names for this group, or None.
            group_index: Position of the group, used for lookup and messages.

        Raises:
            TransformException: If the number of produced values does not
                match the number of target names, or if a key was already
                written to ``out``.
        """
        if output_group is not None:
            # A non-sequence result counts as a single value.
            if isinstance(result, (list, tuple)):
                values = list(result)
            else:
                values = [result]
            if len(values) != len(output_group):
                raise TransformException(
                    f"input_groups[{group_index}] produced {len(values)} values, "
                    f"but output_groups[{group_index}] expects {len(output_group)}."
                )
            pairs = zip(output_group, values)
        elif self.output:
            # Scalar outputs: the whole result goes under one name per group.
            pairs = ((self.output[group_index], result),)
        else:
            # No outputs configured: write back under the input names.
            if isinstance(result, (list, tuple)):
                values = list(result)
                if len(values) != len(input_group):
                    raise TransformException(
                        f"input_groups[{group_index}] has {len(input_group)} fields, "
                        f"but transform returned {len(values)} values. "
                        "Provide output/output_groups explicitly to resolve mapping."
                    )
                pairs = zip(input_group, values)
            else:
                if len(input_group) != 1:
                    raise TransformException(
                        f"input_groups[{group_index}] has {len(input_group)} fields "
                        "but transform returned a scalar. "
                        "Provide output/output_groups explicitly for scalar group results."
                    )
                pairs = ((input_group[0], result),)
        for key, value in pairs:
            if key in out:
                raise TransformException(
                    f"Grouped transform produced duplicate output key '{key}'."
                )
            out[key] = value

    def _transform_input_groups(
        self, doc: dict[str, Any], **kwargs: Any
    ) -> dict[str, Any]:
        """Apply the function once per input group and collect the results.

        Every field of a group is read from ``doc`` with plain indexing, so a
        missing field raises ``KeyError``.
        """
        out: dict[str, Any] = {}
        for idx, input_group in enumerate(self.input_groups):
            values = [doc[k] for k in input_group]
            result = self.apply(*values, **kwargs)
            output_group = self.output_groups[idx] if self.output_groups else None
            self._apply_grouped_result(
                out,
                result,
                input_group,
                output_group,
                group_index=idx,
            )
        return out

    @property
    def is_mapping(self) -> bool:
        """True when the transform is pure mapping (no function)."""
        return self._foo is None

    def planned_output_field_names(
        self, doc: dict[str, Any] | None = None
    ) -> tuple[str, ...]:
        """Return output field names this transform would write on success.

        Args:
            doc: Optional input document. It is needed for ``target == "keys"``
                (without it the result is empty) and narrows a rename map to
                the source keys actually present.

        Returns:
            A tuple of names; empty when they cannot be determined up front.
        """
        if self.target == "keys":
            if doc is None:
                return ()
            # Returns the selected keys as found by ``_selected_keys``, i.e.
            # before the key function is applied.
            return tuple(sorted(self._selected_keys(doc)))

        if self.input_groups:
            if self.output_groups:
                names: list[str] = []
                for group in self.output_groups:
                    names.extend(group)
                # dict.fromkeys removes duplicates while keeping first-seen order.
                return tuple(dict.fromkeys(names))
            if self.output:
                return self.output
            scalar_names: list[str] = []
            for group in self.input_groups:
                if len(group) != 1:
                    return ()
                scalar_names.append(group[0])
            return tuple(scalar_names)

        if self.dress is not None:
            return (self.dress.key, self.dress.value)

        if self.rename:
            if doc is None:
                return tuple(self.rename.values())
            return tuple(self.rename[src] for src in self.rename if src in doc)

        if self.output:
            return self.output

        return ()

    def _dress_as_dict(self, transform_result: Any) -> dict[str, Any]:
        """Convert transform result to dictionary format.

        When ``dress`` is set the result is pivoted: the input field name is
        stored under ``dress.key`` and the function result under ``dress.value``.
        Otherwise the result is mapped positionally to ``output`` fields.

        A list/tuple result is zipped with ``output``, so surplus items on
        either side are dropped. Any other result is stored under the last
        name in ``output``.
        """
        if self.dress is not None:
            return {
                self.dress.key: self.input[0],
                self.dress.value: transform_result,
            }
        elif isinstance(transform_result, (list, tuple)):
            return {k: v for k, v in zip(self.output, transform_result)}
        else:
            return {self.output[-1]: transform_result}

    def _selected_keys(self, doc: dict[str, Any]) -> set[str]:
        """Return the set of keys selected by ``self.keys``.

        Mode ``"all"`` selects every key of ``doc``; mode ``"include"``
        returns ``keys.names`` as given (whether or not they occur in
        ``doc``); any other mode selects the keys of ``doc`` that are not in
        ``keys.names``.
        """
        if self.keys.mode == "all":
            return set(doc.keys())
        selected = set(self.keys.names)
        if self.keys.mode == "include":
            return selected
        return {k for k in doc if k not in selected}

    def _transform_keys(self, doc: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
        """Return a copy of ``doc`` with the function applied to selected keys.

        Values are carried over unchanged; unselected keys keep their name.

        Raises:
            TransformException: If a resulting key is not a ``str`` or if two
                entries end up with the same key.
        """
        selected = self._selected_keys(doc)
        out: dict[str, Any] = {}
        for key, value in doc.items():
            new_key = self.apply(key, **kwargs) if key in selected else key
            if not isinstance(new_key, str):
                raise TransformException(
                    "Key transform functions must return str values."
                )
            if new_key in out:
                raise TransformException(
                    f"Key transform collision detected for key '{new_key}'."
                )
            out[new_key] = value
        return out

    @property
    def is_dummy(self) -> bool:
        """Check if this is a dummy transform.

        A transform is a dummy when it has a name but neither a rename map
        nor a function.

        Returns:
            bool: True if this is a dummy transform
        """
        return self.name is not None and not self.rename and self._foo is None

    def merge_from(self, t: Transform) -> Transform:
        """Merge another transform's configuration into a copy of it.

        Returns a new Transform with values from self overriding t where set.
        Does not override ConfigBaseModel.update (in-place); use this for
        copy-and-merge semantics.

        Only ``input``, ``output`` and ``params`` are taken from self;
        ``params`` are merged key by key with self's values winning.

        Args:
            t: Transform to merge from

        Returns:
            Transform: New transform with merged configuration
        """
        # Deep copy so neither ``t`` nor ``self`` is modified.
        t_copy = deepcopy(t)
        if self.input:
            t_copy.input = self.input
        if self.output:
            t_copy.output = self.output
        if self.params:
            t_copy.params = {**t_copy.params, **self.params}
        t_copy._refresh_derived()
        return t_copy

    def get_barebone(
        self, other: Transform | None
    ) -> tuple[Transform | None, Transform | None]:
        """Get the barebone transform configuration.

        Three cases:

        * self has ``foo`` set: returns ``(None, self)``.
        * otherwise, ``other`` has ``foo`` set: returns a new Transform built
          from other's non-default settings overlaid with self's (minus
          ``foo``/``module``), and ``None``.
        * otherwise: returns ``(None, None)``.

        Args:
            other: Optional transform to use as base

        Returns:
            tuple[Transform | None, Transform | None]: Updated self transform
            and transform to store in library
        """
        self_param = self.to_dict(exclude_defaults=True)
        # NOTE(review): this method reads ``foo`` while the rest of the class
        # checks ``_foo`` -- confirm both are meant.
        if self.foo is not None:
            # self will be the lib transform
            return None, self
        elif other is not None and other.foo is not None:
            # init self from other
            self_param.pop("foo", None)
            self_param.pop("module", None)
            other_param = other.to_dict(exclude_defaults=True)
            other_param.update(self_param)
            return Transform(**other_param), None
        else:
            return None, None

is_dummy property

Check if this is a dummy transform.

Returns:

Name Type Description
bool bool

True if this is a dummy transform

is_mapping property

True when the transform is a pure mapping (no function).

__call__(*nargs, **kwargs)

Execute the transform.

Parameters:

Name Type Description Default
*nargs Any

Positional arguments for the transform

()
**kwargs Any

Keyword arguments for the transform

{}

Returns:

Name Type Description
dict dict[str, Any] | Any

Transformed data

Source code in graflo/architecture/contract/ingestion/transform.py
def __call__(self, *nargs: Any, **kwargs: Any) -> dict[str, Any] | Any:
    """Execute the transform.

    Dispatch order (first match wins):

    1. ``target == "keys"``: delegate to ``_transform_keys``.
    2. ``input_groups`` is set: delegate to ``_transform_input_groups``.
    3. ``is_mapping``: no function is applied; values are renamed or passed
       through.
    4. Otherwise ``apply`` is invoked according to ``strategy``.

    Args:
        *nargs: Positional arguments for the transform. When the first one is
            a dict it is treated as the input document.
        **kwargs: Keyword arguments for the transform; forwarded to the
            delegated helper or to ``apply`` (unused in the mapping branch)

    Returns:
        dict: Transformed data. When ``output`` is empty the raw result is
        returned without being dressed as a dict.

    Raises:
        TransformException: If ``target == "keys"`` or ``input_groups`` is used
            without a document dictionary as first positional argument.
    """
    # Key-targeting transforms operate on the document itself.
    if self.target == "keys":
        input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
        if input_doc is None:
            raise TransformException(
                "target='keys' requires a document dictionary."
            )
        return self._transform_keys(input_doc, **kwargs)

    # Grouped inputs likewise need the whole document.
    if self.input_groups:
        input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
        if input_doc is None:
            raise TransformException(
                "input_groups transforms require a document dictionary."
            )
        return self._transform_input_groups(input_doc, **kwargs)

    if self.is_mapping:
        # NOTE(review): nargs[0] is not guarded here, so calling a mapping
        # transform with no positional arguments raises IndexError.
        input_doc = nargs[0]
        if isinstance(input_doc, dict):
            if self.rename:
                # Only source keys present in the document are emitted; the
                # result is returned directly, bypassing the dressing below.
                present = {
                    self.rename[src]: input_doc[src]
                    for src in self.rename
                    if src in input_doc
                }
                return present
            # A missing input key surfaces as KeyError from the lookup.
            output_values = [input_doc[k] for k in self.input]
        else:
            # Non-dict call: positional arguments are the values themselves.
            output_values = list(nargs)
        if self.dress is not None and len(output_values) == 1:
            # Non-functional dress shorthand: keep scalar value.
            output_values = output_values[0]
    else:
        if self.strategy == "all":
            # The function receives the whole document (or all positional
            # arguments) in a single call.
            if nargs and isinstance(nargs[0], dict):
                output_values = self.apply(nargs[0], **kwargs)
            else:
                output_values = self.apply(*nargs, **kwargs)
        elif self.strategy == "each":
            # The function is called once per input field / positional
            # argument; results are collected in a list.
            if nargs and isinstance(input_doc := nargs[0], dict):
                output_values = [
                    self.apply(input_doc[k], **kwargs) for k in self.input
                ]
            else:
                output_values = [self.apply(value, **kwargs) for value in nargs]
        else:
            # Any other strategy: the selected input fields are unpacked as
            # positional arguments of a single call.
            if nargs and isinstance(input_doc := nargs[0], dict):
                new_args = [input_doc[k] for k in self.input]
                output_values = self.apply(*new_args, **kwargs)
            else:
                output_values = self.apply(*nargs, **kwargs)

    # Package the result under the configured output names when there are any.
    if self.output:
        r = self._dress_as_dict(output_values)
    else:
        r = output_values
    return r

get_barebone(other)

Get the barebone transform configuration.

Parameters:

Name Type Description Default
other Transform | None

Optional transform to use as base

required

Returns:

Type Description
Transform | None

Updated self transform (first element of the returned tuple)

Transform | None

Transform to store in the library (second element of the returned tuple)

Source code in graflo/architecture/contract/ingestion/transform.py
def get_barebone(
    self, other: Transform | None
) -> tuple[Transform | None, Transform | None]:
    """Resolve this transform against an optional base transform.

    Three outcomes are possible:

    * ``self`` carries a function (``foo``): ``self`` is the library
      transform and is returned as the second element.
    * ``self`` has no function but ``other`` does: a new transform is built
      from the non-default settings of ``other`` overlaid with those of
      ``self`` and returned as the first element.
    * Otherwise nothing can be resolved.

    Args:
        other: Optional transform to use as base

    Returns:
        tuple[Transform | None, Transform | None]: ``(updated self
        transform, transform to store in library)``; either or both
        elements may be None
    """
    own_settings = self.to_dict(exclude_defaults=True)

    if self.foo is not None:
        return None, self
    if other is None or other.foo is None:
        return None, None

    # Function identity comes from `other`; drop any such keys of our own
    # so they cannot shadow it in the overlay below.
    for inherited_key in ("foo", "module"):
        own_settings.pop(inherited_key, None)
    merged_settings = other.to_dict(exclude_defaults=True)
    merged_settings.update(own_settings)
    return Transform(**merged_settings), None

merge_from(t)

Merge another transform's configuration into a copy of it.

Returns a new Transform with values from self overriding t where set. Does not override ConfigBaseModel.update (in-place); use this for copy-and-merge semantics.

Parameters:

Name Type Description Default
t Transform

Transform to merge from

required

Returns:

Name Type Description
Transform Transform

New transform with merged configuration

Source code in graflo/architecture/contract/ingestion/transform.py
def merge_from(self, t: Transform) -> Transform:
    """Build a merged transform from ``t`` with this transform's overrides.

    ``t`` is deep-copied first, so ``t`` itself is left untouched. On the
    copy, ``input`` and ``output`` are replaced by the values of ``self``
    whenever those are non-empty, and the ``params`` of ``self`` are layered
    on top of the copied ``params``. This is the copy-and-merge counterpart
    of the in-place ConfigBaseModel.update, which it does not override.

    Args:
        t: Transform providing the base configuration

    Returns:
        Transform: New transform with merged configuration
    """
    merged = deepcopy(t)
    for field_name in ("input", "output"):
        override = getattr(self, field_name)
        if override:
            setattr(merged, field_name, override)
    if self.params:
        # Entries from self win over entries inherited from t.
        merged.params = {**merged.params, **self.params}
    # Attributes changed after the copy, so derived state is recomputed.
    merged._refresh_derived()
    return merged

planned_output_field_names(doc=None)

Return output field names this transform would write on success.

Source code in graflo/architecture/contract/ingestion/transform.py
def planned_output_field_names(
    self, doc: dict[str, Any] | None = None
) -> tuple[str, ...]:
    """Return output field names this transform would write on success.

    The answer depends on the transform mode, checked in this order:

    * ``target == "keys"``: the sorted keys selected from ``doc`` (nothing
      when no ``doc`` is given).
    * ``input_groups``: the de-duplicated names in ``output_groups``, else
      ``output``, else the single member of every input group (nothing if
      any group does not have exactly one member).
    * ``dress``: the dress ``key`` and ``value`` field names.
    * ``rename``: the rename targets, limited to sources present in ``doc``
      when a ``doc`` is given.
    * otherwise ``output`` (or nothing when it is empty).

    Args:
        doc: Optional input document used to narrow the prediction

    Returns:
        tuple[str, ...]: Planned output field names
    """
    if self.target == "keys":
        return () if doc is None else tuple(sorted(self._selected_keys(doc)))

    if self.input_groups:
        if self.output_groups:
            flattened = (name for group in self.output_groups for name in group)
            # dict.fromkeys drops duplicates while keeping first-seen order.
            return tuple(dict.fromkeys(flattened))
        if self.output:
            return self.output
        if any(len(group) != 1 for group in self.input_groups):
            return ()
        return tuple(group[0] for group in self.input_groups)

    dress = self.dress
    if dress is not None:
        return (dress.key, dress.value)

    renames = self.rename
    if renames:
        if doc is None:
            return tuple(renames.values())
        return tuple(
            renamed for source, renamed in renames.items() if source in doc
        )

    return self.output if self.output else ()

TransformException

Bases: Exception

Base exception for transform-related errors.

Source code in graflo/architecture/contract/ingestion/transform.py
class TransformException(Exception):
    """Common base class for errors raised while configuring or running transforms."""

build_resource_runtime(config, vertex_config, edge_config, transforms=None, *, strict_references=False, dynamic_edge_feedback=False, allowed_vertex_names=None, target_db_flavor=None)

Construct a fully initialized :class:ResourceRuntime from declarative config.

Source code in graflo/architecture/contract/runtime/resource.py
def build_resource_runtime(
    config: ResourceConfig,
    vertex_config: VertexConfig,
    edge_config: EdgeConfig,
    transforms: dict[str, ProtoTransform] | None = None,
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
    allowed_vertex_names: set[str] | None = None,
    target_db_flavor: DBType | None = None,
) -> ResourceRuntime:
    """Create a :class:`ResourceRuntime` for the given declarative ``config``.

    A missing or empty ``transforms`` mapping is replaced by a fresh empty
    dict; every other argument is handed to :class:`ResourceRuntime` unchanged.
    """
    transform_library = transforms or {}
    runtime_options = {
        "strict_references": strict_references,
        "dynamic_edge_feedback": dynamic_edge_feedback,
        "allowed_vertex_names": allowed_vertex_names,
        "target_db_flavor": target_db_flavor,
    }
    return ResourceRuntime(
        config,
        vertex_config,
        edge_config,
        transform_library,
        **runtime_options,
    )

collect_vertex_names_from_pipeline(steps)

Collect vertex names referenced by pipeline steps (including nested descend).

Source code in graflo/architecture/contract/ingestion/resource.py
def collect_vertex_names_from_pipeline(steps: list[Any]) -> set[str]:
    """Gather every vertex name mentioned by the given pipeline steps.

    Non-dict steps are skipped. Each dict step is normalized via
    ``normalize_actor_step`` and inspected by its ``type``:

    * ``vertex``: the ``vertex`` value.
    * ``vertex_router``: the values of ``type_map`` and the keys of
      ``vertex_from_map``.
    * ``edge``: ``source``/``from``, ``target``/``to`` and the ``name`` of each
      ``vertex_weights`` entry.
    * ``descend``: names from the nested ``pipeline``, collected recursively.

    Only string names are collected.
    """
    collected: set[str] = set()
    for raw_step in steps:
        if not isinstance(raw_step, dict):
            continue
        # Normalize a copy so the caller's step dict is not handed out.
        step = normalize_actor_step(dict(raw_step))
        kind = step.get("type")

        if kind == "vertex":
            vertex_name = step.get("vertex")
            if isinstance(vertex_name, str):
                collected.add(vertex_name)
        elif kind == "vertex_router":
            type_map = step.get("type_map")
            if isinstance(type_map, dict):
                collected.update(
                    mapped for mapped in type_map.values() if isinstance(mapped, str)
                )
            vertex_from_map = step.get("vertex_from_map")
            if isinstance(vertex_from_map, dict):
                collected.update(
                    key for key in vertex_from_map if isinstance(key, str)
                )
        elif kind == "edge":
            endpoints = (
                step.get("source") or step.get("from"),
                step.get("target") or step.get("to"),
            )
            collected.update(
                endpoint for endpoint in endpoints if isinstance(endpoint, str)
            )
            weights = step.get("vertex_weights")
            if isinstance(weights, list):
                collected.update(
                    entry["name"]
                    for entry in weights
                    if isinstance(entry, dict) and isinstance(entry.get("name"), str)
                )
        elif kind == "descend":
            nested = step.get("pipeline")
            if isinstance(nested, list):
                collected.update(collect_vertex_names_from_pipeline(nested))
    return collected

filter_vertex_config_for_resource(vertex_config, *, resource_vertex_names, allowed_vertex_names)

Derive a filtered VertexConfig for runtime actor execution.

Source code in graflo/architecture/contract/runtime/resource.py
def filter_vertex_config_for_resource(
    vertex_config: VertexConfig,
    *,
    resource_vertex_names: set[str],
    allowed_vertex_names: set[str] | None,
) -> VertexConfig:
    """Narrow ``vertex_config`` to the vertices relevant for one resource.

    The retained vertex names are:

    * ``resource_vertex_names`` intersected with ``allowed_vertex_names`` (when
      the latter is given), if the resource references any vertices;
    * otherwise ``allowed_vertex_names``, if given;
    * otherwise no filtering happens and ``vertex_config`` is returned as is.

    Args:
        vertex_config: Full vertex configuration
        resource_vertex_names: Vertex names referenced by the resource
        allowed_vertex_names: Optional allowlist of vertex names

    Returns:
        VertexConfig: The original config when nothing restricts it, else a new
        config holding only the retained vertices and their ``force_types``
    """
    if not resource_vertex_names and allowed_vertex_names is None:
        return vertex_config

    if resource_vertex_names:
        keep = set(resource_vertex_names)
        if allowed_vertex_names is not None:
            keep &= allowed_vertex_names
    else:
        keep = set(allowed_vertex_names)

    kept_vertices = [
        vertex for vertex in vertex_config.vertices if vertex.name in keep
    ]
    kept_force_types = {}
    for vertex_name, forced in vertex_config.force_types.items():
        if vertex_name in keep:
            kept_force_types[vertex_name] = forced

    return VertexConfig(
        vertices=kept_vertices,
        force_types=kept_force_types,
        identity_from_all_properties=vertex_config.identity_from_all_properties,
    )

resolve_type_caster(name)

Resolve a type caster by name from a strict allowlist.

Source code in graflo/util/casting.py
def resolve_type_caster(name: str) -> Callable[..., Any] | None:
    """Look up a type caster by name, restricted to ``SAFE_TYPE_CASTERS``.

    ``name`` is matched against the allowlist directly first. A qualified name
    of the form ``builtins.<attr>`` is accepted as well, provided ``<attr>`` is
    a callable builtin that is itself on the allowlist.

    Args:
        name: Caster name, plain or ``builtins.``-qualified

    Returns:
        The allowlisted caster, or None when ``name`` is not a string or does
        not resolve to an allowlisted caster
    """
    if not isinstance(name, str):
        return None

    direct = SAFE_TYPE_CASTERS.get(name)
    if direct is not None:
        return direct

    # Split on the first dot only; an empty separator means there was no dot.
    module_name, dot, attr_name = name.partition(".")
    if not dot or module_name != "builtins":
        return None

    builtin_attr = getattr(builtins, attr_name, None)
    if callable(builtin_attr) and attr_name in SAFE_TYPE_CASTERS:
        return SAFE_TYPE_CASTERS[attr_name]
    return None

strip_trivial_top_level_fields(doc)

Return a shallow copy of doc without None or empty-string values.

Source code in graflo/architecture/contract/runtime/resource.py
def strip_trivial_top_level_fields(doc: dict[str, Any]) -> dict[str, Any]:
    """Copy *doc*, leaving out top-level entries whose value is None or ``""``.

    The copy is shallow: retained values are the same objects as in *doc*, and
    nested containers are not inspected.
    """
    kept: dict[str, Any] = {}
    for field_name, value in doc.items():
        if value is not None and value != "":
            kept[field_name] = value
    return kept