Skip to content

graflo.architecture.contract

Declarative contracts: manifest, bindings, ingestion models, resources, transforms.

Bindings

Bases: ConfigBaseModel

Named resource connectors with explicit resource linkage.

Source code in graflo/architecture/contract/bindings/core.py
class Bindings(ConfigBaseModel):
    """Named resource connectors with explicit resource linkage."""

    # Concrete connector definitions (file / table / SPARQL sources).
    connectors: list[FileConnector | TableConnector | SparqlConnector] = Field(
        default_factory=list
    )
    # Accept dict entries at init-time (see validators below).
    # Internally and at runtime, Graflo uses typed lists derived from these.
    resource_connector: list[ResourceConnectorBinding | dict[str, str]] = Field(
        default_factory=list
    )
    # Connector -> runtime endpoint config indirection (proxy by name).
    connector_connection: list[ConnectorConnectionBinding | dict[str, str]] = Field(
        default_factory=list
    )
    # Typed mirrors of the two public fields above, rebuilt by the
    # after-model validator so internal code never handles raw dicts.
    _resource_connector_typed: list[ResourceConnectorBinding] = PrivateAttr(
        default_factory=list
    )
    _connector_connection_typed: list[ConnectorConnectionBinding] = PrivateAttr(
        default_factory=list
    )
    # Lookup indexes derived from `connectors`: hash -> connector object.
    _connectors_index: dict[str, ResourceConnector] = PrivateAttr(default_factory=dict)
    # connector name -> connector hash (only for connectors that carry a name).
    _connectors_name_index: dict[str, str] = PrivateAttr(default_factory=dict)
    # resource name -> connector hash.
    _resource_to_connector_hash: dict[str, str] = PrivateAttr(default_factory=dict)
    # connector hash -> runtime conn-proxy name.
    _connector_to_conn_proxy: dict[str, str] = PrivateAttr(default_factory=dict)

    @property
    def connector_connection_bindings(
        self,
    ) -> list[ConnectorConnectionBinding]:
        """Typed view of ``connector_connection`` (never contains raw dicts)."""
        # Expose typed entries for downstream components (type-checker friendly).
        return self._connector_connection_typed

    def _rebuild_indexes(self) -> None:
        """Rebuild the hash and name indexes from ``self.connectors``.

        Raises:
            ValueError: On a connector hash collision, or when the same name
                is used by two connectors with different hashes.
        """
        self._connectors_index = {}
        self._connectors_name_index = {}
        for connector in self.connectors:
            existing = self._connectors_index.get(connector.hash)
            if existing is not None:
                raise ValueError(
                    "Connector hash collision detected for connectors "
                    f"'{type(existing).__name__}' and '{type(connector).__name__}' "
                    f"(hash='{connector.hash}')."
                )
            self._connectors_index[connector.hash] = connector

            if connector.name:
                existing_hash = self._connectors_name_index.get(connector.name)
                # Re-registering the same connector under the same name is fine;
                # only a different hash under an existing name is an error.
                if existing_hash is not None and existing_hash != connector.hash:
                    raise ValueError(
                        "Connector names must be unique when provided. "
                        f"Duplicate connector name '{connector.name}'."
                    )
                self._connectors_name_index[connector.name] = connector.hash

    @field_validator("resource_connector", mode="before")
    @classmethod
    def _coerce_resource_connector_entries(
        cls, v: Any
    ) -> list[ResourceConnectorBinding]:
        """Normalize ``resource_connector`` input to typed binding objects.

        Accepts ``None`` (-> empty list), already-typed bindings, or dicts with
        'resource' and 'connector' keys; anything else raises ValueError with
        the offending index for config-author-friendly diagnostics.
        """
        if v is None:
            return []
        if not isinstance(v, list):
            raise ValueError(
                "resource_connector must be a list of {resource, connector} entries"
            )

        coerced: list[ResourceConnectorBinding] = []
        for i, item in enumerate(v):
            if isinstance(item, ResourceConnectorBinding):
                coerced.append(item)
                continue

            if isinstance(item, dict):
                missing = [k for k in ("resource", "connector") if k not in item]
                if missing:
                    raise ValueError(
                        f"Invalid resource_connector entry at index {i}: missing required keys {missing}. "
                        "Expected keys: ['resource', 'connector']."
                    )

                try:
                    coerced.append(ResourceConnectorBinding.model_validate(item))
                except Exception as e:  # noqa: BLE001
                    # Keep the message concise and contextual; nested pydantic
                    # errors can be noisy for config authors.
                    raise ValueError(
                        f"Invalid resource_connector entry at index {i}: {item!r}."
                    ) from e
                continue

            raise ValueError(
                f"Invalid resource_connector entry at index {i}: expected dict or "
                f"ResourceConnectorBinding, got {type(item).__name__}."
            )

        return coerced

    @field_validator("connector_connection", mode="before")
    @classmethod
    def _coerce_connector_connection_entries(
        cls, v: Any
    ) -> list[ConnectorConnectionBinding]:
        """Normalize ``connector_connection`` input to typed binding objects.

        Mirrors ``_coerce_resource_connector_entries`` but requires the
        'connector' and 'conn_proxy' keys on dict entries.
        """
        if v is None:
            return []
        if not isinstance(v, list):
            raise ValueError(
                "connector_connection must be a list of {connector, conn_proxy} entries"
            )

        coerced: list[ConnectorConnectionBinding] = []
        for i, item in enumerate(v):
            if isinstance(item, ConnectorConnectionBinding):
                coerced.append(item)
                continue

            if isinstance(item, dict):
                missing = [k for k in ("connector", "conn_proxy") if k not in item]
                if missing:
                    raise ValueError(
                        f"Invalid connector_connection entry at index {i}: missing required keys {missing}. "
                        "Expected keys: ['connector', 'conn_proxy']."
                    )
                try:
                    coerced.append(ConnectorConnectionBinding.model_validate(item))
                except Exception as e:  # noqa: BLE001
                    raise ValueError(
                        f"Invalid connector_connection entry at index {i}: {item!r}."
                    ) from e
                continue

            raise ValueError(
                f"Invalid connector_connection entry at index {i}: expected dict or "
                f"ConnectorConnectionBinding, got {type(item).__name__}."
            )

        return coerced

    @staticmethod
    def default_connector_name(connector: ResourceConnector) -> str:
        """Derive a name for a connector that lacks an explicit one.

        Falls back to a type-specific identifier: file regex/sub_path,
        table name, or SPARQL rdf_class.

        Raises:
            TypeError: For connector types not handled here.
        """
        if connector.name:
            return connector.name
        if isinstance(connector, FileConnector):
            return connector.regex or str(connector.sub_path)
        if isinstance(connector, TableConnector):
            return connector.table_name
        if isinstance(connector, SparqlConnector):
            return connector.rdf_class
        raise TypeError(f"Unsupported connector type: {type(connector)!r}")

    @model_validator(mode="after")
    def _populate_resource_connector(self) -> Self:
        """Post-init hook: build indexes, typed views, and resource links.

        Raises:
            ValueError: When a resource maps to two different connectors, or
                when a resource_connector entry references an unknown connector.
        """
        self._rebuild_indexes()
        self._resource_to_connector_hash = {}

        # Create typed views so internal code never has to handle dicts.
        self._resource_connector_typed = [
            ResourceConnectorBinding.model_validate(m) if isinstance(m, dict) else m
            for m in self.resource_connector
        ]
        self._connector_connection_typed = [
            ConnectorConnectionBinding.model_validate(m) if isinstance(m, dict) else m
            for m in self.connector_connection
        ]

        # First pass: connectors that declare resource_name bind implicitly.
        for connector in self.connectors:
            if connector.resource_name is None:
                continue
            existing_hash = self._resource_to_connector_hash.get(
                connector.resource_name
            )
            if existing_hash is not None and existing_hash != connector.hash:
                raise ValueError(
                    "Conflicting resource binding for resource "
                    f"'{connector.resource_name}'."
                )
            self._resource_to_connector_hash[connector.resource_name] = connector.hash

        # Second pass: explicit resource_connector entries (resolved by name).
        for mapping in self._resource_connector_typed:
            connector_hash = self._connectors_name_index.get(mapping.connector)
            if connector_hash is None:
                raise ValueError(
                    f"resource_connector references unknown connector '{mapping.connector}' "
                    f"for resource '{mapping.resource}'."
                )
            existing_hash = self._resource_to_connector_hash.get(mapping.resource)
            if existing_hash is not None and existing_hash != connector_hash:
                raise ValueError(
                    f"Conflicting resource binding for resource '{mapping.resource}'."
                )
            self._resource_to_connector_hash[mapping.resource] = connector_hash
        self._rebuild_connector_to_conn_proxy()
        return self

    def _resolve_connector_ref_to_hash(self, connector_ref: str) -> str:
        """Resolve a connector reference to its canonical connector hash.

        The contract allows referencing either:
        - ``connector.hash`` (canonical internal id), or
        - ``connector.name`` (when a name is provided / auto-filled).
        - ``resource_name`` (alias when ``connector.name`` is omitted in manifests).
        """
        if connector_ref in self._connectors_index:
            return connector_ref
        resolved_hash = self._connectors_name_index.get(connector_ref)
        if resolved_hash is None:
            # Last resort: treat the reference as a resource name alias.
            resolved_hash = self._resource_to_connector_hash.get(connector_ref)
        if resolved_hash is None:
            raise ValueError(f"Unknown connector reference '{connector_ref}'")
        return resolved_hash

    def _rebuild_connector_to_conn_proxy(self) -> None:
        """Recompute the connector-hash -> conn_proxy map from typed bindings.

        Raises:
            ValueError: If two bindings map one connector to different proxies.
        """
        self._connector_to_conn_proxy = {}
        for mapping in self._connector_connection_typed:
            connector_hash = self._resolve_connector_ref_to_hash(mapping.connector)
            existing = self._connector_to_conn_proxy.get(connector_hash)
            if existing is not None and existing != mapping.conn_proxy:
                raise ValueError(
                    "Conflicting conn_proxy mapping for connector "
                    f"'{connector_hash}' (existing='{existing}', new='{mapping.conn_proxy}')."
                )
            self._connector_to_conn_proxy[connector_hash] = mapping.conn_proxy

    def get_conn_proxy_for_connector(
        self, connector: TableConnector | FileConnector | SparqlConnector
    ) -> str | None:
        """Return the mapped runtime proxy name for a given connector."""
        return self._connector_to_conn_proxy.get(connector.hash)

    def bind_connector_to_conn_proxy(
        self,
        connector: TableConnector | FileConnector | SparqlConnector,
        conn_proxy: str,
    ) -> None:
        """Bind a connector to a non-secret runtime proxy name.

        Uses ``connector.name`` when available, falling back to ``connector.hash``.
        """
        # Ensure indexes include the connector and that a default name is set.
        if connector.hash not in self._connectors_index:
            self.add_connector(connector)
        # Pick a contract reference string that's stable and user-friendly.
        connector_ref = connector.name or connector.hash

        # Ensure uniqueness by connector.hash (not by ref-string).
        connector_hash = connector.hash
        existing_idx: int | None = None
        for i, m in enumerate(self._connector_connection_typed):
            try:
                if self._resolve_connector_ref_to_hash(m.connector) == connector_hash:
                    existing_idx = i
                    break
            except ValueError:
                # Stale/unresolvable refs are skipped, not treated as matches.
                continue

        if existing_idx is None:
            self._connector_connection_typed.append(
                ConnectorConnectionBinding(
                    connector=connector_ref, conn_proxy=conn_proxy
                )
            )
        else:
            self._connector_connection_typed[existing_idx] = ConnectorConnectionBinding(
                connector=connector_ref, conn_proxy=conn_proxy
            )
        # Keep the public contract field in sync for serialization / downstream.
        self.connector_connection = list(self._connector_connection_typed)

        self._rebuild_connector_to_conn_proxy()

    @classmethod
    def from_dict(cls, data: dict[str, Any] | list[Any]) -> Self:
        """Validate a mapping payload into a Bindings instance.

        Raises:
            ValueError: For list payloads or legacy (pre-contract) init keys.
        """
        if isinstance(data, list):
            raise ValueError(
                "Bindings.from_dict expects a mapping with 'connectors' and optional "
                "'resource_connector'. List-style connector payloads are not supported."
            )
        legacy_keys = {
            "postgres_connections",
            "table_connectors",
            "file_connectors",
            "sparql_connectors",
        }
        found_legacy = sorted(k for k in legacy_keys if k in data)
        if found_legacy:
            raise ValueError(
                "Legacy Bindings init keys are not supported. "
                f"Unsupported keys: {', '.join(found_legacy)}."
            )
        return cls.model_validate(data)

    def add_connector(
        self,
        connector: TableConnector | FileConnector | SparqlConnector,
    ) -> None:
        """Register a connector (replacing any same-hash entry) and reindex.

        Auto-fills ``connector.name`` via ``default_connector_name`` when unset,
        and records the resource link when ``connector.resource_name`` is set.

        Raises:
            ValueError: On duplicate names or conflicting resource bindings.
        """
        if connector.name is None:
            # object.__setattr__ bypasses pydantic's __setattr__ — presumably
            # because connector models disallow plain assignment; confirm.
            object.__setattr__(
                connector, "name", self.default_connector_name(connector)
            )
        existing_name_hash = None
        if connector.name:
            existing_name_hash = self._connectors_name_index.get(connector.name)
        if (
            connector.name
            and existing_name_hash is not None
            and existing_name_hash != connector.hash
        ):
            raise ValueError(
                "Connector names must be unique when provided. "
                f"Duplicate connector name '{connector.name}'."
            )

        if connector.hash in self._connectors_index:
            # Same hash: replace the stored object in-place (identity match).
            old_connector = self._connectors_index[connector.hash]
            for idx, existing in enumerate(self.connectors):
                if existing is old_connector:
                    self.connectors[idx] = connector
                    break
        else:
            self.connectors.append(connector)
        self._rebuild_indexes()
        if connector.resource_name is not None:
            existing_hash = self._resource_to_connector_hash.get(
                connector.resource_name
            )
            if existing_hash is not None and existing_hash != connector.hash:
                raise ValueError(
                    "Conflicting resource binding for resource "
                    f"'{connector.resource_name}'."
                )
            self._resource_to_connector_hash[connector.resource_name] = connector.hash

    def bind_resource(
        self,
        resource_name: str,
        connector: TableConnector | FileConnector | SparqlConnector,
    ) -> None:
        """Bind ``resource_name`` to an already-registered connector.

        Overwrites any previous binding for the resource and keeps the public
        ``resource_connector`` field in sync with the typed list.

        Raises:
            KeyError: If the connector was never added via ``add_connector``.
        """
        if connector.hash not in self._connectors_index:
            raise KeyError(f"Connector not found for hash='{connector.hash}'")
        self._resource_to_connector_hash[resource_name] = connector.hash
        connector_name = connector.name or self.default_connector_name(connector)
        mapping_idx = None
        for idx, mapping in enumerate(self._resource_connector_typed):
            if mapping.resource == resource_name:
                mapping_idx = idx
                break
        new_mapping = ResourceConnectorBinding(
            resource=resource_name,
            connector=connector_name,
        )
        if mapping_idx is None:
            self._resource_connector_typed.append(new_mapping)
        else:
            self._resource_connector_typed[mapping_idx] = new_mapping
        # Keep the public contract field in sync for serialization / downstream.
        self.resource_connector = list(self._resource_connector_typed)

    def get_connector_for_resource(
        self, resource_name: str
    ) -> TableConnector | FileConnector | SparqlConnector | None:
        """Return the connector bound to ``resource_name``, or None if unbound."""
        connector_hash = self._resource_to_connector_hash.get(resource_name)
        if connector_hash is None:
            return None
        connector = self._connectors_index.get(connector_hash)
        # Narrow to the concrete union for type-checkers; anything else -> None.
        if isinstance(connector, (TableConnector, FileConnector, SparqlConnector)):
            return connector
        return None

    def get_resource_type(self, resource_name: str) -> ResourceType | None:
        """Return the resource type of the bound connector, or None if unbound."""
        connector = self.get_connector_for_resource(resource_name)
        if connector is None:
            return None
        return connector.get_resource_type()

    def get_table_info(self, resource_name: str) -> tuple[str, str | None] | None:
        """Return ``(table_name, schema_name)`` when the resource is table-backed."""
        connector = self.get_connector_for_resource(resource_name)
        if isinstance(connector, TableConnector):
            return (connector.table_name, connector.schema_name)
        return None

bind_connector_to_conn_proxy(connector, conn_proxy)

Bind a connector to a non-secret runtime proxy name.

Uses connector.name when available, falling back to connector.hash.

Source code in graflo/architecture/contract/bindings/core.py
def bind_connector_to_conn_proxy(
    self,
    connector: TableConnector | FileConnector | SparqlConnector,
    conn_proxy: str,
) -> None:
    """Bind a connector to a non-secret runtime proxy name.

    Uses ``connector.name`` when available, falling back to ``connector.hash``.
    """
    # Register the connector first so indexes exist and a default name is set.
    if connector.hash not in self._connectors_index:
        self.add_connector(connector)

    # Reference string stored in the contract: prefer the friendly name.
    connector_ref = connector.name or connector.hash
    target_hash = connector.hash

    def _resolves_to_target(ref: str) -> bool:
        # Uniqueness is enforced by connector hash, not by the ref-string;
        # unresolvable refs simply don't match.
        try:
            return self._resolve_connector_ref_to_hash(ref) == target_hash
        except ValueError:
            return False

    replacement = ConnectorConnectionBinding(
        connector=connector_ref, conn_proxy=conn_proxy
    )
    for idx, binding in enumerate(self._connector_connection_typed):
        if _resolves_to_target(binding.connector):
            self._connector_connection_typed[idx] = replacement
            break
    else:
        self._connector_connection_typed.append(replacement)

    # Mirror the typed list onto the public field for serialization / downstream.
    self.connector_connection = list(self._connector_connection_typed)

    self._rebuild_connector_to_conn_proxy()

get_conn_proxy_for_connector(connector)

Return the mapped runtime proxy name for a given connector.

Source code in graflo/architecture/contract/bindings/core.py
def get_conn_proxy_for_connector(
    self, connector: TableConnector | FileConnector | SparqlConnector
) -> str | None:
    """Look up the runtime proxy name bound to ``connector``, if any."""
    proxy_by_hash = self._connector_to_conn_proxy
    return proxy_by_hash.get(connector.hash)

FileConnector

Bases: ResourceConnector

Connector for matching files.

Attributes:

| Name | Type | Description |
| --- | --- | --- |
| `regex` | `str \| None` | Regular expression pattern for matching filenames |
| `sub_path` | `Path` | Path to search for matching files (default: `"./"`) |
| `date_field` | `str \| None` | Name of the date field to filter on (for date-based filtering) |
| `date_filter` | `str \| None` | SQL-style date filter condition (e.g., `"> '2020-10-10'"`) |
| `date_range_start` | `str \| None` | Start date for range filtering (e.g., `"2015-11-11"`) |
| `date_range_days` | `int \| None` | Number of days after start date (used with `date_range_start`) |

Source code in graflo/architecture/contract/bindings/connectors.py
class FileConnector(ResourceConnector):
    """Connector that selects files by filename pattern, with optional date filters.

    Attributes:
        regex: Regular expression pattern for matching filenames
        sub_path: Path to search for matching files (default: "./")
        date_field: Name of the date field to filter on (for date-based filtering)
        date_filter: SQL-style date filter condition (e.g., "> '2020-10-10'")
        date_range_start: Start date for range filtering (e.g., "2015-11-11")
        date_range_days: Number of days after start date (used with date_range_start)
    """

    regex: str | None = None
    sub_path: pathlib.Path = Field(default_factory=lambda: pathlib.Path("./"))
    date_field: str | None = None
    date_filter: str | None = None
    date_range_start: str | None = None
    date_range_days: int | None = None

    @model_validator(mode="after")
    def _validate_file_connector(self) -> Self:
        """Coerce sub_path to a Path and check date-filter parameter combos."""
        if not isinstance(self.sub_path, pathlib.Path):
            # object.__setattr__ sidesteps pydantic's __setattr__ handling.
            object.__setattr__(self, "sub_path", pathlib.Path(self.sub_path))
        wants_date_filtering = bool(self.date_filter) or bool(self.date_range_start)
        if wants_date_filtering and not self.date_field:
            raise ValueError(
                "date_field is required when using date_filter or date_range_start"
            )
        if self.date_range_days is not None and not self.date_range_start:
            raise ValueError("date_range_start is required when using date_range_days")
        return self

    def matches(self, resource_identifier: str) -> bool:
        """Check if connector matches a filename.

        Args:
            resource_identifier: Filename to match

        Returns:
            bool: True if connector matches
        """
        pattern = self.regex
        if pattern is None:
            # No pattern configured -> this connector matches nothing.
            return False
        return re.match(pattern, resource_identifier) is not None

    def get_resource_type(self) -> ResourceType:
        """Return the resource type for this connector.

        A FileConnector always maps to ``ResourceType.FILE``; the concrete
        file format (CSV, JSON, JSONL, Parquet, etc.) is detected later by
        the loader from file extensions.
        """
        return ResourceType.FILE

get_resource_type()

Get resource type.

FileConnector always represents a FILE resource type. The specific file format (CSV, JSON, JSONL, Parquet, etc.) is automatically detected by the loader based on file extensions.

Source code in graflo/architecture/contract/bindings/connectors.py
def get_resource_type(self) -> ResourceType:
    """Return the resource type for this connector.

    A FileConnector always maps to ``ResourceType.FILE``; the concrete file
    format (CSV, JSON, JSONL, Parquet, etc.) is detected later by the loader
    from file extensions.
    """
    return ResourceType.FILE

matches(resource_identifier)

Check if connector matches a filename.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `resource_identifier` | `str` | Filename to match | required |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `bool` | `bool` | True if connector matches |

Source code in graflo/architecture/contract/bindings/connectors.py
def matches(self, resource_identifier: str) -> bool:
    """Check if connector matches a filename.

    Args:
        resource_identifier: Filename to match

    Returns:
        bool: True if connector matches
    """
    pattern = self.regex
    # No configured pattern means the connector matches nothing.
    return pattern is not None and re.match(pattern, resource_identifier) is not None

GraphManifest

Bases: ConfigBaseModel

Canonical config contract for graph schema, ingestion, and bindings.

Source code in graflo/architecture/contract/manifest.py
class GraphManifest(ConfigBaseModel):
    """Canonical config contract for graph schema, ingestion, and bindings."""

    model_config = ConfigDict(populate_by_name=True)

    graph_schema: Schema | None = PydanticField(
        default=None,
        description="Logical graph schema contract.",
        validation_alias=AliasChoices("schema", "graph_schema"),
        serialization_alias="schema",
    )
    ingestion_model: IngestionModel | None = PydanticField(
        default=None,
        description="Ingestion resources and transforms.",
    )
    bindings: Bindings | None = PydanticField(
        default=None,
        description="Bindings mapping resources to concrete data sources.",
    )

    @classmethod
    def from_config(cls, data: dict[str, Any]) -> "GraphManifest":
        """Build a manifest from a Python mapping payload."""
        return cls.from_dict(data)

    @model_validator(mode="after")
    def _validate_manifest(self) -> "GraphManifest":
        """Reject empty manifests: at least one of the three blocks must exist."""
        blocks = (self.graph_schema, self.ingestion_model, self.bindings)
        if all(block is None for block in blocks):
            raise ValueError(
                "GraphManifest requires at least one block: "
                "schema, ingestion_model, or bindings."
            )
        return self

    def finish_init(
        self,
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
    ) -> None:
        """Initialize model internals and cross-block runtime links."""
        schema = self.graph_schema
        if schema is None:
            # Without a schema block there is nothing to initialize downstream.
            return
        schema.finish_init()
        ingestion = self.ingestion_model
        if ingestion is not None:
            ingestion.finish_init(
                schema.core_schema,
                strict_references=strict_references,
                dynamic_edge_feedback=dynamic_edge_feedback,
            )

    def require_schema(self) -> Schema:
        """Return the schema block, raising if it is absent."""
        schema = self.graph_schema
        if schema is None:
            raise ValueError("GraphManifest is missing required 'schema' block.")
        return schema

    def require_ingestion_model(self) -> IngestionModel:
        """Return the ingestion_model block, raising if it is absent."""
        ingestion = self.ingestion_model
        if ingestion is None:
            raise ValueError(
                "GraphManifest is missing required 'ingestion_model' block."
            )
        return ingestion

    def require_bindings(self) -> Bindings:
        """Return the bindings block, raising if it is absent."""
        bindings = self.bindings
        if bindings is None:
            raise ValueError("GraphManifest is missing required 'bindings' block.")
        return bindings

finish_init(*, strict_references=False, dynamic_edge_feedback=False)

Initialize model internals and cross-block runtime links.

Source code in graflo/architecture/contract/manifest.py
def finish_init(
    self,
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
) -> None:
    """Initialize model internals and cross-block runtime links."""
    schema = self.graph_schema
    if schema is None:
        # Without a schema block there is nothing to initialize downstream.
        return
    schema.finish_init()
    ingestion = self.ingestion_model
    if ingestion is not None:
        ingestion.finish_init(
            schema.core_schema,
            strict_references=strict_references,
            dynamic_edge_feedback=dynamic_edge_feedback,
        )

from_config(data) classmethod

Build a manifest from a Python mapping payload.

Source code in graflo/architecture/contract/manifest.py
@classmethod
def from_config(cls, data: dict[str, Any]) -> "GraphManifest":
    """Build a manifest from a Python mapping payload.

    Thin alias over ``from_dict`` — not defined in this listing, presumably
    provided by ``ConfigBaseModel``; confirm against the base class.
    """
    return cls.from_dict(data)

IngestionModel

Bases: ConfigBaseModel

Ingestion model (C): resources and transform registry.

Source code in graflo/architecture/contract/declarations/ingestion_model/model.py
class IngestionModel(ConfigBaseModel):
    """Ingestion model (C): resources and transform registry."""

    resources: list[Resource] = PydanticField(
        default_factory=list,
        description="List of resource definitions (data pipelines mapping to vertices/edges).",
    )
    transforms: list[ProtoTransform] = PydanticField(
        default_factory=list,
        description="List of named transforms available to resources.",
    )

    # Lookup maps derived from the public lists; populated by _init_model.
    # NOTE: _resources has no default — it is always set via the validator.
    _resources: dict[str, Resource] = PrivateAttr()
    _transforms: dict[str, ProtoTransform] = PrivateAttr(default_factory=dict)

    @model_validator(mode="after")
    def _init_model(self) -> IngestionModel:
        """Build transform and resource lookup maps."""
        self._rebuild_runtime_state()
        return self

    def _rebuild_resource_map(self) -> None:
        """Validate resource name uniqueness and refresh lookup map."""
        names = [r.name for r in self.resources]
        c = Counter(names)
        for k, v in c.items():
            if v > 1:
                raise ValueError(f"resource name {k} used {v} times")
        # object.__setattr__ sidesteps pydantic's __setattr__ for the
        # private attribute assignment.
        object.__setattr__(self, "_resources", {r.name: r for r in self.resources})

    def _rebuild_transform_map(self) -> None:
        """Validate transform names and refresh name lookup map."""
        # Every transform must be addressable by name for resource lookup.
        missing_names = [idx for idx, t in enumerate(self.transforms) if not t.name]
        if missing_names:
            raise ValueError(
                "All ingestion transforms must define a non-empty name. "
                f"Missing at indexes: {missing_names}"
            )

        transform_names = [t.name for t in self.transforms if t.name is not None]
        name_counts = Counter(transform_names)
        duplicates = sorted([name for name, count in name_counts.items() if count > 1])
        if duplicates:
            raise ValueError(f"Duplicate ingestion transform names found: {duplicates}")

        object.__setattr__(
            self,
            "_transforms",
            {t.name: t for t in self.transforms if t.name is not None},
        )

    def finish_init(
        self,
        core_schema: CoreSchema,
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
    ) -> None:
        """Initialize resources against graph model and transform library."""
        # Rebuild maps first so every resource sees the current transform set.
        self._rebuild_runtime_state()
        for r in self.resources:
            r.finish_init(
                vertex_config=core_schema.vertex_config,
                edge_config=core_schema.edge_config,
                transforms=self._transforms,
                strict_references=strict_references,
                dynamic_edge_feedback=dynamic_edge_feedback,
            )

    def _rebuild_runtime_state(self) -> None:
        """Rebuild transform and resource lookup maps."""
        self._rebuild_transform_map()
        self._rebuild_resource_map()

    def fetch_resource(self, name: str | None = None) -> Resource:
        """Fetch a resource by name or get the first available resource.

        Args:
            name: Optional name of the resource to fetch

        Returns:
            Resource: The requested resource

        Raises:
            ValueError: If the requested resource is not found or if no resources exist
        """
        _current_resource = None

        if name is not None:
            if name in self._resources:
                _current_resource = self._resources[name]
            else:
                raise ValueError(f"Resource {name} not found")
        else:
            if self._resources:
                # Default: first resource in declaration order.
                _current_resource = self.resources[0]
            else:
                raise ValueError("Empty resource container :(")
        return _current_resource

    def prune_to_graph(
        self, core_schema: CoreSchema, disconnected: set[str] | None = None
    ) -> None:
        """Drop resource actors that reference disconnected vertices."""
        if disconnected is None:
            # Default: vertices declared in the vertex config but not
            # referenced by any edge in the edge config.
            disconnected = (
                core_schema.vertex_config.vertex_set - core_schema.edge_config.vertices
            )
        if not disconnected:
            return

        # wrapper is expected to expose .actor with references_vertices();
        # presumably a node of the resource's actor tree — confirm.
        def _mentions_disconnected(wrapper) -> bool:
            return bool(wrapper.actor.references_vertices() & disconnected)

        to_drop: list[Resource] = []
        for resource in self.resources:
            root = resource.root
            if _mentions_disconnected(root):
                # Root itself touches a disconnected vertex: drop whole resource.
                to_drop.append(resource)
                continue
            root.remove_descendants_if(_mentions_disconnected)
            # If nothing left references any vertex, the resource is inert.
            if not any(a.references_vertices() for a in root.collect_actors()):
                to_drop.append(resource)

        for r in to_drop:
            self.resources.remove(r)
            self._resources.pop(r.name, None)

fetch_resource(name=None)

Fetch a resource by name or get the first available resource.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| `name` | `str \| None` | Optional name of the resource to fetch | `None` |

Returns:

| Name | Type | Description |
| --- | --- | --- |
| `Resource` | `Resource` | The requested resource |

Raises:

| Type | Description |
| --- | --- |
| `ValueError` | If the requested resource is not found or if no resources exist |

Source code in graflo/architecture/contract/declarations/ingestion_model/model.py
def fetch_resource(self, name: str | None = None) -> Resource:
    """Fetch a resource by name, or the first available one when no name is given.

    Args:
        name: Optional name of the resource to fetch

    Returns:
        Resource: The requested resource

    Raises:
        ValueError: If the requested resource is not found or if no resources exist
    """
    if name is not None:
        try:
            return self._resources[name]
        except KeyError:
            raise ValueError(f"Resource {name} not found") from None
    if not self._resources:
        raise ValueError("Empty resource container :(")
    return self.resources[0]

finish_init(core_schema, *, strict_references=False, dynamic_edge_feedback=False)

Initialize resources against graph model and transform library.

Source code in graflo/architecture/contract/declarations/ingestion_model/model.py
def finish_init(
    self,
    core_schema: CoreSchema,
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
) -> None:
    """Initialize resources against graph model and transform library.

    Rebuilds this container's runtime state, then finishes initialization of
    every resource with the vertex/edge configuration from core_schema.

    Args:
        core_schema: Graph model supplying vertex and edge configuration.
        strict_references: Forwarded to each resource's finish_init.
        dynamic_edge_feedback: Forwarded to each resource's finish_init.
    """
    self._rebuild_runtime_state()
    vertex_config = core_schema.vertex_config
    edge_config = core_schema.edge_config
    for resource in self.resources:
        resource.finish_init(
            vertex_config=vertex_config,
            edge_config=edge_config,
            transforms=self._transforms,
            strict_references=strict_references,
            dynamic_edge_feedback=dynamic_edge_feedback,
        )

prune_to_graph(core_schema, disconnected=None)

Drop resource actors that reference disconnected vertices.

Source code in graflo/architecture/contract/declarations/ingestion_model/model.py
def prune_to_graph(
    self, core_schema: CoreSchema, disconnected: set[str] | None = None
) -> None:
    """Drop resource actors that reference disconnected vertices.

    Args:
        core_schema: Graph model used to derive the disconnected vertex set.
        disconnected: Explicit disconnected vertex names; when None, defaults
            to vertices declared but not used by any edge.
    """
    if disconnected is None:
        disconnected = (
            core_schema.vertex_config.vertex_set - core_schema.edge_config.vertices
        )
    if not disconnected:
        return

    def touches_disconnected(wrapper) -> bool:
        return bool(wrapper.actor.references_vertices() & disconnected)

    doomed = []
    for resource in self.resources:
        root = resource.root
        if touches_disconnected(root):
            doomed.append(resource)
            continue
        root.remove_descendants_if(touches_disconnected)
        still_referencing = any(
            actor.references_vertices() for actor in root.collect_actors()
        )
        if not still_referencing:
            doomed.append(resource)

    for resource in doomed:
        self.resources.remove(resource)
        self._resources.pop(resource.name, None)

JoinClause

Bases: ConfigBaseModel

Specification for a SQL JOIN operation.

Used by TableConnector to describe multi-table queries. Each JoinClause adds one JOIN to the generated SQL.

Attributes:

Name Type Description
table str

Table name to join (e.g. "all_classes").

schema_name str | None

Optional schema override for the joined table.

alias str | None

SQL alias for the joined table (e.g. "s", "t"). Required when the same table is joined more than once.

on_self str

Column on the base (left) table used in the ON condition.

on_other str

Column on the joined (right) table used in the ON condition.

join_type str

Type of join -- LEFT, INNER, etc. Defaults to LEFT.

select_fields list[str] | None

Explicit list of columns to SELECT from this join. When None every column of the joined table is included (aliased with the join alias prefix).

Source code in graflo/architecture/contract/bindings/connectors.py
class JoinClause(ConfigBaseModel):
    """Specification for a SQL JOIN operation.

    Used by TableConnector to describe multi-table queries. Each JoinClause
    adds one JOIN to the generated SQL.

    Attributes:
        table: Table name to join (e.g. "all_classes").
        schema_name: Optional schema override for the joined table.
        alias: SQL alias for the joined table (e.g. "s", "t"). Required when
            the same table is joined more than once.
        on_self: Column on the base (left) table used in the ON condition.
        on_other: Column on the joined (right) table used in the ON condition.
        join_type: Type of join -- LEFT, INNER, etc. Defaults to LEFT.
        select_fields: Explicit list of columns to SELECT from this join.
            When None every column of the joined table is included (aliased
            with the join alias prefix).
    """

    # Field descriptions below also surface in the generated JSON schema/docs.
    table: str = Field(..., description="Table name to join.")
    schema_name: str | None = Field(
        default=None, description="Schema override for the joined table."
    )
    alias: str | None = Field(
        default=None, description="SQL alias for the joined table."
    )
    on_self: str = Field(
        ..., description="Column on the base table for the ON condition."
    )
    on_other: str = Field(
        ..., description="Column on the joined table for the ON condition."
    )
    # LEFT is the default so joined rows are optional unless INNER is requested.
    join_type: str = Field(default="LEFT", description="JOIN type (LEFT, INNER, etc.).")
    select_fields: list[str] | None = Field(
        default=None,
        description="Columns to SELECT from this join (None = all columns).",
    )

ProtoTransform

Bases: ConfigBaseModel

Base class for transform definitions.

This class provides the foundation for data transformations, supporting both functional transformations and declarative mappings.

Attributes:

Name Type Description
name str | None

Optional name of the transform

module str | None

Optional module containing the transform function

params dict[str, Any]

Dictionary of transform parameters

foo str | None

Optional name of the transform function

input tuple[str, ...]

Tuple of input field names

output tuple[str, ...]

Tuple of output field names

dress DressConfig | None

Optional pivot dressing for scalar functional results

target Literal['values', 'keys']

Whether to transform field values or document keys

keys KeySelectionConfig

Key selection when target is keys

_foo Any

Internal reference to the transform function

Source code in graflo/architecture/contract/declarations/transform.py
class ProtoTransform(ConfigBaseModel):
    """Base class for transform definitions.

    This class provides the foundation for data transformations, supporting both
    functional transformations and declarative mappings.

    Attributes:
        name: Optional name of the transform
        module: Optional module containing the transform function
        params: Dictionary of transform parameters
        foo: Optional name of the transform function
        input: Tuple of input field names
        output: Tuple of output field names
        input_groups: Explicit groups of input fields for repeated
            tuple-style value calls
        output_groups: Output field groups aligned with input_groups for
            grouped value calls
        dress: Optional pivot dressing for scalar functional results
        target: Whether to transform field values or document keys
        keys: Key selection when target is keys
        _foo: Internal reference to the transform function
    """

    name: str | None = Field(
        default=None,
        description="Optional name for this transform (e.g. for reference in ingestion_model.transforms).",
    )
    module: str | None = Field(
        default=None,
        description="Python module path containing the transform function (e.g. my_package.transforms).",
    )
    params: dict[str, Any] = Field(
        default_factory=dict,
        description="Extra parameters passed to the transform function at runtime.",
    )
    foo: str | None = Field(
        default=None,
        description="Name of the callable in module to use as the transform function.",
    )
    input: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Input field names passed to the transform function.",
    )
    output: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Output field names produced by the transform (defaults to input if unset).",
    )
    input_groups: tuple[tuple[str, ...], ...] = Field(
        default_factory=tuple,
        description=(
            "Explicit groups of input fields for repeated tuple-style value calls."
        ),
    )
    output_groups: tuple[tuple[str, ...], ...] = Field(
        default_factory=tuple,
        description=(
            "Explicit output field groups aligned with input_groups for grouped value calls."
        ),
    )
    dress: DressConfig | None = Field(
        default=None,
        description=(
            "Dressing spec for pivoted output. Applies to ingestion_model.transforms "
            "entries and to inline transform steps. "
            "dress.key receives the input field name, dress.value receives the "
            "function result. E.g. dress={key: name, value: value} with "
            "input=(Open,) produces {name: 'Open', value: <result>}."
        ),
    )
    target: Literal["values", "keys"] = Field(
        default="values",
        description=(
            "Transform target. values=apply function to input values; "
            "keys=apply function to selected document keys."
        ),
    )
    keys: KeySelectionConfig = Field(
        default_factory=KeySelectionConfig,
        description="Key selection for key-target transforms.",
    )

    # Resolved callable (set in _init_foo_and_output when module+foo are given).
    _foo: Any = PrivateAttr(default=None)

    @model_validator(mode="before")
    @classmethod
    def _normalize_input_output(cls, data: Any) -> Any:
        """Coerce input/output (and their grouped variants) to tuples; reject legacy dress."""
        if not isinstance(data, dict):
            return data
        data = dict(data)
        if "dress" in data and isinstance(data["dress"], (list, tuple)):
            raise ValueError(
                "List-style `dress` is no longer supported. "
                "Use a dict: dress={key: ..., value: ...}."
            )
        for key in ("input", "output"):
            if key in data:
                if data[key] is not None:
                    data[key] = _tuple_it(data[key])
                else:
                    data[key] = ()
        for key in ("input_groups", "output_groups"):
            if key in data:
                if data[key] is None:
                    data[key] = ()
                else:
                    data[key] = _tuple_groups_it(data[key])
        _normalize_keys_in_dict(data)
        return data

    @model_validator(mode="after")
    def _init_foo_and_output(self) -> Self:
        """Resolve the transform callable and derive output fields from dress/input."""
        if self.module is not None and self.foo is not None:
            try:
                _module = importlib.import_module(self.module)
            except Exception as e:
                # Chain the cause so import failures stay debuggable.
                raise TypeError(
                    f"Provided module {self.module} is not valid: {e}"
                ) from e
            try:
                object.__setattr__(self, "_foo", getattr(_module, self.foo))
            except Exception as e:
                raise ValueError(
                    f"Could not instantiate transform function. Exception: {e}"
                ) from e
        if self.dress is not None:
            if self.target == "keys":
                raise ValueError("target='keys' is not compatible with dress.")
            object.__setattr__(self, "output", (self.dress.key, self.dress.value))
        elif not self.output and self.input:
            object.__setattr__(self, "output", self.input)
        return self

    @classmethod
    def get_fields_members(cls) -> list[str]:
        """Get list of field members (public model fields)."""
        return list(cls.model_fields.keys())

    def apply(self, *args: Any, **kwargs: Any) -> Any:
        """Apply the raw transform function to the given arguments.

        This is the core function invocation without any input extraction or
        output dressing — purely ``self._foo(*args, **kwargs, **self.params)``.

        Raises:
            TransformException: If no transform function has been set.
        """
        if self._foo is None:
            raise TransformException("No transform function set")
        return self._foo(*args, **kwargs, **self.params)

    def __lt__(self, other: object) -> bool:
        """Compare transforms for ordering.

        Transforms without a bound function sort before those with one.

        Args:
            other: Other transform to compare with

        Returns:
            bool: True if this transform should be ordered before other
        """
        if not isinstance(other, ProtoTransform):
            return NotImplemented
        if self._foo is None and other._foo is not None:
            return True
        return False

__lt__(other)

Compare transforms for ordering.

Parameters:

Name Type Description Default
other object

Other transform to compare with

required

Returns:

Name Type Description
bool bool

True if this transform should be ordered before other

Source code in graflo/architecture/contract/declarations/transform.py
def __lt__(self, other: object) -> bool:
    """Compare transforms for ordering.

    Transforms without a bound function order before those with one.

    Args:
        other: Other transform to compare with

    Returns:
        bool: True if this transform should be ordered before other
    """
    if not isinstance(other, ProtoTransform):
        return NotImplemented
    return self._foo is None and other._foo is not None

apply(*args, **kwargs)

Apply the raw transform function to the given arguments.

This is the core function invocation without any input extraction or output dressing — purely self._foo(*args, **kwargs, **self.params).

Raises:

Type Description
TransformException

If no transform function has been set.

Source code in graflo/architecture/contract/declarations/transform.py
def apply(self, *args: Any, **kwargs: Any) -> Any:
    """Apply the raw transform function to the given arguments.

    This is the core function invocation without any input extraction or
    output dressing — purely ``self._foo(*args, **kwargs, **self.params)``.

    Raises:
        TransformException: If no transform function has been set.
    """
    transform_fn = self._foo
    if transform_fn is None:
        raise TransformException("No transform function set")
    return transform_fn(*args, **kwargs, **self.params)

get_fields_members() classmethod

Get list of field members (public model fields).

Source code in graflo/architecture/contract/declarations/transform.py
@classmethod
def get_fields_members(cls) -> list[str]:
    """Get list of field members (public model fields)."""
    return [field_name for field_name in cls.model_fields]

Resource

Bases: ConfigBaseModel

Resource configuration and processing.

Represents a data resource that can be processed and transformed into graph structures. Manages the processing pipeline through actors and handles data encoding, transformation, and mapping. Suitable for LLM-generated schema constituents.

Dynamic vertex-type routing is handled by vertex_router steps in the pipeline (see :class:~graflo.architecture.pipeline.runtime.actor.VertexRouterActor).

Source code in graflo/architecture/contract/declarations/resource.py
class Resource(ConfigBaseModel):
    """Resource configuration and processing.

    Represents a data resource that can be processed and transformed into graph
    structures. Manages the processing pipeline through actors and handles data
    encoding, transformation, and mapping. Suitable for LLM-generated schema
    constituents.

    Dynamic vertex-type routing is handled by ``vertex_router`` steps in the
    pipeline (see :class:`~graflo.architecture.pipeline.runtime.actor.VertexRouterActor`).
    """

    # Reject unknown keys so typos in resource declarations fail fast.
    model_config = {"extra": "forbid"}

    name: str = PydanticField(
        ...,
        description="Name of the resource (e.g. table or file identifier).",
    )
    pipeline: list[dict[str, Any]] = PydanticField(
        ...,
        description="Pipeline of actor steps to apply in sequence (vertex, edge, transform, descend). "
        'Each step is a dict, e.g. {"vertex": "user"} or {"edge": {"from": "a", "to": "b"}}.',
        validation_alias=AliasChoices("pipeline", "apply"),
    )
    encoding: EncodingType = PydanticField(
        default=EncodingType.UTF_8,
        description="Character encoding for input/output (e.g. utf-8, ISO-8859-1).",
    )
    merge_collections: list[str] = PydanticField(
        default_factory=list,
        description="List of collection names to merge when writing to the graph.",
    )
    extra_weights: list[Edge] = PydanticField(
        default_factory=list,
        description="Additional edge weight configurations for this resource.",
    )
    types: dict[str, str] = PydanticField(
        default_factory=dict,
        description='Field name to Python type expression for casting (e.g. {"amount": "float"}).',
    )
    infer_edges: bool = PydanticField(
        default=True,
        description=(
            "If True, infer edges from current vertex population. "
            "If False, emit only edges explicitly declared as edge actors in the pipeline."
        ),
    )
    infer_edge_only: list[EdgeInferSpec] = PydanticField(
        default_factory=list,
        description=(
            "Optional allow-list for inferred edges. Applies only to inferred (greedy) edges, "
            "not explicit edge actors."
        ),
    )
    infer_edge_except: list[EdgeInferSpec] = PydanticField(
        default_factory=list,
        description=(
            "Optional deny-list for inferred edges. Applies only to inferred (greedy) edges, "
            "not explicit edge actors."
        ),
    )

    # Runtime state: built in _build_root_and_types, bound in _rebuild_runtime.
    _root: ActorWrapper = PrivateAttr()
    _types: dict[str, Callable[..., Any]] = PrivateAttr(default_factory=dict)
    _vertex_config: VertexConfig = PrivateAttr()
    _edge_config: EdgeConfig = PrivateAttr()
    _executor: ActorExecutor = PrivateAttr()
    _initialized: bool = PrivateAttr(default=False)

    @model_validator(mode="after")
    def _build_root_and_types(self) -> Resource:
        """Build root ActorWrapper and resolve safe cast functions."""
        from graflo.architecture.pipeline.runtime.actor import ActorWrapper
        from graflo.architecture.pipeline.runtime.executor import ActorExecutor

        object.__setattr__(self, "_root", ActorWrapper(*self.pipeline))
        object.__setattr__(self, "_executor", ActorExecutor(self._root))
        object.__setattr__(self, "_types", {})
        for k, v in self.types.items():
            caster = _resolve_type_caster(v)
            if caster is not None:
                self._types[k] = caster
            else:
                # Unresolvable cast type: log and skip rather than fail validation.
                logger.error(
                    "For resource %s for field %s failed to resolve cast type %s",
                    self.name,
                    k,
                    v,
                )
        # Placeholders until schema binds real configs.
        object.__setattr__(self, "_vertex_config", VertexConfig(vertices=[]))
        object.__setattr__(self, "_edge_config", EdgeConfig())
        object.__setattr__(self, "_initialized", False)
        self._validate_infer_edge_spec_policy()
        return self

    def _validate_infer_edge_spec_policy(self) -> None:
        # Allow-list and deny-list cannot be combined.
        if self.infer_edge_only and self.infer_edge_except:
            raise ValueError(
                "Resource infer_edge_only and infer_edge_except are mutually exclusive."
            )

    def _validate_infer_edge_spec_targets(self, edge_config: EdgeConfig) -> None:
        """Reject infer-edge selectors that match no edge declared in edge_config."""
        known_edge_ids = {edge_id for edge_id, _ in edge_config.items()}

        def _validate_list(field_name: str, specs: list[EdgeInferSpec]) -> None:
            unknown: list[EdgeId] = []
            for spec in specs:
                if not any(spec.matches(edge_id) for edge_id in known_edge_ids):
                    unknown.append(spec.edge_id)
            if unknown:
                raise ValueError(
                    f"Resource {field_name} contains unknown edge selectors: {unknown}"
                )

        _validate_list("infer_edge_only", self.infer_edge_only)
        _validate_list("infer_edge_except", self.infer_edge_except)

    @property
    def vertex_config(self) -> VertexConfig:
        """Vertex configuration (set by Schema.finish_init)."""
        return self._vertex_config

    @property
    def edge_config(self) -> EdgeConfig:
        """Edge configuration (set by Schema.finish_init)."""
        return self._edge_config

    @property
    def root(self) -> ActorWrapper:
        """Root actor wrapper for the processing pipeline."""
        return self._root

    def finish_init(
        self,
        vertex_config: VertexConfig,
        edge_config: EdgeConfig,
        transforms: dict[str, ProtoTransform],
        *,
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
    ) -> None:
        """Complete resource initialization.

        Initializes the resource with vertex and edge configurations,
        and sets up the processing pipeline. Called by Schema after load.

        Args:
            vertex_config: Configuration for vertices
            edge_config: Configuration for edges
            transforms: Dictionary of available transforms
            strict_references: Forwarded to _rebuild_runtime (actor reference
                checking strictness)
            dynamic_edge_feedback: Forwarded to _rebuild_runtime; when True,
                edges discovered during init are propagated back to the
                shared edge_config
        """
        self._rebuild_runtime(
            vertex_config=vertex_config,
            edge_config=edge_config,
            transforms=transforms,
            strict_references=strict_references,
            dynamic_edge_feedback=dynamic_edge_feedback,
        )

    def _edge_ids_from_edge_actors(self) -> set[EdgeId]:
        """Collect (source, target, None) for every EdgeActor in this resource's pipeline.

        Used to auto-add to infer_edge_except so inferred edges do not duplicate
        edges produced by explicit edge actors.
        """
        from graflo.architecture.pipeline.runtime.actor import EdgeActor

        edge_actors = [
            a for a in self.root.collect_actors() if isinstance(a, EdgeActor)
        ]
        return {(ea.edge.source, ea.edge.target, None) for ea in edge_actors}

    def _validate_dynamic_edge_vertices_exist(
        self, vertex_config: VertexConfig
    ) -> None:
        """Ensure all vertices implied by dynamic edge controls are declared."""
        known_vertices = set(vertex_config.vertex_set)
        referenced_vertices: set[str] = set()

        for spec in self.infer_edge_only:
            referenced_vertices.add(spec.source)
            referenced_vertices.add(spec.target)

        for spec in self.infer_edge_except:
            referenced_vertices.add(spec.source)
            referenced_vertices.add(spec.target)

        for source, target, _ in self._edge_ids_from_edge_actors():
            referenced_vertices.add(source)
            referenced_vertices.add(target)

        missing_vertices = sorted(referenced_vertices - known_vertices)
        if missing_vertices:
            raise ValueError(
                "Resource dynamic edge references undefined vertices: "
                f"{missing_vertices}. "
                "Declare these vertices in vertex_config before using dynamic/inferred edges."
            )

    def _rebuild_runtime(
        self,
        *,
        vertex_config: VertexConfig,
        edge_config: EdgeConfig,
        transforms: dict[str, ProtoTransform],
        strict_references: bool = False,
        dynamic_edge_feedback: bool = False,
    ) -> None:
        """Rebuild runtime actor initialization state from typed context."""
        object.__setattr__(self, "_vertex_config", vertex_config)
        # Runtime actors may register dynamic edges; keep per-resource edge state.
        local_edge_config = EdgeConfig.model_validate(
            edge_config.to_dict(skip_defaults=False)
        )
        object.__setattr__(self, "_edge_config", local_edge_config)
        self._validate_dynamic_edge_vertices_exist(vertex_config)
        self._validate_infer_edge_spec_targets(self._edge_config)

        baseline_edge_ids = {edge_id for edge_id, _ in edge_config.items()}
        infer_edge_except = {spec.edge_id for spec in self.infer_edge_except}
        # When not using infer_edge_only, auto-add (s,t,None) to infer_edge_except
        # for any edge type handled by explicit EdgeActors in this resource.
        if not self.infer_edge_only:
            infer_edge_except |= self._edge_ids_from_edge_actors()

        from graflo.architecture.pipeline.runtime.actor import ActorInitContext

        logger.debug("total resource actor count : %s", self.root.count())
        init_ctx = ActorInitContext(
            vertex_config=vertex_config,
            edge_config=self._edge_config,
            transforms=transforms,
            infer_edges=self.infer_edges,
            infer_edge_only={spec.edge_id for spec in self.infer_edge_only},
            infer_edge_except=infer_edge_except,
            strict_references=strict_references,
        )
        self.root.finish_init(init_ctx=init_ctx)
        object.__setattr__(self, "_initialized", True)

        if dynamic_edge_feedback:
            # Edge actors register static edge definitions into the resource-local edge
            # config during finish_init(). Optionally propagate newly discovered edges
            # to the shared schema-level edge_config so schema definition and DB
            # writers can see them.
            for edge_id, edge in self._edge_config.items():
                if edge_id in baseline_edge_ids:
                    continue
                edge_config.update_edges(
                    edge.model_copy(deep=True), vertex_config=vertex_config
                )

        logger.debug("total resource actor count (after finit): %s", self.root.count())

        for e in self.extra_weights:
            e.finish_init(vertex_config)

    def __call__(self, doc: dict) -> defaultdict[GraphEntity, list]:
        """Process a document through the resource pipeline.

        Args:
            doc: Document to process

        Returns:
            defaultdict[GraphEntity, list]: Processed graph entities
        """
        if not self._initialized:
            raise RuntimeError(
                f"Resource '{self.name}' must be initialized via finish_init() before use."
            )
        extraction_ctx = self._executor.extract(doc)
        result = self._executor.assemble_result(extraction_ctx)
        return result.entities

    def count(self) -> int:
        """Total number of actors in the resource pipeline."""
        return self.root.count()

edge_config property

Edge configuration (set by Schema.finish_init).

root property

Root actor wrapper for the processing pipeline.

vertex_config property

Vertex configuration (set by Schema.finish_init).

__call__(doc)

Process a document through the resource pipeline.

Parameters:

Name Type Description Default
doc dict

Document to process

required

Returns:

Type Description
defaultdict[GraphEntity, list]

defaultdict[GraphEntity, list]: Processed graph entities

Source code in graflo/architecture/contract/declarations/resource.py
def __call__(self, doc: dict) -> defaultdict[GraphEntity, list]:
    """Process a document through the resource pipeline.

    Args:
        doc: Document to process

    Returns:
        defaultdict[GraphEntity, list]: Processed graph entities
    """
    if not self._initialized:
        raise RuntimeError(
            f"Resource '{self.name}' must be initialized via finish_init() before use."
        )
    ctx = self._executor.extract(doc)
    assembled = self._executor.assemble_result(ctx)
    return assembled.entities

count()

Total number of actors in the resource pipeline.

Source code in graflo/architecture/contract/declarations/resource.py
def count(self) -> int:
    """Total number of actors in the resource pipeline."""
    pipeline_root = self.root
    return pipeline_root.count()

finish_init(vertex_config, edge_config, transforms, *, strict_references=False, dynamic_edge_feedback=False)

Complete resource initialization.

Initializes the resource with vertex and edge configurations, and sets up the processing pipeline. Called by Schema after load.

Parameters:

Name Type Description Default
vertex_config VertexConfig

Configuration for vertices

required
edge_config EdgeConfig

Configuration for edges

required
transforms dict[str, ProtoTransform]

Dictionary of available transforms

required
Source code in graflo/architecture/contract/declarations/resource.py
def finish_init(
    self,
    vertex_config: VertexConfig,
    edge_config: EdgeConfig,
    transforms: dict[str, ProtoTransform],
    *,
    strict_references: bool = False,
    dynamic_edge_feedback: bool = False,
) -> None:
    """Complete resource initialization.

    Pure delegation: binds the resource pipeline to the given configs via
    the runtime rebuild. Called by Schema after load.

    Args:
        vertex_config: Configuration for vertices
        edge_config: Configuration for edges
        transforms: Dictionary of available transforms
        strict_references: Forwarded to _rebuild_runtime
        dynamic_edge_feedback: Forwarded to _rebuild_runtime
    """
    self._rebuild_runtime(
        transforms=transforms,
        vertex_config=vertex_config,
        edge_config=edge_config,
        dynamic_edge_feedback=dynamic_edge_feedback,
        strict_references=strict_references,
    )

ResourceConnector

Bases: ConfigBaseModel, ABC

Abstract base class for resource connectors (files or tables).

Provides common API for connector matching and resource identification. All concrete connector types inherit from this class.

Connectors only describe source-side matching/query behavior. Resource-to-connector linkage is handled by Bindings.

Source code in graflo/architecture/contract/bindings/connectors.py
class ResourceConnector(ConfigBaseModel, abc.ABC):
    """Abstract base class for resource connectors (files or tables).

    Provides common API for connector matching and resource identification.
    All concrete connector types inherit from this class.

    Connectors only describe source-side matching/query behavior. Resource-to-
    connector linkage is handled by ``Bindings``.
    """

    name: str | None = Field(
        default=None,
        description="Optional connector name used by top-level resource_connector mapping.",
    )
    resource_name: str | None = Field(
        default=None,
        description="Optional direct resource binding declared on the connector itself.",
    )
    # Excluded from serialization; recomputed on every validation pass.
    hash: str = Field(
        default="",
        exclude=True,
        description="Deterministic internal connector id derived from defining fields.",
    )

    def _hash_payload(self) -> dict[str, Any]:
        """Build the dict that the connector hash is computed over."""
        # name/resource_name are excluded so renaming or re-binding a
        # connector does not change its identity hash.
        payload = self.model_dump(
            mode="json",
            by_alias=True,
            exclude={"hash", "name", "resource_name"},
        )
        # Include the concrete class so distinct connector types with
        # identical fields never collide.
        payload["_connector_type"] = type(self).__name__
        return payload

    @model_validator(mode="after")
    def _compute_hash(self) -> Self:
        # Canonical JSON (sorted keys, compact separators) keeps the hash
        # stable across field ordering and whitespace differences.
        canonical = json.dumps(
            self._hash_payload(), sort_keys=True, separators=(",", ":")
        )
        object.__setattr__(
            self,
            "hash",
            hashlib.sha256(canonical.encode("utf-8")).hexdigest(),
        )
        return self

    @abc.abstractmethod
    def matches(self, resource_identifier: str) -> bool:
        """Check if connector matches a resource identifier.

        Args:
            resource_identifier: Identifier to match (filename or table name)

        Returns:
            bool: True if connector matches
        """
        pass

    @abc.abstractmethod
    def get_resource_type(self) -> ResourceType:
        """Get the type of resource this connector matches.

        Returns:
            ResourceType: Resource type enum value
        """
        pass

get_resource_type() abstractmethod

Get the type of resource this connector matches.

Returns:

Name Type Description
ResourceType ResourceType

Resource type enum value

Source code in graflo/architecture/contract/bindings/connectors.py
@abc.abstractmethod
def get_resource_type(self) -> ResourceType:
    """Get the type of resource this connector matches.

    Returns:
        ResourceType: Resource type enum value
    """
    ...

matches(resource_identifier) abstractmethod

Check if connector matches a resource identifier.

Parameters:

Name Type Description Default
resource_identifier str

Identifier to match (filename or table name)

required

Returns:

Name Type Description
bool bool

True if connector matches

Source code in graflo/architecture/contract/bindings/connectors.py
@abc.abstractmethod
def matches(self, resource_identifier: str) -> bool:
    """Check if connector matches a resource identifier.

    Args:
        resource_identifier: Identifier to match (filename or table name)

    Returns:
        bool: True if connector matches
    """
    pass

ResourceType

Bases: BaseEnum

Resource types for data sources.

Resource types distinguish between different data source categories. File type detection (CSV, JSON, JSONL, Parquet, etc.) is handled automatically by the loader based on file extensions.

Attributes:

Name Type Description
FILE

File-based data source (any format: CSV, JSON, JSONL, Parquet, etc.)

SQL_TABLE

SQL database table (e.g., PostgreSQL table)

SPARQL

SPARQL / RDF data source (endpoint or .ttl/.rdf files via rdflib)

Source code in graflo/architecture/contract/bindings/connectors.py
class ResourceType(BaseEnum):
    """Resource types for data sources.

    Resource types distinguish between different data source categories.
    File type detection (CSV, JSON, JSONL, Parquet, etc.) is handled
    automatically by the loader based on file extensions.

    Attributes:
        FILE: File-based data source (any format: CSV, JSON, JSONL, Parquet, etc.)
        SQL_TABLE: SQL database table (e.g., PostgreSQL table)
        SPARQL: SPARQL / RDF data source (endpoint or .ttl/.rdf files via rdflib)
    """

    FILE = "file"  # file-based source; format detected by the loader
    SQL_TABLE = "sql_table"  # relational database table
    SPARQL = "sparql"  # SPARQL endpoint or RDF file (via rdflib)

SparqlConnector

Bases: ResourceConnector

Connector for matching SPARQL / RDF data sources.

Each SparqlConnector targets instances of a single rdf:Class. It can be backed either by a remote SPARQL endpoint (Fuseki, Blazegraph, ...) or by a local RDF file parsed with rdflib.

Attributes:

Name Type Description
rdf_class str

Full URI of the rdf:Class whose instances this connector fetches (e.g. "http://example.org/Person").

endpoint_url str | None

SPARQL query endpoint URL. When set, instances are fetched via HTTP. When None the connector is for local file mode.

graph_uri str | None

Named-graph URI to restrict the query to (optional).

sparql_query str | None

Custom SPARQL SELECT query override. When provided the auto-generated per-class query is skipped.

rdf_file Path | None

Path to a local RDF file (.ttl, .rdf, .n3, .jsonld). Mutually exclusive with endpoint_url.

Source code in graflo/architecture/contract/bindings/connectors.py
class SparqlConnector(ResourceConnector):
    """Connector targeting instances of a single ``rdf:Class`` in a SPARQL/RDF source.

    A connector is backed either by a remote SPARQL endpoint
    (Fuseki, Blazegraph, ...) or by a local RDF file parsed with *rdflib* —
    the two modes are mutually exclusive.

    Attributes:
        rdf_class: Full URI of the ``rdf:Class`` whose instances are fetched
            (e.g. ``"http://example.org/Person"``).
        endpoint_url: SPARQL query endpoint URL; instances are fetched over
            HTTP when set, otherwise the connector runs in local-file mode.
        graph_uri: Optional named-graph URI restricting the query.
        sparql_query: Custom ``SELECT`` override; when provided the
            auto-generated per-class query is skipped.
        rdf_file: Local RDF file path (``.ttl``, ``.rdf``, ``.n3``,
            ``.jsonld``).
    """

    rdf_class: str = Field(
        ..., description="URI of the rdf:Class to fetch instances of"
    )
    endpoint_url: str | None = Field(
        default=None, description="SPARQL query endpoint URL"
    )
    graph_uri: str | None = Field(
        default=None, description="Named graph URI (optional)"
    )
    sparql_query: str | None = Field(
        default=None, description="Custom SPARQL query override"
    )
    rdf_file: pathlib.Path | None = Field(
        default=None, description="Path to a local RDF file"
    )

    def matches(self, resource_identifier: str) -> bool:
        """Compare *resource_identifier* with the class URI's local name.

        Args:
            resource_identifier: Identifier to match against

        Returns:
            True when *resource_identifier* equals the class local name
        """
        # Strip everything up to the last '#', then up to the last '/'.
        after_hash = self.rdf_class.rsplit("#", 1)[-1]
        local_name = after_hash.rsplit("/", 1)[-1]
        return local_name == resource_identifier

    def get_resource_type(self) -> ResourceType:
        """Return ``ResourceType.SPARQL``."""
        return ResourceType.SPARQL

    def build_select_query(self) -> str:
        """Build a SPARQL SELECT query for instances of ``rdf_class``.

        A custom ``sparql_query`` takes precedence and is returned verbatim.
        Otherwise a simple per-class query is generated::

            SELECT ?s ?p ?o WHERE {
              ?s a <rdf_class> .
              ?s ?p ?o .
            }

        Returns:
            SPARQL query string
        """
        if self.sparql_query:
            return self.sparql_query

        # Wrap the triple patterns in a GRAPH block only when a named
        # graph was configured.
        if self.graph_uri:
            graph_open = f"GRAPH <{self.graph_uri}> {{"
            graph_close = "}"
        else:
            graph_open = ""
            graph_close = ""

        pieces = [
            "SELECT ?s ?p ?o WHERE {",
            graph_open,
            f"?s a <{self.rdf_class}> .",
            "?s ?p ?o .",
            graph_close,
            "}",
        ]
        return " ".join(pieces)

build_select_query()

Build a SPARQL SELECT query for instances of rdf_class.

If sparql_query is set it is returned as-is. Otherwise a simple per-class query is generated:

SELECT ?s ?p ?o WHERE {
  ?s a <rdf_class> .
  ?s ?p ?o .
}

Returns:

Type Description
str

SPARQL query string

Source code in graflo/architecture/contract/bindings/connectors.py
def build_select_query(self) -> str:
    """Build a SPARQL SELECT query for instances of ``rdf_class``.

    If *sparql_query* is set it is returned as-is.  Otherwise a simple
    per-class query is generated::

        SELECT ?s ?p ?o WHERE {
          ?s a <rdf_class> .
          ?s ?p ?o .
        }

    Returns:
        SPARQL query string
    """
    if self.sparql_query:
        return self.sparql_query

    graph_open = f"GRAPH <{self.graph_uri}> {{" if self.graph_uri else ""
    graph_close = "}" if self.graph_uri else ""

    return (
        "SELECT ?s ?p ?o WHERE { "
        f"{graph_open} "
        f"?s a <{self.rdf_class}> . "
        f"?s ?p ?o . "
        f"{graph_close} "
        "}"
    )

get_resource_type()

Return ResourceType.SPARQL.

Source code in graflo/architecture/contract/bindings/connectors.py
def get_resource_type(self) -> ResourceType:
    """Return ``ResourceType.SPARQL``."""
    return ResourceType.SPARQL

matches(resource_identifier)

Match by the local name (fragment) of the rdf:Class URI.

Parameters:

Name Type Description Default
resource_identifier str

Identifier to match against

required

Returns:

Type Description
bool

True when resource_identifier equals the class local name

Source code in graflo/architecture/contract/bindings/connectors.py
def matches(self, resource_identifier: str) -> bool:
    """Match by the local name (fragment) of the rdf:Class URI.

    Args:
        resource_identifier: Identifier to match against

    Returns:
        True when *resource_identifier* equals the class local name
    """
    local_name = self.rdf_class.rsplit("#", 1)[-1].rsplit("/", 1)[-1]
    return resource_identifier == local_name

TableConnector

Bases: ResourceConnector

Connector for matching database tables.

Supports simple single-table queries as well as multi-table JOINs and pushdown filters via FilterExpression.

Attributes:

Name Type Description
table_name str

Exact table name or regex pattern

schema_name str | None

Schema name (optional, defaults to public)

database str | None

Database name (optional)

date_field str | None

Name of the date field to filter on (for date-based filtering)

date_filter str | None

SQL-style date filter condition (e.g., "> '2020-10-10'")

date_range_start str | None

Start date for range filtering (e.g., "2015-11-11")

date_range_days int | None

Number of days after start date (used with date_range_start)

filters list[Any]

General-purpose pushdown filters rendered as SQL WHERE fragments.

joins list[JoinClause]

Multi-table JOIN specifications (auto-generated or explicit).

select_columns list[str] | None

Explicit SELECT column list. None means * for the base table (plus aliased columns from joins).

Source code in graflo/architecture/contract/bindings/connectors.py
class TableConnector(ResourceConnector):
    """Connector for matching database tables.

    Supports simple single-table queries as well as multi-table JOINs and
    pushdown filters via ``FilterExpression``.

    Attributes:
        table_name: Exact table name or regex pattern
        schema_name: Schema name (optional, defaults to public)
        database: Database name (optional)
        date_field: Name of the date field to filter on (for date-based filtering)
        date_filter: SQL-style date filter condition (e.g., "> '2020-10-10'")
        date_range_start: Start date for range filtering (e.g., "2015-11-11")
        date_range_days: Number of days after start date (used with date_range_start)
        filters: General-purpose pushdown filters rendered as SQL WHERE fragments.
        joins: Multi-table JOIN specifications (auto-generated or explicit).
        select_columns: Explicit SELECT column list. None means ``*`` for the
            base table (plus aliased columns from joins).
        view: Declarative ``SelectSpec`` (or dict coerced into one); when set,
            query building is delegated to it.
    """

    table_name: str = Field(
        default="", validation_alias=AliasChoices("table_name", "table")
    )
    schema_name: str | None = Field(
        default=None, validation_alias=AliasChoices("schema_name", "schema")
    )
    database: str | None = None
    date_field: str | None = None
    date_filter: str | None = None
    date_range_start: str | None = None
    date_range_days: int | None = None
    filters: list[Any] = Field(
        default_factory=list,
        description="Pushdown FilterExpression filters (rendered to SQL WHERE).",
    )
    joins: list[JoinClause] = Field(
        default_factory=list,
        description="JOIN clauses for multi-table queries.",
    )
    select_columns: list[str] | None = Field(
        default=None,
        description="Explicit SELECT columns. None = SELECT * (plus join aliases).",
    )
    view: Any = Field(
        default=None,
        description="SelectSpec or dict for declarative view (alternative to table+joins+filters).",
    )

    @field_validator("view", mode="before")
    @classmethod
    def _coerce_view(cls, v: Any) -> Any:
        """Coerce a dict ``view`` payload into a ``SelectSpec`` instance."""
        if v is None:
            return None
        if isinstance(v, dict):
            # Deferred import: SelectSpec is only needed when a dict view
            # is actually supplied.
            from graflo.filter.select import SelectSpec

            return SelectSpec.from_dict(v)
        return v

    @model_validator(mode="after")
    def _validate_table_connector(self) -> Self:
        """Validate table_name and date filtering parameters."""
        if not self.table_name:
            raise ValueError("table_name is required for TableConnector")
        if (self.date_filter or self.date_range_start) and not self.date_field:
            raise ValueError(
                "date_field is required when using date_filter or date_range_start"
            )
        if self.date_range_days is not None and not self.date_range_start:
            raise ValueError("date_range_start is required when using date_range_days")
        return self

    def matches(self, resource_identifier: str) -> bool:
        """Check if connector matches a table name.

        Args:
            resource_identifier: Table name to match (format: schema.table or just table)

        Returns:
            bool: True if connector matches
        """
        if not self.table_name:
            return False

        # Compile regex expression. A leading '^' or trailing '$' marks the
        # configured name as an explicit regex; otherwise match it exactly.
        if self.table_name.startswith("^") or self.table_name.endswith("$"):
            # Already a regex expression
            compiled_regex = re.compile(self.table_name)
        else:
            # Exact match expression
            compiled_regex = re.compile(f"^{re.escape(self.table_name)}$")

        # Check if resource_identifier matches
        if compiled_regex.match(resource_identifier):
            return True

        # If schema_name is specified, also check schema.table format
        if self.schema_name:
            full_name = f"{self.schema_name}.{resource_identifier}"
            if compiled_regex.match(full_name):
                return True

        return False

    def get_resource_type(self) -> ResourceType:
        """Get resource type."""
        return ResourceType.SQL_TABLE

    def build_where_clause(self) -> str:
        """Build SQL WHERE clause from date filtering parameters **and** general filters.

        NOTE(review): values are interpolated directly into the SQL text;
        inputs are presumed to come from trusted declarative config — confirm
        before exposing to untrusted callers.

        Returns:
            WHERE clause string (without the WHERE keyword) or empty string if no filters
        """
        from graflo.filter.onto import FilterExpression
        from graflo.onto import ExpressionFlavor

        conditions: list[str] = []

        # Date-specific conditions (legacy fields)
        if self.date_field:
            if self.date_range_start and self.date_range_days is not None:
                # Half-open interval: [start, start + N days)
                conditions.append(
                    f"\"{self.date_field}\" >= '{self.date_range_start}'::date"
                )
                conditions.append(
                    f"\"{self.date_field}\" < '{self.date_range_start}'::date + INTERVAL '{self.date_range_days} days'"
                )
            elif self.date_filter:
                filter_parts = self.date_filter.strip().split(None, 1)
                if len(filter_parts) == 2:
                    operator, value = filter_parts
                    # Heuristic: quote a bare 10-char YYYY-MM-DD date literal.
                    if not (value.startswith("'") and value.endswith("'")):
                        if len(value) == 10 and value.count("-") == 2:
                            value = f"'{value}'"
                    conditions.append(f'"{self.date_field}" {operator} {value}')
                else:
                    # Single-token filter: append verbatim after the column.
                    conditions.append(f'"{self.date_field}" {self.date_filter}')

        # General-purpose FilterExpression filters
        for filt in self.filters:
            if isinstance(filt, FilterExpression):
                rendered = filt(kind=ExpressionFlavor.SQL)
                if rendered:
                    conditions.append(str(rendered))

        if conditions:
            return " AND ".join(conditions)
        return ""

    def build_query(self, effective_schema: str | None = None) -> str:
        """Build a complete SQL SELECT query.

        When ``view`` is set, delegates to ``view.build_sql()``. Otherwise
        incorporates the base table, any JoinClauses, explicit select_columns,
        date filters, and FilterExpression filters.

        Args:
            effective_schema: Schema to use if ``self.schema_name`` is None.

        Returns:
            Complete SQL query string.
        """
        schema = self.schema_name or effective_schema or "public"
        if self.view is not None:
            from graflo.filter.select import SelectSpec

            if isinstance(self.view, SelectSpec):
                return self.view.build_sql(schema=schema, base_table=self.table_name)
        # Alias the base table only when JOINs need to disambiguate columns.
        base_alias = "r" if self.joins else None
        base_ref = f'"{schema}"."{self.table_name}"'
        if base_alias:
            base_ref_aliased = f"{base_ref} {base_alias}"
        else:
            base_ref_aliased = base_ref

        # --- SELECT ---
        select_parts: list[str] = []
        if self.select_columns is not None:
            select_parts = list(self.select_columns)
        elif self.joins:
            select_parts.append(f"{base_alias}.*")
            for jc in self.joins:
                alias = jc.alias or jc.table
                if jc.select_fields is not None:
                    for col in jc.select_fields:
                        # Prefix joined columns to avoid name collisions.
                        select_parts.append(f'{alias}."{col}" AS "{alias}__{col}"')
                else:
                    select_parts.append(f"{alias}.*")
        else:
            select_parts.append("*")

        select_clause = ", ".join(select_parts)

        # --- FROM + JOINs ---
        from_clause = base_ref_aliased
        for jc in self.joins:
            jc_schema = jc.schema_name or schema
            alias = jc.alias or jc.table
            join_ref = f'"{jc_schema}"."{jc.table}"'
            left_col = (
                f'{base_alias}."{jc.on_self}"' if base_alias else f'"{jc.on_self}"'
            )
            right_col = f'{alias}."{jc.on_other}"'
            from_clause += (
                f" {jc.join_type} JOIN {join_ref} {alias} ON {left_col} = {right_col}"
            )

        query = f"SELECT {select_clause} FROM {from_clause}"

        # --- WHERE ---
        where = self.build_where_clause()
        if where:
            query += f" WHERE {where}"

        return query

build_query(effective_schema=None)

Build a complete SQL SELECT query.

When view is set, delegates to view.build_sql(). Otherwise incorporates the base table, any JoinClauses, explicit select_columns, date filters, and FilterExpression filters.

Parameters:

Name Type Description Default
effective_schema str | None

Schema to use if self.schema_name is None.

None

Returns:

Type Description
str

Complete SQL query string.

Source code in graflo/architecture/contract/bindings/connectors.py
def build_query(self, effective_schema: str | None = None) -> str:
    """Build a complete SQL SELECT query.

    When ``view`` is set, delegates to ``view.build_sql()``. Otherwise
    incorporates the base table, any JoinClauses, explicit select_columns,
    date filters, and FilterExpression filters.

    Args:
        effective_schema: Schema to use if ``self.schema_name`` is None.

    Returns:
        Complete SQL query string.
    """
    schema = self.schema_name or effective_schema or "public"
    if self.view is not None:
        from graflo.filter.select import SelectSpec

        if isinstance(self.view, SelectSpec):
            return self.view.build_sql(schema=schema, base_table=self.table_name)
    base_alias = "r" if self.joins else None
    base_ref = f'"{schema}"."{self.table_name}"'
    if base_alias:
        base_ref_aliased = f"{base_ref} {base_alias}"
    else:
        base_ref_aliased = base_ref

    # --- SELECT ---
    select_parts: list[str] = []
    if self.select_columns is not None:
        select_parts = list(self.select_columns)
    elif self.joins:
        select_parts.append(f"{base_alias}.*")
        for jc in self.joins:
            alias = jc.alias or jc.table
            jc_schema = jc.schema_name or schema
            if jc.select_fields is not None:
                for col in jc.select_fields:
                    select_parts.append(f'{alias}."{col}" AS "{alias}__{col}"')
            else:
                select_parts.append(f"{alias}.*")
    else:
        select_parts.append("*")

    select_clause = ", ".join(select_parts)

    # --- FROM + JOINs ---
    from_clause = base_ref_aliased
    for jc in self.joins:
        jc_schema = jc.schema_name or schema
        alias = jc.alias or jc.table
        join_ref = f'"{jc_schema}"."{jc.table}"'
        left_col = (
            f'{base_alias}."{jc.on_self}"' if base_alias else f'"{jc.on_self}"'
        )
        right_col = f'{alias}."{jc.on_other}"'
        from_clause += (
            f" {jc.join_type} JOIN {join_ref} {alias} ON {left_col} = {right_col}"
        )

    query = f"SELECT {select_clause} FROM {from_clause}"

    # --- WHERE ---
    where = self.build_where_clause()
    if where:
        query += f" WHERE {where}"

    return query

build_where_clause()

Build SQL WHERE clause from date filtering parameters and general filters.

Returns:

Type Description
str

WHERE clause string (without the WHERE keyword) or empty string if no filters

Source code in graflo/architecture/contract/bindings/connectors.py
def build_where_clause(self) -> str:
    """Build SQL WHERE clause from date filtering parameters **and** general filters.

    Returns:
        WHERE clause string (without the WHERE keyword) or empty string if no filters
    """
    from graflo.filter.onto import FilterExpression
    from graflo.onto import ExpressionFlavor

    conditions: list[str] = []

    # Date-specific conditions (legacy fields)
    if self.date_field:
        if self.date_range_start and self.date_range_days is not None:
            conditions.append(
                f"\"{self.date_field}\" >= '{self.date_range_start}'::date"
            )
            conditions.append(
                f"\"{self.date_field}\" < '{self.date_range_start}'::date + INTERVAL '{self.date_range_days} days'"
            )
        elif self.date_filter:
            filter_parts = self.date_filter.strip().split(None, 1)
            if len(filter_parts) == 2:
                operator, value = filter_parts
                if not (value.startswith("'") and value.endswith("'")):
                    if len(value) == 10 and value.count("-") == 2:
                        value = f"'{value}'"
                conditions.append(f'"{self.date_field}" {operator} {value}')
            else:
                conditions.append(f'"{self.date_field}" {self.date_filter}')

    # General-purpose FilterExpression filters
    for filt in self.filters:
        if isinstance(filt, FilterExpression):
            rendered = filt(kind=ExpressionFlavor.SQL)
            if rendered:
                conditions.append(str(rendered))

    if conditions:
        return " AND ".join(conditions)
    return ""

get_resource_type()

Get resource type.

Source code in graflo/architecture/contract/bindings/connectors.py
def get_resource_type(self) -> ResourceType:
    """Get resource type."""
    return ResourceType.SQL_TABLE

matches(resource_identifier)

Check if connector matches a table name.

Parameters:

Name Type Description Default
resource_identifier str

Table name to match (format: schema.table or just table)

required

Returns:

Name Type Description
bool bool

True if connector matches

Source code in graflo/architecture/contract/bindings/connectors.py
def matches(self, resource_identifier: str) -> bool:
    """Check if connector matches a table name.

    Args:
        resource_identifier: Table name to match (format: schema.table or just table)

    Returns:
        bool: True if connector matches
    """
    if not self.table_name:
        return False

    # Compile regex expression
    if self.table_name.startswith("^") or self.table_name.endswith("$"):
        # Already a regex expression
        compiled_regex = re.compile(self.table_name)
    else:
        # Exact match expression
        compiled_regex = re.compile(f"^{re.escape(self.table_name)}$")

    # Check if resource_identifier matches
    if compiled_regex.match(resource_identifier):
        return True

    # If schema_name is specified, also check schema.table format
    if self.schema_name:
        full_name = f"{self.schema_name}.{resource_identifier}"
        if compiled_regex.match(full_name):
            return True

    return False

Transform

Bases: ProtoTransform

Concrete transform implementation.

Wraps a ProtoTransform with input extraction, output dressing, field mapping, and transform composition.

Attributes:

Name Type Description
fields tuple[str, ...]

Tuple of fields to transform

rename dict[str, str]

Dictionary mapping input fields to output fields

functional_transform bool

Whether this is a functional transform

Source code in graflo/architecture/contract/declarations/transform.py
(Rendered line-number gutter 314–781 from the documentation source listing omitted.)
class Transform(ProtoTransform):
    """Concrete transform implementation.

    Wraps a ProtoTransform with input extraction, output dressing, field
    mapping, and transform composition.

    Effective ``input``/``output``/``map`` values are derived once, after
    pydantic validation, from whichever of ``fields``/``rename``/``dress``/
    ``input_groups`` the user supplied; see ``_derive_effective_io_and_map``
    for the precedence rules.

    Attributes:
        fields: Tuple of fields to transform
        rename: Dictionary mapping input fields to output fields
        functional_transform: Whether this is a functional transform
    """

    fields: tuple[str, ...] = Field(
        default_factory=tuple,
        description="Field names for declarative transform (used to derive input when input unset).",
    )
    rename: dict[str, str] = Field(
        default_factory=dict,
        description="Mapping of input_key -> output_key for pure field renaming (no function).",
    )
    strategy: Literal["single", "each", "all"] = Field(
        default="single",
        description=(
            "Functional call strategy. "
            "single: call function once with all input values. "
            "each: call function once per input field (unary). "
            "all: pass full document as a single argument."
        ),
    )
    passthrough_group_output: bool = Field(
        default=True,
        description=(
            "When grouped mode omits outputs, map function results back to input group keys."
        ),
    )

    # Derived flag: recomputed in _init_derived from the private callable
    # (_foo, presumably declared on ProtoTransform — not visible here).
    # Not meant to be set directly by users.
    functional_transform: bool = Field(
        default=False,
        description="True when a callable (module.foo) is set; False for pure map/dress transforms.",
    )

    @model_validator(mode="before")
    @classmethod
    def _normalize_fields(cls, data: Any) -> Any:
        """Coerce `fields` to a tuple and reject the removed `switch` option."""
        if not isinstance(data, dict):
            return data
        # Shallow-copy so the caller's payload is never mutated.
        data = dict(data)
        if "fields" in data and data["fields"] is not None:
            data["fields"] = _tuple_it(data["fields"])
        if "switch" in data:
            raise ValueError(
                "Legacy `switch` is no longer supported. Use `input` + `dress`."
            )
        return data

    @model_validator(mode="after")
    def _init_derived(self) -> Self:
        """Derive effective input/output/map, then validate the configuration."""
        # Record whether the user supplied a rename map BEFORE derivation can
        # fill one in; _validate_configuration treats the two cases differently.
        explicit_map = bool(self.rename)
        # object.__setattr__ bypasses pydantic's assignment validation when
        # writing derived state from inside an after-validator.
        object.__setattr__(self, "functional_transform", self._foo is not None)
        next_input, next_output, next_map = self._derive_effective_io_and_map()
        object.__setattr__(self, "input", next_input)
        object.__setattr__(self, "output", next_output)
        object.__setattr__(self, "map", next_map)
        self._validate_configuration(explicit_map=explicit_map)
        return self

    def _derive_grouped_default_output(self) -> tuple[str, ...]:
        """Default output for grouped mode: echo single-field group names.

        Applies only when input_groups is set, no output/output_groups were
        declared, and passthrough is enabled; otherwise the declared output
        is returned unchanged.
        """
        if not self.input_groups or self.output or self.output_groups:
            return self.output
        if not self.passthrough_group_output:
            return self.output
        scalar_names: list[str] = []
        for group in self.input_groups:
            # Passthrough is only well-defined for single-field groups.
            if len(group) != 1:
                return self.output
            scalar_names.append(group[0])
        return tuple(scalar_names) if scalar_names else self.output

    def _derive_effective_io_and_map(
        self,
    ) -> tuple[tuple[str, ...], tuple[str, ...], dict[str, str]]:
        """Compute effective input/output/map once using explicit precedence."""
        next_input = self.input
        next_output = self._derive_grouped_default_output()
        next_map = dict(self.rename)

        # `fields` is a declarative shortcut for `input`.
        if self.fields and not next_input:
            next_input = self.fields

        # A rename map can supply whichever of input/output is missing.
        if next_map:
            if not next_input and not next_output:
                next_input = tuple(next_map.keys())
                next_output = tuple(next_map.values())
            elif not next_input:
                next_input = tuple(next_map.keys())
            elif not next_output:
                next_output = tuple(next_map.values())

        # `dress` pivots the result into a fixed (key, value) field pair and
        # therefore overrides any derived output.
        if self.dress is not None:
            next_output = (self.dress.key, self.dress.value)
        elif not next_output and next_input:
            next_output = next_input

        # Derive a positional identity map when none was given explicitly
        # and input/output align one-to-one.
        if (
            not next_map
            and next_input
            and next_output
            and len(next_input) == len(next_output)
        ):
            next_map = {src: dst for src, dst in zip(next_input, next_output)}

        return next_input, next_output, next_map

    def _init_io_from_map(self, force_init: bool = False) -> None:
        """Backwards-compatible shim; prefer sync_io_from_map()."""
        if not self.rename:
            return
        map_input = tuple(self.rename.keys())
        map_output = tuple(self.rename.values())
        if force_init or (not self.input and not self.output):
            object.__setattr__(self, "input", map_input)
            object.__setattr__(self, "output", map_output)
            return
        # Otherwise fill in only the side the user left unset.
        if not self.input:
            object.__setattr__(self, "input", map_input)
        elif not self.output:
            object.__setattr__(self, "output", map_output)

    def _validate_configuration(self, *, explicit_map: bool) -> None:
        """Validate that the transform has enough information to operate."""
        # target='keys' is a self-contained mode: it renames document keys
        # with a function and excludes every field-oriented option.
        if self.target == "keys":
            if self.input_groups or self.output_groups:
                raise ValueError(
                    "target='keys' does not accept input_groups/output_groups."
                )
            if self._foo is None:
                raise ValueError("target='keys' requires a functional transform.")
            if self.rename:
                raise ValueError("target='keys' cannot be combined with map.")
            if self.input or self.output or self.fields:
                raise ValueError(
                    "target='keys' does not accept input/output/fields; use keys selector."
                )
            if self.dress is not None:
                raise ValueError("target='keys' is not compatible with dress.")
            if self.strategy != "single":
                raise ValueError(
                    "target='keys' uses implicit per-key execution and does not accept strategy."
                )
            return

        # Reject only user-specified map+function conflict. A derived map
        # (from input/output defaults) is valid for functional transforms.
        if explicit_map and self.rename and self._foo is not None:
            raise ValueError("map and functional transform cannot be used together.")
        if self.dress is not None:
            if self._foo is None:
                raise ValueError(
                    "dress requires a functional transform (module + foo)."
                )
            # _dress_as_dict pivots exactly one input field name.
            if len(self.input) != 1:
                raise ValueError("dress requires exactly one input field.")
        if self.strategy != "single" and self._foo is None:
            raise ValueError("strategy applies only to functional transforms.")
        if self.input_groups:
            if self._foo is None:
                raise ValueError(
                    "input_groups requires a functional transform (module + foo)."
                )
            if self.strategy != "single":
                raise ValueError(
                    "input_groups mode is explicit grouped execution and does not accept strategy."
                )
            if self.input or self.fields:
                raise ValueError("input_groups cannot be combined with input/fields.")
            if self.rename:
                raise ValueError("input_groups cannot be combined with map.")
            if self.dress is not None:
                raise ValueError("input_groups is not compatible with dress.")
            if self.output_groups and self.output:
                raise ValueError(
                    "Provide either output or output_groups for input_groups mode, not both."
                )
            if self.output_groups and len(self.output_groups) != len(self.input_groups):
                raise ValueError(
                    "output_groups must have same number of groups as input_groups."
                )
            if self.output and len(self.output) != len(self.input_groups):
                raise ValueError(
                    "When using input_groups with scalar outputs, output length must match number of input_groups."
                )
        elif self.output_groups:
            raise ValueError("output_groups requires input_groups.")
        # Functional transforms need something to feed the callable unless
        # they consume the full document ('all') or use grouped inputs.
        if self._foo is not None and not self.input:
            if self.strategy != "all" and not self.input_groups:
                raise ValueError(
                    "Functional transforms require `input` (string or list of field names)."
                )
        if self.strategy == "all":
            if self.input or self.fields:
                raise ValueError("strategy='all' does not accept input/fields.")
            if self.dress is not None:
                raise ValueError("strategy='all' is not compatible with dress.")
        if self.strategy == "each":
            if not self.input:
                raise ValueError("strategy='each' requires one or more input fields.")
            if self.output and len(self.input) != len(self.output):
                raise ValueError(
                    "strategy='each' requires output length to match input length."
                )
        # Pure mappings are positional, so input/output must align.
        if (
            self._foo is None
            and self.dress is None
            and self.input
            and self.output
            and len(self.input) != len(self.output)
        ):
            raise ValueError(
                "Non-functional transforms require input/output to have the same length."
            )
        # Completely empty configuration (no io, no groups, no name, and not
        # a full-document functional transform) is unusable.
        if (
            not self.input
            and not self.output
            and not self.input_groups
            and not self.output_groups
            and not self.name
            and not (self._foo is not None and self.strategy == "all")
        ):
            raise ValueError(
                "Either input/output, fields, map or name must be provided in "
                "Transform constructor."
            )

    def _refresh_derived(self) -> None:
        """Re-run derived state (e.g. map from input/output) after mutating attributes."""
        # Only re-derive the positional map; an explicit rename wins, and a
        # length mismatch means no well-defined positional mapping exists.
        if self.rename or not self.input or not self.output:
            return
        if len(self.input) != len(self.output):
            return
        object.__setattr__(
            self, "map", {src: dst for src, dst in zip(self.input, self.output)}
        )

    def __call__(self, *nargs: Any, **kwargs: Any) -> dict[str, Any] | Any:
        """Execute the transform.

        Args:
            *nargs: Positional arguments for the transform
            **kwargs: Keyword arguments for the transform

        Returns:
            dict: Transformed data
        """
        # Key-renaming and grouped modes require the full document and
        # return early through their dedicated code paths.
        if self.target == "keys":
            input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
            if input_doc is None:
                raise TransformException(
                    "target='keys' requires a document dictionary."
                )
            return self._transform_keys(input_doc, **kwargs)

        if self.input_groups:
            input_doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None
            if input_doc is None:
                raise TransformException(
                    "input_groups transforms require a document dictionary."
                )
            return self._transform_input_groups(input_doc, **kwargs)

        if self.is_mapping:
            # Pure mapping: select values from the document, or pass
            # positional arguments through untouched.
            input_doc = nargs[0]
            if isinstance(input_doc, dict):
                output_values = [input_doc[k] for k in self.input]
            else:
                output_values = list(nargs)
        else:
            if self.strategy == "all":
                # Single call with the entire document (or the bare args).
                if nargs and isinstance(nargs[0], dict):
                    output_values = self.apply(nargs[0], **kwargs)
                else:
                    output_values = self.apply(*nargs, **kwargs)
            elif self.strategy == "each":
                # One unary call per input field (or per positional arg).
                if nargs and isinstance(input_doc := nargs[0], dict):
                    output_values = [
                        self.apply(input_doc[k], **kwargs) for k in self.input
                    ]
                else:
                    output_values = [self.apply(value, **kwargs) for value in nargs]
            else:
                # Default: one call with all selected values as positionals.
                if nargs and isinstance(input_doc := nargs[0], dict):
                    new_args = [input_doc[k] for k in self.input]
                    output_values = self.apply(*new_args, **kwargs)
                else:
                    output_values = self.apply(*nargs, **kwargs)

        # Dress positional results into a dict only when outputs are declared.
        if self.output:
            r = self._dress_as_dict(output_values)
        else:
            r = output_values
        return r

    def _apply_grouped_result(
        self,
        out: dict[str, Any],
        result: Any,
        input_group: tuple[str, ...],
        output_group: tuple[str, ...] | None,
        *,
        group_index: int,
    ) -> None:
        """Write one group's function result into ``out`` under resolved keys.

        Key resolution order: explicit output_groups entry, then scalar
        output per group, then passthrough to the input group's own names.

        Raises:
            TransformException: On arity mismatches or duplicate output keys.
        """
        if output_group is not None:
            if isinstance(result, (list, tuple)):
                values = list(result)
            else:
                values = [result]
            if len(values) != len(output_group):
                raise TransformException(
                    f"input_groups[{group_index}] produced {len(values)} values, "
                    f"but output_groups[{group_index}] expects {len(output_group)}."
                )
            pairs = zip(output_group, values)
        elif self.output:
            # Scalar output mode: one output name per group, by position.
            pairs = ((self.output[group_index], result),)
        else:
            # Passthrough: map results back onto the input group's field names.
            if isinstance(result, (list, tuple)):
                values = list(result)
                if len(values) != len(input_group):
                    raise TransformException(
                        f"input_groups[{group_index}] has {len(input_group)} fields, "
                        f"but transform returned {len(values)} values. "
                        "Provide output/output_groups explicitly to resolve mapping."
                    )
                pairs = zip(input_group, values)
            else:
                if len(input_group) != 1:
                    raise TransformException(
                        f"input_groups[{group_index}] has {len(input_group)} fields "
                        "but transform returned a scalar. "
                        "Provide output/output_groups explicitly for scalar group results."
                    )
                pairs = ((input_group[0], result),)
        for key, value in pairs:
            if key in out:
                raise TransformException(
                    f"Grouped transform produced duplicate output key '{key}'."
                )
            out[key] = value

    def _transform_input_groups(
        self, doc: dict[str, Any], **kwargs: Any
    ) -> dict[str, Any]:
        """Apply the function once per input group and merge results into one dict."""
        out: dict[str, Any] = {}
        for idx, input_group in enumerate(self.input_groups):
            # Raises KeyError when the document lacks one of the group's fields.
            values = [doc[k] for k in input_group]
            result = self.apply(*values, **kwargs)
            output_group = self.output_groups[idx] if self.output_groups else None
            self._apply_grouped_result(
                out,
                result,
                input_group,
                output_group,
                group_index=idx,
            )
        return out

    @property
    def is_mapping(self) -> bool:
        """True when the transform is pure mapping (no function)."""
        return self._foo is None

    def _dress_as_dict(self, transform_result: Any) -> dict[str, Any]:
        """Convert transform result to dictionary format.

        When ``dress`` is set the result is pivoted: the input field name is
        stored under ``dress.key`` and the function result under ``dress.value``.
        Otherwise the result is mapped positionally to ``output`` fields.
        """
        if self.dress is not None:
            return {
                self.dress.key: self.input[0],
                self.dress.value: transform_result,
            }
        elif isinstance(transform_result, (list, tuple)):
            return {k: v for k, v in zip(self.output, transform_result)}
        else:
            # Scalar result: attach it to the last declared output field.
            return {self.output[-1]: transform_result}

    def _selected_keys(self, doc: dict[str, Any]) -> set[str]:
        """Resolve the set of document keys the key-transform applies to."""
        if self.keys.mode == "all":
            return set(doc.keys())
        selected = set(self.keys.names)
        if self.keys.mode == "include":
            return selected
        # mode == "exclude": everything in the doc except the named keys.
        return {k for k in doc if k not in selected}

    def _transform_keys(self, doc: dict[str, Any], **kwargs: Any) -> dict[str, Any]:
        """Rename selected document keys via the function; values pass through."""
        selected = self._selected_keys(doc)
        out: dict[str, Any] = {}
        for key, value in doc.items():
            new_key = self.apply(key, **kwargs) if key in selected else key
            if not isinstance(new_key, str):
                raise TransformException(
                    "Key transform functions must return str values."
                )
            # A rename colliding with another (possibly untouched) key would
            # silently drop data, so it is rejected outright.
            if new_key in out:
                raise TransformException(
                    f"Key transform collision detected for key '{new_key}'."
                )
            out[new_key] = value
        return out

    @property
    def is_dummy(self) -> bool:
        """Check if this is a dummy transform.

        A dummy is a named reference with no rename map and no callable —
        i.e. a placeholder to be resolved from a transform library.

        Returns:
            bool: True if this is a dummy transform
        """
        return self.name is not None and not self.rename and self._foo is None

    def merge_from(self, t: Transform) -> Transform:
        """Merge another transform's configuration into a copy of it.

        Returns a new Transform with values from self overriding t where set.
        Does not override ConfigBaseModel.update (in-place); use this for
        copy-and-merge semantics.

        Args:
            t: Transform to merge from

        Returns:
            Transform: New transform with merged configuration
        """
        t_copy = deepcopy(t)
        if self.input:
            t_copy.input = self.input
        if self.output:
            t_copy.output = self.output
        if self.params:
            # self's params win on key collisions.
            t_copy.params = {**t_copy.params, **self.params}
        # Recompute derived state (e.g. map) after the direct attribute writes.
        t_copy._refresh_derived()
        return t_copy

    def get_barebone(
        self, other: Transform | None
    ) -> tuple[Transform | None, Transform | None]:
        """Get the barebone transform configuration.

        Args:
            other: Optional transform to use as base

        Returns:
            tuple[Transform | None, Transform | None]: Updated self transform
            and transform to store in library
        """
        self_param = self.to_dict(exclude_defaults=True)
        if self.foo is not None:
            # self will be the lib transform
            return None, self
        elif other is not None and other.foo is not None:
            # init self from other
            self_param.pop("foo", None)
            self_param.pop("module", None)
            other_param = other.to_dict(exclude_defaults=True)
            other_param.update(self_param)
            return Transform(**other_param), None
        else:
            return None, None

is_dummy property

Check if this is a dummy transform.

Returns:

Name Type Description
bool bool

True if this is a dummy transform

is_mapping property

True when the transform is pure mapping (no function).

__call__(*nargs, **kwargs)

Execute the transform.

Parameters:

Name Type Description Default
*nargs Any

Positional arguments for the transform

()
**kwargs Any

Keyword arguments for the transform

{}

Returns:

Name Type Description
dict dict[str, Any] | Any

Transformed data

Source code in graflo/architecture/contract/declarations/transform.py
def __call__(self, *nargs: Any, **kwargs: Any) -> dict[str, Any] | Any:
    """Execute the transform.

    Args:
        *nargs: Positional arguments for the transform
        **kwargs: Keyword arguments for the transform

    Returns:
        dict: Transformed data
    """
    doc = nargs[0] if nargs and isinstance(nargs[0], dict) else None

    # Key-renaming mode operates on the whole document and returns early.
    if self.target == "keys":
        if doc is None:
            raise TransformException(
                "target='keys' requires a document dictionary."
            )
        return self._transform_keys(doc, **kwargs)

    # Grouped execution also requires a document and has its own path.
    if self.input_groups:
        if doc is None:
            raise TransformException(
                "input_groups transforms require a document dictionary."
            )
        return self._transform_input_groups(doc, **kwargs)

    if self.is_mapping:
        # Pure mapping: pick values out of the document, or pass the bare
        # positional arguments through untouched.
        first = nargs[0]
        if isinstance(first, dict):
            results = [first[field] for field in self.input]
        else:
            results = list(nargs)
    elif self.strategy == "all":
        # Whole-document call: hand the dict (or bare args) to the function once.
        results = self.apply(doc, **kwargs) if doc is not None else self.apply(*nargs, **kwargs)
    elif self.strategy == "each":
        # One unary call per input field (or per positional argument).
        if doc is not None:
            results = [self.apply(doc[field], **kwargs) for field in self.input]
        else:
            results = [self.apply(value, **kwargs) for value in nargs]
    else:
        # Default 'single' strategy: one call with all selected values.
        if doc is not None:
            results = self.apply(*(doc[field] for field in self.input), **kwargs)
        else:
            results = self.apply(*nargs, **kwargs)

    # Dress positional results into a dict only when output fields are set.
    return self._dress_as_dict(results) if self.output else results

get_barebone(other)

Get the barebone transform configuration.

Parameters:

Name Type Description Default
other Transform | None

Optional transform to use as base

required

Returns:

Type Description
Transform | None

tuple[Transform | None, Transform | None]: Updated self transform

Transform | None

and transform to store in library

Source code in graflo/architecture/contract/declarations/transform.py
def get_barebone(
    self, other: Transform | None
) -> tuple[Transform | None, Transform | None]:
    """Get the barebone transform configuration.

    Args:
        other: Optional transform to use as base

    Returns:
        tuple[Transform | None, Transform | None]: Updated self transform
        and transform to store in library
    """
    own_params = self.to_dict(exclude_defaults=True)
    if self.foo is not None:
        # self carries the callable, so self itself is the library transform.
        return None, self
    if other is None or other.foo is None:
        # Neither side defines a function: nothing to split out.
        return None, None
    # Rebuild self on top of other's functional definition, letting self's
    # explicitly-set fields win (minus any stale callable references).
    own_params.pop("foo", None)
    own_params.pop("module", None)
    base_params = other.to_dict(exclude_defaults=True)
    base_params.update(own_params)
    return Transform(**base_params), None

merge_from(t)

Merge another transform's configuration into a copy of it.

Returns a new Transform with values from self overriding t where set. Does not override ConfigBaseModel.update (in-place); use this for copy-and-merge semantics.

Parameters:

Name Type Description Default
t Transform

Transform to merge from

required

Returns:

Name Type Description
Transform Transform

New transform with merged configuration

Source code in graflo/architecture/contract/declarations/transform.py
def merge_from(self, t: Transform) -> Transform:
    """Merge another transform's configuration into a copy of it.

    Returns a new Transform with values from self overriding t where set.
    Does not override ConfigBaseModel.update (in-place); use this for
    copy-and-merge semantics.

    Args:
        t: Transform to merge from

    Returns:
        Transform: New transform with merged configuration
    """
    merged = deepcopy(t)
    # Only explicitly-set (truthy) values on self take precedence.
    if self.input:
        merged.input = self.input
    if self.output:
        merged.output = self.output
    if self.params:
        # self's params win on key collisions.
        merged.params = {**merged.params, **self.params}
    # Recompute derived state (e.g. map) after the direct attribute writes.
    merged._refresh_derived()
    return merged