Skip to content

ontocast.tool.vector_store.core

Core contracts and models for ontology vector storage.

GraphAtom

Bases: BasePydanticModel

Embedding-ready ontology entity atom.

Source code in ontocast/tool/vector_store/core.py
class GraphAtom(BasePydanticModel):
    """Embedding-ready ontology entity atom."""

    # --- Identity and provenance of the atom ---------------------------
    # ``atom_id``, ``ontology_iri`` and ``iri`` have no default and must be
    # supplied; the remaining provenance fields are optional.
    atom_id: str = Field(
        description="Deterministic hash identifier for the atom content."
    )
    ontology_iri: str = Field(description="Source ontology IRI.")
    ontology_id: str | None = Field(
        default=None, description="Optional source ontology identifier."
    )
    ontology_hash: str | None = Field(
        default=None, description="Hash/version of the source ontology."
    )
    ontology_version: str | None = Field(
        default=None, description="Semantic version of the source ontology."
    )
    iri: str = Field(description="Focal entity IRI represented by this atom.")
    # Passed through ``_normalize_entity_role`` (below) before validation.
    entity_role: str | None = Field(
        default=None,
        description="Role of focal entity in graph context: resource or predicate.",
    )

    # --- Text representations ------------------------------------------
    # ``core_representation`` and ``neighborhood_representation`` are
    # required; ``minimal_representation`` falls back to an empty string.
    core_representation: str = Field(
        description="High-precision natural language text (labels, types, descriptions)."
    )
    minimal_representation: str = Field(
        default="",
        description=(
            "IRI local name with camelCase/PascalCase split into space-separated terms; "
            "used for BM25 (keyword) indexing."
        ),
    )
    neighborhood_representation: str = Field(
        description="Neighborhood relation text for disambiguation context."
    )

    # --- Bookkeeping ---------------------------------------------------
    # The factory runs per instance, so each atom gets its own
    # timezone-aware UTC timestamp unless one is passed explicitly.
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc),
        description="Atom creation timestamp (UTC).",
    )
    # Left as ``None`` unless a value is provided.
    score: float | None = Field(
        default=None,
        description="Optional similarity score populated by vector search.",
    )

    @field_validator("entity_role", mode="before")
    @classmethod
    def _normalize_entity_role(cls, value: str | None) -> str | None:
        """Canonicalize ``entity_role`` before the field itself is validated.

        ``None`` is passed through untouched. Any other value is coerced
        with ``str()`` and handed to ``canonicalize_entity_role``, whose
        result becomes the value that is validated and stored.
        """
        if value is None:
            return None
        # ``str(value)`` tolerates non-string inputs (mode="before" means the
        # raw, not-yet-validated value arrives here).
        return canonicalize_entity_role(str(value))

    @property
    def representation(self) -> str:
        """Combined embedding text view for generic consumers.

        Delegates to ``combine_embedding_text(self)``.
        NOTE(review): which representation fields are combined, and how,
        is decided by that helper - confirm there.
        """
        return combine_embedding_text(self)

representation property

Combined embedding text view for generic consumers.

OntologySearchHit

Bases: BasePydanticModel

Typed retrieval result that separates atom payload from ranking metadata.

Source code in ontocast/tool/vector_store/core.py
class OntologySearchHit(BasePydanticModel):
    """Typed retrieval result that separates atom payload from ranking metadata."""

    # The retrieved atom (payload).
    atom: GraphAtom
    # Ranking metadata. Required here (no default), unlike the optional
    # ``score`` field declared on ``GraphAtom``.
    score: float = Field(description="Channel-specific retrieval score.")

OntologySearchHitsByChannel

Bases: BasePydanticModel

Per-query retrieval hits split by vector channel (dense core/neighborhood + optional BM25).

Source code in ontocast/tool/vector_store/core.py
class OntologySearchHitsByChannel(BasePydanticModel):
    """Per-query retrieval hits split by vector channel (dense core/neighborhood + optional BM25)."""

    # Every channel uses ``default_factory=list``: a channel that is not
    # supplied is an empty list, and each instance gets its own list object.
    core_hits: list[OntologySearchHit] = Field(
        default_factory=list,
        description="Top hits from the dense core vector channel.",
    )
    neighborhood_hits: list[OntologySearchHit] = Field(
        default_factory=list,
        description="Top hits from the dense neighborhood vector channel.",
    )
    bm25_hits: list[OntologySearchHit] = Field(
        default_factory=list,
        description="Top hits from the sparse BM25 lane (minimal IRI text).",
    )

VectorStoreTool

Bases: Tool

Abstract interface for vector store implementations.

Source code in ontocast/tool/vector_store/core.py
class VectorStoreTool(Tool):
    """Abstract interface for vector store implementations.

    ``initialize``, ``index_ontology``, ``search_patches`` and
    ``delete_ontology`` are abstract and must be provided by subclasses.
    ``supports_tenancy_partition`` and ``clean_tenancy`` ship with
    "not supported" defaults that subclasses may override.
    Note that ``initialize`` and ``clean_tenancy`` are coroutines, while the
    index/search/delete methods are declared as plain synchronous methods.
    """

    @abc.abstractmethod
    async def initialize(self) -> None:
        """Prepare schema/collections in the backing vector store."""

    @abc.abstractmethod
    def index_ontology(self, ontology: Ontology) -> int:
        """Index an ontology and return number of indexed atoms."""

    @abc.abstractmethod
    def search_patches(
        self,
        query: str,
        top_k: int | None = None,
        filter_iri: str | None = None,
        filter_version: str | None = None,
        filter_hash: str | None = None,
    ) -> list[GraphAtom]:
        """Search ontology patches by query text (``top_k`` None → store default).

        Args:
            query: Query text to search with.
            top_k: Maximum number of results; ``None`` defers to the
                store's default.
            filter_iri: Optional filter. NOTE(review): presumably restricts
                hits to one ontology IRI - confirm against implementations.
            filter_version: Optional filter. NOTE(review): presumably an
                ontology version - confirm against implementations.
            filter_hash: Optional filter. NOTE(review): presumably an
                ontology hash - confirm against implementations.

        Returns:
            The matching atoms as a list of ``GraphAtom``.
        """

    @abc.abstractmethod
    def delete_ontology(
        self,
        iri: str,
        version: str | None = None,
        ontology_hash: str | None = None,
    ) -> None:
        """Delete all indexed atoms for a specific ontology IRI.

        Args:
            iri: IRI of the ontology whose atoms are deleted.
            version: Optional. NOTE(review): presumably narrows the deletion
                to one ontology version - confirm against implementations.
            ontology_hash: Optional. NOTE(review): presumably narrows the
                deletion to one ontology hash - confirm against
                implementations.
        """

    def supports_tenancy_partition(self) -> bool:
        """True if :meth:`clean_tenancy` clears isolated collections for (tenant, project).

        The base implementation returns ``False``.
        """
        return False

    async def clean_tenancy(self, tenant: str, project: str) -> None:
        """Drop or empty vector collections derived from ``tenant`` / ``project``.

        Raises:
            NotImplementedError: Always, in this base implementation; the
                message names the concrete class via ``type(self).__name__``.
        """
        raise NotImplementedError(
            f"{type(self).__name__} does not isolate vectors by tenant/project"
        )

clean_tenancy(tenant, project) async

Drop or empty vector collections derived from tenant / project.

Source code in ontocast/tool/vector_store/core.py
async def clean_tenancy(self, tenant: str, project: str) -> None:
    """Remove vector collections that belong to a ``tenant`` / ``project`` pair.

    This default never cleans anything: it signals that the store does not
    partition vectors per tenant/project.

    Raises:
        NotImplementedError: Always; the message starts with the name of
            the concrete class of ``self``.
    """
    # Build the error first so the message is assembled in one place.
    unsupported = NotImplementedError(
        f"{type(self).__name__} does not isolate vectors by tenant/project"
    )
    raise unsupported

delete_ontology(iri, version=None, ontology_hash=None) abstractmethod

Delete all indexed atoms for a specific ontology IRI.

Source code in ontocast/tool/vector_store/core.py
@abc.abstractmethod
def delete_ontology(
    self,
    iri: str,
    version: str | None = None,
    ontology_hash: str | None = None,
) -> None:
    """Delete all indexed atoms for a specific ontology IRI.

    Abstract: concrete vector stores must implement this.

    Args:
        iri: IRI of the ontology whose atoms are deleted.
        version: Optional. NOTE(review): presumably narrows the deletion to
            one ontology version - confirm against implementations.
        ontology_hash: Optional. NOTE(review): presumably narrows the
            deletion to one ontology hash - confirm against implementations.
    """

index_ontology(ontology) abstractmethod

Index an ontology and return number of indexed atoms.

Source code in ontocast/tool/vector_store/core.py
@abc.abstractmethod
def index_ontology(self, ontology: Ontology) -> int:
    """Index an ontology and return number of indexed atoms.

    Abstract: concrete vector stores must implement this.

    Args:
        ontology: The ontology to index.

    Returns:
        The count of atoms that were indexed.
    """

initialize() abstractmethod async

Prepare schema/collections in the backing vector store.

Source code in ontocast/tool/vector_store/core.py
@abc.abstractmethod
async def initialize(self) -> None:
    """Prepare schema/collections in the backing vector store.

    Abstract coroutine: concrete vector stores must implement this, and
    callers must ``await`` it.
    """

search_patches(query, top_k=None, filter_iri=None, filter_version=None, filter_hash=None) abstractmethod

Search ontology patches by query text (when top_k is None, the store's default is used).

Source code in ontocast/tool/vector_store/core.py
@abc.abstractmethod
def search_patches(
    self,
    query: str,
    top_k: int | None = None,
    filter_iri: str | None = None,
    filter_version: str | None = None,
    filter_hash: str | None = None,
) -> list[GraphAtom]:
    """Search ontology patches by query text (``top_k`` None → store default).

    Abstract: concrete vector stores must implement this.

    Args:
        query: Query text to search with.
        top_k: Maximum number of results; ``None`` defers to the store's
            default.
        filter_iri: Optional filter. NOTE(review): presumably restricts hits
            to one ontology IRI - confirm against implementations.
        filter_version: Optional filter. NOTE(review): presumably an
            ontology version - confirm against implementations.
        filter_hash: Optional filter. NOTE(review): presumably an ontology
            hash - confirm against implementations.

    Returns:
        The matching atoms as a list of ``GraphAtom``.
    """

supports_tenancy_partition()

True if `clean_tenancy` clears isolated collections for (tenant, project).

Source code in ontocast/tool/vector_store/core.py
def supports_tenancy_partition(self) -> bool:
    """True if :meth:`clean_tenancy` clears isolated collections for (tenant, project).

    This default implementation always returns ``False``.
    """
    return False

canonicalize_entity_role(role)

Normalize role labels to vector-store vocabulary.

Source code in ontocast/tool/vector_store/core.py
def canonicalize_entity_role(role: str | None) -> str | None:
    """Normalize role labels to vector-store vocabulary."""
    if role is None:
        return None
    normalized = role.strip().lower()
    if normalized in VECTOR_ENTITY_ROLES:
        return normalized
    if normalized in {"property", "predicate"}:
        return ROLE_PREDICATE
    if normalized in {"class", "instance", "resource"}:
        return ROLE_RESOURCE
    return None