Skip to content

ontocast.onto

AgentState

Bases: BasePydanticModel

State for the ontology-based knowledge graph agent.

This class maintains the state of the agent during document processing, including input text, chunks, ontologies, and workflow status.

Attributes:

Name Type Description
input_text str

Input text to process.

current_domain str

IRI used for forming document namespace.

doc_hid Optional[str]

An almost unique hash/id for the parent document.

files dict[str, bytes]

Files to process.

current_chunk Optional[Chunk]

Current document chunk for processing.

chunks list[Chunk]

List of chunks of the input text.

chunks_processed list[Chunk]

List of processed chunks.

current_ontology Ontology

Current ontology object.

ontology_addendum Ontology

Additional ontology content.

failure_stage Optional[str]

Stage where failure occurred.

failure_reason Optional[str]

Reason for failure.

success_score Optional[float]

Score indicating success level.

status Status

Current workflow status.

node_visits defaultdict[WorkflowNode, int]

Number of visits per node.

max_visits int

Maximum number of visits allowed per node.

max_chunks Optional[int]

Maximum number of chunks to process.

Source code in ontocast/onto.py
class AgentState(BasePydanticModel):
    """State for the ontology-based knowledge graph agent.

    This class maintains the state of the agent during document processing,
    including input text, chunks, ontologies, and workflow status.

    Attributes:
        input_text: Input text to process.
        current_domain: IRI used for forming document namespace.
        doc_hid: An almost unique hash/id for the parent document.
        files: Files to process.
        current_chunk: Current document chunk for processing.
        chunks: List of chunks of the input text.
        chunks_processed: List of processed chunks.
        current_ontology: Current ontology object.
        aggregated_facts: Aggregated facts graph for the current document.
        ontology_addendum: Additional ontology content.
        failure_stage: Stage where failure occurred.
        failure_reason: Reason for failure.
        success_score: Score indicating success level.
        status: Current workflow status.
        node_visits: Number of visits per node.
        max_visits: Maximum number of visits allowed per node.
        max_chunks: Maximum number of chunks to process.
    """

    input_text: str = Field(description="Input text", default="")
    current_domain: str = Field(
        description="IRI used for forming document namespace", default=DEFAULT_DOMAIN
    )
    doc_hid: Optional[str] = Field(
        description="An almost unique hash / id for the parent document of the chunk",
        default=None,
    )
    # `dict`/`list` themselves are valid zero-arg factories; no lambda needed.
    files: dict[str, bytes] = Field(
        default_factory=dict, description="Files to process"
    )
    current_chunk: Optional[Chunk] = Field(
        description="Current document chunk for processing", default=None
    )
    chunks: list[Chunk] = Field(
        default_factory=list, description="Chunks of the input text"
    )
    chunks_processed: list[Chunk] = Field(
        default_factory=list,
        # Fixed copy-pasted description (was identical to `chunks`): this field
        # holds chunks that have already been processed.
        description="Chunks of the input text that have already been processed",
    )
    current_ontology: Ontology = Field(
        default_factory=lambda: Ontology(
            short_name=ONTOLOGY_VOID_ID,
            title="null title",
            description="null description",
            graph=RDFGraph(),
            iri=ONTOLOGY_VOID_IRI,
        ),
        description="Ontology object that contain the semantic graph "
        "as well as the description, name, short name, version, "
        "and IRI of the ontology",
    )
    aggregated_facts: Optional[RDFGraph] = Field(
        description="RDF triples representing aggregated facts "
        "from the current document",
        default_factory=RDFGraph,
    )
    ontology_addendum: Ontology = Field(
        default_factory=lambda: Ontology(
            short_name=ONTOLOGY_VOID_ID,
            title="null title",
            description="null description",
            graph=RDFGraph(),
            iri=ONTOLOGY_VOID_IRI,
        ),
        description="Ontology object that contain the semantic graph "
        "as well as the description, name, short name, version, "
        "and IRI of the ontology",
    )
    failure_stage: Optional[str] = None
    failure_reason: Optional[str] = None
    success_score: Optional[float] = 0.0
    status: Status = Status.SUCCESS
    node_visits: defaultdict[WorkflowNode, int] = Field(
        default_factory=lambda: defaultdict(int),
        description="Number of visits per node",
    )
    max_visits: int = Field(
        default=3, description="Maximum number of visits allowed per node"
    )
    max_chunks: Optional[int] = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def model_post_init(self, __context):
        """Post-initialization hook for the model (currently a no-op)."""
        pass

    def __init__(self, **kwargs):
        """Initialize the agent state with given keyword arguments.

        NOTE(review): ``current_domain`` is unconditionally overwritten from
        the ``CURRENT_DOMAIN`` environment variable (falling back to
        ``DEFAULT_DOMAIN``), so a ``current_domain=`` kwarg is silently
        ignored — confirm this precedence is intentional.
        """
        super().__init__(**kwargs)
        self.current_domain = os.getenv("CURRENT_DOMAIN", DEFAULT_DOMAIN)

    def set_text(self, text):
        """Set the input text and generate document hash.

        Args:
            text: The input text to set.
        """
        self.input_text = text
        self.doc_hid = render_text_hash(self.input_text)

    def set_failure(self, stage: str, reason: str, success_score: float = 0.0):
        """Set failure state with stage and reason.

        Args:
            stage: The stage where the failure occurred.
            reason: The reason for the failure.
            success_score: The success score at failure (default: 0.0).
        """
        self.failure_stage = stage
        self.failure_reason = reason
        self.success_score = success_score
        self.status = Status.FAILED

    def clear_failure(self):
        """Clear failure state and set status to success."""
        self.failure_stage = None
        self.failure_reason = None
        self.success_score = 0.0
        self.status = Status.SUCCESS

    @property
    def doc_iri(self):
        """Get the document IRI.

        Returns:
            str: The document IRI ("<current_domain>/doc/<doc_hid>").
        """
        return f"{self.current_domain}/doc/{self.doc_hid}"

    @property
    def doc_namespace(self):
        """Get the document namespace.

        Returns:
            str: The document namespace.
        """
        return iri2namespace(self.doc_iri, ontology=False)

doc_iri property

Get the document IRI.

Returns:

Name Type Description
str

The document IRI.

doc_namespace property

Get the document namespace.

Returns:

Name Type Description
str

The document namespace.

__init__(**kwargs)

Initialize the agent state with given keyword arguments.

Source code in ontocast/onto.py
def __init__(self, **kwargs):
    """Initialize the agent state with given keyword arguments."""
    super().__init__(**kwargs)
    # NOTE(review): overwrites any `current_domain` kwarg with the
    # CURRENT_DOMAIN env var (or DEFAULT_DOMAIN) — confirm env precedence
    # over caller-supplied values is intentional.
    self.current_domain = os.getenv("CURRENT_DOMAIN", DEFAULT_DOMAIN)

clear_failure()

Clear failure state and set status to success.

Source code in ontocast/onto.py
def clear_failure(self):
    """Reset all failure bookkeeping and flag the workflow as successful."""
    self.failure_stage = self.failure_reason = None
    self.success_score = 0.0
    self.status = Status.SUCCESS

model_post_init(__context)

Post-initialization hook for the model.

Source code in ontocast/onto.py
def model_post_init(self, __context):
    """Post-initialization hook for the model.

    Currently a no-op; pydantic invokes this after validation.
    """
    pass

set_failure(stage, reason, success_score=0.0)

Set failure state with stage and reason.

Parameters:

Name Type Description Default
stage str

The stage where the failure occurred.

required
reason str

The reason for the failure.

required
success_score float

The success score at failure (default: 0.0).

0.0
Source code in ontocast/onto.py
def set_failure(self, stage: str, reason: str, success_score: float = 0.0):
    """Record a failure: where it happened, why, and the score at that point.

    Args:
        stage: The stage where the failure occurred.
        reason: The reason for the failure.
        success_score: The success score at failure (default: 0.0).
    """
    # Order of these independent attribute writes is not significant.
    self.status = Status.FAILED
    self.failure_stage = stage
    self.failure_reason = reason
    self.success_score = success_score

set_text(text)

Set the input text and generate document hash.

Parameters:

Name Type Description Default
text

The input text to set.

required
Source code in ontocast/onto.py
def set_text(self, text):
    """Store the input text and derive the parent-document hash id from it.

    Args:
        text: The input text to set.
    """
    self.input_text = text
    self.doc_hid = render_text_hash(text)

BasePydanticModel

Bases: BaseModel

Base class for Pydantic models with serialization capabilities.

Source code in ontocast/onto.py
class BasePydanticModel(BaseModel):
    """Base Pydantic model offering JSON file (de)serialization helpers."""

    def __init__(self, **kwargs):
        """Initialize the model with given keyword arguments."""
        super().__init__(**kwargs)

    def serialize(self, file_path: str | pathlib.Path) -> None:
        """Write the model as indented JSON to *file_path*.

        Args:
            file_path: Path to save the JSON file (str or Path).
        """
        # Path(Path(...)) is a no-op, so unconditional coercion handles
        # both str and Path inputs.
        target = pathlib.Path(file_path)
        target.write_text(self.model_dump_json(indent=4))

    @classmethod
    def load(cls, file_path: str | pathlib.Path):
        """Read a JSON file and validate it into an instance of *cls*.

        Args:
            file_path: Path to the JSON file (str or Path).

        Returns:
            The loaded model instance.
        """
        source = pathlib.Path(file_path)
        return cls.model_validate_json(source.read_text())

__init__(**kwargs)

Initialize the model with given keyword arguments.

Source code in ontocast/onto.py
def __init__(self, **kwargs):
    """Initialize the model with given keyword arguments.

    Thin pass-through to ``BaseModel.__init__``; adds no behavior of its own.
    """
    super().__init__(**kwargs)

load(file_path) classmethod

Load state from a JSON file.

Parameters:

Name Type Description Default
file_path str | Path

Path to the JSON file.

required

Returns:

Type Description

The loaded model instance.

Source code in ontocast/onto.py
@classmethod
def load(cls, file_path: str | pathlib.Path):
    """Load state from a JSON file.

    Args:
        file_path: Path to the JSON file (str or Path).

    Returns:
        The loaded model instance.
    """
    # Path(Path(...)) is a no-op, so coercing unconditionally replaces the
    # isinstance check while accepting both str and Path.
    payload = pathlib.Path(file_path).read_text()
    return cls.model_validate_json(payload)

serialize(file_path)

Serialize the state to a JSON file.

Parameters:

Name Type Description Default
file_path str | Path

Path to save the JSON file.

required
Source code in ontocast/onto.py
def serialize(self, file_path: str | pathlib.Path) -> None:
    """Serialize the state to a JSON file.

    Args:
        file_path: Path to save the JSON file.
    """
    state_json = self.model_dump_json(indent=4)
    if isinstance(file_path, str):
        file_path = pathlib.Path(file_path)
    file_path.write_text(state_json)

Chunk

Bases: BaseModel

A chunk of text with associated metadata and RDF graph.

Attributes:

Name Type Description
text str

Text content of the chunk.

hid str

An almost unique (hash) id for the chunk.

doc_iri str

IRI of parent document.

graph Optional[RDFGraph]

RDF triples representing the facts from the current document.

processed bool

Whether chunk has been processed.

Source code in ontocast/onto.py
class Chunk(BaseModel):
    """A chunk of text with associated metadata and RDF graph.

    Attributes:
        text: Text content of the chunk.
        hid: An almost unique (hash) id for the chunk.
        doc_iri: IRI of parent document.
        graph: RDF triples representing the facts from the current document.
        processed: Whether chunk has been processed.
    """

    text: str = Field(description="Text of the chunk")
    hid: str = Field(description="An almost unique (hash) id for the chunk")
    doc_iri: str = Field(description="IRI of parent doc")
    graph: Optional[RDFGraph] = Field(
        description="RDF triples representing the facts from a document chunk",
        default_factory=RDFGraph,
    )
    processed: bool = Field(default=False, description="Was the chunk processed?")

    @property
    def iri(self):
        """Get the IRI for this chunk.

        Returns:
            str: The chunk IRI ("<doc_iri>/chunk/<hid>").
        """
        return f"{self.doc_iri}/chunk/{self.hid}"

    @property
    def namespace(self):
        """Get the namespace for this chunk.

        Returns:
            str: The chunk namespace.
        """
        return iri2namespace(self.iri, ontology=False)

    def sanitize(self):
        """Normalize the chunk graph's prefix/namespace bindings in place.

        Delegates to the graph's own helpers; a ``None`` graph (the field is
        Optional) is left untouched.
        """
        # Robustness fix: `graph` is Optional[RDFGraph]; previously a None
        # graph raised AttributeError here.
        if self.graph is None:
            return
        self.graph = self.graph.unbind_chunk_namespaces()
        self.graph.sanitize_prefixes_namespaces()

iri property

Get the IRI for this chunk.

Returns:

Name Type Description
str

The chunk IRI.

namespace property

Get the namespace for this chunk.

Returns:

Name Type Description
str

The chunk namespace.

FailureStages

Bases: StrEnum

Enumeration of possible failure stages in the workflow.

Source code in ontocast/onto.py
class FailureStages(StrEnum):
    """Enumeration of possible failure stages in the workflow."""

    NO_CHUNKS_TO_PROCESS = "No chunks to process"
    ONTOLOGY_CRITIQUE = "The produced ontology did not pass the critique stage."
    FACTS_CRITIQUE = "The produced graph of facts did not pass the critique stage."
    PARSE_TEXT_TO_ONTOLOGY_TRIPLES = "Failed to parse the text into ontology triples."
    PARSE_TEXT_TO_FACTS_TRIPLES = "Failed to parse the text into facts triples."
    SUBLIMATE_ONTOLOGY = (
        "The produced semantic could not be validated "
        "or separated into ontology and facts (technical issue)."
    )

KGCritiqueReport

Bases: BaseModel

Report from knowledge graph critique process.

Attributes:

Name Type Description
facts_graph_derivation_success bool

True if the facts graph derivation was performed successfully, False otherwise.

facts_graph_derivation_score float

Score 0-100 for how well the triples of facts represent the original document.

facts_graph_derivation_critique_comment Optional[str]

A concrete explanation of why the semantic graph of facts derivation is not satisfactory.

Source code in ontocast/onto.py
class KGCritiqueReport(BaseModel):
    """Report from knowledge graph critique process.

    Attributes:
        facts_graph_derivation_success: True if the facts graph derivation
            was performed successfully, False otherwise.
        facts_graph_derivation_score: Score 0-100 for how well the triples
            of facts represent the original document.
        facts_graph_derivation_critique_comment: A concrete explanation of
            why the semantic graph of facts derivation is not satisfactory.
    """

    # Required field (no default supplied).
    facts_graph_derivation_success: bool = Field(
        description="True if the facts graph derivation "
        "was performed successfully, False otherwise."
    )
    # Required field; score scale is 0 (worst) to 100 (best).
    facts_graph_derivation_score: float = Field(
        description="Score 0-100 for how well the triples of facts "
        "represent the original document. 0 is the worst, 100 is the best."
    )
    # NOTE(review): Optional[str] with no default is still a *required* field
    # in pydantic — None must be passed explicitly. Confirm this is intended
    # (it may be deliberate for LLM structured-output schemas).
    facts_graph_derivation_critique_comment: Optional[str] = Field(
        description="A concrete explanation of why the semantic graph "
        "of facts derivation is not satisfactory. "
        "The explanation should be very specific and detailed."
    )

Ontology

Bases: OntologyProperties

A Pydantic model representing an ontology with its RDF graph and description.

Attributes:

Name Type Description
graph RDFGraph

The RDF graph containing the ontology data.

Source code in ontocast/onto.py
class Ontology(OntologyProperties):
    """A Pydantic model representing an ontology with its RDF graph and description.

    Attributes:
        graph: The RDF graph containing the ontology data.
    """

    graph: RDFGraph = Field(
        default_factory=RDFGraph,
        description="Semantic triples (abstract entities/relations) "
        "that define the ontology in turtle (ttl) format as a string.",
    )

    # RDFGraph is not a pydantic type; allow it as an arbitrary field type.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __iadd__(self, other: Union["Ontology", RDFGraph]) -> "Ontology":
        """In-place addition operator for Ontology instances.

        Merges the RDF graphs and takes properties from the right-hand operand.

        Args:
            other: The ontology or graph to add to this one.

        Returns:
            Ontology: self after modification.
        """
        if isinstance(other, Ontology):
            self.graph += other.graph
            # Metadata is replaced wholesale by the right-hand operand's
            # values (including any None values) — the RHS always wins.
            self.title = other.title
            self.short_name = other.short_name
            self.description = other.description
            self.iri = other.iri
            self.version = other.version
        else:
            # Bare graph: merge triples only; this ontology's metadata stays.
            self.graph += other
        return self

    @classmethod
    def from_file(cls, file_path: pathlib.Path, format: str = "turtle", **kwargs):
        """Create an Ontology instance by loading a graph from a file.

        Args:
            file_path: Path to the ontology file.
            format: Format of the input file (default: "turtle").
            **kwargs: Additional arguments to pass to the constructor.

        Returns:
            Ontology: A new Ontology instance.
        """
        graph: RDFGraph = RDFGraph()
        graph.parse(file_path, format=format)
        return cls(graph=graph, **kwargs)

    def set_properties(self, **kwargs):
        """Set ontology properties from keyword arguments.

        Args:
            **kwargs: Property values to set.
        """
        # NOTE(review): writes straight into __dict__, bypassing pydantic
        # validation and assignment hooks — confirm this is intentional.
        self.__dict__.update(**kwargs)

    def describe(self) -> str:
        """Get a human-readable description of the ontology.

        Returns:
            str: A formatted description string.
        """
        return (
            f"Ontology name: {self.short_name}\n"
            f"Description: {self.description}\n"
            f"Ontology IRI: {self.iri}\n"
        )

__iadd__(other)

In-place addition operator for Ontology instances.

Merges the RDF graphs and takes properties from the right-hand operand.

Parameters:

Name Type Description Default
other Union[Ontology, RDFGraph]

The ontology or graph to add to this one.

required

Returns:

Name Type Description
Ontology Ontology

self after modification.

Source code in ontocast/onto.py
def __iadd__(self, other: Union["Ontology", RDFGraph]) -> "Ontology":
    """In-place addition operator for Ontology instances.

    Merges the RDF graphs; when the operand is an Ontology, its metadata
    (title, short name, description, IRI, version) replaces this one's.

    Args:
        other: The ontology or graph to add to this one.

    Returns:
        Ontology: self after modification.
    """
    if not isinstance(other, Ontology):
        # Bare graph operand: merge triples only, metadata untouched.
        self.graph += other
        return self
    self.graph += other.graph
    # Metadata from the right-hand operand always wins.
    self.iri = other.iri
    self.title = other.title
    self.version = other.version
    self.short_name = other.short_name
    self.description = other.description
    return self

describe()

Get a human-readable description of the ontology.

Returns:

Name Type Description
str str

A formatted description string.

Source code in ontocast/onto.py
def describe(self) -> str:
    """Get a human-readable description of the ontology.

    Returns:
        str: Three newline-terminated lines: name, description, and IRI.
    """
    lines = (
        f"Ontology name: {self.short_name}",
        f"Description: {self.description}",
        f"Ontology IRI: {self.iri}",
    )
    return "\n".join(lines) + "\n"

from_file(file_path, format='turtle', **kwargs) classmethod

Create an Ontology instance by loading a graph from a file.

Parameters:

Name Type Description Default
file_path Path

Path to the ontology file.

required
format str

Format of the input file (default: "turtle").

'turtle'
**kwargs

Additional arguments to pass to the constructor.

{}

Returns:

Name Type Description
Ontology

A new Ontology instance.

Source code in ontocast/onto.py
@classmethod
def from_file(cls, file_path: pathlib.Path, format: str = "turtle", **kwargs):
    """Create an Ontology instance by loading a graph from a file.

    Args:
        file_path: Path to the ontology file.
        format: Format of the input file (default: "turtle").
        **kwargs: Additional arguments to pass to the constructor.

    Returns:
        Ontology: A new Ontology instance.
    """
    graph: RDFGraph = RDFGraph()
    # Parse with the requested serialization format, then wrap the graph
    # together with any metadata kwargs into a new instance.
    graph.parse(file_path, format=format)
    return cls(graph=graph, **kwargs)

set_properties(**kwargs)

Set ontology properties from keyword arguments.

Parameters:

Name Type Description Default
**kwargs

Property values to set.

{}
Source code in ontocast/onto.py
def set_properties(self, **kwargs):
    """Assign arbitrary attributes onto the instance from keyword arguments.

    Args:
        **kwargs: Property values to set.
    """
    # NOTE(review): writes straight into __dict__, so pydantic validators
    # never run on these values — confirm this is intentional.
    self.__dict__.update(kwargs)

OntologyProperties

Bases: BaseModel

Properties of an ontology.

Attributes:

Name Type Description
short_name Optional[str]

A short name (identifier) for the ontology.

title Optional[str]

Ontology title.

description Optional[str]

A concise description of the ontology.

version Optional[str]

Version of the ontology.

iri Optional[str]

Ontology IRI (Internationalized Resource Identifier).

Source code in ontocast/onto.py
class OntologyProperties(BaseModel):
    """Properties of an ontology.

    Attributes:
        short_name: A short name (identifier) for the ontology.
        title: Ontology title.
        description: A concise description of the ontology.
        version: Version of the ontology.
        iri: Ontology IRI (Internationalized Resource Identifier).
    """

    short_name: Optional[str] = Field(
        default=None,
        description="A short name (identifier) for the ontology. "
        "It should be an abbreviation. Must be provided.",
    )
    title: Optional[str] = Field(
        default=None, description="Ontology title. Must be provided."
    )
    description: Optional[str] = Field(
        default=None,
        description="A concise description (3-4 sentences) of the ontology "
        "(domain, purpose, applicability, etc.)",
    )
    # Unlike the other metadata fields, version defaults to a concrete value.
    version: Optional[str] = Field(
        description="Version of the ontology",
        default="0.0.0",
    )
    iri: Optional[str] = Field(
        default=None,
        description="Ontology IRI (Internationalized Resource Identifier)",
    )

    @property
    def namespace(self):
        """Get the namespace for this ontology.

        Returns:
            str: The namespace string.
        """
        # NOTE(review): iri may be None (its default); how iri2namespace
        # handles None is not visible here — confirm callers set iri first.
        return iri2namespace(self.iri, ontology=True)

namespace property

Get the namespace for this ontology.

Returns:

Name Type Description
str

The namespace string.

OntologySelectorReport

Bases: BasePydanticModel

Report from ontology selection process.

Attributes:

Name Type Description
short_name Optional[str]

A short name (identifier) for the ontology that could be used to represent the domain of the document, None if no ontology is suitable.

present bool

Whether an ontology that could represent the domain of the document is present in the list of ontologies.

Source code in ontocast/onto.py
class OntologySelectorReport(BasePydanticModel):
    """Report from ontology selection process.

    Attributes:
        short_name: A short name (identifier) for the ontology that could be used
            to represent the domain of the document, None if no ontology is suitable.
        present: Whether an ontology that could represent the domain of the document
            is present in the list of ontologies.
    """

    # NOTE(review): Optional[str] without a default is still a *required*
    # field in pydantic — None must be passed explicitly. Confirm intended.
    short_name: Optional[str] = Field(
        description="A short name (identifier) for the ontology "
        "that could be used to represent "
        "the domain of the document, None if no ontology is suitable"
    )
    present: bool = Field(
        description="Whether an ontology that could represent "
        "the domain of the document is present in the list of ontologies"
    )

OntologyUpdateCritiqueReport

Bases: BaseModel

Report from ontology update critique process.

Attributes:

Name Type Description
ontology_update_success bool

True if the ontology update was performed successfully, False otherwise.

ontology_update_score float

Score 0-100 for how well the update improves the original domain ontology of the document.

ontology_update_critique_comment Optional[str]

A concrete explanation of why the ontology update is not satisfactory.

Source code in ontocast/onto.py
class OntologyUpdateCritiqueReport(BaseModel):
    """Report from ontology update critique process.

    Attributes:
        ontology_update_success: True if the ontology update was performed
            successfully, False otherwise.
        ontology_update_score: Score 0-100 for how well the update improves
            the original domain ontology of the document.
        ontology_update_critique_comment: A concrete explanation of why the
            ontology update is not satisfactory.
    """

    # Required field (no default supplied).
    ontology_update_success: bool = Field(
        description="True if the ontology update "
        "was performed successfully, False otherwise."
    )
    # Required field; score scale is 0 (worst) to 100 (best).
    ontology_update_score: float = Field(
        description="Score 0-100 for how well the update improves "
        "the original domain ontology of the document. "
        "0 is the worst, 100 is the best."
    )
    # NOTE(review): Optional[str] with no default is still *required* in
    # pydantic — confirm this is intended.
    ontology_update_critique_comment: Optional[str] = Field(
        description="A concrete explanation of why "
        "the ontology update is not satisfactory. "
        "The explanation should be very specific and detailed."
    )

RDFGraph

Bases: Graph

Subclass of rdflib.Graph with Pydantic schema support.

This class extends rdflib.Graph to provide serialization and deserialization capabilities for Pydantic models, with special handling for Turtle format.

Source code in ontocast/onto.py
class RDFGraph(Graph):
    """Subclass of rdflib.Graph with Pydantic schema support.

    This class extends rdflib.Graph to provide serialization and deserialization
    capabilities for Pydantic models, with special handling for Turtle format.
    """

    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type, handler: GetCoreSchemaHandler):
        """Get the Pydantic core schema for this class.

        Validation accepts either an existing RDFGraph instance or a Turtle
        string (parsed via ``_from_turtle_str``); serialization always emits
        a Turtle string (via ``_to_turtle_str``).

        Args:
            _source_type: The source type.
            handler: The core schema handler.

        Returns:
            A union schema that handles both Graph instances and string conversion.
        """
        return core_schema.union_schema(
            [
                # Branch 1: value is already an RDFGraph — pass it through.
                core_schema.is_instance_schema(cls),
                # Branch 2: value is a str — validate as str, then parse
                # it as Turtle into an RDFGraph.
                core_schema.chain_schema(
                    [
                        core_schema.str_schema(),
                        core_schema.no_info_plain_validator_function(
                            cls._from_turtle_str
                        ),
                    ]
                ),
            ],
            # Dump side: serialize the graph back to its Turtle text form.
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls._to_turtle_str,
                info_arg=False,
                return_schema=core_schema.str_schema(),
            ),
        )

    def __add__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
        """Addition operator for RDFGraph instances.

        Produces a fresh RDFGraph holding the union of both operands' triples
        and namespace bindings (bindings from ``other`` win on conflict).

        Args:
            other: The graph to add to this one.

        Returns:
            RDFGraph: A new RDFGraph containing the merged triples.
        """
        merged = RDFGraph()
        # Fold both operands in order; a later bind() for the same prefix
        # overrides an earlier one, preserving the self-then-other precedence.
        for source in (self, other):
            for triple in source:
                merged.add(triple)
            for prefix, uri in source.namespaces():
                merged.bind(prefix, uri)
        return merged

    def __iadd__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
        """In-place addition operator for RDFGraph instances.

        Merges the RDF graphs while maintaining the RDFGraph type.

        NOTE(review): unlike ``__add__``, this delegates to
        ``Graph.__iadd__``, which merges triples but does not appear to copy
        ``other``'s namespace bindings — confirm the asymmetry is intended.

        Args:
            other: The graph to add to this one.

        Returns:
            RDFGraph: self after modification.
        """
        # Call parent's __iadd__ to merge the graphs
        super().__iadd__(other)
        # Return self to maintain RDFGraph type
        return self

    @staticmethod
    def _ensure_prefixes(turtle_str: str) -> str:
        """Ensure all common prefixes are declared in the Turtle string.

        Any prefix from COMMON_PREFIXES that the document does not already
        declare is prepended as an ``@prefix`` line.

        Args:
            turtle_str: The input Turtle string.

        Returns:
            str: The Turtle string with all common prefixes declared.
        """
        declared = {m.group(1) for m in PREFIX_PATTERN.finditer(turtle_str)}
        additions = [
            f"@prefix {prefix}: {uri} ."
            for prefix, uri in COMMON_PREFIXES.items()
            if prefix not in declared
        ]
        if not additions:
            return turtle_str
        # Prepend the missing declarations, separated from the body by a
        # blank line, exactly as a hand-written header would be.
        return "\n".join(additions) + "\n\n" + turtle_str

    @classmethod
    def _from_turtle_str(cls, turtle_str: str) -> "RDFGraph":
        """Create an RDFGraph instance from a Turtle string.

        The input may contain literal backslash escape sequences (e.g. a
        two-character ``\\n``); these are decoded before parsing.

        Args:
            turtle_str: The input Turtle string.

        Returns:
            RDFGraph: A new RDFGraph instance.
        """
        # Bug fix: the previous bytes(s, "utf-8").decode("unicode_escape")
        # round-trip corrupted every non-ASCII character (each UTF-8 byte was
        # decoded independently as latin-1). Encoding with latin-1 +
        # backslashreplace turns non-latin-1 characters into \uXXXX escapes,
        # which unicode_escape then restores losslessly, while still
        # unescaping explicit backslash sequences as before.
        turtle_str = turtle_str.encode("latin-1", "backslashreplace").decode(
            "unicode_escape"
        )
        patched_turtle = cls._ensure_prefixes(turtle_str)
        g = cls()
        g.parse(data=patched_turtle, format="turtle")
        return g

    @staticmethod
    def _to_turtle_str(g: Any) -> str:
        """Serialize a graph to its Turtle text representation.

        Args:
            g: The RDFGraph instance (any object exposing ``serialize``).

        Returns:
            str: The Turtle string representation.
        """
        ttl: str = g.serialize(format="turtle")
        return ttl

    def __new__(cls, *args, **kwargs):
        """Create a new RDFGraph instance.

        Pure pass-through to ``Graph.__new__``; presumably kept as an
        explicit construction hook — confirm whether it can be removed.
        """
        instance = super().__new__(cls)
        return instance

    def sanitize_prefixes_namespaces(self):
        """Consolidate prefix bindings so each namespace URI has one prefix.

        Groups the graph's prefix bindings by URI string, selects a canonical
        ``Namespace`` object and a single preferred prefix for every URI that
        is bound under several prefixes, and installs a fresh
        ``NamespaceManager`` carrying only the consolidated bindings.

        When several prefixes share one URI, the surviving prefix is chosen
        by: proper ``Namespace`` objects first, then shorter prefixes, then
        alphabetical order.

        Returns:
            None: the graph's ``namespace_manager`` is replaced in place.
        """
        ns_manager = self.namespace_manager

        # Snapshot all current prefix -> URI bindings.
        current_prefixes = dict(ns_manager.namespaces())

        # Group bindings by URI string to expose duplicate prefixes.
        uri_to_prefixes = defaultdict(list)
        for prefix, uri in current_prefixes.items():
            uri_to_prefixes[str(uri)].append((prefix, uri))

        # Pick one canonical Namespace object per URI, preferring values
        # that already are proper Namespace instances.
        canonical_namespaces = {}
        for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
            namespace_candidates = []
            for _prefix, uri_obj in prefix_uri_pairs:
                if isinstance(uri_obj, Namespace):
                    namespace_candidates.append(uri_obj)
                else:
                    # BUGFIX: was a bare `except:` (E722), which would also
                    # swallow KeyboardInterrupt/SystemExit; narrowed.
                    try:
                        namespace_candidates.append(Namespace(uri_str))
                    except Exception:
                        continue
            if namespace_candidates:
                canonical_namespaces[uri_str] = namespace_candidates[0]

        # Rebuild the namespace manager from scratch with corrected mappings.
        new_ns_manager = NamespaceManager(self)

        # prefix -> canonical namespace mappings that survive consolidation.
        final_mappings = {}

        for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
            if len(prefix_uri_pairs) == 1:
                # Unique prefix: keep it, but use the canonical namespace
                # (falling back to a freshly built one).
                prefix, _ = prefix_uri_pairs[0]
                final_mappings[prefix] = canonical_namespaces.get(
                    uri_str, Namespace(uri_str)
                )
            else:
                # Multiple prefixes for the same URI - keep the best one.
                # Priority: 1) proper Namespace objects,
                #           2) shorter prefixes,
                #           3) alphabetical order.
                prefix_uri_pairs.sort(
                    key=lambda x: (
                        not isinstance(x[1], Namespace),  # Namespace objects first
                        len(x[0]),  # Shorter prefixes next
                        x[0],  # Alphabetical order
                    )
                )

                best_prefix, _ = prefix_uri_pairs[0]
                final_mappings[best_prefix] = canonical_namespaces.get(
                    uri_str, Namespace(uri_str)
                )

                other_prefixes = [p for p, _ in prefix_uri_pairs[1:]]
                if other_prefixes:
                    logger.debug(
                        f"Consolidating prefixes {other_prefixes} "
                        f"-> '{best_prefix}' for URI: {uri_str}"
                    )

        # Bind the surviving mappings and swap in the new manager.
        for prefix, namespace in final_mappings.items():
            new_ns_manager.bind(prefix, namespace, override=True)

        self.namespace_manager = new_ns_manager

    def unbind_chunk_namespaces(self, chunk_pattern="/chunk/") -> "RDFGraph":
        """Return a copy of the graph without chunk-related prefix bindings.

        Prefixes whose URI contains ``chunk_pattern`` are not re-bound on the
        copy, so those URIs serialize fully expanded; all triples are kept.

        Args:
            chunk_pattern (str): Substring identifying chunk URIs
                (default: "/chunk/").

        Returns:
            RDFGraph: New graph with chunk-related namespaces unbound.
        """
        bindings = dict(self.namespace_manager.namespaces())

        # Bindings that will be dropped because their URI matches the pattern.
        dropped = [
            (prefix, str(uri))
            for prefix, uri in bindings.items()
            if chunk_pattern in str(uri)
        ]

        # Triples store expanded URIs internally, so a plain copy suffices.
        result = RDFGraph()
        for triple in self:
            result.add(triple)

        # Re-bind only the prefixes that do not reference chunk URIs.
        for prefix, uri in bindings.items():
            if chunk_pattern not in str(uri):
                result.bind(prefix, uri)

        if dropped:
            logger.debug(f"Unbound {len(dropped)} chunk-related namespace(s):")
            for prefix, uri in dropped:
                logger.debug(f"  - '{prefix}': {uri}")

        return result

__add__(other)

Addition operator for RDFGraph instances.

Merges the RDF graphs while maintaining the RDFGraph type.

Parameters:

Name Type Description Default
other Union[RDFGraph, Graph]

The graph to add to this one.

required

Returns:

Name Type Description
RDFGraph RDFGraph

A new RDFGraph containing the merged triples.

Source code in ontocast/onto.py
def __add__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
    """Return a new RDFGraph with triples and bindings from both graphs.

    Args:
        other: The graph merged with this one.

    Returns:
        RDFGraph: A freshly created graph containing the union of triples
            and namespace bindings of both operands.
    """
    merged = RDFGraph()

    # Copy triples: left operand first, then the right.
    for source in (self, other):
        for triple in source:
            merged.add(triple)

    # Copy namespace bindings in the same order; later binds win on clashes.
    for source in (self, other):
        for prefix, uri in source.namespaces():
            merged.bind(prefix, uri)

    return merged

__get_pydantic_core_schema__(_source_type, handler) classmethod

Get the Pydantic core schema for this class.

Parameters:

Name Type Description Default
_source_type

The source type.

required
handler GetCoreSchemaHandler

The core schema handler.

required

Returns:

Type Description

A union schema that handles both Graph instances and string conversion.

Source code in ontocast/onto.py
@classmethod
def __get_pydantic_core_schema__(cls, _source_type, handler: GetCoreSchemaHandler):
    """Build the Pydantic core schema for this class.

    Validation accepts either an existing instance of ``cls`` or a string
    that is parsed via ``cls._from_turtle_str``; serialization emits a
    Turtle string via ``cls._to_turtle_str``.

    Args:
        _source_type: The source type (unused).
        handler: The core schema handler supplied by Pydantic.

    Returns:
        A union schema that handles both Graph instances and string
        conversion.
    """
    parse_from_string = core_schema.chain_schema(
        [
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls._from_turtle_str),
        ]
    )
    serialize_to_string = core_schema.plain_serializer_function_ser_schema(
        cls._to_turtle_str,
        info_arg=False,
        return_schema=core_schema.str_schema(),
    )
    return core_schema.union_schema(
        [
            core_schema.is_instance_schema(cls),
            parse_from_string,
        ],
        serialization=serialize_to_string,
    )

__iadd__(other)

In-place addition operator for RDFGraph instances.

Merges the RDF graphs while maintaining the RDFGraph type.

Parameters:

Name Type Description Default
other Union[RDFGraph, Graph]

The graph to add to this one.

required

Returns:

Name Type Description
RDFGraph RDFGraph

self after modification.

Source code in ontocast/onto.py
def __iadd__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
    """Merge ``other`` into this graph in place.

    The actual merge is delegated to the parent class; ``self`` is then
    returned so the result keeps the RDFGraph subclass type.

    Args:
        other: The graph whose triples are added to this one.

    Returns:
        RDFGraph: This graph, after absorbing the triples of ``other``.
    """
    super().__iadd__(other)
    return self

__new__(*args, **kwargs)

Create a new RDFGraph instance.

Source code in ontocast/onto.py
def __new__(cls, *args, **kwargs):
    """Allocate a new RDFGraph instance.

    Arguments are accepted for signature compatibility and handled by
    ``__init__``, not consumed here.
    """
    return super().__new__(cls)

sanitize_prefixes_namespaces()

Rematches prefixes in an RDFLib graph to correct namespaces when a namespace with the same URI exists. Handles cases where prefixes might not be bound as namespaces.

Parameters:

Name Type Description Default
self RDFGraph

The RDFLib graph to process

required

Returns:

Name Type Description
RDFGraph

The graph with corrected prefix-namespace mappings

Source code in ontocast/onto.py
def sanitize_prefixes_namespaces(self):
    """Consolidate prefix bindings so each namespace URI has one prefix.

    Groups the graph's prefix bindings by URI string, selects a canonical
    ``Namespace`` object and a single preferred prefix for every URI that
    is bound under several prefixes, and installs a fresh
    ``NamespaceManager`` carrying only the consolidated bindings.

    When several prefixes share one URI, the surviving prefix is chosen
    by: proper ``Namespace`` objects first, then shorter prefixes, then
    alphabetical order.

    Returns:
        None: the graph's ``namespace_manager`` is replaced in place.
    """
    ns_manager = self.namespace_manager

    # Snapshot all current prefix -> URI bindings.
    current_prefixes = dict(ns_manager.namespaces())

    # Group bindings by URI string to expose duplicate prefixes.
    uri_to_prefixes = defaultdict(list)
    for prefix, uri in current_prefixes.items():
        uri_to_prefixes[str(uri)].append((prefix, uri))

    # Pick one canonical Namespace object per URI, preferring values
    # that already are proper Namespace instances.
    canonical_namespaces = {}
    for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
        namespace_candidates = []
        for _prefix, uri_obj in prefix_uri_pairs:
            if isinstance(uri_obj, Namespace):
                namespace_candidates.append(uri_obj)
            else:
                # BUGFIX: was a bare `except:` (E722), which would also
                # swallow KeyboardInterrupt/SystemExit; narrowed.
                try:
                    namespace_candidates.append(Namespace(uri_str))
                except Exception:
                    continue
        if namespace_candidates:
            canonical_namespaces[uri_str] = namespace_candidates[0]

    # Rebuild the namespace manager from scratch with corrected mappings.
    new_ns_manager = NamespaceManager(self)

    # prefix -> canonical namespace mappings that survive consolidation.
    final_mappings = {}

    for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
        if len(prefix_uri_pairs) == 1:
            # Unique prefix: keep it, but use the canonical namespace
            # (falling back to a freshly built one).
            prefix, _ = prefix_uri_pairs[0]
            final_mappings[prefix] = canonical_namespaces.get(
                uri_str, Namespace(uri_str)
            )
        else:
            # Multiple prefixes for the same URI - keep the best one.
            # Priority: 1) proper Namespace objects,
            #           2) shorter prefixes,
            #           3) alphabetical order.
            prefix_uri_pairs.sort(
                key=lambda x: (
                    not isinstance(x[1], Namespace),  # Namespace objects first
                    len(x[0]),  # Shorter prefixes next
                    x[0],  # Alphabetical order
                )
            )

            best_prefix, _ = prefix_uri_pairs[0]
            final_mappings[best_prefix] = canonical_namespaces.get(
                uri_str, Namespace(uri_str)
            )

            other_prefixes = [p for p, _ in prefix_uri_pairs[1:]]
            if other_prefixes:
                logger.debug(
                    f"Consolidating prefixes {other_prefixes} "
                    f"-> '{best_prefix}' for URI: {uri_str}"
                )

    # Bind the surviving mappings and swap in the new manager.
    for prefix, namespace in final_mappings.items():
        new_ns_manager.bind(prefix, namespace, override=True)

    self.namespace_manager = new_ns_manager

unbind_chunk_namespaces(chunk_pattern='/chunk/')

Unbinds namespace prefixes that point to URIs containing a chunk pattern. Returns a new graph with chunk namespaces dereferenced (expanded to full URIs).

Parameters:

Name Type Description Default
chunk_pattern str

The pattern to look for in URIs (default: "/chunk/")

'/chunk/'

Returns:

Name Type Description
RDFGraph RDFGraph

New graph with chunk-related namespaces unbound

Source code in ontocast/onto.py
def unbind_chunk_namespaces(self, chunk_pattern="/chunk/") -> "RDFGraph":
    """Return a copy of the graph without chunk-related prefix bindings.

    Prefixes whose URI contains ``chunk_pattern`` are not re-bound on the
    copy, so those URIs serialize fully expanded; all triples are kept.

    Args:
        chunk_pattern (str): Substring identifying chunk URIs
            (default: "/chunk/").

    Returns:
        RDFGraph: New graph with chunk-related namespaces unbound.
    """
    bindings = dict(self.namespace_manager.namespaces())

    # Bindings that will be dropped because their URI matches the pattern.
    dropped = [
        (prefix, str(uri))
        for prefix, uri in bindings.items()
        if chunk_pattern in str(uri)
    ]

    # Triples store expanded URIs internally, so a plain copy suffices.
    result = RDFGraph()
    for triple in self:
        result.add(triple)

    # Re-bind only the prefixes that do not reference chunk URIs.
    for prefix, uri in bindings.items():
        if chunk_pattern not in str(uri):
            result.bind(prefix, uri)

    if dropped:
        logger.debug(f"Unbound {len(dropped)} chunk-related namespace(s):")
        for prefix, uri in dropped:
            logger.debug(f"  - '{prefix}': {uri}")

    return result

SemanticTriplesFactsReport

Bases: BaseModel

Report containing semantic triples and evaluation scores.

Attributes:

Name Type Description
semantic_graph RDFGraph

Semantic triples (facts) representing the document in turtle (ttl) format.

ontology_relevance_score Optional[float]

Score 0-100 for how relevant the ontology is to the document. 0 is the worst, 100 is the best.

triples_generation_score Optional[float]

Score 0-100 for how well the facts extraction / triples generation was performed. 0 is the worst, 100 is the best.

Source code in ontocast/onto.py
class SemanticTriplesFactsReport(BaseModel):
    """Report containing semantic triples and evaluation scores.

    Attributes:
        semantic_graph: Semantic triples (facts) representing the document
            in turtle (ttl) format.
        ontology_relevance_score: Score 0-100 for how relevant the ontology
            is to the document. 0 is the worst, 100 is the best.
        triples_generation_score: Score 0-100 for how well the facts extraction /
            triples generation was performed. 0 is the worst, 100 is the best.
    """

    # Facts extracted from the document; defaults to an empty graph.
    semantic_graph: RDFGraph = Field(
        default_factory=RDFGraph,
        description="Semantic triples (facts) representing "
        "the document in turtle (ttl) format.",
    )
    # NOTE(review): annotated Optional but no default is given, so Pydantic
    # treats the field as required-yet-nullable — confirm this is intended.
    ontology_relevance_score: Optional[float] = Field(
        description="Score 0-100 for how relevant "
        "the ontology is to the document. "
        "0 is the worst, 100 is the best."
    )
    # NOTE(review): same required-yet-nullable pattern as above — confirm.
    triples_generation_score: Optional[float] = Field(
        description="Score 0-100 for how well "
        "the facts extraction / triples generation was performed. "
        "0 is the worst, 100 is the best."
    )

Status

Bases: StrEnum

Enumeration of possible workflow status values.

Source code in ontocast/onto.py
class Status(StrEnum):
    """Enumeration of possible workflow status values.

    Members compare equal to their string values (StrEnum).
    """

    SUCCESS = "success"  # step completed normally
    FAILED = "failed"  # step failed
    # presumably set when a node's visit budget is exhausted
    # (cf. AgentState.max_visits) — TODO confirm
    COUNTS_EXCEEDED = "counts exceeded"

ToolType

Bases: StrEnum

Enumeration of tool types used in the workflow.

Source code in ontocast/onto.py
class ToolType(StrEnum):
    """Enumeration of tool types used in the workflow.

    Members compare equal to their string values (StrEnum).
    """

    LLM = "llm"  # language-model tool
    TRIPLE_STORE = "triple store manager"  # RDF triple store backend
    ONTOLOGY_MANAGER = "ontology manager"  # ontology lookup/management
    CONVERTER = "document converter"  # document format conversion
    CHUNKER = "document chunker"  # document splitting into chunks

WorkflowNode

Bases: StrEnum

Enumeration of workflow nodes in the processing pipeline.

Source code in ontocast/onto.py
class WorkflowNode(StrEnum):
    """Enumeration of workflow nodes in the processing pipeline.

    Values are the human-readable node labels; members compare equal to
    these strings (StrEnum).
    """

    CONVERT_TO_MD = "Convert to Markdown"
    CHUNK = "Chunk Text"
    SELECT_ONTOLOGY = "Select Ontology"
    TEXT_TO_ONTOLOGY = "Text to Ontology"
    TEXT_TO_FACTS = "Text to Facts"
    SUBLIMATE_ONTOLOGY = "Sublimate Ontology"
    CRITICISE_ONTOLOGY = "Criticise Ontology"
    CRITICISE_FACTS = "Criticise Facts"
    CHUNKS_EMPTY = "Chunks Empty?"  # branching check: any chunks left?
    AGGREGATE_FACTS = "Aggregate Facts"

iri2namespace(iri, ontology=False)

Convert an IRI to a namespace string.

Parameters:

Name Type Description Default
iri str

The IRI to convert.

required
ontology bool

If True, append '#' for ontology namespace, otherwise '/'.

False

Returns:

Name Type Description
str str

The converted namespace string.

Source code in ontocast/onto.py
def iri2namespace(iri: str, ontology: bool = False) -> str:
    """Turn an IRI into a namespace string.

    Any trailing ``#`` characters are stripped before the separator is
    appended.

    Args:
        iri: The IRI to convert.
        ontology: If True, append '#' for ontology namespace, otherwise '/'.

    Returns:
        str: The IRI terminated by the appropriate separator.
    """
    base = iri.rstrip("#")
    separator = "#" if ontology else "/"
    return base + separator