`ontocast.onto`¶

`AgentState` ¶

Bases: BasePydanticModel

State for the ontology-based knowledge graph agent.

This class maintains the state of the agent during document processing, including input text, chunks, ontologies, and workflow status.

Attributes:

Name	Type	Description
`input_text`	`str`	Input text to process.
`current_domain`	`str`	IRI used for forming document namespace.
`doc_hid`	`Optional[str]`	An almost unique hash/id for the parent document.
`files`	`dict[str, bytes]`	Files to process.
`current_chunk`	`Optional[Chunk]`	Current document chunk for processing.
`chunks`	`list[Chunk]`	List of chunks of the input text.
`chunks_processed`	`list[Chunk]`	List of processed chunks.
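`current_ontology`	`Ontology`	Current ontology object.
`aggregated_facts`	`Optional[RDFGraph]`	RDF triples representing aggregated facts from the current document.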
`ontology_addendum`	`Ontology`	Additional ontology content.
`failure_stage`	`Optional[str]`	Stage where failure occurred.
`failure_reason`	`Optional[str]`	Reason for failure.
`success_score`	`Optional[float]`	Score indicating success level.
`status`	`Status`	Current workflow status.
`node_visits`	`defaultdict[WorkflowNode, int]`	Number of visits per node.
`max_visits`	`int`	Maximum number of visits allowed per node.
`max_chunks`	`Optional[int]`	Maximum number of chunks to process.

Source code in ontocast/onto.py

class AgentState(BasePydanticModel):
    """State for the ontology-based knowledge graph agent.

    This class maintains the state of the agent during document processing,
    including input text, chunks, ontologies, and workflow status.

    Attributes:
        input_text: Input text to process.
        current_domain: IRI used for forming document namespace.
        doc_hid: An almost unique hash/id for the parent document.
        files: Files to process.
        current_chunk: Current document chunk for processing.
        chunks: List of chunks of the input text.
        chunks_processed: List of processed chunks.
        current_ontology: Current ontology object.
        aggregated_facts: RDF triples representing aggregated facts from the
            current document.
        ontology_addendum: Additional ontology content.
        failure_stage: Stage where failure occurred.
        failure_reason: Reason for failure.
        success_score: Score indicating success level.
        status: Current workflow status.
        node_visits: Number of visits per node.
        max_visits: Maximum number of visits allowed per node.
        max_chunks: Maximum number of chunks to process.
    """

    input_text: str = Field(description="Input text", default="")
    current_domain: str = Field(
        description="IRI used for forming document namespace", default=DEFAULT_DOMAIN
    )
    doc_hid: Optional[str] = Field(
        description="An almost unique hash / id for the parent document of the chunk",
        default=None,
    )
    files: dict[str, bytes] = Field(default_factory=dict, description="Files to process")
    current_chunk: Optional[Chunk] = Field(
        description="Current document chunk for processing", default=None
    )
    chunks: list[Chunk] = Field(
        default_factory=list, description="Chunks of the input text"
    )
    # The description used to be a copy of the one on ``chunks``.
    chunks_processed: list[Chunk] = Field(
        default_factory=list, description="Processed chunks of the input text"
    )
    # Starts out as a placeholder ("void") ontology with an empty graph.
    current_ontology: Ontology = Field(
        default_factory=lambda: Ontology(
            short_name=ONTOLOGY_VOID_ID,
            title="null title",
            description="null description",
            graph=RDFGraph(),
            iri=ONTOLOGY_VOID_IRI,
        ),
        description="Ontology object that contain the semantic graph "
        "as well as the description, name, short name, version, "
        "and IRI of the ontology",
    )
    aggregated_facts: Optional[RDFGraph] = Field(
        description="RDF triples representing aggregated facts "
        "from the current document",
        default_factory=RDFGraph,
    )
    # Same placeholder default as ``current_ontology``.
    ontology_addendum: Ontology = Field(
        default_factory=lambda: Ontology(
            short_name=ONTOLOGY_VOID_ID,
            title="null title",
            description="null description",
            graph=RDFGraph(),
            iri=ONTOLOGY_VOID_IRI,
        ),
        description="Ontology object that contain the semantic graph "
        "as well as the description, name, short name, version, "
        "and IRI of the ontology",
    )
    failure_stage: Optional[str] = None
    failure_reason: Optional[str] = None
    success_score: Optional[float] = 0.0
    status: Status = Status.SUCCESS
    node_visits: defaultdict[WorkflowNode, int] = Field(
        default_factory=lambda: defaultdict(int),
        description="Number of visits per node",
    )
    max_visits: int = Field(
        default=3, description="Maximum number of visits allowed per node"
    )
    max_chunks: Optional[int] = None
    model_config = ConfigDict(arbitrary_types_allowed=True)

    def model_post_init(self, __context):
        """Post-initialization hook for the model (intentionally a no-op)."""
        pass

    def __init__(self, **kwargs):
        """Initialize the agent state with given keyword arguments.

        ``current_domain`` is resolved in this order: an explicit
        ``current_domain`` keyword argument, then the ``CURRENT_DOMAIN``
        environment variable, then ``DEFAULT_DOMAIN``.

        Args:
            **kwargs: Field values for the model.
        """
        # Previously the environment value was assigned *after* construction,
        # which silently discarded a ``current_domain`` passed by the caller.
        kwargs.setdefault(
            "current_domain", os.getenv("CURRENT_DOMAIN", DEFAULT_DOMAIN)
        )
        super().__init__(**kwargs)

    def set_text(self, text: str) -> None:
        """Set the input text and generate document hash.

        Args:
            text: The input text to set.
        """
        self.input_text = text
        self.doc_hid = render_text_hash(self.input_text)

    def set_failure(
        self, stage: str, reason: str, success_score: float = 0.0
    ) -> None:
        """Set failure state with stage and reason.

        Args:
            stage: The stage where the failure occurred.
            reason: The reason for the failure.
            success_score: The success score at failure (default: 0.0).
        """
        self.failure_stage = stage
        self.failure_reason = reason
        self.success_score = success_score
        self.status = Status.FAILED

    def clear_failure(self) -> None:
        """Clear failure state and set status to success.

        Resets ``failure_stage`` and ``failure_reason`` to ``None`` and
        ``success_score`` to ``0.0``.
        """
        self.failure_stage = None
        self.failure_reason = None
        self.success_score = 0.0
        self.status = Status.SUCCESS

    @property
    def doc_iri(self):
        """Get the document IRI.

        Built as ``<current_domain>/doc/<doc_hid>``. ``doc_hid`` is
        interpolated as-is, so it reads ``None`` until a hash has been set
        (for example via ``set_text``).

        Returns:
            str: The document IRI.
        """
        return f"{self.current_domain}/doc/{self.doc_hid}"

    @property
    def doc_namespace(self):
        """Get the document namespace.

        Returns:
            str: The result of ``iri2namespace`` applied to ``doc_iri`` with
                ``ontology=False``.
        """
        return iri2namespace(self.doc_iri, ontology=False)

`doc_iri` `property` ¶

Get the document IRI.

Returns:

Name	Type	Description
`str`		The document IRI.

`doc_namespace` `property` ¶

Get the document namespace.

Returns:

Name	Type	Description
`str`		The document namespace.

`__init__(**kwargs)` ¶

Initialize the agent state with given keyword arguments.

Source code in ontocast/onto.py

def __init__(self, **kwargs):
    """Initialize the agent state with given keyword arguments.

    ``current_domain`` is resolved in this order: an explicit
    ``current_domain`` keyword argument, then the ``CURRENT_DOMAIN``
    environment variable, then ``DEFAULT_DOMAIN``.

    Args:
        **kwargs: Field values for the model.
    """
    # Assigning the environment value after construction used to discard a
    # ``current_domain`` passed explicitly by the caller.
    kwargs.setdefault("current_domain", os.getenv("CURRENT_DOMAIN", DEFAULT_DOMAIN))
    super().__init__(**kwargs)

`clear_failure()` ¶

Clear failure state and set status to success.

Source code in ontocast/onto.py

def clear_failure(self):
    """Reset every failure-related field and mark the state as successful."""
    # Stage and reason are both wiped; the score goes back to its 0.0 baseline.
    self.failure_stage = self.failure_reason = None
    self.success_score = 0.0
    self.status = Status.SUCCESS

`model_post_init(__context)` ¶

Post-initialization hook for the model.

Source code in ontocast/onto.py

def model_post_init(self, __context):
    """Hook invoked after model initialization; deliberately does nothing."""
    return None

`set_failure(stage, reason, success_score=0.0)` ¶

Set failure state with stage and reason.

Parameters:

Name	Type	Description	Default
`stage`	`str`	The stage where the failure occurred.	required
`reason`	`str`	The reason for the failure.	required
`success_score`	`float`	The success score at failure (default: 0.0).	`0.0`

Source code in ontocast/onto.py

def set_failure(self, stage: str, reason: str, success_score: float = 0.0):
    """Record a failure on the state and flip the status to failed.

    Args:
        stage: Where in the workflow the failure happened.
        reason: Why the failure happened.
        success_score: Score to store alongside the failure (default: 0.0).
    """
    # Store the three failure details in declaration order, then the status.
    details = (
        ("failure_stage", stage),
        ("failure_reason", reason),
        ("success_score", success_score),
    )
    for field_name, value in details:
        setattr(self, field_name, value)
    self.status = Status.FAILED

`set_text(text)` ¶

Set the input text and generate document hash.

Parameters:

Name	Type	Description	Default
`text`		The input text to set.	required

Source code in ontocast/onto.py

def set_text(self, text):
    """Store the input text and derive the document hash id from it.

    Args:
        text: The input text to set.
    """
    self.input_text = text
    # The hash is computed from the stored attribute, not the raw argument.
    digest = render_text_hash(self.input_text)
    self.doc_hid = digest

`BasePydanticModel` ¶

Bases: BaseModel

Base class for Pydantic models with serialization capabilities.

Source code in ontocast/onto.py

class BasePydanticModel(BaseModel):
    """Base class for Pydantic models with serialization capabilities."""

    def __init__(self, **kwargs):
        """Initialize the model with given keyword arguments."""
        super().__init__(**kwargs)

    def serialize(self, file_path: str | pathlib.Path) -> None:
        """Serialize the state to a JSON file.

        The file is always written as UTF-8 so the result does not depend on
        the platform's locale encoding.

        Args:
            file_path: Path to save the JSON file.
        """
        state_json = self.model_dump_json(indent=4)
        # ``pathlib.Path`` accepts both strings and path objects.
        pathlib.Path(file_path).write_text(state_json, encoding="utf-8")

    @classmethod
    def load(cls, file_path: str | pathlib.Path):
        """Load state from a JSON file.

        The file is read as UTF-8, matching what ``serialize`` writes.

        Args:
            file_path: Path to the JSON file.

        Returns:
            The loaded model instance.
        """
        state_json = pathlib.Path(file_path).read_text(encoding="utf-8")
        return cls.model_validate_json(state_json)

`__init__(**kwargs)` ¶

Initialize the model with given keyword arguments.

Source code in ontocast/onto.py

def __init__(self, **kwargs):
    """Initialize the model with given keyword arguments.

    Args:
        **kwargs: Forwarded unchanged to the parent initializer.
    """
    # Pure pass-through: no extra work is done here.
    super().__init__(**kwargs)

`load(file_path)` `classmethod` ¶

Load state from a JSON file.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to the JSON file.	required

Returns:

Type	Description
	The loaded model instance.

Source code in ontocast/onto.py

@classmethod
def load(cls, file_path: str | pathlib.Path):
    """Load state from a JSON file.

    Args:
        file_path: Path to the JSON file.

    Returns:
        The loaded model instance.
    """
    if isinstance(file_path, str):
        file_path = pathlib.Path(file_path)
    state_json = file_path.read_text()
    return cls.model_validate_json(state_json)

`serialize(file_path)` ¶

Serialize the state to a JSON file.

Parameters:

Name	Type	Description	Default
`file_path`	`str \| Path`	Path to save the JSON file.	required

Source code in ontocast/onto.py

def serialize(self, file_path: str | pathlib.Path) -> None:
    """Serialize the state to a JSON file.

    Args:
        file_path: Path to save the JSON file.
    """
    state_json = self.model_dump_json(indent=4)
    if isinstance(file_path, str):
        file_path = pathlib.Path(file_path)
    file_path.write_text(state_json)

`Chunk` ¶

Bases: BaseModel

A chunk of text with associated metadata and RDF graph.

Attributes:

Name	Type	Description
`text`	`str`	Text content of the chunk.
`hid`	`str`	An almost unique (hash) id for the chunk.
`doc_iri`	`str`	IRI of parent document.
`graph`	`Optional[RDFGraph]`	RDF triples representing the facts from the current document.
`processed`	`bool`	Whether chunk has been processed.

Source code in ontocast/onto.py

class Chunk(BaseModel):
    """A chunk of text with associated metadata and RDF graph.

    Attributes:
        text: Text content of the chunk.
        hid: An almost unique (hash) id for the chunk.
        doc_iri: IRI of parent document.
        graph: RDF triples representing the facts from the current document.
        processed: Whether chunk has been processed.
    """

    text: str = Field(description="Text of the chunk")
    hid: str = Field(description="An almost unique (hash) id for the chunk")
    doc_iri: str = Field(description="IRI of parent doc")
    graph: Optional[RDFGraph] = Field(
        description="RDF triples representing the facts from a document chunk",
        default_factory=RDFGraph,
    )
    processed: bool = Field(default=False, description="Was the chunk processed?")

    @property
    def iri(self):
        """Get the IRI for this chunk.

        Built as ``<doc_iri>/chunk/<hid>``.

        Returns:
            str: The chunk IRI.
        """
        return f"{self.doc_iri}/chunk/{self.hid}"

    @property
    def namespace(self):
        """Get the namespace for this chunk.

        Returns:
            str: The result of ``iri2namespace`` applied to ``iri`` with
                ``ontology=False``.
        """
        return iri2namespace(self.iri, ontology=False)

    def sanitize(self):
        """Clean up the namespace bindings of the chunk graph in place.

        Replaces ``graph`` with the result of its ``unbind_chunk_namespaces()``
        and then calls ``sanitize_prefixes_namespaces()`` on the new graph.
        Does nothing when the chunk has no graph.
        """
        # ``graph`` is Optional; previously a ``None`` graph raised
        # AttributeError here.
        if self.graph is None:
            return
        self.graph = self.graph.unbind_chunk_namespaces()
        self.graph.sanitize_prefixes_namespaces()

`iri` `property` ¶

Get the IRI for this chunk.

Returns:

Name	Type	Description
`str`		The chunk IRI.

`namespace` `property` ¶

Get the namespace for this chunk.

Returns:

Name	Type	Description
`str`		The chunk namespace.

`FailureStages` ¶

Bases: StrEnum

Enumeration of possible failure stages in the workflow.

Source code in ontocast/onto.py

class FailureStages(StrEnum):
    """Enumeration of possible failure stages in the workflow.

    Each member's value is a human-readable sentence describing the failure.
    """

    # There was nothing to work on.
    NO_CHUNKS_TO_PROCESS = "No chunks to process"
    # Critique stages rejected the produced ontology / facts graph.
    ONTOLOGY_CRITIQUE = "The produced ontology did not pass the critique stage."
    FACTS_CRITIQUE = "The produced graph of facts did not pass the critique stage."
    # Text could not be turned into triples.
    PARSE_TEXT_TO_ONTOLOGY_TRIPLES = "Failed to parse the text into ontology triples."
    PARSE_TEXT_TO_FACTS_TRIPLES = "Failed to parse the text into facts triples."
    # The two adjacent literals below are concatenated into a single message.
    SUBLIMATE_ONTOLOGY = (
        "The produced semantic could not be validated "
        "or separated into ontology and facts (technical issue)."
    )

`KGCritiqueReport` ¶

Bases: BaseModel

Report from knowledge graph critique process.

Attributes:

Name	Type	Description
`facts_graph_derivation_success`	`bool`	True if the facts graph derivation was performed successfully, False otherwise.
`facts_graph_derivation_score`	`float`	Score 0-100 for how well the triples of facts represent the original document.
`facts_graph_derivation_critique_comment`	`Optional[str]`	A concrete explanation of why the semantic graph of facts derivation is not satisfactory.

Source code in ontocast/onto.py

class KGCritiqueReport(BaseModel):
    """Report from knowledge graph critique process.

    Attributes:
        facts_graph_derivation_success: True if the facts graph derivation
            was performed successfully, False otherwise.
        facts_graph_derivation_score: Score 0-100 for how well the triples
            of facts represent the original document.
        facts_graph_derivation_critique_comment: A concrete explanation of
            why the semantic graph of facts derivation is not satisfactory.
    """

    # NOTE(review): pydantic can surface the class docstring and the Field
    # descriptions in the generated JSON schema, so their wording is treated
    # as runtime text and left untouched here.

    # None of the fields below declares a default value.
    facts_graph_derivation_success: bool = Field(
        description="True if the facts graph derivation "
        "was performed successfully, False otherwise."
    )
    # The 0-100 range is stated in the description only; no numeric bounds
    # are enforced by this declaration.
    facts_graph_derivation_score: float = Field(
        description="Score 0-100 for how well the triples of facts "
        "represent the original document. 0 is the worst, 100 is the best."
    )
    # ``Optional`` lets the value be None.
    facts_graph_derivation_critique_comment: Optional[str] = Field(
        description="A concrete explanation of why the semantic graph "
        "of facts derivation is not satisfactory. "
        "The explanation should be very specific and detailed."
    )

`Ontology` ¶

Bases: OntologyProperties

A Pydantic model representing an ontology with its RDF graph and description.

Attributes:

Name	Type	Description
`graph`	`RDFGraph`	The RDF graph containing the ontology data.

Source code in ontocast/onto.py

class Ontology(OntologyProperties):
    """A Pydantic model representing an ontology with its RDF graph and description.

    Attributes:
        graph: The RDF graph containing the ontology data.
    """

    graph: RDFGraph = Field(
        description="Semantic triples (abstract entities/relations) "
        "that define the ontology in turtle (ttl) format as a string.",
        default_factory=RDFGraph,
    )

    model_config = ConfigDict(arbitrary_types_allowed=True)

    def __iadd__(self, other: Union["Ontology", RDFGraph]) -> "Ontology":
        """Merge another ontology or a bare graph into this one (``+=``).

        A bare graph only contributes triples. Another ``Ontology``
        contributes its triples and also overwrites this ontology's title,
        short name, description, IRI and version.

        Args:
            other: The ontology or graph to add to this one.

        Returns:
            Ontology: self after modification.
        """
        if not isinstance(other, Ontology):
            self.graph += other
            return self

        self.graph += other.graph
        # The right-hand operand wins for every descriptive property.
        for prop in ("title", "short_name", "description", "iri", "version"):
            setattr(self, prop, getattr(other, prop))
        return self

    @classmethod
    def from_file(cls, file_path: pathlib.Path, format: str = "turtle", **kwargs):
        """Build an Ontology whose graph is parsed from a file.

        Args:
            file_path: Path to the ontology file.
            format: Format of the input file (default: "turtle").
            **kwargs: Additional arguments to pass to the constructor.

        Returns:
            Ontology: A new Ontology instance.
        """
        parsed = RDFGraph()
        parsed.parse(file_path, format=format)
        return cls(graph=parsed, **kwargs)

    def set_properties(self, **kwargs):
        """Set ontology properties from keyword arguments.

        The values are written straight into the instance ``__dict__``.

        Args:
            **kwargs: Property values to set.
        """
        vars(self).update(kwargs)

    def describe(self) -> str:
        """Render the short name, description and IRI as a three-line summary.

        Returns:
            str: A formatted description string.
        """
        summary_lines = (
            f"Ontology name: {self.short_name}\n",
            f"Description: {self.description}\n",
            f"Ontology IRI: {self.iri}\n",
        )
        return "".join(summary_lines)

`__iadd__(other)` ¶

In-place addition operator for Ontology instances.

Merges the RDF graphs and takes properties from the right-hand operand.

Parameters:

Name	Type	Description	Default
`other`	`Union[Ontology, RDFGraph]`	The ontology or graph to add to this one.	required

Returns:

Name	Type	Description
`Ontology`	`Ontology`	self after modification.

Source code in ontocast/onto.py

def __iadd__(self, other: Union["Ontology", RDFGraph]) -> "Ontology":
    """Merge another ontology or a bare graph into this one (``+=``).

    A bare graph only contributes triples. Another ``Ontology`` contributes
    its triples and also overwrites this ontology's title, short name,
    description, IRI and version.

    Args:
        other: The ontology or graph to add to this one.

    Returns:
        Ontology: self after modification.
    """
    if not isinstance(other, Ontology):
        self.graph += other
        return self

    self.graph += other.graph
    # The right-hand operand wins for every descriptive property.
    for prop in ("title", "short_name", "description", "iri", "version"):
        setattr(self, prop, getattr(other, prop))
    return self

`describe()` ¶

Get a human-readable description of the ontology.

Returns:

Name	Type	Description
`str`	`str`	A formatted description string.

Source code in ontocast/onto.py

def describe(self) -> str:
    """Render the short name, description and IRI as a three-line summary.

    Returns:
        str: A formatted description string.
    """
    summary_lines = (
        f"Ontology name: {self.short_name}\n",
        f"Description: {self.description}\n",
        f"Ontology IRI: {self.iri}\n",
    )
    return "".join(summary_lines)

`from_file(file_path, format='turtle', **kwargs)` `classmethod` ¶

Create an Ontology instance by loading a graph from a file.

Parameters:

Name	Type	Description	Default
`file_path`	`Path`	Path to the ontology file.	required
`format`	`str`	Format of the input file (default: "turtle").	`'turtle'`
`**kwargs`		Additional arguments to pass to the constructor.	`{}`

Returns:

Name	Type	Description
`Ontology`		A new Ontology instance.

Source code in ontocast/onto.py

@classmethod
def from_file(cls, file_path: pathlib.Path, format: str = "turtle", **kwargs):
    """Build an Ontology whose graph is parsed from a file.

    Args:
        file_path: Path to the ontology file.
        format: Format of the input file (default: "turtle").
        **kwargs: Additional arguments to pass to the constructor.

    Returns:
        Ontology: A new Ontology instance.
    """
    parsed = RDFGraph()
    parsed.parse(file_path, format=format)
    return cls(graph=parsed, **kwargs)

`set_properties(**kwargs)` ¶

Set ontology properties from keyword arguments.

Parameters:

Name	Type	Description	Default
`**kwargs`		Property values to set.	`{}`

Source code in ontocast/onto.py

def set_properties(self, **kwargs):
    """Set ontology properties from keyword arguments.

    The values are written straight into the instance ``__dict__``.

    Args:
        **kwargs: Property values to set.
    """
    vars(self).update(kwargs)

`OntologyProperties` ¶

Bases: BaseModel

Properties of an ontology.

Attributes:

Name	Type	Description
`short_name`	`Optional[str]`	A short name (identifier) for the ontology.
`title`	`Optional[str]`	Ontology title.
`description`	`Optional[str]`	A concise description of the ontology.
`version`	`Optional[str]`	Version of the ontology.
`iri`	`Optional[str]`	Ontology IRI (Internationalized Resource Identifier).

Source code in ontocast/onto.py

class OntologyProperties(BaseModel):
    """Properties of an ontology.

    Attributes:
        short_name: A short name (identifier) for the ontology.
        title: Ontology title.
        description: A concise description of the ontology.
        version: Version of the ontology.
        iri: Ontology IRI (Internationalized Resource Identifier).
    """

    # NOTE(review): pydantic can surface the class docstring and the Field
    # descriptions in the generated JSON schema, so their wording is treated
    # as runtime text and left untouched here.

    # Every field is Optional with a default, so none has to be passed at
    # construction time even where the description says "Must be provided".
    short_name: Optional[str] = Field(
        default=None,
        description="A short name (identifier) for the ontology. "
        "It should be an abbreviation. Must be provided.",
    )
    title: Optional[str] = Field(
        default=None, description="Ontology title. Must be provided."
    )
    description: Optional[str] = Field(
        default=None,
        description="A concise description (3-4 sentences) of the ontology "
        "(domain, purpose, applicability, etc.)",
    )
    # The only property with a non-None default.
    version: Optional[str] = Field(
        description="Version of the ontology",
        default="0.0.0",
    )
    iri: Optional[str] = Field(
        default=None,
        description="Ontology IRI (Internationalized Resource Identifier)",
    )

    @property
    def namespace(self):
        """Get the namespace for this ontology.

        Delegates to ``iri2namespace(self.iri, ontology=True)``. ``iri`` is
        passed through as-is, including ``None`` when it has not been set.

        Returns:
            str: The namespace string.
        """
        return iri2namespace(self.iri, ontology=True)

`namespace` `property` ¶

Get the namespace for this ontology.

Returns:

Name	Type	Description
`str`		The namespace string.

`OntologySelectorReport` ¶

Bases: BasePydanticModel

Report from ontology selection process.

Attributes:

Name	Type	Description
`short_name`	`Optional[str]`	A short name (identifier) for the ontology that could be used to represent the domain of the document, None if no ontology is suitable.
`present`	`bool`	Whether an ontology that could represent the domain of the document is present in the list of ontologies.

Source code in ontocast/onto.py

class OntologySelectorReport(BasePydanticModel):
    """Report from ontology selection process.

    Attributes:
        short_name: A short name (identifier) for the ontology that could be used
            to represent the domain of the document, None if no ontology is suitable.
        present: Whether an ontology that could represent the domain of the document
            is present in the list of ontologies.
    """

    # NOTE(review): pydantic can surface the class docstring and the Field
    # descriptions in the generated JSON schema, so their wording is treated
    # as runtime text and left untouched here.

    # Neither field declares a default value; ``short_name`` may be None.
    short_name: Optional[str] = Field(
        description="A short name (identifier) for the ontology "
        "that could be used to represent "
        "the domain of the document, None if no ontology is suitable"
    )
    present: bool = Field(
        description="Whether an ontology that could represent "
        "the domain of the document is present in the list of ontologies"
    )

`OntologyUpdateCritiqueReport` ¶

Bases: BaseModel

Report from ontology update critique process.

Attributes:

Name	Type	Description
`ontology_update_success`	`bool`	True if the ontology update was performed successfully, False otherwise.
`ontology_update_score`	`float`	Score 0-100 for how well the update improves the original domain ontology of the document.
`ontology_update_critique_comment`	`Optional[str]`	A concrete explanation of why the ontology update is not satisfactory.

Source code in ontocast/onto.py

class OntologyUpdateCritiqueReport(BaseModel):
    """Report from ontology update critique process.

    Attributes:
        ontology_update_success: True if the ontology update was performed
            successfully, False otherwise.
        ontology_update_score: Score 0-100 for how well the update improves
            the original domain ontology of the document.
        ontology_update_critique_comment: A concrete explanation of why the
            ontology update is not satisfactory.
    """

    # NOTE(review): pydantic can surface the class docstring and the Field
    # descriptions in the generated JSON schema, so their wording is treated
    # as runtime text and left untouched here.

    # None of the fields below declares a default value.
    ontology_update_success: bool = Field(
        description="True if the ontology update "
        "was performed successfully, False otherwise."
    )
    # The 0-100 range is stated in the description only; no numeric bounds
    # are enforced by this declaration.
    ontology_update_score: float = Field(
        description="Score 0-100 for how well the update improves "
        "the original domain ontology of the document. "
        "0 is the worst, 100 is the best."
    )
    # ``Optional`` lets the value be None.
    ontology_update_critique_comment: Optional[str] = Field(
        description="A concrete explanation of why "
        "the ontology update is not satisfactory. "
        "The explanation should be very specific and detailed."
    )

`RDFGraph` ¶

Bases: Graph

Subclass of rdflib.Graph with Pydantic schema support.

This class extends rdflib.Graph to provide serialization and deserialization capabilities for Pydantic models, with special handling for Turtle format.

Source code in ontocast/onto.py

class RDFGraph(Graph):
    """Subclass of rdflib.Graph with Pydantic schema support.

    This class extends rdflib.Graph to provide serialization and deserialization
    capabilities for Pydantic models, with special handling for Turtle format.
    """

    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type, handler: GetCoreSchemaHandler):
        """Get the Pydantic core schema for this class.

        Validation accepts either an existing instance of this class or a
        string, which is converted through ``_from_turtle_str``.
        Serialization goes through ``_to_turtle_str`` and is declared to
        return a string.

        Args:
            _source_type: The source type (unused).
            handler: The core schema handler (unused).

        Returns:
            A union schema that handles both Graph instances and string conversion.
        """
        return core_schema.union_schema(
            [
                # Branch 1: the value already is an instance of this class.
                core_schema.is_instance_schema(cls),
                # Branch 2: validate as ``str`` first, then build a graph
                # from that string.
                core_schema.chain_schema(
                    [
                        core_schema.str_schema(),
                        core_schema.no_info_plain_validator_function(
                            cls._from_turtle_str
                        ),
                    ]
                ),
            ],
            # Serialization side: the graph is emitted as a string.
            serialization=core_schema.plain_serializer_function_ser_schema(
                cls._to_turtle_str,
                info_arg=False,
                return_schema=core_schema.str_schema(),
            ),
        )

    def __add__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
        """Return a new graph with the triples and bindings of both operands.

        Neither operand is modified; the result is always an ``RDFGraph``.

        Args:
            other: The graph to add to this one.

        Returns:
            RDFGraph: A new RDFGraph containing the merged triples.
        """
        merged = RDFGraph()

        # Triples first: left operand, then right operand.
        for source in (self, other):
            for triple in source:
                merged.add(triple)

        # Then the namespace bindings, in the same operand order.
        for source in (self, other):
            for prefix, uri in source.namespaces():
                merged.bind(prefix, uri)

        return merged

    def __iadd__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
        """In-place addition operator for RDFGraph instances.

        Delegates the merge to the parent class and returns this same object,
        so ``g += other`` keeps ``g`` bound to an ``RDFGraph``.

        Args:
            other: The graph to add to this one.

        Returns:
            RDFGraph: self after modification.
        """
        # The parent's return value is intentionally ignored; the merge is
        # relied upon to happen on ``self``.
        super().__iadd__(other)
        # Return self to maintain RDFGraph type
        return self

    @staticmethod
    def _ensure_prefixes(turtle_str: str) -> str:
        """Prepend declarations for any common prefix the Turtle text lacks.

        Prefixes already declared in the text (as found by ``PREFIX_PATTERN``)
        are left alone. Every entry of ``COMMON_PREFIXES`` that is not
        declared gets an ``@prefix`` line at the top.

        Args:
            turtle_str: The input Turtle string.

        Returns:
            str: The Turtle string with all common prefixes declared.
        """
        already_declared = {
            found.group(1) for found in PREFIX_PATTERN.finditer(turtle_str)
        }
        to_add = {
            name: target
            for name, target in COMMON_PREFIXES.items()
            if name not in already_declared
        }

        if to_add:
            declarations = "\n".join(
                f"@prefix {prefix}: {uri} ." for prefix, uri in to_add.items()
            )
            # A blank line separates the generated header from the document.
            return declarations + "\n\n" + turtle_str

        return turtle_str

    @classmethod
    def _from_turtle_str(cls, turtle_str: str) -> "RDFGraph":
        """Create an RDFGraph instance from a Turtle string.

        Backslash escape sequences in the input (for example a literal
        ``\\n``) are decoded first, then missing common prefixes are added
        and the text is parsed as Turtle.

        Args:
            turtle_str: The input Turtle string.

        Returns:
            RDFGraph: A new RDFGraph instance.
        """
        # ``unicode_escape`` treats its input bytes as Latin-1. Encoding to
        # UTF-8 first (the previous behaviour) therefore turned every
        # non-ASCII character into mojibake. Encoding to Latin-1 with
        # ``backslashreplace`` keeps U+0080..U+00FF as single bytes and
        # rewrites anything higher as a ``\uXXXX`` escape, which the decode
        # step restores. ASCII-only input is handled exactly as before.
        # NOTE: a lone backslash placed directly before a character above
        # U+00FF remains ambiguous under this scheme.
        turtle_str = turtle_str.encode("latin-1", "backslashreplace").decode(
            "unicode_escape"
        )
        patched_turtle = cls._ensure_prefixes(turtle_str)
        g = cls()
        g.parse(data=patched_turtle, format="turtle")
        return g

    @staticmethod
    def _to_turtle_str(g: Any) -> str:
        """Serialize a graph to its Turtle text form.

        Args:
            g: The RDFGraph instance.

        Returns:
            str: The Turtle string representation.
        """
        turtle_text = g.serialize(format="turtle")
        return turtle_text

    def __new__(cls, *args, **kwargs):
        """Allocate a new RDFGraph; constructor arguments are not forwarded."""
        return super().__new__(cls)

    def sanitize_prefixes_namespaces(self):
        """Re-bind the graph's prefixes so each namespace URI gets one prefix.

        Every currently bound ``(prefix, uri)`` pair is grouped by the string
        form of its URI. For each URI a single prefix is selected and bound to
        a ``Namespace`` object on a freshly created ``NamespaceManager``,
        which then replaces ``self.namespace_manager``.

        When several prefixes share a URI, the kept prefix is chosen by:

        1. values that already are ``Namespace`` instances first,
        2. then the shortest prefix,
        3. then alphabetical order.

        The prefixes that lose are reported through ``logger.debug``.

        Returns:
            None: The graph is modified in place; nothing is returned.
        """
        # Group the bound prefixes by the string form of the URI they point to.
        uri_to_prefixes = defaultdict(list)
        for prefix, uri in dict(self.namespace_manager.namespaces()).items():
            uri_to_prefixes[str(uri)].append((prefix, uri))

        # Choose one Namespace object per URI: the first bound value that
        # already is a Namespace, otherwise one built from the URI string.
        canonical_namespaces = {}
        for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
            namespace_candidates = []
            for _prefix, uri_obj in prefix_uri_pairs:
                if isinstance(uri_obj, Namespace):
                    namespace_candidates.append(uri_obj)
                    continue
                try:
                    namespace_candidates.append(Namespace(uri_str))
                except Exception as exc:
                    # Best effort: skip a URI that cannot be wrapped, but do
                    # not swallow KeyboardInterrupt/SystemExit as a bare
                    # ``except:`` would.
                    logger.debug(
                        f"Could not build a Namespace for URI {uri_str!r}: {exc}"
                    )
                    continue

            if namespace_candidates:
                canonical_namespaces[uri_str] = namespace_candidates[0]

        # NOTE(review): the new manager is created on this same graph; whether
        # that discards the previously bound (now consolidated-away) prefixes
        # depends on rdflib's store handling -- confirm.
        new_ns_manager = NamespaceManager(self)

        # prefix -> Namespace bindings that will be applied to the new manager
        final_mappings = {}
        for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
            if len(prefix_uri_pairs) > 1:
                # Several prefixes share this URI: order them by preference.
                prefix_uri_pairs.sort(
                    key=lambda pair: (
                        not isinstance(pair[1], Namespace),  # Namespace first
                        len(pair[0]),  # then shorter prefixes
                        pair[0],  # then alphabetical
                    )
                )

            best_prefix = prefix_uri_pairs[0][0]
            final_mappings[best_prefix] = canonical_namespaces.get(
                uri_str
            ) or Namespace(uri_str)

            other_prefixes = [p for p, _ in prefix_uri_pairs[1:]]
            if other_prefixes:
                logger.debug(
                    f"Consolidating prefixes {other_prefixes} "
                    f"-> '{best_prefix}' for URI: {uri_str}"
                )

        for prefix, namespace in final_mappings.items():
            new_ns_manager.bind(prefix, namespace, override=True)

        self.namespace_manager = new_ns_manager

    def unbind_chunk_namespaces(self, chunk_pattern="/chunk/") -> "RDFGraph":
        """Copy the graph, leaving out prefix bindings for chunk namespaces.

        A namespace counts as chunk-related when ``chunk_pattern`` occurs
        anywhere in the string form of its URI. All triples are copied
        unchanged; only the prefix bindings of the copy differ.

        Args:
            chunk_pattern (str): Substring that marks a chunk namespace URI
                (default: "/chunk/").

        Returns:
            RDFGraph: A new graph with the same triples and with only the
                non-chunk prefixes of this graph re-bound.
        """
        bindings = dict(self.namespace_manager.namespaces())

        def is_chunk(namespace_uri) -> bool:
            return chunk_pattern in str(namespace_uri)

        # (prefix, uri-as-text) pairs that are left out of the copy.
        dropped = [
            (pfx, str(ns_uri)) for pfx, ns_uri in bindings.items() if is_chunk(ns_uri)
        ]

        stripped = RDFGraph()

        # Triples carry full URIs, so they can be copied as they are.
        for statement in self:
            stripped.add(statement)

        for pfx, ns_uri in bindings.items():
            if not is_chunk(ns_uri):
                stripped.bind(pfx, ns_uri)

        if dropped:
            logger.debug(f"Unbound {len(dropped)} chunk-related namespace(s):")
            for pfx, ns_text in dropped:
                logger.debug(f"  - '{pfx}': {ns_text}")

        return stripped

`__add__(other)` ¶

Addition operator for RDFGraph instances.

Merges the RDF graphs while maintaining the RDFGraph type.

Parameters:

Name	Type	Description	Default
`other`	`Union[RDFGraph, Graph]`	The graph to add to this one.	required

Returns:

Name	Type	Description
`RDFGraph`	`RDFGraph`	A new RDFGraph containing the merged triples.

Source code in ontocast/onto.py

def __add__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
    """Return the union of this graph and ``other`` as a new RDFGraph.

    Neither operand is modified. Triples and prefix bindings are taken from
    ``self`` first and from ``other`` second.

    Args:
        other: The graph to add to this one.

    Returns:
        RDFGraph: A new RDFGraph containing the merged triples.
    """
    merged = RDFGraph()
    operands = (self, other)

    # Triples first, in operand order.
    for source in operands:
        for statement in source:
            merged.add(statement)

    # Then the prefix bindings, again self before other.
    for source in operands:
        for prefix, uri in source.namespaces():
            merged.bind(prefix, uri)

    return merged

`__get_pydantic_core_schema__(_source_type, handler)` `classmethod` ¶

Get the Pydantic core schema for this class.

Parameters:

Name	Type	Description	Default
`_source_type`		The source type.	required
`handler`	`GetCoreSchemaHandler`	The core schema handler.	required

Returns:

Type	Description
	A union schema that handles both Graph instances and string conversion.

Source code in ontocast/onto.py

@classmethod
def __get_pydantic_core_schema__(cls, _source_type, handler: GetCoreSchemaHandler):
    """Build the pydantic-core schema used to validate and serialize this class.

    Args:
        _source_type: The source type (unused here).
        handler: The core schema handler (unused here).

    Returns:
        A union schema that accepts either an existing instance of ``cls`` or
        a string that is passed through ``cls._from_turtle_str``; values are
        serialized with ``cls._to_turtle_str``.
    """
    # Branch 1: the value already is an instance of this class.
    instance_branch = core_schema.is_instance_schema(cls)

    # Branch 2: validate as a string, then parse it as Turtle.
    turtle_branch = core_schema.chain_schema(
        [
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls._from_turtle_str),
        ]
    )

    # Serialization always produces a string via _to_turtle_str.
    turtle_serializer = core_schema.plain_serializer_function_ser_schema(
        cls._to_turtle_str,
        info_arg=False,
        return_schema=core_schema.str_schema(),
    )

    return core_schema.union_schema(
        [instance_branch, turtle_branch],
        serialization=turtle_serializer,
    )

`__iadd__(other)` ¶

In-place addition operator for RDFGraph instances.

Merges the RDF graphs while maintaining the RDFGraph type.

Parameters:

Name	Type	Description	Default
`other`	`Union[RDFGraph, Graph]`	The graph to add to this one.	required

Returns:

Name	Type	Description
`RDFGraph`	`RDFGraph`	self after modification.

Source code in ontocast/onto.py

def __iadd__(self, other: Union["RDFGraph", Graph]) -> "RDFGraph":
    """Merge ``other`` into this graph in place (``graph += other``).

    The merge itself is delegated to the parent class; this override only
    makes sure the object handed back is ``self``.

    Args:
        other: The graph to add to this one.

    Returns:
        RDFGraph: self after modification.
    """
    super().__iadd__(other)  # parent implementation performs the merge
    return self

`__new__(*args, **kwargs)` ¶

Create a new RDFGraph instance.

Source code in ontocast/onto.py

def __new__(cls, *args, **kwargs):
    """Allocate an RDFGraph object.

    Positional and keyword arguments are accepted but are not forwarded
    to the parent ``__new__``.
    """
    return super().__new__(cls)

`sanitize_prefixes_namespaces()` ¶

Rematches prefixes in an RDFLib graph to correct namespaces when a namespace with the same URI exists. Handles cases where prefixes might not be bound as namespaces.

Parameters:

Name	Type	Description	Default
`self`	`RDFGraph`	The RDFLib graph to process	required

Returns:

Name	Type	Description
`None`		Nothing is returned; the graph's namespace manager is replaced in place with the corrected prefix-namespace mappings.

Source code in ontocast/onto.py

def sanitize_prefixes_namespaces(self):
    """Re-bind the graph's prefixes so each namespace URI gets one prefix.

    Every currently bound ``(prefix, uri)`` pair is grouped by the string
    form of its URI. For each URI a single prefix is selected and bound to
    a ``Namespace`` object on a freshly created ``NamespaceManager``,
    which then replaces ``self.namespace_manager``.

    When several prefixes share a URI, the kept prefix is chosen by:

    1. values that already are ``Namespace`` instances first,
    2. then the shortest prefix,
    3. then alphabetical order.

    The prefixes that lose are reported through ``logger.debug``.

    Returns:
        None: The graph is modified in place; nothing is returned.
    """
    # Group the bound prefixes by the string form of the URI they point to.
    uri_to_prefixes = defaultdict(list)
    for prefix, uri in dict(self.namespace_manager.namespaces()).items():
        uri_to_prefixes[str(uri)].append((prefix, uri))

    # Choose one Namespace object per URI: the first bound value that
    # already is a Namespace, otherwise one built from the URI string.
    canonical_namespaces = {}
    for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
        namespace_candidates = []
        for _prefix, uri_obj in prefix_uri_pairs:
            if isinstance(uri_obj, Namespace):
                namespace_candidates.append(uri_obj)
                continue
            try:
                namespace_candidates.append(Namespace(uri_str))
            except Exception as exc:
                # Best effort: skip a URI that cannot be wrapped, but do
                # not swallow KeyboardInterrupt/SystemExit as a bare
                # ``except:`` would.
                logger.debug(
                    f"Could not build a Namespace for URI {uri_str!r}: {exc}"
                )
                continue

        if namespace_candidates:
            canonical_namespaces[uri_str] = namespace_candidates[0]

    # NOTE(review): the new manager is created on this same graph; whether
    # that discards the previously bound (now consolidated-away) prefixes
    # depends on rdflib's store handling -- confirm.
    new_ns_manager = NamespaceManager(self)

    # prefix -> Namespace bindings that will be applied to the new manager
    final_mappings = {}
    for uri_str, prefix_uri_pairs in uri_to_prefixes.items():
        if len(prefix_uri_pairs) > 1:
            # Several prefixes share this URI: order them by preference.
            prefix_uri_pairs.sort(
                key=lambda pair: (
                    not isinstance(pair[1], Namespace),  # Namespace first
                    len(pair[0]),  # then shorter prefixes
                    pair[0],  # then alphabetical
                )
            )

        best_prefix = prefix_uri_pairs[0][0]
        final_mappings[best_prefix] = canonical_namespaces.get(
            uri_str
        ) or Namespace(uri_str)

        other_prefixes = [p for p, _ in prefix_uri_pairs[1:]]
        if other_prefixes:
            logger.debug(
                f"Consolidating prefixes {other_prefixes} "
                f"-> '{best_prefix}' for URI: {uri_str}"
            )

    for prefix, namespace in final_mappings.items():
        new_ns_manager.bind(prefix, namespace, override=True)

    self.namespace_manager = new_ns_manager

`unbind_chunk_namespaces(chunk_pattern='/chunk/')` ¶

Unbinds namespace prefixes that point to URIs containing a chunk pattern. Returns a new graph with chunk namespaces dereferenced (expanded to full URIs).

Parameters:

Name	Type	Description	Default
`chunk_pattern`	`str`	The pattern to look for in URIs (default: "/chunk/")	`'/chunk/'`

Returns:

Name	Type	Description
`RDFGraph`	`RDFGraph`	New graph with chunk-related namespaces unbound

Source code in ontocast/onto.py

def unbind_chunk_namespaces(self, chunk_pattern="/chunk/") -> "RDFGraph":
    """Copy the graph, leaving out prefix bindings for chunk namespaces.

    A namespace counts as chunk-related when ``chunk_pattern`` occurs
    anywhere in the string form of its URI. All triples are copied
    unchanged; only the prefix bindings of the copy differ.

    Args:
        chunk_pattern (str): Substring that marks a chunk namespace URI
            (default: "/chunk/").

    Returns:
        RDFGraph: A new graph with the same triples and with only the
            non-chunk prefixes of this graph re-bound.
    """
    bindings = dict(self.namespace_manager.namespaces())

    def is_chunk(namespace_uri) -> bool:
        return chunk_pattern in str(namespace_uri)

    # (prefix, uri-as-text) pairs that are left out of the copy.
    dropped = [
        (pfx, str(ns_uri)) for pfx, ns_uri in bindings.items() if is_chunk(ns_uri)
    ]

    stripped = RDFGraph()

    # Triples carry full URIs, so they can be copied as they are.
    for statement in self:
        stripped.add(statement)

    for pfx, ns_uri in bindings.items():
        if not is_chunk(ns_uri):
            stripped.bind(pfx, ns_uri)

    if dropped:
        logger.debug(f"Unbound {len(dropped)} chunk-related namespace(s):")
        for pfx, ns_text in dropped:
            logger.debug(f"  - '{pfx}': {ns_text}")

    return stripped

`SemanticTriplesFactsReport` ¶

Bases: BaseModel

Report containing semantic triples and evaluation scores.

Attributes:

Name	Type	Description
`semantic_graph`	`RDFGraph`	Semantic triples (facts) representing the document in turtle (ttl) format.
`ontology_relevance_score`	`Optional[float]`	Score 0-100 for how relevant the ontology is to the document. 0 is the worst, 100 is the best.
`triples_generation_score`	`Optional[float]`	Score 0-100 for how well the facts extraction / triples generation was performed. 0 is the worst, 100 is the best.

Source code in ontocast/onto.py

class SemanticTriplesFactsReport(BaseModel):
    """Report containing semantic triples and evaluation scores.

    Attributes:
        semantic_graph: Semantic triples (facts) representing the document
            in turtle (ttl) format.
        ontology_relevance_score: Score 0-100 for how relevant the ontology
            is to the document. 0 is the worst, 100 is the best.
        triples_generation_score: Score 0-100 for how well the facts extraction /
            triples generation was performed. 0 is the worst, 100 is the best.
    """

    # NOTE(review): the docstring above and the ``description`` strings below
    # are runtime values; pydantic presumably exposes them in the generated
    # schema, so treat wording changes as behavior changes -- confirm.

    # Defaults to a fresh, empty RDFGraph when no graph is supplied.
    semantic_graph: RDFGraph = Field(
        default_factory=RDFGraph,
        description="Semantic triples (facts) representing "
        "the document in turtle (ttl) format.",
    )
    # NOTE(review): both scores are typed Optional but declare no default, so
    # they presumably must be passed explicitly (None is accepted) -- confirm.
    # The 0-100 range is stated in the description only; no bounds are
    # enforced by this declaration.
    ontology_relevance_score: Optional[float] = Field(
        description="Score 0-100 for how relevant "
        "the ontology is to the document. "
        "0 is the worst, 100 is the best."
    )
    triples_generation_score: Optional[float] = Field(
        description="Score 0-100 for how well "
        "the facts extraction / triples generation was performed. "
        "0 is the worst, 100 is the best."
    )

`Status` ¶

Bases: StrEnum

Enumeration of possible workflow status values.

Source code in ontocast/onto.py

class Status(StrEnum):
    """Enumeration of possible workflow status values.

    Being a ``StrEnum``, each member is also a plain string equal to its
    value (for example ``Status.SUCCESS == "success"``).
    """

    SUCCESS = "success"
    FAILED = "failed"
    # presumably reported when a node is visited more often than allowed;
    # verify against the workflow code that sets it
    COUNTS_EXCEEDED = "counts exceeded"

`ToolType` ¶

Bases: StrEnum

Enumeration of tool types used in the workflow.

Source code in ontocast/onto.py

class ToolType(StrEnum):
    """Enumeration of tool types used in the workflow.

    Being a ``StrEnum``, each member is also a plain string equal to its
    value. The values are lower-case, space-separated labels.
    """

    LLM = "llm"
    TRIPLE_STORE = "triple store manager"
    ONTOLOGY_MANAGER = "ontology manager"
    CONVERTER = "document converter"
    CHUNKER = "document chunker"

`WorkflowNode` ¶

Bases: StrEnum

Enumeration of workflow nodes in the processing pipeline.

Source code in ontocast/onto.py

class WorkflowNode(StrEnum):
    """Enumeration of workflow nodes in the processing pipeline.

    Being a ``StrEnum``, each member is also a plain string equal to its
    value. The values are human-readable, title-cased labels.
    """

    CONVERT_TO_MD = "Convert to Markdown"
    CHUNK = "Chunk Text"
    SELECT_ONTOLOGY = "Select Ontology"
    TEXT_TO_ONTOLOGY = "Text to Ontology"
    TEXT_TO_FACTS = "Text to Facts"
    SUBLIMATE_ONTOLOGY = "Sublimate Ontology"
    CRITICISE_ONTOLOGY = "Criticise Ontology"
    CRITICISE_FACTS = "Criticise Facts"
    # Phrased as a question; presumably a decision node -- confirm in the
    # workflow graph definition.
    CHUNKS_EMPTY = "Chunks Empty?"
    AGGREGATE_FACTS = "Aggregate Facts"

`iri2namespace(iri, ontology=False)` ¶

Convert an IRI to a namespace string.

Parameters:

Name	Type	Description	Default
`iri`	`str`	The IRI to convert.	required
`ontology`	`bool`	If True, append '#' for ontology namespace, otherwise '/'.	`False`

Returns:

Name	Type	Description
`str`	`str`	The converted namespace string.

Source code in ontocast/onto.py

def iri2namespace(iri: str, ontology: bool = False) -> str:
    """Convert an IRI to a namespace string.

    Trailing ``#`` characters are always removed before the separator is
    appended. For the slash form, trailing ``/`` characters are removed as
    well, so an IRI that already ends in ``/`` does not end up with a
    doubled separator and the conversion is idempotent.

    Args:
        iri: The IRI to convert.
        ontology: If True, append '#' for ontology namespace, otherwise '/'.

    Returns:
        str: The converted namespace string.
    """
    base = iri.rstrip("#")
    if ontology:
        # Hash namespaces keep any trailing '/' of the IRI untouched.
        return f"{base}#"
    # Avoid "…//" when the IRI already ends with the slash separator.
    return f"{base.rstrip('/')}/"

ontocast.onto¶

AgentState ¶

doc_iri property ¶

doc_namespace property ¶

__init__(**kwargs) ¶

clear_failure() ¶

model_post_init(__context) ¶

set_failure(stage, reason, success_score=0.0) ¶

set_text(text) ¶

BasePydanticModel ¶

__init__(**kwargs) ¶

load(file_path) classmethod ¶

serialize(file_path) ¶

Chunk ¶

iri property ¶

namespace property ¶

FailureStages ¶

KGCritiqueReport ¶

Ontology ¶

__iadd__(other) ¶

describe() ¶

from_file(file_path, format='turtle', **kwargs) classmethod ¶

set_properties(**kwargs) ¶

OntologyProperties ¶

namespace property ¶

OntologySelectorReport ¶

OntologyUpdateCritiqueReport ¶

RDFGraph ¶

__add__(other) ¶

__get_pydantic_core_schema__(_source_type, handler) classmethod ¶

__iadd__(other) ¶

__new__(*args, **kwargs) ¶

sanitize_prefixes_namespaces() ¶

unbind_chunk_namespaces(chunk_pattern='/chunk/') ¶

SemanticTriplesFactsReport ¶

Status ¶

ToolType ¶

WorkflowNode ¶

iri2namespace(iri, ontology=False) ¶

`ontocast.onto`¶

`AgentState` ¶

`doc_iri` `property` ¶

`doc_namespace` `property` ¶

`__init__(**kwargs)` ¶

`clear_failure()` ¶

`model_post_init(__context)` ¶

`set_failure(stage, reason, success_score=0.0)` ¶

`set_text(text)` ¶

`BasePydanticModel` ¶

`__init__(**kwargs)` ¶

`load(file_path)` `classmethod` ¶

`serialize(file_path)` ¶

`Chunk` ¶

`iri` `property` ¶

`namespace` `property` ¶

`FailureStages` ¶

`KGCritiqueReport` ¶

`Ontology` ¶

`__iadd__(other)` ¶

`describe()` ¶

`from_file(file_path, format='turtle', **kwargs)` `classmethod` ¶

`set_properties(**kwargs)` ¶

`OntologyProperties` ¶

`namespace` `property` ¶

`OntologySelectorReport` ¶

`OntologyUpdateCritiqueReport` ¶

`RDFGraph` ¶

`__add__(other)` ¶

`__get_pydantic_core_schema__(_source_type, handler)` `classmethod` ¶

`__iadd__(other)` ¶

`__new__(*args, **kwargs)` ¶

`sanitize_prefixes_namespaces()` ¶

`unbind_chunk_namespaces(chunk_pattern='/chunk/')` ¶

`SemanticTriplesFactsReport` ¶

`Status` ¶

`ToolType` ¶

`WorkflowNode` ¶

`iri2namespace(iri, ontology=False)` ¶