ontocast.tool

ConverterTool

Bases: Tool

Tool for converting documents to structured data.

This class provides functionality for converting various document formats into structured data that can be processed by the OntoCast system.

Attributes:

    supported_extensions (set[str]): Set of supported file extensions.

Source code in ontocast/tool/converter.py
class ConverterTool(Tool):
    """Tool for converting documents to structured data.

    This class provides functionality for converting various document formats
    into structured data that can be processed by the OntoCast system.

    Attributes:
        supported_extensions: Set of supported file extensions.
    """

    supported_extensions: set[str] = {".pdf", ".ppt", ".pptx"}

    def __init__(
        self,
        **kwargs,
    ):
        """Initialize the converter tool.

        Args:
            **kwargs: Additional keyword arguments passed to the parent class.
        """
        super().__init__(**kwargs)
        self._converter = DocumentConverter()

    def __call__(self, file_input: Union[bytes, str]) -> Dict[str, Any]:
        """Convert a document to structured data.

        Args:
            file_input: The input file as raw bytes or a plain-text string.

        Returns:
            Dict[str, Any]: The converted document data.
        """
        if isinstance(file_input, bytes):
            ds = DocumentStream(name="doc", stream=BytesIO(file_input))
            result = self._converter.convert(ds)
            doc = result.document.export_to_markdown()
            return {"text": doc}
        else:
            # For string input (e.g., plain text), return it unchanged
            return {"text": file_input}
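
A minimal usage sketch. The import path and the file name report.pdf are illustrative assumptions:

from ontocast.tool import ConverterTool  # import path assumed

converter = ConverterTool()

# Raw bytes go through the underlying DocumentConverter and come back as Markdown
with open("report.pdf", "rb") as f:
    result = converter(f.read())
print(result["text"][:200])

# Plain strings bypass conversion and are returned unchanged
assert converter("already plain text") == {"text": "already plain text"}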

__call__(file_input)

Convert a document to structured data.

Parameters:

    file_input (Union[bytes, str], required): The input file as raw bytes or a plain-text string.

Returns:

    Dict[str, Any]: The converted document data.

Source code in ontocast/tool/converter.py
def __call__(self, file_input: Union[bytes, str]) -> Dict[str, Any]:
    """Convert a document to structured data.

    Args:
        file_input: The input file as raw bytes or a plain-text string.

    Returns:
        Dict[str, Any]: The converted document data.
    """
    if isinstance(file_input, bytes):
        ds = DocumentStream(name="doc", stream=BytesIO(file_input))
        result = self._converter.convert(ds)
        doc = result.document.export_to_markdown()
        return {"text": doc}
    else:
        # For string input (e.g., plain text), return it unchanged
        return {"text": file_input}

__init__(**kwargs)

Initialize the converter tool.

Parameters:

    **kwargs (default: {}): Additional keyword arguments passed to the parent class.

Source code in ontocast/tool/converter.py
def __init__(
    self,
    **kwargs,
):
    """Initialize the converter tool.

    Args:
        **kwargs: Additional keyword arguments passed to the parent class.
    """
    super().__init__(**kwargs)
    self._converter = DocumentConverter()

FilesystemTripleStoreManager

Bases: TripleStoreManager

Filesystem-based implementation of triple store management.

This class provides a concrete implementation of triple store management using the local filesystem for storage.

Attributes:

    working_directory (Path): Path to the working directory for storing data.
    ontology_path (Optional[Path]): Optional path to the ontology directory.

Source code in ontocast/tool/triple_manager.py
class FilesystemTripleStoreManager(TripleStoreManager):
    """Filesystem-based implementation of triple store management.

    This class provides a concrete implementation of triple store management
    using the local filesystem for storage.

    Attributes:
        working_directory: Path to the working directory for storing data.
        ontology_path: Optional path to the ontology directory.
    """

    working_directory: pathlib.Path
    ontology_path: Optional[pathlib.Path]

    def __init__(self, **kwargs):
        """Initialize the filesystem triple store manager.

        Args:
            **kwargs: Additional keyword arguments passed to the parent class.
        """
        super().__init__(**kwargs)

    def fetch_ontologies(self) -> list[Ontology]:
        """Fetch all available ontologies from the filesystem.

        Returns:
            list[Ontology]: List of available ontologies.
        """
        ontologies = []
        if self.ontology_path is not None:
            sorted_files = sorted(self.ontology_path.glob("*.ttl"))
            for fname in sorted_files:
                try:
                    ontology = Ontology.from_file(fname)
                    ontologies.append(ontology)
                except Exception as e:
                    logging.error(f"Failed to load ontology {fname}: {str(e)}")
        return ontologies

    def serialize_ontology(self, o: Ontology, **kwargs):
        """Store an ontology in the filesystem.

        Args:
            o: The ontology to store.
            **kwargs: Additional keyword arguments for serialization.
        """
        fname = f"ontology_{o.short_name}_{o.version}"
        o.graph.serialize(
            format="turtle", destination=self.working_directory / f"{fname}.ttl"
        )

    def serialize_facts(self, g: Graph, **kwargs):
        """Store a graph in the filesystem.

        Args:
            g: The graph to store.
            **kwargs: Additional keyword arguments for serialization.
                spec: Optional specification for the filename.
        """
        spec = kwargs.pop("spec", None)
        if spec is None:
            fname = "current.ttl"
        elif isinstance(spec, str):
            s = spec.split("/")[-2:]
            s = "_".join([x for x in s if x])
            fname = f"facts_{s}.ttl"
        else:
            raise TypeError(f"string expected for spec, got {spec!r}")
        filename = self.working_directory / fname
        g.serialize(format="turtle", destination=filename)
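
A usage sketch, assuming the import path below matches the package layout:

import pathlib

from rdflib import Graph

from ontocast.tool import FilesystemTripleStoreManager  # import path assumed

working = pathlib.Path("./store")
working.mkdir(exist_ok=True)

manager = FilesystemTripleStoreManager(
    working_directory=working,
    ontology_path=pathlib.Path("./ontologies"),
)

# Loads every *.ttl file under ./ontologies; failures are logged and skipped
ontologies = manager.fetch_ontologies()

# The last two path segments of spec determine the output filename
manager.serialize_facts(Graph(), spec="https://example.com/reports/q3")
# -> writes ./store/facts_reports_q3.ttl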

__init__(**kwargs)

Initialize the filesystem triple store manager.

Parameters:

    **kwargs (default: {}): Additional keyword arguments passed to the parent class.

Source code in ontocast/tool/triple_manager.py
def __init__(self, **kwargs):
    """Initialize the filesystem triple store manager.

    Args:
        **kwargs: Additional keyword arguments passed to the parent class.
    """
    super().__init__(**kwargs)

fetch_ontologies()

Fetch all available ontologies from the filesystem.

Returns:

    list[Ontology]: List of available ontologies.

Source code in ontocast/tool/triple_manager.py
def fetch_ontologies(self) -> list[Ontology]:
    """Fetch all available ontologies from the filesystem.

    Returns:
        list[Ontology]: List of available ontologies.
    """
    ontologies = []
    if self.ontology_path is not None:
        sorted_files = sorted(self.ontology_path.glob("*.ttl"))
        for fname in sorted_files:
            try:
                ontology = Ontology.from_file(fname)
                ontologies.append(ontology)
            except Exception as e:
                logging.error(f"Failed to load ontology {fname}: {str(e)}")
    return ontologies

serialize_facts(g, **kwargs)

Store a graph in the filesystem.

Parameters:

    g (Graph, required): The graph to store.
    **kwargs (default: {}): Additional keyword arguments for serialization.
        spec (str, optional): Specification used to derive the output filename.

Source code in ontocast/tool/triple_manager.py
def serialize_facts(self, g: Graph, **kwargs):
    """Store a graph in the filesystem.

    Args:
        g: The graph to store.
        **kwargs: Additional keyword arguments for serialization.
            spec: Optional specification for the filename.
    """
    spec = kwargs.pop("spec", None)
    if spec is None:
        fname = "current.ttl"
    elif isinstance(spec, str):
        s = spec.split("/")[-2:]
        s = "_".join([x for x in s if x])
        fname = f"facts_{s}.ttl"
    else:
        raise TypeError(f"string expected for spec, got {spec!r}")
    filename = self.working_directory / fname
    g.serialize(format="turtle", destination=filename)
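
For reference, how spec maps to the output filename (values chosen for illustration):

# spec=None                         -> current.ttl
# spec="reports/q3"                 -> facts_reports_q3.ttl
# spec="https://example.com/doc/a"  -> facts_doc_a.ttl  (last two segments, empties dropped)
# spec=123                          -> TypeError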

serialize_ontology(o, **kwargs)

Store an ontology in the filesystem.

Parameters:

    o (Ontology, required): The ontology to store.
    **kwargs (default: {}): Additional keyword arguments for serialization.

Source code in ontocast/tool/triple_manager.py
def serialize_ontology(self, o: Ontology, **kwargs):
    """Store an ontology in the filesystem.

    Args:
        o: The ontology to store.
        **kwargs: Additional keyword arguments for serialization.
    """
    fname = f"ontology_{o.short_name}_{o.version}"
    o.graph.serialize(
        format="turtle", destination=self.working_directory / f"{fname}.ttl"
    )

LLMTool

Bases: Tool

Tool for interacting with language models.

This class provides a unified interface for working with different language model providers (OpenAI, Ollama) through LangChain. It supports both synchronous and asynchronous operations.

Attributes:

    provider (str): The LLM provider to use (default: "openai").
    model (str): The specific model to use (default: "gpt-4o-mini").
    api_key (Optional[str]): Optional API key for the provider.
    base_url (Optional[str]): Optional base URL for the provider.
    temperature (float): Temperature parameter for generation (default: 0.1).

Source code in ontocast/tool/llm.py
class LLMTool(Tool):
    """Tool for interacting with language models.

    This class provides a unified interface for working with different language model
    providers (OpenAI, Ollama) through LangChain. It supports both synchronous and
    asynchronous operations.

    Attributes:
        provider: The LLM provider to use (default: "openai").
        model: The specific model to use (default: "gpt-4o-mini").
        api_key: Optional API key for the provider.
        base_url: Optional base URL for the provider.
        temperature: Temperature parameter for generation (default: 0.1).
    """

    provider: str = Field(default="openai")
    model: str = Field(default="gpt-4o-mini")
    api_key: Optional[str] = None
    base_url: Optional[str] = None
    temperature: float = 0.1

    def __init__(
        self,
        **kwargs,
    ):
        """Initialize the LLM tool.

        Args:
            **kwargs: Additional keyword arguments passed to the parent class.
        """
        super().__init__(**kwargs)
        self._llm = None

    @classmethod
    def create(cls, **kwargs):
        """Create a new LLM tool instance synchronously.

        Args:
            **kwargs: Keyword arguments for initialization.

        Returns:
            LLMTool: A new instance of the LLM tool.
        """
        return asyncio.run(cls.acreate(**kwargs))

    @classmethod
    async def acreate(cls, **kwargs):
        """Create a new LLM tool instance asynchronously.

        Args:
            **kwargs: Keyword arguments for initialization.

        Returns:
            LLMTool: A new instance of the LLM tool.
        """
        self = cls.__new__(cls)
        self.__init__(**kwargs)
        await self.setup()
        return self

    async def setup(self):
        """Set up the language model based on the configured provider.

        Raises:
            ValueError: If the provider is not supported.
        """
        if self.provider == "openai":
            self._llm = ChatOpenAI(
                model=self.model,
                temperature=self.temperature,
            )
        elif self.provider == "ollama":
            self._llm = ChatOllama(
                model=self.model, base_url=self.base_url, temperature=self.temperature
            )
        else:
            raise ValueError(f"Unsupported provider: {self.provider}")

    def __call__(self, *args: Any, **kwds: Any) -> Any:
        """Call the language model directly.

        Args:
            *args: Positional arguments passed to the LLM.
            **kwds: Keyword arguments passed to the LLM.

        Returns:
            Any: The LLM's response.
        """
        return self.llm.invoke(*args, **kwds)

    @property
    def llm(self) -> BaseChatModel:
        """Get the underlying language model instance.

        Returns:
            BaseChatModel: The configured language model.

        Raises:
            RuntimeError: If the LLM has not been properly initialized.
        """
        if self._llm is None:
            raise RuntimeError(
                "LLM resource not properly initialized. Call setup() first."
            )
        return self._llm

    async def complete(self, prompt: str, **kwargs) -> Any:
        """Generate a completion for the given prompt.

        Args:
            prompt: The input prompt for generation.
            **kwargs: Additional keyword arguments for generation.

        Returns:
            Any: The generated completion.
        """
        response = await self.llm.ainvoke(prompt)
        return response.content

    async def extract(self, prompt: str, output_schema: Type[T], **kwargs) -> T:
        """Extract structured data from the prompt according to a schema.

        Args:
            prompt: The input prompt for extraction.
            output_schema: The Pydantic model class defining the output structure.
            **kwargs: Additional keyword arguments for extraction.

        Returns:
            T: The extracted data conforming to the output schema.
        """
        parser = PydanticOutputParser(pydantic_object=output_schema)
        format_instructions = parser.get_format_instructions()

        full_prompt = f"{prompt}\n\n{format_instructions}"
        response = await self.llm.ainvoke(full_prompt)

        return parser.parse(response.content)
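
A usage sketch; it assumes provider credentials (e.g. OPENAI_API_KEY) are set in the environment, and the model names are illustrative:

import asyncio

from ontocast.tool import LLMTool  # import path assumed

async def main():
    llm = await LLMTool.acreate(provider="openai", model="gpt-4o-mini", temperature=0.0)
    summary = await llm.complete("Summarize RDF in one sentence.")
    print(summary)

asyncio.run(main())

# Outside a running event loop, the synchronous factory is equivalent:
# llm = LLMTool.create(provider="ollama", model="llama3", base_url="http://localhost:11434")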

llm property

Get the underlying language model instance.

Returns:

    BaseChatModel: The configured language model.

Raises:

    RuntimeError: If the LLM has not been properly initialized.

__call__(*args, **kwds)

Call the language model directly.

Parameters:

    *args (Any, default: ()): Positional arguments passed to the LLM.
    **kwds (Any, default: {}): Keyword arguments passed to the LLM.

Returns:

    Any: The LLM's response.

Source code in ontocast/tool/llm.py
def __call__(self, *args: Any, **kwds: Any) -> Any:
    """Call the language model directly.

    Args:
        *args: Positional arguments passed to the LLM.
        **kwds: Keyword arguments passed to the LLM.

    Returns:
        Any: The LLM's response.
    """
    return self.llm.invoke(*args, **kwds)

__init__(**kwargs)

Initialize the LLM tool.

Parameters:

    **kwargs (default: {}): Additional keyword arguments passed to the parent class.

Source code in ontocast/tool/llm.py
def __init__(
    self,
    **kwargs,
):
    """Initialize the LLM tool.

    Args:
        **kwargs: Additional keyword arguments passed to the parent class.
    """
    super().__init__(**kwargs)
    self._llm = None

acreate(**kwargs) async classmethod

Create a new LLM tool instance asynchronously.

Parameters:

    **kwargs (default: {}): Keyword arguments for initialization.

Returns:

    LLMTool: A new instance of the LLM tool.

Source code in ontocast/tool/llm.py
@classmethod
async def acreate(cls, **kwargs):
    """Create a new LLM tool instance asynchronously.

    Args:
        **kwargs: Keyword arguments for initialization.

    Returns:
        LLMTool: A new instance of the LLM tool.
    """
    self = cls.__new__(cls)
    self.__init__(**kwargs)
    await self.setup()
    return self

complete(prompt, **kwargs) async

Generate a completion for the given prompt.

Parameters:

    prompt (str, required): The input prompt for generation.
    **kwargs (default: {}): Additional keyword arguments for generation.

Returns:

    Any: The generated completion.

Source code in ontocast/tool/llm.py
async def complete(self, prompt: str, **kwargs) -> Any:
    """Generate a completion for the given prompt.

    Args:
        prompt: The input prompt for generation.
        **kwargs: Additional keyword arguments for generation.

    Returns:
        Any: The generated completion.
    """
    response = await self.llm.ainvoke(prompt)
    return response.content

create(**kwargs) classmethod

Create a new LLM tool instance synchronously.

Parameters:

    **kwargs (default: {}): Keyword arguments for initialization.

Returns:

    LLMTool: A new instance of the LLM tool.

Source code in ontocast/tool/llm.py
@classmethod
def create(cls, **kwargs):
    """Create a new LLM tool instance synchronously.

    Args:
        **kwargs: Keyword arguments for initialization.

    Returns:
        LLMTool: A new instance of the LLM tool.
    """
    return asyncio.run(cls.acreate(**kwargs))

extract(prompt, output_schema, **kwargs) async

Extract structured data from the prompt according to a schema.

Parameters:

    prompt (str, required): The input prompt for extraction.
    output_schema (Type[T], required): The Pydantic model class defining the output structure.
    **kwargs (default: {}): Additional keyword arguments for extraction.

Returns:

    T: The extracted data conforming to the output schema.

Source code in ontocast/tool/llm.py
async def extract(self, prompt: str, output_schema: Type[T], **kwargs) -> T:
    """Extract structured data from the prompt according to a schema.

    Args:
        prompt: The input prompt for extraction.
        output_schema: The Pydantic model class defining the output structure.
        **kwargs: Additional keyword arguments for extraction.

    Returns:
        T: The extracted data conforming to the output schema.
    """
    parser = PydanticOutputParser(pydantic_object=output_schema)
    format_instructions = parser.get_format_instructions()

    full_prompt = f"{prompt}\n\n{format_instructions}"
    response = await self.llm.ainvoke(full_prompt)

    return parser.parse(response.content)
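
A sketch of schema-constrained extraction; the Company model is hypothetical:

from pydantic import BaseModel, Field

class Company(BaseModel):
    name: str = Field(description="Legal company name")
    ticker: str = Field(description="Stock ticker symbol")

# With llm an initialized LLMTool, inside an async context:
# company = await llm.extract(
#     "Apple Inc. trades under the ticker AAPL.", output_schema=Company
# )
# -> Company(name="Apple Inc.", ticker="AAPL"), model output permitting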

setup() async

Set up the language model based on the configured provider.

Raises:

    ValueError: If the provider is not supported.

Source code in ontocast/tool/llm.py
async def setup(self):
    """Set up the language model based on the configured provider.

    Raises:
        ValueError: If the provider is not supported.
    """
    if self.provider == "openai":
        self._llm = ChatOpenAI(
            model=self.model,
            temperature=self.temperature,
        )
    elif self.provider == "ollama":
        self._llm = ChatOllama(
            model=self.model, base_url=self.base_url, temperature=self.temperature
        )
    else:
        raise ValueError(f"Unsupported provider: {self.provider}")

OntologyManager

Bases: Tool

Manager for handling multiple ontologies.

This class provides functionality for managing a collection of ontologies, including selection and retrieval operations.

Attributes:

    ontologies (list[Ontology]): List of managed ontologies.

Source code in ontocast/tool/ontology_manager.py
class OntologyManager(Tool):
    """Manager for handling multiple ontologies.

    This class provides functionality for managing a collection of ontologies,
    including selection and retrieval operations.

    Attributes:
        ontologies: List of managed ontologies.
    """

    ontologies: list[Ontology] = Field(default_factory=list)

    def __init__(self, **kwargs):
        """Initialize the ontology manager.

        Args:
            **kwargs: Additional keyword arguments passed to the parent class.
        """
        super().__init__(**kwargs)

    def update_ontology(self, short_name: str, ontology_addendum: RDFGraph):
        """Update an existing ontology with additional triples.

        Args:
            short_name: The short name of the ontology to update.
            ontology_addendum: The RDF graph containing additional triples to add.
        """
        current_idx = next(
            i for i, o in enumerate(self.ontologies) if o.short_name == short_name
        )
        self.ontologies[current_idx] += ontology_addendum

    def get_ontology_names(self) -> list[str]:
        """Get a list of all ontology short names.

        Returns:
            list[str]: List of ontology short names.
        """
        return [o.short_name for o in self.ontologies]

    def get_ontology(self, short_name: str) -> Ontology:
        """Get an ontology by its short name.

        Args:
            short_name: The short name of the ontology to retrieve.

        Returns:
            Ontology: The matching ontology if found, NULL_ONTOLOGY otherwise.
        """
        if short_name in [o.short_name for o in self.ontologies]:
            current_idx = next(
                i for i, o in enumerate(self.ontologies) if o.short_name == short_name
            )
            return self.ontologies[current_idx]
        else:
            return NULL_ONTOLOGY
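
A usage sketch; the import paths, the file finance.ttl, and the short name "fin" are placeholders:

from ontocast.onto import Ontology  # import path assumed
from ontocast.tool import OntologyManager  # import path assumed

manager = OntologyManager()
manager.ontologies.append(Ontology.from_file("finance.ttl"))

print(manager.get_ontology_names())  # e.g. ["fin"]
onto = manager.get_ontology("fin")   # NULL_ONTOLOGY for unknown names

Note that update_ontology assumes the short name exists: calling it with an unknown name raises StopIteration from the internal next() call, whereas get_ontology falls back to NULL_ONTOLOGY.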

__init__(**kwargs)

Initialize the ontology manager.

Parameters:

    **kwargs (default: {}): Additional keyword arguments passed to the parent class.

Source code in ontocast/tool/ontology_manager.py
def __init__(self, **kwargs):
    """Initialize the ontology manager.

    Args:
        **kwargs: Additional keyword arguments passed to the parent class.
    """
    super().__init__(**kwargs)

get_ontology(short_name)

Get an ontology by its short name.

Parameters:

    short_name (str, required): The short name of the ontology to retrieve.

Returns:

    Ontology: The matching ontology if found, NULL_ONTOLOGY otherwise.

Source code in ontocast/tool/ontology_manager.py
def get_ontology(self, short_name: str) -> Ontology:
    """Get an ontology by its short name.

    Args:
        short_name: The short name of the ontology to retrieve.

    Returns:
        Ontology: The matching ontology if found, NULL_ONTOLOGY otherwise.
    """
    if short_name in [o.short_name for o in self.ontologies]:
        current_idx = next(
            i for i, o in enumerate(self.ontologies) if o.short_name == short_name
        )
        return self.ontologies[current_idx]
    else:
        return NULL_ONTOLOGY

get_ontology_names()

Get a list of all ontology short names.

Returns:

    list[str]: List of ontology short names.

Source code in ontocast/tool/ontology_manager.py
def get_ontology_names(self) -> list[str]:
    """Get a list of all ontology short names.

    Returns:
        list[str]: List of ontology short names.
    """
    return [o.short_name for o in self.ontologies]

update_ontology(short_name, ontology_addendum)

Update an existing ontology with additional triples.

Parameters:

    short_name (str, required): The short name of the ontology to update.
    ontology_addendum (RDFGraph, required): The RDF graph containing additional triples to add.

Source code in ontocast/tool/ontology_manager.py
def update_ontology(self, short_name: str, ontology_addendum: RDFGraph):
    """Update an existing ontology with additional triples.

    Args:
        short_name: The short name of the ontology to update.
        ontology_addendum: The RDF graph containing additional triples to add.
    """
    current_idx = next(
        i for i, o in enumerate(self.ontologies) if o.short_name == short_name
    )
    self.ontologies[current_idx] += ontology_addendum

Tool

Bases: BasePydanticModel

Base class for all OntoCast tools.

This class serves as the foundation for all tools in the OntoCast system. It provides the common functionality and interface that all tools share. Tools should inherit from this class and implement their specific functionality.

Source code in ontocast/tool/onto.py
class Tool(BasePydanticModel):
    """Base class for all OntoCast tools.

    This class serves as the foundation for all tools in the OntoCast system.
    It provides common functionality and interface that all tools must implement.
    Tools should inherit from this class and implement their specific functionality.

    Attributes:
        Inherits all attributes from BasePydanticModel.
    """

    def __init__(self, **kwargs):
        """Initialize the tool.

        Args:
            **kwargs: Keyword arguments passed to the parent class.
        """
        super().__init__(**kwargs)
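
Tool derives from BasePydanticModel, so subclasses declare their configuration as typed, validated fields. A minimal sketch (EchoTool is hypothetical, not part of the package):

from ontocast.tool import Tool  # import path assumed

class EchoTool(Tool):
    """Toy tool that optionally uppercases its input."""

    shout: bool = True

    def __call__(self, text: str) -> str:
        return text.upper() if self.shout else text

tool = EchoTool(shout=True)
print(tool("hello"))  # HELLO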

__init__(**kwargs)

Initialize the tool.

Parameters:

    **kwargs (default: {}): Keyword arguments passed to the parent class.

Source code in ontocast/tool/onto.py
def __init__(self, **kwargs):
    """Initialize the tool.

    Args:
        **kwargs: Keyword arguments passed to the parent class.
    """
    super().__init__(**kwargs)

TripleStoreManager

Bases: Tool

Base class for managing RDF triple stores.

This class defines the interface for triple store management operations, including fetching and storing ontologies and their graphs.

Source code in ontocast/tool/triple_manager.py
class TripleStoreManager(Tool):
    """Base class for managing RDF triple stores.

    This class defines the interface for triple store management operations,
    including fetching and storing ontologies and their graphs.

    """

    def __init__(self, **kwargs):
        """Initialize the triple store manager.

        Args:
            **kwargs: Additional keyword arguments passed to the parent class.
        """
        super().__init__(**kwargs)

    @abc.abstractmethod
    def fetch_ontologies(self) -> list[Ontology]:
        """Fetch all available ontologies.

        Returns:
            list[Ontology]: List of available ontologies.
        """
        return []

    @abc.abstractmethod
    def serialize_ontology(self, o: Ontology, **kwargs):
        """Store an ontology in the triple store.

        Args:
            o: The ontology to store.
            **kwargs: Additional keyword arguments for serialization.
        """
        pass

    @abc.abstractmethod
    def serialize_facts(self, g: Graph, **kwargs):
        """Store a graph with a given name.

        Args:
            g: The graph to store.
            **kwargs: Additional keyword arguments for serialization.
        """
        pass
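
Concrete back ends override all three abstract methods. A minimal in-memory sketch (illustrative, not part of the package):

from rdflib import Graph

class InMemoryTripleStoreManager(TripleStoreManager):
    """Keeps fact graphs in process memory; persists nothing."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self._facts: list[Graph] = []  # private attribute, as in LLMTool

    def fetch_ontologies(self) -> list[Ontology]:
        return []  # no ontology source configured

    def serialize_ontology(self, o: Ontology, **kwargs):
        pass  # memory-only: nothing to write

    def serialize_facts(self, g: Graph, **kwargs):
        self._facts.append(g)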

__init__(**kwargs)

Initialize the triple store manager.

Parameters:

    **kwargs (default: {}): Additional keyword arguments passed to the parent class.

Source code in ontocast/tool/triple_manager.py
def __init__(self, **kwargs):
    """Initialize the triple store manager.

    Args:
        **kwargs: Additional keyword arguments passed to the parent class.
    """
    super().__init__(**kwargs)

fetch_ontologies() abstractmethod

Fetch all available ontologies.

Returns:

    list[Ontology]: List of available ontologies.

Source code in ontocast/tool/triple_manager.py
@abc.abstractmethod
def fetch_ontologies(self) -> list[Ontology]:
    """Fetch all available ontologies.

    Returns:
        list[Ontology]: List of available ontologies.
    """
    return []

serialize_facts(g, **kwargs) abstractmethod

Store a graph with a given name.

Parameters:

    g (Graph, required): The graph to store.
    **kwargs (default: {}): Additional keyword arguments for serialization.

Source code in ontocast/tool/triple_manager.py
@abc.abstractmethod
def serialize_facts(self, g: Graph, **kwargs):
    """Store a graph with a given name.

    Args:
        g: The graph to store.
        **kwargs: Additional keyword arguments for serialization.
    """
    pass

serialize_ontology(o, **kwargs) abstractmethod

Store an ontology in the triple store.

Parameters:

    o (Ontology, required): The ontology to store.
    **kwargs (default: {}): Additional keyword arguments for serialization.

Source code in ontocast/tool/triple_manager.py
@abc.abstractmethod
def serialize_ontology(self, o: Ontology, **kwargs):
    """Store an ontology in the triple store.

    Args:
        o: The ontology to store.
        **kwargs: Additional keyword arguments for serialization.
    """
    pass