Skip to content

ontocast.tool.representation_text

Shared text normalization and deterministic triple rendering helpers.

normalize_identifier(text)

Normalize identifier-like text with camel/snake/kebab splitting.

Source code in ontocast/tool/representation_text.py
def normalize_identifier(text: str) -> str:
    """Split an identifier into words and normalize it as free text.

    A space is inserted at each lowercase-to-uppercase transition
    (``fooBar`` -> ``foo Bar``) and between an uppercase run and a following
    capitalized word (``HTTPServer`` -> ``HTTP Server``). The spaced string
    is then handed to ``normalize_text``, which takes care of the ``_`` and
    ``-`` separators, casing and whitespace.
    """
    # Order matters: the second pattern runs on the output of the first.
    boundary_patterns = (
        r"([a-z])([A-Z])",
        r"([A-Z]+)([A-Z][a-z])",
    )
    spaced = text
    for pattern in boundary_patterns:
        spaced = re.sub(pattern, r"\1 \2", spaced)
    return normalize_text(spaced)

normalize_text(text)

Normalize free text for embedding and matching.

Source code in ontocast/tool/representation_text.py
def normalize_text(text: str) -> str:
    """Return *text* in a canonical form for embedding and matching.

    Steps, in order: decompose to NFD and drop nonspacing marks (Unicode
    category ``Mn``), turn ``_`` and ``-`` into spaces, strip surrounding
    whitespace, lowercase, and collapse every whitespace run to one space.
    """
    decomposed = unicodedata.normalize("NFD", text)
    base_chars = [ch for ch in decomposed if unicodedata.category(ch) != "Mn"]
    separators_to_space = str.maketrans({"_": " ", "-": " "})
    lowered = "".join(base_chars).translate(separators_to_space).strip().lower()
    return re.sub(r"\s+", " ", lowered)

normalize_uri_local_name(uri)

Normalize the local part of a URI.

Source code in ontocast/tool/representation_text.py
def normalize_uri_local_name(uri: URIRef) -> str:
    """Return the normalized local name of *uri*.

    The URI is stringified and split with ``split_namespace_local``; only the
    second element of the resulting pair is kept and passed through
    ``normalize_identifier``.
    """
    _namespace, local_name = split_namespace_local(str(uri))
    return normalize_identifier(local_name)

render_term_for_text(term)

Render a graph term into deterministic text.

Source code in ontocast/tool/representation_text.py
def render_term_for_text(term: Node) -> str:
    """Produce a deterministic text rendering of a graph term.

    * ``URIRef`` terms are rendered via ``normalize_uri_local_name``.
    * ``BNode`` terms are rendered as the fixed string ``"blank node"``.
    * ``Literal`` terms and any other term are rendered as
      ``normalize_text(str(term))``.
    """
    if isinstance(term, URIRef):
        return normalize_uri_local_name(term)
    # A Literal takes precedence over BNode, matching the original check order.
    is_blank = isinstance(term, BNode) and not isinstance(term, Literal)
    if is_blank:
        return "blank node"
    return normalize_text(str(term))

role_from_predicate_usage(*, is_predicate)

Map predicate-position usage to vector-store role vocabulary.

Source code in ontocast/tool/representation_text.py
def role_from_predicate_usage(*, is_predicate: bool) -> str:
    """Select the role constant matching predicate-position usage.

    Returns ``ROLE_PREDICATE`` when *is_predicate* is truthy and
    ``ROLE_RESOURCE`` otherwise.
    """
    if is_predicate:
        return ROLE_PREDICATE
    return ROLE_RESOURCE

stable_sorted_triples(triples)

Return a deterministic ordering of triples.

Source code in ontocast/tool/representation_text.py
def stable_sorted_triples(
    triples: list[tuple[Node, Node, Node]],
) -> list[tuple[Node, Node, Node]]:
    """Return a new list of *triples* ordered by each triple's ``str()`` form.

    The input list is left untouched. Triples whose string forms are equal
    keep their relative input order, because the sort is stable.
    """
    ordered = list(triples)
    ordered.sort(key=str)
    return ordered