Convert a single raw input payload on the state into text.
Strict 1-in / 1-out agent node: state.raw_input must contain exactly
one {filename: bytes} entry. Batch fan-out (e.g. JSONL) is the caller's
responsibility and must happen before this node is invoked.
Parameters:
| Name | Type | Description | Default |
|------|------|-------------|---------|
| state | AgentState | The current agent state with a single raw input payload. | required |
| tools | ToolBox | The toolbox instance providing utility functions. | required |
Returns:
| Name | Type | Description |
|------|------|-------------|
| AgentState | AgentState | Updated state with input_text populated, or with status == Status.FAILED if conversion could not be performed. |
Source code in ontocast/agent/convert_document.py
def convert_document(state: AgentState, tools: ToolBox) -> AgentState:
    """Convert a single raw input payload on the state into text.

    Strict 1-in / 1-out agent node: ``state.raw_input`` must contain exactly
    one ``{filename: bytes}`` entry. Batch fan-out (e.g. JSONL) is the caller's
    responsibility and must happen before this node is invoked.

    The payload is dispatched on the lower-cased filename extension:

    * extensions listed in ``tools.converter.supported_extensions`` are passed
      to ``tools.converter`` and the ``"text"`` entry of its result is used;
    * ``.json`` payloads are decoded as UTF-8, parsed as JSON and reduced to
      text by ``_extract_json_payload_text``;
    * ``.txt`` payloads are decoded as UTF-8 and used verbatim.

    After the text is stored, ``_fail_when_fixed_catalog_ontology_missing`` is
    consulted; if it returns a state, that state is returned instead.

    Args:
        state: The current agent state with a single raw input payload.
        tools: The toolbox instance providing utility functions.

    Returns:
        AgentState: Updated state with ``input_text`` populated, or with
        ``status == Status.FAILED`` if conversion could not be performed.

    Raises:
        UnicodeDecodeError: If a ``.json`` or ``.txt`` payload is not valid
            UTF-8.
        json.JSONDecodeError: If a ``.json`` payload is not valid JSON.
    """
    logger.debug("Converting document")
    state.status = Status.SUCCESS

    raw_input = state.raw_input
    if len(raw_input) != 1:
        logger.error(
            "convert_document expects exactly one raw input entry, received %d",
            len(raw_input),
        )
        state.status = Status.FAILED
        return state

    filename, file_content = next(iter(raw_input.items()))
    file_extension = pathlib.Path(filename).suffix.lower()
    logger.debug("Converting %s with extension %s", filename, file_extension)

    if file_extension in tools.converter.supported_extensions:
        text = tools.converter(file_content)["text"]
    elif file_extension == ".json":
        result_json = json.loads(file_content.decode("utf-8"))
        text = _extract_json_payload_text(result_json, state, filename)
        if text is None:
            # The helper could not derive text from the parsed JSON.
            state.status = Status.FAILED
            return state
    elif file_extension == ".txt":
        # Plain text is used as-is. It must NOT be parsed as JSON: ordinary
        # prose is not a valid JSON document and would raise JSONDecodeError.
        text = file_content.decode("utf-8")
    else:
        logger.error(
            "Unsupported file extension %s for %s", file_extension, filename
        )
        state.status = Status.FAILED
        return state

    state.set_text(text)
    blocked = _fail_when_fixed_catalog_ontology_missing(state)
    return blocked if blocked is not None else state
|