Files
dify/api/graphon/utils/json_in_md_parser.py

59 lines
1.9 KiB
Python

from __future__ import annotations
import json
class OutputParserError(ValueError):
    """Signal that JSON embedded in markdown text failed parsing or validation.

    Subclasses ``ValueError``, so handlers written for ``ValueError`` also
    catch this error.
    """
def _slice_between_markers(text: str) -> str:
    """Return the text between the first opening marker and the last closing marker.

    Markers are tried from most to least specific: fenced code blocks first,
    then shorter backtick runs, then raw JSON brackets. Only the first marker
    that is found on each side is used.

    Args:
        text: Already-stripped text to search.

    Returns:
        The stripped candidate payload.

    Raises:
        ValueError: If no opening marker is found, no closing marker is found
            after it, or the resulting span is empty.
    """
    opening_markers = ("```json", "```", "``", "`", "{", "[")
    closing_markers = ("```", "``", "`", "}", "]")

    start = -1
    for marker in opening_markers:
        start = text.find(marker)
        if start != -1:
            # Backtick fences are discarded; a bracket belongs to the payload.
            if marker not in ("{", "["):
                start += len(marker)
            break

    end = -1
    if start != -1:
        for marker in closing_markers:
            end = text.rfind(marker, start)
            if end != -1:
                # Keep a closing bracket inside the slice; drop a closing fence.
                if marker in ("}", "]"):
                    end += 1
                break

    if start == -1 or end == -1 or start >= end:
        raise ValueError("could not find json block in the output.")
    return text[start:end].strip()


def _decode_first_json_container(text: str) -> dict | list | None:
    """Return the first JSON object or array decodable from a bracket in ``text``.

    Every ``{`` or ``[`` is tried in order as the start of a JSON document;
    trailing text after the decoded document is ignored. Returns ``None`` when
    no position decodes successfully.
    """
    decoder = json.JSONDecoder()
    for position, char in enumerate(text):
        if char not in ("{", "["):
            continue
        try:
            value, _ = decoder.raw_decode(text[position:])
        except json.JSONDecodeError:
            continue
        return value
    return None


def parse_json_markdown(json_string: str) -> dict | list:
    """Extract and parse the first JSON object or array embedded in markdown text.

    The text is first sliced between code-fence/backtick/bracket markers and
    parsed. If that attempt fails (for example because a backtick appears
    inside a JSON string value, the fence carries a language tag other than
    ``json``, or the payload is a bare array of several objects), the text is
    scanned for the first ``{`` or ``[`` from which a complete JSON value can
    be decoded.

    Args:
        json_string: Text that contains JSON, optionally wrapped in markdown
            code fences or backticks.

    Returns:
        The decoded JSON value.

    Raises:
        ValueError: If no JSON block can be located in the text.
        json.JSONDecodeError: If a candidate block was located but neither it
            nor any bracket-delimited span in the text is valid JSON.
    """
    text = json_string.strip()
    try:
        return json.loads(_slice_between_markers(text))
    except ValueError as exc:  # JSONDecodeError is a ValueError subclass
        primary_error = exc

    recovered = _decode_first_json_container(text)
    if recovered is None:
        # Nothing could be salvaged: surface the original failure unchanged.
        raise primary_error
    return recovered
def parse_and_check_json_markdown(text: str, expected_keys: list[str]) -> dict:
    """Parse JSON embedded in markdown text and verify it is an object with the given keys.

    A list containing exactly one object is unwrapped to that object.

    Args:
        text: Text containing a JSON payload, optionally wrapped in markdown.
        expected_keys: Keys that must be present in the parsed object.

    Returns:
        The parsed JSON object.

    Raises:
        OutputParserError: If no JSON can be found or decoded, if the payload
            is not an object (or a single-object list), or if any expected
            key is missing.
    """
    try:
        json_obj = parse_json_markdown(text)
    except ValueError as exc:
        # json.JSONDecodeError subclasses ValueError, so this covers both
        # malformed JSON and the "no JSON block found" failure.
        raise OutputParserError(f"got invalid json object. error: {exc}") from exc

    if isinstance(json_obj, list):
        if len(json_obj) == 1 and isinstance(json_obj[0], dict):
            json_obj = json_obj[0]
        else:
            raise OutputParserError(f"got invalid return object. obj:{json_obj}")

    # Scalars (numbers, strings, booleans, null) are valid JSON but cannot
    # carry keys; reject them instead of failing later with a TypeError or
    # doing substring matching on a string.
    if not isinstance(json_obj, dict):
        raise OutputParserError(f"got invalid return object. obj:{json_obj}")

    for key in expected_keys:
        if key not in json_obj:
            raise OutputParserError(
                f"got invalid return object. expected key `{key}` to be present, but got {json_obj}"
            )
    return json_obj