Merge branch 'main' into feat/rag-2

This commit is contained in:
twwu
2025-08-18 11:16:18 +08:00
99 changed files with 3421 additions and 1810 deletions

View File

@ -1,5 +1,7 @@
import json
from collections.abc import Iterable, Sequence
from typing import Any
import orjson
from .segment_group import SegmentGroup
from .segments import ArrayFileSegment, FileSegment, Segment
@ -12,15 +14,20 @@ def to_selector(node_id: str, name: str, paths: Iterable[str] = ()) -> Sequence[
return selectors
class SegmentJSONEncoder(json.JSONEncoder):
def default(self, o):
if isinstance(o, ArrayFileSegment):
return [v.model_dump() for v in o.value]
elif isinstance(o, FileSegment):
return o.value.model_dump()
elif isinstance(o, SegmentGroup):
return [self.default(seg) for seg in o.value]
elif isinstance(o, Segment):
return o.value
else:
super().default(o)
def segment_orjson_default(o: Any) -> Any:
"""Default function for orjson serialization of Segment types"""
if isinstance(o, ArrayFileSegment):
return [v.model_dump() for v in o.value]
elif isinstance(o, FileSegment):
return o.value.model_dump()
elif isinstance(o, SegmentGroup):
return [segment_orjson_default(seg) for seg in o.value]
elif isinstance(o, Segment):
return o.value
raise TypeError(f"Object of type {type(o).__name__} is not JSON serializable")
def dumps_with_segments(obj: Any, ensure_ascii: bool = False) -> str:
"""JSON dumps with segment support using orjson"""
option = orjson.OPT_NON_STR_KEYS
return orjson.dumps(obj, default=segment_orjson_default, option=option).decode("utf-8")