refactor(api): migrate console/service_api.dataset.hit_testing to BaseModel (#36533)

This commit is contained in:
chariri
2026-05-27 15:51:42 +09:00
committed by GitHub
parent a8d380bcaf
commit b034449a0c
13 changed files with 732 additions and 441 deletions

View File

@ -1,7 +1,6 @@
import logging
from typing import Any
from typing import Any, cast
from flask_restx import marshal
from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, InternalServerError, NotFound
@ -19,10 +18,10 @@ from core.errors.error import (
ProviderTokenNotInitError,
QuotaExceededError,
)
from fields.hit_testing_fields import hit_testing_record_fields
from graphon.model_runtime.errors.invoke import InvokeError
from libs.login import current_user
from models.account import Account
from models.dataset import Dataset
from services.dataset_service import DatasetService
from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
from services.hit_testing_service import HitTestingService
@ -38,16 +37,6 @@ class HitTestingPayload(BaseModel):
class DatasetsHitTestingBase:
@staticmethod
def _extract_hit_testing_query(query: Any) -> str:
"""Return the query string from the service response shape."""
if isinstance(query, dict):
content = query.get("content")
if isinstance(content, str):
return content
raise ValueError("Invalid hit testing query response")
@staticmethod
def _prepare_hit_testing_records(records: Any) -> list[dict[str, Any]]:
"""Ensure collection fields match the API schema before response validation."""
@ -63,6 +52,7 @@ class DatasetsHitTestingBase:
segment = normalized_record.get("segment")
if isinstance(segment, dict):
normalized_segment = dict(segment)
normalized_segment.setdefault("sign_content", None)
if normalized_segment.get("keywords") is None:
normalized_segment["keywords"] = []
normalized_record["segment"] = normalized_segment
@ -73,12 +63,15 @@ class DatasetsHitTestingBase:
if normalized_record.get("files") is None:
normalized_record["files"] = []
normalized_record.setdefault("tsne_position", None)
normalized_record.setdefault("summary", None)
normalized_records.append(normalized_record)
return normalized_records
@staticmethod
def get_and_validate_dataset(dataset_id: str):
def get_and_validate_dataset(dataset_id: str) -> Dataset:
assert isinstance(current_user, Account)
dataset = DatasetService.get_dataset(dataset_id)
if dataset is None:
@ -92,33 +85,35 @@ class DatasetsHitTestingBase:
return dataset
@staticmethod
def hit_testing_args_check(args: dict[str, Any]):
def hit_testing_args_check(args: dict[str, Any]) -> None:
HitTestingService.hit_testing_args_check(args)
@staticmethod
def parse_args(payload: dict[str, Any]) -> dict[str, Any]:
def parse_args(payload: dict[str, Any] | None) -> dict[str, Any]:
"""Validate and return hit-testing arguments from an incoming payload."""
hit_testing_payload = HitTestingPayload.model_validate(payload or {})
return hit_testing_payload.model_dump(exclude_none=True)
@staticmethod
def perform_hit_testing(dataset, args):
def perform_hit_testing(dataset: Dataset, args: dict[str, Any]) -> dict[str, Any]:
assert isinstance(current_user, Account)
try:
response = HitTestingService.retrieve(
dataset=dataset,
query=args.get("query"),
query=cast(str, args.get("query")),
account=current_user,
retrieval_model=args.get("retrieval_model"),
external_retrieval_model=args.get("external_retrieval_model"),
external_retrieval_model=cast(dict[str, Any], args.get("external_retrieval_model")),
attachment_ids=args.get("attachment_ids"),
limit=10,
)
query = response.get("query")
if not isinstance(query, dict) or not isinstance(query.get("content"), str):
raise ValueError("Invalid hit testing query response")
return {
"query": DatasetsHitTestingBase._extract_hit_testing_query(response.get("query")),
"records": DatasetsHitTestingBase._prepare_hit_testing_records(
marshal(response.get("records", []), hit_testing_record_fields)
),
"query": {"content": query["content"]},
"records": DatasetsHitTestingBase._prepare_hit_testing_records(response.get("records", [])),
}
except services.errors.index.IndexNotInitializedError:
raise DatasetNotInitializedError()