fix: fix cannot extract elements from a scalar (#36769)

This commit is contained in:
wangxiaolei
2026-05-28 15:31:36 +08:00
committed by fatelei
parent 80cd289e87
commit 2fe8dbd7ca
2 changed files with 52 additions and 5 deletions

View File

@ -3,7 +3,7 @@ import uuid
from flask import request
from flask_restx import Resource, marshal
from pydantic import BaseModel, Field
from sqlalchemy import String, cast, func, or_, select
from sqlalchemy import String, case, cast, func, literal, or_, select
from sqlalchemy.dialects.postgresql import JSONB
from werkzeug.exceptions import Forbidden, NotFound
@ -159,12 +159,17 @@ class DatasetDocumentSegmentListApi(Resource):
# Use database-specific methods for JSON array search
if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql":
# PostgreSQL: Use jsonb_array_elements_text to properly handle Unicode/Chinese text
# Guard with jsonb_typeof to avoid "cannot extract elements from a scalar" error
# when keywords is null or a non-array JSON value.
# Feed the set-returning function a JSON array in every row. Filtering in
# the subquery is not enough because PostgreSQL can still evaluate the
# SRF on scalar JSON before applying the predicate.
keywords_jsonb = cast(DocumentSegment.keywords, JSONB)
keywords_array = case(
(func.jsonb_typeof(keywords_jsonb) == "array", keywords_jsonb),
else_=cast(literal("[]"), JSONB),
)
keywords_condition = func.array_to_string(
func.array(
select(func.jsonb_array_elements_text(cast(DocumentSegment.keywords, JSONB)))
.where(func.jsonb_typeof(cast(DocumentSegment.keywords, JSONB)) == "array")
select(func.jsonb_array_elements_text(keywords_array))
.correlate(DocumentSegment)
.scalar_subquery()
),

View File

@ -1036,6 +1036,48 @@ class TestSegmentListAdvancedCases:
assert status == 200
assert response["total"] == 1
def test_segment_list_postgres_keyword_filter_handles_scalar_keywords(self, app: Flask):
api = DatasetDocumentSegmentListApi()
method = unwrap(api.get)
dataset = MagicMock()
document = MagicMock()
pagination = MagicMock(items=[], total=0, pages=0)
with (
app.test_request_context("/?keyword=test"),
patch(
"controllers.console.datasets.datasets_segments.current_account_with_tenant",
return_value=(MagicMock(), "11111111-1111-1111-1111-111111111111"),
),
patch(
"controllers.console.datasets.datasets_segments.DatasetService.get_dataset",
return_value=dataset,
),
patch(
"controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission",
return_value=None,
),
patch(
"controllers.console.datasets.datasets_segments.DocumentService.get_document",
return_value=document,
),
patch(
"controllers.console.datasets.datasets_segments.dify_config",
SimpleNamespace(SQLALCHEMY_DATABASE_URI_SCHEME="postgresql"),
),
patch(
"controllers.console.datasets.datasets_segments.db.paginate",
return_value=pagination,
) as paginate_mock,
):
method(api, "22222222-2222-2222-2222-222222222222", "33333333-3333-3333-3333-333333333333")
query = paginate_mock.call_args.kwargs["select"]
sql = str(query.compile(compile_kwargs={"literal_binds": True}))
assert "jsonb_array_elements_text(CASE" in sql
assert "ELSE CAST('[]' AS JSONB)" in sql
def test_segment_list_permission_denied(self, app: Flask):
"""Test segment list with permission denied"""
api = DatasetDocumentSegmentListApi()