Compare commits

...

3 Commits

Author SHA1 Message Date
2fe8dbd7ca fix: fix cannot extract elements from a scalar (#36769) 2026-05-28 15:50:27 +08:00
80cd289e87 fix: replace .distinct() with .group_by(Conversation.id) for PostgreSQL JSON compatibility (#36610)
Co-authored-by: cocoon <kuishou68@users.noreply.github.com>
Co-authored-by: Asuka Minato <i@asukaminato.eu.org>
Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
(cherry picked from commit e617435d03)
2026-05-28 13:19:27 +08:00
a14bc8a371 fix: fix DocumentSegment.keywords can not a valid json (#36715) 2026-05-27 17:11:06 +08:00
3 changed files with 54 additions and 4 deletions

View File

@ -137,7 +137,7 @@ class CompletionConversationApi(Resource):
.join( # type: ignore
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
.distinct()
.group_by(Conversation.id)
)
elif args.annotation_status == "not_annotated":
query = (
@ -275,7 +275,7 @@ class ChatConversationApi(Resource):
.join( # type: ignore
MessageAnnotation, MessageAnnotation.conversation_id == Conversation.id
)
.distinct()
.group_by(Conversation.id)
)
case "not_annotated":
query = (

View File

@ -3,7 +3,7 @@ import uuid
from flask import request
from flask_restx import Resource, marshal
from pydantic import BaseModel, Field
from sqlalchemy import String, cast, func, or_, select
from sqlalchemy import String, case, cast, func, literal, or_, select
from sqlalchemy.dialects.postgresql import JSONB
from werkzeug.exceptions import Forbidden, NotFound
@ -159,9 +159,17 @@ class DatasetDocumentSegmentListApi(Resource):
# Use database-specific methods for JSON array search
if dify_config.SQLALCHEMY_DATABASE_URI_SCHEME == "postgresql":
# PostgreSQL: Use jsonb_array_elements_text to properly handle Unicode/Chinese text
# Feed the set-returning function a JSON array in every row. Filtering in
# the subquery is not enough because PostgreSQL can still evaluate the
# SRF on scalar JSON before applying the predicate.
keywords_jsonb = cast(DocumentSegment.keywords, JSONB)
keywords_array = case(
(func.jsonb_typeof(keywords_jsonb) == "array", keywords_jsonb),
else_=cast(literal("[]"), JSONB),
)
keywords_condition = func.array_to_string(
func.array(
select(func.jsonb_array_elements_text(cast(DocumentSegment.keywords, JSONB)))
select(func.jsonb_array_elements_text(keywords_array))
.correlate(DocumentSegment)
.scalar_subquery()
),

View File

@ -1036,6 +1036,48 @@ class TestSegmentListAdvancedCases:
assert status == 200
assert response["total"] == 1
def test_segment_list_postgres_keyword_filter_handles_scalar_keywords(self, app: Flask):
    """Test the PostgreSQL keyword filter wraps the JSONB expansion in a CASE fallback.

    The config scheme is patched to "postgresql" and the handler is invoked with
    ``?keyword=test``. The statement handed to ``db.paginate`` is then compiled
    with literal binds and checked to feed ``jsonb_array_elements_text`` a CASE
    expression whose ELSE branch is an empty JSONB array.
    """
    tenant_id = "11111111-1111-1111-1111-111111111111"
    dataset_id = "22222222-2222-2222-2222-222222222222"
    document_id = "33333333-3333-3333-3333-333333333333"

    resource = DatasetDocumentSegmentListApi()
    handler = unwrap(resource.get)

    # Stand-ins for the collaborators the handler looks up before building the query.
    postgres_config = SimpleNamespace(SQLALCHEMY_DATABASE_URI_SCHEME="postgresql")
    empty_page = MagicMock(items=[], total=0, pages=0)

    with (
        app.test_request_context("/?keyword=test"),
        patch(
            "controllers.console.datasets.datasets_segments.current_account_with_tenant",
            return_value=(MagicMock(), tenant_id),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.DatasetService.get_dataset",
            return_value=MagicMock(),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.DatasetService.check_dataset_permission",
            return_value=None,
        ),
        patch(
            "controllers.console.datasets.datasets_segments.DocumentService.get_document",
            return_value=MagicMock(),
        ),
        patch(
            "controllers.console.datasets.datasets_segments.dify_config",
            new=postgres_config,
        ),
        patch(
            "controllers.console.datasets.datasets_segments.db.paginate",
            return_value=empty_page,
        ) as paginate_spy,
    ):
        handler(resource, dataset_id, document_id)

    # Render the captured statement with inlined literals so the SQL text can be inspected.
    captured_select = paginate_spy.call_args.kwargs["select"]
    rendered_sql = str(captured_select.compile(compile_kwargs={"literal_binds": True}))

    assert "jsonb_array_elements_text(CASE" in rendered_sql
    assert "ELSE CAST('[]' AS JSONB)" in rendered_sql
def test_segment_list_permission_denied(self, app: Flask):
"""Test segment list with permission denied"""
api = DatasetDocumentSegmentListApi()