From 2cc206ee859f39e54c8a28ddb818cf7c77102e5b Mon Sep 17 00:00:00 2001 From: Achieve3318 Date: Tue, 12 May 2026 15:53:35 +0800 Subject: [PATCH] Test : aggregation edge cases for list and scalar values (#14170) This PR adds focused unit tests for aggregate_by_field in OceanBase memory utilities to improve behavior coverage for real-world input shapes. - Adds test coverage for list-valued aggregation fields, including whitespace trimming and skipping invalid list entries. - Adds test coverage for scalar field values to ensure blank/non-string values are ignored. - Confirms aggregation output remains correct and stable for mixed-quality message payloads. ### Why this helps It strengthens regression protection for aggregation logic used by memory retrieval flows, with no production code changes and minimal review risk. --- .../memory/utils/test_ob_conn_aggregation.py | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/test/unit_test/memory/utils/test_ob_conn_aggregation.py b/test/unit_test/memory/utils/test_ob_conn_aggregation.py index cf136eb20..a409a5c25 100644 --- a/test/unit_test/memory/utils/test_ob_conn_aggregation.py +++ b/test/unit_test/memory/utils/test_ob_conn_aggregation.py @@ -20,6 +20,8 @@ Tests the pure aggregation logic used by OBConnection.get_aggregation, without requiring a real OceanBase instance or heavy dependencies. """ +import pytest + from memory.utils.aggregation_utils import aggregate_by_field @@ -53,3 +55,24 @@ class TestAggregateByField: ] out = aggregate_by_field(messages, "message_type_kwd") assert set(out) == {("user", 2), ("assistant", 1)} + + @pytest.mark.p2 + def test_aggregates_list_values_and_trims_whitespace(self): + messages = [ + {"id": "m1", "tags_kwd": [" alpha ", "beta", ""]}, + {"id": "m2", "tags_kwd": ["alpha", " beta "]}, + {"id": "m3", "tags_kwd": ["gamma", None, 1]}, + ] + out = aggregate_by_field(messages, "tags_kwd") + assert set(out) == {("alpha", 2), ("beta", 2), ("gamma", 1)} + + @pytest.mark.p2 + def test_ignores_non_string_and_blank_scalar_values(self): + messages = [ + {"id": "m1", "message_type_kwd": " "}, + {"id": "m2", "message_type_kwd": None}, + {"id": "m3", "message_type_kwd": 1}, + {"id": "m4", "message_type_kwd": "assistant"}, + ] + out = aggregate_by_field(messages, "message_type_kwd") + assert out == [("assistant", 1)]