Test: aggregation edge cases for list and scalar values (#14170)

This PR adds focused unit tests for `aggregate_by_field` in the OceanBase memory utilities to improve behavior coverage for real-world input shapes.

- Adds test coverage for list-valued aggregation fields, including whitespace trimming and skipping invalid list entries.
- Adds test coverage for scalar field values to ensure blank/non-string values are ignored.
- Confirms aggregation output remains correct and stable for mixed-quality message payloads.

### Why this helps

It strengthens regression protection for the aggregation logic used by memory retrieval flows, with no production code changes and minimal review risk.
2026-05-26 02:37:21 +08:00 · 2026-05-12 15:53:35 +08:00
parent f85e18afbc
commit 2cc206ee85
1 changed file with 23 additions and 0 deletions
--- a/test/unit_test/memory/utils/test_ob_conn_aggregation.py
+++ b/test/unit_test/memory/utils/test_ob_conn_aggregation.py
@ -20,6 +20,8 @@ Tests the pure aggregation logic used by OBConnection.get_aggregation,
 without requiring a real OceanBase instance or heavy dependencies.
 """

+import pytest
+
 from memory.utils.aggregation_utils import aggregate_by_field


@ -53,3 +55,24 @@ class TestAggregateByField:
        ]
        out = aggregate_by_field(messages, "message_type_kwd")
        assert set(out) == {("user", 2), ("assistant", 1)}
+
+    @pytest.mark.p2
+    def test_aggregates_list_values_and_trims_whitespace(self):
+        messages = [
+            {"id": "m1", "tags_kwd": [" alpha ", "beta", ""]},
+            {"id": "m2", "tags_kwd": ["alpha", " beta "]},
+            {"id": "m3", "tags_kwd": ["gamma", None, 1]},
+        ]
+        out = aggregate_by_field(messages, "tags_kwd")
+        assert set(out) == {("alpha", 2), ("beta", 2), ("gamma", 1)}
+
+    @pytest.mark.p2
+    def test_ignores_non_string_and_blank_scalar_values(self):
+        messages = [
+            {"id": "m1", "message_type_kwd": "  "},
+            {"id": "m2", "message_type_kwd": None},
+            {"id": "m3", "message_type_kwd": 1},
+            {"id": "m4", "message_type_kwd": "assistant"},
+        ]
+        out = aggregate_by_field(messages, "message_type_kwd")
+        assert out == [("assistant", 1)]