mirror of
https://github.com/langgenius/dify.git
synced 2026-04-21 03:07:39 +08:00
fix(rag): include is_summary and original_chunk_id in default vector projection (#34950)
Co-authored-by: VFootball Dev <vfootball@example.com>
This commit is contained in:
@@ -41,7 +41,23 @@ class AbstractVectorFactory(ABC):
|
||||
class Vector:
|
||||
def __init__(self, dataset: Dataset, attributes: list | None = None):
|
||||
if attributes is None:
|
||||
attributes = ["doc_id", "dataset_id", "document_id", "doc_hash", "doc_type"]
|
||||
# `is_summary` and `original_chunk_id` are stored on summary vectors
|
||||
# by `SummaryIndexService` and read back by `RetrievalService` to
|
||||
# route summary hits through their original parent chunks. They
|
||||
# must be listed here so vector backends that use this list as an
|
||||
# explicit return-properties projection (notably Weaviate) actually
|
||||
# return those fields; without them, summary hits silently
|
||||
# collapse into `is_summary = False` branches and the summary
|
||||
# retrieval path is a no-op. See #34884.
|
||||
attributes = [
|
||||
"doc_id",
|
||||
"dataset_id",
|
||||
"document_id",
|
||||
"doc_hash",
|
||||
"doc_type",
|
||||
"is_summary",
|
||||
"original_chunk_id",
|
||||
]
|
||||
self._dataset = dataset
|
||||
self._embeddings = self._get_embeddings()
|
||||
self._attributes = attributes
|
||||
|
||||
@@ -121,7 +121,18 @@ def test_vector_init_uses_default_and_custom_attributes(vector_factory_module):
|
||||
default_vector = vector_factory_module.Vector(dataset)
|
||||
custom_vector = vector_factory_module.Vector(dataset, attributes=["doc_id"])
|
||||
|
||||
assert default_vector._attributes == ["doc_id", "dataset_id", "document_id", "doc_hash", "doc_type"]
|
||||
# `is_summary` and `original_chunk_id` must be in the default return-properties
|
||||
# projection so summary index retrieval works on backends that honor the list
|
||||
# as an explicit projection (e.g. Weaviate). See #34884.
|
||||
assert default_vector._attributes == [
|
||||
"doc_id",
|
||||
"dataset_id",
|
||||
"document_id",
|
||||
"doc_hash",
|
||||
"doc_type",
|
||||
"is_summary",
|
||||
"original_chunk_id",
|
||||
]
|
||||
assert custom_vector._attributes == ["doc_id"]
|
||||
assert default_vector._embeddings == "embeddings"
|
||||
assert default_vector._vector_processor == "processor"
|
||||
|
||||
Reference in New Issue
Block a user