From 1ef11e1849681392dfdcd26c9e066005e9858670 Mon Sep 17 00:00:00 2001
From: FFXN <lizy@dify.ai>
Date: Wed, 28 Jan 2026 19:03:01 +0800
Subject: [PATCH] fix: fix session problem.

---
 api/services/summary_index_service.py | 12 +++++++++++-
 api/tasks/document_indexing_task.py   | 13 ++++++++++---
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/api/services/summary_index_service.py b/api/services/summary_index_service.py
index 9274ea3ad9..a4c40ad22a 100644
--- a/api/services/summary_index_service.py
+++ b/api/services/summary_index_service.py
@@ -219,10 +219,13 @@ class SummaryIndexService:
                         session.query(DocumentSegmentSummary).filter_by(id=summary_record_id).first()
                     )
                     if summary_record_in_session:
+                        # Update all fields including summary_content (in case it was updated in outer session)
                         summary_record_in_session.summary_index_node_id = summary_index_node_id
                         summary_record_in_session.summary_index_node_hash = summary_hash
                         summary_record_in_session.tokens = embedding_tokens  # Save embedding tokens
                         summary_record_in_session.status = "completed"
+                        # Ensure summary_content is preserved (use the latest from summary_record parameter)
+                        summary_record_in_session.summary_content = summary_content
                         # Explicitly update updated_at to ensure it's refreshed even if other fields haven't changed
                         summary_record_in_session.updated_at = datetime.now(UTC).replace(tzinfo=None)
                         session.add(summary_record_in_session)
@@ -232,6 +235,7 @@ class SummaryIndexService:
                         summary_record.summary_index_node_hash = summary_hash
                         summary_record.tokens = embedding_tokens
                         summary_record.status = "completed"
+                        summary_record.summary_content = summary_content
                         summary_record.updated_at = summary_record_in_session.updated_at
                 # Success, exit function
                 return
@@ -428,6 +432,9 @@ class SummaryIndexService:
 
                 # Update summary content
                 summary_record_in_session.summary_content = summary_content
+                session.add(summary_record_in_session)
+                # Flush to ensure summary_content is saved before vectorize_summary queries it
+                session.flush()
 
                 # Log LLM usage for summary generation
                 if llm_usage and llm_usage.total_tokens > 0:
@@ -442,6 +449,7 @@ class SummaryIndexService:
                 # Vectorize summary (will delete old vector if exists before creating new one)
                 # Pass the session-managed record to vectorize_summary
                 # vectorize_summary will update status to "completed" and tokens in its own session
+                # vectorize_summary will also ensure summary_content is preserved
                 SummaryIndexService.vectorize_summary(summary_record_in_session, segment, dataset)
 
                 # Refresh the object from database to get the updated status and tokens from vectorize_summary
@@ -834,7 +842,8 @@ class SummaryIndexService:
                     summary_record.status = "generating"
                     summary_record.error = None  # Clear any previous errors
                     session.add(summary_record)
-                    # Don't flush here - wait until after vectorization succeeds
+                    # Flush to ensure summary_content is saved before vectorize_summary queries it
+                    session.flush()
 
                     # Delete old vector if exists (before vectorization)
                     if old_summary_node_id:
@@ -849,6 +858,7 @@ class SummaryIndexService:
                             )
 
                     # Re-vectorize summary (this will update status to "completed" and tokens in its own session)
+                    # vectorize_summary will also ensure summary_content is preserved
                     try:
                         SummaryIndexService.vectorize_summary(summary_record, segment, dataset)
                         # Refresh the object from database to get the updated status and tokens from vectorize_summary
diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py
index 39b84d62b8..34496e9c6f 100644
--- a/api/tasks/document_indexing_task.py
+++ b/api/tasks/document_indexing_task.py
@@ -124,12 +124,17 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
                         )
                         if document:
                             logger.info(
-                                "Checking document %s for summary generation: status=%s, doc_form=%s",
+                                "Checking document %s for summary generation: status=%s, doc_form=%s, need_summary=%s",
                                 document_id,
                                 document.indexing_status,
                                 document.doc_form,
+                                document.need_summary,
                             )
-                            if document.indexing_status == "completed" and document.doc_form != "qa_model":
+                            if (
+                                document.indexing_status == "completed"
+                                and document.doc_form != "qa_model"
+                                and document.need_summary is True
+                            ):
                                 try:
                                     generate_summary_index_task.delay(dataset.id, document_id, None)
                                     logger.info(
@@ -146,10 +151,12 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]):
                                     # Don't fail the entire indexing process if summary task queuing fails
                             else:
                                 logger.info(
-                                    "Skipping summary generation for document %s: status=%s, doc_form=%s",
+                                    "Skipping summary generation for document %s: "
+                                    "status=%s, doc_form=%s, need_summary=%s",
                                     document_id,
                                     document.indexing_status,
                                     document.doc_form,
+                                    document.need_summary,
                                 )
                         else:
                             logger.warning("Document %s not found after indexing", document_id)