From 1ef11e1849681392dfdcd26c9e066005e9858670 Mon Sep 17 00:00:00 2001 From: FFXN Date: Wed, 28 Jan 2026 19:03:01 +0800 Subject: [PATCH] fix: fix session problem. --- api/services/summary_index_service.py | 12 +++++++++++- api/tasks/document_indexing_task.py | 13 ++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/api/services/summary_index_service.py b/api/services/summary_index_service.py index 9274ea3ad9..a4c40ad22a 100644 --- a/api/services/summary_index_service.py +++ b/api/services/summary_index_service.py @@ -219,10 +219,13 @@ class SummaryIndexService: session.query(DocumentSegmentSummary).filter_by(id=summary_record_id).first() ) if summary_record_in_session: + # Update all fields including summary_content (in case it was updated in outer session) summary_record_in_session.summary_index_node_id = summary_index_node_id summary_record_in_session.summary_index_node_hash = summary_hash summary_record_in_session.tokens = embedding_tokens # Save embedding tokens summary_record_in_session.status = "completed" + # Ensure summary_content is preserved (use the latest from summary_record parameter) + summary_record_in_session.summary_content = summary_content # Explicitly update updated_at to ensure it's refreshed even if other fields haven't changed summary_record_in_session.updated_at = datetime.now(UTC).replace(tzinfo=None) session.add(summary_record_in_session) @@ -232,6 +235,7 @@ class SummaryIndexService: summary_record.summary_index_node_hash = summary_hash summary_record.tokens = embedding_tokens summary_record.status = "completed" + summary_record.summary_content = summary_content summary_record.updated_at = summary_record_in_session.updated_at # Success, exit function return @@ -428,6 +432,9 @@ class SummaryIndexService: # Update summary content summary_record_in_session.summary_content = summary_content + session.add(summary_record_in_session) + # Flush to ensure summary_content is saved before vectorize_summary queries it + session.flush() # Log LLM usage for summary generation if llm_usage and llm_usage.total_tokens > 0: @@ -442,6 +449,7 @@ class SummaryIndexService: # Vectorize summary (will delete old vector if exists before creating new one) # Pass the session-managed record to vectorize_summary # vectorize_summary will update status to "completed" and tokens in its own session + # vectorize_summary will also ensure summary_content is preserved SummaryIndexService.vectorize_summary(summary_record_in_session, segment, dataset) # Refresh the object from database to get the updated status and tokens from vectorize_summary @@ -834,7 +842,8 @@ class SummaryIndexService: summary_record.status = "generating" summary_record.error = None # Clear any previous errors session.add(summary_record) - # Don't flush here - wait until after vectorization succeeds + # Flush to ensure summary_content is saved before vectorize_summary queries it + session.flush() # Delete old vector if exists (before vectorization) if old_summary_node_id: @@ -849,6 +858,7 @@ class SummaryIndexService: ) # Re-vectorize summary (this will update status to "completed" and tokens in its own session) + # vectorize_summary will also ensure summary_content is preserved try: SummaryIndexService.vectorize_summary(summary_record, segment, dataset) # Refresh the object from database to get the updated status and tokens from vectorize_summary diff --git a/api/tasks/document_indexing_task.py b/api/tasks/document_indexing_task.py index 39b84d62b8..34496e9c6f 100644 --- a/api/tasks/document_indexing_task.py +++ b/api/tasks/document_indexing_task.py @@ -124,12 +124,17 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]): ) if document: logger.info( - "Checking document %s for summary generation: status=%s, doc_form=%s", + "Checking document %s for summary generation: status=%s, doc_form=%s, need_summary=%s", document_id, document.indexing_status, document.doc_form, + document.need_summary, ) - if document.indexing_status == "completed" and document.doc_form != "qa_model": + if ( + document.indexing_status == "completed" + and document.doc_form != "qa_model" + and document.need_summary is True + ): try: generate_summary_index_task.delay(dataset.id, document_id, None) logger.info( @@ -146,10 +151,12 @@ def _document_indexing(dataset_id: str, document_ids: Sequence[str]): # Don't fail the entire indexing process if summary task queuing fails else: logger.info( - "Skipping summary generation for document %s: status=%s, doc_form=%s", + "Skipping summary generation for document %s: " + "status=%s, doc_form=%s, need_summary=%s", document_id, document.indexing_status, document.doc_form, + document.need_summary, ) else: logger.warning("Document %s not found after indexing", document_id)