mirror of
https://github.com/langgenius/dify.git
synced 2026-04-27 05:58:14 +08:00
Merge remote main and resolve conflicts for summaryindex feature
- Resolved conflicts in 9 task files by adopting the session_factory pattern from main
- Preserved all summaryindex functionality, including the enable/disable logic
- Updated all task files to use session_factory.create_session() instead of db.session
- Merged new features from main (FileService, DocumentBatchDownloadZipPayload, etc.)
This commit is contained in:
@ -5,8 +5,8 @@ import click
|
||||
from celery import shared_task
|
||||
from sqlalchemy import select
|
||||
|
||||
from core.db.session_factory import session_factory
|
||||
from core.rag.index_processor.index_processor_factory import IndexProcessorFactory
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from libs.datetime_utils import naive_utc_now
|
||||
from models.dataset import Document, DocumentSegment
|
||||
def remove_document_from_index_task(document_id: str):
    """Remove a completed document's segments from the vector index and disable them.

    Resolved merge: this is the session_factory variant adopted from main; the
    old ``db.session`` body (with its explicit ``db.session.close()`` calls) is
    removed — the context manager owns the session lifetime now.

    Args:
        document_id: Primary key of the ``Document`` whose segments are removed.

    Side effects:
        - Disables summary indexes for the document's segments (best effort).
        - Cleans the segments' index nodes via the dataset's index processor
          (best effort; failures are logged, not raised).
        - Marks all of the document's segments as disabled and commits.
        - On failure, re-enables the document (unless archived) and commits.
        - Always clears the Redis indexing-cache key for the document.
    """
    logger.info(click.style(f"Start remove document segments from index: {document_id}", fg="green"))
    start_at = time.perf_counter()

    with session_factory.create_session() as session:
        document = session.query(Document).where(Document.id == document_id).first()
        if not document:
            logger.info(click.style(f"Document not found: {document_id}", fg="red"))
            return

        # Removal is only meaningful once indexing has finished.
        if document.indexing_status != "completed":
            logger.info(click.style(f"Document is not completed, remove is not allowed: {document_id}", fg="red"))
            return

        indexing_cache_key = f"document_{document.id}_indexing"

        try:
            dataset = document.dataset

            if not dataset:
                raise Exception("Document has no dataset")

            index_processor = IndexProcessorFactory(document.doc_form).init_index_processor()

            segments = session.scalars(select(DocumentSegment).where(DocumentSegment.document_id == document.id)).all()

            # Disable summary indexes for all segments in this document.
            # Imported locally to avoid a circular import at module load time
            # (NOTE(review): presumed reason — confirm against project layout).
            from services.summary_index_service import SummaryIndexService

            segment_ids_list = [segment.id for segment in segments]
            if segment_ids_list:
                try:
                    SummaryIndexService.disable_summaries_for_segments(
                        dataset=dataset,
                        segment_ids=segment_ids_list,
                        disabled_by=document.disabled_by,
                    )
                except Exception as e:
                    # Best effort: summary-index cleanup must not block removal.
                    logger.warning("Failed to disable summaries for document %s: %s", document.id, str(e))

            index_node_ids = [segment.index_node_id for segment in segments]
            if index_node_ids:
                try:
                    index_processor.clean(dataset, index_node_ids, with_keywords=True, delete_child_chunks=False)
                except Exception:
                    # Best effort: a failed vector-store clean is logged, not fatal.
                    logger.exception("clean dataset %s from index failed", dataset.id)
            # update segment to disable
            session.query(DocumentSegment).where(DocumentSegment.document_id == document.id).update(
                {
                    DocumentSegment.enabled: False,
                    DocumentSegment.disabled_at: naive_utc_now(),
                    DocumentSegment.disabled_by: document.disabled_by,
                    DocumentSegment.updated_at: naive_utc_now(),
                }
            )
            session.commit()

            end_at = time.perf_counter()
            logger.info(
                click.style(
                    f"Document removed from index: {document.id} latency: {end_at - start_at}",
                    fg="green",
                )
            )
        except Exception:
            logger.exception("remove document from index failed")
            # Roll the document back to enabled so it is not left half-removed,
            # unless it was archived (archived documents stay disabled).
            if not document.archived:
                document.enabled = True
                session.commit()
        finally:
            # Always release the indexing lock/cache key, success or failure.
            redis_client.delete(indexing_cache_key)
||||
|
||||
Reference in New Issue
Block a user