mirror of
https://github.com/langgenius/dify.git
synced 2026-05-04 01:18:05 +08:00
feat: Optimize codes.
This commit is contained in:
@ -3811,6 +3811,39 @@ class SegmentService:
|
||||
)
|
||||
return result if isinstance(result, DocumentSegment) else None
|
||||
|
||||
@classmethod
def get_segments_by_document_and_dataset(
    cls,
    document_id: str,
    dataset_id: str,
    status: str | None = None,
    enabled: bool | None = None,
) -> Sequence[DocumentSegment]:
    """
    Fetch the segments belonging to one document of one dataset.

    The document and dataset IDs are always applied as filters. The
    ``status`` and ``enabled`` filters are only applied when the caller
    passes a value other than ``None``.

    Args:
        document_id: ID the segments' ``document_id`` must equal.
        dataset_id: ID the segments' ``dataset_id`` must equal.
        status: When given, keep only segments whose ``status`` equals it.
        enabled: When given, keep only segments whose ``enabled`` equals it.

    Returns:
        All matching DocumentSegment rows, as returned by the session.
    """
    # Mandatory filters first; optional ones are appended only when supplied.
    conditions = [
        DocumentSegment.document_id == document_id,
        DocumentSegment.dataset_id == dataset_id,
    ]
    if status is not None:
        conditions.append(DocumentSegment.status == status)
    if enabled is not None:
        conditions.append(DocumentSegment.enabled == enabled)

    # Multiple criteria passed to where() are combined with AND.
    stmt = select(DocumentSegment).where(*conditions)
    return db.session.scalars(stmt).all()
|
||||
|
||||
|
||||
class DatasetCollectionBindingService:
|
||||
@classmethod
|
||||
|
||||
@ -542,42 +542,42 @@ class SummaryIndexService:
|
||||
)
|
||||
session.commit() # Commit initial records
|
||||
|
||||
summary_records = []
|
||||
summary_records = []
|
||||
|
||||
for segment in segments:
|
||||
# For parent-child mode, only process parent chunks
|
||||
# In parent-child mode, all DocumentSegments are parent chunks,
|
||||
# so we process all of them. Child chunks are stored in ChildChunk table
|
||||
# and are not DocumentSegments, so they won't be in the segments list.
|
||||
# This check is mainly for clarity and future-proofing.
|
||||
if only_parent_chunks:
|
||||
# In parent-child mode, all segments in the query are parent chunks
|
||||
# Child chunks are not DocumentSegments, so they won't appear here
|
||||
# We can process all segments
|
||||
pass
|
||||
for segment in segments:
|
||||
# For parent-child mode, only process parent chunks
|
||||
# In parent-child mode, all DocumentSegments are parent chunks,
|
||||
# so we process all of them. Child chunks are stored in ChildChunk table
|
||||
# and are not DocumentSegments, so they won't be in the segments list.
|
||||
# This check is mainly for clarity and future-proofing.
|
||||
if only_parent_chunks:
|
||||
# In parent-child mode, all segments in the query are parent chunks
|
||||
# Child chunks are not DocumentSegments, so they won't appear here
|
||||
# We can process all segments
|
||||
pass
|
||||
|
||||
try:
|
||||
summary_record = SummaryIndexService.generate_and_vectorize_summary(
|
||||
segment, dataset, summary_index_setting
|
||||
)
|
||||
summary_records.append(summary_record)
|
||||
except Exception as e:
|
||||
logger.exception("Failed to generate summary for segment %s", segment.id)
|
||||
# Update summary record with error status
|
||||
SummaryIndexService.update_summary_record_error(
|
||||
segment=segment,
|
||||
dataset=dataset,
|
||||
error=str(e),
|
||||
)
|
||||
# Continue with other segments
|
||||
continue
|
||||
try:
|
||||
summary_record = SummaryIndexService.generate_and_vectorize_summary(
|
||||
segment, dataset, summary_index_setting
|
||||
)
|
||||
summary_records.append(summary_record)
|
||||
except Exception as e:
|
||||
logger.exception("Failed to generate summary for segment %s", segment.id)
|
||||
# Update summary record with error status
|
||||
SummaryIndexService.update_summary_record_error(
|
||||
segment=segment,
|
||||
dataset=dataset,
|
||||
error=str(e),
|
||||
)
|
||||
# Continue with other segments
|
||||
continue
|
||||
|
||||
logger.info(
|
||||
"Completed summary generation for document %s: %s summaries generated and vectorized",
|
||||
document.id,
|
||||
len(summary_records),
|
||||
)
|
||||
return summary_records
|
||||
logger.info(
|
||||
"Completed summary generation for document %s: %s summaries generated and vectorized",
|
||||
document.id,
|
||||
len(summary_records),
|
||||
)
|
||||
return summary_records
|
||||
|
||||
@staticmethod
|
||||
def disable_summaries_for_segments(
|
||||
|
||||
Reference in New Issue
Block a user