feat: Optimize code.

2026-05-04 01:18:05 +08:00 · 2026-01-28 17:09:31 +08:00
parent 6db70ffd9e
commit a300bc5616
7 changed files with 392 additions and 375 deletions
--- a/api/services/dataset_service.py
+++ b/api/services/dataset_service.py
@ -3811,6 +3811,39 @@ class SegmentService:
        )
        return result if isinstance(result, DocumentSegment) else None

+    @classmethod
+    def get_segments_by_document_and_dataset(
+        cls,
+        document_id: str,
+        dataset_id: str,
+        status: str | None = None,
+        enabled: bool | None = None,
+    ) -> Sequence[DocumentSegment]:
+        """
+        Get segments for a document in a dataset with optional filtering.
+
+        The document and dataset conditions are always applied; each optional
+        filter that is supplied is added as a further condition on the same
+        query. The query has no ORDER BY clause, so callers should not rely on
+        the order of the returned segments.
+
+        Args:
+            document_id: Document ID the segments must belong to
+            dataset_id: Dataset ID the segments must belong to
+            status: Optional status filter (e.g., "completed"); None skips this filter
+            enabled: Optional enabled filter (True/False); None skips this filter
+
+        Returns:
+            Sequence of DocumentSegment instances matching all applied conditions
+        """
+        # Base query: both the document and the dataset must match.
+        query = select(DocumentSegment).where(
+            DocumentSegment.document_id == document_id,
+            DocumentSegment.dataset_id == dataset_id,
+        )
+
+        # Narrow by status only when the caller asked for one.
+        if status is not None:
+            query = query.where(DocumentSegment.status == status)
+
+        # Compare against None explicitly so that enabled=False is still
+        # applied as a filter instead of being treated as "no filter".
+        if enabled is not None:
+            query = query.where(DocumentSegment.enabled == enabled)
+
+        # Runs on the shared db.session and materializes every matching row.
+        return db.session.scalars(query).all()
+

 class DatasetCollectionBindingService:
    @classmethod
--- a/api/services/summary_index_service.py
+++ b/api/services/summary_index_service.py
@ -542,42 +542,42 @@ class SummaryIndexService:
            )
            session.commit()  # Commit initial records

-        summary_records = []
+            summary_records = []

-        for segment in segments:
-            # For parent-child mode, only process parent chunks
-            # In parent-child mode, all DocumentSegments are parent chunks,
-            # so we process all of them. Child chunks are stored in ChildChunk table
-            # and are not DocumentSegments, so they won't be in the segments list.
-            # This check is mainly for clarity and future-proofing.
-            if only_parent_chunks:
-                # In parent-child mode, all segments in the query are parent chunks
-                # Child chunks are not DocumentSegments, so they won't appear here
-                # We can process all segments
-                pass
+            for segment in segments:
+                # For parent-child mode, only process parent chunks
+                # In parent-child mode, all DocumentSegments are parent chunks,
+                # so we process all of them. Child chunks are stored in ChildChunk table
+                # and are not DocumentSegments, so they won't be in the segments list.
+                # This check is mainly for clarity and future-proofing.
+                if only_parent_chunks:
+                    # In parent-child mode, all segments in the query are parent chunks
+                    # Child chunks are not DocumentSegments, so they won't appear here
+                    # We can process all segments
+                    pass

-            try:
-                summary_record = SummaryIndexService.generate_and_vectorize_summary(
-                    segment, dataset, summary_index_setting
-                )
-                summary_records.append(summary_record)
-            except Exception as e:
-                logger.exception("Failed to generate summary for segment %s", segment.id)
-                # Update summary record with error status
-                SummaryIndexService.update_summary_record_error(
-                    segment=segment,
-                    dataset=dataset,
-                    error=str(e),
-                )
-                # Continue with other segments
-                continue
+                try:
+                    summary_record = SummaryIndexService.generate_and_vectorize_summary(
+                        segment, dataset, summary_index_setting
+                    )
+                    summary_records.append(summary_record)
+                except Exception as e:
+                    logger.exception("Failed to generate summary for segment %s", segment.id)
+                    # Update summary record with error status
+                    SummaryIndexService.update_summary_record_error(
+                        segment=segment,
+                        dataset=dataset,
+                        error=str(e),
+                    )
+                    # Continue with other segments
+                    continue

-        logger.info(
-            "Completed summary generation for document %s: %s summaries generated and vectorized",
-            document.id,
-            len(summary_records),
-        )
-        return summary_records
+            logger.info(
+                "Completed summary generation for document %s: %s summaries generated and vectorized",
+                document.id,
+                len(summary_records),
+            )
+            return summary_records

    @staticmethod
    def disable_summaries_for_segments(