feat: allow fail fast (#30262)

2026-05-05 01:48:04 +08:00 · 2025-12-30 09:27:40 +08:00
parent 5338cf85b1
commit 30dd50ff83
3 changed files with 102 additions and 37 deletions
--- a/api/core/rag/retrieval/dataset_retrieval.py
+++ b/api/core/rag/retrieval/dataset_retrieval.py
@ -516,6 +516,9 @@ class DatasetRetrieval:
                    ].embedding_model_provider
                    weights["vector_setting"]["embedding_model_name"] = available_datasets[0].embedding_model
        with measure_time() as timer:
+            cancel_event = threading.Event()
+            thread_exceptions: list[Exception] = []
+
            if query:
                query_thread = threading.Thread(
                    target=self._multiple_retrieve_thread,
@ -534,6 +537,8 @@ class DatasetRetrieval:
                        "score_threshold": score_threshold,
                        "query": query,
                        "attachment_id": None,
+                        "cancel_event": cancel_event,
+                        "thread_exceptions": thread_exceptions,
                    },
                )
                all_threads.append(query_thread)
@ -557,12 +562,25 @@ class DatasetRetrieval:
                            "score_threshold": score_threshold,
                            "query": None,
                            "attachment_id": attachment_id,
+                            "cancel_event": cancel_event,
+                            "thread_exceptions": thread_exceptions,
                        },
                    )
                    all_threads.append(attachment_thread)
                    attachment_thread.start()
-            for thread in all_threads:
-                thread.join()
+
+            # Poll threads with short timeout to detect errors quickly (fail-fast)
+            while any(t.is_alive() for t in all_threads):
+                for thread in all_threads:
+                    thread.join(timeout=0.1)
+                    if thread_exceptions:
+                        cancel_event.set()
+                        break
+                if thread_exceptions:
+                    break
+
+            if thread_exceptions:
+                raise thread_exceptions[0]
        self._on_query(query, attachment_ids, dataset_ids, app_id, user_from, user_id)

        if all_documents:
@ -1404,40 +1422,53 @@ class DatasetRetrieval:
        score_threshold: float,
        query: str | None,
        attachment_id: str | None,
+        cancel_event: threading.Event | None = None,
+        thread_exceptions: list[Exception] | None = None,
    ):
-        with flask_app.app_context():
-            threads = []
-            all_documents_item: list[Document] = []
-            index_type = None
-            for dataset in available_datasets:
-                index_type = dataset.indexing_technique
-                document_ids_filter = None
-                if dataset.provider != "external":
-                    if metadata_condition and not metadata_filter_document_ids:
-                        continue
-                    if metadata_filter_document_ids:
-                        document_ids = metadata_filter_document_ids.get(dataset.id, [])
-                        if document_ids:
-                            document_ids_filter = document_ids
-                        else:
+        try:
+            with flask_app.app_context():
+                threads = []
+                all_documents_item: list[Document] = []
+                index_type = None
+                for dataset in available_datasets:
+                    # Check for cancellation signal
+                    if cancel_event and cancel_event.is_set():
+                        break
+                    index_type = dataset.indexing_technique
+                    document_ids_filter = None
+                    if dataset.provider != "external":
+                        if metadata_condition and not metadata_filter_document_ids:
                            continue
-                retrieval_thread = threading.Thread(
-                    target=self._retriever,
-                    kwargs={
-                        "flask_app": flask_app,
-                        "dataset_id": dataset.id,
-                        "query": query,
-                        "top_k": top_k,
-                        "all_documents": all_documents_item,
-                        "document_ids_filter": document_ids_filter,
-                        "metadata_condition": metadata_condition,
-                        "attachment_ids": [attachment_id] if attachment_id else None,
-                    },
-                )
-                threads.append(retrieval_thread)
-                retrieval_thread.start()
-            for thread in threads:
-                thread.join()
+                        if metadata_filter_document_ids:
+                            document_ids = metadata_filter_document_ids.get(dataset.id, [])
+                            if document_ids:
+                                document_ids_filter = document_ids
+                            else:
+                                continue
+                    retrieval_thread = threading.Thread(
+                        target=self._retriever,
+                        kwargs={
+                            "flask_app": flask_app,
+                            "dataset_id": dataset.id,
+                            "query": query,
+                            "top_k": top_k,
+                            "all_documents": all_documents_item,
+                            "document_ids_filter": document_ids_filter,
+                            "metadata_condition": metadata_condition,
+                            "attachment_ids": [attachment_id] if attachment_id else None,
+                        },
+                    )
+                    threads.append(retrieval_thread)
+                    retrieval_thread.start()
+
+                # Poll threads with short timeout to respond quickly to cancellation
+                while any(t.is_alive() for t in threads):
+                    for thread in threads:
+                        thread.join(timeout=0.1)
+                        if cancel_event and cancel_event.is_set():
+                            break
+                    if cancel_event and cancel_event.is_set():
+                        break

            if reranking_enable:
                # do rerank for searched documents
@ -1470,3 +1501,8 @@ class DatasetRetrieval:
                    all_documents_item = all_documents_item[:top_k] if top_k else all_documents_item
            if all_documents_item:
                all_documents.extend(all_documents_item)
+        except Exception as e:
+            if cancel_event:
+                cancel_event.set()
+            if thread_exceptions is not None:
+                thread_exceptions.append(e)