Merge remote-tracking branch 'upstream/main' into feat/rag-2

This commit is contained in:
QuantumGhost
2025-09-16 14:59:35 +08:00
791 changed files with 24372 additions and 7085 deletions

View File

@ -5,7 +5,7 @@ import re
import threading
import time
import uuid
from typing import Any, Optional
from typing import Any
from flask import current_app
from sqlalchemy import select
@ -230,9 +230,9 @@ class IndexingRunner:
tenant_id: str,
extract_settings: list[ExtractSetting],
tmp_processing_rule: dict,
doc_form: Optional[str] = None,
doc_form: str | None = None,
doc_language: str = "English",
dataset_id: Optional[str] = None,
dataset_id: str | None = None,
indexing_technique: str = "economy",
) -> IndexingEstimate:
"""
@ -422,7 +422,7 @@ class IndexingRunner:
max_tokens: int,
chunk_overlap: int,
separator: str,
embedding_model_instance: Optional[ModelInstance],
embedding_model_instance: ModelInstance | None,
) -> TextSplitter:
"""
Get the NodeParser object according to the processing rule.
@ -530,6 +530,7 @@ class IndexingRunner:
# chunk nodes by chunk size
indexing_start_at = time.perf_counter()
tokens = 0
create_keyword_thread = None
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
# create keyword index
create_keyword_thread = threading.Thread(
@ -568,7 +569,11 @@ class IndexingRunner:
for future in futures:
tokens += future.result()
if dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX and dataset.indexing_technique == "economy":
if (
dataset_document.doc_form != IndexType.PARENT_CHILD_INDEX
and dataset.indexing_technique == "economy"
and create_keyword_thread is not None
):
create_keyword_thread.join()
indexing_end_at = time.perf_counter()
@ -651,7 +656,7 @@ class IndexingRunner:
@staticmethod
def _update_document_index_status(
document_id: str, after_indexing_status: str, extra_update_params: Optional[dict] = None
document_id: str, after_indexing_status: str, extra_update_params: dict | None = None
):
"""
Update the document indexing status.