diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 492ae69e2..b31057bc0 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -15,11 +15,15 @@ import time +start_ts = time.time() + +# LiteLLM fetches a model cost map from GitHub during import unless this is set. +# Parser pods should not block startup on external network access. +import os +os.environ.setdefault("LITELLM_LOCAL_MODEL_COST_MAP", "True") # no internet, save about 10s from common.misc_utils import thread_pool_exec -start_ts = time.time() - import asyncio import socket # from beartype import BeartypeConf @@ -47,7 +51,6 @@ from rag.utils.raptor_utils import ( ) from common.log_utils import init_root_logger from common.config_utils import show_configs -from rag.graphrag.general.index import run_graphrag_for_kb from rag.graphrag.utils import get_llm_cache, set_llm_cache, get_tags_from_cache, set_tags_to_cache from rag.prompts.generator import keyword_extraction, question_proposal, content_tagging, run_toc_from_text, \ gen_metadata @@ -80,7 +83,6 @@ from rag.app import laws, paper, presentation, manual, qa, table, book, resume, from rag.nlp import search, rag_tokenizer, add_positions from rag.raptor import ( RAPTOR_TREE_BUILDER, - RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor, ) from common.token_utils import num_tokens_from_string, truncate from rag.utils.redis_conn import REDIS_CONN, RedisDistributedLock @@ -982,6 +984,7 @@ async def run_raptor_for_kb(row, kb_parser_config, chat_mdl, embd_mdl, vector_si """Run RAPTOR and append generated summary chunks for one doc id.""" nonlocal tk_count, res logging.info("RAPTOR: using tree_builder=%s clustering_method=%s for doc %s", tree_builder, clustering_method, did) + from rag.raptor import RecursiveAbstractiveProcessing4TreeOrganizedRetrieval as Raptor # Lazy load, save around 8s raptor = Raptor( raptor_config.get("max_cluster", 64), chat_mdl, @@ -1401,6 +1404,7 @@ async def do_handle_task(task): with_community = graphrag_conf.get("community", False) async with kg_limiter: # await run_graphrag(task, task_language, with_resolution, with_community, chat_model, embedding_model, progress_callback) + from rag.graphrag.general.index import run_graphrag_for_kb # Lazy load, save around 2s result = await run_graphrag_for_kb( row=task, doc_ids=task.get("doc_ids", []),