diff --git a/rag/nlp/search.py b/rag/nlp/search.py index d2129e77f..36fb88843 100644 --- a/rag/nlp/search.py +++ b/rag/nlp/search.py @@ -619,6 +619,8 @@ class Dealer: chunks[id2idx[cid]]["similarity"] += sim continue chunk = self.dataStore.get(cid, idx_nms, kb_ids) + if not chunk: + continue d = { "chunk_id": cid, "content_ltks": chunk["content_ltks"], diff --git a/rag/prompts/generator.py b/rag/prompts/generator.py index de047f17e..5a11294ba 100644 --- a/rag/prompts/generator.py +++ b/rag/prompts/generator.py @@ -827,6 +827,11 @@ async def relevant_chunks_with_toc(query: str, toc:list[dict], chat_mdl, topn: i META_DATA = load_prompt("meta_data") async def gen_metadata(chat_mdl, schema:dict, content:str): template = PROMPT_JINJA_ENV.from_string(META_DATA) + for k, desc in schema.items(): + if "enum" in desc and not desc.get("enum"): + del desc["enum"] + if desc.get("enum"): + desc["description"] += "\n** Extracted values must strictly match the given list specified by `enum`. **" system_prompt = template.render(content=content, schema=schema) user_prompt = "Output: " _, msg = message_fit_in(form_message(system_prompt, user_prompt), chat_mdl.max_length) diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index ee041da1e..371bd8185 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -374,13 +374,13 @@ async def build_chunks(task, progress_callback): chat_mdl = LLMBundle(task["tenant_id"], LLMType.CHAT, llm_name=task["llm_id"], lang=task["language"]) async def gen_metadata_task(chat_mdl, d): - cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", {}) + cached = get_llm_cache(chat_mdl.llm_name, d["content_with_weight"], "metadata", task["parser_config"]["metadata"]) if not cached: async with chat_limiter: cached = await gen_metadata(chat_mdl, metadata_schema(task["parser_config"]["metadata"]), d["content_with_weight"]) - set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", {}) + set_llm_cache(chat_mdl.llm_name, d["content_with_weight"], cached, "metadata", task["parser_config"]["metadata"]) if cached: d["metadata_obj"] = cached tasks = []