mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-01-19 03:35:11 +08:00
fix(infinity): Use comma separator for important_kwd to preserve multi-word keywords (#12618)
## Problem

The `important_kwd` field in the Infinity connector was using mismatched separators:

- **Storage**: `list2str(v)` uses a space as the default separator
- **Reading**: `v.split()` splits on all whitespace

This causes multi-word keywords like `"Senior Fund Manager"` to be incorrectly split into `["Senior", "Fund", "Manager"]`.

## Solution

Use a comma `,` as the separator for both storing and reading, consistent with:

1. The LLM output format in `keyword_prompt.md` ("delimited by ENGLISH COMMA")
2. The `cached.split(",")` in `task_executor.py`

## Changes

- `insert()`: `list2str(v)` → `list2str(v, ",")`
- `update()`: `list2str(v)` → `list2str(v, ",")`
- `get_fields()`: `v.split()` → `v.split(",") if v else []`

## Impact

This bug affects:

- Python-level reranking weight calculation (`important_kwd * 5`)
- API response keyword display
- Search precision due to fragmented keywords
This commit is contained in:
@@ -340,7 +340,7 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
if not d.get("docnm_kwd"):
|
||||
d["docnm"] = self.list2str(v)
|
||||
elif k == "important_kwd":
|
||||
d["important_keywords"] = self.list2str(v)
|
||||
d["important_keywords"] = self.list2str(v, ",")
|
||||
elif k == "important_tks":
|
||||
if not d.get("important_kwd"):
|
||||
d["important_keywords"] = v
|
||||
@@ -429,7 +429,7 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
if not new_value.get("docnm_kwd"):
|
||||
new_value["docnm"] = v
|
||||
elif k == "important_kwd":
|
||||
new_value["important_keywords"] = self.list2str(v)
|
||||
new_value["important_keywords"] = self.list2str(v, ",")
|
||||
elif k == "important_tks":
|
||||
if not new_value.get("important_kwd"):
|
||||
new_value["important_keywords"] = v
|
||||
@@ -532,7 +532,7 @@ class InfinityConnection(InfinityConnectionBase):
|
||||
res[field] = res["docnm"]
|
||||
if "important_keywords" in res.columns:
|
||||
if "important_kwd" in fields_all:
|
||||
res["important_kwd"] = res["important_keywords"].apply(lambda v: v.split())
|
||||
res["important_kwd"] = res["important_keywords"].apply(lambda v: v.split(",") if v else [])
|
||||
if "important_tks" in fields_all:
|
||||
res["important_tks"] = res["important_keywords"]
|
||||
if "questions" in res.columns:
|
||||
|
||||
Reference in New Issue
Block a user