diff --git a/rag/llm/chat_model.py b/rag/llm/chat_model.py
index dc59e1fb8..eb1a0f826 100644
--- a/rag/llm/chat_model.py
+++ b/rag/llm/chat_model.py
@@ -1263,7 +1263,7 @@ class LiteLLMBase(ABC):
         if self.model_name.lower().find("qwen3") >= 0:
             kwargs["extra_body"] = {"enable_thinking": False}
 
-        completion_args = self._construct_completion_args(history=hist, stream=False, tools=False, **gen_conf)
+        completion_args = self._construct_completion_args(history=hist, stream=False, tools=False, **{**gen_conf, **kwargs})
 
         for attempt in range(self.max_retries + 1):
             try: