Files
ragflow/agent/component/invoke.py
Daniil Sivak dee68c571b Feat: support variable interpolation in headers (#13680)
Closes #13277

### What problem does this PR solve?

Adds `{variable_name}` (and `{component@variable}`) interpolation
support to HTTP header values in the `Invoke` component, matching the
existing URL interpolation behavior.

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

<img width="1280" height="867" alt="image"
src="https://github.com/user-attachments/assets/8ab7b4e9-7cc0-4a7f-8a5f-f838a15a5fda"
/>

---------

Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
2026-03-18 22:38:20 +08:00

187 lines
7.1 KiB
Python

#
# Copyright 2024 The InfiniFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json
import logging
import os
import re
import time
from abc import ABC
import requests
from agent.component.base import ComponentBase, ComponentParamBase
from common.connection_utils import timeout
from deepdoc.parser import HtmlParser
class InvokeParam(ComponentParamBase):
"""
Define the Crawler component parameters.
"""
def __init__(self):
super().__init__()
self.proxy = None
self.headers = ""
self.method = "get"
self.variables = []
self.url = ""
self.timeout = 60
self.clean_html = False
self.datatype = "json" # New parameter to determine data posting type
def check(self):
self.check_valid_value(self.method.lower(), "Type of content from the crawler", ["get", "post", "put"])
self.check_empty(self.url, "End point URL")
self.check_positive_integer(self.timeout, "Timeout time in second")
self.check_boolean(self.clean_html, "Clean HTML")
self.check_valid_value(self.datatype.lower(), "Data post type", ["json", "formdata"]) # Check for valid datapost value
class Invoke(ComponentBase, ABC):
component_name = "Invoke"
def get_input_form(self) -> dict[str, dict]:
res = {}
for item in self._param.variables or []:
if not isinstance(item, dict):
continue
ref = (item.get("ref") or "").strip()
if not ref or ref in res:
continue
elements = self.get_input_elements_from_text("{" + ref + "}")
element = elements.get(ref, {})
res[ref] = {
"type": "line",
"name": element.get("name") or item.get("key") or ref,
}
return res
@timeout(int(os.environ.get("COMPONENT_EXEC_TIMEOUT", 3)))
def _invoke(self, **kwargs):
if self.check_if_canceled("Invoke processing"):
return
args = {}
for para in self._param.variables:
if para.get("value"):
args[para["key"]] = para["value"]
elif para.get("ref") in kwargs:
args[para["key"]] = kwargs[para["ref"]]
self.set_input_value(para["ref"], kwargs[para["ref"]])
else:
args[para["key"]] = self._canvas.get_variable_value(para["ref"])
self.set_input_value(para["ref"], args[para["key"]])
url = self._param.url.strip()
def replace_variable(match):
var_name = match.group(1)
try:
value = self._canvas.get_variable_value(var_name)
return str(value or "")
except Exception:
return ""
variable_pattern = r"\{([a-zA-Z_][a-zA-Z0-9_.@-]*)\}"
# {base_url} or {component_id@variable_name}
url = re.sub(variable_pattern, replace_variable, url)
if url.find("http") != 0:
url = "http://" + url
method = self._param.method.lower()
headers = {}
if self._param.headers:
try:
parsed_headers = json.loads(self._param.headers)
except json.JSONDecodeError as e:
logging.warning(
"Invoke headers are not valid JSON, ignoring headers. raw=%r error=%s",
self._param.headers,
e,
)
parsed_headers = {}
if not isinstance(parsed_headers, dict):
logging.warning(
"Invoke headers JSON is of type %s, expected an object; ignoring headers.",
type(parsed_headers).__name__,
)
parsed_headers = {}
headers = parsed_headers
for key, value in list(headers.items()):
if isinstance(value, str):
headers[key] = re.sub(variable_pattern, replace_variable, value)
proxies = None
if re.sub(r"https?:?/?/?", "", self._param.proxy):
proxies = {"http": self._param.proxy, "https": self._param.proxy}
last_e = ""
for _ in range(self._param.max_retries + 1):
if self.check_if_canceled("Invoke processing"):
return
try:
if method == "get":
response = requests.get(url=url, params=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
if self._param.clean_html:
sections = HtmlParser()(None, response.content)
self.set_output("result", "\n".join(sections))
else:
self.set_output("result", response.text)
if method == "put":
if self._param.datatype.lower() == "json":
response = requests.put(url=url, json=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
else:
response = requests.put(url=url, data=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
if self._param.clean_html:
sections = HtmlParser()(None, response.content)
self.set_output("result", "\n".join(sections))
else:
self.set_output("result", response.text)
if method == "post":
if self._param.datatype.lower() == "json":
response = requests.post(url=url, json=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
else:
response = requests.post(url=url, data=args, headers=headers, proxies=proxies, timeout=self._param.timeout)
if self._param.clean_html:
sections = HtmlParser()(None, response.content)
self.set_output("result", "\n".join(sections))
else:
self.set_output("result", response.text)
return self.output("result")
except Exception as e:
if self.check_if_canceled("Invoke processing"):
return
last_e = e
logging.exception(f"Http request error: {e}")
time.sleep(self._param.delay_after_error)
if last_e:
self.set_output("_ERROR", str(last_e))
return f"Http request error: {last_e}"
assert False, self.output()
def thoughts(self) -> str:
return "Waiting for the server respond..."