From 7204a7ccef375b03fdba8fbdbf534d6145ff7817 Mon Sep 17 00:00:00 2001
From: -LAN-
Date: Tue, 19 May 2026 22:08:20 +0800
Subject: [PATCH] test(api): expand remote fetcher coverage

---
Reviewer notes (this region is not part of the commit message or the diff):
  * Adds unit tests for core.file.remote_fetcher only; no production code changes.
  * HEAD on a signed upload-file URL: metadata headers are returned and
    storage.load_once is not called.
  * Requests that must fall through to ssrf_proxy: unsigned URL, duplicate
    `sign` query value, non-integer timestamp, expired signature, file:// URL,
    file:// INTERNAL_FILES_URL with an empty FILES_URL.
  * 404 responses when the tool-file record, or both datasource records, are
    missing; datasource URL served from the tool-file record when the upload
    record is missing.
  * post/put/delete/patch helpers and the GraphonRemoteFileFetcher wrappers.

 .../core/file/test_remote_fetcher.py          | 249 ++++++++++++++++++
 1 file changed, 249 insertions(+)

diff --git a/api/tests/unit_tests/core/file/test_remote_fetcher.py b/api/tests/unit_tests/core/file/test_remote_fetcher.py
index cddecd35ce..34f2e6cc6f 100644
--- a/api/tests/unit_tests/core/file/test_remote_fetcher.py
+++ b/api/tests/unit_tests/core/file/test_remote_fetcher.py
@@ -6,6 +6,7 @@ from types import SimpleNamespace
 from unittest.mock import MagicMock
 
 import httpx
+import pytest
 
 from core.file import remote_fetcher
 
@@ -152,6 +153,60 @@ def test_make_request_get_signed_upload_file_url_reads_storage_without_ssrf(monk
     ssrf_make_request.assert_not_called()
 
 
+def test_make_request_head_signed_upload_file_url_returns_metadata_without_ssrf(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    _patch_session(monkeypatch)
+    upload_file = SimpleNamespace(
+        id=UPLOAD_FILE_ID,
+        key="upload_files/tenant/hello.txt",
+        name="hello.txt",
+        mime_type="text/plain",
+        size=5,
+        extension="txt",
+    )
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_upload_file", MagicMock(return_value=upload_file))
+    load_once = MagicMock(return_value=b"hello")
+    monkeypatch.setattr(remote_fetcher.storage, "load_once", load_once)
+    ssrf_make_request = MagicMock()
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "make_request", ssrf_make_request)
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/{UPLOAD_FILE_ID}/file-preview",
+        payload=f"file-preview|{UPLOAD_FILE_ID}",
+    )
+
+    response = remote_fetcher.make_request("HEAD", url)
+
+    assert response.status_code == 200
+    assert response.content == b""
+    assert response.headers["Content-Type"] == "text/plain"
+    assert response.headers["Content-Length"] == "5"
+    assert response.request.method == "HEAD"
+    load_once.assert_not_called()
+    ssrf_make_request.assert_not_called()
+
+
+def test_make_request_get_unsigned_dify_url_delegates_to_ssrf_proxy(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    get_upload_file = MagicMock()
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_upload_file", get_upload_file)
+    url = f"http://localhost:5001/files/{UPLOAD_FILE_ID}/file-preview?timestamp=1700000000&nonce=nonce"
+    proxy_response = httpx.Response(403, request=httpx.Request("GET", url))
+    ssrf_make_request = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "make_request", ssrf_make_request)
+
+    response = remote_fetcher.make_request("GET", url, timeout=3)
+
+    assert response is proxy_response
+    get_upload_file.assert_not_called()
+    ssrf_make_request.assert_called_once_with(
+        method="GET",
+        url=url,
+        max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES,
+        timeout=3,
+    )
+
+
 def test_make_request_post_signed_upload_file_url_delegates_to_ssrf_proxy(monkeypatch):
     _patch_file_fetcher_config(monkeypatch)
     get_upload_file = MagicMock()
@@ -223,6 +278,61 @@ def test_image_preview_url_with_file_preview_signature_delegates_to_ssrf_proxy(m
     ssrf_get.assert_called_once_with(url=url, max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES)
 
 
+def test_duplicate_signature_query_value_delegates_to_ssrf_proxy(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    url = (
+        _signed_url(
+            base_url="http://localhost:5001",
+            path=f"/files/{UPLOAD_FILE_ID}/file-preview",
+            payload=f"file-preview|{UPLOAD_FILE_ID}",
+        )
+        + "&sign=second"
+    )
+    proxy_response = httpx.Response(403, request=httpx.Request("GET", url))
+    ssrf_get = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+
+    response = remote_fetcher.get(url)
+
+    assert response is proxy_response
+    ssrf_get.assert_called_once_with(url=url, max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES)
+
+
+def test_malformed_timestamp_delegates_to_ssrf_proxy(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/{UPLOAD_FILE_ID}/file-preview",
+        payload=f"file-preview|{UPLOAD_FILE_ID}",
+    ).replace("timestamp=1700000000", "timestamp=not-an-int")
+    proxy_response = httpx.Response(403, request=httpx.Request("GET", url))
+    ssrf_get = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+
+    response = remote_fetcher.get(url)
+
+    assert response is proxy_response
+    ssrf_get.assert_called_once_with(url=url, max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES)
+
+
+def test_expired_signature_delegates_to_ssrf_proxy(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    monkeypatch.setattr(remote_fetcher.time, "time", lambda: 1700004001)
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/{UPLOAD_FILE_ID}/file-preview",
+        payload=f"file-preview|{UPLOAD_FILE_ID}",
+    )
+    proxy_response = httpx.Response(403, request=httpx.Request("GET", url))
+    ssrf_get = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+
+    response = remote_fetcher.get(url)
+
+    assert response is proxy_response
+    ssrf_get.assert_called_once_with(url=url, max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES)
+
+
 def test_invalid_signature_delegates_to_ssrf_proxy(monkeypatch):
     _patch_file_fetcher_config(monkeypatch)
     proxy_response = httpx.Response(403, request=httpx.Request("GET", "http://localhost:5001/bad"))
@@ -274,6 +384,38 @@ def test_unsupported_dify_path_delegates_to_ssrf_proxy(monkeypatch):
     )
 
 
+def test_invalid_url_scheme_delegates_to_ssrf_proxy(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    url = f"file:///tmp/files/{UPLOAD_FILE_ID}/file-preview?timestamp=1700000000&nonce=nonce&sign=ignored"
+    proxy_response = httpx.Response(403, request=httpx.Request("GET", url))
+    ssrf_get = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+
+    response = remote_fetcher.get(url)
+
+    assert response is proxy_response
+    ssrf_get.assert_called_once_with(url=url, max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES)
+
+
+def test_invalid_configured_file_origin_delegates_to_ssrf_proxy(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    monkeypatch.setattr(remote_fetcher.dify_config, "FILES_URL", "")
+    monkeypatch.setattr(remote_fetcher.dify_config, "INTERNAL_FILES_URL", "file:///tmp/files")
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/{UPLOAD_FILE_ID}/file-preview",
+        payload=f"file-preview|{UPLOAD_FILE_ID}",
+    )
+    proxy_response = httpx.Response(403, request=httpx.Request("GET", url))
+    ssrf_get = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+
+    response = remote_fetcher.get(url)
+
+    assert response is proxy_response
+    ssrf_get.assert_called_once_with(url=url, max_retries=remote_fetcher.SSRF_DEFAULT_MAX_RETRIES)
+
+
 def test_signed_upload_file_url_returns_404_when_record_missing(monkeypatch):
     _patch_file_fetcher_config(monkeypatch)
     _patch_session(monkeypatch)
@@ -326,6 +468,25 @@ def test_get_signed_tool_file_url_reads_storage_without_ssrf(monkeypatch):
     ssrf_get.assert_not_called()
 
 
+def test_signed_tool_file_url_returns_404_when_record_missing(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    _patch_session(monkeypatch)
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_tool_file", MagicMock(return_value=None))
+    ssrf_get = MagicMock()
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/tools/{TOOL_FILE_ID}.txt",
+        payload=f"file-preview|{TOOL_FILE_ID}",
+    )
+
+    response = remote_fetcher.get(url)
+
+    assert response.status_code == 404
+    assert response.content == b""
+    ssrf_get.assert_not_called()
+
+
 def test_get_signed_datasource_file_url_reads_upload_storage_without_ssrf(monkeypatch):
     _patch_file_fetcher_config(monkeypatch)
     _patch_session(monkeypatch)
@@ -354,3 +515,91 @@ def test_get_signed_datasource_file_url_reads_upload_storage_without_ssrf(monkey
     assert response.content == b"data"
     remote_fetcher.storage.load_once.assert_called_once_with("datasources/tenant/data.txt")
     ssrf_get.assert_not_called()
+
+
+def test_get_signed_datasource_file_url_reads_tool_storage_when_upload_missing(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    _patch_session(monkeypatch)
+    tool_file = SimpleNamespace(
+        id=DATASOURCE_FILE_ID,
+        file_key="datasources/tenant/tool-data.txt",
+        name="tool-data.txt",
+        mimetype="text/plain",
+        size=9,
+    )
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_upload_file", MagicMock(return_value=None))
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_tool_file", MagicMock(return_value=tool_file))
+    monkeypatch.setattr(remote_fetcher.storage, "load_once", MagicMock(return_value=b"tool-data"))
+    ssrf_get = MagicMock()
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/datasources/{DATASOURCE_FILE_ID}.txt",
+        payload=f"file-preview|{DATASOURCE_FILE_ID}",
+    )
+
+    response = remote_fetcher.get(url)
+
+    assert response.status_code == 200
+    assert response.content == b"tool-data"
+    assert response.headers["Content-Type"] == "text/plain"
+    assert response.headers["Content-Length"] == "9"
+    remote_fetcher.storage.load_once.assert_called_once_with("datasources/tenant/tool-data.txt")
+    ssrf_get.assert_not_called()
+
+
+def test_signed_datasource_file_url_returns_404_when_records_missing(monkeypatch):
+    _patch_file_fetcher_config(monkeypatch)
+    _patch_session(monkeypatch)
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_upload_file", MagicMock(return_value=None))
+    monkeypatch.setattr(remote_fetcher._file_access_controller, "get_tool_file", MagicMock(return_value=None))
+    ssrf_get = MagicMock()
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, "get", ssrf_get)
+    url = _signed_url(
+        base_url="http://localhost:5001",
+        path=f"/files/datasources/{DATASOURCE_FILE_ID}.txt",
+        payload=f"file-preview|{DATASOURCE_FILE_ID}",
+    )
+
+    response = remote_fetcher.get(url)
+
+    assert response.status_code == 404
+    assert response.content == b""
+    ssrf_get.assert_not_called()
+
+
+@pytest.mark.parametrize("method_name", ["post", "put", "delete", "patch"])
+def test_non_get_helpers_delegate_to_ssrf_proxy(monkeypatch, method_name):
+    url = "https://example.com/file.txt"
+    proxy_response = httpx.Response(200, request=httpx.Request(method_name.upper(), url))
+    proxy_method = MagicMock(return_value=proxy_response)
+    monkeypatch.setattr(remote_fetcher.ssrf_proxy, method_name, proxy_method)
+
+    response = getattr(remote_fetcher, method_name)(url, max_retries=2, timeout=3)
+
+    assert response is proxy_response
+    proxy_method.assert_called_once_with(url=url, max_retries=2, timeout=3)
+
+
+def test_graphon_remote_file_fetcher_exposes_ssrf_error_types():
+    fetcher = remote_fetcher.GraphonRemoteFileFetcher()
+
+    assert fetcher.max_retries_exceeded_error is remote_fetcher.max_retries_exceeded_error
+    assert fetcher.request_error is remote_fetcher.request_error
+
+
+@pytest.mark.parametrize("method_name", ["get", "head", "post", "put", "delete", "patch"])
+def test_graphon_remote_file_fetcher_adapts_fetcher_responses(monkeypatch, method_name):
+    url = "https://example.com/file.txt"
+    response = httpx.Response(200, request=httpx.Request(method_name.upper(), url), content=b"ok")
+    fetch_method = MagicMock(return_value=response)
+    graphon_response = object()
+    adapter = MagicMock(return_value=graphon_response)
+    monkeypatch.setattr(remote_fetcher, method_name, fetch_method)
+    monkeypatch.setattr(remote_fetcher, "_to_graphon_http_response", adapter)
+
+    result = getattr(remote_fetcher.GraphonRemoteFileFetcher(), method_name)(url, max_retries=2, timeout=3)
+
+    assert result is graphon_response
+    fetch_method.assert_called_once_with(url=url, max_retries=2, timeout=3)
+    adapter.assert_called_once_with(response)