fix: drop some type fixme (#20344)

2026-05-05 09:58:04 +08:00 · 2025-05-30 14:10:09 +08:00
parent 9b47f9f786
commit 5a991295e0
10 changed files with 43 additions and 45 deletions
--- a/api/services/ops_service.py
+++ b/api/services/ops_service.py
@ -1,5 +1,6 @@
-from typing import Optional
+from typing import Any, Optional

+from core.ops.entities.config_entity import BaseTracingConfig
 from core.ops.ops_trace_manager import OpsTraceManager, provider_config_map
 from extensions.ext_database import db
 from models.model import App, TraceAppConfig
@ -92,13 +93,12 @@ class OpsService:
        except KeyError:
            return {"error": f"Invalid tracing provider: {tracing_provider}"}

-        config_class, other_keys = (
-            provider_config_map[tracing_provider]["config_class"],
-            provider_config_map[tracing_provider]["other_keys"],
-        )
-        # FIXME: ignore type error
-        default_config_instance = config_class(**tracing_config)  # type: ignore
-        for key in other_keys:  # type: ignore
+        provider_config: dict[str, Any] = provider_config_map[tracing_provider]
+        config_class: type[BaseTracingConfig] = provider_config["config_class"]
+        other_keys: list[str] = provider_config["other_keys"]
+
+        default_config_instance: BaseTracingConfig = config_class(**tracing_config)
+        for key in other_keys:
            if key in tracing_config and tracing_config[key] == "":
                tracing_config[key] = getattr(default_config_instance, key, None)

--- a/api/services/website_service.py
+++ b/api/services/website_service.py
@ -173,26 +173,27 @@ class WebsiteService:
        return crawl_status_data

    @classmethod
-    def get_crawl_url_data(cls, job_id: str, provider: str, url: str, tenant_id: str) -> dict[Any, Any] | None:
+    def get_crawl_url_data(cls, job_id: str, provider: str, url: str, tenant_id: str) -> dict[str, Any] | None:
        credentials = ApiKeyAuthService.get_auth_credentials(tenant_id, "website", provider)
        # decrypt api_key
        api_key = encrypter.decrypt_token(tenant_id=tenant_id, token=credentials.get("config").get("api_key"))
-        # FIXME data is redefine too many times here, use Any to ease the type checking, fix it later
-        data: Any
+
        if provider == "firecrawl":
+            crawl_data: list[dict[str, Any]] | None = None
            file_key = "website_files/" + job_id + ".txt"
            if storage.exists(file_key):
-                d = storage.load_once(file_key)
-                if d:
-                    data = json.loads(d.decode("utf-8"))
+                stored_data = storage.load_once(file_key)
+                if stored_data:
+                    crawl_data = json.loads(stored_data.decode("utf-8"))
            else:
                firecrawl_app = FirecrawlApp(api_key=api_key, base_url=credentials.get("config").get("base_url", None))
                result = firecrawl_app.check_crawl_status(job_id)
                if result.get("status") != "completed":
                    raise ValueError("Crawl job is not completed")
-                data = result.get("data")
-            if data:
-                for item in data:
+                crawl_data = result.get("data")
+
+            if crawl_data:
+                for item in crawl_data:
                    if item.get("source_url") == url:
                        return dict(item)
            return None
@ -211,23 +212,24 @@ class WebsiteService:
                    raise ValueError("Failed to crawl")
                return dict(response.json().get("data", {}))
            else:
-                api_key = encrypter.decrypt_token(tenant_id=tenant_id, token=credentials.get("config").get("api_key"))
-                response = requests.post(
+                # Get crawl status first
+                status_response = requests.post(
                    "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                    headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
                    json={"taskId": job_id},
                )
-                data = response.json().get("data", {})
-                if data.get("status") != "completed":
+                status_data = status_response.json().get("data", {})
+                if status_data.get("status") != "completed":
                    raise ValueError("Crawl job is not completed")

-                response = requests.post(
+                # Get processed data
+                data_response = requests.post(
                    "https://adaptivecrawlstatus-kir3wx7b3a-uc.a.run.app",
                    headers={"Content-Type": "application/json", "Authorization": f"Bearer {api_key}"},
-                    json={"taskId": job_id, "urls": list(data.get("processed", {}).keys())},
+                    json={"taskId": job_id, "urls": list(status_data.get("processed", {}).keys())},
                )
-                data = response.json().get("data", {})
-                for item in data.get("processed", {}).values():
+                processed_data = data_response.json().get("data", {})
+                for item in processed_data.get("processed", {}).values():
                    if item.get("data", {}).get("url") == url:
                        return dict(item.get("data", {}))
            return None