add old auth transform

This commit is contained in:
jyong
2025-07-28 19:29:36 +08:00
parent 829e6f0d1a
commit 657e813c7f
9 changed files with 68 additions and 32 deletions

View File

@ -1,4 +1,5 @@
from typing import Optional
from core.rag.extractor.extractor_base import BaseExtractor
from core.rag.models.document import Document
from services.website_service import WebsiteService
@ -16,8 +17,15 @@ class FirecrawlWebExtractor(BaseExtractor):
only_main_content: Only return the main content of the page excluding headers, navs, footers, etc.
"""
def __init__(self, url: str, job_id: str, tenant_id: str, mode: str = "crawl", only_main_content: bool = True,
credential_id: Optional[str] = None):
def __init__(
self,
url: str,
job_id: str,
tenant_id: str,
mode: str = "crawl",
only_main_content: bool = True,
credential_id: Optional[str] = None,
):
"""Initialize with url, job_id, tenant_id, mode, only_main_content and credential_id."""
self._url = url
self.job_id = job_id
@ -30,7 +38,9 @@ class FirecrawlWebExtractor(BaseExtractor):
"""Extract content from the URL."""
documents = []
if self.mode == "crawl":
crawl_data = WebsiteService.get_crawl_url_data(self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id)
crawl_data = WebsiteService.get_crawl_url_data(
self.job_id, "firecrawl", self._url, self.tenant_id, self.credential_id
)
if crawl_data is None:
return []
document = Document(