Refact: optimize confluence performance (#13497)

### What problem does this PR solve?

Refactor: optimize Confluence performance (see #13494).

### Type of change

- [x] Refactoring
This commit is contained in:
Magicbook1108
2026-03-10 15:02:24 +08:00
committed by GitHub
parent 9ba43ae4ee
commit 675810e0cf
2 changed files with 3 additions and 2 deletions

View File

@@ -1310,7 +1310,7 @@ class ConfluenceConnector(
self._confluence_client: OnyxConfluence | None = None
self._low_timeout_confluence_client: OnyxConfluence | None = None
self._fetched_titles: set[str] = set()
-self.allow_images = False
+self.allow_images = True
# Track document names to detect duplicates
self._document_name_counts: dict[str, int] = {}
self._document_name_paths: dict[str, list[str]] = {}
@@ -1597,7 +1597,7 @@ class ConfluenceConnector(
id=page_url,
source=DocumentSource.CONFLUENCE,
semantic_identifier=semantic_identifier,
-extension=".html", # Confluence pages are HTML
+extension=".txt", # Confluence pages are HTML
blob=page_content.encode("utf-8"), # Encode page content as bytes
doc_updated_at=datetime_from_string(page["version"]["when"]),
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes

View File

@@ -275,6 +275,7 @@ class Confluence(SyncBase):
space=space,
page_id=page_id,
index_recursively=index_recursively,
)
credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"],