mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-04-25 13:05:58 +08:00
Refact: optimize confluence performance (#13497)
### What problem does this PR solve?

Refact: optimize confluence performance #13494

### Type of change

- [x] Refactoring
This commit is contained in:
@@ -1310,7 +1310,7 @@ class ConfluenceConnector(
|
||||
self._confluence_client: OnyxConfluence | None = None
|
||||
self._low_timeout_confluence_client: OnyxConfluence | None = None
|
||||
self._fetched_titles: set[str] = set()
|
||||
self.allow_images = False
|
||||
self.allow_images = True
|
||||
# Track document names to detect duplicates
|
||||
self._document_name_counts: dict[str, int] = {}
|
||||
self._document_name_paths: dict[str, list[str]] = {}
|
||||
@@ -1597,7 +1597,7 @@ class ConfluenceConnector(
|
||||
id=page_url,
|
||||
source=DocumentSource.CONFLUENCE,
|
||||
semantic_identifier=semantic_identifier,
|
||||
extension=".html", # Confluence pages are HTML
|
||||
extension=".txt", # Confluence pages are HTML
|
||||
blob=page_content.encode("utf-8"), # Encode page content as bytes
|
||||
doc_updated_at=datetime_from_string(page["version"]["when"]),
|
||||
size_bytes=len(page_content.encode("utf-8")), # Calculate size in bytes
|
||||
|
||||
@@ -275,6 +275,7 @@ class Confluence(SyncBase):
|
||||
space=space,
|
||||
page_id=page_id,
|
||||
index_recursively=index_recursively,
|
||||
|
||||
)
|
||||
|
||||
credentials_provider = StaticCredentialsProvider(tenant_id=task["tenant_id"],
|
||||
|
||||
Reference in New Issue
Block a user