mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-05-06 02:07:49 +08:00
Improve: optimize file name (with path) in box container. (#13124)
### What problem does this PR solve? Refact: optimize file name (with path) in box container. ### Type of change - [x] Performance Improvement <img width="2357" height="1258" alt="image" src="https://github.com/user-attachments/assets/f4c5c90b-d885-4514-b7bc-f17ab62b045f" />
This commit is contained in:
@ -38,9 +38,10 @@ class BoxConnector(LoadConnector, PollConnector):
|
||||
|
||||
def _yield_files_recursive(
|
||||
self,
|
||||
folder_id,
|
||||
folder_id: str,
|
||||
start: SecondsSinceUnixEpoch | None,
|
||||
end: SecondsSinceUnixEpoch | None
|
||||
end: SecondsSinceUnixEpoch | None,
|
||||
relative_folder_path: str = "",
|
||||
) -> GenerateDocumentsOutput:
|
||||
|
||||
if self.box_client is None:
|
||||
@ -59,6 +60,7 @@ class BoxConnector(LoadConnector, PollConnector):
|
||||
file = self.box_client.files.get_file_by_id(
|
||||
entry.id
|
||||
)
|
||||
modified_time: SecondsSinceUnixEpoch | None = None
|
||||
raw_time = (
|
||||
getattr(file, "created_at", None)
|
||||
or getattr(file, "content_created_at", None)
|
||||
@ -72,13 +74,18 @@ class BoxConnector(LoadConnector, PollConnector):
|
||||
continue
|
||||
|
||||
content_bytes = self.box_client.downloads.download_file(file.id)
|
||||
semantic_identifier = (
|
||||
f"{relative_folder_path} / {file.name}"
|
||||
if relative_folder_path
|
||||
else file.name
|
||||
)
|
||||
|
||||
batch.append(
|
||||
Document(
|
||||
id=f"box:{file.id}",
|
||||
blob=content_bytes.read(),
|
||||
source=DocumentSource.BOX,
|
||||
semantic_identifier=file.name,
|
||||
semantic_identifier=semantic_identifier,
|
||||
extension=get_file_ext(file.name),
|
||||
doc_updated_at=modified_time,
|
||||
size_bytes=file.size,
|
||||
@ -86,7 +93,17 @@ class BoxConnector(LoadConnector, PollConnector):
|
||||
)
|
||||
)
|
||||
elif entry.type == 'folder':
|
||||
yield from self._yield_files_recursive(folder_id=entry.id, start=start, end=end)
|
||||
child_relative_path = (
|
||||
f"{relative_folder_path} / {entry.name}"
|
||||
if relative_folder_path
|
||||
else entry.name
|
||||
)
|
||||
yield from self._yield_files_recursive(
|
||||
folder_id=entry.id,
|
||||
start=start,
|
||||
end=end,
|
||||
relative_folder_path=child_relative_path
|
||||
)
|
||||
|
||||
if batch:
|
||||
yield batch
|
||||
@ -159,4 +176,4 @@ class BoxConnector(LoadConnector, PollConnector):
|
||||
|
||||
if __name__ == "__main__":
|
||||
pass
|
||||
# app.run(port=4999)
|
||||
# app.run(port=4999)
|
||||
|
||||
Reference in New Issue
Block a user