Improve: optimize file name (with path) in Box connector. (#13124)

### What problem does this PR solve?

Refactor: include the relative folder path in the file name (semantic identifier) of documents produced by the Box connector.

### Type of change

- [x] Performance Improvement

<img width="2357" height="1258" alt="image"
src="https://github.com/user-attachments/assets/f4c5c90b-d885-4514-b7bc-f17ab62b045f"
/>
This commit is contained in:
Magicbook1108
2026-02-12 15:40:55 +08:00
committed by GitHub
parent e72291bc9a
commit e89fd686e2
2 changed files with 22 additions and 12 deletions

View File

@ -38,9 +38,10 @@ class BoxConnector(LoadConnector, PollConnector):
def _yield_files_recursive(
self,
folder_id,
folder_id: str,
start: SecondsSinceUnixEpoch | None,
end: SecondsSinceUnixEpoch | None
end: SecondsSinceUnixEpoch | None,
relative_folder_path: str = "",
) -> GenerateDocumentsOutput:
if self.box_client is None:
@ -59,6 +60,7 @@ class BoxConnector(LoadConnector, PollConnector):
file = self.box_client.files.get_file_by_id(
entry.id
)
modified_time: SecondsSinceUnixEpoch | None = None
raw_time = (
getattr(file, "created_at", None)
or getattr(file, "content_created_at", None)
@ -72,13 +74,18 @@ class BoxConnector(LoadConnector, PollConnector):
continue
content_bytes = self.box_client.downloads.download_file(file.id)
semantic_identifier = (
f"{relative_folder_path} / {file.name}"
if relative_folder_path
else file.name
)
batch.append(
Document(
id=f"box:{file.id}",
blob=content_bytes.read(),
source=DocumentSource.BOX,
semantic_identifier=file.name,
semantic_identifier=semantic_identifier,
extension=get_file_ext(file.name),
doc_updated_at=modified_time,
size_bytes=file.size,
@ -86,7 +93,17 @@ class BoxConnector(LoadConnector, PollConnector):
)
)
elif entry.type == 'folder':
yield from self._yield_files_recursive(folder_id=entry.id, start=start, end=end)
child_relative_path = (
f"{relative_folder_path} / {entry.name}"
if relative_folder_path
else entry.name
)
yield from self._yield_files_recursive(
folder_id=entry.id,
start=start,
end=end,
relative_folder_path=child_relative_path
)
if batch:
yield batch
@ -159,4 +176,4 @@ class BoxConnector(LoadConnector, PollConnector):
if __name__ == "__main__":
pass
# app.run(port=4999)
# app.run(port=4999)