Files
ragflow/common/data_source
Yesid Cano Castro d1afcc9e71 feat(seafile): add library and directory sync scope support (#13153)
### What problem does this PR solve?

The SeaFile connector currently synchronises the entire account — every library
visible to the authenticated user. This is impractical for users who only need a
subset of their data indexed, especially on large SeaFile instances with many
shared libraries.

This PR introduces granular sync scope support, allowing users to choose between
syncing their entire account, a single library, or a specific directory within a
library. It also adds support for SeaFile library-scoped API tokens
(`/api/v2.1/via-repo-token/` endpoints), enabling tighter access control without
exposing account-level credentials.


### Type of change

- [ ] Bug Fix (non-breaking change which fixes an issue)
- [x] New Feature (non-breaking change which adds functionality)
- [ ] Documentation Update
- [ ] Refactoring
- [ ] Performance Improvement
- [ ] Other (please describe):

### Test

```
from seafile_connector import SeaFileConnector
import logging
import os

# Configure the root logger at DEBUG level for this test run.
logging.basicConfig(level=logging.DEBUG)

# Connection settings come from the environment; an unset variable falls back
# to the default given as the second argument.
URL = os.getenv("SEAFILE_URL", "https://seafile.example.com")
TOKEN = os.getenv("SEAFILE_TOKEN", "")
REPO_ID = os.getenv("SEAFILE_REPO_ID", "")
SYNC_PATH = os.getenv("SEAFILE_SYNC_PATH", "/Documents")
REPO_TOKEN = os.getenv("SEAFILE_REPO_TOKEN", "")

def _test_scope(scope, repo_id=None, sync_path=None):
    """Run the connector once for the given sync scope and print what it finds.

    Builds a credentials dict from the module-level ``TOKEN`` / ``REPO_TOKEN``
    values, constructs a ``SeaFileConnector`` for ``scope``, validates its
    settings, then iterates ``load_from_state()`` and prints one line per
    document followed by a total.

    Args:
        scope: Value passed as ``sync_scope``; the callers in this script use
            "account", "library" and "directory".
        repo_id: Forwarded to the connector as ``repo_id``.
        sync_path: Forwarded to the connector as ``sync_path``.
    """
    print(f"\n{'='*50}")
    print(f"Testing scope: {scope}")
    print(f"{'='*50}")

    # Only send the credentials that are actually set.
    credentials = {}
    if TOKEN:
        credentials["seafile_token"] = TOKEN
    # The repo token is only added for the library and directory scopes.
    if scope in ("library", "directory") and REPO_TOKEN:
        credentials["repo_token"] = REPO_TOKEN

    connector = SeaFileConnector(
        seafile_url=URL,
        batch_size=5,
        sync_scope=scope,
        include_shared=False,
        repo_id=repo_id,
        sync_path=sync_path,
    )
    connector.load_credentials(credentials)
    connector.validate_connector_settings()

    # Flatten the batches lazily so documents are numbered across batches.
    documents = (
        document
        for batch in connector.load_from_state()
        for document in batch
    )
    count = 0  # stays 0 when the connector yields no documents
    for count, document in enumerate(documents, start=1):
        print(f"  [{count}] {document.semantic_identifier} "
              f"({document.size_bytes} bytes, {document.extension})")

    print(f"\n-> {scope} scope: {count} document(s) found.\n")

# The library and directory runs accept either the account token or the
# repo token.
any_token = TOKEN or REPO_TOKEN

# 1. Account scope -- needs the account token.
if not TOKEN:
    print("\nSkipping account scope (set SEAFILE_TOKEN)")
else:
    _test_scope("account")

# 2. Library scope -- needs a repo id plus some token.
if not (REPO_ID and any_token):
    print("\nSkipping library scope (set SEAFILE_REPO_ID + token)")
else:
    _test_scope("library", repo_id=REPO_ID)

# 3. Directory scope -- needs a repo id, a path and some token.
if not (REPO_ID and SYNC_PATH and any_token):
    print("\nSkipping directory scope (set SEAFILE_REPO_ID + SEAFILE_SYNC_PATH + token)")
else:
    _test_scope("directory", repo_id=REPO_ID, sync_path=SYNC_PATH)
```
2026-02-28 10:24:28 +08:00
..
2025-12-31 17:18:30 +08:00
2025-12-17 15:43:25 +08:00
2026-01-04 19:16:29 +08:00
2025-12-08 12:21:18 +08:00
2025-12-30 15:09:52 +08:00