Feat: Initialize context engine CLI (#13776)

### What problem does this PR solve?

- Add multiple output formats to ragflow_cli
- Initialize contextengine as a Go module
  - ls datasets/ls files
  - cat file
  - search -d dir -q query

issue: #13714

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yingfeng
2026-03-26 21:07:06 +08:00
committed by GitHub
parent 3b1e77a6d4
commit 6e309f9d0a
14 changed files with 3753 additions and 155 deletions

View File

@ -0,0 +1,49 @@
# ContextFS - Context Engine File System
ContextFS is a context engine interface for RAGFlow, providing users with a Unix-like file system interface to manage datasets, tools, skills, and memories.
## Directory Structure
```
user_id/
├── datasets/
│ └── my_dataset/
│ └── ...
├── tools/
│ ├── registry.json
│ └── tool_name/
│ ├── DOC.md
│ └── ...
├── skills/
│ ├── registry.json
│ └── skill_name/
│ ├── SKILL.md
│ └── ...
└── memories/
└── memory_id/
├── sessions/
│ ├── messages/
│ ├── summaries/
│ │ └── session_id/
│ │ └── summary-{datetime}.md
│ └── tools/
│ └── session_id/
│ └── {tool_name}.md # User level of memory on Tools usage
├── users/
│ ├── profile.md
│ ├── preferences/
│ └── entities/
└── agents/
└── agent_space/
├── tools/
│ └── {tool_name}.md # Agent level of memory on Tools usage
└── skills/
└── {skill_name}.md # Agent level of memory on Skills usage
```
## Supported Commands
- `ls [path]` - List directory contents
- `cat <path>` - Display file contents (only for text files)
- `search <query>` - Search content

View File

@ -0,0 +1,781 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
)
// HTTPResponse represents an HTTP response
type HTTPResponse struct {
	StatusCode int                 // HTTP status code of the response
	Body       []byte              // raw response body
	Headers    map[string][]string // response headers (one value list per name)
	Duration   float64             // request duration; unit not shown here — presumably seconds, TODO confirm
}

// HTTPClientInterface defines the interface needed from HTTPClient
type HTTPClientInterface interface {
	// Request performs an HTTP request. useAPIBase selects between the
	// /api/v1 base and the bare /v1 base (see callers); authKind "auto" is
	// used throughout this file. jsonBody, when non-nil, is sent as JSON.
	Request(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (*HTTPResponse, error)
}

// DatasetProvider handles datasets and their documents
// Path structure:
// - datasets/ -> List all datasets
// - datasets/{name} -> List documents in dataset
// - datasets/{name}/{doc_name} -> Get document info
type DatasetProvider struct {
	BaseProvider
	httpClient HTTPClientInterface // backend used for all dataset API calls
}
// NewDatasetProvider creates a new DatasetProvider backed by the given
// HTTP client.
func NewDatasetProvider(httpClient HTTPClientInterface) *DatasetProvider {
	p := &DatasetProvider{httpClient: httpClient}
	p.BaseProvider = BaseProvider{
		name:        "datasets",
		description: "Dataset management provider",
		rootPath:    "datasets",
	}
	return p
}

// Supports reports whether this provider can handle the given path
// (the "datasets" root or anything beneath it).
func (p *DatasetProvider) Supports(path string) bool {
	n := normalizePath(path)
	if n == "datasets" {
		return true
	}
	return strings.HasPrefix(n, "datasets/")
}
// List lists nodes at the given path.
// subPath is relative to "datasets/": empty lists all datasets,
// "{name}" lists that dataset's documents, and "{name}/{doc_name}"
// returns a single document node. The hidden ".knowledgebase" dataset
// is never accessible.
func (p *DatasetProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
	// Refuse access to the internal .knowledgebase dataset.
	if subPath == ".knowledgebase" || strings.HasPrefix(subPath, ".knowledgebase/") {
		return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
	}
	if subPath == "" {
		return p.listDatasets(ctx, opts)
	}
	switch parts := SplitPath(subPath); len(parts) {
	case 1:
		// datasets/{name}: default behavior is listing its documents.
		return p.listDocuments(ctx, parts[0], opts)
	case 2:
		// datasets/{name}/{doc_name}: info for a single document.
		return p.getDocumentNode(ctx, parts[0], parts[1])
	default:
		return nil, fmt.Errorf("invalid path: %s", subPath)
	}
}
// Search searches for datasets or documents
// An empty query degrades to a plain listing of the path; otherwise the
// query is routed either to a per-dataset document search or to a global
// dataset search.
func (p *DatasetProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	if opts.Query == "" {
		return p.List(ctx, subPath, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}
	// If searching under a specific dataset's files
	// NOTE(review): this only triggers for "{name}/files/..." paths; a
	// plain "{name}" falls through to searchDatasets — confirm this
	// matches the CLI's path conventions.
	parts := SplitPath(subPath)
	if len(parts) >= 2 && parts[1] == "files" {
		datasetName := parts[0]
		return p.searchDocuments(ctx, datasetName, opts)
	}
	// Otherwise search datasets
	return p.searchDatasets(ctx, opts)
}
// Cat retrieves document content.
// For datasets:
//   - cat datasets                  -> error: datasets is a directory
//   - cat datasets/kb_name          -> error: kb_name is a directory
//   - cat datasets/kb_name/doc_name -> would retrieve document content (not implemented yet)
func (p *DatasetProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
	if subPath == "" {
		return nil, fmt.Errorf("'datasets' is a directory, not a file")
	}
	switch parts := SplitPath(subPath); len(parts) {
	case 1:
		// datasets/{name} is a dataset directory, not a file.
		return nil, fmt.Errorf("'%s' is a directory, not a file", parts[0])
	case 2:
		// datasets/{name}/{doc_name}: content retrieval is not implemented.
		return nil, fmt.Errorf("document content retrieval not yet implemented for '%s'", parts[1])
	default:
		return nil, fmt.Errorf("invalid path for cat: %s", subPath)
	}
}
// ==================== Dataset Operations ====================
// listDatasets fetches all datasets from the API and returns them as
// directory nodes. The hidden ".knowledgebase" dataset is filtered out;
// opts.Limit (when set) truncates the returned page while Total still
// reflects the full filtered count.
func (p *DatasetProvider) listDatasets(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                      `json:"code"`
		Data    []map[string]interface{} `json:"data"`
		Message string                   `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	// Non-zero code is an application-level error from the backend.
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	nodes := make([]*Node, 0, len(apiResp.Data))
	for _, ds := range apiResp.Data {
		node := p.datasetToNode(ds)
		// Skip hidden .knowledgebase dataset (trim whitespace for safety)
		if strings.TrimSpace(node.Name) == ".knowledgebase" {
			continue
		}
		nodes = append(nodes, node)
	}
	total := len(nodes)
	// Apply limit if specified
	if opts != nil && opts.Limit > 0 && opts.Limit < len(nodes) {
		nodes = nodes[:opts.Limit]
	}
	return &Result{
		Nodes: nodes,
		Total: total,
	}, nil
}
// getDataset returns the Node for the dataset with the given name, or an
// ErrNotFound-wrapped error when no dataset matches. The hidden
// .knowledgebase dataset is never returned.
func (p *DatasetProvider) getDataset(ctx stdctx.Context, name string) (*Node, error) {
	if name == ".knowledgebase" {
		return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
	}
	// The API has no lookup-by-name endpoint, so fetch the full list and
	// scan for an exact name match.
	resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                      `json:"code"`
		Data    []map[string]interface{} `json:"data"`
		Message string                   `json:"message"`
	}
	if err = json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	for _, ds := range apiResp.Data {
		if getString(ds["name"]) != name {
			continue
		}
		return p.datasetToNode(ds), nil
	}
	return nil, fmt.Errorf("%s: dataset '%s'", ErrNotFound, name)
}
// searchDatasets searches datasets. With no query it is a plain listing;
// otherwise it delegates to the semantic retrieval API.
func (p *DatasetProvider) searchDatasets(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	if opts.Query != "" {
		// Use retrieval API for semantic search.
		return p.searchWithRetrieval(ctx, opts)
	}
	listOpts := &ListOptions{Limit: opts.Limit, Offset: opts.Offset}
	return p.listDatasets(ctx, listOpts)
}
// searchWithRetrieval performs semantic search using the retrieval API
// It resolves the target kb_ids (from opts.Dirs, or all datasets when no
// dirs are given), posts one retrieval request, and converts the returned
// chunks to nodes. TopK defaults to 10 and Threshold to 0.2.
func (p *DatasetProvider) searchWithRetrieval(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	// Determine kb_ids to search in
	var kbIDs []string
	var datasetsToSearch []*Node
	if len(opts.Dirs) > 0 && opts.Dirs[0] != "datasets" {
		// Search in specific datasets
		for _, dir := range opts.Dirs {
			// Extract dataset name from path (e.g., "datasets/kb1" -> "kb1")
			datasetName := dir
			if strings.HasPrefix(dir, "datasets/") {
				datasetName = dir[len("datasets/"):]
			}
			ds, err := p.getDataset(ctx, datasetName)
			if err != nil {
				// Try case-insensitive match
				allResult, listErr := p.listDatasets(ctx, nil)
				if listErr == nil {
					for _, d := range allResult.Nodes {
						if strings.EqualFold(d.Name, datasetName) {
							ds = d
							err = nil
							break
						}
					}
				}
				if err != nil {
					return nil, fmt.Errorf("dataset not found: %s", datasetName)
				}
			}
			datasetsToSearch = append(datasetsToSearch, ds)
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	} else {
		// Search in all datasets
		allResult, err := p.listDatasets(ctx, nil)
		if err != nil {
			return nil, err
		}
		datasetsToSearch = allResult.Nodes
		for _, ds := range datasetsToSearch {
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	}
	// No resolvable datasets: return an empty result rather than an error.
	if len(kbIDs) == 0 {
		return &Result{
			Nodes: []*Node{},
			Total: 0,
		}, nil
	}
	// Build kb_id -> dataset name mapping
	kbIDToName := make(map[string]string)
	for _, ds := range datasetsToSearch {
		kbID := getString(ds.Metadata["id"])
		if kbID != "" && ds.Name != "" {
			kbIDToName[kbID] = ds.Name
		}
	}
	// Build retrieval request
	payload := map[string]interface{}{
		"kb_id":    kbIDs,
		"question": opts.Query,
	}
	// Set top_k (default to 10 if not specified)
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK
	// Set similarity threshold (default to 0.2 if not specified to match UI behavior)
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold
	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...)
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Parse chunks from response
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}
	// Apply top_k limit if specified (API may return more results)
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// chunkToNodeWithKBMapping converts a chunk map to a Node with kb_id -> name mapping
// Field names vary across API versions, so each attribute is probed under
// several candidate keys. kbIDToName maps dataset IDs to display names and
// may be nil (names then fall back to the chunk's own kb_name field).
func (p *DatasetProvider) chunkToNodeWithKBMapping(chunk map[string]interface{}, kbIDToName map[string]string) *Node {
	// Extract chunk content - try multiple field names
	content := ""
	if v, ok := chunk["content_with_weight"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content_ltks"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["text"].(string); ok && v != "" {
		content = v
	}
	// Get chunk_id for URI
	chunkID := ""
	if v, ok := chunk["chunk_id"].(string); ok {
		chunkID = v
	} else if v, ok := chunk["id"].(string); ok {
		chunkID = v
	}
	// Get document name and ID
	docName := ""
	if v, ok := chunk["docnm_kwd"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["docnm"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["doc_name"].(string); ok && v != "" {
		docName = v
	}
	docID := ""
	if v, ok := chunk["doc_id"].(string); ok && v != "" {
		docID = v
	}
	// Get dataset/kb name from mapping or chunk data
	datasetName := ""
	datasetID := ""
	// First try to get kb_id from chunk (could be string or array)
	if v, ok := chunk["kb_id"].(string); ok && v != "" {
		datasetID = v
	} else if v, ok := chunk["kb_id"].([]interface{}); ok && len(v) > 0 {
		if s, ok := v[0].(string); ok {
			datasetID = s
		}
	}
	// Look up dataset name from mapping using kb_id
	if datasetID != "" && kbIDToName != nil {
		if name, ok := kbIDToName[datasetID]; ok && name != "" {
			datasetName = name
		}
	}
	// Fallback to kb_name from chunk if mapping doesn't have it
	if datasetName == "" {
		if v, ok := chunk["kb_name"].(string); ok && v != "" {
			datasetName = v
		}
	}
	// Build URI path: prefer names over IDs for readability
	// Format: datasets/{dataset_name}/{doc_name}
	path := "/datasets"
	if datasetName != "" {
		path += "/" + datasetName
	} else if datasetID != "" {
		path += "/" + datasetID
	}
	if docName != "" {
		path += "/" + docName
	} else if docID != "" {
		path += "/" + docID
	}
	// Use doc_name or chunk_id as the name if content is empty
	name := content
	if name == "" {
		if docName != "" {
			name = docName
		} else if chunkID != "" {
			// Keep the displayed ID short: at most 16 characters.
			name = "chunk:" + chunkID[:min(len(chunkID), 16)]
		} else {
			name = "(empty)"
		}
	}
	node := &Node{
		Name:     name,
		Path:     path,
		Type:     NodeTypeDocument,
		Metadata: chunk,
	}
	// Parse timestamps if available
	if createTime, ok := chunk["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := chunk["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}
	return node
}
// chunkToNode converts a chunk map to a Node without a kb_id -> name
// mapping (legacy path; dataset names then come from chunk fields only).
func (p *DatasetProvider) chunkToNode(chunk map[string]interface{}) *Node {
	var noMapping map[string]string
	return p.chunkToNodeWithKBMapping(chunk, noMapping)
}
// ==================== Document Operations ====================
// listDocuments lists the documents of the named dataset.
// opts.Limit maps to the API's page_size and opts.Offset is converted to
// a 1-based page number.
func (p *DatasetProvider) listDocuments(ctx stdctx.Context, datasetName string, opts *ListOptions) (*Result, error) {
	// Resolve the dataset name to its backend ID first.
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}
	datasetID := getString(ds.Metadata["id"])
	if datasetID == "" {
		return nil, fmt.Errorf("dataset ID not found")
	}
	// Build query parameters.
	// NOTE(review): these are passed in the Request "headers" slot —
	// presumably the client treats them as query params for GET; confirm
	// against the HTTPClient implementation.
	params := make(map[string]string)
	if opts != nil {
		if opts.Limit > 0 {
			params["page_size"] = strconv.Itoa(opts.Limit)
		}
		// Guard Limit > 0: the original divided by opts.Limit even when
		// it was zero, which panics when only Offset is set.
		if opts.Offset > 0 && opts.Limit > 0 {
			params["page"] = strconv.Itoa(opts.Offset/opts.Limit + 1)
		}
	}
	path := fmt.Sprintf("/datasets/%s/documents", datasetID)
	resp, err := p.httpClient.Request("GET", path, true, "auto", params, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code int `json:"code"`
		Data struct {
			Docs []map[string]interface{} `json:"docs"`
		} `json:"data"`
		Message string `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	nodes := make([]*Node, 0, len(apiResp.Data.Docs))
	for _, doc := range apiResp.Data.Docs {
		node := p.documentToNode(doc, datasetName)
		nodes = append(nodes, node)
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// getDocumentNode wraps getDocument in a single-node Result.
func (p *DatasetProvider) getDocumentNode(ctx stdctx.Context, datasetName, docName string) (*Result, error) {
	doc, err := p.getDocument(ctx, datasetName, docName)
	if err != nil {
		return nil, err
	}
	return &Result{Nodes: []*Node{doc}, Total: 1}, nil
}

// getDocument finds the named document inside a dataset by listing all
// documents and matching on name; returns an ErrNotFound-wrapped error
// when absent.
func (p *DatasetProvider) getDocument(ctx stdctx.Context, datasetName, docName string) (*Node, error) {
	result, err := p.listDocuments(ctx, datasetName, nil)
	if err != nil {
		return nil, err
	}
	for _, candidate := range result.Nodes {
		if candidate.Name != docName {
			continue
		}
		return candidate, nil
	}
	return nil, fmt.Errorf("%s: document '%s' in dataset '%s'", ErrNotFound, docName, datasetName)
}
// searchDocuments performs a semantic search restricted to one dataset.
// With no query it is a plain document listing. Mirrors
// searchWithRetrieval but targets a single kb_id; defaults are the same
// (TopK 10, Threshold 0.2).
func (p *DatasetProvider) searchDocuments(ctx stdctx.Context, datasetName string, opts *SearchOptions) (*Result, error) {
	// If no query is provided, just list documents
	if opts.Query == "" {
		return p.listDocuments(ctx, datasetName, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}
	// Use retrieval API for semantic search in specific dataset
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}
	kbID := getString(ds.Metadata["id"])
	if kbID == "" {
		return nil, fmt.Errorf("dataset ID not found for '%s'", datasetName)
	}
	// Build kb_id -> dataset name mapping
	kbIDToName := map[string]string{kbID: datasetName}
	// Build retrieval request for specific dataset
	payload := map[string]interface{}{
		"kb_id":    []string{kbID},
		"question": opts.Query,
	}
	// Set top_k (default to 10 if not specified)
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK
	// Set similarity threshold (default to 0.2 if not specified to match UI behavior)
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold
	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...)
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Parse chunks from response
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}
	// Apply top_k limit if specified (API may return more results)
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// ==================== Helper Functions ====================
// datasetToNode converts a raw dataset map from the API into a directory
// Node rooted at /datasets/{name}.
func (p *DatasetProvider) datasetToNode(ds map[string]interface{}) *Node {
	name := getString(ds["name"])
	node := &Node{
		Name:     name,
		Path:     "/datasets/" + name,
		Type:     NodeTypeDirectory,
		Metadata: ds,
	}
	// Timestamps may arrive under *_time (numeric) or *_date (string)
	// keys; the first non-nil candidate wins.
	for _, key := range []string{"create_time", "create_date"} {
		if v, ok := ds[key]; ok && v != nil {
			node.CreatedAt = parseTime(v)
			break
		}
	}
	for _, key := range []string{"update_time", "update_date"} {
		if v, ok := ds[key]; ok && v != nil {
			node.UpdatedAt = parseTime(v)
			break
		}
	}
	return node
}
// documentToNode converts a raw document map into a document Node under
// the given dataset.
func (p *DatasetProvider) documentToNode(doc map[string]interface{}, datasetName string) *Node {
	name := getString(doc["name"])
	node := &Node{
		Name: name,
		// Leading slash for consistency with datasetToNode and
		// chunkToNodeWithKBMapping, which both emit "/datasets/..." paths
		// (the original omitted it only here).
		Path:     "/datasets/" + datasetName + "/" + name,
		Type:     NodeTypeDocument,
		Metadata: doc,
	}
	// Parse size (API may deliver it as any numeric JSON type).
	if size, ok := doc["size"]; ok {
		node.Size = int64(getFloat(size))
	}
	// Parse timestamps
	if createTime, ok := doc["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := doc["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}
	return node
}
// getString coerces an arbitrary JSON-decoded value to a string: nil
// becomes "", strings pass through, and everything else is formatted
// with %v.
func getString(v interface{}) string {
	switch s := v.(type) {
	case nil:
		return ""
	case string:
		return s
	default:
		return fmt.Sprintf("%v", s)
	}
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
func getFloat(v interface{}) float64 {
if v == nil {
return 0
}
switch val := v.(type) {
case float64:
return val
case float32:
return float64(val)
case int:
return float64(val)
case int64:
return float64(val)
default:
return 0
}
}
// parseTime converts an API timestamp value into a time.Time.
// It accepts numeric epoch values (seconds, or milliseconds when the
// value exceeds 1e12), numeric strings (optionally quoted), and a few
// common date layouts. Unparseable input yields the zero time.
func parseTime(v interface{}) time.Time {
	var ts int64
	switch val := v.(type) {
	case float64:
		ts = int64(val)
	case int64:
		ts = val
	case int:
		ts = int64(val)
	case string:
		// Trim quotes if present
		trimmed := strings.Trim(val, `"`)
		parsed, err := strconv.ParseInt(trimmed, 10, 64)
		if err != nil {
			// Not an epoch number: fall back to formatted date layouts.
			for _, layout := range []string{
				"2006-01-02 15:04:05",
				"2006-01-02T15:04:05",
				"2006-01-02T15:04:05Z",
				"2006-01-02",
			} {
				if t, perr := time.Parse(layout, trimmed); perr == nil {
					return t
				}
			}
			return time.Time{}
		}
		ts = parsed
	default:
		// Includes nil and any unexpected type.
		return time.Time{}
	}
	// Millisecond epochs (13 digits) are scaled down to seconds.
	if ts > 1e12 {
		ts /= 1000
	}
	return time.Unix(ts, 0)
}

View File

@ -0,0 +1,312 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"fmt"
"strings"
"time"
)
// Engine is the core of the Context Engine.
// It manages providers and routes commands to the appropriate provider.
type Engine struct {
	providers []Provider
}

// NewEngine creates a new, empty Context Engine.
func NewEngine() *Engine {
	return &Engine{providers: []Provider{}}
}

// RegisterProvider registers a provider with the engine.
func (e *Engine) RegisterProvider(provider Provider) {
	e.providers = append(e.providers, provider)
}

// GetProviders returns name/description info for every registered
// provider, in registration order.
func (e *Engine) GetProviders() []ProviderInfo {
	infos := make([]ProviderInfo, len(e.providers))
	for i, p := range e.providers {
		infos[i] = ProviderInfo{Name: p.Name(), Description: p.Description()}
	}
	return infos
}
// Execute executes a command and returns the result
func (e *Engine) Execute(ctx stdctx.Context, cmd *Command) (*Result, error) {
	switch cmd.Type {
	case CommandList:
		return e.List(ctx, cmd.Path, parseListOptions(cmd.Params))
	case CommandSearch:
		return e.Search(ctx, cmd.Path, parseSearchOptions(cmd.Params))
	case CommandCat:
		// NOTE(review): the content returned by Cat is discarded here and
		// the caller receives a nil Result — callers that need the bytes
		// must invoke e.Cat directly. Confirm this is intentional.
		_, err := e.Cat(ctx, cmd.Path)
		return nil, err
	default:
		return nil, fmt.Errorf("unknown command type: %s", cmd.Type)
	}
}
// resolveProvider finds the provider for a given path
// It returns the provider together with the subpath relative to the
// provider's root ("" when the path is exactly the provider root).
// Unclaimed paths fall back to the files provider, so a bare "myskills"
// behaves like "files/myskills".
func (e *Engine) resolveProvider(path string) (Provider, string, error) {
	path = normalizePath(path)
	for _, provider := range e.providers {
		if provider.Supports(path) {
			// Parse the subpath relative to the provider root
			// Get provider name to calculate subPath
			providerName := provider.Name()
			var subPath string
			if path == providerName {
				subPath = ""
			} else if strings.HasPrefix(path, providerName+"/") {
				subPath = path[len(providerName)+1:]
			} else {
				// Provider claims the path but it is not under its name;
				// pass the path through unchanged.
				subPath = path
			}
			return provider, subPath, nil
		}
	}
	// If no provider supports this path, check if FileProvider can handle it as a fallback
	// This allows paths like "myskills" to be treated as "files/myskills"
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		// Check if the path looks like a file manager path (single component, not matching other providers)
		parts := SplitPath(path)
		if len(parts) > 0 && parts[0] != "datasets" {
			return fileProvider, path, nil
		}
	}
	return nil, "", fmt.Errorf("%s: %s", ErrProviderNotFound, path)
}
// List lists nodes at the given path
// If path is empty, returns:
// 1. Built-in providers (e.g., datasets)
// 2. Top-level directories from files provider (if any)
// When provider resolution fails, the files provider is tried once more
// with the full path so "ls myfolder" works as "ls files/myfolder".
func (e *Engine) List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error) {
	// Normalize path
	path = normalizePath(path)
	// If path is empty, return list of providers and files root directories
	if path == "" || path == "/" {
		return e.listRoot(ctx, opts)
	}
	provider, subPath, err := e.resolveProvider(path)
	if err != nil {
		// If not found, try to find in files provider as a fallback
		// This allows "ls myfolder" to work as "ls files/myfolder"
		if fileProvider := e.getFileProvider(); fileProvider != nil {
			result, ferr := fileProvider.List(ctx, path, opts)
			if ferr == nil {
				return result, nil
			}
		}
		// Fallback failed too: report the original resolution error.
		return nil, err
	}
	return provider.List(ctx, subPath, opts)
}
// listRoot returns the root listing:
// 1. Built-in providers (datasets, etc.)
// 2. Top-level folders from files provider (file_manager)
// Errors from the files provider are silently ignored so the built-in
// providers still appear.
func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	nodes := make([]*Node, 0)
	// Add built-in providers first (like datasets)
	for _, p := range e.providers {
		// Skip files provider from this list - we'll add its children instead
		if p.Name() == "files" {
			continue
		}
		nodes = append(nodes, &Node{
			Name:      p.Name(),
			Path:      "/" + p.Name(),
			Type:      NodeTypeDirectory,
			CreatedAt: time.Now(),
			Metadata: map[string]interface{}{
				"description": p.Description(),
			},
		})
	}
	// Add top-level folders from files provider (file_manager)
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		filesResult, err := fileProvider.List(ctx, "", opts)
		if err == nil {
			for _, node := range filesResult.Nodes {
				// Only add folders (directories), not files
				if node.Type == NodeTypeDirectory {
					// Ensure path doesn't have /files/ prefix for display
					// NOTE(review): TrimPrefix "files/" runs before
					// TrimPrefix "/", so a path like "/files/x" keeps its
					// "files/" segment — confirm file-provider paths never
					// start with a slash.
					node.Path = strings.TrimPrefix(node.Path, "files/")
					node.Path = strings.TrimPrefix(node.Path, "/")
					nodes = append(nodes, node)
				}
			}
		}
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// getFileProvider returns the registered "files" provider, or nil when
// none is registered.
func (e *Engine) getFileProvider() Provider {
	for i := range e.providers {
		if e.providers[i].Name() == "files" {
			return e.providers[i]
		}
	}
	return nil
}
// Search searches for nodes matching the query under the given path.
func (e *Engine) Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error) {
	provider, subPath, err := e.resolveProvider(path)
	if err != nil {
		return nil, err
	}
	return provider.Search(ctx, subPath, opts)
}

// Cat retrieves the content of a file/document. When no provider claims
// the path, it falls back to the files provider so that
// "cat myfolder/file.txt" behaves like "cat files/myfolder/file.txt".
func (e *Engine) Cat(ctx stdctx.Context, path string) ([]byte, error) {
	provider, subPath, err := e.resolveProvider(path)
	if err == nil {
		return provider.Cat(ctx, subPath)
	}
	if fp := e.getFileProvider(); fp != nil {
		return fp.Cat(ctx, path)
	}
	return nil, err
}
// ParsePath parses a path and returns structured path information:
// the owning provider name, the split components, and (when present)
// the second component as the resource name.
func (e *Engine) ParsePath(path string) (*PathInfo, error) {
	path = normalizePath(path)
	components := SplitPath(path)
	if len(components) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	providerName := components[0]
	isRoot := len(components) == 1
	// Find the provider owning this path.
	var provider Provider
	for _, p := range e.providers {
		// Match the exact first component or a "{provider}/" prefix.
		// (The original used a bare HasPrefix on the provider name, which
		// wrongly matched e.g. "filesX" against the "files" provider.)
		if p.Name() == providerName || strings.HasPrefix(path, p.Name()+"/") {
			provider = p
			break
		}
	}
	if provider == nil {
		return nil, fmt.Errorf("%s: %s", ErrProviderNotFound, path)
	}
	info := &PathInfo{
		Provider:   providerName,
		Path:       path,
		Components: components,
		IsRoot:     isRoot,
	}
	// Extract resource ID or name if available.
	if len(components) >= 2 {
		info.ResourceName = components[1]
	}
	return info, nil
}
// parseListOptions extracts ListOptions from generic command params.
// Missing or wrongly-typed entries are ignored, leaving zero values.
func parseListOptions(params map[string]interface{}) *ListOptions {
	opts := &ListOptions{}
	if params == nil {
		return opts
	}
	if v, ok := params["recursive"].(bool); ok {
		opts.Recursive = v
	}
	if v, ok := params["limit"].(int); ok {
		opts.Limit = v
	}
	if v, ok := params["offset"].(int); ok {
		opts.Offset = v
	}
	if v, ok := params["sort_by"].(string); ok {
		opts.SortBy = v
	}
	if v, ok := params["sort_order"].(string); ok {
		opts.SortOrder = v
	}
	return opts
}
// parseSearchOptions parses command params into SearchOptions
// Missing or wrongly-typed entries are ignored, leaving zero values.
// NOTE(review): the int and []string assertions assume params were built
// in-process; JSON-decoded params would carry float64 and []interface{}
// and be silently dropped — confirm how Command.Params is populated.
func parseSearchOptions(params map[string]interface{}) *SearchOptions {
	opts := &SearchOptions{}
	if params == nil {
		return opts
	}
	if query, ok := params["query"].(string); ok {
		opts.Query = query
	}
	if limit, ok := params["limit"].(int); ok {
		opts.Limit = limit
	}
	if offset, ok := params["offset"].(int); ok {
		opts.Offset = offset
	}
	if recursive, ok := params["recursive"].(bool); ok {
		opts.Recursive = recursive
	}
	if topK, ok := params["top_k"].(int); ok {
		opts.TopK = topK
	}
	if threshold, ok := params["threshold"].(float64); ok {
		opts.Threshold = threshold
	}
	if dirs, ok := params["dirs"].([]string); ok {
		opts.Dirs = dirs
	}
	return opts
}

View File

@ -0,0 +1,594 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"encoding/json"
"fmt"
"strings"
)
// FileProvider handles file operations using Python backend /files API
// Path structure:
// - files/ -> List root folder contents
// - files/{folder_name}/ -> List folder contents
// - files/{folder_name}/{file_name} -> Get file info/content
//
// Note: Uses Python backend API (useAPIBase=true):
// - GET /files?parent_id={id} -> List files/folders in parent
// - GET /files/{file_id} -> Get file info
// - POST /files -> Create folder or upload file
// - DELETE /files -> Delete files
// - GET /files/{file_id}/parent -> Get parent folder
// - GET /files/{file_id}/ancestors -> Get ancestor folders
// FileProvider serves the "files" namespace backed by the Python /files API.
type FileProvider struct {
	BaseProvider
	httpClient  HTTPClientInterface // backend used for all file API calls
	folderCache map[string]string   // path -> folder ID cache
	rootID      string              // root folder ID
}

// NewFileProvider creates a new FileProvider backed by the given HTTP
// client, with an empty folder-ID cache.
func NewFileProvider(httpClient HTTPClientInterface) *FileProvider {
	return &FileProvider{
		BaseProvider: BaseProvider{
			name:        "files",
			description: "File manager provider (Python server)",
			rootPath:    "files",
		},
		httpClient:  httpClient,
		folderCache: make(map[string]string),
	}
}

// Supports returns true if this provider can handle the given path
// (the "files" root or anything beneath it).
func (p *FileProvider) Supports(path string) bool {
	normalized := normalizePath(path)
	return normalized == "files" || strings.HasPrefix(normalized, "files/")
}
// List lists nodes at the given path.
// subPath is relative to "files/": empty lists the root folder, a single
// component lists that folder, and deeper paths are resolved by
// traversing folder-by-folder.
func (p *FileProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
	if subPath == "" {
		return p.listRootFolder(ctx, opts)
	}
	parts := SplitPath(subPath)
	if len(parts) > 1 {
		// Multi-level path such as myskills/skill-name/dir1.
		return p.listPathRecursive(ctx, parts, opts)
	}
	// files/{folder_name}: list that folder's contents.
	return p.listFolderByName(ctx, parts[0], opts)
}
// listPathRecursive recursively traverses the path and lists the final component
// Each intermediate component must be a directory; resolved folder IDs
// are memoized in p.folderCache. A final file component is returned as a
// single-node result; a final directory is listed.
func (p *FileProvider) listPathRecursive(ctx stdctx.Context, parts []string, opts *ListOptions) (*Result, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	// Start from root to find the first folder
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]
	// Traverse through intermediate directories
	for i := 1; i < len(parts); i++ {
		partName := parts[i]
		// List contents of current folder to find the next part
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}
		// Find the next component
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - if it's a directory, list its contents
					if node.Type == NodeTypeDirectory {
						childID := getString(node.Metadata["id"])
						if childID == "" {
							return nil, fmt.Errorf("folder ID not found for '%s'", partName)
						}
						newPath := currentPath + "/" + partName
						// Memoize the resolved folder ID for later lookups.
						p.folderCache[newPath] = childID
						return p.listFilesByParentID(ctx, childID, newPath, opts)
					}
					// It's a file - return the file node
					return &Result{
						Nodes: []*Node{node},
						Total: 1,
					}, nil
				}
				// Not the last component - must be a directory
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}
		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}
	// Should have returned in the loop, but just in case
	return p.listFilesByParentID(ctx, currentFolderID, currentPath, opts)
}
// Search filters the directory listing at subPath by node name
// (case-insensitive substring match). An empty query returns the plain
// listing unchanged.
//
// The previous implementation issued the same List call twice — once for the
// empty-query fast path and once before filtering; the two are now merged.
//
// NOTE(review): this remains a client-side fallback, not a server-side
// search: Limit/Offset are applied to the underlying List call *before* name
// filtering, so a non-empty query can return fewer than Limit matches even
// when more exist on the server.
func (p *FileProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	result, err := p.List(ctx, subPath, &ListOptions{
		Limit:  opts.Limit,
		Offset: opts.Offset,
	})
	if err != nil || opts.Query == "" {
		return result, err
	}
	// Simple name filtering
	query := strings.ToLower(opts.Query)
	var filtered []*Node
	for _, node := range result.Nodes {
		if strings.Contains(strings.ToLower(node.Name), query) {
			filtered = append(filtered, node)
		}
	}
	return &Result{
		Nodes: filtered,
		Total: len(filtered),
	}, nil
}
// Cat retrieves file content.
// subPath must name a file at least two levels deep (files/{folder}/{file});
// directories are rejected.
func (p *FileProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
	if subPath == "" {
		return nil, fmt.Errorf("cat requires a file path: files/{folder}/{file}")
	}
	parts := SplitPath(subPath)
	if len(parts) < 2 {
		return nil, fmt.Errorf("invalid path format, expected: files/{folder}/{file}")
	}
	// Resolve the path to a concrete node via folder-by-folder traversal.
	target, err := p.findNodeByPath(ctx, parts)
	if err != nil {
		return nil, err
	}
	if target.Type == NodeTypeDirectory {
		return nil, fmt.Errorf("'%s' is a directory, not a file", subPath)
	}
	fileID := getString(target.Metadata["id"])
	if fileID == "" {
		return nil, fmt.Errorf("file ID not found")
	}
	// Stream the raw bytes from the server.
	return p.downloadFile(ctx, fileID)
}
// findNodeByPath recursively traverses the path to find the target node.
//
// parts is the already-split path relative to "files/". Folder IDs discovered
// along the way are cached in p.folderCache.
//
// NOTE(review): the final return is reached only when len(parts) == 1, in
// which case an existing top-level folder is still reported as ErrNotFound —
// the only visible caller (Cat) requires len(parts) >= 2, so this is latent.
// Also largely duplicates listPathRecursive; consider one shared traversal.
func (p *FileProvider) findNodeByPath(ctx stdctx.Context, parts []string) (*Node, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	// Start from root to find the first folder
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]
	// Traverse through intermediate directories
	for i := 1; i < len(parts); i++ {
		partName := parts[i]
		// List contents of current folder to find the next part
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}
		// Find the next component
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - return it
					return node, nil
				}
				// Not the last component - must be a directory
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}
		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}
	return nil, fmt.Errorf("%s: '%s'", ErrNotFound, strings.Join(parts, "/"))
}
// ==================== Python Server API Methods ====================
// getRootID gets or caches the root folder ID.
//
// Returns the cached value on subsequent calls. If the server response
// carries no explicit "root_id", it returns "" — downstream listings then
// omit parent_id, which the server treats as a root listing.
//
// NOTE(review): ctx is accepted but not threaded into the HTTP request, and
// p.rootID is written without a lock (fine for a single-threaded CLI). The
// "auto" argument presumably selects an auth mode on the HTTP client —
// confirm against HTTPClientInterface.
func (p *FileProvider) getRootID(ctx stdctx.Context) (string, error) {
	if p.rootID != "" {
		return p.rootID, nil
	}
	// List files without parent_id to get root folder
	resp, err := p.httpClient.Request("GET", "/files", true, "auto", nil, nil)
	if err != nil {
		return "", err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}
	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Try to find root folder ID from response
	if rootID, ok := apiResp.Data["root_id"].(string); ok && rootID != "" {
		p.rootID = rootID
		return rootID, nil
	}
	// If no explicit root_id, use empty parent_id for root listing
	return "", nil
}
// listRootFolder lists the contents of the user's root folder by resolving
// the root folder ID and delegating to listFilesByParentID with an empty
// parent path.
func (p *FileProvider) listRootFolder(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	rootID, err := p.getRootID(ctx)
	if err != nil {
		return nil, err
	}
	return p.listFilesByParentID(ctx, rootID, "", opts)
}
// listFilesByParentID lists files/folders by parent ID.
//
// parentID "" omits the parent_id query parameter (server lists root).
// parentPath is the provider-relative prefix used for the returned
// Node.Path values and as the folderCache key prefix. Hidden
// ".knowledgebase" entries are filtered out.
//
// NOTE(review): opts.Offset/SortBy are ignored and page is fixed at 1, so a
// listing is capped at max(opts.Limit, 100) entries. parentID is interpolated
// into the query string without URL escaping — safe only if IDs are URL-safe
// tokens; consider url.Values.
func (p *FileProvider) listFilesByParentID(ctx stdctx.Context, parentID string, parentPath string, opts *ListOptions) (*Result, error) {
	// Build query parameters
	queryParams := make([]string, 0)
	if parentID != "" {
		queryParams = append(queryParams, fmt.Sprintf("parent_id=%s", parentID))
	}
	// Always set page=1 and page_size to ensure we get results
	pageSize := 100
	if opts != nil && opts.Limit > 0 {
		pageSize = opts.Limit
	}
	queryParams = append(queryParams, fmt.Sprintf("page_size=%d", pageSize))
	queryParams = append(queryParams, "page=1")
	// Build URL with query string
	path := "/files"
	if len(queryParams) > 0 {
		path = path + "?" + strings.Join(queryParams, "&")
	}
	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Extract files list from data - API returns {"total": N, "files": [...], "parent_folder": {...}}
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}
	nodes := make([]*Node, 0, len(files))
	for _, f := range files {
		name := getString(f["name"])
		// Skip hidden .knowledgebase folder
		if strings.TrimSpace(name) == ".knowledgebase" {
			continue
		}
		node := p.fileToNode(f, parentPath)
		nodes = append(nodes, node)
		// Cache folder ID so later path traversals skip a lookup round-trip
		if node.Type == NodeTypeDirectory || getString(f["type"]) == "folder" {
			if id := getString(f["id"]); id != "" {
				cacheKey := node.Name
				if parentPath != "" {
					cacheKey = parentPath + "/" + node.Name
				}
				p.folderCache[cacheKey] = id
			}
		}
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// listFolderByName lists the contents of a top-level folder identified by
// name, resolving the name to a folder ID first.
func (p *FileProvider) listFolderByName(ctx stdctx.Context, folderName string, opts *ListOptions) (*Result, error) {
	id, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}
	// Use the resolved ID as parent_id and the name as the path prefix.
	return p.listFilesByParentID(ctx, id, folderName, opts)
}
// getFolderIDByName finds folder ID by its name in root.
//
// Results are cached in p.folderCache under the bare folder name. The
// response parser accepts either a "files" or "docs" array — presumably two
// server versions; confirm which endpoints return which.
//
// NOTE(review): the listing is capped at page_size=100, page=1, so folders
// beyond the first 100 root entries cannot be resolved.
func (p *FileProvider) getFolderIDByName(ctx stdctx.Context, folderName string) (string, error) {
	// Check cache first
	if id, ok := p.folderCache[folderName]; ok {
		return id, nil
	}
	// List root folder to find the folder
	// (root-ID lookup errors are deliberately ignored; "" means "list root")
	rootID, _ := p.getRootID(ctx)
	queryParams := make([]string, 0)
	if rootID != "" {
		queryParams = append(queryParams, fmt.Sprintf("parent_id=%s", rootID))
	}
	queryParams = append(queryParams, "page_size=100", "page=1")
	path := "/files"
	if len(queryParams) > 0 {
		path = path + "?" + strings.Join(queryParams, "&")
	}
	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return "", err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}
	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Search for folder by name
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	} else if fileList, ok := apiResp.Data["docs"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}
	for _, f := range files {
		name := getString(f["name"])
		fileType := getString(f["type"])
		id := getString(f["id"])
		// Match by name and ensure it's a folder
		if name == folderName && fileType == "folder" && id != "" {
			p.folderCache[folderName] = id
			return id, nil
		}
	}
	return "", fmt.Errorf("%s: folder '%s'", ErrNotFound, folderName)
}
// getFileNode gets a file node by folder and file name.
// If fileName is a directory, returns the directory contents instead of the directory node.
//
// NOTE(review): not referenced by any method visible in this file — may be
// called elsewhere or may be dead code left from an earlier two-level design
// superseded by findNodeByPath/listPathRecursive; verify before removing.
func (p *FileProvider) getFileNode(ctx stdctx.Context, folderName, fileName string) (*Result, error) {
	folderID, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}
	// List files in folder to find the file
	result, err := p.listFilesByParentID(ctx, folderID, folderName, nil)
	if err != nil {
		return nil, err
	}
	// Find the specific file
	for _, node := range result.Nodes {
		if node.Name == fileName {
			// If it's a directory, list its contents instead of returning the node itself
			if node.Type == NodeTypeDirectory {
				childFolderID := getString(node.Metadata["id"])
				if childFolderID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", fileName)
				}
				// Cache the folder ID
				cacheKey := folderName + "/" + fileName
				p.folderCache[cacheKey] = childFolderID
				// Return directory contents
				return p.listFilesByParentID(ctx, childFolderID, cacheKey, nil)
			}
			// Return file node
			return &Result{
				Nodes: []*Node{node},
				Total: 1,
			}, nil
		}
	}
	return nil, fmt.Errorf("%s: file '%s' in folder '%s'", ErrNotFound, fileName, folderName)
}
// downloadFile downloads raw file content for the given file ID.
// On a non-200 status it surfaces the server's JSON error message when the
// body parses as an error envelope, otherwise a generic HTTP error.
func (p *FileProvider) downloadFile(ctx stdctx.Context, fileID string) ([]byte, error) {
	resp, err := p.httpClient.Request("GET", fmt.Sprintf("/files/%s", fileID), true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode == 200 {
		// Success: the body is the raw file content.
		return resp.Body, nil
	}
	// Try to parse error response
	var apiResp struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	}
	if json.Unmarshal(resp.Body, &apiResp) == nil && apiResp.Code != 0 {
		return nil, fmt.Errorf("%s", apiResp.Message)
	}
	return nil, fmt.Errorf("HTTP error %d", resp.StatusCode)
}
// ==================== Conversion Functions ====================
// fileToNode converts a file map (one entry of the server's "files" array)
// to a Node. parentPath, when non-empty, is prepended to the node path.
//
// NOTE(review): node.Metadata aliases the raw map f, so callers share (and
// could mutate) the parsed API response. Since Metadata is set to f, the
// nil-check before writing "id" below is effectively dead, and the write is
// a no-op whenever f already carries an "id" key.
func (p *FileProvider) fileToNode(f map[string]interface{}, parentPath string) *Node {
	name := getString(f["name"])
	fileType := getString(f["type"])
	fileID := getString(f["id"])
	// Determine node type: the server marks directories with type "folder";
	// everything else is treated as a plain file.
	nodeType := NodeTypeFile
	if fileType == "folder" {
		nodeType = NodeTypeDirectory
	}
	// Build path
	path := name
	if parentPath != "" {
		path = parentPath + "/" + name
	}
	node := &Node{
		Name:     name,
		Path:     path,
		Type:     nodeType,
		Metadata: f,
	}
	// Parse size (JSON numbers decode as float64)
	if size, ok := f["size"]; ok {
		node.Size = int64(getFloat(size))
	}
	// Parse timestamps
	if createTime, ok := f["create_time"]; ok && createTime != nil {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := f["update_time"]; ok && updateTime != nil {
		node.UpdatedAt = parseTime(updateTime)
	}
	// Store ID for later use
	if fileID != "" {
		if node.Metadata == nil {
			node.Metadata = make(map[string]interface{})
		}
		node.Metadata["id"] = fileID
	}
	return node
}

View File

@ -0,0 +1,180 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
)
// Provider is the interface for all context providers
// Each provider handles a specific resource type (datasets, chats, agents, etc.)
//
// Implementations typically embed BaseProvider for the Name/Description
// plumbing and implement Supports/List/Search/Cat against their backend.
type Provider interface {
	// Name returns the provider name (e.g., "datasets", "chats")
	Name() string
	// Description returns a human-readable description of the provider
	Description() string
	// Supports returns true if this provider can handle the given path
	// (the full user path, before it is stripped to a provider-relative one)
	Supports(path string) bool
	// List lists nodes at the given path
	List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error)
	// Search searches for nodes matching the query under the given path
	Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error)
	// Cat retrieves the content of a file/document at the given path
	// (text content only; the provider decides how to fetch it)
	Cat(ctx stdctx.Context, path string) ([]byte, error)
}
// BaseProvider provides common functionality for all providers
// (name/description accessors and root-path parsing helpers).
type BaseProvider struct {
	name        string // provider name, e.g. "files"
	description string // human-readable description shown in provider listings
	rootPath    string // path prefix this provider owns, e.g. "files"
}
// Name returns the provider name
func (p *BaseProvider) Name() string {
	return p.name
}
// Description returns the provider description
func (p *BaseProvider) Description() string {
	return p.description
}
// GetRootPath returns the root path for this provider
// (used by the engine to route user paths to the owning provider).
func (p *BaseProvider) GetRootPath() string {
	return p.rootPath
}
// IsRootPath reports whether the given path, once normalized, is exactly the
// provider's root path.
func (p *BaseProvider) IsRootPath(path string) bool {
	given, root := normalizePath(path), normalizePath(p.rootPath)
	return given == root
}
// ParsePath parses a path and returns the subpath relative to the provider
// root: "" for the root itself, the suffix after "root/" for children, and
// the normalized input unchanged when it does not live under the root.
func (p *BaseProvider) ParsePath(path string) string {
	sub := normalizePath(path)
	root := normalizePath(p.rootPath)
	switch {
	case sub == root:
		return ""
	case len(sub) > len(root) && sub[:len(root)+1] == root+"/":
		return sub[len(root)+1:]
	default:
		return sub
	}
}
// SplitPath splits a path into its non-empty components after normalization.
// A path that normalizes to "" yields an empty (non-nil) slice.
func SplitPath(path string) []string {
	cleaned := normalizePath(path)
	if cleaned == "" {
		return []string{}
	}
	pieces := splitString(cleaned, '/')
	components := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if piece == "" {
			continue
		}
		components = append(components, piece)
	}
	return components
}
// normalizePath normalizes a path: trims surrounding whitespace and slashes,
// drops empty and "." components, and resolves ".." against the preceding
// component (".." at the top is clamped, never escaping the root).
func normalizePath(path string) string {
	cleaned := trimSpace(path)
	if cleaned == "" {
		return ""
	}
	// Strip leading and trailing slashes.
	start, end := 0, len(cleaned)
	for start < end && cleaned[start] == '/' {
		start++
	}
	for end > start && cleaned[end-1] == '/' {
		end--
	}
	cleaned = cleaned[start:end]
	// Resolve "." and ".." with a component stack.
	var stack []string
	for _, part := range splitString(cleaned, '/') {
		switch part {
		case "", ".":
			// skip empty and current-directory markers
		case "..":
			if len(stack) > 0 {
				stack = stack[:len(stack)-1]
			}
		default:
			stack = append(stack, part)
		}
	}
	return joinStrings(stack, "/")
}
// Helper functions to avoid importing strings package in basic operations

// trimSpace strips ASCII whitespace (space, tab, newline, carriage return)
// from both ends of s. Behaves like strings.TrimSpace for ASCII input.
func trimSpace(s string) string {
	isWS := func(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}
	lo, hi := 0, len(s)
	for lo < hi && isWS(s[lo]) {
		lo++
	}
	for hi > lo && isWS(s[hi-1]) {
		hi--
	}
	return s[lo:hi]
}
// splitString splits s on every occurrence of the byte sep, like
// strings.Split for a single-byte separator: splitting "" yields [""], and
// leading/trailing separators produce empty fields.
func splitString(s string, sep byte) []string {
	var fields []string
	fieldStart := 0
	for idx := 0; idx < len(s); idx++ {
		if s[idx] != sep {
			continue
		}
		fields = append(fields, s[fieldStart:idx])
		fieldStart = idx + 1
	}
	return append(fields, s[fieldStart:])
}
// joinStrings joins strs with sep, like strings.Join (the file intentionally
// avoids importing strings for these basic helpers).
//
// The previous implementation concatenated with += in a loop, which is
// quadratic; this version computes the final length once and appends into a
// preallocated buffer, producing exactly one allocation.
func joinStrings(strs []string, sep string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}
	// Total output size: all elements plus one separator between each pair.
	n := len(sep) * (len(strs) - 1)
	for _, s := range strs {
		n += len(s)
	}
	buf := make([]byte, 0, n)
	buf = append(buf, strs[0]...)
	for _, s := range strs[1:] {
		buf = append(buf, sep...)
		buf = append(buf, s...)
	}
	return string(buf)
}

View File

@ -0,0 +1,116 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import "time"
// NodeType represents the type of a node in the context filesystem
type NodeType string
const (
	NodeTypeDirectory NodeType = "directory"
	NodeTypeFile NodeType = "file"
	NodeTypeDataset NodeType = "dataset"
	NodeTypeDocument NodeType = "document"
	NodeTypeChat NodeType = "chat"
	NodeTypeAgent NodeType = "agent"
	NodeTypeUnknown NodeType = "unknown"
)
// Node represents a node in the context filesystem
// This is the unified output format for all providers: each provider maps
// its backend objects (files, folders, datasets, ...) onto Nodes.
type Node struct {
	Name string `json:"name"` // display name (last path component)
	Path string `json:"path"` // full provider-relative path
	Type NodeType `json:"type"`
	Size int64 `json:"size,omitempty"` // bytes; 0 means unknown or not applicable
	CreatedAt time.Time `json:"created_at,omitempty"`
	UpdatedAt time.Time `json:"updated_at,omitempty"`
	Metadata map[string]interface{} `json:"metadata,omitempty"` // raw backend fields (e.g. "id")
}
// CommandType represents the type of command
type CommandType string
const (
	CommandList CommandType = "ls"
	CommandSearch CommandType = "search"
	CommandCat CommandType = "cat"
)
// Command represents a context engine command (one parsed CLI invocation).
type Command struct {
	Type CommandType `json:"type"`
	Path string `json:"path"`
	Params map[string]interface{} `json:"params,omitempty"` // command-specific flags (e.g. query, limit)
}
// ListOptions represents options for list operations
//
// NOTE(review): not every provider honors every field — the file provider in
// this package ignores Offset and SortBy/SortOrder; confirm per provider
// before relying on them.
type ListOptions struct {
	Recursive bool `json:"recursive,omitempty"`
	Limit int `json:"limit,omitempty"`
	Offset int `json:"offset,omitempty"`
	SortBy string `json:"sort_by,omitempty"`
	SortOrder string `json:"sort_order,omitempty"` // "asc" or "desc"
}
// SearchOptions represents options for search operations
type SearchOptions struct {
	Query string `json:"query"`
	Limit int `json:"limit,omitempty"`
	Offset int `json:"offset,omitempty"`
	Recursive bool `json:"recursive,omitempty"`
	TopK int `json:"top_k,omitempty"` // Number of top results to return (default: 10)
	Threshold float64 `json:"threshold,omitempty"` // Similarity threshold (default: 0.2)
	Dirs []string `json:"dirs,omitempty"` // List of directories to search in
}
// Result represents the result of a command execution.
// The zero value (no nodes, Total 0) is a valid empty result.
type Result struct {
	Nodes []*Node `json:"nodes"`
	Total int `json:"total"`
	HasMore bool `json:"has_more"`
	NextOffset int `json:"next_offset,omitempty"`
	Error error `json:"-"` // carries provider errors; never serialized
}
// PathInfo represents parsed path information
type PathInfo struct {
	Provider string // The provider name (e.g., "datasets", "chats")
	Path string // The full path
	Components []string // Path components
	IsRoot bool // Whether this is the root path for the provider
	ResourceID string // Resource ID if applicable
	ResourceName string // Resource name if applicable
}
// ProviderInfo holds metadata about a provider
type ProviderInfo struct {
	Name string `json:"name"`
	Description string `json:"description"`
	RootPath string `json:"root_path"`
}
// Common error messages
//
// NOTE(review): these are plain string prefixes formatted into errors with
// %s, not sentinel error values — callers cannot use errors.Is/As. Consider
// migrating to errors.New sentinels in a follow-up.
const (
	ErrInvalidPath = "invalid path"
	ErrProviderNotFound = "provider not found for path"
	ErrNotSupported = "operation not supported"
	ErrNotFound = "resource not found"
	ErrUnauthorized = "unauthorized"
	ErrInternal = "internal error"
)

View File

@ -0,0 +1,304 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
	"encoding/json"
	"fmt"
	"strings"
	"time"
)
// FormatNode formats a node for display as a key/value map.
// "json" emits raw values with RFC3339 timestamps, "table" emits
// human-readable size/time strings, and any other format ("plain") emits
// readable timestamps without a size field.
func FormatNode(node *Node, format string) map[string]interface{} {
	out := map[string]interface{}{
		"name": node.Name,
		"path": node.Path,
		"type": string(node.Type),
	}
	switch format {
	case "json":
		out["size"] = node.Size
		out["created_at"] = node.CreatedAt.Format(time.RFC3339)
		out["updated_at"] = node.UpdatedAt.Format(time.RFC3339)
	case "table":
		out["size"] = formatSize(node.Size)
		out["created_at"] = formatTime(node.CreatedAt)
		out["updated_at"] = formatTime(node.UpdatedAt)
	default: // "plain"
		out["created_at"] = formatTime(node.CreatedAt)
		out["updated_at"] = formatTime(node.UpdatedAt)
	}
	return out
}
// FormatNodes formats a list of nodes for display, applying FormatNode to
// each element in order.
func FormatNodes(nodes []*Node, format string) []map[string]interface{} {
	formatted := make([]map[string]interface{}, len(nodes))
	for i, node := range nodes {
		formatted[i] = FormatNode(node, format)
	}
	return formatted
}
// formatSize formats a byte count as a human-readable string ("-" for zero,
// whole bytes below 1 KiB, otherwise two decimals in the largest fitting
// binary unit up to TB).
func formatSize(size int64) string {
	const (
		KB = int64(1024)
		MB = 1024 * KB
		GB = 1024 * MB
		TB = 1024 * GB
	)
	if size == 0 {
		return "-"
	}
	if size >= TB {
		return fmt.Sprintf("%.2f TB", float64(size)/float64(TB))
	}
	if size >= GB {
		return fmt.Sprintf("%.2f GB", float64(size)/float64(GB))
	}
	if size >= MB {
		return fmt.Sprintf("%.2f MB", float64(size)/float64(MB))
	}
	if size >= KB {
		return fmt.Sprintf("%.2f KB", float64(size)/float64(KB))
	}
	return fmt.Sprintf("%d B", size)
}
// formatTime renders t as "YYYY-MM-DD HH:MM:SS", or "-" for the zero time.
func formatTime(t time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if !t.IsZero() {
		return t.Format(layout)
	}
	return "-"
}
// ResultToMap converts a Result to a map for JSON serialization.
// A nil result maps to an empty node list with total 0.
func ResultToMap(result *Result) map[string]interface{} {
	if result == nil {
		return map[string]interface{}{
			"nodes": []interface{}{},
			"total": 0,
		}
	}
	converted := make([]map[string]interface{}, 0, len(result.Nodes))
	for _, n := range result.Nodes {
		converted = append(converted, nodeToMap(n))
	}
	return map[string]interface{}{
		"nodes":       converted,
		"total":       result.Total,
		"has_more":    result.HasMore,
		"next_offset": result.NextOffset,
	}
}
// nodeToMap converts a Node to a map, including the optional fields (size,
// timestamps, metadata) only when they carry a meaningful value.
func nodeToMap(node *Node) map[string]interface{} {
	out := map[string]interface{}{
		"name": node.Name,
		"path": node.Path,
		"type": string(node.Type),
	}
	if node.Size > 0 {
		out["size"] = node.Size
	}
	if created := node.CreatedAt; !created.IsZero() {
		out["created_at"] = created.Format(time.RFC3339)
	}
	if updated := node.UpdatedAt; !updated.IsZero() {
		out["updated_at"] = updated.Format(time.RFC3339)
	}
	if len(node.Metadata) > 0 {
		out["metadata"] = node.Metadata
	}
	return out
}
// MarshalJSON marshals a Result to JSON bytes.
//
// Implements json.Marshaler on *Result so encoding always flows through
// ResultToMap (dropping zero timestamps and the unexported error) instead of
// the struct's field tags.
//
// NOTE(review): the receiver is a pointer, so json.Marshal of a Result VALUE
// falls back to the default struct encoding — confirm call sites marshal
// pointers.
func (r *Result) MarshalJSON() ([]byte, error) {
	return json.Marshal(ResultToMap(r))
}
// PrintResult prints a result to stdout in the requested format:
// "json" (indented), "table", or anything else as plain paths.
func PrintResult(result *Result, format string) {
	if result == nil {
		fmt.Println("No results")
		return
	}
	switch format {
	case "table":
		printTable(result.Nodes)
	case "json":
		data, _ := json.MarshalIndent(ResultToMap(result), "", "  ")
		fmt.Println(string(data))
	default: // "plain"
		for _, node := range result.Nodes {
			fmt.Println(node.Path)
		}
	}
}
// printTable prints nodes in a simple fixed-width table with a header and a
// dashed separator line.
//
// Bug fix: the separator was previously printed as string(make([]byte, 104)),
// which emits 104 NUL bytes — invisible at best, terminal-corrupting at
// worst — instead of a visible rule.
func printTable(nodes []*Node) {
	if len(nodes) == 0 {
		fmt.Println("No results")
		return
	}
	// Print header
	fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n", "NAME", "TYPE", "SIZE", "CREATED", "UPDATED")
	fmt.Println(strings.Repeat("-", 104))
	// Print rows
	for _, node := range nodes {
		fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n",
			truncateString(node.Name, 40),
			node.Type,
			formatSize(node.Size),
			formatTime(node.CreatedAt),
			formatTime(node.UpdatedAt),
		)
	}
}
// truncateString truncates s to at most maxLen bytes, appending "..." when
// content is dropped.
//
// Bug fix: the previous version sliced s[:maxLen-3] unconditionally, which
// panics for maxLen < 3 and exceeds maxLen for maxLen == 3; small limits now
// hard-cut instead.
//
// NOTE(review): truncation is byte-based and may split a multi-byte UTF-8
// rune — acceptable for ASCII table output, revisit for non-ASCII names.
func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		if maxLen < 0 {
			maxLen = 0
		}
		return s[:maxLen]
	}
	return s[:maxLen-3] + "..."
}
// IsValidPath checks if a path is valid: non-empty and free of traversal
// ("..", "//"), wildcard, shell-special, and NUL characters.
//
// Uses strings.Contains instead of the hand-rolled containsString helper —
// standard library over bespoke scanning.
func IsValidPath(path string) bool {
	if path == "" {
		return false
	}
	// Reject fragments that would enable traversal or confuse the backend.
	invalidChars := []string{"..", "//", "\\", "*", "?", "<", ">", "|", "\x00"}
	for _, fragment := range invalidChars {
		if strings.Contains(path, fragment) {
			return false
		}
	}
	return true
}
// containsString reports whether substr occurs in s (naive scan, equivalent
// to strings.Contains; the empty substring always matches).
func containsString(s, substr string) bool {
	if len(substr) == 0 {
		return true
	}
	for i := 0; i+len(substr) <= len(s); i++ {
		if s[i:i+len(substr)] == substr {
			return true
		}
	}
	return false
}
// JoinPath joins path components with "/", collapsing the boundary slashes
// between adjacent components and skipping empty components after the first
// non-empty one.
func JoinPath(components ...string) string {
	if len(components) == 0 {
		return ""
	}
	joined := components[0]
	for _, component := range components[1:] {
		switch {
		case joined == "":
			joined = component
		case component == "":
			// skip empty components
		default:
			// Trim the trailing slashes off the accumulated path...
			for len(joined) > 0 && joined[len(joined)-1] == '/' {
				joined = joined[:len(joined)-1]
			}
			// ...and the leading slashes off the incoming component.
			lead := 0
			for lead < len(component) && component[lead] == '/' {
				lead++
			}
			joined = joined + "/" + component[lead:]
		}
	}
	return joined
}
// GetParentPath returns the parent path of a given path
func GetParentPath(path string) string {
path = normalizePath(path)
parts := SplitPath(path)
if len(parts) <= 1 {
return ""
}
return joinStrings(parts[:len(parts)-1], "/")
}
// GetBaseName returns the last component of a path
func GetBaseName(path string) string {
path = normalizePath(path)
parts := SplitPath(path)
if len(parts) == 0 {
return ""
}
return parts[len(parts)-1]
}
// HasPrefix reports whether path lies at or under prefix after both are
// normalized; an empty prefix matches everything.
func HasPrefix(path, prefix string) bool {
	cleanPath := normalizePath(path)
	cleanPrefix := normalizePath(prefix)
	switch {
	case cleanPrefix == "", cleanPath == cleanPrefix:
		return true
	case len(cleanPath) > len(cleanPrefix):
		// A proper descendant must continue with a path separator.
		return cleanPath[:len(cleanPrefix)+1] == cleanPrefix+"/"
	default:
		return false
	}
}