Feat: Initialize context engine CLI (#13776)

### What problem does this PR solve?

- Add multiple output formats to ragflow_cli
- Initialize contextengine as a Go module
  - ls datasets/ls files
  - cat file
  - search -d dir -q query

issue: #13714

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yingfeng
2026-03-26 21:07:06 +08:00
committed by GitHub
parent 3b1e77a6d4
commit 6e309f9d0a
14 changed files with 3753 additions and 155 deletions

View File

@ -0,0 +1,49 @@
# ContextFS - Context Engine File System
ContextFS is a context engine interface for RAGFlow, providing users with a Unix-like file system interface to manage datasets, tools, skills, and memories.
## Directory Structure
```
user_id/
├── datasets/
│ └── my_dataset/
│ └── ...
├── tools/
│ ├── registry.json
│ └── tool_name/
│ ├── DOC.md
│ └── ...
├── skills/
│ ├── registry.json
│ └── skill_name/
│ ├── SKILL.md
│ └── ...
└── memories/
└── memory_id/
├── sessions/
│ ├── messages/
│ ├── summaries/
│ │ └── session_id/
│ │ └── summary-{datetime}.md
│ └── tools/
│ └── session_id/
│ └── {tool_name}.md # User level of memory on Tools usage
├── users/
│ ├── profile.md
│ ├── preferences/
│ └── entities/
└── agents/
└── agent_space/
├── tools/
│ └── {tool_name}.md # Agent level of memory on Tools usage
└── skills/
└── {skill_name}.md # Agent level of memory on Skills usage
```
## Supported Commands
- `ls [path]` - List directory contents
- `cat <path>` - Display file contents (only for text files)
- `search <query>` - Search content

View File

@ -0,0 +1,781 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
)
// HTTPResponse represents an HTTP response
type HTTPResponse struct {
	StatusCode int                 // HTTP status code of the response
	Body       []byte              // raw response body
	Headers    map[string][]string // response headers (one value list per name)
	Duration   float64             // request duration; unit not shown here — presumably seconds, TODO confirm
}

// HTTPClientInterface defines the interface needed from HTTPClient
type HTTPClientInterface interface {
	// Request performs an HTTP request. useAPIBase selects between the
	// /api/v1 base and the bare /v1 base (see callers); authKind "auto" is
	// used throughout this file. jsonBody, when non-nil, is sent as JSON.
	Request(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (*HTTPResponse, error)
}

// DatasetProvider handles datasets and their documents
// Path structure:
// - datasets/ -> List all datasets
// - datasets/{name} -> List documents in dataset
// - datasets/{name}/{doc_name} -> Get document info
type DatasetProvider struct {
	BaseProvider
	httpClient HTTPClientInterface // backend used for all dataset API calls
}
// NewDatasetProvider creates a new DatasetProvider backed by the given
// HTTP client.
func NewDatasetProvider(httpClient HTTPClientInterface) *DatasetProvider {
	p := &DatasetProvider{httpClient: httpClient}
	p.BaseProvider = BaseProvider{
		name:        "datasets",
		description: "Dataset management provider",
		rootPath:    "datasets",
	}
	return p
}

// Supports reports whether this provider can handle the given path
// (the "datasets" root or anything beneath it).
func (p *DatasetProvider) Supports(path string) bool {
	n := normalizePath(path)
	if n == "datasets" {
		return true
	}
	return strings.HasPrefix(n, "datasets/")
}
// List lists nodes at the given path.
// subPath is relative to "datasets/": empty lists all datasets,
// "{name}" lists that dataset's documents, and "{name}/{doc_name}"
// returns a single document node. The hidden ".knowledgebase" dataset
// is never accessible.
func (p *DatasetProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
	// Refuse access to the internal .knowledgebase dataset.
	if subPath == ".knowledgebase" || strings.HasPrefix(subPath, ".knowledgebase/") {
		return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
	}
	if subPath == "" {
		return p.listDatasets(ctx, opts)
	}
	switch parts := SplitPath(subPath); len(parts) {
	case 1:
		// datasets/{name}: default behavior is listing its documents.
		return p.listDocuments(ctx, parts[0], opts)
	case 2:
		// datasets/{name}/{doc_name}: info for a single document.
		return p.getDocumentNode(ctx, parts[0], parts[1])
	default:
		return nil, fmt.Errorf("invalid path: %s", subPath)
	}
}
// Search searches for datasets or documents
// An empty query degrades to a plain listing of the path; otherwise the
// query is routed either to a per-dataset document search or to a global
// dataset search.
func (p *DatasetProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	if opts.Query == "" {
		return p.List(ctx, subPath, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}
	// If searching under a specific dataset's files
	// NOTE(review): this only triggers for "{name}/files/..." paths; a
	// plain "{name}" falls through to searchDatasets — confirm this
	// matches the CLI's path conventions.
	parts := SplitPath(subPath)
	if len(parts) >= 2 && parts[1] == "files" {
		datasetName := parts[0]
		return p.searchDocuments(ctx, datasetName, opts)
	}
	// Otherwise search datasets
	return p.searchDatasets(ctx, opts)
}
// Cat retrieves document content.
// For datasets:
//   - cat datasets                  -> error: datasets is a directory
//   - cat datasets/kb_name          -> error: kb_name is a directory
//   - cat datasets/kb_name/doc_name -> would retrieve document content (not implemented yet)
func (p *DatasetProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
	if subPath == "" {
		return nil, fmt.Errorf("'datasets' is a directory, not a file")
	}
	switch parts := SplitPath(subPath); len(parts) {
	case 1:
		// datasets/{name} is a dataset directory, not a file.
		return nil, fmt.Errorf("'%s' is a directory, not a file", parts[0])
	case 2:
		// datasets/{name}/{doc_name}: content retrieval is not implemented.
		return nil, fmt.Errorf("document content retrieval not yet implemented for '%s'", parts[1])
	default:
		return nil, fmt.Errorf("invalid path for cat: %s", subPath)
	}
}
// ==================== Dataset Operations ====================
// listDatasets fetches all datasets from the API and returns them as
// directory nodes. The hidden ".knowledgebase" dataset is filtered out;
// opts.Limit (when set) truncates the returned page while Total still
// reflects the full filtered count.
func (p *DatasetProvider) listDatasets(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                      `json:"code"`
		Data    []map[string]interface{} `json:"data"`
		Message string                   `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	// Non-zero code is an application-level error from the backend.
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	nodes := make([]*Node, 0, len(apiResp.Data))
	for _, ds := range apiResp.Data {
		node := p.datasetToNode(ds)
		// Skip hidden .knowledgebase dataset (trim whitespace for safety)
		if strings.TrimSpace(node.Name) == ".knowledgebase" {
			continue
		}
		nodes = append(nodes, node)
	}
	total := len(nodes)
	// Apply limit if specified
	if opts != nil && opts.Limit > 0 && opts.Limit < len(nodes) {
		nodes = nodes[:opts.Limit]
	}
	return &Result{
		Nodes: nodes,
		Total: total,
	}, nil
}
// getDataset returns the Node for the dataset with the given name, or an
// ErrNotFound-wrapped error when no dataset matches. The hidden
// .knowledgebase dataset is never returned.
func (p *DatasetProvider) getDataset(ctx stdctx.Context, name string) (*Node, error) {
	if name == ".knowledgebase" {
		return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
	}
	// The API has no lookup-by-name endpoint, so fetch the full list and
	// scan for an exact name match.
	resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                      `json:"code"`
		Data    []map[string]interface{} `json:"data"`
		Message string                   `json:"message"`
	}
	if err = json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	for _, ds := range apiResp.Data {
		if getString(ds["name"]) != name {
			continue
		}
		return p.datasetToNode(ds), nil
	}
	return nil, fmt.Errorf("%s: dataset '%s'", ErrNotFound, name)
}
// searchDatasets searches datasets. With no query it is a plain listing;
// otherwise it delegates to the semantic retrieval API.
func (p *DatasetProvider) searchDatasets(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	if opts.Query != "" {
		// Use retrieval API for semantic search.
		return p.searchWithRetrieval(ctx, opts)
	}
	listOpts := &ListOptions{Limit: opts.Limit, Offset: opts.Offset}
	return p.listDatasets(ctx, listOpts)
}
// searchWithRetrieval performs semantic search using the retrieval API
// It resolves the target kb_ids (from opts.Dirs, or all datasets when no
// dirs are given), posts one retrieval request, and converts the returned
// chunks to nodes. TopK defaults to 10 and Threshold to 0.2.
func (p *DatasetProvider) searchWithRetrieval(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	// Determine kb_ids to search in
	var kbIDs []string
	var datasetsToSearch []*Node
	if len(opts.Dirs) > 0 && opts.Dirs[0] != "datasets" {
		// Search in specific datasets
		for _, dir := range opts.Dirs {
			// Extract dataset name from path (e.g., "datasets/kb1" -> "kb1")
			datasetName := dir
			if strings.HasPrefix(dir, "datasets/") {
				datasetName = dir[len("datasets/"):]
			}
			ds, err := p.getDataset(ctx, datasetName)
			if err != nil {
				// Try case-insensitive match
				allResult, listErr := p.listDatasets(ctx, nil)
				if listErr == nil {
					for _, d := range allResult.Nodes {
						if strings.EqualFold(d.Name, datasetName) {
							ds = d
							err = nil
							break
						}
					}
				}
				if err != nil {
					return nil, fmt.Errorf("dataset not found: %s", datasetName)
				}
			}
			datasetsToSearch = append(datasetsToSearch, ds)
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	} else {
		// Search in all datasets
		allResult, err := p.listDatasets(ctx, nil)
		if err != nil {
			return nil, err
		}
		datasetsToSearch = allResult.Nodes
		for _, ds := range datasetsToSearch {
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	}
	// No resolvable datasets: return an empty result rather than an error.
	if len(kbIDs) == 0 {
		return &Result{
			Nodes: []*Node{},
			Total: 0,
		}, nil
	}
	// Build kb_id -> dataset name mapping
	kbIDToName := make(map[string]string)
	for _, ds := range datasetsToSearch {
		kbID := getString(ds.Metadata["id"])
		if kbID != "" && ds.Name != "" {
			kbIDToName[kbID] = ds.Name
		}
	}
	// Build retrieval request
	payload := map[string]interface{}{
		"kb_id":    kbIDs,
		"question": opts.Query,
	}
	// Set top_k (default to 10 if not specified)
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK
	// Set similarity threshold (default to 0.2 if not specified to match UI behavior)
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold
	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...)
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Parse chunks from response
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}
	// Apply top_k limit if specified (API may return more results)
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// chunkToNodeWithKBMapping converts a chunk map to a Node with kb_id -> name mapping
// Field names vary across API versions, so each attribute is probed under
// several candidate keys. kbIDToName maps dataset IDs to display names and
// may be nil (names then fall back to the chunk's own kb_name field).
func (p *DatasetProvider) chunkToNodeWithKBMapping(chunk map[string]interface{}, kbIDToName map[string]string) *Node {
	// Extract chunk content - try multiple field names
	content := ""
	if v, ok := chunk["content_with_weight"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content_ltks"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["text"].(string); ok && v != "" {
		content = v
	}
	// Get chunk_id for URI
	chunkID := ""
	if v, ok := chunk["chunk_id"].(string); ok {
		chunkID = v
	} else if v, ok := chunk["id"].(string); ok {
		chunkID = v
	}
	// Get document name and ID
	docName := ""
	if v, ok := chunk["docnm_kwd"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["docnm"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["doc_name"].(string); ok && v != "" {
		docName = v
	}
	docID := ""
	if v, ok := chunk["doc_id"].(string); ok && v != "" {
		docID = v
	}
	// Get dataset/kb name from mapping or chunk data
	datasetName := ""
	datasetID := ""
	// First try to get kb_id from chunk (could be string or array)
	if v, ok := chunk["kb_id"].(string); ok && v != "" {
		datasetID = v
	} else if v, ok := chunk["kb_id"].([]interface{}); ok && len(v) > 0 {
		if s, ok := v[0].(string); ok {
			datasetID = s
		}
	}
	// Look up dataset name from mapping using kb_id
	if datasetID != "" && kbIDToName != nil {
		if name, ok := kbIDToName[datasetID]; ok && name != "" {
			datasetName = name
		}
	}
	// Fallback to kb_name from chunk if mapping doesn't have it
	if datasetName == "" {
		if v, ok := chunk["kb_name"].(string); ok && v != "" {
			datasetName = v
		}
	}
	// Build URI path: prefer names over IDs for readability
	// Format: datasets/{dataset_name}/{doc_name}
	path := "/datasets"
	if datasetName != "" {
		path += "/" + datasetName
	} else if datasetID != "" {
		path += "/" + datasetID
	}
	if docName != "" {
		path += "/" + docName
	} else if docID != "" {
		path += "/" + docID
	}
	// Use doc_name or chunk_id as the name if content is empty
	name := content
	if name == "" {
		if docName != "" {
			name = docName
		} else if chunkID != "" {
			// Keep the displayed ID short: at most 16 characters.
			name = "chunk:" + chunkID[:min(len(chunkID), 16)]
		} else {
			name = "(empty)"
		}
	}
	node := &Node{
		Name:     name,
		Path:     path,
		Type:     NodeTypeDocument,
		Metadata: chunk,
	}
	// Parse timestamps if available
	if createTime, ok := chunk["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := chunk["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}
	return node
}
// chunkToNode converts a chunk map to a Node without a kb_id -> name
// mapping (legacy path; dataset names then come from chunk fields only).
func (p *DatasetProvider) chunkToNode(chunk map[string]interface{}) *Node {
	var noMapping map[string]string
	return p.chunkToNodeWithKBMapping(chunk, noMapping)
}
// ==================== Document Operations ====================
// listDocuments lists the documents of the named dataset.
// opts.Limit maps to the API's page_size and opts.Offset is converted to
// a 1-based page number.
func (p *DatasetProvider) listDocuments(ctx stdctx.Context, datasetName string, opts *ListOptions) (*Result, error) {
	// Resolve the dataset name to its backend ID first.
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}
	datasetID := getString(ds.Metadata["id"])
	if datasetID == "" {
		return nil, fmt.Errorf("dataset ID not found")
	}
	// Build query parameters.
	// NOTE(review): these are passed in the Request "headers" slot —
	// presumably the client treats them as query params for GET; confirm
	// against the HTTPClient implementation.
	params := make(map[string]string)
	if opts != nil {
		if opts.Limit > 0 {
			params["page_size"] = strconv.Itoa(opts.Limit)
		}
		// Guard Limit > 0: the original divided by opts.Limit even when
		// it was zero, which panics when only Offset is set.
		if opts.Offset > 0 && opts.Limit > 0 {
			params["page"] = strconv.Itoa(opts.Offset/opts.Limit + 1)
		}
	}
	path := fmt.Sprintf("/datasets/%s/documents", datasetID)
	resp, err := p.httpClient.Request("GET", path, true, "auto", params, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code int `json:"code"`
		Data struct {
			Docs []map[string]interface{} `json:"docs"`
		} `json:"data"`
		Message string `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	nodes := make([]*Node, 0, len(apiResp.Data.Docs))
	for _, doc := range apiResp.Data.Docs {
		node := p.documentToNode(doc, datasetName)
		nodes = append(nodes, node)
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// getDocumentNode wraps getDocument in a single-node Result.
func (p *DatasetProvider) getDocumentNode(ctx stdctx.Context, datasetName, docName string) (*Result, error) {
	doc, err := p.getDocument(ctx, datasetName, docName)
	if err != nil {
		return nil, err
	}
	return &Result{Nodes: []*Node{doc}, Total: 1}, nil
}

// getDocument finds the named document inside a dataset by listing all
// documents and matching on name; returns an ErrNotFound-wrapped error
// when absent.
func (p *DatasetProvider) getDocument(ctx stdctx.Context, datasetName, docName string) (*Node, error) {
	result, err := p.listDocuments(ctx, datasetName, nil)
	if err != nil {
		return nil, err
	}
	for _, candidate := range result.Nodes {
		if candidate.Name != docName {
			continue
		}
		return candidate, nil
	}
	return nil, fmt.Errorf("%s: document '%s' in dataset '%s'", ErrNotFound, docName, datasetName)
}
// searchDocuments performs a semantic search restricted to one dataset.
// With no query it is a plain document listing. Mirrors
// searchWithRetrieval but targets a single kb_id; defaults are the same
// (TopK 10, Threshold 0.2).
func (p *DatasetProvider) searchDocuments(ctx stdctx.Context, datasetName string, opts *SearchOptions) (*Result, error) {
	// If no query is provided, just list documents
	if opts.Query == "" {
		return p.listDocuments(ctx, datasetName, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}
	// Use retrieval API for semantic search in specific dataset
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}
	kbID := getString(ds.Metadata["id"])
	if kbID == "" {
		return nil, fmt.Errorf("dataset ID not found for '%s'", datasetName)
	}
	// Build kb_id -> dataset name mapping
	kbIDToName := map[string]string{kbID: datasetName}
	// Build retrieval request for specific dataset
	payload := map[string]interface{}{
		"kb_id":    []string{kbID},
		"question": opts.Query,
	}
	// Set top_k (default to 10 if not specified)
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK
	// Set similarity threshold (default to 0.2 if not specified to match UI behavior)
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold
	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...)
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Parse chunks from response
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}
	// Apply top_k limit if specified (API may return more results)
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// ==================== Helper Functions ====================
// datasetToNode converts a raw dataset map from the API into a directory
// Node rooted at /datasets/{name}.
func (p *DatasetProvider) datasetToNode(ds map[string]interface{}) *Node {
	name := getString(ds["name"])
	node := &Node{
		Name:     name,
		Path:     "/datasets/" + name,
		Type:     NodeTypeDirectory,
		Metadata: ds,
	}
	// Timestamps may arrive under *_time (numeric) or *_date (string)
	// keys; the first non-nil candidate wins.
	for _, key := range []string{"create_time", "create_date"} {
		if v, ok := ds[key]; ok && v != nil {
			node.CreatedAt = parseTime(v)
			break
		}
	}
	for _, key := range []string{"update_time", "update_date"} {
		if v, ok := ds[key]; ok && v != nil {
			node.UpdatedAt = parseTime(v)
			break
		}
	}
	return node
}
// documentToNode converts a raw document map into a document Node under
// the given dataset.
func (p *DatasetProvider) documentToNode(doc map[string]interface{}, datasetName string) *Node {
	name := getString(doc["name"])
	node := &Node{
		Name: name,
		// Leading slash for consistency with datasetToNode and
		// chunkToNodeWithKBMapping, which both emit "/datasets/..." paths
		// (the original omitted it only here).
		Path:     "/datasets/" + datasetName + "/" + name,
		Type:     NodeTypeDocument,
		Metadata: doc,
	}
	// Parse size (API may deliver it as any numeric JSON type).
	if size, ok := doc["size"]; ok {
		node.Size = int64(getFloat(size))
	}
	// Parse timestamps
	if createTime, ok := doc["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := doc["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}
	return node
}
// getString coerces an arbitrary JSON-decoded value to a string: nil
// becomes "", strings pass through, and everything else is formatted
// with %v.
func getString(v interface{}) string {
	switch s := v.(type) {
	case nil:
		return ""
	case string:
		return s
	default:
		return fmt.Sprintf("%v", s)
	}
}

// min returns the smaller of two ints.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
func getFloat(v interface{}) float64 {
if v == nil {
return 0
}
switch val := v.(type) {
case float64:
return val
case float32:
return float64(val)
case int:
return float64(val)
case int64:
return float64(val)
default:
return 0
}
}
// parseTime converts an API timestamp value into a time.Time.
// It accepts numeric epoch values (seconds, or milliseconds when the
// value exceeds 1e12), numeric strings (optionally quoted), and a few
// common date layouts. Unparseable input yields the zero time.
func parseTime(v interface{}) time.Time {
	var ts int64
	switch val := v.(type) {
	case float64:
		ts = int64(val)
	case int64:
		ts = val
	case int:
		ts = int64(val)
	case string:
		// Trim quotes if present
		trimmed := strings.Trim(val, `"`)
		parsed, err := strconv.ParseInt(trimmed, 10, 64)
		if err != nil {
			// Not an epoch number: fall back to formatted date layouts.
			for _, layout := range []string{
				"2006-01-02 15:04:05",
				"2006-01-02T15:04:05",
				"2006-01-02T15:04:05Z",
				"2006-01-02",
			} {
				if t, perr := time.Parse(layout, trimmed); perr == nil {
					return t
				}
			}
			return time.Time{}
		}
		ts = parsed
	default:
		// Includes nil and any unexpected type.
		return time.Time{}
	}
	// Millisecond epochs (13 digits) are scaled down to seconds.
	if ts > 1e12 {
		ts /= 1000
	}
	return time.Unix(ts, 0)
}

View File

@ -0,0 +1,312 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"fmt"
"strings"
"time"
)
// Engine is the core of the Context Engine.
// It manages providers and routes commands to the appropriate provider.
type Engine struct {
	providers []Provider
}

// NewEngine creates a new, empty Context Engine.
func NewEngine() *Engine {
	return &Engine{providers: []Provider{}}
}

// RegisterProvider registers a provider with the engine.
func (e *Engine) RegisterProvider(provider Provider) {
	e.providers = append(e.providers, provider)
}

// GetProviders returns name/description info for every registered
// provider, in registration order.
func (e *Engine) GetProviders() []ProviderInfo {
	infos := make([]ProviderInfo, len(e.providers))
	for i, p := range e.providers {
		infos[i] = ProviderInfo{Name: p.Name(), Description: p.Description()}
	}
	return infos
}
// Execute executes a command and returns the result
func (e *Engine) Execute(ctx stdctx.Context, cmd *Command) (*Result, error) {
	switch cmd.Type {
	case CommandList:
		return e.List(ctx, cmd.Path, parseListOptions(cmd.Params))
	case CommandSearch:
		return e.Search(ctx, cmd.Path, parseSearchOptions(cmd.Params))
	case CommandCat:
		// NOTE(review): the content returned by Cat is discarded here and
		// the caller receives a nil Result — callers that need the bytes
		// must invoke e.Cat directly. Confirm this is intentional.
		_, err := e.Cat(ctx, cmd.Path)
		return nil, err
	default:
		return nil, fmt.Errorf("unknown command type: %s", cmd.Type)
	}
}
// resolveProvider finds the provider for a given path
// It returns the provider together with the subpath relative to the
// provider's root ("" when the path is exactly the provider root).
// Unclaimed paths fall back to the files provider, so a bare "myskills"
// behaves like "files/myskills".
func (e *Engine) resolveProvider(path string) (Provider, string, error) {
	path = normalizePath(path)
	for _, provider := range e.providers {
		if provider.Supports(path) {
			// Parse the subpath relative to the provider root
			// Get provider name to calculate subPath
			providerName := provider.Name()
			var subPath string
			if path == providerName {
				subPath = ""
			} else if strings.HasPrefix(path, providerName+"/") {
				subPath = path[len(providerName)+1:]
			} else {
				// Provider claims the path but it is not under its name;
				// pass the path through unchanged.
				subPath = path
			}
			return provider, subPath, nil
		}
	}
	// If no provider supports this path, check if FileProvider can handle it as a fallback
	// This allows paths like "myskills" to be treated as "files/myskills"
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		// Check if the path looks like a file manager path (single component, not matching other providers)
		parts := SplitPath(path)
		if len(parts) > 0 && parts[0] != "datasets" {
			return fileProvider, path, nil
		}
	}
	return nil, "", fmt.Errorf("%s: %s", ErrProviderNotFound, path)
}
// List lists nodes at the given path
// If path is empty, returns:
// 1. Built-in providers (e.g., datasets)
// 2. Top-level directories from files provider (if any)
// When provider resolution fails, the files provider is tried once more
// with the full path so "ls myfolder" works as "ls files/myfolder".
func (e *Engine) List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error) {
	// Normalize path
	path = normalizePath(path)
	// If path is empty, return list of providers and files root directories
	if path == "" || path == "/" {
		return e.listRoot(ctx, opts)
	}
	provider, subPath, err := e.resolveProvider(path)
	if err != nil {
		// If not found, try to find in files provider as a fallback
		// This allows "ls myfolder" to work as "ls files/myfolder"
		if fileProvider := e.getFileProvider(); fileProvider != nil {
			result, ferr := fileProvider.List(ctx, path, opts)
			if ferr == nil {
				return result, nil
			}
		}
		// Fallback failed too: report the original resolution error.
		return nil, err
	}
	return provider.List(ctx, subPath, opts)
}
// listRoot returns the root listing:
// 1. Built-in providers (datasets, etc.)
// 2. Top-level folders from files provider (file_manager)
// Errors from the files provider are silently ignored so the built-in
// providers still appear.
func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	nodes := make([]*Node, 0)
	// Add built-in providers first (like datasets)
	for _, p := range e.providers {
		// Skip files provider from this list - we'll add its children instead
		if p.Name() == "files" {
			continue
		}
		nodes = append(nodes, &Node{
			Name:      p.Name(),
			Path:      "/" + p.Name(),
			Type:      NodeTypeDirectory,
			CreatedAt: time.Now(),
			Metadata: map[string]interface{}{
				"description": p.Description(),
			},
		})
	}
	// Add top-level folders from files provider (file_manager)
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		filesResult, err := fileProvider.List(ctx, "", opts)
		if err == nil {
			for _, node := range filesResult.Nodes {
				// Only add folders (directories), not files
				if node.Type == NodeTypeDirectory {
					// Ensure path doesn't have /files/ prefix for display
					// NOTE(review): TrimPrefix "files/" runs before
					// TrimPrefix "/", so a path like "/files/x" keeps its
					// "files/" segment — confirm file-provider paths never
					// start with a slash.
					node.Path = strings.TrimPrefix(node.Path, "files/")
					node.Path = strings.TrimPrefix(node.Path, "/")
					nodes = append(nodes, node)
				}
			}
		}
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// getFileProvider returns the registered "files" provider, or nil when
// none is registered.
func (e *Engine) getFileProvider() Provider {
	for i := range e.providers {
		if e.providers[i].Name() == "files" {
			return e.providers[i]
		}
	}
	return nil
}
// Search searches for nodes matching the query under the given path.
func (e *Engine) Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error) {
	provider, subPath, err := e.resolveProvider(path)
	if err != nil {
		return nil, err
	}
	return provider.Search(ctx, subPath, opts)
}

// Cat retrieves the content of a file/document. When no provider claims
// the path, it falls back to the files provider so that
// "cat myfolder/file.txt" behaves like "cat files/myfolder/file.txt".
func (e *Engine) Cat(ctx stdctx.Context, path string) ([]byte, error) {
	provider, subPath, err := e.resolveProvider(path)
	if err == nil {
		return provider.Cat(ctx, subPath)
	}
	if fp := e.getFileProvider(); fp != nil {
		return fp.Cat(ctx, path)
	}
	return nil, err
}
// ParsePath parses a path and returns structured path information:
// the owning provider name, the split components, and (when present)
// the second component as the resource name.
func (e *Engine) ParsePath(path string) (*PathInfo, error) {
	path = normalizePath(path)
	components := SplitPath(path)
	if len(components) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	providerName := components[0]
	isRoot := len(components) == 1
	// Find the provider owning this path.
	var provider Provider
	for _, p := range e.providers {
		// Match the exact first component or a "{provider}/" prefix.
		// (The original used a bare HasPrefix on the provider name, which
		// wrongly matched e.g. "filesX" against the "files" provider.)
		if p.Name() == providerName || strings.HasPrefix(path, p.Name()+"/") {
			provider = p
			break
		}
	}
	if provider == nil {
		return nil, fmt.Errorf("%s: %s", ErrProviderNotFound, path)
	}
	info := &PathInfo{
		Provider:   providerName,
		Path:       path,
		Components: components,
		IsRoot:     isRoot,
	}
	// Extract resource ID or name if available.
	if len(components) >= 2 {
		info.ResourceName = components[1]
	}
	return info, nil
}
// parseListOptions extracts ListOptions from generic command params.
// Missing or wrongly-typed entries are ignored, leaving zero values.
func parseListOptions(params map[string]interface{}) *ListOptions {
	opts := &ListOptions{}
	if params == nil {
		return opts
	}
	if v, ok := params["recursive"].(bool); ok {
		opts.Recursive = v
	}
	if v, ok := params["limit"].(int); ok {
		opts.Limit = v
	}
	if v, ok := params["offset"].(int); ok {
		opts.Offset = v
	}
	if v, ok := params["sort_by"].(string); ok {
		opts.SortBy = v
	}
	if v, ok := params["sort_order"].(string); ok {
		opts.SortOrder = v
	}
	return opts
}
// parseSearchOptions parses command params into SearchOptions
// Missing or wrongly-typed entries are ignored, leaving zero values.
// NOTE(review): the int and []string assertions assume params were built
// in-process; JSON-decoded params would carry float64 and []interface{}
// and be silently dropped — confirm how Command.Params is populated.
func parseSearchOptions(params map[string]interface{}) *SearchOptions {
	opts := &SearchOptions{}
	if params == nil {
		return opts
	}
	if query, ok := params["query"].(string); ok {
		opts.Query = query
	}
	if limit, ok := params["limit"].(int); ok {
		opts.Limit = limit
	}
	if offset, ok := params["offset"].(int); ok {
		opts.Offset = offset
	}
	if recursive, ok := params["recursive"].(bool); ok {
		opts.Recursive = recursive
	}
	if topK, ok := params["top_k"].(int); ok {
		opts.TopK = topK
	}
	if threshold, ok := params["threshold"].(float64); ok {
		opts.Threshold = threshold
	}
	if dirs, ok := params["dirs"].([]string); ok {
		opts.Dirs = dirs
	}
	return opts
}

View File

@ -0,0 +1,594 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"encoding/json"
"fmt"
"strings"
)
// FileProvider handles file operations using Python backend /files API
// Path structure:
// - files/ -> List root folder contents
// - files/{folder_name}/ -> List folder contents
// - files/{folder_name}/{file_name} -> Get file info/content
//
// Note: Uses Python backend API (useAPIBase=true):
// - GET /files?parent_id={id} -> List files/folders in parent
// - GET /files/{file_id} -> Get file info
// - POST /files -> Create folder or upload file
// - DELETE /files -> Delete files
// - GET /files/{file_id}/parent -> Get parent folder
// - GET /files/{file_id}/ancestors -> Get ancestor folders
// FileProvider serves the "files" namespace backed by the Python /files API.
type FileProvider struct {
	BaseProvider
	httpClient  HTTPClientInterface // backend used for all file API calls
	folderCache map[string]string   // path -> folder ID cache
	rootID      string              // root folder ID
}

// NewFileProvider creates a new FileProvider backed by the given HTTP
// client, with an empty folder-ID cache.
func NewFileProvider(httpClient HTTPClientInterface) *FileProvider {
	return &FileProvider{
		BaseProvider: BaseProvider{
			name:        "files",
			description: "File manager provider (Python server)",
			rootPath:    "files",
		},
		httpClient:  httpClient,
		folderCache: make(map[string]string),
	}
}

// Supports returns true if this provider can handle the given path
// (the "files" root or anything beneath it).
func (p *FileProvider) Supports(path string) bool {
	normalized := normalizePath(path)
	return normalized == "files" || strings.HasPrefix(normalized, "files/")
}
// List lists nodes at the given path.
// subPath is relative to "files/": empty lists the root folder, a single
// component lists that folder, and deeper paths are resolved by
// traversing folder-by-folder.
func (p *FileProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
	if subPath == "" {
		return p.listRootFolder(ctx, opts)
	}
	parts := SplitPath(subPath)
	if len(parts) > 1 {
		// Multi-level path such as myskills/skill-name/dir1.
		return p.listPathRecursive(ctx, parts, opts)
	}
	// files/{folder_name}: list that folder's contents.
	return p.listFolderByName(ctx, parts[0], opts)
}
// listPathRecursive recursively traverses the path and lists the final component
// Each intermediate component must be a directory; resolved folder IDs
// are memoized in p.folderCache. A final file component is returned as a
// single-node result; a final directory is listed.
func (p *FileProvider) listPathRecursive(ctx stdctx.Context, parts []string, opts *ListOptions) (*Result, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	// Start from root to find the first folder
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]
	// Traverse through intermediate directories
	for i := 1; i < len(parts); i++ {
		partName := parts[i]
		// List contents of current folder to find the next part
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}
		// Find the next component
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - if it's a directory, list its contents
					if node.Type == NodeTypeDirectory {
						childID := getString(node.Metadata["id"])
						if childID == "" {
							return nil, fmt.Errorf("folder ID not found for '%s'", partName)
						}
						newPath := currentPath + "/" + partName
						// Memoize the resolved folder ID for later lookups.
						p.folderCache[newPath] = childID
						return p.listFilesByParentID(ctx, childID, newPath, opts)
					}
					// It's a file - return the file node
					return &Result{
						Nodes: []*Node{node},
						Total: 1,
					}, nil
				}
				// Not the last component - must be a directory
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}
		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}
	// Should have returned in the loop, but just in case
	return p.listFilesByParentID(ctx, currentFolderID, currentPath, opts)
}
// Search filters the directory listing at subPath by node name
// (case-insensitive substring match). An empty query returns the plain
// listing unchanged.
//
// The previous implementation issued the same List call twice — once for the
// empty-query fast path and once before filtering; the two are now merged.
//
// NOTE(review): this remains a client-side fallback, not a server-side
// search: Limit/Offset are applied to the underlying List call *before* name
// filtering, so a non-empty query can return fewer than Limit matches even
// when more exist on the server.
func (p *FileProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	result, err := p.List(ctx, subPath, &ListOptions{
		Limit:  opts.Limit,
		Offset: opts.Offset,
	})
	if err != nil || opts.Query == "" {
		return result, err
	}
	// Simple name filtering
	query := strings.ToLower(opts.Query)
	var filtered []*Node
	for _, node := range result.Nodes {
		if strings.Contains(strings.ToLower(node.Name), query) {
			filtered = append(filtered, node)
		}
	}
	return &Result{
		Nodes: filtered,
		Total: len(filtered),
	}, nil
}
// Cat retrieves file content.
// subPath must name a file at least two levels deep (files/{folder}/{file});
// directories are rejected.
func (p *FileProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
	if subPath == "" {
		return nil, fmt.Errorf("cat requires a file path: files/{folder}/{file}")
	}
	parts := SplitPath(subPath)
	if len(parts) < 2 {
		return nil, fmt.Errorf("invalid path format, expected: files/{folder}/{file}")
	}
	// Resolve the path to a concrete node via folder-by-folder traversal.
	target, err := p.findNodeByPath(ctx, parts)
	if err != nil {
		return nil, err
	}
	if target.Type == NodeTypeDirectory {
		return nil, fmt.Errorf("'%s' is a directory, not a file", subPath)
	}
	fileID := getString(target.Metadata["id"])
	if fileID == "" {
		return nil, fmt.Errorf("file ID not found")
	}
	// Stream the raw bytes from the server.
	return p.downloadFile(ctx, fileID)
}
// findNodeByPath recursively traverses the path to find the target node.
//
// parts is the already-split path relative to "files/". Folder IDs discovered
// along the way are cached in p.folderCache.
//
// NOTE(review): the final return is reached only when len(parts) == 1, in
// which case an existing top-level folder is still reported as ErrNotFound —
// the only visible caller (Cat) requires len(parts) >= 2, so this is latent.
// Also largely duplicates listPathRecursive; consider one shared traversal.
func (p *FileProvider) findNodeByPath(ctx stdctx.Context, parts []string) (*Node, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	// Start from root to find the first folder
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]
	// Traverse through intermediate directories
	for i := 1; i < len(parts); i++ {
		partName := parts[i]
		// List contents of current folder to find the next part
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}
		// Find the next component
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - return it
					return node, nil
				}
				// Not the last component - must be a directory
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}
		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}
	return nil, fmt.Errorf("%s: '%s'", ErrNotFound, strings.Join(parts, "/"))
}
// ==================== Python Server API Methods ====================
// getRootID gets or caches the root folder ID.
//
// Returns the cached value on subsequent calls. If the server response
// carries no explicit "root_id", it returns "" — downstream listings then
// omit parent_id, which the server treats as a root listing.
//
// NOTE(review): ctx is accepted but not threaded into the HTTP request, and
// p.rootID is written without a lock (fine for a single-threaded CLI). The
// "auto" argument presumably selects an auth mode on the HTTP client —
// confirm against HTTPClientInterface.
func (p *FileProvider) getRootID(ctx stdctx.Context) (string, error) {
	if p.rootID != "" {
		return p.rootID, nil
	}
	// List files without parent_id to get root folder
	resp, err := p.httpClient.Request("GET", "/files", true, "auto", nil, nil)
	if err != nil {
		return "", err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}
	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Try to find root folder ID from response
	if rootID, ok := apiResp.Data["root_id"].(string); ok && rootID != "" {
		p.rootID = rootID
		return rootID, nil
	}
	// If no explicit root_id, use empty parent_id for root listing
	return "", nil
}
// listRootFolder lists the contents of the user's root folder by resolving
// the root folder ID and delegating to listFilesByParentID with an empty
// parent path.
func (p *FileProvider) listRootFolder(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	rootID, err := p.getRootID(ctx)
	if err != nil {
		return nil, err
	}
	return p.listFilesByParentID(ctx, rootID, "", opts)
}
// listFilesByParentID lists files/folders by parent ID.
//
// parentID "" omits the parent_id query parameter (server lists root).
// parentPath is the provider-relative prefix used for the returned
// Node.Path values and as the folderCache key prefix. Hidden
// ".knowledgebase" entries are filtered out.
//
// NOTE(review): opts.Offset/SortBy are ignored and page is fixed at 1, so a
// listing is capped at max(opts.Limit, 100) entries. parentID is interpolated
// into the query string without URL escaping — safe only if IDs are URL-safe
// tokens; consider url.Values.
func (p *FileProvider) listFilesByParentID(ctx stdctx.Context, parentID string, parentPath string, opts *ListOptions) (*Result, error) {
	// Build query parameters
	queryParams := make([]string, 0)
	if parentID != "" {
		queryParams = append(queryParams, fmt.Sprintf("parent_id=%s", parentID))
	}
	// Always set page=1 and page_size to ensure we get results
	pageSize := 100
	if opts != nil && opts.Limit > 0 {
		pageSize = opts.Limit
	}
	queryParams = append(queryParams, fmt.Sprintf("page_size=%d", pageSize))
	queryParams = append(queryParams, "page=1")
	// Build URL with query string
	path := "/files"
	if len(queryParams) > 0 {
		path = path + "?" + strings.Join(queryParams, "&")
	}
	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Extract files list from data - API returns {"total": N, "files": [...], "parent_folder": {...}}
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}
	nodes := make([]*Node, 0, len(files))
	for _, f := range files {
		name := getString(f["name"])
		// Skip hidden .knowledgebase folder
		if strings.TrimSpace(name) == ".knowledgebase" {
			continue
		}
		node := p.fileToNode(f, parentPath)
		nodes = append(nodes, node)
		// Cache folder ID so later path traversals skip a lookup round-trip
		if node.Type == NodeTypeDirectory || getString(f["type"]) == "folder" {
			if id := getString(f["id"]); id != "" {
				cacheKey := node.Name
				if parentPath != "" {
					cacheKey = parentPath + "/" + node.Name
				}
				p.folderCache[cacheKey] = id
			}
		}
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// listFolderByName lists the contents of a top-level folder identified by
// name, resolving the name to a folder ID first.
func (p *FileProvider) listFolderByName(ctx stdctx.Context, folderName string, opts *ListOptions) (*Result, error) {
	id, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}
	// Use the resolved ID as parent_id and the name as the path prefix.
	return p.listFilesByParentID(ctx, id, folderName, opts)
}
// getFolderIDByName finds folder ID by its name in root.
//
// Results are cached in p.folderCache under the bare folder name. The
// response parser accepts either a "files" or "docs" array — presumably two
// server versions; confirm which endpoints return which.
//
// NOTE(review): the listing is capped at page_size=100, page=1, so folders
// beyond the first 100 root entries cannot be resolved.
func (p *FileProvider) getFolderIDByName(ctx stdctx.Context, folderName string) (string, error) {
	// Check cache first
	if id, ok := p.folderCache[folderName]; ok {
		return id, nil
	}
	// List root folder to find the folder
	// (root-ID lookup errors are deliberately ignored; "" means "list root")
	rootID, _ := p.getRootID(ctx)
	queryParams := make([]string, 0)
	if rootID != "" {
		queryParams = append(queryParams, fmt.Sprintf("parent_id=%s", rootID))
	}
	queryParams = append(queryParams, "page_size=100", "page=1")
	path := "/files"
	if len(queryParams) > 0 {
		path = path + "?" + strings.Join(queryParams, "&")
	}
	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return "", err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}
	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Search for folder by name
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	} else if fileList, ok := apiResp.Data["docs"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}
	for _, f := range files {
		name := getString(f["name"])
		fileType := getString(f["type"])
		id := getString(f["id"])
		// Match by name and ensure it's a folder
		if name == folderName && fileType == "folder" && id != "" {
			p.folderCache[folderName] = id
			return id, nil
		}
	}
	return "", fmt.Errorf("%s: folder '%s'", ErrNotFound, folderName)
}
// getFileNode gets a file node by folder and file name.
// If fileName is a directory, returns the directory contents instead of the directory node.
//
// NOTE(review): not referenced by any method visible in this file — may be
// called elsewhere or may be dead code left from an earlier two-level design
// superseded by findNodeByPath/listPathRecursive; verify before removing.
func (p *FileProvider) getFileNode(ctx stdctx.Context, folderName, fileName string) (*Result, error) {
	folderID, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}
	// List files in folder to find the file
	result, err := p.listFilesByParentID(ctx, folderID, folderName, nil)
	if err != nil {
		return nil, err
	}
	// Find the specific file
	for _, node := range result.Nodes {
		if node.Name == fileName {
			// If it's a directory, list its contents instead of returning the node itself
			if node.Type == NodeTypeDirectory {
				childFolderID := getString(node.Metadata["id"])
				if childFolderID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", fileName)
				}
				// Cache the folder ID
				cacheKey := folderName + "/" + fileName
				p.folderCache[cacheKey] = childFolderID
				// Return directory contents
				return p.listFilesByParentID(ctx, childFolderID, cacheKey, nil)
			}
			// Return file node
			return &Result{
				Nodes: []*Node{node},
				Total: 1,
			}, nil
		}
	}
	return nil, fmt.Errorf("%s: file '%s' in folder '%s'", ErrNotFound, fileName, folderName)
}
// downloadFile downloads raw file content for the given file ID.
// On a non-200 status it surfaces the server's JSON error message when the
// body parses as an error envelope, otherwise a generic HTTP error.
func (p *FileProvider) downloadFile(ctx stdctx.Context, fileID string) ([]byte, error) {
	resp, err := p.httpClient.Request("GET", fmt.Sprintf("/files/%s", fileID), true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode == 200 {
		// Success: the body is the raw file content.
		return resp.Body, nil
	}
	// Try to parse error response
	var apiResp struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	}
	if json.Unmarshal(resp.Body, &apiResp) == nil && apiResp.Code != 0 {
		return nil, fmt.Errorf("%s", apiResp.Message)
	}
	return nil, fmt.Errorf("HTTP error %d", resp.StatusCode)
}
// ==================== Conversion Functions ====================
// fileToNode converts a file map (one entry of the server's "files" array)
// to a Node. parentPath, when non-empty, is prepended to the node path.
//
// NOTE(review): node.Metadata aliases the raw map f, so callers share (and
// could mutate) the parsed API response. Since Metadata is set to f, the
// nil-check before writing "id" below is effectively dead, and the write is
// a no-op whenever f already carries an "id" key.
func (p *FileProvider) fileToNode(f map[string]interface{}, parentPath string) *Node {
	name := getString(f["name"])
	fileType := getString(f["type"])
	fileID := getString(f["id"])
	// Determine node type: the server marks directories with type "folder";
	// everything else is treated as a plain file.
	nodeType := NodeTypeFile
	if fileType == "folder" {
		nodeType = NodeTypeDirectory
	}
	// Build path
	path := name
	if parentPath != "" {
		path = parentPath + "/" + name
	}
	node := &Node{
		Name:     name,
		Path:     path,
		Type:     nodeType,
		Metadata: f,
	}
	// Parse size (JSON numbers decode as float64)
	if size, ok := f["size"]; ok {
		node.Size = int64(getFloat(size))
	}
	// Parse timestamps
	if createTime, ok := f["create_time"]; ok && createTime != nil {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := f["update_time"]; ok && updateTime != nil {
		node.UpdatedAt = parseTime(updateTime)
	}
	// Store ID for later use
	if fileID != "" {
		if node.Metadata == nil {
			node.Metadata = make(map[string]interface{})
		}
		node.Metadata["id"] = fileID
	}
	return node
}

View File

@ -0,0 +1,180 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
)
// Provider is the interface for all context providers
// Each provider handles a specific resource type (datasets, chats, agents, etc.)
//
// Implementations typically embed BaseProvider for the Name/Description
// plumbing and implement Supports/List/Search/Cat against their backend.
type Provider interface {
	// Name returns the provider name (e.g., "datasets", "chats")
	Name() string
	// Description returns a human-readable description of the provider
	Description() string
	// Supports returns true if this provider can handle the given path
	// (the full user path, before it is stripped to a provider-relative one)
	Supports(path string) bool
	// List lists nodes at the given path
	List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error)
	// Search searches for nodes matching the query under the given path
	Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error)
	// Cat retrieves the content of a file/document at the given path
	// (text content only; the provider decides how to fetch it)
	Cat(ctx stdctx.Context, path string) ([]byte, error)
}
// BaseProvider provides common functionality for all providers
// (name/description accessors and root-path parsing helpers).
type BaseProvider struct {
	name        string // provider name, e.g. "files"
	description string // human-readable description shown in provider listings
	rootPath    string // path prefix this provider owns, e.g. "files"
}
// Name returns the provider name
func (p *BaseProvider) Name() string {
	return p.name
}
// Description returns the provider description
func (p *BaseProvider) Description() string {
	return p.description
}
// GetRootPath returns the root path for this provider
// (used by the engine to route user paths to the owning provider).
func (p *BaseProvider) GetRootPath() string {
	return p.rootPath
}
// IsRootPath reports whether the given path, once normalized, is exactly the
// provider's root path.
func (p *BaseProvider) IsRootPath(path string) bool {
	given, root := normalizePath(path), normalizePath(p.rootPath)
	return given == root
}
// ParsePath parses a path and returns the subpath relative to the provider
// root: "" for the root itself, the suffix after "root/" for children, and
// the normalized input unchanged when it does not live under the root.
func (p *BaseProvider) ParsePath(path string) string {
	sub := normalizePath(path)
	root := normalizePath(p.rootPath)
	switch {
	case sub == root:
		return ""
	case len(sub) > len(root) && sub[:len(root)+1] == root+"/":
		return sub[len(root)+1:]
	default:
		return sub
	}
}
// SplitPath splits a path into its non-empty components after normalization.
// A path that normalizes to "" yields an empty (non-nil) slice.
func SplitPath(path string) []string {
	cleaned := normalizePath(path)
	if cleaned == "" {
		return []string{}
	}
	pieces := splitString(cleaned, '/')
	components := make([]string, 0, len(pieces))
	for _, piece := range pieces {
		if piece == "" {
			continue
		}
		components = append(components, piece)
	}
	return components
}
// normalizePath normalizes a path: trims surrounding whitespace and slashes,
// drops empty and "." components, and resolves ".." against the preceding
// component (".." at the top is clamped, never escaping the root).
func normalizePath(path string) string {
	cleaned := trimSpace(path)
	if cleaned == "" {
		return ""
	}
	// Strip leading and trailing slashes.
	start, end := 0, len(cleaned)
	for start < end && cleaned[start] == '/' {
		start++
	}
	for end > start && cleaned[end-1] == '/' {
		end--
	}
	cleaned = cleaned[start:end]
	// Resolve "." and ".." with a component stack.
	var stack []string
	for _, part := range splitString(cleaned, '/') {
		switch part {
		case "", ".":
			// skip empty and current-directory markers
		case "..":
			if len(stack) > 0 {
				stack = stack[:len(stack)-1]
			}
		default:
			stack = append(stack, part)
		}
	}
	return joinStrings(stack, "/")
}
// Helper functions to avoid importing strings package in basic operations

// trimSpace strips ASCII whitespace (space, tab, newline, carriage return)
// from both ends of s. Behaves like strings.TrimSpace for ASCII input.
func trimSpace(s string) string {
	isWS := func(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}
	lo, hi := 0, len(s)
	for lo < hi && isWS(s[lo]) {
		lo++
	}
	for hi > lo && isWS(s[hi-1]) {
		hi--
	}
	return s[lo:hi]
}
// splitString splits s on every occurrence of the byte sep, like
// strings.Split for a single-byte separator: splitting "" yields [""], and
// leading/trailing separators produce empty fields.
func splitString(s string, sep byte) []string {
	var fields []string
	fieldStart := 0
	for idx := 0; idx < len(s); idx++ {
		if s[idx] != sep {
			continue
		}
		fields = append(fields, s[fieldStart:idx])
		fieldStart = idx + 1
	}
	return append(fields, s[fieldStart:])
}
// joinStrings joins strs with sep, like strings.Join (the file intentionally
// avoids importing strings for these basic helpers).
//
// The previous implementation concatenated with += in a loop, which is
// quadratic; this version computes the final length once and appends into a
// preallocated buffer, producing exactly one allocation.
func joinStrings(strs []string, sep string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}
	// Total output size: all elements plus one separator between each pair.
	n := len(sep) * (len(strs) - 1)
	for _, s := range strs {
		n += len(s)
	}
	buf := make([]byte, 0, n)
	buf = append(buf, strs[0]...)
	for _, s := range strs[1:] {
		buf = append(buf, sep...)
		buf = append(buf, s...)
	}
	return string(buf)
}

View File

@ -0,0 +1,116 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import "time"
// NodeType represents the type of a node in the context filesystem
type NodeType string
const (
	NodeTypeDirectory NodeType = "directory"
	NodeTypeFile NodeType = "file"
	NodeTypeDataset NodeType = "dataset"
	NodeTypeDocument NodeType = "document"
	NodeTypeChat NodeType = "chat"
	NodeTypeAgent NodeType = "agent"
	NodeTypeUnknown NodeType = "unknown"
)
// Node represents a node in the context filesystem
// This is the unified output format for all providers: each provider maps
// its backend objects (files, folders, datasets, ...) onto Nodes.
type Node struct {
	Name string `json:"name"` // display name (last path component)
	Path string `json:"path"` // full provider-relative path
	Type NodeType `json:"type"`
	Size int64 `json:"size,omitempty"` // bytes; 0 means unknown or not applicable
	CreatedAt time.Time `json:"created_at,omitempty"`
	UpdatedAt time.Time `json:"updated_at,omitempty"`
	Metadata map[string]interface{} `json:"metadata,omitempty"` // raw backend fields (e.g. "id")
}
// CommandType represents the type of command
type CommandType string
const (
	CommandList CommandType = "ls"
	CommandSearch CommandType = "search"
	CommandCat CommandType = "cat"
)
// Command represents a context engine command (one parsed CLI invocation).
type Command struct {
	Type CommandType `json:"type"`
	Path string `json:"path"`
	Params map[string]interface{} `json:"params,omitempty"` // command-specific flags (e.g. query, limit)
}
// ListOptions represents options for list operations
//
// NOTE(review): not every provider honors every field — the file provider in
// this package ignores Offset and SortBy/SortOrder; confirm per provider
// before relying on them.
type ListOptions struct {
	Recursive bool `json:"recursive,omitempty"`
	Limit int `json:"limit,omitempty"`
	Offset int `json:"offset,omitempty"`
	SortBy string `json:"sort_by,omitempty"`
	SortOrder string `json:"sort_order,omitempty"` // "asc" or "desc"
}
// SearchOptions represents options for search operations
type SearchOptions struct {
	Query string `json:"query"`
	Limit int `json:"limit,omitempty"`
	Offset int `json:"offset,omitempty"`
	Recursive bool `json:"recursive,omitempty"`
	TopK int `json:"top_k,omitempty"` // Number of top results to return (default: 10)
	Threshold float64 `json:"threshold,omitempty"` // Similarity threshold (default: 0.2)
	Dirs []string `json:"dirs,omitempty"` // List of directories to search in
}
// Result represents the result of a command execution.
// The zero value (no nodes, Total 0) is a valid empty result.
type Result struct {
	Nodes []*Node `json:"nodes"`
	Total int `json:"total"`
	HasMore bool `json:"has_more"`
	NextOffset int `json:"next_offset,omitempty"`
	Error error `json:"-"` // carries provider errors; never serialized
}
// PathInfo represents parsed path information
type PathInfo struct {
	Provider string // The provider name (e.g., "datasets", "chats")
	Path string // The full path
	Components []string // Path components
	IsRoot bool // Whether this is the root path for the provider
	ResourceID string // Resource ID if applicable
	ResourceName string // Resource name if applicable
}
// ProviderInfo holds metadata about a provider
type ProviderInfo struct {
	Name string `json:"name"`
	Description string `json:"description"`
	RootPath string `json:"root_path"`
}
// Common error messages
//
// NOTE(review): these are plain string prefixes formatted into errors with
// %s, not sentinel error values — callers cannot use errors.Is/As. Consider
// migrating to errors.New sentinels in a follow-up.
const (
	ErrInvalidPath = "invalid path"
	ErrProviderNotFound = "provider not found for path"
	ErrNotSupported = "operation not supported"
	ErrNotFound = "resource not found"
	ErrUnauthorized = "unauthorized"
	ErrInternal = "internal error"
)

View File

@ -0,0 +1,304 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
	"encoding/json"
	"fmt"
	"strings"
	"time"
)
// FormatNode formats a node for display as a key/value map.
// "json" emits raw values with RFC3339 timestamps, "table" emits
// human-readable size/time strings, and any other format ("plain") emits
// readable timestamps without a size field.
func FormatNode(node *Node, format string) map[string]interface{} {
	out := map[string]interface{}{
		"name": node.Name,
		"path": node.Path,
		"type": string(node.Type),
	}
	switch format {
	case "json":
		out["size"] = node.Size
		out["created_at"] = node.CreatedAt.Format(time.RFC3339)
		out["updated_at"] = node.UpdatedAt.Format(time.RFC3339)
	case "table":
		out["size"] = formatSize(node.Size)
		out["created_at"] = formatTime(node.CreatedAt)
		out["updated_at"] = formatTime(node.UpdatedAt)
	default: // "plain"
		out["created_at"] = formatTime(node.CreatedAt)
		out["updated_at"] = formatTime(node.UpdatedAt)
	}
	return out
}
// FormatNodes formats a list of nodes for display, applying FormatNode to
// each element in order.
func FormatNodes(nodes []*Node, format string) []map[string]interface{} {
	formatted := make([]map[string]interface{}, len(nodes))
	for i, node := range nodes {
		formatted[i] = FormatNode(node, format)
	}
	return formatted
}
// formatSize formats a byte count as a human-readable string ("-" for zero,
// whole bytes below 1 KiB, otherwise two decimals in the largest fitting
// binary unit up to TB).
func formatSize(size int64) string {
	const (
		KB = int64(1024)
		MB = 1024 * KB
		GB = 1024 * MB
		TB = 1024 * GB
	)
	if size == 0 {
		return "-"
	}
	if size >= TB {
		return fmt.Sprintf("%.2f TB", float64(size)/float64(TB))
	}
	if size >= GB {
		return fmt.Sprintf("%.2f GB", float64(size)/float64(GB))
	}
	if size >= MB {
		return fmt.Sprintf("%.2f MB", float64(size)/float64(MB))
	}
	if size >= KB {
		return fmt.Sprintf("%.2f KB", float64(size)/float64(KB))
	}
	return fmt.Sprintf("%d B", size)
}
// formatTime renders t as "YYYY-MM-DD HH:MM:SS", or "-" for the zero time.
func formatTime(t time.Time) string {
	const layout = "2006-01-02 15:04:05"
	if !t.IsZero() {
		return t.Format(layout)
	}
	return "-"
}
// ResultToMap converts a Result to a map for JSON serialization.
// A nil result maps to an empty node list with total 0.
func ResultToMap(result *Result) map[string]interface{} {
	if result == nil {
		return map[string]interface{}{
			"nodes": []interface{}{},
			"total": 0,
		}
	}
	converted := make([]map[string]interface{}, 0, len(result.Nodes))
	for _, n := range result.Nodes {
		converted = append(converted, nodeToMap(n))
	}
	return map[string]interface{}{
		"nodes":       converted,
		"total":       result.Total,
		"has_more":    result.HasMore,
		"next_offset": result.NextOffset,
	}
}
// nodeToMap converts a Node to a map, including the optional fields (size,
// timestamps, metadata) only when they carry a meaningful value.
func nodeToMap(node *Node) map[string]interface{} {
	out := map[string]interface{}{
		"name": node.Name,
		"path": node.Path,
		"type": string(node.Type),
	}
	if node.Size > 0 {
		out["size"] = node.Size
	}
	if created := node.CreatedAt; !created.IsZero() {
		out["created_at"] = created.Format(time.RFC3339)
	}
	if updated := node.UpdatedAt; !updated.IsZero() {
		out["updated_at"] = updated.Format(time.RFC3339)
	}
	if len(node.Metadata) > 0 {
		out["metadata"] = node.Metadata
	}
	return out
}
// MarshalJSON marshals a Result to JSON bytes.
//
// Implements json.Marshaler on *Result so encoding always flows through
// ResultToMap (dropping zero timestamps and the unexported error) instead of
// the struct's field tags.
//
// NOTE(review): the receiver is a pointer, so json.Marshal of a Result VALUE
// falls back to the default struct encoding — confirm call sites marshal
// pointers.
func (r *Result) MarshalJSON() ([]byte, error) {
	return json.Marshal(ResultToMap(r))
}
// PrintResult prints a result to stdout in the requested format:
// "json" (indented), "table", or anything else as plain paths.
func PrintResult(result *Result, format string) {
	if result == nil {
		fmt.Println("No results")
		return
	}
	switch format {
	case "table":
		printTable(result.Nodes)
	case "json":
		data, _ := json.MarshalIndent(ResultToMap(result), "", "  ")
		fmt.Println(string(data))
	default: // "plain"
		for _, node := range result.Nodes {
			fmt.Println(node.Path)
		}
	}
}
// printTable prints nodes in a simple fixed-width table with a header and a
// dashed separator line.
//
// Bug fix: the separator was previously printed as string(make([]byte, 104)),
// which emits 104 NUL bytes — invisible at best, terminal-corrupting at
// worst — instead of a visible rule.
func printTable(nodes []*Node) {
	if len(nodes) == 0 {
		fmt.Println("No results")
		return
	}
	// Print header
	fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n", "NAME", "TYPE", "SIZE", "CREATED", "UPDATED")
	fmt.Println(strings.Repeat("-", 104))
	// Print rows
	for _, node := range nodes {
		fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n",
			truncateString(node.Name, 40),
			node.Type,
			formatSize(node.Size),
			formatTime(node.CreatedAt),
			formatTime(node.UpdatedAt),
		)
	}
}
// truncateString truncates s to at most maxLen bytes, appending "..." when
// content is dropped.
//
// Bug fix: the previous version sliced s[:maxLen-3] unconditionally, which
// panics for maxLen < 3 and exceeds maxLen for maxLen == 3; small limits now
// hard-cut instead.
//
// NOTE(review): truncation is byte-based and may split a multi-byte UTF-8
// rune — acceptable for ASCII table output, revisit for non-ASCII names.
func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		if maxLen < 0 {
			maxLen = 0
		}
		return s[:maxLen]
	}
	return s[:maxLen-3] + "..."
}
// IsValidPath checks if a path is valid: non-empty and free of traversal
// ("..", "//"), wildcard, shell-special, and NUL characters.
//
// Uses strings.Contains instead of the hand-rolled containsString helper —
// standard library over bespoke scanning.
func IsValidPath(path string) bool {
	if path == "" {
		return false
	}
	// Reject fragments that would enable traversal or confuse the backend.
	invalidChars := []string{"..", "//", "\\", "*", "?", "<", ">", "|", "\x00"}
	for _, fragment := range invalidChars {
		if strings.Contains(path, fragment) {
			return false
		}
	}
	return true
}
// containsString reports whether substr occurs in s (naive scan, equivalent
// to strings.Contains; the empty substring always matches).
func containsString(s, substr string) bool {
	if len(substr) == 0 {
		return true
	}
	for i := 0; i+len(substr) <= len(s); i++ {
		if s[i:i+len(substr)] == substr {
			return true
		}
	}
	return false
}
// JoinPath joins path components with "/", collapsing the boundary slashes
// between adjacent components and skipping empty components after the first
// non-empty one.
func JoinPath(components ...string) string {
	if len(components) == 0 {
		return ""
	}
	joined := components[0]
	for _, component := range components[1:] {
		switch {
		case joined == "":
			joined = component
		case component == "":
			// skip empty components
		default:
			// Trim the trailing slashes off the accumulated path...
			for len(joined) > 0 && joined[len(joined)-1] == '/' {
				joined = joined[:len(joined)-1]
			}
			// ...and the leading slashes off the incoming component.
			lead := 0
			for lead < len(component) && component[lead] == '/' {
				lead++
			}
			joined = joined + "/" + component[lead:]
		}
	}
	return joined
}
// GetParentPath returns the parent path of a given path
func GetParentPath(path string) string {
path = normalizePath(path)
parts := SplitPath(path)
if len(parts) <= 1 {
return ""
}
return joinStrings(parts[:len(parts)-1], "/")
}
// GetBaseName returns the last component of a path
func GetBaseName(path string) string {
path = normalizePath(path)
parts := SplitPath(path)
if len(parts) == 0 {
return ""
}
return parts[len(parts)-1]
}
// HasPrefix reports whether path lies at or under prefix after both are
// normalized; an empty prefix matches everything.
func HasPrefix(path, prefix string) bool {
	cleanPath := normalizePath(path)
	cleanPrefix := normalizePath(prefix)
	switch {
	case cleanPrefix == "", cleanPath == cleanPrefix:
		return true
	case len(cleanPath) > len(cleanPrefix):
		// A proper descendant must continue with a path separator.
		return cleanPath[:len(cleanPrefix)+1] == cleanPrefix+"/"
	default:
		return false
	}
}