mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-28 01:50:38 +08:00
Feat: Initialize context engine CLI (#13776)
### What problem does this PR solve? - Add multiple output formats to ragflow_cli - Initialize contextengine as a Go module - ls datasets/ls files - cat file - search -d dir -q query issue: #13714 ### Type of change - [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
49
internal/cli/contextengine/README.md
Normal file
49
internal/cli/contextengine/README.md
Normal file
@ -0,0 +1,49 @@
|
||||
# ContextFS - Context Engine File System
|
||||
|
||||
ContextFS is a context engine interface for RAGFlow, providing users with a Unix-like file system interface to manage datasets, tools, skills, and memories.
|
||||
|
||||
## Directory Structure
|
||||
|
||||
```
|
||||
user_id/
|
||||
├── datasets/
|
||||
│ └── my_dataset/
|
||||
│ └── ...
|
||||
├── tools/
|
||||
│ ├── registry.json
|
||||
│ └── tool_name/
|
||||
│ ├── DOC.md
|
||||
│ └── ...
|
||||
├── skills/
|
||||
│ ├── registry.json
|
||||
│ └── skill_name/
|
||||
│ ├── SKILL.md
|
||||
│ └── ...
|
||||
└── memories/
|
||||
└── memory_id/
|
||||
├── sessions/
|
||||
│ ├── messages/
|
||||
│ ├── summaries/
|
||||
│ │ └── session_id/
|
||||
│ │ └── summary-{datetime}.md
|
||||
│ └── tools/
|
||||
│ └── session_id/
|
||||
│ └── {tool_name}.md # User level of memory on Tools usage
|
||||
├── users/
|
||||
│ ├── profile.md
|
||||
│ ├── preferences/
|
||||
│ └── entities/
|
||||
└── agents/
|
||||
└── agent_space/
|
||||
├── tools/
|
||||
│ └── {tool_name}.md # Agent level of memory on Tools usage
|
||||
└── skills/
|
||||
└── {skill_name}.md # Agent level of memory on Skills usage
|
||||
```
|
||||
|
||||
|
||||
## Supported Commands
|
||||
|
||||
- `ls [path]` - List directory contents
|
||||
- `cat <path>` - Display file contents (only for text files)
|
||||
- `search <query>` - Search content
|
||||
781
internal/cli/contextengine/dataset_provider.go
Normal file
781
internal/cli/contextengine/dataset_provider.go
Normal file
@ -0,0 +1,781 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package contextengine
|
||||
|
||||
import (
|
||||
stdctx "context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// HTTPResponse represents an HTTP response
type HTTPResponse struct {
	StatusCode int                 // HTTP status code (e.g. 200)
	Body       []byte              // raw response body bytes
	Headers    map[string][]string // response headers (multi-valued)
	Duration   float64             // request duration; units not visible here — presumably seconds, TODO confirm against the HTTPClient implementation
}

// HTTPClientInterface defines the interface needed from HTTPClient
type HTTPClientInterface interface {
	// Request performs an HTTP request.
	// method and path identify the call; useAPIBase selects whether the API
	// base prefix is applied (see the retrieval calls in this file for an
	// example where it is not); authKind selects the auth scheme ("auto"
	// everywhere in this file); headers carries extra request headers;
	// jsonBody, when non-nil, is serialized as the JSON request body.
	Request(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (*HTTPResponse, error)
}
|
||||
|
||||
// DatasetProvider handles datasets and their documents
// Path structure:
//   - datasets/                 -> List all datasets
//   - datasets/{name}           -> List documents in dataset
//   - datasets/{name}/{doc_name} -> Get document info
type DatasetProvider struct {
	BaseProvider                     // common name/description/rootPath fields
	httpClient   HTTPClientInterface // client used for all RAGFlow API calls
}
|
||||
|
||||
// NewDatasetProvider creates a new DatasetProvider
|
||||
func NewDatasetProvider(httpClient HTTPClientInterface) *DatasetProvider {
|
||||
return &DatasetProvider{
|
||||
BaseProvider: BaseProvider{
|
||||
name: "datasets",
|
||||
description: "Dataset management provider",
|
||||
rootPath: "datasets",
|
||||
},
|
||||
httpClient: httpClient,
|
||||
}
|
||||
}
|
||||
|
||||
// Supports returns true if this provider can handle the given path
|
||||
func (p *DatasetProvider) Supports(path string) bool {
|
||||
normalized := normalizePath(path)
|
||||
return normalized == "datasets" || strings.HasPrefix(normalized, "datasets/")
|
||||
}
|
||||
|
||||
// List lists nodes at the given path
|
||||
func (p *DatasetProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
|
||||
// subPath is the path relative to "datasets/"
|
||||
// Empty subPath means list all datasets
|
||||
// "{name}/files" means list documents in a dataset
|
||||
|
||||
// Check if trying to access hidden .knowledgebase
|
||||
if subPath == ".knowledgebase" || strings.HasPrefix(subPath, ".knowledgebase/") {
|
||||
return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
|
||||
}
|
||||
|
||||
if subPath == "" {
|
||||
return p.listDatasets(ctx, opts)
|
||||
}
|
||||
|
||||
parts := SplitPath(subPath)
|
||||
if len(parts) == 1 {
|
||||
// datasets/{name} - list documents in the dataset (default behavior)
|
||||
return p.listDocuments(ctx, parts[0], opts)
|
||||
}
|
||||
|
||||
if len(parts) == 2 {
|
||||
// datasets/{name}/{doc_name} - get document info
|
||||
return p.getDocumentNode(ctx, parts[0], parts[1])
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid path: %s", subPath)
|
||||
}
|
||||
|
||||
// Search searches for datasets or documents.
// An empty query degrades to a plain listing at the same path.
func (p *DatasetProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	if opts.Query == "" {
		return p.List(ctx, subPath, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}

	// If searching under a specific dataset's files
	// NOTE(review): this expects a "{name}/files" path shape, while List
	// exposes documents directly under "{name}" — confirm which shape the
	// CLI actually produces; a plain "{name}" subPath falls through to a
	// dataset-wide search below.
	parts := SplitPath(subPath)
	if len(parts) >= 2 && parts[1] == "files" {
		datasetName := parts[0]
		return p.searchDocuments(ctx, datasetName, opts)
	}

	// Otherwise search datasets
	return p.searchDatasets(ctx, opts)
}
|
||||
|
||||
// Cat retrieves document content
|
||||
// For datasets:
|
||||
// - cat datasets -> Error: datasets is a directory, not a file
|
||||
// - cat datasets/kb_name -> Error: kb_name is a directory, not a file
|
||||
// - cat datasets/kb_name/doc_name -> Would retrieve document content (if implemented)
|
||||
func (p *DatasetProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
|
||||
if subPath == "" {
|
||||
return nil, fmt.Errorf("'datasets' is a directory, not a file")
|
||||
}
|
||||
|
||||
parts := SplitPath(subPath)
|
||||
if len(parts) == 1 {
|
||||
// datasets/{name} - this is a dataset (directory)
|
||||
return nil, fmt.Errorf("'%s' is a directory, not a file", parts[0])
|
||||
}
|
||||
|
||||
if len(parts) == 2 {
|
||||
// datasets/{name}/{doc_name} - this could be a document
|
||||
// For now, document content retrieval is not implemented
|
||||
return nil, fmt.Errorf("document content retrieval not yet implemented for '%s'", parts[1])
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid path for cat: %s", subPath)
|
||||
}
|
||||
|
||||
// ==================== Dataset Operations ====================
|
||||
|
||||
func (p *DatasetProvider) listDatasets(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
|
||||
resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var apiResp struct {
|
||||
Code int `json:"code"`
|
||||
Data []map[string]interface{} `json:"data"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if apiResp.Code != 0 {
|
||||
return nil, fmt.Errorf("API error: %s", apiResp.Message)
|
||||
}
|
||||
|
||||
nodes := make([]*Node, 0, len(apiResp.Data))
|
||||
for _, ds := range apiResp.Data {
|
||||
node := p.datasetToNode(ds)
|
||||
// Skip hidden .knowledgebase dataset (trim whitespace for safety)
|
||||
if strings.TrimSpace(node.Name) == ".knowledgebase" {
|
||||
continue
|
||||
}
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
|
||||
total := len(nodes)
|
||||
|
||||
// Apply limit if specified
|
||||
if opts != nil && opts.Limit > 0 && opts.Limit < len(nodes) {
|
||||
nodes = nodes[:opts.Limit]
|
||||
}
|
||||
|
||||
return &Result{
|
||||
Nodes: nodes,
|
||||
Total: total,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *DatasetProvider) getDataset(ctx stdctx.Context, name string) (*Node, error) {
|
||||
// Check if trying to access hidden .knowledgebase
|
||||
if name == ".knowledgebase" {
|
||||
return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
|
||||
}
|
||||
|
||||
// First list all datasets to find the one with matching name
|
||||
resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var apiResp struct {
|
||||
Code int `json:"code"`
|
||||
Data []map[string]interface{} `json:"data"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if apiResp.Code != 0 {
|
||||
return nil, fmt.Errorf("API error: %s", apiResp.Message)
|
||||
}
|
||||
|
||||
for _, ds := range apiResp.Data {
|
||||
if getString(ds["name"]) == name {
|
||||
return p.datasetToNode(ds), nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("%s: dataset '%s'", ErrNotFound, name)
|
||||
}
|
||||
|
||||
func (p *DatasetProvider) searchDatasets(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
|
||||
// If no query is provided, just list datasets
|
||||
if opts.Query == "" {
|
||||
return p.listDatasets(ctx, &ListOptions{
|
||||
Limit: opts.Limit,
|
||||
Offset: opts.Offset,
|
||||
})
|
||||
}
|
||||
|
||||
// Use retrieval API for semantic search
|
||||
return p.searchWithRetrieval(ctx, opts)
|
||||
}
|
||||
|
||||
// searchWithRetrieval performs semantic search using the retrieval API.
// It resolves the target datasets (either the specific ones named in
// opts.Dirs, or all visible datasets), collects their kb_ids, calls the
// /chunk/retrieval_test endpoint, and converts the returned chunks into
// Nodes. Returns an empty Result when no searchable dataset is found.
func (p *DatasetProvider) searchWithRetrieval(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	// Determine kb_ids to search in
	var kbIDs []string
	var datasetsToSearch []*Node

	if len(opts.Dirs) > 0 && opts.Dirs[0] != "datasets" {
		// Search in specific datasets
		for _, dir := range opts.Dirs {
			// Extract dataset name from path (e.g., "datasets/kb1" -> "kb1")
			datasetName := dir
			if strings.HasPrefix(dir, "datasets/") {
				datasetName = dir[len("datasets/"):]
			}
			ds, err := p.getDataset(ctx, datasetName)
			if err != nil {
				// Exact lookup failed: try a case-insensitive match over the
				// full dataset list before giving up on this dir.
				allResult, listErr := p.listDatasets(ctx, nil)
				if listErr == nil {
					for _, d := range allResult.Nodes {
						if strings.EqualFold(d.Name, datasetName) {
							ds = d
							err = nil
							break
						}
					}
				}
				if err != nil {
					return nil, fmt.Errorf("dataset not found: %s", datasetName)
				}
			}
			datasetsToSearch = append(datasetsToSearch, ds)
			// Datasets without an "id" in metadata are silently skipped.
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	} else {
		// Search in all datasets
		allResult, err := p.listDatasets(ctx, nil)
		if err != nil {
			return nil, err
		}
		datasetsToSearch = allResult.Nodes
		for _, ds := range datasetsToSearch {
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	}

	// Nothing to search: return an empty (non-nil) result rather than an error.
	if len(kbIDs) == 0 {
		return &Result{
			Nodes: []*Node{},
			Total: 0,
		}, nil
	}

	// Build kb_id -> dataset name mapping so chunks can be labeled with a
	// human-readable dataset name instead of a raw id.
	kbIDToName := make(map[string]string)
	for _, ds := range datasetsToSearch {
		kbID := getString(ds.Metadata["id"])
		if kbID != "" && ds.Name != "" {
			kbIDToName[kbID] = ds.Name
		}
	}

	// Build retrieval request
	payload := map[string]interface{}{
		"kb_id":    kbIDs,
		"question": opts.Query,
	}

	// Set top_k (default to 10 if not specified)
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK

	// Set similarity threshold (default to 0.2 if not specified to match UI behavior)
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold

	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...)
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}

	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}

	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}

	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}

	// Parse chunks from response; non-map entries are ignored.
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}

	// Apply top_k limit if specified (API may return more results)
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}

	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
|
||||
|
||||
// chunkToNodeWithKBMapping converts a chunk map to a Node, using the
// kb_id -> dataset-name mapping (may be nil) to produce readable paths.
// The chunk schema varies across API versions, so every field is probed
// under several alternative keys.
func (p *DatasetProvider) chunkToNodeWithKBMapping(chunk map[string]interface{}, kbIDToName map[string]string) *Node {
	// Extract chunk content - try multiple field names
	content := ""
	if v, ok := chunk["content_with_weight"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content_ltks"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["text"].(string); ok && v != "" {
		content = v
	}

	// Get chunk_id for URI
	chunkID := ""
	if v, ok := chunk["chunk_id"].(string); ok {
		chunkID = v
	} else if v, ok := chunk["id"].(string); ok {
		chunkID = v
	}

	// Get document name and ID
	docName := ""
	if v, ok := chunk["docnm_kwd"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["docnm"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["doc_name"].(string); ok && v != "" {
		docName = v
	}

	docID := ""
	if v, ok := chunk["doc_id"].(string); ok && v != "" {
		docID = v
	}

	// Get dataset/kb name from mapping or chunk data
	datasetName := ""
	datasetID := ""

	// First try to get kb_id from chunk (could be string or array)
	if v, ok := chunk["kb_id"].(string); ok && v != "" {
		datasetID = v
	} else if v, ok := chunk["kb_id"].([]interface{}); ok && len(v) > 0 {
		if s, ok := v[0].(string); ok {
			datasetID = s
		}
	}

	// Look up dataset name from mapping using kb_id
	if datasetID != "" && kbIDToName != nil {
		if name, ok := kbIDToName[datasetID]; ok && name != "" {
			datasetName = name
		}
	}

	// Fallback to kb_name from chunk if mapping doesn't have it
	if datasetName == "" {
		if v, ok := chunk["kb_name"].(string); ok && v != "" {
			datasetName = v
		}
	}

	// Build URI path: prefer names over IDs for readability
	// Format: datasets/{dataset_name}/{doc_name}
	path := "/datasets"
	if datasetName != "" {
		path += "/" + datasetName
	} else if datasetID != "" {
		path += "/" + datasetID
	}
	if docName != "" {
		path += "/" + docName
	} else if docID != "" {
		path += "/" + docID
	}

	// Use doc_name or chunk_id as the name if content is empty;
	// chunk ids are truncated to 16 characters for display.
	name := content
	if name == "" {
		if docName != "" {
			name = docName
		} else if chunkID != "" {
			name = "chunk:" + chunkID[:min(len(chunkID), 16)]
		} else {
			name = "(empty)"
		}
	}

	node := &Node{
		Name:     name,
		Path:     path,
		Type:     NodeTypeDocument,
		Metadata: chunk,
	}

	// Parse timestamps if available
	if createTime, ok := chunk["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := chunk["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}

	return node
}
|
||||
|
||||
// chunkToNode converts a chunk map to a Node (legacy, uses chunk data only).
// Equivalent to chunkToNodeWithKBMapping with no kb_id -> name mapping, so
// the dataset name can only come from the chunk's own "kb_name" field.
func (p *DatasetProvider) chunkToNode(chunk map[string]interface{}) *Node {
	return p.chunkToNodeWithKBMapping(chunk, nil)
}
|
||||
|
||||
// ==================== Document Operations ====================
|
||||
|
||||
func (p *DatasetProvider) listDocuments(ctx stdctx.Context, datasetName string, opts *ListOptions) (*Result, error) {
|
||||
// First get the dataset ID
|
||||
ds, err := p.getDataset(ctx, datasetName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
datasetID := getString(ds.Metadata["id"])
|
||||
if datasetID == "" {
|
||||
return nil, fmt.Errorf("dataset ID not found")
|
||||
}
|
||||
|
||||
// Build query parameters
|
||||
params := make(map[string]string)
|
||||
if opts != nil {
|
||||
if opts.Limit > 0 {
|
||||
params["page_size"] = fmt.Sprintf("%d", opts.Limit)
|
||||
}
|
||||
if opts.Offset > 0 {
|
||||
params["page"] = fmt.Sprintf("%d", opts.Offset/opts.Limit+1)
|
||||
}
|
||||
}
|
||||
|
||||
path := fmt.Sprintf("/datasets/%s/documents", datasetID)
|
||||
resp, err := p.httpClient.Request("GET", path, true, "auto", params, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var apiResp struct {
|
||||
Code int `json:"code"`
|
||||
Data struct {
|
||||
Docs []map[string]interface{} `json:"docs"`
|
||||
} `json:"data"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if apiResp.Code != 0 {
|
||||
return nil, fmt.Errorf("API error: %s", apiResp.Message)
|
||||
}
|
||||
|
||||
nodes := make([]*Node, 0, len(apiResp.Data.Docs))
|
||||
for _, doc := range apiResp.Data.Docs {
|
||||
node := p.documentToNode(doc, datasetName)
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
|
||||
return &Result{
|
||||
Nodes: nodes,
|
||||
Total: len(nodes),
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *DatasetProvider) getDocumentNode(ctx stdctx.Context, datasetName, docName string) (*Result, error) {
|
||||
node, err := p.getDocument(ctx, datasetName, docName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &Result{
|
||||
Nodes: []*Node{node},
|
||||
Total: 1,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (p *DatasetProvider) getDocument(ctx stdctx.Context, datasetName, docName string) (*Node, error) {
|
||||
// List all documents and find the matching one
|
||||
result, err := p.listDocuments(ctx, datasetName, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, node := range result.Nodes {
|
||||
if node.Name == docName {
|
||||
return node, nil
|
||||
}
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("%s: document '%s' in dataset '%s'", ErrNotFound, docName, datasetName)
|
||||
}
|
||||
|
||||
// searchDocuments performs a semantic search restricted to one dataset,
// or a plain document listing when no query is given.
// NOTE(review): the retrieval call below duplicates most of
// searchWithRetrieval — consider extracting a shared helper.
func (p *DatasetProvider) searchDocuments(ctx stdctx.Context, datasetName string, opts *SearchOptions) (*Result, error) {
	// If no query is provided, just list documents
	if opts.Query == "" {
		return p.listDocuments(ctx, datasetName, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}

	// Use retrieval API for semantic search in specific dataset
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}

	kbID := getString(ds.Metadata["id"])
	if kbID == "" {
		return nil, fmt.Errorf("dataset ID not found for '%s'", datasetName)
	}

	// Build kb_id -> dataset name mapping (single entry for this dataset)
	kbIDToName := map[string]string{kbID: datasetName}

	// Build retrieval request for specific dataset
	payload := map[string]interface{}{
		"kb_id":    []string{kbID},
		"question": opts.Query,
	}

	// Set top_k (default to 10 if not specified)
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK

	// Set similarity threshold (default to 0.2 if not specified to match UI behavior)
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold

	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...)
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}

	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}

	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}

	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}

	// Parse chunks from response; non-map entries are ignored.
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}

	// Apply top_k limit if specified (API may return more results)
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}

	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
|
||||
|
||||
// ==================== Helper Functions ====================
|
||||
|
||||
func (p *DatasetProvider) datasetToNode(ds map[string]interface{}) *Node {
|
||||
name := getString(ds["name"])
|
||||
node := &Node{
|
||||
Name: name,
|
||||
Path: "/datasets/" + name,
|
||||
Type: NodeTypeDirectory,
|
||||
Metadata: ds,
|
||||
}
|
||||
|
||||
// Parse timestamps - try multiple field names
|
||||
if createTime, ok := ds["create_time"]; ok && createTime != nil {
|
||||
node.CreatedAt = parseTime(createTime)
|
||||
} else if createDate, ok := ds["create_date"]; ok && createDate != nil {
|
||||
node.CreatedAt = parseTime(createDate)
|
||||
}
|
||||
|
||||
if updateTime, ok := ds["update_time"]; ok && updateTime != nil {
|
||||
node.UpdatedAt = parseTime(updateTime)
|
||||
} else if updateDate, ok := ds["update_date"]; ok && updateDate != nil {
|
||||
node.UpdatedAt = parseTime(updateDate)
|
||||
}
|
||||
|
||||
return node
|
||||
}
|
||||
|
||||
// documentToNode converts a raw document map (as returned by the documents
// API) into a document Node under the owning dataset.
func (p *DatasetProvider) documentToNode(doc map[string]interface{}, datasetName string) *Node {
	name := getString(doc["name"])
	node := &Node{
		Name: name,
		// NOTE(review): no leading slash here, while datasetToNode builds
		// "/datasets/..." — confirm whether this inconsistency is intentional.
		Path:     "datasets/" + datasetName + "/" + name,
		Type:     NodeTypeDocument,
		Metadata: doc,
	}

	// Parse size (JSON numbers decode to float64, hence the conversion)
	if size, ok := doc["size"]; ok {
		node.Size = int64(getFloat(size))
	}

	// Parse timestamps
	if createTime, ok := doc["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := doc["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}

	return node
}
|
||||
|
||||
// getString coerces an arbitrary JSON-decoded value to a string:
// strings pass through, nil becomes "", anything else is formatted with %v.
func getString(v interface{}) string {
	if s, ok := v.(string); ok {
		return s
	}
	if v == nil {
		return ""
	}
	return fmt.Sprintf("%v", v)
}
|
||||
|
||||
// min returns the smaller of two ints.
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
|
||||
|
||||
// getFloat coerces a JSON-decoded numeric value to float64.
// Unrecognized types (including nil) yield 0.
func getFloat(v interface{}) float64 {
	switch n := v.(type) {
	case nil:
		return 0
	case float64:
		return n
	case float32:
		return float64(n)
	case int:
		return float64(n)
	case int64:
		return float64(n)
	}
	return 0
}
|
||||
|
||||
// parseTime converts a loosely-typed timestamp value to time.Time.
// Numeric values (or numeric strings) are Unix timestamps, with 13-digit
// values treated as milliseconds; non-numeric strings are tried against a
// few known date layouts. Anything unparseable yields the zero time.
func parseTime(v interface{}) time.Time {
	var ts int64

	switch val := v.(type) {
	case nil:
		return time.Time{}
	case float64:
		ts = int64(val)
	case int64:
		ts = val
	case int:
		ts = int64(val)
	case string:
		// Strip stray surrounding quotes before interpreting the string.
		raw := strings.Trim(val, `"`)
		parsed, err := strconv.ParseInt(raw, 10, 64)
		if err != nil {
			// Not a numeric timestamp: try known date layouts instead.
			layouts := []string{
				"2006-01-02 15:04:05",
				"2006-01-02T15:04:05",
				"2006-01-02T15:04:05Z",
				"2006-01-02",
			}
			for _, layout := range layouts {
				if t, perr := time.Parse(layout, raw); perr == nil {
					return t
				}
			}
			return time.Time{}
		}
		ts = parsed
	default:
		return time.Time{}
	}

	// 13-digit timestamps are milliseconds; normalize to seconds.
	if ts > 1e12 {
		ts /= 1000
	}

	return time.Unix(ts, 0)
}
|
||||
312
internal/cli/contextengine/engine.go
Normal file
312
internal/cli/contextengine/engine.go
Normal file
@ -0,0 +1,312 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package contextengine
|
||||
|
||||
import (
|
||||
stdctx "context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Engine is the core of the Context Engine.
// It manages providers and routes commands to the appropriate provider.
type Engine struct {
	providers []Provider // registered providers, consulted in registration order
}
|
||||
|
||||
// NewEngine creates a new Context Engine
|
||||
func NewEngine() *Engine {
|
||||
return &Engine{
|
||||
providers: make([]Provider, 0),
|
||||
}
|
||||
}
|
||||
|
||||
// RegisterProvider registers a provider with the engine.
// Providers are consulted in registration order by resolveProvider, so
// registration order matters when a path could match more than one provider.
func (e *Engine) RegisterProvider(provider Provider) {
	e.providers = append(e.providers, provider)
}
|
||||
|
||||
// GetProviders returns all registered providers
|
||||
func (e *Engine) GetProviders() []ProviderInfo {
|
||||
infos := make([]ProviderInfo, 0, len(e.providers))
|
||||
for _, p := range e.providers {
|
||||
infos = append(infos, ProviderInfo{
|
||||
Name: p.Name(),
|
||||
Description: p.Description(),
|
||||
})
|
||||
}
|
||||
return infos
|
||||
}
|
||||
|
||||
// Execute executes a command and returns the result.
func (e *Engine) Execute(ctx stdctx.Context, cmd *Command) (*Result, error) {
	switch cmd.Type {
	case CommandList:
		return e.List(ctx, cmd.Path, parseListOptions(cmd.Params))
	case CommandSearch:
		return e.Search(ctx, cmd.Path, parseSearchOptions(cmd.Params))
	case CommandCat:
		// NOTE(review): the bytes returned by Cat are discarded here and a
		// nil Result is returned on success — callers that need the content
		// must call Engine.Cat directly. Confirm this is intentional.
		_, err := e.Cat(ctx, cmd.Path)
		return nil, err
	default:
		return nil, fmt.Errorf("unknown command type: %s", cmd.Type)
	}
}
|
||||
|
||||
// resolveProvider finds the provider for a given path and returns the
// provider together with the sub-path relative to the provider's root.
func (e *Engine) resolveProvider(path string) (Provider, string, error) {
	path = normalizePath(path)

	for _, provider := range e.providers {
		if provider.Supports(path) {
			// Parse the subpath relative to the provider root.
			providerName := provider.Name()
			var subPath string
			if path == providerName {
				subPath = ""
			} else if strings.HasPrefix(path, providerName+"/") {
				subPath = path[len(providerName)+1:]
			} else {
				// The provider claims the path even though it is not under
				// its own name; pass the full path through unchanged.
				subPath = path
			}
			return provider, subPath, nil
		}
	}

	// If no provider supports this path, check if FileProvider can handle it as a fallback
	// This allows paths like "myskills" to be treated as "files/myskills"
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		// Check if the path looks like a file manager path (single component, not matching other providers)
		// NOTE(review): only "datasets" is excluded here — if more built-in
		// providers are added, this exclusion must grow with them.
		parts := SplitPath(path)
		if len(parts) > 0 && parts[0] != "datasets" {
			return fileProvider, path, nil
		}
	}

	return nil, "", fmt.Errorf("%s: %s", ErrProviderNotFound, path)
}
|
||||
|
||||
// List lists nodes at the given path
|
||||
// If path is empty, returns:
|
||||
// 1. Built-in providers (e.g., datasets)
|
||||
// 2. Top-level directories from files provider (if any)
|
||||
func (e *Engine) List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error) {
|
||||
// Normalize path
|
||||
path = normalizePath(path)
|
||||
|
||||
// If path is empty, return list of providers and files root directories
|
||||
if path == "" || path == "/" {
|
||||
return e.listRoot(ctx, opts)
|
||||
}
|
||||
|
||||
provider, subPath, err := e.resolveProvider(path)
|
||||
if err != nil {
|
||||
// If not found, try to find in files provider as a fallback
|
||||
// This allows "ls myfolder" to work as "ls files/myfolder"
|
||||
if fileProvider := e.getFileProvider(); fileProvider != nil {
|
||||
result, ferr := fileProvider.List(ctx, path, opts)
|
||||
if ferr == nil {
|
||||
return result, nil
|
||||
}
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return provider.List(ctx, subPath, opts)
|
||||
}
|
||||
|
||||
// listRoot returns the root listing:
// 1. Built-in providers (datasets, etc.)
// 2. Top-level folders from files provider (file_manager)
func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	nodes := make([]*Node, 0)

	// Add built-in providers first (like datasets)
	for _, p := range e.providers {
		// Skip files provider from this list - we'll add its children instead
		if p.Name() == "files" {
			continue
		}
		nodes = append(nodes, &Node{
			Name: p.Name(),
			Path: "/" + p.Name(),
			Type: NodeTypeDirectory,
			// A virtual provider directory has no real timestamp; the
			// current time is used as a placeholder.
			CreatedAt: time.Now(),
			Metadata: map[string]interface{}{
				"description": p.Description(),
			},
		})
	}

	// Add top-level folders from files provider (file_manager).
	// Errors from the files provider are deliberately swallowed: the root
	// listing still succeeds with just the built-in providers.
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		filesResult, err := fileProvider.List(ctx, "", opts)
		if err == nil {
			for _, node := range filesResult.Nodes {
				// Only add folders (directories), not files
				if node.Type == NodeTypeDirectory {
					// Ensure path doesn't have /files/ prefix for display
					node.Path = strings.TrimPrefix(node.Path, "files/")
					node.Path = strings.TrimPrefix(node.Path, "/")
					nodes = append(nodes, node)
				}
			}
		}
	}

	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
|
||||
|
||||
// getFileProvider returns the files provider if registered
|
||||
func (e *Engine) getFileProvider() Provider {
|
||||
for _, p := range e.providers {
|
||||
if p.Name() == "files" {
|
||||
return p
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Search searches for nodes matching the query
|
||||
func (e *Engine) Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error) {
|
||||
provider, subPath, err := e.resolveProvider(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return provider.Search(ctx, subPath, opts)
|
||||
}
|
||||
|
||||
// Cat retrieves the content of a file/document
|
||||
func (e *Engine) Cat(ctx stdctx.Context, path string) ([]byte, error) {
|
||||
provider, subPath, err := e.resolveProvider(path)
|
||||
if err != nil {
|
||||
// If not found, try to find in files provider as a fallback
|
||||
// This allows "cat myfolder/file.txt" to work as "cat files/myfolder/file.txt"
|
||||
if fileProvider := e.getFileProvider(); fileProvider != nil {
|
||||
return fileProvider.Cat(ctx, path)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return provider.Cat(ctx, subPath)
|
||||
}
|
||||
|
||||
// ParsePath parses a path and returns path information
|
||||
func (e *Engine) ParsePath(path string) (*PathInfo, error) {
|
||||
path = normalizePath(path)
|
||||
components := SplitPath(path)
|
||||
|
||||
if len(components) == 0 {
|
||||
return nil, fmt.Errorf("empty path")
|
||||
}
|
||||
|
||||
providerName := components[0]
|
||||
isRoot := len(components) == 1
|
||||
|
||||
// Find the provider
|
||||
var provider Provider
|
||||
for _, p := range e.providers {
|
||||
if p.Name() == providerName || strings.HasPrefix(path, p.Name()) {
|
||||
provider = p
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if provider == nil {
|
||||
return nil, fmt.Errorf("%s: %s", ErrProviderNotFound, path)
|
||||
}
|
||||
|
||||
info := &PathInfo{
|
||||
Provider: providerName,
|
||||
Path: path,
|
||||
Components: components,
|
||||
IsRoot: isRoot,
|
||||
}
|
||||
|
||||
// Extract resource ID or name if available
|
||||
if len(components) >= 2 {
|
||||
info.ResourceName = components[1]
|
||||
}
|
||||
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// parseListOptions parses command params into ListOptions
|
||||
func parseListOptions(params map[string]interface{}) *ListOptions {
|
||||
opts := &ListOptions{}
|
||||
|
||||
if params == nil {
|
||||
return opts
|
||||
}
|
||||
|
||||
if recursive, ok := params["recursive"].(bool); ok {
|
||||
opts.Recursive = recursive
|
||||
}
|
||||
if limit, ok := params["limit"].(int); ok {
|
||||
opts.Limit = limit
|
||||
}
|
||||
if offset, ok := params["offset"].(int); ok {
|
||||
opts.Offset = offset
|
||||
}
|
||||
if sortBy, ok := params["sort_by"].(string); ok {
|
||||
opts.SortBy = sortBy
|
||||
}
|
||||
if sortOrder, ok := params["sort_order"].(string); ok {
|
||||
opts.SortOrder = sortOrder
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
|
||||
// parseSearchOptions parses command params into SearchOptions
|
||||
func parseSearchOptions(params map[string]interface{}) *SearchOptions {
|
||||
opts := &SearchOptions{}
|
||||
|
||||
if params == nil {
|
||||
return opts
|
||||
}
|
||||
|
||||
if query, ok := params["query"].(string); ok {
|
||||
opts.Query = query
|
||||
}
|
||||
if limit, ok := params["limit"].(int); ok {
|
||||
opts.Limit = limit
|
||||
}
|
||||
if offset, ok := params["offset"].(int); ok {
|
||||
opts.Offset = offset
|
||||
}
|
||||
if recursive, ok := params["recursive"].(bool); ok {
|
||||
opts.Recursive = recursive
|
||||
}
|
||||
if topK, ok := params["top_k"].(int); ok {
|
||||
opts.TopK = topK
|
||||
}
|
||||
if threshold, ok := params["threshold"].(float64); ok {
|
||||
opts.Threshold = threshold
|
||||
}
|
||||
if dirs, ok := params["dirs"].([]string); ok {
|
||||
opts.Dirs = dirs
|
||||
}
|
||||
|
||||
return opts
|
||||
}
|
||||
594
internal/cli/contextengine/file_provider.go
Normal file
594
internal/cli/contextengine/file_provider.go
Normal file
@ -0,0 +1,594 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package contextengine
|
||||
|
||||
import (
|
||||
stdctx "context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FileProvider handles file operations using the Python backend /files API.
//
// Path structure:
//   - files/                          -> list root folder contents
//   - files/{folder_name}/            -> list folder contents
//   - files/{folder_name}/{file_name} -> get file info/content
//
// Backend endpoints used (useAPIBase=true):
//   - GET    /files?parent_id={id}      -> list files/folders in parent
//   - GET    /files/{file_id}           -> get file info
//   - POST   /files                     -> create folder or upload file
//   - DELETE /files                     -> delete files
//   - GET    /files/{file_id}/parent    -> get parent folder
//   - GET    /files/{file_id}/ancestors -> get ancestor folders
type FileProvider struct {
	BaseProvider
	httpClient  HTTPClientInterface // transport to the Python backend
	folderCache map[string]string   // path -> folder ID cache; NOTE(review): not guarded by a lock — confirm single-goroutine use
	rootID      string              // root folder ID, cached ("" until resolved)
}
|
||||
|
||||
// NewFileProvider creates a new FileProvider
|
||||
func NewFileProvider(httpClient HTTPClientInterface) *FileProvider {
|
||||
return &FileProvider{
|
||||
BaseProvider: BaseProvider{
|
||||
name: "files",
|
||||
description: "File manager provider (Python server)",
|
||||
rootPath: "files",
|
||||
},
|
||||
httpClient: httpClient,
|
||||
folderCache: make(map[string]string),
|
||||
}
|
||||
}
|
||||
|
||||
// Supports returns true if this provider can handle the given path
|
||||
func (p *FileProvider) Supports(path string) bool {
|
||||
normalized := normalizePath(path)
|
||||
return normalized == "files" || strings.HasPrefix(normalized, "files/")
|
||||
}
|
||||
|
||||
// List lists nodes at the given path
|
||||
// Path structure: files/ or files/{folder_name}/ or files/{folder_name}/{sub_path}/...
|
||||
func (p *FileProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
|
||||
// subPath is the path relative to "files/"
|
||||
// Empty subPath means list root folder
|
||||
|
||||
if subPath == "" {
|
||||
return p.listRootFolder(ctx, opts)
|
||||
}
|
||||
|
||||
parts := SplitPath(subPath)
|
||||
if len(parts) == 1 {
|
||||
// files/{folder_name} - list contents of this folder
|
||||
return p.listFolderByName(ctx, parts[0], opts)
|
||||
}
|
||||
|
||||
// For multi-level paths like myskills/skill-name/dir1, recursively traverse
|
||||
return p.listPathRecursive(ctx, parts, opts)
|
||||
}
|
||||
|
||||
// listPathRecursive resolves a multi-level path (e.g. files/a/b/c) by
// walking the folder tree one component at a time, then listing the
// final component.
//
// Behavior at the last component:
//   - directory: its contents are listed (with opts applied)
//   - file:      a single-node result describing the file is returned
//
// Folder IDs discovered along the way are memoized in p.folderCache.
func (p *FileProvider) listPathRecursive(ctx stdctx.Context, parts []string, opts *ListOptions) (*Result, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}

	// Start from root to find the first folder.
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]

	// Traverse through intermediate directories.
	for i := 1; i < len(parts); i++ {
		partName := parts[i]

		// List contents of current folder to find the next part.
		// opts is nil here (full listing) — presumably so paging cannot
		// hide the child being looked up; opts applies only to the final
		// listing.
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}

		// Find the next component among the children.
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// Last component and a directory: list its contents.
					if node.Type == NodeTypeDirectory {
						childID := getString(node.Metadata["id"])
						if childID == "" {
							return nil, fmt.Errorf("folder ID not found for '%s'", partName)
						}
						newPath := currentPath + "/" + partName
						p.folderCache[newPath] = childID
						return p.listFilesByParentID(ctx, childID, newPath, opts)
					}
					// Last component and a file: return the file node.
					return &Result{
						Nodes: []*Node{node},
						Total: 1,
					}, nil
				}
				// Not the last component - must be a directory to descend.
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}

		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}

	// Reached only when the loop did not run (single-component path);
	// kept as a defensive fallback.
	return p.listFilesByParentID(ctx, currentFolderID, currentPath, opts)
}
|
||||
|
||||
// Search searches for files/folders
|
||||
func (p *FileProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
|
||||
if opts.Query == "" {
|
||||
return p.List(ctx, subPath, &ListOptions{
|
||||
Limit: opts.Limit,
|
||||
Offset: opts.Offset,
|
||||
})
|
||||
}
|
||||
|
||||
// For now, search is not implemented - just list and filter by name
|
||||
result, err := p.List(ctx, subPath, &ListOptions{
|
||||
Limit: opts.Limit,
|
||||
Offset: opts.Offset,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Simple name filtering
|
||||
var filtered []*Node
|
||||
query := strings.ToLower(opts.Query)
|
||||
for _, node := range result.Nodes {
|
||||
if strings.Contains(strings.ToLower(node.Name), query) {
|
||||
filtered = append(filtered, node)
|
||||
}
|
||||
}
|
||||
|
||||
return &Result{
|
||||
Nodes: filtered,
|
||||
Total: len(filtered),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Cat retrieves file content
|
||||
func (p *FileProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
|
||||
if subPath == "" {
|
||||
return nil, fmt.Errorf("cat requires a file path: files/{folder}/{file}")
|
||||
}
|
||||
|
||||
parts := SplitPath(subPath)
|
||||
if len(parts) < 2 {
|
||||
return nil, fmt.Errorf("invalid path format, expected: files/{folder}/{file}")
|
||||
}
|
||||
|
||||
// Find the file by recursively traversing the path
|
||||
node, err := p.findNodeByPath(ctx, parts)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if node.Type == NodeTypeDirectory {
|
||||
return nil, fmt.Errorf("'%s' is a directory, not a file", subPath)
|
||||
}
|
||||
|
||||
fileID := getString(node.Metadata["id"])
|
||||
if fileID == "" {
|
||||
return nil, fmt.Errorf("file ID not found")
|
||||
}
|
||||
|
||||
// Download file content
|
||||
return p.downloadFile(ctx, fileID)
|
||||
}
|
||||
|
||||
// findNodeByPath walks the folder tree component by component and
// returns the node for the final path component (file or directory).
// Folder IDs discovered along the way are memoized in p.folderCache.
//
// NOTE(review): this traversal largely duplicates listPathRecursive;
// consider extracting a shared walker.
func (p *FileProvider) findNodeByPath(ctx stdctx.Context, parts []string) (*Node, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}

	// Start from root to find the first folder.
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]

	// Traverse through intermediate directories.
	for i := 1; i < len(parts); i++ {
		partName := parts[i]

		// List contents of current folder to find the next part.
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}

		// Find the next component among the children.
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - return it.
					return node, nil
				}
				// Not the last component - must be a directory.
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}

		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}

	// Single-component paths never enter the loop and fall through here,
	// so a bare top-level folder cannot be resolved by this function.
	return nil, fmt.Errorf("%s: '%s'", ErrNotFound, strings.Join(parts, "/"))
}
|
||||
|
||||
// ==================== Python Server API Methods ====================
|
||||
|
||||
// getRootID returns the root folder ID, fetching and caching it on first
// use. An empty string (with nil error) means the backend exposed no
// explicit root_id and listings should omit parent_id.
//
// NOTE(review): the empty-result case is never cached, so each call
// repeats the GET /files request — confirm that is acceptable.
func (p *FileProvider) getRootID(ctx stdctx.Context) (string, error) {
	if p.rootID != "" {
		return p.rootID, nil
	}

	// List files without parent_id to get root folder.
	resp, err := p.httpClient.Request("GET", "/files", true, "auto", nil, nil)
	if err != nil {
		return "", err
	}

	// Standard backend envelope: {code, data, message}.
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}

	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}

	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}

	// Try to find root folder ID from response.
	if rootID, ok := apiResp.Data["root_id"].(string); ok && rootID != "" {
		p.rootID = rootID
		return rootID, nil
	}

	// If no explicit root_id, use empty parent_id for root listing.
	return "", nil
}
|
||||
|
||||
// listRootFolder lists the contents of root folder
|
||||
func (p *FileProvider) listRootFolder(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
|
||||
// Get root folder ID first
|
||||
rootID, err := p.getRootID(ctx)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// List files using root folder ID as parent
|
||||
return p.listFilesByParentID(ctx, rootID, "", opts)
|
||||
}
|
||||
|
||||
// listFilesByParentID lists files/folders under the given parent ID via
// GET /files and converts each entry to a Node.
//
// parentPath is the display path of the parent; it prefixes each child's
// Path and cache key. The hidden ".knowledgebase" folder is filtered out.
// Folder IDs are memoized in p.folderCache keyed by display path.
//
// NOTE(review): only opts.Limit is honored (as page_size with page=1);
// Offset/SortBy/SortOrder are ignored. parentID is interpolated into the
// query string unescaped — confirm IDs are always URL-safe tokens.
func (p *FileProvider) listFilesByParentID(ctx stdctx.Context, parentID string, parentPath string, opts *ListOptions) (*Result, error) {
	// Build query parameters
	queryParams := make([]string, 0)
	if parentID != "" {
		queryParams = append(queryParams, fmt.Sprintf("parent_id=%s", parentID))
	}
	// Always set page=1 and page_size to ensure we get results
	pageSize := 100
	if opts != nil && opts.Limit > 0 {
		pageSize = opts.Limit
	}
	queryParams = append(queryParams, fmt.Sprintf("page_size=%d", pageSize))
	queryParams = append(queryParams, "page=1")

	// Build URL with query string
	path := "/files"
	if len(queryParams) > 0 {
		path = path + "?" + strings.Join(queryParams, "&")
	}

	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}

	// Standard backend envelope: {code, data, message}.
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}

	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}

	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}

	// Extract files list from data - API returns {"total": N, "files": [...], "parent_folder": {...}}
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}

	nodes := make([]*Node, 0, len(files))
	for _, f := range files {
		name := getString(f["name"])
		// Skip hidden .knowledgebase folder
		if strings.TrimSpace(name) == ".knowledgebase" {
			continue
		}

		node := p.fileToNode(f, parentPath)
		nodes = append(nodes, node)

		// Cache folder ID keyed by the child's display path.
		if node.Type == NodeTypeDirectory || getString(f["type"]) == "folder" {
			if id := getString(f["id"]); id != "" {
				cacheKey := node.Name
				if parentPath != "" {
					cacheKey = parentPath + "/" + node.Name
				}
				p.folderCache[cacheKey] = id
			}
		}
	}

	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
|
||||
|
||||
// listFolderByName lists contents of a folder by its name
|
||||
func (p *FileProvider) listFolderByName(ctx stdctx.Context, folderName string, opts *ListOptions) (*Result, error) {
|
||||
folderID, err := p.getFolderIDByName(ctx, folderName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// List files in the folder using folder ID as parent_id
|
||||
return p.listFilesByParentID(ctx, folderID, folderName, opts)
|
||||
}
|
||||
|
||||
// getFolderIDByName resolves a top-level folder's ID by listing the root
// folder (first page, up to 100 entries) and matching by name. Results
// are memoized in p.folderCache.
func (p *FileProvider) getFolderIDByName(ctx stdctx.Context, folderName string) (string, error) {
	// Check cache first
	if id, ok := p.folderCache[folderName]; ok {
		return id, nil
	}

	// List root folder to find the folder.
	// NOTE(review): a getRootID failure is swallowed here and the listing
	// proceeds without parent_id — confirm that fallback is intended.
	rootID, _ := p.getRootID(ctx)
	queryParams := make([]string, 0)
	if rootID != "" {
		queryParams = append(queryParams, fmt.Sprintf("parent_id=%s", rootID))
	}
	queryParams = append(queryParams, "page_size=100", "page=1")

	path := "/files"
	if len(queryParams) > 0 {
		path = path + "?" + strings.Join(queryParams, "&")
	}

	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return "", err
	}

	// Standard backend envelope: {code, data, message}.
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}

	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}

	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}

	// Collect entries from "files", falling back to a "docs" key.
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	} else if fileList, ok := apiResp.Data["docs"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}

	for _, f := range files {
		name := getString(f["name"])
		fileType := getString(f["type"])
		id := getString(f["id"])
		// Match by name and ensure it's a folder
		if name == folderName && fileType == "folder" && id != "" {
			p.folderCache[folderName] = id
			return id, nil
		}
	}

	return "", fmt.Errorf("%s: folder '%s'", ErrNotFound, folderName)
}
|
||||
|
||||
// getFileNode gets a file node by top-level folder and file name.
// If fileName is itself a directory, the directory's contents are
// returned instead of the directory node.
func (p *FileProvider) getFileNode(ctx stdctx.Context, folderName, fileName string) (*Result, error) {
	folderID, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}

	// List files in folder to find the file
	result, err := p.listFilesByParentID(ctx, folderID, folderName, nil)
	if err != nil {
		return nil, err
	}

	// Find the specific file
	for _, node := range result.Nodes {
		if node.Name == fileName {
			// If it's a directory, list its contents instead of returning the node itself
			if node.Type == NodeTypeDirectory {
				childFolderID := getString(node.Metadata["id"])
				if childFolderID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", fileName)
				}
				// Cache the folder ID under its display path.
				cacheKey := folderName + "/" + fileName
				p.folderCache[cacheKey] = childFolderID
				// Return directory contents
				return p.listFilesByParentID(ctx, childFolderID, cacheKey, nil)
			}
			// Return a single-node result for the file.
			return &Result{
				Nodes: []*Node{node},
				Total: 1,
			}, nil
		}
	}

	return nil, fmt.Errorf("%s: file '%s' in folder '%s'", ErrNotFound, fileName, folderName)
}
|
||||
|
||||
// downloadFile downloads file content
|
||||
func (p *FileProvider) downloadFile(ctx stdctx.Context, fileID string) ([]byte, error) {
|
||||
path := fmt.Sprintf("/files/%s", fileID)
|
||||
resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
// Try to parse error response
|
||||
var apiResp struct {
|
||||
Code int `json:"code"`
|
||||
Message string `json:"message"`
|
||||
}
|
||||
if err := json.Unmarshal(resp.Body, &apiResp); err == nil && apiResp.Code != 0 {
|
||||
return nil, fmt.Errorf("%s", apiResp.Message)
|
||||
}
|
||||
return nil, fmt.Errorf("HTTP error %d", resp.StatusCode)
|
||||
}
|
||||
|
||||
// Return raw file content
|
||||
return resp.Body, nil
|
||||
}
|
||||
|
||||
// ==================== Conversion Functions ====================
|
||||
|
||||
// fileToNode converts a backend file map into a Node.
//
// parentPath, when non-empty, prefixes the node's display Path. The raw
// map f is kept as the node's Metadata — note this aliases the caller's
// map, and the "id" entry below is written into it.
func (p *FileProvider) fileToNode(f map[string]interface{}, parentPath string) *Node {
	name := getString(f["name"])
	fileType := getString(f["type"])
	fileID := getString(f["id"])

	// Backend type "folder" maps to a directory; everything else is a file.
	nodeType := NodeTypeFile
	if fileType == "folder" {
		nodeType = NodeTypeDirectory
	}

	// Build display path relative to the provider root.
	path := name
	if parentPath != "" {
		path = parentPath + "/" + name
	}

	node := &Node{
		Name:     name,
		Path:     path,
		Type:     nodeType,
		Metadata: f,
	}

	// Parse size via getFloat (JSON numbers decode into interface{} as float64).
	if size, ok := f["size"]; ok {
		node.Size = int64(getFloat(size))
	}

	// Parse timestamps
	if createTime, ok := f["create_time"]; ok && createTime != nil {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := f["update_time"]; ok && updateTime != nil {
		node.UpdatedAt = parseTime(updateTime)
	}

	// Store ID for later use (folder traversal, downloads).
	if fileID != "" {
		if node.Metadata == nil {
			node.Metadata = make(map[string]interface{})
		}
		node.Metadata["id"] = fileID
	}

	return node
}
|
||||
180
internal/cli/contextengine/provider.go
Normal file
180
internal/cli/contextengine/provider.go
Normal file
@ -0,0 +1,180 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package contextengine
|
||||
|
||||
import (
|
||||
stdctx "context"
|
||||
)
|
||||
|
||||
// Provider is the interface for all context providers.
// Each provider handles a specific resource type (datasets, chats,
// agents, etc.) and is addressed by the first component of a context path.
type Provider interface {
	// Name returns the provider name (e.g., "datasets", "chats").
	Name() string

	// Description returns a human-readable description of the provider.
	Description() string

	// Supports returns true if this provider can handle the given path.
	Supports(path string) bool

	// List lists nodes at the given path.
	List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error)

	// Search searches for nodes matching the query under the given path.
	Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error)

	// Cat retrieves the content of a file/document at the given path.
	Cat(ctx stdctx.Context, path string) ([]byte, error)
}
|
||||
|
||||
// BaseProvider provides common functionality for all providers.
// It is intended to be embedded in concrete provider implementations.
type BaseProvider struct {
	name        string // provider name, also the first path component (e.g. "files")
	description string // human-readable description shown in root listings
	rootPath    string // root path prefix this provider owns
}
|
||||
|
||||
// Name returns the provider name (e.g. "datasets", "files").
func (p *BaseProvider) Name() string {
	return p.name
}
|
||||
|
||||
// Description returns the human-readable provider description.
func (p *BaseProvider) Description() string {
	return p.description
}
|
||||
|
||||
// GetRootPath returns the root path for this provider.
func (p *BaseProvider) GetRootPath() string {
	return p.rootPath
}
|
||||
|
||||
// IsRootPath reports whether the given path, once normalized, equals this
// provider's (normalized) root path.
func (p *BaseProvider) IsRootPath(path string) bool {
	return normalizePath(path) == normalizePath(p.rootPath)
}
|
||||
|
||||
// ParsePath parses a path and returns the subpath relative to the provider root
|
||||
func (p *BaseProvider) ParsePath(path string) string {
|
||||
normalized := normalizePath(path)
|
||||
rootNormalized := normalizePath(p.rootPath)
|
||||
|
||||
if normalized == rootNormalized {
|
||||
return ""
|
||||
}
|
||||
|
||||
if len(normalized) > len(rootNormalized) && normalized[:len(rootNormalized)+1] == rootNormalized+"/" {
|
||||
return normalized[len(rootNormalized)+1:]
|
||||
}
|
||||
|
||||
return normalized
|
||||
}
|
||||
|
||||
// SplitPath splits a path into components
|
||||
func SplitPath(path string) []string {
|
||||
path = normalizePath(path)
|
||||
if path == "" {
|
||||
return []string{}
|
||||
}
|
||||
parts := splitString(path, '/')
|
||||
result := make([]string, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
if part != "" {
|
||||
result = append(result, part)
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// normalizePath normalizes a path (removes leading/trailing slashes, handles "." and "..")
|
||||
func normalizePath(path string) string {
|
||||
path = trimSpace(path)
|
||||
if path == "" {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Remove leading slashes
|
||||
for len(path) > 0 && path[0] == '/' {
|
||||
path = path[1:]
|
||||
}
|
||||
|
||||
// Remove trailing slashes
|
||||
for len(path) > 0 && path[len(path)-1] == '/' {
|
||||
path = path[:len(path)-1]
|
||||
}
|
||||
|
||||
// Handle "." and ".."
|
||||
parts := splitString(path, '/')
|
||||
result := make([]string, 0, len(parts))
|
||||
for _, part := range parts {
|
||||
switch part {
|
||||
case "", ".":
|
||||
// Skip empty and current directory
|
||||
continue
|
||||
case "..":
|
||||
// Go up one directory
|
||||
if len(result) > 0 {
|
||||
result = result[:len(result)-1]
|
||||
}
|
||||
default:
|
||||
result = append(result, part)
|
||||
}
|
||||
}
|
||||
|
||||
return joinStrings(result, "/")
|
||||
}
|
||||
|
||||
// Helper functions to avoid importing strings package in basic operations

// trimSpace strips leading and trailing ASCII whitespace (space, tab,
// newline, carriage return) from s.
func trimSpace(s string) string {
	isSpace := func(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}
	lo, hi := 0, len(s)
	for lo < hi && isSpace(s[lo]) {
		lo++
	}
	for hi > lo && isSpace(s[hi-1]) {
		hi--
	}
	return s[lo:hi]
}
|
||||
|
||||
// splitString splits s on the byte sep, mirroring strings.Split for a
// single-byte separator (the result always has at least one element).
func splitString(s string, sep byte) []string {
	var parts []string
	segStart := 0
	for i := 0; i < len(s); i++ {
		if s[i] != sep {
			continue
		}
		parts = append(parts, s[segStart:i])
		segStart = i + 1
	}
	return append(parts, s[segStart:])
}
|
||||
|
||||
// joinStrings concatenates strs with sep between elements, mirroring
// strings.Join without importing the strings package.
//
// The result is assembled in a single pre-sized byte buffer; the previous
// `result += sep + s` loop reallocated and copied the accumulator on
// every iteration (quadratic for long inputs).
func joinStrings(strs []string, sep string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}

	// Compute the exact output size up front.
	size := len(sep) * (len(strs) - 1)
	for _, s := range strs {
		size += len(s)
	}

	buf := make([]byte, 0, size)
	buf = append(buf, strs[0]...)
	for _, s := range strs[1:] {
		buf = append(buf, sep...)
		buf = append(buf, s...)
	}
	return string(buf)
}
|
||||
116
internal/cli/contextengine/types.go
Normal file
116
internal/cli/contextengine/types.go
Normal file
@ -0,0 +1,116 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package contextengine
|
||||
|
||||
import "time"
|
||||
|
||||
// NodeType represents the type of a node in the context filesystem.
type NodeType string

// Known node types. Providers map their backend objects onto these so
// the CLI can render a uniform listing.
const (
	NodeTypeDirectory NodeType = "directory"
	NodeTypeFile      NodeType = "file"
	NodeTypeDataset   NodeType = "dataset"
	NodeTypeDocument  NodeType = "document"
	NodeTypeChat      NodeType = "chat"
	NodeTypeAgent     NodeType = "agent"
	NodeTypeUnknown   NodeType = "unknown"
)
|
||||
|
||||
// Node represents a node in the context filesystem.
// This is the unified output format for all providers.
type Node struct {
	Name      string                 `json:"name"`                 // display name (final path component)
	Path      string                 `json:"path"`                 // provider-relative display path
	Type      NodeType               `json:"type"`                 // one of the NodeType constants
	Size      int64                  `json:"size,omitempty"`       // size in bytes, when known
	CreatedAt time.Time              `json:"created_at,omitempty"` // creation time, when known
	UpdatedAt time.Time              `json:"updated_at,omitempty"` // last-update time, when known
	Metadata  map[string]interface{} `json:"metadata,omitempty"`   // provider-specific extras (e.g. backend "id")
}
|
||||
|
||||
// CommandType represents the type of command.
type CommandType string

// Supported commands, mirroring the CLI verbs (ls, search, cat).
const (
	CommandList   CommandType = "ls"
	CommandSearch CommandType = "search"
	CommandCat    CommandType = "cat"
)
|
||||
|
||||
// Command represents a context engine command.
type Command struct {
	Type   CommandType            `json:"type"`             // operation to perform (ls, search, cat)
	Path   string                 `json:"path"`             // path the command operates on
	Params map[string]interface{} `json:"params,omitempty"` // optional command-specific parameters
}
|
||||
|
||||
// ListOptions represents options for list operations.
type ListOptions struct {
	Recursive bool   `json:"recursive,omitempty"`  // list subdirectories recursively
	Limit     int    `json:"limit,omitempty"`      // maximum number of entries to return
	Offset    int    `json:"offset,omitempty"`     // pagination offset
	SortBy    string `json:"sort_by,omitempty"`    // field to sort by
	SortOrder string `json:"sort_order,omitempty"` // "asc" or "desc"
}
|
||||
|
||||
// SearchOptions represents options for search operations.
type SearchOptions struct {
	Query     string   `json:"query"`               // search query text
	Limit     int      `json:"limit,omitempty"`     // maximum number of results to return
	Offset    int      `json:"offset,omitempty"`    // pagination offset
	Recursive bool     `json:"recursive,omitempty"` // whether to search recursively
	TopK      int      `json:"top_k,omitempty"`     // Number of top results to return (default: 10)
	Threshold float64  `json:"threshold,omitempty"` // Similarity threshold (default: 0.2)
	Dirs      []string `json:"dirs,omitempty"`      // List of directories to search in
}
|
||||
|
||||
// Result represents the result of a command execution.
type Result struct {
	Nodes      []*Node `json:"nodes"`                 // nodes produced by the command
	Total      int     `json:"total"`                 // total result count
	HasMore    bool    `json:"has_more"`              // whether more results exist beyond this page
	NextOffset int     `json:"next_offset,omitempty"` // offset to request the next page
	Error      error   `json:"-"`                     // execution error; never serialized to JSON
}
|
||||
|
||||
// PathInfo represents parsed path information, as derived from a
// context-filesystem path string.
type PathInfo struct {
	Provider     string   // The provider name (e.g., "datasets", "chats")
	Path         string   // The full path
	Components   []string // Path components
	IsRoot       bool     // Whether this is the root path for the provider
	ResourceID   string   // Resource ID if applicable
	ResourceName string   // Resource name if applicable
}
|
||||
|
||||
// ProviderInfo holds metadata about a provider.
type ProviderInfo struct {
	Name        string `json:"name"`        // provider identifier (e.g., "datasets")
	Description string `json:"description"` // human-readable description
	RootPath    string `json:"root_path"`   // root path the provider is mounted at
}
|
||||
|
||||
// Common error messages.
//
// NOTE(review): these are plain string constants rather than sentinel error
// values (var ErrX = errors.New(...)), so callers cannot use errors.Is;
// presumably they are matched or wrapped as message text — confirm before
// converting.
const (
	ErrInvalidPath      = "invalid path"
	ErrProviderNotFound = "provider not found for path"
	ErrNotSupported     = "operation not supported"
	ErrNotFound         = "resource not found"
	ErrUnauthorized     = "unauthorized"
	ErrInternal         = "internal error"
)
|
||||
304
internal/cli/contextengine/utils.go
Normal file
304
internal/cli/contextengine/utils.go
Normal file
@ -0,0 +1,304 @@
|
||||
//
|
||||
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
// You may obtain a copy of the License at
|
||||
//
|
||||
// http://www.apache.org/licenses/LICENSE-2.0
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software
|
||||
// distributed under the License is distributed on an "AS IS" BASIS,
|
||||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
// See the License for the specific language governing permissions and
|
||||
// limitations under the License.
|
||||
//
|
||||
|
||||
package contextengine
|
||||
|
||||
import (
	"encoding/json"
	"fmt"
	"strings"
	"time"
)
|
||||
|
||||
// FormatNode formats a node for display
|
||||
func FormatNode(node *Node, format string) map[string]interface{} {
|
||||
switch format {
|
||||
case "json":
|
||||
return map[string]interface{}{
|
||||
"name": node.Name,
|
||||
"path": node.Path,
|
||||
"type": string(node.Type),
|
||||
"size": node.Size,
|
||||
"created_at": node.CreatedAt.Format(time.RFC3339),
|
||||
"updated_at": node.UpdatedAt.Format(time.RFC3339),
|
||||
}
|
||||
case "table":
|
||||
return map[string]interface{}{
|
||||
"name": node.Name,
|
||||
"path": node.Path,
|
||||
"type": string(node.Type),
|
||||
"size": formatSize(node.Size),
|
||||
"created_at": formatTime(node.CreatedAt),
|
||||
"updated_at": formatTime(node.UpdatedAt),
|
||||
}
|
||||
default: // "plain"
|
||||
return map[string]interface{}{
|
||||
"name": node.Name,
|
||||
"path": node.Path,
|
||||
"type": string(node.Type),
|
||||
"created_at": formatTime(node.CreatedAt),
|
||||
"updated_at": formatTime(node.UpdatedAt),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// FormatNodes formats a list of nodes for display
|
||||
func FormatNodes(nodes []*Node, format string) []map[string]interface{} {
|
||||
result := make([]map[string]interface{}, 0, len(nodes))
|
||||
for _, node := range nodes {
|
||||
result = append(result, FormatNode(node, format))
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// formatSize renders a byte count as a human-readable string.
// Zero is shown as "-"; larger values are scaled to KB/MB/GB/TB
// (powers of 1024) with two decimal places.
func formatSize(size int64) string {
	if size == 0 {
		return "-"
	}
	if size < 1024 {
		return fmt.Sprintf("%d B", size)
	}

	div := int64(1024)
	for _, unit := range []string{"KB", "MB", "GB"} {
		if size < div*1024 {
			return fmt.Sprintf("%.2f %s", float64(size)/float64(div), unit)
		}
		div *= 1024
	}
	return fmt.Sprintf("%.2f TB", float64(size)/float64(div))
}
|
||||
|
||||
// formatTime formats a time to a readable string
|
||||
func formatTime(t time.Time) string {
|
||||
if t.IsZero() {
|
||||
return "-"
|
||||
}
|
||||
return t.Format("2006-01-02 15:04:05")
|
||||
}
|
||||
|
||||
// ResultToMap converts a Result to a map for JSON serialization
|
||||
func ResultToMap(result *Result) map[string]interface{} {
|
||||
if result == nil {
|
||||
return map[string]interface{}{
|
||||
"nodes": []interface{}{},
|
||||
"total": 0,
|
||||
}
|
||||
}
|
||||
|
||||
nodes := make([]map[string]interface{}, 0, len(result.Nodes))
|
||||
for _, node := range result.Nodes {
|
||||
nodes = append(nodes, nodeToMap(node))
|
||||
}
|
||||
|
||||
return map[string]interface{}{
|
||||
"nodes": nodes,
|
||||
"total": result.Total,
|
||||
"has_more": result.HasMore,
|
||||
"next_offset": result.NextOffset,
|
||||
}
|
||||
}
|
||||
|
||||
// nodeToMap converts a Node to a map
|
||||
func nodeToMap(node *Node) map[string]interface{} {
|
||||
m := map[string]interface{}{
|
||||
"name": node.Name,
|
||||
"path": node.Path,
|
||||
"type": string(node.Type),
|
||||
}
|
||||
|
||||
if node.Size > 0 {
|
||||
m["size"] = node.Size
|
||||
}
|
||||
|
||||
if !node.CreatedAt.IsZero() {
|
||||
m["created_at"] = node.CreatedAt.Format(time.RFC3339)
|
||||
}
|
||||
|
||||
if !node.UpdatedAt.IsZero() {
|
||||
m["updated_at"] = node.UpdatedAt.Format(time.RFC3339)
|
||||
}
|
||||
|
||||
if len(node.Metadata) > 0 {
|
||||
m["metadata"] = node.Metadata
|
||||
}
|
||||
|
||||
return m
|
||||
}
|
||||
|
||||
// MarshalJSON marshals a Result to JSON bytes.
// It delegates to ResultToMap so the custom encoding stays consistent with
// PrintResult's "json" output, and so a nil receiver encodes safely as an
// empty result set.
func (r *Result) MarshalJSON() ([]byte, error) {
	return json.Marshal(ResultToMap(r))
}
|
||||
|
||||
// PrintResult prints a result in the specified format
|
||||
func PrintResult(result *Result, format string) {
|
||||
if result == nil {
|
||||
fmt.Println("No results")
|
||||
return
|
||||
}
|
||||
|
||||
switch format {
|
||||
case "json":
|
||||
data, _ := json.MarshalIndent(ResultToMap(result), "", " ")
|
||||
fmt.Println(string(data))
|
||||
case "table":
|
||||
printTable(result.Nodes)
|
||||
default: // "plain"
|
||||
for _, node := range result.Nodes {
|
||||
fmt.Println(node.Path)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// printTable prints nodes in a simple table format
|
||||
func printTable(nodes []*Node) {
|
||||
if len(nodes) == 0 {
|
||||
fmt.Println("No results")
|
||||
return
|
||||
}
|
||||
|
||||
// Print header
|
||||
fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n", "NAME", "TYPE", "SIZE", "CREATED", "UPDATED")
|
||||
fmt.Println(string(make([]byte, 104)))
|
||||
|
||||
// Print rows
|
||||
for _, node := range nodes {
|
||||
fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n",
|
||||
truncateString(node.Name, 40),
|
||||
node.Type,
|
||||
formatSize(node.Size),
|
||||
formatTime(node.CreatedAt),
|
||||
formatTime(node.UpdatedAt),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// truncateString truncates s to at most maxLen bytes, replacing the tail
// with "..." when truncation occurs.
//
// Bug fix: the original computed s[:maxLen-3] unconditionally, which
// panics with a negative slice index whenever maxLen < 3 and s is longer
// than maxLen.
//
// NOTE(review): truncation is byte-based and may split a multi-byte UTF-8
// rune — acceptable for ASCII table output, confirm for i18n names.
func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	if maxLen < 3 {
		// No room for the ellipsis; hard-cut instead of panicking.
		if maxLen < 0 {
			maxLen = 0
		}
		return s[:maxLen]
	}
	return s[:maxLen-3] + "..."
}
|
||||
|
||||
// IsValidPath reports whether path is a usable context-filesystem path.
// It rejects the empty string and any path containing traversal sequences
// (".."), doubled slashes, backslashes, glob/redirect metacharacters
// ("*", "?", "<", ">", "|"), or NUL bytes.
func IsValidPath(path string) bool {
	if path == "" {
		return false
	}

	// strings.Contains replaces the previous hand-rolled substring scan.
	for _, bad := range []string{"..", "//", "\\", "*", "?", "<", ">", "|", "\x00"} {
		if strings.Contains(path, bad) {
			return false
		}
	}
	return true
}
|
||||
|
||||
// containsString reports whether s contains substr.
//
// Deprecated: this is exactly strings.Contains; prefer calling the
// standard library directly. The hand-rolled O(n*m) scan is kept only as
// a thin delegate for existing callers.
func containsString(s, substr string) bool {
	return strings.Contains(s, substr)
}
|
||||
|
||||
// JoinPath joins path components with single "/" separators.
//
// Empty components are skipped; a trailing slash run on the accumulated
// path and a leading slash run on the next component collapse into one
// separator. No arguments yields "". (Unlike path.Join, no "." / ".."
// cleaning is performed — the separator handling is purely textual.)
func JoinPath(components ...string) string {
	result := ""
	for _, c := range components {
		switch {
		case result == "":
			// First non-empty component is taken verbatim.
			result = c
		case c == "":
			// Skip empty components entirely.
		default:
			// strings.TrimRight/TrimLeft replace the previous manual
			// slash-stripping loops.
			result = strings.TrimRight(result, "/") + "/" + strings.TrimLeft(c, "/")
		}
	}
	return result
}
|
||||
|
||||
// GetParentPath returns the parent path of a given path
|
||||
func GetParentPath(path string) string {
|
||||
path = normalizePath(path)
|
||||
parts := SplitPath(path)
|
||||
|
||||
if len(parts) <= 1 {
|
||||
return ""
|
||||
}
|
||||
|
||||
return joinStrings(parts[:len(parts)-1], "/")
|
||||
}
|
||||
|
||||
// GetBaseName returns the last component of a path
|
||||
func GetBaseName(path string) string {
|
||||
path = normalizePath(path)
|
||||
parts := SplitPath(path)
|
||||
|
||||
if len(parts) == 0 {
|
||||
return ""
|
||||
}
|
||||
|
||||
return parts[len(parts)-1]
|
||||
}
|
||||
|
||||
// HasPrefix checks if a path has the given prefix
|
||||
func HasPrefix(path, prefix string) bool {
|
||||
path = normalizePath(path)
|
||||
prefix = normalizePath(prefix)
|
||||
|
||||
if prefix == "" {
|
||||
return true
|
||||
}
|
||||
|
||||
if path == prefix {
|
||||
return true
|
||||
}
|
||||
|
||||
if len(path) > len(prefix) && path[:len(prefix)+1] == prefix+"/" {
|
||||
return true
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
Reference in New Issue
Block a user