Feat: Initialize context engine CLI (#13776)

### What problem does this PR solve?

- Add multiple output formats to ragflow_cli
- Initialize the contextengine Go module
  - ls datasets/ls files
  - cat file
  - search -d dir -q query

issue: #13714

### Type of change

- [x] New Feature (non-breaking change which adds functionality)
This commit is contained in:
Yingfeng
2026-03-26 21:07:06 +08:00
committed by GitHub
parent 3b1e77a6d4
commit 6e309f9d0a
14 changed files with 3753 additions and 155 deletions

View File

@ -4,20 +4,21 @@ This is the Go implementation of the RAGFlow command-line interface, compatible
## Features
- Interactive mode only
- Interactive mode and single command execution
- Full compatibility with Python CLI syntax
- Recursive descent parser for SQL-like commands
- Context Engine (Virtual Filesystem) for intuitive resource management
- Support for all major commands:
- User management: LOGIN, REGISTER, CREATE USER, DROP USER, LIST USERS, etc.
- Service management: LIST SERVICES, SHOW SERVICE, STARTUP/SHUTDOWN/RESTART SERVICE
- Role management: CREATE ROLE, DROP ROLE, LIST ROLES, GRANT/REVOKE PERMISSION
- Dataset management: CREATE DATASET, DROP DATASET, LIST DATASETS
- Dataset management via Context Engine: `ls`, `search`, `mkdir`, `cat`, `rm`
- Model management: SET/RESET DEFAULT LLM/VLM/EMBEDDING/etc.
- And more...
## Usage
Build and run:
### Build and run
```bash
go build -o ragflow_cli ./cmd/ragflow_cli.go
@ -28,11 +29,94 @@ go build -o ragflow_cli ./cmd/ragflow_cli.go
```
internal/cli/
├── cli.go # Main CLI loop and interaction
├── parser/ # Command parser package
│ ├── types.go # Token and Command types
│ ├── lexer.go # Lexical analyzer
│ └── parser.go # Recursive descent parser
├── cli.go # Main CLI loop and interaction
├── client.go # RAGFlowClient with Context Engine integration
├── http_client.go # HTTP client for API communication
├── parser/ # Command parser package
│ ├── types.go # Token and Command types
│ ├── lexer.go # Lexical analyzer
│ └── parser.go # Recursive descent parser
└── contextengine/ # Context Engine (Virtual Filesystem)
├── engine.go # Core engine: path resolution, command routing
├── types.go # Node, Command, Result types
├── provider.go # Provider interface definition
├── dataset_provider.go # Dataset provider implementation
├── file_provider.go # File manager provider implementation
└── utils.go # Helper functions
```
## Context Engine
The Context Engine provides a unified virtual filesystem interface over RAGFlow's RESTful APIs.
### Design Principles
1. **No Server-Side Changes**: All logic implemented client-side using existing APIs
2. **Provider Pattern**: Modular providers for different resource types (datasets, files, etc.)
3. **Unified Interface**: Common `ls`, `search`, `mkdir` commands across all providers
4. **Path-Based Navigation**: Virtual paths like `/datasets`, `/datasets/{name}/files`
### Supported Paths
| Path | Description |
|------|-------------|
| `/datasets` | List all datasets |
| `/datasets/{name}` | List documents in dataset (default behavior) |
| `/datasets/{name}/{doc}` | Get document info |
### Commands
#### `ls [path] [options]` - List nodes at path
List contents of a path in the context filesystem.
**Arguments:**
- `[path]` - Path to list (default: "datasets")
**Options:**
- `-n, --limit <number>` - Maximum number of items to display (default: 10)
- `-h, --help` - Show ls help message
**Examples:**
```bash
ls # List all datasets (default 10)
ls -n 20 # List 20 datasets
ls datasets/kb1 # List files in kb1 dataset
ls datasets/kb1 -n 50 # List 50 files in kb1 dataset
```
#### `search [options]` - Search for content
Semantic search in datasets.
**Options:**
- `-d, --dir <path>` - Directory to search in (can be specified multiple times)
- `-q, --query <query>` - Search query (required)
- `-k, --top-k <number>` - Number of top results to return (default: 10)
- `-t, --threshold <num>` - Similarity threshold, 0.0-1.0 (default: 0.2)
- `-h, --help` - Show search help message
**Output Formats:**
- Default: JSON format
- `--output plain` - Plain text format
- `--output table` - Table format with borders
**Examples:**
```bash
search -q "machine learning" # Search all datasets (JSON output)
search -d datasets/kb1 -q "neural networks" # Search in kb1
search -d datasets/kb1 -q "AI" --output plain # Plain text output
search -q "RAG" -k 20 -t 0.5 # Return 20 results with threshold 0.5
```
#### `cat <path>` - Display content
Display document content (if available).
**Examples:**
```bash
cat myskills/doc.md # Show content of doc.md file
cat datasets/kb1/document.pdf # Error: cannot display binary file content
```
## Command Examples
@ -71,6 +155,15 @@ SET DEFAULT LLM 'gpt-4';
SET DEFAULT EMBEDDING 'text-embedding-ada-002';
RESET DEFAULT LLM;
-- Context Engine (Virtual Filesystem)
ls; -- List all datasets (default 10)
ls -n 20; -- List 20 datasets
ls datasets/my_dataset; -- List documents in dataset
ls datasets/my_dataset -n 50; -- List 50 documents
ls datasets/my_dataset/info; -- Show dataset info
search -q "test"; -- Search all datasets (JSON output)
search -d datasets/my_dataset -q "test"; -- Search in specific dataset
-- Meta commands
\? -- Show help
\q -- Quit

View File

@ -17,6 +17,8 @@
package cli
import (
"context"
"encoding/json"
"errors"
"fmt"
"os"
@ -24,10 +26,13 @@ import (
"strconv"
"strings"
"syscall"
"unicode/utf8"
"github.com/peterh/liner"
"golang.org/x/term"
"gopkg.in/yaml.v3"
"ragflow/internal/cli/contextengine"
)
// ConfigFile represents the rf.yml configuration file structure
@ -38,16 +43,28 @@ type ConfigFile struct {
Password string `yaml:"password"`
}
// OutputFormat represents the output format type
type OutputFormat string
const (
OutputFormatTable OutputFormat = "table" // Table format with borders
OutputFormatPlain OutputFormat = "plain" // Plain text, space-separated (no borders)
OutputFormatJSON OutputFormat = "json" // JSON format (reserved for future use)
)
// ConnectionArgs holds the parsed command line arguments
type ConnectionArgs struct {
Host string
Port int
Password string
APIToken string
UserName string
Command string
ShowHelp bool
AdminMode bool
Host string
Port int
Password string
APIToken string
UserName string
Command string // Original command string (for SQL mode)
CommandArgs []string // Split command arguments (for ContextEngine mode)
IsSQLMode bool // true=SQL mode (quoted), false=ContextEngine mode (unquoted)
ShowHelp bool
AdminMode bool
OutputFormat OutputFormat // Output format: table, plain, json
}
// LoadDefaultConfigFile reads the rf.yml file from current directory if it exists
@ -111,13 +128,25 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) {
// First, scan args to check for help, config file, and admin mode
var configFilePath string
var adminMode bool = false
foundCommand := false
for i := 0; i < len(args); i++ {
arg := args[i]
if arg == "--help" || arg == "-help" {
// If we found a command (non-flag arg), stop processing global flags
// This allows subcommands like "search --help" to handle their own help
if !strings.HasPrefix(arg, "-") {
foundCommand = true
continue
}
// Only process --help as global help if it's before any command
if !foundCommand && (arg == "--help" || arg == "-help") {
return &ConnectionArgs{ShowHelp: true}, nil
} else if (arg == "-f" || arg == "--config") && i+1 < len(args) {
configFilePath = args[i+1]
i++
} else if (arg == "-o" || arg == "--output") && i+1 < len(args) {
// -o/--output is allowed with config file, skip it and its value
i++
continue
} else if arg == "--admin" {
adminMode = true
}
@ -130,7 +159,6 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) {
// Parse arguments manually to support both short and long forms
// and to handle priority: command line > config file > defaults
// Build result from config file first (if exists), then override with command line flags
result := &ConnectionArgs{}
if !adminMode {
@ -171,8 +199,18 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) {
// Override with command line flags (higher priority)
// Handle both short and long forms manually
// Once we encounter a non-flag argument (command), stop parsing global flags
// Remaining args belong to the subcommand
foundCommand = false
for i := 0; i < len(args); i++ {
arg := args[i]
// If we've found the command, collect remaining args as subcommand args
if foundCommand {
nonFlagArgs = append(nonFlagArgs, arg)
continue
}
switch arg {
case "-h", "--host":
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
@ -205,6 +243,20 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) {
if i+1 < len(args) {
i++
}
case "-o", "--output":
// Parse output format
if i+1 < len(args) && !strings.HasPrefix(args[i+1], "-") {
format := args[i+1]
switch format {
case "plain":
result.OutputFormat = OutputFormatPlain
case "json":
result.OutputFormat = OutputFormatJSON
default:
result.OutputFormat = OutputFormatTable
}
i++
}
case "--admin", "-admin":
result.AdminMode = true
case "--help", "-help":
@ -214,6 +266,7 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) {
// Non-flag argument (command)
if !strings.HasPrefix(arg, "-") {
nonFlagArgs = append(nonFlagArgs, arg)
foundCommand = true
}
}
}
@ -253,12 +306,42 @@ func ParseConnectionArgs(args []string) (*ConnectionArgs, error) {
// Get command from remaining args (non-flag arguments)
if len(nonFlagArgs) > 0 {
result.Command = strings.Join(nonFlagArgs, " ")
// Check if this is SQL mode or ContextEngine mode
// SQL mode: single argument that looks like SQL (e.g., "LIST DATASETS")
// ContextEngine mode: multiple arguments (e.g., "ls", "datasets")
if len(nonFlagArgs) == 1 && looksLikeSQL(nonFlagArgs[0]) {
// SQL mode: single argument that looks like SQL
result.IsSQLMode = true
result.Command = nonFlagArgs[0]
} else {
// ContextEngine mode: multiple arguments
result.IsSQLMode = false
result.CommandArgs = nonFlagArgs
// Also store joined version for backward compatibility
result.Command = strings.Join(nonFlagArgs, " ")
}
}
return result, nil
}
// looksLikeSQL reports whether the input resembles a SQL-style CLI
// command (e.g. "LIST USERS" or "CREATE DATASET ...") as opposed to a
// Context Engine command such as "ls" or "search". The check is a
// case-insensitive prefix match against the known SQL verbs.
func looksLikeSQL(s string) bool {
	normalized := strings.ToUpper(strings.TrimSpace(s))
	for _, keyword := range []string{
		"LIST ", "SHOW ", "CREATE ", "DROP ", "ALTER ",
		"LOGIN ", "REGISTER ", "PING", "GRANT ", "REVOKE ",
		"SET ", "UNSET ", "UPDATE ", "DELETE ", "INSERT ",
		"SELECT ", "DESCRIBE ", "EXPLAIN ",
	} {
		if strings.HasPrefix(normalized, keyword) {
			return true
		}
	}
	return false
}
// PrintUsage prints the CLI usage information
func PrintUsage() {
fmt.Println(`RAGFlow CLI Client
@ -271,6 +354,7 @@ Options:
-u, --user string Username for authentication
-p, --password string Password for authentication
-f, --config string Path to config file (YAML format)
-o, --output string Output format: table, plain, json (search defaults to json)
--admin, -admin Run in admin mode
--help Show this help message
@ -298,7 +382,8 @@ Configuration File:
Note: api_token and user_name/password are mutually exclusive in config file.
Commands:
SQL commands like: LOGIN USER 'email'; LIST USERS; etc.
SQL commands (use quotes): "LIST USERS", "CREATE USER 'email' 'password'", etc.
Context Engine commands (no quotes): ls datasets, search "keyword", cat path, etc.
If no command is provided, CLI runs in interactive mode.
`)
}
@ -312,11 +397,13 @@ const historyFileName = ".ragflow_cli_history"
// CLI represents the command line interface
type CLI struct {
client *RAGFlowClient
prompt string
running bool
line *liner.State
args *ConnectionArgs
client *RAGFlowClient
contextEngine *contextengine.Engine
prompt string
running bool
line *liner.State
args *ConnectionArgs
outputFormat OutputFormat // Output format
}
// NewCLI creates a new CLI instance
@ -352,17 +439,41 @@ func NewCLIWithArgs(args *ConnectionArgs) (*CLI, error) {
}
}
// Apply API token if provided (from config file)
if args.APIToken != "" {
client.HTTPClient.APIToken = args.APIToken
client.HTTPClient.useAPIToken = true
}
// Set output format
client.OutputFormat = args.OutputFormat
// Auto-login if user and password are provided (from config file)
if args.UserName != "" && args.Password != "" && args.APIToken == "" {
if err := client.LoginUserInteractive(args.UserName, args.Password); err != nil {
line.Close()
return nil, fmt.Errorf("auto-login failed: %w", err)
}
}
// Set prompt based on server type
prompt := "RAGFlow(user)> "
if serverType == "admin" {
prompt = "RAGFlow(admin)> "
}
// Create context engine and register providers
engine := contextengine.NewEngine()
engine.RegisterProvider(contextengine.NewDatasetProvider(&httpClientAdapter{client: client.HTTPClient}))
engine.RegisterProvider(contextengine.NewFileProvider(&httpClientAdapter{client: client.HTTPClient}))
return &CLI{
prompt: prompt,
client: client,
line: line,
args: args,
prompt: prompt,
client: client,
contextEngine: engine,
line: line,
args: args,
outputFormat: args.OutputFormat,
}, nil
}
@ -464,28 +575,360 @@ func (c *CLI) Run() error {
}
// execute dispatches one line of input to the appropriate executor.
//
// Routing order:
//  1. Meta commands (leading backslash, e.g. \q, \?) go to handleMetaCommand.
//  2. SQL-style commands (detected via args in non-interactive mode, or
//     looksLikeSQL in interactive mode) go through the SQL parser and client.
//  3. Everything else is treated as a Context Engine command.
//
// Fix: removed two leftover pre-refactor lines that re-parsed the input
// before the mode was determined (diff residue that shadowed the new flow).
func (c *CLI) execute(input string) error {
	input = strings.TrimSpace(input)

	// Handle meta commands (start with \).
	if strings.HasPrefix(input, "\\") {
		p := NewParser(input)
		cmd, err := p.Parse(c.args.AdminMode)
		if err != nil {
			return err
		}
		if cmd != nil && cmd.Type == "meta" {
			return c.handleMetaCommand(cmd)
		}
	}

	// Check if we should use SQL mode or ContextEngine mode.
	isSQLMode := false
	if c.args != nil && len(c.args.CommandArgs) > 0 {
		// Non-interactive mode: use the pre-determined mode from args.
		isSQLMode = c.args.IsSQLMode
	} else {
		// Interactive mode: determine the mode from the input itself.
		isSQLMode = looksLikeSQL(input)
	}

	if isSQLMode {
		// SQL mode: run the input through the recursive descent parser.
		p := NewParser(input)
		cmd, err := p.Parse(c.args.AdminMode)
		if err != nil {
			return err
		}
		if cmd == nil {
			return nil
		}
		// Execute the SQL command using the client and print the result
		// in the configured output format.
		var result ResponseIf
		result, err = c.client.ExecuteCommand(cmd)
		if result != nil {
			result.SetOutputFormat(c.outputFormat)
			result.PrintOut()
		}
		return err
	}

	// ContextEngine mode: execute a virtual-filesystem command.
	return c.executeContextEngine(input)
}
// executeContextEngine executes a Context Engine command
func (c *CLI) executeContextEngine(input string) error {
// Parse input into arguments
var args []string
if c.args != nil && len(c.args.CommandArgs) > 0 {
// Non-interactive mode: use pre-parsed args
args = c.args.CommandArgs
} else {
// Interactive mode: parse input
args = parseContextEngineArgs(input)
}
if len(args) == 0 {
return fmt.Errorf("no command provided")
}
// Check if we have a context engine
if c.contextEngine == nil {
return fmt.Errorf("context engine not available")
}
cmdType := args[0]
cmdArgs := args[1:]
// Build context engine command
var ceCmd *contextengine.Command
switch cmdType {
case "ls", "list":
// Parse list command arguments
listOpts, err := parseListCommandArgs(cmdArgs)
if err != nil {
return err
}
if listOpts == nil {
// Help was printed
return nil
}
ceCmd = &contextengine.Command{
Type: contextengine.CommandList,
Path: listOpts.Path,
Params: map[string]interface{}{
"limit": listOpts.Limit,
},
}
case "search":
// Parse search command arguments
searchOpts, err := parseSearchCommandArgs(cmdArgs)
if err != nil {
return err
}
if searchOpts == nil {
// Help was printed
return nil
}
// Determine the path for provider resolution
// Use first dir if specified, otherwise default to "datasets"
searchPath := "datasets"
if len(searchOpts.Dirs) > 0 {
searchPath = searchOpts.Dirs[0]
}
ceCmd = &contextengine.Command{
Type: contextengine.CommandSearch,
Path: searchPath,
Params: map[string]interface{}{
"query": searchOpts.Query,
"top_k": searchOpts.TopK,
"threshold": searchOpts.Threshold,
"dirs": searchOpts.Dirs,
},
}
case "cat":
if len(cmdArgs) == 0 {
return fmt.Errorf("cat requires a path argument")
}
// Handle cat command directly since it returns []byte, not *Result
content, err := c.contextEngine.Cat(context.Background(), cmdArgs[0])
if err != nil {
return err
}
if content == nil || len(content) == 0 {
fmt.Println("(empty file)")
} else if isBinaryContent(content) {
return fmt.Errorf("cannot display binary file content")
} else {
fmt.Println(string(content))
}
return nil
default:
return fmt.Errorf("unknown context engine command: %s", cmdType)
}
// Execute the command
result, err := c.contextEngine.Execute(context.Background(), ceCmd)
if err != nil {
return err
}
if cmd == nil {
return nil
// Print result
// For search command, default to JSON format if not explicitly set to plain/table
format := c.outputFormat
if ceCmd.Type == contextengine.CommandSearch && format != OutputFormatPlain && format != OutputFormatTable {
format = OutputFormatJSON
}
// Get limit for list command
limit := 0
if ceCmd.Type == contextengine.CommandList {
if l, ok := ceCmd.Params["limit"].(int); ok {
limit = l
}
}
c.printContextEngineResult(result, ceCmd.Type, format, limit)
return nil
}
// parseContextEngineArgs splits an interactive input line into Context
// Engine command arguments. Tokens are separated by spaces and tabs;
// single or double quotes group a token that may contain whitespace.
//
// Notes on the tokenizer's behavior (preserved from the original):
//   - An opening quote also terminates any token accumulated so far,
//     so `ab"c d"` yields ["ab", "c d"].
//   - A closing quote always emits the quoted token, even when empty.
//   - An unterminated quote is tolerated; pending text becomes the
//     final argument.
//
// Fix: removed leftover lines from the old execute() body (references
// to cmd/result) that the diff interleaved into this function and that
// made it malformed.
func parseContextEngineArgs(input string) []string {
	var args []string
	var current strings.Builder
	inQuote := false
	var quoteChar rune
	for _, ch := range input {
		switch ch {
		case '"', '\'':
			if !inQuote {
				// Opening quote: flush any pending token first.
				inQuote = true
				quoteChar = ch
				if current.Len() > 0 {
					args = append(args, current.String())
					current.Reset()
				}
			} else if ch == quoteChar {
				// Matching closing quote ends the quoted token.
				inQuote = false
				args = append(args, current.String())
				current.Reset()
			} else {
				// The other quote character is literal inside a quote.
				current.WriteRune(ch)
			}
		case ' ', '\t':
			if inQuote {
				current.WriteRune(ch)
			} else if current.Len() > 0 {
				args = append(args, current.String())
				current.Reset()
			}
		default:
			current.WriteRune(ch)
		}
	}
	if current.Len() > 0 {
		args = append(args, current.String())
	}
	return args
}
// printContextEngineResult renders the result of a context engine
// command in the requested output format.
//
// List results are truncated to limit rows when limit > 0. Search
// results print as JSON (default), plain text, or a bordered table;
// plain/table search output is capped at 100 rows. Cat results never
// reach this function in the normal flow: Cat returns []byte and is
// printed by the caller.
//
// Fix: removed a stray "return err" residue line after the switch —
// this function has no return value, so that line could not compile.
func (c *CLI) printContextEngineResult(result *contextengine.Result, cmdType contextengine.CommandType, format OutputFormat, limit int) {
	if result == nil {
		return
	}
	switch cmdType {
	case contextengine.CommandList:
		if len(result.Nodes) == 0 {
			fmt.Println("(empty)")
			return
		}
		displayCount := len(result.Nodes)
		if limit > 0 && displayCount > limit {
			displayCount = limit
		}
		if format == OutputFormatPlain {
			// Plain format: simple space-separated, no headers.
			for i := 0; i < displayCount; i++ {
				node := result.Nodes[i]
				fmt.Printf("%s %s %s %s\n", node.Name, node.Type, node.Path, node.CreatedAt.Format("2006-01-02 15:04"))
			}
		} else {
			// Table format: with headers and aligned columns.
			fmt.Printf("%-30s %-12s %-50s %-20s\n", "NAME", "TYPE", "PATH", "CREATED")
			fmt.Println(strings.Repeat("-", 112))
			for i := 0; i < displayCount; i++ {
				node := result.Nodes[i]
				created := node.CreatedAt.Format("2006-01-02 15:04")
				if node.CreatedAt.IsZero() {
					created = "-"
				}
				// Remove leading "/" from the path for display.
				displayPath := node.Path
				if strings.HasPrefix(displayPath, "/") {
					displayPath = displayPath[1:]
				}
				fmt.Printf("%-30s %-12s %-50s %-20s\n", node.Name, node.Type, displayPath, created)
			}
		}
		if limit > 0 && result.Total > limit {
			fmt.Printf("\n... and %d more (use -n to show more)\n", result.Total-limit)
		}
		fmt.Printf("Total: %d\n", result.Total)
	case contextengine.CommandSearch:
		if len(result.Nodes) == 0 {
			if format == OutputFormatJSON {
				fmt.Println("[]")
			} else {
				fmt.Println("No results found")
			}
			return
		}
		// Build data for output (same fields for all formats:
		// content, path, score).
		type searchResult struct {
			Content string  `json:"content"`
			Path    string  `json:"path"`
			Score   float64 `json:"score,omitempty"`
		}
		results := make([]searchResult, 0, len(result.Nodes))
		for _, node := range result.Nodes {
			content := node.Name
			if content == "" {
				content = "(empty)"
			}
			displayPath := node.Path
			if strings.HasPrefix(displayPath, "/") {
				displayPath = displayPath[1:]
			}
			// Providers report relevance under either "similarity" or
			// "_score"; take whichever is present.
			var score float64
			if s, ok := node.Metadata["similarity"].(float64); ok {
				score = s
			} else if s, ok := node.Metadata["_score"].(float64); ok {
				score = s
			}
			results = append(results, searchResult{
				Content: content,
				Path:    displayPath,
				Score:   score,
			})
		}
		// Output based on format.
		if format == OutputFormatJSON {
			jsonData, err := json.MarshalIndent(results, "", "  ")
			if err != nil {
				fmt.Printf("Error marshaling JSON: %v\n", err)
				return
			}
			fmt.Println(string(jsonData))
		} else if format == OutputFormatPlain {
			// Plain format: simple space-separated, no borders.
			fmt.Printf("%-70s %-50s %-10s\n", "CONTENT", "PATH", "SCORE")
			for i, sr := range results {
				content := strings.Join(strings.Fields(sr.Content), " ")
				if len(content) > 70 {
					content = content[:67] + "..."
				}
				displayPath := sr.Path
				if len(displayPath) > 50 {
					displayPath = displayPath[:47] + "..."
				}
				scoreStr := "-"
				if sr.Score > 0 {
					scoreStr = fmt.Sprintf("%.4f", sr.Score)
				}
				fmt.Printf("%-70s %-50s %-10s\n", content, displayPath, scoreStr)
				if i >= 99 {
					fmt.Printf("\n... and %d more results\n", result.Total-i-1)
					break
				}
			}
			fmt.Printf("\nTotal: %d\n", result.Total)
		} else {
			// Table format: with borders.
			col1Width, col2Width, col3Width := 70, 50, 10
			sep := "+" + strings.Repeat("-", col1Width+2) + "+" + strings.Repeat("-", col2Width+2) + "+" + strings.Repeat("-", col3Width+2) + "+"
			fmt.Println(sep)
			fmt.Printf("| %-70s | %-50s | %-10s |\n", "CONTENT", "PATH", "SCORE")
			fmt.Println(sep)
			for i, sr := range results {
				content := strings.Join(strings.Fields(sr.Content), " ")
				if len(content) > 70 {
					content = content[:67] + "..."
				}
				displayPath := sr.Path
				if len(displayPath) > 50 {
					displayPath = displayPath[:47] + "..."
				}
				scoreStr := "-"
				if sr.Score > 0 {
					scoreStr = fmt.Sprintf("%.4f", sr.Score)
				}
				fmt.Printf("| %-70s | %-50s | %-10s |\n", content, displayPath, scoreStr)
				if i >= 99 {
					fmt.Printf("\n... and %d more results\n", result.Total-i-1)
					break
				}
			}
			fmt.Println(sep)
			fmt.Printf("Total: %d\n", result.Total)
		}
	case contextengine.CommandCat:
		// Cat is handled by the caller (Cat returns []byte, not *Result);
		// this branch is defensive and should not be reached normally.
		fmt.Println("Content retrieved")
	}
}
func (c *CLI) handleMetaCommand(cmd *Command) error {
@ -574,24 +1017,25 @@ Commands (User Mode):
CREATE INDEX DOC_META; - Create doc meta index
DROP INDEX DOC_META; - Drop doc meta index
Commands (Admin Mode):
LIST USERS; - List all users
SHOW USER 'email'; - Show user details
CREATE USER 'email' 'password'; - Create new user
DROP USER 'email'; - Delete user
ALTER USER PASSWORD 'email' 'new_password'; - Change user password
ALTER USER ACTIVE 'email' on/off; - Activate/deactivate user
GRANT ADMIN 'email'; - Grant admin role
REVOKE ADMIN 'email'; - Revoke admin role
LIST SERVICES; - List services
SHOW SERVICE <id>; - Show service details
PING; - Ping server
... and many more
Context Engine Commands (no quotes):
ls [path] - List resources
e.g., ls - List root (providers and folders)
e.g., ls datasets - List all datasets
e.g., ls datasets/kb1 - Show dataset info
e.g., ls myfolder - List files in 'myfolder' (file_manager)
list [path] - Same as ls
search [options] - Search resources in datasets
Use 'search -h' for detailed options
cat <path> - Show file content
e.g., cat files/docs/file.txt - Show file content
Note: cat datasets or cat datasets/kb1 will error
Meta Commands:
\? or \h - Show this help
\q or \quit - Exit CLI
\c or \clear - Clear screen
Examples:
ragflow_cli -f rf.yml "LIST USERS" # SQL mode (with quotes)
ragflow_cli -f rf.yml ls datasets # Context Engine mode (no quotes)
ragflow_cli -f rf.yml ls files # List files in root
ragflow_cli -f rf.yml cat datasets # Error: datasets is a directory
ragflow_cli -f rf.yml ls files/myfolder # List folder contents
For more information, see documentation.
`
@ -600,7 +1044,10 @@ For more information, see documentation.
// Cleanup performs cleanup before exit.
// Closing the liner editor restores the terminal from raw mode; the nil
// check makes it safe to call even if the editor was never initialized.
func (c *CLI) Cleanup() {
	fmt.Println("\nCleaning up...")
	// Close liner to restore terminal settings
	if c.line != nil {
		c.line.Close()
	}
}
// RunInteractive runs the CLI in interactive mode
@ -624,6 +1071,9 @@ func RunInteractive() error {
// RunSingleCommand executes a single command and exits
func (c *CLI) RunSingleCommand(command string) error {
// Ensure cleanup is called on exit to restore terminal settings
defer c.Cleanup()
// Execute the command
if err := c.execute(command); err != nil {
return err
@ -659,3 +1109,227 @@ func (c *CLI) VerifyAuth() error {
_, err := c.client.ExecuteCommand(cmd)
return err
}
// isBinaryContent reports whether content should be treated as binary:
// either it contains a NUL byte (a strong binary-file indicator that is
// still valid UTF-8, hence the separate scan) or it is not valid UTF-8.
func isBinaryContent(content []byte) bool {
	for i := 0; i < len(content); i++ {
		if content[i] == 0 {
			return true
		}
	}
	return !utf8.Valid(content)
}
// SearchCommandOptions holds parsed search command options.
type SearchCommandOptions struct {
	Query     string   // search query text (required)
	TopK      int      // maximum number of results to return (default 10)
	Threshold float64  // similarity threshold, 0.0-1.0 (default 0.2)
	Dirs      []string // directories to search; defaults to ["datasets"] when empty
}
// ListCommandOptions holds parsed list/ls command options.
type ListCommandOptions struct {
	Path  string // virtual path to list; empty string means the root
	Limit int    // maximum number of items to display (default 10)
}
// parseSearchCommandArgs parses search command arguments.
//
// Supported forms:
//
//	search [-d dir]... -q query [-k top_k] [-t threshold]
//	search -h|--help                 (prints help; returns nil, nil)
//	search [path] <query words...>   (legacy positional form)
//
// Returns (nil, nil) after printing help. Defaults: TopK 10,
// Threshold 0.2, Dirs ["datasets"] when none are given.
//
// Fix: in the legacy positional form, the query previously absorbed the
// VALUES of any flags that followed it (the scan skipped "-k" but kept
// "20"), and the switch-scoped `break` did not stop the loop, so the
// consumed words were re-scanned. The query now ends at the first flag
// and the loop resumes there, so trailing flags parse normally.
func parseSearchCommandArgs(args []string) (*SearchCommandOptions, error) {
	opts := &SearchCommandOptions{
		TopK:      10,
		Threshold: 0.2,
		Dirs:      []string{},
	}

	// A help flag anywhere short-circuits parsing.
	for _, arg := range args {
		if arg == "-h" || arg == "--help" {
			printSearchHelp()
			return nil, nil
		}
	}

	i := 0
	for i < len(args) {
		arg := args[i]
		switch arg {
		case "-d", "--dir":
			if i+1 >= len(args) {
				return nil, fmt.Errorf("missing value for %s flag", arg)
			}
			opts.Dirs = append(opts.Dirs, args[i+1])
			i += 2
		case "-q", "--query":
			if i+1 >= len(args) {
				return nil, fmt.Errorf("missing value for %s flag", arg)
			}
			opts.Query = args[i+1]
			i += 2
		case "-k", "--top-k":
			if i+1 >= len(args) {
				return nil, fmt.Errorf("missing value for %s flag", arg)
			}
			topK, err := strconv.Atoi(args[i+1])
			if err != nil {
				return nil, fmt.Errorf("invalid top-k value: %s", args[i+1])
			}
			opts.TopK = topK
			i += 2
		case "-t", "--threshold":
			if i+1 >= len(args) {
				return nil, fmt.Errorf("missing value for %s flag", arg)
			}
			threshold, err := strconv.ParseFloat(args[i+1], 64)
			if err != nil {
				return nil, fmt.Errorf("invalid threshold value: %s", args[i+1])
			}
			opts.Threshold = threshold
			i += 2
		default:
			if strings.HasPrefix(arg, "-") {
				return nil, fmt.Errorf("unknown flag: %s", arg)
			}
			switch {
			case opts.Query == "" && i == len(args)-1:
				// Single trailing bare word: treat it as the query.
				opts.Query = arg
				i++
			case opts.Query == "":
				// Legacy format: search <path> <query words...>.
				// The first bare token is a directory; the following run
				// of bare tokens forms the query. Stop at the first flag
				// so flag values are not swallowed into the query.
				opts.Dirs = append(opts.Dirs, arg)
				j := i + 1
				queryParts := []string{}
				for j < len(args) && !strings.HasPrefix(args[j], "-") {
					queryParts = append(queryParts, args[j])
					j++
				}
				opts.Query = strings.Join(queryParts, " ")
				i = j
			default:
				// Extra bare token after the query is already set:
				// silently ignore it (matches previous behavior).
				i++
			}
		}
	}

	// Validate required parameters.
	if opts.Query == "" {
		return nil, fmt.Errorf("query is required (use -q or --query)")
	}
	// No directories specified: search all datasets.
	if len(opts.Dirs) == 0 {
		opts.Dirs = []string{"datasets"}
	}
	return opts, nil
}
// printSearchHelp prints usage information for the `search` command to
// stdout. Kept as a single raw-string literal so the help text is easy
// to read and edit in place.
func printSearchHelp() {
	help := `Search command usage: search [options]
Search for content in datasets. Currently only supports searching in datasets.
Options:
-d, --dir <path> Directory to search in (can be specified multiple times)
Currently only supports paths under 'datasets/'
Example: -d datasets/kb1 -d datasets/kb2
-q, --query <query> Search query (required)
Example: -q "machine learning"
-k, --top-k <number> Number of top results to return (default: 10)
Example: -k 20
-t, --threshold <num> Similarity threshold, 0.0-1.0 (default: 0.2)
Example: -t 0.5
-h, --help Show this help message
Output:
Default output format is JSON. Use --output plain or --output table for other formats.
Examples:
search -d datasets/kb1 -q "neural networks" # Search in kb1 (JSON output)
search -d datasets/kb1 -q "AI" --output plain # Search with plain text output
search -q "data mining" # Search all datasets
search -q "RAG" -k 20 -t 0.5 # Return 20 results with threshold 0.5
`
	fmt.Println(help)
}
// printListHelp prints usage information for the `ls`/`list` command to
// stdout. Kept as a single raw-string literal so the help text is easy
// to read and edit in place.
func printListHelp() {
	help := `List command usage: ls [path] [options]
List contents of a path in the context filesystem.
Arguments:
[path] Path to list (default: root - shows all providers and folders)
Examples: datasets, datasets/kb1, myfolder
Options:
-n, --limit <number> Maximum number of items to display (default: 10)
Example: -n 20
-h, --help Show this help message
Examples:
ls # List root (all providers and file_manager folders)
ls datasets # List all datasets
ls datasets/kb1 # List files in kb1 dataset (default 10 items)
ls myfolder # List files in file_manager folder 'myfolder'
ls -n 5 # List 5 items at root
`
	fmt.Println(help)
}
// parseListCommandArgs parses list/ls command arguments.
// Format: ls [path] [-n limit] [-h|--help]
//
// Returns (nil, nil) after printing help. A bare (non-flag) token is
// taken as the path; an empty path means "list the root". The default
// display limit is 10.
func parseListCommandArgs(args []string) (*ListCommandOptions, error) {
	opts := &ListCommandOptions{
		Path:  "", // empty path lists the root (providers and folders)
		Limit: 10,
	}

	// A help flag anywhere short-circuits parsing.
	for _, arg := range args {
		if arg == "-h" || arg == "--help" {
			printListHelp()
			return nil, nil
		}
	}

	for i := 0; i < len(args); {
		arg := args[i]
		if arg == "-n" || arg == "--limit" {
			if i+1 >= len(args) {
				return nil, fmt.Errorf("missing value for %s flag", arg)
			}
			limit, err := strconv.Atoi(args[i+1])
			if err != nil {
				return nil, fmt.Errorf("invalid limit value: %s", args[i+1])
			}
			opts.Limit = limit
			i += 2
			continue
		}
		if strings.HasPrefix(arg, "-") {
			return nil, fmt.Errorf("unknown flag: %s", arg)
		}
		// Bare token: treat as the path (a later one overwrites).
		opts.Path = arg
		i++
	}
	return opts, nil
}

View File

@ -18,13 +18,17 @@ package cli
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"os/exec"
"strings"
"syscall"
"time"
"unsafe"
ce "ragflow/internal/cli/contextengine"
)
// PasswordPromptFunc is a function type for password input
@ -35,6 +39,8 @@ type RAGFlowClient struct {
HTTPClient *HTTPClient
ServerType string // "admin" or "user"
PasswordPrompt PasswordPromptFunc // Function for password input
OutputFormat OutputFormat // Output format: table, plain, json
ContextEngine *ce.Engine // Context Engine for virtual filesystem
}
// NewRAGFlowClient creates a new RAGFlow client
@ -47,10 +53,54 @@ func NewRAGFlowClient(serverType string) *RAGFlowClient {
httpClient.Port = 9380
}
return &RAGFlowClient{
client := &RAGFlowClient{
HTTPClient: httpClient,
ServerType: serverType,
}
// Initialize Context Engine
client.initContextEngine()
return client
}
// initContextEngine initializes the Context Engine and registers its
// providers. Only the dataset provider is registered here; the HTTP
// client is wrapped in httpClientAdapter so the contextengine package
// stays decoupled from this package's HTTPClient type.
func (c *RAGFlowClient) initContextEngine() {
	engine := ce.NewEngine()
	// Register providers
	engine.RegisterProvider(ce.NewDatasetProvider(&httpClientAdapter{c.HTTPClient}))
	c.ContextEngine = engine
}
// httpClientAdapter adapts HTTPClient to ce.HTTPClientInterface so that
// the contextengine package can make API requests without importing this
// package's HTTP client type.
type httpClientAdapter struct {
	client *HTTPClient // underlying client used to perform requests
}
// Request forwards an HTTP request to the underlying HTTPClient and
// converts the response into the contextengine package's HTTPResponse.
//
// When authKind is "auto" or empty, the auth scheme is chosen from the
// tokens configured on the client: "api" when an API token is in use,
// otherwise "web" (the web login session), which is also the fallback
// when no token is present at all.
//
// Fix: collapsed the redundant `else if LoginToken != ""` branch — it
// assigned the same value ("web") as the final else.
func (a *httpClientAdapter) Request(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (*ce.HTTPResponse, error) {
	// Auto-detect the auth kind based on available tokens.
	if authKind == "auto" || authKind == "" {
		if a.client.useAPIToken && a.client.APIToken != "" {
			authKind = "api"
		} else {
			// Default to web-session auth even without a login token;
			// the server rejects unauthenticated calls as before.
			authKind = "web"
		}
	}
	resp, err := a.client.Request(method, path, useAPIBase, authKind, headers, jsonBody)
	if err != nil {
		return nil, err
	}
	return &ce.HTTPResponse{
		StatusCode: resp.StatusCode,
		Body:       resp.Body,
		Headers:    resp.Headers,
		Duration:   resp.Duration,
	}, nil
}
// LoginUserInteractive performs interactive login with username and password
@ -413,6 +463,11 @@ func (c *RAGFlowClient) ExecuteUserCommand(cmd *Command) (ResponseIf, error) {
return c.CreateDocMetaIndex(cmd)
case "drop_doc_meta_index":
return c.DropDocMetaIndex(cmd)
// ContextEngine commands
case "ce_ls":
return c.CEList(cmd)
case "ce_search":
return c.CESearch(cmd)
// TODO: Implement other commands
default:
return nil, fmt.Errorf("command '%s' would be executed with API", cmd.Type)
@ -432,13 +487,15 @@ type ResponseIf interface {
Type() string
PrintOut()
TimeCost() float64
SetOutputFormat(format OutputFormat)
}
type CommonResponse struct {
Code int `json:"code"`
Data []map[string]interface{} `json:"data"`
Message string `json:"message"`
Duration float64
Code int `json:"code"`
Data []map[string]interface{} `json:"data"`
Message string `json:"message"`
Duration float64
outputFormat OutputFormat
}
func (r *CommonResponse) Type() string {
@ -449,9 +506,13 @@ func (r *CommonResponse) TimeCost() float64 {
return r.Duration
}
func (r *CommonResponse) SetOutputFormat(format OutputFormat) {
r.outputFormat = format
}
func (r *CommonResponse) PrintOut() {
if r.Code == 0 {
PrintTableSimple(r.Data)
PrintTableSimpleByFormat(r.Data, r.outputFormat)
} else {
fmt.Println("ERROR")
fmt.Printf("%d, %s\n", r.Code, r.Message)
@ -459,10 +520,11 @@ func (r *CommonResponse) PrintOut() {
}
type CommonDataResponse struct {
Code int `json:"code"`
Data map[string]interface{} `json:"data"`
Message string `json:"message"`
Duration float64
Code int `json:"code"`
Data map[string]interface{} `json:"data"`
Message string `json:"message"`
Duration float64
outputFormat OutputFormat
}
func (r *CommonDataResponse) Type() string {
@ -473,11 +535,15 @@ func (r *CommonDataResponse) TimeCost() float64 {
return r.Duration
}
func (r *CommonDataResponse) SetOutputFormat(format OutputFormat) {
r.outputFormat = format
}
func (r *CommonDataResponse) PrintOut() {
if r.Code == 0 {
table := make([]map[string]interface{}, 0)
table = append(table, r.Data)
PrintTableSimple(table)
PrintTableSimpleByFormat(table, r.outputFormat)
} else {
fmt.Println("ERROR")
fmt.Printf("%d, %s\n", r.Code, r.Message)
@ -485,9 +551,10 @@ func (r *CommonDataResponse) PrintOut() {
}
type SimpleResponse struct {
Code int `json:"code"`
Message string `json:"message"`
Duration float64
Code int `json:"code"`
Message string `json:"message"`
Duration float64
outputFormat OutputFormat
}
func (r *SimpleResponse) Type() string {
@ -498,6 +565,10 @@ func (r *SimpleResponse) TimeCost() float64 {
return r.Duration
}
func (r *SimpleResponse) SetOutputFormat(format OutputFormat) {
r.outputFormat = format
}
func (r *SimpleResponse) PrintOut() {
if r.Code == 0 {
fmt.Println("SUCCESS")
@ -508,9 +579,10 @@ func (r *SimpleResponse) PrintOut() {
}
type RegisterResponse struct {
Code int `json:"code"`
Message string `json:"message"`
Duration float64
Code int `json:"code"`
Message string `json:"message"`
Duration float64
outputFormat OutputFormat
}
func (r *RegisterResponse) Type() string {
@ -521,6 +593,10 @@ func (r *RegisterResponse) TimeCost() float64 {
return r.Duration
}
func (r *RegisterResponse) SetOutputFormat(format OutputFormat) {
r.outputFormat = format
}
func (r *RegisterResponse) PrintOut() {
if r.Code == 0 {
fmt.Println("Register successfully")
@ -536,12 +612,17 @@ type BenchmarkResponse struct {
SuccessCount int `json:"success_count"`
FailureCount int `json:"failure_count"`
Concurrency int
outputFormat OutputFormat
}
func (r *BenchmarkResponse) Type() string {
return "benchmark"
}
func (r *BenchmarkResponse) SetOutputFormat(format OutputFormat) {
r.outputFormat = format
}
func (r *BenchmarkResponse) PrintOut() {
if r.Code != 0 {
fmt.Printf("ERROR, Code: %d\n", r.Code)
@ -565,10 +646,11 @@ func (r *BenchmarkResponse) TimeCost() float64 {
}
type KeyValueResponse struct {
Code int `json:"code"`
Key string `json:"key"`
Value string `json:"data"`
Duration float64
Code int `json:"code"`
Key string `json:"key"`
Value string `json:"data"`
Duration float64
outputFormat OutputFormat
}
func (r *KeyValueResponse) Type() string {
@ -579,6 +661,10 @@ func (r *KeyValueResponse) TimeCost() float64 {
return r.Duration
}
func (r *KeyValueResponse) SetOutputFormat(format OutputFormat) {
r.outputFormat = format
}
func (r *KeyValueResponse) PrintOut() {
if r.Code == 0 {
table := make([]map[string]interface{}, 0)
@ -587,9 +673,175 @@ func (r *KeyValueResponse) PrintOut() {
"key": r.Key,
"value": r.Value,
})
PrintTableSimple(table)
PrintTableSimpleByFormat(table, r.outputFormat)
} else {
fmt.Println("ERROR")
fmt.Printf("%d\n", r.Code)
}
}
// ==================== ContextEngine Commands ====================

// CEListResponse is the response produced by the Context Engine ls command.
type CEListResponse struct {
	Code         int                      `json:"code"`
	Data         []map[string]interface{} `json:"data"`
	Message      string                   `json:"message"`
	Duration     float64
	outputFormat OutputFormat
}

// Type identifies the response kind.
func (r *CEListResponse) Type() string { return "ce_ls" }

// TimeCost reports the elapsed time recorded for the command.
func (r *CEListResponse) TimeCost() float64 { return r.Duration }

// SetOutputFormat selects how PrintOut renders the data.
func (r *CEListResponse) SetOutputFormat(format OutputFormat) { r.outputFormat = format }

// PrintOut renders the listing, or an error banner when Code is non-zero.
func (r *CEListResponse) PrintOut() {
	if r.Code != 0 {
		fmt.Println("ERROR")
		fmt.Printf("%d, %s\n", r.Code, r.Message)
		return
	}
	PrintTableSimpleByFormat(r.Data, r.outputFormat)
}
// CEList implements the ls command by delegating to the Context Engine.
// The "path" parameter defaults to "datasets" when absent or empty.
func (c *RAGFlowClient) CEList(cmd *Command) (ResponseIf, error) {
	target := "datasets"
	if p, ok := cmd.Params["path"].(string); ok && p != "" {
		target = p
	}

	// Carry the optional flags over into engine list options.
	opts := &ce.ListOptions{}
	if v, ok := cmd.Params["recursive"].(bool); ok {
		opts.Recursive = v
	}
	if v, ok := cmd.Params["limit"].(int); ok {
		opts.Limit = v
	}
	if v, ok := cmd.Params["offset"].(int); ok {
		opts.Offset = v
	}

	result, err := c.ContextEngine.List(context.Background(), target, opts)
	if err != nil {
		return nil, err
	}

	return &CEListResponse{
		Code:         0,
		Data:         ce.FormatNodes(result.Nodes, string(c.OutputFormat)),
		outputFormat: c.OutputFormat,
	}, nil
}
// getStringValue stringifies an arbitrary value: nil becomes "", strings
// pass through unchanged, and everything else is rendered with %v.
func getStringValue(v interface{}) string {
	switch s := v.(type) {
	case nil:
		return ""
	case string:
		return s
	default:
		return fmt.Sprintf("%v", s)
	}
}
// formatTimeValue converts a timestamp (milliseconds or string) to readable format
func formatTimeValue(v interface{}) string {
if v == nil {
return ""
}
var ts int64
switch val := v.(type) {
case float64:
ts = int64(val)
case int64:
ts = val
case int:
ts = int64(val)
case string:
// Try to parse as number
if _, err := fmt.Sscanf(val, "%d", &ts); err != nil {
// If it's already a formatted date string, return as is
return val
}
default:
return fmt.Sprintf("%v", v)
}
// Convert milliseconds to seconds if timestamp is in milliseconds (13 digits)
if ts > 1e12 {
ts = ts / 1000
}
t := time.Unix(ts, 0)
return t.Format("2006-01-02 15:04:05")
}
// CESearchResponse is the response produced by the Context Engine search command.
type CESearchResponse struct {
	Code         int                      `json:"code"`
	Data         []map[string]interface{} `json:"data"`
	Total        int                      `json:"total"`
	Message      string                   `json:"message"`
	Duration     float64
	outputFormat OutputFormat
}

// Type identifies the response kind.
func (r *CESearchResponse) Type() string { return "ce_search" }

// TimeCost reports the elapsed time recorded for the command.
func (r *CESearchResponse) TimeCost() float64 { return r.Duration }

// SetOutputFormat selects how PrintOut renders the data.
func (r *CESearchResponse) SetOutputFormat(format OutputFormat) { r.outputFormat = format }

// PrintOut shows the hit count and the result table, or an error banner
// when Code is non-zero.
func (r *CESearchResponse) PrintOut() {
	if r.Code != 0 {
		fmt.Println("ERROR")
		fmt.Printf("%d, %s\n", r.Code, r.Message)
		return
	}
	fmt.Printf("Found %d results:\n", r.Total)
	PrintTableSimpleByFormat(r.Data, r.outputFormat)
}
// CESearch implements the search command by delegating to the Context Engine.
// The "path" parameter defaults to "datasets" when absent or empty.
func (c *RAGFlowClient) CESearch(cmd *Command) (ResponseIf, error) {
	target := "datasets"
	if p, ok := cmd.Params["path"].(string); ok && p != "" {
		target = p
	}
	q, _ := cmd.Params["query"].(string)

	// Carry the optional flags over into engine search options.
	opts := &ce.SearchOptions{Query: q}
	if v, ok := cmd.Params["limit"].(int); ok {
		opts.Limit = v
	}
	if v, ok := cmd.Params["offset"].(int); ok {
		opts.Offset = v
	}
	if v, ok := cmd.Params["recursive"].(bool); ok {
		opts.Recursive = v
	}

	result, err := c.ContextEngine.Search(context.Background(), target, opts)
	if err != nil {
		return nil, err
	}

	return &CESearchResponse{
		Code:         0,
		Total:        result.Total,
		Data:         ce.FormatNodes(result.Nodes, string(c.OutputFormat)),
		outputFormat: c.OutputFormat,
	}, nil
}

View File

@ -0,0 +1,49 @@
# ContextFS - Context Engine File System
ContextFS is a context engine interface for RAGFlow, providing users with a Unix-like file system interface to manage datasets, tools, skills, and memories.
## Directory Structure
```
user_id/
├── datasets/
│ └── my_dataset/
│ └── ...
├── tools/
│ ├── registry.json
│ └── tool_name/
│ ├── DOC.md
│ └── ...
├── skills/
│ ├── registry.json
│ └── skill_name/
│ ├── SKILL.md
│ └── ...
└── memories/
└── memory_id/
├── sessions/
│ ├── messages/
│ ├── summaries/
│ │ └── session_id/
│ │ └── summary-{datetime}.md
│ └── tools/
│ └── session_id/
│ └── {tool_name}.md # User level of memory on Tools usage
├── users/
│ ├── profile.md
│ ├── preferences/
│ └── entities/
└── agents/
└── agent_space/
├── tools/
│ └── {tool_name}.md # Agent level of memory on Tools usage
└── skills/
└── {skill_name}.md # Agent level of memory on Skills usage
```
## Supported Commands
- `ls [path]` - List directory contents
- `cat <path>` - Display file contents (text files only)
- `search <query>` - Search content

View File

@ -0,0 +1,781 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"encoding/json"
"fmt"
"strconv"
"strings"
"time"
)
// HTTPResponse represents an HTTP response
type HTTPResponse struct {
StatusCode int
Body []byte
Headers map[string][]string
Duration float64
}
// HTTPClientInterface defines the interface needed from HTTPClient
type HTTPClientInterface interface {
Request(method, path string, useAPIBase bool, authKind string, headers map[string]string, jsonBody map[string]interface{}) (*HTTPResponse, error)
}
// DatasetProvider handles datasets and their documents.
// Path structure:
//   - datasets/                  -> list all datasets
//   - datasets/{name}            -> list documents in the dataset
//   - datasets/{name}/{doc_name} -> get info for a single document
type DatasetProvider struct {
	BaseProvider
	httpClient HTTPClientInterface // transport used for all server API calls
}
// NewDatasetProvider creates a new DatasetProvider that performs its API
// calls through the given HTTP client.
func NewDatasetProvider(httpClient HTTPClientInterface) *DatasetProvider {
	p := &DatasetProvider{httpClient: httpClient}
	p.BaseProvider = BaseProvider{
		name:        "datasets",
		description: "Dataset management provider",
		rootPath:    "datasets",
	}
	return p
}
// Supports reports whether this provider owns the given path, i.e. it is
// exactly "datasets" or lies underneath it.
func (p *DatasetProvider) Supports(path string) bool {
	np := normalizePath(path)
	if np == "datasets" {
		return true
	}
	return strings.HasPrefix(np, "datasets/")
}
// List lists nodes at the given path.
// subPath is relative to the provider root "datasets/": an empty subPath
// lists every dataset, one component lists the documents inside that
// dataset, and two components resolve a single document. The hidden
// ".knowledgebase" dataset is never reachable.
func (p *DatasetProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
	// Refuse the reserved internal dataset outright.
	if subPath == ".knowledgebase" || strings.HasPrefix(subPath, ".knowledgebase/") {
		return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
	}
	if subPath == "" {
		return p.listDatasets(ctx, opts)
	}
	parts := SplitPath(subPath)
	switch len(parts) {
	case 1:
		// datasets/{name}: default to listing the dataset's documents.
		return p.listDocuments(ctx, parts[0], opts)
	case 2:
		// datasets/{name}/{doc_name}: resolve one document.
		return p.getDocumentNode(ctx, parts[0], parts[1])
	default:
		return nil, fmt.Errorf("invalid path: %s", subPath)
	}
}
// Search searches for datasets or documents under subPath.
// An empty query degrades to a plain listing with the same paging options.
// NOTE(review): only the "{name}/files" form routes into a single dataset;
// a bare "datasets/{name}" path falls through to searchDatasets, even though
// List treats "datasets/{name}" as that dataset's documents. Confirm this
// asymmetry is intended (searchDatasets can still narrow via opts.Dirs).
func (p *DatasetProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	if opts.Query == "" {
		return p.List(ctx, subPath, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}
	// If searching under a specific dataset's files
	parts := SplitPath(subPath)
	if len(parts) >= 2 && parts[1] == "files" {
		datasetName := parts[0]
		return p.searchDocuments(ctx, datasetName, opts)
	}
	// Otherwise search datasets
	return p.searchDatasets(ctx, opts)
}
// Cat retrieves file content for a path under datasets/.
// Every level that exists today is a directory ("datasets", "datasets/{name}")
// and document content retrieval is not implemented yet, so all inputs
// currently yield an error.
func (p *DatasetProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
	if subPath == "" {
		return nil, fmt.Errorf("'datasets' is a directory, not a file")
	}
	switch parts := SplitPath(subPath); len(parts) {
	case 1:
		// datasets/{name} is a dataset, i.e. a directory.
		return nil, fmt.Errorf("'%s' is a directory, not a file", parts[0])
	case 2:
		// datasets/{name}/{doc_name}: reading the document body is a TODO.
		return nil, fmt.Errorf("document content retrieval not yet implemented for '%s'", parts[1])
	default:
		return nil, fmt.Errorf("invalid path for cat: %s", subPath)
	}
}
// ==================== Dataset Operations ====================

// listDatasets fetches every dataset visible to the caller and converts
// each to a directory node, hiding the internal ".knowledgebase" dataset.
// Total reflects the full (filtered) count; Offset and Limit are applied
// afterwards so the paging options parsed by the CLI actually take effect.
func (p *DatasetProvider) listDatasets(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                      `json:"code"`
		Data    []map[string]interface{} `json:"data"`
		Message string                   `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	nodes := make([]*Node, 0, len(apiResp.Data))
	for _, ds := range apiResp.Data {
		node := p.datasetToNode(ds)
		// Skip hidden .knowledgebase dataset (trim whitespace for safety)
		if strings.TrimSpace(node.Name) == ".knowledgebase" {
			continue
		}
		nodes = append(nodes, node)
	}
	total := len(nodes)
	if opts != nil {
		// Apply offset first, then limit. (Previously Offset was accepted
		// by the CLI but silently ignored here.)
		if opts.Offset > 0 {
			if opts.Offset >= len(nodes) {
				nodes = nodes[:0]
			} else {
				nodes = nodes[opts.Offset:]
			}
		}
		if opts.Limit > 0 && opts.Limit < len(nodes) {
			nodes = nodes[:opts.Limit]
		}
	}
	return &Result{
		Nodes: nodes,
		Total: total,
	}, nil
}
// getDataset resolves a dataset node by its exact name.
// The reserved ".knowledgebase" dataset is rejected; an unknown name
// yields an ErrNotFound-prefixed error.
func (p *DatasetProvider) getDataset(ctx stdctx.Context, name string) (*Node, error) {
	if name == ".knowledgebase" {
		return nil, fmt.Errorf("invalid path: .knowledgebase is not accessible")
	}
	// The API has no by-name lookup, so scan the full dataset list.
	resp, err := p.httpClient.Request("GET", "/datasets", true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                      `json:"code"`
		Data    []map[string]interface{} `json:"data"`
		Message string                   `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	for _, ds := range apiResp.Data {
		if getString(ds["name"]) != name {
			continue
		}
		return p.datasetToNode(ds), nil
	}
	return nil, fmt.Errorf("%s: dataset '%s'", ErrNotFound, name)
}
// searchDatasets searches across datasets; an empty query is just a listing
// with the same paging options.
func (p *DatasetProvider) searchDatasets(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	if opts.Query == "" {
		listOpts := &ListOptions{Limit: opts.Limit, Offset: opts.Offset}
		return p.listDatasets(ctx, listOpts)
	}
	// With a query, delegate to semantic retrieval.
	return p.searchWithRetrieval(ctx, opts)
}
// searchWithRetrieval performs semantic search using the retrieval API.
// Target datasets come from opts.Dirs when given (with a case-insensitive
// name fallback), otherwise every dataset is searched. top_k defaults to 10
// and similarity_threshold to 0.2 (matching the UI behavior).
func (p *DatasetProvider) searchWithRetrieval(ctx stdctx.Context, opts *SearchOptions) (*Result, error) {
	// Determine kb_ids to search in.
	var kbIDs []string
	var datasetsToSearch []*Node
	if len(opts.Dirs) > 0 && opts.Dirs[0] != "datasets" {
		// Search in specific datasets.
		for _, dir := range opts.Dirs {
			// Extract dataset name from path (e.g., "datasets/kb1" -> "kb1").
			datasetName := dir
			if strings.HasPrefix(dir, "datasets/") {
				datasetName = dir[len("datasets/"):]
			}
			ds, err := p.getDataset(ctx, datasetName)
			if err != nil {
				// Exact lookup failed; retry with a case-insensitive scan.
				allResult, listErr := p.listDatasets(ctx, nil)
				if listErr == nil {
					for _, d := range allResult.Nodes {
						if strings.EqualFold(d.Name, datasetName) {
							ds = d
							err = nil
							break
						}
					}
				}
				if err != nil {
					return nil, fmt.Errorf("dataset not found: %s", datasetName)
				}
			}
			datasetsToSearch = append(datasetsToSearch, ds)
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	} else {
		// Search in all datasets.
		allResult, err := p.listDatasets(ctx, nil)
		if err != nil {
			return nil, err
		}
		datasetsToSearch = allResult.Nodes
		for _, ds := range datasetsToSearch {
			kbID := getString(ds.Metadata["id"])
			if kbID != "" {
				kbIDs = append(kbIDs, kbID)
			}
		}
	}
	if len(kbIDs) == 0 {
		// Nothing to search against; return an empty result, not an error.
		return &Result{
			Nodes: []*Node{},
			Total: 0,
		}, nil
	}
	// Build kb_id -> dataset name mapping so result paths can use names.
	kbIDToName := make(map[string]string)
	for _, ds := range datasetsToSearch {
		kbID := getString(ds.Metadata["id"])
		if kbID != "" && ds.Name != "" {
			kbIDToName[kbID] = ds.Name
		}
	}
	// Build retrieval request.
	payload := map[string]interface{}{
		"kb_id":    kbIDs,
		"question": opts.Query,
	}
	// Set top_k (default to 10 if not specified).
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK
	// Set similarity threshold (default to 0.2 if not specified to match UI behavior).
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold
	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...).
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Parse chunks from response.
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}
	// Apply top_k limit if specified (API may return more results).
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// chunkToNodeWithKBMapping converts a retrieval chunk into a Node.
// kbIDToName (may be nil) maps kb IDs to dataset names so the node path can
// use readable names; the raw chunk map is preserved in Metadata.
// Field names are probed in several variants because different endpoints
// label the same data differently.
func (p *DatasetProvider) chunkToNodeWithKBMapping(chunk map[string]interface{}, kbIDToName map[string]string) *Node {
	// Extract chunk content - try multiple field names.
	content := ""
	if v, ok := chunk["content_with_weight"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["content_ltks"].(string); ok && v != "" {
		content = v
	} else if v, ok := chunk["text"].(string); ok && v != "" {
		content = v
	}
	// Get chunk_id for URI.
	chunkID := ""
	if v, ok := chunk["chunk_id"].(string); ok {
		chunkID = v
	} else if v, ok := chunk["id"].(string); ok {
		chunkID = v
	}
	// Get document name and ID.
	docName := ""
	if v, ok := chunk["docnm_kwd"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["docnm"].(string); ok && v != "" {
		docName = v
	} else if v, ok := chunk["doc_name"].(string); ok && v != "" {
		docName = v
	}
	docID := ""
	if v, ok := chunk["doc_id"].(string); ok && v != "" {
		docID = v
	}
	// Get dataset/kb name from mapping or chunk data.
	datasetName := ""
	datasetID := ""
	// First try to get kb_id from chunk (could be string or array).
	if v, ok := chunk["kb_id"].(string); ok && v != "" {
		datasetID = v
	} else if v, ok := chunk["kb_id"].([]interface{}); ok && len(v) > 0 {
		if s, ok := v[0].(string); ok {
			datasetID = s
		}
	}
	// Look up dataset name from mapping using kb_id.
	if datasetID != "" && kbIDToName != nil {
		if name, ok := kbIDToName[datasetID]; ok && name != "" {
			datasetName = name
		}
	}
	// Fallback to kb_name from chunk if mapping doesn't have it.
	if datasetName == "" {
		if v, ok := chunk["kb_name"].(string); ok && v != "" {
			datasetName = v
		}
	}
	// Build URI path: prefer names over IDs for readability.
	// Format: datasets/{dataset_name}/{doc_name}
	path := "/datasets"
	if datasetName != "" {
		path += "/" + datasetName
	} else if datasetID != "" {
		path += "/" + datasetID
	}
	if docName != "" {
		path += "/" + docName
	} else if docID != "" {
		path += "/" + docID
	}
	// Use doc_name or chunk_id as the name if content is empty.
	name := content
	if name == "" {
		if docName != "" {
			name = docName
		} else if chunkID != "" {
			// Abbreviate long chunk IDs to keep the display compact.
			name = "chunk:" + chunkID[:min(len(chunkID), 16)]
		} else {
			name = "(empty)"
		}
	}
	node := &Node{
		Name:     name,
		Path:     path,
		Type:     NodeTypeDocument,
		Metadata: chunk,
	}
	// Parse timestamps if available.
	if createTime, ok := chunk["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := chunk["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}
	return node
}
// chunkToNode converts a chunk map to a Node (legacy path: no kb_id -> name
// mapping is available, so dataset names come only from the chunk itself).
func (p *DatasetProvider) chunkToNode(chunk map[string]interface{}) *Node {
	return p.chunkToNodeWithKBMapping(chunk, nil)
}
// ==================== Document Operations ====================

// listDocuments lists the documents of the named dataset as nodes.
// Limit maps to the API's page_size; Offset is translated into a page
// number, which requires a positive Limit (page = offset/limit + 1).
func (p *DatasetProvider) listDocuments(ctx stdctx.Context, datasetName string, opts *ListOptions) (*Result, error) {
	// Resolve the dataset first: the documents endpoint is keyed by ID.
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}
	datasetID := getString(ds.Metadata["id"])
	if datasetID == "" {
		return nil, fmt.Errorf("dataset ID not found")
	}
	// Build query parameters
	params := make(map[string]string)
	if opts != nil {
		if opts.Limit > 0 {
			params["page_size"] = fmt.Sprintf("%d", opts.Limit)
		}
		// Guard Limit > 0: the previous unconditional offset/limit
		// division panicked (divide by zero) whenever an offset was given
		// without a limit.
		if opts.Offset > 0 && opts.Limit > 0 {
			params["page"] = fmt.Sprintf("%d", opts.Offset/opts.Limit+1)
		}
	}
	path := fmt.Sprintf("/datasets/%s/documents", datasetID)
	resp, err := p.httpClient.Request("GET", path, true, "auto", params, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code int `json:"code"`
		Data struct {
			Docs []map[string]interface{} `json:"docs"`
		} `json:"data"`
		Message string `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	nodes := make([]*Node, 0, len(apiResp.Data.Docs))
	for _, doc := range apiResp.Data.Docs {
		node := p.documentToNode(doc, datasetName)
		nodes = append(nodes, node)
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// getDocumentNode wraps a single document lookup in a one-element Result.
func (p *DatasetProvider) getDocumentNode(ctx stdctx.Context, datasetName, docName string) (*Result, error) {
	doc, err := p.getDocument(ctx, datasetName, docName)
	if err != nil {
		return nil, err
	}
	res := &Result{Total: 1}
	res.Nodes = []*Node{doc}
	return res, nil
}
// getDocument finds a document by name within a dataset.
// The documents API offers no by-name lookup, so this lists everything and
// scans for an exact name match.
func (p *DatasetProvider) getDocument(ctx stdctx.Context, datasetName, docName string) (*Node, error) {
	result, err := p.listDocuments(ctx, datasetName, nil)
	if err != nil {
		return nil, err
	}
	for _, candidate := range result.Nodes {
		if candidate.Name != docName {
			continue
		}
		return candidate, nil
	}
	return nil, fmt.Errorf("%s: document '%s' in dataset '%s'", ErrNotFound, docName, datasetName)
}
// searchDocuments performs semantic search within one named dataset.
// An empty query degrades to listing the dataset's documents with the same
// paging options; otherwise the retrieval API is queried with just this
// dataset's kb ID. top_k defaults to 10 and similarity_threshold to 0.2.
func (p *DatasetProvider) searchDocuments(ctx stdctx.Context, datasetName string, opts *SearchOptions) (*Result, error) {
	// If no query is provided, just list documents.
	if opts.Query == "" {
		return p.listDocuments(ctx, datasetName, &ListOptions{
			Limit:  opts.Limit,
			Offset: opts.Offset,
		})
	}
	// Use retrieval API for semantic search in specific dataset.
	ds, err := p.getDataset(ctx, datasetName)
	if err != nil {
		return nil, err
	}
	kbID := getString(ds.Metadata["id"])
	if kbID == "" {
		return nil, fmt.Errorf("dataset ID not found for '%s'", datasetName)
	}
	// Build kb_id -> dataset name mapping so result paths use the name.
	kbIDToName := map[string]string{kbID: datasetName}
	// Build retrieval request for specific dataset.
	payload := map[string]interface{}{
		"kb_id":    []string{kbID},
		"question": opts.Query,
	}
	// Set top_k (default to 10 if not specified).
	topK := opts.TopK
	if topK <= 0 {
		topK = 10
	}
	payload["top_k"] = topK
	// Set similarity threshold (default to 0.2 if not specified to match UI behavior).
	threshold := opts.Threshold
	if threshold <= 0 {
		threshold = 0.2
	}
	payload["similarity_threshold"] = threshold
	// Call retrieval API (useAPIBase=false because the route is /v1/chunk/retrieval_test, not /api/v1/...).
	resp, err := p.httpClient.Request("POST", "/chunk/retrieval_test", false, "auto", nil, payload)
	if err != nil {
		return nil, fmt.Errorf("retrieval request failed: %w", err)
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Parse chunks from response.
	var nodes []*Node
	if chunksData, ok := apiResp.Data["chunks"].([]interface{}); ok {
		for _, chunk := range chunksData {
			if chunkMap, ok := chunk.(map[string]interface{}); ok {
				node := p.chunkToNodeWithKBMapping(chunkMap, kbIDToName)
				nodes = append(nodes, node)
			}
		}
	}
	// Apply top_k limit if specified (API may return more results).
	if topK > 0 && len(nodes) > topK {
		nodes = nodes[:topK]
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// ==================== Helper Functions ====================

// datasetToNode converts a raw dataset record into a directory Node,
// keeping the full record in Metadata and picking the first available
// creation/update timestamp field.
func (p *DatasetProvider) datasetToNode(ds map[string]interface{}) *Node {
	dsName := getString(ds["name"])
	node := &Node{
		Name:     dsName,
		Path:     "/datasets/" + dsName,
		Type:     NodeTypeDirectory,
		Metadata: ds,
	}
	// The API is inconsistent about timestamp field names; accept either.
	for _, key := range []string{"create_time", "create_date"} {
		if v, ok := ds[key]; ok && v != nil {
			node.CreatedAt = parseTime(v)
			break
		}
	}
	for _, key := range []string{"update_time", "update_date"} {
		if v, ok := ds[key]; ok && v != nil {
			node.UpdatedAt = parseTime(v)
			break
		}
	}
	return node
}
// documentToNode converts a raw document record into a document Node,
// keeping the full record in Metadata.
// NOTE(review): the path built here has no leading slash ("datasets/..."),
// while datasetToNode produces "/datasets/..." — confirm which form
// consumers expect and unify.
func (p *DatasetProvider) documentToNode(doc map[string]interface{}, datasetName string) *Node {
	name := getString(doc["name"])
	node := &Node{
		Name:     name,
		Path:     "datasets/" + datasetName + "/" + name,
		Type:     NodeTypeDocument,
		Metadata: doc,
	}
	// Parse size (numeric "size" field; presumably bytes — confirm with API).
	if size, ok := doc["size"]; ok {
		node.Size = int64(getFloat(size))
	}
	// Parse timestamps
	if createTime, ok := doc["create_time"]; ok {
		node.CreatedAt = parseTime(createTime)
	}
	if updateTime, ok := doc["update_time"]; ok {
		node.UpdatedAt = parseTime(updateTime)
	}
	return node
}
// getString stringifies an arbitrary value: nil becomes "", strings pass
// through unchanged, and everything else is rendered with %v.
func getString(v interface{}) string {
	switch s := v.(type) {
	case nil:
		return ""
	case string:
		return s
	default:
		return fmt.Sprintf("%v", s)
	}
}
// min returns the smaller of two ints.
// (Kept as a local helper; callers can migrate to the Go 1.21 builtin once
// the module's minimum Go version allows.)
func min(a, b int) int {
	if b < a {
		return b
	}
	return a
}
// getFloat coerces a numeric value to float64.
// nil and unsupported types (including strings) yield 0.
func getFloat(v interface{}) float64 {
	if v == nil {
		return 0
	}
	switch n := v.(type) {
	case float64:
		return n
	case float32:
		return float64(n)
	case int:
		return float64(n)
	case int64:
		return float64(n)
	}
	return 0
}
// parseTime converts a value into a time.Time.
// Numeric inputs (float64/int64/int, or a fully numeric string, optionally
// double-quoted) are epoch timestamps; values above 1e12 are treated as
// milliseconds and scaled to seconds. Non-numeric strings are tried against
// a few common date layouts. Anything else yields the zero time.
func parseTime(v interface{}) time.Time {
	if v == nil {
		return time.Time{}
	}
	var ts int64
	switch val := v.(type) {
	case float64:
		ts = int64(val)
	case int64:
		ts = val
	case int:
		ts = int64(val)
	case string:
		s := strings.Trim(val, `"`)
		n, err := strconv.ParseInt(s, 10, 64)
		if err != nil {
			// Not a number: fall back to parsing as a formatted date.
			for _, layout := range []string{
				"2006-01-02 15:04:05",
				"2006-01-02T15:04:05",
				"2006-01-02T15:04:05Z",
				"2006-01-02",
			} {
				if t, perr := time.Parse(layout, s); perr == nil {
					return t
				}
			}
			return time.Time{}
		}
		ts = n
	default:
		return time.Time{}
	}
	// 13-digit timestamps are milliseconds; scale down to seconds.
	if ts > 1e12 {
		ts /= 1000
	}
	return time.Unix(ts, 0)
}

View File

@ -0,0 +1,312 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
"fmt"
"strings"
"time"
)
// Engine is the core of the Context Engine.
// It manages providers and routes commands to the appropriate provider.
type Engine struct {
	providers []Provider // registered providers, consulted in registration order
}
// NewEngine creates a Context Engine with no providers registered yet.
func NewEngine() *Engine {
	e := &Engine{}
	e.providers = make([]Provider, 0)
	return e
}
// RegisterProvider registers a provider with the engine.
// Providers are consulted in registration order during path resolution.
func (e *Engine) RegisterProvider(provider Provider) {
	e.providers = append(e.providers, provider)
}
// GetProviders returns name/description info for every registered provider,
// in registration order.
func (e *Engine) GetProviders() []ProviderInfo {
	infos := make([]ProviderInfo, 0, len(e.providers))
	for _, provider := range e.providers {
		info := ProviderInfo{
			Name:        provider.Name(),
			Description: provider.Description(),
		}
		infos = append(infos, info)
	}
	return infos
}
// Execute executes a command and returns the result.
// NOTE(review): CommandCat discards the file content and returns a nil
// Result — callers that need the bytes must call Cat directly; confirm this
// is intended.
func (e *Engine) Execute(ctx stdctx.Context, cmd *Command) (*Result, error) {
	switch cmd.Type {
	case CommandList:
		return e.List(ctx, cmd.Path, parseListOptions(cmd.Params))
	case CommandSearch:
		return e.Search(ctx, cmd.Path, parseSearchOptions(cmd.Params))
	case CommandCat:
		// Content is dropped here; only the error surfaces.
		_, err := e.Cat(ctx, cmd.Path)
		return nil, err
	default:
		return nil, fmt.Errorf("unknown command type: %s", cmd.Type)
	}
}
// resolveProvider finds the provider responsible for path and returns it
// together with the sub-path relative to the provider's root.
// Providers are tried in registration order; if none matches, the "files"
// provider (when registered) acts as a fallback so bare names like
// "myskills" behave as "files/myskills".
func (e *Engine) resolveProvider(path string) (Provider, string, error) {
	path = normalizePath(path)
	for _, provider := range e.providers {
		if provider.Supports(path) {
			// Parse the subpath relative to the provider root.
			// Get provider name to calculate subPath.
			providerName := provider.Name()
			var subPath string
			if path == providerName {
				subPath = ""
			} else if strings.HasPrefix(path, providerName+"/") {
				subPath = path[len(providerName)+1:]
			} else {
				// Provider claimed a path that doesn't carry its name
				// prefix; hand it the path unchanged.
				subPath = path
			}
			return provider, subPath, nil
		}
	}
	// If no provider supports this path, check if FileProvider can handle it as a fallback.
	// This allows paths like "myskills" to be treated as "files/myskills".
	if fileProvider := e.getFileProvider(); fileProvider != nil {
		// Check if the path looks like a file manager path (single component, not matching other providers).
		parts := SplitPath(path)
		if len(parts) > 0 && parts[0] != "datasets" {
			return fileProvider, path, nil
		}
	}
	return nil, "", fmt.Errorf("%s: %s", ErrProviderNotFound, path)
}
// List lists nodes at the given path.
// An empty (or "/") path produces the virtual root: built-in providers plus
// the top-level folders of the files provider. Otherwise the path is routed
// to its provider, with a last-resort retry against the files provider.
func (e *Engine) List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error) {
	// Normalize path
	path = normalizePath(path)
	// If path is empty, return list of providers and files root directories
	if path == "" || path == "/" {
		return e.listRoot(ctx, opts)
	}
	provider, subPath, err := e.resolveProvider(path)
	if err != nil {
		// If not found, try to find in files provider as a fallback.
		// This allows "ls myfolder" to work as "ls files/myfolder".
		if fileProvider := e.getFileProvider(); fileProvider != nil {
			result, ferr := fileProvider.List(ctx, path, opts)
			if ferr == nil {
				return result, nil
			}
		}
		// Fallback failed (or no files provider): report the original error.
		return nil, err
	}
	return provider.List(ctx, subPath, opts)
}
// listRoot assembles the virtual root listing:
//  1. every built-in provider except "files" as a directory node, and
//  2. the top-level folders (directories only) of the files provider.
func (e *Engine) listRoot(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	nodes := make([]*Node, 0)
	for _, provider := range e.providers {
		// The files provider is flattened into the root: its folders are
		// appended below instead of a single "files" entry.
		if provider.Name() == "files" {
			continue
		}
		entry := &Node{
			Name:      provider.Name(),
			Path:      "/" + provider.Name(),
			Type:      NodeTypeDirectory,
			CreatedAt: time.Now(),
			Metadata: map[string]interface{}{
				"description": provider.Description(),
			},
		}
		nodes = append(nodes, entry)
	}
	if fp := e.getFileProvider(); fp != nil {
		if filesResult, err := fp.List(ctx, "", opts); err == nil {
			for _, node := range filesResult.Nodes {
				// Only directories are surfaced at the root.
				if node.Type != NodeTypeDirectory {
					continue
				}
				// Normalize for display: drop any "files/" prefix and
				// any leading slash.
				node.Path = strings.TrimPrefix(node.Path, "files/")
				node.Path = strings.TrimPrefix(node.Path, "/")
				nodes = append(nodes, node)
			}
		}
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// getFileProvider returns the registered "files" provider, or nil when
// none is registered.
func (e *Engine) getFileProvider() Provider {
	for _, provider := range e.providers {
		if provider.Name() == "files" {
			return provider
		}
	}
	return nil
}
// Search runs a query against the provider that owns path. Unlike List and
// Cat, there is no files-provider fallback here: an unresolvable path is
// an error.
func (e *Engine) Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error) {
	p, sub, err := e.resolveProvider(path)
	if err != nil {
		return nil, err
	}
	return p.Search(ctx, sub, opts)
}
// Cat returns the raw content of the file/document at path.
//
// When no provider claims the path, the files provider is tried as a
// fallback so "cat myfolder/file.txt" resolves like
// "cat files/myfolder/file.txt".
func (e *Engine) Cat(ctx stdctx.Context, path string) ([]byte, error) {
	provider, sub, err := e.resolveProvider(path)
	if err == nil {
		return provider.Cat(ctx, sub)
	}
	if fp := e.getFileProvider(); fp != nil {
		return fp.Cat(ctx, path)
	}
	return nil, err
}
// ParsePath parses a path and returns structured path information.
//
// The first path component selects the provider; when a second component is
// present it is exposed as ResourceName.
func (e *Engine) ParsePath(path string) (*PathInfo, error) {
	path = normalizePath(path)
	components := SplitPath(path)
	if len(components) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	providerName := components[0]
	isRoot := len(components) == 1
	// Find the provider. Match the exact provider name or a "name/..."
	// prefix. BUG FIX: the previous bare HasPrefix(path, p.Name()) check
	// wrongly matched providers whose name is merely a string prefix of the
	// first component (e.g. provider "file" matching path "files2/x");
	// requiring the "/" boundary restores component-wise matching.
	var provider Provider
	for _, p := range e.providers {
		if p.Name() == providerName || strings.HasPrefix(path, p.Name()+"/") {
			provider = p
			break
		}
	}
	if provider == nil {
		return nil, fmt.Errorf("%s: %s", ErrProviderNotFound, path)
	}
	info := &PathInfo{
		Provider:   providerName,
		Path:       path,
		Components: components,
		IsRoot:     isRoot,
	}
	// Expose the second component as the resource name when present.
	if len(components) >= 2 {
		info.ResourceName = components[1]
	}
	return info, nil
}
// parseListOptions converts loosely-typed command params into ListOptions.
// Unknown keys and mistyped values are silently ignored.
func parseListOptions(params map[string]interface{}) *ListOptions {
	opts := &ListOptions{}
	if params == nil {
		return opts
	}
	if v, ok := params["recursive"].(bool); ok {
		opts.Recursive = v
	}
	if v, ok := params["limit"].(int); ok {
		opts.Limit = v
	}
	if v, ok := params["offset"].(int); ok {
		opts.Offset = v
	}
	if v, ok := params["sort_by"].(string); ok {
		opts.SortBy = v
	}
	if v, ok := params["sort_order"].(string); ok {
		opts.SortOrder = v
	}
	return opts
}
// parseSearchOptions converts loosely-typed command params into
// SearchOptions. Unknown keys and mistyped values are silently ignored.
func parseSearchOptions(params map[string]interface{}) *SearchOptions {
	opts := &SearchOptions{}
	if params == nil {
		return opts
	}
	if v, ok := params["query"].(string); ok {
		opts.Query = v
	}
	if v, ok := params["limit"].(int); ok {
		opts.Limit = v
	}
	if v, ok := params["offset"].(int); ok {
		opts.Offset = v
	}
	if v, ok := params["recursive"].(bool); ok {
		opts.Recursive = v
	}
	if v, ok := params["top_k"].(int); ok {
		opts.TopK = v
	}
	if v, ok := params["threshold"].(float64); ok {
		opts.Threshold = v
	}
	if v, ok := params["dirs"].([]string); ok {
		opts.Dirs = v
	}
	return opts
}

View File

@ -0,0 +1,594 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
	stdctx "context"
	"encoding/json"
	"fmt"
	"net/url"
	"strconv"
	"strings"
)
// FileProvider handles file operations using Python backend /files API
// Path structure:
// - files/ -> List root folder contents
// - files/{folder_name}/ -> List folder contents
// - files/{folder_name}/{file_name} -> Get file info/content
//
// Note: Uses Python backend API (useAPIBase=true):
// - GET /files?parent_id={id} -> List files/folders in parent
// - GET /files/{file_id} -> Get file info
// - POST /files -> Create folder or upload file
// - DELETE /files -> Delete files
// - GET /files/{file_id}/parent -> Get parent folder
// - GET /files/{file_id}/ancestors -> Get ancestor folders
//
// NOTE(review): folderCache and rootID are read and written without any
// locking — this assumes single-goroutine use; confirm before sharing a
// FileProvider across goroutines.
type FileProvider struct {
	BaseProvider
	httpClient  HTTPClientInterface
	folderCache map[string]string // path -> folder ID cache (filled lazily during listings)
	rootID      string            // root folder ID (cached by getRootID)
}
// NewFileProvider builds a FileProvider backed by the given HTTP client,
// mounted at the "files" virtual root.
func NewFileProvider(httpClient HTTPClientInterface) *FileProvider {
	base := BaseProvider{
		name:        "files",
		description: "File manager provider (Python server)",
		rootPath:    "files",
	}
	return &FileProvider{
		BaseProvider: base,
		httpClient:   httpClient,
		folderCache:  map[string]string{},
	}
}
// Supports reports whether path lives in the "files" namespace.
func (p *FileProvider) Supports(path string) bool {
	n := normalizePath(path)
	if n == "files" {
		return true
	}
	return strings.HasPrefix(n, "files/")
}
// List lists nodes at subPath (relative to "files/").
//
// An empty subPath lists the root folder; a single component lists that
// top-level folder; deeper paths are resolved level by level.
func (p *FileProvider) List(ctx stdctx.Context, subPath string, opts *ListOptions) (*Result, error) {
	if subPath == "" {
		return p.listRootFolder(ctx, opts)
	}
	parts := SplitPath(subPath)
	switch len(parts) {
	case 0:
		// subPath normalized away to nothing (e.g. "/" or "."): same error
		// the recursive walker produces.
		return nil, fmt.Errorf("empty path")
	case 1:
		// files/{folder_name} - list contents of this folder.
		return p.listFolderByName(ctx, parts[0], opts)
	default:
		// Multi-level path such as myskills/skill-name/dir1: walk down.
		return p.listPathRecursive(ctx, parts, opts)
	}
}
// listPathRecursive walks a multi-component path (e.g. myskills/skill/dir1)
// one backend listing at a time and lists the final component.
//
// Each intermediate level costs one /files API call (intermediate listings
// are fetched with nil options; opts apply only to the final listing).
// Resolved folder IDs are cached in folderCache along the way.
// Result: the directory contents when the last component is a directory, or
// a single-node result when it is a file.
func (p *FileProvider) listPathRecursive(ctx stdctx.Context, parts []string, opts *ListOptions) (*Result, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	// Start from root to find the first folder
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]
	// Traverse through intermediate directories
	for i := 1; i < len(parts); i++ {
		partName := parts[i]
		// List contents of current folder to find the next part
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}
		// Find the next component
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - if it's a directory, list its contents
					if node.Type == NodeTypeDirectory {
						childID := getString(node.Metadata["id"])
						if childID == "" {
							return nil, fmt.Errorf("folder ID not found for '%s'", partName)
						}
						newPath := currentPath + "/" + partName
						p.folderCache[newPath] = childID
						return p.listFilesByParentID(ctx, childID, newPath, opts)
					}
					// It's a file - return the file node
					return &Result{
						Nodes: []*Node{node},
						Total: 1,
					}, nil
				}
				// Not the last component - must be a directory
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}
		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}
	// Reached only when len(parts) == 1: the loop body never ran, so list
	// the first folder's contents directly.
	return p.listFilesByParentID(ctx, currentFolderID, currentPath, opts)
}
// Search performs a name-based filter over the listing at subPath.
//
// There is no server-side search yet: with an empty query this degrades to
// a plain list; otherwise nodes whose names contain the query
// (case-insensitive) are returned.
func (p *FileProvider) Search(ctx stdctx.Context, subPath string, opts *SearchOptions) (*Result, error) {
	listOpts := &ListOptions{
		Limit:  opts.Limit,
		Offset: opts.Offset,
	}
	result, err := p.List(ctx, subPath, listOpts)
	if err != nil {
		return nil, err
	}
	if opts.Query == "" {
		return result, nil
	}
	// Simple case-insensitive substring match on node names.
	needle := strings.ToLower(opts.Query)
	var matched []*Node
	for _, node := range result.Nodes {
		if strings.Contains(strings.ToLower(node.Name), needle) {
			matched = append(matched, node)
		}
	}
	return &Result{
		Nodes: matched,
		Total: len(matched),
	}, nil
}
// Cat downloads the content of the file at subPath.
// The path must be at least two components deep: files/{folder}/{file}.
func (p *FileProvider) Cat(ctx stdctx.Context, subPath string) ([]byte, error) {
	if subPath == "" {
		return nil, fmt.Errorf("cat requires a file path: files/{folder}/{file}")
	}
	parts := SplitPath(subPath)
	if len(parts) < 2 {
		return nil, fmt.Errorf("invalid path format, expected: files/{folder}/{file}")
	}
	// Resolve the node by walking the path, then fetch its bytes by ID.
	node, err := p.findNodeByPath(ctx, parts)
	if err != nil {
		return nil, err
	}
	if node.Type == NodeTypeDirectory {
		return nil, fmt.Errorf("'%s' is a directory, not a file", subPath)
	}
	id := getString(node.Metadata["id"])
	if id == "" {
		return nil, fmt.Errorf("file ID not found")
	}
	return p.downloadFile(ctx, id)
}
// findNodeByPath walks parts from the root and returns the node for the
// last component (file or directory). Resolved folder IDs are cached in
// folderCache along the way.
//
// NOTE(review): this mirrors the traversal in listPathRecursive — consider
// extracting a shared walker. Also, with len(parts) == 1 the loop never
// runs and the trailing ErrNotFound is returned even if the folder exists;
// the only caller in this file (Cat) guarantees len(parts) >= 2.
func (p *FileProvider) findNodeByPath(ctx stdctx.Context, parts []string) (*Node, error) {
	if len(parts) == 0 {
		return nil, fmt.Errorf("empty path")
	}
	// Start from root to find the first folder
	currentFolderID, err := p.getFolderIDByName(ctx, parts[0])
	if err != nil {
		return nil, err
	}
	currentPath := parts[0]
	// Traverse through intermediate directories
	for i := 1; i < len(parts); i++ {
		partName := parts[i]
		// List contents of current folder to find the next part
		result, err := p.listFilesByParentID(ctx, currentFolderID, currentPath, nil)
		if err != nil {
			return nil, err
		}
		// Find the next component
		found := false
		for _, node := range result.Nodes {
			if node.Name == partName {
				if i == len(parts)-1 {
					// This is the last component - return it
					return node, nil
				}
				// Not the last component - must be a directory
				if node.Type != NodeTypeDirectory {
					return nil, fmt.Errorf("'%s' is not a directory", partName)
				}
				childID := getString(node.Metadata["id"])
				if childID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", partName)
				}
				currentFolderID = childID
				currentPath = currentPath + "/" + partName
				p.folderCache[currentPath] = currentFolderID
				found = true
				break
			}
		}
		if !found {
			return nil, fmt.Errorf("%s: '%s' in '%s'", ErrNotFound, partName, currentPath)
		}
	}
	// Reached only for single-component paths (see NOTE above).
	return nil, fmt.Errorf("%s: '%s'", ErrNotFound, strings.Join(parts, "/"))
}
// ==================== Python Server API Methods ====================
// getRootID returns the backend's root folder ID, caching it after the
// first successful lookup. An empty ID with a nil error means the backend
// reported no explicit root_id; callers then list with no parent_id.
func (p *FileProvider) getRootID(ctx stdctx.Context) (string, error) {
	if p.rootID != "" {
		return p.rootID, nil
	}
	// A bare /files listing exposes the root folder information.
	resp, err := p.httpClient.Request("GET", "/files", true, "auto", nil, nil)
	if err != nil {
		return "", err
	}
	var payload struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &payload); err != nil {
		return "", err
	}
	if payload.Code != 0 {
		return "", fmt.Errorf("API error: %s", payload.Message)
	}
	if id, ok := payload.Data["root_id"].(string); ok && id != "" {
		p.rootID = id
		return id, nil
	}
	// No explicit root_id in the response: fall back to empty parent_id.
	return "", nil
}
// listRootFolder lists the contents of the backend root folder.
func (p *FileProvider) listRootFolder(ctx stdctx.Context, opts *ListOptions) (*Result, error) {
	rootID, err := p.getRootID(ctx)
	if err != nil {
		return nil, err
	}
	// An empty rootID is valid: the backend then lists its default root.
	return p.listFilesByParentID(ctx, rootID, "", opts)
}
// listFilesByParentID lists the files/folders directly under parentID.
//
// parentPath is the display path of the parent (empty for root); it is used
// both for node paths and folder-cache keys. The hidden ".knowledgebase"
// folder is filtered out, and every discovered folder ID is cached so later
// path resolution avoids extra round-trips.
func (p *FileProvider) listFilesByParentID(ctx stdctx.Context, parentID string, parentPath string, opts *ListOptions) (*Result, error) {
	// Build the query with net/url so parameter values are escaped safely
	// instead of being spliced into the URL by hand.
	query := url.Values{}
	if parentID != "" {
		query.Set("parent_id", parentID)
	}
	// Always request page 1 with an explicit page size so results come back.
	pageSize := 100
	if opts != nil && opts.Limit > 0 {
		pageSize = opts.Limit
	}
	query.Set("page_size", strconv.Itoa(pageSize))
	// NOTE(review): opts.Offset is not mapped onto the page parameter yet;
	// callers always receive the first page. TODO: support pagination.
	query.Set("page", "1")
	resp, err := p.httpClient.Request("GET", "/files?"+query.Encode(), true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return nil, err
	}
	if apiResp.Code != 0 {
		return nil, fmt.Errorf("API error: %s", apiResp.Message)
	}
	// The API returns {"total": N, "files": [...], "parent_folder": {...}}.
	var files []map[string]interface{}
	if fileList, ok := apiResp.Data["files"].([]interface{}); ok {
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
	}
	nodes := make([]*Node, 0, len(files))
	for _, f := range files {
		name := getString(f["name"])
		// Skip hidden .knowledgebase folder
		if strings.TrimSpace(name) == ".knowledgebase" {
			continue
		}
		node := p.fileToNode(f, parentPath)
		nodes = append(nodes, node)
		// Cache folder IDs for subsequent path resolution.
		if node.Type == NodeTypeDirectory || getString(f["type"]) == "folder" {
			if id := getString(f["id"]); id != "" {
				cacheKey := node.Name
				if parentPath != "" {
					cacheKey = parentPath + "/" + node.Name
				}
				p.folderCache[cacheKey] = id
			}
		}
	}
	return &Result{
		Nodes: nodes,
		Total: len(nodes),
	}, nil
}
// listFolderByName lists the contents of a top-level folder identified by
// its name.
func (p *FileProvider) listFolderByName(ctx stdctx.Context, folderName string, opts *ListOptions) (*Result, error) {
	id, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}
	return p.listFilesByParentID(ctx, id, folderName, opts)
}
// getFolderIDByName resolves the backend ID of a top-level folder by name,
// consulting the in-memory cache first and otherwise listing the root
// folder. The resolved ID is cached on success.
func (p *FileProvider) getFolderIDByName(ctx stdctx.Context, folderName string) (string, error) {
	// Check cache first.
	if id, ok := p.folderCache[folderName]; ok {
		return id, nil
	}
	// List the root folder to locate the target. A root-ID lookup failure
	// is deliberately ignored: an empty parent_id lists the default root.
	rootID, _ := p.getRootID(ctx)
	// Build the query with net/url so values are escaped safely (consistent
	// with listFilesByParentID).
	query := url.Values{}
	if rootID != "" {
		query.Set("parent_id", rootID)
	}
	query.Set("page_size", "100")
	query.Set("page", "1")
	resp, err := p.httpClient.Request("GET", "/files?"+query.Encode(), true, "auto", nil, nil)
	if err != nil {
		return "", err
	}
	var apiResp struct {
		Code    int                    `json:"code"`
		Data    map[string]interface{} `json:"data"`
		Message string                 `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err != nil {
		return "", err
	}
	if apiResp.Code != 0 {
		return "", fmt.Errorf("API error: %s", apiResp.Message)
	}
	// Entries may arrive under "files" or (older responses) "docs"; the
	// first key present wins.
	var files []map[string]interface{}
	for _, key := range []string{"files", "docs"} {
		fileList, ok := apiResp.Data[key].([]interface{})
		if !ok {
			continue
		}
		for _, f := range fileList {
			if fileMap, ok := f.(map[string]interface{}); ok {
				files = append(files, fileMap)
			}
		}
		break
	}
	// Match by name and ensure the entry is a folder with a usable ID.
	for _, f := range files {
		if getString(f["type"]) != "folder" {
			continue
		}
		id := getString(f["id"])
		if getString(f["name"]) == folderName && id != "" {
			p.folderCache[folderName] = id
			return id, nil
		}
	}
	return "", fmt.Errorf("%s: folder '%s'", ErrNotFound, folderName)
}
// getFileNode gets a file node by folder and file name.
// If fileName is a directory, returns the directory contents instead of the
// directory node itself (caching the directory's ID on the way).
//
// NOTE(review): not referenced anywhere in this file — possibly superseded
// by listPathRecursive/findNodeByPath; confirm external callers before
// removing.
func (p *FileProvider) getFileNode(ctx stdctx.Context, folderName, fileName string) (*Result, error) {
	folderID, err := p.getFolderIDByName(ctx, folderName)
	if err != nil {
		return nil, err
	}
	// List files in folder to find the file
	result, err := p.listFilesByParentID(ctx, folderID, folderName, nil)
	if err != nil {
		return nil, err
	}
	// Find the specific file
	for _, node := range result.Nodes {
		if node.Name == fileName {
			// If it's a directory, list its contents instead of returning the node itself
			if node.Type == NodeTypeDirectory {
				childFolderID := getString(node.Metadata["id"])
				if childFolderID == "" {
					return nil, fmt.Errorf("folder ID not found for '%s'", fileName)
				}
				// Cache the folder ID
				cacheKey := folderName + "/" + fileName
				p.folderCache[cacheKey] = childFolderID
				// Return directory contents
				return p.listFilesByParentID(ctx, childFolderID, cacheKey, nil)
			}
			// Return file node
			return &Result{
				Nodes: []*Node{node},
				Total: 1,
			}, nil
		}
	}
	return nil, fmt.Errorf("%s: file '%s' in folder '%s'", ErrNotFound, fileName, folderName)
}
// downloadFile fetches the raw bytes of a file by its backend ID.
func (p *FileProvider) downloadFile(ctx stdctx.Context, fileID string) ([]byte, error) {
	path := fmt.Sprintf("/files/%s", fileID)
	resp, err := p.httpClient.Request("GET", path, true, "auto", nil, nil)
	if err != nil {
		return nil, err
	}
	if resp.StatusCode == 200 {
		// Success: the body is the raw file content.
		return resp.Body, nil
	}
	// Prefer the API's own error message when the body carries one.
	var apiResp struct {
		Code    int    `json:"code"`
		Message string `json:"message"`
	}
	if err := json.Unmarshal(resp.Body, &apiResp); err == nil && apiResp.Code != 0 {
		return nil, fmt.Errorf("%s", apiResp.Message)
	}
	return nil, fmt.Errorf("HTTP error %d", resp.StatusCode)
}
// ==================== Conversion Functions ====================
// fileToNode converts one API file record into a Node, prefixing its path
// with parentPath when given. The raw record is kept as the node Metadata,
// with the backend "id" guaranteed present when the record carried one.
func (p *FileProvider) fileToNode(f map[string]interface{}, parentPath string) *Node {
	name := getString(f["name"])
	// "folder" records become directories; everything else is a file.
	nodeType := NodeTypeFile
	if getString(f["type"]) == "folder" {
		nodeType = NodeTypeDirectory
	}
	nodePath := name
	if parentPath != "" {
		nodePath = parentPath + "/" + name
	}
	node := &Node{
		Name:     name,
		Path:     nodePath,
		Type:     nodeType,
		Metadata: f,
	}
	if rawSize, ok := f["size"]; ok {
		node.Size = int64(getFloat(rawSize))
	}
	if v, ok := f["create_time"]; ok && v != nil {
		node.CreatedAt = parseTime(v)
	}
	if v, ok := f["update_time"]; ok && v != nil {
		node.UpdatedAt = parseTime(v)
	}
	// Keep the backend ID reachable via Metadata for later lookups.
	if id := getString(f["id"]); id != "" {
		if node.Metadata == nil {
			node.Metadata = make(map[string]interface{})
		}
		node.Metadata["id"] = id
	}
	return node
}

View File

@ -0,0 +1,180 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
stdctx "context"
)
// Provider is the interface for all context providers.
// Each provider handles a specific resource type (datasets, chats, agents,
// etc.) and is selected by the engine via Supports/Name.
type Provider interface {
	// Name returns the provider name (e.g., "datasets", "chats").
	Name() string
	// Description returns a human-readable description of the provider.
	Description() string
	// Supports returns true if this provider can handle the given path.
	Supports(path string) bool
	// List lists nodes at the given path. The engine passes the subpath
	// relative to the provider root ("" means the provider root itself).
	List(ctx stdctx.Context, path string, opts *ListOptions) (*Result, error)
	// Search searches for nodes matching the query under the given path.
	Search(ctx stdctx.Context, path string, opts *SearchOptions) (*Result, error)
	// Cat retrieves the content of a file/document at the given path.
	Cat(ctx stdctx.Context, path string) ([]byte, error)
}
// BaseProvider provides common functionality for all providers:
// identity (name/description) and root-path helpers. Concrete providers
// embed it.
type BaseProvider struct {
	name        string // provider name; also the first component of its paths
	description string // human-readable summary shown in listings
	rootPath    string // virtual root path (e.g. "files")
}
// Name returns the provider name.
func (p *BaseProvider) Name() string {
	return p.name
}
// Description returns the provider description.
func (p *BaseProvider) Description() string {
	return p.description
}
// GetRootPath returns the root path for this provider.
func (p *BaseProvider) GetRootPath() string {
	return p.rootPath
}
// IsRootPath checks if the given path is the root path for this provider.
// Both sides are normalized before comparison.
func (p *BaseProvider) IsRootPath(path string) bool {
	return normalizePath(path) == normalizePath(p.rootPath)
}
// ParsePath returns the portion of path relative to the provider root:
// "" when path is the root itself, the remainder after "root/" when path
// lives under the root, and the normalized path unchanged otherwise.
func (p *BaseProvider) ParsePath(path string) string {
	full := normalizePath(path)
	root := normalizePath(p.rootPath)
	switch {
	case full == root:
		return ""
	case len(full) > len(root) && full[:len(root)+1] == root+"/":
		return full[len(root)+1:]
	default:
		return full
	}
}
// SplitPath normalizes path and splits it into its non-empty components.
func SplitPath(path string) []string {
	normalized := normalizePath(path)
	if normalized == "" {
		return []string{}
	}
	components := make([]string, 0)
	for _, c := range splitString(normalized, '/') {
		if c == "" {
			continue
		}
		components = append(components, c)
	}
	return components
}
// normalizePath canonicalizes a path: trims surrounding whitespace and
// slashes, drops empty and "." components, and resolves ".." against the
// components seen so far (a ".." at the root is ignored rather than
// escaping above it).
func normalizePath(path string) string {
	path = trimSpace(path)
	if path == "" {
		return ""
	}
	// Strip surrounding slashes.
	start, end := 0, len(path)
	for start < end && path[start] == '/' {
		start++
	}
	for end > start && path[end-1] == '/' {
		end--
	}
	// Resolve "." and ".." with a component stack.
	var stack []string
	for _, component := range splitString(path[start:end], '/') {
		switch component {
		case "", ".":
			// Empty and current-directory components vanish.
		case "..":
			if n := len(stack); n > 0 {
				stack = stack[:n-1]
			}
		default:
			stack = append(stack, component)
		}
	}
	return joinStrings(stack, "/")
}
// Helper functions to avoid importing strings package in basic operations
// trimSpace returns s with leading and trailing ASCII whitespace
// (space, tab, newline, carriage return) removed.
func trimSpace(s string) string {
	isSpace := func(c byte) bool {
		return c == ' ' || c == '\t' || c == '\n' || c == '\r'
	}
	lo, hi := 0, len(s)
	for lo < hi && isSpace(s[lo]) {
		lo++
	}
	for hi > lo && isSpace(s[hi-1]) {
		hi--
	}
	return s[lo:hi]
}
// splitString splits s on every occurrence of sep, keeping empty segments
// (mirrors strings.Split for a single-byte separator).
func splitString(s string, sep byte) []string {
	var parts []string
	segStart := 0
	for idx := 0; idx < len(s); idx++ {
		if s[idx] != sep {
			continue
		}
		parts = append(parts, s[segStart:idx])
		segStart = idx + 1
	}
	return append(parts, s[segStart:])
}
// joinStrings concatenates strs with sep between consecutive elements
// (mirrors strings.Join; kept local so this file avoids importing strings).
func joinStrings(strs []string, sep string) string {
	switch len(strs) {
	case 0:
		return ""
	case 1:
		return strs[0]
	}
	out := strs[0]
	for _, s := range strs[1:] {
		out += sep + s
	}
	return out
}

View File

@ -0,0 +1,116 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import "time"
// NodeType represents the type of a node in the context filesystem.
type NodeType string
// Known node types. Providers map their backend objects onto these.
const (
	NodeTypeDirectory NodeType = "directory"
	NodeTypeFile NodeType = "file"
	NodeTypeDataset NodeType = "dataset"
	NodeTypeDocument NodeType = "document"
	NodeTypeChat NodeType = "chat"
	NodeTypeAgent NodeType = "agent"
	NodeTypeUnknown NodeType = "unknown"
)
// Node represents a node in the context filesystem.
// This is the unified output format for all providers.
type Node struct {
	Name string `json:"name"`
	Path string `json:"path"`
	Type NodeType `json:"type"`
	Size int64 `json:"size,omitempty"` // size in bytes; 0 when unknown/absent
	CreatedAt time.Time `json:"created_at,omitempty"`
	UpdatedAt time.Time `json:"updated_at,omitempty"`
	Metadata map[string]interface{} `json:"metadata,omitempty"` // provider-specific extras (e.g. backend "id")
}
// CommandType represents the type of command.
type CommandType string
// Supported command verbs.
const (
	CommandList CommandType = "ls"
	CommandSearch CommandType = "search"
	CommandCat CommandType = "cat"
)
// Command represents a context engine command: a verb, a target path, and
// optional loosely-typed parameters (see parseListOptions/parseSearchOptions).
type Command struct {
	Type CommandType `json:"type"`
	Path string `json:"path"`
	Params map[string]interface{} `json:"params,omitempty"`
}
// ListOptions represents options for list operations.
type ListOptions struct {
	Recursive bool `json:"recursive,omitempty"`
	Limit int `json:"limit,omitempty"` // max results; 0 means provider default
	Offset int `json:"offset,omitempty"` // NOTE(review): not honored by FileProvider yet (always page 1)
	SortBy string `json:"sort_by,omitempty"`
	SortOrder string `json:"sort_order,omitempty"` // "asc" or "desc"
}
// SearchOptions represents options for search operations.
type SearchOptions struct {
	Query string `json:"query"`
	Limit int `json:"limit,omitempty"`
	Offset int `json:"offset,omitempty"`
	Recursive bool `json:"recursive,omitempty"`
	TopK int `json:"top_k,omitempty"` // Number of top results to return (default: 10)
	Threshold float64 `json:"threshold,omitempty"` // Similarity threshold (default: 0.2)
	Dirs []string `json:"dirs,omitempty"` // List of directories to search in
}
// Result represents the result of a command execution.
type Result struct {
	Nodes []*Node `json:"nodes"`
	Total int `json:"total"`
	HasMore bool `json:"has_more"`
	NextOffset int `json:"next_offset,omitempty"`
	Error error `json:"-"` // transport-only; never serialized
}
// PathInfo represents parsed path information (see Engine.ParsePath).
type PathInfo struct {
	Provider string // The provider name (e.g., "datasets", "chats")
	Path string // The full normalized path
	Components []string // Path components
	IsRoot bool // Whether this is the root path for the provider
	ResourceID string // Resource ID if applicable
	ResourceName string // Resource name if applicable (second path component)
}
// ProviderInfo holds metadata about a provider.
type ProviderInfo struct {
	Name string `json:"name"`
	Description string `json:"description"`
	RootPath string `json:"root_path"`
}
// Common error messages. These are plain strings composed into errors via
// fmt.Errorf throughout the package.
// NOTE(review): consider promoting them to sentinel errors (errors.New) so
// callers can match with errors.Is instead of string inspection.
const (
	ErrInvalidPath = "invalid path"
	ErrProviderNotFound = "provider not found for path"
	ErrNotSupported = "operation not supported"
	ErrNotFound = "resource not found"
	ErrUnauthorized = "unauthorized"
	ErrInternal = "internal error"
)

View File

@ -0,0 +1,304 @@
//
// Copyright 2026 The InfiniFlow Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
package contextengine
import (
	"encoding/json"
	"fmt"
	"strings"
	"time"
)
// FormatNode projects a node into a display map for the requested output
// format: "json" (raw size, RFC 3339 times), "table" (pretty size and
// times), or anything else for "plain" (no size column).
func FormatNode(node *Node, format string) map[string]interface{} {
	switch format {
	case "json":
		return map[string]interface{}{
			"name":       node.Name,
			"path":       node.Path,
			"type":       string(node.Type),
			"size":       node.Size,
			"created_at": node.CreatedAt.Format(time.RFC3339),
			"updated_at": node.UpdatedAt.Format(time.RFC3339),
		}
	case "table":
		return map[string]interface{}{
			"name":       node.Name,
			"path":       node.Path,
			"type":       string(node.Type),
			"size":       formatSize(node.Size),
			"created_at": formatTime(node.CreatedAt),
			"updated_at": formatTime(node.UpdatedAt),
		}
	}
	// "plain" (default): omit the size column entirely.
	return map[string]interface{}{
		"name":       node.Name,
		"path":       node.Path,
		"type":       string(node.Type),
		"created_at": formatTime(node.CreatedAt),
		"updated_at": formatTime(node.UpdatedAt),
	}
}
// FormatNodes formats each node for display in the given format,
// preserving order.
func FormatNodes(nodes []*Node, format string) []map[string]interface{} {
	formatted := make([]map[string]interface{}, len(nodes))
	for i, node := range nodes {
		formatted[i] = FormatNode(node, format)
	}
	return formatted
}
// formatSize renders a byte count as a human-readable string with two
// decimal places per unit; zero renders as "-".
func formatSize(size int64) string {
	if size == 0 {
		return "-"
	}
	const (
		kb = int64(1024)
		mb = 1024 * kb
		gb = 1024 * mb
		tb = 1024 * gb
	)
	units := []struct {
		limit int64
		label string
	}{
		{tb, "TB"},
		{gb, "GB"},
		{mb, "MB"},
		{kb, "KB"},
	}
	for _, u := range units {
		if size >= u.limit {
			return fmt.Sprintf("%.2f %s", float64(size)/float64(u.limit), u.label)
		}
	}
	return fmt.Sprintf("%d B", size)
}
// formatTime formats a time to a readable string
func formatTime(t time.Time) string {
if t.IsZero() {
return "-"
}
return t.Format("2006-01-02 15:04:05")
}
// ResultToMap converts a Result into a JSON-serializable map; a nil result
// becomes an empty listing.
func ResultToMap(result *Result) map[string]interface{} {
	if result == nil {
		return map[string]interface{}{
			"nodes": []interface{}{},
			"total": 0,
		}
	}
	nodeMaps := make([]map[string]interface{}, 0, len(result.Nodes))
	for _, n := range result.Nodes {
		nodeMaps = append(nodeMaps, nodeToMap(n))
	}
	return map[string]interface{}{
		"nodes":       nodeMaps,
		"total":       result.Total,
		"has_more":    result.HasMore,
		"next_offset": result.NextOffset,
	}
}
// nodeToMap converts a Node to a map, omitting zero-valued optional fields
// (size, timestamps, metadata). Timestamps are rendered as RFC 3339.
func nodeToMap(node *Node) map[string]interface{} {
	out := map[string]interface{}{
		"name": node.Name,
		"path": node.Path,
		"type": string(node.Type),
	}
	if node.Size > 0 {
		out["size"] = node.Size
	}
	if ts := node.CreatedAt; !ts.IsZero() {
		out["created_at"] = ts.Format(time.RFC3339)
	}
	if ts := node.UpdatedAt; !ts.IsZero() {
		out["updated_at"] = ts.Format(time.RFC3339)
	}
	if len(node.Metadata) > 0 {
		out["metadata"] = node.Metadata
	}
	return out
}
// MarshalJSON marshals a Result to JSON bytes.
// Serialization goes through ResultToMap, so nodes follow nodeToMap's
// omit-zero rules and the Error field is never included.
func (r *Result) MarshalJSON() ([]byte, error) {
	return json.Marshal(ResultToMap(r))
}
// PrintResult prints a result in the specified format.
// Supported formats: "json" (pretty-printed), "table" (columned), and
// anything else is treated as "plain" (one node path per line).
// A nil result prints "No results".
func PrintResult(result *Result, format string) {
	if result == nil {
		fmt.Println("No results")
		return
	}
	switch format {
	case "json":
		data, err := json.MarshalIndent(ResultToMap(result), "", "  ")
		if err != nil {
			// Report instead of silently printing nothing on a marshal failure.
			fmt.Printf("Error marshaling JSON: %v\n", err)
			return
		}
		fmt.Println(string(data))
	case "table":
		printTable(result.Nodes)
	default: // "plain"
		for _, node := range result.Nodes {
			fmt.Println(node.Path)
		}
	}
}
// printTable prints nodes in a simple fixed-width table format with a
// header row and a dash separator. Empty input prints "No results".
func printTable(nodes []*Node) {
	if len(nodes) == 0 {
		fmt.Println("No results")
		return
	}
	// Column layout: 40+12+12+20+20 chars plus 4 separating spaces = 108.
	fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n", "NAME", "TYPE", "SIZE", "CREATED", "UPDATED")
	// Build a dash separator explicitly; string(make([]byte, n)) would
	// emit n NUL bytes instead of a visible line.
	sep := make([]byte, 108)
	for i := range sep {
		sep[i] = '-'
	}
	fmt.Println(string(sep))
	// Print rows; names are truncated to the column width.
	for _, node := range nodes {
		fmt.Printf("%-40s %-12s %-12s %-20s %-20s\n",
			truncateString(node.Name, 40),
			node.Type,
			formatSize(node.Size),
			formatTime(node.CreatedAt),
			formatTime(node.UpdatedAt),
		)
	}
}
// truncateString truncates a string to at most maxLen characters, appending
// "..." when it was shortened. Counting and slicing are done in runes so a
// multi-byte UTF-8 character is never split, and maxLen values of 3 or less
// no longer cause a negative-index slice panic.
func truncateString(s string, maxLen int) string {
	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		// Not enough room for an ellipsis; hard-cut instead of panicking.
		if maxLen < 0 {
			maxLen = 0
		}
		return string(runes[:maxLen])
	}
	return string(runes[:maxLen-3]) + "..."
}
// IsValidPath reports whether a path is valid: non-empty and free of
// unsafe sequences (parent references, doubled or back slashes, glob and
// shell-special characters, NUL).
func IsValidPath(path string) bool {
	if len(path) == 0 {
		return false
	}
	for _, bad := range []string{"..", "//", "\\", "*", "?", "<", ">", "|", "\x00"} {
		if containsString(path, bad) {
			return false
		}
	}
	return true
}
// containsString reports whether s contains substr. The empty substring
// is contained in every string, including the empty one.
func containsString(s, substr string) bool {
	if len(substr) == 0 {
		return true
	}
	limit := len(s) - len(substr)
	for start := 0; start <= limit; start++ {
		matched := true
		for j := 0; j < len(substr); j++ {
			if s[start+j] != substr[j] {
				matched = false
				break
			}
		}
		if matched {
			return true
		}
	}
	return false
}
// JoinPath joins path components with single slashes. Empty components are
// skipped, trailing slashes on the accumulated path and leading slashes on
// each new component are stripped before joining.
func JoinPath(components ...string) string {
	if len(components) == 0 {
		return ""
	}
	joined := components[0]
	for _, part := range components[1:] {
		switch {
		case joined == "":
			joined = part
		case part == "":
			// Skip empty components entirely.
		default:
			// Drop trailing slashes from what we have so far.
			end := len(joined)
			for end > 0 && joined[end-1] == '/' {
				end--
			}
			// Drop leading slashes from the incoming component.
			begin := 0
			for begin < len(part) && part[begin] == '/' {
				begin++
			}
			joined = joined[:end] + "/" + part[begin:]
		}
	}
	return joined
}
// GetParentPath returns the parent path of a given path
func GetParentPath(path string) string {
path = normalizePath(path)
parts := SplitPath(path)
if len(parts) <= 1 {
return ""
}
return joinStrings(parts[:len(parts)-1], "/")
}
// GetBaseName returns the last component of a path
func GetBaseName(path string) string {
path = normalizePath(path)
parts := SplitPath(path)
if len(parts) == 0 {
return ""
}
return parts[len(parts)-1]
}
// HasPrefix reports whether path lies under (or equals) prefix after both
// are normalized. An empty prefix matches every path; a match on a longer
// path requires a "/" boundary right after the prefix, so "ab" is not a
// prefix of "abc".
func HasPrefix(path, prefix string) bool {
	path = normalizePath(path)
	prefix = normalizePath(prefix)
	if prefix == "" || path == prefix {
		return true
	}
	bounded := prefix + "/"
	return len(path) > len(prefix) && path[:len(bounded)] == bounded
}

View File

@ -55,6 +55,11 @@ func (p *Parser) Parse(adminCommand bool) (*Command, error) {
return p.parseMetaCommand()
}
// Check for ContextEngine commands (ls, cat, search)
if p.curToken.Type == TokenIdentifier && isCECommand(p.curToken.Value) {
return p.parseCECommand()
}
// Parse SQL-like command
return p.parseSQLCommand(adminCommand)
}
@ -215,6 +220,16 @@ func isKeyword(tokenType int) bool {
return tokenType >= TokenLogin && tokenType <= TokenDocMeta
}
// isCECommand reports whether s (case-insensitively) names a
// ContextEngine command handled by parseCECommand: LS, LIST, or SEARCH.
func isCECommand(s string) bool {
	switch strings.ToUpper(s) {
	case "LS", "LIST", "SEARCH":
		return true
	default:
		return false
	}
}
// Helper functions for parsing
func (p *Parser) parseQuotedString() (string, error) {
if p.curToken.Type != TokenQuotedString {
@ -241,3 +256,92 @@ func tokenTypeToString(t int) string {
// Simplified for error messages
return fmt.Sprintf("token(%d)", t)
}
// parseCECommand dispatches the current ContextEngine command token
// (LS/LIST or SEARCH) to its dedicated parser. Any other token yields an
// error, although isCECommand normally prevents reaching that case.
func (p *Parser) parseCECommand() (*Command, error) {
	name := strings.ToUpper(p.curToken.Value)
	switch name {
	case "LS", "LIST":
		return p.parseCEListCommand()
	case "SEARCH":
		return p.parseCESearchCommand()
	}
	return nil, fmt.Errorf("unknown ContextEngine command: %s", name)
}
// parseCEListCommand parses the ls command.
// Syntax: ls [path] — the path may be an identifier, a quoted string, or
// the "datasets" keyword; without an argument the "datasets" root is used.
// A trailing semicolon is consumed if present.
func (p *Parser) parseCEListCommand() (*Command, error) {
	p.nextToken() // consume LS/LIST
	cmd := NewCommand("ce_ls")
	switch p.curToken.Type {
	case TokenQuotedString:
		cmd.Params["path"] = strings.Trim(p.curToken.Value, "\"'")
		p.nextToken()
	case TokenIdentifier, TokenDatasets:
		// "datasets" lexes as a keyword token but is a legal path here.
		cmd.Params["path"] = p.curToken.Value
		p.nextToken()
	default:
		// No argument: default to the "datasets" root.
		cmd.Params["path"] = "datasets"
	}
	if p.curToken.Type == TokenSemicolon {
		p.nextToken() // optional trailing semicolon
	}
	return cmd, nil
}
// parseCESearchCommand parses the search command.
// Syntax: search <query> [in <path>] — query and path may be identifiers
// or quoted strings; without an IN clause the path defaults to ".".
// A trailing semicolon is consumed if present.
func (p *Parser) parseCESearchCommand() (*Command, error) {
	p.nextToken() // consume SEARCH
	cmd := NewCommand("ce_search")

	// tokenText returns the current token's text, unquoting quoted strings.
	tokenText := func() string {
		if p.curToken.Type == TokenQuotedString {
			return strings.Trim(p.curToken.Value, "\"'")
		}
		return p.curToken.Value
	}

	if p.curToken.Type != TokenIdentifier && p.curToken.Type != TokenQuotedString {
		return nil, fmt.Errorf("expected query after SEARCH")
	}
	cmd.Params["query"] = tokenText()
	p.nextToken()

	// Optional "in <path>" clause; the default path is the current directory.
	cmd.Params["path"] = "."
	if p.curToken.Type == TokenIdentifier && strings.ToUpper(p.curToken.Value) == "IN" {
		p.nextToken() // consume IN
		if p.curToken.Type != TokenIdentifier && p.curToken.Type != TokenQuotedString {
			return nil, fmt.Errorf("expected path after IN")
		}
		cmd.Params["path"] = tokenText()
		p.nextToken()
	}

	if p.curToken.Type == TokenSemicolon {
		p.nextToken() // optional trailing semicolon
	}
	return cmd, nil
}

View File

@ -17,16 +17,46 @@
package cli
import (
"encoding/json"
"fmt"
"strconv"
"strings"
"unicode"
)
// maxColWidth caps the rendered width of any single table column.
const maxColWidth = 256
// PrintTableSimple prints data in a simple table format (default: table format with borders)
// Similar to Python's _print_table_simple; delegates to
// PrintTableSimpleByFormat with the OutputFormatTable format.
func PrintTableSimple(data []map[string]interface{}) {
	PrintTableSimpleByFormat(data, OutputFormatTable)
}
// PrintTableSimpleByFormat prints data in the specified format
// Supports: table (with borders), plain (no borders, space-separated), json
// - Column names in lowercase
// - Two spaces between columns
// - Numeric columns right-aligned
// - URI/path columns not truncated
func PrintTableSimpleByFormat(data []map[string]interface{}, format OutputFormat) {
if len(data) == 0 {
fmt.Println("No data to print")
if format == OutputFormatJSON {
fmt.Println("[]")
} else if format == OutputFormatPlain {
fmt.Println("(empty)")
} else {
fmt.Println("No data to print")
}
return
}
// JSON format: output as JSON array
if format == OutputFormatJSON {
jsonData, err := json.MarshalIndent(data, "", " ")
if err != nil {
fmt.Printf("Error marshaling JSON: %v\n", err)
return
}
fmt.Println(string(jsonData))
return
}
@ -52,60 +82,190 @@ func PrintTableSimple(data []map[string]interface{}) {
}
}
// Calculate column widths
// Analyze columns: check if numeric and if URI column
colIsNumeric := make(map[string]bool)
colIsURI := make(map[string]bool)
for _, col := range columns {
colLower := strings.ToLower(col)
if colLower == "uri" || colLower == "path" || colLower == "id" {
colIsURI[col] = true
}
// Check if all values are numeric
isNumeric := true
for _, item := range data {
if val, ok := item[col]; ok {
if !isNumericValue(val) {
isNumeric = false
break
}
}
}
colIsNumeric[col] = isNumeric
}
// Calculate column widths (capped at maxColWidth)
colWidths := make(map[string]int)
for _, col := range columns {
maxWidth := getStringWidth(col)
maxWidth := getStringWidth(strings.ToLower(col))
for _, item := range data {
value := fmt.Sprintf("%v", item[col])
value := formatValue(item[col])
valueWidth := getStringWidth(value)
if valueWidth > maxWidth {
maxWidth = valueWidth
}
}
if maxWidth > maxColWidth {
maxWidth = maxColWidth
}
if maxWidth < 2 {
maxWidth = 2
}
colWidths[col] = maxWidth
}
// Generate separator
separatorParts := make([]string, 0, len(columns))
for _, col := range columns {
separatorParts = append(separatorParts, strings.Repeat("-", colWidths[col]+2))
}
separator := "+" + strings.Join(separatorParts, "+") + "+"
// Print header
fmt.Println(separator)
headerParts := make([]string, 0, len(columns))
for _, col := range columns {
headerParts = append(headerParts, fmt.Sprintf(" %-*s ", colWidths[col], col))
}
fmt.Println("|" + strings.Join(headerParts, "|") + "|")
fmt.Println(separator)
// Print data rows
for _, item := range data {
rowParts := make([]string, 0, len(columns))
if format == OutputFormatPlain {
// Plain mode: no borders, space-separated (ov CLI compatible)
// Print header (lowercase column names, right-aligned for numeric columns)
headerParts := make([]string, 0, len(columns))
for _, col := range columns {
value := fmt.Sprintf("%v", item[col])
valueWidth := getStringWidth(value)
// Truncate if too long
if valueWidth > colWidths[col] {
runes := []rune(value)
truncated := truncateString(runes, colWidths[col])
value = truncated
valueWidth = getStringWidth(value)
}
// Pad to column width
padding := colWidths[col] - valueWidth + len(value)
rowParts = append(rowParts, fmt.Sprintf(" %-*s ", padding, value))
// Header follows the same alignment as data (right-aligned for numeric columns)
headerParts = append(headerParts, padCell(strings.ToLower(col), colWidths[col], colIsNumeric[col]))
}
fmt.Println("|" + strings.Join(rowParts, "|") + "|")
}
fmt.Println(strings.Join(headerParts, " "))
fmt.Println(separator)
// Print data rows
for _, item := range data {
rowParts := make([]string, 0, len(columns))
for _, col := range columns {
value := formatValue(item[col])
isURI := colIsURI[col]
isNumeric := colIsNumeric[col]
// URI columns: never truncate, no padding if too long
if isURI && getStringWidth(value) > colWidths[col] {
rowParts = append(rowParts, value)
} else {
// Normal cell: truncate if too long, then pad
valueWidth := getStringWidth(value)
if valueWidth > colWidths[col] {
runes := []rune(value)
value = truncateStringByWidth(runes, colWidths[col])
valueWidth = getStringWidth(value)
}
rowParts = append(rowParts, padCell(value, colWidths[col], isNumeric))
}
}
fmt.Println(strings.Join(rowParts, " "))
}
} else {
// Normal mode: with borders
// Generate separator
separatorParts := make([]string, 0, len(columns))
for _, col := range columns {
separatorParts = append(separatorParts, strings.Repeat("-", colWidths[col]+2))
}
separator := "+" + strings.Join(separatorParts, "+") + "+"
// Print header
fmt.Println(separator)
headerParts := make([]string, 0, len(columns))
for _, col := range columns {
headerParts = append(headerParts, fmt.Sprintf(" %-*s ", colWidths[col], col))
}
fmt.Println("|" + strings.Join(headerParts, "|") + "|")
fmt.Println(separator)
// Print data rows
for _, item := range data {
rowParts := make([]string, 0, len(columns))
for _, col := range columns {
value := formatValue(item[col])
valueWidth := getStringWidth(value)
// Truncate if too long
if valueWidth > colWidths[col] {
runes := []rune(value)
value = truncateStringByWidth(runes, colWidths[col])
valueWidth = getStringWidth(value)
}
// Pad to column width
padding := colWidths[col] - valueWidth + len(value)
rowParts = append(rowParts, fmt.Sprintf(" %-*s ", padding, value))
}
fmt.Println("|" + strings.Join(rowParts, "|") + "|")
}
fmt.Println(separator)
}
}
// formatValue formats a value for display. nil becomes "", the common
// scalar types use strconv for exact conversion, and everything else falls
// back to fmt's default %v rendering.
func formatValue(v interface{}) string {
	switch val := v.(type) {
	case nil:
		return ""
	case string:
		return val
	case int:
		return strconv.Itoa(val)
	case int64:
		return strconv.FormatInt(val, 10)
	case float64:
		return strconv.FormatFloat(val, 'f', -1, 64)
	case bool:
		return strconv.FormatBool(val)
	}
	return fmt.Sprintf("%v", v)
}
// isNumericValue reports whether a value is numeric: any Go integer or
// float type, or a string that parses as a float. nil and other types
// are not numeric.
func isNumericValue(v interface{}) bool {
	switch val := v.(type) {
	case nil:
		return false
	case string:
		_, err := strconv.ParseFloat(val, 64)
		return err == nil
	case int, int8, int16, int32, int64,
		uint, uint8, uint16, uint32, uint64,
		float32, float64:
		return true
	}
	return false
}
// truncateStringByWidth truncates a rune slice so its display width fits
// within maxWidth, reserving 3 columns for the "..." suffix when cutting.
// Half-width runes count as 1 column, all others as 2.
func truncateStringByWidth(runes []rune, maxWidth int) string {
	budget := maxWidth - 3
	used := 0
	for i := 0; i < len(runes); i++ {
		step := 2
		if isHalfWidth(runes[i]) {
			step = 1
		}
		used += step
		if used > budget {
			return string(runes[:i]) + "..."
		}
	}
	return string(runes)
}
// padCell pads content with spaces to the given display width. When
// alignRight is true the padding goes on the left (numeric columns);
// content already at or over the width is returned unchanged.
func padCell(content string, width int, alignRight bool) string {
	gap := width - getStringWidth(content)
	if gap <= 0 {
		return content
	}
	pad := strings.Repeat(" ", gap)
	if alignRight {
		return pad + content
	}
	return content + pad
}
// getStringWidth calculates the display width of a string
@ -134,34 +294,4 @@ func isHalfWidth(r rune) bool {
return false
}
// truncateString truncates a rune slice so its display width fits within
// maxWidth, appending "..." (3 columns are reserved for it) when the
// content is cut. Half-width runes count as 1 column, others as 2.
func truncateString(runes []rune, maxWidth int) string {
	budget := maxWidth - 3
	used := 0
	for i := 0; i < len(runes); i++ {
		step := 2
		if isHalfWidth(runes[i]) {
			step = 1
		}
		used += step
		if used > budget {
			return string(runes[:i]) + "..."
		}
	}
	return string(runes)
}
// getMax returns the larger of two integers.
func getMax(a, b int) int {
	if b > a {
		return b
	}
	return a
}
// isWideChar reports whether a rune belongs to a wide CJK script
// (Han, Hiragana, Katakana, or Hangul).
func isWideChar(r rune) bool {
	for _, table := range []*unicode.RangeTable{
		unicode.Han,
		unicode.Hiragana,
		unicode.Katakana,
		unicode.Hangul,
	} {
		if unicode.Is(table, r) {
			return true
		}
	}
	return false
}

View File

@ -143,13 +143,19 @@ func (c *RAGFlowClient) ListUserDatasets(cmd *Command) (ResponseIf, error) {
iterations = val
}
// Determine auth kind based on whether API token is being used
authKind := "web"
if c.HTTPClient.useAPIToken {
authKind = "api"
}
if iterations > 1 {
// Benchmark mode - return raw result for benchmark stats
return c.HTTPClient.RequestWithIterations("GET", "/datasets", true, "web", nil, nil, iterations)
return c.HTTPClient.RequestWithIterations("GET", "/datasets", true, authKind, nil, nil, iterations)
}
// Normal mode
resp, err := c.HTTPClient.Request("GET", "/datasets", true, "web", nil, nil)
resp, err := c.HTTPClient.Request("GET", "/datasets", true, authKind, nil, nil)
if err != nil {
return nil, fmt.Errorf("failed to list datasets: %w", err)
}

View File

@ -1,6 +1,9 @@
package cli
import "fmt"
import (
"fmt"
"strconv"
)
// Command parsers
func (p *Parser) parseLogout() (*Command, error) {