Files
ragflow/internal/common/parser_config.go
Magicbook1108 cfe6ea6f56 Feat: CREATE / DELETE / LIST dataset api in Go (#13695)
### What problem does this PR solve?

Feat: CREATE / DELETE / LIST dataset api in Go

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
2026-03-19 20:48:32 +08:00

122 lines
3.8 KiB
Go

package common
// deepCopyMap duplicates a JSON-like map so later merges do not mutate shared defaults.
// Nested map[string]interface{} and []interface{} values are cloned recursively through
// deepCopyValue; every other value is stored in the copy as-is. A nil source yields nil.
func deepCopyMap(source map[string]interface{}) map[string]interface{} {
	if source == nil {
		return nil
	}
	cloned := make(map[string]interface{}, len(source))
	for key, value := range source {
		cloned[key] = deepCopyValue(value)
	}
	return cloned
}

// deepCopyValue recursively copies nested maps and slices inside parser_config values.
// Only map[string]interface{} and []interface{} are cloned. Any other value, including
// maps or slices of a different static type, is returned unchanged and therefore still
// shares storage with the input.
func deepCopyValue(value interface{}) interface{} {
	switch typedValue := value.(type) {
	case map[string]interface{}:
		return deepCopyMap(typedValue)
	case []interface{}:
		if typedValue == nil {
			// Keep a nil slice nil, matching deepCopyMap's handling of nil maps.
			// encoding/json writes a nil slice as null and an empty one as [],
			// so the copy must not silently turn one into the other.
			return typedValue
		}
		cloned := make([]interface{}, len(typedValue))
		for idx, item := range typedValue {
			cloned[idx] = deepCopyValue(item)
		}
		return cloned
	default:
		return typedValue
	}
}
// DeepMergeMaps returns a fresh map containing base with override layered on top.
// When a key holds a map[string]interface{} on both sides, the two maps are merged
// recursively, so nested defaults such as raptor/graphrag survive a partial override.
// In every other case the override value replaces whatever base had under that key.
// Neither argument is modified, and the result is never nil.
func DeepMergeMaps(base, override map[string]interface{}) map[string]interface{} {
	result := deepCopyMap(base)
	if result == nil {
		// A nil base still has to produce a writable map.
		result = map[string]interface{}{}
	}

	// Ranging over a nil override is a no-op, so it needs no separate guard.
	for name, incoming := range override {
		if incomingMap, ok := incoming.(map[string]interface{}); ok {
			if currentMap, ok := result[name].(map[string]interface{}); ok {
				result[name] = DeepMergeMaps(currentMap, incomingMap)
				continue
			}
		}
		// Not map-on-map: the override wins outright, copied so the result
		// shares no nested storage with the caller's input.
		result[name] = deepCopyValue(incoming)
	}
	return result
}
// GetParserConfig assembles the parser_config stored on a dataset by layering
// three sources, later ones taking precedence:
//
//  1. base defaults shared by every chunk method,
//  2. defaults specific to chunkMethod,
//  3. the caller-supplied parserConfig.
//
// Layers are combined with DeepMergeMaps, so nested maps such as "raptor" and
// "graphrag" are merged key by key instead of being replaced wholesale. A
// chunkMethod without registered defaults ("resume" or any unlisted name)
// contributes nothing at step 2.
func GetParserConfig(chunkMethod string, parserConfig map[string]interface{}) map[string]interface{} {
	// featuresOff builds the defaults shared by every chunk method that ships
	// with both raptor and graphrag disabled.
	featuresOff := func() map[string]interface{} {
		return map[string]interface{}{
			"raptor":   map[string]interface{}{"use_raptor": false},
			"graphrag": map[string]interface{}{"use_graphrag": false},
		}
	}

	// Stays nil for "resume" and for any method not listed below.
	var methodDefaults map[string]interface{}
	switch chunkMethod {
	case "naive":
		methodDefaults = map[string]interface{}{
			"layout_recognize": "DeepDOC",
			"chunk_token_num":  512,
			"delimiter":        "\n",
			"auto_keywords":    0,
			"auto_questions":   0,
			"html4excel":       false,
			"topn_tags":        3,
			"raptor": map[string]interface{}{
				"use_raptor":  true,
				"prompt":      "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
				"max_token":   256,
				"threshold":   0.1,
				"max_cluster": 64,
				"random_seed": 0,
			},
			"graphrag": map[string]interface{}{
				"use_graphrag": true,
				"entity_types": []interface{}{"organization", "person", "geo", "event", "category"},
				"method":       "light",
			},
		}
	case "qa", "manual", "paper", "book", "laws", "presentation":
		methodDefaults = featuresOff()
	case "knowledge_graph":
		methodDefaults = featuresOff()
		methodDefaults["chunk_token_num"] = 8192
		methodDefaults["delimiter"] = "\\n"
		methodDefaults["entity_types"] = []interface{}{"organization", "person", "location", "event", "time"}
	}

	withMethodDefaults := DeepMergeMaps(map[string]interface{}{
		"table_context_size": 0,
		"image_context_size": 0,
	}, methodDefaults)
	return DeepMergeMaps(withMethodDefaults, parserConfig)
}