mirror of
https://github.com/infiniflow/ragflow.git
synced 2026-03-27 17:29:39 +08:00
### What problem does this PR solve?

Feat: CREATE / DELETE / LIST dataset API in Go

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

---------

Co-authored-by: Lynn <lynn_inf@hotmail.com>
Co-authored-by: Yingfeng <yingfeng.zhang@gmail.com>
122 lines
3.8 KiB
Go
122 lines
3.8 KiB
Go
package common
|
|
|
|
// deepCopyMap duplicates a JSON-like map so later merges do not mutate shared defaults.
|
|
func deepCopyMap(source map[string]interface{}) map[string]interface{} {
|
|
if source == nil {
|
|
return nil
|
|
}
|
|
|
|
cloned := make(map[string]interface{}, len(source))
|
|
for key, value := range source {
|
|
cloned[key] = deepCopyValue(value)
|
|
}
|
|
return cloned
|
|
}
|
|
|
|
// deepCopyValue recursively copies nested maps and slices inside parser_config values.
|
|
func deepCopyValue(value interface{}) interface{} {
|
|
switch typedValue := value.(type) {
|
|
case map[string]interface{}:
|
|
return deepCopyMap(typedValue)
|
|
case []interface{}:
|
|
cloned := make([]interface{}, len(typedValue))
|
|
for idx, item := range typedValue {
|
|
cloned[idx] = deepCopyValue(item)
|
|
}
|
|
return cloned
|
|
default:
|
|
return typedValue
|
|
}
|
|
}
|
|
|
|
// DeepMergeMaps applies override onto base while preserving nested defaults such as raptor/graphrag.
|
|
func DeepMergeMaps(base, override map[string]interface{}) map[string]interface{} {
|
|
merged := deepCopyMap(base)
|
|
if merged == nil {
|
|
merged = make(map[string]interface{})
|
|
}
|
|
if override == nil {
|
|
return merged
|
|
}
|
|
|
|
for key, value := range override {
|
|
overrideMap, overrideIsMap := value.(map[string]interface{})
|
|
existingMap, existingIsMap := merged[key].(map[string]interface{})
|
|
if overrideIsMap && existingIsMap {
|
|
merged[key] = DeepMergeMaps(existingMap, overrideMap)
|
|
continue
|
|
}
|
|
merged[key] = deepCopyValue(value)
|
|
}
|
|
return merged
|
|
}
|
|
|
|
// GetParserConfig builds the final parser_config stored on a dataset:
|
|
// base defaults -> chunk-method defaults -> caller overrides.
|
|
func GetParserConfig(chunkMethod string, parserConfig map[string]interface{}) map[string]interface{} {
|
|
baseDefaults := map[string]interface{}{
|
|
"table_context_size": 0,
|
|
"image_context_size": 0,
|
|
}
|
|
|
|
defaultConfigs := map[string]map[string]interface{}{
|
|
"naive": {
|
|
"layout_recognize": "DeepDOC",
|
|
"chunk_token_num": 512,
|
|
"delimiter": "\n",
|
|
"auto_keywords": 0,
|
|
"auto_questions": 0,
|
|
"html4excel": false,
|
|
"topn_tags": 3,
|
|
"raptor": map[string]interface{}{
|
|
"use_raptor": true,
|
|
"prompt": "Please summarize the following paragraphs. Be careful with the numbers, do not make things up. Paragraphs as following:\n {cluster_content}\nThe above is the content you need to summarize.",
|
|
"max_token": 256,
|
|
"threshold": 0.1,
|
|
"max_cluster": 64,
|
|
"random_seed": 0,
|
|
},
|
|
"graphrag": map[string]interface{}{
|
|
"use_graphrag": true,
|
|
"entity_types": []interface{}{"organization", "person", "geo", "event", "category"},
|
|
"method": "light",
|
|
},
|
|
},
|
|
"qa": {
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
"resume": nil,
|
|
"manual": {
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
"paper": {
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
"book": {
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
"laws": {
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
"presentation": {
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
"knowledge_graph": {
|
|
"chunk_token_num": 8192,
|
|
"delimiter": "\\n",
|
|
"entity_types": []interface{}{"organization", "person", "location", "event", "time"},
|
|
"raptor": map[string]interface{}{"use_raptor": false},
|
|
"graphrag": map[string]interface{}{"use_graphrag": false},
|
|
},
|
|
}
|
|
|
|
merged := DeepMergeMaps(baseDefaults, defaultConfigs[chunkMethod])
|
|
return DeepMergeMaps(merged, parserConfig)
|
|
}
|