Merge branch 'main' into feat/rag-pipeline

This commit is contained in:
twwu
2025-06-23 13:59:05 +08:00
106 changed files with 5241 additions and 2860 deletions

View File

@ -162,7 +162,9 @@ const StepTwo = ({
const isInCreatePage = !datasetId || (datasetId && !currentDataset?.data_source_type)
const dataSourceType = isInCreatePage ? inCreatePageDataSourceType : currentDataset?.data_source_type
const [segmentationType, setSegmentationType] = useState<ProcessMode>(ProcessMode.general)
const [segmentationType, setSegmentationType] = useState<ProcessMode>(
currentDataset?.doc_form === ChunkingMode.parentChild ? ProcessMode.parentChild : ProcessMode.general,
)
const [segmentIdentifier, doSetSegmentIdentifier] = useState(DEFAULT_SEGMENT_IDENTIFIER)
const setSegmentIdentifier = useCallback((value: string, canEmpty?: boolean) => {
doSetSegmentIdentifier(value ? escape(value) : (canEmpty ? '' : DEFAULT_SEGMENT_IDENTIFIER))
@ -208,7 +210,14 @@ const StepTwo = ({
}
if (value === ChunkingMode.parentChild && indexType === IndexingType.ECONOMICAL)
setIndexType(IndexingType.QUALIFIED)
setDocForm(value)
if (value === ChunkingMode.parentChild)
setSegmentationType(ProcessMode.parentChild)
else
setSegmentationType(ProcessMode.general)
// eslint-disable-next-line ts/no-use-before-define
currentEstimateMutation.reset()
}
@ -504,6 +513,20 @@ const StepTwo = ({
setOverlap(overlap!)
setRules(rules.pre_processing_rules)
setDefaultConfig(rules)
if (documentDetail.dataset_process_rule.mode === 'hierarchical') {
setParentChildConfig({
chunkForContext: rules.parent_mode || 'paragraph',
parent: {
delimiter: escape(rules.segmentation.separator),
maxLength: rules.segmentation.max_tokens,
},
child: {
delimiter: escape(rules.subchunk_segmentation.separator),
maxLength: rules.subchunk_segmentation.max_tokens,
},
})
}
}
}
@ -966,8 +989,8 @@ const StepTwo = ({
<div className='system-md-semibold mb-0.5 text-text-secondary'>{t('datasetSettings.form.retrievalSetting.title')}</div>
<div className='body-xs-regular text-text-tertiary'>
<a target='_blank' rel='noopener noreferrer'
href={docLink('/guides/knowledge-base/create-knowledge-and-upload-documents')}
className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
href={docLink('/guides/knowledge-base/create-knowledge-and-upload-documents')}
className='text-text-accent'>{t('datasetSettings.form.retrievalSetting.learnMore')}</a>
{t('datasetSettings.form.retrievalSetting.longDescription')}
</div>
</div>
@ -1131,7 +1154,7 @@ const StepTwo = ({
const indexForLabel = index + 1
return (
<PreviewSlice
key={child}
key={`C-${indexForLabel}-${child}`}
label={`C-${indexForLabel}`}
text={child}
tooltip={`Child-chunk-${indexForLabel} · ${child.length} Characters`}

View File

@ -1124,6 +1124,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/status/{action}'
method='PATCH'
title='Update Document Status'
name='#batch_document_status'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
Knowledge ID
</Property>
<Property name='action' type='string' key='action'>
- `enable` - Enable document
- `disable` - Disable document
- `archive` - Archive document
- `un_archive` - Unarchive document
</Property>
</Properties>
### Request Body
<Properties>
<Property name='document_ids' type='array[string]' key='document_ids'>
List of document IDs
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="PATCH"
label="/datasets/{dataset_id}/documents/status/{action}"
targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
"document_ids": ["doc-id-1", "doc-id-2"]
}'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"result": "success"
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='POST'

View File

@ -881,6 +881,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/status/{action}'
method='PATCH'
title='ドキュメントステータスの更新'
name='#batch_document_status'
/>
<Row>
<Col>
### パス
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
ナレッジ ID
</Property>
<Property name='action' type='string' key='action'>
- `enable` - ドキュメントを有効化
- `disable` - ドキュメントを無効化
- `archive` - ドキュメントをアーカイブ
- `un_archive` - ドキュメントのアーカイブを解除
</Property>
</Properties>
### リクエストボディ
<Properties>
<Property name='document_ids' type='array[string]' key='document_ids'>
ドキュメント ID のリスト
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="リクエスト"
tag="PATCH"
label="/datasets/{dataset_id}/documents/status/{action}"
targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
"document_ids": ["doc-id-1", "doc-id-2"]
}'
```
</CodeGroup>
<CodeGroup title="レスポンス">
```json {{ title: 'Response' }}
{
"result": "success"
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='POST'
@ -2413,3 +2470,4 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
</tbody>
</table>
<div className="pb-4" />

View File

@ -1131,6 +1131,63 @@ import { Row, Col, Properties, Property, Heading, SubProperty, PropertyInstructi
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/status/{action}'
method='PATCH'
title='更新文档状态'
name='#batch_document_status'
/>
<Row>
<Col>
### Path
<Properties>
<Property name='dataset_id' type='string' key='dataset_id'>
知识库 ID
</Property>
<Property name='action' type='string' key='action'>
- `enable` - 启用文档
- `disable` - 禁用文档
- `archive` - 归档文档
- `un_archive` - 取消归档文档
</Property>
</Properties>
### Request Body
<Properties>
<Property name='document_ids' type='array[string]' key='document_ids'>
文档 ID 列表
</Property>
</Properties>
</Col>
<Col sticky>
<CodeGroup
title="Request"
tag="PATCH"
label="/datasets/{dataset_id}/documents/status/{action}"
targetCode={`curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \\\n--header 'Authorization: Bearer {api_key}' \\\n--header 'Content-Type: application/json' \\\n--data-raw '{\n "document_ids": ["doc-id-1", "doc-id-2"]\n}'`}
>
```bash {{ title: 'cURL' }}
curl --location --request PATCH '${props.apiBaseUrl}/datasets/{dataset_id}/documents/status/{action}' \
--header 'Authorization: Bearer {api_key}' \
--header 'Content-Type: application/json' \
--data-raw '{
"document_ids": ["doc-id-1", "doc-id-2"]
}'
```
</CodeGroup>
<CodeGroup title="Response">
```json {{ title: 'Response' }}
{
"result": "success"
}
```
</CodeGroup>
</Col>
</Row>
<hr className='ml-0 mr-0' />
<Heading
url='/datasets/{dataset_id}/documents/{document_id}/segments'
method='POST'