mirror of
https://github.com/langgenius/dify.git
synced 2026-05-05 18:08:07 +08:00
merge main
This commit is contained in:
@ -53,7 +53,7 @@ import { ModelTypeEnum } from '@/app/components/header/account-setting/model-pro
|
||||
import { Globe01 } from '@/app/components/base/icons/src/vender/line/mapsAndTravel'
|
||||
|
||||
type ValueOf<T> = T[keyof T]
|
||||
interface StepTwoProps {
|
||||
type StepTwoProps = {
|
||||
isSetting?: boolean
|
||||
documentDetail?: FullDocumentDetail
|
||||
isAPIKeySet: boolean
|
||||
@ -122,7 +122,8 @@ const StepTwo = ({
|
||||
const setSegmentIdentifier = useCallback((value: string) => {
|
||||
doSetSegmentIdentifier(value ? escape(value) : DEFAULT_SEGMENT_IDENTIFIER)
|
||||
}, [])
|
||||
const [max, setMax] = useState(4000) // default chunk length
|
||||
const [maxChunkLength, setMaxChunkLength] = useState(4000) // default chunk length
|
||||
const [limitMaxChunkLength, setLimitMaxChunkLength] = useState(4000)
|
||||
const [overlap, setOverlap] = useState(50)
|
||||
const [rules, setRules] = useState<PreProcessingRule[]>([])
|
||||
const [defaultConfig, setDefaultConfig] = useState<Rules>()
|
||||
@ -196,13 +197,14 @@ const StepTwo = ({
|
||||
const resetRules = () => {
|
||||
if (defaultConfig) {
|
||||
setSegmentIdentifier(defaultConfig.segmentation.separator)
|
||||
setMax(defaultConfig.segmentation.max_tokens)
|
||||
setMaxChunkLength(defaultConfig.segmentation.max_tokens)
|
||||
setOverlap(defaultConfig.segmentation.chunk_overlap)
|
||||
setRules(defaultConfig.pre_processing_rules)
|
||||
}
|
||||
}
|
||||
|
||||
const fetchFileIndexingEstimate = async (docForm = DocForm.TEXT, language?: string) => {
|
||||
// eslint-disable-next-line ts/no-use-before-define
|
||||
const res = await didFetchFileIndexingEstimate(getFileIndexingEstimateParams(docForm, language)!)
|
||||
if (segmentationType === SegmentType.CUSTOM)
|
||||
setCustomFileIndexingEstimate(res)
|
||||
@ -211,8 +213,8 @@ const StepTwo = ({
|
||||
}
|
||||
|
||||
const confirmChangeCustomConfig = () => {
|
||||
if (segmentationType === SegmentType.CUSTOM && max > 4000) {
|
||||
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
|
||||
if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
|
||||
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
|
||||
return
|
||||
}
|
||||
setCustomFileIndexingEstimate(null)
|
||||
@ -233,7 +235,7 @@ const StepTwo = ({
|
||||
pre_processing_rules: rules,
|
||||
segmentation: {
|
||||
separator: unescape(segmentIdentifier),
|
||||
max_tokens: max,
|
||||
max_tokens: maxChunkLength,
|
||||
chunk_overlap: overlap,
|
||||
},
|
||||
}
|
||||
@ -338,12 +340,12 @@ const StepTwo = ({
|
||||
)
|
||||
const getCreationParams = () => {
|
||||
let params
|
||||
if (segmentationType === SegmentType.CUSTOM && overlap > max) {
|
||||
if (segmentationType === SegmentType.CUSTOM && overlap > maxChunkLength) {
|
||||
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.overlapCheck') })
|
||||
return
|
||||
}
|
||||
if (segmentationType === SegmentType.CUSTOM && max > 4000) {
|
||||
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck') })
|
||||
if (segmentationType === SegmentType.CUSTOM && maxChunkLength > limitMaxChunkLength) {
|
||||
Toast.notify({ type: 'error', message: t('datasetCreation.stepTwo.maxLengthCheck', { limit: limitMaxChunkLength }) })
|
||||
return
|
||||
}
|
||||
if (isSetting) {
|
||||
@ -352,7 +354,7 @@ const StepTwo = ({
|
||||
doc_form: docForm,
|
||||
doc_language: docLanguage,
|
||||
process_rule: getProcessRule(),
|
||||
|
||||
// eslint-disable-next-line ts/no-use-before-define
|
||||
retrieval_model: retrievalConfig, // Readonly. If want to changed, just go to settings page.
|
||||
embedding_model: embeddingModel.model, // Readonly
|
||||
embedding_model_provider: embeddingModel.provider, // Readonly
|
||||
@ -365,7 +367,7 @@ const StepTwo = ({
|
||||
rerankDefaultModel,
|
||||
isRerankDefaultModelValid: !!isRerankDefaultModelValid,
|
||||
rerankModelList,
|
||||
|
||||
// eslint-disable-next-line ts/no-use-before-define
|
||||
retrievalConfig,
|
||||
indexMethod: indexMethod as string,
|
||||
})
|
||||
@ -375,7 +377,7 @@ const StepTwo = ({
|
||||
}
|
||||
const postRetrievalConfig = ensureRerankModelSelected({
|
||||
rerankDefaultModel: rerankDefaultModel!,
|
||||
|
||||
// eslint-disable-next-line ts/no-use-before-define
|
||||
retrievalConfig,
|
||||
indexMethod: indexMethod as string,
|
||||
})
|
||||
@ -414,7 +416,8 @@ const StepTwo = ({
|
||||
const res = await fetchDefaultProcessRule({ url: '/datasets/process-rule' })
|
||||
const separator = res.rules.segmentation.separator
|
||||
setSegmentIdentifier(separator)
|
||||
setMax(res.rules.segmentation.max_tokens)
|
||||
setMaxChunkLength(res.rules.segmentation.max_tokens)
|
||||
setLimitMaxChunkLength(res.limits.indexing_max_segmentation_tokens_length)
|
||||
setOverlap(res.rules.segmentation.chunk_overlap)
|
||||
setRules(res.rules.pre_processing_rules)
|
||||
setDefaultConfig(res.rules)
|
||||
@ -431,7 +434,7 @@ const StepTwo = ({
|
||||
const max = rules.segmentation.max_tokens
|
||||
const overlap = rules.segmentation.chunk_overlap
|
||||
setSegmentIdentifier(separator)
|
||||
setMax(max)
|
||||
setMaxChunkLength(max)
|
||||
setOverlap(overlap)
|
||||
setRules(rules.pre_processing_rules)
|
||||
setDefaultConfig(rules)
|
||||
@ -669,10 +672,10 @@ const StepTwo = ({
|
||||
type="number"
|
||||
className='h-9'
|
||||
placeholder={t('datasetCreation.stepTwo.maxLength') || ''}
|
||||
value={max}
|
||||
max={4000}
|
||||
value={maxChunkLength}
|
||||
max={limitMaxChunkLength}
|
||||
min={1}
|
||||
onChange={e => setMax(Number.parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
||||
onChange={e => setMaxChunkLength(Number.parseInt(e.target.value.replace(/^0+/, ''), 10))}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
Reference in New Issue
Block a user