refactor: add create-from-pipeline page and associated components for document processing

This commit is contained in:
twwu
2025-05-21 16:37:02 +08:00
parent 9aef4b6d6b
commit b18519b824
20 changed files with 414 additions and 157 deletions

View File

@ -7,8 +7,7 @@ import Effect from '../../base/effect'
const CreateFromPipeline = () => {
return (
<div
className='relative flex flex-col rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
style={{ height: 'calc(100vh - 56px)' }}
className='relative flex h-[calc(100vh-56px)] flex-col rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
>
<Effect className='left-8 top-[-34px] opacity-20' />
<Header />

View File

@ -0,0 +1,21 @@
import { useTranslation } from 'react-i18next'
import { AddDocumentsStep } from './types'
/**
 * Hook returning the ordered steps of the add-documents flow.
 *
 * Every entry pairs a translated `label` with the `AddDocumentsStep` member
 * that identifies the step. The array order is: choose datasource, process
 * documents, processing documents.
 */
export const useAddDocumentsSteps = () => {
  const { t } = useTranslation()

  // Each step is named individually so the returned order is explicit.
  const chooseDataSourceStep = {
    label: t('datasetPipeline.addDocuments.steps.chooseDatasource'),
    value: AddDocumentsStep.dataSource,
  }
  const processDocumentsStep = {
    label: t('datasetPipeline.addDocuments.steps.ProcessDocuments'),
    value: AddDocumentsStep.processDocuments,
  }
  const processingDocumentsStep = {
    label: t('datasetPipeline.addDocuments.steps.ProcessingDocuments'),
    value: AddDocumentsStep.processingDocuments,
  }

  return [chooseDataSourceStep, processDocumentsStep, processingDocumentsStep]
}

View File

@ -0,0 +1,217 @@
'use client'
import { useCallback, useMemo, useState } from 'react'
// import StepIndicator from './step-indicator'
// import { useTestRunSteps } from './hooks'
// import DataSourceOptions from './data-source-options'
import type { CrawlResultItem, FileItem } from '@/models/datasets'
import { DataSourceType } from '@/models/datasets'
// import LocalFile from './data-source/local-file'
import produce from 'immer'
import { useProviderContextSelector } from '@/context/provider-context'
import { DataSourceProvider, type NotionPage } from '@/models/common'
// import Notion from './data-source/notion'
import VectorSpaceFull from '@/app/components/billing/vector-space-full'
// import Firecrawl from './data-source/website/firecrawl'
// import JinaReader from './data-source/website/jina-reader'
// import WaterCrawl from './data-source/website/water-crawl'
// import Actions from './data-source/actions'
// import DocumentProcessing from './document-processing'
import { useTranslation } from 'react-i18next'
import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
import LocalFile from '@/app/components/rag-pipeline/components/panel/test-run/data-source/local-file'
import Notion from '@/app/components/rag-pipeline/components/panel/test-run/data-source/notion'
import FireCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/firecrawl'
import JinaReader from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/jina-reader'
import WaterCrawl from '@/app/components/rag-pipeline/components/panel/test-run/data-source/website/water-crawl'
import Actions from '@/app/components/rag-pipeline/components/panel/test-run/data-source/actions'
import DocumentProcessing from '@/app/components/rag-pipeline/components/panel/test-run/document-processing'
import LeftHeader from './left-header'
// import { usePipelineRun } from '../../../hooks'
// import type { Datasource } from './types'
/**
 * Panel for adding documents to a dataset through its pipeline.
 *
 * Step 1 renders the picker that matches the selected datasource (local file,
 * Notion, or one of the website crawlers) followed by the "next" action.
 * Step 2 renders the `DocumentProcessing` form. The right-hand preview pane is
 * currently an empty placeholder.
 *
 * NOTE(review): the `DataSourceOptions` selector and the `handleRun` call are
 * still commented out, so `datasource` is never set from this component and
 * `handleProcess` only assembles its payload without sending it.
 */
const TestRunPanel = () => {
  const { t } = useTranslation()
  const [currentStep, setCurrentStep] = useState(1)
  const [datasource, setDatasource] = useState<Datasource>()
  const [fileList, setFiles] = useState<FileItem[]>([])
  const [notionPages, setNotionPages] = useState<NotionPage[]>([])
  const [websitePages, setWebsitePages] = useState<CrawlResultItem[]>([])
  const [websiteCrawlJobId, setWebsiteCrawlJobId] = useState('')
  const plan = useProviderContextSelector(state => state.plan)
  const enableBilling = useProviderContextSelector(state => state.enableBilling)
  // const steps = useTestRunSteps()

  // True once at least one file exists and every file carries an id.
  const allFileLoaded = (fileList.length > 0 && fileList.every(file => file.file.id))
  const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace
  // The "vector space full" notice is only shown when billing is enabled and all files have ids.
  const isShowVectorSpaceFull = allFileLoaded && isVectorSpaceFull && enableBilling
  const notSupportBatchUpload = enableBilling && plan.type === 'sandbox'

  // Disabled state for the local-file datasource: needs files, all with ids, and free vector space.
  const nextDisabled = useMemo(() => {
    if (!fileList.length)
      return true
    if (fileList.some(file => !file.file.id))
      return true
    return isShowVectorSpaceFull
  }, [fileList, isShowVectorSpaceFull])

  // Disabled state of the "next" button, resolved per datasource type.
  const nextBtnDisabled = useMemo(() => {
    if (!datasource) return true
    if (datasource.type === DataSourceType.FILE)
      return nextDisabled
    if (datasource.type === DataSourceType.NOTION)
      return isShowVectorSpaceFull || !notionPages.length
    if (datasource.type === DataSourceProvider.fireCrawl
      || datasource.type === DataSourceProvider.jinaReader
      || datasource.type === DataSourceProvider.waterCrawl)
      return isShowVectorSpaceFull || !websitePages.length
    return false
  }, [datasource, nextDisabled, isShowVectorSpaceFull, notionPages.length, websitePages.length])

  /**
   * Stores a copy of `list` in which the entry matching `fileItem.fileID` has
   * its `progress` replaced. When no entry matches, `list` is stored unchanged.
   */
  const updateFile = (fileItem: FileItem, progress: number, list: FileItem[]) => {
    const newList = produce(list, (draft) => {
      const targetIndex = draft.findIndex(file => file.fileID === fileItem.fileID)
      // findIndex returns -1 on a miss; writing to draft[-1] would add a stray
      // "-1" property to the array instead of updating a file.
      if (targetIndex === -1)
        return
      draft[targetIndex] = {
        ...draft[targetIndex],
        progress,
      }
    })
    setFiles(newList)
  }

  const updateFileList = (preparedFiles: FileItem[]) => {
    setFiles(preparedFiles)
  }

  const updateNotionPages = (value: NotionPage[]) => {
    setNotionPages(value)
  }

  const handleNextStep = useCallback(() => {
    setCurrentStep(preStep => preStep + 1)
  }, [])

  const handleBackStep = useCallback(() => {
    setCurrentStep(preStep => preStep - 1)
  }, [])

  // const { handleRun } = usePipelineRun()
  /**
   * Assembles the datasource-specific payload for a processing run.
   *
   * `data` (the form values passed by `DocumentProcessing`) and
   * `datasource_type` are only consumed by the `handleRun` call that is still
   * commented out below, so this callback has no observable effect yet.
   */
  const handleProcess = useCallback((data: Record<string, any>) => {
    if (!datasource)
      return
    const datasourceInfo: Record<string, any> = {}
    let datasource_type = ''
    if (datasource.type === DataSourceType.FILE) {
      datasource_type = 'local_file'
      datasourceInfo.fileId = fileList.map(file => file.fileID)
    }
    if (datasource.type === DataSourceType.NOTION) {
      // Without a selected page there is no workspace id to read; bail out
      // instead of throwing on `notionPages[0]`.
      if (!notionPages.length)
        return
      datasource_type = 'online_document'
      datasourceInfo.workspaceId = notionPages[0].workspace_id
      datasourceInfo.page = notionPages.map((page) => {
        const { workspace_id, ...rest } = page
        return rest
      })
    }
    if (datasource.type === DataSourceProvider.fireCrawl
      || datasource.type === DataSourceProvider.jinaReader
      || datasource.type === DataSourceProvider.waterCrawl) {
      datasource_type = 'website_crawl'
      datasourceInfo.jobId = websiteCrawlJobId
      datasourceInfo.result = websitePages
    }
    // handleRun({
    //   inputs: data,
    //   datasource_type,
    //   datasource_info: datasourceInfo,
    // })
  }, [datasource, fileList, notionPages, websiteCrawlJobId, websitePages])

  return (
    <div
      className='relative flex h-[calc(100vh-56px)] min-w-[1512px] rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle'
    >
      <div className='flex flex-1 flex-col px-14'>
        <LeftHeader
          title={t('datasetPipeline.addDocuments.title')}
          currentStep={currentStep}
        />
        <div className='grow overflow-y-auto'>
          {
            currentStep === 1 && (
              <>
                <div className='flex flex-col gap-y-4 px-4 py-2'>
                  {/* <DataSourceOptions
                    datasourceNodeId={datasource?.nodeId || ''}
                    onSelect={setDatasource}
                  /> */}
                  {datasource?.type === DataSourceType.FILE && (
                    <LocalFile
                      files={fileList}
                      updateFile={updateFile}
                      updateFileList={updateFileList}
                      notSupportBatchUpload={notSupportBatchUpload}
                    />
                  )}
                  {datasource?.type === DataSourceType.NOTION && (
                    <Notion
                      nodeId={datasource?.nodeId || ''}
                      notionPages={notionPages}
                      updateNotionPages={updateNotionPages}
                    />
                  )}
                  {datasource?.type === DataSourceProvider.fireCrawl && (
                    <FireCrawl
                      nodeId={datasource?.nodeId || ''}
                      variables={datasource?.variables}
                      checkedCrawlResult={websitePages}
                      onCheckedCrawlResultChange={setWebsitePages}
                      onJobIdChange={setWebsiteCrawlJobId}
                    />
                  )}
                  {datasource?.type === DataSourceProvider.jinaReader && (
                    <JinaReader
                      nodeId={datasource?.nodeId || ''}
                      variables={datasource?.variables}
                      checkedCrawlResult={websitePages}
                      onCheckedCrawlResultChange={setWebsitePages}
                      onJobIdChange={setWebsiteCrawlJobId}
                    />
                  )}
                  {datasource?.type === DataSourceProvider.waterCrawl && (
                    <WaterCrawl
                      nodeId={datasource?.nodeId || ''}
                      variables={datasource?.variables}
                      checkedCrawlResult={websitePages}
                      onCheckedCrawlResultChange={setWebsitePages}
                      onJobIdChange={setWebsiteCrawlJobId}
                    />
                  )}
                  {isShowVectorSpaceFull && (
                    <VectorSpaceFull />
                  )}
                </div>
                <Actions disabled={nextBtnDisabled} handleNextStep={handleNextStep} />
              </>
            )
          }
          {
            currentStep === 2 && (
              <DocumentProcessing
                dataSourceNodeId={datasource?.nodeId || ''}
                onProcess={handleProcess}
                onBack={handleBackStep}
              />
            )
          }
        </div>
      </div>
      {/* Preview */}
      <div className='flex h-full flex-1 shrink-0 flex-col pl-2 pt-2'>
      </div>
    </div>
  )
}

export default TestRunPanel

View File

@ -0,0 +1,46 @@
import React from 'react'
import { RiArrowLeftLine } from '@remixicon/react'
import Button from '@/app/components/base/button'
import { useParams } from 'next/navigation'
import Effect from '@/app/components/base/effect'
import { useAddDocumentsSteps } from './hooks'
import StepIndicator from './step-indicator'
/** Props accepted by `LeftHeader`. */
type LeftHeaderProps = {
  /** Text rendered in the small uppercase title span. */
  title: string
  /** 1-based step number; selects `steps[currentStep - 1]` and is forwarded to `StepIndicator`. */
  currentStep: number
}

/**
 * Header of the add-documents left pane: title, step indicator, the label of
 * the current step, and a back link to the dataset's documents list.
 */
const LeftHeader = (props: LeftHeaderProps) => {
  const { title, currentStep } = props
  const routeParams = useParams()
  const steps = useAddDocumentsSteps()

  // Target of the round back button, built from the `datasetId` route param.
  const documentsHref = `/datasets/${routeParams.datasetId}/documents`
  // Undefined when `currentStep` falls outside the steps array; nothing is rendered then.
  const currentStepLabel = steps[currentStep - 1]?.label

  return (
    <div className='relative flex flex-col gap-y-0.5 pb-2 pt-4'>
      <div className='flex items-center gap-x-2'>
        <span className='system-2xs-semibold-uppercase bg-pipeline-add-documents-title-bg bg-clip-text text-transparent'>
          {title}
        </span>
        <span className='system-2xs-regular text-divider-regular'>/</span>
        <StepIndicator steps={steps} currentStep={currentStep} />
      </div>
      <div className='system-md-semibold text-text-primary'>
        {currentStepLabel}
      </div>
      <a
        className='absolute -left-11 top-3.5'
        href={documentsHref}
      >
        <Button variant='secondary-accent' className='size-9 rounded-full p-0'>
          <RiArrowLeftLine className='size-5 ' />
        </Button>
      </a>
      <Effect className='left-8 top-[-34px] opacity-20' />
    </div>
  )
}

export default React.memo(LeftHeader)

View File

@ -0,0 +1,33 @@
import cn from '@/utils/classnames'
import React from 'react'
/** A single step shown by the indicator. */
type Step = {
  /** Display text of the step (not rendered by the indicator itself). */
  label: string
  /** Identifier of the step; used as the React key of its dot. */
  value: string
}

/** Props accepted by `StepIndicator`. */
type StepIndicatorProps = {
  /** 1-based number of the active step. */
  currentStep: number
  /** Steps to render, one dot each, in array order. */
  steps: Step[]
}

/**
 * Renders a row of dots, one per step, with the dot of the active step drawn
 * wider and in the accent colour.
 */
const StepIndicator = ({ currentStep, steps }: StepIndicatorProps) => {
  // `currentStep` is 1-based while array positions are 0-based.
  const activePosition = currentStep - 1

  const dots = steps.map(({ value }, position) => (
    <div
      key={value}
      className={cn('h-1 w-1 rounded-lg bg-divider-solid', position === activePosition && 'w-2 bg-state-accent-solid')}
    />
  ))

  return (
    <div className='flex gap-x-1'>
      {dots}
    </div>
  )
}

export default React.memo(StepIndicator)

View File

@ -0,0 +1,5 @@
/**
 * Identifiers of the steps in the add-documents flow.
 * This is a string enum: each member's value is spelled the same as its name.
 */
export enum AddDocumentsStep {
// Step for choosing the datasource.
dataSource = 'dataSource',
// Step for configuring how the documents are processed.
processDocuments = 'processDocuments',
// Step shown while the documents are being processed.
processingDocuments = 'processingDocuments',
}

View File

@ -1,7 +1,7 @@
import React, { type FC, useCallback, useMemo, useState } from 'react'
import { useTranslation } from 'react-i18next'
import { RiDeleteBinLine, RiEditLine } from '@remixicon/react'
import { StatusItem } from '../../../list'
import StatusItem from '../../../status-item'
import { useDocumentContext } from '../../index'
import ChildSegmentList from '../child-segment-list'
import Tag from '../common/tag'
@ -228,15 +228,15 @@ const SegmentCard: FC<ISegmentCardProps> = ({
}
{
isParagraphMode && child_chunks.length > 0
&& <ChildSegmentList
parentChunkId={id}
childChunks={child_chunks}
enabled={enabled}
onDelete={onDeleteChildChunk!}
handleAddNewChildChunk={handleAddNewChildChunk}
onClickSlice={onClickSlice}
focused={focused.segmentContent}
/>
&& <ChildSegmentList
parentChunkId={id}
childChunks={child_chunks}
enabled={enabled}
onDelete={onDeleteChildChunk!}
handleAddNewChildChunk={handleAddNewChildChunk}
onClickSlice={onClickSlice}
focused={focused.segmentContent}
/>
}
{showModal
&& <Confirm

View File

@ -5,7 +5,8 @@ import { createContext, useContext, useContextSelector } from 'use-context-selec
import { useTranslation } from 'react-i18next'
import { useRouter } from 'next/navigation'
import { RiArrowLeftLine, RiLayoutLeft2Line, RiLayoutRight2Line } from '@remixicon/react'
import { OperationAction, StatusItem } from '../list'
import Operations from '../operations'
import StatusItem from '../status-item'
import DocumentPicker from '../../common/document-picker'
import Completed from './completed'
import Embedding from './embedding'
@ -228,7 +229,7 @@ const DocumentDetail: FC<Props> = ({ datasetId, documentId }) => {
datasetId={datasetId}
onUpdate={handleOperate}
/>
<OperationAction
<Operations
scene='detail'
embeddingAvailable={embeddingAvailable}
detail={{

View File

@ -15,7 +15,7 @@ import Button from '@/app/components/base/button'
import Input from '@/app/components/base/input'
import { get } from '@/service/base'
import { createDocument } from '@/service/datasets'
import { useDatasetDetailContext } from '@/context/dataset-detail'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { NotionPageSelectorModal } from '@/app/components/base/notion-page-selector'
import type { NotionPage } from '@/models/common'
import type { CreateDocumentReq } from '@/models/datasets'
@ -93,7 +93,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const [currPage, setCurrPage] = React.useState<number>(0)
const [limit, setLimit] = useState<number>(DEFAULT_LIMIT)
const router = useRouter()
const { dataset } = useDatasetDetailContext()
const dataset = useDatasetDetailContextWithSelector(s => s.dataset)
const [notionPageSelectorModalVisible, setNotionPageSelectorModalVisible] = useState(false)
const [timerCanRun, setTimerCanRun] = useState(true)
const isDataSourceNotion = dataset?.data_source_type === DataSourceType.NOTION
@ -172,6 +172,11 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
const total = documentsRes?.total || 0
const routeToDocCreate = () => {
// if the dataset was created from a pipeline, redirect to the create-from-pipeline page
if (dataset?.pipeline_id) {
router.push(`/datasets/${datasetId}/documents/create-from-pipeline`)
return
}
if (isDataSourceNotion) {
setNotionPageSelectorModalVisible(true)
return
@ -267,7 +272,7 @@ const Documents: FC<IDocumentsProps> = ({ datasetId }) => {
? 'https://docs.dify.ai/zh-hans/guides/knowledge-base/integrate-knowledge-within-application'
: 'https://docs.dify.ai/en/guides/knowledge-base/integrate-knowledge-within-application'
}
>
>
<span>{t('datasetDocuments.list.learnMore')}</span>
<RiExternalLinkLine className='h-3 w-3' />
</a>