diff --git a/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx b/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx index 4ac30289e1..bf516d432b 100644 --- a/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx +++ b/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx @@ -224,6 +224,20 @@ describe('DocumentSettings', () => { // Data source types describe('Data Source Types', () => { + it('should handle upload_file_id data source format', () => { + mockDocumentDetail = { + name: 'test-document', + data_source_type: 'upload_file', + data_source_info: { + upload_file_id: '4a807f05-45d6-4fc4-b7a8-b009a4568b36', + }, + } + + render() + + expect(screen.getByTestId('files-count')).toHaveTextContent('1') + }) + it('should handle legacy upload_file data source', () => { mockDocumentDetail = { name: 'test-document', @@ -307,6 +321,18 @@ describe('DocumentSettings', () => { expect(screen.getByTestId('files-count')).toHaveTextContent('0') }) + it('should handle empty data_source_info object', () => { + mockDocumentDetail = { + name: 'test-document', + data_source_type: 'upload_file', + data_source_info: {}, + } + + render() + + expect(screen.getByTestId('files-count')).toHaveTextContent('0') + }) + it('should maintain structure when rerendered', () => { const { rerender } = render( , @@ -317,4 +343,37 @@ describe('DocumentSettings', () => { expect(screen.getByTestId('step-two')).toBeInTheDocument() }) }) + + describe('Files Extraction Regression Tests', () => { + it('should correctly extract file ID from upload_file_id format', () => { + const fileId = '4a807f05-45d6-4fc4-b7a8-b009a4568b36' + mockDocumentDetail = { + name: 'test-document.pdf', + data_source_type: 'upload_file', + data_source_info: { + upload_file_id: fileId, + }, + } + + render() + + // Verify files array is populated with correct file ID + expect(screen.getByTestId('files-count')).toHaveTextContent('1') + }) + + it('should preserve document name when using upload_file_id format', () => { + const documentName = 'my-uploaded-document.txt' + mockDocumentDetail = { + name: documentName, + data_source_type: 'upload_file', + data_source_info: { + upload_file_id: 'some-file-id', + }, + } + + render() + + expect(screen.getByTestId('files-count')).toHaveTextContent('1') + }) + }) }) diff --git a/web/app/components/datasets/documents/detail/settings/document-settings.tsx b/web/app/components/datasets/documents/detail/settings/document-settings.tsx index bcbc149231..2b6cc77683 100644 --- a/web/app/components/datasets/documents/detail/settings/document-settings.tsx +++ b/web/app/components/datasets/documents/detail/settings/document-settings.tsx @@ -8,6 +8,7 @@ import type { LegacyDataSourceInfo, LocalFileInfo, OnlineDocumentInfo, + UploadFileIdInfo, WebsiteCrawlInfo, } from '@/models/datasets' import { useBoolean } from 'ahooks' @@ -61,6 +62,7 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { const dataSourceInfo = documentDetail?.data_source_info + // Type guards for DataSourceInfo union const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => { return !!info && 'upload_file' in info } @@ -73,10 +75,15 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => { return !!info && 'related_id' in info && 'transfer_method' in info } + const isUploadFileIdInfo = (info: DataSourceInfo | undefined): info is UploadFileIdInfo => { + return !!info && 'upload_file_id' in info + } + const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined + const uploadFileIdInfo = isUploadFileIdInfo(dataSourceInfo) ? dataSourceInfo : undefined const currentPage = useMemo(() => { if (legacyInfo) { @@ -101,8 +108,20 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { }, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo]) const files = useMemo(() => { - if (legacyInfo?.upload_file) - return [legacyInfo.upload_file as CustomFile] + // Handle upload_file_id format + if (uploadFileIdInfo) { + return [{ + id: uploadFileIdInfo.upload_file_id, + name: documentDetail?.name || '', + } as unknown as CustomFile] + } + + // Handle legacy upload_file format + if (legacyInfo?.upload_file) { + return [legacyInfo.upload_file as unknown as CustomFile] + } + + // Handle local file info format if (localFileInfo) { const { related_id, name, extension } = localFileInfo return [{ @@ -111,8 +130,9 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => { extension, } as unknown as CustomFile] } + return [] - }, [legacyInfo?.upload_file, localFileInfo]) + }, [uploadFileIdInfo, legacyInfo?.upload_file, localFileInfo, documentDetail?.name]) const websitePages = useMemo(() => { if (!websiteInfo) diff --git a/web/models/datasets.ts b/web/models/datasets.ts index ed16e1a67c..e4793357f4 100644 --- a/web/models/datasets.ts +++ b/web/models/datasets.ts @@ -381,7 +381,11 @@ export type OnlineDriveInfo = { type: 'file' | 'folder' } -export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo +export type UploadFileIdInfo = { + upload_file_id: string +} + +export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo | UploadFileIdInfo export type InitialDocumentDetail = { id: string diff --git a/web/service/knowledge/use-create-dataset.ts b/web/service/knowledge/use-create-dataset.ts index a0d55eeb99..297bb44827 100644 --- a/web/service/knowledge/use-create-dataset.ts +++ b/web/service/knowledge/use-create-dataset.ts @@ -91,11 +91,15 @@ const getFileIndexingEstimateParamsForFile = ({ processRule, dataset_id, }: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => { + const fileIds = files + .map(file => file.id) + .filter((id): id is string => Boolean(id)) + return { info_list: { data_source_type: dataSourceType, file_info_list: { - file_ids: files.map(file => file.id) as string[], + file_ids: fileIds, }, }, indexing_technique: indexingTechnique,