diff --git a/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx b/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx
index 4ac30289e1..bf516d432b 100644
--- a/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx
+++ b/web/app/components/datasets/documents/detail/settings/__tests__/document-settings.spec.tsx
@@ -224,6 +224,20 @@ describe('DocumentSettings', () => {
// Data source types
describe('Data Source Types', () => {
+ it('should handle upload_file_id data source format', () => {
+ mockDocumentDetail = {
+ name: 'test-document',
+ data_source_type: 'upload_file',
+ data_source_info: {
+ upload_file_id: '4a807f05-45d6-4fc4-b7a8-b009a4568b36', // only an ID is supplied, no nested upload_file object
+ },
+ }
+
+ render() // NOTE(review): no element is passed here — confirm the component JSX was not lost from this call
+
+ expect(screen.getByTestId('files-count')).toHaveTextContent('1') // the bare ID is expected to yield exactly one file entry
+ })
+
it('should handle legacy upload_file data source', () => {
mockDocumentDetail = {
name: 'test-document',
@@ -307,6 +321,18 @@ describe('DocumentSettings', () => {
expect(screen.getByTestId('files-count')).toHaveTextContent('0')
})
+ it('should handle empty data_source_info object', () => {
+ mockDocumentDetail = {
+ name: 'test-document',
+ data_source_type: 'upload_file',
+ data_source_info: {}, // carries none of the keys the component's type guards look for
+ }
+
+ render()
+
+ expect(screen.getByTestId('files-count')).toHaveTextContent('0') // no guard matches, so no file entries are expected
+ })
+
it('should maintain structure when rerendered', () => {
const { rerender } = render(
,
@@ -317,4 +343,37 @@ describe('DocumentSettings', () => {
expect(screen.getByTestId('step-two')).toBeInTheDocument()
})
})
+
+ describe('Files Extraction Regression Tests', () => {
+ it('should correctly extract file ID from upload_file_id format', () => {
+ const fileId = '4a807f05-45d6-4fc4-b7a8-b009a4568b36'
+ mockDocumentDetail = {
+ name: 'test-document.pdf',
+ data_source_type: 'upload_file',
+ data_source_info: {
+ upload_file_id: fileId,
+ },
+ }
+
+ render()
+
+ // Verify exactly one file entry is produced; only the count is asserted here, not the file ID itself
+ expect(screen.getByTestId('files-count')).toHaveTextContent('1')
+ })
+
+ it('should preserve document name when using upload_file_id format', () => {
+ const documentName = 'my-uploaded-document.txt' // only fed into the mock below; never read back in an assertion
+ mockDocumentDetail = {
+ name: documentName,
+ data_source_type: 'upload_file',
+ data_source_info: {
+ upload_file_id: 'some-file-id',
+ },
+ }
+
+ render()
+
+ expect(screen.getByTestId('files-count')).toHaveTextContent('1') // NOTE(review): checks the count only — the name is not verified; confirm whether a name assertion can be added
+ })
+ })
})
diff --git a/web/app/components/datasets/documents/detail/settings/document-settings.tsx b/web/app/components/datasets/documents/detail/settings/document-settings.tsx
index bcbc149231..2b6cc77683 100644
--- a/web/app/components/datasets/documents/detail/settings/document-settings.tsx
+++ b/web/app/components/datasets/documents/detail/settings/document-settings.tsx
@@ -8,6 +8,7 @@ import type {
LegacyDataSourceInfo,
LocalFileInfo,
OnlineDocumentInfo,
+ UploadFileIdInfo,
WebsiteCrawlInfo,
} from '@/models/datasets'
import { useBoolean } from 'ahooks'
@@ -61,6 +62,7 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const dataSourceInfo = documentDetail?.data_source_info
+ // Type guards for DataSourceInfo union
const isLegacyDataSourceInfo = (info: DataSourceInfo | undefined): info is LegacyDataSourceInfo => {
return !!info && 'upload_file' in info
}
@@ -73,10 +75,15 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
const isLocalFileInfo = (info: DataSourceInfo | undefined): info is LocalFileInfo => {
return !!info && 'related_id' in info && 'transfer_method' in info
}
+ // Matches the `{ upload_file_id }` shape. Key presence alone is not enough: the ID must be a
+ // non-empty string, otherwise a null/empty value would still be treated as a usable file ID.
+ const isUploadFileIdInfo = (info: DataSourceInfo | undefined): info is UploadFileIdInfo => {
+ return !!info && 'upload_file_id' in info && typeof info.upload_file_id === 'string' && info.upload_file_id.length > 0
+ }
+
const legacyInfo = isLegacyDataSourceInfo(dataSourceInfo) ? dataSourceInfo : undefined
const websiteInfo = isWebsiteCrawlInfo(dataSourceInfo) ? dataSourceInfo : undefined
const onlineDocumentInfo = isOnlineDocumentInfo(dataSourceInfo) ? dataSourceInfo : undefined
const localFileInfo = isLocalFileInfo(dataSourceInfo) ? dataSourceInfo : undefined
+ const uploadFileIdInfo = isUploadFileIdInfo(dataSourceInfo) ? dataSourceInfo : undefined
const currentPage = useMemo(() => {
if (legacyInfo) {
@@ -101,8 +108,20 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
}, [documentDetail?.data_source_type, documentDetail?.name, legacyInfo, onlineDocumentInfo])
const files = useMemo(() => {
- if (legacyInfo?.upload_file)
- return [legacyInfo.upload_file as CustomFile]
+ // Handle upload_file_id format
+ if (uploadFileIdInfo) {
+ return [{
+ id: uploadFileIdInfo.upload_file_id,
+ name: documentDetail?.name || '',
+ } as unknown as CustomFile]
+ }
+
+ // Handle legacy upload_file format
+ if (legacyInfo?.upload_file) {
+ return [legacyInfo.upload_file as unknown as CustomFile]
+ }
+
+ // Handle local file info format
if (localFileInfo) {
const { related_id, name, extension } = localFileInfo
return [{
@@ -111,8 +130,9 @@ const DocumentSettings = ({ datasetId, documentId }: DocumentSettingsProps) => {
extension,
} as unknown as CustomFile]
}
+
return []
- }, [legacyInfo?.upload_file, localFileInfo])
+ }, [uploadFileIdInfo, legacyInfo?.upload_file, localFileInfo, documentDetail?.name])
const websitePages = useMemo(() => {
if (!websiteInfo)
diff --git a/web/models/datasets.ts b/web/models/datasets.ts
index ed16e1a67c..e4793357f4 100644
--- a/web/models/datasets.ts
+++ b/web/models/datasets.ts
@@ -381,7 +381,11 @@ export type OnlineDriveInfo = {
type: 'file' | 'folder'
}
-export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo
+export type UploadFileIdInfo = {
+ upload_file_id: string // the only field in this shape; no other file metadata accompanies the ID
+}
+
+export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo | UploadFileIdInfo
export type InitialDocumentDetail = {
id: string
diff --git a/web/service/knowledge/use-create-dataset.ts b/web/service/knowledge/use-create-dataset.ts
index a0d55eeb99..297bb44827 100644
--- a/web/service/knowledge/use-create-dataset.ts
+++ b/web/service/knowledge/use-create-dataset.ts
@@ -91,11 +91,15 @@ const getFileIndexingEstimateParamsForFile = ({
processRule,
dataset_id,
}: GetFileIndexingEstimateParamsOptionFile): IndexingEstimateParams => {
+ const fileIds = files
+ .map(file => file.id)
+ .filter((id): id is string => Boolean(id))
+
return {
info_list: {
data_source_type: dataSourceType,
file_info_list: {
- file_ids: files.map(file => file.id) as string[],
+ file_ids: fileIds,
},
},
indexing_technique: indexingTechnique,