diff --git a/api/core/rag/retrieval/dataset_retrieval.py b/api/core/rag/retrieval/dataset_retrieval.py index cfea8d114a..459d7bed95 100644 --- a/api/core/rag/retrieval/dataset_retrieval.py +++ b/api/core/rag/retrieval/dataset_retrieval.py @@ -248,19 +248,22 @@ class DatasetRetrieval: retrieval_resource_list = [] # deal with external documents for item in external_documents: + ext_meta = item.metadata or {} + title = ext_meta.get("title") or "" + doc_id = ext_meta.get("document_id") or title source = Source( metadata=SourceMetadata( source="knowledge", - dataset_id=item.metadata.get("dataset_id"), - dataset_name=item.metadata.get("dataset_name"), - document_id=item.metadata.get("document_id"), - document_name=item.metadata.get("title"), + dataset_id=ext_meta.get("dataset_id") or "", + dataset_name=ext_meta.get("dataset_name") or "", + document_id=str(doc_id), + document_name=ext_meta.get("title") or "", data_source_type="external", retriever_from="workflow", - score=item.metadata.get("score"), - doc_metadata=item.metadata, + score=float(ext_meta.get("score") or 0.0), + doc_metadata=ext_meta, ), - title=item.metadata.get("title"), + title=title, content=item.page_content, ) retrieval_resource_list.append(source)