From 9ee481807fdb576da0133c1ff13649e4be982f0c Mon Sep 17 00:00:00 2001 From: buua436 Date: Tue, 12 May 2026 17:16:48 +0800 Subject: [PATCH] GO: implement GET /api/v1/datasets/:dataset_id (#14834) ### What problem does this PR solve? implement GET /api/v1/datasets/:dataset_id ### Type of change - [x] Refactoring --- internal/dao/connector.go | 26 ++++++++++++++++++++++ internal/dao/document.go | 10 +++++++++ internal/handler/datasets.go | 21 ++++++++++++++++++ internal/router/router.go | 1 + internal/service/datasets.go | 43 ++++++++++++++++++++++++++++++++++++ 5 files changed, 101 insertions(+) diff --git a/internal/dao/connector.go b/internal/dao/connector.go index 2f18e00b3..260e1596a 100644 --- a/internal/dao/connector.go +++ b/internal/dao/connector.go @@ -36,6 +36,15 @@ type ConnectorListItem struct { Status string `json:"status"` } +// ConnectorDatasetListItem represents a connector linked to a dataset. +type ConnectorDatasetListItem struct { + ID string `json:"id" gorm:"column:id"` + Source string `json:"source" gorm:"column:source"` + Name string `json:"name" gorm:"column:name"` + AutoParse string `json:"auto_parse" gorm:"column:auto_parse"` + Status string `json:"status" gorm:"column:status"` +} + // ListByTenantID list connectors by tenant ID // Only selects id, name, source, status fields (matching Python implementation) func (dao *ConnectorDAO) ListByTenantID(tenantID string) ([]*ConnectorListItem, error) { @@ -53,6 +62,23 @@ func (dao *ConnectorDAO) ListByTenantID(tenantID string) ([]*ConnectorListItem, return connectors, nil } +// ListByDatasetID lists connectors linked to a dataset. +func (dao *ConnectorDAO) ListByDatasetID(datasetID string) ([]*ConnectorDatasetListItem, error) { + var connectors []*ConnectorDatasetListItem + + err := DB.Model(&entity.Connector2Kb{}). + Select("connector.id, connector.source, connector.name, connector2kb.auto_parse, connector.status"). + Joins("JOIN connector ON connector2kb.connector_id = connector.id"). + Where("connector2kb.kb_id = ?", datasetID). + Scan(&connectors).Error + + if err != nil { + return nil, err + } + + return connectors, nil +} + // GetByID get connector by ID func (dao *ConnectorDAO) GetByID(id string) (*entity.Connector, error) { var connector entity.Connector diff --git a/internal/dao/document.go b/internal/dao/document.go index e2e055a11..49ef0e88d 100644 --- a/internal/dao/document.go +++ b/internal/dao/document.go @@ -138,3 +138,13 @@ func (dao *DocumentDAO) CountByTenantID(tenantID string) (int64, error) { err := DB.Model(&entity.Document{}).Where("created_by = ?", tenantID).Count(&count).Error return count, err } + +// SumSizeByDatasetID returns the total document size for a dataset. +func (dao *DocumentDAO) SumSizeByDatasetID(datasetID string) (int64, error) { + var total int64 + err := DB.Model(&entity.Document{}). + Select("COALESCE(SUM(size), 0)"). + Where("kb_id = ?", datasetID). + Scan(&total).Error + return total, err +} diff --git a/internal/handler/datasets.go b/internal/handler/datasets.go index a1768e63f..f74021232 100644 --- a/internal/handler/datasets.go +++ b/internal/handler/datasets.go @@ -142,6 +142,27 @@ func (h *DatasetsHandler) CreateDataset(c *gin.Context) { }) } +// GetDataset handles GET /api/v1/datasets/:dataset_id. +func (h *DatasetsHandler) GetDataset(c *gin.Context) { + user, errorCode, errorMessage := GetUser(c) + if errorCode != common.CodeSuccess { + jsonError(c, errorCode, errorMessage) + return + } + + datasetID := c.Param("dataset_id") + result, code, err := h.datasetsService.GetDataset(datasetID, user.ID) + if err != nil { + jsonError(c, code, err.Error()) + return + } + + c.JSON(http.StatusOK, gin.H{ + "code": common.CodeSuccess, + "data": result, + }) +} + // DeleteDatasets handles DELETE /api/v1/datasets. func (h *DatasetsHandler) DeleteDatasets(c *gin.Context) { user, errorCode, errorMessage := GetUser(c) diff --git a/internal/router/router.go b/internal/router/router.go index 97c9b9098..67ae4e0a1 100644 --- a/internal/router/router.go +++ b/internal/router/router.go @@ -173,6 +173,7 @@ func (r *Router) Setup(engine *gin.Engine) { datasets := v1.Group("/datasets") { datasets.GET("", r.datasetsHandler.ListDatasets) + datasets.GET("/:dataset_id", r.datasetsHandler.GetDataset) datasets.POST("", r.datasetsHandler.CreateDataset) datasets.DELETE("", r.datasetsHandler.DeleteDatasets) datasets.POST("/search", r.chunkHandler.RetrievalTest) diff --git a/internal/service/datasets.go b/internal/service/datasets.go index 271f457a2..4c9d64aff 100644 --- a/internal/service/datasets.go +++ b/internal/service/datasets.go @@ -61,6 +61,8 @@ var ( // DatasetsService implements the RESTful dataset APIs from dataset_api.py. type DatasetsService struct { kbDAO *dao.KnowledgebaseDAO + documentDAO *dao.DocumentDAO + connectorDAO *dao.ConnectorDAO tenantDAO *dao.TenantDAO tenantLLMDAO *dao.TenantLLMDAO } @@ -69,6 +71,8 @@ type DatasetsService struct { func NewDatasetsService() *DatasetsService { return &DatasetsService{ kbDAO: dao.NewKnowledgebaseDAO(), + documentDAO: dao.NewDocumentDAO(), + connectorDAO: dao.NewConnectorDAO(), tenantDAO: dao.NewTenantDAO(), tenantLLMDAO: dao.NewTenantLLMDAO(), } @@ -523,6 +527,45 @@ func (s *DatasetsService) DeleteDatasets(ids []string, deleteAll bool, tenantID }, common.CodeSuccess, nil } +// GetDataset gets a single dataset with its size and linked connectors. +func (s *DatasetsService) GetDataset(datasetID, userID string) (map[string]interface{}, common.ErrorCode, error) { + datasetID = strings.TrimSpace(datasetID) + if datasetID == "" { + return nil, common.CodeDataError, errors.New("Lack of \"Dataset ID\"") + } + + normalizedID, err := normalizeDatasetUUID1(datasetID) + if err != nil { + return nil, common.CodeDataError, err + } + datasetID = normalizedID + + if !s.kbDAO.Accessible(datasetID, userID) { + return nil, common.CodeDataError, fmt.Errorf("User '%s' lacks permission for dataset '%s'", userID, datasetID) + } + + kb, err := s.kbDAO.GetByID(datasetID) + if err != nil || kb == nil { + return nil, common.CodeDataError, errors.New("Invalid Dataset ID") + } + + data := datasetToMap(kb) + + size, err := s.documentDAO.SumSizeByDatasetID(datasetID) + if err != nil { + return nil, common.CodeServerError, errors.New("Database operation failed") + } + data["size"] = size + + connectors, err := s.connectorDAO.ListByDatasetID(datasetID) + if err != nil { + return nil, common.CodeServerError, errors.New("Database operation failed") + } + data["connectors"] = connectors + + return data, common.CodeSuccess, nil +} + func (s *DatasetsService) deleteDataset(tenantID string, kb *entity.Knowledgebase) error { return dao.DB.Transaction(func(tx *gorm.DB) error { var documents []entity.Document