mirror of
https://github.com/langgenius/dify.git
synced 2026-05-04 17:38:04 +08:00
external knowledge api
This commit is contained in:
@ -32,6 +32,7 @@ from models.dataset import (
|
||||
DatasetQuery,
|
||||
Document,
|
||||
DocumentSegment,
|
||||
ExternalKnowledgeBindings,
|
||||
)
|
||||
from models.model import UploadFile
|
||||
from models.source import DataSourceOauthBinding
|
||||
@ -39,6 +40,7 @@ from services.errors.account import NoPermissionError
|
||||
from services.errors.dataset import DatasetNameDuplicateError
|
||||
from services.errors.document import DocumentIndexingError
|
||||
from services.errors.file import FileNotExistsError
|
||||
from services.external_knowledge_service import ExternalDatasetService
|
||||
from services.feature_service import FeatureModel, FeatureService
|
||||
from services.tag_service import TagService
|
||||
from services.vector_service import VectorService
|
||||
@ -137,7 +139,14 @@ class DatasetService:
|
||||
|
||||
@staticmethod
|
||||
def create_empty_dataset(
|
||||
tenant_id: str, name: str, indexing_technique: Optional[str], account: Account, permission: Optional[str] = None
|
||||
tenant_id: str,
|
||||
name: str,
|
||||
indexing_technique: Optional[str],
|
||||
account: Account,
|
||||
permission: Optional[str] = None,
|
||||
provider: str = "vendor",
|
||||
external_api_template_id: Optional[str] = None,
|
||||
external_knowledge_id: Optional[str] = None,
|
||||
):
|
||||
# check if dataset name already exists
|
||||
if Dataset.query.filter_by(name=name, tenant_id=tenant_id).first():
|
||||
@ -156,7 +165,23 @@ class DatasetService:
|
||||
dataset.embedding_model_provider = embedding_model.provider if embedding_model else None
|
||||
dataset.embedding_model = embedding_model.model if embedding_model else None
|
||||
dataset.permission = permission if permission else DatasetPermissionEnum.ONLY_ME
|
||||
dataset.provider = provider
|
||||
db.session.add(dataset)
|
||||
db.session.flush()
|
||||
|
||||
if provider == "external" and external_api_template_id:
|
||||
external_api_template = ExternalDatasetService.get_api_template(external_api_template_id)
|
||||
if not external_api_template:
|
||||
raise ValueError("External API template not found.")
|
||||
external_knowledge_binding = ExternalKnowledgeBindings(
|
||||
tenant_id=tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
external_api_template_id=external_api_template_id,
|
||||
external_knowledge_id=external_knowledge_id,
|
||||
created_by=account.id,
|
||||
)
|
||||
db.session.add(external_knowledge_binding)
|
||||
|
||||
db.session.commit()
|
||||
return dataset
|
||||
|
||||
|
||||
@ -1,16 +1,16 @@
|
||||
from typing import Literal, Union, Optional
|
||||
from typing import Literal, Optional, Union
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class AuthorizationConfig(BaseModel):
|
||||
type: Literal[None, 'basic', 'bearer', 'custom']
|
||||
type: Literal[None, "basic", "bearer", "custom"]
|
||||
api_key: Union[None, str] = None
|
||||
header: Union[None, str] = None
|
||||
|
||||
|
||||
class Authorization(BaseModel):
|
||||
type: Literal['no-auth', 'api-key']
|
||||
type: Literal["no-auth", "api-key"]
|
||||
config: Optional[AuthorizationConfig] = None
|
||||
|
||||
|
||||
|
||||
@ -3,59 +3,45 @@ import random
|
||||
import time
|
||||
from copy import deepcopy
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Union, Optional
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import httpx
|
||||
|
||||
from core.helper import ssrf_proxy
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from libs import helper
|
||||
from models.account import Account, TenantAccountRole
|
||||
from models.dataset import (
|
||||
AppDatasetJoin,
|
||||
Dataset,
|
||||
DatasetCollectionBinding,
|
||||
DatasetPermission,
|
||||
DatasetProcessRule,
|
||||
DatasetQuery,
|
||||
Document,
|
||||
DocumentSegment, ExternalApiTemplates, ExternalKnowledgeBindings,
|
||||
ExternalApiTemplates,
|
||||
ExternalKnowledgeBindings,
|
||||
)
|
||||
from models.model import UploadFile
|
||||
from services.dataset_service import DocumentService
|
||||
from services.entities.external_knowledge_entities.external_knowledge_entities import Authorization, ApiTemplateSetting
|
||||
from services.entities.external_knowledge_entities.external_knowledge_entities import ApiTemplateSetting, Authorization
|
||||
from services.errors.dataset import DatasetNameDuplicateError
|
||||
from tasks.external_document_indexing_task import external_document_indexing_task
|
||||
# from tasks.external_document_indexing_task import external_document_indexing_task
|
||||
|
||||
|
||||
class ExternalDatasetService:
|
||||
|
||||
@staticmethod
|
||||
def get_external_api_templates(page, per_page, tenant_id, search=None) -> tuple[list[ExternalApiTemplates], int]:
|
||||
query = ExternalApiTemplates.query.filter(ExternalApiTemplates.tenant_id == tenant_id).order_by(
|
||||
ExternalApiTemplates.created_at.desc()
|
||||
)
|
||||
if search:
|
||||
query = query.filter(ExternalApiTemplates.name.ilike(f'%{search}%'))
|
||||
query = query.filter(ExternalApiTemplates.name.ilike(f"%{search}%"))
|
||||
|
||||
api_templates = query.paginate(
|
||||
page=page,
|
||||
per_page=per_page,
|
||||
max_per_page=100,
|
||||
error_out=False
|
||||
)
|
||||
api_templates = query.paginate(page=page, per_page=per_page, max_per_page=100, error_out=False)
|
||||
|
||||
return api_templates.items, api_templates.total
|
||||
|
||||
@classmethod
|
||||
def validate_api_list(cls, api_settings: dict):
|
||||
if not api_settings:
|
||||
raise ValueError('api list is empty')
|
||||
if 'endpoint' not in api_settings and not api_settings['endpoint']:
|
||||
raise ValueError('endpoint is required')
|
||||
if 'api_key' not in api_settings and not api_settings['api_key']:
|
||||
raise ValueError('api_key is required')
|
||||
raise ValueError("api list is empty")
|
||||
if "endpoint" not in api_settings and not api_settings["endpoint"]:
|
||||
raise ValueError("endpoint is required")
|
||||
if "api_key" not in api_settings and not api_settings["api_key"]:
|
||||
raise ValueError("api_key is required")
|
||||
|
||||
@staticmethod
|
||||
def create_api_template(tenant_id: str, user_id: str, args: dict) -> ExternalApiTemplates:
|
||||
@ -63,9 +49,9 @@ class ExternalDatasetService:
|
||||
tenant_id=tenant_id,
|
||||
created_by=user_id,
|
||||
updated_by=user_id,
|
||||
name=args.get('name'),
|
||||
description=args.get('description', ''),
|
||||
settings=json.dumps(args.get('settings'), ensure_ascii=False),
|
||||
name=args.get("name"),
|
||||
description=args.get("description", ""),
|
||||
settings=json.dumps(args.get("settings"), ensure_ascii=False),
|
||||
)
|
||||
|
||||
db.session.add(api_template)
|
||||
@ -74,22 +60,17 @@ class ExternalDatasetService:
|
||||
|
||||
@staticmethod
|
||||
def get_api_template(api_template_id: str) -> ExternalApiTemplates:
|
||||
return ExternalApiTemplates.query.filter_by(
|
||||
id=api_template_id
|
||||
).first()
|
||||
return ExternalApiTemplates.query.filter_by(id=api_template_id).first()
|
||||
|
||||
@staticmethod
|
||||
def update_api_template(tenant_id, user_id, api_template_id, args) -> ExternalApiTemplates:
|
||||
api_template = ExternalApiTemplates.query.filter_by(
|
||||
id=api_template_id,
|
||||
tenant_id=tenant_id
|
||||
).first()
|
||||
api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first()
|
||||
if api_template is None:
|
||||
raise ValueError('api template not found')
|
||||
raise ValueError("api template not found")
|
||||
|
||||
api_template.name = args.get('name')
|
||||
api_template.description = args.get('description', '')
|
||||
api_template.settings = json.dumps(args.get('settings'), ensure_ascii=False)
|
||||
api_template.name = args.get("name")
|
||||
api_template.description = args.get("description", "")
|
||||
api_template.settings = json.dumps(args.get("settings"), ensure_ascii=False)
|
||||
api_template.updated_by = user_id
|
||||
api_template.updated_at = datetime.now(timezone.utc).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
@ -98,21 +79,16 @@ class ExternalDatasetService:
|
||||
|
||||
@staticmethod
|
||||
def delete_api_template(tenant_id: str, api_template_id: str):
|
||||
api_template = ExternalApiTemplates.query.filter_by(
|
||||
id=api_template_id,
|
||||
tenant_id=tenant_id
|
||||
).first()
|
||||
api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first()
|
||||
if api_template is None:
|
||||
raise ValueError('api template not found')
|
||||
raise ValueError("api template not found")
|
||||
|
||||
db.session.delete(api_template)
|
||||
db.session.commit()
|
||||
|
||||
@staticmethod
|
||||
def external_api_template_use_check(api_template_id: str) -> bool:
|
||||
count = ExternalKnowledgeBindings.query.filter_by(
|
||||
external_api_template_id=api_template_id
|
||||
).count()
|
||||
count = ExternalKnowledgeBindings.query.filter_by(external_api_template_id=api_template_id).count()
|
||||
if count > 0:
|
||||
return True
|
||||
return False
|
||||
@ -120,66 +96,63 @@ class ExternalDatasetService:
|
||||
@staticmethod
|
||||
def get_external_knowledge_binding_with_dataset_id(tenant_id: str, dataset_id: str) -> ExternalKnowledgeBindings:
|
||||
external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id
|
||||
dataset_id=dataset_id, tenant_id=tenant_id
|
||||
).first()
|
||||
if not external_knowledge_binding:
|
||||
raise ValueError('external knowledge binding not found')
|
||||
raise ValueError("external knowledge binding not found")
|
||||
return external_knowledge_binding
|
||||
|
||||
@staticmethod
|
||||
def document_create_args_validate(tenant_id: str, api_template_id: str, process_parameter: dict):
|
||||
api_template = ExternalApiTemplates.query.filter_by(
|
||||
id=api_template_id,
|
||||
tenant_id=tenant_id
|
||||
).first()
|
||||
api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first()
|
||||
if api_template is None:
|
||||
raise ValueError('api template not found')
|
||||
raise ValueError("api template not found")
|
||||
settings = json.loads(api_template.settings)
|
||||
for settings in settings:
|
||||
custom_parameters = settings.get('document_process_setting')
|
||||
custom_parameters = settings.get("document_process_setting")
|
||||
if custom_parameters:
|
||||
for parameter in custom_parameters:
|
||||
if parameter.get('required', False) and not process_parameter.get(parameter.get('name')):
|
||||
if parameter.get("required", False) and not process_parameter.get(parameter.get("name")):
|
||||
raise ValueError(f'{parameter.get("name")} is required')
|
||||
|
||||
@staticmethod
|
||||
def init_external_dataset(tenant_id: str, user_id: str, args: dict, created_from: str = 'web'):
|
||||
api_template_id = args.get('api_template_id')
|
||||
def init_external_dataset(tenant_id: str, user_id: str, args: dict, created_from: str = "web"):
|
||||
api_template_id = args.get("api_template_id")
|
||||
|
||||
data_source = args.get('data_source')
|
||||
data_source = args.get("data_source")
|
||||
if data_source is None:
|
||||
raise ValueError('data source is required')
|
||||
raise ValueError("data source is required")
|
||||
|
||||
process_parameter = args.get('process_parameter')
|
||||
api_template = ExternalApiTemplates.query.filter_by(
|
||||
id=api_template_id,
|
||||
tenant_id=tenant_id
|
||||
).first()
|
||||
process_parameter = args.get("process_parameter")
|
||||
api_template = ExternalApiTemplates.query.filter_by(id=api_template_id, tenant_id=tenant_id).first()
|
||||
if api_template is None:
|
||||
raise ValueError('api template not found')
|
||||
raise ValueError("api template not found")
|
||||
|
||||
dataset = Dataset(
|
||||
tenant_id=tenant_id,
|
||||
name=args.get('name'),
|
||||
description=args.get('description', ''),
|
||||
provider='external',
|
||||
name=args.get("name"),
|
||||
description=args.get("description", ""),
|
||||
provider="external",
|
||||
created_by=user_id,
|
||||
)
|
||||
|
||||
db.session.add(dataset)
|
||||
db.session.flush()
|
||||
|
||||
position = DocumentService.get_documents_position(dataset.id)
|
||||
batch = time.strftime('%Y%m%d%H%M%S') + str(random.randint(100000, 999999))
|
||||
document = Document.query.filter_by(dataset_id=dataset.id).order_by(Document.position.desc()).first()
|
||||
|
||||
position = document.position + 1 if document else 1
|
||||
|
||||
batch = time.strftime("%Y%m%d%H%M%S") + str(random.randint(100000, 999999))
|
||||
document_ids = []
|
||||
if data_source["type"] == "upload_file":
|
||||
upload_file_list = data_source["info_list"]['file_info_list']['file_ids']
|
||||
upload_file_list = data_source["info_list"]["file_info_list"]["file_ids"]
|
||||
for file_id in upload_file_list:
|
||||
file = db.session.query(UploadFile).filter(
|
||||
UploadFile.tenant_id == dataset.tenant_id,
|
||||
UploadFile.id == file_id
|
||||
).first()
|
||||
file = (
|
||||
db.session.query(UploadFile)
|
||||
.filter(UploadFile.tenant_id == dataset.tenant_id, UploadFile.id == file_id)
|
||||
.first()
|
||||
)
|
||||
if file:
|
||||
data_source_info = {
|
||||
"upload_file_id": file_id,
|
||||
@ -200,21 +173,20 @@ class ExternalDatasetService:
|
||||
db.session.flush()
|
||||
document_ids.append(document.id)
|
||||
db.session.commit()
|
||||
external_document_indexing_task.delay(dataset.id, api_template_id, data_source, process_parameter)
|
||||
#external_document_indexing_task.delay(dataset.id, api_template_id, data_source, process_parameter)
|
||||
|
||||
return dataset
|
||||
|
||||
@staticmethod
|
||||
def process_external_api(settings: ApiTemplateSetting,
|
||||
files: Union[None, dict[str, Any]]) -> httpx.Response:
|
||||
def process_external_api(settings: ApiTemplateSetting, files: Union[None, dict[str, Any]]) -> httpx.Response:
|
||||
"""
|
||||
do http request depending on api bundle
|
||||
"""
|
||||
|
||||
kwargs = {
|
||||
'url': settings.url,
|
||||
'headers': settings.headers,
|
||||
'follow_redirects': True,
|
||||
"url": settings.url,
|
||||
"headers": settings.headers,
|
||||
"follow_redirects": True,
|
||||
}
|
||||
|
||||
response = getattr(ssrf_proxy, settings.request_method)(data=settings.params, files=files, **kwargs)
|
||||
@ -228,21 +200,21 @@ class ExternalDatasetService:
|
||||
headers = deepcopy(headers)
|
||||
else:
|
||||
headers = {}
|
||||
if authorization.type == 'api-key':
|
||||
if authorization.type == "api-key":
|
||||
if authorization.config is None:
|
||||
raise ValueError('authorization config is required')
|
||||
raise ValueError("authorization config is required")
|
||||
|
||||
if authorization.config.api_key is None:
|
||||
raise ValueError('api_key is required')
|
||||
raise ValueError("api_key is required")
|
||||
|
||||
if not authorization.config.header:
|
||||
authorization.config.header = 'Authorization'
|
||||
authorization.config.header = "Authorization"
|
||||
|
||||
if authorization.config.type == 'bearer':
|
||||
headers[authorization.config.header] = f'Bearer {authorization.config.api_key}'
|
||||
elif authorization.config.type == 'basic':
|
||||
headers[authorization.config.header] = f'Basic {authorization.config.api_key}'
|
||||
elif authorization.config.type == 'custom':
|
||||
if authorization.config.type == "bearer":
|
||||
headers[authorization.config.header] = f"Bearer {authorization.config.api_key}"
|
||||
elif authorization.config.type == "basic":
|
||||
headers[authorization.config.header] = f"Basic {authorization.config.api_key}"
|
||||
elif authorization.config.type == "custom":
|
||||
headers[authorization.config.header] = authorization.config.api_key
|
||||
|
||||
return headers
|
||||
@ -254,21 +226,20 @@ class ExternalDatasetService:
|
||||
@staticmethod
|
||||
def create_external_dataset(tenant_id: str, user_id: str, args: dict) -> Dataset:
|
||||
# check if dataset name already exists
|
||||
if Dataset.query.filter_by(name=args.get('name'), tenant_id=tenant_id).first():
|
||||
if Dataset.query.filter_by(name=args.get("name"), tenant_id=tenant_id).first():
|
||||
raise DatasetNameDuplicateError(f"Dataset with name {args.get('name')} already exists.")
|
||||
api_template = ExternalApiTemplates.query.filter_by(
|
||||
id=args.get('api_template_id'),
|
||||
tenant_id=tenant_id
|
||||
id=args.get("external_api_template_id"), tenant_id=tenant_id
|
||||
).first()
|
||||
|
||||
if api_template is None:
|
||||
raise ValueError('api template not found')
|
||||
raise ValueError("api template not found")
|
||||
|
||||
dataset = Dataset(
|
||||
tenant_id=tenant_id,
|
||||
name=args.get('name'),
|
||||
description=args.get('description', ''),
|
||||
provider='external',
|
||||
name=args.get("name"),
|
||||
description=args.get("description", ""),
|
||||
provider="external",
|
||||
created_by=user_id,
|
||||
)
|
||||
|
||||
@ -278,8 +249,8 @@ class ExternalDatasetService:
|
||||
external_knowledge_binding = ExternalKnowledgeBindings(
|
||||
tenant_id=tenant_id,
|
||||
dataset_id=dataset.id,
|
||||
external_api_template_id=args.get('api_template_id'),
|
||||
external_knowledge_id=args.get('external_knowledge_id'),
|
||||
external_api_template_id=args.get("external_api_template_id"),
|
||||
external_knowledge_id=args.get("external_knowledge_id"),
|
||||
created_by=user_id,
|
||||
)
|
||||
db.session.add(external_knowledge_binding)
|
||||
@ -289,36 +260,32 @@ class ExternalDatasetService:
|
||||
return dataset
|
||||
|
||||
@staticmethod
|
||||
def fetch_external_knowledge_retrival(tenant_id: str,
|
||||
dataset_id: str,
|
||||
query: str,
|
||||
external_retrival_parameters: dict):
|
||||
def fetch_external_knowledge_retrival(
|
||||
tenant_id: str, dataset_id: str, query: str, external_retrival_parameters: dict
|
||||
):
|
||||
external_knowledge_binding = ExternalKnowledgeBindings.query.filter_by(
|
||||
dataset_id=dataset_id,
|
||||
tenant_id=tenant_id
|
||||
dataset_id=dataset_id, tenant_id=tenant_id
|
||||
).first()
|
||||
if not external_knowledge_binding:
|
||||
raise ValueError('external knowledge binding not found')
|
||||
raise ValueError("external knowledge binding not found")
|
||||
|
||||
external_api_template = ExternalApiTemplates.query.filter_by(
|
||||
id=external_knowledge_binding.external_api_template_id
|
||||
).first()
|
||||
if not external_api_template:
|
||||
raise ValueError('external api template not found')
|
||||
raise ValueError("external api template not found")
|
||||
|
||||
settings = json.loads(external_api_template.settings)
|
||||
headers = {}
|
||||
if settings.get('api_token'):
|
||||
headers['Authorization'] = f"Bearer {settings.get('api_token')}"
|
||||
if settings.get("api_token"):
|
||||
headers["Authorization"] = f"Bearer {settings.get('api_token')}"
|
||||
|
||||
external_retrival_parameters['query'] = query
|
||||
external_retrival_parameters["query"] = query
|
||||
|
||||
api_template_setting = {
|
||||
'url': f"{settings.get('endpoint')}/dify/external-knowledge/retrival-documents",
|
||||
'request_method': 'post',
|
||||
'headers': settings.get('headers'),
|
||||
'params': external_retrival_parameters
|
||||
"url": f"{settings.get('endpoint')}/dify/external-knowledge/retrival-documents",
|
||||
"request_method": "post",
|
||||
"headers": settings.get("headers"),
|
||||
"params": external_retrival_parameters,
|
||||
}
|
||||
response = ExternalDatasetService.process_external_api(
|
||||
ApiTemplateSetting(**api_template_setting), None
|
||||
)
|
||||
response = ExternalDatasetService.process_external_api(ApiTemplateSetting(**api_template_setting), None)
|
||||
|
||||
@ -19,8 +19,15 @@ default_retrieval_model = {
|
||||
|
||||
class HitTestingService:
|
||||
@classmethod
|
||||
def retrieve(cls, dataset: Dataset, query: str, account: Account,
|
||||
retrieval_model: dict, external_retrieval_model: dict, limit: int = 10) -> dict:
|
||||
def retrieve(
|
||||
cls,
|
||||
dataset: Dataset,
|
||||
query: str,
|
||||
account: Account,
|
||||
retrieval_model: dict,
|
||||
external_retrieval_model: dict,
|
||||
limit: int = 10,
|
||||
) -> dict:
|
||||
if dataset.available_document_count == 0 or dataset.available_segment_count == 0:
|
||||
return {
|
||||
"query": {
|
||||
|
||||
Reference in New Issue
Block a user