mirror of
https://github.com/langgenius/dify.git
synced 2026-05-02 00:18:03 +08:00
Merge branch 'main' into feat/mcp
This commit is contained in:
@ -56,8 +56,7 @@ class InsertExploreAppListApi(Resource):
|
||||
parser.add_argument("position", type=int, required=True, nullable=False, location="json")
|
||||
args = parser.parse_args()
|
||||
|
||||
with Session(db.engine) as session:
|
||||
app = session.execute(select(App).filter(App.id == args["app_id"])).scalar_one_or_none()
|
||||
app = db.session.execute(select(App).filter(App.id == args["app_id"])).scalar_one_or_none()
|
||||
if not app:
|
||||
raise NotFound(f"App '{args['app_id']}' is not found")
|
||||
|
||||
@ -78,38 +77,38 @@ class InsertExploreAppListApi(Resource):
|
||||
select(RecommendedApp).filter(RecommendedApp.app_id == args["app_id"])
|
||||
).scalar_one_or_none()
|
||||
|
||||
if not recommended_app:
|
||||
recommended_app = RecommendedApp(
|
||||
app_id=app.id,
|
||||
description=desc,
|
||||
copyright=copy_right,
|
||||
privacy_policy=privacy_policy,
|
||||
custom_disclaimer=custom_disclaimer,
|
||||
language=args["language"],
|
||||
category=args["category"],
|
||||
position=args["position"],
|
||||
)
|
||||
if not recommended_app:
|
||||
recommended_app = RecommendedApp(
|
||||
app_id=app.id,
|
||||
description=desc,
|
||||
copyright=copy_right,
|
||||
privacy_policy=privacy_policy,
|
||||
custom_disclaimer=custom_disclaimer,
|
||||
language=args["language"],
|
||||
category=args["category"],
|
||||
position=args["position"],
|
||||
)
|
||||
|
||||
db.session.add(recommended_app)
|
||||
db.session.add(recommended_app)
|
||||
|
||||
app.is_public = True
|
||||
db.session.commit()
|
||||
app.is_public = True
|
||||
db.session.commit()
|
||||
|
||||
return {"result": "success"}, 201
|
||||
else:
|
||||
recommended_app.description = desc
|
||||
recommended_app.copyright = copy_right
|
||||
recommended_app.privacy_policy = privacy_policy
|
||||
recommended_app.custom_disclaimer = custom_disclaimer
|
||||
recommended_app.language = args["language"]
|
||||
recommended_app.category = args["category"]
|
||||
recommended_app.position = args["position"]
|
||||
return {"result": "success"}, 201
|
||||
else:
|
||||
recommended_app.description = desc
|
||||
recommended_app.copyright = copy_right
|
||||
recommended_app.privacy_policy = privacy_policy
|
||||
recommended_app.custom_disclaimer = custom_disclaimer
|
||||
recommended_app.language = args["language"]
|
||||
recommended_app.category = args["category"]
|
||||
recommended_app.position = args["position"]
|
||||
|
||||
app.is_public = True
|
||||
app.is_public = True
|
||||
|
||||
db.session.commit()
|
||||
db.session.commit()
|
||||
|
||||
return {"result": "success"}, 200
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
class InsertExploreAppApi(Resource):
|
||||
|
||||
@ -43,7 +43,6 @@ from core.model_runtime.errors.invoke import InvokeAuthorizationError
|
||||
from core.plugin.impl.exc import PluginDaemonClientSideError
|
||||
from core.rag.extractor.entity.extract_setting import ExtractSetting
|
||||
from extensions.ext_database import db
|
||||
from extensions.ext_redis import redis_client
|
||||
from fields.document_fields import (
|
||||
dataset_and_document_fields,
|
||||
document_fields,
|
||||
@ -54,8 +53,6 @@ from libs.login import login_required
|
||||
from models import Dataset, DatasetProcessRule, Document, DocumentSegment, UploadFile
|
||||
from services.dataset_service import DatasetService, DocumentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import KnowledgeConfig
|
||||
from tasks.add_document_to_index_task import add_document_to_index_task
|
||||
from tasks.remove_document_from_index_task import remove_document_from_index_task
|
||||
|
||||
|
||||
class DocumentResource(Resource):
|
||||
@ -862,77 +859,16 @@ class DocumentStatusApi(DocumentResource):
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
|
||||
document_ids = request.args.getlist("document_id")
|
||||
for document_id in document_ids:
|
||||
document = self.get_document(dataset_id, document_id)
|
||||
|
||||
indexing_cache_key = "document_{}_indexing".format(document.id)
|
||||
cache_result = redis_client.get(indexing_cache_key)
|
||||
if cache_result is not None:
|
||||
raise InvalidActionError(f"Document:{document.name} is being indexed, please try again later")
|
||||
try:
|
||||
DocumentService.batch_update_document_status(dataset, document_ids, action, current_user)
|
||||
except services.errors.document.DocumentIndexingError as e:
|
||||
raise InvalidActionError(str(e))
|
||||
except ValueError as e:
|
||||
raise InvalidActionError(str(e))
|
||||
except NotFound as e:
|
||||
raise NotFound(str(e))
|
||||
|
||||
if action == "enable":
|
||||
if document.enabled:
|
||||
continue
|
||||
document.enabled = True
|
||||
document.disabled_at = None
|
||||
document.disabled_by = None
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
add_document_to_index_task.delay(document_id)
|
||||
|
||||
elif action == "disable":
|
||||
if not document.completed_at or document.indexing_status != "completed":
|
||||
raise InvalidActionError(f"Document: {document.name} is not completed.")
|
||||
if not document.enabled:
|
||||
continue
|
||||
|
||||
document.enabled = False
|
||||
document.disabled_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
document.disabled_by = current_user.id
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
remove_document_from_index_task.delay(document_id)
|
||||
|
||||
elif action == "archive":
|
||||
if document.archived:
|
||||
continue
|
||||
|
||||
document.archived = True
|
||||
document.archived_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
document.archived_by = current_user.id
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
if document.enabled:
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
remove_document_from_index_task.delay(document_id)
|
||||
|
||||
elif action == "un_archive":
|
||||
if not document.archived:
|
||||
continue
|
||||
document.archived = False
|
||||
document.archived_at = None
|
||||
document.archived_by = None
|
||||
document.updated_at = datetime.now(UTC).replace(tzinfo=None)
|
||||
db.session.commit()
|
||||
|
||||
# Set cache to prevent indexing the same document multiple times
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
|
||||
add_document_to_index_task.delay(document_id)
|
||||
|
||||
else:
|
||||
raise InvalidActionError()
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
|
||||
@ -4,7 +4,7 @@ from werkzeug.exceptions import Forbidden, NotFound
|
||||
|
||||
import services.dataset_service
|
||||
from controllers.service_api import api
|
||||
from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError
|
||||
from controllers.service_api.dataset.error import DatasetInUseError, DatasetNameDuplicateError, InvalidActionError
|
||||
from controllers.service_api.wraps import (
|
||||
DatasetApiResource,
|
||||
cloud_edition_billing_rate_limit_check,
|
||||
@ -17,7 +17,7 @@ from fields.dataset_fields import dataset_detail_fields
|
||||
from fields.tag_fields import tag_fields
|
||||
from libs.login import current_user
|
||||
from models.dataset import Dataset, DatasetPermissionEnum
|
||||
from services.dataset_service import DatasetPermissionService, DatasetService
|
||||
from services.dataset_service import DatasetPermissionService, DatasetService, DocumentService
|
||||
from services.entities.knowledge_entities.knowledge_entities import RetrievalModel
|
||||
from services.tag_service import TagService
|
||||
|
||||
@ -329,6 +329,56 @@ class DatasetApi(DatasetApiResource):
|
||||
raise DatasetInUseError()
|
||||
|
||||
|
||||
class DocumentStatusApi(DatasetApiResource):
|
||||
"""Resource for batch document status operations."""
|
||||
|
||||
def patch(self, tenant_id, dataset_id, action):
|
||||
"""
|
||||
Batch update document status.
|
||||
|
||||
Args:
|
||||
tenant_id: tenant id
|
||||
dataset_id: dataset id
|
||||
action: action to perform (enable, disable, archive, un_archive)
|
||||
|
||||
Returns:
|
||||
dict: A dictionary with a key 'result' and a value 'success'
|
||||
int: HTTP status code 200 indicating that the operation was successful.
|
||||
|
||||
Raises:
|
||||
NotFound: If the dataset with the given ID does not exist.
|
||||
Forbidden: If the user does not have permission.
|
||||
InvalidActionError: If the action is invalid or cannot be performed.
|
||||
"""
|
||||
dataset_id_str = str(dataset_id)
|
||||
dataset = DatasetService.get_dataset(dataset_id_str)
|
||||
|
||||
if dataset is None:
|
||||
raise NotFound("Dataset not found.")
|
||||
|
||||
# Check user's permission
|
||||
try:
|
||||
DatasetService.check_dataset_permission(dataset, current_user)
|
||||
except services.errors.account.NoPermissionError as e:
|
||||
raise Forbidden(str(e))
|
||||
|
||||
# Check dataset model setting
|
||||
DatasetService.check_dataset_model_setting(dataset)
|
||||
|
||||
# Get document IDs from request body
|
||||
data = request.get_json()
|
||||
document_ids = data.get("document_ids", [])
|
||||
|
||||
try:
|
||||
DocumentService.batch_update_document_status(dataset, document_ids, action, current_user)
|
||||
except services.errors.document.DocumentIndexingError as e:
|
||||
raise InvalidActionError(str(e))
|
||||
except ValueError as e:
|
||||
raise InvalidActionError(str(e))
|
||||
|
||||
return {"result": "success"}, 200
|
||||
|
||||
|
||||
class DatasetTagsApi(DatasetApiResource):
|
||||
@validate_dataset_token
|
||||
@marshal_with(tag_fields)
|
||||
@ -457,6 +507,7 @@ class DatasetTagsBindingStatusApi(DatasetApiResource):
|
||||
|
||||
api.add_resource(DatasetListApi, "/datasets")
|
||||
api.add_resource(DatasetApi, "/datasets/<uuid:dataset_id>")
|
||||
api.add_resource(DocumentStatusApi, "/datasets/<uuid:dataset_id>/documents/status/<string:action>")
|
||||
api.add_resource(DatasetTagsApi, "/datasets/tags")
|
||||
api.add_resource(DatasetTagBindingApi, "/datasets/tags/binding")
|
||||
api.add_resource(DatasetTagUnbindingApi, "/datasets/tags/unbinding")
|
||||
|
||||
@ -68,22 +68,17 @@ class MarkdownExtractor(BaseExtractor):
|
||||
continue
|
||||
header_match = re.match(r"^#+\s", line)
|
||||
if header_match:
|
||||
if current_header is not None:
|
||||
markdown_tups.append((current_header, current_text))
|
||||
|
||||
markdown_tups.append((current_header, current_text))
|
||||
current_header = line
|
||||
current_text = ""
|
||||
else:
|
||||
current_text += line + "\n"
|
||||
markdown_tups.append((current_header, current_text))
|
||||
|
||||
if current_header is not None:
|
||||
# pass linting, assert keys are defined
|
||||
markdown_tups = [
|
||||
(re.sub(r"#", "", cast(str, key)).strip(), re.sub(r"<.*?>", "", value)) for key, value in markdown_tups
|
||||
]
|
||||
else:
|
||||
markdown_tups = [(key, re.sub("\n", "", value)) for key, value in markdown_tups]
|
||||
markdown_tups = [
|
||||
(re.sub(r"#", "", cast(str, key)).strip() if key else None, re.sub(r"<.*?>", "", value))
|
||||
for key, value in markdown_tups
|
||||
]
|
||||
|
||||
return markdown_tups
|
||||
|
||||
|
||||
@ -8,5 +8,4 @@ EMPTY_VALUE_MAPPING = {
|
||||
SegmentType.ARRAY_STRING: [],
|
||||
SegmentType.ARRAY_NUMBER: [],
|
||||
SegmentType.ARRAY_OBJECT: [],
|
||||
SegmentType.ARRAY_FILE: [],
|
||||
}
|
||||
|
||||
@ -1,6 +1,5 @@
|
||||
from typing import Any
|
||||
|
||||
from core.file import File
|
||||
from core.variables import SegmentType
|
||||
|
||||
from .enums import Operation
|
||||
@ -86,8 +85,6 @@ def is_input_value_valid(*, variable_type: SegmentType, operation: Operation, va
|
||||
return isinstance(value, int | float)
|
||||
case SegmentType.ARRAY_OBJECT if operation == Operation.APPEND:
|
||||
return isinstance(value, dict)
|
||||
case SegmentType.ARRAY_FILE if operation == Operation.APPEND:
|
||||
return isinstance(value, File)
|
||||
|
||||
# Array & Extend / Overwrite
|
||||
case SegmentType.ARRAY_ANY if operation in {Operation.EXTEND, Operation.OVER_WRITE}:
|
||||
@ -98,8 +95,6 @@ def is_input_value_valid(*, variable_type: SegmentType, operation: Operation, va
|
||||
return isinstance(value, list) and all(isinstance(item, int | float) for item in value)
|
||||
case SegmentType.ARRAY_OBJECT if operation in {Operation.EXTEND, Operation.OVER_WRITE}:
|
||||
return isinstance(value, list) and all(isinstance(item, dict) for item in value)
|
||||
case SegmentType.ARRAY_FILE if operation in {Operation.EXTEND, Operation.OVER_WRITE}:
|
||||
return isinstance(value, list) and all(isinstance(item, File) for item in value)
|
||||
|
||||
case _:
|
||||
return False
|
||||
|
||||
@ -101,8 +101,6 @@ def _build_variable_from_mapping(*, mapping: Mapping[str, Any], selector: Sequen
|
||||
result = ArrayNumberVariable.model_validate(mapping)
|
||||
case SegmentType.ARRAY_OBJECT if isinstance(value, list):
|
||||
result = ArrayObjectVariable.model_validate(mapping)
|
||||
case SegmentType.ARRAY_FILE if isinstance(value, list):
|
||||
result = ArrayFileVariable.model_validate(mapping)
|
||||
case _:
|
||||
raise VariableError(f"not supported value type {value_type}")
|
||||
if result.size > dify_config.MAX_VARIABLE_SIZE:
|
||||
|
||||
@ -59,6 +59,7 @@ from services.external_knowledge_service import ExternalDatasetService
|
||||
from services.feature_service import FeatureModel, FeatureService
|
||||
from services.tag_service import TagService
|
||||
from services.vector_service import VectorService
|
||||
from tasks.add_document_to_index_task import add_document_to_index_task
|
||||
from tasks.batch_clean_document_task import batch_clean_document_task
|
||||
from tasks.clean_notion_document_task import clean_notion_document_task
|
||||
from tasks.deal_dataset_vector_index_task import deal_dataset_vector_index_task
|
||||
@ -70,6 +71,7 @@ from tasks.document_indexing_update_task import document_indexing_update_task
|
||||
from tasks.duplicate_document_indexing_task import duplicate_document_indexing_task
|
||||
from tasks.enable_segments_to_index_task import enable_segments_to_index_task
|
||||
from tasks.recover_document_indexing_task import recover_document_indexing_task
|
||||
from tasks.remove_document_from_index_task import remove_document_from_index_task
|
||||
from tasks.retry_document_indexing_task import retry_document_indexing_task
|
||||
from tasks.sync_website_document_indexing_task import sync_website_document_indexing_task
|
||||
|
||||
@ -434,7 +436,7 @@ class DatasetService:
|
||||
raise ValueError(ex.description)
|
||||
|
||||
filtered_data["updated_by"] = user.id
|
||||
filtered_data["updated_at"] = datetime.datetime.now()
|
||||
filtered_data["updated_at"] = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
|
||||
|
||||
# update Retrieval model
|
||||
filtered_data["retrieval_model"] = data["retrieval_model"]
|
||||
@ -976,12 +978,17 @@ class DocumentService:
|
||||
process_rule = knowledge_config.process_rule
|
||||
if process_rule:
|
||||
if process_rule.mode in ("custom", "hierarchical"):
|
||||
dataset_process_rule = DatasetProcessRule(
|
||||
dataset_id=dataset.id,
|
||||
mode=process_rule.mode,
|
||||
rules=process_rule.rules.model_dump_json() if process_rule.rules else None,
|
||||
created_by=account.id,
|
||||
)
|
||||
if process_rule.rules:
|
||||
dataset_process_rule = DatasetProcessRule(
|
||||
dataset_id=dataset.id,
|
||||
mode=process_rule.mode,
|
||||
rules=process_rule.rules.model_dump_json() if process_rule.rules else None,
|
||||
created_by=account.id,
|
||||
)
|
||||
else:
|
||||
dataset_process_rule = dataset.latest_process_rule
|
||||
if not dataset_process_rule:
|
||||
raise ValueError("No process rule found.")
|
||||
elif process_rule.mode == "automatic":
|
||||
dataset_process_rule = DatasetProcessRule(
|
||||
dataset_id=dataset.id,
|
||||
@ -1603,6 +1610,191 @@ class DocumentService:
|
||||
if not isinstance(args["process_rule"]["rules"]["segmentation"]["max_tokens"], int):
|
||||
raise ValueError("Process rule segmentation max_tokens is invalid")
|
||||
|
||||
@staticmethod
|
||||
def batch_update_document_status(dataset: Dataset, document_ids: list[str], action: str, user):
|
||||
"""
|
||||
Batch update document status.
|
||||
|
||||
Args:
|
||||
dataset (Dataset): The dataset object
|
||||
document_ids (list[str]): List of document IDs to update
|
||||
action (str): Action to perform (enable, disable, archive, un_archive)
|
||||
user: Current user performing the action
|
||||
|
||||
Raises:
|
||||
DocumentIndexingError: If document is being indexed or not in correct state
|
||||
ValueError: If action is invalid
|
||||
"""
|
||||
if not document_ids:
|
||||
return
|
||||
|
||||
# Early validation of action parameter
|
||||
valid_actions = ["enable", "disable", "archive", "un_archive"]
|
||||
if action not in valid_actions:
|
||||
raise ValueError(f"Invalid action: {action}. Must be one of {valid_actions}")
|
||||
|
||||
documents_to_update = []
|
||||
|
||||
# First pass: validate all documents and prepare updates
|
||||
for document_id in document_ids:
|
||||
document = DocumentService.get_document(dataset.id, document_id)
|
||||
if not document:
|
||||
continue
|
||||
|
||||
# Check if document is being indexed
|
||||
indexing_cache_key = f"document_{document.id}_indexing"
|
||||
cache_result = redis_client.get(indexing_cache_key)
|
||||
if cache_result is not None:
|
||||
raise DocumentIndexingError(f"Document:{document.name} is being indexed, please try again later")
|
||||
|
||||
# Prepare update based on action
|
||||
update_info = DocumentService._prepare_document_status_update(document, action, user)
|
||||
if update_info:
|
||||
documents_to_update.append(update_info)
|
||||
|
||||
# Second pass: apply all updates in a single transaction
|
||||
if documents_to_update:
|
||||
try:
|
||||
for update_info in documents_to_update:
|
||||
document = update_info["document"]
|
||||
updates = update_info["updates"]
|
||||
|
||||
# Apply updates to the document
|
||||
for field, value in updates.items():
|
||||
setattr(document, field, value)
|
||||
|
||||
db.session.add(document)
|
||||
|
||||
# Batch commit all changes
|
||||
db.session.commit()
|
||||
except Exception as e:
|
||||
# Rollback on any error
|
||||
db.session.rollback()
|
||||
raise e
|
||||
# Execute async tasks and set Redis cache after successful commit
|
||||
# propagation_error is used to capture any errors for submitting async task execution
|
||||
propagation_error = None
|
||||
for update_info in documents_to_update:
|
||||
try:
|
||||
# Execute async tasks after successful commit
|
||||
if update_info["async_task"]:
|
||||
task_info = update_info["async_task"]
|
||||
task_func = task_info["function"]
|
||||
task_args = task_info["args"]
|
||||
task_func.delay(*task_args)
|
||||
except Exception as e:
|
||||
# Log the error but do not rollback the transaction
|
||||
logging.exception(f"Error executing async task for document {update_info['document'].id}")
|
||||
# don't raise the error immediately, but capture it for later
|
||||
propagation_error = e
|
||||
try:
|
||||
# Set Redis cache if needed after successful commit
|
||||
if update_info["set_cache"]:
|
||||
document = update_info["document"]
|
||||
indexing_cache_key = f"document_{document.id}_indexing"
|
||||
redis_client.setex(indexing_cache_key, 600, 1)
|
||||
except Exception as e:
|
||||
# Log the error but do not rollback the transaction
|
||||
logging.exception(f"Error setting cache for document {update_info['document'].id}")
|
||||
# Raise any propagation error after all updates
|
||||
if propagation_error:
|
||||
raise propagation_error
|
||||
|
||||
@staticmethod
|
||||
def _prepare_document_status_update(document, action: str, user):
|
||||
"""
|
||||
Prepare document status update information.
|
||||
|
||||
Args:
|
||||
document: Document object to update
|
||||
action: Action to perform
|
||||
user: Current user
|
||||
|
||||
Returns:
|
||||
dict: Update information or None if no update needed
|
||||
"""
|
||||
now = datetime.datetime.now(datetime.UTC).replace(tzinfo=None)
|
||||
|
||||
if action == "enable":
|
||||
return DocumentService._prepare_enable_update(document, now)
|
||||
elif action == "disable":
|
||||
return DocumentService._prepare_disable_update(document, user, now)
|
||||
elif action == "archive":
|
||||
return DocumentService._prepare_archive_update(document, user, now)
|
||||
elif action == "un_archive":
|
||||
return DocumentService._prepare_unarchive_update(document, now)
|
||||
|
||||
return None
|
||||
|
||||
@staticmethod
|
||||
def _prepare_enable_update(document, now):
|
||||
"""Prepare updates for enabling a document."""
|
||||
if document.enabled:
|
||||
return None
|
||||
|
||||
return {
|
||||
"document": document,
|
||||
"updates": {"enabled": True, "disabled_at": None, "disabled_by": None, "updated_at": now},
|
||||
"async_task": {"function": add_document_to_index_task, "args": [document.id]},
|
||||
"set_cache": True,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _prepare_disable_update(document, user, now):
|
||||
"""Prepare updates for disabling a document."""
|
||||
if not document.completed_at or document.indexing_status != "completed":
|
||||
raise DocumentIndexingError(f"Document: {document.name} is not completed.")
|
||||
|
||||
if not document.enabled:
|
||||
return None
|
||||
|
||||
return {
|
||||
"document": document,
|
||||
"updates": {"enabled": False, "disabled_at": now, "disabled_by": user.id, "updated_at": now},
|
||||
"async_task": {"function": remove_document_from_index_task, "args": [document.id]},
|
||||
"set_cache": True,
|
||||
}
|
||||
|
||||
@staticmethod
|
||||
def _prepare_archive_update(document, user, now):
|
||||
"""Prepare updates for archiving a document."""
|
||||
if document.archived:
|
||||
return None
|
||||
|
||||
update_info = {
|
||||
"document": document,
|
||||
"updates": {"archived": True, "archived_at": now, "archived_by": user.id, "updated_at": now},
|
||||
"async_task": None,
|
||||
"set_cache": False,
|
||||
}
|
||||
|
||||
# Only set async task and cache if document is currently enabled
|
||||
if document.enabled:
|
||||
update_info["async_task"] = {"function": remove_document_from_index_task, "args": [document.id]}
|
||||
update_info["set_cache"] = True
|
||||
|
||||
return update_info
|
||||
|
||||
@staticmethod
|
||||
def _prepare_unarchive_update(document, now):
|
||||
"""Prepare updates for unarchiving a document."""
|
||||
if not document.archived:
|
||||
return None
|
||||
|
||||
update_info = {
|
||||
"document": document,
|
||||
"updates": {"archived": False, "archived_at": None, "archived_by": None, "updated_at": now},
|
||||
"async_task": None,
|
||||
"set_cache": False,
|
||||
}
|
||||
|
||||
# Only re-index if the document is currently enabled
|
||||
if document.enabled:
|
||||
update_info["async_task"] = {"function": add_document_to_index_task, "args": [document.id]}
|
||||
update_info["set_cache"] = True
|
||||
|
||||
return update_info
|
||||
|
||||
|
||||
class SegmentService:
|
||||
@classmethod
|
||||
|
||||
@ -22,7 +22,7 @@ class PluginDataMigration:
|
||||
cls.migrate_datasets()
|
||||
cls.migrate_db_records("embeddings", "provider_name", ModelProviderID) # large table
|
||||
cls.migrate_db_records("dataset_collection_bindings", "provider_name", ModelProviderID)
|
||||
cls.migrate_db_records("tool_builtin_providers", "provider_name", ToolProviderID)
|
||||
cls.migrate_db_records("tool_builtin_providers", "provider", ToolProviderID)
|
||||
|
||||
@classmethod
|
||||
def migrate_datasets(cls) -> None:
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
import os
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from flask import Flask
|
||||
@ -11,6 +12,24 @@ PROJECT_DIR = os.path.abspath(os.path.join(ABS_PATH, os.pardir, os.pardir))
|
||||
|
||||
CACHED_APP = Flask(__name__)
|
||||
|
||||
# set global mock for Redis client
|
||||
redis_mock = MagicMock()
|
||||
redis_mock.get = MagicMock(return_value=None)
|
||||
redis_mock.setex = MagicMock()
|
||||
redis_mock.setnx = MagicMock()
|
||||
redis_mock.delete = MagicMock()
|
||||
redis_mock.lock = MagicMock()
|
||||
redis_mock.exists = MagicMock(return_value=False)
|
||||
redis_mock.set = MagicMock()
|
||||
redis_mock.expire = MagicMock()
|
||||
redis_mock.hgetall = MagicMock(return_value={})
|
||||
redis_mock.hdel = MagicMock()
|
||||
redis_mock.incr = MagicMock(return_value=1)
|
||||
|
||||
# apply the mock to the Redis client in the Flask app
|
||||
redis_patcher = patch("extensions.ext_redis.redis_client", redis_mock)
|
||||
redis_patcher.start()
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def app() -> Flask:
|
||||
@ -21,3 +40,19 @@ def app() -> Flask:
|
||||
def _provide_app_context(app: Flask):
|
||||
with app.app_context():
|
||||
yield
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def reset_redis_mock():
|
||||
"""reset the Redis mock before each test"""
|
||||
redis_mock.reset_mock()
|
||||
redis_mock.get.return_value = None
|
||||
redis_mock.setex.return_value = None
|
||||
redis_mock.setnx.return_value = None
|
||||
redis_mock.delete.return_value = None
|
||||
redis_mock.exists.return_value = False
|
||||
redis_mock.set.return_value = None
|
||||
redis_mock.expire.return_value = None
|
||||
redis_mock.hgetall.return_value = {}
|
||||
redis_mock.hdel.return_value = None
|
||||
redis_mock.incr.return_value = 1
|
||||
|
||||
@ -0,0 +1,22 @@
|
||||
from core.rag.extractor.markdown_extractor import MarkdownExtractor
|
||||
|
||||
|
||||
def test_markdown_to_tups():
|
||||
markdown = """
|
||||
this is some text without header
|
||||
|
||||
# title 1
|
||||
this is balabala text
|
||||
|
||||
## title 2
|
||||
this is more specific text.
|
||||
"""
|
||||
extractor = MarkdownExtractor(file_path="dummy_path")
|
||||
updated_output = extractor.markdown_to_tups(markdown)
|
||||
assert len(updated_output) == 3
|
||||
key, header_value = updated_output[0]
|
||||
assert key == None
|
||||
assert header_value.strip() == "this is some text without header"
|
||||
title_1, value = updated_output[1]
|
||||
assert title_1.strip() == "title 1"
|
||||
assert value.strip() == "this is balabala text"
|
||||
1238
api/tests/unit_tests/services/test_dataset_service.py
Normal file
1238
api/tests/unit_tests/services/test_dataset_service.py
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user