feat: clear free plan workflow run logs (#29494)

Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>
This commit is contained in:
非法操作
2026-01-12 15:57:40 +08:00
committed by GitHub
parent 00698e41b7
commit 51ea87ab85
22 changed files with 1312 additions and 9 deletions

View File

@ -34,11 +34,14 @@ Example:
```
"""
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from datetime import datetime
from typing import Protocol
from sqlalchemy.orm import Session
from core.workflow.entities.pause_reason import PauseReason
from core.workflow.enums import WorkflowType
from core.workflow.repositories.workflow_execution_repository import WorkflowExecutionRepository
from libs.infinite_scroll_pagination import InfiniteScrollPagination
from models.enums import WorkflowRunTriggeredFrom
@ -253,6 +256,44 @@ class APIWorkflowRunRepository(WorkflowExecutionRepository, Protocol):
"""
...
def get_runs_batch_by_time_range(
self,
start_from: datetime | None,
end_before: datetime,
last_seen: tuple[datetime, str] | None,
batch_size: int,
run_types: Sequence[WorkflowType] | None = None,
tenant_ids: Sequence[str] | None = None,
) -> Sequence[WorkflowRun]:
"""
Fetch ended workflow runs in a time window for archival and clean batching.
"""
...
def delete_runs_with_related(
self,
runs: Sequence[WorkflowRun],
delete_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
delete_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
"""
Delete workflow runs and their related records (node executions, offloads, app logs,
trigger logs, pauses, pause reasons).
"""
...
def count_runs_with_related(
self,
runs: Sequence[WorkflowRun],
count_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
count_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
"""
Count workflow runs and their related records (node executions, offloads, app logs,
trigger logs, pauses, pause reasons) without deleting data.
"""
...
def create_workflow_pause(
self,
workflow_run_id: str,

View File

@ -7,13 +7,18 @@ using SQLAlchemy 2.0 style queries for WorkflowNodeExecutionModel operations.
from collections.abc import Sequence
from datetime import datetime
from typing import cast
from typing import TypedDict, cast
from sqlalchemy import asc, delete, desc, select
from sqlalchemy import asc, delete, desc, func, select, tuple_
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, sessionmaker
from models.workflow import WorkflowNodeExecutionModel
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import (
WorkflowNodeExecutionModel,
WorkflowNodeExecutionOffload,
WorkflowNodeExecutionTriggeredFrom,
)
from repositories.api_workflow_node_execution_repository import DifyAPIWorkflowNodeExecutionRepository
@ -44,6 +49,26 @@ class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecut
"""
self._session_maker = session_maker
@staticmethod
def _map_run_triggered_from_to_node_triggered_from(triggered_from: str) -> str:
"""
Map workflow run triggered_from values to workflow node execution triggered_from values.
"""
if triggered_from in {
WorkflowRunTriggeredFrom.APP_RUN.value,
WorkflowRunTriggeredFrom.DEBUGGING.value,
WorkflowRunTriggeredFrom.SCHEDULE.value,
WorkflowRunTriggeredFrom.PLUGIN.value,
WorkflowRunTriggeredFrom.WEBHOOK.value,
}:
return WorkflowNodeExecutionTriggeredFrom.WORKFLOW_RUN.value
if triggered_from in {
WorkflowRunTriggeredFrom.RAG_PIPELINE_RUN.value,
WorkflowRunTriggeredFrom.RAG_PIPELINE_DEBUGGING.value,
}:
return WorkflowNodeExecutionTriggeredFrom.RAG_PIPELINE_RUN.value
return ""
def get_node_last_execution(
self,
tenant_id: str,
@ -290,3 +315,119 @@ class DifyAPISQLAlchemyWorkflowNodeExecutionRepository(DifyAPIWorkflowNodeExecut
result = cast(CursorResult, session.execute(stmt))
session.commit()
return result.rowcount
class RunContext(TypedDict):
run_id: str
tenant_id: str
app_id: str
workflow_id: str
triggered_from: str
@staticmethod
def delete_by_runs(session: Session, runs: Sequence[RunContext]) -> tuple[int, int]:
"""
Delete node executions (and offloads) for the given workflow runs using indexed columns.
Uses the composite index on (tenant_id, app_id, workflow_id, triggered_from, workflow_run_id)
by filtering on those columns with tuple IN.
"""
if not runs:
return 0, 0
tuple_values = [
(
run["tenant_id"],
run["app_id"],
run["workflow_id"],
DifyAPISQLAlchemyWorkflowNodeExecutionRepository._map_run_triggered_from_to_node_triggered_from(
run["triggered_from"]
),
run["run_id"],
)
for run in runs
]
node_execution_ids = session.scalars(
select(WorkflowNodeExecutionModel.id).where(
tuple_(
WorkflowNodeExecutionModel.tenant_id,
WorkflowNodeExecutionModel.app_id,
WorkflowNodeExecutionModel.workflow_id,
WorkflowNodeExecutionModel.triggered_from,
WorkflowNodeExecutionModel.workflow_run_id,
).in_(tuple_values)
)
).all()
if not node_execution_ids:
return 0, 0
offloads_deleted = (
cast(
CursorResult,
session.execute(
delete(WorkflowNodeExecutionOffload).where(
WorkflowNodeExecutionOffload.node_execution_id.in_(node_execution_ids)
)
),
).rowcount
or 0
)
node_executions_deleted = (
cast(
CursorResult,
session.execute(
delete(WorkflowNodeExecutionModel).where(WorkflowNodeExecutionModel.id.in_(node_execution_ids))
),
).rowcount
or 0
)
return node_executions_deleted, offloads_deleted
@staticmethod
def count_by_runs(session: Session, runs: Sequence[RunContext]) -> tuple[int, int]:
"""
Count node executions (and offloads) for the given workflow runs using indexed columns.
"""
if not runs:
return 0, 0
tuple_values = [
(
run["tenant_id"],
run["app_id"],
run["workflow_id"],
DifyAPISQLAlchemyWorkflowNodeExecutionRepository._map_run_triggered_from_to_node_triggered_from(
run["triggered_from"]
),
run["run_id"],
)
for run in runs
]
tuple_filter = tuple_(
WorkflowNodeExecutionModel.tenant_id,
WorkflowNodeExecutionModel.app_id,
WorkflowNodeExecutionModel.workflow_id,
WorkflowNodeExecutionModel.triggered_from,
WorkflowNodeExecutionModel.workflow_run_id,
).in_(tuple_values)
node_executions_count = (
session.scalar(select(func.count()).select_from(WorkflowNodeExecutionModel).where(tuple_filter)) or 0
)
offloads_count = (
session.scalar(
select(func.count())
.select_from(WorkflowNodeExecutionOffload)
.join(
WorkflowNodeExecutionModel,
WorkflowNodeExecutionOffload.node_execution_id == WorkflowNodeExecutionModel.id,
)
.where(tuple_filter)
)
or 0
)
return int(node_executions_count), int(offloads_count)

View File

@ -21,7 +21,7 @@ Implementation Notes:
import logging
import uuid
from collections.abc import Sequence
from collections.abc import Callable, Sequence
from datetime import datetime
from decimal import Decimal
from typing import Any, cast
@ -32,7 +32,7 @@ from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session, selectinload, sessionmaker
from core.workflow.entities.pause_reason import HumanInputRequired, PauseReason, SchedulingPause
from core.workflow.enums import WorkflowExecutionStatus
from core.workflow.enums import WorkflowExecutionStatus, WorkflowType
from extensions.ext_storage import storage
from libs.datetime_utils import naive_utc_now
from libs.helper import convert_datetime_to_date
@ -40,8 +40,14 @@ from libs.infinite_scroll_pagination import InfiniteScrollPagination
from libs.time_parser import get_time_threshold
from libs.uuid_utils import uuidv7
from models.enums import WorkflowRunTriggeredFrom
from models.workflow import WorkflowPause as WorkflowPauseModel
from models.workflow import WorkflowPauseReason, WorkflowRun
from models.workflow import (
WorkflowAppLog,
WorkflowPauseReason,
WorkflowRun,
)
from models.workflow import (
WorkflowPause as WorkflowPauseModel,
)
from repositories.api_workflow_run_repository import APIWorkflowRunRepository
from repositories.entities.workflow_pause import WorkflowPauseEntity
from repositories.types import (
@ -314,6 +320,171 @@ class DifyAPISQLAlchemyWorkflowRunRepository(APIWorkflowRunRepository):
logger.info("Total deleted %s workflow runs for app %s", total_deleted, app_id)
return total_deleted
def get_runs_batch_by_time_range(
self,
start_from: datetime | None,
end_before: datetime,
last_seen: tuple[datetime, str] | None,
batch_size: int,
run_types: Sequence[WorkflowType] | None = None,
tenant_ids: Sequence[str] | None = None,
) -> Sequence[WorkflowRun]:
"""
Fetch ended workflow runs in a time window for archival and clean batching.
Query scope:
- created_at in [start_from, end_before)
- type in run_types (when provided)
- status is an ended state
- optional tenant_id filter and cursor (last_seen) for pagination
"""
with self._session_maker() as session:
stmt = (
select(WorkflowRun)
.where(
WorkflowRun.created_at < end_before,
WorkflowRun.status.in_(WorkflowExecutionStatus.ended_values()),
)
.order_by(WorkflowRun.created_at.asc(), WorkflowRun.id.asc())
.limit(batch_size)
)
if run_types is not None:
if not run_types:
return []
stmt = stmt.where(WorkflowRun.type.in_(run_types))
if start_from:
stmt = stmt.where(WorkflowRun.created_at >= start_from)
if tenant_ids:
stmt = stmt.where(WorkflowRun.tenant_id.in_(tenant_ids))
if last_seen:
stmt = stmt.where(
or_(
WorkflowRun.created_at > last_seen[0],
and_(WorkflowRun.created_at == last_seen[0], WorkflowRun.id > last_seen[1]),
)
)
return session.scalars(stmt).all()
def delete_runs_with_related(
self,
runs: Sequence[WorkflowRun],
delete_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
delete_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
if not runs:
return {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
with self._session_maker() as session:
run_ids = [run.id for run in runs]
if delete_node_executions:
node_executions_deleted, offloads_deleted = delete_node_executions(session, runs)
else:
node_executions_deleted, offloads_deleted = 0, 0
app_logs_result = session.execute(delete(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(run_ids)))
app_logs_deleted = cast(CursorResult, app_logs_result).rowcount or 0
pause_ids = session.scalars(
select(WorkflowPauseModel.id).where(WorkflowPauseModel.workflow_run_id.in_(run_ids))
).all()
pause_reasons_deleted = 0
pauses_deleted = 0
if pause_ids:
pause_reasons_result = session.execute(
delete(WorkflowPauseReason).where(WorkflowPauseReason.pause_id.in_(pause_ids))
)
pause_reasons_deleted = cast(CursorResult, pause_reasons_result).rowcount or 0
pauses_result = session.execute(delete(WorkflowPauseModel).where(WorkflowPauseModel.id.in_(pause_ids)))
pauses_deleted = cast(CursorResult, pauses_result).rowcount or 0
trigger_logs_deleted = delete_trigger_logs(session, run_ids) if delete_trigger_logs else 0
runs_result = session.execute(delete(WorkflowRun).where(WorkflowRun.id.in_(run_ids)))
runs_deleted = cast(CursorResult, runs_result).rowcount or 0
session.commit()
return {
"runs": runs_deleted,
"node_executions": node_executions_deleted,
"offloads": offloads_deleted,
"app_logs": app_logs_deleted,
"trigger_logs": trigger_logs_deleted,
"pauses": pauses_deleted,
"pause_reasons": pause_reasons_deleted,
}
def count_runs_with_related(
self,
runs: Sequence[WorkflowRun],
count_node_executions: Callable[[Session, Sequence[WorkflowRun]], tuple[int, int]] | None = None,
count_trigger_logs: Callable[[Session, Sequence[str]], int] | None = None,
) -> dict[str, int]:
if not runs:
return {
"runs": 0,
"node_executions": 0,
"offloads": 0,
"app_logs": 0,
"trigger_logs": 0,
"pauses": 0,
"pause_reasons": 0,
}
with self._session_maker() as session:
run_ids = [run.id for run in runs]
if count_node_executions:
node_executions_count, offloads_count = count_node_executions(session, runs)
else:
node_executions_count, offloads_count = 0, 0
app_logs_count = (
session.scalar(
select(func.count()).select_from(WorkflowAppLog).where(WorkflowAppLog.workflow_run_id.in_(run_ids))
)
or 0
)
pause_ids = session.scalars(
select(WorkflowPauseModel.id).where(WorkflowPauseModel.workflow_run_id.in_(run_ids))
).all()
pauses_count = len(pause_ids)
pause_reasons_count = 0
if pause_ids:
pause_reasons_count = (
session.scalar(
select(func.count())
.select_from(WorkflowPauseReason)
.where(WorkflowPauseReason.pause_id.in_(pause_ids))
)
or 0
)
trigger_logs_count = count_trigger_logs(session, run_ids) if count_trigger_logs else 0
return {
"runs": len(runs),
"node_executions": node_executions_count,
"offloads": offloads_count,
"app_logs": int(app_logs_count),
"trigger_logs": trigger_logs_count,
"pauses": pauses_count,
"pause_reasons": int(pause_reasons_count),
}
def create_workflow_pause(
self,
workflow_run_id: str,

View File

@ -4,8 +4,10 @@ SQLAlchemy implementation of WorkflowTriggerLogRepository.
from collections.abc import Sequence
from datetime import UTC, datetime, timedelta
from typing import cast
from sqlalchemy import and_, select
from sqlalchemy import and_, delete, func, select
from sqlalchemy.engine import CursorResult
from sqlalchemy.orm import Session
from models.enums import WorkflowTriggerStatus
@ -84,3 +86,37 @@ class SQLAlchemyWorkflowTriggerLogRepository(WorkflowTriggerLogRepository):
)
return list(self.session.scalars(query).all())
def delete_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Delete trigger logs associated with the given workflow run ids.
Args:
run_ids: Collection of workflow run identifiers.
Returns:
Number of rows deleted.
"""
if not run_ids:
return 0
result = self.session.execute(delete(WorkflowTriggerLog).where(WorkflowTriggerLog.workflow_run_id.in_(run_ids)))
return cast(CursorResult, result).rowcount or 0
def count_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Count trigger logs associated with the given workflow run ids.
Args:
run_ids: Collection of workflow run identifiers.
Returns:
Number of rows matched.
"""
if not run_ids:
return 0
count = self.session.scalar(
select(func.count()).select_from(WorkflowTriggerLog).where(WorkflowTriggerLog.workflow_run_id.in_(run_ids))
)
return int(count or 0)

View File

@ -109,3 +109,15 @@ class WorkflowTriggerLogRepository(Protocol):
A sequence of recent WorkflowTriggerLog instances
"""
...
def delete_by_run_ids(self, run_ids: Sequence[str]) -> int:
"""
Delete trigger logs for workflow run IDs.
Args:
run_ids: Workflow run IDs to delete
Returns:
Number of rows deleted
"""
...