Add periodic cleanup cron job for temp data and stale processes (#4781)

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
LawyZheng
2026-02-18 23:10:00 +08:00
committed by GitHub
parent 4db25ec04f
commit f6e79781c1
4 changed files with 401 additions and 0 deletions

View File

@@ -968,6 +968,99 @@ class AgentDB(BaseAlchemyDB):
LOG.error("UnexpectedError", exc_info=True)
raise
async def get_running_tasks_info_globally(
    self,
    stale_threshold_hours: int = 24,
) -> tuple[int, int]:
    """Count in-flight tasks across all organizations, split active vs stale.

    Used by the cleanup service to decide whether cleanup should be skipped.

    Args:
        stale_threshold_hours: Tasks whose `modified_at` is older than this
            many hours are counted as stale rather than active.

    Returns:
        Tuple of (active_task_count, stale_task_count). Active tasks were
        updated within the threshold; stale tasks are still in a running
        status but have not been updated within the threshold.

    Raises:
        SQLAlchemyError: Re-raised after logging if the database query fails.
    """
    # Statuses considered "in flight" for cleanup purposes.
    in_flight = [TaskStatus.created, TaskStatus.queued, TaskStatus.running]
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; kept
    # here because modified_at is presumably stored as naive UTC — confirm
    # before migrating to timezone-aware datetimes.
    cutoff = datetime.utcnow() - timedelta(hours=stale_threshold_hours)

    def _count_stmt(recent: bool):
        # Build a COUNT(*) statement over in-flight tasks on one side of
        # the staleness cutoff.
        time_filter = (
            TaskModel.modified_at >= cutoff
            if recent
            else TaskModel.modified_at < cutoff
        )
        return (
            select(func.count())
            .select_from(TaskModel)
            .filter(TaskModel.status.in_(in_flight))
            .filter(time_filter)
        )

    try:
        async with self.Session() as session:
            active_count = (await session.execute(_count_stmt(recent=True))).scalar_one()
            stale_count = (await session.execute(_count_stmt(recent=False))).scalar_one()
            return (active_count, stale_count)
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError in get_running_tasks_info_globally", exc_info=True)
        raise
async def get_running_workflow_runs_info_globally(
    self,
    stale_threshold_hours: int = 24,
) -> tuple[int, int]:
    """Count in-flight workflow runs across all organizations, split active vs stale.

    Used by the cleanup service to decide whether cleanup should be skipped.

    Args:
        stale_threshold_hours: Workflow runs whose `modified_at` is older
            than this many hours are counted as stale rather than active.

    Returns:
        Tuple of (active_workflow_count, stale_workflow_count). Active runs
        were updated within the threshold; stale runs are still in a running
        status but have not been updated within the threshold.

    Raises:
        SQLAlchemyError: Re-raised after logging if the database query fails.
    """
    # Statuses considered "in flight" for cleanup purposes (paused runs
    # count too, matching the original behavior).
    in_flight = [
        WorkflowRunStatus.created,
        WorkflowRunStatus.queued,
        WorkflowRunStatus.running,
        WorkflowRunStatus.paused,
    ]
    # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; kept
    # here because modified_at is presumably stored as naive UTC — confirm
    # before migrating to timezone-aware datetimes.
    cutoff = datetime.utcnow() - timedelta(hours=stale_threshold_hours)

    def _count_stmt(recent: bool):
        # Build a COUNT(*) statement over in-flight workflow runs on one
        # side of the staleness cutoff.
        time_filter = (
            WorkflowRunModel.modified_at >= cutoff
            if recent
            else WorkflowRunModel.modified_at < cutoff
        )
        return (
            select(func.count())
            .select_from(WorkflowRunModel)
            .filter(WorkflowRunModel.status.in_(in_flight))
            .filter(time_filter)
        )

    try:
        async with self.Session() as session:
            active_count = (await session.execute(_count_stmt(recent=True))).scalar_one()
            stale_count = (await session.execute(_count_stmt(recent=False))).scalar_one()
            return (active_count, stale_count)
    except SQLAlchemyError:
        LOG.error("SQLAlchemyError in get_running_workflow_runs_info_globally", exc_info=True)
        raise
async def get_all_organizations(self) -> list[Organization]:
try:
async with self.Session() as session: