Fix stuck processing state: auto-fail stale jobs, improve active job detection
- Frontend: only treat parsing_documents/distilling as actively running; pending jobs older than 2 minutes are ignored as stale.
- Backend: add fail_stale_jobs(), which marks pending/active jobs older than 5 minutes as failed before trigger_processing checks for active jobs.
- Prevents the UI from getting stuck on old jobs that never completed.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
0b7213af01
commit
42bf5ad003
3 changed files with 31 additions and 3 deletions
|
|
@ -238,6 +238,9 @@ async def trigger_processing(
|
|||
if not kb:
|
||||
raise HTTPException(status_code=404, detail="Knowledge base not found")
|
||||
|
||||
# Auto-fail stale pending/in-progress jobs (older than 5 minutes) before checking for active jobs
|
||||
await repo.fail_stale_jobs(kb_id)
|
||||
|
||||
# Check for active jobs
|
||||
has_active = await repo.has_active_job(kb_id)
|
||||
if has_active:
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import uuid
|
||||
from datetime import datetime, timezone
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from typing import Optional
|
||||
|
||||
from sqlalchemy import select, func
|
||||
|
|
@ -286,6 +286,23 @@ class KnowledgeBaseRepository:
|
|||
result = await self.session.execute(query)
|
||||
return result.scalar_one_or_none()
|
||||
|
||||
async def fail_stale_jobs(self, kb_id: uuid.UUID, stale_minutes: int = 5) -> None:
    """Mark stale non-terminal jobs of a knowledge base as failed.

    A job is treated as stale when its status is ``pending``,
    ``parsing_documents`` or ``distilling`` and its ``created_at`` is more
    than ``stale_minutes`` minutes in the past. Each stale job gets status
    ``failed``, a fixed error message, and a ``completed_at`` timestamp; the
    changes are flushed to the session but not committed here.

    Args:
        kb_id: Knowledge base whose processing jobs are inspected.
        stale_minutes: Age threshold in minutes, measured from ``created_at``.
    """
    # Read the clock once so the cutoff and every completed_at written in this
    # sweep come from the same instant.
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(minutes=stale_minutes)
    active_statuses = ["pending", "parsing_documents", "distilling"]

    # NOTE(review): age is measured from created_at for all three statuses, so
    # a job that is still parsing/distilling more than stale_minutes after it
    # was created is failed as well — confirm that is the intended timeout.
    query = (
        select(ProcessingJob)
        .where(ProcessingJob.knowledge_base_id == kb_id)
        .where(ProcessingJob.status.in_(active_statuses))
        .where(ProcessingJob.created_at < cutoff)
    )
    result = await self.session.execute(query)

    for job in result.scalars().all():
        job.status = "failed"
        job.error_message = "Job timed out (stale)"
        job.completed_at = now

    await self.session.flush()
|
||||
|
||||
async def has_active_job(self, kb_id: uuid.UUID) -> bool:
|
||||
"""Check if there's an active (non-terminal) processing job for this KB."""
|
||||
active_statuses = ["pending", "parsing_documents", "distilling"]
|
||||
|
|
|
|||
|
|
@ -93,9 +93,17 @@ export const KnowledgeBase: React.FC = () => {
|
|||
setSelectedKb(detail);
|
||||
setVersions(vers);
|
||||
|
||||
// Check for active job
|
||||
if (detail.latest_job && ['pending', 'parsing_documents', 'distilling'].includes(detail.latest_job.status)) {
|
||||
// Check for active job - only treat as running if actually in progress
|
||||
// A "pending" job counts as active only if it has a started_at less than 2 minutes old; otherwise it is considered stale/stuck
|
||||
if (detail.latest_job && ['parsing_documents', 'distilling'].includes(detail.latest_job.status)) {
|
||||
setActiveJob(detail.latest_job);
|
||||
} else if (detail.latest_job && detail.latest_job.status === 'pending' && detail.latest_job.started_at) {
|
||||
const ageMs = Date.now() - new Date(detail.latest_job.started_at).getTime();
|
||||
if (ageMs < 2 * 60 * 1000) {
|
||||
setActiveJob(detail.latest_job);
|
||||
} else {
|
||||
setActiveJob(null);
|
||||
}
|
||||
} else {
|
||||
setActiveJob(null);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue