Fix stuck processing state: auto-fail stale jobs, improve active job detection

- Frontend: only treat parsing_documents/distilling as actively running;
  pending jobs older than 2 minutes are ignored as stale
- Backend: add fail_stale_jobs() that marks pending/active jobs older than
  5 minutes as failed before checking for active jobs in trigger_processing
- Prevents UI from getting stuck on old jobs that never completed

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
michael 2026-02-12 16:03:25 -06:00
parent 0b7213af01
commit 42bf5ad003
3 changed files with 31 additions and 3 deletions

View file

@ -238,6 +238,9 @@ async def trigger_processing(
if not kb:
raise HTTPException(status_code=404, detail="Knowledge base not found")
# Auto-fail stale pending/in-progress jobs (older than 5 minutes) before checking
await repo.fail_stale_jobs(kb_id)
# Check for active jobs
has_active = await repo.has_active_job(kb_id)
if has_active:

View file

@ -1,5 +1,5 @@
import uuid
from datetime import datetime, timezone
from datetime import datetime, timedelta, timezone
from typing import Optional
from sqlalchemy import select, func
@ -286,6 +286,23 @@ class KnowledgeBaseRepository:
result = await self.session.execute(query)
return result.scalar_one_or_none()
async def fail_stale_jobs(self, kb_id: uuid.UUID, stale_minutes: int = 5) -> None:
    """Mark stale pending/active jobs as failed (older than stale_minutes).

    A job is treated as stale when its status is still one of "pending",
    "parsing_documents" or "distilling" and its ``created_at`` is earlier
    than ``stale_minutes`` ago. Age is measured from creation, so a job that
    is in progress but was created before the cutoff is failed as well.

    Each matching job gets status "failed", a fixed error message and a
    ``completed_at`` timestamp. The changes are flushed to the session;
    this method does not commit.

    Args:
        kb_id: Knowledge base whose processing jobs are inspected.
        stale_minutes: Age threshold in minutes.
    """
    # Take a single timestamp so the cutoff and every completed_at written
    # below refer to the same instant (and the clock is not re-read per job).
    now = datetime.now(timezone.utc)
    cutoff = now - timedelta(minutes=stale_minutes)
    active_statuses = ["pending", "parsing_documents", "distilling"]
    query = (
        select(ProcessingJob)
        .where(ProcessingJob.knowledge_base_id == kb_id)
        .where(ProcessingJob.status.in_(active_statuses))
        .where(ProcessingJob.created_at < cutoff)
    )
    result = await self.session.execute(query)
    for job in result.scalars().all():
        job.status = "failed"
        job.error_message = "Job timed out (stale)"
        job.completed_at = now
    # Flush unconditionally, as before, so pending changes reach the database
    # before the caller checks for active jobs.
    await self.session.flush()
async def has_active_job(self, kb_id: uuid.UUID) -> bool:
"""Check if there's an active (non-terminal) processing job for this KB."""
active_statuses = ["pending", "parsing_documents", "distilling"]

View file

@ -93,9 +93,17 @@ export const KnowledgeBase: React.FC = () => {
setSelectedKb(detail);
setVersions(vers);
// Check for active job
if (detail.latest_job && ['pending', 'parsing_documents', 'distilling'].includes(detail.latest_job.status)) {
// Check for active job - only treat as running if actually in progress
// A "pending" job older than 2 minutes is considered stale/stuck
if (detail.latest_job && ['parsing_documents', 'distilling'].includes(detail.latest_job.status)) {
setActiveJob(detail.latest_job);
} else if (detail.latest_job && detail.latest_job.status === 'pending' && detail.latest_job.started_at) {
const ageMs = Date.now() - new Date(detail.latest_job.started_at).getTime();
if (ageMs < 2 * 60 * 1000) {
setActiveJob(detail.latest_job);
} else {
setActiveJob(null);
}
} else {
setActiveJob(null);
}