From 81079c2d174412559146a8c6508bf12e767df4db Mon Sep 17 00:00:00 2001
From: michael
Date: Tue, 30 Dec 2025 15:46:02 -0600
Subject: [PATCH] fix: handle race conditions and 404 errors in bulk job deletion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
- Deduplicate job IDs to prevent processing same job twice
- Convert GCS blob iterator to list upfront to avoid stale generations
- Clear blob.generation before delete to handle concurrent deletions
- Catch NotFound errors gracefully for already-deleted blobs
- Don't re-raise GCS errors - cleanup failures shouldn't block deletion
- Treat already-deleted jobs as successful (idempotent delete)
- Disable action dropdown during bulk operations in UI
- Show spinner with "Please wait" message during deletion
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5
---
backend/app/api/v1/routes_jobs.py | 52 +++++++++++++++++++++------
frontend/src/routes/jobs/JobsList.tsx | 44 ++++++++++++++---------
2 files changed, 69 insertions(+), 27 deletions(-)
diff --git a/backend/app/api/v1/routes_jobs.py b/backend/app/api/v1/routes_jobs.py
index b2294fe..bf8e657 100644
--- a/backend/app/api/v1/routes_jobs.py
+++ b/backend/app/api/v1/routes_jobs.py
@@ -173,17 +173,24 @@ async def bulk_delete_jobs(
db: AsyncIOMotorDatabase = Depends(get_database),
):
"""Bulk delete jobs (production/admin only)"""
- job_ids = request.job_ids
- logger.info(f"Bulk deleting {len(job_ids)} jobs requested by {current_user.email}")
+ # Deduplicate job IDs to avoid processing the same job twice
+ unique_job_ids = list(dict.fromkeys(request.job_ids))
+ if len(unique_job_ids) != len(request.job_ids):
+ logger.warning(f"Removed {len(request.job_ids) - len(unique_job_ids)} duplicate job IDs from bulk delete request")
+
+ logger.info(f"Bulk deleting {len(unique_job_ids)} jobs requested by {current_user.email}")
deleted_count = 0
+ already_deleted = 0
errors = []
- for job_id in job_ids:
+ for job_id in unique_job_ids:
try:
job_doc = await db.jobs.find_one({"_id": job_id})
if not job_doc:
- errors.append(f"Job {job_id}: not found")
+ # Job may have been deleted by a concurrent request
+ already_deleted += 1
+ logger.debug(f"Job {job_id} not found (may have been deleted by concurrent request)")
continue
# Cancel task if exists
@@ -194,7 +201,7 @@ async def bulk_delete_jobs(
except Exception as e:
logger.warning(f"Could not cancel task {task_id} for job {job_id}: {e}")
- # Delete GCS assets
+ # Delete GCS assets (errors are logged but don't block deletion)
await _delete_job_gcs_assets(job_id, job_doc)
# Delete from database
@@ -203,15 +210,20 @@ async def bulk_delete_jobs(
deleted_count += 1
logger.info(f"Deleted job {job_id}")
else:
- errors.append(f"Job {job_id}: database deletion failed")
+ # Job was deleted between find_one and delete_one (race condition)
+ already_deleted += 1
+ logger.debug(f"Job {job_id} was already deleted")
except Exception as e:
errors.append(f"Job {job_id}: {str(e)}")
logger.error(f"Failed to delete job {job_id}: {e}")
+ # Consider already_deleted as successful (idempotent delete)
+ total_successful = deleted_count + already_deleted
+
return {
- "deleted_count": deleted_count,
- "total_requested": len(job_ids),
+ "deleted_count": total_successful,
+ "total_requested": len(unique_job_ids),
"errors": errors
}
@@ -1100,21 +1112,39 @@ async def _delete_job_gcs_assets(job_id: str, job_doc: dict):
- {lang}/accessible_video.mp4 (rendered video)
- {lang}/accessible_captions.vtt (re-timed captions for pause-insert)
"""
+ from google.api_core.exceptions import NotFound
+
try:
deleted_count = 0
- blobs = gcs_service.bucket.list_blobs(prefix=f"{job_id}/")
+ not_found_count = 0
+
+ # Convert lazy iterator to list upfront to avoid issues with
+ # concurrent deletions causing stale generation numbers
+ blobs = list(gcs_service.bucket.list_blobs(prefix=f"{job_id}/"))
+
for blob in blobs:
try:
+ # Clear generation to avoid 404 errors when another process
+ # already deleted the blob (generation mismatch)
+ blob.generation = None
blob.delete()
deleted_count += 1
logger.debug(f"Deleted GCS file: {blob.name}")
+ except NotFound:
+ # Blob was already deleted (likely by concurrent request)
+ not_found_count += 1
+ logger.debug(f"Blob already deleted: {blob.name}")
except Exception as e:
logger.warning(f"Could not delete {blob.name}: {e}")
- logger.info(f"Deleted {deleted_count} GCS files for job {job_id}")
+ if not_found_count > 0:
+ logger.info(f"Deleted {deleted_count} GCS files for job {job_id} ({not_found_count} already deleted)")
+ else:
+ logger.info(f"Deleted {deleted_count} GCS files for job {job_id}")
except Exception as e:
logger.error(f"Error deleting GCS assets for job {job_id}: {e}")
- raise
+ # Don't re-raise - GCS cleanup failures shouldn't block job deletion
+ # The job record will be deleted and files will eventually be cleaned up
@router.get("/{job_id}/validate", response_model=AssetValidationResponse)
diff --git a/frontend/src/routes/jobs/JobsList.tsx b/frontend/src/routes/jobs/JobsList.tsx
index 90d7dd2..1d9aa7d 100644
--- a/frontend/src/routes/jobs/JobsList.tsx
+++ b/frontend/src/routes/jobs/JobsList.tsx
@@ -649,7 +649,8 @@ export function JobsList() {
-
-
-
-
+ {bulkDeleteMutation.isPending ? (
+
+
+
+
+ Deleting {selectedJobs.size} job{selectedJobs.size !== 1 ? 's' : ''}...
+
+
+
Please wait, this may take a moment.
+
+ ) : (
+
+
+
+
+ )}