fix: handle race conditions and 404 errors in bulk job deletion

- Deduplicate job IDs to prevent processing same job twice
- Convert GCS blob iterator to list upfront to avoid stale generations
- Clear blob.generation before delete to handle concurrent deletions
- Catch NotFound errors gracefully for already-deleted blobs
- Don't re-raise GCS errors - cleanup failures shouldn't block deletion
- Treat already-deleted jobs as successful (idempotent delete)
- Disable action dropdown during bulk operations in UI
- Show spinner with "Please wait" message during deletion

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2025-12-30 15:46:02 -06:00
parent e8b940aee8
commit 81079c2d17
2 changed files with 69 additions and 27 deletions

View file

@ -173,17 +173,24 @@ async def bulk_delete_jobs(
db: AsyncIOMotorDatabase = Depends(get_database),
):
"""Bulk delete jobs (production/admin only)"""
job_ids = request.job_ids
logger.info(f"Bulk deleting {len(job_ids)} jobs requested by {current_user.email}")
# Deduplicate job IDs to avoid processing the same job twice
unique_job_ids = list(dict.fromkeys(request.job_ids))
if len(unique_job_ids) != len(request.job_ids):
logger.warning(f"Removed {len(request.job_ids) - len(unique_job_ids)} duplicate job IDs from bulk delete request")
logger.info(f"Bulk deleting {len(unique_job_ids)} jobs requested by {current_user.email}")
deleted_count = 0
already_deleted = 0
errors = []
for job_id in job_ids:
for job_id in unique_job_ids:
try:
job_doc = await db.jobs.find_one({"_id": job_id})
if not job_doc:
errors.append(f"Job {job_id}: not found")
# Job may have been deleted by a concurrent request
already_deleted += 1
logger.debug(f"Job {job_id} not found (may have been deleted by concurrent request)")
continue
# Cancel task if exists
@ -194,7 +201,7 @@ async def bulk_delete_jobs(
except Exception as e:
logger.warning(f"Could not cancel task {task_id} for job {job_id}: {e}")
# Delete GCS assets
# Delete GCS assets (errors are logged but don't block deletion)
await _delete_job_gcs_assets(job_id, job_doc)
# Delete from database
@ -203,15 +210,20 @@ async def bulk_delete_jobs(
deleted_count += 1
logger.info(f"Deleted job {job_id}")
else:
errors.append(f"Job {job_id}: database deletion failed")
# Job was deleted between find_one and delete_one (race condition)
already_deleted += 1
logger.debug(f"Job {job_id} was already deleted")
except Exception as e:
errors.append(f"Job {job_id}: {str(e)}")
logger.error(f"Failed to delete job {job_id}: {e}")
# Consider already_deleted as successful (idempotent delete)
total_successful = deleted_count + already_deleted
return {
"deleted_count": deleted_count,
"total_requested": len(job_ids),
"deleted_count": total_successful,
"total_requested": len(unique_job_ids),
"errors": errors
}
@ -1100,21 +1112,39 @@ async def _delete_job_gcs_assets(job_id: str, job_doc: dict):
- {lang}/accessible_video.mp4 (rendered video)
- {lang}/accessible_captions.vtt (re-timed captions for pause-insert)
"""
from google.api_core.exceptions import NotFound
try:
deleted_count = 0
blobs = gcs_service.bucket.list_blobs(prefix=f"{job_id}/")
not_found_count = 0
# Convert lazy iterator to list upfront to avoid issues with
# concurrent deletions causing stale generation numbers
blobs = list(gcs_service.bucket.list_blobs(prefix=f"{job_id}/"))
for blob in blobs:
try:
# Clear generation to avoid 404 errors when another process
# already deleted the blob (generation mismatch)
blob.generation = None
blob.delete()
deleted_count += 1
logger.debug(f"Deleted GCS file: {blob.name}")
except NotFound:
# Blob was already deleted (likely by concurrent request)
not_found_count += 1
logger.debug(f"Blob already deleted: {blob.name}")
except Exception as e:
logger.warning(f"Could not delete {blob.name}: {e}")
logger.info(f"Deleted {deleted_count} GCS files for job {job_id}")
if not_found_count > 0:
logger.info(f"Deleted {deleted_count} GCS files for job {job_id} ({not_found_count} already deleted)")
else:
logger.info(f"Deleted {deleted_count} GCS files for job {job_id}")
except Exception as e:
logger.error(f"Error deleting GCS assets for job {job_id}: {e}")
raise
# Don't re-raise - GCS cleanup failures shouldn't block job deletion
# The job record will be deleted and files will eventually be cleaned up
@router.get("/{job_id}/validate", response_model=AssetValidationResponse)

View file

@ -649,7 +649,8 @@ export function JobsList() {
<select
value={bulkAction}
onChange={(e) => setBulkAction(e.target.value as 'delete' | 'reprocess' | 'download' | '')}
className="text-sm border border-gray-300 rounded px-2 py-1"
disabled={bulkDeleteMutation.isPending || reprocessMutation.isPending}
className="text-sm border border-gray-300 rounded px-2 py-1 disabled:opacity-50 disabled:cursor-not-allowed"
>
<option value="">Choose action...</option>
<option value="delete">Delete Selected</option>
@ -869,21 +870,32 @@ export function JobsList() {
This action cannot be undone.
</p>
</div>
<div className="items-center px-4 py-3 flex flex-col space-y-2 sm:flex-row sm:space-y-0 sm:space-x-3">
<button
onClick={() => setShowDeleteConfirm(false)}
className="px-4 py-2 bg-gray-500 text-white text-base font-medium rounded-md w-full shadow-sm hover:bg-gray-600 focus:outline-none focus:ring-2 focus:ring-gray-300 sm:w-auto"
>
Cancel
</button>
<button
onClick={handleDeleteConfirm}
disabled={bulkDeleteMutation.isPending}
className="px-4 py-2 bg-red-600 text-white text-base font-medium rounded-md w-full shadow-sm hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-red-300 disabled:opacity-50 sm:w-auto"
>
{bulkDeleteMutation.isPending ? 'Deleting...' : 'Delete'}
</button>
</div>
{bulkDeleteMutation.isPending ? (
<div className="px-4 py-3 flex flex-col items-center space-y-2">
<div className="flex items-center gap-3">
<div className="animate-spin h-5 w-5 border-2 border-red-600 border-t-transparent rounded-full" />
<span className="text-sm text-gray-700">
Deleting {selectedJobs.size} job{selectedJobs.size !== 1 ? 's' : ''}...
</span>
</div>
<p className="text-xs text-gray-500">Please wait, this may take a moment.</p>
</div>
) : (
<div className="items-center px-4 py-3 flex flex-col space-y-2 sm:flex-row sm:space-y-0 sm:space-x-3">
<button
onClick={() => setShowDeleteConfirm(false)}
className="px-4 py-2 bg-gray-500 text-white text-base font-medium rounded-md w-full shadow-sm hover:bg-gray-600 focus:outline-none focus:ring-2 focus:ring-gray-300 sm:w-auto"
>
Cancel
</button>
<button
onClick={handleDeleteConfirm}
className="px-4 py-2 bg-red-600 text-white text-base font-medium rounded-md w-full shadow-sm hover:bg-red-700 focus:outline-none focus:ring-2 focus:ring-red-300 sm:w-auto"
>
Delete
</button>
</div>
)}
</div>
</div>
</div>