refactor: simplify GCS job deletion to use prefix-based cleanup

Replace the redundant 3-stage deletion with a single prefix-based approach.
All job files are under {job_id}/ prefix, so listing and deleting by
prefix is simpler and catches all files including new types like
accessible_video.mp4 and ad_cues/*.mp3.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
michael 2025-12-28 10:07:43 -06:00
parent e25a0d6ad0
commit 3df163fd13

View file

@ -1089,49 +1089,29 @@ async def delete_job(
async def _delete_job_gcs_assets(job_id: str, job_doc: dict):
"""Delete all GCS assets for a job"""
"""Delete all GCS assets for a job using prefix-based cleanup.
All job files are stored under the {job_id}/ prefix in GCS, including:
- source.mp4 (original video)
- {lang}/captions.vtt (translated captions)
- {lang}/ad.vtt (audio description cues)
- {lang}/ad.mp3 (full AD audio)
- {lang}/ad_cues/cue_*.mp3 (per-cue TTS segments)
- {lang}/accessible_video.mp4 (rendered video)
- {lang}/accessible_captions.vtt (re-timed captions for pause-insert)
"""
try:
# Delete source file
source_gcs_uri = job_doc.get("source", {}).get("gcs_uri")
if source_gcs_uri:
blob_path = source_gcs_uri.replace(f"gs://{settings.gcs_bucket}/", "")
deleted_count = 0
blobs = gcs_service.bucket.list_blobs(prefix=f"{job_id}/")
for blob in blobs:
try:
blob = gcs_service.bucket.blob(blob_path)
blob.delete()
logger.info(f"Deleted source file: {blob_path}")
deleted_count += 1
logger.debug(f"Deleted GCS file: {blob.name}")
except Exception as e:
logger.warning(f"Could not delete source file {blob_path}: {e}")
# Delete output files
outputs = job_doc.get("outputs", {})
for lang, lang_outputs in outputs.items():
if not isinstance(lang_outputs, dict):
continue
# Delete VTT files
for key in ["captions_vtt_gcs", "ad_vtt_gcs", "ad_mp3_gcs"]:
if key in lang_outputs:
gcs_uri = lang_outputs[key]
blob_path = gcs_uri.replace(f"gs://{settings.gcs_bucket}/", "")
try:
blob = gcs_service.bucket.blob(blob_path)
blob.delete()
logger.info(f"Deleted output file: {blob_path}")
except Exception as e:
logger.warning(f"Could not delete output file {blob_path}: {e}")
# Delete entire job folder if it exists
try:
blobs = gcs_service.bucket.list_blobs(prefix=f"{job_id}/")
for blob in blobs:
try:
blob.delete()
logger.info(f"Deleted remaining file: {blob.name}")
except Exception as e:
logger.warning(f"Could not delete {blob.name}: {e}")
except Exception as e:
logger.warning(f"Could not list/delete job folder {job_id}/: {e}")
logger.warning(f"Could not delete {blob.name}: {e}")
logger.info(f"Deleted {deleted_count} GCS files for job {job_id}")
except Exception as e:
logger.error(f"Error deleting GCS assets for job {job_id}: {e}")
raise