refactor: simplify GCS job deletion to use prefix-based cleanup
Replace 3-stage redundant deletion with single prefix-based approach.
All job files are under {job_id}/ prefix, so listing and deleting by
prefix is simpler and catches all files including new types like
accessible_video.mp4 and ad_cues/*.mp3.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
e25a0d6ad0
commit
3df163fd13
1 changed file with 18 additions and 38 deletions
|
|
@ -1089,49 +1089,29 @@ async def delete_job(
|
|||
|
||||
|
||||
async def _delete_job_gcs_assets(job_id: str, job_doc: dict):
    """Delete all GCS assets for a job using prefix-based cleanup.

    All job files are stored under the {job_id}/ prefix in GCS, including:
    - source.mp4 (original video)
    - {lang}/captions.vtt (translated captions)
    - {lang}/ad.vtt (audio description cues)
    - {lang}/ad.mp3 (full AD audio)
    - {lang}/ad_cues/cue_*.mp3 (per-cue TTS segments)
    - {lang}/accessible_video.mp4 (rendered video)
    - {lang}/accessible_captions.vtt (re-timed captions for pause-insert)

    Deletion is best-effort per blob: a failure to delete one blob is logged
    as a warning and counted, and the loop continues with the next blob.

    Args:
        job_id: Identifier of the job; every blob whose name starts with
            ``{job_id}/`` is deleted.
        job_doc: Job document. Not used by the prefix-based cleanup; kept in
            the signature so existing callers keep working.

    Raises:
        Exception: Any error raised outside the per-blob handler (for example
            while listing blobs) is logged and re-raised unchanged.
    """
    try:
        deleted_count = 0
        failed_count = 0
        # NOTE(review): list_blobs() and blob.delete() are not awaited here. If
        # gcs_service wraps a synchronous client, these calls run on the event
        # loop thread - confirm, and consider offloading to an executor.
        blobs = gcs_service.bucket.list_blobs(prefix=f"{job_id}/")
        for blob in blobs:
            try:
                blob.delete()
            except Exception as e:
                # Best-effort: one failed delete must not abort the cleanup.
                failed_count += 1
                logger.warning(f"Could not delete {blob.name}: {e}")
            else:
                deleted_count += 1
                logger.debug(f"Deleted GCS file: {blob.name}")

        logger.info(f"Deleted {deleted_count} GCS files for job {job_id}")
        if failed_count:
            # Surface partial cleanups; the info line above counts successes only.
            logger.warning(
                f"Failed to delete {failed_count} GCS files for job {job_id}"
            )
    except Exception as e:
        # logger.exception logs at ERROR level and keeps the traceback.
        logger.exception(f"Error deleting GCS assets for job {job_id}: {e}")
        raise
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue