Improve storage purge and fix outline parsing

- Fix image path handling in purge (remove incorrect lstrip("/") that turned the absolute image path into a relative one)
- Add hard-delete of presentation records from database after file cleanup
- Add debug logging throughout purge process
- Fix outline parsing when LLM returns slides as JSON string
- Fix purge success message to show the total file count (export files + images)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-02-27 17:33:27 +00:00
parent 1280e40eb2
commit 69b18a218f
3 changed files with 29 additions and 6 deletions

View file

@ -6,7 +6,7 @@ from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel
from sqlalchemy import func, select, and_
from sqlalchemy import func, select, and_, delete
from sqlalchemy.ext.asyncio import AsyncSession
from models.sql.client import ClientModel
@ -379,6 +379,8 @@ async def purge_deleted_storage(
purged_presentations = 0
purged_images = 0
print(f"[PURGE] Found {len(deleted_presentations)} soft-deleted presentations")
for p in deleted_presentations:
# Delete export files (PDF/PPTX)
if p.file_paths:
@ -397,6 +399,7 @@ async def purge_deleted_storage(
slides_stmt = select(SlideModel).where(SlideModel.presentation == p.id)
slides_result = await session.execute(slides_stmt)
slides = slides_result.scalars().all()
print(f"[PURGE] Presentation {p.id}: {len(slides)} slides")
for slide in slides:
if slide.content and isinstance(slide.content, dict):
@ -404,20 +407,33 @@ async def purge_deleted_storage(
image_data = slide.content.get("image")
if image_data and isinstance(image_data, dict):
image_url = image_data.get("__image_url__")
print(f"[PURGE] Slide has image URL: {image_url}")
if image_url and image_url.startswith("/app_data/images/"):
# Convert URL to filesystem path
image_path = image_url.lstrip("/")
# URL is already an absolute path inside container
image_path = image_url
print(f"[PURGE] Checking path: {image_path}, exists: {os.path.isfile(image_path)}")
if os.path.isfile(image_path):
try:
size = os.path.getsize(image_path)
os.remove(image_path)
purged_images += 1
purged_bytes += size
except OSError:
pass
print(f"[PURGE] ✓ Deleted: {image_path} ({size} bytes)")
except OSError as e:
print(f"[PURGE] ✗ Error deleting {image_path}: {e}")
purged_presentations += 1
print(f"[PURGE] TOTAL: {purged_presentations} presentations, {purged_files} files, {purged_images} images, {purged_bytes} bytes")
# Hard-delete presentation records and their slides from DB
print(f"[PURGE] Hard-deleting {len(deleted_presentations)} presentation records from DB...")
for p in deleted_presentations:
# Slides will be cascade-deleted due to FK ondelete="CASCADE"
await session.delete(p)
print(f"[PURGE] ✓ Deleted {len(deleted_presentations)} presentations from database")
await session.commit()
audit_service.log(

View file

@ -87,6 +87,12 @@ async def stream_outlines(
presentation_outlines_json = dict(
dirtyjson.loads(presentation_outlines_text)
)
# Fix: LLM sometimes returns slides as JSON string instead of list
if "slides" in presentation_outlines_json and isinstance(presentation_outlines_json["slides"], str):
print("[OUTLINE] Warning: slides field is a string, parsing as JSON...")
presentation_outlines_json["slides"] = dirtyjson.loads(presentation_outlines_json["slides"])
except Exception as e:
traceback.print_exc()
yield SSEErrorResponse(

View file

@ -177,8 +177,9 @@ export default function StoragePage() {
});
if (res.ok) {
const data = await res.json();
const totalFiles = (data.purged_files || 0) + (data.purged_images || 0);
toast.success(
`Purged ${data.purged_files} files (${formatBytes(data.purged_bytes)})`
`Purged ${totalFiles} files (${formatBytes(data.purged_bytes || 0)})`
);
load();
} else {