ppt-tool/backend/api/v1/admin/storage_router.py
Vadym Samoilenko 69b18a218f Improve storage purge and fix outline parsing
- Fix image path handling in purge (remove incorrect lstrip)
- Add hard-delete of presentation records from database after file cleanup
- Add debug logging throughout purge process
- Fix outline parsing when LLM returns slides as JSON string
- Fix purge success message to show total files count

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-02-27 17:33:27 +00:00

457 lines
17 KiB
Python

"""Admin router for storage management — list, download, delete presentations."""
import os
import uuid
from typing import List, Optional
from fastapi import APIRouter, Depends, HTTPException, Query
from fastapi.responses import FileResponse
from pydantic import BaseModel
from sqlalchemy import func, select, and_, delete
from sqlalchemy.ext.asyncio import AsyncSession
from models.sql.client import ClientModel
from models.sql.master_deck import MasterDeckModel
from models.sql.presentation import PresentationModel
from models.sql.slide import SlideModel
from models.sql.user import UserModel
from services import audit_service
from services.access_service import get_accessible_client_ids
from services.database import get_async_session
from utils.auth_dependencies import require_client_admin
from utils.datetime_utils import get_current_utc_datetime
# Router on which the admin storage endpoints in this module are registered.
STORAGE_ROUTER = APIRouter(tags=["Admin - Storage"])
async def _resolve_client_filter(
    client_id: Optional[uuid.UUID],
    user: UserModel,
    session: AsyncSession,
):
    """Build the client restriction applied to presentation queries.

    Args:
        client_id: Client explicitly requested by the caller, or None.
        user: The authenticated admin issuing the request.
        session: Session used to look up the clients the user may access.

    Returns:
        A SQLAlchemy filter expression on ``PresentationModel.client_id``,
        or None when no restriction applies (super_admin without an
        explicit ``client_id``).

    Raises:
        HTTPException: 403 when a non-super-admin asks for a client that is
            not among their accessible clients.
    """
    if user.role == "super_admin":
        if client_id:
            return PresentationModel.client_id == client_id
        return None

    cids = await get_accessible_client_ids(user, session)

    if client_id:
        # An explicitly requested client must not bypass the access check;
        # otherwise any client admin could read another client's storage.
        if not cids or client_id not in cids:
            raise HTTPException(status_code=403, detail="Access denied")
        return PresentationModel.client_id == client_id

    if not cids:
        # No accessible clients: only presentations without a client match.
        return PresentationModel.client_id.is_(None)
    if len(cids) == 1:
        return PresentationModel.client_id == cids[0]
    return PresentationModel.client_id.in_(cids)
@STORAGE_ROUTER.get("/storage/summary")
async def storage_summary(
    client_id: Optional[uuid.UUID] = Query(None),
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Storage summary: total presentations, files, and disk usage.

    Args:
        client_id: Optional client to restrict the summary to.
        admin: The authenticated admin (injected dependency).
        session: Database session (injected dependency).

    Returns:
        A dict with the number of active and soft-deleted presentations, the
        count and byte size of presentation files found on disk, and the
        count, file count and byte size of master decks.
    """

    def _file_size(path):
        """Return the size of ``path`` in bytes, or None if it is not a readable file."""
        if not path or not os.path.isfile(path):
            return None
        try:
            return os.path.getsize(path)
        except OSError:
            # The file vanished or became unreadable after the isfile() check.
            return None

    cf = await _resolve_client_filter(client_id, admin, session)

    # Active (not soft-deleted) presentations.
    filters = [PresentationModel.deleted_at.is_(None)]
    if cf is not None:
        filters.append(cf)
    count_q = select(func.count()).select_from(PresentationModel).where(and_(*filters))
    total_presentations = (await session.execute(count_q)).scalar() or 0

    # File paths of active presentations, used to measure disk usage.
    paths_q = select(PresentationModel.file_paths).where(
        and_(*filters, PresentationModel.file_paths.isnot(None))
    )
    all_paths = (await session.execute(paths_q)).scalars().all()

    total_files = 0
    total_size_bytes = 0
    for file_paths in all_paths:
        for path in file_paths or ():
            size = _file_size(path)
            if size is not None:
                total_files += 1
                total_size_bytes += size

    # Soft-deleted presentations.
    deleted_filters = [PresentationModel.deleted_at.isnot(None)]
    if cf is not None:
        deleted_filters.append(cf)
    deleted_count_q = (
        select(func.count())
        .select_from(PresentationModel)
        .where(and_(*deleted_filters))
    )
    total_deleted = (await session.execute(deleted_count_q)).scalar() or 0

    # Master decks get the same client restriction as presentations.
    deck_filters = []
    if client_id:
        deck_filters.append(MasterDeckModel.client_id == client_id)
    elif admin.role != "super_admin":
        cids = await get_accessible_client_ids(admin, session)
        if cids:
            deck_filters.append(MasterDeckModel.client_id.in_(cids))
        else:
            # Without this branch an admin with no accessible clients would
            # get an unfiltered query and see every client's master decks.
            # Mirrors the presentation filter: only client-less rows match.
            deck_filters.append(MasterDeckModel.client_id.is_(None))

    deck_q = select(MasterDeckModel)
    if deck_filters:
        deck_q = deck_q.where(and_(*deck_filters))
    decks = (await session.execute(deck_q)).scalars().all()
    # The rows are loaded anyway, so a separate COUNT query is unnecessary.
    total_master_decks = len(decks)

    master_deck_files = 0
    master_deck_size = 0
    for deck in decks:
        candidates = [deck.original_file_path, deck.thumbnail_path]
        candidates.extend(
            layout.get("screenshot_path") for layout in deck.layouts or ()
        )
        for path in candidates:
            size = _file_size(path)
            if size is not None:
                master_deck_files += 1
                master_deck_size += size

    return {
        "total_presentations": total_presentations,
        "total_files": total_files,
        "total_size_bytes": total_size_bytes,
        "total_deleted": total_deleted,
        "total_master_decks": total_master_decks,
        "master_deck_files": master_deck_files,
        "master_deck_size_bytes": master_deck_size,
    }
@STORAGE_ROUTER.get("/storage/presentations")
async def list_storage_presentations(
    client_id: Optional[uuid.UUID] = Query(None),
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """List active presentations, newest first, with on-disk file metadata.

    Each item carries the presentation id, title, status and creation time,
    the number and total size of its files that exist on disk, and whether
    any recorded file path ends in ``.pptx``.
    """
    client_filter = await _resolve_client_filter(client_id, admin, session)

    conditions = [PresentationModel.deleted_at.is_(None)]
    if client_filter is not None:
        conditions.append(client_filter)

    query = select(PresentationModel).where(and_(*conditions))
    query = query.order_by(PresentationModel.created_at.desc())
    rows = (await session.execute(query)).scalars().all()

    listing = []
    for pres in rows:
        recorded = pres.file_paths or []
        # Sizes of the recorded paths that are present on disk.
        sizes = [
            os.path.getsize(fp) for fp in recorded if fp and os.path.isfile(fp)
        ]
        created = pres.created_at.isoformat() if pres.created_at else None
        listing.append(
            {
                "id": str(pres.id),
                "title": pres.title,
                "status": pres.status,
                "created_at": created,
                "file_count": len(sizes),
                "total_size_bytes": sum(sizes),
                "has_export": any(fp.endswith(".pptx") for fp in recorded if fp),
            }
        )
    return listing
@STORAGE_ROUTER.get("/storage/presentations/{presentation_id}/download")
async def download_presentation(
    presentation_id: uuid.UUID,
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Send the first existing ``.pptx`` file recorded for a presentation.

    Raises:
        HTTPException: 404 if the presentation is unknown, has no recorded
            files, or none of its ``.pptx`` paths exists on disk; 403 if the
            presentation belongs to a client the admin cannot access.
    """
    record = await session.get(PresentationModel, presentation_id)
    if not record:
        raise HTTPException(status_code=404, detail="Presentation not found")

    # Presentations bound to a client require access to that client.
    if record.client_id:
        allowed = await get_accessible_client_ids(admin, session)
        if record.client_id not in allowed:
            raise HTTPException(status_code=403, detail="Access denied")

    if not record.file_paths:
        raise HTTPException(status_code=404, detail="No export files available")

    # Pick the first recorded PPTX path that is actually on disk.
    export_path = None
    for candidate in record.file_paths:
        if candidate and candidate.endswith(".pptx") and os.path.isfile(candidate):
            export_path = candidate
            break
    if not export_path:
        raise HTTPException(status_code=404, detail="PPTX file not found on disk")

    download_name = "{}.pptx".format(record.title or "presentation")
    return FileResponse(
        export_path,
        filename=download_name,
        media_type="application/vnd.openxmlformats-officedocument.presentationml.presentation",
    )
@STORAGE_ROUTER.delete("/storage/presentations/{presentation_id}")
async def delete_presentation_storage(
    presentation_id: uuid.UUID,
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Soft-delete a presentation (files cleaned up by retention service).

    Sets ``deleted_at`` on the presentation and writes an audit entry. A
    presentation that is already soft-deleted is left untouched so its
    original deletion timestamp is preserved.

    Returns:
        ``{"ok": True}``.

    Raises:
        HTTPException: 404 if the presentation does not exist; 403 if it
            belongs to a client the admin cannot access.
    """
    presentation = await session.get(PresentationModel, presentation_id)
    if not presentation:
        raise HTTPException(status_code=404, detail="Presentation not found")

    # Verify access
    if presentation.client_id:
        cids = await get_accessible_client_ids(admin, session)
        if presentation.client_id not in cids:
            raise HTTPException(status_code=403, detail="Access denied")

    if presentation.deleted_at:
        # Already soft-deleted: do not reset the timestamp (consistent with
        # the bulk-delete endpoint, which skips such rows).
        return {"ok": True}

    # Read everything the audit entry needs before committing: a commit may
    # expire ORM attributes, and reloading them afterwards would require
    # implicit I/O that an AsyncSession does not allow.
    admin_id = admin.id
    resource_id = presentation.id
    owner_client_id = presentation.client_id

    presentation.deleted_at = get_current_utc_datetime()
    await session.commit()

    audit_service.log(
        user_id=admin_id,
        action="admin_delete",
        resource_type="presentation",
        resource_id=resource_id,
        client_id=owner_client_id,
    )
    return {"ok": True}
class BulkDeleteRequest(BaseModel):
    """Request body listing the presentations to soft-delete in one call."""

    # IDs of the presentations to soft-delete.
    ids: List[uuid.UUID]
@STORAGE_ROUTER.get("/storage/breakdown")
async def storage_breakdown(
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Report storage usage per client, largest total first.

    For every client the result holds the number of active presentations,
    the count and size of their files found on disk, and the count, file
    count and size of the client's master decks.

    Raises:
        HTTPException: 403 unless the caller is a super_admin.
    """
    if admin.role != "super_admin":
        raise HTTPException(status_code=403, detail="Super admin only")

    def _tally(paths):
        """Count the given paths that are files on disk and sum their sizes."""
        found, size = 0, 0
        for candidate in paths:
            if candidate and os.path.isfile(candidate):
                found += 1
                size += os.path.getsize(candidate)
        return found, size

    def _deck_paths(deck_rows):
        """Yield each deck's original file, thumbnail and layout screenshots."""
        for deck in deck_rows:
            yield deck.original_file_path
            yield deck.thumbnail_path
            if deck.layouts:
                for layout in deck.layouts:
                    yield layout.get("screenshot_path")

    all_clients = (await session.execute(select(ClientModel))).scalars().all()

    rows = []
    for client in all_clients:
        owned = PresentationModel.client_id == client.id
        active = PresentationModel.deleted_at.is_(None)

        # Number of active presentations for this client.
        count_stmt = (
            select(func.count())
            .select_from(PresentationModel)
            .where(and_(active, owned))
        )
        presentation_total = (await session.execute(count_stmt)).scalar() or 0

        # Files belonging to those presentations.
        paths_stmt = select(PresentationModel.file_paths).where(
            and_(active, PresentationModel.file_paths.isnot(None), owned)
        )
        path_groups = (await session.execute(paths_stmt)).scalars().all()
        export_files, export_bytes = _tally(
            path for group in path_groups if group for path in group
        )

        # Master decks owned by this client and the files they reference.
        decks_stmt = select(MasterDeckModel).where(
            MasterDeckModel.client_id == client.id
        )
        client_decks = (await session.execute(decks_stmt)).scalars().all()
        deck_file_total, deck_bytes = _tally(_deck_paths(client_decks))

        rows.append(
            {
                "client_id": str(client.id),
                "client_name": client.name,
                "presentations": presentation_total,
                "presentation_files": export_files,
                "presentation_size_bytes": export_bytes,
                "master_decks": len(client_decks),
                "master_deck_files": deck_file_total,
                "master_deck_size_bytes": deck_bytes,
                "total_size_bytes": export_bytes + deck_bytes,
            }
        )

    return sorted(rows, key=lambda row: row["total_size_bytes"], reverse=True)
@STORAGE_ROUTER.post("/storage/presentations/bulk-delete")
async def bulk_delete_presentations(
    body: BulkDeleteRequest,
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Soft-delete multiple presentations at once.

    Presentations that do not exist, are already soft-deleted, or belong to
    a client the admin cannot access are skipped silently.

    Returns:
        ``{"ok": True, "deleted_count": <number actually soft-deleted>}``.
    """
    # Looked up at most once, on the first presentation bound to a client.
    accessible_ids = None
    deleted_ids = []

    for pid in body.ids:
        presentation = await session.get(PresentationModel, pid)
        if not presentation or presentation.deleted_at:
            continue

        # Access check
        if presentation.client_id:
            if accessible_ids is None:
                accessible_ids = (
                    await get_accessible_client_ids(admin, session)
                ) or []
            if presentation.client_id not in accessible_ids:
                continue

        presentation.deleted_at = get_current_utc_datetime()
        deleted_ids.append(str(pid))

    # Read before committing: a commit may expire ORM attributes, and an
    # AsyncSession cannot reload them implicitly.
    admin_id = admin.id
    await session.commit()

    audit_service.log(
        user_id=admin_id,
        action="admin_bulk_delete",
        resource_type="presentation",
        details={
            "count": len(deleted_ids),
            # "ids" keeps its original meaning (everything requested);
            # "deleted_ids" records what was actually soft-deleted.
            "ids": [str(i) for i in body.ids],
            "deleted_ids": deleted_ids,
        },
    )
    return {"ok": True, "deleted_count": len(deleted_ids)}
@STORAGE_ROUTER.post("/storage/purge")
async def purge_deleted_storage(
    client_id: Optional[uuid.UUID] = Query(None),
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Hard-delete soft-deleted presentations: files, images and DB records.

    For every soft-deleted presentation (optionally limited to one client)
    this removes its export files and the generated slide images stored
    under ``/app_data/images/``, then deletes the presentation rows.

    Returns:
        A dict with ``ok`` plus the number of purged presentations, export
        files, images and bytes.

    Raises:
        HTTPException: 403 unless the caller is a super_admin.
    """
    if admin.role != "super_admin":
        raise HTTPException(status_code=403, detail="Super admin only")

    images_root = "/app_data/images/"

    def _remove_file(path):
        """Delete ``path``; return the bytes freed, or None if nothing was removed."""
        if not path or not os.path.isfile(path):
            return None
        try:
            size = os.path.getsize(path)
            os.remove(path)
        except OSError as e:
            # Best effort: report the failure and carry on with the purge.
            print(f"[PURGE] ✗ Error deleting {path}: {e}")
            return None
        return size

    filters = [PresentationModel.deleted_at.isnot(None)]
    if client_id:
        filters.append(PresentationModel.client_id == client_id)
    stmt = select(PresentationModel).where(and_(*filters))
    deleted_presentations = (await session.execute(stmt)).scalars().all()

    purged_files = 0
    purged_bytes = 0
    purged_presentations = 0
    purged_images = 0
    print(f"[PURGE] Found {len(deleted_presentations)} soft-deleted presentations")

    for p in deleted_presentations:
        # Delete export files (PDF/PPTX)
        for path in p.file_paths or ():
            freed = _remove_file(path)
            if freed is not None:
                purged_files += 1
                purged_bytes += freed

        # Delete generated images from slides
        slides_stmt = select(SlideModel).where(SlideModel.presentation == p.id)
        slides = (await session.execute(slides_stmt)).scalars().all()
        print(f"[PURGE] Presentation {p.id}: {len(slides)} slides")

        for slide in slides:
            if not slide.content or not isinstance(slide.content, dict):
                continue
            # Image path lives at content.image.__image_url__
            image_data = slide.content.get("image")
            if not image_data or not isinstance(image_data, dict):
                continue
            image_url = image_data.get("__image_url__")
            print(f"[PURGE] Slide has image URL: {image_url}")
            if not isinstance(image_url, str) or not image_url.startswith(images_root):
                continue

            # The URL is used as an absolute path inside the container, but it
            # comes from slide JSON: collapse any ".." segments and re-check
            # the prefix so it cannot escape the images directory.
            image_path = os.path.normpath(image_url)
            if not image_path.startswith(images_root):
                print(f"[PURGE] ✗ Skipping path outside {images_root}: {image_url}")
                continue

            print(f"[PURGE] Checking path: {image_path}, exists: {os.path.isfile(image_path)}")
            freed = _remove_file(image_path)
            if freed is not None:
                purged_images += 1
                purged_bytes += freed
                print(f"[PURGE] ✓ Deleted: {image_path} ({freed} bytes)")

        purged_presentations += 1

    print(f"[PURGE] TOTAL: {purged_presentations} presentations, {purged_files} files, {purged_images} images, {purged_bytes} bytes")

    # Hard-delete presentation records from DB. Per the original author's
    # note, slides are expected to be removed by the FK ondelete="CASCADE".
    print(f"[PURGE] Hard-deleting {len(deleted_presentations)} presentation records from DB...")
    for p in deleted_presentations:
        await session.delete(p)

    # Read before committing: a commit may expire ORM attributes, and an
    # AsyncSession cannot reload them implicitly.
    admin_id = admin.id
    await session.commit()
    print(f"[PURGE] ✓ Deleted {len(deleted_presentations)} presentations from database")

    audit_service.log(
        user_id=admin_id,
        action="admin_purge",
        resource_type="storage",
        details={
            "presentations": purged_presentations,
            "files": purged_files,
            "images": purged_images,
            "bytes": purged_bytes,
        },
    )
    return {
        "ok": True,
        "purged_presentations": purged_presentations,
        "purged_files": purged_files,
        "purged_images": purged_images,
        "purged_bytes": purged_bytes,
    }