solventum-image-metadata/backend/app/api/files.py
SamoilenkoVadym 563d476a94 feat(backend): migrate from Flask to FastAPI with Redis sessions
- Create FastAPI application with async I/O
- Implement Redis session storage (fixes session loss on restart)
- Add JWT authentication with refresh tokens
- Add Microsoft SSO support via MSAL
- Copy all processors from src/ (100% reused, no changes)
- Create file upload/download endpoints
- Create metadata update endpoints
- Create template CRUD endpoints
- Add SQLAlchemy async database models
- Add Docker Compose configuration with Redis

Solves critical issues:
- Session management: Redis replaces in-memory dicts
- Scalability: Async FastAPI + microservices architecture
- File handling: Persistent storage with auto-cleanup

Key files:
- backend/app/main.py - FastAPI entry point
- backend/app/core/redis_client.py - Session store
- backend/app/core/auth.py - JWT authentication
- backend/app/api/* - All REST endpoints
- backend/app/processors/ - Reused from src/

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-02-09 13:14:37 +00:00

316 lines
9.7 KiB
Python

"""
File API Endpoints
Handles file upload, download, and session management.
"""
import json
import logging
from datetime import datetime
from pathlib import Path
from typing import List, Optional

from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException, Request, status
from fastapi.responses import FileResponse, StreamingResponse
from sqlalchemy.ext.asyncio import AsyncSession

from app.core.auth import get_current_user_id
from app.core.database import get_db, AuditLogRepository
from app.core.redis_client import RedisSessionStore
from app.services.file_service import get_file_service, FileService
from app.services.metadata_service import get_metadata_service, MetadataService
from app.processors.file_detector import FileDetector
from app.models.file import (
    UploadSessionResponse,
    FileUploadResponse,
    BatchDownloadRequest
)
# Router that the file endpoints below register themselves on.
# NOTE(review): presumably mounted under a prefix by the application entry point — confirm.
router = APIRouter()
async def _process_uploaded_file(
    uploaded_file: UploadFile,
    user_id: int,
    metadata_source: str,
    import_metadata: Optional[dict],
    template_name: Optional[str],
    file_service: FileService,
    metadata_service: MetadataService,
) -> FileUploadResponse:
    """Save one uploaded file and build its entry for the upload response."""
    file_info = await file_service.save_upload(uploaded_file, user_id)
    filepath = file_info["filepath"]

    # Detect the file type and convert it to its display name.
    file_type = FileDetector.detect_file_type(filepath)
    file_type_str = FileDetector.get_file_type_name(file_type)

    # Metadata currently stored in the file, and the suggestion for the
    # requested source.
    current_metadata = await metadata_service.extract_current_metadata(filepath)
    suggested_metadata = await metadata_service.generate_metadata(
        filepath=filepath,
        filename=file_info["filename"],
        source=metadata_source,
        import_metadata=import_metadata,
        template_name=template_name,
    )

    return FileUploadResponse(
        file_id=file_info["file_id"],
        filename=file_info["filename"],
        filepath=filepath,
        file_type=file_type_str,
        size=file_info["size"],
        uploaded_at=file_info["uploaded_at"],
        current_metadata=current_metadata,
        suggested_metadata=suggested_metadata,
        metadata_source=metadata_source,
    )


@router.post("/upload", response_model=UploadSessionResponse)
async def upload_files(
    files: List[UploadFile] = File(...),
    metadata_source: str = Form(...),
    import_session_id: Optional[str] = Form(None),
    excel_session_id: Optional[str] = Form(None),
    template_name: Optional[str] = Form(None),
    request: Request = None,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service),
    metadata_service: MetadataService = Depends(get_metadata_service)
):
    """
    Upload files and generate metadata.

    Each file is processed independently: a file that fails is logged and
    skipped so the remaining files can still be uploaded. The successfully
    processed files are stored as a file session in Redis (TTL one hour) and
    the upload is recorded in the audit log.

    Args:
        files: List of files to upload
        metadata_source: Source of metadata ('manual', 'ai', 'excel', 'import', 'template')
        import_session_id: Import session ID (for 'import' source)
        excel_session_id: Excel session ID (for 'excel' source); accepted but
            not read by this handler
        template_name: Template name (for 'template' source)

    Raises:
        HTTPException: 400 if no files were provided; 500 if none of the
            provided files could be processed.
    """
    if not files:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No files provided"
        )

    # Resolve the session store once; it is used for both the import lookup
    # and the file-session creation.
    redis: RedisSessionStore = request.app.state.redis

    # Get import metadata if import source
    import_metadata = None
    if metadata_source == "import" and import_session_id:
        import_session = await redis.get_import_session(import_session_id)
        if import_session:
            import_metadata = import_session.get("metadata", {})

    logger = logging.getLogger(__name__)

    # Process each file
    file_results = []
    for uploaded_file in files:
        try:
            file_results.append(
                await _process_uploaded_file(
                    uploaded_file,
                    user_id,
                    metadata_source,
                    import_metadata,
                    template_name,
                    file_service,
                    metadata_service,
                )
            )
        except Exception:
            # Deliberate best-effort: keep going with the other files, but
            # record the full traceback instead of a bare print().
            logger.exception("Error processing file %s", uploaded_file.filename)

    if not file_results:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to process any files"
        )

    # Create file session in Redis
    session_id = await redis.create_file_session(
        user_id=user_id,
        files_data=[result.dict() for result in file_results],
        metadata_source=metadata_source,
        ttl=3600  # 1 hour
    )

    # Log action
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="file_upload",
        details=f"Uploaded {len(file_results)} files with {metadata_source} metadata"
    )

    return UploadSessionResponse(
        success=True,
        session_id=session_id,
        files=file_results,
        message=f"Uploaded {len(file_results)} files successfully"
    )
def _find_file_in_session(raw_session, user_id: int, file_id: str) -> Optional[tuple]:
    """Return (filepath, filename) for file_id if the raw session payload is owned by user_id.

    Returns None when the payload cannot be decoded, is not a JSON object,
    belongs to a different user, does not list the file, or lists it without
    a filepath.
    """
    try:
        session = json.loads(raw_session)
    except (TypeError, ValueError):
        # A malformed entry must not break downloads for everybody else;
        # json.JSONDecodeError is a ValueError subclass.
        return None

    if not isinstance(session, dict) or session.get("user_id") != user_id:
        return None

    for file_info in session.get("files") or []:
        if file_info.get("file_id") == file_id:
            filepath = file_info.get("filepath")
            if not filepath:
                return None
            return filepath, file_info.get("filename")
    return None


@router.get("/{file_id}/download")
async def download_file(
    file_id: str,
    request: Request,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service)
):
    """
    Download a single file by file_id.

    The file is located by scanning the file sessions stored in Redis and
    only sessions whose ``user_id`` matches the caller are considered. The
    download is recorded in the audit log.

    Raises:
        HTTPException: 404 if no session of the caller lists the file or the
            file no longer exists on disk.
    """
    redis: RedisSessionStore = request.app.state.redis

    # Note: This is simplified. It walks every stored file session; in
    # production, you'd want a better indexing strategy.
    session_keys = await redis.get_all_sessions("file_session:*")

    file_path = None
    filename = None
    for session_key in session_keys:
        raw_session = await redis.redis.get(session_key)
        if not raw_session:
            continue
        located = _find_file_in_session(raw_session, user_id, file_id)
        if located:
            file_path, filename = located
            break

    if not file_path or not file_service.file_exists(file_path):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="File not found or access denied"
        )

    # Log action
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="file_download",
        details=f"Downloaded file: {filename}"
    )

    return FileResponse(
        path=file_path,
        filename=filename,
        media_type="application/octet-stream"
    )
@router.post("/download-batch")
async def download_batch(
    download_request: BatchDownloadRequest,
    request: Request,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service)
):
    """
    Download multiple files as ZIP archive.

    The files are picked from the caller's file session by position
    (``download_request.file_indices``). Indices outside
    ``0 .. len(files) - 1`` are ignored. The download is recorded in the
    audit log.

    Raises:
        HTTPException: 404 if the session does not exist or belongs to
            another user; 400 if no requested index refers to a file.
    """
    # Get file session
    redis: RedisSessionStore = request.app.state.redis
    session_data = await redis.get_file_session(download_request.session_id)
    if not session_data or session_data.get("user_id") != user_id:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Session not found or access denied"
        )

    # Get files from session
    all_files = session_data.get("files", [])

    # Filter by file_indices. The lower bound matters: without it a negative
    # index would silently wrap around and select a file from the end.
    file_count = len(all_files)
    selected_files = [
        all_files[i] for i in download_request.file_indices
        if 0 <= i < file_count
    ]
    if not selected_files:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No valid files selected"
        )

    # Create ZIP archive, named with the current local timestamp
    zip_filename = f"oliver_metadata_files_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"
    zip_path = await file_service.create_zip_archive(
        files=selected_files,
        output_filename=zip_filename
    )

    # Log action
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="batch_download",
        details=f"Downloaded {len(selected_files)} files as ZIP"
    )

    return FileResponse(
        path=str(zip_path),
        filename=zip_filename,
        media_type="application/zip"
    )
@router.delete("/session/{session_id}")
async def cleanup_session(
    session_id: str,
    request: Request,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service)
):
    """
    Remove a file session: delete its files, then the session record.

    The session must exist and its ``user_id`` must match the caller;
    otherwise a 404 is returned. The cleanup is recorded in the audit log.
    """
    store: RedisSessionStore = request.app.state.redis
    session = await store.get_file_session(session_id)

    # A missing session and a session owned by someone else are reported identically.
    owned_by_caller = bool(session) and session.get("user_id") == user_id
    if not owned_by_caller:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Session not found or access denied"
        )

    # Files go first, then the Redis entry that references them.
    deleted_count = file_service.cleanup_session_files(session.get("files", []))
    await store.delete_file_session(session_id)

    # Audit trail entry for the cleanup.
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="session_cleanup",
        details=f"Cleaned up session {session_id}, deleted {deleted_count} files"
    )

    response = {
        "success": True,
        "message": f"Session cleaned up, deleted {deleted_count} files"
    }
    return response
@router.get("/stats")
async def get_storage_stats(
    user_id: int = Depends(get_current_user_id),
    file_service: FileService = Depends(get_file_service)
):
    """
    Return the storage statistics reported by the file service.

    Intended as an admin/debug endpoint. The only gate visible here is the
    ``get_current_user_id`` dependency; ``user_id`` itself is not used.
    """
    return file_service.get_storage_stats()