- Create FastAPI application with async I/O
- Implement Redis session storage (fixes session loss on restart)
- Add JWT authentication with refresh tokens
- Add Microsoft SSO support via MSAL
- Copy all processors from src/ (100% reused, no changes)
- Create file upload/download endpoints
- Create metadata update endpoints
- Create template CRUD endpoints
- Add SQLAlchemy async database models
- Add Docker Compose configuration with Redis

Solves critical issues:
- Session management: Redis replaces in-memory dicts
- Scalability: Async FastAPI + microservices architecture
- File handling: Persistent storage with auto-cleanup

Key files:
- backend/app/main.py - FastAPI entry point
- backend/app/core/redis_client.py - Session store
- backend/app/core/auth.py - JWT authentication
- backend/app/api/* - All REST endpoints
- backend/app/processors/ - Reused from src/

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
316 lines
9.7 KiB
Python
316 lines
9.7 KiB
Python
"""
|
|
File API Endpoints
|
|
Handles file upload, download, and session management.
|
|
"""
|
|
|
|
from fastapi import APIRouter, UploadFile, File, Form, Depends, HTTPException, Request, status
|
|
from fastapi.responses import FileResponse, StreamingResponse
|
|
from sqlalchemy.ext.asyncio import AsyncSession
|
|
from typing import List, Optional
|
|
from pathlib import Path
|
|
|
|
from app.core.auth import get_current_user_id
|
|
from app.core.database import get_db, AuditLogRepository
|
|
from app.core.redis_client import RedisSessionStore
|
|
from app.services.file_service import get_file_service, FileService
|
|
from app.services.metadata_service import get_metadata_service, MetadataService
|
|
from app.processors.file_detector import FileDetector
|
|
from app.models.file import (
|
|
UploadSessionResponse,
|
|
FileUploadResponse,
|
|
BatchDownloadRequest
|
|
)
|
|
|
|
|
|
# Router on which every endpoint in this module is registered through the
# @router.post / @router.get / @router.delete decorators below.
router = APIRouter()
|
|
|
|
|
|
@router.post("/upload", response_model=UploadSessionResponse)
async def upload_files(
    files: List[UploadFile] = File(...),
    metadata_source: str = Form(...),
    import_session_id: Optional[str] = Form(None),
    excel_session_id: Optional[str] = Form(None),
    template_name: Optional[str] = Form(None),
    request: Request = None,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service),
    metadata_service: MetadataService = Depends(get_metadata_service)
):
    """
    Upload files and generate metadata.

    Each uploaded file is saved, its type is detected, its current metadata
    is extracted and suggested metadata is generated. Files that fail at any
    of these steps are logged and skipped; the remaining files are stored in
    a Redis file session that expires after one hour.

    Args:
        files: List of files to upload
        metadata_source: Source of metadata ('manual', 'ai', 'excel', 'import', 'template')
        import_session_id: Import session ID (for 'import' source)
        excel_session_id: Excel session ID (for 'excel' source); accepted but
            not read by this handler
        template_name: Template name (for 'template' source)

    Returns:
        UploadSessionResponse carrying the new session ID and one
        FileUploadResponse per successfully processed file.

    Raises:
        HTTPException: 400 if no files were provided; 500 if none of the
            provided files could be processed.
    """
    # Function-scope import, in line with the other local imports in this file.
    import logging

    logger = logging.getLogger(__name__)

    if not files:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No files provided"
        )

    # Resolve the session store once, before any file is written to disk, so
    # a missing store fails the request without leaving saved files behind.
    redis: RedisSessionStore = request.app.state.redis

    # Get import metadata if import source
    import_metadata = None
    if metadata_source == "import" and import_session_id:
        import_session = await redis.get_import_session(import_session_id)
        if import_session:
            import_metadata = import_session.get("metadata", {})

    # Process each file
    file_results = []

    for uploaded_file in files:
        try:
            # Save file
            file_info = await file_service.save_upload(uploaded_file, user_id)

            # Detect file type
            file_type = FileDetector.detect_file_type(file_info["filepath"])
            file_type_str = FileDetector.get_file_type_name(file_type)

            # Extract current metadata
            current_metadata = await metadata_service.extract_current_metadata(
                file_info["filepath"]
            )

            # Generate suggested metadata
            suggested_metadata = await metadata_service.generate_metadata(
                filepath=file_info["filepath"],
                filename=file_info["filename"],
                source=metadata_source,
                import_metadata=import_metadata,
                template_name=template_name
            )

            # Build file response
            file_results.append(
                FileUploadResponse(
                    file_id=file_info["file_id"],
                    filename=file_info["filename"],
                    filepath=file_info["filepath"],
                    file_type=file_type_str,
                    size=file_info["size"],
                    uploaded_at=file_info["uploaded_at"],
                    current_metadata=current_metadata,
                    suggested_metadata=suggested_metadata,
                    metadata_source=metadata_source
                )
            )
        except Exception:
            # Best effort: one bad file must not abort the whole batch.
            # logger.exception keeps the traceback that print() discarded.
            logger.exception(
                "Error processing file %s", uploaded_file.filename
            )

    if not file_results:
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail="Failed to process any files"
        )

    # Create file session in Redis
    session_id = await redis.create_file_session(
        user_id=user_id,
        files_data=[file.dict() for file in file_results],
        metadata_source=metadata_source,
        ttl=3600  # 1 hour
    )

    # Log action
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="file_upload",
        details=f"Uploaded {len(file_results)} files with {metadata_source} metadata"
    )

    return UploadSessionResponse(
        success=True,
        session_id=session_id,
        files=file_results,
        message=f"Uploaded {len(file_results)} files successfully"
    )
|
|
|
|
|
|
async def _find_user_file(redis, user_id, file_id):
    """
    Find the stored path and name of a file owned by the given user.

    Scans every key matching "file_session:*", ignores sessions whose
    "user_id" differs from user_id, and looks for an entry in the session's
    "files" list whose "file_id" equals file_id.

    Returns:
        A (filepath, filename) pair, or (None, None) when no session of this
        user lists the file with a non-empty path.
    """
    import json

    # Note: This is simplified. In production, you'd want a better indexing strategy
    session_keys = await redis.get_all_sessions("file_session:*")

    for session_key in session_keys:
        session_data = await redis.redis.get(session_key)
        if not session_data:
            continue

        try:
            session = json.loads(session_data)
        except ValueError:
            # A single undecodable payload (possibly another user's) must not
            # break downloads for everyone; skip it.
            continue

        # Check if this session belongs to the user
        if not isinstance(session, dict) or session.get("user_id") != user_id:
            continue

        # Search for file with matching file_id; only the first match in a
        # session is considered.
        for file_info in session.get("files", []):
            if file_info.get("file_id") == file_id:
                file_path = file_info.get("filepath")
                if file_path:
                    return file_path, file_info.get("filename")
                # Matched entry has no path: move on to the next session.
                break

    return None, None


@router.get("/{file_id}/download")
async def download_file(
    file_id: str,
    request: Request,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service)
):
    """
    Download a single file by file_id.

    The file is looked up in the caller's own Redis file sessions, so a file
    listed only in another user's session is reported as not found.

    Args:
        file_id: Identifier of the file, as stored in a file session.
        request: Incoming request; gives access to the Redis session store.
        user_id: ID of the authenticated user.
        db: Database session used for the audit log entry.
        file_service: Service used to check that the file exists.

    Returns:
        FileResponse sending the file as "application/octet-stream".

    Raises:
        HTTPException: 404 if the file is not in any of the user's sessions
            or file_service reports that its path does not exist.
    """
    redis: RedisSessionStore = request.app.state.redis

    file_path, filename = await _find_user_file(redis, user_id, file_id)

    if not file_path or not file_service.file_exists(file_path):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="File not found or access denied"
        )

    # Log action
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="file_download",
        details=f"Downloaded file: {filename}"
    )

    return FileResponse(
        path=file_path,
        filename=filename,
        media_type="application/octet-stream"
    )
|
|
|
|
|
|
@router.post("/download-batch")
async def download_batch(
    download_request: BatchDownloadRequest,
    request: Request,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service)
):
    """
    Download multiple files as ZIP archive.

    Args:
        download_request: Carries the file session ID and the indices of the
            files (positions in the session's "files" list) to include.
        request: Incoming request; gives access to the Redis session store.
        user_id: ID of the authenticated user.
        db: Database session used for the audit log entry.
        file_service: Service that builds the ZIP archive.

    Returns:
        FileResponse sending the archive as "application/zip".

    Raises:
        HTTPException: 404 if the session is missing or owned by another
            user; 400 if no requested index refers to a file in the session.
    """
    from datetime import datetime

    # Get file session
    redis: RedisSessionStore = request.app.state.redis
    session_data = await redis.get_file_session(download_request.session_id)

    if not session_data or session_data.get("user_id") != user_id:
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Session not found or access denied"
        )

    # Get files from session
    all_files = session_data.get("files", [])
    file_count = len(all_files)

    # Filter by file_indices. The lower bound matters: without it a negative
    # index would wrap around and silently pick a file from the end.
    selected_files = [
        all_files[i] for i in download_request.file_indices
        if 0 <= i < file_count
    ]

    if not selected_files:
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="No valid files selected"
        )

    # Create ZIP archive; the name carries a local-time timestamp.
    zip_filename = f"oliver_metadata_files_{datetime.now().strftime('%Y%m%d_%H%M%S')}.zip"

    zip_path = await file_service.create_zip_archive(
        files=selected_files,
        output_filename=zip_filename
    )

    # Log action
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="batch_download",
        details=f"Downloaded {len(selected_files)} files as ZIP"
    )

    # NOTE(review): this handler does not remove the archive after sending;
    # confirm that cleanup happens elsewhere.
    return FileResponse(
        path=str(zip_path),
        filename=zip_filename,
        media_type="application/zip"
    )
|
|
|
|
|
|
@router.delete("/session/{session_id}")
async def cleanup_session(
    session_id: str,
    request: Request,
    user_id: int = Depends(get_current_user_id),
    db: AsyncSession = Depends(get_db),
    file_service: FileService = Depends(get_file_service)
):
    """
    Remove a file session together with the files it lists.

    Args:
        session_id: ID of the file session to remove.
        request: Incoming request; gives access to the Redis session store.
        user_id: ID of the authenticated user.
        db: Database session used for the audit log entry.
        file_service: Service that deletes the session's files.

    Returns:
        Dict with "success" set to True and a "message" stating how many
        files were deleted.

    Raises:
        HTTPException: 404 if the session is missing or owned by another user.
    """
    store: RedisSessionStore = request.app.state.redis
    session = await store.get_file_session(session_id)

    # Only the owner of an existing session may clean it up.
    if not (session and session.get("user_id") == user_id):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Session not found or access denied"
        )

    # Files go first, then the session record that references them.
    removed = file_service.cleanup_session_files(session.get("files", []))
    await store.delete_file_session(session_id)

    # Record the cleanup in the audit log.
    await AuditLogRepository.log_action(
        db,
        user_id=user_id,
        action="session_cleanup",
        details=f"Cleaned up session {session_id}, deleted {removed} files"
    )

    return {
        "success": True,
        "message": f"Session cleaned up, deleted {removed} files"
    }
|
|
|
|
|
|
@router.get("/stats")
async def get_storage_stats(
    user_id: int = Depends(get_current_user_id),
    file_service: FileService = Depends(get_file_service)
):
    """
    Return the storage statistics reported by the file service.

    Intended as an admin/debug endpoint.
    NOTE(review): only the get_current_user_id dependency guards this route;
    no admin check is visible here - confirm whether one is applied elsewhere.
    """
    return file_service.get_storage_stats()
|