# - Create FastAPI application with async I/O
# - Implement Redis session storage (fixes session loss on restart)
# - Add JWT authentication with refresh tokens
# - Add Microsoft SSO support via MSAL
# - Copy all processors from src/ (100% reused, no changes)
# - Create file upload/download endpoints
# - Create metadata update endpoints
# - Create template CRUD endpoints
# - Add SQLAlchemy async database models
# - Add Docker Compose configuration with Redis
#
# Solves critical issues:
# - Session management: Redis replaces in-memory dicts
# - Scalability: Async FastAPI + microservices architecture
# - File handling: Persistent storage with auto-cleanup
#
# Key files:
# - backend/app/main.py - FastAPI entry point
# - backend/app/core/redis_client.py - Session store
# - backend/app/core/auth.py - JWT authentication
# - backend/app/api/* - All REST endpoints
# - backend/app/processors/ - Reused from src/
#
# Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
97 lines
3.2 KiB
Python
97 lines
3.2 KiB
Python
"""File type detection and routing."""
|
|
|
|
from enum import Enum
|
|
from pathlib import Path
|
|
from typing import Optional
|
|
import mimetypes
|
|
|
|
class FileType(Enum):
    """Categories a file can be classified into.

    Each member's value is the lowercase string identifier of the category.
    ``UNSUPPORTED`` is the catch-all for files that match none of the other
    categories.
    """

    PDF = "pdf"
    IMAGE = "image"
    OFFICE_DOC = "office_doc"
    OFFICE_SHEET = "office_sheet"
    OFFICE_PRESENTATION = "office_presentation"
    VIDEO = "video"
    UNSUPPORTED = "unsupported"
|
|
|
|
class FileDetector:
    """Classify files on disk into ``FileType`` categories.

    Detection is extension-first: the lower-cased suffix is looked up in the
    extension sets below, and only when it is in none of them is the MIME type
    guessed by ``mimetypes`` consulted.
    """

    # File extension mappings (lower-case, including the leading dot).
    PDF_EXTENSIONS = {'.pdf'}
    IMAGE_EXTENSIONS = {
        '.jpg', '.jpeg', '.png', '.gif', '.tiff', '.tif', '.bmp', '.webp',
    }
    OFFICE_DOC_EXTENSIONS = {'.docx'}
    OFFICE_SHEET_EXTENSIONS = {'.xlsx'}
    OFFICE_PRESENTATION_EXTENSIONS = {'.pptx'}
    VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.wmv'}

    @classmethod
    def detect_file_type(cls, file_path: str) -> FileType:
        """
        Detect file type based on extension and MIME type.

        The file's contents are never read; only its name is inspected.

        Args:
            file_path: Path to the file. Something must exist at this path.

        Returns:
            FileType enum value. ``FileType.UNSUPPORTED`` is returned when
            the path is not a regular file (e.g. a directory), or when
            neither the extension nor the guessed MIME type is recognized.

        Raises:
            FileNotFoundError: If nothing exists at ``file_path``.
        """
        path = Path(file_path)

        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        # A directory can be named "scan.pdf" too; classifying it by suffix
        # would hand a non-file to a document handler.
        if not path.is_file():
            return FileType.UNSUPPORTED

        extension = path.suffix.lower()

        # Check by extension first. The sets are read through ``cls`` at call
        # time so that subclasses overriding them are honored.
        extension_table = (
            (cls.PDF_EXTENSIONS, FileType.PDF),
            (cls.IMAGE_EXTENSIONS, FileType.IMAGE),
            (cls.OFFICE_DOC_EXTENSIONS, FileType.OFFICE_DOC),
            (cls.OFFICE_SHEET_EXTENSIONS, FileType.OFFICE_SHEET),
            (cls.OFFICE_PRESENTATION_EXTENSIONS, FileType.OFFICE_PRESENTATION),
            (cls.VIDEO_EXTENSIONS, FileType.VIDEO),
        )
        for extensions, file_type in extension_table:
            if extension in extensions:
                return file_type

        # Fallback to MIME type check
        mime_type, _ = mimetypes.guess_type(str(path))
        return cls._file_type_from_mime(mime_type)

    @staticmethod
    def _file_type_from_mime(mime_type: Optional[str]) -> FileType:
        """Map a MIME type string to a FileType.

        Args:
            mime_type: A ``type/subtype`` string, or ``None`` when the MIME
                type could not be guessed.

        Returns:
            The matching FileType, or ``FileType.UNSUPPORTED`` when the
            MIME type is missing or not recognized.
        """
        if not mime_type:
            return FileType.UNSUPPORTED

        major, _, minor = mime_type.lower().partition('/')

        # Match on the parsed major type / subtype rather than on substrings:
        # a plain ``'image' in mime_type`` test would also accept types such
        # as "application/x-iso9660-image", which is not a picture.
        if major == 'application' and minor in ('pdf', 'x-pdf'):
            return FileType.PDF
        if major == 'image':
            return FileType.IMAGE
        if major == 'video':
            return FileType.VIDEO

        # The OpenXML office subtypes share a long vendor prefix, so a
        # substring test on the subtype is specific enough here.
        if 'officedocument.wordprocessingml' in minor:
            return FileType.OFFICE_DOC
        if 'officedocument.spreadsheetml' in minor:
            return FileType.OFFICE_SHEET
        if 'officedocument.presentationml' in minor:
            return FileType.OFFICE_PRESENTATION

        return FileType.UNSUPPORTED

    @classmethod
    def is_supported(cls, file_path: str) -> bool:
        """Check if file type is supported.

        Args:
            file_path: Path to the file.

        Returns:
            True unless the file is classified as ``FileType.UNSUPPORTED``.

        Raises:
            FileNotFoundError: If nothing exists at ``file_path``.
        """
        file_type = cls.detect_file_type(file_path)
        return file_type != FileType.UNSUPPORTED

    @classmethod
    def get_file_type_name(cls, file_type: FileType) -> str:
        """Get human-readable file type name.

        Args:
            file_type: The FileType to describe.

        Returns:
            A display name for the type, or ``"Unknown"`` when ``file_type``
            is not one of the known members.
        """
        type_names = {
            FileType.PDF: "PDF Document",
            FileType.IMAGE: "Image",
            FileType.OFFICE_DOC: "Word Document",
            FileType.OFFICE_SHEET: "Excel Spreadsheet",
            FileType.OFFICE_PRESENTATION: "PowerPoint Presentation",
            FileType.VIDEO: "Video",
            FileType.UNSUPPORTED: "Unsupported File",
        }
        return type_names.get(file_type, "Unknown")
|