solventum-image-metadata/backend/app/processors/file_detector.py
SamoilenkoVadym 563d476a94 feat(backend): migrate from Flask to FastAPI with Redis sessions
- Create FastAPI application with async I/O
- Implement Redis session storage (fixes session loss on restart)
- Add JWT authentication with refresh tokens
- Add Microsoft SSO support via MSAL
- Copy all processors from src/ (100% reused, no changes)
- Create file upload/download endpoints
- Create metadata update endpoints
- Create template CRUD endpoints
- Add SQLAlchemy async database models
- Add Docker Compose configuration with Redis

Solves critical issues:
- Session management: Redis replaces in-memory dicts
- Scalability: Async FastAPI + microservices architecture
- File handling: Persistent storage with auto-cleanup

Key files:
- backend/app/main.py - FastAPI entry point
- backend/app/core/redis_client.py - Session store
- backend/app/core/auth.py - JWT authentication
- backend/app/api/* - All REST endpoints
- backend/app/processors/ - Reused from src/

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-02-09 13:14:37 +00:00

97 lines
3.2 KiB
Python

"""File type detection and routing."""
from enum import Enum
from pathlib import Path
from typing import Optional
import mimetypes
class FileType(Enum):
    """Categories a file can be classified into.

    Every member carries a short lowercase string as its value, so a member
    can be recovered from that string with ``FileType("pdf")`` and similar.
    """

    # Portable Document Format files.
    PDF = "pdf"
    # Raster image formats.
    IMAGE = "image"
    # Word-processing, spreadsheet and presentation office documents.
    OFFICE_DOC = "office_doc"
    OFFICE_SHEET = "office_sheet"
    OFFICE_PRESENTATION = "office_presentation"
    # Video containers.
    VIDEO = "video"
    # Catch-all for files that fit none of the categories above.
    UNSUPPORTED = "unsupported"
class FileDetector:
    """Classify files on disk into ``FileType`` categories.

    Detection is name-based: the file's extension is checked against the
    ``*_EXTENSIONS`` sets below, and only if none matches is the MIME type
    guessed from the file name consulted. File contents are never read.
    """

    # File extension mappings (lowercase, including the leading dot)
    PDF_EXTENSIONS = {'.pdf'}
    IMAGE_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.gif', '.tiff', '.tif', '.bmp', '.webp'}
    OFFICE_DOC_EXTENSIONS = {'.docx'}
    OFFICE_SHEET_EXTENSIONS = {'.xlsx'}
    OFFICE_PRESENTATION_EXTENSIONS = {'.pptx'}
    VIDEO_EXTENSIONS = {'.mp4', '.mov', '.avi', '.mkv', '.m4v', '.wmv'}

    @classmethod
    def detect_file_type(cls, file_path: str) -> FileType:
        """
        Detect file type based on extension and MIME type.

        Args:
            file_path: Path to the file

        Returns:
            FileType enum value; ``FileType.UNSUPPORTED`` when neither the
            extension nor the guessed MIME type matches a known category.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist.
        """
        path = Path(file_path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {file_path}")

        extension = path.suffix.lower()

        # Check by extension first. The table is built per call from ``cls``
        # so that subclasses overriding an extension set are honoured.
        extension_table = (
            (cls.PDF_EXTENSIONS, FileType.PDF),
            (cls.IMAGE_EXTENSIONS, FileType.IMAGE),
            (cls.OFFICE_DOC_EXTENSIONS, FileType.OFFICE_DOC),
            (cls.OFFICE_SHEET_EXTENSIONS, FileType.OFFICE_SHEET),
            (cls.OFFICE_PRESENTATION_EXTENSIONS, FileType.OFFICE_PRESENTATION),
            (cls.VIDEO_EXTENSIONS, FileType.VIDEO),
        )
        for known_extensions, file_type in extension_table:
            if extension in known_extensions:
                return file_type

        # Fallback to MIME type check
        mime_type, encoding = mimetypes.guess_type(str(path))
        if not mime_type:
            return FileType.UNSUPPORTED

        # A non-None encoding means the name denotes a compressed wrapper
        # (e.g. "report.pdf.gz" -> application/pdf + gzip). The bytes on disk
        # are then not in the inner format, so no handler can process them.
        if encoding is not None:
            return FileType.UNSUPPORTED

        if 'pdf' in mime_type:
            return FileType.PDF
        # Match the top-level media type rather than a substring, so that
        # types such as "application/x-apple-diskimage" or
        # "application/x-iso9660-image" are not mistaken for images.
        if mime_type.startswith('image/'):
            return FileType.IMAGE
        if mime_type.startswith('video/'):
            return FileType.VIDEO
        if 'officedocument.wordprocessingml' in mime_type:
            return FileType.OFFICE_DOC
        if 'officedocument.spreadsheetml' in mime_type:
            return FileType.OFFICE_SHEET
        if 'officedocument.presentationml' in mime_type:
            return FileType.OFFICE_PRESENTATION

        return FileType.UNSUPPORTED

    @classmethod
    def is_supported(cls, file_path: str) -> bool:
        """Check if file type is supported.

        Args:
            file_path: Path to the file

        Returns:
            True unless the file is classified as ``FileType.UNSUPPORTED``.

        Raises:
            FileNotFoundError: If ``file_path`` does not exist (propagated
                from ``detect_file_type``).
        """
        file_type = cls.detect_file_type(file_path)
        return file_type != FileType.UNSUPPORTED

    @classmethod
    def get_file_type_name(cls, file_type: FileType) -> str:
        """Get human-readable file type name.

        Args:
            file_type: The category to describe

        Returns:
            A display label for ``file_type``, or ``"Unknown"`` if the value
            has no label.
        """
        type_names = {
            FileType.PDF: "PDF Document",
            FileType.IMAGE: "Image",
            FileType.OFFICE_DOC: "Word Document",
            FileType.OFFICE_SHEET: "Excel Spreadsheet",
            FileType.OFFICE_PRESENTATION: "PowerPoint Presentation",
            FileType.VIDEO: "Video",
            FileType.UNSUPPORTED: "Unsupported File",
        }
        return type_names.get(file_type, "Unknown")