87 lines
No EOL
2.4 KiB
Python
87 lines
No EOL
2.4 KiB
Python
import os
|
|
import mimetypes
|
|
from pathlib import Path
|
|
from typing import Dict, Any, Optional
|
|
from fastapi import UploadFile
|
|
|
|
def validate_file(file: UploadFile) -> Dict[str, Any]:
    """Check an upload's name and guessed MIME type and describe the file.

    The MIME type is guessed from the filename via ``mimetypes``; the file
    contents are not read here.

    Args:
        file: The uploaded file; its ``filename`` and ``size`` are read.

    Returns:
        A dict with the keys ``filename``, ``extension`` (lower-cased suffix
        of the filename), ``mime_type`` and ``size``.

    Raises:
        ValueError: If the upload has no filename, or if the guessed MIME
            type is not one of the accepted document types.
    """
    name = file.filename
    if not name:
        raise ValueError("No filename provided")

    # Lower-cased suffix of the name, e.g. ".pdf".
    suffix = Path(name).suffix.lower()

    # guess_type returns (type, encoding); only the type matters here.
    mime_type = mimetypes.guess_type(name)[0]

    accepted_types = frozenset((
        'application/pdf',
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        'application/msword',
        'text/plain',
        'text/csv',
        'application/json',
        'text/html',
        'text/markdown',
        'application/rtf',
    ))

    if mime_type in accepted_types:
        return {
            'filename': name,
            'extension': suffix,
            'mime_type': mime_type,
            'size': file.size,
        }

    # An unrecognised name yields mime_type None, which is rejected here too.
    raise ValueError(f"MIME type {mime_type} not supported")
|
|
|
|
def get_file_info(file_path: Path) -> Dict[str, Any]:
    """Return basic metadata for a file on disk.

    Args:
        file_path: Location of the file to inspect.

    Returns:
        A dict with the keys ``filename`` (final path component),
        ``extension`` (lower-cased suffix, including the dot), ``mime_type``
        (guessed from the name; ``None`` when it cannot be guessed),
        ``size`` (``st_size``), ``created_at`` (``st_ctime``) and
        ``modified_at`` (``st_mtime``).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # Stat once and translate the failure instead of exists()-then-stat():
    # this saves a syscall and closes the window in which the file could
    # disappear between the check and the stat.
    try:
        stat = file_path.stat()
    except (FileNotFoundError, NotADirectoryError):
        raise FileNotFoundError(f"File {file_path} not found") from None

    mime_type, _ = mimetypes.guess_type(str(file_path))

    return {
        'filename': file_path.name,
        'extension': file_path.suffix.lower(),
        'mime_type': mime_type,
        'size': stat.st_size,
        # NOTE: st_ctime is the creation time on Windows only; on Unix it is
        # the time of the last metadata change.
        'created_at': stat.st_ctime,
        'modified_at': stat.st_mtime,
    }
|
|
|
|
def ensure_directory(directory: Path) -> None:
    """Create ``directory`` if it is not already present.

    Args:
        directory: The directory path that should exist afterwards.
    """
    directory.mkdir(
        exist_ok=True,  # an already-existing directory is not an error
        parents=True,   # create missing ancestors as well
    )
|
|
|
|
# Characters that are swapped for "_": the punctuation Windows forbids in file
# names (which includes both path separators) plus the ASCII control
# characters 0-31. An embedded NUL in particular makes Python's file APIs
# raise ValueError.
_UNSAFE_FILENAME_CHARS = '<>:"/\\|?*' + ''.join(map(chr, range(32)))
_UNSAFE_FILENAME_TABLE = str.maketrans(dict.fromkeys(_UNSAFE_FILENAME_CHARS, '_'))


def clean_filename(filename: str) -> str:
    """Clean filename to be filesystem-safe.

    Each problematic character (``< > : " / \\ | ? *`` and ASCII control
    characters) is replaced with an underscore, then leading and trailing
    dots and spaces are removed.

    Args:
        filename: The raw name, e.g. as supplied by an uploader.

    Returns:
        The sanitised name, or ``"unnamed_file"`` when nothing is left after
        cleaning (for example for ``""`` or ``".."``).
    """
    # One translate pass replaces every unsafe character at once.
    cleaned = filename.translate(_UNSAFE_FILENAME_TABLE)

    # Remove leading/trailing dots and spaces
    cleaned = cleaned.strip('. ')

    # Ensure it's not empty
    return cleaned or "unnamed_file"
|
|
|
|
def get_upload_path(index_id: str, filename: str, base_dir: str) -> Path:
    """Generate upload path for a file, creating the index directory.

    The file is placed at ``base_dir / index_id / <cleaned filename>``. The
    ``base_dir / index_id`` directory is created if it does not exist.

    Args:
        index_id: Name of the sub-directory under ``base_dir``.
        filename: Original file name; it is passed through
            ``clean_filename`` before being appended.
        base_dir: Root directory under which uploads are stored.

    Returns:
        The path at which the file should be written.

    Raises:
        ValueError: If ``index_id`` would place the directory outside
            ``base_dir`` (for example ``"../x"`` or an absolute path).
    """
    base_path = Path(base_dir)
    index_path = base_path / index_id

    # Reject an index_id that escapes base_dir *before* creating anything on
    # disk. The comparison is purely lexical (abspath collapses ".." without
    # touching the filesystem), so symlinked upload roots keep working.
    base_abs = os.path.abspath(base_path)
    index_abs = os.path.abspath(index_path)
    try:
        contained = os.path.commonpath([base_abs, index_abs]) == base_abs
    except ValueError:
        # commonpath raises when the two paths cannot share a prefix at all
        # (e.g. different drives on Windows).
        contained = False
    if not contained:
        raise ValueError(
            f"Invalid index_id {index_id!r}: resolves outside {base_dir!r}"
        )

    ensure_directory(index_path)

    return index_path / clean_filename(filename)