Allow document uploads (PDF, DOCX, TXT, etc.) as focus group assets

- Expand allowed file types from images-only to also include: PDF, DOCX, DOC, TXT, MD, CSV, XLSX, XLS, PPTX, PPT, RTF
- validate_asset_file: skip PIL validation for non-image files; 50MB limit for docs / 10MB for images
- Correct MIME type detection for document extensions
- Store asset_type: "document"|"image" in metadata
- ImageDescriptionService: text files → LLM summary; binary docs → label; images → existing multimodal flow

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-03-23 17:08:30 +00:00 · 2026-03-23 17:08:30 +00:00 · 33272cc677
commit 33272cc677
parent 4b47b334d7
2 changed files with 82 additions and 43 deletions
--- a/backend/app/routes/focus_groups.py
+++ b/backend/app/routes/focus_groups.py
@ -1120,43 +1120,61 @@ def ensure_upload_folder(focus_group_id):
        else:
            raise OSError(f"Main uploads directory is not writable: {main_upload_dir}")

-def is_allowed_file(filename, allowed_extensions={'jpg', 'jpeg', 'png'}):
+IMAGE_EXTENSIONS = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff'}
+DOCUMENT_EXTENSIONS = {'pdf', 'docx', 'doc', 'txt', 'md', 'csv', 'xlsx', 'xls', 'pptx', 'ppt', 'rtf'}
+ALLOWED_EXTENSIONS = IMAGE_EXTENSIONS | DOCUMENT_EXTENSIONS
+
+DOCUMENT_MIME_TYPES = {
+    'pdf': 'application/pdf',
+    'docx': 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+    'doc': 'application/msword',
+    'txt': 'text/plain',
+    'md': 'text/markdown',
+    'csv': 'text/csv',
+    'xlsx': 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+    'xls': 'application/vnd.ms-excel',
+    'pptx': 'application/vnd.openxmlformats-officedocument.presentationml.presentation',
+    'ppt': 'application/vnd.ms-powerpoint',
+    'rtf': 'application/rtf',
+}
+
+def is_allowed_file(filename, allowed_extensions=None):
    """Check if file has an allowed extension."""
+    if allowed_extensions is None:
+        allowed_extensions = ALLOWED_EXTENSIONS
    return '.' in filename and \
           filename.rsplit('.', 1)[1].lower() in allowed_extensions

-def validate_image_file(file):
-    """Validate uploaded image file."""
+def validate_asset_file(file):
+    """Validate uploaded asset file (images and documents)."""
    if not file:
        return False, "No file provided"
-    
+
    if file.filename == '':
        return False, "No file selected"
-    
+
    if not is_allowed_file(file.filename):
-        return False, "File type not allowed. Only JPG, JPEG, and PNG files are permitted"
-    
-    # Check file size (10MB limit) by reading the content length from the file stream
+        return False, f"File type not allowed. Supported types: images (JPG, PNG, GIF, WebP) and documents (PDF, DOCX, TXT, XLSX, etc.)"
+
+    # Check file size (50MB limit for documents, 10MB for images)
+    ext = file.filename.rsplit('.', 1)[1].lower() if '.' in file.filename else ''
+    size_limit = 50 * 1024 * 1024 if ext in DOCUMENT_EXTENSIONS else 10 * 1024 * 1024
    try:
-        # Store current position
        current_pos = file.tell()
-        
-        # Seek to end to get size
        file.seek(0, os.SEEK_END)
        file_size = file.tell()
-        
-        # Reset to original position
        file.seek(current_pos)
-        
-        if file_size > 10 * 1024 * 1024:  # 10MB in bytes
-            return False, "File size exceeds 10MB limit"
-            
+        if file_size > size_limit:
+            limit_mb = size_limit // (1024 * 1024)
+            return False, f"File size exceeds {limit_mb}MB limit"
    except Exception as e:
-        # If we can't check size, allow it to proceed but log the issue
        logger.warning(f"Could not validate file size: {e}")
-    
+
    return True, "Valid file"

+# Keep old name as alias for backwards compat
+validate_image_file = validate_asset_file
+
 def save_uploaded_file_directly(file, file_path):
    """Save uploaded file directly to avoid temporary file issues."""
    try:
@ -1343,12 +1361,18 @@ async def upload_assets(focus_group_id):
                # Get file info
                file_size = os.path.getsize(file_path)
                
+                # Determine mime type
+                detected_mime = file.mimetype
+                if not detected_mime or detected_mime in ('application/octet-stream', ''):
+                    detected_mime = DOCUMENT_MIME_TYPES.get(file_extension, f"image/{file_extension}")
+
                # Create asset metadata
                asset_metadata = {
                    "filename": unique_filename,
                    "original_name": original_filename,
                    "size": file_size,
-                    "mime_type": file.mimetype or f"image/{file_extension}",
+                    "mime_type": detected_mime,
+                    "asset_type": "document" if file_extension in DOCUMENT_EXTENSIONS else "image",
                    "upload_date": datetime.datetime.now(datetime.timezone.utc),
                    "file_path": file_path
                }
--- a/backend/app/services/image_description_service.py
+++ b/backend/app/services/image_description_service.py
@ -21,45 +21,60 @@ class ImageDescriptionError(Exception):
 class ImageDescriptionService:
    """Service for generating AI-powered descriptions of creative assets."""
    
+    IMAGE_EXTENSIONS = {'jpg', 'jpeg', 'png', 'gif', 'webp', 'bmp', 'tiff'}
+    TEXT_EXTENSIONS = {'txt', 'md', 'csv', 'rtf'}
+
    @staticmethod
    async def generate_description(focus_group_id: str, asset_filename: str) -> str:
        """
-        Generate a detailed AI description of a creative asset image.
-        
-        Args:
-            focus_group_id: The focus group ID containing the asset
-            asset_filename: The filename of the asset to describe
-            
-        Returns:
-            A detailed description of the image
-            
-        Raises:
-            ImageDescriptionError: If description generation fails
+        Generate a detailed AI description of a creative asset (image or document).
+
+        For images: uses multimodal LLM.
+        For text documents: reads content and summarizes with LLM.
+        For other documents (pdf, docx, xlsx): returns a descriptive label.
        """
        try:
            print(f"🎨 DESCRIPTION: Generating AI description for {asset_filename}")
-            
+
            # Resolve the full path to the asset
            asset_path = ConversationContextService._resolve_asset_path(focus_group_id, asset_filename)
            print(f"🔍 DESCRIPTION: Resolved asset path: {asset_path}")
-            
+
            # Check if file exists
            if not os.path.exists(asset_path):
                print(f"❌ DESCRIPTION: File does not exist at path: {asset_path}")
-                # List files in the directory to help debug
-                asset_dir = os.path.dirname(asset_path)
-                if os.path.exists(asset_dir):
-                    files_in_dir = os.listdir(asset_dir)
-                    print(f"🔍 DESCRIPTION: Files in directory {asset_dir}: {files_in_dir}")
-                else:
-                    print(f"❌ DESCRIPTION: Directory does not exist: {asset_dir}")
                raise ImageDescriptionError(f"Asset file not found: {asset_path}")
-            
-            # Verify the image can be loaded (optional validation)
+
+            ext = asset_filename.rsplit('.', 1)[-1].lower() if '.' in asset_filename else ''
+
+            # ── Non-image: text files ──────────────────────────────────────────────
+            if ext in ImageDescriptionService.TEXT_EXTENSIONS:
+                try:
+                    with open(asset_path, 'r', encoding='utf-8', errors='replace') as f:
+                        content = f.read(8000)  # first 8k chars
+                    from app.models.focus_group import FocusGroup
+                    focus_group = await FocusGroup.find_by_id(focus_group_id)
+                    llm_model = focus_group.get('llm_model') if focus_group else None
+                    summary = await LLMService.generate_content(
+                        prompt=f"Summarize this document in 2-3 sentences for focus group context:\n\n{content}",
+                        temperature=0.3,
+                        model_name=llm_model
+                    )
+                    return summary.strip()
+                except Exception as e:
+                    return f"Text document: {asset_filename}"
+
+            # ── Non-image: binary documents (pdf, docx, xlsx, etc.) ───────────────
+            if ext not in ImageDescriptionService.IMAGE_EXTENSIONS:
+                ext_label = ext.upper() if ext else 'Document'
+                original = asset_filename.split('-')[-1] if '-' in asset_filename else asset_filename
+                return f"{ext_label} document: {original}"
+
+            # ── Image: validate with PIL ──────────────────────────────────────────
            try:
                image = Image.open(asset_path)
                print(f"🖼️  DESCRIPTION: Validated image {asset_filename} ({image.size[0]}x{image.size[1]})")
-                image.close()  # Close the image since we're passing the path to LLM
+                image.close()
            except Exception as e:
                raise ImageDescriptionError(f"Failed to validate image {asset_filename}: {str(e)}")