Phase 4: Fix critical bugs, improve document parsing, add vision OCR

- Fix SSE stream 500: use async_session_maker inside StreamingResponse generator
  (Depends session closes when endpoint returns, before streaming starts)
- Fix template application: store template_name in prepare endpoint so worker
  uses the selected custom template instead of defaulting to "general"
- Fix OverlayLoader: replace loading.gif with HamsterLoader component
- Fix parse_mode default: change from "slides" to "layouts" to avoid 70+ layouts
- Update Gemini Flash model to gemini-3.1-flash-image-preview
- Improve DOCX parsing: python-docx for structured table extraction, OCR enabled
- Add vision-based image text extraction via Gemini for uploaded images
- Add LayoutParser integration for slide layout structure analysis
- Add Phase 4 MVP features: transfer ownership, URL input, follow-up questions,
  attachment-to-slide mapping, content router

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 14:07:00 +00:00 · 2026-02-27 14:07:00 +00:00 · e8295d6e71
commit e8295d6e71
parent 69a8829750
21 changed files with 859 additions and 62 deletions
--- a/backend/Dockerfile
+++ b/backend/Dockerfile
@ -16,6 +16,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    chromium \
    fontconfig \
    curl \
+    libgl1 \
+    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

 ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
--- a/backend/api/v1/admin/master_decks_router.py
+++ b/backend/api/v1/admin/master_decks_router.py
@ -64,7 +64,7 @@ async def _list_decks(client_id: uuid.UUID, include_inactive: bool, session: Asy
            "name": d.name,
            "description": d.description,
            "thumbnail_path": d.thumbnail_path,
-            "parse_mode": getattr(d, "parse_mode", None) or "slides",
+            "parse_mode": getattr(d, "parse_mode", None) or "layouts",
            "parse_status": d.parse_status,
            "is_active": d.is_active,
            "layouts": d.layouts,
@ -104,7 +104,7 @@ async def list_master_decks(
 async def upload_master_deck(
    client_id: uuid.UUID,
    file: UploadFile = File(...),
-    parse_mode: str = Query("slides", description="Parse mode: 'slides' (default) or 'layouts'"),
+    parse_mode: str = Query("layouts", description="Parse mode: 'layouts' (default, unique slideLayouts) or 'slides' (one layout per slide)"),
    admin: UserModel = Depends(require_client_admin),
    session: AsyncSession = Depends(get_async_session),
 ):
--- a/backend/api/v1/admin/users_router.py
+++ b/backend/api/v1/admin/users_router.py
@ -3,9 +3,12 @@ from typing import List, Optional
 import uuid

 from fastapi import APIRouter, Depends, HTTPException, Query
+from pydantic import BaseModel
+from sqlalchemy import func
 from sqlalchemy.ext.asyncio import AsyncSession
-from sqlmodel import select
+from sqlmodel import select, update

+from models.sql.presentation import PresentationModel
 from models.sql.user import UserModel
 from services.database import get_async_session
 from utils.auth_dependencies import require_super_admin
@ -15,6 +18,11 @@ USERS_ROUTER = APIRouter(prefix="/users", tags=["Admin - Users"])
 VALID_ROLES = {"super_admin", "client_admin", "user"}


+class TransferOwnershipRequest(BaseModel):
+    """Request body for the transfer-ownership endpoint."""
+
+    # User who becomes the owner of the transferred presentations.
+    new_owner_id: uuid.UUID
+    # When set, only presentations belonging to this client are transferred.
+    client_id: Optional[uuid.UUID] = None
+
+
@USERS_ROUTER.get("", response_model=List[dict])
 async def list_users(
    _: UserModel = Depends(require_super_admin),
@ -93,6 +101,55 @@ async def update_user_role(
    return {"message": "Role updated", "user_id": str(user.id), "role": role}


+@USERS_ROUTER.post("/{user_id}/transfer-ownership")
+async def transfer_ownership(
+    user_id: uuid.UUID,
+    body: TransferOwnershipRequest,
+    _: UserModel = Depends(require_super_admin),
+    session: AsyncSession = Depends(get_async_session),
+):
+    """Transfer all presentations from one user to another.
+
+    Used for GDPR compliance before deactivating a user. Only presentations
+    whose ``deleted_at`` is NULL are reassigned, optionally limited to a
+    single client. Responds with 400 when source and target are the same
+    user and with 404 when either user cannot be found.
+    """
+    recipient_id = body.new_owner_id
+    if recipient_id == user_id:
+        raise HTTPException(
+            status_code=400, detail="Cannot transfer ownership to the same user"
+        )
+
+    # Both accounts must exist; the source is looked up before the target.
+    lookups = (
+        (user_id, "Source user not found"),
+        (recipient_id, "Target user not found"),
+    )
+    for account_id, missing_detail in lookups:
+        if not await session.get(UserModel, account_id):
+            raise HTTPException(status_code=404, detail=missing_detail)
+
+    # Restrict the bulk update to the source user's non-deleted presentations.
+    filters = [
+        PresentationModel.owner_id == user_id,
+        PresentationModel.deleted_at.is_(None),
+    ]
+    if body.client_id is not None:
+        filters.append(PresentationModel.client_id == body.client_id)
+
+    outcome = await session.execute(
+        update(PresentationModel).where(*filters).values(owner_id=recipient_id)
+    )
+    await session.commit()
+
+    return {
+        "message": f"Transferred {outcome.rowcount} presentations",
+        "from_user_id": str(user_id),
+        "to_user_id": str(recipient_id),
+    }
+
+
@USERS_ROUTER.delete("/{user_id}")
 async def deactivate_user(
    user_id: uuid.UUID,
@ -106,7 +163,26 @@ async def deactivate_user(
    if user.id == admin.id:
        raise HTTPException(status_code=400, detail="Cannot deactivate yourself")

+    # Check how many active presentations this user still owns
+    count_query = (
+        select(func.count())
+        .select_from(PresentationModel)
+        .where(PresentationModel.owner_id == user_id)
+        .where(PresentationModel.deleted_at.is_(None))
+    )
+    count_result = await session.execute(count_query)
+    presentation_count = count_result.scalar_one()
+
    user.is_active = False
    session.add(user)
    await session.commit()
-    return {"message": "User deactivated", "user_id": str(user.id)}
+
+    response = {"message": "User deactivated", "user_id": str(user.id)}
+
+    if presentation_count > 0:
+        response["warning"] = (
+            f"User still has {presentation_count} active presentations. "
+            "Consider transferring ownership first."
+        )
+
+    return response
--- a/backend/api/v1/ppt/endpoints/content.py
+++ b/backend/api/v1/ppt/endpoints/content.py
@ -0,0 +1,25 @@
+from typing import List
+
+from fastapi import APIRouter, Body, Depends, HTTPException
+
+from models.sql.user import UserModel
+from services.content_intelligence_service import ContentIntelligenceService
+from utils.auth_dependencies import get_current_user
+
+CONTENT_ROUTER = APIRouter(prefix="/content", tags=["Content"])
+
+
+@CONTENT_ROUTER.post("/follow-up-questions")
+async def follow_up_questions(
+    content: str = Body(..., embed=True),
+    _current_user: UserModel = Depends(get_current_user),
+):
+    """Classify the brief and return any follow-up questions for it.
+
+    Responds with 400 when ``content`` is empty or whitespace only. The
+    result is always ``{"questions": [...]}``; the list is empty when the
+    service returns nothing.
+    """
+    if not (content and content.strip()):
+        raise HTTPException(status_code=400, detail="Content is required")
+
+    service = ContentIntelligenceService()
+    classification = await service.classify(content)
+    suggested = await service.ask_followup_questions(classification)
+
+    # Normalise a falsy service result to an empty list.
+    questions: List[str] = suggested if suggested else []
+    return {"questions": questions}
--- a/backend/api/v1/ppt/endpoints/files.py
+++ b/backend/api/v1/ppt/endpoints/files.py
@ -4,9 +4,12 @@ import os
 import uuid
 from typing import Annotated, List, Optional

-from fastapi import APIRouter, Body, File, HTTPException, UploadFile
+from fastapi import APIRouter, Body, Depends, File, HTTPException, UploadFile
 from pydantic import BaseModel

+from models.sql.user import UserModel
+from utils.auth_dependencies import get_current_user
+
 from constants.documents import (
    EXCEL_TYPES,
    IMAGE_UPLOAD_TYPES,
@ -143,17 +146,46 @@ async def decompose_files(file_paths: Annotated[List[str], Body(embed=True)]):
            )
        )

-    # --- Image files ---
+    # --- Image files (with vision-based text extraction) ---
    for img_path in image_files:
        info = extract_images_metadata(img_path)
-        response.append(
-            DecomposedFileInfo(
-                name=info.filename,
-                file_path=img_path,
-                file_type="image",
-                image_info=info.model_dump(),
+
+        # Try to extract text from image via Gemini vision
+        extracted_text = None
+        try:
+            from services.docling_service import extract_text_from_image_via_vision
+            mime_type, _ = mimetypes.guess_type(img_path)
+            with open(img_path, "rb") as f:
+                image_bytes = f.read()
+            extracted_text = await extract_text_from_image_via_vision(
+                image_bytes, mime_type or "image/png"
+            )
+        except Exception as e:
+            print(f"[decompose] Vision text extraction failed for {img_path}: {e}")
+
+        if extracted_text:
+            # Save extracted text as a text file alongside the image
+            text_path = TEMP_FILE_SERVICE.create_temp_file_path(
+                f"{uuid.uuid4()}.txt", temp_dir
+            )
+            with open(text_path, "w") as tf:
+                tf.write(extracted_text)
+            response.append(
+                DecomposedFileInfo(
+                    name=os.path.basename(img_path),
+                    file_path=text_path,
+                    file_type="text",
+                )
+            )
+        else:
+            response.append(
+                DecomposedFileInfo(
+                    name=info.filename,
+                    file_path=img_path,
+                    file_type="image",
+                    image_info=info.model_dump(),
+                )
            )
-        )

    return response

@ -182,6 +214,24 @@ async def parse_url_endpoint(body: UrlParseRequest):
    return UrlParseResponse(content=content, url=body.url)


+@FILES_ROUTER.post("/fetch-url")
+async def fetch_url_content(
+    url: str = Body(..., embed=True),
+    _current_user: UserModel = Depends(get_current_user),
+):
+    """Fetch a URL and extract its text content.
+
+    Responds with 400 when the URL does not start with ``http://`` or
+    ``https://`` and with 422 when no content could be extracted.
+    """
+    # NOTE(review): the host is not restricted, so an authenticated user can
+    # make the server request internal addresses — confirm this is intended.
+    accepted_prefixes = ("http://", "https://")
+    if not url.startswith(accepted_prefixes):
+        raise HTTPException(status_code=400, detail="Invalid URL")
+
+    extracted = await parse_url(url)
+    if extracted:
+        return {"text": extracted, "url": url}
+
+    raise HTTPException(
+        status_code=422, detail="Could not extract content from URL"
+    )
+
+
@FILES_ROUTER.post("/update")
 async def update_files(
    file_path: Annotated[str, Body()],
--- a/backend/api/v1/ppt/endpoints/presentation.py
+++ b/backend/api/v1/ppt/endpoints/presentation.py
@ -43,7 +43,7 @@ from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline
 from models.sql.slide import SlideModel
 from models.sse_response import SSECompleteResponse, SSEErrorResponse, SSEResponse

-from services.database import get_async_session
+from services.database import get_async_session, async_session_maker
 from services.temp_file_service import TEMP_FILE_SERVICE
 from services.concurrent_service import CONCURRENT_SERVICE
 from models.sql.presentation import PresentationModel
@ -290,6 +290,7 @@ async def prepare_presentation(
    sql_session.add(presentation)
    presentation.outlines = presentation_outline_model.model_dump(mode="json")
    presentation.title = title or presentation.title
+    presentation.template_name = layout.name
    presentation.set_layout(layout)
    presentation.set_structure(presentation_structure)
    await sql_session.commit()
@ -319,11 +320,18 @@ async def stream_presentation(

    image_generation_service = ImageGenerationService(get_images_directory())

-    async def inner():
-        structure = presentation.get_structure()
-        layout = presentation.get_layout()
-        outline = presentation.get_presentation_outline()
+    # Capture data before returning StreamingResponse, because the Depends
+    # session is closed once this function returns.
+    pres_id = id
+    structure = presentation.get_structure()
+    layout = presentation.get_layout()
+    outline = presentation.get_presentation_outline()
+    pres_language = presentation.language
+    pres_tone = presentation.tone
+    pres_verbosity = presentation.verbosity
+    pres_instructions = presentation.instructions

+    async def inner():
        # These tasks will be gathered and awaited after all slides are generated
        async_assets_generation_tasks = []

@ -339,17 +347,17 @@ async def stream_presentation(
                slide_content = await get_slide_content_from_type_and_outline(
                    slide_layout,
                    outline.slides[i],
-                    presentation.language,
-                    presentation.tone,
-                    presentation.verbosity,
-                    presentation.instructions,
+                    pres_language,
+                    pres_tone,
+                    pres_verbosity,
+                    pres_instructions,
                )
            except HTTPException as e:
                yield SSEErrorResponse(detail=e.detail).to_string()
                return

            slide = SlideModel(
-                presentation=id,
+                presentation=pres_id,
                layout_group=layout.name,
                layout=slide_layout.id,
                index=i,
@ -381,21 +389,24 @@ async def stream_presentation(
        for assets_list in generated_assets_lists:
            generated_assets.extend(assets_list)

-        # Moved this here to make sure new slides are generated before deleting the old ones
-        await sql_session.execute(
-            delete(SlideModel).where(SlideModel.presentation == id)
-        )
-        await sql_session.commit()
+        # Use a new session for DB writes — the Depends session is already
+        # closed by the time the streaming generator executes.
+        async with async_session_maker() as session:
+            await session.execute(
+                delete(SlideModel).where(SlideModel.presentation == pres_id)
+            )
+            await session.commit()

-        sql_session.add(presentation)
-        sql_session.add_all(slides)
-        sql_session.add_all(generated_assets)
-        await sql_session.commit()
+            pres = await session.get(PresentationModel, pres_id)
+            session.add(pres)
+            session.add_all(slides)
+            session.add_all(generated_assets)
+            await session.commit()

-        response = PresentationWithSlides(
-            **presentation.model_dump(),
-            slides=slides,
-        )
+            response = PresentationWithSlides(
+                **pres.model_dump(),
+                slides=slides,
+            )

        yield SSECompleteResponse(
            key="presentation",
--- a/backend/api/v1/ppt/router.py
+++ b/backend/api/v1/ppt/router.py
@ -15,6 +15,7 @@ from api.v1.ppt.endpoints.ollama import OLLAMA_ROUTER
 from api.v1.ppt.endpoints.outlines import OUTLINES_ROUTER
 from api.v1.ppt.endpoints.slide import SLIDE_ROUTER
 from api.v1.ppt.endpoints.pptx_slides import PPTX_FONTS_ROUTER
+from api.v1.ppt.endpoints.content import CONTENT_ROUTER


 API_V1_PPT_ROUTER = APIRouter(prefix="/api/v1/ppt")
@ -37,3 +38,4 @@ API_V1_PPT_ROUTER.include_router(OPENAI_ROUTER)
 API_V1_PPT_ROUTER.include_router(ANTHROPIC_ROUTER)
 API_V1_PPT_ROUTER.include_router(GOOGLE_ROUTER)
 API_V1_PPT_ROUTER.include_router(PPTX_FONTS_ROUTER)
+API_V1_PPT_ROUTER.include_router(CONTENT_ROUTER)
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@ -20,7 +20,10 @@ dependencies = [
    "pathvalidate>=3.3.1",
    "pdfplumber>=0.11.7",
    "pytest>=8.4.1",
+    "python-docx>=1.1",
    "python-pptx>=1.0.2",
+    "layoutparser>=0.3",
+    "opencv-python-headless>=4.8",
    "redis>=5.0,<6",
    "sqlmodel>=0.0.24",
    "alembic>=1.15",
--- a/backend/services/docling_service.py
+++ b/backend/services/docling_service.py
@ -1,3 +1,13 @@
+"""Document parsing service.
+
+Uses Docling for PDF/PPTX and python-docx for DOCX (better table handling).
+Optionally extracts text from embedded images via Gemini vision.
+"""
+import asyncio
+import base64
+import os
+from typing import List, Optional
+
 from docling.document_converter import (
    DocumentConverter,
    PdfFormatOption,
@ -11,7 +21,7 @@ from docling.datamodel.base_models import InputFormat
 class DoclingService:
    def __init__(self):
        self.pipeline_options = PdfPipelineOptions()
-        self.pipeline_options.do_ocr = False
+        self.pipeline_options.do_ocr = True

        self.converter = DocumentConverter(
            allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX],
@ -29,5 +39,163 @@ class DoclingService:
        )

    def parse_to_markdown(self, file_path: str) -> str:
+        """Parse any supported document to markdown via Docling."""
        result = self.converter.convert(file_path)
        return result.document.export_to_markdown()
+
+    def parse_docx_structured(self, file_path: str) -> str:
+        """Parse a DOCX file to markdown, preferring the python-docx parser.
+
+        Any exception raised by the python-docx path (including a failed
+        import of the library, which happens inside that method) is printed
+        and the file is parsed with Docling instead.
+        """
+        try:
+            return self._parse_docx_with_python_docx(file_path)
+        except Exception as e:
+            print(f"[DoclingService] python-docx parsing failed ({e}), falling back to Docling")
+            return self.parse_to_markdown(file_path)
+
+    def _parse_docx_with_python_docx(self, file_path: str) -> str:
+        """Convert a DOCX file to markdown using python-docx.
+
+        Body-level paragraphs and tables are visited in document order:
+
+        * a paragraph whose style name contains "heading" becomes a markdown
+          heading; the level is the first digit in the style name (default 1),
+          clamped to the 1-6 range markdown supports;
+        * any other non-empty paragraph is emitted as plain text;
+        * a table is rendered with ``_table_to_markdown``.
+
+        When the document has image relationships, an "Embedded Images"
+        section with one placeholder bullet per image is appended.
+
+        Returns the parts joined by blank lines. Exceptions (including a
+        missing python-docx install) propagate to the caller.
+        """
+        from docx import Document
+        from docx.table import Table
+        from docx.text.paragraph import Paragraph
+
+        doc = Document(file_path)
+        parts: List[str] = []
+
+        # iter_inner_content() yields Paragraph/Table proxies in document
+        # order, so there is no need to rescan doc.paragraphs / doc.tables
+        # for every body element.
+        for block in doc.iter_inner_content():
+            if isinstance(block, Paragraph):
+                text = block.text.strip()
+                if not text:
+                    continue
+
+                style_name = (block.style.name or "").lower() if block.style else ""
+                if "heading" not in style_name:
+                    parts.append(text)
+                    continue
+
+                # "Heading 2" -> level 2; styles without a digit default to 1.
+                level = next(
+                    (int(ch) for ch in style_name if ch.isdecimal()), 1
+                )
+                level = max(1, min(level, 6))
+                parts.append(f"{'#' * level} {text}")
+
+            elif isinstance(block, Table):
+                md_table = _table_to_markdown(block)
+                if md_table:
+                    parts.append(md_table)
+
+        # Also list embedded images (placeholders only).
+        embedded_images = self._extract_docx_images(doc)
+        if embedded_images:
+            parts.append("\n## Embedded Images\n")
+            parts.extend(f"- {desc}" for desc in embedded_images)
+
+        return "\n\n".join(parts)
+
+    def _extract_docx_images(self, doc) -> List[str]:
+        """Return one placeholder entry per image relationship in ``doc``.
+
+        Alt text is not read: every relationship whose type contains
+        "image" is reported as ``"[Embedded image]"``. This is best-effort;
+        if walking the relationships fails, the error is printed and the
+        entries collected so far are returned.
+        """
+        descriptions: List[str] = []
+        try:
+            for rel in doc.part.rels.values():
+                if "image" in rel.reltype:
+                    descriptions.append("[Embedded image]")
+        except Exception as e:
+            # Image listing is optional; never fail the whole parse for it.
+            print(f"[DoclingService] Could not enumerate DOCX images: {e}")
+        return descriptions
+
+
+def _find_paragraph_by_element(doc, element):
+    """Return the entry of ``doc.paragraphs`` wrapping ``element``, or None.
+
+    Matching is by identity of the paragraph's ``_element``.
+    """
+    return next(
+        (candidate for candidate in doc.paragraphs if candidate._element is element),
+        None,
+    )
+
+
+def _find_table_by_element(doc, element):
+    """Return the entry of ``doc.tables`` wrapping ``element``, or None.
+
+    Matching is by identity of the table's ``_element``.
+    """
+    return next(
+        (candidate for candidate in doc.tables if candidate._element is element),
+        None,
+    )
+
+
+def _table_to_markdown(table) -> str:
+    """Convert a python-docx Table to a markdown table string.
+
+    The first row is used as the header. Later rows are padded with empty
+    cells or truncated to the header width. Pipes in cell text are escaped
+    and line breaks inside a cell are flattened to spaces, because a raw
+    newline would split the markdown row.
+
+    Horizontally merged cells are emitted once and followed by empty cells.
+    A repeat is detected by identity of the cell's underlying ``_tc``
+    element, so neighbouring cells that merely contain the same text are
+    kept.
+
+    Returns an empty string for a table without rows.
+    """
+    grid = []
+    for row in table.rows:
+        rendered = []
+        previous_tc = None
+        for cell in row.cells:
+            tc = getattr(cell, "_tc", None)
+            if tc is not None and tc is previous_tc:
+                rendered.append("")  # continuation of a merged cell
+            else:
+                escaped = cell.text.strip().replace("|", "\\|")
+                rendered.append(
+                    " ".join(
+                        line.strip() for line in escaped.splitlines() if line.strip()
+                    )
+                )
+            previous_tc = tc
+        grid.append(rendered)
+
+    if not grid:
+        return ""
+
+    header, *body = grid
+    width = len(header)
+
+    lines = [
+        "| " + " | ".join(header) + " |",
+        "| " + " | ".join(["---"] * width) + " |",
+    ]
+    for row_cells in body:
+        # Pad short rows and drop overflow so every row matches the header.
+        fitted = (row_cells + [""] * (width - len(row_cells)))[:width]
+        lines.append("| " + " | ".join(fitted) + " |")
+
+    return "\n".join(lines)
+
+
+async def extract_text_from_image_via_vision(image_bytes: bytes, mime_type: str = "image/png") -> Optional[str]:
+    """Use Gemini vision to extract text from an image.
+
+    Args:
+        image_bytes: Raw (not base64-encoded) image content.
+        mime_type: MIME type of the image.
+
+    Returns:
+        The extracted text, or None when ``GOOGLE_API_KEY`` is not set, the
+        image is empty, the model reports that no text was found, or the
+        call fails for any reason (the error is printed, never raised).
+    """
+    try:
+        import google.genai as genai
+        from google.genai import types
+
+        api_key = os.environ.get("GOOGLE_API_KEY")
+        if not api_key or not image_bytes:
+            return None
+
+        # Use the key that was just validated rather than relying on the
+        # client's own environment lookup.
+        client = genai.Client(api_key=api_key)
+
+        prompt = (
+            "Extract all text from this image. Return only the extracted text, "
+            "nothing else. If no text is found, return 'No text found'."
+        )
+
+        # The SDK call is blocking, so run it in a worker thread. The image
+        # is handed over as raw bytes via Part.from_bytes; the SDK takes care
+        # of the wire encoding.
+        response = await asyncio.to_thread(
+            client.models.generate_content,
+            model="gemini-2.5-flash",
+            contents=[
+                prompt,
+                types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
+            ],
+        )
+
+        text = response.text.strip() if response.text else None
+        if not text:
+            return None
+
+        # Tolerate quoting/punctuation around the sentinel answer.
+        if text.strip("'\"`.! ").lower() == "no text found":
+            return None
+        return text
+    except Exception as e:
+        print(f"[DoclingService] Vision text extraction failed: {e}")
+        return None
--- a/backend/services/documents_loader.py
+++ b/backend/services/documents_loader.py
@ -92,7 +92,8 @@ class DocumentsLoader:
            return await asyncio.to_thread(file.read)

    def load_msword(self, file_path: str) -> str:
-        return self.docling_service.parse_to_markdown(file_path)
+        """Parse DOCX with python-docx for better table/structure handling."""
+        return self.docling_service.parse_docx_structured(file_path)

    def load_powerpoint(self, file_path: str) -> str:
        return self.docling_service.parse_to_markdown(file_path)
--- a/backend/services/image_generation_service.py
+++ b/backend/services/image_generation_service.py
@ -193,9 +193,9 @@ class ImageGenerationService:
    async def generate_image_gemini_flash(
        self, prompt: str, output_directory: str
    ) -> str:
-        """Generate image using Gemini Flash (gemini-2.5-flash-image-preview)."""
+        """Generate image using Gemini Flash (gemini-3.1-flash-image-preview)."""
        return await self._generate_image_google(
-            prompt, output_directory, "gemini-2.5-flash-image-preview"
+            prompt, output_directory, "gemini-3.1-flash-image-preview"
        )

    async def generate_image_nanobanana_pro(
--- a/backend/services/layout_analysis_service.py
+++ b/backend/services/layout_analysis_service.py
@ -0,0 +1,142 @@
+"""Layout analysis service using LayoutParser for slide structure detection.
+
+Analyzes slide screenshots to detect regions (text, image, table, title)
+and provides structural metadata for LLM-based code generation.
+"""
+import os
+from typing import List, Optional
+
+# LayoutParser is optional — graceful fallback if not installed
+_LAYOUTPARSER_AVAILABLE = False
+try:
+    import layoutparser as lp
+    _LAYOUTPARSER_AVAILABLE = True
+except ImportError:
+    pass
+
+
+class DetectedRegion:
+    """Bounding box of one detected slide region.
+
+    Holds the region label (``type``), the corner coordinates ``(x1, y1)``
+    and ``(x2, y2)`` and a detection ``score`` (1.0 unless given).
+    """
+    __slots__ = ("type", "x1", "y1", "x2", "y2", "score")
+
+    def __init__(self, type: str, x1: float, y1: float, x2: float, y2: float, score: float = 1.0):
+        self.type, self.score = type, score
+        self.x1, self.y1 = x1, y1
+        self.x2, self.y2 = x2, y2
+
+    def to_dict(self) -> dict:
+        """Return a dict with coordinates rounded to ints and score to 3 decimals."""
+        payload = {"type": self.type}
+        for corner in ("x1", "y1", "x2", "y2"):
+            payload[corner] = round(getattr(self, corner))
+        payload["score"] = round(self.score, 3)
+        return payload
+
+
+# Detector shared by all calls. It is created on first use so that importing
+# this module stays cheap and the model is built once instead of once per
+# analysed slide.
+_LAYOUT_MODEL = None
+
+
+def _get_layout_model():
+    """Return the shared PubLayNet detector, building it on first use."""
+    # NOTE(review): the instance is reused across worker threads; confirm the
+    # detector is safe to call concurrently if decks are parsed in parallel.
+    global _LAYOUT_MODEL
+    if _LAYOUT_MODEL is None:
+        # Use PubLayNet model — detects: Text, Title, List, Table, Figure
+        _LAYOUT_MODEL = lp.Detectron2LayoutModel(
+            config_path="lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config",
+            label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
+            extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5],
+        )
+    return _LAYOUT_MODEL
+
+
+def analyze_slide_layout(image_path: str) -> List[DetectedRegion]:
+    """Analyze a slide screenshot and return detected layout regions.
+
+    Uses LayoutParser with a PubLayNet model if available.
+
+    Returns an empty list when LayoutParser is not installed, the file does
+    not exist, the image cannot be read, or detection fails for any reason
+    (the error is printed, never raised).
+    """
+    if not _LAYOUTPARSER_AVAILABLE:
+        return []
+
+    if not os.path.exists(image_path):
+        return []
+
+    try:
+        import cv2
+
+        image = cv2.imread(image_path)
+        if image is None:
+            return []
+
+        layout = _get_layout_model().detect(image)
+
+        return [
+            DetectedRegion(
+                type=block.type,
+                x1=block.block.x_1,
+                y1=block.block.y_1,
+                x2=block.block.x_2,
+                y2=block.block.y_2,
+                score=block.score,
+            )
+            for block in layout
+        ]
+
+    except Exception as e:
+        print(f"[LayoutAnalysis] Detection failed: {e}")
+        return []
+
+
+def regions_to_description(regions: List[DetectedRegion], image_width: int = 960, image_height: int = 540) -> str:
+    """Describe detected regions as text for LLM context.
+
+    Regions are listed top-to-bottom, then left-to-right. Position and size
+    are expressed as whole percentages of ``image_width`` / ``image_height``.
+    Returns an empty string when there are no regions.
+    """
+    if not regions:
+        return ""
+
+    ordered = sorted(regions, key=lambda region: (region.y1, region.x1))
+
+    bullets = []
+    for region in ordered:
+        left = round(region.x1 / image_width * 100)
+        top = round(region.y1 / image_height * 100)
+        width = round((region.x2 - region.x1) / image_width * 100)
+        height = round((region.y2 - region.y1) / image_height * 100)
+        bullets.append(
+            f"- {region.type}: position ({left}%, {top}%), size ({width}% x {height}%), confidence: {region.score:.0%}"
+        )
+
+    heading = "Detected layout regions (coordinates as % of slide dimensions):"
+    return "\n".join([heading, *bullets])
+
+
+def classify_layout_from_regions(regions: List[DetectedRegion]) -> Optional[str]:
+    """Classify slide layout type based on detected regions.
+
+    The rules are evaluated in order and the first match wins:
+
+    1. only Title regions                      -> "title_slide"
+    2. Title + Figure without Text             -> "picture"
+    3. any Table                               -> "table"
+    4. two or more Text, or Text + List        -> "two_column"
+    5. Title with Text or List                 -> "content"
+    6. Figure with Text or Title               -> "picture_with_caption"
+    7. none of the five known region types     -> "blank"
+    8. anything else                           -> "content"
+
+    Returns None only when ``regions`` is empty.
+    """
+    if not regions:
+        return None
+
+    kinds = [region.type for region in regions]
+    present = set(kinds)
+
+    text_count = kinds.count("Text")
+    has_text = text_count > 0
+    has_title = "Title" in present
+    has_figure = "Figure" in present
+    has_table = "Table" in present
+    has_list = "List" in present
+
+    # A title slide has a title and nothing else. A List must be excluded
+    # too, otherwise Title + List never reaches the "content" rule below.
+    if has_title and not (has_text or has_figure or has_table or has_list):
+        return "title_slide"
+    if has_title and has_figure and not has_text:
+        return "picture"
+    if has_table:
+        return "table"
+    if text_count >= 2 or (has_text and has_list):
+        return "two_column"
+    if has_title and (has_text or has_list):
+        return "content"
+    if has_figure and (has_text or has_title):
+        return "picture_with_caption"
+    if not (has_title or has_text or has_figure or has_table or has_list):
+        return "blank"
+
+    return "content"
--- a/backend/services/master_deck_parser_service.py
+++ b/backend/services/master_deck_parser_service.py
@ -461,7 +461,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
            raise ValueError("Deck not found")
        pptx_path = deck.original_file_path
        client_id = deck.client_id
-        parse_mode = getattr(deck, "parse_mode", None) or "slides"
+        parse_mode = getattr(deck, "parse_mode", None) or "layouts"

    if not os.path.exists(pptx_path):
        raise FileNotFoundError(f"PPTX file not found: {pptx_path}")
@ -529,29 +529,55 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
        print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}")
        print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM")

+        # Optional: LayoutParser region detection for better classification
+        from services.layout_analysis_service import (
+            analyze_slide_layout,
+            classify_layout_from_regions,
+            regions_to_description,
+        )
+
        for idx, lm in enumerate(primary_metas):
+            screenshot_path = screenshots[idx] if idx < len(screenshots) else None
+
+            # Try LayoutParser classification if a screenshot is available
+            lp_layout_type = None
+            lp_region_desc = ""
+            if screenshot_path and os.path.exists(screenshot_path):
+                try:
+                    regions = await asyncio.to_thread(analyze_slide_layout, screenshot_path)
+                    if regions:
+                        lp_layout_type = classify_layout_from_regions(regions)
+                        lp_region_desc = regions_to_description(regions)
+                except Exception as lp_err:
+                    print(f"[MasterDeckParser] LayoutParser skipped for {idx}: {lp_err}")
+
            layout_entry = {
                "index": idx,
                "layout_name": lm["layout_name"],
-                "layout_type": _guess_layout_type(lm["layout_name"]),
+                "layout_type": lp_layout_type or _guess_layout_type(lm["layout_name"]),
                "xml_snippet": lm["xml_content"][:2000],
                "fonts": list(
                    {normalize_font_family_name(f) for f in extract_fonts_from_oxml(lm["xml_content"]) if f}
                ),
                "html": None,
                "react_code": None,
-                "screenshot_path": screenshots[idx] if idx < len(screenshots) else None,
+                "screenshot_path": screenshot_path,
            }

            # Run LLM pipeline if provider available and we have a screenshot
-            if llm_provider and idx < len(screenshots) and os.path.exists(screenshots[idx]):
+            if llm_provider and screenshot_path and os.path.exists(screenshot_path):
                try:
                    print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating HTML...")
-                    with open(screenshots[idx], "rb") as img_f:
+                    with open(screenshot_path, "rb") as img_f:
                        img_b64 = base64.b64encode(img_f.read()).decode("utf-8")

+                    # Include LayoutParser region info in LLM context
+                    xml_context = lm["xml_content"]
+                    if lp_region_desc:
+                        xml_context = f"{lp_region_desc}\n\n---\n\n{xml_context}"
+
                    html = await _llm_generate_html(
-                        llm_provider, img_b64, lm["xml_content"],
+                        llm_provider, img_b64, xml_context,
                        layout_entry["fonts"] or None,
                    )
                    html = html.replace("```html", "").replace("```", "")
--- a/frontend/app/(presentation-generator)/generate/configure/page.tsx
+++ b/frontend/app/(presentation-generator)/generate/configure/page.tsx
@ -21,6 +21,7 @@ import {
 } from "@/store/slices/presentationGeneration";
 import { Button } from "@/components/ui/button";
 import { Textarea } from "@/components/ui/textarea";
+import { Input } from "@/components/ui/input";
 import { Label } from "@/components/ui/label";
 import { Slider } from "@/components/ui/slider";
 import {
@ -30,7 +31,7 @@ import {
  SelectTrigger,
  SelectValue,
 } from "@/components/ui/select";
-import { ChevronLeft, ChevronRight, Layers } from "lucide-react";
+import { ChevronLeft, ChevronRight, Layers, MessageCircleQuestion } from "lucide-react";
 import { toast } from "sonner";
 import { cn } from "@/lib/utils";
 import { OverlayLoader } from "@/components/ui/overlay-loader";
@ -70,6 +71,9 @@ export default function WizardConfigurePage() {
  const [loadingClients, setLoadingClients] = useState(true);
  const [loadingDecks, setLoadingDecks] = useState(false);
  const [isGenerating, setIsGenerating] = useState(false);
+  const [followUpQuestions, setFollowUpQuestions] = useState<string[]>([]);
+  const [followUpAnswers, setFollowUpAnswers] = useState<Record<string, string>>({});
+  const [loadingFollowUp, setLoadingFollowUp] = useState(false);

  // Fetch clients on mount
  useEffect(() => {
@ -90,6 +94,23 @@ export default function WizardConfigurePage() {
      .finally(() => setLoadingDecks(false));
  }, [wizard.selectedClientId]);

+  // Ask the backend whether the brief needs follow-up questions.
+  // Briefs shorter than 10 characters (after trimming) are skipped without a
+  // request and any previously shown questions are cleared.
+  useEffect(() => {
+    const briefContent = wizard.briefText;
+    if (!briefContent || briefContent.trim().length < 10) {
+      setFollowUpQuestions([]);
+      return;
+    }
+    // Set by the cleanup below so a response that arrives after the page has
+    // unmounted does not touch component state.
+    let cancelled = false;
+    setLoadingFollowUp(true);
+    WizardApi.checkFollowUpQuestions(briefContent)
+      .then((questions) => {
+        if (cancelled) return;
+        setFollowUpQuestions(questions);
+        // New questions invalidate answers keyed by the old question text.
+        setFollowUpAnswers({});
+      })
+      .finally(() => {
+        if (!cancelled) setLoadingFollowUp(false);
+      });
+    return () => {
+      cancelled = true;
+    };
+    // eslint-disable-next-line react-hooks/exhaustive-deps
+  }, []); // Run once on page load
+
  const handleBack = () => {
    dispatch(setWizardStep(1));
    router.push("/generate/upload");
@ -109,6 +130,16 @@ export default function WizardConfigurePage() {
        .map((f) => f.serverPath)
        .filter(Boolean) as string[];

+      // Append follow-up Q&A to instructions if any answers are provided
+      let finalInstructions = wizard.instructions;
+      const answeredPairs = followUpQuestions
+        .filter((q) => followUpAnswers[q]?.trim())
+        .map((q) => `Q: ${q}\nA: ${followUpAnswers[q].trim()}`);
+      if (answeredPairs.length > 0) {
+        const qaSuffix = "\n\n--- Follow-up Context ---\n" + answeredPairs.join("\n\n");
+        finalInstructions = (finalInstructions || "") + qaSuffix;
+      }
+
      // Create presentation (outline mode)
      const result = await WizardApi.createPresentation({
        content: wizard.briefText,
@ -116,7 +147,7 @@ export default function WizardConfigurePage() {
        file_paths: filePaths,
        language: wizard.language,
        tone: wizard.tone,
-        instructions: wizard.instructions,
+        instructions: finalInstructions,
        client_id: wizard.selectedClientId ?? undefined,
        master_deck_id: wizard.selectedDeckId ?? undefined,
      });
@ -266,6 +297,39 @@ export default function WizardConfigurePage() {
          </Select>
        </div>

+        {/* Follow-Up Questions */}
+        {loadingFollowUp && (
+          <div className="rounded-xl border border-amber-200 bg-amber-50 p-4">
+            <p className="text-sm text-amber-700">Checking if we need more context...</p>
+          </div>
+        )}
+        {followUpQuestions.length > 0 && !loadingFollowUp && (
+          <div className="rounded-xl border border-amber-200 bg-amber-50 p-4 space-y-4">
+            <div className="flex items-center gap-2 text-amber-800">
+              <MessageCircleQuestion className="w-5 h-5 flex-shrink-0" />
+              <p className="text-sm font-medium">
+                A few quick questions to improve your presentation
+              </p>
+            </div>
+            {followUpQuestions.map((question, idx) => (
+              <div key={idx} className="space-y-1.5">
+                <Label className="text-sm text-amber-900">{question}</Label>
+                <Input
+                  placeholder="Your answer (optional)"
+                  value={followUpAnswers[question] ?? ""}
+                  onChange={(e) =>
+                    setFollowUpAnswers((prev) => ({
+                      ...prev,
+                      [question]: e.target.value,
+                    }))
+                  }
+                  className="bg-white border-amber-200 focus:border-amber-400"
+                />
+              </div>
+            ))}
+          </div>
+        )}
+
        {/* Instructions */}
        <div>
          <Label className="mb-2 block">Additional Instructions</Label>
--- a/frontend/app/(presentation-generator)/generate/outline/page.tsx
+++ b/frontend/app/(presentation-generator)/generate/outline/page.tsx
@ -9,6 +9,7 @@ import {
  setJobId,
  setPresentationId as setWizardPresentationId,
  WizardOutlineItem,
+  toggleSlideAttachment,
 } from "@/store/slices/wizardSlice";
 import { clearPresentationData } from "@/store/slices/presentationGeneration";
 import { useOutlineStreaming } from "../../outline/hooks/useOutlineStreaming";
@ -22,8 +23,15 @@ import {
  FileText,
  Layers,
  Loader2,
+  Paperclip,
 } from "lucide-react";
 import { toast } from "sonner";
+import {
+  Popover,
+  PopoverContent,
+  PopoverTrigger,
+} from "@/components/ui/popover";
+import { Checkbox } from "@/components/ui/checkbox";
 import { OverlayLoader } from "@/components/ui/overlay-loader";
 import Wrapper from "@/components/Wrapper";
 import { PresentationGenerationApi } from "../../services/api/presentation-generation";
@ -183,15 +191,88 @@ export default function WizardOutlinePage() {
                    Uploaded Files
                  </h4>
                  <div className="space-y-2">
-                    {wizard.uploadedFiles.map((f, i) => (
-                      <div
-                        key={i}
-                        className="flex items-center gap-2 p-2 rounded-lg bg-gray-50 text-xs"
-                      >
-                        <FileText className="w-3.5 h-3.5 text-[#5146E5]" />
-                        <span className="truncate flex-1">{f.name}</span>
-                      </div>
-                    ))}
+                    {wizard.uploadedFiles.map((f, i) => {
+                      // Count how many slides this file is linked to
+                      const linkedCount = Object.values(
+                        wizard.slideAttachments
+                      ).filter((names) => names.includes(f.name)).length;
+
+                      return (
+                        <div
+                          key={i}
+                          className="flex items-center gap-2 p-2 rounded-lg bg-gray-50 text-xs"
+                        >
+                          <FileText className="w-3.5 h-3.5 text-[#5146E5] flex-shrink-0" />
+                          <span className="truncate flex-1">{f.name}</span>
+
+                          {/* Link to slides popover */}
+                          {outlines && outlines.length > 0 && (
+                            <Popover>
+                              <PopoverTrigger asChild>
+                                <button
+                                  className="inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium text-gray-500 hover:text-[#5146E5] hover:bg-[#5146E5]/5 transition-colors flex-shrink-0"
+                                  title="Link to slides"
+                                >
+                                  <Paperclip className="w-3 h-3" />
+                                  {linkedCount > 0 && (
+                                    <span className="text-[#5146E5]">
+                                      {linkedCount}
+                                    </span>
+                                  )}
+                                </button>
+                              </PopoverTrigger>
+                              <PopoverContent
+                                side="right"
+                                align="start"
+                                className="w-64 p-3"
+                              >
+                                <p className="text-xs font-semibold text-gray-700 mb-2">
+                                  Link to slides
+                                </p>
+                                <div className="space-y-1.5 max-h-48 overflow-y-auto">
+                                  {outlines.map((outline, slideIdx) => {
+                                    const title =
+                                      (outline.content || "")
+                                        .split("\n")[0]
+                                        ?.replace(/^#+\s*/, "")
+                                        .trim() || `Slide ${slideIdx + 1}`;
+                                    const isLinked = (
+                                      wizard.slideAttachments[slideIdx] || []
+                                    ).includes(f.name);
+
+                                    return (
+                                      <label
+                                        key={slideIdx}
+                                        className="flex items-center gap-2 p-1.5 rounded hover:bg-gray-50 cursor-pointer text-xs"
+                                      >
+                                        <Checkbox
+                                          checked={isLinked}
+                                          onCheckedChange={() =>
+                                            dispatch(
+                                              toggleSlideAttachment({
+                                                slideIndex: slideIdx,
+                                                fileName: f.name,
+                                              })
+                                            )
+                                          }
+                                          className="h-3.5 w-3.5"
+                                        />
+                                        <span className="text-gray-600 font-medium w-5 flex-shrink-0">
+                                          {slideIdx + 1}.
+                                        </span>
+                                        <span className="truncate text-gray-700">
+                                          {title}
+                                        </span>
+                                      </label>
+                                    );
+                                  })}
+                                </div>
+                              </PopoverContent>
+                            </Popover>
+                          )}
+                        </div>
+                      );
+                    })}
                  </div>
                </div>
              )}
@ -248,6 +329,8 @@ export default function WizardOutlinePage() {
                    highestActiveIndex={streamState.highestActiveIndex}
                    onDragEnd={handleDragEnd}
                    onAddSlide={handleAddSlide}
+                    slideAttachments={wizard.slideAttachments}
+                    uploadedFiles={wizard.uploadedFiles}
                  />
                </TabsContent>

--- a/frontend/app/(presentation-generator)/generate/upload/page.tsx
+++ b/frontend/app/(presentation-generator)/generate/upload/page.tsx
@ -11,8 +11,9 @@ import {
  setWizardStep,
  WizardUploadedFile,
 } from "@/store/slices/wizardSlice";
-import { Upload, X, FileText, ChevronRight, Plus } from "lucide-react";
+import { Upload, X, FileText, ChevronRight, Plus, Link } from "lucide-react";
 import { Button } from "@/components/ui/button";
+import { Input } from "@/components/ui/input";
 import { Textarea } from "@/components/ui/textarea";
 import { toast } from "sonner";
 import { cn } from "@/lib/utils";
@ -60,6 +61,8 @@ export default function WizardUploadPage() {
  const [localFiles, setLocalFiles] = useState<File[]>([]);
  const [isDragging, setIsDragging] = useState(false);
  const [isProcessing, setIsProcessing] = useState(false);
+  const [referenceUrl, setReferenceUrl] = useState("");
+  const [isFetchingUrl, setIsFetchingUrl] = useState(false);
  const fileInputRef = useRef<HTMLInputElement>(null);

  const handleDragOver = (e: React.DragEvent) => {
@ -116,6 +119,27 @@ export default function WizardUploadPage() {

  const allFiles = uploadedFiles; // display list from Redux

+  /**
+   * Fetch the text behind `referenceUrl` and append it to the brief.
+   *
+   * Shows an error toast when the URL field is blank or the request fails.
+   * On success the fetched text is appended to the brief and the URL field
+   * is cleared. Calls made while a fetch is already running are ignored.
+   */
+  const handleFetchUrl = async () => {
+    // The Fetch button is disabled while a request is running, but the Enter
+    // key handler on the input is not, so guard against re-entry here.
+    if (isFetchingUrl) {
+      return;
+    }
+    const url = referenceUrl.trim();
+    if (!url) {
+      toast.error("Please enter a URL");
+      return;
+    }
+    try {
+      setIsFetchingUrl(true);
+      const text = await WizardApi.fetchUrl(url);
+      // Put a "---" divider between an existing brief and the fetched text.
+      // NOTE(review): briefText is the value this closure captured before the
+      // await; edits made to the brief while the request is in flight would
+      // be overwritten by this dispatch — confirm whether that matters.
+      const separator = briefText.trim() ? "\n\n---\n\n" : "";
+      dispatch(setBriefText(briefText + separator + text));
+      toast.success("URL content fetched and appended to brief");
+      setReferenceUrl("");
+    } catch (error: any) {
+      // Optional chaining: a thrown null/undefined must not raise inside catch.
+      toast.error("Failed to fetch URL", {
+        description: error?.message || "Please check the URL and try again.",
+      });
+    } finally {
+      setIsFetchingUrl(false);
+    }
+  };
+
  const handleNext = async () => {
    if (!briefText.trim() && allFiles.length === 0) {
      toast.error("Please enter a brief or upload documents");
@ -244,6 +268,39 @@ export default function WizardUploadPage() {
        </div>
      )}

+      {/* Reference URL */}
+      <div className="mt-6">
+        <label className="block text-sm font-medium text-gray-700 mb-2">
+          <Link className="w-4 h-4 inline mr-1.5 -mt-0.5" />
+          Or add a reference URL
+        </label>
+        <div className="flex gap-2">
+          <Input
+            type="text"
+            placeholder="https://example.com/article"
+            value={referenceUrl}
+            onChange={(e) => setReferenceUrl(e.target.value)}
+            onKeyDown={(e) => {
+              if (e.key === "Enter") {
+                e.preventDefault();
+                handleFetchUrl();
+              }
+            }}
+            className="flex-1"
+          />
+          <Button
+            type="button"
+            variant="outline"
+            size="sm"
+            onClick={handleFetchUrl}
+            disabled={isFetchingUrl || !referenceUrl.trim()}
+            className="px-4 whitespace-nowrap"
+          >
+            {isFetchingUrl ? "Fetching..." : "Fetch"}
+          </Button>
+        </div>
+      </div>
+
      {/* Brief Text */}
      <div className="mt-6">
        <label className="block text-sm font-medium text-gray-700 mb-2">
--- a/frontend/app/(presentation-generator)/outline/components/OutlineContent.tsx
+++ b/frontend/app/(presentation-generator)/outline/components/OutlineContent.tsx
@ -16,6 +16,7 @@ import {
 import { OutlineItem } from "./OutlineItem";
 import { Button } from "@/components/ui/button";
 import { FileText, Loader2 } from "lucide-react";
+import type { WizardUploadedFile } from "@/store/slices/wizardSlice";

 interface OutlineContentProps {
    outlines: { content: string }[] | null;
@ -25,6 +26,10 @@ interface OutlineContentProps {
    highestActiveIndex: number;
    onDragEnd: (event: any) => void;
    onAddSlide: () => void;
+    /** Map of slide index -> attached file names */
+    slideAttachments?: Record<number, string[]>;
+    /** All uploaded files (for reference) */
+    uploadedFiles?: WizardUploadedFile[];
 }

 const OutlineContent: React.FC<OutlineContentProps> = ({
@ -34,7 +39,9 @@ const OutlineContent: React.FC<OutlineContentProps> = ({
    activeSlideIndex,
    highestActiveIndex,
    onDragEnd,
-    onAddSlide
+    onAddSlide,
+    slideAttachments,
+    uploadedFiles,
 }) => {
    const sensors = useSensors(
        useSensor(PointerSensor),
@ -104,6 +111,7 @@ const OutlineContent: React.FC<OutlineContentProps> = ({
                                isStreaming={isStreaming}
                                isActiveStreaming={activeSlideIndex === index}
                                isStableStreaming={highestActiveIndex >= 0 && index < highestActiveIndex}
+                                attachedFiles={slideAttachments?.[index]}
                            />
                        ))
                        ) :
@ -119,6 +127,7 @@ const OutlineContent: React.FC<OutlineContentProps> = ({
                                    isStreaming={isStreaming}
                                    isActiveStreaming={false}
                                    isStableStreaming={false}
+                                    attachedFiles={slideAttachments?.[index]}
                                />
                            ))}
                        </SortableContext>}
--- a/frontend/app/(presentation-generator)/outline/components/OutlineItem.tsx
+++ b/frontend/app/(presentation-generator)/outline/components/OutlineItem.tsx
@ -1,6 +1,6 @@
 import { useSortable } from "@dnd-kit/sortable"
 import { CSS } from "@dnd-kit/utilities"
-import { Trash2 } from "lucide-react"
+import { Paperclip, Trash2 } from "lucide-react"
 import { RootState } from "@/store/store"
 import { useDispatch, useSelector } from "react-redux"
 import { deleteSlideOutline, setOutlines } from "@/store/slices/presentationGeneration"
@ -18,6 +18,8 @@ interface OutlineItemProps {
    isStreaming: boolean
    isActiveStreaming?: boolean
    isStableStreaming?: boolean
+    /** File names attached to this slide */
+    attachedFiles?: string[]
 }

 export function OutlineItem({
@ -26,6 +28,7 @@ export function OutlineItem({
    isStreaming,
    isActiveStreaming = false,
    isStableStreaming = false,
+    attachedFiles,
 }: OutlineItemProps) {
    const {
        outlines,
@ -164,6 +167,21 @@ export function OutlineItem({
                        />
                    )}

+                    {/* Attached file badges */}
+                    {attachedFiles && attachedFiles.length > 0 && (
+                        <div className="flex flex-wrap gap-1.5 mt-1.5">
+                            {attachedFiles.map((fileName) => (
+                                <span
+                                    key={fileName}
+                                    className="inline-flex items-center gap-1 rounded-full bg-[#5146E5]/5 border border-[#5146E5]/15 px-2 py-0.5 text-[10px] text-[#5146E5]/80"
+                                >
+                                    <Paperclip className="w-2.5 h-2.5" />
+                                    <span className="truncate max-w-[120px]">{fileName}</span>
+                                </span>
+                            ))}
+                        </div>
+                    )}
+
                </div>

                {/* Action Buttons */}
--- a/frontend/app/(presentation-generator)/services/api/wizard.ts
+++ b/frontend/app/(presentation-generator)/services/api/wizard.ts
@ -121,6 +121,34 @@ export class WizardApi {
    await ApiResponseHandler.handleResponse(response, "Failed to cancel job");
  }

+  /**
+   * POST a URL to the fetch-url endpoint and return the handled response.
+   *
+   * The raw response is passed to ApiResponseHandler.handleResponse together
+   * with the message "Failed to fetch URL".
+   * NOTE(review): the declared result is a string; confirm that
+   * handleResponse yields plain text for this endpoint, not a JSON object.
+   */
+  static async fetchUrl(url: string): Promise<string> {
+    const init: RequestInit = {
+      method: "POST",
+      headers: getHeader(),
+      body: JSON.stringify({ url }),
+      cache: "no-cache",
+    };
+    const response = await fetch("/api/v1/ppt/files/fetch-url", init);
+    const extracted = await ApiResponseHandler.handleResponse(
+      response,
+      "Failed to fetch URL"
+    );
+    return extracted;
+  }
+
+  /**
+   * Ask the backend for follow-up questions about a brief.
+   *
+   * Returns an empty list without a request when the brief is empty or
+   * shorter than 10 characters after trimming. Any error raised while
+   * requesting or handling the response is swallowed and also yields an
+   * empty list, so this method never rejects.
+   */
+  static async checkFollowUpQuestions(content: string): Promise<string[]> {
+    const tooShort = !content || content.trim().length < 10;
+    if (tooShort) {
+      return [];
+    }
+    try {
+      const init: RequestInit = {
+        method: "POST",
+        headers: getHeader(),
+        body: JSON.stringify({ content }),
+        cache: "no-cache",
+      };
+      const response = await fetch("/api/v1/ppt/content/follow-up-questions", init);
+      const data = await ApiResponseHandler.handleResponse(response, "");
+      // A missing "questions" field is treated as "no questions".
+      return data.questions ?? [];
+    } catch {
+      // Best effort: follow-up questions are optional, so failures are silent.
+      return [];
+    }
+  }
+
  /** Create presentation (outline-only, like existing flow) */
  static async createPresentation(params: {
    content: string;
--- a/frontend/components/ui/overlay-loader.tsx
+++ b/frontend/components/ui/overlay-loader.tsx
@ -1,5 +1,5 @@
 import { cn } from "@/lib/utils"
-import { Loader } from "./loader"
+import { HamsterLoader } from "./hamster-loader"
 import { ProgressBar } from "./progress-bar"
 import { useEffect, useState } from "react"

@ -53,7 +53,9 @@ export const OverlayLoader = ({
                )}

            >
-                <img loading="eager" src={'/loading.gif'} alt="loading" width={250} height={250} />
+                <div className="py-8">
+                    <HamsterLoader size="lg" />
+                </div>
                {showProgress ? (
                    <div className="w-full space-y-6 pt-4">
                        <ProgressBar
--- a/frontend/store/slices/wizardSlice.ts
+++ b/frontend/store/slices/wizardSlice.ts
@ -39,6 +39,8 @@ interface WizardState {
  presentationId: string | null;
  /** Decomposed document data from server */
  decomposedFiles: any[];
+  /** Map of slide index -> attached file names */
+  slideAttachments: Record<number, string[]>;
 }

 const STORAGE_KEY = "deckforge_wizard";
@ -77,6 +79,7 @@ const defaultState: WizardState = {
  jobId: null,
  presentationId: null,
  decomposedFiles: [],
+  slideAttachments: {},
 };

 const persisted = loadFromStorage();
@ -142,6 +145,31 @@ const wizardSlice = createSlice({
      state.decomposedFiles = action.payload;
      saveToStorage(state);
    },
+    // Replace the whole slide-index -> attached-file-names map with the
+    // payload, then persist the wizard state via saveToStorage.
+    setSlideAttachments: (
+      state,
+      action: PayloadAction<Record<number, string[]>>
+    ) => {
+      state.slideAttachments = action.payload;
+      saveToStorage(state);
+    },
+    // Attach `fileName` to the slide at `slideIndex` if it is not attached
+    // yet, otherwise detach it. A slide left with no attachments has its key
+    // removed from the map. The wizard state is persisted afterwards.
+    toggleSlideAttachment: (
+      state,
+      action: PayloadAction<{ slideIndex: number; fileName: string }>
+    ) => {
+      const { slideIndex, fileName } = action.payload;
+      const attached = state.slideAttachments[slideIndex] || [];
+      const remaining = attached.filter((name) => name !== fileName);
+      if (remaining.length === attached.length) {
+        // Nothing was filtered out, so the file was not attached: add it.
+        state.slideAttachments[slideIndex] = [...attached, fileName];
+      } else if (remaining.length === 0) {
+        // Last attachment removed: drop the slide's entry entirely.
+        delete state.slideAttachments[slideIndex];
+      } else {
+        state.slideAttachments[slideIndex] = remaining;
+      }
+      saveToStorage(state);
+    },
    resetWizard: (state) => {
      Object.assign(state, defaultState);
      if (typeof window !== "undefined") {
@ -165,6 +193,8 @@ export const {
  setJobId,
  setPresentationId,
  setDecomposedFiles,
+  setSlideAttachments,
+  toggleSlideAttachment,
  resetWizard,
 } = wizardSlice.actions;