diff --git a/backend/Dockerfile b/backend/Dockerfile index 0fdd935..0159fdb 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -16,6 +16,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ chromium \ fontconfig \ curl \ + libgl1 \ + libglib2.0-0 \ && rm -rf /var/lib/apt/lists/* ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium diff --git a/backend/api/v1/admin/master_decks_router.py b/backend/api/v1/admin/master_decks_router.py index 11e85c1..33fcb44 100644 --- a/backend/api/v1/admin/master_decks_router.py +++ b/backend/api/v1/admin/master_decks_router.py @@ -64,7 +64,7 @@ async def _list_decks(client_id: uuid.UUID, include_inactive: bool, session: Asy "name": d.name, "description": d.description, "thumbnail_path": d.thumbnail_path, - "parse_mode": getattr(d, "parse_mode", None) or "slides", + "parse_mode": getattr(d, "parse_mode", None) or "layouts", "parse_status": d.parse_status, "is_active": d.is_active, "layouts": d.layouts, @@ -104,7 +104,7 @@ async def list_master_decks( async def upload_master_deck( client_id: uuid.UUID, file: UploadFile = File(...), - parse_mode: str = Query("slides", description="Parse mode: 'slides' (default) or 'layouts'"), + parse_mode: str = Query("layouts", description="Parse mode: 'layouts' (default, unique slideLayouts) or 'slides' (one layout per slide)"), admin: UserModel = Depends(require_client_admin), session: AsyncSession = Depends(get_async_session), ): diff --git a/backend/api/v1/admin/users_router.py b/backend/api/v1/admin/users_router.py index dc6a52a..7ba8d07 100644 --- a/backend/api/v1/admin/users_router.py +++ b/backend/api/v1/admin/users_router.py @@ -3,9 +3,12 @@ from typing import List, Optional import uuid from fastapi import APIRouter, Depends, HTTPException, Query +from pydantic import BaseModel +from sqlalchemy import func from sqlalchemy.ext.asyncio import AsyncSession -from sqlmodel import select +from sqlmodel import select, update +from models.sql.presentation import PresentationModel from models.sql.user import UserModel from services.database import get_async_session from utils.auth_dependencies import require_super_admin @@ -15,6 +18,11 @@ USERS_ROUTER = APIRouter(prefix="/users", tags=["Admin - Users"]) VALID_ROLES = {"super_admin", "client_admin", "user"} +class TransferOwnershipRequest(BaseModel): + new_owner_id: uuid.UUID + client_id: Optional[uuid.UUID] = None + + @USERS_ROUTER.get("", response_model=List[dict]) async def list_users( _: UserModel = Depends(require_super_admin), @@ -93,6 +101,55 @@ async def update_user_role( return {"message": "Role updated", "user_id": str(user.id), "role": role} +@USERS_ROUTER.post("/{user_id}/transfer-ownership") +async def transfer_ownership( + user_id: uuid.UUID, + body: TransferOwnershipRequest, + _: UserModel = Depends(require_super_admin), + session: AsyncSession = Depends(get_async_session), +): + """Transfer all presentations from one user to another. + + Used for GDPR compliance before deactivating a user. + """ + if user_id == body.new_owner_id: + raise HTTPException( + status_code=400, detail="Cannot transfer ownership to the same user" + ) + + # Validate source user exists + source_user = await session.get(UserModel, user_id) + if not source_user: + raise HTTPException(status_code=404, detail="Source user not found") + + # Validate target user exists + target_user = await session.get(UserModel, body.new_owner_id) + if not target_user: + raise HTTPException(status_code=404, detail="Target user not found") + + # Build the update statement for non-deleted presentations owned by the source user + stmt = ( + update(PresentationModel) + .where(PresentationModel.owner_id == user_id) + .where(PresentationModel.deleted_at.is_(None)) + ) + + if body.client_id is not None: + stmt = stmt.where(PresentationModel.client_id == body.client_id) + + stmt = stmt.values(owner_id=body.new_owner_id) + result = await session.execute(stmt) + await session.commit() + + transferred_count = result.rowcount + + return { + "message": f"Transferred {transferred_count} presentations", + "from_user_id": str(user_id), + "to_user_id": str(body.new_owner_id), + } + + @USERS_ROUTER.delete("/{user_id}") async def deactivate_user( user_id: uuid.UUID, @@ -106,7 +163,26 @@ async def deactivate_user( if user.id == admin.id: raise HTTPException(status_code=400, detail="Cannot deactivate yourself") + # Check how many active presentations this user still owns + count_query = ( + select(func.count()) + .select_from(PresentationModel) + .where(PresentationModel.owner_id == user_id) + .where(PresentationModel.deleted_at.is_(None)) + ) + count_result = await session.execute(count_query) + presentation_count = count_result.scalar_one() + user.is_active = False session.add(user) await session.commit() - return {"message": "User deactivated", "user_id": str(user.id)} + + response = {"message": "User deactivated", "user_id": str(user.id)} + + if presentation_count > 0: + response["warning"] = ( + f"User still has {presentation_count} active presentations. " + "Consider transferring ownership first." + ) + + return response diff --git a/backend/api/v1/ppt/endpoints/content.py b/backend/api/v1/ppt/endpoints/content.py new file mode 100644 index 0000000..2196128 --- /dev/null +++ b/backend/api/v1/ppt/endpoints/content.py @@ -0,0 +1,25 @@ +from typing import List + +from fastapi import APIRouter, Body, Depends, HTTPException + +from models.sql.user import UserModel +from services.content_intelligence_service import ContentIntelligenceService +from utils.auth_dependencies import get_current_user + +CONTENT_ROUTER = APIRouter(prefix="/content", tags=["Content"]) + + +@CONTENT_ROUTER.post("/follow-up-questions") +async def follow_up_questions( + content: str = Body(..., embed=True), + _current_user: UserModel = Depends(get_current_user), +): + """Classify content and return follow-up questions if the brief is thin.""" + if not content or not content.strip(): + raise HTTPException(status_code=400, detail="Content is required") + + ci_service = ContentIntelligenceService() + classified = await ci_service.classify(content) + questions: List[str] = await ci_service.ask_followup_questions(classified) or [] + + return {"questions": questions} diff --git a/backend/api/v1/ppt/endpoints/files.py b/backend/api/v1/ppt/endpoints/files.py index 77dbd40..bbe0cde 100644 --- a/backend/api/v1/ppt/endpoints/files.py +++ b/backend/api/v1/ppt/endpoints/files.py @@ -4,9 +4,12 @@ import os import uuid from typing import Annotated, List, Optional -from fastapi import APIRouter, Body, File, HTTPException, UploadFile +from fastapi import APIRouter, Body, Depends, File, HTTPException, UploadFile from pydantic import BaseModel +from models.sql.user import UserModel +from utils.auth_dependencies import get_current_user + from constants.documents import ( EXCEL_TYPES, IMAGE_UPLOAD_TYPES, @@ -143,17 +146,46 @@ async def decompose_files(file_paths: Annotated[List[str], Body(embed=True)]): ) ) - # --- Image files --- + # --- Image files (with vision-based text extraction) --- for img_path in image_files: info = extract_images_metadata(img_path) - response.append( - DecomposedFileInfo( - name=info.filename, - file_path=img_path, - file_type="image", - image_info=info.model_dump(), + + # Try to extract text from image via Gemini vision + extracted_text = None + try: + from services.docling_service import extract_text_from_image_via_vision + mime_type, _ = mimetypes.guess_type(img_path) + with open(img_path, "rb") as f: + image_bytes = f.read() + extracted_text = await extract_text_from_image_via_vision( + image_bytes, mime_type or "image/png" + ) + except Exception as e: + print(f"[decompose] Vision text extraction failed for {img_path}: {e}") + + if extracted_text: + # Save extracted text as a text file alongside the image + text_path = TEMP_FILE_SERVICE.create_temp_file_path( + f"{uuid.uuid4()}.txt", temp_dir + ) + with open(text_path, "w") as tf: + tf.write(extracted_text) + response.append( + DecomposedFileInfo( + name=os.path.basename(img_path), + file_path=text_path, + file_type="text", + ) + ) + else: + response.append( + DecomposedFileInfo( + name=info.filename, + file_path=img_path, + file_type="image", + image_info=info.model_dump(), + ) ) - ) return response @@ -182,6 +214,24 @@ async def parse_url_endpoint(body: UrlParseRequest): return UrlParseResponse(content=content, url=body.url) +@FILES_ROUTER.post("/fetch-url") +async def fetch_url_content( + url: str = Body(..., embed=True), + _current_user: UserModel = Depends(get_current_user), +): + """Fetch a URL and extract its text content.""" + if not url.startswith(("http://", "https://")): + raise HTTPException(status_code=400, detail="Invalid URL") + + text = await parse_url(url) + if not text: + raise HTTPException( + status_code=422, detail="Could not extract content from URL" + ) + + return {"text": text, "url": url} + + @FILES_ROUTER.post("/update") async def update_files( file_path: Annotated[str, Body()], diff --git a/backend/api/v1/ppt/endpoints/presentation.py b/backend/api/v1/ppt/endpoints/presentation.py index a9bfd09..2088aa8 100644 --- a/backend/api/v1/ppt/endpoints/presentation.py +++ b/backend/api/v1/ppt/endpoints/presentation.py @@ -43,7 +43,7 @@ from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline from models.sql.slide import SlideModel from models.sse_response import SSECompleteResponse, SSEErrorResponse, SSEResponse -from services.database import get_async_session +from services.database import get_async_session, async_session_maker from services.temp_file_service import TEMP_FILE_SERVICE from services.concurrent_service import CONCURRENT_SERVICE from models.sql.presentation import PresentationModel @@ -290,6 +290,7 @@ async def prepare_presentation( sql_session.add(presentation) presentation.outlines = presentation_outline_model.model_dump(mode="json") presentation.title = title or presentation.title + presentation.template_name = layout.name presentation.set_layout(layout) presentation.set_structure(presentation_structure) await sql_session.commit() @@ -319,11 +320,18 @@ async def stream_presentation( image_generation_service = ImageGenerationService(get_images_directory()) - async def inner(): - structure = presentation.get_structure() - layout = presentation.get_layout() - outline = presentation.get_presentation_outline() + # Capture data before returning StreamingResponse, because the Depends + # session is closed once this function returns. + pres_id = id + structure = presentation.get_structure() + layout = presentation.get_layout() + outline = presentation.get_presentation_outline() + pres_language = presentation.language + pres_tone = presentation.tone + pres_verbosity = presentation.verbosity + pres_instructions = presentation.instructions + async def inner(): # These tasks will be gathered and awaited after all slides are generated async_assets_generation_tasks = [] @@ -339,17 +347,17 @@ async def stream_presentation( slide_content = await get_slide_content_from_type_and_outline( slide_layout, outline.slides[i], - presentation.language, - presentation.tone, - presentation.verbosity, - presentation.instructions, + pres_language, + pres_tone, + pres_verbosity, + pres_instructions, ) except HTTPException as e: yield SSEErrorResponse(detail=e.detail).to_string() return slide = SlideModel( - presentation=id, + presentation=pres_id, layout_group=layout.name, layout=slide_layout.id, index=i, @@ -381,21 +389,24 @@ async def stream_presentation( for assets_list in generated_assets_lists: generated_assets.extend(assets_list) - # Moved this here to make sure new slides are generated before deleting the old ones - await sql_session.execute( - delete(SlideModel).where(SlideModel.presentation == id) - ) - await sql_session.commit() + # Use a new session for DB writes — the Depends session is already + # closed by the time the streaming generator executes. + async with async_session_maker() as session: + await session.execute( + delete(SlideModel).where(SlideModel.presentation == pres_id) + ) + await session.commit() - sql_session.add(presentation) - sql_session.add_all(slides) - sql_session.add_all(generated_assets) - await sql_session.commit() + pres = await session.get(PresentationModel, pres_id) + session.add(pres) + session.add_all(slides) + session.add_all(generated_assets) + await session.commit() - response = PresentationWithSlides( - **presentation.model_dump(), - slides=slides, - ) + response = PresentationWithSlides( + **pres.model_dump(), + slides=slides, + ) yield SSECompleteResponse( key="presentation", diff --git a/backend/api/v1/ppt/router.py b/backend/api/v1/ppt/router.py index 1f89a2f..d3aff16 100644 --- a/backend/api/v1/ppt/router.py +++ b/backend/api/v1/ppt/router.py @@ -15,6 +15,7 @@ from api.v1.ppt.endpoints.ollama import OLLAMA_ROUTER from api.v1.ppt.endpoints.outlines import OUTLINES_ROUTER from api.v1.ppt.endpoints.slide import SLIDE_ROUTER from api.v1.ppt.endpoints.pptx_slides import PPTX_FONTS_ROUTER +from api.v1.ppt.endpoints.content import CONTENT_ROUTER API_V1_PPT_ROUTER = APIRouter(prefix="/api/v1/ppt") @@ -37,3 +38,4 @@ API_V1_PPT_ROUTER.include_router(OPENAI_ROUTER) API_V1_PPT_ROUTER.include_router(ANTHROPIC_ROUTER) API_V1_PPT_ROUTER.include_router(GOOGLE_ROUTER) API_V1_PPT_ROUTER.include_router(PPTX_FONTS_ROUTER) +API_V1_PPT_ROUTER.include_router(CONTENT_ROUTER) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index bf3b9cc..ba94517 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -20,7 +20,10 @@ dependencies = [ "pathvalidate>=3.3.1", "pdfplumber>=0.11.7", "pytest>=8.4.1", + "python-docx>=1.1", "python-pptx>=1.0.2", + "layoutparser>=0.3", + "opencv-python-headless>=4.8", "redis>=5.0,<6", "sqlmodel>=0.0.24", "alembic>=1.15", diff --git a/backend/services/docling_service.py b/backend/services/docling_service.py index f6ae203..1d3b22b 100644 --- a/backend/services/docling_service.py +++ b/backend/services/docling_service.py @@ -1,3 +1,13 @@ +"""Document parsing service. + +Uses Docling for PDF/PPTX and python-docx for DOCX (better table handling). +Optionally extracts text from embedded images via Gemini vision. +""" +import asyncio +import base64 +import os +from typing import List, Optional + from docling.document_converter import ( DocumentConverter, PdfFormatOption, @@ -11,7 +21,7 @@ from docling.datamodel.base_models import InputFormat class DoclingService: def __init__(self): self.pipeline_options = PdfPipelineOptions() - self.pipeline_options.do_ocr = False + self.pipeline_options.do_ocr = True self.converter = DocumentConverter( allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX], @@ -29,5 +39,163 @@ class DoclingService: ) def parse_to_markdown(self, file_path: str) -> str: + """Parse any supported document to markdown via Docling.""" result = self.converter.convert(file_path) return result.document.export_to_markdown() + + def parse_docx_structured(self, file_path: str) -> str: + """Parse DOCX with python-docx for better table/structure handling. + + Falls back to Docling if python-docx is not available. + """ + try: + return self._parse_docx_with_python_docx(file_path) + except Exception as e: + print(f"[DoclingService] python-docx parsing failed ({e}), falling back to Docling") + return self.parse_to_markdown(file_path) + + def _parse_docx_with_python_docx(self, file_path: str) -> str: + """Extract text from DOCX using python-docx with proper table handling.""" + from docx import Document + + doc = Document(file_path) + parts: List[str] = [] + + for element in doc.element.body: + tag = element.tag.split("}")[-1] if "}" in element.tag else element.tag + + if tag == "p": + # Paragraph + para = _find_paragraph_by_element(doc, element) + if para is not None: + text = para.text.strip() + if text: + # Check heading style + style_name = (para.style.name or "").lower() if para.style else "" + if "heading" in style_name: + level = 1 + for ch in style_name: + if ch.isdigit(): + level = int(ch) + break + parts.append(f"{'#' * level} {text}") + else: + parts.append(text) + + elif tag == "tbl": + # Table — extract as markdown table + tbl = _find_table_by_element(doc, element) + if tbl is not None: + md_table = _table_to_markdown(tbl) + if md_table: + parts.append(md_table) + + # Also extract images descriptions if possible + embedded_images = self._extract_docx_images(doc) + if embedded_images: + parts.append("\n## Embedded Images\n") + for desc in embedded_images: + parts.append(f"- {desc}") + + return "\n\n".join(parts) + + def _extract_docx_images(self, doc) -> List[str]: + """Extract image descriptions from DOCX. + + Returns alt text for images, or placeholder if no alt text. + """ + descriptions = [] + try: + for rel in doc.part.rels.values(): + if "image" in rel.reltype: + descriptions.append("[Embedded image]") + except Exception: + pass + return descriptions + + +def _find_paragraph_by_element(doc, element): + """Find a Paragraph object matching the given XML element.""" + for para in doc.paragraphs: + if para._element is element: + return para + return None + + +def _find_table_by_element(doc, element): + """Find a Table object matching the given XML element.""" + for table in doc.tables: + if table._element is element: + return table + return None + + +def _table_to_markdown(table) -> str: + """Convert a python-docx Table to a markdown table string.""" + rows = [] + for row in table.rows: + cells = [cell.text.strip().replace("|", "\\|") for cell in row.cells] + rows.append(cells) + + if not rows: + return "" + + # Deduplicate merged cells (python-docx repeats merged cell text) + clean_rows = [] + for row_cells in rows: + clean = [] + for i, cell_text in enumerate(row_cells): + if i > 0 and cell_text == row_cells[i - 1]: + clean.append("") # merged cell + else: + clean.append(cell_text) + clean_rows.append(clean) + + # Build markdown table + lines = [] + if clean_rows: + header = clean_rows[0] + lines.append("| " + " | ".join(header) + " |") + lines.append("| " + " | ".join(["---"] * len(header)) + " |") + for row in clean_rows[1:]: + # Pad row to match header length + padded = row + [""] * (len(header) - len(row)) + lines.append("| " + " | ".join(padded[:len(header)]) + " |") + + return "\n".join(lines) + + +async def extract_text_from_image_via_vision(image_bytes: bytes, mime_type: str = "image/png") -> Optional[str]: + """Use Gemini vision to extract text from an image. + + Returns extracted text or None if unavailable. + """ + try: + import google.genai as genai + + api_key = os.environ.get("GOOGLE_API_KEY") + if not api_key: + return None + + client = genai.Client() + b64 = base64.b64encode(image_bytes).decode("utf-8") + + response = await asyncio.to_thread( + client.models.generate_content, + model="gemini-2.5-flash", + contents=[ + { + "parts": [ + {"text": "Extract all text from this image. Return only the extracted text, nothing else. If no text is found, return 'No text found'."}, + {"inline_data": {"mime_type": mime_type, "data": b64}}, + ] + } + ], + ) + text = response.text.strip() if response.text else None + if text and text.lower() != "no text found": + return text + return None + except Exception as e: + print(f"[DoclingService] Vision text extraction failed: {e}") + return None diff --git a/backend/services/documents_loader.py b/backend/services/documents_loader.py index 548d19e..b3d395a 100644 --- a/backend/services/documents_loader.py +++ b/backend/services/documents_loader.py @@ -92,7 +92,8 @@ class DocumentsLoader: return await asyncio.to_thread(file.read) def load_msword(self, file_path: str) -> str: - return self.docling_service.parse_to_markdown(file_path) + """Parse DOCX with python-docx for better table/structure handling.""" + return self.docling_service.parse_docx_structured(file_path) def load_powerpoint(self, file_path: str) -> str: return self.docling_service.parse_to_markdown(file_path) diff --git a/backend/services/image_generation_service.py b/backend/services/image_generation_service.py index b17ae5e..ca8fb83 100644 --- a/backend/services/image_generation_service.py +++ b/backend/services/image_generation_service.py @@ -193,9 +193,9 @@ class ImageGenerationService: async def generate_image_gemini_flash( self, prompt: str, output_directory: str ) -> str: - """Generate image using Gemini Flash (gemini-2.5-flash-image-preview).""" + """Generate image using Gemini Flash (gemini-3.1-flash-image-preview).""" return await self._generate_image_google( - prompt, output_directory, "gemini-2.5-flash-image-preview" + prompt, output_directory, "gemini-3.1-flash-image-preview" ) async def generate_image_nanobanana_pro( diff --git a/backend/services/layout_analysis_service.py b/backend/services/layout_analysis_service.py new file mode 100644 index 0000000..920c7db --- /dev/null +++ b/backend/services/layout_analysis_service.py @@ -0,0 +1,142 @@ +"""Layout analysis service using LayoutParser for slide structure detection. + +Analyzes slide screenshots to detect regions (text, image, table, title) +and provides structural metadata for LLM-based code generation. +""" +import os +from typing import List, Optional + +# LayoutParser is optional — graceful fallback if not installed +_LAYOUTPARSER_AVAILABLE = False +try: + import layoutparser as lp + _LAYOUTPARSER_AVAILABLE = True +except ImportError: + pass + + +class DetectedRegion: + """A detected region on a slide.""" + __slots__ = ("type", "x1", "y1", "x2", "y2", "score") + + def __init__(self, type: str, x1: float, y1: float, x2: float, y2: float, score: float = 1.0): + self.type = type + self.x1 = x1 + self.y1 = y1 + self.x2 = x2 + self.y2 = y2 + self.score = score + + def to_dict(self) -> dict: + return { + "type": self.type, + "x1": round(self.x1), + "y1": round(self.y1), + "x2": round(self.x2), + "y2": round(self.y2), + "score": round(self.score, 3), + } + + +def analyze_slide_layout(image_path: str) -> List[DetectedRegion]: + """Analyze a slide screenshot and return detected layout regions. + + Uses LayoutParser with a PubLayNet model if available. + Falls back to empty list if LayoutParser is not installed. + """ + if not _LAYOUTPARSER_AVAILABLE: + return [] + + if not os.path.exists(image_path): + return [] + + try: + import cv2 + image = cv2.imread(image_path) + if image is None: + return [] + + # Use PubLayNet model — detects: Text, Title, List, Table, Figure + model = lp.Detectron2LayoutModel( + config_path="lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config", + label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}, + extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5], + ) + + layout = model.detect(image) + + regions = [] + for block in layout: + regions.append(DetectedRegion( + type=block.type, + x1=block.block.x_1, + y1=block.block.y_1, + x2=block.block.x_2, + y2=block.block.y_2, + score=block.score, + )) + + return regions + + except Exception as e: + print(f"[LayoutAnalysis] Detection failed: {e}") + return [] + + +def regions_to_description(regions: List[DetectedRegion], image_width: int = 960, image_height: int = 540) -> str: + """Convert detected regions to a text description for LLM context. + + Normalizes coordinates to percentages for resolution-independent descriptions. + """ + if not regions: + return "" + + lines = ["Detected layout regions (coordinates as % of slide dimensions):"] + for r in sorted(regions, key=lambda r: (r.y1, r.x1)): + x_pct = round(r.x1 / image_width * 100) + y_pct = round(r.y1 / image_height * 100) + w_pct = round((r.x2 - r.x1) / image_width * 100) + h_pct = round((r.y2 - r.y1) / image_height * 100) + lines.append( + f"- {r.type}: position ({x_pct}%, {y_pct}%), size ({w_pct}% x {h_pct}%), confidence: {r.score:.0%}" + ) + + return "\n".join(lines) + + +def classify_layout_from_regions(regions: List[DetectedRegion]) -> Optional[str]: + """Classify slide layout type based on detected regions. + + Returns a layout type string or None if classification is uncertain. + """ + if not regions: + return None + + type_counts = {} + for r in regions: + type_counts[r.type] = type_counts.get(r.type, 0) + 1 + + has_title = type_counts.get("Title", 0) > 0 + has_text = type_counts.get("Text", 0) > 0 + has_figure = type_counts.get("Figure", 0) > 0 + has_table = type_counts.get("Table", 0) > 0 + has_list = type_counts.get("List", 0) > 0 + text_count = type_counts.get("Text", 0) + + # Classification heuristics + if has_title and not has_text and not has_figure and not has_table: + return "title_slide" + if has_title and has_figure and not has_text: + return "picture" + if has_table: + return "table" + if text_count >= 2 or (has_text and has_list): + return "two_column" + if has_title and (has_text or has_list): + return "content" + if has_figure and (has_text or has_title): + return "picture_with_caption" + if not any([has_title, has_text, has_figure, has_table, has_list]): + return "blank" + + return "content" diff --git a/backend/services/master_deck_parser_service.py b/backend/services/master_deck_parser_service.py index d2606c4..ab8b1f2 100644 --- a/backend/services/master_deck_parser_service.py +++ b/backend/services/master_deck_parser_service.py @@ -461,7 +461,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict: raise ValueError("Deck not found") pptx_path = deck.original_file_path client_id = deck.client_id - parse_mode = getattr(deck, "parse_mode", None) or "slides" + parse_mode = getattr(deck, "parse_mode", None) or "layouts" if not os.path.exists(pptx_path): raise FileNotFoundError(f"PPTX file not found: {pptx_path}") @@ -529,29 +529,55 @@ async def _do_parse(deck_id: uuid.UUID) -> dict: print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}") print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM") + # Optional: LayoutParser region detection for better classification + from services.layout_analysis_service import ( + analyze_slide_layout, + classify_layout_from_regions, + regions_to_description, + ) + for idx, lm in enumerate(primary_metas): + screenshot_path = screenshots[idx] if idx < len(screenshots) else None + + # Try LayoutParser classification if a screenshot is available + lp_layout_type = None + lp_region_desc = "" + if screenshot_path and os.path.exists(screenshot_path): + try: + regions = await asyncio.to_thread(analyze_slide_layout, screenshot_path) + if regions: + lp_layout_type = classify_layout_from_regions(regions) + lp_region_desc = regions_to_description(regions) + except Exception as lp_err: + print(f"[MasterDeckParser] LayoutParser skipped for {idx}: {lp_err}") + layout_entry = { "index": idx, "layout_name": lm["layout_name"], - "layout_type": _guess_layout_type(lm["layout_name"]), + "layout_type": lp_layout_type or _guess_layout_type(lm["layout_name"]), "xml_snippet": lm["xml_content"][:2000], "fonts": list( {normalize_font_family_name(f) for f in extract_fonts_from_oxml(lm["xml_content"]) if f} ), "html": None, "react_code": None, - "screenshot_path": screenshots[idx] if idx < len(screenshots) else None, + "screenshot_path": screenshot_path, } # Run LLM pipeline if provider available and we have a screenshot - if llm_provider and idx < len(screenshots) and os.path.exists(screenshots[idx]): + if llm_provider and screenshot_path and os.path.exists(screenshot_path): try: print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating HTML...") - with open(screenshots[idx], "rb") as img_f: + with open(screenshot_path, "rb") as img_f: img_b64 = base64.b64encode(img_f.read()).decode("utf-8") + # Include LayoutParser region info in LLM context + xml_context = lm["xml_content"] + if lp_region_desc: + xml_context = f"{lp_region_desc}\n\n---\n\n{xml_context}" + html = await _llm_generate_html( - llm_provider, img_b64, lm["xml_content"], + llm_provider, img_b64, xml_context, layout_entry["fonts"] or None, ) html = html.replace("```html", "").replace("```", "") diff --git a/frontend/app/(presentation-generator)/generate/configure/page.tsx b/frontend/app/(presentation-generator)/generate/configure/page.tsx index e090d0a..d30371e 100644 --- a/frontend/app/(presentation-generator)/generate/configure/page.tsx +++ b/frontend/app/(presentation-generator)/generate/configure/page.tsx @@ -21,6 +21,7 @@ import { } from "@/store/slices/presentationGeneration"; import { Button } from "@/components/ui/button"; import { Textarea } from "@/components/ui/textarea"; +import { Input } from "@/components/ui/input"; import { Label } from "@/components/ui/label"; import { Slider } from "@/components/ui/slider"; import { @@ -30,7 +31,7 @@ import { SelectTrigger, SelectValue, } from "@/components/ui/select"; -import { ChevronLeft, ChevronRight, Layers } from "lucide-react"; +import { ChevronLeft, ChevronRight, Layers, MessageCircleQuestion } from "lucide-react"; import { toast } from "sonner"; import { cn } from "@/lib/utils"; import { OverlayLoader } from "@/components/ui/overlay-loader"; @@ -70,6 +71,9 @@ export default function WizardConfigurePage() { const [loadingClients, setLoadingClients] = useState(true); const [loadingDecks, setLoadingDecks] = useState(false); const [isGenerating, setIsGenerating] = useState(false); + const [followUpQuestions, setFollowUpQuestions] = useState([]); + const [followUpAnswers, setFollowUpAnswers] = useState>({}); + const [loadingFollowUp, setLoadingFollowUp] = useState(false); // Fetch clients on mount useEffect(() => { @@ -90,6 +94,23 @@ export default function WizardConfigurePage() { .finally(() => setLoadingDecks(false)); }, [wizard.selectedClientId]); + // Fetch follow-up questions if brief is short + useEffect(() => { + const briefContent = wizard.briefText; + if (!briefContent || briefContent.trim().length < 10) { + setFollowUpQuestions([]); + return; + } + setLoadingFollowUp(true); + WizardApi.checkFollowUpQuestions(briefContent) + .then((questions) => { + setFollowUpQuestions(questions); + setFollowUpAnswers({}); + }) + .finally(() => setLoadingFollowUp(false)); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); // Run once on page load + const handleBack = () => { dispatch(setWizardStep(1)); router.push("/generate/upload"); @@ -109,6 +130,16 @@ export default function WizardConfigurePage() { .map((f) => f.serverPath) .filter(Boolean) as string[]; + // Append follow-up Q&A to instructions if any answers are provided + let finalInstructions = wizard.instructions; + const answeredPairs = followUpQuestions + .filter((q) => followUpAnswers[q]?.trim()) + .map((q) => `Q: ${q}\nA: ${followUpAnswers[q].trim()}`); + if (answeredPairs.length > 0) { + const qaSuffix = "\n\n--- Follow-up Context ---\n" + answeredPairs.join("\n\n"); + finalInstructions = (finalInstructions || "") + qaSuffix; + } + // Create presentation (outline mode) const result = await WizardApi.createPresentation({ content: wizard.briefText, @@ -116,7 +147,7 @@ export default function WizardConfigurePage() { file_paths: filePaths, language: wizard.language, tone: wizard.tone, - instructions: wizard.instructions, + instructions: finalInstructions, client_id: wizard.selectedClientId ?? undefined, master_deck_id: wizard.selectedDeckId ?? undefined, }); @@ -266,6 +297,39 @@ export default function WizardConfigurePage() { + {/* Follow-Up Questions */} + {loadingFollowUp && ( +
+

Checking if we need more context...

+
+ )} + {followUpQuestions.length > 0 && !loadingFollowUp && ( +
+
+ +

+ A few quick questions to improve your presentation +

+
+ {followUpQuestions.map((question, idx) => ( +
+ + + setFollowUpAnswers((prev) => ({ + ...prev, + [question]: e.target.value, + })) + } + className="bg-white border-amber-200 focus:border-amber-400" + /> +
+ ))} +
+ )} + {/* Instructions */}
diff --git a/frontend/app/(presentation-generator)/generate/outline/page.tsx b/frontend/app/(presentation-generator)/generate/outline/page.tsx index 99c7777..2cc3bd7 100644 --- a/frontend/app/(presentation-generator)/generate/outline/page.tsx +++ b/frontend/app/(presentation-generator)/generate/outline/page.tsx @@ -9,6 +9,7 @@ import { setJobId, setPresentationId as setWizardPresentationId, WizardOutlineItem, + toggleSlideAttachment, } from "@/store/slices/wizardSlice"; import { clearPresentationData } from "@/store/slices/presentationGeneration"; import { useOutlineStreaming } from "../../outline/hooks/useOutlineStreaming"; @@ -22,8 +23,15 @@ import { FileText, Layers, Loader2, + Paperclip, } from "lucide-react"; import { toast } from "sonner"; +import { + Popover, + PopoverContent, + PopoverTrigger, +} from "@/components/ui/popover"; +import { Checkbox } from "@/components/ui/checkbox"; import { OverlayLoader } from "@/components/ui/overlay-loader"; import Wrapper from "@/components/Wrapper"; import { PresentationGenerationApi } from "../../services/api/presentation-generation"; @@ -183,15 +191,88 @@ export default function WizardOutlinePage() { Uploaded Files
- {wizard.uploadedFiles.map((f, i) => ( -
- - {f.name} -
- ))} + {wizard.uploadedFiles.map((f, i) => { + // Count how many slides this file is linked to + const linkedCount = Object.values( + wizard.slideAttachments + ).filter((names) => names.includes(f.name)).length; + + return ( +
+ + {f.name} + + {/* Link to slides popover */} + {outlines && outlines.length > 0 && ( + + + + + +

+ Link to slides +

+
+ {outlines.map((outline, slideIdx) => { + const title = + (outline.content || "") + .split("\n")[0] + ?.replace(/^#+\s*/, "") + .trim() || `Slide ${slideIdx + 1}`; + const isLinked = ( + wizard.slideAttachments[slideIdx] || [] + ).includes(f.name); + + return ( + + ); + })} +
+
+
+ )} +
+ ); + })}
)} @@ -248,6 +329,8 @@ export default function WizardOutlinePage() { highestActiveIndex={streamState.highestActiveIndex} onDragEnd={handleDragEnd} onAddSlide={handleAddSlide} + slideAttachments={wizard.slideAttachments} + uploadedFiles={wizard.uploadedFiles} /> diff --git a/frontend/app/(presentation-generator)/generate/upload/page.tsx b/frontend/app/(presentation-generator)/generate/upload/page.tsx index 9b31f56..35222d7 100644 --- a/frontend/app/(presentation-generator)/generate/upload/page.tsx +++ b/frontend/app/(presentation-generator)/generate/upload/page.tsx @@ -11,8 +11,9 @@ import { setWizardStep, WizardUploadedFile, } from "@/store/slices/wizardSlice"; -import { Upload, X, FileText, ChevronRight, Plus } from "lucide-react"; +import { Upload, X, FileText, ChevronRight, Plus, Link } from "lucide-react"; import { Button } from "@/components/ui/button"; +import { Input } from "@/components/ui/input"; import { Textarea } from "@/components/ui/textarea"; import { toast } from "sonner"; import { cn } from "@/lib/utils"; @@ -60,6 +61,8 @@ export default function WizardUploadPage() { const [localFiles, setLocalFiles] = useState([]); const [isDragging, setIsDragging] = useState(false); const [isProcessing, setIsProcessing] = useState(false); + const [referenceUrl, setReferenceUrl] = useState(""); + const [isFetchingUrl, setIsFetchingUrl] = useState(false); const fileInputRef = useRef(null); const handleDragOver = (e: React.DragEvent) => { @@ -116,6 +119,27 @@ export default function WizardUploadPage() { const allFiles = uploadedFiles; // display list from Redux + const handleFetchUrl = async () => { + if (!referenceUrl.trim()) { + toast.error("Please enter a URL"); + return; + } + try { + setIsFetchingUrl(true); + const text = await WizardApi.fetchUrl(referenceUrl.trim()); + const separator = briefText.trim() ? "\n\n---\n\n" : ""; + dispatch(setBriefText(briefText + separator + text)); + toast.success("URL content fetched and appended to brief"); + setReferenceUrl(""); + } catch (error: any) { + toast.error("Failed to fetch URL", { + description: error.message || "Please check the URL and try again.", + }); + } finally { + setIsFetchingUrl(false); + } + }; + const handleNext = async () => { if (!briefText.trim() && allFiles.length === 0) { toast.error("Please enter a brief or upload documents"); @@ -244,6 +268,39 @@ export default function WizardUploadPage() { )} + {/* Reference URL */} +
+ +
+ setReferenceUrl(e.target.value)} + onKeyDown={(e) => { + if (e.key === "Enter") { + e.preventDefault(); + handleFetchUrl(); + } + }} + className="flex-1" + /> + +
+
+ {/* Brief Text */}
{/* Action Buttons */} diff --git a/frontend/app/(presentation-generator)/services/api/wizard.ts b/frontend/app/(presentation-generator)/services/api/wizard.ts index 4049875..0503dac 100644 --- a/frontend/app/(presentation-generator)/services/api/wizard.ts +++ b/frontend/app/(presentation-generator)/services/api/wizard.ts @@ -121,6 +121,34 @@ export class WizardApi { await ApiResponseHandler.handleResponse(response, "Failed to cancel job"); } + /** Fetch URL content and extract text */ + static async fetchUrl(url: string): Promise { + const response = await fetch("/api/v1/ppt/files/fetch-url", { + method: "POST", + headers: getHeader(), + body: JSON.stringify({ url }), + cache: "no-cache", + }); + return await ApiResponseHandler.handleResponse(response, "Failed to fetch URL"); + } + + /** Check if brief needs follow-up questions */ + static async checkFollowUpQuestions(content: string): Promise { + if (!content || content.trim().length < 10) return []; + try { + const response = await fetch("/api/v1/ppt/content/follow-up-questions", { + method: "POST", + headers: getHeader(), + body: JSON.stringify({ content }), + cache: "no-cache", + }); + const data = await ApiResponseHandler.handleResponse(response, ""); + return data.questions ?? []; + } catch { + return []; + } + } + /** Create presentation (outline-only, like existing flow) */ static async createPresentation(params: { content: string; diff --git a/frontend/components/ui/overlay-loader.tsx b/frontend/components/ui/overlay-loader.tsx index 58a98a6..8a984ce 100644 --- a/frontend/components/ui/overlay-loader.tsx +++ b/frontend/components/ui/overlay-loader.tsx @@ -1,5 +1,5 @@ import { cn } from "@/lib/utils" -import { Loader } from "./loader" +import { HamsterLoader } from "./hamster-loader" import { ProgressBar } from "./progress-bar" import { useEffect, useState } from "react" @@ -53,7 +53,9 @@ export const OverlayLoader = ({ )} > - loading +
+ +
{showProgress ? (
attached file names */ + slideAttachments: Record; } const STORAGE_KEY = "deckforge_wizard"; @@ -77,6 +79,7 @@ const defaultState: WizardState = { jobId: null, presentationId: null, decomposedFiles: [], + slideAttachments: {}, }; const persisted = loadFromStorage(); @@ -142,6 +145,31 @@ const wizardSlice = createSlice({ state.decomposedFiles = action.payload; saveToStorage(state); }, + setSlideAttachments: ( + state, + action: PayloadAction> + ) => { + state.slideAttachments = action.payload; + saveToStorage(state); + }, + toggleSlideAttachment: ( + state, + action: PayloadAction<{ slideIndex: number; fileName: string }> + ) => { + const { slideIndex, fileName } = action.payload; + const current = state.slideAttachments[slideIndex] || []; + if (current.includes(fileName)) { + state.slideAttachments[slideIndex] = current.filter( + (f) => f !== fileName + ); + if (state.slideAttachments[slideIndex].length === 0) { + delete state.slideAttachments[slideIndex]; + } + } else { + state.slideAttachments[slideIndex] = [...current, fileName]; + } + saveToStorage(state); + }, resetWizard: (state) => { Object.assign(state, defaultState); if (typeof window !== "undefined") { @@ -165,6 +193,8 @@ export const { setJobId, setPresentationId, setDecomposedFiles, + setSlideAttachments, + toggleSlideAttachment, resetWizard, } = wizardSlice.actions;