Phase 4: Fix critical bugs, improve document parsing, add vision OCR
- Fix SSE stream 500: use async_session_maker inside the StreamingResponse generator (the Depends session closes when the endpoint returns, before streaming starts)
- Fix template application: store template_name in the prepare endpoint so the worker uses the selected custom template instead of defaulting to "general"
- Fix OverlayLoader: replace loading.gif with the HamsterLoader component
- Fix parse_mode default: change from "slides" to "layouts" to avoid 70+ layouts
- Update Gemini Flash model to gemini-3.1-flash-image-preview
- Improve DOCX parsing: python-docx for structured table extraction, OCR enabled
- Add vision-based image text extraction via Gemini for uploaded images
- Add LayoutParser integration for slide layout structure analysis
- Add Phase 4 MVP features: transfer ownership, URL input, follow-up questions, attachment-to-slide mapping, content router

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
69a8829750
commit
e8295d6e71
21 changed files with 859 additions and 62 deletions
|
|
@ -16,6 +16,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||
chromium \
|
||||
fontconfig \
|
||||
curl \
|
||||
libgl1 \
|
||||
libglib2.0-0 \
|
||||
&& rm -rf /var/lib/apt/lists/*
|
||||
|
||||
ENV PUPPETEER_EXECUTABLE_PATH=/usr/bin/chromium
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ async def _list_decks(client_id: uuid.UUID, include_inactive: bool, session: Asy
|
|||
"name": d.name,
|
||||
"description": d.description,
|
||||
"thumbnail_path": d.thumbnail_path,
|
||||
"parse_mode": getattr(d, "parse_mode", None) or "slides",
|
||||
"parse_mode": getattr(d, "parse_mode", None) or "layouts",
|
||||
"parse_status": d.parse_status,
|
||||
"is_active": d.is_active,
|
||||
"layouts": d.layouts,
|
||||
|
|
@ -104,7 +104,7 @@ async def list_master_decks(
|
|||
async def upload_master_deck(
|
||||
client_id: uuid.UUID,
|
||||
file: UploadFile = File(...),
|
||||
parse_mode: str = Query("slides", description="Parse mode: 'slides' (default) or 'layouts'"),
|
||||
parse_mode: str = Query("layouts", description="Parse mode: 'layouts' (default, unique slideLayouts) or 'slides' (one layout per slide)"),
|
||||
admin: UserModel = Depends(require_client_admin),
|
||||
session: AsyncSession = Depends(get_async_session),
|
||||
):
|
||||
|
|
|
|||
|
|
@ -3,9 +3,12 @@ from typing import List, Optional
|
|||
import uuid
|
||||
|
||||
from fastapi import APIRouter, Depends, HTTPException, Query
|
||||
from pydantic import BaseModel
|
||||
from sqlalchemy import func
|
||||
from sqlalchemy.ext.asyncio import AsyncSession
|
||||
from sqlmodel import select
|
||||
from sqlmodel import select, update
|
||||
|
||||
from models.sql.presentation import PresentationModel
|
||||
from models.sql.user import UserModel
|
||||
from services.database import get_async_session
|
||||
from utils.auth_dependencies import require_super_admin
|
||||
|
|
@ -15,6 +18,11 @@ USERS_ROUTER = APIRouter(prefix="/users", tags=["Admin - Users"])
|
|||
VALID_ROLES = {"super_admin", "client_admin", "user"}
|
||||
|
||||
|
||||
class TransferOwnershipRequest(BaseModel):
    """Request body for the transfer-ownership endpoint."""

    # User who receives the presentations.
    new_owner_id: uuid.UUID

    # Optional filter: when set, only presentations belonging to this client
    # are transferred; when omitted, no client filter is applied.
    client_id: Optional[uuid.UUID] = None
|
||||
|
||||
|
||||
@USERS_ROUTER.get("", response_model=List[dict])
|
||||
async def list_users(
|
||||
_: UserModel = Depends(require_super_admin),
|
||||
|
|
@ -93,6 +101,55 @@ async def update_user_role(
|
|||
return {"message": "Role updated", "user_id": str(user.id), "role": role}
|
||||
|
||||
|
||||
@USERS_ROUTER.post("/{user_id}/transfer-ownership")
async def transfer_ownership(
    user_id: uuid.UUID,
    body: TransferOwnershipRequest,
    _: UserModel = Depends(require_super_admin),
    session: AsyncSession = Depends(get_async_session),
):
    """Reassign a user's non-deleted presentations to another user.

    Intended to be called before deactivating a user (GDPR compliance).

    Args:
        user_id: Current owner of the presentations.
        body: Carries the new owner id and an optional client filter.

    Returns:
        A dict with a summary message and both user ids as strings.

    Raises:
        HTTPException: 400 when source and target are the same user,
            404 when either user does not exist.
    """
    new_owner_id = body.new_owner_id

    if new_owner_id == user_id:
        raise HTTPException(
            status_code=400, detail="Cannot transfer ownership to the same user"
        )

    # Both ends of the transfer must exist; the source is checked first.
    if not await session.get(UserModel, user_id):
        raise HTTPException(status_code=404, detail="Source user not found")
    if not await session.get(UserModel, new_owner_id):
        raise HTTPException(status_code=404, detail="Target user not found")

    # Only presentations that are owned by the source user and not
    # soft-deleted are touched; the client filter is applied when provided.
    filters = [
        PresentationModel.owner_id == user_id,
        PresentationModel.deleted_at.is_(None),
    ]
    if body.client_id is not None:
        filters.append(PresentationModel.client_id == body.client_id)

    outcome = await session.execute(
        update(PresentationModel).where(*filters).values(owner_id=new_owner_id)
    )
    await session.commit()

    moved = outcome.rowcount
    return {
        "message": f"Transferred {moved} presentations",
        "from_user_id": str(user_id),
        "to_user_id": str(new_owner_id),
    }
|
||||
|
||||
|
||||
@USERS_ROUTER.delete("/{user_id}")
|
||||
async def deactivate_user(
|
||||
user_id: uuid.UUID,
|
||||
|
|
@ -106,7 +163,26 @@ async def deactivate_user(
|
|||
if user.id == admin.id:
|
||||
raise HTTPException(status_code=400, detail="Cannot deactivate yourself")
|
||||
|
||||
# Check how many active presentations this user still owns
|
||||
count_query = (
|
||||
select(func.count())
|
||||
.select_from(PresentationModel)
|
||||
.where(PresentationModel.owner_id == user_id)
|
||||
.where(PresentationModel.deleted_at.is_(None))
|
||||
)
|
||||
count_result = await session.execute(count_query)
|
||||
presentation_count = count_result.scalar_one()
|
||||
|
||||
user.is_active = False
|
||||
session.add(user)
|
||||
await session.commit()
|
||||
return {"message": "User deactivated", "user_id": str(user.id)}
|
||||
|
||||
response = {"message": "User deactivated", "user_id": str(user.id)}
|
||||
|
||||
if presentation_count > 0:
|
||||
response["warning"] = (
|
||||
f"User still has {presentation_count} active presentations. "
|
||||
"Consider transferring ownership first."
|
||||
)
|
||||
|
||||
return response
|
||||
|
|
|
|||
25
backend/api/v1/ppt/endpoints/content.py
Normal file
25
backend/api/v1/ppt/endpoints/content.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from typing import List
|
||||
|
||||
from fastapi import APIRouter, Body, Depends, HTTPException
|
||||
|
||||
from models.sql.user import UserModel
|
||||
from services.content_intelligence_service import ContentIntelligenceService
|
||||
from utils.auth_dependencies import get_current_user
|
||||
|
||||
CONTENT_ROUTER = APIRouter(prefix="/content", tags=["Content"])
|
||||
|
||||
|
||||
@CONTENT_ROUTER.post("/follow-up-questions")
async def follow_up_questions(
    content: str = Body(..., embed=True),
    _current_user: UserModel = Depends(get_current_user),
):
    """Classify the submitted content and return follow-up questions for it.

    Args:
        content: The brief text, sent as an embedded JSON body field.

    Returns:
        ``{"questions": [...]}``; the list is empty when the service yields
        nothing.

    Raises:
        HTTPException: 400 when the content is empty or whitespace only.
    """
    if not (content and content.strip()):
        raise HTTPException(status_code=400, detail="Content is required")

    service = ContentIntelligenceService()
    classification = await service.classify(content)
    followups = await service.ask_followup_questions(classification)

    # Normalise a falsy service result (e.g. None) to an empty list.
    return {"questions": followups or []}
|
||||
|
|
@ -4,9 +4,12 @@ import os
|
|||
import uuid
|
||||
from typing import Annotated, List, Optional
|
||||
|
||||
from fastapi import APIRouter, Body, File, HTTPException, UploadFile
|
||||
from fastapi import APIRouter, Body, Depends, File, HTTPException, UploadFile
|
||||
from pydantic import BaseModel
|
||||
|
||||
from models.sql.user import UserModel
|
||||
from utils.auth_dependencies import get_current_user
|
||||
|
||||
from constants.documents import (
|
||||
EXCEL_TYPES,
|
||||
IMAGE_UPLOAD_TYPES,
|
||||
|
|
@ -143,17 +146,46 @@ async def decompose_files(file_paths: Annotated[List[str], Body(embed=True)]):
|
|||
)
|
||||
)
|
||||
|
||||
# --- Image files ---
|
||||
# --- Image files (with vision-based text extraction) ---
|
||||
for img_path in image_files:
|
||||
info = extract_images_metadata(img_path)
|
||||
response.append(
|
||||
DecomposedFileInfo(
|
||||
name=info.filename,
|
||||
file_path=img_path,
|
||||
file_type="image",
|
||||
image_info=info.model_dump(),
|
||||
|
||||
# Try to extract text from image via Gemini vision
|
||||
extracted_text = None
|
||||
try:
|
||||
from services.docling_service import extract_text_from_image_via_vision
|
||||
mime_type, _ = mimetypes.guess_type(img_path)
|
||||
with open(img_path, "rb") as f:
|
||||
image_bytes = f.read()
|
||||
extracted_text = await extract_text_from_image_via_vision(
|
||||
image_bytes, mime_type or "image/png"
|
||||
)
|
||||
except Exception as e:
|
||||
print(f"[decompose] Vision text extraction failed for {img_path}: {e}")
|
||||
|
||||
if extracted_text:
|
||||
# Save extracted text as a text file alongside the image
|
||||
text_path = TEMP_FILE_SERVICE.create_temp_file_path(
|
||||
f"{uuid.uuid4()}.txt", temp_dir
|
||||
)
|
||||
with open(text_path, "w") as tf:
|
||||
tf.write(extracted_text)
|
||||
response.append(
|
||||
DecomposedFileInfo(
|
||||
name=os.path.basename(img_path),
|
||||
file_path=text_path,
|
||||
file_type="text",
|
||||
)
|
||||
)
|
||||
else:
|
||||
response.append(
|
||||
DecomposedFileInfo(
|
||||
name=info.filename,
|
||||
file_path=img_path,
|
||||
file_type="image",
|
||||
image_info=info.model_dump(),
|
||||
)
|
||||
)
|
||||
)
|
||||
|
||||
return response
|
||||
|
||||
|
|
@ -182,6 +214,24 @@ async def parse_url_endpoint(body: UrlParseRequest):
|
|||
return UrlParseResponse(content=content, url=body.url)
|
||||
|
||||
|
||||
@FILES_ROUTER.post("/fetch-url")
async def fetch_url_content(
    url: str = Body(..., embed=True),
    _current_user: UserModel = Depends(get_current_user),
):
    """Fetch a URL and extract its text content.

    The URL is supplied by the caller, so before fetching it is checked to be
    an http(s) URL whose host resolves only to public addresses. This keeps
    the endpoint from being used to reach loopback, private-network or
    link-local services from the backend (server-side request forgery).

    Args:
        url: Absolute http:// or https:// URL, sent as an embedded body field.

    Returns:
        ``{"text": <extracted text>, "url": <normalised url>}``.

    Raises:
        HTTPException: 400 when the URL is malformed, uses another scheme, or
            points at a non-public address; 422 when the host cannot be
            resolved or no content could be extracted.
    """
    import asyncio
    import ipaddress
    import socket
    from urllib.parse import urlsplit

    url = url.strip()
    try:
        parts = urlsplit(url)
        scheme = parts.scheme.lower()
        host = parts.hostname
        port = parts.port  # raises ValueError on a non-numeric/out-of-range port
    except ValueError:
        raise HTTPException(status_code=400, detail="Invalid URL")

    if scheme not in ("http", "https") or not host:
        raise HTTPException(status_code=400, detail="Invalid URL")

    # Resolve the host without blocking the event loop and refuse any target
    # that is not a globally routable address.
    try:
        resolved = await asyncio.get_running_loop().getaddrinfo(
            host,
            port or (443 if scheme == "https" else 80),
            type=socket.SOCK_STREAM,
        )
    except (socket.gaierror, UnicodeError):
        raise HTTPException(
            status_code=422, detail="Could not extract content from URL"
        )

    for entry in resolved:
        # sockaddr[0] is the textual address; drop any IPv6 zone suffix.
        address = ipaddress.ip_address(entry[4][0].split("%", 1)[0])
        mapped = getattr(address, "ipv4_mapped", None)
        if mapped is not None:
            address = mapped
        if not address.is_global:
            raise HTTPException(status_code=400, detail="Invalid URL")

    text = await parse_url(url)
    if not text:
        raise HTTPException(
            status_code=422, detail="Could not extract content from URL"
        )

    return {"text": text, "url": url}
|
||||
|
||||
|
||||
@FILES_ROUTER.post("/update")
|
||||
async def update_files(
|
||||
file_path: Annotated[str, Body()],
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ from utils.llm_calls.generate_presentation_outlines import generate_ppt_outline
|
|||
from models.sql.slide import SlideModel
|
||||
from models.sse_response import SSECompleteResponse, SSEErrorResponse, SSEResponse
|
||||
|
||||
from services.database import get_async_session
|
||||
from services.database import get_async_session, async_session_maker
|
||||
from services.temp_file_service import TEMP_FILE_SERVICE
|
||||
from services.concurrent_service import CONCURRENT_SERVICE
|
||||
from models.sql.presentation import PresentationModel
|
||||
|
|
@ -290,6 +290,7 @@ async def prepare_presentation(
|
|||
sql_session.add(presentation)
|
||||
presentation.outlines = presentation_outline_model.model_dump(mode="json")
|
||||
presentation.title = title or presentation.title
|
||||
presentation.template_name = layout.name
|
||||
presentation.set_layout(layout)
|
||||
presentation.set_structure(presentation_structure)
|
||||
await sql_session.commit()
|
||||
|
|
@ -319,11 +320,18 @@ async def stream_presentation(
|
|||
|
||||
image_generation_service = ImageGenerationService(get_images_directory())
|
||||
|
||||
async def inner():
|
||||
structure = presentation.get_structure()
|
||||
layout = presentation.get_layout()
|
||||
outline = presentation.get_presentation_outline()
|
||||
# Capture data before returning StreamingResponse, because the Depends
|
||||
# session is closed once this function returns.
|
||||
pres_id = id
|
||||
structure = presentation.get_structure()
|
||||
layout = presentation.get_layout()
|
||||
outline = presentation.get_presentation_outline()
|
||||
pres_language = presentation.language
|
||||
pres_tone = presentation.tone
|
||||
pres_verbosity = presentation.verbosity
|
||||
pres_instructions = presentation.instructions
|
||||
|
||||
async def inner():
|
||||
# These tasks will be gathered and awaited after all slides are generated
|
||||
async_assets_generation_tasks = []
|
||||
|
||||
|
|
@ -339,17 +347,17 @@ async def stream_presentation(
|
|||
slide_content = await get_slide_content_from_type_and_outline(
|
||||
slide_layout,
|
||||
outline.slides[i],
|
||||
presentation.language,
|
||||
presentation.tone,
|
||||
presentation.verbosity,
|
||||
presentation.instructions,
|
||||
pres_language,
|
||||
pres_tone,
|
||||
pres_verbosity,
|
||||
pres_instructions,
|
||||
)
|
||||
except HTTPException as e:
|
||||
yield SSEErrorResponse(detail=e.detail).to_string()
|
||||
return
|
||||
|
||||
slide = SlideModel(
|
||||
presentation=id,
|
||||
presentation=pres_id,
|
||||
layout_group=layout.name,
|
||||
layout=slide_layout.id,
|
||||
index=i,
|
||||
|
|
@ -381,21 +389,24 @@ async def stream_presentation(
|
|||
for assets_list in generated_assets_lists:
|
||||
generated_assets.extend(assets_list)
|
||||
|
||||
# Moved this here to make sure new slides are generated before deleting the old ones
|
||||
await sql_session.execute(
|
||||
delete(SlideModel).where(SlideModel.presentation == id)
|
||||
)
|
||||
await sql_session.commit()
|
||||
# Use a new session for DB writes — the Depends session is already
|
||||
# closed by the time the streaming generator executes.
|
||||
async with async_session_maker() as session:
|
||||
await session.execute(
|
||||
delete(SlideModel).where(SlideModel.presentation == pres_id)
|
||||
)
|
||||
await session.commit()
|
||||
|
||||
sql_session.add(presentation)
|
||||
sql_session.add_all(slides)
|
||||
sql_session.add_all(generated_assets)
|
||||
await sql_session.commit()
|
||||
pres = await session.get(PresentationModel, pres_id)
|
||||
session.add(pres)
|
||||
session.add_all(slides)
|
||||
session.add_all(generated_assets)
|
||||
await session.commit()
|
||||
|
||||
response = PresentationWithSlides(
|
||||
**presentation.model_dump(),
|
||||
slides=slides,
|
||||
)
|
||||
response = PresentationWithSlides(
|
||||
**pres.model_dump(),
|
||||
slides=slides,
|
||||
)
|
||||
|
||||
yield SSECompleteResponse(
|
||||
key="presentation",
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ from api.v1.ppt.endpoints.ollama import OLLAMA_ROUTER
|
|||
from api.v1.ppt.endpoints.outlines import OUTLINES_ROUTER
|
||||
from api.v1.ppt.endpoints.slide import SLIDE_ROUTER
|
||||
from api.v1.ppt.endpoints.pptx_slides import PPTX_FONTS_ROUTER
|
||||
from api.v1.ppt.endpoints.content import CONTENT_ROUTER
|
||||
|
||||
|
||||
API_V1_PPT_ROUTER = APIRouter(prefix="/api/v1/ppt")
|
||||
|
|
@ -37,3 +38,4 @@ API_V1_PPT_ROUTER.include_router(OPENAI_ROUTER)
|
|||
API_V1_PPT_ROUTER.include_router(ANTHROPIC_ROUTER)
|
||||
API_V1_PPT_ROUTER.include_router(GOOGLE_ROUTER)
|
||||
API_V1_PPT_ROUTER.include_router(PPTX_FONTS_ROUTER)
|
||||
API_V1_PPT_ROUTER.include_router(CONTENT_ROUTER)
|
||||
|
|
|
|||
|
|
@ -20,7 +20,10 @@ dependencies = [
|
|||
"pathvalidate>=3.3.1",
|
||||
"pdfplumber>=0.11.7",
|
||||
"pytest>=8.4.1",
|
||||
"python-docx>=1.1",
|
||||
"python-pptx>=1.0.2",
|
||||
"layoutparser>=0.3",
|
||||
"opencv-python-headless>=4.8",
|
||||
"redis>=5.0,<6",
|
||||
"sqlmodel>=0.0.24",
|
||||
"alembic>=1.15",
|
||||
|
|
|
|||
|
|
@ -1,3 +1,13 @@
|
|||
"""Document parsing service.
|
||||
|
||||
Uses Docling for PDF/PPTX and python-docx for DOCX (better table handling).
|
||||
Optionally extracts text from embedded images via Gemini vision.
|
||||
"""
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
from docling.document_converter import (
|
||||
DocumentConverter,
|
||||
PdfFormatOption,
|
||||
|
|
@ -11,7 +21,7 @@ from docling.datamodel.base_models import InputFormat
|
|||
class DoclingService:
|
||||
def __init__(self):
|
||||
self.pipeline_options = PdfPipelineOptions()
|
||||
self.pipeline_options.do_ocr = False
|
||||
self.pipeline_options.do_ocr = True
|
||||
|
||||
self.converter = DocumentConverter(
|
||||
allowed_formats=[InputFormat.PPTX, InputFormat.PDF, InputFormat.DOCX],
|
||||
|
|
@ -29,5 +39,163 @@ class DoclingService:
|
|||
)
|
||||
|
||||
def parse_to_markdown(self, file_path: str) -> str:
    """Convert a supported document to Markdown with the Docling converter.

    Args:
        file_path: Path of the document handed to ``self.converter``.

    Returns:
        The Markdown export of the converted document.
    """
    converted = self.converter.convert(file_path)
    document = converted.document
    return document.export_to_markdown()
|
||||
|
||||
def parse_docx_structured(self, file_path: str) -> str:
    """Parse a DOCX file, preferring the python-docx based extractor.

    Any exception raised by the python-docx path is reported on stdout and
    the document is parsed with the generic Docling converter instead.

    Args:
        file_path: Path of the DOCX file.

    Returns:
        Markdown text for the document.
    """
    try:
        markdown = self._parse_docx_with_python_docx(file_path)
    except Exception as e:
        print(f"[DoclingService] python-docx parsing failed ({e}), falling back to Docling")
        markdown = self.parse_to_markdown(file_path)
    return markdown
|
||||
|
||||
def _parse_docx_with_python_docx(self, file_path: str) -> str:
    """Extract DOCX body content as Markdown using python-docx.

    Top-level paragraphs and tables are visited in document order via
    ``Document.iter_inner_content()``, so each block is wrapped once instead
    of rescanning ``doc.paragraphs`` / ``doc.tables`` for every body element.

    * Paragraphs whose style name contains "heading" become Markdown
      headings; the level is the number in the style name, clamped to 1-6
      (1 when the name carries no number).
    * Other non-empty paragraphs are emitted as plain text.
    * Tables are rendered with ``_table_to_markdown``.
    * A trailing "Embedded Images" section lists one entry per value returned
      by ``self._extract_docx_images``.

    Args:
        file_path: Path of the DOCX file.

    Returns:
        The parts joined by blank lines.
    """
    import re

    from docx import Document
    from docx.table import Table
    from docx.text.paragraph import Paragraph

    doc = Document(file_path)
    parts: List[str] = []

    for block in doc.iter_inner_content():
        if isinstance(block, Paragraph):
            text = block.text.strip()
            if not text:
                continue

            style_name = (block.style.name or "").lower() if block.style else ""
            if "heading" in style_name:
                number = re.search(r"\d+", style_name)
                # Markdown only defines heading levels 1 through 6.
                level = min(max(int(number.group()), 1), 6) if number else 1
                parts.append(f"{'#' * level} {text}")
            else:
                parts.append(text)

        elif isinstance(block, Table):
            md_table = _table_to_markdown(block)
            if md_table:
                parts.append(md_table)

    # Append placeholders for embedded images, if any were found.
    embedded_images = self._extract_docx_images(doc)
    if embedded_images:
        parts.append("\n## Embedded Images\n")
        for desc in embedded_images:
            parts.append(f"- {desc}")

    return "\n\n".join(parts)
|
||||
|
||||
def _extract_docx_images(self, doc) -> List[str]:
|
||||
"""Extract image descriptions from DOCX.
|
||||
|
||||
Returns alt text for images, or placeholder if no alt text.
|
||||
"""
|
||||
descriptions = []
|
||||
try:
|
||||
for rel in doc.part.rels.values():
|
||||
if "image" in rel.reltype:
|
||||
descriptions.append("[Embedded image]")
|
||||
except Exception:
|
||||
pass
|
||||
return descriptions
|
||||
|
||||
|
||||
def _find_paragraph_by_element(doc, element):
|
||||
"""Find a Paragraph object matching the given XML element."""
|
||||
for para in doc.paragraphs:
|
||||
if para._element is element:
|
||||
return para
|
||||
return None
|
||||
|
||||
|
||||
def _find_table_by_element(doc, element):
|
||||
"""Find a Table object matching the given XML element."""
|
||||
for table in doc.tables:
|
||||
if table._element is element:
|
||||
return table
|
||||
return None
|
||||
|
||||
|
||||
def _table_to_markdown(table) -> str:
|
||||
"""Convert a python-docx Table to a markdown table string."""
|
||||
rows = []
|
||||
for row in table.rows:
|
||||
cells = [cell.text.strip().replace("|", "\\|") for cell in row.cells]
|
||||
rows.append(cells)
|
||||
|
||||
if not rows:
|
||||
return ""
|
||||
|
||||
# Deduplicate merged cells (python-docx repeats merged cell text)
|
||||
clean_rows = []
|
||||
for row_cells in rows:
|
||||
clean = []
|
||||
for i, cell_text in enumerate(row_cells):
|
||||
if i > 0 and cell_text == row_cells[i - 1]:
|
||||
clean.append("") # merged cell
|
||||
else:
|
||||
clean.append(cell_text)
|
||||
clean_rows.append(clean)
|
||||
|
||||
# Build markdown table
|
||||
lines = []
|
||||
if clean_rows:
|
||||
header = clean_rows[0]
|
||||
lines.append("| " + " | ".join(header) + " |")
|
||||
lines.append("| " + " | ".join(["---"] * len(header)) + " |")
|
||||
for row in clean_rows[1:]:
|
||||
# Pad row to match header length
|
||||
padded = row + [""] * (len(header) - len(row))
|
||||
lines.append("| " + " | ".join(padded[:len(header)]) + " |")
|
||||
|
||||
return "\n".join(lines)
|
||||
|
||||
|
||||
async def extract_text_from_image_via_vision(image_bytes: bytes, mime_type: str = "image/png") -> Optional[str]:
    """Use Gemini vision to extract text from an image.

    The function is best effort and never raises: any failure is printed and
    reported as ``None``.

    Args:
        image_bytes: Raw image content.
        mime_type: MIME type sent along with the image.

    Returns:
        The extracted text, or ``None`` when the image is empty, the
        ``GOOGLE_API_KEY`` environment variable is unset, the model reports
        that there is no text, or the request fails.
    """
    # Cheap exits first, so nothing is imported or sent without a reason.
    if not image_bytes:
        return None

    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        return None

    try:
        from google import genai
        from google.genai import types

        # Pass the key that was just validated instead of relying on the
        # client's own environment lookup.
        client = genai.Client(api_key=api_key)

        prompt = "Extract all text from this image. Return only the extracted text, nothing else. If no text is found, return 'No text found'."

        # The SDK call is synchronous; run it in a worker thread so the event
        # loop is not blocked. Part.from_bytes lets the SDK encode the image.
        response = await asyncio.to_thread(
            client.models.generate_content,
            model="gemini-2.5-flash",
            contents=[
                prompt,
                types.Part.from_bytes(data=image_bytes, mime_type=mime_type),
            ],
        )

        text = (response.text or "").strip()
        # Treat the sentinel as "nothing found" even with trailing punctuation.
        if not text or text.rstrip(" .!").lower() == "no text found":
            return None
        return text
    except Exception as e:
        print(f"[DoclingService] Vision text extraction failed: {e}")
        return None
|
||||
|
|
|
|||
|
|
@ -92,7 +92,8 @@ class DocumentsLoader:
|
|||
return await asyncio.to_thread(file.read)
|
||||
|
||||
def load_msword(self, file_path: str) -> str:
|
||||
return self.docling_service.parse_to_markdown(file_path)
|
||||
"""Parse DOCX with python-docx for better table/structure handling."""
|
||||
return self.docling_service.parse_docx_structured(file_path)
|
||||
|
||||
def load_powerpoint(self, file_path: str) -> str:
|
||||
return self.docling_service.parse_to_markdown(file_path)
|
||||
|
|
|
|||
|
|
@ -193,9 +193,9 @@ class ImageGenerationService:
|
|||
async def generate_image_gemini_flash(
|
||||
self, prompt: str, output_directory: str
|
||||
) -> str:
|
||||
"""Generate image using Gemini Flash (gemini-2.5-flash-image-preview)."""
|
||||
"""Generate image using Gemini Flash (gemini-3.1-flash-image-preview)."""
|
||||
return await self._generate_image_google(
|
||||
prompt, output_directory, "gemini-2.5-flash-image-preview"
|
||||
prompt, output_directory, "gemini-3.1-flash-image-preview"
|
||||
)
|
||||
|
||||
async def generate_image_nanobanana_pro(
|
||||
|
|
|
|||
142
backend/services/layout_analysis_service.py
Normal file
142
backend/services/layout_analysis_service.py
Normal file
|
|
@ -0,0 +1,142 @@
|
|||
"""Layout analysis service using LayoutParser for slide structure detection.
|
||||
|
||||
Analyzes slide screenshots to detect regions (text, image, table, title)
|
||||
and provides structural metadata for LLM-based code generation.
|
||||
"""
|
||||
import os
|
||||
from typing import List, Optional
|
||||
|
||||
# LayoutParser is optional — graceful fallback if not installed
|
||||
_LAYOUTPARSER_AVAILABLE = False
|
||||
try:
|
||||
import layoutparser as lp
|
||||
_LAYOUTPARSER_AVAILABLE = True
|
||||
except ImportError:
|
||||
pass
|
||||
|
||||
|
||||
class DetectedRegion:
    """One detected region on a slide: a label, a bounding box and a score.

    ``(x1, y1)`` and ``(x2, y2)`` are the two box corners as supplied by the
    caller; ``score`` defaults to 1.0.
    """

    __slots__ = ("type", "x1", "y1", "x2", "y2", "score")

    def __init__(self, type: str, x1: float, y1: float, x2: float, y2: float, score: float = 1.0):
        self.type = type
        self.x1, self.y1 = x1, y1
        self.x2, self.y2 = x2, y2
        self.score = score

    def to_dict(self) -> dict:
        """Return a plain dict: coordinates rounded to ints, score to 3 places."""
        payload = {"type": self.type}
        for corner in ("x1", "y1", "x2", "y2"):
            payload[corner] = round(getattr(self, corner))
        payload["score"] = round(self.score, 3)
        return payload
|
||||
|
||||
|
||||
# Detection model shared by all calls; created on first use because loading
# it is expensive and analyze_slide_layout runs once per slide.
_LAYOUT_MODEL = None


def _get_layout_model():
    """Return the shared PubLayNet detection model, building it on first use."""
    global _LAYOUT_MODEL
    if _LAYOUT_MODEL is None:
        # PubLayNet labels: Text, Title, List, Table, Figure.
        _LAYOUT_MODEL = lp.Detectron2LayoutModel(
            config_path="lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config",
            label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"},
            extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.5],
        )
    return _LAYOUT_MODEL


def analyze_slide_layout(image_path: str) -> List[DetectedRegion]:
    """Analyze a slide screenshot and return detected layout regions.

    Uses LayoutParser with a PubLayNet model. The model is created once and
    reused on later calls.

    Args:
        image_path: Path of the screenshot image.

    Returns:
        One ``DetectedRegion`` per detected block. An empty list is returned
        when LayoutParser is not installed, the file does not exist, the
        image cannot be read, or detection fails (the error is printed).
    """
    if not _LAYOUTPARSER_AVAILABLE:
        return []

    if not os.path.exists(image_path):
        return []

    try:
        import cv2

        image = cv2.imread(image_path)
        if image is None:
            return []

        layout = _get_layout_model().detect(image)

        return [
            DetectedRegion(
                type=block.type,
                x1=block.block.x_1,
                y1=block.block.y_1,
                x2=block.block.x_2,
                y2=block.block.y_2,
                score=block.score,
            )
            for block in layout
        ]

    except Exception as e:
        print(f"[LayoutAnalysis] Detection failed: {e}")
        return []
|
||||
|
||||
|
||||
def regions_to_description(regions: List["DetectedRegion"], image_width: int = 960, image_height: int = 540) -> str:
    """Render detected regions as a bullet list for use as LLM context.

    Regions are ordered by ``(y1, x1)``. Position and size are expressed as
    whole percentages of ``image_width`` / ``image_height``.

    Args:
        regions: Regions to describe.
        image_width: Width the x coordinates are divided by.
        image_height: Height the y coordinates are divided by.

    Returns:
        A header line followed by one line per region, or "" when there are
        no regions.
    """
    if not regions:
        return ""

    def _percent(value, extent):
        return round(value / extent * 100)

    ordered = sorted(regions, key=lambda reg: (reg.y1, reg.x1))
    entries = [
        f"- {reg.type}: position ({_percent(reg.x1, image_width)}%, {_percent(reg.y1, image_height)}%), size ({_percent(reg.x2 - reg.x1, image_width)}% x {_percent(reg.y2 - reg.y1, image_height)}%), confidence: {reg.score:.0%}"
        for reg in ordered
    ]
    header = "Detected layout regions (coordinates as % of slide dimensions):"
    return "\n".join([header] + entries)
|
||||
|
||||
|
||||
def classify_layout_from_regions(regions: List["DetectedRegion"]) -> Optional[str]:
    """Guess a slide layout type from the kinds of regions detected on it.

    The rules are evaluated in order and the first match wins; "content" is
    the fallback when no rule matches.

    Args:
        regions: Detected regions; only each region's ``type`` is read.

    Returns:
        A layout type string, or None when ``regions`` is empty.
    """
    if not regions:
        return None

    kinds = [region.type for region in regions]
    present = set(kinds)

    title = "Title" in present
    text = "Text" in present
    figure = "Figure" in present
    table = "Table" in present
    bullets = "List" in present
    several_text_blocks = kinds.count("Text") >= 2

    # (condition, layout type) pairs, highest priority first.
    rules = (
        (title and not (text or figure or table), "title_slide"),
        (title and figure and not text, "picture"),
        (table, "table"),
        (several_text_blocks or (text and bullets), "two_column"),
        (title and (text or bullets), "content"),
        (figure and (text or title), "picture_with_caption"),
        (not (title or text or figure or table or bullets), "blank"),
    )
    for matched, layout_type in rules:
        if matched:
            return layout_type

    return "content"
|
||||
|
|
@ -461,7 +461,7 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
|
|||
raise ValueError("Deck not found")
|
||||
pptx_path = deck.original_file_path
|
||||
client_id = deck.client_id
|
||||
parse_mode = getattr(deck, "parse_mode", None) or "slides"
|
||||
parse_mode = getattr(deck, "parse_mode", None) or "layouts"
|
||||
|
||||
if not os.path.exists(pptx_path):
|
||||
raise FileNotFoundError(f"PPTX file not found: {pptx_path}")
|
||||
|
|
@ -529,29 +529,55 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
|
|||
print(f"[MasterDeckParser] LLM provider: {llm_provider['provider'] if llm_provider else 'NONE'}")
|
||||
print(f"[MasterDeckParser] Processing {len(primary_metas)} items, {llm_count} with screenshots for LLM")
|
||||
|
||||
# Optional: LayoutParser region detection for better classification
|
||||
from services.layout_analysis_service import (
|
||||
analyze_slide_layout,
|
||||
classify_layout_from_regions,
|
||||
regions_to_description,
|
||||
)
|
||||
|
||||
for idx, lm in enumerate(primary_metas):
|
||||
screenshot_path = screenshots[idx] if idx < len(screenshots) else None
|
||||
|
||||
# Try LayoutParser classification if a screenshot is available
|
||||
lp_layout_type = None
|
||||
lp_region_desc = ""
|
||||
if screenshot_path and os.path.exists(screenshot_path):
|
||||
try:
|
||||
regions = await asyncio.to_thread(analyze_slide_layout, screenshot_path)
|
||||
if regions:
|
||||
lp_layout_type = classify_layout_from_regions(regions)
|
||||
lp_region_desc = regions_to_description(regions)
|
||||
except Exception as lp_err:
|
||||
print(f"[MasterDeckParser] LayoutParser skipped for {idx}: {lp_err}")
|
||||
|
||||
layout_entry = {
|
||||
"index": idx,
|
||||
"layout_name": lm["layout_name"],
|
||||
"layout_type": _guess_layout_type(lm["layout_name"]),
|
||||
"layout_type": lp_layout_type or _guess_layout_type(lm["layout_name"]),
|
||||
"xml_snippet": lm["xml_content"][:2000],
|
||||
"fonts": list(
|
||||
{normalize_font_family_name(f) for f in extract_fonts_from_oxml(lm["xml_content"]) if f}
|
||||
),
|
||||
"html": None,
|
||||
"react_code": None,
|
||||
"screenshot_path": screenshots[idx] if idx < len(screenshots) else None,
|
||||
"screenshot_path": screenshot_path,
|
||||
}
|
||||
|
||||
# Run LLM pipeline if provider available and we have a screenshot
|
||||
if llm_provider and idx < len(screenshots) and os.path.exists(screenshots[idx]):
|
||||
if llm_provider and screenshot_path and os.path.exists(screenshot_path):
|
||||
try:
|
||||
print(f"[MasterDeckParser] Layout {idx + 1}/{llm_count}: {lm['layout_name']} — generating HTML...")
|
||||
with open(screenshots[idx], "rb") as img_f:
|
||||
with open(screenshot_path, "rb") as img_f:
|
||||
img_b64 = base64.b64encode(img_f.read()).decode("utf-8")
|
||||
|
||||
# Include LayoutParser region info in LLM context
|
||||
xml_context = lm["xml_content"]
|
||||
if lp_region_desc:
|
||||
xml_context = f"{lp_region_desc}\n\n---\n\n{xml_context}"
|
||||
|
||||
html = await _llm_generate_html(
|
||||
llm_provider, img_b64, lm["xml_content"],
|
||||
llm_provider, img_b64, xml_context,
|
||||
layout_entry["fonts"] or None,
|
||||
)
|
||||
html = html.replace("```html", "").replace("```", "")
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ import {
|
|||
} from "@/store/slices/presentationGeneration";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Label } from "@/components/ui/label";
|
||||
import { Slider } from "@/components/ui/slider";
|
||||
import {
|
||||
|
|
@ -30,7 +31,7 @@ import {
|
|||
SelectTrigger,
|
||||
SelectValue,
|
||||
} from "@/components/ui/select";
|
||||
import { ChevronLeft, ChevronRight, Layers } from "lucide-react";
|
||||
import { ChevronLeft, ChevronRight, Layers, MessageCircleQuestion } from "lucide-react";
|
||||
import { toast } from "sonner";
|
||||
import { cn } from "@/lib/utils";
|
||||
import { OverlayLoader } from "@/components/ui/overlay-loader";
|
||||
|
|
@ -70,6 +71,9 @@ export default function WizardConfigurePage() {
|
|||
const [loadingClients, setLoadingClients] = useState(true);
|
||||
const [loadingDecks, setLoadingDecks] = useState(false);
|
||||
const [isGenerating, setIsGenerating] = useState(false);
|
||||
const [followUpQuestions, setFollowUpQuestions] = useState<string[]>([]);
|
||||
const [followUpAnswers, setFollowUpAnswers] = useState<Record<string, string>>({});
|
||||
const [loadingFollowUp, setLoadingFollowUp] = useState(false);
|
||||
|
||||
// Fetch clients on mount
|
||||
useEffect(() => {
|
||||
|
|
@ -90,6 +94,23 @@ export default function WizardConfigurePage() {
|
|||
.finally(() => setLoadingDecks(false));
|
||||
}, [wizard.selectedClientId]);
|
||||
|
||||
// Ask the backend for follow-up questions about the brief, once on page load.
// Briefs that are empty or shorter than 10 characters (after trimming) are
// skipped and any previously shown questions are cleared.
useEffect(() => {
  const briefContent = wizard.briefText;
  if (!briefContent || briefContent.trim().length < 10) {
    setFollowUpQuestions([]);
    return;
  }

  // Flipped by the cleanup below so a response that arrives after the page
  // has unmounted does not write into stale component state.
  let cancelled = false;

  setLoadingFollowUp(true);
  WizardApi.checkFollowUpQuestions(briefContent)
    .then((questions) => {
      if (cancelled) return;
      setFollowUpQuestions(questions);
      // A new question set invalidates answers typed for the previous one.
      setFollowUpAnswers({});
    })
    .finally(() => {
      if (!cancelled) setLoadingFollowUp(false);
    });

  return () => {
    cancelled = true;
  };
  // Intentionally runs only once: the brief is read as it was when the page opened.
  // eslint-disable-next-line react-hooks/exhaustive-deps
}, []);
|
||||
|
||||
const handleBack = () => {
|
||||
dispatch(setWizardStep(1));
|
||||
router.push("/generate/upload");
|
||||
|
|
@ -109,6 +130,16 @@ export default function WizardConfigurePage() {
|
|||
.map((f) => f.serverPath)
|
||||
.filter(Boolean) as string[];
|
||||
|
||||
// Append follow-up Q&A to instructions if any answers are provided
|
||||
let finalInstructions = wizard.instructions;
|
||||
const answeredPairs = followUpQuestions
|
||||
.filter((q) => followUpAnswers[q]?.trim())
|
||||
.map((q) => `Q: ${q}\nA: ${followUpAnswers[q].trim()}`);
|
||||
if (answeredPairs.length > 0) {
|
||||
const qaSuffix = "\n\n--- Follow-up Context ---\n" + answeredPairs.join("\n\n");
|
||||
finalInstructions = (finalInstructions || "") + qaSuffix;
|
||||
}
|
||||
|
||||
// Create presentation (outline mode)
|
||||
const result = await WizardApi.createPresentation({
|
||||
content: wizard.briefText,
|
||||
|
|
@ -116,7 +147,7 @@ export default function WizardConfigurePage() {
|
|||
file_paths: filePaths,
|
||||
language: wizard.language,
|
||||
tone: wizard.tone,
|
||||
instructions: wizard.instructions,
|
||||
instructions: finalInstructions,
|
||||
client_id: wizard.selectedClientId ?? undefined,
|
||||
master_deck_id: wizard.selectedDeckId ?? undefined,
|
||||
});
|
||||
|
|
@ -266,6 +297,39 @@ export default function WizardConfigurePage() {
|
|||
</Select>
|
||||
</div>
|
||||
|
||||
{/* Follow-Up Questions */}
|
||||
{loadingFollowUp && (
|
||||
<div className="rounded-xl border border-amber-200 bg-amber-50 p-4">
|
||||
<p className="text-sm text-amber-700">Checking if we need more context...</p>
|
||||
</div>
|
||||
)}
|
||||
{followUpQuestions.length > 0 && !loadingFollowUp && (
|
||||
<div className="rounded-xl border border-amber-200 bg-amber-50 p-4 space-y-4">
|
||||
<div className="flex items-center gap-2 text-amber-800">
|
||||
<MessageCircleQuestion className="w-5 h-5 flex-shrink-0" />
|
||||
<p className="text-sm font-medium">
|
||||
A few quick questions to improve your presentation
|
||||
</p>
|
||||
</div>
|
||||
{followUpQuestions.map((question, idx) => (
|
||||
<div key={idx} className="space-y-1.5">
|
||||
<Label className="text-sm text-amber-900">{question}</Label>
|
||||
<Input
|
||||
placeholder="Your answer (optional)"
|
||||
value={followUpAnswers[question] ?? ""}
|
||||
onChange={(e) =>
|
||||
setFollowUpAnswers((prev) => ({
|
||||
...prev,
|
||||
[question]: e.target.value,
|
||||
}))
|
||||
}
|
||||
className="bg-white border-amber-200 focus:border-amber-400"
|
||||
/>
|
||||
</div>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
{/* Instructions */}
|
||||
<div>
|
||||
<Label className="mb-2 block">Additional Instructions</Label>
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import {
|
|||
setJobId,
|
||||
setPresentationId as setWizardPresentationId,
|
||||
WizardOutlineItem,
|
||||
toggleSlideAttachment,
|
||||
} from "@/store/slices/wizardSlice";
|
||||
import { clearPresentationData } from "@/store/slices/presentationGeneration";
|
||||
import { useOutlineStreaming } from "../../outline/hooks/useOutlineStreaming";
|
||||
|
|
@ -22,8 +23,15 @@ import {
|
|||
FileText,
|
||||
Layers,
|
||||
Loader2,
|
||||
Paperclip,
|
||||
} from "lucide-react";
|
||||
import { toast } from "sonner";
|
||||
import {
|
||||
Popover,
|
||||
PopoverContent,
|
||||
PopoverTrigger,
|
||||
} from "@/components/ui/popover";
|
||||
import { Checkbox } from "@/components/ui/checkbox";
|
||||
import { OverlayLoader } from "@/components/ui/overlay-loader";
|
||||
import Wrapper from "@/components/Wrapper";
|
||||
import { PresentationGenerationApi } from "../../services/api/presentation-generation";
|
||||
|
|
@ -183,15 +191,88 @@ export default function WizardOutlinePage() {
|
|||
Uploaded Files
|
||||
</h4>
|
||||
<div className="space-y-2">
|
||||
{wizard.uploadedFiles.map((f, i) => (
|
||||
<div
|
||||
key={i}
|
||||
className="flex items-center gap-2 p-2 rounded-lg bg-gray-50 text-xs"
|
||||
>
|
||||
<FileText className="w-3.5 h-3.5 text-[#5146E5]" />
|
||||
<span className="truncate flex-1">{f.name}</span>
|
||||
</div>
|
||||
))}
|
||||
{wizard.uploadedFiles.map((f, i) => {
|
||||
// Count how many slides this file is linked to
|
||||
const linkedCount = Object.values(
|
||||
wizard.slideAttachments
|
||||
).filter((names) => names.includes(f.name)).length;
|
||||
|
||||
return (
|
||||
<div
|
||||
key={i}
|
||||
className="flex items-center gap-2 p-2 rounded-lg bg-gray-50 text-xs"
|
||||
>
|
||||
<FileText className="w-3.5 h-3.5 text-[#5146E5] flex-shrink-0" />
|
||||
<span className="truncate flex-1">{f.name}</span>
|
||||
|
||||
{/* Link to slides popover */}
|
||||
{outlines && outlines.length > 0 && (
|
||||
<Popover>
|
||||
<PopoverTrigger asChild>
|
||||
<button
|
||||
className="inline-flex items-center gap-1 px-1.5 py-0.5 rounded text-[10px] font-medium text-gray-500 hover:text-[#5146E5] hover:bg-[#5146E5]/5 transition-colors flex-shrink-0"
|
||||
title="Link to slides"
|
||||
>
|
||||
<Paperclip className="w-3 h-3" />
|
||||
{linkedCount > 0 && (
|
||||
<span className="text-[#5146E5]">
|
||||
{linkedCount}
|
||||
</span>
|
||||
)}
|
||||
</button>
|
||||
</PopoverTrigger>
|
||||
<PopoverContent
|
||||
side="right"
|
||||
align="start"
|
||||
className="w-64 p-3"
|
||||
>
|
||||
<p className="text-xs font-semibold text-gray-700 mb-2">
|
||||
Link to slides
|
||||
</p>
|
||||
<div className="space-y-1.5 max-h-48 overflow-y-auto">
|
||||
{outlines.map((outline, slideIdx) => {
|
||||
const title =
|
||||
(outline.content || "")
|
||||
.split("\n")[0]
|
||||
?.replace(/^#+\s*/, "")
|
||||
.trim() || `Slide ${slideIdx + 1}`;
|
||||
const isLinked = (
|
||||
wizard.slideAttachments[slideIdx] || []
|
||||
).includes(f.name);
|
||||
|
||||
return (
|
||||
<label
|
||||
key={slideIdx}
|
||||
className="flex items-center gap-2 p-1.5 rounded hover:bg-gray-50 cursor-pointer text-xs"
|
||||
>
|
||||
<Checkbox
|
||||
checked={isLinked}
|
||||
onCheckedChange={() =>
|
||||
dispatch(
|
||||
toggleSlideAttachment({
|
||||
slideIndex: slideIdx,
|
||||
fileName: f.name,
|
||||
})
|
||||
)
|
||||
}
|
||||
className="h-3.5 w-3.5"
|
||||
/>
|
||||
<span className="text-gray-600 font-medium w-5 flex-shrink-0">
|
||||
{slideIdx + 1}.
|
||||
</span>
|
||||
<span className="truncate text-gray-700">
|
||||
{title}
|
||||
</span>
|
||||
</label>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</PopoverContent>
|
||||
</Popover>
|
||||
)}
|
||||
</div>
|
||||
);
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
)}
|
||||
|
|
@ -248,6 +329,8 @@ export default function WizardOutlinePage() {
|
|||
highestActiveIndex={streamState.highestActiveIndex}
|
||||
onDragEnd={handleDragEnd}
|
||||
onAddSlide={handleAddSlide}
|
||||
slideAttachments={wizard.slideAttachments}
|
||||
uploadedFiles={wizard.uploadedFiles}
|
||||
/>
|
||||
</TabsContent>
|
||||
|
||||
|
|
|
|||
|
|
@ -11,8 +11,9 @@ import {
|
|||
setWizardStep,
|
||||
WizardUploadedFile,
|
||||
} from "@/store/slices/wizardSlice";
|
||||
import { Upload, X, FileText, ChevronRight, Plus } from "lucide-react";
|
||||
import { Upload, X, FileText, ChevronRight, Plus, Link } from "lucide-react";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { Input } from "@/components/ui/input";
|
||||
import { Textarea } from "@/components/ui/textarea";
|
||||
import { toast } from "sonner";
|
||||
import { cn } from "@/lib/utils";
|
||||
|
|
@ -60,6 +61,8 @@ export default function WizardUploadPage() {
|
|||
const [localFiles, setLocalFiles] = useState<File[]>([]);
|
||||
const [isDragging, setIsDragging] = useState(false);
|
||||
const [isProcessing, setIsProcessing] = useState(false);
|
||||
const [referenceUrl, setReferenceUrl] = useState("");
|
||||
const [isFetchingUrl, setIsFetchingUrl] = useState(false);
|
||||
const fileInputRef = useRef<HTMLInputElement>(null);
|
||||
|
||||
const handleDragOver = (e: React.DragEvent) => {
|
||||
|
|
@ -116,6 +119,27 @@ export default function WizardUploadPage() {
|
|||
|
||||
const allFiles = uploadedFiles; // display list from Redux
|
||||
|
||||
/**
 * Fetch the text behind the reference URL and append it to the brief.
 *
 * Triggered by both the "Fetch" button and the Enter key in the URL input.
 * On success the URL field is cleared; on failure an error toast is shown
 * and the brief is left untouched.
 */
const handleFetchUrl = async () => {
  // The button is disabled while fetching, but the Enter-key handler is not,
  // so guard against starting a second concurrent request here.
  if (isFetchingUrl) return;

  const url = referenceUrl.trim();
  if (!url) {
    toast.error("Please enter a URL");
    return;
  }

  try {
    setIsFetchingUrl(true);
    const text = await WizardApi.fetchUrl(url);

    // Nothing usable came back: do not append a dangling separator or claim success.
    if (typeof text !== "string" || !text.trim()) {
      toast.error("Failed to fetch URL", {
        description: "No readable text was found at that URL.",
      });
      return;
    }

    // Separate fetched content from an existing brief with a horizontal rule.
    const separator = briefText.trim() ? "\n\n---\n\n" : "";
    dispatch(setBriefText(briefText + separator + text));
    toast.success("URL content fetched and appended to brief");
    setReferenceUrl("");
  } catch (error: unknown) {
    // Accept any thrown value that carries a string `message`, not only Error instances.
    const rawMessage =
      typeof error === "object" && error !== null && "message" in error
        ? (error as { message?: unknown }).message
        : undefined;
    toast.error("Failed to fetch URL", {
      description:
        typeof rawMessage === "string" && rawMessage
          ? rawMessage
          : "Please check the URL and try again.",
    });
  } finally {
    setIsFetchingUrl(false);
  }
};
|
||||
|
||||
const handleNext = async () => {
|
||||
if (!briefText.trim() && allFiles.length === 0) {
|
||||
toast.error("Please enter a brief or upload documents");
|
||||
|
|
@ -244,6 +268,39 @@ export default function WizardUploadPage() {
|
|||
</div>
|
||||
)}
|
||||
|
||||
{/* Reference URL */}
|
||||
<div className="mt-6">
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
<Link className="w-4 h-4 inline mr-1.5 -mt-0.5" />
|
||||
Or add a reference URL
|
||||
</label>
|
||||
<div className="flex gap-2">
|
||||
<Input
|
||||
type="text"
|
||||
placeholder="https://example.com/article"
|
||||
value={referenceUrl}
|
||||
onChange={(e) => setReferenceUrl(e.target.value)}
|
||||
onKeyDown={(e) => {
|
||||
if (e.key === "Enter") {
|
||||
e.preventDefault();
|
||||
handleFetchUrl();
|
||||
}
|
||||
}}
|
||||
className="flex-1"
|
||||
/>
|
||||
<Button
|
||||
type="button"
|
||||
variant="outline"
|
||||
size="sm"
|
||||
onClick={handleFetchUrl}
|
||||
disabled={isFetchingUrl || !referenceUrl.trim()}
|
||||
className="px-4 whitespace-nowrap"
|
||||
>
|
||||
{isFetchingUrl ? "Fetching..." : "Fetch"}
|
||||
</Button>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
{/* Brief Text */}
|
||||
<div className="mt-6">
|
||||
<label className="block text-sm font-medium text-gray-700 mb-2">
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ import {
|
|||
import { OutlineItem } from "./OutlineItem";
|
||||
import { Button } from "@/components/ui/button";
|
||||
import { FileText, Loader2 } from "lucide-react";
|
||||
import type { WizardUploadedFile } from "@/store/slices/wizardSlice";
|
||||
|
||||
interface OutlineContentProps {
|
||||
outlines: { content: string }[] | null;
|
||||
|
|
@ -25,6 +26,10 @@ interface OutlineContentProps {
|
|||
highestActiveIndex: number;
|
||||
onDragEnd: (event: any) => void;
|
||||
onAddSlide: () => void;
|
||||
/** Map of slide index -> attached file names */
|
||||
slideAttachments?: Record<number, string[]>;
|
||||
/** All uploaded files (for reference) */
|
||||
uploadedFiles?: WizardUploadedFile[];
|
||||
}
|
||||
|
||||
const OutlineContent: React.FC<OutlineContentProps> = ({
|
||||
|
|
@ -34,7 +39,9 @@ const OutlineContent: React.FC<OutlineContentProps> = ({
|
|||
activeSlideIndex,
|
||||
highestActiveIndex,
|
||||
onDragEnd,
|
||||
onAddSlide
|
||||
onAddSlide,
|
||||
slideAttachments,
|
||||
uploadedFiles,
|
||||
}) => {
|
||||
const sensors = useSensors(
|
||||
useSensor(PointerSensor),
|
||||
|
|
@ -104,6 +111,7 @@ const OutlineContent: React.FC<OutlineContentProps> = ({
|
|||
isStreaming={isStreaming}
|
||||
isActiveStreaming={activeSlideIndex === index}
|
||||
isStableStreaming={highestActiveIndex >= 0 && index < highestActiveIndex}
|
||||
attachedFiles={slideAttachments?.[index]}
|
||||
/>
|
||||
))
|
||||
) :
|
||||
|
|
@ -119,6 +127,7 @@ const OutlineContent: React.FC<OutlineContentProps> = ({
|
|||
isStreaming={isStreaming}
|
||||
isActiveStreaming={false}
|
||||
isStableStreaming={false}
|
||||
attachedFiles={slideAttachments?.[index]}
|
||||
/>
|
||||
))}
|
||||
</SortableContext>}
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import { useSortable } from "@dnd-kit/sortable"
|
||||
import { CSS } from "@dnd-kit/utilities"
|
||||
import { Trash2 } from "lucide-react"
|
||||
import { Paperclip, Trash2 } from "lucide-react"
|
||||
import { RootState } from "@/store/store"
|
||||
import { useDispatch, useSelector } from "react-redux"
|
||||
import { deleteSlideOutline, setOutlines } from "@/store/slices/presentationGeneration"
|
||||
|
|
@ -18,6 +18,8 @@ interface OutlineItemProps {
|
|||
isStreaming: boolean
|
||||
isActiveStreaming?: boolean
|
||||
isStableStreaming?: boolean
|
||||
/** File names attached to this slide */
|
||||
attachedFiles?: string[]
|
||||
}
|
||||
|
||||
export function OutlineItem({
|
||||
|
|
@ -26,6 +28,7 @@ export function OutlineItem({
|
|||
isStreaming,
|
||||
isActiveStreaming = false,
|
||||
isStableStreaming = false,
|
||||
attachedFiles,
|
||||
}: OutlineItemProps) {
|
||||
const {
|
||||
outlines,
|
||||
|
|
@ -164,6 +167,21 @@ export function OutlineItem({
|
|||
/>
|
||||
)}
|
||||
|
||||
{/* Attached file badges */}
|
||||
{attachedFiles && attachedFiles.length > 0 && (
|
||||
<div className="flex flex-wrap gap-1.5 mt-1.5">
|
||||
{attachedFiles.map((fileName) => (
|
||||
<span
|
||||
key={fileName}
|
||||
className="inline-flex items-center gap-1 rounded-full bg-[#5146E5]/5 border border-[#5146E5]/15 px-2 py-0.5 text-[10px] text-[#5146E5]/80"
|
||||
>
|
||||
<Paperclip className="w-2.5 h-2.5" />
|
||||
<span className="truncate max-w-[120px]">{fileName}</span>
|
||||
</span>
|
||||
))}
|
||||
</div>
|
||||
)}
|
||||
|
||||
</div>
|
||||
|
||||
{/* Action Buttons */}
|
||||
|
|
|
|||
|
|
@ -121,6 +121,34 @@ export class WizardApi {
|
|||
await ApiResponseHandler.handleResponse(response, "Failed to cancel job");
|
||||
}
|
||||
|
||||
/**
 * Ask the backend to download `url` and return the text it extracted.
 *
 * Sends a POST to `/api/v1/ppt/files/fetch-url` with `{ url }` as the JSON
 * body and resolves with whatever `ApiResponseHandler.handleResponse` yields.
 * NOTE(review): the declared `string` result assumes the handler resolves to
 * plain text for this endpoint — confirm against the server response shape.
 */
static async fetchUrl(url: string): Promise<string> {
  const request: RequestInit = {
    method: "POST",
    headers: getHeader(),
    body: JSON.stringify({ url }),
    cache: "no-cache",
  };
  const response = await fetch("/api/v1/ppt/files/fetch-url", request);
  const extracted = await ApiResponseHandler.handleResponse(response, "Failed to fetch URL");
  return extracted;
}
|
||||
|
||||
/**
 * Ask the backend whether the brief needs follow-up questions.
 *
 * Best-effort: briefs shorter than 10 characters (after trimming) are not
 * sent, and any network, HTTP or payload problem resolves to an empty list
 * instead of rejecting.
 *
 * @param content The brief text entered by the user.
 * @returns Unique, non-blank question strings; `[]` when none are available.
 */
static async checkFollowUpQuestions(content: string): Promise<string[]> {
  if (!content || content.trim().length < 10) return [];
  try {
    const response = await fetch("/api/v1/ppt/content/follow-up-questions", {
      method: "POST",
      headers: getHeader(),
      body: JSON.stringify({ content }),
      cache: "no-cache",
    });
    const data: unknown = await ApiResponseHandler.handleResponse(response, "");

    // Validate the payload instead of trusting it: callers iterate the result
    // and use each question string as a lookup key.
    const questions =
      typeof data === "object" && data !== null
        ? (data as { questions?: unknown }).questions
        : undefined;
    if (!Array.isArray(questions)) return [];

    const usable = questions.filter(
      (q): q is string => typeof q === "string" && q.trim().length > 0
    );
    // Drop duplicates so two identical questions cannot share one answer slot.
    return [...new Set(usable)];
  } catch {
    // Deliberate: follow-up questions are optional and must never block the wizard.
    return [];
  }
}
|
||||
|
||||
/** Create presentation (outline-only, like existing flow) */
|
||||
static async createPresentation(params: {
|
||||
content: string;
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import { cn } from "@/lib/utils"
|
||||
import { Loader } from "./loader"
|
||||
import { HamsterLoader } from "./hamster-loader"
|
||||
import { ProgressBar } from "./progress-bar"
|
||||
import { useEffect, useState } from "react"
|
||||
|
||||
|
|
@ -53,7 +53,9 @@ export const OverlayLoader = ({
|
|||
)}
|
||||
|
||||
>
|
||||
<img loading="eager" src={'/loading.gif'} alt="loading" width={250} height={250} />
|
||||
<div className="py-8">
|
||||
<HamsterLoader size="lg" />
|
||||
</div>
|
||||
{showProgress ? (
|
||||
<div className="w-full space-y-6 pt-4">
|
||||
<ProgressBar
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ interface WizardState {
|
|||
presentationId: string | null;
|
||||
/** Decomposed document data from server */
|
||||
decomposedFiles: any[];
|
||||
/** Map of slide index -> attached file names */
|
||||
slideAttachments: Record<number, string[]>;
|
||||
}
|
||||
|
||||
const STORAGE_KEY = "deckforge_wizard";
|
||||
|
|
@ -77,6 +79,7 @@ const defaultState: WizardState = {
|
|||
jobId: null,
|
||||
presentationId: null,
|
||||
decomposedFiles: [],
|
||||
slideAttachments: {},
|
||||
};
|
||||
|
||||
const persisted = loadFromStorage();
|
||||
|
|
@ -142,6 +145,31 @@ const wizardSlice = createSlice({
|
|||
state.decomposedFiles = action.payload;
|
||||
saveToStorage(state);
|
||||
},
|
||||
/** Replace the entire slide-index → file-names map, then persist the wizard state. */
setSlideAttachments: (
  state,
  { payload }: PayloadAction<Record<number, string[]>>
) => {
  state.slideAttachments = payload;
  saveToStorage(state);
},
|
||||
/**
 * Link or unlink one file name on one slide.
 *
 * A file that is not yet attached to the slide is appended; an attached file
 * is removed, and the slide's entry is deleted outright once its list becomes
 * empty. The wizard state is persisted afterwards in both cases.
 */
toggleSlideAttachment: (
  state,
  action: PayloadAction<{ slideIndex: number; fileName: string }>
) => {
  const { slideIndex, fileName } = action.payload;
  const attached = state.slideAttachments[slideIndex] || [];

  if (!attached.includes(fileName)) {
    state.slideAttachments[slideIndex] = [...attached, fileName];
  } else {
    const remaining = attached.filter((name) => name !== fileName);
    if (remaining.length === 0) {
      // Keep the map free of empty lists.
      delete state.slideAttachments[slideIndex];
    } else {
      state.slideAttachments[slideIndex] = remaining;
    }
  }

  saveToStorage(state);
},
|
||||
resetWizard: (state) => {
|
||||
Object.assign(state, defaultState);
|
||||
if (typeof window !== "undefined") {
|
||||
|
|
@ -165,6 +193,8 @@ export const {
|
|||
setJobId,
|
||||
setPresentationId,
|
||||
setDecomposedFiles,
|
||||
setSlideAttachments,
|
||||
toggleSlideAttachment,
|
||||
resetWizard,
|
||||
} = wizardSlice.actions;
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue