ppt-tool/backend/api/v1/ppt/endpoints/files.py
Vadym Samoilenko e8295d6e71 Phase 4: Fix critical bugs, improve document parsing, add vision OCR
- Fix SSE stream 500: use async_session_maker inside StreamingResponse generator
  (Depends session closes when endpoint returns, before streaming starts)
- Fix template application: store template_name in prepare endpoint so worker
  uses the selected custom template instead of defaulting to "general"
- Fix OverlayLoader: replace loading.gif with HamsterLoader component
- Fix parse_mode default: change from "slides" to "layouts" to avoid 70+ layouts
- Update Gemini Flash model to gemini-3.1-flash-image-preview
- Improve DOCX parsing: python-docx for structured table extraction, OCR enabled
- Add vision-based image text extraction via Gemini for uploaded images
- Add LayoutParser integration for slide layout structure analysis
- Add Phase 4 MVP features: transfer ownership, URL input, follow-up questions,
  attachment-to-slide mapping, content router

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-27 14:07:00 +00:00

243 lines
7.4 KiB
Python

import json
import mimetypes
import os
import uuid
from typing import Annotated, List, Optional
from fastapi import APIRouter, Body, Depends, File, HTTPException, UploadFile
from pydantic import BaseModel
from models.sql.user import UserModel
from utils.auth_dependencies import get_current_user
from constants.documents import (
EXCEL_TYPES,
IMAGE_UPLOAD_TYPES,
SPREADSHEET_TYPES,
UPLOAD_ACCEPTED_FILE_TYPES,
)
from models.decomposed_file_info import DecomposedFileInfo
from services.attachment_parser_service import (
extract_images_metadata,
parse_csv,
parse_excel,
parse_url,
)
from services.documents_loader import DocumentsLoader
from services.temp_file_service import TEMP_FILE_SERVICE
from utils.validators import validate_files
# Router that every endpoint in this module registers on; created with the
# "/files" prefix and the "Files" tag.
FILES_ROUTER = APIRouter(prefix="/files", tags=["Files"])
def _is_spreadsheet(file_path: str) -> bool:
    """Tell whether ``file_path`` should be handled as a spreadsheet.

    A path qualifies when the MIME type guessed from its name is listed in
    ``EXCEL_TYPES`` or ``SPREADSHEET_TYPES``, or when its lower-cased
    extension is ``.xlsx``, ``.xls`` or ``.csv``.
    """
    guessed_type = mimetypes.guess_type(file_path)[0]
    if guessed_type in EXCEL_TYPES or guessed_type in SPREADSHEET_TYPES:
        return True
    # Fall back to the extension for names whose MIME type is not recognised.
    _, suffix = os.path.splitext(file_path)
    return suffix.lower() in (".xlsx", ".xls", ".csv")
def _is_image(file_path: str) -> bool:
    """Tell whether the MIME type guessed from ``file_path`` is in ``IMAGE_UPLOAD_TYPES``."""
    guessed_type, _encoding = mimetypes.guess_type(file_path)
    is_accepted_image = guessed_type in IMAGE_UPLOAD_TYPES
    return is_accepted_image
@FILES_ROUTER.post("/upload", response_model=List[str])
async def upload_files(files: Optional[List[UploadFile]]):
    """Persist uploaded files into a fresh temporary directory.

    Args:
        files: Files received with the request.

    Returns:
        The temp-file paths the uploads were written to, in upload order.

    Raises:
        HTTPException: 400 when no files were supplied.
    """
    if not files:
        raise HTTPException(status_code=400, detail="Documents are required")

    # Validate before touching the filesystem so a rejected upload does not
    # leave an empty temp directory behind.
    # NOTE(review): validate_files presumably raises on invalid input (its
    # return value is ignored here) — confirm against utils.validators.
    validate_files(files, True, True, 100, UPLOAD_ACCEPTED_FILE_TYPES)
    temp_dir = TEMP_FILE_SERVICE.create_temp_dir(str(uuid.uuid4()))

    temp_files: List[str] = []
    for each_file in files:
        # The filename is client-controlled: keep only its final component
        # (treating "\" as a separator too) so a name like "../../x" cannot
        # steer the write outside the temp directory. Fall back to a random
        # name when nothing usable remains.
        raw_name = (each_file.filename or "").replace("\\", "/")
        safe_name = os.path.basename(raw_name) or str(uuid.uuid4())
        temp_path = TEMP_FILE_SERVICE.create_temp_file_path(safe_name, temp_dir)

        # Read first, then open: a failed read must not leave an empty file.
        content = await each_file.read()
        with open(temp_path, "wb") as f:
            f.write(content)
        temp_files.append(temp_path)
    return temp_files
@FILES_ROUTER.post("/decompose", response_model=List[DecomposedFileInfo])
async def decompose_files(file_paths: Annotated[List[str], Body(embed=True)]):
    """Turn a list of file paths into text, table, or image descriptors.

    Each path is classified as plain text (``.txt`` suffix), spreadsheet,
    image, or "other" document. Results are returned grouped in this order:
    documents, plain text files, spreadsheets, images.

    Args:
        file_paths: Paths of the files to decompose, taken from the request
            body under the ``file_paths`` key.

    Returns:
        One ``DecomposedFileInfo`` per produced artefact.
    """
    # NOTE(review): file_paths come straight from the request body and are
    # opened as-is; nothing here restricts them to previously uploaded temp
    # files. Consider validating them against the temp root.
    temp_dir = TEMP_FILE_SERVICE.create_temp_dir(str(uuid.uuid4()))

    txt_files: List[str] = []
    spreadsheet_files: List[str] = []
    image_files: List[str] = []
    other_files: List[str] = []
    for file_path in file_paths:
        if file_path.endswith(".txt"):
            txt_files.append(file_path)
        elif _is_spreadsheet(file_path):
            spreadsheet_files.append(file_path)
        elif _is_image(file_path):
            image_files.append(file_path)
        else:
            other_files.append(file_path)

    response: List[DecomposedFileInfo] = []

    # --- Document files (PDF, DOCX, PPTX) via DocumentsLoader ---
    if other_files:
        response.extend(await _decompose_documents(other_files, temp_dir))

    # --- Plain text files: returned as-is, no conversion needed ---
    response.extend(
        DecomposedFileInfo(
            name=os.path.basename(txt_path),
            file_path=txt_path,
            file_type="text",
        )
        for txt_path in txt_files
    )

    # --- Spreadsheet files (Excel, CSV) ---
    response.extend(
        _decompose_spreadsheet(sp_path, temp_dir) for sp_path in spreadsheet_files
    )

    # --- Image files (with vision-based text extraction) ---
    for img_path in image_files:
        response.append(await _decompose_image(img_path, temp_dir))

    return response


async def _decompose_documents(document_paths: List[str], temp_dir):
    """Load documents via DocumentsLoader and write each one out as a UTF-8 text file."""
    documents_loader = DocumentsLoader(file_paths=document_paths)
    await documents_loader.load_documents(temp_dir)

    results: List[DecomposedFileInfo] = []
    # Assumes the loader yields one document per input path, in input order
    # (the pairing below relies on it) — TODO confirm against DocumentsLoader.
    for source_path, parsed_doc in zip(document_paths, documents_loader.documents):
        out_path = TEMP_FILE_SERVICE.create_temp_file_path(
            f"{uuid.uuid4()}.txt", temp_dir
        )
        # Explicit UTF-8: the locale default can fail on non-ASCII content.
        with open(out_path, "w", encoding="utf-8") as text_file:
            text_file.write(parsed_doc.replace("<br>", "\n"))
        results.append(
            DecomposedFileInfo(
                name=os.path.basename(source_path),
                file_path=out_path,
                file_type="text",
            )
        )
    return results


def _decompose_spreadsheet(sp_path: str, temp_dir):
    """Parse one spreadsheet and store its tables as a JSON file for downstream use."""
    ext = os.path.splitext(sp_path)[1].lower()
    if ext in (".xlsx", ".xls"):
        tables = parse_excel(sp_path)
    else:
        # Everything else classified as a spreadsheet is parsed as CSV.
        tables = [parse_csv(sp_path)]

    json_path = TEMP_FILE_SERVICE.create_temp_file_path(
        f"{uuid.uuid4()}.json", temp_dir
    )
    serialized = [table.model_dump() for table in tables]
    with open(json_path, "w", encoding="utf-8") as jf:
        json.dump(serialized, jf)

    return DecomposedFileInfo(
        name=os.path.basename(sp_path),
        file_path=json_path,
        file_type="table",
        table_data=serialized,
    )


async def _decompose_image(img_path: str, temp_dir):
    """Describe one image: as extracted text when vision OCR yields any, else as an image."""
    info = extract_images_metadata(img_path)

    extracted_text = None
    try:
        # Imported inside the try so that a failing import is handled like any
        # other extraction failure: log it and fall back to the image result.
        from services.docling_service import extract_text_from_image_via_vision

        mime_type, _ = mimetypes.guess_type(img_path)
        with open(img_path, "rb") as f:
            image_bytes = f.read()
        extracted_text = await extract_text_from_image_via_vision(
            image_bytes, mime_type or "image/png"
        )
    except Exception as e:  # best-effort: text extraction is optional
        print(f"[decompose] Vision text extraction failed for {img_path}: {e}")

    if not extracted_text:
        return DecomposedFileInfo(
            name=info.filename,
            file_path=img_path,
            file_type="image",
            image_info=info.model_dump(),
        )

    # Save the extracted text as a text file and report that instead of the image.
    text_path = TEMP_FILE_SERVICE.create_temp_file_path(
        f"{uuid.uuid4()}.txt", temp_dir
    )
    with open(text_path, "w", encoding="utf-8") as tf:
        tf.write(extracted_text)
    return DecomposedFileInfo(
        name=os.path.basename(img_path),
        file_path=text_path,
        file_type="text",
    )
# Request body for the "/url" endpoint.
class UrlParseRequest(BaseModel):
    # Address of the page to fetch.
    url: str
# Response body of the "/url" endpoint.
class UrlParseResponse(BaseModel):
    # Text extracted from the fetched page.
    content: str
    # The URL the content was extracted from.
    url: str
@FILES_ROUTER.post("/url", response_model=UrlParseResponse)
async def parse_url_endpoint(body: UrlParseRequest):
    """Fetch a URL and extract its article content as text.

    Args:
        body: Request payload carrying the URL to fetch.

    Returns:
        The extracted content together with the URL that was fetched.

    Raises:
        HTTPException: 400 when the URL is blank or is not an http(s) URL;
            422 when no content could be extracted.
    """
    # Fetch the same value that was validated: surrounding whitespace is
    # dropped once, up front, not just for the emptiness check.
    url = body.url.strip()
    if not url:
        raise HTTPException(status_code=400, detail="URL is required")

    # Same scheme restriction as the "/fetch-url" endpoint; keeps non-HTTP
    # schemes (e.g. file://) away from the fetcher.
    # NOTE(review): unlike "/fetch-url", this route has no auth dependency —
    # confirm that is intentional.
    if not url.lower().startswith(("http://", "https://")):
        raise HTTPException(status_code=400, detail="Invalid URL")

    content = await parse_url(url)
    if not content:
        raise HTTPException(
            status_code=422, detail="Could not extract content from the provided URL"
        )
    return UrlParseResponse(content=content, url=url)
@FILES_ROUTER.post("/fetch-url")
async def fetch_url_content(
    url: str = Body(..., embed=True),
    _current_user: UserModel = Depends(get_current_user),
):
    """Retrieve the page at ``url`` and return the text extracted from it.

    Requires the ``get_current_user`` dependency to resolve. Responds with
    400 when the URL does not start with ``http://`` or ``https://`` and with
    422 when no text could be extracted.
    """
    has_http_scheme = url.startswith("http://") or url.startswith("https://")
    if not has_http_scheme:
        raise HTTPException(status_code=400, detail="Invalid URL")

    extracted = await parse_url(url)
    if extracted:
        return {"text": extracted, "url": url}

    raise HTTPException(
        status_code=422, detail="Could not extract content from URL"
    )
@FILES_ROUTER.post("/update")
async def update_files(
    file_path: Annotated[str, Body()],
    file: Annotated[UploadFile, File()],
):
    """Overwrite the file at ``file_path`` with the uploaded content.

    Args:
        file_path: Destination path, taken from the request body.
        file: Upload whose bytes replace the destination's content.

    Returns:
        A confirmation message.
    """
    # NOTE(review): file_path is client-supplied and written as-is, and this
    # route declares no auth dependency — consider restricting writes to the
    # temp-file root.

    # Read the upload before opening the target: "wb" truncates on open, so a
    # failed read would otherwise wipe the existing file.
    content = await file.read()
    with open(file_path, "wb") as f:
        f.write(content)
    return {"message": "File updated successfully"}