Use gemini-2.5-flash-lite for master deck parsing

- Added PARSING_MODEL env var (default: gemini-2.5-flash-lite)
- Master deck parser now uses fast lite model
- 3-4x faster than gemini-3.1-pro: 22 layouts = 8-12 min vs 44-60 min
- Keep gemini-3.1-pro for presentation generation (quality)

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
This commit is contained in:
Vadym Samoilenko 2026-02-27 21:50:45 +00:00
parent 73d8aa8c7a
commit cf76b8048d
2 changed files with 23 additions and 4 deletions

View file

@ -19,12 +19,15 @@ DEV_AUTH_PASSWORD=change-me-to-secure-password
# AI Provider — Google Gemini for all AI operations
GOOGLE_API_KEY=your_google_api_key_here
GOOGLE_MODEL=models/gemini-2.0-flash
GOOGLE_MODEL=gemini-3.1-pro-preview
IMAGE_PROVIDER=gemini_flash
# Master deck parsing model (optional; defaults to gemini-2.5-flash-lite when unset)
PARSING_MODEL=gemini-2.5-flash-lite
# Get your Google AI API key at: https://aistudio.google.com/app/apikey
# Gemini 2.0 Flash: Fast, cheap, great for text generation
# Gemini 3.1 Flash: Excellent vision model for image analysis
# Gemini 3.1 Pro: Best quality for presentations
# Gemini 2.5 Flash Lite: Super fast for parsing (recommended)
# Optional: Image fallback providers (if Gemini image gen fails)
PEXELS_API_KEY=

View file

@ -36,6 +36,21 @@ from api.v1.ppt.endpoints.prompts import (
from services.documents_loader import DocumentsLoader
from services.llm_service import UnifiedLLMService, _detect_llm_provider, LLMProvider
# Master deck parsing uses its own model, configurable via PARSING_MODEL (default: gemini-2.5-flash-lite)
def _get_parsing_provider() -> dict:
    """Build the provider config used for master deck parsing.

    Reads ``GOOGLE_API_KEY`` and ``PARSING_MODEL`` from the environment.
    Blank or whitespace-only values are treated as unset, so a line such as
    ``PARSING_MODEL=`` in a .env file falls back to the default model instead
    of passing an empty model name downstream.

    Returns:
        A dict with the keys ``"provider"`` (always ``LLMProvider.GOOGLE``),
        ``"api_key"`` and ``"model"``.

    Raises:
        ValueError: If ``GOOGLE_API_KEY`` is missing or blank.
    """
    google_key = (os.getenv("GOOGLE_API_KEY") or "").strip()
    if not google_key:
        raise ValueError("GOOGLE_API_KEY required for parsing")

    # Default to the lite model: the original author's note is that it is
    # fast and good enough for layout parsing. `or` (rather than a getenv
    # default) also covers the "set but empty" case.
    parsing_model = (
        (os.getenv("PARSING_MODEL") or "").strip() or "gemini-2.5-flash-lite"
    )

    return {
        "provider": LLMProvider.GOOGLE,
        "api_key": google_key,
        "model": parsing_model,
    }
# OXML namespaces
NS = {
"a": "http://schemas.openxmlformats.org/drawingml/2006/main",
@ -460,7 +475,8 @@ async def _do_parse(deck_id: uuid.UUID) -> dict:
all_fonts.update(normalize_font_family_name(f) for f in raw if f)
# 6. Process each item through LLM pipeline
llm_provider = _detect_llm_provider()
llm_provider = _get_parsing_provider() # Use fast lite model for parsing
print(f"[MasterDeckParser] Using {llm_provider['model']} for parsing")
layouts_result = []
# Build per-layout screenshot mapping