diff --git a/.env.example b/.env.example index 4caaace..0cb83e9 100644 --- a/.env.example +++ b/.env.example @@ -19,12 +19,15 @@ DEV_AUTH_PASSWORD=change-me-to-secure-password # AI Provider — Google Gemini for all AI operations GOOGLE_API_KEY=your_google_api_key_here -GOOGLE_MODEL=models/gemini-2.0-flash +GOOGLE_MODEL=gemini-3.1-pro-preview IMAGE_PROVIDER=gemini_flash +# Master deck parsing model (use fast lite model for speed) +PARSING_MODEL=gemini-2.5-flash-lite + # Get your Google AI API key at: https://aistudio.google.com/app/apikey -# Gemini 2.0 Flash: Fast, cheap, great for text generation -# Gemini 3.1 Flash: Excellent vision model for image analysis +# Gemini 3.1 Pro: Best quality for presentations +# Gemini 2.5 Flash Lite: Super fast for parsing (recommended) # Optional: Image fallback providers (if Gemini image gen fails) PEXELS_API_KEY= diff --git a/backend/services/master_deck_parser_service.py b/backend/services/master_deck_parser_service.py index f3ee332..44fc161 100644 --- a/backend/services/master_deck_parser_service.py +++ b/backend/services/master_deck_parser_service.py @@ -36,6 +36,21 @@ from api.v1.ppt.endpoints.prompts import ( from services.documents_loader import DocumentsLoader from services.llm_service import UnifiedLLMService, _detect_llm_provider, LLMProvider +# Use fast lite model for parsing (gemini-2.5-flash-lite) +def _get_parsing_provider(): + """Get provider config for master deck parsing with fast lite model.""" + google_key = os.getenv("GOOGLE_API_KEY") + if not google_key: + raise ValueError("GOOGLE_API_KEY required for parsing") + + # Use gemini-2.5-flash-lite: extremely fast, good enough for layout parsing + parsing_model = os.getenv("PARSING_MODEL", "gemini-2.5-flash-lite") + return { + "provider": LLMProvider.GOOGLE, + "api_key": google_key, + "model": parsing_model + } + # OXML namespaces NS = { "a": "http://schemas.openxmlformats.org/drawingml/2006/main", @@ -460,7 +475,8 @@ async def _do_parse(deck_id: uuid.UUID) -> dict: all_fonts.update(normalize_font_family_name(f) for f in raw if f) # 6. Process each item through LLM pipeline - llm_provider = _detect_llm_provider() + llm_provider = _get_parsing_provider() # Use fast lite model for parsing + print(f"[MasterDeckParser] Using {llm_provider['model']} for parsing") layouts_result = [] # Build per-layout screenshot mapping