ppt-tool/backend/services/llm_service.py
Vadym Samoilenko c431d4ab45 Implement critical security fixes and modern design system (Pre-launch P0 tasks)
Security Improvements (P0.0-P0.4):
- P0.0: Migrate to Gemini-only AI stack (simplified, single billing)
- P0.1: Fix CORS to restrict allowed origins from env (was *)
- P0.2: Remove hardcoded dev password, require env var
- P0.3: Add rate limiting (slowapi) - 3-10 req/min on sensitive endpoints
- P0.4: Add request size limits (100MB default via middleware)

New Features:
- Unified LLM service with Google Gemini priority
- OXML geometry extractor for layout parsing
- TSX validator for generated React components
- Client ID support in presentation requests with access control
- Configurable LLM/image timeouts via env vars

Modern Design System (P0.9 - partial):
- Enhanced CSS design tokens (primary, semantic colors, shadows)
- Typography scale (h1-h4, body variants, caption)
- Modern animations (fadeIn, slideIn, scaleIn)
- Updated Button component with better variants and hover effects
- Created unified Card and StatusBadge components
- Applied design system to Dashboard and Settings pages

Backend Improvements:
- Master deck parser simplification
- Slide-to-HTML endpoint cleanup (325 lines removed)
- Better error handling in prompts endpoint

Frontend Improvements:
- Settings UI simplified to show only Google/Gemini
- Dashboard uses CSS variables instead of hardcoded colors
- Improved button transitions and hover states

Co-Authored-By: Claude Sonnet 4.5 (1M context) <noreply@anthropic.com>
2026-02-27 18:28:24 +00:00

136 lines
5.5 KiB
Python

import os
import asyncio
from typing import Optional, List, Dict
import traceback
class LLMProvider:
    """String identifiers for the LLM back-ends this module can dispatch to.

    The values are plain strings so they can be compared directly with the
    ``"provider"`` entry of a provider-configuration dict.
    """

    OPENAI = "openai"
    ANTHROPIC = "anthropic"
    GOOGLE = "google"
def _detect_llm_provider() -> Dict[str, str]:
    """Build the Google Gemini provider configuration from the environment.

    Only Google Gemini is used; there are no fallback providers.

    Environment variables read:
        GOOGLE_API_KEY: required; surrounding whitespace is ignored.
        GOOGLE_MODEL: optional; falls back to ``"gemini-2.0-flash-exp"``
            when unset or blank.

    Returns:
        A dict with the keys ``"provider"``, ``"api_key"`` and ``"model"``.
        This function never returns ``None``; a missing key is reported by
        raising instead.

    Raises:
        ValueError: if GOOGLE_API_KEY is unset, empty, or whitespace-only.
    """
    # Strip so that a key of only spaces (or one with a trailing newline
    # copied from a .env file) is not passed through as a credential.
    google_key = (os.getenv("GOOGLE_API_KEY") or "").strip()
    if not google_key:
        raise ValueError(
            "GOOGLE_API_KEY is required. Please set it in .env file.\n"
            "Get your key at: https://aistudio.google.com/app/apikey"
        )

    # `or` instead of a getenv() default: an empty "GOOGLE_MODEL=" line yields
    # "" from getenv, which must also fall back to the default model.
    model = (os.getenv("GOOGLE_MODEL") or "").strip() or "gemini-2.0-flash-exp"

    return {
        "provider": LLMProvider.GOOGLE,
        "api_key": google_key,
        "model": model,
    }
class UnifiedLLMService:
    """Single entry point for vision (image + text) completions across LLM providers.

    Every method is a ``@staticmethod``; the class holds no state. The public
    entry points are ``generate_vision_completion`` and
    ``clean_llm_code_output``; the ``_call_*`` methods are per-provider
    adapters that run the blocking SDK call in a worker thread.
    """

    @staticmethod
    async def generate_vision_completion(
        system_prompt: str,
        user_text: str,
        image_base64: str,
        media_type: str = "image/png",
        provider_override: Optional[Dict] = None,
        max_tokens: int = 8192
    ) -> str:
        """
        Sends a vision-based generation request to the active LLM provider.

        Args:
            system_prompt: Instruction text sent ahead of the user content.
            user_text: The user's textual request accompanying the image.
            image_base64: The image, base64-encoded.
            media_type: MIME type of the image.
            provider_override: Provider configuration dict (keys "provider",
                "api_key", optionally "model"). When falsy, the configuration
                is obtained from ``_detect_llm_provider()``.
            max_tokens: Output-token limit. NOTE: currently only forwarded to
                the Anthropic adapter; the OpenAI adapter uses a fixed 8000 and
                the Google adapter sends no limit.

        Returns:
            The text produced by the provider ("" when the provider returned
            no text).

        Raises:
            ValueError: if no provider configuration is available (raised
                before the provider call, so it is not wrapped).
            KeyError: if the configuration dict has no "provider" entry.
            Exception: wrapping any failure during dispatch or the provider
                call (including an unsupported provider name); the original
                error is available as ``__cause__``.
        """
        provider = provider_override or _detect_llm_provider()
        if not provider:
            raise ValueError("No LLM provider configuration found in environment variables.")

        print(f"[UnifiedLLMService] Utilizing {provider['provider']} ({provider.get('model', 'default')})")

        try:
            if provider["provider"] == LLMProvider.OPENAI:
                return await UnifiedLLMService._call_openai(
                    provider, system_prompt, user_text, image_base64, media_type
                )
            elif provider["provider"] == LLMProvider.ANTHROPIC:
                return await UnifiedLLMService._call_anthropic(
                    provider, system_prompt, user_text, image_base64, media_type, max_tokens
                )
            elif provider["provider"] == LLMProvider.GOOGLE:
                return await UnifiedLLMService._call_google(
                    provider, system_prompt, user_text, image_base64, media_type
                )
            else:
                raise ValueError(f"Unsupported provider: {provider['provider']}")
        except Exception as e:
            print(f"[UnifiedLLMService] Error from {provider['provider']}: {e}")
            traceback.print_exc()
            # `from e` records the original error as __cause__ so callers and
            # logs see an explicit chain instead of an incidental context.
            raise Exception(
                f"Failed to generate completion using {provider['provider']}: {str(e)}"
            ) from e

    @staticmethod
    async def _call_openai(provider: dict, system_prompt: str, user_text: str, image_base64: str, media_type: str) -> str:
        """Request a completion from OpenAI's chat-completions API.

        The image is sent as a ``data:`` URL built from ``media_type`` and
        ``image_base64``. Falls back to model "gpt-4o" when the provider dict
        has no "model" entry. Returns "" when the message content is empty.
        """
        # Imported here, so the openai package is only needed when this
        # adapter is actually invoked.
        from openai import OpenAI

        def _sync_call():
            client = OpenAI(api_key=provider["api_key"])
            data_url = f"data:{media_type};base64,{image_base64}"
            # Using standard chat completion API instead of beta responses API for better stability
            response = client.chat.completions.create(
                model=provider.get("model", "gpt-4o"),
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": [
                        {"type": "image_url", "image_url": {"url": data_url}},
                        {"type": "text", "text": user_text}
                    ]}
                ],
                max_tokens=8000
            )
            return response.choices[0].message.content or ""

        # The SDK client used here is synchronous; run it off the event loop.
        return await asyncio.to_thread(_sync_call)

    @staticmethod
    async def _call_anthropic(provider: dict, system_prompt: str, user_text: str, image_base64: str, media_type: str, max_tokens: int) -> str:
        """Request a completion from Anthropic's messages API.

        The image is sent as a base64 source block followed by the user text.
        Falls back to model "claude-3-5-sonnet-20240620" when the provider dict
        has no "model" entry. Returns the text of the first content block, or
        "" when the response has no content.
        """
        # Imported here, so the anthropic package is only needed when this
        # adapter is actually invoked.
        import anthropic

        def _sync_call():
            client = anthropic.Anthropic(api_key=provider["api_key"])
            response = client.messages.create(
                model=provider.get("model", "claude-3-5-sonnet-20240620"),
                max_tokens=max_tokens,
                system=system_prompt,
                messages=[{
                    "role": "user",
                    "content": [
                        {"type": "image", "source": {
                            "type": "base64", "media_type": media_type, "data": image_base64,
                        }},
                        {"type": "text", "text": user_text},
                    ],
                }],
            )
            return response.content[0].text if response.content else ""

        # The SDK client used here is synchronous; run it off the event loop.
        return await asyncio.to_thread(_sync_call)

    @staticmethod
    async def _call_google(provider: dict, system_prompt: str, user_text: str, image_base64: str, media_type: str) -> str:
        """Request a completion from Google Gemini via ``google.genai``.

        The system prompt, the image and the user text are passed, in that
        order, as the ``contents`` list. Falls back to model
        "gemini-2.0-flash" when the provider dict has no "model" entry.
        Returns "" when the response carries no text.
        """
        # Imported here, so the google-genai package is only needed when this
        # adapter is actually invoked.
        import google.genai as genai

        def _sync_call():
            client = genai.Client(api_key=provider["api_key"])
            model_name = provider.get("model", "gemini-2.0-flash")
            response = client.models.generate_content(
                model=model_name,
                contents=[
                    # The system prompt travels as ordinary content here, not
                    # as a separate system instruction.
                    system_prompt,
                    # NOTE(review): the image is forwarded as base64 *text*;
                    # confirm the SDK expects that here rather than raw bytes.
                    {"inline_data": {"mime_type": media_type, "data": image_base64}},
                    user_text,
                ],
            )
            return response.text or ""

        # The SDK client used here is synchronous; run it off the event loop.
        return await asyncio.to_thread(_sync_call)

    @staticmethod
    def clean_llm_code_output(text: str, lang_identifiers: Optional[List[str]] = None) -> str:
        """Removes markdown backticks and specific language identifiers from LLM output.

        Args:
            text: Raw LLM output, possibly wrapped in fenced code blocks.
            lang_identifiers: Language tags to strip when they directly follow
                an opening fence. Defaults to
                ``["html", "tsx", "typescript", "javascript"]``.

        Returns:
            ``text`` with every "```<lang>" opener and every remaining "```"
            removed, stripped of surrounding whitespace. Empty or ``None``
            input yields "".
        """
        if not text:
            return ""

        # None sentinel instead of a list literal in the signature: a mutable
        # default would be shared between all calls.
        if lang_identifiers is None:
            lang_identifiers = ["html", "tsx", "typescript", "javascript"]

        cleaned = text
        # Longest tag first, so that a tag which is a prefix of another
        # (e.g. "ts" vs "tsx") cannot strip half of the longer one and leave
        # its tail ("x") behind in the output.
        for lang in sorted(lang_identifiers, key=len, reverse=True):
            cleaned = cleaned.replace(f"```{lang}", "")
        cleaned = cleaned.replace("```", "")
        return cleaned.strip()
# Module-level instance of the service. Every method on UnifiedLLMService is a
# @staticmethod, so calling through this object or through the class itself is
# equivalent.
llm_service = UnifiedLLMService()