@classmethod
def _call_google_vision(
    cls,
    prompt: str,
    image_asset: Any,
    image_ref: Any,
    model: str,
    debug: bool
) -> Dict[str, Any]:
    """Call the Google Gemini vision API with a prompt and one or two images.

    Args:
        prompt: Instruction text, sent as the first content part.
        image_asset: Image to analyse. Accepted forms are a PIL image, a
            path to an existing file, or raw encoded image bytes.
        image_ref: Optional reference image in any of the same forms.
            ``None`` sends the asset image only.
        model: Model name handed to ``genai.GenerativeModel``.
        debug: Accepted so the call signature matches the other provider
            helpers; this method does not read it.

    Returns:
        A dict with the keys ``'text'``, ``'model'``, ``'provider'`` and
        ``'tokens_used'`` (``None`` when the response carries no usage
        metadata with a total token count).

    Raises:
        ConfigurationError: If ``image_asset`` is not in a supported form.
    """
    import logging

    def _as_pil(source: Any) -> Any:
        """Return *source* as a PIL image, or None if its form is unsupported."""
        if isinstance(source, Image.Image):
            return source
        if isinstance(source, str) and os.path.exists(source):
            return Image.open(source)
        if isinstance(source, bytes):
            return Image.open(BytesIO(source))
        return None

    genai = cls.get_client('google', model)

    pil_image = _as_pil(image_asset)
    if pil_image is None:
        raise ConfigurationError("Could not load image for Google Vision API")

    # Content parts are sent in order: prompt, asset, then optional reference.
    contents = [prompt, pil_image]

    if image_ref is not None:
        # The reference goes through the same loader as the asset so both
        # accept identical input forms (including raw bytes).
        ref_image = _as_pil(image_ref)
        # Explicit None check rather than relying on the object's truthiness.
        if ref_image is not None:
            contents.append(ref_image)
        else:
            # Best effort: the request still goes out with the asset only,
            # but the dropped reference is made visible in the logs.
            logging.getLogger(__name__).warning(
                "Reference image of type %s could not be loaded; "
                "sending asset image only",
                type(image_ref).__name__,
            )

    gen_model = genai.GenerativeModel(model)
    response = gen_model.generate_content(contents)

    # Either attribute may be missing; fall back to None in both cases.
    usage = getattr(response, 'usage_metadata', None)
    tokens_used = getattr(usage, 'total_token_count', None)

    return {
        'text': response.text,
        'model': model,
        'provider': 'google',
        'tokens_used': tokens_used,
    }
Check composition and framing -4. Determine if image meets professional standards for H&M marketing +3. Check that all text and titles are clear and legible +4. Check composition and framing +5. Determine if image meets professional standards for H&M marketing YOUR OUTPUT MUST INCLUDE: Format your response as JSON (you can include explanatory text before/after the JSON): diff --git a/modules/hm_qc/routes.py b/modules/hm_qc/routes.py index 4d2ab77..fc6c286 100644 --- a/modules/hm_qc/routes.py +++ b/modules/hm_qc/routes.py @@ -186,6 +186,7 @@ def execute(): session_id = data.get('session_id') profile_name = data.get('profile') job_number = data.get('job_number') + llm_provider = data.get('llm_provider') if not session_id or not profile_name: return jsonify({'error': 'Missing required parameters'}), 400 @@ -209,6 +210,14 @@ def execute(): if not profile: return jsonify({'error': f'Profile "{profile_name}" not found'}), 404 + # Override LLM provider if user selected one + if llm_provider: + provider_models = {'openai': 'gpt-4o', 'google': 'gemini-2.0-flash'} + for check in profile.get('checks', []): + if check.get('llm_provider'): + check['llm_provider'] = llm_provider + check['llm_model'] = provider_models.get(llm_provider, check.get('llm_model')) + logger.info(f"Starting QC execution for session {session_id} with profile {profile_name}") executor = QCExecutor( @@ -262,6 +271,7 @@ def execute_batch(): session_id = data.get('session_id') profile_name = data.get('profile') job_number = data.get('job_number') + llm_provider = data.get('llm_provider') if not session_id or not profile_name: return jsonify({'error': 'Missing required parameters'}), 400 @@ -283,6 +293,14 @@ def execute_batch(): if not profile: return jsonify({'error': f'Profile "{profile_name}" not found'}), 404 + # Override LLM provider if user selected one + if llm_provider: + provider_models = {'openai': 'gpt-4o', 'google': 'gemini-2.0-flash'} + for check in profile.get('checks', []): + if 
check.get('llm_provider'): + check['llm_provider'] = llm_provider + check['llm_model'] = provider_models.get(llm_provider, check.get('llm_model')) + logger.info(f"Starting batch QC for {len(files)} files (session: {session_id})") batch_executor = BatchQCExecutor( diff --git a/modules/hm_qc/templates/hm_qc/configure.html b/modules/hm_qc/templates/hm_qc/configure.html index fa63067..02dfd25 100644 --- a/modules/hm_qc/templates/hm_qc/configure.html +++ b/modules/hm_qc/templates/hm_qc/configure.html @@ -39,6 +39,15 @@ {% endfor %} +
+ + +
Select which AI model to use for image quality analysis
+
+
=1.12.0 anthropic>=0.18.0 +google-generativeai>=0.5.0 # Video Processing (Video QC + Video Master) opencv-python>=4.8.0