Batch 3: Add title legibility check, Google Gemini support, LLM provider selector
- Update the image quality prompt to evaluate text/title legibility
- Add Google Gemini (google-generativeai) as an LLM provider in LLMConfig
- Add an AI Provider dropdown to the configure page (OpenAI GPT-4o / Google Gemini)
- Pass the selected provider through the execute routes to override profile defaults
- Add google-generativeai to requirements.txt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
1c582ffcf4
commit
91dec41e0b
5 changed files with 105 additions and 5 deletions
|
|
@ -32,6 +32,10 @@ class LLMConfig:
|
|||
'api_key_env': 'OPENAI_API_KEY',
|
||||
'models': ['gpt-4o', 'gpt-4-turbo', 'gpt-4', 'gpt-3.5-turbo']
|
||||
},
|
||||
'google': {
|
||||
'api_key_env': 'GOOGLE_API_KEY',
|
||||
'models': ['gemini-2.0-flash', 'gemini-2.5-pro', 'gemini-1.5-pro']
|
||||
},
|
||||
'anthropic': {
|
||||
'api_key_env': 'ANTHROPIC_API_KEY',
|
||||
'models': ['claude-3-opus-20240229', 'claude-3-sonnet-20240229', 'claude-3-haiku-20240307']
|
||||
|
|
@ -104,6 +108,12 @@ class LLMConfig:
|
|||
api_key = os.getenv('ANTHROPIC_API_KEY')
|
||||
return Anthropic(api_key=api_key)
|
||||
|
||||
elif provider == 'google':
|
||||
import google.generativeai as genai
|
||||
api_key = os.getenv('GOOGLE_API_KEY')
|
||||
genai.configure(api_key=api_key)
|
||||
return genai
|
||||
|
||||
elif provider == 'azure_openai':
|
||||
from openai import AzureOpenAI
|
||||
api_key = os.getenv('AZURE_OPENAI_API_KEY')
|
||||
|
|
@ -173,6 +183,10 @@ class LLMConfig:
|
|||
response = cls._call_openai_vision(
|
||||
prompt, image_asset_b64, image_ref_b64, model, debug
|
||||
)
|
||||
elif provider == 'google':
|
||||
response = cls._call_google_vision(
|
||||
prompt, image_asset, image_ref, model, debug
|
||||
)
|
||||
elif provider == 'anthropic':
|
||||
response = cls._call_anthropic_vision(
|
||||
prompt, image_asset_b64, image_ref_b64, model, debug
|
||||
|
|
@ -250,6 +264,55 @@ class LLMConfig:
|
|||
'tokens_used': response.usage.total_tokens if hasattr(response, 'usage') else None
|
||||
}
|
||||
|
||||
@classmethod
def _call_google_vision(
    cls,
    prompt: str,
    image_asset: Any,
    image_ref: Any,
    model: str,
    debug: bool
) -> Dict[str, Any]:
    """Call Google Gemini Vision API.

    Args:
        prompt: Text prompt sent as the first content part.
        image_asset: Image to evaluate. Accepted forms are a PIL
            ``Image.Image``, a path to an existing file, or raw image bytes.
        image_ref: Optional reference image in any of the same forms as
            ``image_asset``. ``None`` or a value that cannot be loaded is
            skipped without error.
        model: Gemini model name passed to ``GenerativeModel``.
        debug: Accepted for signature parity with the other provider
            helpers; not used by this method.

    Returns:
        Dict with keys ``text``, ``model``, ``provider`` (always
        ``'google'``) and ``tokens_used`` (``None`` when the response
        carries no ``usage_metadata.total_token_count``).

    Raises:
        ConfigurationError: If ``image_asset`` cannot be loaded.
    """
    genai = cls.get_client('google', model)

    # Images opened by this method (as opposed to caller-owned PIL objects)
    # are tracked so their underlying file handles can be released.
    opened_here = []

    def _load(source):
        """Return a PIL image for *source*, or None if it cannot be loaded."""
        if isinstance(source, Image.Image):
            return source
        if isinstance(source, str) and os.path.exists(source):
            loaded = Image.open(source)
        elif isinstance(source, bytes):
            loaded = Image.open(BytesIO(source))
        else:
            return None
        opened_here.append(loaded)
        return loaded

    try:
        pil_image = _load(image_asset)
        if pil_image is None:
            raise ConfigurationError("Could not load image for Google Vision API")

        # Build content parts: prompt first, then the image(s).
        contents = [prompt, pil_image]

        # The reference image is optional; it now accepts the same input
        # forms as the primary image (including raw bytes).
        if image_ref is not None:
            ref_image = _load(image_ref)
            if ref_image is not None:
                contents.append(ref_image)

        gen_model = genai.GenerativeModel(model)
        response = gen_model.generate_content(contents)
        text = response.text
    finally:
        # Never close caller-supplied PIL images, only the ones opened above.
        for loaded in opened_here:
            loaded.close()

    usage = getattr(response, 'usage_metadata', None)
    return {
        'text': text,
        'model': model,
        'provider': 'google',
        'tokens_used': getattr(usage, 'total_token_count', None)
    }
|
||||
|
||||
@classmethod
|
||||
def _call_anthropic_vision(
|
||||
cls,
|
||||
|
|
|
|||
|
|
@ -143,12 +143,18 @@ EVALUATION CRITERIA:
|
|||
b. Proper exposure (not too dark or too bright)
|
||||
c. Consistent white balance
|
||||
|
||||
3. Composition and Framing:
|
||||
3. Text and Title Legibility:
|
||||
a. All text and titles must be clear, legible, and properly rendered
|
||||
b. No cut-off, overlapping, or unreadable text
|
||||
c. Font sizes must be appropriate and readable at intended display size
|
||||
d. Text must have sufficient contrast against its background
|
||||
|
||||
4. Composition and Framing:
|
||||
a. Subject properly framed and positioned
|
||||
b. Appropriate negative space
|
||||
c. No unwanted elements or distractions
|
||||
|
||||
4. Professional Standards:
|
||||
5. Professional Standards:
|
||||
a. Meets commercial photography standards
|
||||
b. Suitable for marketing/advertising use
|
||||
c. Consistent with H&M brand aesthetics
|
||||
|
|
@ -163,8 +169,9 @@ SCORING GUIDANCE:
|
|||
STEPS TO EVALUATE:
|
||||
1. Assess overall image quality and technical aspects
|
||||
2. Evaluate color accuracy and lighting
|
||||
3. Check composition and framing
|
||||
4. Determine if image meets professional standards for H&M marketing
|
||||
3. Check that all text and titles are clear and legible
|
||||
4. Check composition and framing
|
||||
5. Determine if image meets professional standards for H&M marketing
|
||||
|
||||
YOUR OUTPUT MUST INCLUDE:
|
||||
Format your response as JSON (you can include explanatory text before/after the JSON):
|
||||
|
|
|
|||
|
|
@ -186,6 +186,7 @@ def execute():
|
|||
session_id = data.get('session_id')
|
||||
profile_name = data.get('profile')
|
||||
job_number = data.get('job_number')
|
||||
llm_provider = data.get('llm_provider')
|
||||
|
||||
if not session_id or not profile_name:
|
||||
return jsonify({'error': 'Missing required parameters'}), 400
|
||||
|
|
@ -209,6 +210,14 @@ def execute():
|
|||
if not profile:
|
||||
return jsonify({'error': f'Profile "{profile_name}" not found'}), 404
|
||||
|
||||
# Override LLM provider if user selected one
|
||||
if llm_provider:
|
||||
provider_models = {'openai': 'gpt-4o', 'google': 'gemini-2.0-flash'}
|
||||
for check in profile.get('checks', []):
|
||||
if check.get('llm_provider'):
|
||||
check['llm_provider'] = llm_provider
|
||||
check['llm_model'] = provider_models.get(llm_provider, check.get('llm_model'))
|
||||
|
||||
logger.info(f"Starting QC execution for session {session_id} with profile {profile_name}")
|
||||
|
||||
executor = QCExecutor(
|
||||
|
|
@ -262,6 +271,7 @@ def execute_batch():
|
|||
session_id = data.get('session_id')
|
||||
profile_name = data.get('profile')
|
||||
job_number = data.get('job_number')
|
||||
llm_provider = data.get('llm_provider')
|
||||
|
||||
if not session_id or not profile_name:
|
||||
return jsonify({'error': 'Missing required parameters'}), 400
|
||||
|
|
@ -283,6 +293,14 @@ def execute_batch():
|
|||
if not profile:
|
||||
return jsonify({'error': f'Profile "{profile_name}" not found'}), 404
|
||||
|
||||
# Override LLM provider if user selected one
|
||||
if llm_provider:
|
||||
provider_models = {'openai': 'gpt-4o', 'google': 'gemini-2.0-flash'}
|
||||
for check in profile.get('checks', []):
|
||||
if check.get('llm_provider'):
|
||||
check['llm_provider'] = llm_provider
|
||||
check['llm_model'] = provider_models.get(llm_provider, check.get('llm_model'))
|
||||
|
||||
logger.info(f"Starting batch QC for {len(files)} files (session: {session_id})")
|
||||
|
||||
batch_executor = BatchQCExecutor(
|
||||
|
|
|
|||
|
|
@ -39,6 +39,15 @@
|
|||
{% endfor %}
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="llmProvider" class="form-label">AI Provider</label>
|
||||
<select class="form-select" id="llmProvider">
|
||||
<option value="openai" selected>OpenAI GPT-4o</option>
|
||||
<option value="google">Google Gemini</option>
|
||||
</select>
|
||||
<div class="form-text">Select which AI model to use for image quality analysis</div>
|
||||
</div>
|
||||
|
||||
<div class="mb-3">
|
||||
<label for="jobNumber" class="form-label">Job Number (Optional)</label>
|
||||
<input type="text" class="form-control" id="jobNumber"
|
||||
|
|
@ -89,6 +98,7 @@
|
|||
|
||||
const profile = document.getElementById('profile').value;
|
||||
const jobNumber = document.getElementById('jobNumber').value;
|
||||
const llmProvider = document.getElementById('llmProvider').value;
|
||||
const startBtn = document.getElementById('startBtn');
|
||||
|
||||
if (!profile) {
|
||||
|
|
@ -115,7 +125,8 @@
|
|||
body: JSON.stringify({
|
||||
session_id: sessionId,
|
||||
profile: profile,
|
||||
job_number: jobNumber || null
|
||||
job_number: jobNumber || null,
|
||||
llm_provider: llmProvider
|
||||
})
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -21,6 +21,7 @@ beautifulsoup4==4.12.2
|
|||
# LLM Providers
|
||||
openai>=1.12.0
|
||||
anthropic>=0.18.0
|
||||
google-generativeai>=0.5.0
|
||||
|
||||
# Video Processing (Video QC + Video Master)
|
||||
opencv-python>=4.8.0
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue