Full-stack Amazon AI Transcreation Platform with: - FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables - JWT auth (SSO-ready abstract provider pattern) - 6-agent pipeline orchestrator with deterministic modules - Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme) - Job wizard, monitoring HUD, output review, admin screens - 154 TM/reference files imported, 12 locales configured - Docker Compose for all services Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs pending Phase 3 LLM integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
121 lines
3.7 KiB
Python
121 lines
3.7 KiB
Python
"""Validate date and percentage format strings against approved locale formats.

Checks that dates and percentages in transcreated text conform to the
locale-specific format rules defined in the date/percentage format file.
"""
|
||
|
||
import re
|
||
from dataclasses import dataclass
|
||
|
||
|
||
@dataclass
class FormatViolation:
    """A detected date/percentage format violation.

    One instance describes a single substring of the checked text that looks
    like a date or a percentage but matches none of the approved formats.
    """

    # The offending substring exactly as it was found in the text.
    found: str
    # Human-readable hint listing examples of approved formats
    # (a comma-separated string; may be empty when no examples are configured).
    expected_format: str
    # Explanation of the violation, suitable for showing to a reviewer.
    description: str
|
||
|
||
|
||
def validate_date_formats(
    text: str,
    approved_formats: list[dict],
) -> list[FormatViolation]:
    """Validate date strings in text against approved formats.

    Date-like substrings are located with a fixed set of detection regexes
    (numeric day/month-first, numeric year-first, "Month DD, YYYY" and
    "DD Month YYYY" using English month names, case-insensitive). Each hit is
    then tried against every approved pattern; a hit that matches none of
    them is reported as a violation.

    Approved patterns are applied with ``re.match`` (case-insensitive), which
    anchors at the start of the detected substring only. A pattern that must
    reject trailing characters has to end with ``$`` itself.

    Args:
        text: The text containing dates to validate.
        approved_formats: List of dicts with keys:
            - pattern: str (regex pattern for valid format)
            - example: str (example of the correct format)
            - description: str

    Returns:
        List of FormatViolation instances, grouped by detection pattern and,
        within each pattern, in order of appearance in ``text``. Empty when
        ``text`` or ``approved_formats`` is empty.
    """
    if not text or not approved_formats:
        return []

    # Common date-like patterns to detect
    date_patterns = [
        # DD/MM/YYYY, MM/DD/YYYY (also with "-" or "." as separator)
        r"\b\d{1,2}[/\-.]\d{1,2}[/\-.]\d{2,4}\b",
        # YYYY/MM/DD (also with "-" or "."). The pattern above cannot match a
        # four-digit leading component, so year-first dates need their own.
        r"\b\d{4}[/\-.]\d{1,2}[/\-.]\d{1,2}\b",
        # Month DD, YYYY
        r"\b(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{1,2},?\s+\d{4}\b",
        # DD Month YYYY
        r"\b\d{1,2}\s+(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z]*\.?\s+\d{4}\b",
    ]

    # Entries without a usable pattern can never approve anything; skip them
    # once here instead of on every detected date.
    approved_patterns = [
        fmt["pattern"] for fmt in approved_formats if fmt.get("pattern")
    ]

    violations: list[FormatViolation] = []

    for date_pattern in date_patterns:
        for match in re.finditer(date_pattern, text, re.IGNORECASE):
            found_date = match.group()

            if any(
                re.match(pattern, found_date, re.IGNORECASE)
                for pattern in approved_patterns
            ):
                continue

            # Show at most three examples to keep the hint short.
            examples = [
                fmt["example"] for fmt in approved_formats if fmt.get("example")
            ]
            violations.append(
                FormatViolation(
                    found=found_date,
                    expected_format=", ".join(examples[:3]),
                    description=f"Date format '{found_date}' does not match approved formats",
                )
            )

    return violations
|
||
|
||
|
||
def validate_percentage_formats(
    text: str,
    approved_formats: list[dict],
) -> list[FormatViolation]:
    """Validate percentage strings in text against approved formats.

    Percentage-like substrings (digits, an optional "." or "," decimal part,
    optional whitespace, then "%") are located in ``text``. Each hit is tried
    against every approved pattern; a hit that matches none of them is
    reported as a violation.

    Approved patterns are applied with ``re.match``, which anchors at the
    start of the detected substring only. A pattern that must reject trailing
    characters has to end with ``$`` itself.

    Args:
        text: The text containing percentages to validate.
        approved_formats: List of dicts with keys:
            - pattern: str (regex)
            - example: str
            - description: str

    Returns:
        List of FormatViolation instances in order of appearance in ``text``.
        Empty when ``text`` or ``approved_formats`` is empty.
    """
    if not text or not approved_formats:
        return []

    # Find percentage-like patterns.
    # There is deliberately no trailing \b: "%" is a non-word character, so a
    # word boundary after it exists only when a letter, digit or underscore
    # follows immediately. With it, "50%", "50% off" and "50 %." would all go
    # undetected.
    pct_pattern = r"\b\d+[\.,]?\d*\s*%"

    # Entries without a usable pattern can never approve anything; skip them
    # once here instead of on every detected percentage.
    approved_patterns = [
        fmt["pattern"] for fmt in approved_formats if fmt.get("pattern")
    ]

    violations: list[FormatViolation] = []

    for match in re.finditer(pct_pattern, text):
        found_pct = match.group()

        if any(re.match(pattern, found_pct) for pattern in approved_patterns):
            continue

        # Show at most three examples to keep the hint short.
        examples = [
            fmt["example"] for fmt in approved_formats if fmt.get("example")
        ]
        violations.append(
            FormatViolation(
                found=found_pct,
                expected_format=", ".join(examples[:3]),
                description=f"Percentage format '{found_pct}' does not match approved formats",
            )
        )

    return violations
|