Full-stack Amazon AI Transcreation Platform with: - FastAPI backend (async, PostgreSQL, Redis, Celery) with 11 DB tables - JWT auth (SSO-ready abstract provider pattern) - 6-agent pipeline orchestrator with deterministic modules - Next.js 14 frontend with Amazon branding (Ember fonts, orange/dark theme) - Job wizard, monitoring HUD, output review, admin screens - 154 TM/reference files imported, 12 locales configured - Docker Compose for all services Agents 2-5 (TM retrieval, ranker, transcreator, compliance) are stubs pending Phase 3 LLM integration. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
67 lines
1.7 KiB
Python
67 lines
1.7 KiB
Python
"""Line break normalisation utilities.
|
|
|
|
Three modes:
|
|
- normalise_for_query: Replace line breaks with spaces, collapse whitespace runs to a single space.
|
|
Used when building search queries against TM.
|
|
- normalise_for_excel: Convert \\r\\n and \\r line breaks to \\n for openpyxl cells.
|
|
Used when writing output cells.
|
|
- preserve_raw: Return text as-is (identity function for pipeline clarity).
|
|
"""
|
|
|
|
import re
|
|
|
|
|
|
def normalise_for_query(text: str) -> str:
    """Flatten text to a single line for TM query matching.

    Every run of whitespace -- spaces, tabs and any line-break variant
    (CRLF, CR or LF) -- is collapsed to exactly one space, and leading and
    trailing whitespace is removed.

    Args:
        text: Raw text potentially containing line breaks. Any falsy value
            (empty string, None) is treated as "no text".

    Returns:
        Single-line text with normalised whitespace, or "" for falsy input.
    """
    if not text:
        return ""

    # ``\s`` already matches CR and LF, so a single regex pass both removes
    # the line breaks and collapses whitespace runs; separate str.replace
    # calls for each line-break variant would be redundant work.
    return re.sub(r"\s+", " ", text).strip()
|
|
|
|
|
|
def normalise_for_excel(text: str) -> str:
    """Standardise every line-break variant to a single LF character.

    openpyxl renders \\n as an in-cell line break when wrap_text is enabled,
    so Windows (CRLF) and bare CR breaks are rewritten to \\n here.

    Args:
        text: Text with potential line breaks. Falsy input yields "".

    Returns:
        The text with all line breaks expressed as \\n.
    """
    if text:
        # CRLF pairs must be handled first; otherwise the bare-CR pass
        # would turn each pair into two line breaks.
        without_crlf = text.replace("\r\n", "\n")
        return without_crlf.replace("\r", "\n")

    return ""
|
|
|
|
|
|
def preserve_raw(text: str) -> str:
    """Pass the input through untouched.

    An explicit identity step, so that pipeline code can state "no
    normalisation here" instead of silently omitting a call.

    Args:
        text: Any text.

    Returns:
        The very same object that was passed in, unmodified.
    """
    return text
|