Fix OCR false positive on landscape formats: report margins as % of shortest side

Amazon guidelines define margins as 7% of the shortest side, but OCR was only
reporting them as a % of width — giving misleadingly small numbers on wide
formats (e.g. a 50px margin is only 2.6% of a 1920px width, but 6.9% of the
720px shortest side).

The OCR context now reports the shortest-side percentage prominently, plus the
7% target in pixels, so the LLM can compare measured margins against it directly.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
nickviljoen 2026-04-02 13:00:44 +02:00
parent 4d456f45e5
commit 20ed52d2a2

View file

@ -250,6 +250,9 @@ def _calculate_measurements(elements, img_width, img_height):
'format': _detect_format(img_width, img_height),
}
shortest_side = min(img_width, img_height)
m['shortest_side'] = shortest_side
headline = elements.get('headline')
date = elements.get('date')
legal = elements.get('legal')
@ -260,10 +263,12 @@ def _calculate_measurements(elements, img_width, img_height):
'text': headline.get('text', ''),
'left_margin_px': headline['left'],
'left_margin_pct': _pct(headline['left'], img_width),
'left_margin_shortest_side_pct': _pct(headline['left'], shortest_side),
'right_margin_px': img_width - headline['right'],
'right_margin_pct': _pct(img_width - headline['right'], img_width),
'top_margin_px': headline['top'],
'top_margin_pct': _pct(headline['top'], img_height),
'top_margin_shortest_side_pct': _pct(headline['top'], shortest_side),
'width_pct': _pct(headline['right'] - headline['left'], img_width),
'bottom_edge_px': headline['bottom'],
'bottom_edge_pct': _pct(headline['bottom'], img_height),
@ -275,6 +280,7 @@ def _calculate_measurements(elements, img_width, img_height):
'text': date.get('text', ''),
'left_margin_px': date['left'],
'left_margin_pct': _pct(date['left'], img_width),
'left_margin_shortest_side_pct': _pct(date['left'], shortest_side),
'top_px': date['top'],
'top_pct': _pct(date['top'], img_height),
'char_height_px': date.get('char_height', 0),
@ -335,18 +341,21 @@ def _calculate_measurements(elements, img_width, img_height):
def _build_measurement_context(measurements):
"""Build formatted text context for injection into QC check prompts."""
shortest_side = measurements.get('shortest_side', min(measurements['image_width'], measurements['image_height']))
lines = [
"=== OCR LAYOUT MEASUREMENTS (computed from pixel-level analysis — NOT visual estimation) ===",
f"Image: {measurements['image_width']}px x {measurements['image_height']}px ({measurements.get('format', 'unknown')} format)",
f"Shortest side: {shortest_side}px (Amazon guideline: margins should be ~7% of shortest side = ~{round(shortest_side * 0.07)}px)",
"",
]
headline = measurements.get('headline')
if headline:
lines.append(f"HEADLINE: \"{headline['text']}\"")
lines.append(f" Left margin: {headline['left_margin_px']}px ({headline['left_margin_pct']}% of width)")
lines.append(f" Left margin: {headline['left_margin_px']}px ({headline.get('left_margin_shortest_side_pct', headline['left_margin_pct'])}% of shortest side, {headline['left_margin_pct']}% of width)")
lines.append(f" Top margin: {headline['top_margin_px']}px ({headline.get('top_margin_shortest_side_pct', headline['top_margin_pct'])}% of shortest side)")
lines.append(f" Right margin: {headline['right_margin_px']}px ({headline['right_margin_pct']}% of width)")
lines.append(f" Top margin: {headline['top_margin_px']}px ({headline['top_margin_pct']}% of height)")
lines.append(f" Headline width: {headline['width_pct']}% of asset width")
lines.append(f" Character height: {headline['char_height_px']}px")
lines.append("")
@ -354,7 +363,7 @@ def _build_measurement_context(measurements):
date = measurements.get('date')
if date:
lines.append(f"DATE: \"{date['text']}\"")
lines.append(f" Left margin: {date['left_margin_px']}px ({date['left_margin_pct']}% of width)")
lines.append(f" Left margin: {date['left_margin_px']}px ({date.get('left_margin_shortest_side_pct', date['left_margin_pct'])}% of shortest side, {date['left_margin_pct']}% of width)")
lines.append(f" Character height: {date['char_height_px']}px")
lines.append("")