Fix OCR false positive on landscape formats: report margins as % of shortest side
Amazon guidelines define margins as 7% of the shortest side, but the OCR step was only reporting margins as a percentage of width — giving misleadingly small numbers on wide formats (e.g. a 50px margin is only 2.6% of a 1920px width, but 6.9% of the 720px shortest side). The OCR context now includes the shortest-side percentage prominently, plus the 7% target in pixels, so the LLM can compare the two directly.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
4d456f45e5
commit
20ed52d2a2
1 changed file with 12 additions and 3 deletions
|
|
@ -250,6 +250,9 @@ def _calculate_measurements(elements, img_width, img_height):
|
|||
'format': _detect_format(img_width, img_height),
|
||||
}
|
||||
|
||||
shortest_side = min(img_width, img_height)
|
||||
m['shortest_side'] = shortest_side
|
||||
|
||||
headline = elements.get('headline')
|
||||
date = elements.get('date')
|
||||
legal = elements.get('legal')
|
||||
|
|
@ -260,10 +263,12 @@ def _calculate_measurements(elements, img_width, img_height):
|
|||
'text': headline.get('text', ''),
|
||||
'left_margin_px': headline['left'],
|
||||
'left_margin_pct': _pct(headline['left'], img_width),
|
||||
'left_margin_shortest_side_pct': _pct(headline['left'], shortest_side),
|
||||
'right_margin_px': img_width - headline['right'],
|
||||
'right_margin_pct': _pct(img_width - headline['right'], img_width),
|
||||
'top_margin_px': headline['top'],
|
||||
'top_margin_pct': _pct(headline['top'], img_height),
|
||||
'top_margin_shortest_side_pct': _pct(headline['top'], shortest_side),
|
||||
'width_pct': _pct(headline['right'] - headline['left'], img_width),
|
||||
'bottom_edge_px': headline['bottom'],
|
||||
'bottom_edge_pct': _pct(headline['bottom'], img_height),
|
||||
|
|
@ -275,6 +280,7 @@ def _calculate_measurements(elements, img_width, img_height):
|
|||
'text': date.get('text', ''),
|
||||
'left_margin_px': date['left'],
|
||||
'left_margin_pct': _pct(date['left'], img_width),
|
||||
'left_margin_shortest_side_pct': _pct(date['left'], shortest_side),
|
||||
'top_px': date['top'],
|
||||
'top_pct': _pct(date['top'], img_height),
|
||||
'char_height_px': date.get('char_height', 0),
|
||||
|
|
@ -335,18 +341,21 @@ def _calculate_measurements(elements, img_width, img_height):
|
|||
|
||||
def _build_measurement_context(measurements):
|
||||
"""Build formatted text context for injection into QC check prompts."""
|
||||
shortest_side = measurements.get('shortest_side', min(measurements['image_width'], measurements['image_height']))
|
||||
|
||||
lines = [
|
||||
"=== OCR LAYOUT MEASUREMENTS (computed from pixel-level analysis — NOT visual estimation) ===",
|
||||
f"Image: {measurements['image_width']}px x {measurements['image_height']}px ({measurements.get('format', 'unknown')} format)",
|
||||
f"Shortest side: {shortest_side}px (Amazon guideline: margins should be ~7% of shortest side = ~{round(shortest_side * 0.07)}px)",
|
||||
"",
|
||||
]
|
||||
|
||||
headline = measurements.get('headline')
|
||||
if headline:
|
||||
lines.append(f"HEADLINE: \"{headline['text']}\"")
|
||||
lines.append(f" Left margin: {headline['left_margin_px']}px ({headline['left_margin_pct']}% of width)")
|
||||
lines.append(f" Left margin: {headline['left_margin_px']}px ({headline.get('left_margin_shortest_side_pct', headline['left_margin_pct'])}% of shortest side, {headline['left_margin_pct']}% of width)")
|
||||
lines.append(f" Top margin: {headline['top_margin_px']}px ({headline.get('top_margin_shortest_side_pct', headline['top_margin_pct'])}% of shortest side)")
|
||||
lines.append(f" Right margin: {headline['right_margin_px']}px ({headline['right_margin_pct']}% of width)")
|
||||
lines.append(f" Top margin: {headline['top_margin_px']}px ({headline['top_margin_pct']}% of height)")
|
||||
lines.append(f" Headline width: {headline['width_pct']}% of asset width")
|
||||
lines.append(f" Character height: {headline['char_height_px']}px")
|
||||
lines.append("")
|
||||
|
|
@ -354,7 +363,7 @@ def _build_measurement_context(measurements):
|
|||
date = measurements.get('date')
|
||||
if date:
|
||||
lines.append(f"DATE: \"{date['text']}\"")
|
||||
lines.append(f" Left margin: {date['left_margin_px']}px ({date['left_margin_pct']}% of width)")
|
||||
lines.append(f" Left margin: {date['left_margin_px']}px ({date.get('left_margin_shortest_side_pct', date['left_margin_pct'])}% of shortest side, {date['left_margin_pct']}% of width)")
|
||||
lines.append(f" Character height: {date['char_height_px']}px")
|
||||
lines.append("")
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue