From ffbec7e45770086e2bf003709fc619fbfac0d006 Mon Sep 17 00:00:00 2001
From: nickviljoen
Date: Wed, 22 Apr 2026 21:23:08 +0200
Subject: [PATCH] Load media-plan workbooks in read_only mode to skip pivot caches

openpyxl's default (read/write) loader deserializes pivot cache records,
which hangs for minutes on Amazon media plans that use pivot tables. The
GCP LB then cuts the request off with "upstream request timeout" /
"stream timeout".

read_only=True skips pivot cache parsing entirely, and our code only
uses iter_rows / sheetnames which are both supported in that mode.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 backend/media_plan_processor.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/backend/media_plan_processor.py b/backend/media_plan_processor.py
index d0930e1..976c833 100644
--- a/backend/media_plan_processor.py
+++ b/backend/media_plan_processor.py
@@ -89,7 +89,9 @@ def parse_media_plan(excel_path: str) -> Dict:
     """
     import openpyxl
 
-    wb = openpyxl.load_workbook(excel_path, data_only=True)
+    # read_only=True skips pivot-cache deserialization, which hangs this loader
+    # on workbooks with pivot tables (Amazon media plans use them extensively).
+    wb = openpyxl.load_workbook(excel_path, data_only=True, read_only=True)
 
     all_assets = []
     channel_counts = {}
@@ -106,7 +108,9 @@ def parse_media_plan(excel_path: str) -> Dict:
             continue
 
         ws = wb[sheet_name]
-        if ws.max_row < 2:
+        # In read_only mode max_row may be None until dimensions are read; treat
+        # that as "has data" and let iter_rows decide.
+        if ws.max_row is not None and ws.max_row < 2:
             continue
 
         # Get header row (row 1)