From ffbec7e45770086e2bf003709fc619fbfac0d006 Mon Sep 17 00:00:00 2001
From: nickviljoen <nick.viljoen@brandtech.plus>
Date: Wed, 22 Apr 2026 21:23:08 +0200
Subject: [PATCH] Load media-plan workbooks in read_only mode to skip pivot
 caches

openpyxl's default (read/write) loader deserializes pivot cache
records, which hangs for minutes on Amazon media plans that use pivot
tables. The GCP load balancer then cuts the request off with "upstream
request timeout" / "stream timeout".

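read_only=True skips pivot cache parsing entirely, and our code only
uses iter_rows / sheetnames, which are both supported in that mode.
max_row can be None in read_only mode, so the empty-sheet check now
skips a sheet only when max_row is known to be < 2.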

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 backend/media_plan_processor.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/backend/media_plan_processor.py b/backend/media_plan_processor.py
index d0930e1..976c833 100644
--- a/backend/media_plan_processor.py
+++ b/backend/media_plan_processor.py
@@ -89,7 +89,9 @@ def parse_media_plan(excel_path: str) -> Dict:
     """
     import openpyxl
 
-    wb = openpyxl.load_workbook(excel_path, data_only=True)
+    # read_only=True skips pivot-cache deserialization, which hangs this loader
+    # on workbooks with pivot tables (Amazon media plans use them extensively).
+    wb = openpyxl.load_workbook(excel_path, data_only=True, read_only=True)
     all_assets = []
     channel_counts = {}
 
@@ -106,7 +108,9 @@ def parse_media_plan(excel_path: str) -> Dict:
                 continue
 
         ws = wb[sheet_name]
-        if ws.max_row < 2:
+        # In read_only mode max_row is None when the sheet records no dimensions;
+        # treat that as "has data" and let iter_rows decide.
+        if ws.max_row is not None and ws.max_row < 2:
             continue
 
         # Get header row (row 1)