Load media-plan workbooks in read_only mode to skip pivot caches

openpyxl's default (read/write) loader deserializes pivot cache
records, which hangs for minutes on Amazon media plans that use pivot
tables. The GCP LB then cuts the request off with "upstream request
timeout" / "stream timeout".

read_only=True skips pivot cache parsing entirely, and our code only
uses iter_rows, sheetnames and max_row. All three are available in that
mode, but max_row can be None there, so the row-count check now
tolerates None.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
nickviljoen 2026-04-22 21:23:08 +02:00
parent 9771feaa3a
commit ffbec7e457

View file

@ -89,7 +89,9 @@ def parse_media_plan(excel_path: str) -> Dict:
"""
import openpyxl
wb = openpyxl.load_workbook(excel_path, data_only=True)
# read_only=True skips pivot-cache deserialization, which hangs this loader
# on workbooks with pivot tables (Amazon media plans use them extensively).
wb = openpyxl.load_workbook(excel_path, data_only=True, read_only=True)
all_assets = []
channel_counts = {}
@ -106,7 +108,9 @@ def parse_media_plan(excel_path: str) -> Dict:
continue
ws = wb[sheet_name]
if ws.max_row < 2:
# In read_only mode max_row comes from the sheet's stored dimension record
# and is None when the file omits it; treat None as "may have data" and let
# iter_rows decide.
if ws.max_row is not None and ws.max_row < 2:
continue
# Get header row (row 1)