Load media-plan workbooks in read_only mode to skip pivot caches
openpyxl's default (read/write) loader deserializes pivot cache records, which hangs for minutes on Amazon media plans that use pivot tables. The GCP load balancer then cuts the request off with "upstream request timeout" / "stream timeout".

read_only=True skips pivot cache parsing entirely, and our code only uses sheetnames, iter_rows, and max_row, all of which are available in that mode (max_row may be None there, so the empty-sheet check is guarded accordingly).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
9771feaa3a
commit
ffbec7e457
1 changed files with 6 additions and 2 deletions
|
|
@ -89,7 +89,9 @@ def parse_media_plan(excel_path: str) -> Dict:
|
|||
"""
|
||||
import openpyxl
|
||||
|
||||
wb = openpyxl.load_workbook(excel_path, data_only=True)
|
||||
# read_only=True skips pivot-cache deserialization, which hangs this loader
|
||||
# on workbooks with pivot tables (Amazon media plans use them extensively).
|
||||
wb = openpyxl.load_workbook(excel_path, data_only=True, read_only=True)
|
||||
all_assets = []
|
||||
channel_counts = {}
|
||||
|
||||
|
|
@ -106,7 +108,9 @@ def parse_media_plan(excel_path: str) -> Dict:
|
|||
continue
|
||||
|
||||
ws = wb[sheet_name]
|
||||
if ws.max_row < 2:
|
||||
# In read_only mode max_row is None when the sheet has no stored dimension
|
||||
# record; treat that as "has data" and let iter_rows decide.
|
||||
if ws.max_row is not None and ws.max_row < 2:
|
||||
continue
|
||||
|
||||
# Get header row (row 1)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue