Video Master: revert campaigns folder + lenient name matching

The earlier swap to BOX_CAMPAIGNS_FOLDER_ID=133295752718 was wrong —
Video Master operates on the automation campaigns folder
(156182880490), where subfolders are named by campaign TITLE rather
than the numeric job ID used in Reporting's root.

Reverted the default in config.py and all three .env example files.

Folder naming on Box is inconsistent — '1_CFUL263C01C_Kids drop1' vs
'1_CFUL263C01F-Kids drop 2' vs 'Summer Activation 2026' all coexist.
search_subfolder now lowercases and strips every non-alphanumeric
character from both the search input and the folder names before the
substring match, so:
  "kids drop 1"   →  matches "1_CFUL263C01C_Kids drop1"
  "Spring 2026"   →  matches "4023 Spring 2026"
  "winterfilm"    →  matches "1_WA20263C01 Winter Film"

Form label/placeholder updated to "Campaign Title" with a hint that
spaces/underscores/hyphens/case are all ignored.
This commit is contained in:
nickviljoen 2026-05-09 20:19:35 +02:00
parent 087224976a
commit 6b8b8ea5a6
6 changed files with 38 additions and 25 deletions

View file

@ -9,7 +9,7 @@ ENVIRONMENT=development
# Box Configuration
BOX_CONFIG_PATH=config/box_config.json
BOX_REPORT_FOLDER_ID=133295752718
BOX_CAMPAIGNS_FOLDER_ID=133295752718
BOX_CAMPAIGNS_FOLDER_ID=156182880490
# Flask Configuration
FLASK_APP=app:app

View file

@ -17,9 +17,10 @@ class Config:
# Box
BOX_CONFIG_PATH = os.environ.get('BOX_CONFIG_PATH', 'config/box_config.json')
BOX_REPORT_FOLDER_ID = os.environ.get('BOX_REPORT_FOLDER_ID', '133295752718')
# Same Box folder as REPORT — the Reporting and Video Master tools both
# search inside this root for campaign-named subfolders.
BOX_CAMPAIGNS_FOLDER_ID = os.environ.get('BOX_CAMPAIGNS_FOLDER_ID', '133295752718')
# Different folder root from REPORT — Video Master operates on the
# automation campaigns folder where each subfolder is named by
# campaign title (not by job number like the Reporting tool's root).
BOX_CAMPAIGNS_FOLDER_ID = os.environ.get('BOX_CAMPAIGNS_FOLDER_ID', '156182880490')
# Server
HOST = os.environ.get('HOST', '0.0.0.0')

View file

@ -352,27 +352,43 @@ class BoxReportClient:
def search_subfolder(self, parent_folder_id: str, name_pattern: str) -> Optional[Dict]:
"""
Search for a subfolder by name pattern (case-insensitive) within a parent folder.
Search for a subfolder by name pattern within a parent folder.
Matching is deliberately lenient — folder naming on Box is inconsistent
(mixed case, spaces vs underscores vs hyphens, prefix codes etc.). We
strip every non-alphanumeric character and lowercase both sides before
substring matching, so e.g. all of these find the same folder:
"Kids Drop 1" matches "1_CFUL263C01C_Kids drop1"
"kidsdrop1" matches "Kids-Drop-1"
"spring 2026" matches "4023 Spring 2026"
Two strategies, fast path first:
1. Box's indexed search API (typically <2s, even for huge parents).
May miss recently-created folders (~10 min indexing latency).
2. Full pagination of the parent's children (slow on large folders —
~1s per 100 items). Capped at ENUM_TIMEOUT_S so we don't sit in
a 5+ minute loop when the parent has thousands of children.
2. Full pagination of the parent's children (slow on large folders).
Capped at ENUM_TIMEOUT_S so we don't sit in a 5+ minute loop.
Args:
parent_folder_id: Box folder ID to search within
name_pattern: Text to match in folder name (case-insensitive)
name_pattern: Text to match (case- and separator-insensitive)
Returns:
Dict with folder id/name or None
"""
import re
import time
ENUM_TIMEOUT_S = 60 # cap on slow-path enumeration
pattern_lower = name_pattern.lower().replace('_', ' ').replace('-', ' ')
def _norm(s: str) -> str:
"""Lower + strip every non-alphanumeric character."""
return re.sub(r'[^a-z0-9]+', '', (s or '').lower())
pattern_norm = _norm(name_pattern)
if not pattern_norm:
return None # nothing to search for
# Strategy 1 (fast): Box's indexed search API.
try:
@ -386,8 +402,7 @@ class BoxReportClient:
checked = 0
for item in results:
if item.type == 'folder':
name_normalized = item.name.lower().replace('_', ' ').replace('-', ' ')
if pattern_lower in name_normalized:
if pattern_norm in _norm(item.name):
logger.info(f"Search API found subfolder '{item.name}' (ID: {item.id})")
return {'id': item.id, 'name': item.name}
checked += 1
@ -396,10 +411,7 @@ class BoxReportClient:
except Exception as e:
logger.warning(f"Search API error for {parent_folder_id}/{name_pattern!r}: {e}")
# Strategy 2 (slow fallback): paginate the parent's children directly,
# in case the target was created within the search-index latency window.
# We stream the pagination so we can bail out as soon as we find a match
# or hit the wall-clock cap.
# Strategy 2 (slow fallback): paginate the parent's children directly.
deadline = time.monotonic() + ENUM_TIMEOUT_S
try:
folder = self.client.folder(parent_folder_id)
@ -414,8 +426,7 @@ class BoxReportClient:
page = list(folder.get_items(limit=100, offset=offset))
for item in page:
if getattr(item, 'type', None) == 'folder':
name_normalized = item.name.lower().replace('_', ' ').replace('-', ' ')
if pattern_lower in name_normalized:
if pattern_norm in _norm(item.name):
logger.info(f"Enumeration found subfolder '{item.name}' (ID: {item.id})")
return {'id': item.id, 'name': item.name}
if len(page) < 100:

View file

@ -10,7 +10,7 @@ ENVIRONMENT=development
# Box Configuration
BOX_CONFIG_PATH=config/box_config.json
BOX_REPORT_FOLDER_ID=133295752718
BOX_CAMPAIGNS_FOLDER_ID=133295752718
BOX_CAMPAIGNS_FOLDER_ID=156182880490
# Flask
FLASK_APP=app:app

View file

@ -10,7 +10,7 @@ ENVIRONMENT=production
# Box Configuration
BOX_CONFIG_PATH=config/box_config.json
BOX_REPORT_FOLDER_ID=133295752718
BOX_CAMPAIGNS_FOLDER_ID=133295752718
BOX_CAMPAIGNS_FOLDER_ID=156182880490
# Flask
FLASK_APP=app:app

View file

@ -29,17 +29,18 @@
<div class="card-body">
<form id="searchForm">
<div class="mb-3">
<label for="campaignName" class="form-label">Campaign Number</label>
<label for="campaignName" class="form-label">Campaign Title</label>
<input type="text" class="form-control form-control-lg" id="campaignName"
placeholder="e.g. 1993857" required autofocus>
placeholder="e.g. Kids drop 1, Spring 2026, CFUL263C01" required autofocus>
<div class="form-text">
Enter the campaign number — folders on Box are named by the campaign number.
Enter the campaign folder title (or any unique part of it).
Matching is case-insensitive and ignores spaces, underscores,
and hyphens — "kids drop 1" finds "1_CFUL263C01C_Kids drop1".
{% if campaigns_folder_id %}
Searches inside Box folder
<a href="https://app.box.com/folder/{{ campaigns_folder_id }}" target="_blank">
#{{ campaigns_folder_id }}
</a>
(same root as the Reporting tool).
</a>.
{% endif %}
</div>
</div>