Video Master: revert campaigns folder + lenient name matching
The earlier swap to BOX_CAMPAIGNS_FOLDER_ID=133295752718 was wrong: Video Master operates on the automation campaigns folder (156182880490), where subfolders are named by campaign TITLE rather than by the numeric job ID used in Reporting's root. Reverted the default in config.py and in all three .env example files.

Folder naming on Box is inconsistent: '1_CFUL263C01C_Kids drop1', '1_CFUL263C01F-Kids drop 2', and 'Summer Activation 2026' all coexist. search_subfolder now lowercases and strips every non-alphanumeric character from both the search input and the folder names before substring matching, so:
  "kids drop 1" → matches "1_CFUL263C01C_Kids drop1"
  "Spring 2026" → matches "4023 Spring 2026"
  "winterfilm"  → matches "1_WA20263C01 Winter Film"

The form label/placeholder is updated to "Campaign Title", with a hint that spaces, underscores, hyphens, and case are all ignored.
This commit is contained in:
parent
087224976a
commit
6b8b8ea5a6
6 changed files with 38 additions and 25 deletions
|
|
@ -9,7 +9,7 @@ ENVIRONMENT=development
|
|||
# Box Configuration
|
||||
BOX_CONFIG_PATH=config/box_config.json
|
||||
BOX_REPORT_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=156182880490
|
||||
|
||||
# Flask Configuration
|
||||
FLASK_APP=app:app
|
||||
|
|
|
|||
|
|
@ -17,9 +17,10 @@ class Config:
|
|||
# Box
|
||||
BOX_CONFIG_PATH = os.environ.get('BOX_CONFIG_PATH', 'config/box_config.json')
|
||||
BOX_REPORT_FOLDER_ID = os.environ.get('BOX_REPORT_FOLDER_ID', '133295752718')
|
||||
# Same Box folder as REPORT — the Reporting and Video Master tools both
|
||||
# search inside this root for campaign-named subfolders.
|
||||
BOX_CAMPAIGNS_FOLDER_ID = os.environ.get('BOX_CAMPAIGNS_FOLDER_ID', '133295752718')
|
||||
# Different folder root from REPORT — Video Master operates on the
|
||||
# automation campaigns folder where each subfolder is named by
|
||||
# campaign title (not by job number like the Reporting tool's root).
|
||||
BOX_CAMPAIGNS_FOLDER_ID = os.environ.get('BOX_CAMPAIGNS_FOLDER_ID', '156182880490')
|
||||
|
||||
# Server
|
||||
HOST = os.environ.get('HOST', '0.0.0.0')
|
||||
|
|
|
|||
|
|
@ -352,27 +352,43 @@ class BoxReportClient:
|
|||
|
||||
def search_subfolder(self, parent_folder_id: str, name_pattern: str) -> Optional[Dict]:
|
||||
"""
|
||||
Search for a subfolder by name pattern (case-insensitive) within a parent folder.
|
||||
Search for a subfolder by name pattern within a parent folder.
|
||||
|
||||
Matching is deliberately lenient — folder naming on Box is inconsistent
|
||||
(mixed case, spaces vs underscores vs hyphens, prefix codes etc.). We
|
||||
strip every non-alphanumeric character and lowercase both sides before
|
||||
substring matching, so e.g. all of these find the same folder:
|
||||
|
||||
"Kids Drop 1" → matches "1_CFUL263C01C_Kids drop1"
|
||||
"kidsdrop1" → matches "Kids-Drop-1"
|
||||
"spring 2026" → matches "4023 Spring 2026"
|
||||
|
||||
Two strategies, fast path first:
|
||||
|
||||
1. Box's indexed search API (typically <2s, even for huge parents).
|
||||
May miss recently-created folders (~10 min indexing latency).
|
||||
2. Full pagination of the parent's children (slow on large folders —
|
||||
~1s per 100 items). Capped at ENUM_TIMEOUT_S so we don't sit in
|
||||
a 5+ minute loop when the parent has thousands of children.
|
||||
2. Full pagination of the parent's children (slow on large folders).
|
||||
Capped at ENUM_TIMEOUT_S so we don't sit in a 5+ minute loop.
|
||||
|
||||
Args:
|
||||
parent_folder_id: Box folder ID to search within
|
||||
name_pattern: Text to match in folder name (case-insensitive)
|
||||
name_pattern: Text to match (case- and separator-insensitive)
|
||||
|
||||
Returns:
|
||||
Dict with folder id/name or None
|
||||
"""
|
||||
import re
|
||||
import time
|
||||
|
||||
ENUM_TIMEOUT_S = 60 # cap on slow-path enumeration
|
||||
|
||||
pattern_lower = name_pattern.lower().replace('_', ' ').replace('-', ' ')
|
||||
def _norm(s: str) -> str:
    """Return ``s`` case-folded with every non-alphanumeric character removed.

    Used to compare a user-typed search pattern with Box folder names
    while ignoring case, whitespace, punctuation, and separators such as
    ``_`` and ``-``.

    Letters and digits from any script are kept (the pattern is
    Unicode-aware), so accented or non-Latin titles are not silently
    reduced to a shorter or empty string. For pure-ASCII input the result
    is the same as lowercasing and keeping only ``[a-z0-9]``.

    Relies on ``re`` already being imported by the enclosing function.

    Args:
        s: Text to normalise. A falsy value (``None`` or ``''``) is
            treated as the empty string.

    Returns:
        The normalised string; ``''`` when nothing alphanumeric remains.
    """
    # ``\W`` matches anything that is not a Unicode letter, digit, or
    # underscore; the underscore is listed explicitly so it is stripped too.
    # ``casefold()`` is the caseless-matching form of ``lower()``.
    return re.sub(r'[\W_]+', '', (s or '').casefold())
|
||||
|
||||
pattern_norm = _norm(name_pattern)
|
||||
if not pattern_norm:
|
||||
return None # nothing to search for
|
||||
|
||||
# Strategy 1 (fast): Box's indexed search API.
|
||||
try:
|
||||
|
|
@ -386,8 +402,7 @@ class BoxReportClient:
|
|||
checked = 0
|
||||
for item in results:
|
||||
if item.type == 'folder':
|
||||
name_normalized = item.name.lower().replace('_', ' ').replace('-', ' ')
|
||||
if pattern_lower in name_normalized:
|
||||
if pattern_norm in _norm(item.name):
|
||||
logger.info(f"Search API found subfolder '{item.name}' (ID: {item.id})")
|
||||
return {'id': item.id, 'name': item.name}
|
||||
checked += 1
|
||||
|
|
@ -396,10 +411,7 @@ class BoxReportClient:
|
|||
except Exception as e:
|
||||
logger.warning(f"Search API error for {parent_folder_id}/{name_pattern!r}: {e}")
|
||||
|
||||
# Strategy 2 (slow fallback): paginate the parent's children directly,
|
||||
# in case the target was created within the search-index latency window.
|
||||
# We stream the pagination so we can bail out as soon as we find a match
|
||||
# or hit the wall-clock cap.
|
||||
# Strategy 2 (slow fallback): paginate the parent's children directly.
|
||||
deadline = time.monotonic() + ENUM_TIMEOUT_S
|
||||
try:
|
||||
folder = self.client.folder(parent_folder_id)
|
||||
|
|
@ -414,8 +426,7 @@ class BoxReportClient:
|
|||
page = list(folder.get_items(limit=100, offset=offset))
|
||||
for item in page:
|
||||
if getattr(item, 'type', None) == 'folder':
|
||||
name_normalized = item.name.lower().replace('_', ' ').replace('-', ' ')
|
||||
if pattern_lower in name_normalized:
|
||||
if pattern_norm in _norm(item.name):
|
||||
logger.info(f"Enumeration found subfolder '{item.name}' (ID: {item.id})")
|
||||
return {'id': item.id, 'name': item.name}
|
||||
if len(page) < 100:
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ ENVIRONMENT=development
|
|||
# Box Configuration
|
||||
BOX_CONFIG_PATH=config/box_config.json
|
||||
BOX_REPORT_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=156182880490
|
||||
|
||||
# Flask
|
||||
FLASK_APP=app:app
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ ENVIRONMENT=production
|
|||
# Box Configuration
|
||||
BOX_CONFIG_PATH=config/box_config.json
|
||||
BOX_REPORT_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=133295752718
|
||||
BOX_CAMPAIGNS_FOLDER_ID=156182880490
|
||||
|
||||
# Flask
|
||||
FLASK_APP=app:app
|
||||
|
|
|
|||
|
|
@ -29,17 +29,18 @@
|
|||
<div class="card-body">
|
||||
<form id="searchForm">
|
||||
<div class="mb-3">
|
||||
<label for="campaignName" class="form-label">Campaign Number</label>
|
||||
<label for="campaignName" class="form-label">Campaign Title</label>
|
||||
<input type="text" class="form-control form-control-lg" id="campaignName"
|
||||
placeholder="e.g. 1993857" required autofocus>
|
||||
placeholder="e.g. Kids drop 1, Spring 2026, CFUL263C01" required autofocus>
|
||||
<div class="form-text">
|
||||
Enter the campaign number — folders on Box are named by the campaign number.
|
||||
Enter the campaign folder title (or any unique part of it).
|
||||
Matching is case-insensitive and ignores spaces, underscores,
|
||||
and hyphens — "kids drop 1" finds "1_CFUL263C01C_Kids drop1".
|
||||
{% if campaigns_folder_id %}
|
||||
Searches inside Box folder
|
||||
<a href="https://app.box.com/folder/{{ campaigns_folder_id }}" target="_blank">
|
||||
#{{ campaigns_folder_id }}
|
||||
</a>
|
||||
(same root as the Reporting tool).
|
||||
</a>.
|
||||
{% endif %}
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue