From 6b8b8ea5a6197ab3b4885e61cf367a66a772baee Mon Sep 17 00:00:00 2001
From: nickviljoen
Date: Sat, 9 May 2026 20:19:35 +0200
Subject: [PATCH] Video Master: revert campaigns folder + lenient name matching
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The earlier swap to BOX_CAMPAIGNS_FOLDER_ID=133295752718 was wrong —
Video Master operates on the automation campaigns folder (156182880490),
where subfolders are named by campaign TITLE rather than the numeric
job ID used in Reporting's root. Reverted the default in config.py and
all three .env example files.

Folder naming on Box is inconsistent — '1_CFUL263C01C_Kids drop1' vs
'1_CFUL263C01F-Kids drop 2' vs 'Summer Activation 2026' all coexist.
search_subfolder now strips every non-alphanumeric character from both
the search input and the folder names before substring match, so:

  "kids drop 1"  → matches "1_CFUL263C01C_Kids drop1"
  "Spring 2026"  → matches "4023 Spring 2026"
  "winterfilm"   → matches "1_WA20263C01 Winter Film"

Form label/placeholder updated to "Campaign Title" with a hint that
spaces/underscores/hyphens/case are all ignored.
--- .env.example | 2 +- config.py | 7 ++-- core/services/box_client.py | 39 ++++++++++++------- deploy/.env.dev.example | 2 +- deploy/.env.prod.example | 2 +- .../templates/video_master/match.html | 11 +++--- 6 files changed, 38 insertions(+), 25 deletions(-) diff --git a/.env.example b/.env.example index 1b557d0..7f726bd 100644 --- a/.env.example +++ b/.env.example @@ -9,7 +9,7 @@ ENVIRONMENT=development # Box Configuration BOX_CONFIG_PATH=config/box_config.json BOX_REPORT_FOLDER_ID=133295752718 -BOX_CAMPAIGNS_FOLDER_ID=133295752718 +BOX_CAMPAIGNS_FOLDER_ID=156182880490 # Flask Configuration FLASK_APP=app:app diff --git a/config.py b/config.py index 6490e42..18ac4ae 100644 --- a/config.py +++ b/config.py @@ -17,9 +17,10 @@ class Config: # Box BOX_CONFIG_PATH = os.environ.get('BOX_CONFIG_PATH', 'config/box_config.json') BOX_REPORT_FOLDER_ID = os.environ.get('BOX_REPORT_FOLDER_ID', '133295752718') - # Same Box folder as REPORT — the Reporting and Video Master tools both - # search inside this root for campaign-named subfolders. - BOX_CAMPAIGNS_FOLDER_ID = os.environ.get('BOX_CAMPAIGNS_FOLDER_ID', '133295752718') + # Different folder root from REPORT — Video Master operates on the + # automation campaigns folder where each subfolder is named by + # campaign title (not by job number like the Reporting tool's root). + BOX_CAMPAIGNS_FOLDER_ID = os.environ.get('BOX_CAMPAIGNS_FOLDER_ID', '156182880490') # Server HOST = os.environ.get('HOST', '0.0.0.0') diff --git a/core/services/box_client.py b/core/services/box_client.py index a282eb7..0b3d2ea 100644 --- a/core/services/box_client.py +++ b/core/services/box_client.py @@ -352,27 +352,43 @@ class BoxReportClient: def search_subfolder(self, parent_folder_id: str, name_pattern: str) -> Optional[Dict]: """ - Search for a subfolder by name pattern (case-insensitive) within a parent folder. + Search for a subfolder by name pattern within a parent folder. 
+ + Matching is deliberately lenient — folder naming on Box is inconsistent + (mixed case, spaces vs underscores vs hyphens, prefix codes etc.). We + strip every non-alphanumeric character and lowercase both sides before + substring matching, so e.g. all of these find the same folder: + + "Kids Drop 1" → matches "1_CFUL263C01C_Kids drop1" + "kidsdrop1" → matches "Kids-Drop-1" + "spring 2026" → matches "4023 Spring 2026" Two strategies, fast path first: 1. Box's indexed search API (typically <2s, even for huge parents). May miss recently-created folders (~10 min indexing latency). - 2. Full pagination of the parent's children (slow on large folders — - ~1s per 100 items). Capped at ENUM_TIMEOUT_S so we don't sit in - a 5+ minute loop when the parent has thousands of children. + 2. Full pagination of the parent's children (slow on large folders). + Capped at ENUM_TIMEOUT_S so we don't sit in a 5+ minute loop. Args: parent_folder_id: Box folder ID to search within - name_pattern: Text to match in folder name (case-insensitive) + name_pattern: Text to match (case- and separator-insensitive) Returns: Dict with folder id/name or None """ + import re import time + ENUM_TIMEOUT_S = 60 # cap on slow-path enumeration - pattern_lower = name_pattern.lower().replace('_', ' ').replace('-', ' ') + def _norm(s: str) -> str: + """Lower + strip every non-alphanumeric character.""" + return re.sub(r'[^a-z0-9]+', '', (s or '').lower()) + + pattern_norm = _norm(name_pattern) + if not pattern_norm: + return None # nothing to search for # Strategy 1 (fast): Box's indexed search API. 
try: @@ -386,8 +402,7 @@ class BoxReportClient: checked = 0 for item in results: if item.type == 'folder': - name_normalized = item.name.lower().replace('_', ' ').replace('-', ' ') - if pattern_lower in name_normalized: + if pattern_norm in _norm(item.name): logger.info(f"Search API found subfolder '{item.name}' (ID: {item.id})") return {'id': item.id, 'name': item.name} checked += 1 @@ -396,10 +411,7 @@ class BoxReportClient: except Exception as e: logger.warning(f"Search API error for {parent_folder_id}/{name_pattern!r}: {e}") - # Strategy 2 (slow fallback): paginate the parent's children directly, - # in case the target was created within the search-index latency window. - # We stream the pagination so we can bail out as soon as we find a match - # or hit the wall-clock cap. + # Strategy 2 (slow fallback): paginate the parent's children directly. deadline = time.monotonic() + ENUM_TIMEOUT_S try: folder = self.client.folder(parent_folder_id) @@ -414,8 +426,7 @@ class BoxReportClient: page = list(folder.get_items(limit=100, offset=offset)) for item in page: if getattr(item, 'type', None) == 'folder': - name_normalized = item.name.lower().replace('_', ' ').replace('-', ' ') - if pattern_lower in name_normalized: + if pattern_norm in _norm(item.name): logger.info(f"Enumeration found subfolder '{item.name}' (ID: {item.id})") return {'id': item.id, 'name': item.name} if len(page) < 100: diff --git a/deploy/.env.dev.example b/deploy/.env.dev.example index 9b60016..3fbb0e2 100644 --- a/deploy/.env.dev.example +++ b/deploy/.env.dev.example @@ -10,7 +10,7 @@ ENVIRONMENT=development # Box Configuration BOX_CONFIG_PATH=config/box_config.json BOX_REPORT_FOLDER_ID=133295752718 -BOX_CAMPAIGNS_FOLDER_ID=133295752718 +BOX_CAMPAIGNS_FOLDER_ID=156182880490 # Flask FLASK_APP=app:app diff --git a/deploy/.env.prod.example b/deploy/.env.prod.example index f6f9625..bbc36f6 100644 --- a/deploy/.env.prod.example +++ b/deploy/.env.prod.example @@ -10,7 +10,7 @@ ENVIRONMENT=production # 
Box Configuration BOX_CONFIG_PATH=config/box_config.json BOX_REPORT_FOLDER_ID=133295752718 -BOX_CAMPAIGNS_FOLDER_ID=133295752718 +BOX_CAMPAIGNS_FOLDER_ID=156182880490 # Flask FLASK_APP=app:app diff --git a/modules/video_master/templates/video_master/match.html b/modules/video_master/templates/video_master/match.html index 378ca89..358ef57 100644 --- a/modules/video_master/templates/video_master/match.html +++ b/modules/video_master/templates/video_master/match.html @@ -29,17 +29,18 @@
- + + placeholder="e.g. Kids drop 1, Spring 2026, CFUL263C01" required autofocus>
- Enter the campaign number — folders on Box are named by the campaign number. + Enter the campaign folder title (or any unique part of it). + Matching is case-insensitive and ignores spaces, underscores, + and hyphens — "kids drop 1" finds "1_CFUL263C01C_Kids drop1". {% if campaigns_folder_id %} Searches inside Box folder #{{ campaigns_folder_id }} - - (same root as the Reporting tool). + . {% endif %}