From 3cb1973f575d5a427a0154b6f5bdba5255e1e33d Mon Sep 17 00:00:00 2001
From: DJP <DJP>
Date: Thu, 9 Apr 2026 15:17:20 -0400
Subject: [PATCH] Fix tier matching: use client tier to pick correct complexity
 variant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Doc parser now extracts tier labels (Tier A, A, Gold, etc.) per asset
- Matching uses tier to find the correct GMAL complexity variant:
  - Claude matches to the GMAL family (asset type)
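  - Post-match lookup: an in-memory (asset_name, complexity_level) index finds the exact sibling variant; if no sibling exists at the target complexity, Claude's original match is kept
  - e.g. "Banner - Tier A" with A=Complex → the Banner match is swapped for the Complex variant that shares its asset_name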
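- Tier hint passed to Claude prompt for better matching: the client tier is translated to its mapped complexity (Simple/Medium/Complex) and sent only when the project's tier mapping resolves the label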
- No blind expansion - only the tier-appropriate GMAL is matched
- The "Expand to Tiers" button remains available for cases where the client doesn't specify tiers

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 backend/app/api/matching.py         |  1 +
 backend/app/services/ai_matching.py | 56 ++++++++++++++++++++++++++---
 backend/app/services/doc_parser.py  | 10 ++++--
 3 files changed, 60 insertions(+), 7 deletions(-)
diff --git a/backend/app/api/matching.py b/backend/app/api/matching.py
index 061d741..2bccac9 100644
--- a/backend/app/api/matching.py
+++ b/backend/app/api/matching.py
@@ -63,6 +63,7 @@ async def _background_parse(project_id: int, filename: str, text: str, metadata:
                     project_id=project_id,
                     raw_name=item.get("name", "Unknown"),
                     raw_description=item.get("description", ""),
+                    client_tier=item.get("tier", "") or None,
                     volume=volume,
                     sort_order=idx + 1,
                 )
diff --git a/backend/app/services/ai_matching.py b/backend/app/services/ai_matching.py
index db26975..3414ce4 100644
--- a/backend/app/services/ai_matching.py
+++ b/backend/app/services/ai_matching.py
@@ -108,14 +108,18 @@ Guidelines:
 - Be generous with scoring when the match is semantically correct even if the naming differs."""
 
 
-def _match_single_asset(client_asset_name, client_asset_desc, volume, catalog_text, num_assets):
+def _match_single_asset(client_asset_name, client_asset_desc, volume, catalog_text, num_assets, tier_hint=""):
     """Run a single match call to Claude (synchronous, for use in thread pool)."""
+    tier_instruction = ""
+    if tier_hint:
+        tier_instruction = f"\nCLIENT TIER: {tier_hint} — match to the {tier_hint} complexity variant if one exists.\n"
+
     user_msg = f"""Match this client asset to the best GMAL equivalent(s):
 
 CLIENT ASSET:
 Name: {client_asset_name}
 Description: {client_asset_desc or 'No description provided'}
-Volume: {volume}
+Volume: {volume}{tier_instruction}
 
 FULL GMAL CATALOG ({num_assets} assets):
 {catalog_text}"""
@@ -143,9 +147,27 @@ async def match_client_assets(
     """
     _clear_cancel(project_id)
 
+    # Load project tier mapping if set
+    import json as _json
+    from app.models.project import Project
+    proj_result = await db.execute(select(Project).where(Project.id == project_id))
+    project = proj_result.scalar_one_or_none()
+    tier_config = {}
+    if project and project.tier_mapping:
+        try:
+            tier_config = _json.loads(project.tier_mapping)
+        except _json.JSONDecodeError:
+            pass
+    # Build tier label -> complexity_level map
+    tier_to_complexity = {}
+    complexity_map = {"simple": 1, "medium": 2, "mid": 2, "complex": 3}
+    for t in tier_config.get("tiers", []):
+        tier_to_complexity[t["label"].lower()] = complexity_map.get(t["complexity"].lower(), 2)
+
     # Snapshot client asset data before any commits (ORM objects expire after commit)
     asset_snapshots = [
-        {"id": ca.id, "raw_name": ca.raw_name, "raw_description": ca.raw_description, "volume": ca.volume}
+        {"id": ca.id, "raw_name": ca.raw_name, "raw_description": ca.raw_description,
+         "volume": ca.volume, "client_tier": getattr(ca, 'client_tier', None)}
         for ca in client_assets
     ]
 
@@ -155,6 +177,8 @@ async def match_client_assets(
     )
     all_gmals = result.scalars().all()
     gmal_by_id = {g.gmal_id: g for g in all_gmals}
+    # Lookup by (asset_name, complexity_level) for tier variant resolution
+    gmal_by_name_complexity = {(g.asset_name, g.complexity_level): g for g in all_gmals if g.asset_name}
 
     # Build compact catalog once - reused for every match call
     catalog_text = _format_compact_catalog(all_gmals)
@@ -223,6 +247,15 @@ async def match_client_assets(
             for snap in batch:
                 if _is_cancelled(project_id):
                     break
+                # Build tier hint for matching
+                tier_hint = ""
+                client_tier = snap.get("client_tier") or ""
+                if client_tier and tier_to_complexity:
+                    complexity_level = tier_to_complexity.get(client_tier.lower())
+                    if complexity_level:
+                        complexity_names = {1: "Simple", 2: "Medium", 3: "Complex"}
+                        tier_hint = complexity_names.get(complexity_level, "")
+
                 future = loop.run_in_executor(
                     executor,
                     _match_single_asset,
@@ -231,6 +264,7 @@ async def match_client_assets(
                     snap["volume"],
                     catalog_text,
                     len(all_gmals),
+                    tier_hint,
                 )
                 futures.append((snap, future))
 
@@ -263,12 +297,24 @@ async def match_client_assets(
                                 logger.warning(f"Claude returned unknown GMAL ID: {m['gmal_id']}")
                                 continue
 
+                            # If client asset has a tier, find the correct complexity variant
+                            final_gmal = gmal
+                            client_tier = snap.get("client_tier") or ""
+                            if client_tier and tier_to_complexity and gmal.asset_name:
+                                target_complexity = tier_to_complexity.get(client_tier.lower())
+                                if target_complexity and gmal.complexity_level != target_complexity:
+                                    # Find the sibling at the right complexity
+                                    variant = gmal_by_name_complexity.get((gmal.asset_name, target_complexity))
+                                    if variant:
+                                        logger.info(f"Tier remap: '{snap['raw_name']}' ({client_tier}) → {variant.gmal_id} ({variant.complexity_name}) instead of {gmal.gmal_id}")
+                                        final_gmal = variant
+
                             match = Match(
                                 client_asset_id=snap["id"],
-                                gmal_asset_id=gmal.id,
+                                gmal_asset_id=final_gmal.id,
                                 confidence=MatchConfidence(m["confidence"]),
                                 confidence_score=m.get("confidence_score"),
-                                ai_reasoning=m.get("reasoning"),
+                                ai_reasoning=m.get("reasoning") + (f" [Tier {client_tier} → {final_gmal.complexity_name}]" if final_gmal != gmal else ""),
                                 caveat_text=m.get("caveats"),
                                 is_selected=(rank == 1 and auto_select),
                                 rank=rank,
diff --git a/backend/app/services/doc_parser.py b/backend/app/services/doc_parser.py
index 34061c8..3f70a1e 100644
--- a/backend/app/services/doc_parser.py
+++ b/backend/app/services/doc_parser.py
@@ -40,8 +40,12 @@ EXTRACT_TOOLS = [
                                 "type": "integer",
                                 "description": "Number of this asset needed (default 1 if not specified)"
                             },
+                            "tier": {
+                                "type": "string",
+                                "description": "The client's tier/complexity label if specified (e.g. 'Tier A', 'A', 'Gold', '1', 'Premium'). Leave empty string if no tier is specified."
+                            },
                         },
-                        "required": ["name", "description", "complexity_hint", "volume"],
+                        "required": ["name", "description", "complexity_hint", "volume", "tier"],
                     },
                 },
             },
@@ -58,8 +62,10 @@ For each asset, provide:
 - description: What this asset involves based on the document context. Include format, size, channel, and any other relevant details.
 - complexity_hint: Your best estimate of complexity (simple/medium/complex) based on the description. Use "unknown" if unclear.
 - volume: How many of this asset are needed. Default to 1 if not specified.
+- tier: If the client specifies a tier, grade, or complexity label for this asset (e.g. "Tier A", "A", "Gold", "Premium", "1"), include it exactly as written. If the document has columns like A/B/C or Tier 1/2/3, extract those labels. Leave empty string if no tier is specified.
 
-Be thorough - extract every distinct asset type mentioned. If the same asset appears at different complexity levels, list them separately.
+Be thorough - extract every distinct asset type mentioned. If the same asset appears at different tiers or complexity levels, list them as SEPARATE entries with their respective tier labels.
+Do NOT combine different asset types into one entry.
 Do NOT combine different asset types into one entry."""