From 3cb1973f575d5a427a0154b6f5bdba5255e1e33d Mon Sep 17 00:00:00 2001 From: DJP Date: Thu, 9 Apr 2026 15:17:20 -0400 Subject: [PATCH] Fix tier matching: use client tier to pick correct complexity variant MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Doc parser now extracts tier labels (Tier A, A, Gold, etc.) per asset - Matching uses tier to find the correct GMAL complexity variant: - Claude matches to the GMAL family (asset type) - Post-match lookup: (asset_name + target_complexity_level) finds exact variant - e.g. "Banner - Tier A" with A=Complex → finds Complex variant by asset_name query - Tier hint passed to Claude prompt for better matching - No blind expansion - only the tier-appropriate GMAL is matched - Expand to Tiers button still available for when client doesn't specify tiers Co-Authored-By: Claude Opus 4.6 (1M context) --- backend/app/api/matching.py | 1 + backend/app/services/ai_matching.py | 56 ++++++++++++++++++++++++++--- backend/app/services/doc_parser.py | 10 ++++-- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/backend/app/api/matching.py b/backend/app/api/matching.py index 061d741..2bccac9 100644 --- a/backend/app/api/matching.py +++ b/backend/app/api/matching.py @@ -63,6 +63,7 @@ async def _background_parse(project_id: int, filename: str, text: str, metadata: project_id=project_id, raw_name=item.get("name", "Unknown"), raw_description=item.get("description", ""), + client_tier=item.get("tier", "") or None, volume=volume, sort_order=idx + 1, ) diff --git a/backend/app/services/ai_matching.py b/backend/app/services/ai_matching.py index db26975..3414ce4 100644 --- a/backend/app/services/ai_matching.py +++ b/backend/app/services/ai_matching.py @@ -108,14 +108,18 @@ Guidelines: - Be generous with scoring when the match is semantically correct even if the naming differs.""" -def _match_single_asset(client_asset_name, client_asset_desc, volume, catalog_text, num_assets): +def _match_single_asset(client_asset_name, client_asset_desc, volume, catalog_text, num_assets, tier_hint=""): """Run a single match call to Claude (synchronous, for use in thread pool).""" + tier_instruction = "" + if tier_hint: + tier_instruction = f"\nCLIENT TIER: {tier_hint} — match to the {tier_hint} complexity variant if one exists.\n" + user_msg = f"""Match this client asset to the best GMAL equivalent(s): CLIENT ASSET: Name: {client_asset_name} Description: {client_asset_desc or 'No description provided'} -Volume: {volume} +Volume: {volume}{tier_instruction} FULL GMAL CATALOG ({num_assets} assets): {catalog_text}""" @@ -143,9 +147,27 @@ async def match_client_assets( """ _clear_cancel(project_id) + # Load project tier mapping if set + import json as _json + from app.models.project import Project + proj_result = await db.execute(select(Project).where(Project.id == project_id)) + project = proj_result.scalar_one_or_none() + tier_config = {} + if project and project.tier_mapping: + try: + tier_config = _json.loads(project.tier_mapping) + except _json.JSONDecodeError: + pass + # Build tier label -> complexity_level map + tier_to_complexity = {} + complexity_map = {"simple": 1, "medium": 2, "mid": 2, "complex": 3} + for t in tier_config.get("tiers", []): + tier_to_complexity[t["label"].lower()] = complexity_map.get(t["complexity"].lower(), 2) + # Snapshot client asset data before any commits (ORM objects expire after commit) asset_snapshots = [ - {"id": ca.id, "raw_name": ca.raw_name, "raw_description": ca.raw_description, "volume": ca.volume} + {"id": ca.id, "raw_name": ca.raw_name, "raw_description": ca.raw_description, + "volume": ca.volume, "client_tier": getattr(ca, 'client_tier', None)} for ca in client_assets ] @@ -155,6 +177,8 @@ async def match_client_assets( ) all_gmals = result.scalars().all() gmal_by_id = {g.gmal_id: g for g in all_gmals} + # Lookup by (asset_name, complexity_level) for tier variant resolution + gmal_by_name_complexity = {(g.asset_name, g.complexity_level): g for g in all_gmals if g.asset_name} # Build compact catalog once - reused for every match call catalog_text = _format_compact_catalog(all_gmals) @@ -223,6 +247,15 @@ async def match_client_assets( for snap in batch: if _is_cancelled(project_id): break + # Build tier hint for matching + tier_hint = "" + client_tier = snap.get("client_tier") or "" + if client_tier and tier_to_complexity: + complexity_level = tier_to_complexity.get(client_tier.lower()) + if complexity_level: + complexity_names = {1: "Simple", 2: "Medium", 3: "Complex"} + tier_hint = complexity_names.get(complexity_level, "") + future = loop.run_in_executor( executor, _match_single_asset, @@ -231,6 +264,7 @@ async def match_client_assets( snap["volume"], catalog_text, len(all_gmals), + tier_hint, ) futures.append((snap, future)) @@ -263,12 +297,24 @@ async def match_client_assets( logger.warning(f"Claude returned unknown GMAL ID: {m['gmal_id']}") continue + # If client asset has a tier, find the correct complexity variant + final_gmal = gmal + client_tier = snap.get("client_tier") or "" + if client_tier and tier_to_complexity and gmal.asset_name: + target_complexity = tier_to_complexity.get(client_tier.lower()) + if target_complexity and gmal.complexity_level != target_complexity: + # Find the sibling at the right complexity + variant = gmal_by_name_complexity.get((gmal.asset_name, target_complexity)) + if variant: + logger.info(f"Tier remap: '{snap['raw_name']}' ({client_tier}) → {variant.gmal_id} ({variant.complexity_name}) instead of {gmal.gmal_id}") + final_gmal = variant + match = Match( client_asset_id=snap["id"], - gmal_asset_id=gmal.id, + gmal_asset_id=final_gmal.id, confidence=MatchConfidence(m["confidence"]), confidence_score=m.get("confidence_score"), - ai_reasoning=m.get("reasoning"), + ai_reasoning=m.get("reasoning") + (f" [Tier {client_tier} → {final_gmal.complexity_name}]" if final_gmal != gmal else ""), caveat_text=m.get("caveats"), is_selected=(rank == 1 and auto_select), rank=rank, diff --git a/backend/app/services/doc_parser.py b/backend/app/services/doc_parser.py index 34061c8..3f70a1e 100644 --- a/backend/app/services/doc_parser.py +++ b/backend/app/services/doc_parser.py @@ -40,8 +40,12 @@ EXTRACT_TOOLS = [ "type": "integer", "description": "Number of this asset needed (default 1 if not specified)" }, + "tier": { + "type": "string", + "description": "The client's tier/complexity label if specified (e.g. 'Tier A', 'A', 'Gold', '1', 'Premium'). Leave empty string if no tier is specified." + }, }, - "required": ["name", "description", "complexity_hint", "volume"], + "required": ["name", "description", "complexity_hint", "volume", "tier"], }, }, }, @@ -58,8 +62,10 @@ For each asset, provide: - description: What this asset involves based on the document context. Include format, size, channel, and any other relevant details. - complexity_hint: Your best estimate of complexity (simple/medium/complex) based on the description. Use "unknown" if unclear. - volume: How many of this asset are needed. Default to 1 if not specified. +- tier: If the client specifies a tier, grade, or complexity label for this asset (e.g. "Tier A", "A", "Gold", "Premium", "1"), include it exactly as written. If the document has columns like A/B/C or Tier 1/2/3, extract those labels. Leave empty string if no tier is specified. -Be thorough - extract every distinct asset type mentioned. If the same asset appears at different complexity levels, list them separately. +Be thorough - extract every distinct asset type mentioned. If the same asset appears at different tiers or complexity levels, list them as SEPARATE entries with their respective tier labels. +Do NOT combine different asset types into one entry. Do NOT combine different asset types into one entry."""