From 09441a20b8f67878c55bf248caac5de643fc0a83 Mon Sep 17 00:00:00 2001
From: DJP <DJP>
Date: Sun, 12 Apr 2026 16:51:08 -0400
Subject: [PATCH] =?UTF-8?q?Fix=20deep=20extraction=20max=5Ftokens:=2016k?=
 =?UTF-8?q?=E2=86=9232k,=20shorter=20descriptions?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Root cause: stop_reason=max_tokens - Claude ran out of output tokens
before finishing the tool call JSON for 50+ assets.

Fix:
- Bump max_tokens from 16000 to 32000 for both normal and deep extraction
- Tell Claude to keep descriptions SHORT (1 sentence max) and to state tier/volume numbers concisely
- Reduce deep-extraction input data to 35k chars (from 40k) to leave more room for output
- Log stop_reason in the normal-extraction warning too, and truncate the logged response to 500 chars

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 backend/app/services/doc_parser.py | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)
diff --git a/backend/app/services/doc_parser.py b/backend/app/services/doc_parser.py
index be40525..2854f8f 100644
--- a/backend/app/services/doc_parser.py
+++ b/backend/app/services/doc_parser.py
@@ -109,14 +109,15 @@ def parse_text_with_ai(text: str) -> tuple[list[dict], dict]:
         user_message=f"Extract all deliverable assets from this client document:\n\n{text}",
         tools=EXTRACT_TOOLS,
         tool_choice={"type": "tool", "name": "extract_assets"},
-        max_tokens=16000,
+        max_tokens=32000,
     )
 
     usage_info = getattr(response, '_usage_info', {"input_tokens": 0, "output_tokens": 0, "cost_usd": 0})
 
+    stop_reason = getattr(response, 'stop_reason', 'unknown')
     result = extract_tool_result(response)
     if not result or "assets" not in result:
-        logger.warning("Claude did not return structured asset data, response: %s", extract_text(response))
+        logger.warning(f"Claude did not return structured asset data. stop_reason={stop_reason}, response: %s", extract_text(response)[:500])
         return [], usage_info
 
     return result["assets"], usage_info
@@ -287,18 +288,20 @@ IMPORTANT GUIDELINES:
 - Skip rows that are questions, metadata, or caveats — those are not deliverables
 - If volume is 0 or "No", still extract the asset but set volume to 0
 - Carry forward category names from merged cells (the analysis explains the hierarchy)
-- You MUST call the extract_assets tool with at least one asset. If you cannot find structured assets, extract the best candidates you can identify.
+- You MUST call the extract_assets tool with at least one asset
+- Keep descriptions SHORT (1 sentence max) to stay within output limits
+- For tier/volume, just state the numbers concisely
 
 Now extract all deliverable assets from this data:
 
-{text[:40000]}"""
+{text[:35000]}"""
 
     response = call_claude(
         system=SYSTEM_PROMPT,
         user_message=guided_prompt,
         tools=EXTRACT_TOOLS,
         tool_choice={"type": "tool", "name": "extract_assets"},
-        max_tokens=16000,
+        max_tokens=32000,
     )
 
     usage = getattr(response, '_usage_info', {"input_tokens": 0, "output_tokens": 0, "cost_usd": 0})