diff --git a/backend/app/services/doc_parser.py b/backend/app/services/doc_parser.py index be40525..2854f8f 100644 --- a/backend/app/services/doc_parser.py +++ b/backend/app/services/doc_parser.py @@ -109,14 +109,15 @@ def parse_text_with_ai(text: str) -> tuple[list[dict], dict]: user_message=f"Extract all deliverable assets from this client document:\n\n{text}", tools=EXTRACT_TOOLS, tool_choice={"type": "tool", "name": "extract_assets"}, - max_tokens=16000, + max_tokens=32000, ) usage_info = getattr(response, '_usage_info', {"input_tokens": 0, "output_tokens": 0, "cost_usd": 0}) + stop_reason = getattr(response, 'stop_reason', 'unknown') result = extract_tool_result(response) if not result or "assets" not in result: - logger.warning("Claude did not return structured asset data, response: %s", extract_text(response)) + logger.warning(f"Claude did not return structured asset data. stop_reason={stop_reason}, response: %s", extract_text(response)[:500]) return [], usage_info return result["assets"], usage_info @@ -287,18 +288,20 @@ IMPORTANT GUIDELINES: - Skip rows that are questions, metadata, or caveats — those are not deliverables - If volume is 0 or "No", still extract the asset but set volume to 0 - Carry forward category names from merged cells (the analysis explains the hierarchy) -- You MUST call the extract_assets tool with at least one asset. If you cannot find structured assets, extract the best candidates you can identify. +- You MUST call the extract_assets tool with at least one asset +- Keep descriptions SHORT (1 sentence max) to stay within output limits +- For tier/volume, just state the numbers concisely Now extract all deliverable assets from this data: -{text[:40000]}""" +{text[:35000]}""" response = call_claude( system=SYSTEM_PROMPT, user_message=guided_prompt, tools=EXTRACT_TOOLS, tool_choice={"type": "tool", "name": "extract_assets"}, - max_tokens=16000, + max_tokens=32000, ) usage = getattr(response, '_usage_info', {"input_tokens": 0, "output_tokens": 0, "cost_usd": 0})