Merge pull request #470 from presenton/fix/anthropic-generation-issue

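feat: improve handling of Anthropic responses and add retry logic for… [title truncated in the source page; per the diff below, the retries cover structured generation when the LLM returns no content]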
This commit is contained in:
Sudip Parajuli 2026-03-27 17:02:07 +05:45 committed by GitHub
commit e0771b724e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 170 additions and 116 deletions

View file

@ -967,7 +967,10 @@ class LLMClient:
],
)
tool_calls: List[AnthropicToolCall] = []
text_parts: List[str] = []
for content in response.content:
if content.type == "text" and isinstance(content.text, str):
text_parts.append(content.text)
if content.type == "tool_use":
tool_calls.append(
AnthropicToolCall(
@ -1006,6 +1009,24 @@ class LLMClient:
depth=depth + 1,
)
text_content = "".join(text_parts).strip()
if text_content:
try:
return dict(dirtyjson.loads(text_content))
except Exception:
pass
if depth < 2:
await asyncio.sleep(0.4 * (depth + 1))
return await self._generate_anthropic_structured(
model=model,
messages=messages,
max_tokens=max_tokens,
response_format=response_format,
tools=tools,
depth=depth + 1,
)
return None
async def _generate_ollama_structured(
@ -1057,64 +1078,70 @@ class LLMClient:
) -> dict:
parsed_tools = self.tool_calls_handler.parse_tools(tools)
content = None
match self.llm_provider:
case LLMProvider.OPENAI:
content = await self._generate_openai_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.CODEX:
content = await self._generate_codex_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.GOOGLE:
content = await self._generate_google_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.ANTHROPIC:
content = await self._generate_anthropic_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.OLLAMA:
content = await self._generate_ollama_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
case LLMProvider.CUSTOM:
content = await self._generate_custom_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
if content is None:
raise HTTPException(
status_code=400,
detail="LLM did not return any content",
)
return content
for attempt in range(3):
content = None
match self.llm_provider:
case LLMProvider.OPENAI:
content = await self._generate_openai_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.CODEX:
content = await self._generate_codex_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.GOOGLE:
content = await self._generate_google_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.ANTHROPIC:
content = await self._generate_anthropic_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.OLLAMA:
content = await self._generate_ollama_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
case LLMProvider.CUSTOM:
content = await self._generate_custom_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
if content is not None:
return content
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
raise HTTPException(
status_code=400,
detail="LLM did not return any content",
)
# ? Stream Unstructured Content
async def _stream_openai(

View file

@ -967,7 +967,10 @@ class LLMClient:
],
)
tool_calls: List[AnthropicToolCall] = []
text_parts: List[str] = []
for content in response.content:
if content.type == "text" and isinstance(content.text, str):
text_parts.append(content.text)
if content.type == "tool_use":
tool_calls.append(
AnthropicToolCall(
@ -1006,6 +1009,24 @@ class LLMClient:
depth=depth + 1,
)
text_content = "".join(text_parts).strip()
if text_content:
try:
return dict(dirtyjson.loads(text_content))
except Exception:
pass
if depth < 2:
await asyncio.sleep(0.4 * (depth + 1))
return await self._generate_anthropic_structured(
model=model,
messages=messages,
max_tokens=max_tokens,
response_format=response_format,
tools=tools,
depth=depth + 1,
)
return None
async def _generate_ollama_structured(
@ -1057,64 +1078,70 @@ class LLMClient:
) -> dict:
parsed_tools = self.tool_calls_handler.parse_tools(tools)
content = None
match self.llm_provider:
case LLMProvider.OPENAI:
content = await self._generate_openai_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.CODEX:
content = await self._generate_codex_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.GOOGLE:
content = await self._generate_google_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.ANTHROPIC:
content = await self._generate_anthropic_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.OLLAMA:
content = await self._generate_ollama_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
case LLMProvider.CUSTOM:
content = await self._generate_custom_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
if content is None:
raise HTTPException(
status_code=400,
detail="LLM did not return any content",
)
return content
for attempt in range(3):
content = None
match self.llm_provider:
case LLMProvider.OPENAI:
content = await self._generate_openai_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.CODEX:
content = await self._generate_codex_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.GOOGLE:
content = await self._generate_google_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.ANTHROPIC:
content = await self._generate_anthropic_structured(
model=model,
messages=messages,
response_format=response_format,
tools=parsed_tools,
max_tokens=max_tokens,
)
case LLMProvider.OLLAMA:
content = await self._generate_ollama_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
case LLMProvider.CUSTOM:
content = await self._generate_custom_structured(
model=model,
messages=messages,
response_format=response_format,
strict=strict,
max_tokens=max_tokens,
)
if content is not None:
return content
if attempt < 2:
await asyncio.sleep(0.5 * (attempt + 1))
raise HTTPException(
status_code=400,
detail="LLM did not return any content",
)
# ? Stream Unstructured Content
async def _stream_openai(