diff --git a/servers/fastapi/services/llm_client.py b/servers/fastapi/services/llm_client.py index 448a94b5..ce86f6db 100644 --- a/servers/fastapi/services/llm_client.py +++ b/servers/fastapi/services/llm_client.py @@ -387,7 +387,11 @@ class LLMClient: max_tokens: Optional[int] = None, depth: int = 0, ): - extra_body = {"enable_thinking": not self.disable_thinking()} + extra_body = ( + {"enable_thinking": False} + if self.disable_thinking() + else None + ) return await self._generate_openai( model=model, messages=messages, @@ -754,7 +758,11 @@ class LLMClient: max_tokens: Optional[int] = None, depth: int = 0, ): - extra_body = {"enable_thinking": not self.disable_thinking()} + extra_body = ( + {"enable_thinking": False} + if self.disable_thinking() + else None + ) return await self._generate_openai_structured( model=model, messages=messages, @@ -1071,7 +1079,11 @@ class LLMClient: max_tokens: Optional[int] = None, depth: int = 0, ): - extra_body = {"enable_thinking": not self.disable_thinking()} + extra_body = ( + {"enable_thinking": False} + if self.disable_thinking() + else None + ) return self._stream_openai( model=model, messages=messages, @@ -1484,7 +1496,11 @@ class LLMClient: max_tokens: Optional[int] = None, depth: int = 0, ): - extra_body = {"enable_thinking": not self.disable_thinking()} + extra_body = ( + {"enable_thinking": False} + if self.disable_thinking() + else None + ) return self._stream_openai_structured( model=model, messages=messages,