From 9f50f03b64bbaaca2b0254c9b800eb34f7c78112 Mon Sep 17 00:00:00 2001 From: g97iulio1609 Date: Sat, 28 Feb 2026 21:10:32 +0100 Subject: [PATCH 1/5] fix: preserve reasoning content signature from LiteLLM thinking models When streaming responses from thinking models (e.g., Gemini) via LiteLLM, the reasoning content signature was silently dropped. The LiteLLM adapter's _process_choice_content only emitted reasoning text from delta.reasoning_content but never captured the signature from delta.thinking.signature. This patch adds a check for the thinking.signature attribute on each streaming delta and emits a contentBlockDelta with reasoningContent.signature so the event loop can accumulate and store the signature in the final content block. An isinstance(sig, str) guard prevents unittest.mock.Mock objects from triggering false positives during testing. Fixes #1764 Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- src/strands/models/litellm.py | 12 ++++++ tests/strands/models/test_litellm.py | 58 ++++++++++++++++++++++++++++ 2 files changed, 70 insertions(+) diff --git a/src/strands/models/litellm.py b/src/strands/models/litellm.py index be5337f0d..e7d3255b4 100644 --- a/src/strands/models/litellm.py +++ b/src/strands/models/litellm.py @@ -410,6 +410,18 @@ async def _process_choice_content( ) yield data_type, chunk + # Process reasoning signature from LiteLLM's thinking attribute + thinking = getattr(content_source, "thinking", None) + if thinking is not None: + sig = getattr(thinking, "signature", None) + if isinstance(sig, str) and sig: + chunks, data_type = self._stream_switch_content("reasoning_content", data_type) + for chunk in chunks: + yield data_type, chunk + yield data_type, { + "contentBlockDelta": {"delta": {"reasoningContent": {"signature": sig}}} + } + # Process text content if hasattr(content_source, "content") and content_source.content: chunks, data_type = self._stream_switch_content("text", data_type) diff --git a/tests/strands/models/test_litellm.py b/tests/strands/models/test_litellm.py index 9bb0e09ca..d74053a8c 100644 --- a/tests/strands/models/test_litellm.py +++ b/tests/strands/models/test_litellm.py @@ -848,3 +848,61 @@ def test_format_request_messages_with_tool_calls_no_content(): }, ] assert tru_result == exp_result + + +@pytest.mark.asyncio +async def test_stream_preserves_thinking_signature(litellm_acompletion, api_key, model_id, model, agenerator, alist): + """Test that reasoning content signatures from LiteLLM thinking attribute are preserved. + + Gemini thinking models send thought_signature that must be preserved in the conversation + history for subsequent requests to succeed. LiteLLM provides signatures via the `thinking` + attribute on streaming deltas. + """ + + class MockStreamChunk: + def __init__(self, choices=None): + self.choices = choices or [] + + # First chunk: reasoning text (no signature yet) + mock_thinking_no_sig = unittest.mock.Mock() + mock_thinking_no_sig.signature = None + mock_delta_1 = unittest.mock.Mock( + content=None, tool_calls=None, reasoning_content="Let me think...", thinking=mock_thinking_no_sig + ) + + # Second chunk: signature arrives via thinking attribute + mock_thinking_with_sig = unittest.mock.Mock() + mock_thinking_with_sig.signature = "base64encodedSignature==" + mock_delta_2 = unittest.mock.Mock( + content=None, tool_calls=None, reasoning_content=None, thinking=mock_thinking_with_sig + ) + + # Third chunk: text response + mock_delta_3 = unittest.mock.Mock(content="The answer is 42.", tool_calls=None, reasoning_content=None) + mock_delta_3.thinking = None + + mock_event_1 = MockStreamChunk(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_1)]) + mock_event_2 = MockStreamChunk(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_2)]) + mock_event_3 = MockStreamChunk(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta_3)]) + mock_event_4 = MockStreamChunk(choices=[]) + + litellm_acompletion.side_effect = unittest.mock.AsyncMock( + return_value=agenerator([mock_event_1, mock_event_2, mock_event_3, mock_event_4]) + ) + + messages = [{"role": "user", "content": [{"type": "text", "text": "Think about 42"}]}] + response = model.stream(messages) + events = await alist(response) + + # Verify reasoning content signature is emitted + signature_deltas = [ + e + for e in events + if "contentBlockDelta" in e + and "reasoningContent" in e["contentBlockDelta"]["delta"] + and "signature" in e["contentBlockDelta"]["delta"]["reasoningContent"] + ] + assert len(signature_deltas) == 1 + assert signature_deltas[0]["contentBlockDelta"]["delta"]["reasoningContent"]["signature"] == ( + "base64encodedSignature==" + ) From 275b75d83162705d70f8b6b54d1d04c1a670a1f1 Mon Sep 17 00:00:00 2001 From: giulio-leone <6887247+giulio-leone@users.noreply.github.com> Date: Fri, 6 Mar 2026 03:52:03 +0100 Subject: [PATCH 2/5] test: add test for signature arrival with unset data_type Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/strands/models/test_litellm.py | 72 ++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) diff --git a/tests/strands/models/test_litellm.py b/tests/strands/models/test_litellm.py index d74053a8c..d21e16f1f 100644 --- a/tests/strands/models/test_litellm.py +++ b/tests/strands/models/test_litellm.py @@ -906,3 +906,75 @@ def __init__(self, choices=None): assert signature_deltas[0]["contentBlockDelta"]["delta"]["reasoningContent"]["signature"] == ( "base64encodedSignature==" ) + + +@pytest.mark.asyncio +async def test_stream_signature_arrives_with_unset_data_type( + litellm_acompletion, api_key, model_id, model, agenerator, alist +): + """Test that a reasoning signature arriving before any reasoning text triggers content switching. + + When the signature is the first chunk (data_type is None), _stream_switch_content should emit + content_start/content_stop events for the reasoning_content block. + """ + + class MockStreamChunk: + def __init__(self, choices=None): + self.choices = choices or [] + + # First chunk: signature arrives immediately (no prior reasoning text) + mock_thinking_with_sig = unittest.mock.Mock() + mock_thinking_with_sig.signature = "earlySignature==" + mock_delta_1 = unittest.mock.Mock( + content=None, tool_calls=None, reasoning_content=None, thinking=mock_thinking_with_sig + ) + + # Second chunk: text response + mock_delta_2 = unittest.mock.Mock(content="The answer is 42.", tool_calls=None, reasoning_content=None) + mock_delta_2.thinking = None + + mock_event_1 = MockStreamChunk(choices=[unittest.mock.Mock(finish_reason=None, delta=mock_delta_1)]) + mock_event_2 = MockStreamChunk(choices=[unittest.mock.Mock(finish_reason="stop", delta=mock_delta_2)]) + mock_event_3 = MockStreamChunk(choices=[]) + + litellm_acompletion.side_effect = unittest.mock.AsyncMock( + return_value=agenerator([mock_event_1, mock_event_2, mock_event_3]) + ) + + messages = [{"role": "user", "content": [{"type": "text", "text": "Think about 42"}]}] + response = model.stream(messages) + events = await alist(response) + + # Verify content_start was emitted before the signature (content type switch from None) + # When signature arrives first (data_type=None), _stream_switch_content emits content_start + content_starts = [e for e in events if "contentBlockStart" in e] + content_stops = [e for e in events if "contentBlockStop" in e] + assert len(content_starts) == 2 # one for reasoning_content, one for text + assert len(content_stops) == 2 # one for reasoning_content, one for text + + # Verify the signature content block appears before text: + # event order should be: messageStart, contentBlockStart, signature delta, contentBlockStop, + # contentBlockStart, text delta, contentBlockStop, messageStop + sig_index = next(i for i, e in enumerate(events) if "contentBlockDelta" in e and "reasoningContent" in e["contentBlockDelta"]["delta"]) + text_index = next(i for i, e in enumerate(events) if "contentBlockDelta" in e and "text" in e.get("contentBlockDelta", {}).get("delta", {})) + assert sig_index < text_index + + # Verify signature delta is emitted + signature_deltas = [ + e + for e in events + if "contentBlockDelta" in e + and "reasoningContent" in e["contentBlockDelta"]["delta"] + and "signature" in e["contentBlockDelta"]["delta"]["reasoningContent"] + ] + assert len(signature_deltas) == 1 + assert signature_deltas[0]["contentBlockDelta"]["delta"]["reasoningContent"]["signature"] == "earlySignature==" + + # Verify text content follows after reasoning + text_deltas = [ + e + for e in events + if "contentBlockDelta" in e and "text" in e.get("contentBlockDelta", {}).get("delta", {}) + ] + assert len(text_deltas) == 1 + assert text_deltas[0]["contentBlockDelta"]["delta"]["text"] == "The answer is 42." From f6b81af450c662d006a082dfea822e27d8a6d834 Mon Sep 17 00:00:00 2001 From: Giulio Leone <6887247+giulio-leone@users.noreply.github.com> Date: Fri, 6 Mar 2026 17:32:01 +0100 Subject: [PATCH 3/5] fix: resolve E501 line-too-long lint errors in test_litellm.py Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests/strands/models/test_litellm.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/tests/strands/models/test_litellm.py b/tests/strands/models/test_litellm.py index d21e16f1f..d989b0480 100644 --- a/tests/strands/models/test_litellm.py +++ b/tests/strands/models/test_litellm.py @@ -955,8 +955,14 @@ def __init__(self, choices=None): # Verify the signature content block appears before text: # event order should be: messageStart, contentBlockStart, signature delta, contentBlockStop, # contentBlockStart, text delta, contentBlockStop, messageStop - sig_index = next(i for i, e in enumerate(events) if "contentBlockDelta" in e and "reasoningContent" in e["contentBlockDelta"]["delta"]) - text_index = next(i for i, e in enumerate(events) if "contentBlockDelta" in e and "text" in e.get("contentBlockDelta", {}).get("delta", {})) + sig_index = next( + i for i, e in enumerate(events) + if "contentBlockDelta" in e and "reasoningContent" in e["contentBlockDelta"]["delta"] + ) + text_index = next( + i for i, e in enumerate(events) + if "contentBlockDelta" in e and "text" in e.get("contentBlockDelta", {}).get("delta", {}) + ) assert sig_index < text_index # Verify signature delta is emitted From 5056a2e15cb379b2991573b218517556eda039ad Mon Sep 17 00:00:00 2001 From: Giulio Leone <6887247+giulio-leone@users.noreply.github.com> Date: Fri, 6 Mar 2026 20:01:02 +0100 Subject: [PATCH 4/5] test: add litellm integ test for reasoning and signature handling Add parametrized integration test (streaming + non-streaming) that verifies reasoning content and signatures are preserved in model responses when extended thinking is enabled via the LiteLLM adapter. Addresses review feedback from zastrowm on PR #1789. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests_integ/models/test_model_litellm.py | 41 ++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests_integ/models/test_model_litellm.py b/tests_integ/models/test_model_litellm.py index eb0737e0f..f28c54fcf 100644 --- a/tests_integ/models/test_model_litellm.py +++ b/tests_integ/models/test_model_litellm.py @@ -154,6 +154,47 @@ def test_agent_invoke_reasoning(agent, model): assert result.message["content"][0]["reasoningContent"]["reasoningText"]["text"] +@pytest.mark.parametrize("model_fixture", ["streaming_model", "non_streaming_model"]) +def test_agent_reasoning_with_signature(model_fixture, tools, request): + """Test that reasoning content and signatures are preserved in responses. + + Verifies the full reasoning pipeline: reasoning text is returned with + a non-empty signature, ensuring that the model provider's thought + signatures are correctly propagated through the LiteLLM adapter. + """ + model = request.getfixturevalue(model_fixture) + model.update_config( + params={ + "thinking": { + "budget_tokens": 1024, + "type": "enabled", + }, + }, + ) + + agent = Agent(model=model, tools=tools) + result = agent("What is the time and weather? Think step by step.") + + # Find the reasoning content block in the response + reasoning_blocks = [ + block + for block in result.message["content"] + if "reasoningContent" in block + ] + assert reasoning_blocks, "Expected at least one reasoningContent block" + + reasoning_block = reasoning_blocks[0] + reasoning_text = reasoning_block["reasoningContent"]["reasoningText"] + + # Verify reasoning text is present and non-empty + assert reasoning_text.get("text"), "Reasoning text should be non-empty" + + # Verify signature is present and non-empty + assert reasoning_text.get("signature"), ( + "Reasoning signature should be present and non-empty" + ) + + @pytest.mark.parametrize("model_fixture", ["streaming_model", "non_streaming_model"]) def test_structured_output(model_fixture, weather, request): model = request.getfixturevalue(model_fixture) From 59ceafafa8455a4206eb746ad23eeea7b6b7ab09 Mon Sep 17 00:00:00 2001 From: giulio-leone Date: Sat, 7 Mar 2026 06:48:30 +0100 Subject: [PATCH 5/5] fix: remove tools from reasoning test to avoid Bedrock thinking constraint When thinking is enabled with tools, Bedrock requires thinking blocks in assistant messages on subsequent turns. Since this test only needs to verify reasoning + signature preservation, tools are not needed. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com> --- tests_integ/models/test_model_litellm.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tests_integ/models/test_model_litellm.py b/tests_integ/models/test_model_litellm.py index f28c54fcf..e14bc7787 100644 --- a/tests_integ/models/test_model_litellm.py +++ b/tests_integ/models/test_model_litellm.py @@ -155,12 +155,16 @@ def test_agent_invoke_reasoning(agent, model): @pytest.mark.parametrize("model_fixture", ["streaming_model", "non_streaming_model"]) -def test_agent_reasoning_with_signature(model_fixture, tools, request): +def test_agent_reasoning_with_signature(model_fixture, request): """Test that reasoning content and signatures are preserved in responses. Verifies the full reasoning pipeline: reasoning text is returned with a non-empty signature, ensuring that the model provider's thought signatures are correctly propagated through the LiteLLM adapter. + + Note: tools are intentionally excluded to avoid the Bedrock constraint + that multi-turn thinking+tools requires thinking blocks in assistant + messages on subsequent turns. """ model = request.getfixturevalue(model_fixture) model.update_config( @@ -172,15 +176,11 @@ def test_agent_reasoning_with_signature(model_fixture, tools, request): }, ) - agent = Agent(model=model, tools=tools) - result = agent("What is the time and weather? Think step by step.") + agent = Agent(model=model) + result = agent("What is 2 + 2? Think step by step.") # Find the reasoning content block in the response - reasoning_blocks = [ - block - for block in result.message["content"] - if "reasoningContent" in block - ] + reasoning_blocks = [block for block in result.message["content"] if "reasoningContent" in block] assert reasoning_blocks, "Expected at least one reasoningContent block" reasoning_block = reasoning_blocks[0] @@ -190,9 +190,7 @@ def test_agent_reasoning_with_signature(model_fixture, tools, request): assert reasoning_text.get("text"), "Reasoning text should be non-empty" # Verify signature is present and non-empty - assert reasoning_text.get("signature"), ( - "Reasoning signature should be present and non-empty" - ) + assert reasoning_text.get("signature"), "Reasoning signature should be present and non-empty" @pytest.mark.parametrize("model_fixture", ["streaming_model", "non_streaming_model"])