Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions src/strands/models/bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,10 @@ class BedrockConfig(TypedDict, total=False):
model_id: The Bedrock model ID (e.g., "us.anthropic.claude-sonnet-4-20250514-v1:0")
include_tool_result_status: Flag to include status field in tool results.
True includes status, False removes status, "auto" determines based on model_id. Defaults to "auto".
service_tier: Service tier for the request, controlling the trade-off between latency and cost.
Valid values: "default" (standard), "priority" (faster, premium), "flex" (cheaper, slower).
Please check https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html for all
valid tiers.
stop_sequences: List of sequences that will stop generation when encountered
streaming: Flag to enable/disable streaming. Defaults to True.
temperature: Controls randomness in generation (higher = more random)
Expand All @@ -117,6 +121,7 @@ class BedrockConfig(TypedDict, total=False):
max_tokens: int | None
model_id: str
include_tool_result_status: Literal["auto"] | bool | None
service_tier: str | None
stop_sequences: list[str] | None
streaming: bool | None
temperature: float | None
Expand Down Expand Up @@ -243,6 +248,7 @@ def _format_request(
"modelId": self.config["model_id"],
"messages": self._format_bedrock_messages(messages),
"system": system_blocks,
**({"serviceTier": {"type": self.config["service_tier"]}} if self.config.get("service_tier") else {}),
**(
{
"toolConfig": {
Expand Down
14 changes: 14 additions & 0 deletions tests/strands/models/test_bedrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -379,6 +379,20 @@ def test_format_request_guardrail_config_without_trace_or_stream_processing_mode
assert tru_request == exp_request


def test_format_request_with_service_tier(model, messages, model_id):
    """Verify that a configured service_tier is emitted as a serviceTier block in the request."""
    model.update_config(service_tier="flex")

    request = model._format_request(messages)

    # Dict equality is order-insensitive, so keys may be listed in any order.
    assert request == {
        "inferenceConfig": {},
        "messages": messages,
        "modelId": model_id,
        "serviceTier": {"type": "flex"},
        "system": [],
    }


def test_format_request_inference_config(model, messages, model_id, inference_config):
model.update_config(**inference_config)
tru_request = model._format_request(messages)
Expand Down