openai · micheallam130 · Feb 26, 2026 · Feb 26, 2026
@@ -64,6 +64,9 @@ def _build_request(
             model = options.json_data.get("model")
             if model is not None and "/deployments" not in str(self.base_url.path):
                 options.url = f"/deployments/{model}{options.url}"
+                # Remove model from request body for Azure deployments
+                # Azure uses URL-based routing, not body-based model specification
+                del options.json_data["model"]
 
         return super()._build_request(options, retries_taken=retries_taken)
 

@@ -185,7 +185,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]]
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]]
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes

@@ -214,7 +214,7 @@ class Response(BaseModel):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]] = None
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes

@@ -152,7 +152,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]]
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]]
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes

@@ -184,7 +184,7 @@ class ResponsesClientEvent(BaseModel):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]] = None
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes

@@ -185,7 +185,7 @@ class ResponsesClientEventParam(TypedDict, total=False):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]]
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]]
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes