diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py
index ad64707261..51d09bb42a 100644
--- a/src/openai/lib/azure.py
+++ b/src/openai/lib/azure.py
@@ -64,6 +64,9 @@ def _build_request(
             model = options.json_data.get("model")
             if model is not None and "/deployments" not in str(self.base_url.path):
                 options.url = f"/deployments/{model}{options.url}"
+                # Remove model from request body for Azure deployments
+                # Azure uses URL-based routing, not body-based model specification
+                del options.json_data["model"]
 
         return super()._build_request(options, retries_taken=retries_taken)
 
diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py
index 8e71ccbe41..0379ee0865 100644
--- a/src/openai/types/chat/completion_create_params.py
+++ b/src/openai/types/chat/completion_create_params.py
@@ -185,7 +185,7 @@ class CompletionCreateParamsBase(TypedDict, total=False):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]]
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]]
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes
diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py
index ada0783bce..0d2491ea7c 100644
--- a/src/openai/types/responses/response.py
+++ b/src/openai/types/responses/response.py
@@ -214,7 +214,7 @@ class Response(BaseModel):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]] = None
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes
diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py
index bf7170da1f..a04495f40a 100644
--- a/src/openai/types/responses/response_create_params.py
+++ b/src/openai/types/responses/response_create_params.py
@@ -152,7 +152,7 @@ class ResponseCreateParamsBase(TypedDict, total=False):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]]
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]]
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes
diff --git a/src/openai/types/responses/responses_client_event.py b/src/openai/types/responses/responses_client_event.py
index 2bc6f899c5..5f9e73c61f 100644
--- a/src/openai/types/responses/responses_client_event.py
+++ b/src/openai/types/responses/responses_client_event.py
@@ -184,7 +184,7 @@ class ResponsesClientEvent(BaseModel):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]] = None
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes
diff --git a/src/openai/types/responses/responses_client_event_param.py b/src/openai/types/responses/responses_client_event_param.py
index 08596ef9ea..249c812116 100644
--- a/src/openai/types/responses/responses_client_event_param.py
+++ b/src/openai/types/responses/responses_client_event_param.py
@@ -185,7 +185,7 @@ class ResponsesClientEventParam(TypedDict, total=False):
     [Learn more](https://platform.openai.com/docs/guides/prompt-caching).
     """
 
-    prompt_cache_retention: Optional[Literal["in-memory", "24h"]]
+    prompt_cache_retention: Optional[Literal["in_memory", "24h"]]
     """The retention policy for the prompt cache.
 
     Set to `24h` to enable extended prompt caching, which keeps cached prefixes