diff --git a/src/openai/lib/azure.py b/src/openai/lib/azure.py index ad64707261..51d09bb42a 100644 --- a/src/openai/lib/azure.py +++ b/src/openai/lib/azure.py @@ -64,6 +64,9 @@ def _build_request( model = options.json_data.get("model") if model is not None and "/deployments" not in str(self.base_url.path): options.url = f"/deployments/{model}{options.url}" + # Remove model from request body for Azure deployments + # Azure uses URL-based routing, not body-based model specification + del options.json_data["model"] return super()._build_request(options, retries_taken=retries_taken) diff --git a/src/openai/types/chat/completion_create_params.py b/src/openai/types/chat/completion_create_params.py index 8e71ccbe41..0379ee0865 100644 --- a/src/openai/types/chat/completion_create_params.py +++ b/src/openai/types/chat/completion_create_params.py @@ -185,7 +185,7 @@ class CompletionCreateParamsBase(TypedDict, total=False): [Learn more](https://platform.openai.com/docs/guides/prompt-caching). """ - prompt_cache_retention: Optional[Literal["in-memory", "24h"]] + prompt_cache_retention: Optional[Literal["in_memory", "24h"]] """The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching, which keeps cached prefixes diff --git a/src/openai/types/responses/response.py b/src/openai/types/responses/response.py index ada0783bce..0d2491ea7c 100644 --- a/src/openai/types/responses/response.py +++ b/src/openai/types/responses/response.py @@ -214,7 +214,7 @@ class Response(BaseModel): [Learn more](https://platform.openai.com/docs/guides/prompt-caching). """ - prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None + prompt_cache_retention: Optional[Literal["in_memory", "24h"]] = None """The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching, which keeps cached prefixes diff --git a/src/openai/types/responses/response_create_params.py b/src/openai/types/responses/response_create_params.py index bf7170da1f..a04495f40a 100644 --- a/src/openai/types/responses/response_create_params.py +++ b/src/openai/types/responses/response_create_params.py @@ -152,7 +152,7 @@ class ResponseCreateParamsBase(TypedDict, total=False): [Learn more](https://platform.openai.com/docs/guides/prompt-caching). """ - prompt_cache_retention: Optional[Literal["in-memory", "24h"]] + prompt_cache_retention: Optional[Literal["in_memory", "24h"]] """The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching, which keeps cached prefixes diff --git a/src/openai/types/responses/responses_client_event.py b/src/openai/types/responses/responses_client_event.py index 2bc6f899c5..5f9e73c61f 100644 --- a/src/openai/types/responses/responses_client_event.py +++ b/src/openai/types/responses/responses_client_event.py @@ -184,7 +184,7 @@ class ResponsesClientEvent(BaseModel): [Learn more](https://platform.openai.com/docs/guides/prompt-caching). """ - prompt_cache_retention: Optional[Literal["in-memory", "24h"]] = None + prompt_cache_retention: Optional[Literal["in_memory", "24h"]] = None """The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching, which keeps cached prefixes diff --git a/src/openai/types/responses/responses_client_event_param.py b/src/openai/types/responses/responses_client_event_param.py index 08596ef9ea..249c812116 100644 --- a/src/openai/types/responses/responses_client_event_param.py +++ b/src/openai/types/responses/responses_client_event_param.py @@ -185,7 +185,7 @@ class ResponsesClientEventParam(TypedDict, total=False): [Learn more](https://platform.openai.com/docs/guides/prompt-caching). """ - prompt_cache_retention: Optional[Literal["in-memory", "24h"]] + prompt_cache_retention: Optional[Literal["in_memory", "24h"]] """The retention policy for the prompt cache. Set to `24h` to enable extended prompt caching, which keeps cached prefixes