This commit is contained in:
2025-04-27 21:22:28 +01:00
parent 05f6f149ad
commit 5399169b11
5193 changed files with 843837 additions and 0 deletions

View File

@@ -0,0 +1,61 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from .audio import (
Audio,
AsyncAudio,
AudioWithRawResponse,
AsyncAudioWithRawResponse,
AudioWithStreamingResponse,
AsyncAudioWithStreamingResponse,
)
from .speech import (
Speech,
AsyncSpeech,
SpeechWithRawResponse,
AsyncSpeechWithRawResponse,
SpeechWithStreamingResponse,
AsyncSpeechWithStreamingResponse,
)
from .translations import (
Translations,
AsyncTranslations,
TranslationsWithRawResponse,
AsyncTranslationsWithRawResponse,
TranslationsWithStreamingResponse,
AsyncTranslationsWithStreamingResponse,
)
from .transcriptions import (
Transcriptions,
AsyncTranscriptions,
TranscriptionsWithRawResponse,
AsyncTranscriptionsWithRawResponse,
TranscriptionsWithStreamingResponse,
AsyncTranscriptionsWithStreamingResponse,
)
# Public API of this package: each name imported above from the `.audio`,
# `.speech`, `.translations` and `.transcriptions` submodules is re-exported here.
__all__ = [
"Transcriptions",
"AsyncTranscriptions",
"TranscriptionsWithRawResponse",
"AsyncTranscriptionsWithRawResponse",
"TranscriptionsWithStreamingResponse",
"AsyncTranscriptionsWithStreamingResponse",
"Translations",
"AsyncTranslations",
"TranslationsWithRawResponse",
"AsyncTranslationsWithRawResponse",
"TranslationsWithStreamingResponse",
"AsyncTranslationsWithStreamingResponse",
"Speech",
"AsyncSpeech",
"SpeechWithRawResponse",
"AsyncSpeechWithRawResponse",
"SpeechWithStreamingResponse",
"AsyncSpeechWithStreamingResponse",
"Audio",
"AsyncAudio",
"AudioWithRawResponse",
"AsyncAudioWithRawResponse",
"AudioWithStreamingResponse",
"AsyncAudioWithStreamingResponse",
]

View File

@@ -0,0 +1,166 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from .speech import (
Speech,
AsyncSpeech,
SpeechWithRawResponse,
AsyncSpeechWithRawResponse,
SpeechWithStreamingResponse,
AsyncSpeechWithStreamingResponse,
)
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from .translations import (
Translations,
AsyncTranslations,
TranslationsWithRawResponse,
AsyncTranslationsWithRawResponse,
TranslationsWithStreamingResponse,
AsyncTranslationsWithStreamingResponse,
)
from .transcriptions import (
Transcriptions,
AsyncTranscriptions,
TranscriptionsWithRawResponse,
AsyncTranscriptionsWithRawResponse,
TranscriptionsWithStreamingResponse,
AsyncTranscriptionsWithStreamingResponse,
)
# Only the two resource classes are star-exported; the *WithRawResponse and
# *WithStreamingResponse wrappers defined in this module must be imported by name.
__all__ = ["Audio", "AsyncAudio"]
class Audio(SyncAPIResource):
    """Synchronous audio resource grouping the transcriptions, translations and speech sub-resources."""

    @cached_property
    def transcriptions(self) -> Transcriptions:
        """Transcriptions sub-resource built on this resource's client."""
        client = self._client
        return Transcriptions(client)

    @cached_property
    def translations(self) -> Translations:
        """Translations sub-resource built on this resource's client."""
        client = self._client
        return Translations(client)

    @cached_property
    def speech(self) -> Speech:
        """Speech sub-resource built on this resource's client."""
        client = self._client
        return Speech(client)

    @cached_property
    def with_raw_response(self) -> AudioWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        raw_view = AudioWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> AudioWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        streaming_view = AudioWithStreamingResponse(self)
        return streaming_view
class AsyncAudio(AsyncAPIResource):
    """Asynchronous audio resource grouping the transcriptions, translations and speech sub-resources."""

    @cached_property
    def transcriptions(self) -> AsyncTranscriptions:
        """Async transcriptions sub-resource built on this resource's client."""
        client = self._client
        return AsyncTranscriptions(client)

    @cached_property
    def translations(self) -> AsyncTranslations:
        """Async translations sub-resource built on this resource's client."""
        client = self._client
        return AsyncTranslations(client)

    @cached_property
    def speech(self) -> AsyncSpeech:
        """Async speech sub-resource built on this resource's client."""
        client = self._client
        return AsyncSpeech(client)

    @cached_property
    def with_raw_response(self) -> AsyncAudioWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        raw_view = AsyncAudioWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> AsyncAudioWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        streaming_view = AsyncAudioWithStreamingResponse(self)
        return streaming_view
class AudioWithRawResponse:
    """Counterpart of `Audio` whose sub-resources are wrapped in their `*WithRawResponse` classes."""

    def __init__(self, audio: Audio) -> None:
        # Keep the wrapped resource; each property below wraps one of its sub-resources.
        self._audio = audio

    @cached_property
    def transcriptions(self) -> TranscriptionsWithRawResponse:
        """Raw-response wrapper around the wrapped resource's `transcriptions`."""
        inner = self._audio.transcriptions
        return TranscriptionsWithRawResponse(inner)

    @cached_property
    def translations(self) -> TranslationsWithRawResponse:
        """Raw-response wrapper around the wrapped resource's `translations`."""
        inner = self._audio.translations
        return TranslationsWithRawResponse(inner)

    @cached_property
    def speech(self) -> SpeechWithRawResponse:
        """Raw-response wrapper around the wrapped resource's `speech`."""
        inner = self._audio.speech
        return SpeechWithRawResponse(inner)
class AsyncAudioWithRawResponse:
    """Counterpart of `AsyncAudio` whose sub-resources are wrapped in their `Async*WithRawResponse` classes."""

    def __init__(self, audio: AsyncAudio) -> None:
        # Keep the wrapped resource; each property below wraps one of its sub-resources.
        self._audio = audio

    @cached_property
    def transcriptions(self) -> AsyncTranscriptionsWithRawResponse:
        """Raw-response wrapper around the wrapped resource's `transcriptions`."""
        inner = self._audio.transcriptions
        return AsyncTranscriptionsWithRawResponse(inner)

    @cached_property
    def translations(self) -> AsyncTranslationsWithRawResponse:
        """Raw-response wrapper around the wrapped resource's `translations`."""
        inner = self._audio.translations
        return AsyncTranslationsWithRawResponse(inner)

    @cached_property
    def speech(self) -> AsyncSpeechWithRawResponse:
        """Raw-response wrapper around the wrapped resource's `speech`."""
        inner = self._audio.speech
        return AsyncSpeechWithRawResponse(inner)
class AudioWithStreamingResponse:
    """Counterpart of `Audio` whose sub-resources are wrapped in their `*WithStreamingResponse` classes."""

    def __init__(self, audio: Audio) -> None:
        # Keep the wrapped resource; each property below wraps one of its sub-resources.
        self._audio = audio

    @cached_property
    def transcriptions(self) -> TranscriptionsWithStreamingResponse:
        """Streaming-response wrapper around the wrapped resource's `transcriptions`."""
        inner = self._audio.transcriptions
        return TranscriptionsWithStreamingResponse(inner)

    @cached_property
    def translations(self) -> TranslationsWithStreamingResponse:
        """Streaming-response wrapper around the wrapped resource's `translations`."""
        inner = self._audio.translations
        return TranslationsWithStreamingResponse(inner)

    @cached_property
    def speech(self) -> SpeechWithStreamingResponse:
        """Streaming-response wrapper around the wrapped resource's `speech`."""
        inner = self._audio.speech
        return SpeechWithStreamingResponse(inner)
class AsyncAudioWithStreamingResponse:
    """Counterpart of `AsyncAudio` whose sub-resources are wrapped in their `Async*WithStreamingResponse` classes."""

    def __init__(self, audio: AsyncAudio) -> None:
        # Keep the wrapped resource; each property below wraps one of its sub-resources.
        self._audio = audio

    @cached_property
    def transcriptions(self) -> AsyncTranscriptionsWithStreamingResponse:
        """Streaming-response wrapper around the wrapped resource's `transcriptions`."""
        inner = self._audio.transcriptions
        return AsyncTranscriptionsWithStreamingResponse(inner)

    @cached_property
    def translations(self) -> AsyncTranslationsWithStreamingResponse:
        """Streaming-response wrapper around the wrapped resource's `translations`."""
        inner = self._audio.translations
        return AsyncTranslationsWithStreamingResponse(inner)

    @cached_property
    def speech(self) -> AsyncSpeechWithStreamingResponse:
        """Streaming-response wrapper around the wrapped resource's `speech`."""
        inner = self._audio.speech
        return AsyncSpeechWithStreamingResponse(inner)

View File

@@ -0,0 +1,245 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
from typing import Union
from typing_extensions import Literal
import httpx
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ..._utils import maybe_transform, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import (
StreamedBinaryAPIResponse,
AsyncStreamedBinaryAPIResponse,
to_custom_streamed_response_wrapper,
async_to_custom_streamed_response_wrapper,
)
from ...types.audio import speech_create_params
from ..._base_client import make_request_options
from ...types.audio.speech_model import SpeechModel
# Only the two resource classes are star-exported; the *WithRawResponse and
# *WithStreamingResponse wrappers defined in this module must be imported by name.
__all__ = ["Speech", "AsyncSpeech"]
class Speech(SyncAPIResource):
    """Synchronous resource for the `/audio/speech` (text-to-speech) endpoint."""

    @cached_property
    def with_raw_response(self) -> SpeechWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        raw_view = SpeechWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> SpeechWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        streaming_view = SpeechWithStreamingResponse(self)
        return streaming_view

    def create(
        self,
        *,
        input: str,
        model: Union[str, SpeechModel],
        voice: Union[
            str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
        ],
        instructions: str | NotGiven = NOT_GIVEN,
        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
        speed: float | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> _legacy_response.HttpxBinaryResponseContent:
        """
        Generates audio from the input text.

        Args:
          input: The text to generate audio for. The maximum length is 4096 characters.

          model: One of the available [TTS models](https://platform.openai.com/docs/models#tts):
              `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.

          voice: The voice to use when generating the audio. Supported voices are `alloy`,
              `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`,
              and `verse`. Previews of the voices are available in the
              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).

          instructions: Control the voice of your generated audio with additional instructions.
              Does not work with `tts-1` or `tts-1-hd`.

          response_format: The audio output format. Supported formats are `mp3`, `opus`, `aac`,
              `flac`, `wav`, and `pcm`.

          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`.
              `1.0` is the default.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Default Accept header; anything the caller supplies in extra_headers overrides it.
        headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
        payload = {
            "input": input,
            "model": model,
            "voice": voice,
            "instructions": instructions,
            "response_format": response_format,
            "speed": speed,
        }
        request_body = maybe_transform(payload, speech_create_params.SpeechCreateParams)
        request_options = make_request_options(
            extra_headers=headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/audio/speech",
            body=request_body,
            options=request_options,
            cast_to=_legacy_response.HttpxBinaryResponseContent,
        )
class AsyncSpeech(AsyncAPIResource):
    """Asynchronous resource for the `/audio/speech` (text-to-speech) endpoint."""

    @cached_property
    def with_raw_response(self) -> AsyncSpeechWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        raw_view = AsyncSpeechWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> AsyncSpeechWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        streaming_view = AsyncSpeechWithStreamingResponse(self)
        return streaming_view

    async def create(
        self,
        *,
        input: str,
        model: Union[str, SpeechModel],
        voice: Union[
            str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
        ],
        instructions: str | NotGiven = NOT_GIVEN,
        response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
        speed: float | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> _legacy_response.HttpxBinaryResponseContent:
        """
        Generates audio from the input text.

        Args:
          input: The text to generate audio for. The maximum length is 4096 characters.

          model: One of the available [TTS models](https://platform.openai.com/docs/models#tts):
              `tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.

          voice: The voice to use when generating the audio. Supported voices are `alloy`,
              `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`, `shimmer`,
              and `verse`. Previews of the voices are available in the
              [Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).

          instructions: Control the voice of your generated audio with additional instructions.
              Does not work with `tts-1` or `tts-1-hd`.

          response_format: The audio output format. Supported formats are `mp3`, `opus`, `aac`,
              `flac`, `wav`, and `pcm`.

          speed: The speed of the generated audio. Select a value from `0.25` to `4.0`.
              `1.0` is the default.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Default Accept header; anything the caller supplies in extra_headers overrides it.
        headers = {"Accept": "application/octet-stream", **(extra_headers or {})}
        payload = {
            "input": input,
            "model": model,
            "voice": voice,
            "instructions": instructions,
            "response_format": response_format,
            "speed": speed,
        }
        request_body = await async_maybe_transform(payload, speech_create_params.SpeechCreateParams)
        request_options = make_request_options(
            extra_headers=headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/audio/speech",
            body=request_body,
            options=request_options,
            cast_to=_legacy_response.HttpxBinaryResponseContent,
        )
class SpeechWithRawResponse:
    """Exposes `Speech.create` passed through `_legacy_response.to_raw_response_wrapper`."""

    def __init__(self, speech: Speech) -> None:
        self._speech = speech

        wrap = _legacy_response.to_raw_response_wrapper
        self.create = wrap(speech.create)
class AsyncSpeechWithRawResponse:
    """Exposes `AsyncSpeech.create` passed through `_legacy_response.async_to_raw_response_wrapper`."""

    def __init__(self, speech: AsyncSpeech) -> None:
        self._speech = speech

        wrap = _legacy_response.async_to_raw_response_wrapper
        self.create = wrap(speech.create)
class SpeechWithStreamingResponse:
    """Exposes `Speech.create` passed through `to_custom_streamed_response_wrapper` with `StreamedBinaryAPIResponse`."""

    def __init__(self, speech: Speech) -> None:
        self._speech = speech

        wrap = to_custom_streamed_response_wrapper
        self.create = wrap(speech.create, StreamedBinaryAPIResponse)
class AsyncSpeechWithStreamingResponse:
    """Exposes `AsyncSpeech.create` passed through `async_to_custom_streamed_response_wrapper` with `AsyncStreamedBinaryAPIResponse`."""

    def __init__(self, speech: AsyncSpeech) -> None:
        self._speech = speech

        wrap = async_to_custom_streamed_response_wrapper
        self.create = wrap(speech.create, AsyncStreamedBinaryAPIResponse)

View File

@@ -0,0 +1,686 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, List, Union, Mapping, Optional, cast
from typing_extensions import Literal, overload, assert_never
import httpx
from ... import _legacy_response
from ...types import AudioResponseFormat
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
from ..._utils import extract_files, required_args, maybe_transform, deepcopy_minimal, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ..._streaming import Stream, AsyncStream
from ...types.audio import transcription_create_params
from ..._base_client import make_request_options
from ...types.audio_model import AudioModel
from ...types.audio.transcription import Transcription
from ...types.audio_response_format import AudioResponseFormat
from ...types.audio.transcription_include import TranscriptionInclude
from ...types.audio.transcription_verbose import TranscriptionVerbose
from ...types.audio.transcription_stream_event import TranscriptionStreamEvent
from ...types.audio.transcription_create_response import TranscriptionCreateResponse
# Only the two resource classes are star-exported from this module.
__all__ = ["Transcriptions", "AsyncTranscriptions"]
# Module-level logger obtained under the fixed name "openai.audio.transcriptions".
log: logging.Logger = logging.getLogger("openai.audio.transcriptions")
class Transcriptions(SyncAPIResource):
    """Synchronous resource for the `/audio/transcriptions` endpoint."""

    @cached_property
    def with_raw_response(self) -> TranscriptionsWithRawResponse:
        """
        Prefix any HTTP method call with this property to receive the raw
        response object rather than the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        raw_view = TranscriptionsWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> TranscriptionsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        streaming_view = TranscriptionsWithStreamingResponse(self)
        return streaming_view

    # Overload: `response_format` omitted or "json" -> `Transcription`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
        language: str | NotGiven = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Transcription: ...

    # Overload: `response_format="verbose_json"` -> `TranscriptionVerbose`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
        response_format: Literal["verbose_json"],
        language: str | NotGiven = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> TranscriptionVerbose: ...

    # Overload: `response_format` of "text", "srt" or "vtt" -> plain `str`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Literal["text", "srt", "vtt"],
        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
        language: str | NotGiven = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> str: ...

    # Overload: `stream=True` -> `Stream[TranscriptionStreamEvent]`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        stream: Literal[True],
        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
        language: str | NotGiven = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Stream[TranscriptionStreamEvent]:
        """
        Transcribes audio into the input language.

        Args:
          file: The audio file object (not file name) to transcribe, in one of these
              formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. The options are `gpt-4o-transcribe`,
              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open
              source Whisper V2 model).

          stream: If set to true, the model response data will be streamed to the client
              as it is generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
              for more information.

              Note: Streaming is not supported for the `whisper-1` model and will be
              ignored.

          include: Additional information to include in the transcription response.
              `logprobs` will return the log probabilities of the tokens in the response
              to understand the model's confidence in the transcription. `logprobs` only
              works with response_format set to `json` and only with the models
              `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`.

          language: The language of the input audio. Supplying the input language in
              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)
              (e.g. `en`) format will improve accuracy and latency.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should match the audio language.

          response_format: The format of the output, in one of these options: `json`,
              `text`, `srt`, `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`, the only supported format is `json`.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
              will make the output more random, while lower values like 0.2 will make it
              more focused and deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          timestamp_granularities: The timestamp granularities to populate for this
              transcription. `response_format` must be set to `verbose_json` to use
              timestamp granularities. Either or both of these options are supported:
              `word`, or `segment`. Note: There is no additional latency for segment
              timestamps, but generating word timestamps incurs additional latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    # Overload: `stream` given as a plain `bool` -> either result shape.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        stream: bool,
        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
        language: str | NotGiven = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> TranscriptionCreateResponse | Stream[TranscriptionStreamEvent]:
        """
        Transcribes audio into the input language.

        Args:
          file: The audio file object (not file name) to transcribe, in one of these
              formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. The options are `gpt-4o-transcribe`,
              `gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open
              source Whisper V2 model).

          stream: If set to true, the model response data will be streamed to the client
              as it is generated using
              [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
              See the
              [Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
              for more information.

              Note: Streaming is not supported for the `whisper-1` model and will be
              ignored.

          include: Additional information to include in the transcription response.
              `logprobs` will return the log probabilities of the tokens in the response
              to understand the model's confidence in the transcription. `logprobs` only
              works with response_format set to `json` and only with the models
              `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`.

          language: The language of the input audio. Supplying the input language in
              [ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes)
              (e.g. `en`) format will improve accuracy and latency.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should match the audio language.

          response_format: The format of the output, in one of these options: `json`,
              `text`, `srt`, `verbose_json`, or `vtt`. For `gpt-4o-transcribe` and
              `gpt-4o-mini-transcribe`, the only supported format is `json`.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
              will make the output more random, while lower values like 0.2 will make it
              more focused and deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          timestamp_granularities: The timestamp granularities to populate for this
              transcription. `response_format` must be set to `verbose_json` to use
              timestamp granularities. Either or both of these options are supported:
              `word`, or `segment`. Note: There is no additional latency for segment
              timestamps, but generating word timestamps incurs additional latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        ...

    @required_args(["file", "model"], ["file", "model", "stream"])
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
        language: str | NotGiven = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
        stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
        # The arguments below let you send extra parameters that have no dedicated keyword.
        # Values given here take precedence over those set on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> str | Transcription | TranscriptionVerbose | Stream[TranscriptionStreamEvent]:
        # Runtime implementation behind the overloads above: POSTs the request to
        # "/audio/transcriptions", with the "file" entry pulled out by `extract_files`.
        form_fields = {
            "file": file,
            "model": model,
            "include": include,
            "language": language,
            "prompt": prompt,
            "response_format": response_format,
            "stream": stream,
            "temperature": temperature,
            "timestamp_granularities": timestamp_granularities,
        }
        body = deepcopy_minimal(form_fields)
        files = extract_files(cast(Mapping[str, object], body), paths=[["file"]])

        # The Content-Type header actually sent to the server will additionally
        # carry a `boundary` parameter, e.g.
        # multipart/form-data; boundary=---abc--
        # Headers supplied by the caller override this default.
        headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

        # Pick the params schema that matches the requested mode.
        if stream:
            params_type = transcription_create_params.TranscriptionCreateParamsStreaming
        else:
            params_type = transcription_create_params.TranscriptionCreateParamsNonStreaming

        request_body = maybe_transform(body, params_type)
        request_options = make_request_options(
            extra_headers=headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(  # type: ignore[return-value]
            "/audio/transcriptions",
            body=request_body,
            files=files,
            options=request_options,
            cast_to=_get_response_format_type(response_format),
            stream=stream or False,
            stream_cls=Stream[TranscriptionStreamEvent],
        )
class AsyncTranscriptions(AsyncAPIResource):
@cached_property
def with_raw_response(self) -> AsyncTranscriptionsWithRawResponse:
"""
This property can be used as a prefix for any HTTP method call to return
the raw response object instead of the parsed content.
For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
"""
return AsyncTranscriptionsWithRawResponse(self)
@cached_property
def with_streaming_response(self) -> AsyncTranscriptionsWithStreamingResponse:
"""
An alternative to `.with_raw_response` that doesn't eagerly read the response body.
For more information, see https://www.github.com/openai/openai-python#with_streaming_response
"""
return AsyncTranscriptionsWithStreamingResponse(self)
@overload
async def create(
self,
*,
file: FileTypes,
model: Union[str, AudioModel],
response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
language: str | NotGiven = NOT_GIVEN,
prompt: str | NotGiven = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Transcription: ...
@overload
async def create(
self,
*,
file: FileTypes,
model: Union[str, AudioModel],
include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
response_format: Literal["verbose_json"],
language: str | NotGiven = NOT_GIVEN,
prompt: str | NotGiven = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> TranscriptionVerbose: ...
@overload
async def create(
self,
*,
file: FileTypes,
model: Union[str, AudioModel],
include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
response_format: Literal["text", "srt", "vtt"],
language: str | NotGiven = NOT_GIVEN,
prompt: str | NotGiven = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> str: ...
# Typing-only overload: `stream` is required and must be the literal `True`;
# such calls are typed as returning `AsyncStream[TranscriptionStreamEvent]`.
@overload
async def create(
self,
*,
file: FileTypes,
model: Union[str, AudioModel],
stream: Literal[True],
include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
language: str | NotGiven = NOT_GIVEN,
prompt: str | NotGiven = NOT_GIVEN,
response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncStream[TranscriptionStreamEvent]:
"""
Transcribes audio into the input language.

Args:
file:
The audio file object (not file name) to transcribe, in one of these formats:
flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

model: ID of the model to use. The options are `gpt-4o-transcribe`,
`gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
Whisper V2 model).

stream: If set to true, the model response data will be streamed to the client as it is
generated using
[server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
See the
[Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
for more information.
Note: Streaming is not supported for the `whisper-1` model and will be ignored.

include: Additional information to include in the transcription response. `logprobs` will
return the log probabilities of the tokens in the response to understand the
model's confidence in the transcription. `logprobs` only works with
response_format set to `json` and only with the models `gpt-4o-transcribe` and
`gpt-4o-mini-transcribe`.

language: The language of the input audio. Supplying the input language in
[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
format will improve accuracy and latency.

prompt: An optional text to guide the model's style or continue a previous audio
segment. The
[prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
should match the audio language.

response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
`verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
the only supported format is `json`.

temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
output more random, while lower values like 0.2 will make it more focused and
deterministic. If set to 0, the model will use
[log probability](https://en.wikipedia.org/wiki/Log_probability) to
automatically increase the temperature until certain thresholds are hit.

timestamp_granularities: The timestamp granularities to populate for this transcription.
`response_format` must be set to `verbose_json` to use timestamp granularities.
Either or both of these options are supported: `word`, or `segment`. Note: There
is no additional latency for segment timestamps, but generating word timestamps
incurs additional latency.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds
"""
...
# Typing-only overload: `stream` is required but only known to be a `bool`, so
# the call is typed as returning either `TranscriptionCreateResponse` or
# `AsyncStream[TranscriptionStreamEvent]`.
@overload
async def create(
self,
*,
file: FileTypes,
model: Union[str, AudioModel],
stream: bool,
include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
language: str | NotGiven = NOT_GIVEN,
prompt: str | NotGiven = NOT_GIVEN,
response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
temperature: float | NotGiven = NOT_GIVEN,
timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
# The extra values given here take precedence over values defined on the client or passed to this method.
extra_headers: Headers | None = None,
extra_query: Query | None = None,
extra_body: Body | None = None,
timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> TranscriptionCreateResponse | AsyncStream[TranscriptionStreamEvent]:
"""
Transcribes audio into the input language.

Args:
file:
The audio file object (not file name) to transcribe, in one of these formats:
flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

model: ID of the model to use. The options are `gpt-4o-transcribe`,
`gpt-4o-mini-transcribe`, and `whisper-1` (which is powered by our open source
Whisper V2 model).

stream: If set to true, the model response data will be streamed to the client as it is
generated using
[server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format).
See the
[Streaming section of the Speech-to-Text guide](https://platform.openai.com/docs/guides/speech-to-text?lang=curl#streaming-transcriptions)
for more information.
Note: Streaming is not supported for the `whisper-1` model and will be ignored.

include: Additional information to include in the transcription response. `logprobs` will
return the log probabilities of the tokens in the response to understand the
model's confidence in the transcription. `logprobs` only works with
response_format set to `json` and only with the models `gpt-4o-transcribe` and
`gpt-4o-mini-transcribe`.

language: The language of the input audio. Supplying the input language in
[ISO-639-1](https://en.wikipedia.org/wiki/List_of_ISO_639-1_codes) (e.g. `en`)
format will improve accuracy and latency.

prompt: An optional text to guide the model's style or continue a previous audio
segment. The
[prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
should match the audio language.

response_format: The format of the output, in one of these options: `json`, `text`, `srt`,
`verbose_json`, or `vtt`. For `gpt-4o-transcribe` and `gpt-4o-mini-transcribe`,
the only supported format is `json`.

temperature: The sampling temperature, between 0 and 1. Higher values like 0.8 will make the
output more random, while lower values like 0.2 will make it more focused and
deterministic. If set to 0, the model will use
[log probability](https://en.wikipedia.org/wiki/Log_probability) to
automatically increase the temperature until certain thresholds are hit.

timestamp_granularities: The timestamp granularities to populate for this transcription.
`response_format` must be set to `verbose_json` to use timestamp granularities.
Either or both of these options are supported: `word`, or `segment`. Note: There
is no additional latency for segment timestamps, but generating word timestamps
incurs additional latency.

extra_headers: Send extra headers

extra_query: Add additional query parameters to the request

extra_body: Add additional JSON properties to the request

timeout: Override the client-level default timeout for this request, in seconds
"""
...
@required_args(["file", "model"], ["file", "model", "stream"])
async def create(
    self,
    *,
    file: FileTypes,
    model: Union[str, AudioModel],
    include: List[TranscriptionInclude] | NotGiven = NOT_GIVEN,
    language: str | NotGiven = NOT_GIVEN,
    prompt: str | NotGiven = NOT_GIVEN,
    response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
    stream: Optional[Literal[False]] | Literal[True] | NotGiven = NOT_GIVEN,
    temperature: float | NotGiven = NOT_GIVEN,
    timestamp_granularities: List[Literal["word", "segment"]] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Transcription | TranscriptionVerbose | str | AsyncStream[TranscriptionStreamEvent]:
    """
    Implementation behind the `create` overloads: POST the audio file and its
    options to `/audio/transcriptions` as a multipart form.

    The request parameters are validated against the streaming or non-streaming
    params type depending on whether `stream` is truthy, and the response is
    cast to the type selected by `_get_response_format_type(response_format)`.
    See the overloads for a description of each argument.
    """
    form_fields = deepcopy_minimal(
        {
            "file": file,
            "model": model,
            "include": include,
            "language": language,
            "prompt": prompt,
            "response_format": response_format,
            "stream": stream,
            "temperature": temperature,
            "timestamp_granularities": timestamp_granularities,
        }
    )
    # Must run before the body is transformed: the file entries are pulled out
    # of the copied mapping here and sent separately via `files=`.
    uploads = extract_files(cast(Mapping[str, object], form_fields), paths=[["file"]])

    # It should be noted that the actual Content-Type header that will be
    # sent to the server will contain a `boundary` parameter, e.g.
    # multipart/form-data; boundary=---abc--
    # Caller-supplied headers are spread last so they win over the default.
    multipart_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

    params_type = (
        transcription_create_params.TranscriptionCreateParamsStreaming
        if stream
        else transcription_create_params.TranscriptionCreateParamsNonStreaming
    )
    request_body = await async_maybe_transform(form_fields, params_type)
    request_options = make_request_options(
        extra_headers=multipart_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )

    return await self._post(
        "/audio/transcriptions",
        body=request_body,
        files=uploads,
        options=request_options,
        cast_to=_get_response_format_type(response_format),
        # `stream` may be NOT_GIVEN or None; normalise anything falsy to False.
        stream=stream or False,
        stream_cls=AsyncStream[TranscriptionStreamEvent],
    )
class TranscriptionsWithRawResponse:
    """View over a `Transcriptions` resource whose `create` is passed through
    `_legacy_response.to_raw_response_wrapper`."""

    def __init__(self, transcriptions: Transcriptions) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._transcriptions = transcriptions
        self.create = _legacy_response.to_raw_response_wrapper(transcriptions.create)
class AsyncTranscriptionsWithRawResponse:
    """View over an `AsyncTranscriptions` resource whose `create` is passed
    through `_legacy_response.async_to_raw_response_wrapper`."""

    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._transcriptions = transcriptions
        self.create = _legacy_response.async_to_raw_response_wrapper(transcriptions.create)
class TranscriptionsWithStreamingResponse:
    """View over a `Transcriptions` resource whose `create` is passed through
    `to_streamed_response_wrapper`."""

    def __init__(self, transcriptions: Transcriptions) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._transcriptions = transcriptions
        self.create = to_streamed_response_wrapper(transcriptions.create)
class AsyncTranscriptionsWithStreamingResponse:
    """View over an `AsyncTranscriptions` resource whose `create` is passed
    through `async_to_streamed_response_wrapper`."""

    def __init__(self, transcriptions: AsyncTranscriptions) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._transcriptions = transcriptions
        self.create = async_to_streamed_response_wrapper(transcriptions.create)
def _get_response_format_type(
    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
) -> type[Transcription | TranscriptionVerbose | str]:
    """Pick the type a transcription response should be cast to.

    Args:
        response_format: The requested output format, or `NOT_GIVEN`/`None`
            when the caller did not specify one.

    Returns:
        `Transcription` for `"json"` or an unspecified format,
        `TranscriptionVerbose` for `"verbose_json"`, and `str` for `"srt"`,
        `"text"` and `"vtt"`. Any other value is logged as a warning and
        treated like `"json"`.
    """
    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
        return Transcription

    if response_format == "json":
        return Transcription
    elif response_format == "verbose_json":
        return TranscriptionVerbose
    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
        return str
    elif TYPE_CHECKING:  # type: ignore[unreachable]
        # Only seen by type checkers: proves the branches above are exhaustive.
        assert_never(response_format)
    else:
        # Runtime fallback for values outside the declared literal set.
        # `Logger.warn` is a deprecated alias that emits a DeprecationWarning;
        # `Logger.warning` is the supported spelling.
        log.warning("Unexpected audio response format: %s", response_format)
        return Transcription

View File

@@ -0,0 +1,367 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
from __future__ import annotations
import logging
from typing import TYPE_CHECKING, Union, Mapping, cast
from typing_extensions import Literal, overload, assert_never
import httpx
from ... import _legacy_response
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven, FileTypes
from ..._utils import extract_files, maybe_transform, deepcopy_minimal, async_maybe_transform
from ..._compat import cached_property
from ..._resource import SyncAPIResource, AsyncAPIResource
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...types.audio import translation_create_params
from ..._base_client import make_request_options
from ...types.audio_model import AudioModel
from ...types.audio.translation import Translation
from ...types.audio_response_format import AudioResponseFormat
from ...types.audio.translation_verbose import TranslationVerbose
# Public names exported by this module.
__all__ = ["Translations", "AsyncTranslations"]
# NOTE(review): this module defines the translations resources, yet the logger
# is registered under "openai.audio.transcriptions" — confirm this is intended.
log: logging.Logger = logging.getLogger("openai.audio.transcriptions")
class Translations(SyncAPIResource):
    """Synchronous resource that posts audio to `/audio/translations`."""

    @cached_property
    def with_raw_response(self) -> TranslationsWithRawResponse:
        """
        Prefix any HTTP method call with this property to get the raw response
        object back instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return TranslationsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> TranslationsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return TranslationsWithStreamingResponse(self)

    # Typing-only overload: `response_format` omitted or "json" -> `Translation`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Translation: ...

    # Typing-only overload: `response_format="verbose_json"` -> `TranslationVerbose`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Literal["verbose_json"],
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> TranslationVerbose: ...

    # Typing-only overload: `response_format` in "text"/"srt"/"vtt" -> `str`.
    @overload
    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Literal["text", "srt", "vtt"],
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> str: ...

    def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        prompt: str | NotGiven = NOT_GIVEN,
        response_format: Union[Literal["json", "text", "srt", "verbose_json", "vtt"], NotGiven] = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Translation | TranslationVerbose | str:
        """
        Translates audio into English.

        Args:
          file: The audio file object (not file name) to translate, in one of these
              formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. Only `whisper-1` (which is powered by our open
              source Whisper V2 model) is currently available.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should be in English.

          response_format: The format of the output, in one of these options: `json`,
              `text`, `srt`, `verbose_json`, or `vtt`.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
              will make the output more random, while lower values like 0.2 will make it
              more focused and deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        form_fields = deepcopy_minimal(
            {
                "file": file,
                "model": model,
                "prompt": prompt,
                "response_format": response_format,
                "temperature": temperature,
            }
        )
        # Must run before the body is transformed: the file entries are pulled
        # out of the copied mapping here and sent separately via `files=`.
        uploads = extract_files(cast(Mapping[str, object], form_fields), paths=[["file"]])

        # It should be noted that the actual Content-Type header that will be
        # sent to the server will contain a `boundary` parameter, e.g.
        # multipart/form-data; boundary=---abc--
        # Caller-supplied headers are spread last so they win over the default.
        multipart_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

        request_body = maybe_transform(form_fields, translation_create_params.TranslationCreateParams)
        request_options = make_request_options(
            extra_headers=multipart_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(  # type: ignore[return-value]
            "/audio/translations",
            body=request_body,
            files=uploads,
            options=request_options,
            cast_to=_get_response_format_type(response_format),
        )
class AsyncTranslations(AsyncAPIResource):
    """Asynchronous resource that posts audio to `/audio/translations`."""

    @cached_property
    def with_raw_response(self) -> AsyncTranslationsWithRawResponse:
        """
        Prefix any HTTP method call with this property to get the raw response
        object back instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncTranslationsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncTranslationsWithStreamingResponse:
        """
        Like `.with_raw_response`, except the response body is not read eagerly.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncTranslationsWithStreamingResponse(self)

    # Typing-only overload: `response_format` omitted or "json" -> `Translation`.
    @overload
    async def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Union[Literal["json"], NotGiven] = NOT_GIVEN,
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Translation: ...

    # Typing-only overload: `response_format="verbose_json"` -> `TranslationVerbose`.
    @overload
    async def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Literal["verbose_json"],
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> TranslationVerbose: ...

    # Typing-only overload: `response_format` in "text"/"srt"/"vtt" -> `str`.
    @overload
    async def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        response_format: Literal["text", "srt", "vtt"],
        prompt: str | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> str: ...

    async def create(
        self,
        *,
        file: FileTypes,
        model: Union[str, AudioModel],
        prompt: str | NotGiven = NOT_GIVEN,
        response_format: Union[AudioResponseFormat, NotGiven] = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Translation | TranslationVerbose | str:
        """
        Translates audio into English.

        Args:
          file: The audio file object (not file name) to translate, in one of these
              formats: flac, mp3, mp4, mpeg, mpga, m4a, ogg, wav, or webm.

          model: ID of the model to use. Only `whisper-1` (which is powered by our open
              source Whisper V2 model) is currently available.

          prompt: An optional text to guide the model's style or continue a previous audio
              segment. The
              [prompt](https://platform.openai.com/docs/guides/speech-to-text#prompting)
              should be in English.

          response_format: The format of the output, in one of these options: `json`,
              `text`, `srt`, `verbose_json`, or `vtt`.

          temperature: The sampling temperature, between 0 and 1. Higher values like 0.8
              will make the output more random, while lower values like 0.2 will make it
              more focused and deterministic. If set to 0, the model will use
              [log probability](https://en.wikipedia.org/wiki/Log_probability) to
              automatically increase the temperature until certain thresholds are hit.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        form_fields = deepcopy_minimal(
            {
                "file": file,
                "model": model,
                "prompt": prompt,
                "response_format": response_format,
                "temperature": temperature,
            }
        )
        # Must run before the body is transformed: the file entries are pulled
        # out of the copied mapping here and sent separately via `files=`.
        uploads = extract_files(cast(Mapping[str, object], form_fields), paths=[["file"]])

        # It should be noted that the actual Content-Type header that will be
        # sent to the server will contain a `boundary` parameter, e.g.
        # multipart/form-data; boundary=---abc--
        # Caller-supplied headers are spread last so they win over the default.
        multipart_headers = {"Content-Type": "multipart/form-data", **(extra_headers or {})}

        request_body = await async_maybe_transform(form_fields, translation_create_params.TranslationCreateParams)
        request_options = make_request_options(
            extra_headers=multipart_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return await self._post(
            "/audio/translations",
            body=request_body,
            files=uploads,
            options=request_options,
            cast_to=_get_response_format_type(response_format),
        )
class TranslationsWithRawResponse:
    """View over a `Translations` resource whose `create` is passed through
    `_legacy_response.to_raw_response_wrapper`."""

    def __init__(self, translations: Translations) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._translations = translations
        self.create = _legacy_response.to_raw_response_wrapper(translations.create)
class AsyncTranslationsWithRawResponse:
    """View over an `AsyncTranslations` resource whose `create` is passed
    through `_legacy_response.async_to_raw_response_wrapper`."""

    def __init__(self, translations: AsyncTranslations) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._translations = translations
        self.create = _legacy_response.async_to_raw_response_wrapper(translations.create)
class TranslationsWithStreamingResponse:
    """View over a `Translations` resource whose `create` is passed through
    `to_streamed_response_wrapper`."""

    def __init__(self, translations: Translations) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._translations = translations
        self.create = to_streamed_response_wrapper(translations.create)
class AsyncTranslationsWithStreamingResponse:
    """View over an `AsyncTranslations` resource whose `create` is passed
    through `async_to_streamed_response_wrapper`."""

    def __init__(self, translations: AsyncTranslations) -> None:
        # Keep the underlying resource next to its wrapped method.
        self._translations = translations
        self.create = async_to_streamed_response_wrapper(translations.create)
def _get_response_format_type(
    response_format: Literal["json", "text", "srt", "verbose_json", "vtt"] | NotGiven,
) -> type[Translation | TranslationVerbose | str]:
    """Pick the type a translation response should be cast to.

    Args:
        response_format: The requested output format, or `NOT_GIVEN`/`None`
            when the caller did not specify one.

    Returns:
        `Translation` for `"json"` or an unspecified format,
        `TranslationVerbose` for `"verbose_json"`, and `str` for `"srt"`,
        `"text"` and `"vtt"`. Any other value is logged as a warning and
        treated like `"json"`.
    """
    if isinstance(response_format, NotGiven) or response_format is None:  # pyright: ignore[reportUnnecessaryComparison]
        return Translation

    if response_format == "json":
        return Translation
    elif response_format == "verbose_json":
        return TranslationVerbose
    elif response_format == "srt" or response_format == "text" or response_format == "vtt":
        return str
    elif TYPE_CHECKING:  # type: ignore[unreachable]
        # Only seen by type checkers: proves the branches above are exhaustive.
        assert_never(response_format)
    else:
        # Runtime fallback for values outside the declared literal set.
        # `Logger.warn` is a deprecated alias that emits a DeprecationWarning;
        # `Logger.warning` is the supported spelling.
        log.warning("Unexpected audio response format: %s", response_format)
        # The fallback must be `Translation`: `Transcription` is not imported
        # in this module, so returning it raised NameError instead of falling
        # back, and it is not part of the declared return type either.
        return Translation