chore: automatic commit 2025-04-30 12:48
@@ -0,0 +1,47 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from .realtime import (
    Realtime,
    AsyncRealtime,
    RealtimeWithRawResponse,
    AsyncRealtimeWithRawResponse,
    RealtimeWithStreamingResponse,
    AsyncRealtimeWithStreamingResponse,
)
from .sessions import (
    Sessions,
    AsyncSessions,
    SessionsWithRawResponse,
    AsyncSessionsWithRawResponse,
    SessionsWithStreamingResponse,
    AsyncSessionsWithStreamingResponse,
)
from .transcription_sessions import (
    TranscriptionSessions,
    AsyncTranscriptionSessions,
    TranscriptionSessionsWithRawResponse,
    AsyncTranscriptionSessionsWithRawResponse,
    TranscriptionSessionsWithStreamingResponse,
    AsyncTranscriptionSessionsWithStreamingResponse,
)

__all__ = [
    "Sessions",
    "AsyncSessions",
    "SessionsWithRawResponse",
    "AsyncSessionsWithRawResponse",
    "SessionsWithStreamingResponse",
    "AsyncSessionsWithStreamingResponse",
    "TranscriptionSessions",
    "AsyncTranscriptionSessions",
    "TranscriptionSessionsWithRawResponse",
    "AsyncTranscriptionSessionsWithRawResponse",
    "TranscriptionSessionsWithStreamingResponse",
    "AsyncTranscriptionSessionsWithStreamingResponse",
    "Realtime",
    "AsyncRealtime",
    "RealtimeWithRawResponse",
    "AsyncRealtimeWithRawResponse",
    "RealtimeWithStreamingResponse",
    "AsyncRealtimeWithStreamingResponse",
]
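# A minimal usage sketch for the resources re-exported above, assuming the
# `openai` package is installed and OPENAI_API_KEY is set in the environment;
# `client.beta.realtime` exposes these classes as `sessions` and
# `transcription_sessions`.

from openai import OpenAI

client = OpenAI()
# `client.beta.realtime.sessions` is the `Sessions` resource re-exported above.
session = client.beta.realtime.sessions.create(model="gpt-4o-realtime-preview")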
@@ -0,0 +1,388 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import List, Union, Iterable
from typing_extensions import Literal

import httpx

from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._base_client import make_request_options
from ....types.beta.realtime import session_create_params
from ....types.beta.realtime.session_create_response import SessionCreateResponse

__all__ = ["Sessions", "AsyncSessions"]


class Sessions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> SessionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return SessionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> SessionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return SessionsWithStreamingResponse(self)

    def create(
        self,
        *,
        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
        input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
        instructions: str | NotGiven = NOT_GIVEN,
        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
        model: Literal[
            "gpt-4o-realtime-preview",
            "gpt-4o-realtime-preview-2024-10-01",
            "gpt-4o-realtime-preview-2024-12-17",
            "gpt-4o-mini-realtime-preview",
            "gpt-4o-mini-realtime-preview-2024-12-17",
        ]
        | NotGiven = NOT_GIVEN,
        output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        tool_choice: str | NotGiven = NOT_GIVEN,
        tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
        turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
        voice: Union[
            str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
        ]
        | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> SessionCreateResponse:
        """
        Create an ephemeral API token for use in client-side applications with the
        Realtime API. Can be configured with the same session parameters as the
        `session.update` client event.

        It responds with a session object, plus a `client_secret` key which contains a
        usable ephemeral API token that can be used to authenticate browser clients for
        the Realtime API.

        Args:
          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
              (mono), and little-endian byte order.

          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
              off. Noise reduction filters audio added to the input audio buffer before it is
              sent to VAD and the model. Filtering the audio can improve VAD and turn
              detection accuracy (reducing false positives) and model performance by improving
              perception of the input audio.

          input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
              `null` to turn off once on. Input audio transcription is not native to the
              model, since the model consumes audio directly. Transcription runs
              asynchronously through
              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
              and should be treated as guidance of input audio content rather than precisely
              what the model heard. The client can optionally set the language and prompt for
              transcription; these offer additional guidance to the transcription service.

          instructions: The default system instructions (i.e. system message) prepended to model calls.
              This field allows the client to guide the model on desired responses. The model
              can be instructed on response content and format (e.g. "be extremely succinct",
              "act friendly", "here are examples of good responses") and on audio behavior
              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
              instructions are not guaranteed to be followed by the model, but they provide
              guidance to the model on the desired behavior.

              Note that the server sets default instructions which will be used if this field
              is not set and are visible in the `session.created` event at the start of the
              session.

          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
              `inf` for the maximum available tokens for a given model. Defaults to `inf`.

          modalities: The set of modalities the model can respond with. To disable audio, set this to
              ["text"].

          model: The Realtime model used for this session.

          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
              For `pcm16`, output audio is sampled at a rate of 24kHz.

          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
              temperature of 0.8 is highly recommended for best performance.

          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
              a function.

          tools: Tools (functions) available to the model.

          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
              set to `null` to turn off, in which case the client must manually trigger model
              response. Server VAD means that the model will detect the start and end of
              speech based on audio volume and respond at the end of user speech. Semantic VAD
              is more advanced and uses a turn detection model (in conjunction with VAD) to
              semantically estimate whether the user has finished speaking, then dynamically
              sets a timeout based on this probability. For example, if user audio trails off
              with "uhhm", the model will score a low probability of turn end and wait longer
              for the user to continue speaking. This can be useful for more natural
              conversations, but may have a higher latency.

          voice: The voice the model uses to respond. Voice cannot be changed during the session
              once the model has responded with audio at least once. Current voice options are
              `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`,
              `shimmer`, and `verse`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/realtime/sessions",
            body=maybe_transform(
                {
                    "input_audio_format": input_audio_format,
                    "input_audio_noise_reduction": input_audio_noise_reduction,
                    "input_audio_transcription": input_audio_transcription,
                    "instructions": instructions,
                    "max_response_output_tokens": max_response_output_tokens,
                    "modalities": modalities,
                    "model": model,
                    "output_audio_format": output_audio_format,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "turn_detection": turn_detection,
                    "voice": voice,
                },
                session_create_params.SessionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=SessionCreateResponse,
        )


class AsyncSessions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncSessionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncSessionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncSessionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncSessionsWithStreamingResponse(self)

    async def create(
        self,
        *,
        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
        input_audio_noise_reduction: session_create_params.InputAudioNoiseReduction | NotGiven = NOT_GIVEN,
        input_audio_transcription: session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
        instructions: str | NotGiven = NOT_GIVEN,
        max_response_output_tokens: Union[int, Literal["inf"]] | NotGiven = NOT_GIVEN,
        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
        model: Literal[
            "gpt-4o-realtime-preview",
            "gpt-4o-realtime-preview-2024-10-01",
            "gpt-4o-realtime-preview-2024-12-17",
            "gpt-4o-mini-realtime-preview",
            "gpt-4o-mini-realtime-preview-2024-12-17",
        ]
        | NotGiven = NOT_GIVEN,
        output_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
        temperature: float | NotGiven = NOT_GIVEN,
        tool_choice: str | NotGiven = NOT_GIVEN,
        tools: Iterable[session_create_params.Tool] | NotGiven = NOT_GIVEN,
        turn_detection: session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
        voice: Union[
            str, Literal["alloy", "ash", "ballad", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer", "verse"]
        ]
        | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> SessionCreateResponse:
        """
        Create an ephemeral API token for use in client-side applications with the
        Realtime API. Can be configured with the same session parameters as the
        `session.update` client event.

        It responds with a session object, plus a `client_secret` key which contains a
        usable ephemeral API token that can be used to authenticate browser clients for
        the Realtime API.

        Args:
          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
              (mono), and little-endian byte order.

          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
              off. Noise reduction filters audio added to the input audio buffer before it is
              sent to VAD and the model. Filtering the audio can improve VAD and turn
              detection accuracy (reducing false positives) and model performance by improving
              perception of the input audio.

          input_audio_transcription: Configuration for input audio transcription, defaults to off and can be set to
              `null` to turn off once on. Input audio transcription is not native to the
              model, since the model consumes audio directly. Transcription runs
              asynchronously through
              [the /audio/transcriptions endpoint](https://platform.openai.com/docs/api-reference/audio/createTranscription)
              and should be treated as guidance of input audio content rather than precisely
              what the model heard. The client can optionally set the language and prompt for
              transcription; these offer additional guidance to the transcription service.

          instructions: The default system instructions (i.e. system message) prepended to model calls.
              This field allows the client to guide the model on desired responses. The model
              can be instructed on response content and format (e.g. "be extremely succinct",
              "act friendly", "here are examples of good responses") and on audio behavior
              (e.g. "talk quickly", "inject emotion into your voice", "laugh frequently"). The
              instructions are not guaranteed to be followed by the model, but they provide
              guidance to the model on the desired behavior.

              Note that the server sets default instructions which will be used if this field
              is not set and are visible in the `session.created` event at the start of the
              session.

          max_response_output_tokens: Maximum number of output tokens for a single assistant response, inclusive of
              tool calls. Provide an integer between 1 and 4096 to limit output tokens, or
              `inf` for the maximum available tokens for a given model. Defaults to `inf`.

          modalities: The set of modalities the model can respond with. To disable audio, set this to
              ["text"].

          model: The Realtime model used for this session.

          output_audio_format: The format of output audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`.
              For `pcm16`, output audio is sampled at a rate of 24kHz.

          temperature: Sampling temperature for the model, limited to [0.6, 1.2]. For audio models a
              temperature of 0.8 is highly recommended for best performance.

          tool_choice: How the model chooses tools. Options are `auto`, `none`, `required`, or specify
              a function.

          tools: Tools (functions) available to the model.

          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
              set to `null` to turn off, in which case the client must manually trigger model
              response. Server VAD means that the model will detect the start and end of
              speech based on audio volume and respond at the end of user speech. Semantic VAD
              is more advanced and uses a turn detection model (in conjunction with VAD) to
              semantically estimate whether the user has finished speaking, then dynamically
              sets a timeout based on this probability. For example, if user audio trails off
              with "uhhm", the model will score a low probability of turn end and wait longer
              for the user to continue speaking. This can be useful for more natural
              conversations, but may have a higher latency.

          voice: The voice the model uses to respond. Voice cannot be changed during the session
              once the model has responded with audio at least once. Current voice options are
              `alloy`, `ash`, `ballad`, `coral`, `echo`, `fable`, `onyx`, `nova`, `sage`,
              `shimmer`, and `verse`.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            "/realtime/sessions",
            body=await async_maybe_transform(
                {
                    "input_audio_format": input_audio_format,
                    "input_audio_noise_reduction": input_audio_noise_reduction,
                    "input_audio_transcription": input_audio_transcription,
                    "instructions": instructions,
                    "max_response_output_tokens": max_response_output_tokens,
                    "modalities": modalities,
                    "model": model,
                    "output_audio_format": output_audio_format,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "turn_detection": turn_detection,
                    "voice": voice,
                },
                session_create_params.SessionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=SessionCreateResponse,
        )


class SessionsWithRawResponse:
    def __init__(self, sessions: Sessions) -> None:
        self._sessions = sessions

        self.create = _legacy_response.to_raw_response_wrapper(
            sessions.create,
        )


class AsyncSessionsWithRawResponse:
    def __init__(self, sessions: AsyncSessions) -> None:
        self._sessions = sessions

        self.create = _legacy_response.async_to_raw_response_wrapper(
            sessions.create,
        )


class SessionsWithStreamingResponse:
    def __init__(self, sessions: Sessions) -> None:
        self._sessions = sessions

        self.create = to_streamed_response_wrapper(
            sessions.create,
        )


class AsyncSessionsWithStreamingResponse:
    def __init__(self, sessions: AsyncSessions) -> None:
        self._sessions = sessions

        self.create = async_to_streamed_response_wrapper(
            sessions.create,
        )
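# A minimal sketch of calling `Sessions.create` as defined above, assuming an
# `OpenAI` client configured via OPENAI_API_KEY; the `client_secret.value`
# access follows the documented response shape (a session object plus a
# `client_secret` key holding the ephemeral token).

from openai import OpenAI

client = OpenAI()

# Mint an ephemeral token for a browser client; the parameters mirror the
# `session.update` client event.
session = client.beta.realtime.sessions.create(
    model="gpt-4o-realtime-preview",
    modalities=["text", "audio"],
    voice="verse",
)
print(session.client_secret.value)  # ephemeral token to hand to the browser

# The raw-response wrapper runs the same request but surfaces HTTP details.
raw = client.beta.realtime.sessions.with_raw_response.create(
    model="gpt-4o-realtime-preview",
)
print(raw.headers.get("x-request-id"))
session = raw.parse()  # parsed SessionCreateResponse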
@@ -0,0 +1,274 @@
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import List
from typing_extensions import Literal

import httpx

from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._base_client import make_request_options
from ....types.beta.realtime import transcription_session_create_params
from ....types.beta.realtime.transcription_session import TranscriptionSession

__all__ = ["TranscriptionSessions", "AsyncTranscriptionSessions"]


class TranscriptionSessions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> TranscriptionSessionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return TranscriptionSessionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> TranscriptionSessionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return TranscriptionSessionsWithStreamingResponse(self)

    def create(
        self,
        *,
        include: List[str] | NotGiven = NOT_GIVEN,
        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
        input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
        | NotGiven = NOT_GIVEN,
        input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
        turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> TranscriptionSession:
        """
        Create an ephemeral API token for use in client-side applications with the
        Realtime API specifically for realtime transcriptions. Can be configured with
        the same session parameters as the `transcription_session.update` client event.

        It responds with a session object, plus a `client_secret` key which contains a
        usable ephemeral API token that can be used to authenticate browser clients for
        the Realtime API.

        Args:
          include:
              The set of items to include in the transcription. Current available items are:

              - `item.input_audio_transcription.logprobs`

          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
              (mono), and little-endian byte order.

          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
              off. Noise reduction filters audio added to the input audio buffer before it is
              sent to VAD and the model. Filtering the audio can improve VAD and turn
              detection accuracy (reducing false positives) and model performance by improving
              perception of the input audio.

          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
              language and prompt for transcription; these offer additional guidance to the
              transcription service.

          modalities: The set of modalities the model can respond with. To disable audio, set this to
              ["text"].

          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
              set to `null` to turn off, in which case the client must manually trigger model
              response. Server VAD means that the model will detect the start and end of
              speech based on audio volume and respond at the end of user speech. Semantic VAD
              is more advanced and uses a turn detection model (in conjunction with VAD) to
              semantically estimate whether the user has finished speaking, then dynamically
              sets a timeout based on this probability. For example, if user audio trails off
              with "uhhm", the model will score a low probability of turn end and wait longer
              for the user to continue speaking. This can be useful for more natural
              conversations, but may have a higher latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return self._post(
            "/realtime/transcription_sessions",
            body=maybe_transform(
                {
                    "include": include,
                    "input_audio_format": input_audio_format,
                    "input_audio_noise_reduction": input_audio_noise_reduction,
                    "input_audio_transcription": input_audio_transcription,
                    "modalities": modalities,
                    "turn_detection": turn_detection,
                },
                transcription_session_create_params.TranscriptionSessionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=TranscriptionSession,
        )


class AsyncTranscriptionSessions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncTranscriptionSessionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
        """
        return AsyncTranscriptionSessionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncTranscriptionSessionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
        """
        return AsyncTranscriptionSessionsWithStreamingResponse(self)

    async def create(
        self,
        *,
        include: List[str] | NotGiven = NOT_GIVEN,
        input_audio_format: Literal["pcm16", "g711_ulaw", "g711_alaw"] | NotGiven = NOT_GIVEN,
        input_audio_noise_reduction: transcription_session_create_params.InputAudioNoiseReduction
        | NotGiven = NOT_GIVEN,
        input_audio_transcription: transcription_session_create_params.InputAudioTranscription | NotGiven = NOT_GIVEN,
        modalities: List[Literal["text", "audio"]] | NotGiven = NOT_GIVEN,
        turn_detection: transcription_session_create_params.TurnDetection | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> TranscriptionSession:
        """
        Create an ephemeral API token for use in client-side applications with the
        Realtime API specifically for realtime transcriptions. Can be configured with
        the same session parameters as the `transcription_session.update` client event.

        It responds with a session object, plus a `client_secret` key which contains a
        usable ephemeral API token that can be used to authenticate browser clients for
        the Realtime API.

        Args:
          include:
              The set of items to include in the transcription. Current available items are:

              - `item.input_audio_transcription.logprobs`

          input_audio_format: The format of input audio. Options are `pcm16`, `g711_ulaw`, or `g711_alaw`. For
              `pcm16`, input audio must be 16-bit PCM at a 24kHz sample rate, single channel
              (mono), and little-endian byte order.

          input_audio_noise_reduction: Configuration for input audio noise reduction. This can be set to `null` to turn
              off. Noise reduction filters audio added to the input audio buffer before it is
              sent to VAD and the model. Filtering the audio can improve VAD and turn
              detection accuracy (reducing false positives) and model performance by improving
              perception of the input audio.

          input_audio_transcription: Configuration for input audio transcription. The client can optionally set the
              language and prompt for transcription; these offer additional guidance to the
              transcription service.

          modalities: The set of modalities the model can respond with. To disable audio, set this to
              ["text"].

          turn_detection: Configuration for turn detection, either Server VAD or Semantic VAD. This can be
              set to `null` to turn off, in which case the client must manually trigger model
              response. Server VAD means that the model will detect the start and end of
              speech based on audio volume and respond at the end of user speech. Semantic VAD
              is more advanced and uses a turn detection model (in conjunction with VAD) to
              semantically estimate whether the user has finished speaking, then dynamically
              sets a timeout based on this probability. For example, if user audio trails off
              with "uhhm", the model will score a low probability of turn end and wait longer
              for the user to continue speaking. This can be useful for more natural
              conversations, but may have a higher latency.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        extra_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        return await self._post(
            "/realtime/transcription_sessions",
            body=await async_maybe_transform(
                {
                    "include": include,
                    "input_audio_format": input_audio_format,
                    "input_audio_noise_reduction": input_audio_noise_reduction,
                    "input_audio_transcription": input_audio_transcription,
                    "modalities": modalities,
                    "turn_detection": turn_detection,
                },
                transcription_session_create_params.TranscriptionSessionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
            ),
            cast_to=TranscriptionSession,
        )


class TranscriptionSessionsWithRawResponse:
    def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
        self._transcription_sessions = transcription_sessions

        self.create = _legacy_response.to_raw_response_wrapper(
            transcription_sessions.create,
        )


class AsyncTranscriptionSessionsWithRawResponse:
    def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
        self._transcription_sessions = transcription_sessions

        self.create = _legacy_response.async_to_raw_response_wrapper(
            transcription_sessions.create,
        )


class TranscriptionSessionsWithStreamingResponse:
    def __init__(self, transcription_sessions: TranscriptionSessions) -> None:
        self._transcription_sessions = transcription_sessions

        self.create = to_streamed_response_wrapper(
            transcription_sessions.create,
        )


class AsyncTranscriptionSessionsWithStreamingResponse:
    def __init__(self, transcription_sessions: AsyncTranscriptionSessions) -> None:
        self._transcription_sessions = transcription_sessions

        self.create = async_to_streamed_response_wrapper(
            transcription_sessions.create,
        )
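# A minimal async sketch for `AsyncTranscriptionSessions.create` above, assuming
# an `AsyncOpenAI` client with OPENAI_API_KEY set; the `gpt-4o-transcribe` model
# name inside `input_audio_transcription` is illustrative, and the
# `client_secret.value` access follows the documented response shape.

import asyncio

from openai import AsyncOpenAI


async def main() -> None:
    client = AsyncOpenAI()
    # Mint an ephemeral token scoped to realtime transcription; the parameters
    # mirror the `transcription_session.update` client event.
    session = await client.beta.realtime.transcription_sessions.create(
        input_audio_format="pcm16",
        input_audio_transcription={"model": "gpt-4o-transcribe"},
        include=["item.input_audio_transcription.logprobs"],
    )
    print(session.client_secret.value)  # ephemeral token for the browser client


asyncio.run(main())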