# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.

from __future__ import annotations

from typing import Dict, List, Type, Union, Iterable, Optional, cast
from functools import partial
from typing_extensions import Literal

import httpx

from .... import _legacy_response
from ...._types import NOT_GIVEN, Body, Query, Headers, NotGiven
from ...._utils import maybe_transform, async_maybe_transform
from ...._compat import cached_property
from ...._resource import SyncAPIResource, AsyncAPIResource
from ...._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
from ...._streaming import Stream
from ....types.chat import completion_create_params
from ...._base_client import make_request_options
from ....lib._parsing import (
    ResponseFormatT,
    validate_input_tools as _validate_input_tools,
    parse_chat_completion as _parse_chat_completion,
    type_to_response_format_param as _type_to_response_format,
)
from ....types.chat_model import ChatModel
from ....lib.streaming.chat import ChatCompletionStreamManager, AsyncChatCompletionStreamManager
from ....types.shared_params import Metadata, ReasoningEffort
from ....types.chat.chat_completion import ChatCompletion
from ....types.chat.chat_completion_chunk import ChatCompletionChunk
from ....types.chat.parsed_chat_completion import ParsedChatCompletion
from ....types.chat.chat_completion_tool_param import ChatCompletionToolParam
from ....types.chat.chat_completion_audio_param import ChatCompletionAudioParam
from ....types.chat.chat_completion_message_param import ChatCompletionMessageParam
from ....types.chat.chat_completion_stream_options_param import ChatCompletionStreamOptionsParam
from ....types.chat.chat_completion_prediction_content_param import ChatCompletionPredictionContentParam
from ....types.chat.chat_completion_tool_choice_option_param import ChatCompletionToolChoiceOptionParam

__all__ = ["Completions", "AsyncCompletions"]


class Completions(SyncAPIResource):
    @cached_property
    def with_raw_response(self) -> CompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
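
        For example, a minimal sketch (`MyModel` is a hypothetical pydantic model;
        any arguments accepted by `.parse()` below work here):

        ```py
        response = client.beta.chat.completions.with_raw_response.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "solve 8x + 31 = 2"}],
            response_format=MyModel,
        )
        print(response.headers)
        completion = response.parse()
        ```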
        """
        return CompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
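
        For example, a minimal sketch (`MyModel` is a hypothetical pydantic model):

        ```py
        with client.beta.chat.completions.with_streaming_response.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "solve 8x + 31 = 2"}],
            response_format=MyModel,
        ) as response:
            completion = response.parse()
        ```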
        """
        return CompletionsWithStreamingResponse(self)

    def parse(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ParsedChatCompletion[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python-specific types
        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.

        You can pass a pydantic model to this method and it will automatically convert the model
        into a JSON schema, send it to the API and parse the response content back into the given model.

        This method will also automatically parse `function` tool calls if:
        - You use the `openai.pydantic_function_tool()` helper method (see the second example below)
        - You mark your tool schema with `"strict": True`

        Example usage:
        ```py
        from typing import List

        from pydantic import BaseModel
        from openai import OpenAI


        class Step(BaseModel):
            explanation: str
            output: str


        class MathResponse(BaseModel):
            steps: List[Step]
            final_answer: str


        client = OpenAI()
        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a helpful math tutor."},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            response_format=MathResponse,
        )

        message = completion.choices[0].message
        if message.parsed:
            print(message.parsed.steps)
            print("answer: ", message.parsed.final_answer)
        ```
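
        Tool-parsing example, a sketch (`Query` is a hypothetical pydantic model
        describing your function's arguments; it assumes the model chose to call
        the tool):

        ```py
        import openai

        completion = client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "look up the weather in Paris"}],
            tools=[openai.pydantic_function_tool(Query)],
        )

        tool_call = completion.choices[0].message.tool_calls[0]
        # arguments parsed back into a `Query` instance
        print(tool_call.function.parsed_arguments)
        ```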
        """
        _validate_input_tools(tools)

        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
            **(extra_headers or {}),
        }

        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
            return _parse_chat_completion(
                response_format=response_format,
                chat_completion=raw_completion,
                input_tools=tools,
            )

        return self._post(
            "/chat/completions",
            body=maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "reasoning_effort": reasoning_effort,
                    "response_format": _type_to_response_format(response_format),
                    "seed": seed,
                    "service_tier": service_tier,
                    "stop": stop,
                    "store": store,
                    "stream": False,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
            # in the `parser` function above
            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
            stream=False,
        )

    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:

        ```py
        with client.beta.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, a `ChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
        the context manager.
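
        For example, a sketch of reading the accumulated state after the `with`
        block (assumes the stream was fully consumed):

        ```py
        with client.beta.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            for event in stream:
                ...

        completion = stream.get_final_completion()
        print(completion.choices[0].message)
        ```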
        """
        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
            **(extra_headers or {}),
        }

        api_request: partial[Stream[ChatCompletionChunk]] = partial(
            self._client.chat.completions.create,
            messages=messages,
            model=model,
            audio=audio,
            stream=True,
            response_format=_type_to_response_format(response_format),
            frequency_penalty=frequency_penalty,
            function_call=function_call,
            functions=functions,
            logit_bias=logit_bias,
            logprobs=logprobs,
            max_completion_tokens=max_completion_tokens,
            max_tokens=max_tokens,
            metadata=metadata,
            modalities=modalities,
            n=n,
            parallel_tool_calls=parallel_tool_calls,
            prediction=prediction,
            presence_penalty=presence_penalty,
            reasoning_effort=reasoning_effort,
            seed=seed,
            service_tier=service_tier,
            store=store,
            stop=stop,
            stream_options=stream_options,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            web_search_options=web_search_options,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return ChatCompletionStreamManager(
            api_request,
            response_format=response_format,
            input_tools=tools,
        )


class AsyncCompletions(AsyncAPIResource):
    @cached_property
    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
        """
        This property can be used as a prefix for any HTTP method call to return
        the raw response object instead of the parsed content.

        For more information, see https://www.github.com/openai/openai-python#accessing-raw-response-data-eg-headers
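
        For example, a minimal sketch (`MyModel` is a hypothetical pydantic model;
        any arguments accepted by `.parse()` below work here):

        ```py
        response = await client.beta.chat.completions.with_raw_response.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "solve 8x + 31 = 2"}],
            response_format=MyModel,
        )
        print(response.headers)
        completion = response.parse()
        ```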
        """
        return AsyncCompletionsWithRawResponse(self)

    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
        """
        An alternative to `.with_raw_response` that doesn't eagerly read the response body.

        For more information, see https://www.github.com/openai/openai-python#with_streaming_response
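
        For example, a minimal sketch (`MyModel` is a hypothetical pydantic model):

        ```py
        async with client.beta.chat.completions.with_streaming_response.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "solve 8x + 31 = 2"}],
            response_format=MyModel,
        ) as response:
            completion = await response.parse()
        ```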
        """
        return AsyncCompletionsWithStreamingResponse(self)

    async def parse(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> ParsedChatCompletion[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create()` method that provides richer integrations with Python-specific types
        & returns a `ParsedChatCompletion` object, which is a subclass of the standard `ChatCompletion` class.

        You can pass a pydantic model to this method and it will automatically convert the model
        into a JSON schema, send it to the API and parse the response content back into the given model.

        This method will also automatically parse `function` tool calls if:
        - You use the `openai.pydantic_function_tool()` helper method (see the second example below)
        - You mark your tool schema with `"strict": True`

        Example usage:
        ```py
        from typing import List

        from pydantic import BaseModel
        from openai import AsyncOpenAI


        class Step(BaseModel):
            explanation: str
            output: str


        class MathResponse(BaseModel):
            steps: List[Step]
            final_answer: str


        client = AsyncOpenAI()
        completion = await client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[
                {"role": "system", "content": "You are a helpful math tutor."},
                {"role": "user", "content": "solve 8x + 31 = 2"},
            ],
            response_format=MathResponse,
        )

        message = completion.choices[0].message
        if message.parsed:
            print(message.parsed.steps)
            print("answer: ", message.parsed.final_answer)
        ```
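
        Tool-parsing example, a sketch (`Query` is a hypothetical pydantic model
        describing your function's arguments; it assumes the model chose to call
        the tool):

        ```py
        import openai

        completion = await client.beta.chat.completions.parse(
            model="gpt-4o-2024-08-06",
            messages=[{"role": "user", "content": "look up the weather in Paris"}],
            tools=[openai.pydantic_function_tool(Query)],
        )

        tool_call = completion.choices[0].message.tool_calls[0]
        # arguments parsed back into a `Query` instance
        print(tool_call.function.parsed_arguments)
        ```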
        """
        _validate_input_tools(tools)

        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.parse",
            **(extra_headers or {}),
        }

        def parser(raw_completion: ChatCompletion) -> ParsedChatCompletion[ResponseFormatT]:
            return _parse_chat_completion(
                response_format=response_format,
                chat_completion=raw_completion,
                input_tools=tools,
            )

        return await self._post(
            "/chat/completions",
            body=await async_maybe_transform(
                {
                    "messages": messages,
                    "model": model,
                    "audio": audio,
                    "frequency_penalty": frequency_penalty,
                    "function_call": function_call,
                    "functions": functions,
                    "logit_bias": logit_bias,
                    "logprobs": logprobs,
                    "max_completion_tokens": max_completion_tokens,
                    "max_tokens": max_tokens,
                    "metadata": metadata,
                    "modalities": modalities,
                    "n": n,
                    "parallel_tool_calls": parallel_tool_calls,
                    "prediction": prediction,
                    "presence_penalty": presence_penalty,
                    "reasoning_effort": reasoning_effort,
                    "response_format": _type_to_response_format(response_format),
                    "seed": seed,
                    "service_tier": service_tier,
                    "store": store,
                    "stop": stop,
                    "stream": False,
                    "stream_options": stream_options,
                    "temperature": temperature,
                    "tool_choice": tool_choice,
                    "tools": tools,
                    "top_logprobs": top_logprobs,
                    "top_p": top_p,
                    "user": user,
                    "web_search_options": web_search_options,
                },
                completion_create_params.CompletionCreateParams,
            ),
            options=make_request_options(
                extra_headers=extra_headers,
                extra_query=extra_query,
                extra_body=extra_body,
                timeout=timeout,
                post_parser=parser,
            ),
            # we turn the `ChatCompletion` instance into a `ParsedChatCompletion`
            # in the `parser` function above
            cast_to=cast(Type[ParsedChatCompletion[ResponseFormatT]], ChatCompletion),
            stream=False,
        )

    def stream(
        self,
        *,
        messages: Iterable[ChatCompletionMessageParam],
        model: Union[str, ChatModel],
        audio: Optional[ChatCompletionAudioParam] | NotGiven = NOT_GIVEN,
        response_format: completion_create_params.ResponseFormat | type[ResponseFormatT] | NotGiven = NOT_GIVEN,
        frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        function_call: completion_create_params.FunctionCall | NotGiven = NOT_GIVEN,
        functions: Iterable[completion_create_params.Function] | NotGiven = NOT_GIVEN,
        logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
        logprobs: Optional[bool] | NotGiven = NOT_GIVEN,
        max_completion_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
        metadata: Optional[Metadata] | NotGiven = NOT_GIVEN,
        modalities: Optional[List[Literal["text", "audio"]]] | NotGiven = NOT_GIVEN,
        n: Optional[int] | NotGiven = NOT_GIVEN,
        parallel_tool_calls: bool | NotGiven = NOT_GIVEN,
        prediction: Optional[ChatCompletionPredictionContentParam] | NotGiven = NOT_GIVEN,
        presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
        reasoning_effort: Optional[ReasoningEffort] | NotGiven = NOT_GIVEN,
        seed: Optional[int] | NotGiven = NOT_GIVEN,
        service_tier: Optional[Literal["auto", "default", "flex"]] | NotGiven = NOT_GIVEN,
        stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
        store: Optional[bool] | NotGiven = NOT_GIVEN,
        stream_options: Optional[ChatCompletionStreamOptionsParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_choice: ChatCompletionToolChoiceOptionParam | NotGiven = NOT_GIVEN,
        tools: Iterable[ChatCompletionToolParam] | NotGiven = NOT_GIVEN,
        top_logprobs: Optional[int] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        user: str | NotGiven = NOT_GIVEN,
        web_search_options: completion_create_params.WebSearchOptions | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AsyncChatCompletionStreamManager[ResponseFormatT]:
        """Wrapper over the `client.chat.completions.create(stream=True)` method that provides a more granular event API
        and automatic accumulation of each delta.

        This also supports all of the parsing utilities that `.parse()` does.

        Unlike `.create(stream=True)`, the `.stream()` method requires usage within a context manager to prevent accidental leakage of the response:

        ```py
        async with client.beta.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            async for event in stream:
                if event.type == "content.delta":
                    print(event.delta, flush=True, end="")
        ```

        When the context manager is entered, an `AsyncChatCompletionStream` instance is returned which, like `.create(stream=True)`, is an async iterator. The full list of events that are yielded by the iterator is outlined in [these docs](https://github.com/openai/openai-python/blob/main/helpers.md#chat-completions-events).

        When the context manager exits, the response will be closed; however, the `stream` instance is still available outside
        the context manager.
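
        For example, a sketch of reading the accumulated state after the
        `async with` block (assumes the stream was fully consumed):

        ```py
        async with client.beta.chat.completions.stream(
            model="gpt-4o-2024-08-06",
            messages=[...],
        ) as stream:
            async for event in stream:
                ...

        completion = await stream.get_final_completion()
        print(completion.choices[0].message)
        ```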
        """
        _validate_input_tools(tools)

        extra_headers = {
            "X-Stainless-Helper-Method": "beta.chat.completions.stream",
            **(extra_headers or {}),
        }

        api_request = self._client.chat.completions.create(
            messages=messages,
            model=model,
            audio=audio,
            stream=True,
            response_format=_type_to_response_format(response_format),
            frequency_penalty=frequency_penalty,
            function_call=function_call,
            functions=functions,
            logit_bias=logit_bias,
            logprobs=logprobs,
            max_completion_tokens=max_completion_tokens,
            max_tokens=max_tokens,
            metadata=metadata,
            modalities=modalities,
            n=n,
            parallel_tool_calls=parallel_tool_calls,
            prediction=prediction,
            presence_penalty=presence_penalty,
            reasoning_effort=reasoning_effort,
            seed=seed,
            service_tier=service_tier,
            stop=stop,
            store=store,
            stream_options=stream_options,
            temperature=temperature,
            tool_choice=tool_choice,
            tools=tools,
            top_logprobs=top_logprobs,
            top_p=top_p,
            user=user,
            web_search_options=web_search_options,
            extra_headers=extra_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return AsyncChatCompletionStreamManager(
            api_request,
            response_format=response_format,
            input_tools=tools,
        )


class CompletionsWithRawResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = _legacy_response.to_raw_response_wrapper(
            completions.parse,
        )


class AsyncCompletionsWithRawResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = _legacy_response.async_to_raw_response_wrapper(
            completions.parse,
        )


class CompletionsWithStreamingResponse:
    def __init__(self, completions: Completions) -> None:
        self._completions = completions

        self.parse = to_streamed_response_wrapper(
            completions.parse,
        )


class AsyncCompletionsWithStreamingResponse:
    def __init__(self, completions: AsyncCompletions) -> None:
        self._completions = completions

        self.parse = async_to_streamed_response_wrapper(
            completions.parse,
        )