889 lines
38 KiB
Python
889 lines
38 KiB
Python
|
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
|
||
|
|
||
|
from __future__ import annotations
|
||
|
|
||
|
from typing import Union, Iterable, Optional
|
||
|
from typing_extensions import Literal
|
||
|
|
||
|
import httpx
|
||
|
|
||
|
from ... import _legacy_response
|
||
|
from ..._types import NOT_GIVEN, Body, Query, Headers, NotGiven
|
||
|
from ..._utils import (
|
||
|
maybe_transform,
|
||
|
async_maybe_transform,
|
||
|
)
|
||
|
from ..._compat import cached_property
|
||
|
from ..._resource import SyncAPIResource, AsyncAPIResource
|
||
|
from ..._response import to_streamed_response_wrapper, async_to_streamed_response_wrapper
|
||
|
from ...pagination import SyncCursorPage, AsyncCursorPage
|
||
|
from ...types.beta import (
|
||
|
assistant_list_params,
|
||
|
assistant_create_params,
|
||
|
assistant_update_params,
|
||
|
)
|
||
|
from ..._base_client import (
|
||
|
AsyncPaginator,
|
||
|
make_request_options,
|
||
|
)
|
||
|
from ...types.beta.assistant import Assistant
|
||
|
from ...types.beta.assistant_deleted import AssistantDeleted
|
||
|
from ...types.beta.assistant_tool_param import AssistantToolParam
|
||
|
from ...types.beta.assistant_response_format_option_param import AssistantResponseFormatOptionParam
|
||
|
|
||
|
__all__ = ["Assistants", "AsyncAssistants"]
|
||
|
|
||
|
|
||
|
class Assistants(SyncAPIResource):
    """Synchronous resource for the beta `/assistants` endpoints.

    Each request method merges `OpenAI-Beta: assistants=v2` into `extra_headers`;
    header values supplied by the caller take precedence over that default.
    """

    @cached_property
    def with_raw_response(self) -> AssistantsWithRawResponse:
        """Return an `AssistantsWithRawResponse` constructed around this resource."""
        raw_view = AssistantsWithRawResponse(self)
        return raw_view

    @cached_property
    def with_streaming_response(self) -> AssistantsWithStreamingResponse:
        """Return an `AssistantsWithStreamingResponse` constructed around this resource."""
        streaming_view = AssistantsWithStreamingResponse(self)
        return streaming_view

    def create(
        self,
        *,
        model: Union[
            str,
            Literal[
                "gpt-4-turbo",
                "gpt-4-turbo-2024-04-09",
                "gpt-4-0125-preview",
                "gpt-4-turbo-preview",
                "gpt-4-1106-preview",
                "gpt-4-vision-preview",
                "gpt-4",
                "gpt-4-0314",
                "gpt-4-0613",
                "gpt-4-32k",
                "gpt-4-32k-0314",
                "gpt-4-32k-0613",
                "gpt-3.5-turbo",
                "gpt-3.5-turbo-16k",
                "gpt-3.5-turbo-0613",
                "gpt-3.5-turbo-1106",
                "gpt-3.5-turbo-0125",
                "gpt-3.5-turbo-16k-0613",
            ],
        ],
        description: Optional[str] | NotGiven = NOT_GIVEN,
        instructions: Optional[str] | NotGiven = NOT_GIVEN,
        metadata: Optional[object] | NotGiven = NOT_GIVEN,
        name: Optional[str] | NotGiven = NOT_GIVEN,
        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Assistant:
        """Create an assistant with a model and instructions.

        Issues a POST to `/assistants` and casts the result to `Assistant`.

        Args:
          model: ID of the model to use. Use the
              [List models](https://platform.openai.com/docs/api-reference/models/list)
              API to see all of your available models, or see the
              [Model overview](https://platform.openai.com/docs/models/overview) for
              descriptions of them.

          description: The description of the assistant. Maximum length: 512 characters.

          instructions: The system instructions that the assistant uses. Maximum length:
              256,000 characters.

          metadata: Set of 16 key-value pairs that can be attached to an object, useful for
              storing additional information about the object in a structured format.
              Keys can be at most 64 characters long and values at most 512 characters
              long.

          name: The name of the assistant. Maximum length: 256 characters.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo)
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees
              the message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model
              to produce JSON yourself via a system or user message. Without this, the
              model may generate an unending stream of whitespace until the generation
              reaches the token limit, resulting in a long-running and seemingly "stuck"
              request. Also note that the message content may be partially cut off if
              `finish_reason="length"`, which indicates the generation exceeded
              `max_tokens` or the conversation exceeded the max context length.

          temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8
              make the output more random, while lower values like 0.2 make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The
              resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the
              `file_search` tool requires a list of vector store IDs.

          tools: A list of tools enabled on the assistant. There can be a maximum of 128
              tools per assistant. Tools can be of types `code_interpreter`,
              `file_search`, or `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where
              the model considers the results of the tokens with top_p probability mass.
              So 0.1 means only the tokens comprising the top 10% probability mass are
              considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Caller-provided headers are unpacked last so they win over the beta default.
        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        payload = maybe_transform(
            {
                "model": model,
                "description": description,
                "instructions": instructions,
                "metadata": metadata,
                "name": name,
                "response_format": response_format,
                "temperature": temperature,
                "tool_resources": tool_resources,
                "tools": tools,
                "top_p": top_p,
            },
            assistant_create_params.AssistantCreateParams,
        )
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            "/assistants",
            body=payload,
            options=request_options,
            cast_to=Assistant,
        )

    def retrieve(
        self,
        assistant_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Assistant:
        """Retrieve an assistant.

        Issues a GET to `/assistants/{assistant_id}` and casts the result to
        `Assistant`.

        Args:
          assistant_id: Identifier interpolated into the request path; must be non-empty.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `assistant_id` is empty (falsy).
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        # Caller-provided headers are unpacked last so they win over the beta default.
        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._get(
            f"/assistants/{assistant_id}",
            options=request_options,
            cast_to=Assistant,
        )

    def update(
        self,
        assistant_id: str,
        *,
        description: Optional[str] | NotGiven = NOT_GIVEN,
        instructions: Optional[str] | NotGiven = NOT_GIVEN,
        metadata: Optional[object] | NotGiven = NOT_GIVEN,
        model: str | NotGiven = NOT_GIVEN,
        name: Optional[str] | NotGiven = NOT_GIVEN,
        response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
        temperature: Optional[float] | NotGiven = NOT_GIVEN,
        tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
        tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
        top_p: Optional[float] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> Assistant:
        """Modify an assistant.

        Issues a POST to `/assistants/{assistant_id}` and casts the result to
        `Assistant`.

        Args:
          assistant_id: Identifier interpolated into the request path; must be non-empty.

          description: The description of the assistant. Maximum length: 512 characters.

          instructions: The system instructions that the assistant uses. Maximum length:
              256,000 characters.

          metadata: Set of 16 key-value pairs that can be attached to an object, useful for
              storing additional information about the object in a structured format.
              Keys can be at most 64 characters long and values at most 512 characters
              long.

          model: ID of the model to use. Use the
              [List models](https://platform.openai.com/docs/api-reference/models/list)
              API to see all of your available models, or see the
              [Model overview](https://platform.openai.com/docs/models/overview) for
              descriptions of them.

          name: The name of the assistant. Maximum length: 256 characters.

          response_format: Specifies the format that the model must output. Compatible with
              [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo)
              and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

              Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees
              the message the model generates is valid JSON.

              **Important:** when using JSON mode, you **must** also instruct the model
              to produce JSON yourself via a system or user message. Without this, the
              model may generate an unending stream of whitespace until the generation
              reaches the token limit, resulting in a long-running and seemingly "stuck"
              request. Also note that the message content may be partially cut off if
              `finish_reason="length"`, which indicates the generation exceeded
              `max_tokens` or the conversation exceeded the max context length.

          temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8
              make the output more random, while lower values like 0.2 make it more
              focused and deterministic.

          tool_resources: A set of resources that are used by the assistant's tools. The
              resources are specific to the type of tool. For example, the
              `code_interpreter` tool requires a list of file IDs, while the
              `file_search` tool requires a list of vector store IDs.

          tools: A list of tools enabled on the assistant. There can be a maximum of 128
              tools per assistant. Tools can be of types `code_interpreter`,
              `file_search`, or `function`.

          top_p: An alternative to sampling with temperature, called nucleus sampling, where
              the model considers the results of the tokens with top_p probability mass.
              So 0.1 means only the tokens comprising the top 10% probability mass are
              considered.

              We generally recommend altering this or temperature but not both.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `assistant_id` is empty (falsy).
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        # Caller-provided headers are unpacked last so they win over the beta default.
        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        payload = maybe_transform(
            {
                "description": description,
                "instructions": instructions,
                "metadata": metadata,
                "model": model,
                "name": name,
                "response_format": response_format,
                "temperature": temperature,
                "tool_resources": tool_resources,
                "tools": tools,
                "top_p": top_p,
            },
            assistant_update_params.AssistantUpdateParams,
        )
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._post(
            f"/assistants/{assistant_id}",
            body=payload,
            options=request_options,
            cast_to=Assistant,
        )

    def list(
        self,
        *,
        after: str | NotGiven = NOT_GIVEN,
        before: str | NotGiven = NOT_GIVEN,
        limit: int | NotGiven = NOT_GIVEN,
        order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> SyncCursorPage[Assistant]:
        """Return a list of assistants.

        Requests `/assistants` through `_get_api_list` with
        `SyncCursorPage[Assistant]` as the page type.

        Args:
          after: A cursor for use in pagination. `after` is an object ID that defines your
              place in the list. For instance, if you make a list request and receive 100
              objects, ending with obj_foo, your subsequent call can include
              after=obj_foo in order to fetch the next page of the list.

          before: A cursor for use in pagination. `before` is an object ID that defines your
              place in the list. For instance, if you make a list request and receive 100
              objects, ending with obj_foo, your subsequent call can include
              before=obj_foo in order to fetch the previous page of the list.

          limit: A limit on the number of objects to be returned. Limit can range between 1
              and 100, and the default is 20.

          order: Sort order by the `created_at` timestamp of the objects. `asc` for
              ascending order and `desc` for descending order.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds
        """
        # Caller-provided headers are unpacked last so they win over the beta default.
        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        pagination_query = maybe_transform(
            {
                "after": after,
                "before": before,
                "limit": limit,
                "order": order,
            },
            assistant_list_params.AssistantListParams,
        )
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
            query=pagination_query,
        )
        return self._get_api_list(
            "/assistants",
            page=SyncCursorPage[Assistant],
            options=request_options,
            model=Assistant,
        )

    def delete(
        self,
        assistant_id: str,
        *,
        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
        # The extra values given here take precedence over values defined on the client or passed to this method.
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    ) -> AssistantDeleted:
        """Delete an assistant.

        Issues a DELETE to `/assistants/{assistant_id}` and casts the result to
        `AssistantDeleted`.

        Args:
          assistant_id: Identifier interpolated into the request path; must be non-empty.

          extra_headers: Send extra headers

          extra_query: Add additional query parameters to the request

          extra_body: Add additional JSON properties to the request

          timeout: Override the client-level default timeout for this request, in seconds

        Raises:
          ValueError: If `assistant_id` is empty (falsy).
        """
        if not assistant_id:
            raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
        # Caller-provided headers are unpacked last so they win over the beta default.
        merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
        request_options = make_request_options(
            extra_headers=merged_headers,
            extra_query=extra_query,
            extra_body=extra_body,
            timeout=timeout,
        )
        return self._delete(
            f"/assistants/{assistant_id}",
            options=request_options,
            cast_to=AssistantDeleted,
        )
|
||
|
|
||
|
|
||
|
class AsyncAssistants(AsyncAPIResource):
|
||
|
@cached_property
def with_raw_response(self) -> AsyncAssistantsWithRawResponse:
    """Return an `AsyncAssistantsWithRawResponse` constructed around this resource."""
    raw_view = AsyncAssistantsWithRawResponse(self)
    return raw_view
|
||
|
|
||
|
@cached_property
def with_streaming_response(self) -> AsyncAssistantsWithStreamingResponse:
    """Return an `AsyncAssistantsWithStreamingResponse` constructed around this resource."""
    streaming_view = AsyncAssistantsWithStreamingResponse(self)
    return streaming_view
|
||
|
|
||
|
async def create(
    self,
    *,
    model: Union[
        str,
        Literal[
            "gpt-4-turbo",
            "gpt-4-turbo-2024-04-09",
            "gpt-4-0125-preview",
            "gpt-4-turbo-preview",
            "gpt-4-1106-preview",
            "gpt-4-vision-preview",
            "gpt-4",
            "gpt-4-0314",
            "gpt-4-0613",
            "gpt-4-32k",
            "gpt-4-32k-0314",
            "gpt-4-32k-0613",
            "gpt-3.5-turbo",
            "gpt-3.5-turbo-16k",
            "gpt-3.5-turbo-0613",
            "gpt-3.5-turbo-1106",
            "gpt-3.5-turbo-0125",
            "gpt-3.5-turbo-16k-0613",
        ],
    ],
    description: Optional[str] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    metadata: Optional[object] | NotGiven = NOT_GIVEN,
    name: Optional[str] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[assistant_create_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Assistant:
    """Create an assistant with a model and instructions.

    Awaits a POST to `/assistants` and casts the result to `Assistant`.

    Args:
      model: ID of the model to use. Use the
          [List models](https://platform.openai.com/docs/api-reference/models/list)
          API to see all of your available models, or see the
          [Model overview](https://platform.openai.com/docs/models/overview) for
          descriptions of them.

      description: The description of the assistant. Maximum length: 512 characters.

      instructions: The system instructions that the assistant uses. Maximum length:
          256,000 characters.

      metadata: Set of 16 key-value pairs that can be attached to an object, useful for
          storing additional information about the object in a structured format.
          Keys can be at most 64 characters long and values at most 512 characters
          long.

      name: The name of the assistant. Maximum length: 256 characters.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo)
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees
          the message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model
          to produce JSON yourself via a system or user message. Without this, the
          model may generate an unending stream of whitespace until the generation
          reaches the token limit, resulting in a long-running and seemingly "stuck"
          request. Also note that the message content may be partially cut off if
          `finish_reason="length"`, which indicates the generation exceeded
          `max_tokens` or the conversation exceeded the max context length.

      temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8
          make the output more random, while lower values like 0.2 make it more
          focused and deterministic.

      tool_resources: A set of resources that are used by the assistant's tools. The
          resources are specific to the type of tool. For example, the
          `code_interpreter` tool requires a list of file IDs, while the
          `file_search` tool requires a list of vector store IDs.

      tools: A list of tools enabled on the assistant. There can be a maximum of 128
          tools per assistant. Tools can be of types `code_interpreter`,
          `file_search`, or `function`.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where
          the model considers the results of the tokens with top_p probability mass.
          So 0.1 means only the tokens comprising the top 10% probability mass are
          considered.

          We generally recommend altering this or temperature but not both.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # Caller-provided headers are unpacked last so they win over the beta default.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    payload = await async_maybe_transform(
        {
            "model": model,
            "description": description,
            "instructions": instructions,
            "metadata": metadata,
            "name": name,
            "response_format": response_format,
            "temperature": temperature,
            "tool_resources": tool_resources,
            "tools": tools,
            "top_p": top_p,
        },
        assistant_create_params.AssistantCreateParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        "/assistants",
        body=payload,
        options=request_options,
        cast_to=Assistant,
    )
|
||
|
|
||
|
async def retrieve(
    self,
    assistant_id: str,
    *,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Assistant:
    """Retrieve an assistant.

    Awaits a GET to `/assistants/{assistant_id}` and casts the result to
    `Assistant`.

    Args:
      assistant_id: Identifier interpolated into the request path; must be non-empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `assistant_id` is empty (falsy).
    """
    if not assistant_id:
        raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
    # Caller-provided headers are unpacked last so they win over the beta default.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._get(
        f"/assistants/{assistant_id}",
        options=request_options,
        cast_to=Assistant,
    )
|
||
|
|
||
|
async def update(
    self,
    assistant_id: str,
    *,
    description: Optional[str] | NotGiven = NOT_GIVEN,
    instructions: Optional[str] | NotGiven = NOT_GIVEN,
    metadata: Optional[object] | NotGiven = NOT_GIVEN,
    model: str | NotGiven = NOT_GIVEN,
    name: Optional[str] | NotGiven = NOT_GIVEN,
    response_format: Optional[AssistantResponseFormatOptionParam] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    tool_resources: Optional[assistant_update_params.ToolResources] | NotGiven = NOT_GIVEN,
    tools: Iterable[AssistantToolParam] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
    # The extra values given here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> Assistant:
    """Modify an assistant.

    Awaits a POST to `/assistants/{assistant_id}` and casts the result to
    `Assistant`.

    Args:
      assistant_id: Identifier interpolated into the request path; must be non-empty.

      description: The description of the assistant. Maximum length: 512 characters.

      instructions: The system instructions that the assistant uses. Maximum length:
          256,000 characters.

      metadata: Set of 16 key-value pairs that can be attached to an object, useful for
          storing additional information about the object in a structured format.
          Keys can be at most 64 characters long and values at most 512 characters
          long.

      model: ID of the model to use. Use the
          [List models](https://platform.openai.com/docs/api-reference/models/list)
          API to see all of your available models, or see the
          [Model overview](https://platform.openai.com/docs/models/overview) for
          descriptions of them.

      name: The name of the assistant. Maximum length: 256 characters.

      response_format: Specifies the format that the model must output. Compatible with
          [GPT-4 Turbo](https://platform.openai.com/docs/models/gpt-4-and-gpt-4-turbo)
          and all GPT-3.5 Turbo models since `gpt-3.5-turbo-1106`.

          Setting to `{ "type": "json_object" }` enables JSON mode, which guarantees
          the message the model generates is valid JSON.

          **Important:** when using JSON mode, you **must** also instruct the model
          to produce JSON yourself via a system or user message. Without this, the
          model may generate an unending stream of whitespace until the generation
          reaches the token limit, resulting in a long-running and seemingly "stuck"
          request. Also note that the message content may be partially cut off if
          `finish_reason="length"`, which indicates the generation exceeded
          `max_tokens` or the conversation exceeded the max context length.

      temperature: Sampling temperature to use, between 0 and 2. Higher values like 0.8
          make the output more random, while lower values like 0.2 make it more
          focused and deterministic.

      tool_resources: A set of resources that are used by the assistant's tools. The
          resources are specific to the type of tool. For example, the
          `code_interpreter` tool requires a list of file IDs, while the
          `file_search` tool requires a list of vector store IDs.

      tools: A list of tools enabled on the assistant. There can be a maximum of 128
          tools per assistant. Tools can be of types `code_interpreter`,
          `file_search`, or `function`.

      top_p: An alternative to sampling with temperature, called nucleus sampling, where
          the model considers the results of the tokens with top_p probability mass.
          So 0.1 means only the tokens comprising the top 10% probability mass are
          considered.

          We generally recommend altering this or temperature but not both.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `assistant_id` is empty (falsy).
    """
    if not assistant_id:
        raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")
    # Caller-provided headers are unpacked last so they win over the beta default.
    merged_headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    payload = await async_maybe_transform(
        {
            "description": description,
            "instructions": instructions,
            "metadata": metadata,
            "model": model,
            "name": name,
            "response_format": response_format,
            "temperature": temperature,
            "tool_resources": tool_resources,
            "tools": tools,
            "top_p": top_p,
        },
        assistant_update_params.AssistantUpdateParams,
    )
    request_options = make_request_options(
        extra_headers=merged_headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._post(
        f"/assistants/{assistant_id}",
        body=payload,
        options=request_options,
        cast_to=Assistant,
    )
|
||
|
|
||
|
def list(
    self,
    *,
    after: str | NotGiven = NOT_GIVEN,
    before: str | NotGiven = NOT_GIVEN,
    limit: int | NotGiven = NOT_GIVEN,
    order: Literal["asc", "desc"] | NotGiven = NOT_GIVEN,
    # The arguments below let callers send request parameters that have no dedicated keyword argument.
    # Values supplied here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AsyncPaginator[Assistant, AsyncCursorPage[Assistant]]:
    """Returns a list of assistants.

    This is a plain (non-`async`) method: it hands back whatever
    `self._get_api_list` produces for the `/assistants` path.

    Args:
      after: A cursor for use in pagination.

          `after` is an object ID that defines your place
          in the list. For instance, if you make a list request and receive 100 objects,
          ending with obj_foo, your subsequent call can include after=obj_foo in order to
          fetch the next page of the list.

      before: A cursor for use in pagination. `before` is an object ID that defines your place
          in the list. For instance, if you make a list request and receive 100 objects,
          ending with obj_foo, your subsequent call can include before=obj_foo in order to
          fetch the previous page of the list.

      limit: A limit on the number of objects to be returned. Limit can range between 1 and
          100, and the default is 20.

      order: Sort order by the `created_at` timestamp of the objects. `asc` for ascending
          order and `desc` for descending order.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds
    """
    # The beta header goes first so a caller-supplied "OpenAI-Beta" entry overrides it.
    headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}

    # Pagination arguments travel as query-string parameters.
    cursor_query = maybe_transform(
        {"after": after, "before": before, "limit": limit, "order": order},
        assistant_list_params.AssistantListParams,
    )
    request_options = make_request_options(
        extra_headers=headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
        query=cursor_query,
    )
    return self._get_api_list(
        "/assistants",
        page=AsyncCursorPage[Assistant],
        options=request_options,
        model=Assistant,
    )
|
||
|
|
||
|
async def delete(
    self,
    assistant_id: str,
    *,
    # The arguments below let callers send request parameters that have no dedicated keyword argument.
    # Values supplied here take precedence over values defined on the client or passed to this method.
    extra_headers: Headers | None = None,
    extra_query: Query | None = None,
    extra_body: Body | None = None,
    timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
) -> AssistantDeleted:
    """
    Delete an assistant.

    Args:
      assistant_id: Identifier interpolated into the request path
          (`/assistants/{assistant_id}`). Must be non-empty.

      extra_headers: Send extra headers

      extra_query: Add additional query parameters to the request

      extra_body: Add additional JSON properties to the request

      timeout: Override the client-level default timeout for this request, in seconds

    Raises:
      ValueError: If `assistant_id` is empty (falsy).
    """
    # Reject an empty ID up front; otherwise the path would collapse to "/assistants/".
    if not assistant_id:
        raise ValueError(f"Expected a non-empty value for `assistant_id` but received {assistant_id!r}")

    # The beta header goes first so a caller-supplied "OpenAI-Beta" entry overrides it.
    headers = {"OpenAI-Beta": "assistants=v2", **(extra_headers or {})}
    request_options = make_request_options(
        extra_headers=headers,
        extra_query=extra_query,
        extra_body=extra_body,
        timeout=timeout,
    )
    return await self._delete(
        f"/assistants/{assistant_id}",
        options=request_options,
        cast_to=AssistantDeleted,
    )
|
||
|
|
||
|
|
||
|
class AssistantsWithRawResponse:
    """Exposes each `Assistants` method wrapped by `_legacy_response.to_raw_response_wrapper`."""

    def __init__(self, assistants: Assistants) -> None:
        # Keep a reference to the resource whose bound methods are wrapped below.
        self._assistants = assistants

        wrap = _legacy_response.to_raw_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
|
||
|
|
||
|
|
||
|
class AsyncAssistantsWithRawResponse:
    """Exposes each `AsyncAssistants` method wrapped by `_legacy_response.async_to_raw_response_wrapper`."""

    def __init__(self, assistants: AsyncAssistants) -> None:
        # Keep a reference to the resource whose bound methods are wrapped below.
        self._assistants = assistants

        wrap = _legacy_response.async_to_raw_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
|
||
|
|
||
|
|
||
|
class AssistantsWithStreamingResponse:
    """Exposes each `Assistants` method wrapped by `to_streamed_response_wrapper`."""

    def __init__(self, assistants: Assistants) -> None:
        # Keep a reference to the resource whose bound methods are wrapped below.
        self._assistants = assistants

        wrap = to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
|
||
|
|
||
|
|
||
|
class AsyncAssistantsWithStreamingResponse:
    """Exposes each `AsyncAssistants` method wrapped by `async_to_streamed_response_wrapper`."""

    def __init__(self, assistants: AsyncAssistants) -> None:
        # Keep a reference to the resource whose bound methods are wrapped below.
        self._assistants = assistants

        wrap = async_to_streamed_response_wrapper
        self.create = wrap(assistants.create)
        self.retrieve = wrap(assistants.retrieve)
        self.update = wrap(assistants.update)
        self.list = wrap(assistants.list)
        self.delete = wrap(assistants.delete)
|