
Commit 8650ee0

Merge branch 'feat/ark-add-context-api' into 'master'
feat: ark add context api

See merge request iaasng/volcengine-python-sdk!429

2 parents 9546179 + 6edb20b

13 files changed (+555, -2 lines)

volcenginesdkarkruntime/_client.py

Lines changed: 4 additions & 0 deletions
@@ -37,6 +37,7 @@ class Ark(SyncAPIClient):
     bot_chat: resources.BotChat
     embeddings: resources.Embeddings
     tokenization: resources.Tokenization
+    context: resources.Context
 
     def __init__(
         self,
@@ -93,6 +94,7 @@ def __init__(
         self.bot_chat = resources.BotChat(self)
         self.embeddings = resources.Embeddings(self)
         self.tokenization = resources.Tokenization(self)
+        self.context = resources.Context(self)
         # self.classification = resources.Classification(self)
 
     def _get_endpoint_sts_token(self, endpoint_id: str):
@@ -129,6 +131,7 @@ class AsyncArk(AsyncAPIClient):
     bot_chat: resources.AsyncBotChat
     embeddings: resources.AsyncEmbeddings
     tokenization: resources.AsyncTokenization
+    context: resources.AsyncContext
 
     def __init__(
         self,
@@ -184,6 +187,7 @@ def __init__(
         self.bot_chat = resources.AsyncBotChat(self)
         self.embeddings = resources.AsyncEmbeddings(self)
         self.tokenization = resources.AsyncTokenization(self)
+        self.context = resources.AsyncContext(self)
         # self.classification = resources.AsyncClassification(self)
 
     def _get_endpoint_sts_token(self, endpoint_id: str):
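
With this wiring, both the sync and async clients expose the new resource as a `context` attribute. A minimal sketch of reaching it, assuming the usual package-root exports and a placeholder credential (neither is defined by this commit):

    from volcenginesdkarkruntime import Ark, AsyncArk

    # Placeholder API key; the client can also pick up credentials
    # from the environment.
    client = Ark(api_key="YOUR_ARK_API_KEY")
    async_client = AsyncArk(api_key="YOUR_ARK_API_KEY")

    print(type(client.context))        # resources.Context
    print(type(async_client.context))  # resources.AsyncContext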

volcenginesdkarkruntime/resources/__init__.py

Lines changed: 4 additions & 1 deletion
@@ -3,6 +3,7 @@
 from .tokenization import Tokenization, AsyncTokenization
 from .classification import Classification, AsyncClassification
 from .bot import BotChat, AsyncBotChat
+from .context import Context, AsyncContext
 
 __all__ = [
     "Chat",
@@ -12,5 +13,7 @@
     "Embeddings",
     "AsyncEmbeddings",
     "Tokenization",
-    "AsyncTokenization"
+    "AsyncTokenization",
+    "Context",
+    "AsyncContext"
 ]
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
+from .context import Context, AsyncContext
+
+__all__ = ["Context", "AsyncContext"]
Lines changed: 221 additions & 0 deletions
@@ -0,0 +1,221 @@
+from __future__ import annotations
+
+from typing import Dict, List, Union, Iterable, Optional
+
+import httpx
+from typing_extensions import Literal
+
+from ..._types import Body, Query, Headers
+from ..._utils._utils import with_sts_token, async_with_sts_token
+from ..._base_client import make_request_options
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._compat import cached_property
+
+from ..._response import (
+    to_raw_response_wrapper,
+    async_to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from ..._streaming import Stream, AsyncStream
+from ...types.context import (
+    ContextChatCompletion,
+    ContextChatCompletionChunk
+)
+from ...types.chat import (
+    ChatCompletionMessageParam,
+    completion_create_params,
+    ChatCompletionStreamOptionsParam,
+    ChatCompletionToolParam,
+    ChatCompletionToolChoiceOptionParam
+)
+
+__all__ = ["Completions", "AsyncCompletions"]
+
+
+class Completions(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> CompletionsWithRawResponse:
+        return CompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> CompletionsWithStreamingResponse:
+        return CompletionsWithStreamingResponse(self)
+
+    @with_sts_token
+    def create(
+        self,
+        *,
+        context_id: str,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        frequency_penalty: Optional[float] | None = None,
+        function_call: completion_create_params.FunctionCall | None = None,
+        logit_bias: Optional[Dict[str, int]] | None = None,
+        logprobs: Optional[bool] | None = None,
+        max_tokens: Optional[int] | None = None,
+        presence_penalty: Optional[float] | None = None,
+        stop: Union[Optional[str], List[str]] | None = None,
+        stream: Optional[Literal[False]] | Literal[True] | None = None,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | None = None,
+        temperature: Optional[float] | None = None,
+        tools: Iterable[ChatCompletionToolParam] | None = None,
+        top_logprobs: Optional[int] | None = None,
+        top_p: Optional[float] | None = None,
+        repetition_penalty: Optional[float] | None = None,
+        n: Optional[int] | None = None,
+        tool_choice: ChatCompletionToolChoiceOptionParam | None = None,
+        response_format: completion_create_params.ResponseFormat | None = None,
+        user: str | None = None,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None = None,
+    ) -> ContextChatCompletion | Stream[ContextChatCompletionChunk]:
+        return self._post(
+            "/context/chat/completions",
+            body={
+                "context_id": context_id,
+                "messages": messages,
+                "model": model,
+                "frequency_penalty": frequency_penalty,
+                "function_call": function_call,
+                "logit_bias": logit_bias,
+                "logprobs": logprobs,
+                "max_tokens": max_tokens,
+                "presence_penalty": presence_penalty,
+                "stop": stop,
+                "stream": stream,
+                "stream_options": stream_options,
+                "temperature": temperature,
+                "tools": tools,
+                "top_logprobs": top_logprobs,
+                "top_p": top_p,
+                "user": user,
+                "repetition_penalty": repetition_penalty,
+                "n": n,
+                "tool_choice": tool_choice,
+                "response_format": response_format,
+            },
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            ),
+            cast_to=ContextChatCompletion,
+            stream=stream or False,
+            stream_cls=Stream[ContextChatCompletionChunk],
+        )
+
+
+class AsyncCompletions(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncCompletionsWithRawResponse:
+        return AsyncCompletionsWithRawResponse(self)
+
+    @cached_property
+    def with_streaming_response(self) -> AsyncCompletionsWithStreamingResponse:
+        return AsyncCompletionsWithStreamingResponse(self)
+
+    @async_with_sts_token
+    async def create(
+        self,
+        *,
+        context_id: str,
+        messages: Iterable[ChatCompletionMessageParam],
+        model: str,
+        frequency_penalty: Optional[float] | None = None,
+        function_call: completion_create_params.FunctionCall | None = None,
+        logit_bias: Optional[Dict[str, int]] | None = None,
+        logprobs: Optional[bool] | None = None,
+        max_tokens: Optional[int] | None = None,
+        presence_penalty: Optional[float] | None = None,
+        stop: Union[Optional[str], List[str]] | None = None,
+        stream: Optional[Literal[False]] | Literal[True] | None = None,
+        stream_options: Optional[ChatCompletionStreamOptionsParam] | None = None,
+        temperature: Optional[float] | None = None,
+        tools: Iterable[ChatCompletionToolParam] | None = None,
+        top_logprobs: Optional[int] | None = None,
+        top_p: Optional[float] | None = None,
+        user: str | None = None,
+        repetition_penalty: Optional[float] | None = None,
+        n: Optional[int] | None = None,
+        tool_choice: ChatCompletionToolChoiceOptionParam | None = None,
+        response_format: completion_create_params.ResponseFormat | None = None,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None = None,
+    ) -> ContextChatCompletion | AsyncStream[ContextChatCompletionChunk]:
+        return await self._post(
+            "/context/chat/completions",
+            body={
+                "context_id": context_id,
+                "messages": messages,
+                "model": model,
+                "frequency_penalty": frequency_penalty,
+                "function_call": function_call,
+                "logit_bias": logit_bias,
+                "logprobs": logprobs,
+                "max_tokens": max_tokens,
+                "presence_penalty": presence_penalty,
+                "stop": stop,
+                "stream": stream,
+                "stream_options": stream_options,
+                "temperature": temperature,
+                "tools": tools,
+                "top_logprobs": top_logprobs,
+                "top_p": top_p,
+                "user": user,
+                "repetition_penalty": repetition_penalty,
+                "n": n,
+                "tool_choice": tool_choice,
+                "response_format": response_format,
+            },
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            ),
+            cast_to=ContextChatCompletion,
+            stream=stream or False,
+            stream_cls=AsyncStream[ContextChatCompletionChunk],
+        )
+
+
+class CompletionsWithRawResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithRawResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_raw_response_wrapper(
+            completions.create,
+        )
+
+
+class CompletionsWithStreamingResponse:
+    def __init__(self, completions: Completions) -> None:
+        self._completions = completions
+
+        self.create = to_streamed_response_wrapper(
+            completions.create,
+        )
+
+
+class AsyncCompletionsWithStreamingResponse:
+    def __init__(self, completions: AsyncCompletions) -> None:
+        self._completions = completions
+
+        self.create = async_to_streamed_response_wrapper(
+            completions.create,
+        )
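
This resource mirrors the regular chat-completions surface, adding a required `context_id` and posting to `/context/chat/completions`. A hedged streaming sketch; the context and endpoint IDs are placeholders, and the chunk layout is assumed to match the ordinary chat-completion chunk:

    from volcenginesdkarkruntime import Ark

    client = Ark()  # credentials from the environment

    # context_id would come from a prior context-create call
    # (see the next file in this commit).
    stream = client.context.completions.create(
        context_id="ctx-xxxxxxxx",  # placeholder
        model="ep-xxxxxxxx",        # placeholder endpoint ID
        messages=[{"role": "user", "content": "Continue our conversation."}],
        stream=True,
    )
    for chunk in stream:
        if chunk.choices:
            print(chunk.choices[0].delta.content or "", end="")
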
Lines changed: 97 additions & 0 deletions
@@ -0,0 +1,97 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+import httpx
+
+from typing import Iterable, Optional, Literal
+
+from ..._types import Body, Query, Headers
+from .completions import Completions, AsyncCompletions
+from ..._compat import cached_property
+from ..._resource import SyncAPIResource, AsyncAPIResource
+from ..._utils._utils import with_sts_token, async_with_sts_token
+from ..._base_client import (
+    make_request_options,
+)
+from ...types.context import CreateContextResponse
+from ...types.context.context_create_params import TTLTypes, TruncationStrategy, to_optional_ttl
+from ...types.chat import ChatCompletionMessageParam
+
+__all__ = ["Context", "AsyncContext"]
+
+
+class Context(SyncAPIResource):
+    @cached_property
+    def completions(self) -> Completions:
+        return Completions(self._client)
+
+    @with_sts_token
+    def create(
+        self,
+        *,
+        model: str,
+        messages: Iterable[ChatCompletionMessageParam],
+        ttl: Optional[TTLTypes] | None = None,
+        mode: Literal["session", "common_prefix"] = "session",
+        truncation_strategy: Optional[TruncationStrategy] | None = None,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None = None,
+    ) -> CreateContextResponse:
+        ttl = to_optional_ttl(ttl)
+        return self._post(
+            "/context/create",
+            body={
+                "model": model,
+                "mode": mode,
+                "messages": messages,
+                "ttl": ttl,
+                "truncation_strategy": truncation_strategy,
+            },
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            ),
+            cast_to=CreateContextResponse,
+        )
+
+class AsyncContext(AsyncAPIResource):
+    @cached_property
+    def completions(self) -> AsyncCompletions:
+        return AsyncCompletions(self._client)
+
+    @async_with_sts_token
+    async def create(
+        self,
+        *,
+        model: str,
+        mode: Literal["session", "common_prefix"] = "session",
+        messages: Iterable[ChatCompletionMessageParam],
+        ttl: Optional[TTLTypes] | None = None,
+        truncation_strategy: Optional[TruncationStrategy] | None = None,
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None = None,
+    ) -> CreateContextResponse:
+        ttl = to_optional_ttl(ttl)
+        return await self._post(
+            "/context/create",
+            body={
+                "model": model,
+                "mode": mode,
+                "messages": messages,
+                "ttl": ttl,
+                "truncation_strategy": truncation_strategy,
+            },
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+            ),
+            cast_to=CreateContextResponse,
+        )
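
Together with the completions resource above, this yields a two-step flow: create a context, then chat against it. A sketch under stated assumptions; the `ttl` value and the `id` field on `CreateContextResponse` are illustrative guesses, and only the endpoint paths and parameter names come from this diff:

    from volcenginesdkarkruntime import Ark

    client = Ark()

    ctx = client.context.create(
        model="ep-xxxxxxxx",  # placeholder endpoint ID
        mode="session",
        messages=[{"role": "system", "content": "You are a helpful assistant."}],
        ttl=3600,             # assumption: TTL accepted as seconds
    )

    completion = client.context.completions.create(
        context_id=ctx.id,    # assumption: field name on CreateContextResponse
        model="ep-xxxxxxxx",
        messages=[{"role": "user", "content": "Hi!"}],
    )
    print(completion.choices[0].message.content)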

volcenginesdkarkruntime/types/completion_usage.py

Lines changed: 10 additions & 1 deletion
@@ -1,8 +1,14 @@
 # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
 
 from pydantic import BaseModel
+from typing import Optional
 
-__all__ = ["CompletionUsage"]
+__all__ = ["CompletionUsage", "PromptTokensDetails"]
+
+
+class PromptTokensDetails(BaseModel):
+    cached_tokens: int
+    """Number of tokens hit cache."""
 
 
 class CompletionUsage(BaseModel):
@@ -14,3 +20,6 @@ class CompletionUsage(BaseModel):
 
     total_tokens: int
     """Total number of tokens used in the request (prompt + completion)."""
+
+    prompt_tokens_details: Optional[PromptTokensDetails] = None
+    """Prompt tokens details."""
