Skip to content

Commit d08442b

Browse files
feat(arkruntime): support context rolling tokens
1 parent 8ada35f commit d08442b

File tree

5 files changed

+22
-37
lines changed

5 files changed

+22
-37
lines changed

volcenginesdkarkruntime/resources/context/context.py

Lines changed: 5 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
from __future__ import annotations
44
import httpx
55

6-
from typing import Iterable, Optional
6+
from typing import Iterable, Optional, Literal
77

88
from ..._types import Body, Query, Headers
99
from .completions import Completions, AsyncCompletions
@@ -13,7 +13,7 @@
1313
from ..._base_client import (
1414
make_request_options,
1515
)
16-
from ...types.context import CreateContextResponse, CloneContextResponse
16+
from ...types.context import CreateContextResponse
1717
from ...types.context.context_create_params import TTLTypes, TruncationStrategy, to_optional_ttl
1818
from ...types.chat import ChatCompletionMessageParam
1919

@@ -32,6 +32,7 @@ def create(
3232
model: str,
3333
messages: Iterable[ChatCompletionMessageParam],
3434
ttl: Optional[TTLTypes] | None = None,
35+
mode: Literal["session"] = "session",
3536
truncation_strategy: Optional[TruncationStrategy] | None = None,
3637
extra_headers: Headers | None = None,
3738
extra_query: Query | None = None,
@@ -43,6 +44,7 @@ def create(
4344
"/context/create",
4445
body={
4546
"model": model,
47+
"mode": mode,
4648
"messages": messages,
4749
"ttl": ttl,
4850
"truncation_strategy": truncation_strategy,
@@ -56,30 +58,6 @@ def create(
5658
cast_to=CreateContextResponse,
5759
)
5860

59-
def clone(
60-
self,
61-
*,
62-
context_id: str,
63-
extra_headers: Headers | None = None,
64-
extra_query: Query | None = None,
65-
extra_body: Body | None = None,
66-
timeout: float | httpx.Timeout | None = None,
67-
) -> CloneContextResponse:
68-
return self._post(
69-
"/context/clone",
70-
body={
71-
"context_id": context_id,
72-
},
73-
options=make_request_options(
74-
extra_headers=extra_headers,
75-
extra_query=extra_query,
76-
extra_body=extra_body,
77-
timeout=timeout,
78-
),
79-
cast_to=CloneContextResponse,
80-
)
81-
82-
8361
class AsyncContext(AsyncAPIResource):
8462
@cached_property
8563
def completions(self) -> AsyncCompletions:
@@ -90,6 +68,7 @@ async def create(
9068
self,
9169
*,
9270
model: str,
71+
mode: Literal["session"] = "session",
9372
messages: Iterable[ChatCompletionMessageParam],
9473
ttl: Optional[TTLTypes] | None = None,
9574
truncation_strategy: Optional[TruncationStrategy] | None = None,

volcenginesdkarkruntime/types/context/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,5 +4,5 @@
44

55
from .context_chat_completion_chunk import ContextChatCompletionChunk
66
from .context_chat_completion import ContextChatCompletion
7-
from .create_context_response import CreateContextResponse, CloneContextResponse
7+
from .create_context_response import CreateContextResponse
88
from .context_create_params import TruncationStrategy, TTLTypes

volcenginesdkarkruntime/types/context/context_create_params.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,12 @@
1212

1313

1414
class TruncationStrategy(TypedDict, total=False):
15-
type: Required[Literal["last_history_tokens"]]
15+
type: Required[Literal["last_history_tokens", "rolling_tokens"]]
1616
"""The truncation strategy to use for the context. The default is last_history_tokens."""
1717
last_history_tokens: Optional[int]
1818
"""The number of most recent tokens from the context when constructing the chat completion."""
19+
rolling_tokens: Optional[bool]
20+
"""If true, the context will not rolling when reach the max tokens limit."""
1921

2022

2123
TTLTypes = Union[int, datetime.timedelta]
Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,22 +1,25 @@
11
from ..._models import BaseModel
2+
from ..completion_usage import CompletionUsage
23
from .truncation_strategy import TruncationStrategy
34

4-
__all__ = ["CreateContextResponse", "CloneContextResponse"]
5+
__all__ = ["CreateContextResponse"]
56

67

78
class CreateContextResponse(BaseModel):
89
id: str
910
"""A unique identifier for the context."""
1011
model: str
1112
"""The endpoint used for the context."""
13+
mode: str
14+
"""The mode used for the context."""
1215
ttl: int
1316
"""The time to live (TTL) for the context in seconds."""
1417
truncation_strategy: TruncationStrategy
1518
"""
1619
Controls for how a context will be truncated prior to the run.
1720
Use this to control the context window for the chat completion.
1821
"""
19-
20-
21-
class CloneContextResponse(CreateContextResponse):
22-
pass
22+
usage: CompletionUsage
23+
"""
24+
Usage statistics for the context create request.
25+
"""

volcenginesdkexamples/volcenginesdkarkruntime/context.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,11 @@
1919
print("----- create context -----")
2020
response = client.context.create(
2121
model="${YOUR_ENDPOINT_ID}",
22+
mode="session",
2223
messages=[
2324
{"role": "system", "content": "你是豆包,是由字节跳动开发的 AI 人工智能助手"},
2425
],
2526
ttl=datetime.timedelta(minutes=60),
26-
truncation_strategy={
27-
'type': 'last_history_tokens',
28-
'last_history_tokens': 4096
29-
}
3027
)
3128
print(response)
3229

@@ -40,6 +37,7 @@
4037
stream=False
4138
)
4239
print(chat_response.choices[0].message.content)
40+
print(chat_response.usage)
4341

4442
print("----- chat round 2 (streaming) -----")
4543
stream = client.context.completions.create(
@@ -48,6 +46,9 @@
4846
messages=[
4947
{"role": "user", "content": "我是谁?"},
5048
],
49+
stream_options={
50+
'include_usage': True,
51+
},
5152
stream=True
5253
)
5354
for chunk in stream:

0 commit comments

Comments
 (0)