Skip to content

Commit 8ada35f

Browse files
feat(ark-runtime): add context API for GA
1 parent a36693a commit 8ada35f

File tree

5 files changed

+28
-55
lines changed

5 files changed

+28
-55
lines changed

volcenginesdkarkruntime/types/completion_usage.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
11
# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
22

33
from pydantic import BaseModel
4+
from typing import Optional
45

5-
__all__ = ["CompletionUsage"]
6+
__all__ = ["CompletionUsage", "PromptTokensDetails"]
7+
8+
9+
class PromptTokensDetails(BaseModel):
10+
cached_tokens: int
11+
"""Number of tokens hit cache."""
612

713

814
class CompletionUsage(BaseModel):
@@ -14,3 +20,6 @@ class CompletionUsage(BaseModel):
1420

1521
total_tokens: int
1622
"""Total number of tokens used in the request (prompt + completion)."""
23+
24+
prompt_tokens_details: Optional[PromptTokensDetails] = None
25+
"""Prompt tokens details."""

volcenginesdkarkruntime/types/context/context_chat_completion.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from ..._models import BaseModel
55
from ..chat.chat_completion import Choice
6-
from ..context.context_usage import ContextCompletionUsage
6+
from ..completion_usage import CompletionUsage
77

88
__all__ = [
99
"ContextChatCompletion",
@@ -29,5 +29,5 @@ class ContextChatCompletion(BaseModel):
2929
object: Literal["chat.completion"]
3030
"""The object type, which is always `chat.completion`."""
3131

32-
usage: Optional[ContextCompletionUsage] = None
32+
usage: Optional[CompletionUsage] = None
3333
"""Usage statistics for the completion request."""

volcenginesdkarkruntime/types/context/context_chat_completion_chunk.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
from ..._models import BaseModel
55
from ..chat.chat_completion_chunk import Choice
6-
from ..context.context_usage import ContextCompletionUsage
6+
from ..completion_usage import CompletionUsage
77

88
__all__ = [
99
"ContextChatCompletionChunk",
@@ -32,7 +32,7 @@ class ContextChatCompletionChunk(BaseModel):
3232
object: Literal["chat.completion.chunk"]
3333
"""The object type, which is always `chat.completion.chunk`."""
3434

35-
usage: Optional[ContextCompletionUsage] = None
35+
usage: Optional[CompletionUsage] = None
3636
"""
3737
An optional field that will only be present when you set
3838
`stream_options: {"include_usage": true}` in your request. When present, it

volcenginesdkarkruntime/types/context/context_usage.py

Lines changed: 0 additions & 27 deletions
This file was deleted.

volcenginesdkexamples/volcenginesdkarkruntime/context.py

Lines changed: 14 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -15,47 +15,38 @@
1515
client = Ark(api_key="${YOUR_API_KEY}")
1616

1717
if __name__ == "__main__":
18-
# Create context with 30 minutes cache:
18+
# Create a context with a 60-minute cache TTL:
1919
print("----- create context -----")
2020
response = client.context.create(
2121
model="${YOUR_ENDPOINT_ID}",
2222
messages=[
2323
{"role": "system", "content": "你是豆包,是由字节跳动开发的 AI 人工智能助手"},
2424
],
25-
ttl=datetime.timedelta(minutes=30),
25+
ttl=datetime.timedelta(minutes=60),
26+
truncation_strategy={
27+
'type': 'last_history_tokens',
28+
'last_history_tokens': 4096
29+
}
2630
)
2731
print(response)
2832

29-
# Streaming:
30-
print("----- streaming request -----")
31-
stream = client.context.completions.create(
33+
print("----- chat round 1 (non-stream) -----")
34+
chat_response = client.context.completions.create(
3235
context_id=response.id,
3336
model="${YOUR_ENDPOINT_ID}",
3437
messages=[
35-
{"role": "user", "content": "你是谁?"},
38+
{"role": "user", "content": "我是方方"},
3639
],
37-
stream=True
38-
)
39-
for chunk in stream:
40-
if chunk.usage:
41-
print(chunk.usage)
42-
if not chunk.choices:
43-
continue
44-
print(chunk.choices[0].delta.content, end="")
45-
46-
# Clone:
47-
print("----- clone context -----")
48-
clone_response = client.context.clone(
49-
context_id=response.id,
40+
stream=False
5041
)
51-
print(clone_response)
42+
print(chat_response.choices[0].message.content)
5243

53-
print("----- streaming request -----")
44+
print("----- chat round 2 (streaming) -----")
5445
stream = client.context.completions.create(
55-
context_id=clone_response.id,
46+
context_id=response.id,
5647
model="${YOUR_ENDPOINT_ID}",
5748
messages=[
58-
{"role": "user", "content": "刚才你说了什么?"},
49+
{"role": "user", "content": "我是谁?"},
5950
],
6051
stream=True
6152
)

0 commit comments

Comments (0)