
Commit 4719e11

潘婉宁 authored and luminghao-bytedance committed
feat: support datetime ttl
1 parent ef44ad0 commit 4719e11

File tree

8 files changed: +75 −26 lines changed


volcenginesdkarkruntime/resources/context/context.py

Lines changed: 9 additions & 6 deletions
@@ -3,7 +3,7 @@
 from __future__ import annotations
 import httpx
 
-from typing import Iterable
+from typing import Iterable, Optional
 
 from ..._types import Body, Query, Headers
 from .completions import Completions, AsyncCompletions
@@ -13,7 +13,8 @@
 from ..._base_client import (
     make_request_options,
 )
-from ...types.context import CreateContextResponse, TruncationStrategy
+from ...types.context import CreateContextResponse
+from ...types.context.context_create_params import TTLTypes, TruncationStrategy, to_optional_ttl
 from ...types.chat import ChatCompletionMessageParam
 
 __all__ = ["Context", "AsyncContext"]
@@ -30,13 +31,14 @@ def create(
         *,
         model: str,
         messages: Iterable[ChatCompletionMessageParam],
-        ttl: int | None = None,
-        truncation_strategy: TruncationStrategy | None = None,
+        ttl: Optional[TTLTypes] | None = None,
+        truncation_strategy: Optional[TruncationStrategy] | None = None,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None = None,
    ) -> CreateContextResponse:
+        ttl = to_optional_ttl(ttl)
        return self._post(
            "/context/create",
            body={
@@ -66,13 +68,14 @@ async def create(
         *,
         model: str,
         messages: Iterable[ChatCompletionMessageParam],
-        ttl: int | None = None,
-        truncation_strategy: TruncationStrategy | None = None,
+        ttl: Optional[TTLTypes] | None = None,
+        truncation_strategy: Optional[TruncationStrategy] | None = None,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None = None,
    ) -> CreateContextResponse:
+        ttl = to_optional_ttl(ttl)
        return await self._post(
            "/context/create",
            body={
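
With this change, Context.create and AsyncContext.create accept either an integer number of seconds or a datetime.timedelta for ttl, normalizing it through to_optional_ttl before the request body is built. A minimal sketch of the async path, assuming the package exports AsyncArk alongside Ark (not shown in this diff) and using placeholder endpoint values:

import asyncio
import datetime

from volcenginesdkarkruntime import AsyncArk  # assumed export, mirroring Ark

async def main() -> None:
    client = AsyncArk(api_key="${YOUR_API_KEY}")
    # The timedelta is converted to integer seconds (here 7200) before sending.
    ctx = await client.context.create(
        model="${YOUR_ENDPOINT_ID}",
        messages=[{"role": "system", "content": "You are a helpful assistant."}],
        ttl=datetime.timedelta(hours=2),
    )
    print(ctx.id)

asyncio.run(main())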

volcenginesdkarkruntime/types/context/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -4,4 +4,5 @@
 
 from .context_chat_completion_chunk import ContextChatCompletionChunk
 from .context_chat_completion import ContextChatCompletion
-from .create_context_response import CreateContextResponse, TruncationStrategy
+from .create_context_response import CreateContextResponse
+from .context_create_params import TruncationStrategy, TTLTypes

volcenginesdkarkruntime/types/context/context_chat_completion_chunk.py

Lines changed: 1 addition & 1 deletion
@@ -37,4 +37,4 @@ class ContextChatCompletionChunk(BaseModel):
     `stream_options: {"include_usage": true}` in your request. When present, it
     contains a null value except for the last chunk which contains the token usage
     statistics for the entire request.
-    """
+    """
volcenginesdkarkruntime/types/context/context_create_params.py

Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import datetime
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = [
+    "TruncationStrategy",
+    "TTL",
+    "TTLTypes",
+    "to_optional_ttl"
+]
+
+
+class TruncationStrategy(TypedDict, total=False):
+    type: Required[Literal["last_history_tokens"]]
+    """The truncation strategy to use for the context. The default is last_history_tokens."""
+    last_history_tokens: Optional[int]
+    """The number of most recent tokens from the context when constructing the chat completion."""
+
+
+TTLTypes = Union[int, datetime.timedelta]
+
+
+def to_optional_ttl(ttl: TTLTypes | None) -> int | None:
+    if ttl is None:
+        return None
+    elif isinstance(ttl, datetime.timedelta):
+        return int(ttl.total_seconds())
+    elif isinstance(ttl, int):
+        return ttl
+    else:
+        raise TypeError(
+            f"Could not convert input to `ttl` \n'" f" type: {type(ttl)}\n",
+            ttl,
+        )
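
The helper keeps integer TTLs as-is, converts a datetime.timedelta via total_seconds(), and rejects anything else with a TypeError. A quick sanity sketch of that mapping, importing from the new module path shown above:

import datetime

from volcenginesdkarkruntime.types.context.context_create_params import to_optional_ttl

assert to_optional_ttl(None) is None                            # omitted TTL stays omitted
assert to_optional_ttl(1800) == 1800                            # plain seconds pass through
assert to_optional_ttl(datetime.timedelta(minutes=30)) == 1800  # timedelta -> whole seconds
# Anything else (e.g. a float or a string like "30m") raises TypeError.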

volcenginesdkarkruntime/types/context/context_usage.py

Lines changed: 3 additions & 1 deletion
@@ -1,6 +1,8 @@
 from typing import Optional
 from ..._models import BaseModel
 
+__all__ = ["ContextCompletionUsage"]
+
 
 class ContextCompletionUsage(BaseModel):
     prompt_tokens: int = 0
@@ -22,4 +24,4 @@ class ContextCompletionUsage(BaseModel):
     prompt_miss_cache_count: Optional[int] = None
     """
     prompt_miss_cache_count stands for the prompt miss cache count of context chat completion
-    """
+    """

volcenginesdkarkruntime/types/context/create_context_response.py

Lines changed: 2 additions & 11 deletions
@@ -1,16 +1,7 @@
-from typing import List, Optional
-
 from ..._models import BaseModel
-from typing_extensions import Literal
-
-__all__ = ["CreateContextResponse", "TruncationStrategy"]
-
+from .truncation_strategy import TruncationStrategy
 
-class TruncationStrategy(BaseModel):
-    type: Literal["last_history_tokens"]
-    """The truncation strategy to use for the context. The default is last_history_tokens."""
-    last_history_tokens: Optional[int] = None
-    """The number of most recent tokens from the context when constructing the chat completion."""
+__all__ = ["CreateContextResponse"]
 
 
 class CreateContextResponse(BaseModel):
volcenginesdkarkruntime/types/context/truncation_strategy.py

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+from typing import Optional
+
+from ..._models import BaseModel
+from typing_extensions import Literal
+
+__all__ = ["TruncationStrategy"]
+
+
+class TruncationStrategy(BaseModel):
+    type: Literal["last_history_tokens"]
+    """The truncation strategy to use for the context. The default is last_history_tokens."""
+    last_history_tokens: Optional[int] = None
+    """The number of most recent tokens from the context when constructing the chat completion."""

volcenginesdkexamples/volcenginesdkarkruntime/context.py

Lines changed: 9 additions & 6 deletions
@@ -1,3 +1,4 @@
+import datetime
 from volcenginesdkarkruntime import Ark
 
 # Authentication
@@ -6,27 +7,29 @@
 # Note: If you use an API key, this API key will not be refreshed.
 # To prevent the API from expiring and failing after some time, choose an API key with no expiration date.
 
-# 2.If you authorize your endpoint with Volcengine Identity and Access Management(IAM), set your api key to environment variable "VOLC_ACCESSKEY", "VOLC_SECRETKEY"
+# 2.If you authorize your endpoint with Volcengine Identity and Access Management(IAM),
+# set your api key to environment variable "VOLC_ACCESSKEY", "VOLC_SECRETKEY"
 # or specify ak&sk by Ark(ak="${YOUR_AK}", sk="${YOUR_SK}").
 # To get your ak&sk, please refer to this document(https://www.volcengine.com/docs/6291/65568)
 # For more information,please check this document(https://www.volcengine.com/docs/82379/1263279)
-client = Ark()
+client = Ark(api_key="${YOUR_API_KEY}")
 
 if __name__ == "__main__":
-    # Create context:
+    # Create context with 30 minutes cache:
     print("----- create context -----")
-    context_response = client.context.create(
+    response = client.context.create(
        model="${YOUR_ENDPOINT_ID}",
        messages=[
            {"role": "system", "content": "你是豆包,是由字节跳动开发的 AI 人工智能助手"},
        ],
+        ttl=datetime.timedelta(minutes=30),
    )
-    print(context_response)
+    print(response)
 
    # Streaming:
    print("----- streaming request -----")
    stream = client.context.completions.create(
-        context_id=context_response.id,
+        context_id=response.id,
        model="${YOUR_ENDPOINT_ID}",
        messages=[
            {"role": "user", "content": "你是谁?"},
