
Commit 4719e11

潘婉宁 authored and luminghao-bytedance committed
feat: support datetime ttl
1 parent ef44ad0 commit 4719e11

File tree

8 files changed: +75 −26 lines changed


volcenginesdkarkruntime/resources/context/context.py

Lines changed: 9 additions & 6 deletions
@@ -3,7 +3,7 @@
 from __future__ import annotations
 import httpx
 
-from typing import Iterable
+from typing import Iterable, Optional
 
 from ..._types import Body, Query, Headers
 from .completions import Completions, AsyncCompletions
@@ -13,7 +13,8 @@
 from ..._base_client import (
     make_request_options,
 )
-from ...types.context import CreateContextResponse, TruncationStrategy
+from ...types.context import CreateContextResponse
+from ...types.context.context_create_params import TTLTypes, TruncationStrategy, to_optional_ttl
 from ...types.chat import ChatCompletionMessageParam
 
 __all__ = ["Context", "AsyncContext"]
@@ -30,13 +31,14 @@ def create(
         *,
         model: str,
         messages: Iterable[ChatCompletionMessageParam],
-        ttl: int | None = None,
-        truncation_strategy: TruncationStrategy | None = None,
+        ttl: Optional[TTLTypes] | None = None,
+        truncation_strategy: Optional[TruncationStrategy] | None = None,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None = None,
    ) -> CreateContextResponse:
+        ttl = to_optional_ttl(ttl)
        return self._post(
            "/context/create",
            body={
@@ -66,13 +68,14 @@ async def create(
         *,
         model: str,
         messages: Iterable[ChatCompletionMessageParam],
-        ttl: int | None = None,
-        truncation_strategy: TruncationStrategy | None = None,
+        ttl: Optional[TTLTypes] | None = None,
+        truncation_strategy: Optional[TruncationStrategy] | None = None,
        extra_headers: Headers | None = None,
        extra_query: Query | None = None,
        extra_body: Body | None = None,
        timeout: float | httpx.Timeout | None = None,
    ) -> CreateContextResponse:
+        ttl = to_optional_ttl(ttl)
        return await self._post(
            "/context/create",
            body={
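
With this change, Context.create and AsyncContext.create accept either an integer number of seconds or a datetime.timedelta for ttl, normalizing it through to_optional_ttl before the request body is built. A minimal sketch of the async path, assuming the package exports AsyncArk alongside Ark (not shown in this diff) and using placeholder endpoint values:

import asyncio
import datetime

from volcenginesdkarkruntime import AsyncArk  # assumed export, mirroring Ark

async def main() -> None:
    client = AsyncArk(api_key="${YOUR_API_KEY}")
    # The timedelta is converted to integer seconds (here 7200) before sending.
    ctx = await client.context.create(
        model="${YOUR_ENDPOINT_ID}",
        messages=[{"role": "system", "content": "You are a helpful assistant."}],
        ttl=datetime.timedelta(hours=2),
    )
    print(ctx.id)

asyncio.run(main())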

volcenginesdkarkruntime/types/context/__init__.py

Lines changed: 2 additions & 1 deletion
@@ -4,4 +4,5 @@
 
 from .context_chat_completion_chunk import ContextChatCompletionChunk
 from .context_chat_completion import ContextChatCompletion
-from .create_context_response import CreateContextResponse, TruncationStrategy
+from .create_context_response import CreateContextResponse
+from .context_create_params import TruncationStrategy, TTLTypes

volcenginesdkarkruntime/types/context/context_chat_completion_chunk.py

Lines changed: 1 addition & 1 deletion
@@ -37,4 +37,4 @@ class ContextChatCompletionChunk(BaseModel):
     `stream_options: {"include_usage": true}` in your request. When present, it
     contains a null value except for the last chunk which contains the token usage
     statistics for the entire request.
-    """
+    """
volcenginesdkarkruntime/types/context/context_create_params.py

Lines changed: 36 additions & 0 deletions

@@ -0,0 +1,36 @@
+from __future__ import annotations
+
+import datetime
+from typing import Union, Optional
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = [
+    "TruncationStrategy",
+    "TTL",
+    "TTLTypes",
+    "to_optional_ttl"
+]
+
+
+class TruncationStrategy(TypedDict, total=False):
+    type: Required[Literal["last_history_tokens"]]
+    """The truncation strategy to use for the context. The default is last_history_tokens."""
+    last_history_tokens: Optional[int]
+    """The number of most recent tokens from the context when constructing the chat completion."""
+
+
+TTLTypes = Union[int, datetime.timedelta]
+
+
+def to_optional_ttl(ttl: TTLTypes | None) -> int | None:
+    if ttl is None:
+        return None
+    elif isinstance(ttl, datetime.timedelta):
+        return int(ttl.total_seconds())
+    elif isinstance(ttl, int):
+        return ttl
+    else:
+        raise TypeError(
+            f"Could not convert input to `ttl` \n'" f" type: {type(ttl)}\n",
+            ttl,
+        )
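
The helper keeps integer TTLs as-is, converts a datetime.timedelta via total_seconds(), and rejects anything else with a TypeError. A quick sanity sketch of that mapping, importing from the new module path shown above:

import datetime

from volcenginesdkarkruntime.types.context.context_create_params import to_optional_ttl

assert to_optional_ttl(None) is None                            # omitted TTL stays omitted
assert to_optional_ttl(1800) == 1800                            # plain seconds pass through
assert to_optional_ttl(datetime.timedelta(minutes=30)) == 1800  # timedelta -> whole seconds
# Anything else (e.g. a float or a string like "30m") raises TypeError.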

volcenginesdkarkruntime/types/context/context_usage.py

Lines changed: 3 additions & 1 deletion
@@ -1,6 +1,8 @@
 from typing import Optional
 from ..._models import BaseModel
 
+__all__ = ["ContextCompletionUsage"]
+
 
 class ContextCompletionUsage(BaseModel):
     prompt_tokens: int = 0
@@ -22,4 +24,4 @@ class ContextCompletionUsage(BaseModel):
     prompt_miss_cache_count: Optional[int] = None
     """
     prompt_miss_cache_count stands for the prompt miss cache count of context chat completion
-    """
+    """

volcenginesdkarkruntime/types/context/create_context_response.py

Lines changed: 2 additions & 11 deletions
@@ -1,16 +1,7 @@
-from typing import List, Optional
-
 from ..._models import BaseModel
-from typing_extensions import Literal
-
-__all__ = ["CreateContextResponse", "TruncationStrategy"]
-
+from .truncation_strategy import TruncationStrategy
 
-class TruncationStrategy(BaseModel):
-    type: Literal["last_history_tokens"]
-    """The truncation strategy to use for the context. The default is last_history_tokens."""
-    last_history_tokens: Optional[int] = None
-    """The number of most recent tokens from the context when constructing the chat completion."""
+__all__ = ["CreateContextResponse"]
 
 
 class CreateContextResponse(BaseModel):
volcenginesdkarkruntime/types/context/truncation_strategy.py

Lines changed: 13 additions & 0 deletions

@@ -0,0 +1,13 @@
+from typing import Optional
+
+from ..._models import BaseModel
+from typing_extensions import Literal
+
+__all__ = ["TruncationStrategy"]
+
+
+class TruncationStrategy(BaseModel):
+    type: Literal["last_history_tokens"]
+    """The truncation strategy to use for the context. The default is last_history_tokens."""
+    last_history_tokens: Optional[int] = None
+    """The number of most recent tokens from the context when constructing the chat completion."""

volcenginesdkexamples/volcenginesdkarkruntime/context.py

Lines changed: 9 additions & 6 deletions
@@ -1,3 +1,4 @@
+import datetime
 from volcenginesdkarkruntime import Ark
 
 # Authentication
@@ -6,27 +7,29 @@
 # Note: If you use an API key, this API key will not be refreshed.
 # To prevent the API from expiring and failing after some time, choose an API key with no expiration date.
 
-# 2.If you authorize your endpoint with Volcengine Identity and Access Management(IAM), set your api key to environment variable "VOLC_ACCESSKEY", "VOLC_SECRETKEY"
+# 2.If you authorize your endpoint with Volcengine Identity and Access Management(IAM),
+# set your api key to environment variable "VOLC_ACCESSKEY", "VOLC_SECRETKEY"
 # or specify ak&sk by Ark(ak="${YOUR_AK}", sk="${YOUR_SK}").
 # To get your ak&sk, please refer to this document(https://www.volcengine.com/docs/6291/65568)
 # For more information,please check this document(https://www.volcengine.com/docs/82379/1263279)
-client = Ark()
+client = Ark(api_key="${YOUR_API_KEY}")
 
 if __name__ == "__main__":
-    # Create context:
+    # Create context with 30 minutes cache:
     print("----- create context -----")
-    context_response = client.context.create(
+    response = client.context.create(
        model="${YOUR_ENDPOINT_ID}",
        messages=[
            {"role": "system", "content": "你是豆包,是由字节跳动开发的 AI 人工智能助手"},
        ],
+        ttl=datetime.timedelta(minutes=30),
    )
-    print(context_response)
+    print(response)
 
    # Streaming:
    print("----- streaming request -----")
    stream = client.context.completions.create(
-        context_id=context_response.id,
+        context_id=response.id,
        model="${YOUR_ENDPOINT_ID}",
        messages=[
            {"role": "user", "content": "你是谁?"},
