
Commit 4d2a005

exiaohuliyuxuan-bd authored and committed

fix: add example for batch embeddings

1 parent 4ab4c26

9 files changed: 331 additions, 10 deletions

volcenginesdkarkruntime/resources/batch/_utils.py

Lines changed: 1 addition & 1 deletion

@@ -23,7 +23,7 @@ def _calculate_retry_timeout(retry_times: int) -> float:
     return timeout if timeout >= 0 else 0
 
 
-def _get_retry_after(response: httpx.Response) -> int | None:
+def _get_retry_after(response: httpx.Response) -> Optional[int]:
     retry_after = response.headers.get("Retry-After")
     if retry_after is not None:
         if retry_after.isdigit():
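
The change swaps the PEP 604 union int | None for typing.Optional[int], which keeps the return annotation evaluable on Python versions older than 3.10 (the one-line diff does not show an import, so Optional is presumably already available in _utils.py). A minimal sketch of the idea, using a hypothetical helper rather than the SDK's own implementation:

from typing import Optional


def parse_retry_after(header_value: Optional[str]) -> Optional[int]:
    # Hypothetical helper: Optional[int] evaluates on every supported Python,
    # whereas a runtime-evaluated "int | None" needs Python 3.10+.
    if header_value is not None and header_value.isdigit():
        return int(header_value)
    return None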

volcenginesdkarkruntime/resources/batch/embeddings.py

Lines changed: 2 additions & 2 deletions

@@ -37,7 +37,7 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None = None,
     ) -> CreateEmbeddingResponse:
-        deadline = get_request_last_time(timeout)
+        deadline = get_request_last_time(self._client, timeout)
         breaker = self._client.get_model_breaker(model)
 
         return with_batch_retry(
@@ -81,7 +81,7 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None = None,
     ) -> CreateEmbeddingResponse:
-        deadline = get_request_last_time(timeout)
+        deadline = get_request_last_time(self._client, timeout)
         breaker = await self._client.get_model_breaker(model)
 
         return await with_batch_retry(
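
Both the sync and async create methods now pass self._client into get_request_last_time alongside the per-request timeout; the helper's internals are not shown in this diff, so presumably the deadline calculation can now fall back on the client's own configuration. Nothing changes at the call site. A usage-level sketch (the endpoint ID is a placeholder and the timeout override is illustrative):

from volcenginesdkarkruntime import Ark

client = Ark(timeout=24 * 3600)  # generous client-level timeout, as in the batch examples
response = client.batch.embeddings.create(
    model="${YOUR_ENDPOINT_ID}",
    input=["Cauliflower is a common vegetable."],
    timeout=600,  # optional per-request override, forwarded to the deadline calculation
)
print(response)
client.close()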

volcenginesdkarkruntime/resources/batch/multimodal_embeddings.py

Lines changed: 2 additions & 2 deletions

@@ -43,7 +43,7 @@ def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None = None,
     ) -> MultimodalEmbeddingResponse:
-        deadline = get_request_last_time(timeout)
+        deadline = get_request_last_time(self._client, timeout)
         breaker = self._client.get_model_breaker(model)
 
         return with_batch_retry(
@@ -87,7 +87,7 @@ async def create(
         extra_body: Body | None = None,
         timeout: float | httpx.Timeout | None = None,
     ) -> MultimodalEmbeddingResponse:
-        deadline = get_request_last_time(timeout)
+        deadline = get_request_last_time(self._client, timeout)
         breaker = await self._client.get_model_breaker(model)
 
         return await async_with_batch_retry(
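
The multimodal resource gets the same two-argument deadline call. For reference, a minimal one-shot async call mirroring the new example added further down (the endpoint ID is a placeholder; the image URL is the one used in that example):

import asyncio

from volcenginesdkarkruntime import AsyncArk


async def main():
    client = AsyncArk()  # reads ARK_API_KEY or VOLC_ACCESSKEY/VOLC_SECRETKEY from the environment
    response = await client.batch.multimodal_embeddings.create(
        model="${YOUR_ENDPOINT_ID}",
        input=[
            {"type": "text", "text": "What is the weather like today?"},
            {
                "type": "image_url",
                "image_url": {
                    "url": "https://ark-project.tos-cn-beijing.volces.com/images/view.jpeg"
                },
            },
        ],
    )
    print(response)
    await client.close()


if __name__ == "__main__":
    asyncio.run(main())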

volcenginesdkexamples/volcenginesdkarkruntime/async_batch_chat_completions.py

Lines changed: 2 additions & 2 deletions

@@ -19,7 +19,7 @@
 async def worker(
     worker_id: int,
     client: AsyncArk,
-    requests: asyncio.Queue[dict],
+    requests: "asyncio.Queue[dict]",
 ):
     print(f"Worker {worker_id} is starting.")
 
@@ -36,7 +36,7 @@ async def worker(
 
 async def main():
     start = datetime.now()
-    max_concurrent_tasks, task_num = 1000, 10000
+    max_concurrent_tasks, task_num = 10, 100
 
     requests = asyncio.Queue()
     client = AsyncArk(timeout=24 * 3600)
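
Two small fixes in this example: the queue annotation is quoted so that asyncio.Queue[dict] is never evaluated on interpreters where asyncio.Queue is not subscriptable at runtime (it gained that support in Python 3.9), and the defaults drop from 1000 workers / 10000 tasks to a gentler 10 / 100. An equivalent way to keep the annotation unquoted is postponed evaluation, sketched below (a standalone illustration, not SDK code):

from __future__ import annotations

import asyncio


async def worker(worker_id: int, requests: asyncio.Queue[dict]) -> None:
    # With postponed evaluation the subscripted annotation is only seen by
    # type checkers and never evaluated at runtime.
    request = await requests.get()
    print(f"Worker {worker_id} got {request}")
    requests.task_done()


async def main() -> None:
    requests = asyncio.Queue()
    await requests.put({"model": "${YOUR_ENDPOINT_ID}"})
    await worker(0, requests)
    await requests.join()


if __name__ == "__main__":
    asyncio.run(main())
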
Lines changed: 72 additions & 0 deletions (new file)

import asyncio
import sys
from datetime import datetime

from volcenginesdkarkruntime import AsyncArk

# Authentication
# 1. If you authorize your endpoint using an API key, you can set your api key to the environment variable "ARK_API_KEY"
# or specify the api key by Ark(api_key="${YOUR_API_KEY}").
# Note: If you use an API key, this API key will not be refreshed.
# To prevent the API key from expiring and failing after some time, choose an API key with no expiration date.

# 2. If you authorize your endpoint with Volcengine Identity and Access Management (IAM), set your access key to the environment variables "VOLC_ACCESSKEY" and "VOLC_SECRETKEY",
# or specify ak&sk by Ark(ak="${YOUR_AK}", sk="${YOUR_SK}").
# To get your ak&sk, please refer to this document: https://www.volcengine.com/docs/6291/65568
# For more information, please check this document: https://www.volcengine.com/docs/82379/1263279


async def worker(
    worker_id: int,
    client: AsyncArk,
    requests: "asyncio.Queue[dict]",
):
    print(f"Worker {worker_id} is starting.")

    while True:
        request = await requests.get()
        try:
            completion = await client.batch.embeddings.create(**request)
            print(completion)
        except Exception as e:
            print(e, file=sys.stderr)
        finally:
            requests.task_done()


async def main():
    start = datetime.now()
    max_concurrent_tasks, task_num = 10, 100

    requests = asyncio.Queue()
    client = AsyncArk(timeout=24 * 3600)

    # mock `task_num` tasks
    for _ in range(task_num):
        await requests.put(
            {"model": "${YOUR_ENDPOINT_ID}", "input": ["花椰菜又称菜花、花菜,是一种常见的蔬菜。"]}
        )

    # create `max_concurrent_tasks` workers and start them
    tasks = [
        asyncio.create_task(worker(i, client, requests))
        for i in range(max_concurrent_tasks)
    ]

    # wait until all requests are done
    await requests.join()

    # stop the workers
    for task in tasks:
        task.cancel()

    # wait until all workers are cancelled
    await asyncio.gather(*tasks, return_exceptions=True)
    await client.close()

    end = datetime.now()
    print(f"Total time: {end - start}, Total task: {task_num}")


if __name__ == "__main__":
    asyncio.run(main())
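
A note on the shutdown sequence in this example: the workers never leave their while True loop on their own. requests.join() returns once every queued request has been marked done, the main coroutine then cancels each worker task, and asyncio.gather(..., return_exceptions=True) absorbs the resulting CancelledError so the client can be closed and the script exits cleanly.
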
Lines changed: 83 additions & 0 deletions (new file)

import asyncio
import sys
from datetime import datetime

from volcenginesdkarkruntime import AsyncArk

# Authentication
# 1. If you authorize your endpoint using an API key, you can set your api key to the environment variable "ARK_API_KEY"
# or specify the api key by Ark(api_key="${YOUR_API_KEY}").
# Note: If you use an API key, this API key will not be refreshed.
# To prevent the API key from expiring and failing after some time, choose an API key with no expiration date.

# 2. If you authorize your endpoint with Volcengine Identity and Access Management (IAM), set your access key to the environment variables "VOLC_ACCESSKEY" and "VOLC_SECRETKEY",
# or specify ak&sk by Ark(ak="${YOUR_AK}", sk="${YOUR_SK}").
# To get your ak&sk, please refer to this document: https://www.volcengine.com/docs/6291/65568
# For more information, please check this document: https://www.volcengine.com/docs/82379/1263279


async def worker(
    worker_id: int,
    client: AsyncArk,
    requests: "asyncio.Queue[dict]",
):
    print(f"Worker {worker_id} is starting.")

    while True:
        request = await requests.get()
        try:
            completion = await client.batch.multimodal_embeddings.create(**request)
            print(completion)
        except Exception as e:
            print(e, file=sys.stderr)
        finally:
            requests.task_done()


async def main():
    start = datetime.now()
    max_concurrent_tasks, task_num = 10, 100

    requests = asyncio.Queue()
    client = AsyncArk(timeout=24 * 3600)

    # mock `task_num` tasks
    for _ in range(task_num):
        await requests.put(
            {
                "model": "${YOUR_ENDPOINT_ID}",
                "input": [
                    {"type": "text", "text": "What is the weather like today?"},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": "https://ark-project.tos-cn-beijing.volces.com/images/view.jpeg"
                        },
                    },
                ],
            }
        )

    # create `max_concurrent_tasks` workers and start them
    tasks = [
        asyncio.create_task(worker(i, client, requests))
        for i in range(max_concurrent_tasks)
    ]

    # wait until all requests are done
    await requests.join()

    # stop the workers
    for task in tasks:
        task.cancel()

    # wait until all workers are cancelled
    await asyncio.gather(*tasks, return_exceptions=True)
    await client.close()

    end = datetime.now()
    print(f"Total time: {end - start}, Total task: {task_num}")


if __name__ == "__main__":
    asyncio.run(main())

volcenginesdkexamples/volcenginesdkarkruntime/batch_chat_completions.py

Lines changed: 2 additions & 3 deletions

@@ -20,7 +20,7 @@
 def worker(
     worker_id: int,
     client: Ark,
-    requests: queue.Queue[dict],
+    requests: "queue.Queue[dict]",
 ):
     print(f"Worker {worker_id} is starting.")
 
@@ -45,7 +45,7 @@ def worker(
 
 def main():
     start = datetime.now()
-    max_concurrent_tasks, task_num = 1000, 10000
+    max_concurrent_tasks, task_num = 10, 100
 
     requests = queue.Queue()
     client = Ark(timeout=24 * 3600)
@@ -72,7 +72,6 @@ def main():
     with ThreadPool(max_concurrent_tasks) as pool:
         for i in range(max_concurrent_tasks):
             pool.apply_async(worker, args=(i, client, requests))
-            pool.apply_async(worker, args=(i, client, requests))
 
         # wait for all request to done
         pool.close()
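
The deleted line was a plain duplication: each loop iteration submitted the worker twice, queueing twice as many worker jobs as the pool has threads. Removing it, together with the reduced defaults, keeps the example at one worker per pool thread, as the max_concurrent_tasks name promises.
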
Lines changed: 78 additions & 0 deletions (new file)

import queue
import sys
from datetime import datetime
from multiprocessing.pool import ThreadPool

from volcenginesdkarkruntime import Ark

# Authentication
# 1. If you authorize your endpoint using an API key, you can set your api key to the environment variable "ARK_API_KEY"
# or specify the api key by Ark(api_key="${YOUR_API_KEY}").
# Note: If you use an API key, this API key will not be refreshed.
# To prevent the API key from expiring and failing after some time, choose an API key with no expiration date.

# 2. If you authorize your endpoint with Volcengine Identity and Access Management (IAM), set your access key to the environment variables "VOLC_ACCESSKEY" and "VOLC_SECRETKEY",
# or specify ak&sk by Ark(ak="${YOUR_AK}", sk="${YOUR_SK}").
# To get your ak&sk, please refer to this document: https://www.volcengine.com/docs/6291/65568
# For more information, please check this document: https://www.volcengine.com/docs/82379/1263279


def worker(
    worker_id: int,
    client: Ark,
    requests: "queue.Queue[dict]",
):
    print(f"Worker {worker_id} is starting.")

    while True:
        request = requests.get()

        # check for the signal that there are no more requests
        if not request:
            # put the signal back on the queue for the other workers
            requests.put(request)
            return

        try:
            # do the request
            completion = client.batch.embeddings.create(**request)
            print(completion)
        except Exception as e:
            print(e, file=sys.stderr)
        finally:
            requests.task_done()


def main():
    start = datetime.now()
    max_concurrent_tasks, task_num = 10, 100

    requests = queue.Queue()
    client = Ark(timeout=24 * 3600)

    # mock `task_num` tasks
    for _ in range(task_num):
        requests.put(
            {"model": "${YOUR_ENDPOINT_ID}", "input": ["花椰菜又称菜花、花菜,是一种常见的蔬菜。"]}
        )

    # put a signal that there are no more requests
    requests.put(None)

    # create `max_concurrent_tasks` workers and start them
    with ThreadPool(max_concurrent_tasks) as pool:
        for i in range(max_concurrent_tasks):
            pool.apply_async(worker, args=(i, client, requests))

        # wait until all requests are done
        pool.close()
        pool.join()

    client.close()

    end = datetime.now()
    print(f"Total time: {end - start}, Total task: {task_num}")


if __name__ == "__main__":
    main()
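
Unlike the asyncio variants, which cancel their workers, this threaded version shuts down through a None sentinel: the worker that pulls it puts it straight back so every other worker eventually sees it and returns, after which pool.close() and pool.join() let the pool drain and the client is closed.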
