Skip to content

Commit e98d87f

Browse files
author
hks
committed
feat: support sparse embeddings & video input
1 parent c86dd7d commit e98d87f

File tree

9 files changed

+80
-7
lines changed

9 files changed

+80
-7
lines changed

volcenginesdkarkruntime/resources/multimodal_embeddings.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,10 @@
1313
)
1414
from .._utils._utils import with_sts_token, async_with_sts_token
1515
from ..types.multimodal_embedding import EmbeddingInputParam
16-
from ..types.multimodal_embedding import MultimodalEmbeddingResponse
16+
from ..types.multimodal_embedding import (
17+
MultimodalEmbeddingResponse,
18+
SparseEmbeddingInput,
19+
)
1720

1821
__all__ = ["MultimodalEmbeddings", "AsyncMultimodalEmbeddings"]
1922

@@ -31,6 +34,7 @@ def create(
3134
model: str,
3235
encoding_format: Literal["float", "base64"] = "float",
3336
dimensions: int | None = None,
37+
sparse_embedding: SparseEmbeddingInput | None = None,
3438
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
3539
# The extra values given here take precedence over values defined on the client or passed to this method.
3640
extra_headers: Headers | None = None,
@@ -45,6 +49,7 @@ def create(
4549
"model": model,
4650
"encoding_format": encoding_format,
4751
"dimensions": dimensions,
52+
"sparse_embedding": sparse_embedding,
4853
},
4954
options=make_request_options(
5055
extra_headers=extra_headers,
@@ -69,6 +74,7 @@ async def create(
6974
model: str,
7075
encoding_format: Literal["float", "base64"] = "float",
7176
dimensions: int | None = None,
77+
sparse_embedding: SparseEmbeddingInput | None = None,
7278
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
7379
# The extra values given here take precedence over values defined on the client or passed to this method.
7480
extra_headers: Headers | None = None,
@@ -83,6 +89,7 @@ async def create(
8389
"model": model,
8490
"encoding_format": encoding_format,
8591
"dimensions": dimensions,
92+
"sparse_embedding": sparse_embedding,
8693
},
8794
options=make_request_options(
8895
extra_headers=extra_headers,
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
from .images import ImagesResponse
2+
3+
__all__ = ["ImagesResponse"]

volcenginesdkarkruntime/types/multimodal_embedding/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
from .embedding_content_part_text_param import MultimodalEmbeddingContentPartTextParam
55
from .embedding_content_part_image_param import MultimodalEmbeddingContentPartImageParam
66
from .embedding_input import EmbeddingInputParam
7-
from .embedding_data import MultimodalEmbedding
7+
from .embedding_data import MultimodalEmbedding, SparseEmbedding
8+
from .sparse_embedding_input import SparseEmbeddingInput
89

910
__all__ = [
1011
"MultimodalEmbeddingResponse",
1112
"MultimodalEmbeddingContentPartTextParam",
1213
"MultimodalEmbeddingContentPartImageParam",
1314
"EmbeddingInputParam",
1415
"MultimodalEmbedding",
16+
"SparseEmbeddingInput",
17+
"SparseEmbedding",
1518
]
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from __future__ import annotations
2+
3+
from typing_extensions import Literal, Required, TypedDict
4+
5+
__all__ = ["MultimodalEmbeddingContentPartVideoParam", "VideoURL"]
6+
7+
8+
class VideoURL(TypedDict, total=False):
9+
url: Required[str]
10+
"""Either a URL of the video or the base64 encoded video data."""
11+
12+
13+
class MultimodalEmbeddingContentPartVideoParam(TypedDict, total=False):
    """Video content part accepted as a multimodal embedding input."""

    video_url: Required[VideoURL]
    """The video to embed, described by a `VideoURL` mapping."""

    type: Required[Literal["video_url"]]
    """The type of the content part."""

volcenginesdkarkruntime/types/multimodal_embedding/embedding_data.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,14 @@
33

44
from ..._models import BaseModel
55

6-
__all__ = ["MultimodalEmbedding"]
6+
__all__ = ["MultimodalEmbedding", "SparseEmbedding"]
7+
8+
9+
class SparseEmbedding(BaseModel):
    """A single (index, value) entry of a sparse embedding."""

    index: int
    """The token index of the embedding."""

    value: float
    """The value of the embedding."""
714

815

916
class MultimodalEmbedding(BaseModel):
@@ -12,3 +19,6 @@ class MultimodalEmbedding(BaseModel):
1219

1320
object: Literal["embedding"]
1421
"""The object type, which is always "embedding"."""
22+
23+
sparse_embedding: SparseEmbedding
24+
"""The sparse embeddings generated by the model."""

volcenginesdkarkruntime/types/multimodal_embedding/embedding_input.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,12 @@
66
from .embedding_content_part_text_param import (
77
MultimodalEmbeddingContentPartTextParam,
88
)
9+
from .embedding_content_part_video_param import MultimodalEmbeddingContentPartVideoParam
910

1011
__all__ = ["EmbeddingInputParam"]
1112

1213
EmbeddingInputParam = Union[
13-
MultimodalEmbeddingContentPartImageParam, MultimodalEmbeddingContentPartTextParam
14+
MultimodalEmbeddingContentPartImageParam,
15+
MultimodalEmbeddingContentPartTextParam,
16+
MultimodalEmbeddingContentPartVideoParam,
1417
]

volcenginesdkarkruntime/types/multimodal_embedding/embedding_response.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
from typing import List
21
from typing_extensions import Literal
32

43
from .embedding_data import MultimodalEmbedding
@@ -15,8 +14,8 @@ class MultimodalEmbeddingResponse(BaseModel):
1514
created: int
1615
"""The Unix timestamp (in seconds) of when the embeddings was created."""
1716

18-
data: List[MultimodalEmbedding]
19-
"""The list of embeddings generated by the model."""
17+
data: MultimodalEmbedding
18+
"""The embeddings generated by the model."""
2019

2120
model: str
2221
"""The name of the model used to generate the embedding."""
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
from typing_extensions import Literal, Required, TypedDict
2+
3+
__all__ = ["SparseEmbeddingInput"]
4+
5+
6+
class SparseEmbeddingInput(TypedDict, total=False):
7+
type: Required[Literal["enabled", "disabled"]]
Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
from volcenginesdkarkruntime import Ark
2+
from volcenginesdkarkruntime.types.multimodal_embedding import MultimodalEmbeddingResponse
3+
4+
# Example: request dense and sparse embeddings for one text input.
client = Ark()

print("----- multimodal embeddings request -----")
resp: MultimodalEmbeddingResponse = client.multimodal_embeddings.create(
    model="doubao-embedding-vision-250615",
    input=[{"type": "text", "text": "花椰菜又称菜花、花菜,是一种常见的蔬菜。"}],
    # Ask the service to return the sparse embedding as well.
    sparse_embedding={"type": "enabled"},
)

# The response carries one embedding object under `data`.
result = resp.data

# dense embeddings
print("---- dense embeddings ----")
print(result.embedding)

# sparse embeddings: printed one entry per line
print("---- sparse embeddings ----")
for entry in result.sparse_embedding:
    print(entry)
25+

0 commit comments

Comments
 (0)