Skip to content

Commit 7d55834

Browse files
author
hks
committed
feat: support sparse embeddings
1 parent 5c7a9cf commit 7d55834

File tree

7 files changed

+261
-1
lines changed

7 files changed

+261
-1
lines changed

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/multimodalembeddings/MultimodalEmbedding.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package com.volcengine.ark.runtime.model.multimodalembeddings;
22

3+
import com.fasterxml.jackson.annotation.JsonCreator;
34
import com.fasterxml.jackson.annotation.JsonIgnoreProperties;
5+
import com.fasterxml.jackson.annotation.JsonProperty;
46

57
import java.util.List;
68

@@ -16,6 +18,16 @@ public class MultimodalEmbedding {
1618
*/
1719
private List<Double> embedding;
1820

21+
@JsonProperty("sparse_embedding")
22+
private List<SparseEmbedding> sparseEmbedding;
23+
24+
public List<SparseEmbedding> getSparseEmbedding() {
25+
return sparseEmbedding;
26+
}
27+
28+
public void setSparseEmbedding(List<SparseEmbedding> sparseEmbedding) {
29+
this.sparseEmbedding = sparseEmbedding;
30+
}
1931

2032
public String getObject() {
2133
return object;
@@ -38,6 +50,7 @@ public String toString() {
3850
return "Embedding{" +
3951
"object='" + object + '\'' +
4052
", embedding=" + embedding +
53+
", sparseEmbedding=" + sparseEmbedding +
4154
'}';
4255
}
4356
}

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/multimodalembeddings/MultimodalEmbeddingInput.java

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,8 @@ public class MultimodalEmbeddingInput {
1111

1212
MultimodalEmbeddingInput.MultiModalEmbeddingContentPartImageURL imageUrl;
1313

14+
MultimodalEmbeddingInput.MultiModalEmbeddingContentPartVideoURL videoUrl;
15+
1416
public String getType() {
1517
return type;
1618
}
@@ -35,6 +37,14 @@ public void setImageUrl(MultimodalEmbeddingInput.MultiModalEmbeddingContentPartI
3537
this.imageUrl = imageUrl;
3638
}
3739

40+
public MultimodalEmbeddingInput.MultiModalEmbeddingContentPartVideoURL getVideoUrl() {
41+
return videoUrl;
42+
}
43+
44+
public void setVideoUrl(MultimodalEmbeddingInput.MultiModalEmbeddingContentPartVideoURL videoUrl) {
45+
this.videoUrl = videoUrl;
46+
}
47+
3848
public static MultimodalEmbeddingInput.Builder builder() {
3949
return new MultimodalEmbeddingInput.Builder();
4050
}
@@ -45,6 +55,7 @@ public String toString() {
4555
"type='" + type + '\'' +
4656
", text='" + text + '\'' +
4757
", imageUrl=" + imageUrl +
58+
", videoUrl=" + videoUrl +
4859
'}';
4960
}
5061

@@ -90,10 +101,49 @@ public String toString() {
90101
}
91102
}
92103

104+
public static class MultiModalEmbeddingContentPartVideoURL {
105+
String url;
106+
double fps;
107+
108+
public MultiModalEmbeddingContentPartVideoURL(String url, double fps) {
109+
this.url = url;
110+
this.fps = fps;
111+
}
112+
public MultiModalEmbeddingContentPartVideoURL(String url) {
113+
this.url = url;
114+
}
115+
public MultiModalEmbeddingContentPartVideoURL() {}
116+
117+
public String getUrl() {
118+
return url;
119+
}
120+
121+
public void setUrl(String url) {
122+
this.url = url;
123+
}
124+
125+
public double getFps() {
126+
return fps;
127+
}
128+
129+
public void setFps(double fps) {
130+
this.fps = fps;
131+
}
132+
133+
@Override
134+
public String toString() {
135+
return "MultiModalEmbeddingContentPartVideoURL{" +
136+
"url='" + url + '\'' +
137+
", fps=" + fps +
138+
'}';
139+
}
140+
}
141+
93142
public static final class Builder {
94143
private String type;
95144
private String text;
96145
private MultimodalEmbeddingInput.MultiModalEmbeddingContentPartImageURL imageUrl;
146+
private MultimodalEmbeddingInput.MultiModalEmbeddingContentPartVideoURL videoUrl;
97147

98148
public MultimodalEmbeddingInput.Builder type(String type) {
99149
this.type = type;
@@ -110,11 +160,17 @@ public MultimodalEmbeddingInput.Builder imageUrl(MultimodalEmbeddingInput.MultiM
110160
return this;
111161
}
112162

163+
public MultimodalEmbeddingInput.Builder videoUrl(MultimodalEmbeddingInput.MultiModalEmbeddingContentPartVideoURL videoUrl) {
164+
this.videoUrl = videoUrl;
165+
return this;
166+
}
167+
113168
public MultimodalEmbeddingInput build() {
114169
MultimodalEmbeddingInput multiModalEmbeddingInput = new MultimodalEmbeddingInput();
115170
multiModalEmbeddingInput.setType(type);
116171
multiModalEmbeddingInput.setText(text);
117172
multiModalEmbeddingInput.setImageUrl(imageUrl);
173+
multiModalEmbeddingInput.setVideoUrl(videoUrl);
118174
return multiModalEmbeddingInput;
119175
}
120176
}

volcengine-java-sdk-ark-runtime/src/main/java/com/volcengine/ark/runtime/model/multimodalembeddings/MultimodalEmbeddingRequest.java

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,20 @@ public class MultimodalEmbeddingRequest {
3030
@JsonProperty("dimensions")
3131
private Integer dimensions;
3232

33+
@JsonProperty("sparse_embedding")
34+
private SparseEmbeddingInput sparseEmbedding;
35+
36+
37+
@Override
38+
public String toString() {
39+
return "MultimodalEmbeddingRequest{" +
40+
"model='" + model + '\'' +
41+
", input=" + input +
42+
", encodingFormat='" + encodingFormat + '\'' +
43+
", dimensions=" + dimensions +
44+
", sparseEmbedding=" + sparseEmbedding +
45+
'}';
46+
}
3347

3448
public MultimodalEmbeddingRequest() {
3549
}
@@ -66,6 +80,13 @@ public Integer getDimensions() {
6680
return dimensions;
6781
}
6882

83+
public void setSparseEmbedding(SparseEmbeddingInput sparseEmbedding) {
84+
this.sparseEmbedding = sparseEmbedding;
85+
}
86+
public SparseEmbeddingInput getSparseEmbedding() {
87+
return sparseEmbedding;
88+
}
89+
6990
public static MultimodalEmbeddingRequest.Builder builder() {
7091
return new MultimodalEmbeddingRequest.Builder();
7192
}
@@ -75,6 +96,7 @@ public static final class Builder {
7596
private List<MultimodalEmbeddingInput> input;
7697
private String encodingFormat;
7798
private Integer dimensions;
99+
private SparseEmbeddingInput sparseEmbedding;
78100

79101
private Builder() {
80102
}
@@ -99,12 +121,18 @@ public MultimodalEmbeddingRequest.Builder dimensions(Integer dimensions) {
99121
return this;
100122
}
101123

124+
public MultimodalEmbeddingRequest.Builder sparseEmbedding(SparseEmbeddingInput sparseEmbedding) {
125+
this.sparseEmbedding = sparseEmbedding;
126+
return this;
127+
}
128+
102129
public MultimodalEmbeddingRequest build() {
103130
MultimodalEmbeddingRequest embeddingRequest = new MultimodalEmbeddingRequest();
104131
embeddingRequest.setModel(model);
105132
embeddingRequest.setInput(input);
106133
embeddingRequest.setEncodingFormat(encodingFormat);
107134
embeddingRequest.setDimensions(dimensions);
135+
embeddingRequest.setSparseEmbedding(sparseEmbedding);
108136
return embeddingRequest;
109137
}
110138
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
package com.volcengine.ark.runtime.model.multimodalembeddings;
2+
3+
/**
 * One entry of a sparse embedding: an {@code index} paired with a {@code value}.
 * Both fields are nullable boxed types and stay {@code null} until set.
 */
public class SparseEmbedding {
    private Integer index;
    private Double value;

    /** Creates an entry with both fields unset ({@code null}). */
    public SparseEmbedding() {}

    /** @return a new, empty {@link Builder} */
    public static SparseEmbedding.Builder builder() {
        return new SparseEmbedding.Builder();
    }

    /** @return the index of this entry, possibly {@code null} */
    public Integer getIndex() {
        return this.index;
    }

    /** @param index the index to store; may be {@code null} */
    public void setIndex(Integer index) {
        this.index = index;
    }

    /** @return the value of this entry, possibly {@code null} */
    public Double getValue() {
        return this.value;
    }

    /** @param value the value to store; may be {@code null} */
    public void setValue(Double value) {
        this.value = value;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SparseEmbedding{");
        text.append("index=").append(index);
        text.append(", value=").append(value);
        text.append('}');
        return text.toString();
    }

    /** Fluent builder for {@link SparseEmbedding}. */
    public static final class Builder {
        private Integer index;
        private Double value;

        private Builder() {
        }

        /**
         * Alternative factory for a new builder; does the same as
         * {@link SparseEmbedding#builder()}.
         *
         * @return a new, empty builder
         */
        public static SparseEmbedding.Builder SparseEmbedding() {
            return new SparseEmbedding.Builder();
        }

        /**
         * @param index the index for the entry being built
         * @return this builder, for chaining
         */
        public SparseEmbedding.Builder index(Integer index) {
            this.index = index;
            return this;
        }

        /**
         * @param value the value for the entry being built
         * @return this builder, for chaining
         */
        public SparseEmbedding.Builder value(Double value) {
            this.value = value;
            return this;
        }

        /**
         * Creates a new {@link SparseEmbedding} carrying the index and value
         * collected so far.
         *
         * @return the populated entry
         */
        public SparseEmbedding build() {
            final SparseEmbedding entry = new SparseEmbedding();
            entry.setIndex(this.index);
            entry.setValue(this.value);
            return entry;
        }
    }
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
package com.volcengine.ark.runtime.model.multimodalembeddings;
2+
3+
/**
 * Request-side option object for sparse embeddings. It carries a single
 * {@code type} string; the accepted values are defined by the service
 * (presumably {@code "enabled"} among them; verify against the API reference).
 */
public class SparseEmbeddingInput {
    private String type;

    /** Creates an option object with {@code type} unset ({@code null}). */
    public SparseEmbeddingInput() {}

    /** @return a new, empty {@link Builder} */
    public static SparseEmbeddingInput.Builder builder() {
        return new SparseEmbeddingInput.Builder();
    }

    /** @return the stored type string, possibly {@code null} */
    public String getType() {
        return this.type;
    }

    /** @param type the type string to store; may be {@code null} */
    public void setType(String type) {
        this.type = type;
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SparseEmbeddingInput{");
        text.append("type='").append(type).append('\'');
        text.append('}');
        return text.toString();
    }

    /** Fluent builder for {@link SparseEmbeddingInput}. */
    public static final class Builder {
        private String type;

        private Builder() {
        }

        /**
         * Alternative factory for a new builder; does the same as
         * {@link SparseEmbeddingInput#builder()}.
         *
         * @return a new, empty builder
         */
        public static SparseEmbeddingInput.Builder SparseEmbeddingInput() {
            return new Builder();
        }

        /**
         * @param type the type string for the object being built
         * @return this builder, for chaining
         */
        public SparseEmbeddingInput.Builder type(String type) {
            this.type = type;
            return this;
        }

        /**
         * Creates a new {@link SparseEmbeddingInput} carrying the type
         * collected so far.
         *
         * @return the populated option object
         */
        public SparseEmbeddingInput build() {
            final SparseEmbeddingInput option = new SparseEmbeddingInput();
            option.setType(this.type);
            return option;
        }
    }
}

volcengine-java-sdk-ark-runtime/test/java/com/volcengine/ark/runtime/MultiModalEmbeddingsExample.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ public static void main(String[] args) {
4848
).build());
4949

5050
MultimodalEmbeddingRequest multiModalEmbeddingRequest = MultimodalEmbeddingRequest.builder()
51-
.model("${YOUR_ENDPOINT_ID}")
51+
.model("doubao-embedding-vision-250615")
5252
.input(inputs)
5353
.build();
5454

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package com.volcengine.ark.runtime;
2+
3+
import com.volcengine.ark.runtime.model.multimodalembeddings.MultimodalEmbeddingInput;
4+
import com.volcengine.ark.runtime.model.multimodalembeddings.MultimodalEmbeddingRequest;
5+
import com.volcengine.ark.runtime.model.multimodalembeddings.MultimodalEmbeddingResult;
6+
import com.volcengine.ark.runtime.model.multimodalembeddings.SparseEmbeddingInput;
7+
import com.volcengine.ark.runtime.service.ArkService;
8+
import okhttp3.ConnectionPool;
9+
import okhttp3.Dispatcher;
10+
11+
import java.util.ArrayList;
12+
import java.util.List;
13+
import java.util.concurrent.TimeUnit;
14+
15+
public class SparseEmbeddingsExample {
16+
/**
17+
* Authentication
18+
* 1.If you authorize your endpoint using an API key, you can set your api key to environment variable "ARK_API_KEY"
19+
* String apiKey = System.getenv("ARK_API_KEY");
20+
* ArkService service = new ArkService(apiKey);
21+
* Note: If you use an API key, this API key will not be refreshed.
22+
* To prevent the API from expiring and failing after some time, choose an API key with no expiration date.
23+
* <p>
24+
* 2.If you authorize your endpoint with Volcengine Identity and Access Management(IAM), set your api key to environment variable "VOLC_ACCESSKEY", "VOLC_SECRETKEY"
25+
* String ak = System.getenv("VOLC_ACCESSKEY");
26+
* String sk = System.getenv("VOLC_SECRETKEY");
27+
* ArkService service = new ArkService(ak, sk);
28+
* To get your ak&sk, please refer to this document(https://www.volcengine.com/docs/6291/65568)
29+
* For more information,please check this document(https://www.volcengine.com/docs/82379/1263279)
30+
*/
31+
32+
static String apiKey = System.getenv("ARK_API_KEY");
33+
static ConnectionPool connectionPool = new ConnectionPool(5, 1, TimeUnit.SECONDS);
34+
static Dispatcher dispatcher = new Dispatcher();
35+
static ArkService service = ArkService.builder().dispatcher(dispatcher).connectionPool(connectionPool).apiKey(apiKey).build();
36+
37+
public static void main(String[] args) {
38+
System.out.println("\n----- sparse embeddings request -----");
39+
40+
List<MultimodalEmbeddingInput> inputs = new ArrayList<>();
41+
inputs.add(MultimodalEmbeddingInput.builder().type("text").text(
42+
"The food was delicious and the waiter..."
43+
).build());
44+
45+
MultimodalEmbeddingRequest multiModalEmbeddingRequest = MultimodalEmbeddingRequest.builder()
46+
.model("doubao-embedding-vision-250615")
47+
.input(inputs)
48+
.sparseEmbedding(SparseEmbeddingInput.builder().type("enabled").build())
49+
.build();
50+
51+
MultimodalEmbeddingResult res = service.createMultiModalEmbeddings(multiModalEmbeddingRequest);
52+
System.out.println(res.getData().getSparseEmbedding());
53+
54+
// shutdown service after all requests is finished
55+
service.shutdownExecutor();
56+
}
57+
58+
}

0 commit comments

Comments
 (0)