Skip to content

Commit 5f99708

Browse files
authored
Refactor EmbeddingRequestChunker (#122818)
* refactor * inference generics * more refactor * unify naming * remove interface "EmbeddingInt" * more renaming * javadoc * revert accidental changes * remove unused EmbeddingRequestChunker.EmbeddingType * polish * support chunking for text embedding bits * Polish error messages * fix VoyageAI conflicts
1 parent 698bf31 commit 5f99708

File tree

82 files changed

+1247
-1623
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

82 files changed

+1247
-1623
lines changed
Lines changed: 5 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,12 +7,8 @@
77

88
package org.elasticsearch.xpack.core.inference.results;
99

10-
import org.elasticsearch.common.bytes.BytesReference;
1110
import org.elasticsearch.inference.ChunkedInference;
12-
import org.elasticsearch.xcontent.ToXContent;
1311
import org.elasticsearch.xcontent.XContent;
14-
import org.elasticsearch.xcontent.XContentBuilder;
15-
import org.elasticsearch.xpack.core.ml.search.WeightedToken;
1612

1713
import java.io.IOException;
1814
import java.util.ArrayList;
@@ -21,17 +17,17 @@
2117

2218
import static org.elasticsearch.xpack.core.inference.results.TextEmbeddingUtils.validateInputSizeAgainstEmbeddings;
2319

24-
public record ChunkedInferenceEmbeddingSparse(List<SparseEmbeddingChunk> chunks) implements ChunkedInference {
20+
public record ChunkedInferenceEmbedding(List<? extends EmbeddingResults.Chunk> chunks) implements ChunkedInference {
2521

2622
public static List<ChunkedInference> listOf(List<String> inputs, SparseEmbeddingResults sparseEmbeddingResults) {
2723
validateInputSizeAgainstEmbeddings(inputs, sparseEmbeddingResults.embeddings().size());
2824

2925
var results = new ArrayList<ChunkedInference>(inputs.size());
3026
for (int i = 0; i < inputs.size(); i++) {
3127
results.add(
32-
new ChunkedInferenceEmbeddingSparse(
28+
new ChunkedInferenceEmbedding(
3329
List.of(
34-
new SparseEmbeddingChunk(
30+
new SparseEmbeddingResults.Chunk(
3531
sparseEmbeddingResults.embeddings().get(i).tokens(),
3632
inputs.get(i),
3733
new TextOffset(0, inputs.get(i).length())
@@ -47,21 +43,9 @@ public static List<ChunkedInference> listOf(List<String> inputs, SparseEmbedding
4743
@Override
4844
public Iterator<Chunk> chunksAsMatchedTextAndByteReference(XContent xcontent) throws IOException {
4945
var asChunk = new ArrayList<Chunk>();
50-
for (var chunk : chunks) {
51-
asChunk.add(new Chunk(chunk.matchedText(), chunk.offset(), toBytesReference(xcontent, chunk.weightedTokens())));
46+
for (var chunk : chunks()) {
47+
asChunk.add(chunk.toChunk(xcontent));
5248
}
5349
return asChunk.iterator();
5450
}
55-
56-
private static BytesReference toBytesReference(XContent xContent, List<WeightedToken> tokens) throws IOException {
57-
XContentBuilder b = XContentBuilder.builder(xContent);
58-
b.startObject();
59-
for (var weightedToken : tokens) {
60-
weightedToken.toXContent(b, ToXContent.EMPTY_PARAMS);
61-
}
62-
b.endObject();
63-
return BytesReference.bytes(b);
64-
}
65-
66-
public record SparseEmbeddingChunk(List<WeightedToken> weightedTokens, String matchedText, TextOffset offset) {}
6751
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbeddingByte.java

Lines changed: 0 additions & 45 deletions
This file was deleted.

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbeddingFloat.java

Lines changed: 0 additions & 45 deletions
This file was deleted.

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/EmbeddingInt.java

Lines changed: 0 additions & 12 deletions
This file was deleted.
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the Elastic License
4+
* 2.0; you may not use this file except in compliance with the Elastic License
5+
* 2.0.
6+
*/
7+
8+
package org.elasticsearch.xpack.core.inference.results;
9+
10+
import org.elasticsearch.inference.ChunkedInference;
11+
import org.elasticsearch.inference.InferenceServiceResults;
12+
import org.elasticsearch.xcontent.XContent;
13+
14+
import java.io.IOException;
15+
import java.util.List;
16+
17+
/**
18+
* The results of a call to the inference service that contains embeddings (sparse or dense).
19+
* A call to the inference service may contain multiple input texts, so these results may
20+
* contain multiple results.
21+
*/
22+
public interface EmbeddingResults<C extends EmbeddingResults.Chunk, E extends EmbeddingResults.Embedding<C>>
23+
extends
24+
InferenceServiceResults {
25+
26+
/**
27+
* A resulting embedding together with its input text.
28+
*/
29+
interface Chunk {
30+
ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException;
31+
32+
String matchedText();
33+
34+
ChunkedInference.TextOffset offset();
35+
}
36+
37+
/**
38+
* A resulting embedding for one of the input texts to the inference service.
39+
*/
40+
interface Embedding<C extends Chunk> {
41+
/**
42+
* Combines the resulting embedding with the input into a chunk.
43+
*/
44+
C toChunk(String text, ChunkedInference.TextOffset offset);
45+
}
46+
47+
/**
48+
* The resulting list of embeddings for the input texts to the inference service.
49+
*/
50+
List<E> embeddings();
51+
}

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/InferenceByteEmbedding.java

Lines changed: 0 additions & 95 deletions
This file was deleted.

0 commit comments

Comments
 (0)