Skip to content

Commit 13e875e

Browse files
committed
Removed matched text from chunk (part 2)
1 parent 276b6f1 commit 13e875e

File tree

12 files changed

+19
-37
lines changed

12 files changed

+19
-37
lines changed

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,6 @@ public static List<ChunkedInference> listOf(List<String> inputs, SparseEmbedding
2929
List.of(
3030
new SparseEmbeddingResults.Chunk(
3131
sparseEmbeddingResults.embeddings().get(i).tokens(),
32-
inputs.get(i),
3332
new TextOffset(0, inputs.get(i).length())
3433
)
3534
)

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/EmbeddingResults.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,11 @@ public interface EmbeddingResults<C extends EmbeddingResults.Chunk, E extends Em
2424
InferenceServiceResults {
2525

2626
/**
27-
* A resulting embedding together with its input text.
27+
* A resulting embedding together with the offset into the input text.
2828
*/
2929
interface Chunk {
3030
ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException;
3131

32-
String matchedText();
33-
3432
ChunkedInference.TextOffset offset();
3533
}
3634

@@ -39,9 +37,9 @@ interface Chunk {
3937
*/
4038
interface Embedding<C extends Chunk> {
4139
/**
42-
* Combines the resulting embedding with the input into a chunk.
40+
* Combines the resulting embedding and its offset into the input text to form a chunk.
4341
*/
44-
C toChunk(String text, ChunkedInference.TextOffset offset);
42+
C toChunk(ChunkedInference.TextOffset offset);
4543
}
4644

4745
/**

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -175,14 +175,12 @@ public String toString() {
175175
}
176176

177177
@Override
178-
public Chunk toChunk(String text, ChunkedInference.TextOffset offset) {
179-
return new Chunk(tokens, text, offset);
178+
public Chunk toChunk(ChunkedInference.TextOffset offset) {
179+
return new Chunk(tokens, offset);
180180
}
181181
}
182182

183-
public record Chunk(List<WeightedToken> weightedTokens, String matchedText, ChunkedInference.TextOffset offset)
184-
implements
185-
EmbeddingResults.Chunk {
183+
public record Chunk(List<WeightedToken> weightedTokens, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk {
186184

187185
public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException {
188186
return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, weightedTokens));

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -187,15 +187,15 @@ public int hashCode() {
187187
}
188188

189189
@Override
190-
public Chunk toChunk(String text, ChunkedInference.TextOffset offset) {
191-
return new Chunk(values, text, offset);
190+
public Chunk toChunk(ChunkedInference.TextOffset offset) {
191+
return new Chunk(values, offset);
192192
}
193193
}
194194

195195
/**
196196
* Serialises the {@code value} array, according to the provided {@link XContent}, into a {@link BytesReference}.
197197
*/
198-
public record Chunk(byte[] embedding, String matchedText, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk {
198+
public record Chunk(byte[] embedding, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk {
199199

200200
public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException {
201201
return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, embedding));

x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -221,12 +221,12 @@ public int hashCode() {
221221
}
222222

223223
@Override
224-
public Chunk toChunk(String text, ChunkedInference.TextOffset offset) {
225-
return new Chunk(values, text, offset);
224+
public Chunk toChunk(ChunkedInference.TextOffset offset) {
225+
return new Chunk(values, offset);
226226
}
227227
}
228228

229-
public record Chunk(float[] embedding, String matchedText, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk {
229+
public record Chunk(float[] embedding, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk {
230230

231231
public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException {
232232
return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, embedding));

x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,6 @@ private List<ChunkedInference> makeChunkedResults(List<String> input, int dimens
183183
List.of(
184184
new TextEmbeddingFloatResults.Chunk(
185185
nonChunkedResults.embeddings().get(i).values(),
186-
input.get(i),
187186
new ChunkedInference.TextOffset(0, input.get(i).length())
188187
)
189188
)

x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java

Lines changed: 1 addition & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -172,13 +172,7 @@ private List<ChunkedInference> makeChunkedResults(List<String> input) {
172172
}
173173
results.add(
174174
new ChunkedInferenceEmbedding(
175-
List.of(
176-
new SparseEmbeddingResults.Chunk(
177-
tokens,
178-
input.get(i),
179-
new ChunkedInference.TextOffset(0, input.get(i).length())
180-
)
181-
)
175+
List.of(new SparseEmbeddingResults.Chunk(tokens, new ChunkedInference.TextOffset(0, input.get(i).length())))
182176
)
183177
);
184178
}

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunker.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -197,10 +197,7 @@ private ChunkedInference mergeResultsWithInputs(int index) {
197197
AtomicReferenceArray<EmbeddingResults.Embedding<?>> result = results.get(index);
198198
for (int i = 0; i < request.size(); i++) {
199199
EmbeddingResults.Chunk chunk = result.get(i)
200-
.toChunk(
201-
request.get(i).chunkText(),
202-
new ChunkedInference.TextOffset(request.get(i).chunk.start(), request.get(i).chunk.end())
203-
);
200+
.toChunk(new ChunkedInference.TextOffset(request.get(i).chunk.start(), request.get(i).chunk.end()));
204201
chunks.add(chunk);
205202
}
206203
return new ChunkedInferenceEmbedding(chunks);

x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,6 @@ private static List<ChunkedInference> translateToChunkedResults(DocumentsOnlyInp
121121
List.of(
122122
new TextEmbeddingFloatResults.Chunk(
123123
textEmbeddingResults.embeddings().get(i).values(),
124-
inputs.getInputs().get(i),
125124
new ChunkedInference.TextOffset(0, inputs.getInputs().get(i).length())
126125
)
127126
)

x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ public static ChunkedInferenceEmbedding randomChunkedInferenceEmbeddingByte(Mode
177177
for (int j = 0; j < values.length; j++) {
178178
values[j] = randomByte();
179179
}
180-
chunks.add(new TextEmbeddingByteResults.Chunk(values, input, new ChunkedInference.TextOffset(0, input.length())));
180+
chunks.add(new TextEmbeddingByteResults.Chunk(values, new ChunkedInference.TextOffset(0, input.length())));
181181
}
182182
return new ChunkedInferenceEmbedding(chunks);
183183
}
@@ -189,7 +189,7 @@ public static ChunkedInferenceEmbedding randomChunkedInferenceEmbeddingFloat(Mod
189189
for (int j = 0; j < values.length; j++) {
190190
values[j] = randomFloat();
191191
}
192-
chunks.add(new TextEmbeddingFloatResults.Chunk(values, input, new ChunkedInference.TextOffset(0, input.length())));
192+
chunks.add(new TextEmbeddingFloatResults.Chunk(values, new ChunkedInference.TextOffset(0, input.length())));
193193
}
194194
return new ChunkedInferenceEmbedding(chunks);
195195
}
@@ -205,7 +205,7 @@ public static ChunkedInferenceEmbedding randomChunkedInferenceEmbeddingSparse(Li
205205
for (var token : input.split("\\s+")) {
206206
tokens.add(new WeightedToken(token, withFloats ? randomFloat() : randomIntBetween(1, 255)));
207207
}
208-
chunks.add(new SparseEmbeddingResults.Chunk(tokens, input, new ChunkedInference.TextOffset(0, input.length())));
208+
chunks.add(new SparseEmbeddingResults.Chunk(tokens, new ChunkedInference.TextOffset(0, input.length())));
209209
}
210210
return new ChunkedInferenceEmbedding(chunks);
211211
}
@@ -308,7 +308,7 @@ public static ChunkedInference toChunkedResult(
308308
String matchedText = matchedTextIt.next();
309309
ChunkedInference.TextOffset offset = createOffset(useLegacyFormat, chunk, matchedText);
310310
var tokens = parseWeightedTokens(chunk.rawEmbeddings(), field.contentType());
311-
chunks.add(new SparseEmbeddingResults.Chunk(tokens, matchedText, offset));
311+
chunks.add(new SparseEmbeddingResults.Chunk(tokens, offset));
312312
}
313313
}
314314
return new ChunkedInferenceEmbedding(chunks);
@@ -329,7 +329,7 @@ public static ChunkedInference toChunkedResult(
329329
field.inference().modelSettings().dimensions(),
330330
field.contentType()
331331
);
332-
chunks.add(new TextEmbeddingFloatResults.Chunk(FloatConversionUtils.floatArrayOf(values), matchedText, offset));
332+
chunks.add(new TextEmbeddingFloatResults.Chunk(FloatConversionUtils.floatArrayOf(values), offset));
333333
}
334334
}
335335
return new ChunkedInferenceEmbedding(chunks);

0 commit comments

Comments
 (0)