From a935afdb017ad8618887d9557dfec4230c69d303 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 25 Feb 2025 13:30:47 -0500 Subject: [PATCH 1/5] Update ChunkedInferenceError to return an empty chunk iterator --- .../xpack/core/inference/results/ChunkedInferenceError.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java index 65be9f12d7686..c2f6eebfc01bf 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java @@ -7,17 +7,16 @@ package org.elasticsearch.xpack.core.inference.results; -import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.inference.ChunkedInference; import org.elasticsearch.xcontent.XContent; +import java.util.Collections; import java.util.Iterator; -import java.util.stream.Stream; public record ChunkedInferenceError(Exception exception) implements ChunkedInference { @Override public Iterator chunksAsMatchedTextAndByteReference(XContent xcontent) { - return Stream.of(exception).map(e -> new Chunk(e.getMessage(), new TextOffset(0, 0), BytesArray.EMPTY)).iterator(); + return Collections.emptyIterator(); } } From 276b6f17b42d696261c3de64855b434f2c798878 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 25 Feb 2025 13:42:31 -0500 Subject: [PATCH 2/5] Removed matched text from chunk --- .../java/org/elasticsearch/inference/ChunkedInference.java | 6 ++---- .../core/inference/results/SparseEmbeddingResults.java | 2 +- .../core/inference/results/TextEmbeddingByteResults.java | 2 +- .../core/inference/results/TextEmbeddingFloatResults.java | 2 +- 4 files changed, 5 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java b/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java index c54e5a98d56cc..e48eafde1396b 100644 --- a/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java +++ b/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java @@ -26,13 +26,11 @@ public interface ChunkedInference { Iterator chunksAsMatchedTextAndByteReference(XContent xcontent) throws IOException; /** - * A chunk of inference results containing matched text, the substring location - * in the original text and the bytes reference. - * @param matchedText + * A chunk of inference results containing the substring location in the original text and the bytes reference. * @param textOffset * @param bytesReference */ - record Chunk(String matchedText, TextOffset textOffset, BytesReference bytesReference) {} + record Chunk(TextOffset textOffset, BytesReference bytesReference) {} record TextOffset(int start, int end) {} } diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java index 894e8c6c97bfd..8806407911d61 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java @@ -185,7 +185,7 @@ public record Chunk(List weightedTokens, String matchedText, Chun EmbeddingResults.Chunk { public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException { - return new ChunkedInference.Chunk(matchedText, offset, toBytesReference(xcontent, weightedTokens)); + return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, weightedTokens)); } private static BytesReference toBytesReference(XContent xContent, List tokens) throws IOException { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java index f8268d7bd4683..7bc2f522bac64 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java @@ -198,7 +198,7 @@ public Chunk toChunk(String text, ChunkedInference.TextOffset offset) { public record Chunk(byte[] embedding, String matchedText, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException { - return new ChunkedInference.Chunk(matchedText, offset, toBytesReference(xcontent, embedding)); + return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, embedding)); } private static BytesReference toBytesReference(XContent xContent, byte[] value) throws IOException { diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java index cef381982b447..3dbf422ec9905 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java @@ -229,7 +229,7 @@ public Chunk toChunk(String text, ChunkedInference.TextOffset offset) { public record Chunk(float[] embedding, String matchedText, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException { - return new ChunkedInference.Chunk(matchedText, offset, toBytesReference(xcontent, embedding)); + return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, embedding)); } /** From 13e875ed550ce711b2fd7097aa8158fbd75fa628 Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Tue, 25 Feb 2025 15:40:35 -0500 Subject: [PATCH 3/5] Removed matched text from chunk (part 2) --- .../inference/results/ChunkedInferenceEmbedding.java | 1 - .../xpack/core/inference/results/EmbeddingResults.java | 8 +++----- .../core/inference/results/SparseEmbeddingResults.java | 8 +++----- .../inference/results/TextEmbeddingByteResults.java | 6 +++--- .../inference/results/TextEmbeddingFloatResults.java | 6 +++--- .../mock/TestDenseInferenceServiceExtension.java | 1 - .../mock/TestSparseInferenceServiceExtension.java | 8 +------- .../inference/chunking/EmbeddingRequestChunker.java | 5 +---- .../huggingface/elser/HuggingFaceElserService.java | 1 - .../xpack/inference/mapper/SemanticTextFieldTests.java | 10 +++++----- .../services/elastic/ElasticInferenceServiceTests.java | 1 - .../huggingface/HuggingFaceElserServiceTests.java | 1 - 12 files changed, 19 insertions(+), 37 deletions(-) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java index e723a3b4f8f60..7158e9c9a16bf 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java @@ -29,7 +29,6 @@ public static List listOf(List inputs, SparseEmbedding List.of( new SparseEmbeddingResults.Chunk( sparseEmbeddingResults.embeddings().get(i).tokens(), - inputs.get(i), new TextOffset(0, inputs.get(i).length()) ) ) diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/EmbeddingResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/EmbeddingResults.java index c6f4c6915024b..8cd5d78a8ca9d 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/EmbeddingResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/EmbeddingResults.java @@ -24,13 +24,11 @@ public interface EmbeddingResults { /** - * Combines the resulting embedding with the input into a chunk. + * Combines the resulting embedding with the offset into the input text into a chunk. */ - C toChunk(String text, ChunkedInference.TextOffset offset); + C toChunk(ChunkedInference.TextOffset offset); } /** diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java index 8806407911d61..c4001a6325fcf 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/SparseEmbeddingResults.java @@ -175,14 +175,12 @@ public String toString() { } @Override - public Chunk toChunk(String text, ChunkedInference.TextOffset offset) { - return new Chunk(tokens, text, offset); + public Chunk toChunk(ChunkedInference.TextOffset offset) { + return new Chunk(tokens, offset); } } - public record Chunk(List weightedTokens, String matchedText, ChunkedInference.TextOffset offset) - implements - EmbeddingResults.Chunk { + public record Chunk(List weightedTokens, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException { return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, weightedTokens)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java index 7bc2f522bac64..fd8f22e535ee8 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingByteResults.java @@ -187,15 +187,15 @@ public int hashCode() { } @Override - public Chunk toChunk(String text, ChunkedInference.TextOffset offset) { - return new Chunk(values, text, offset); + public Chunk toChunk(ChunkedInference.TextOffset offset) { + return new Chunk(values, offset); } } /** * Serialises the {@code value} array, according to the provided {@link XContent}, into a {@link BytesReference}. */ - public record Chunk(byte[] embedding, String matchedText, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { + public record Chunk(byte[] embedding, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException { return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, embedding)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java index 3dbf422ec9905..8dfdf57f9d1b0 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/TextEmbeddingFloatResults.java @@ -221,12 +221,12 @@ public int hashCode() { } @Override - public Chunk toChunk(String text, ChunkedInference.TextOffset offset) { - return new Chunk(values, text, offset); + public Chunk toChunk(ChunkedInference.TextOffset offset) { + return new Chunk(values, offset); } } - public record Chunk(float[] embedding, String matchedText, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { + public record Chunk(float[] embedding, ChunkedInference.TextOffset offset) implements EmbeddingResults.Chunk { public ChunkedInference.Chunk toChunk(XContent xcontent) throws IOException { return new ChunkedInference.Chunk(offset, toBytesReference(xcontent, embedding)); diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java index da7acf122bb72..b9aee73932e14 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestDenseInferenceServiceExtension.java @@ -183,7 +183,6 @@ private List makeChunkedResults(List input, int dimens List.of( new TextEmbeddingFloatResults.Chunk( nonChunkedResults.embeddings().get(i).values(), - input.get(i), new ChunkedInference.TextOffset(0, input.get(i).length()) ) ) diff --git a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java index fcc175a051964..4e10ce45efeac 100644 --- a/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java +++ b/x-pack/plugin/inference/qa/test-service-plugin/src/main/java/org/elasticsearch/xpack/inference/mock/TestSparseInferenceServiceExtension.java @@ -172,13 +172,7 @@ private List makeChunkedResults(List input) { } results.add( new ChunkedInferenceEmbedding( - List.of( - new SparseEmbeddingResults.Chunk( - tokens, - input.get(i), - new ChunkedInference.TextOffset(0, input.get(i).length()) - ) - ) + List.of(new SparseEmbeddingResults.Chunk(tokens, new ChunkedInference.TextOffset(0, input.get(i).length()))) ) ); } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunker.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunker.java index d8751a542392d..0d018f30a8a63 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunker.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunker.java @@ -197,10 +197,7 @@ private ChunkedInference mergeResultsWithInputs(int index) { AtomicReferenceArray> result = results.get(index); for (int i = 0; i < request.size(); i++) { EmbeddingResults.Chunk chunk = result.get(i) - .toChunk( - request.get(i).chunkText(), - new ChunkedInference.TextOffset(request.get(i).chunk.start(), request.get(i).chunk.end()) - ); + .toChunk(new ChunkedInference.TextOffset(request.get(i).chunk.start(), request.get(i).chunk.end())); chunks.add(chunk); } return new ChunkedInferenceEmbedding(chunks); diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java index 434b94e6f8ac4..8009fae673a34 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/services/huggingface/elser/HuggingFaceElserService.java @@ -121,7 +121,6 @@ private static List translateToChunkedResults(DocumentsOnlyInp List.of( new TextEmbeddingFloatResults.Chunk( textEmbeddingResults.embeddings().get(i).values(), - inputs.getInputs().get(i), new ChunkedInference.TextOffset(0, inputs.getInputs().get(i).length()) ) ) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java index 9eb10cfd9f1a8..27baeb2fca585 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java @@ -177,7 +177,7 @@ public static ChunkedInferenceEmbedding randomChunkedInferenceEmbeddingByte(Mode for (int j = 0; j < values.length; j++) { values[j] = randomByte(); } - chunks.add(new TextEmbeddingByteResults.Chunk(values, input, new ChunkedInference.TextOffset(0, input.length()))); + chunks.add(new TextEmbeddingByteResults.Chunk(values, new ChunkedInference.TextOffset(0, input.length()))); } return new ChunkedInferenceEmbedding(chunks); } @@ -189,7 +189,7 @@ public static ChunkedInferenceEmbedding randomChunkedInferenceEmbeddingFloat(Mod for (int j = 0; j < values.length; j++) { values[j] = randomFloat(); } - chunks.add(new TextEmbeddingFloatResults.Chunk(values, input, new ChunkedInference.TextOffset(0, input.length()))); + chunks.add(new TextEmbeddingFloatResults.Chunk(values, new ChunkedInference.TextOffset(0, input.length()))); } return new ChunkedInferenceEmbedding(chunks); } @@ -205,7 +205,7 @@ public static ChunkedInferenceEmbedding randomChunkedInferenceEmbeddingSparse(Li for (var token : input.split("\\s+")) { tokens.add(new WeightedToken(token, withFloats ? randomFloat() : randomIntBetween(1, 255))); } - chunks.add(new SparseEmbeddingResults.Chunk(tokens, input, new ChunkedInference.TextOffset(0, input.length()))); + chunks.add(new SparseEmbeddingResults.Chunk(tokens, new ChunkedInference.TextOffset(0, input.length()))); } return new ChunkedInferenceEmbedding(chunks); } @@ -308,7 +308,7 @@ public static ChunkedInference toChunkedResult( String matchedText = matchedTextIt.next(); ChunkedInference.TextOffset offset = createOffset(useLegacyFormat, chunk, matchedText); var tokens = parseWeightedTokens(chunk.rawEmbeddings(), field.contentType()); - chunks.add(new SparseEmbeddingResults.Chunk(tokens, matchedText, offset)); + chunks.add(new SparseEmbeddingResults.Chunk(tokens, offset)); } } return new ChunkedInferenceEmbedding(chunks); @@ -329,7 +329,7 @@ public static ChunkedInference toChunkedResult( field.inference().modelSettings().dimensions(), field.contentType() ); - chunks.add(new TextEmbeddingFloatResults.Chunk(FloatConversionUtils.floatArrayOf(values), matchedText, offset)); + chunks.add(new TextEmbeddingFloatResults.Chunk(FloatConversionUtils.floatArrayOf(values), offset)); } } return new ChunkedInferenceEmbedding(chunks); diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java index b3f8579903885..4e1c701553af5 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elastic/ElasticInferenceServiceTests.java @@ -562,7 +562,6 @@ public void testChunkedInfer_PassesThrough() throws IOException { List.of( new SparseEmbeddingResults.Chunk( List.of(new WeightedToken("hello", 2.1259406f), new WeightedToken("greet", 1.7073475f)), - "input text", new ChunkedInference.TextOffset(0, "input text".length()) ) ) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java index 1050ac137be8d..d732f4f85f60d 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceElserServiceTests.java @@ -111,7 +111,6 @@ public void testChunkedInfer_CallsInfer_Elser_ConvertsFloatResponse() throws IOE List.of( new SparseEmbeddingResults.Chunk( List.of(new WeightedToken(".", 0.13315596f)), - "abc", new ChunkedInference.TextOffset(0, "abc".length()) ) ) From 008e0c0cbcbf0d422cc59a52a5174d06c8bab6ca Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Wed, 26 Feb 2025 13:30:43 -0500 Subject: [PATCH 4/5] Fixed tests --- .../EmbeddingRequestChunkerTests.java | 74 +++++++++---------- .../AmazonBedrockServiceTests.java | 6 +- .../AzureAiStudioServiceTests.java | 8 +- .../azureopenai/AzureOpenAiServiceTests.java | 8 +- .../services/cohere/CohereServiceTests.java | 16 ++-- .../ElasticsearchInternalServiceTests.java | 18 ++--- .../GoogleAiStudioServiceTests.java | 6 +- .../huggingface/HuggingFaceServiceTests.java | 3 +- .../ibmwatsonx/IbmWatsonxServiceTests.java | 8 +- .../services/jinaai/JinaAIServiceTests.java | 8 +- .../services/openai/OpenAiServiceTests.java | 8 +- .../voyageai/VoyageAIServiceTests.java | 8 +- 12 files changed, 85 insertions(+), 86 deletions(-) diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunkerTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunkerTests.java index 7cdae8d51ce0b..aa33cd0000b42 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunkerTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/chunking/EmbeddingRequestChunkerTests.java @@ -246,7 +246,7 @@ public void testMergingListener_Float() { for (int i = 0; i < numberOfWordsInPassage; i++) { passageBuilder.append("passage_input").append(i).append(" "); // chunk on whitespace } - List inputs = List.of("1st small", passageBuilder.toString(), "2nd small", "3rd small"); + List inputs = List.of("a", passageBuilder.toString(), "bb", "ccc"); var finalListener = testListener(); var batches = new EmbeddingRequestChunker(inputs, batchSize, chunkSize, overlap).batchRequestsWithListeners(finalListener); @@ -275,7 +275,7 @@ public void testMergingListener_Float() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedFloatResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedFloatResult.chunks(), hasSize(1)); - assertEquals("1st small", chunkedFloatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), chunkedFloatResult.chunks().get(0).offset()); } { // this is the large input split in multiple chunks @@ -283,26 +283,26 @@ public void testMergingListener_Float() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedFloatResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedFloatResult.chunks(), hasSize(6)); - assertThat(chunkedFloatResult.chunks().get(0).matchedText(), startsWith("passage_input0 ")); - assertThat(chunkedFloatResult.chunks().get(1).matchedText(), startsWith(" passage_input20 ")); - assertThat(chunkedFloatResult.chunks().get(2).matchedText(), startsWith(" passage_input40 ")); - assertThat(chunkedFloatResult.chunks().get(3).matchedText(), startsWith(" passage_input60 ")); - assertThat(chunkedFloatResult.chunks().get(4).matchedText(), startsWith(" passage_input80 ")); - assertThat(chunkedFloatResult.chunks().get(5).matchedText(), startsWith(" passage_input100 ")); + assertThat(chunkedFloatResult.chunks().get(0).offset(), equalTo(new ChunkedInference.TextOffset(0, 309))); + assertThat(chunkedFloatResult.chunks().get(1).offset(), equalTo(new ChunkedInference.TextOffset(309, 629))); + assertThat(chunkedFloatResult.chunks().get(2).offset(), equalTo(new ChunkedInference.TextOffset(629, 949))); + assertThat(chunkedFloatResult.chunks().get(3).offset(), equalTo(new ChunkedInference.TextOffset(949, 1269))); + assertThat(chunkedFloatResult.chunks().get(4).offset(), equalTo(new ChunkedInference.TextOffset(1269, 1589))); + assertThat(chunkedFloatResult.chunks().get(5).offset(), equalTo(new ChunkedInference.TextOffset(1589, 1675))); } { var chunkedResult = finalListener.results.get(2); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedFloatResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedFloatResult.chunks(), hasSize(1)); - assertEquals("2nd small", chunkedFloatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), chunkedFloatResult.chunks().get(0).offset()); } { var chunkedResult = finalListener.results.get(3); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedFloatResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedFloatResult.chunks(), hasSize(1)); - assertEquals("3rd small", chunkedFloatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 3), chunkedFloatResult.chunks().get(0).offset()); } } @@ -318,7 +318,7 @@ public void testMergingListener_Byte() { for (int i = 0; i < numberOfWordsInPassage; i++) { passageBuilder.append("passage_input").append(i).append(" "); // chunk on whitespace } - List inputs = List.of("1st small", passageBuilder.toString(), "2nd small", "3rd small"); + List inputs = List.of("a", passageBuilder.toString(), "bb", "ccc"); var finalListener = testListener(); var batches = new EmbeddingRequestChunker(inputs, batchSize, chunkSize, overlap).batchRequestsWithListeners(finalListener); @@ -347,7 +347,7 @@ public void testMergingListener_Byte() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(1)); - assertEquals("1st small", chunkedByteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), chunkedByteResult.chunks().get(0).offset()); } { // this is the large input split in multiple chunks @@ -355,26 +355,26 @@ public void testMergingListener_Byte() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(6)); - assertThat(chunkedByteResult.chunks().get(0).matchedText(), startsWith("passage_input0 ")); - assertThat(chunkedByteResult.chunks().get(1).matchedText(), startsWith(" passage_input20 ")); - assertThat(chunkedByteResult.chunks().get(2).matchedText(), startsWith(" passage_input40 ")); - assertThat(chunkedByteResult.chunks().get(3).matchedText(), startsWith(" passage_input60 ")); - assertThat(chunkedByteResult.chunks().get(4).matchedText(), startsWith(" passage_input80 ")); - assertThat(chunkedByteResult.chunks().get(5).matchedText(), startsWith(" passage_input100 ")); + assertThat(chunkedByteResult.chunks().get(0).offset(), equalTo(new ChunkedInference.TextOffset(0, 309))); + assertThat(chunkedByteResult.chunks().get(1).offset(), equalTo(new ChunkedInference.TextOffset(309, 629))); + assertThat(chunkedByteResult.chunks().get(2).offset(), equalTo(new ChunkedInference.TextOffset(629, 949))); + assertThat(chunkedByteResult.chunks().get(3).offset(), equalTo(new ChunkedInference.TextOffset(949, 1269))); + assertThat(chunkedByteResult.chunks().get(4).offset(), equalTo(new ChunkedInference.TextOffset(1269, 1589))); + assertThat(chunkedByteResult.chunks().get(5).offset(), equalTo(new ChunkedInference.TextOffset(1589, 1675))); } { var chunkedResult = finalListener.results.get(2); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(1)); - assertEquals("2nd small", chunkedByteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), chunkedByteResult.chunks().get(0).offset()); } { var chunkedResult = finalListener.results.get(3); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(1)); - assertEquals("3rd small", chunkedByteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 3), chunkedByteResult.chunks().get(0).offset()); } } @@ -390,7 +390,7 @@ public void testMergingListener_Bit() { for (int i = 0; i < numberOfWordsInPassage; i++) { passageBuilder.append("passage_input").append(i).append(" "); // chunk on whitespace } - List inputs = List.of("1st small", passageBuilder.toString(), "2nd small", "3rd small"); + List inputs = List.of("a", passageBuilder.toString(), "bb", "ccc"); var finalListener = testListener(); var batches = new EmbeddingRequestChunker(inputs, batchSize, chunkSize, overlap).batchRequestsWithListeners(finalListener); @@ -419,7 +419,7 @@ public void testMergingListener_Bit() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(1)); - assertEquals("1st small", chunkedByteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), chunkedByteResult.chunks().get(0).offset()); } { // this is the large input split in multiple chunks @@ -427,26 +427,26 @@ public void testMergingListener_Bit() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(6)); - assertThat(chunkedByteResult.chunks().get(0).matchedText(), startsWith("passage_input0 ")); - assertThat(chunkedByteResult.chunks().get(1).matchedText(), startsWith(" passage_input20 ")); - assertThat(chunkedByteResult.chunks().get(2).matchedText(), startsWith(" passage_input40 ")); - assertThat(chunkedByteResult.chunks().get(3).matchedText(), startsWith(" passage_input60 ")); - assertThat(chunkedByteResult.chunks().get(4).matchedText(), startsWith(" passage_input80 ")); - assertThat(chunkedByteResult.chunks().get(5).matchedText(), startsWith(" passage_input100 ")); + assertThat(chunkedByteResult.chunks().get(0).offset(), equalTo(new ChunkedInference.TextOffset(0, 309))); + assertThat(chunkedByteResult.chunks().get(1).offset(), equalTo(new ChunkedInference.TextOffset(309, 629))); + assertThat(chunkedByteResult.chunks().get(2).offset(), equalTo(new ChunkedInference.TextOffset(629, 949))); + assertThat(chunkedByteResult.chunks().get(3).offset(), equalTo(new ChunkedInference.TextOffset(949, 1269))); + assertThat(chunkedByteResult.chunks().get(4).offset(), equalTo(new ChunkedInference.TextOffset(1269, 1589))); + assertThat(chunkedByteResult.chunks().get(5).offset(), equalTo(new ChunkedInference.TextOffset(1589, 1675))); } { var chunkedResult = finalListener.results.get(2); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(1)); - assertEquals("2nd small", chunkedByteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), chunkedByteResult.chunks().get(0).offset()); } { var chunkedResult = finalListener.results.get(3); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedByteResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedByteResult.chunks(), hasSize(1)); - assertEquals("3rd small", chunkedByteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 3), chunkedByteResult.chunks().get(0).offset()); } } @@ -462,7 +462,7 @@ public void testMergingListener_Sparse() { for (int i = 0; i < numberOfWordsInPassage; i++) { passageBuilder.append("passage_input").append(i).append(" "); // chunk on whitespace } - List inputs = List.of("1st small", "2nd small", "3rd small", passageBuilder.toString()); + List inputs = List.of("a", "bb", "ccc", passageBuilder.toString()); var finalListener = testListener(); var batches = new EmbeddingRequestChunker(inputs, batchSize, chunkSize, overlap).batchRequestsWithListeners(finalListener); @@ -498,21 +498,21 @@ public void testMergingListener_Sparse() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedSparseResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedSparseResult.chunks(), hasSize(1)); - assertEquals("1st small", chunkedSparseResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), chunkedSparseResult.chunks().get(0).offset()); } { var chunkedResult = finalListener.results.get(1); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedSparseResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedSparseResult.chunks(), hasSize(1)); - assertEquals("2nd small", chunkedSparseResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), chunkedSparseResult.chunks().get(0).offset()); } { var chunkedResult = finalListener.results.get(2); assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedSparseResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedSparseResult.chunks(), hasSize(1)); - assertEquals("3rd small", chunkedSparseResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 3), chunkedSparseResult.chunks().get(0).offset()); } { // this is the large input split in multiple chunks @@ -520,9 +520,9 @@ public void testMergingListener_Sparse() { assertThat(chunkedResult, instanceOf(ChunkedInferenceEmbedding.class)); var chunkedSparseResult = (ChunkedInferenceEmbedding) chunkedResult; assertThat(chunkedSparseResult.chunks(), hasSize(9)); // passage is split into 9 chunks, 10 words each - assertThat(chunkedSparseResult.chunks().get(0).matchedText(), startsWith("passage_input0 ")); - assertThat(chunkedSparseResult.chunks().get(1).matchedText(), startsWith(" passage_input10 ")); - assertThat(chunkedSparseResult.chunks().get(8).matchedText(), startsWith(" passage_input80 ")); + assertThat(chunkedSparseResult.chunks().get(0).offset(), equalTo(new ChunkedInference.TextOffset(0, 149))); + assertThat(chunkedSparseResult.chunks().get(1).offset(), equalTo(new ChunkedInference.TextOffset(149, 309))); + assertThat(chunkedSparseResult.chunks().get(8).offset(), equalTo(new ChunkedInference.TextOffset(1269, 1350))); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java index 970dab45731bd..43c6422ee041f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/amazonbedrock/AmazonBedrockServiceTests.java @@ -1444,7 +1444,7 @@ private void testChunkedInfer(AmazonBedrockEmbeddingsModel model) throws IOExcep service.chunkedInfer( model, null, - List.of("abc", "xyz"), + List.of("a", "bb"), new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1457,7 +1457,7 @@ private void testChunkedInfer(AmazonBedrockEmbeddingsModel model) throws IOExcep assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("abc", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.123F, 0.678F }, @@ -1469,7 +1469,7 @@ private void testChunkedInfer(AmazonBedrockEmbeddingsModel model) throws IOExcep assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("xyz", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.223F, 0.278F }, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java index 045789a92bf38..6cb527220703a 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureaistudio/AzureAiStudioServiceTests.java @@ -1192,7 +1192,7 @@ private void testChunkedInfer(AzureAiStudioEmbeddingsModel model) throws IOExcep service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1205,7 +1205,7 @@ private void testChunkedInfer(AzureAiStudioEmbeddingsModel model) throws IOExcep assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("foo", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.0123f, -0.0123f }, @@ -1217,7 +1217,7 @@ private void testChunkedInfer(AzureAiStudioEmbeddingsModel model) throws IOExcep assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("bar", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 1.0123f, -1.0123f }, @@ -1233,7 +1233,7 @@ private void testChunkedInfer(AzureAiStudioEmbeddingsModel model) throws IOExcep var requestMap = entityAsMap(webServer.requests().get(0).getBody()); assertThat(requestMap.size(), Matchers.is(2)); - assertThat(requestMap.get("input"), Matchers.is(List.of("foo", "bar"))); + assertThat(requestMap.get("input"), Matchers.is(List.of("a", "bb"))); assertThat(requestMap.get("user"), Matchers.is("user")); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java index e58a7049ef872..6e51942fe5e22 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/azureopenai/AzureOpenAiServiceTests.java @@ -1341,7 +1341,7 @@ private void testChunkedInfer(AzureOpenAiEmbeddingsModel model) throws IOExcepti service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1354,7 +1354,7 @@ private void testChunkedInfer(AzureOpenAiEmbeddingsModel model) throws IOExcepti assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("foo", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.123f, -0.123f }, @@ -1366,7 +1366,7 @@ private void testChunkedInfer(AzureOpenAiEmbeddingsModel model) throws IOExcepti assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("bar", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 1.123f, -1.123f }, @@ -1382,7 +1382,7 @@ private void testChunkedInfer(AzureOpenAiEmbeddingsModel model) throws IOExcepti var requestMap = entityAsMap(webServer.requests().get(0).getBody()); assertThat(requestMap.size(), Matchers.is(2)); - assertThat(requestMap.get("input"), Matchers.is(List.of("foo", "bar"))); + assertThat(requestMap.get("input"), Matchers.is(List.of("a", "bb"))); assertThat(requestMap.get("user"), Matchers.is("user")); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java index bd1dbc201f52e..e3eba8703fcb0 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/cohere/CohereServiceTests.java @@ -1452,7 +1452,7 @@ private void testChunkedInfer(CohereEmbeddingsModel model) throws IOException { service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.UNSPECIFIED, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1465,7 +1465,7 @@ private void testChunkedInfer(CohereEmbeddingsModel model) throws IOException { assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("foo", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().get(0).offset()); assertArrayEquals( new float[] { 0.123f, -0.123f }, ((TextEmbeddingFloatResults.Chunk) floatResult.chunks().get(0)).embedding(), @@ -1476,7 +1476,7 @@ private void testChunkedInfer(CohereEmbeddingsModel model) throws IOException { assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("bar", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().get(0).offset()); assertArrayEquals( new float[] { 0.223f, -0.223f }, ((TextEmbeddingFloatResults.Chunk) floatResult.chunks().get(0)).embedding(), @@ -1495,7 +1495,7 @@ private void testChunkedInfer(CohereEmbeddingsModel model) throws IOException { var requestMap = entityAsMap(webServer.requests().get(0).getBody()); MatcherAssert.assertThat( requestMap, - is(Map.of("texts", List.of("foo", "bar"), "model", "model", "embedding_types", List.of("float"))) + is(Map.of("texts", List.of("a", "bb"), "model", "model", "embedding_types", List.of("float"))) ); } } @@ -1551,7 +1551,7 @@ public void testChunkedInfer_BatchesCalls_Bytes() throws IOException { service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.UNSPECIFIED, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1564,7 +1564,7 @@ public void testChunkedInfer_BatchesCalls_Bytes() throws IOException { assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var byteResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(byteResult.chunks(), hasSize(1)); - assertEquals("foo", byteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), byteResult.chunks().get(0).offset()); assertThat(byteResult.chunks().get(0), instanceOf(TextEmbeddingByteResults.Chunk.class)); assertArrayEquals(new byte[] { 23, -23 }, ((TextEmbeddingByteResults.Chunk) byteResult.chunks().get(0)).embedding()); } @@ -1572,7 +1572,7 @@ public void testChunkedInfer_BatchesCalls_Bytes() throws IOException { assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var byteResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(byteResult.chunks(), hasSize(1)); - assertEquals("bar", byteResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), byteResult.chunks().get(0).offset()); assertThat(byteResult.chunks().get(0), instanceOf(TextEmbeddingByteResults.Chunk.class)); assertArrayEquals(new byte[] { 24, -24 }, ((TextEmbeddingByteResults.Chunk) byteResult.chunks().get(0)).embedding()); } @@ -1588,7 +1588,7 @@ public void testChunkedInfer_BatchesCalls_Bytes() throws IOException { var requestMap = entityAsMap(webServer.requests().get(0).getBody()); MatcherAssert.assertThat( requestMap, - is(Map.of("texts", List.of("foo", "bar"), "model", "model", "embedding_types", List.of("int8"))) + is(Map.of("texts", List.of("a", "bb"), "model", "model", "embedding_types", List.of("int8"))) ); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java index e7e654b599fe6..2bf47b06c771c 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/elasticsearch/ElasticsearchInternalServiceTests.java @@ -902,7 +902,7 @@ private void testChunkInfer_e5(ChunkingSettings chunkingSettings) throws Interru ((TextEmbeddingFloatResults.Chunk) result1.chunks().get(0)).embedding(), 0.0001f ); - assertEquals("foo", result1.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), result1.chunks().get(0).offset()); assertThat(chunkedResponse.get(1), instanceOf(ChunkedInferenceEmbedding.class)); var result2 = (ChunkedInferenceEmbedding) chunkedResponse.get(1); assertThat(result2.chunks(), hasSize(1)); @@ -912,7 +912,7 @@ private void testChunkInfer_e5(ChunkingSettings chunkingSettings) throws Interru ((TextEmbeddingFloatResults.Chunk) result2.chunks().get(0)).embedding(), 0.0001f ); - assertEquals("bar", result2.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), result2.chunks().get(0).offset()); gotResults.set(true); }, ESTestCase::fail); @@ -923,7 +923,7 @@ private void testChunkInfer_e5(ChunkingSettings chunkingSettings) throws Interru service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), Map.of(), InputType.SEARCH, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -977,7 +977,7 @@ private void testChunkInfer_Sparse(ChunkingSettings chunkingSettings) throws Int ((TextExpansionResults) mlTrainedModelResults.get(0)).getWeightedTokens(), ((SparseEmbeddingResults.Chunk) result1.chunks().get(0)).weightedTokens() ); - assertEquals("foo", result1.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), result1.chunks().get(0).offset()); assertThat(chunkedResponse.get(1), instanceOf(ChunkedInferenceEmbedding.class)); var result2 = (ChunkedInferenceEmbedding) chunkedResponse.get(1); assertThat(result2.chunks().get(0), instanceOf(SparseEmbeddingResults.Chunk.class)); @@ -985,7 +985,7 @@ private void testChunkInfer_Sparse(ChunkingSettings chunkingSettings) throws Int ((TextExpansionResults) mlTrainedModelResults.get(1)).getWeightedTokens(), ((SparseEmbeddingResults.Chunk) result2.chunks().get(0)).weightedTokens() ); - assertEquals("bar", result2.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), result2.chunks().get(0).offset()); gotResults.set(true); }, ESTestCase::fail); @@ -995,7 +995,7 @@ private void testChunkInfer_Sparse(ChunkingSettings chunkingSettings) throws Int service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), Map.of(), InputType.SEARCH, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1049,7 +1049,7 @@ private void testChunkInfer_Elser(ChunkingSettings chunkingSettings) throws Inte ((TextExpansionResults) mlTrainedModelResults.get(0)).getWeightedTokens(), ((SparseEmbeddingResults.Chunk) result1.chunks().get(0)).weightedTokens() ); - assertEquals("foo", result1.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), result1.chunks().get(0).offset()); assertThat(chunkedResponse.get(1), instanceOf(ChunkedInferenceEmbedding.class)); var result2 = (ChunkedInferenceEmbedding) chunkedResponse.get(1); assertThat(result2.chunks().get(0), instanceOf(SparseEmbeddingResults.Chunk.class)); @@ -1057,7 +1057,7 @@ private void testChunkInfer_Elser(ChunkingSettings chunkingSettings) throws Inte ((TextExpansionResults) mlTrainedModelResults.get(1)).getWeightedTokens(), ((SparseEmbeddingResults.Chunk) result2.chunks().get(0)).weightedTokens() ); - assertEquals("bar", result2.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), result2.chunks().get(0).offset()); gotResults.set(true); }, ESTestCase::fail); @@ -1067,7 +1067,7 @@ private void testChunkInfer_Elser(ChunkingSettings chunkingSettings) throws Inte service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), Map.of(), InputType.SEARCH, InferenceAction.Request.DEFAULT_TIMEOUT, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java index 9828a4f21ab51..9343d1c25f48f 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/googleaistudio/GoogleAiStudioServiceTests.java @@ -844,7 +844,7 @@ public void testChunkedInfer_ChunkingSettingsSet() throws IOException { private void testChunkedInfer(String modelId, String apiKey, GoogleAiStudioEmbeddingsModel model) throws IOException { - var input = List.of("foo", "bar"); + var input = List.of("a", "bb"); var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); @@ -881,7 +881,7 @@ private void testChunkedInfer(String modelId, String apiKey, GoogleAiStudioEmbed assertThat(results.get(0), instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals(input.get(0), floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, input.get(0).length()), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertTrue( Arrays.equals( @@ -896,7 +896,7 @@ private void testChunkedInfer(String modelId, String apiKey, GoogleAiStudioEmbed assertThat(results.get(1), instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals(input.get(1), floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, input.get(1).length()), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertTrue( Arrays.equals( diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java index b9e7cda1461cc..32a597aecb410 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/huggingface/HuggingFaceServiceTests.java @@ -787,7 +787,6 @@ public void testChunkedInfer_CallsInfer_TextEmbedding_ConvertsFloatResponse() th assertThat(result, CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var embeddingResult = (ChunkedInferenceEmbedding) result; assertThat(embeddingResult.chunks(), hasSize(1)); - assertThat(embeddingResult.chunks().get(0).matchedText(), is("abc")); assertThat(embeddingResult.chunks().get(0).offset(), is(new ChunkedInference.TextOffset(0, "abc".length()))); assertThat(embeddingResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( @@ -842,7 +841,7 @@ public void testChunkedInfer() throws IOException { assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("abc", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 3), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.123f, -0.123f }, diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java index 74d055d44363d..d74c9a7eafd06 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/ibmwatsonx/IbmWatsonxServiceTests.java @@ -686,7 +686,7 @@ public void testChunkedInfer_ChunkingSettingsSet() throws IOException { } private void testChunkedInfer_Batches(ChunkingSettings chunkingSettings) throws IOException { - var input = List.of("foo", "bar"); + var input = List.of("a", "bb"); var senderFactory = HttpRequestSenderTests.createSenderFactory(threadPool, clientManager); @@ -733,7 +733,7 @@ private void testChunkedInfer_Batches(ChunkingSettings chunkingSettings) throws assertThat(results.get(0), instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals(input.get(0), floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, input.get(0).length()), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertTrue( Arrays.equals( @@ -748,7 +748,7 @@ private void testChunkedInfer_Batches(ChunkingSettings chunkingSettings) throws assertThat(results.get(1), instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals(input.get(1), floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, input.get(1).length()), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertTrue( Arrays.equals( @@ -763,7 +763,7 @@ private void testChunkedInfer_Batches(ChunkingSettings chunkingSettings) throws var requestMap = entityAsMap(webServer.requests().get(0).getBody()); assertThat(requestMap, aMapWithSize(3)); - assertThat(requestMap, is(Map.of("project_id", projectId, "inputs", List.of("foo", "bar"), "model_id", modelId))); + assertThat(requestMap, is(Map.of("project_id", projectId, "inputs", List.of("a", "bb"), "model_id", modelId))); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/jinaai/JinaAIServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/jinaai/JinaAIServiceTests.java index 3387b8b73978f..9c10a6d5dbeaf 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/jinaai/JinaAIServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/jinaai/JinaAIServiceTests.java @@ -1788,7 +1788,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(JinaAIEmbeddingsModel mode service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.UNSPECIFIED, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1801,7 +1801,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(JinaAIEmbeddingsModel mode assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("foo", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.123f, -0.123f }, @@ -1813,7 +1813,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(JinaAIEmbeddingsModel mode assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("bar", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.223f, -0.223f }, @@ -1831,7 +1831,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(JinaAIEmbeddingsModel mode MatcherAssert.assertThat(webServer.requests().get(0).getHeader(HttpHeaders.AUTHORIZATION), equalTo("Bearer secret")); var requestMap = entityAsMap(webServer.requests().get(0).getBody()); - MatcherAssert.assertThat(requestMap, is(Map.of("input", List.of("foo", "bar"), "model", "jina-clip-v2"))); + MatcherAssert.assertThat(requestMap, is(Map.of("input", List.of("a", "bb"), "model", "jina-clip-v2"))); } } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java index 13782a538f1f9..6a93e7a172011 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/openai/OpenAiServiceTests.java @@ -1790,7 +1790,7 @@ private void testChunkedInfer(OpenAiEmbeddingsModel model) throws IOException { service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.INGEST, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1803,7 +1803,7 @@ private void testChunkedInfer(OpenAiEmbeddingsModel model) throws IOException { assertThat(results.get(0), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(0); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("foo", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertTrue( Arrays.equals( @@ -1816,7 +1816,7 @@ private void testChunkedInfer(OpenAiEmbeddingsModel model) throws IOException { assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("bar", floatResult.chunks().get(0).matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().get(0).offset()); assertThat(floatResult.chunks().get(0), Matchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertTrue( Arrays.equals( @@ -1834,7 +1834,7 @@ private void testChunkedInfer(OpenAiEmbeddingsModel model) throws IOException { var requestMap = entityAsMap(webServer.requests().get(0).getBody()); assertThat(requestMap.size(), Matchers.is(3)); - assertThat(requestMap.get("input"), Matchers.is(List.of("foo", "bar"))); + assertThat(requestMap.get("input"), Matchers.is(List.of("a", "bb"))); assertThat(requestMap.get("model"), Matchers.is("model")); assertThat(requestMap.get("user"), Matchers.is("user")); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/voyageai/VoyageAIServiceTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/voyageai/VoyageAIServiceTests.java index 6a0428e962f52..3a5fce350046e 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/voyageai/VoyageAIServiceTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/services/voyageai/VoyageAIServiceTests.java @@ -1826,7 +1826,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(VoyageAIEmbeddingsModel mo service.chunkedInfer( model, null, - List.of("foo", "bar"), + List.of("a", "bb"), new HashMap<>(), InputType.UNSPECIFIED, InferenceAction.Request.DEFAULT_TIMEOUT, @@ -1839,7 +1839,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(VoyageAIEmbeddingsModel mo assertThat(results.getFirst(), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.getFirst(); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("foo", floatResult.chunks().getFirst().matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 1), floatResult.chunks().getFirst().offset()); assertThat(floatResult.chunks().getFirst(), CoreMatchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.123f, -0.123f }, @@ -1851,7 +1851,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(VoyageAIEmbeddingsModel mo assertThat(results.get(1), CoreMatchers.instanceOf(ChunkedInferenceEmbedding.class)); var floatResult = (ChunkedInferenceEmbedding) results.get(1); assertThat(floatResult.chunks(), hasSize(1)); - assertEquals("bar", floatResult.chunks().getFirst().matchedText()); + assertEquals(new ChunkedInference.TextOffset(0, 2), floatResult.chunks().getFirst().offset()); assertThat(floatResult.chunks().getFirst(), CoreMatchers.instanceOf(TextEmbeddingFloatResults.Chunk.class)); assertArrayEquals( new float[] { 0.223f, -0.223f }, @@ -1871,7 +1871,7 @@ private void test_Embedding_ChunkedInfer_BatchesCalls(VoyageAIEmbeddingsModel mo var requestMap = entityAsMap(webServer.requests().getFirst().getBody()); MatcherAssert.assertThat( requestMap, - is(Map.of("input", List.of("foo", "bar"), "model", "voyage-3-large", "output_dtype", "float", "output_dimension", 1024)) + is(Map.of("input", List.of("a", "bb"), "model", "voyage-3-large", "output_dtype", "float", "output_dimension", 1024)) ); } } From f9c7ad727d1e4f5701ffbbe1fb904e739a2964cc Mon Sep 17 00:00:00 2001 From: Mike Pellegrini Date: Thu, 27 Feb 2025 13:00:31 -0500 Subject: [PATCH 5/5] Change method name --- .../java/org/elasticsearch/inference/ChunkedInference.java | 5 +++-- .../core/inference/results/ChunkedInferenceEmbedding.java | 2 +- .../xpack/core/inference/results/ChunkedInferenceError.java | 2 +- .../action/filter/ShardBulkInferenceActionFilter.java | 2 +- .../xpack/inference/mapper/SemanticTextField.java | 2 +- .../xpack/inference/mapper/SemanticTextFieldTests.java | 2 +- 6 files changed, 8 insertions(+), 7 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java b/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java index e48eafde1396b..90116b4f0e3b8 100644 --- a/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java +++ b/server/src/main/java/org/elasticsearch/inference/ChunkedInference.java @@ -21,9 +21,10 @@ public interface ChunkedInference { * Implementations of this function serialize their embeddings to {@link BytesReference} for storage in semantic text fields. * * @param xcontent provided by the SemanticTextField - * @return an iterator of the serialized {@link Chunk} which includes the matched text (input) and bytes reference (output/embedding). + * @return an iterator of the serialized {@link Chunk} which includes the offset into the input text and bytes reference + * (output/embedding). */ - Iterator chunksAsMatchedTextAndByteReference(XContent xcontent) throws IOException; + Iterator chunksAsByteReference(XContent xcontent) throws IOException; /** * A chunk of inference results containing the substring location in the original text and the bytes reference. diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java index 7158e9c9a16bf..3159419ad718c 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceEmbedding.java @@ -40,7 +40,7 @@ public static List listOf(List inputs, SparseEmbedding } @Override - public Iterator chunksAsMatchedTextAndByteReference(XContent xcontent) throws IOException { + public Iterator chunksAsByteReference(XContent xcontent) throws IOException { var asChunk = new ArrayList(); for (var chunk : chunks()) { asChunk.add(chunk.toChunk(xcontent)); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java index c2f6eebfc01bf..9b0abd11badc6 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/inference/results/ChunkedInferenceError.java @@ -16,7 +16,7 @@ public record ChunkedInferenceError(Exception exception) implements ChunkedInference { @Override - public Iterator chunksAsMatchedTextAndByteReference(XContent xcontent) { + public Iterator chunksAsByteReference(XContent xcontent) { return Collections.emptyIterator(); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java index 19942595df45b..a2dd2fc691cb9 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/action/filter/ShardBulkInferenceActionFilter.java @@ -604,7 +604,7 @@ static IndexRequest getIndexRequestOrNull(DocWriteRequest docWriteRequest) { private static class EmptyChunkedInference implements ChunkedInference { @Override - public Iterator chunksAsMatchedTextAndByteReference(XContent xcontent) { + public Iterator chunksAsByteReference(XContent xcontent) { return Collections.emptyIterator(); } } diff --git a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java index 489951a206149..ba86a45159b0d 100644 --- a/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java +++ b/x-pack/plugin/inference/src/main/java/org/elasticsearch/xpack/inference/mapper/SemanticTextField.java @@ -275,7 +275,7 @@ public static List toSemanticTextFieldChunks( boolean useLegacyFormat ) throws IOException { List chunks = new ArrayList<>(); - Iterator it = results.chunksAsMatchedTextAndByteReference(contentType.xContent()); + Iterator it = results.chunksAsByteReference(contentType.xContent()); while (it.hasNext()) { chunks.add(toSemanticTextFieldChunk(input, offsetAdjustment, it.next(), useLegacyFormat)); } diff --git a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java index 27baeb2fca585..2f700b402e957 100644 --- a/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java +++ b/x-pack/plugin/inference/src/test/java/org/elasticsearch/xpack/inference/mapper/SemanticTextFieldTests.java @@ -243,7 +243,7 @@ public static SemanticTextField semanticTextFieldFromChunkedInferenceResults( final List chunks = new ArrayList<>(inputs.size()); int offsetAdjustment = 0; Iterator inputsIt = inputs.iterator(); - Iterator chunkIt = results.chunksAsMatchedTextAndByteReference(contentType.xContent()); + Iterator chunkIt = results.chunksAsByteReference(contentType.xContent()); while (inputsIt.hasNext() && chunkIt.hasNext()) { String input = inputsIt.next(); var chunk = chunkIt.next();