Skip to content

Commit d5bba61

Browse files
authored
Merge branch 'main' into multi-project/search-it
2 parents 1826796 + c0c8c33 commit d5bba61

File tree

128 files changed

+4815
-515
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

128 files changed

+4815
-515
lines changed
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* License v3.0 only", or the "Server Side Public License, v 1".
88
*/
99

10-
package org.elasticsearch.gradle.internal.dependencies.patches.hdfs;
10+
package org.elasticsearch.gradle.internal.dependencies.patches;
1111

1212
import org.objectweb.asm.MethodVisitor;
1313
import org.objectweb.asm.Opcodes;
@@ -16,7 +16,7 @@ public class MethodReplacement extends MethodVisitor {
1616
private final MethodVisitor delegate;
1717
private final Runnable bodyWriter;
1818

19-
MethodReplacement(MethodVisitor delegate, Runnable bodyWriter) {
19+
public MethodReplacement(MethodVisitor delegate, Runnable bodyWriter) {
2020
super(Opcodes.ASM9);
2121
this.delegate = delegate;
2222
this.bodyWriter = bodyWriter;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.gradle.internal.dependencies.patches.azurecore;
11+
12+
import org.elasticsearch.gradle.internal.dependencies.patches.PatcherInfo;
13+
import org.elasticsearch.gradle.internal.dependencies.patches.Utils;
14+
import org.gradle.api.artifacts.transform.CacheableTransform;
15+
import org.gradle.api.artifacts.transform.InputArtifact;
16+
import org.gradle.api.artifacts.transform.TransformAction;
17+
import org.gradle.api.artifacts.transform.TransformOutputs;
18+
import org.gradle.api.artifacts.transform.TransformParameters;
19+
import org.gradle.api.file.FileSystemLocation;
20+
import org.gradle.api.provider.Provider;
21+
import org.gradle.api.tasks.Classpath;
22+
import org.jetbrains.annotations.NotNull;
23+
24+
import java.io.File;
25+
import java.util.List;
26+
import java.util.regex.Pattern;
27+
28+
import static org.elasticsearch.gradle.internal.dependencies.patches.PatcherInfo.classPatcher;
29+
30+
@CacheableTransform
31+
public abstract class AzureCoreClassPatcher implements TransformAction<TransformParameters.None> {
32+
33+
private static final String JAR_FILE_TO_PATCH = "azure-core-[\\d.]*\\.jar";
34+
35+
private static final List<PatcherInfo> CLASS_PATCHERS = List.of(
36+
classPatcher(
37+
"com/azure/core/implementation/ImplUtils.class",
38+
"7beda5bdff5ea460cfc08721a188cf07d16e0c987dae45401fca7abf4e6e6c0e",
39+
ImplUtilsPatcher::new
40+
)
41+
);
42+
43+
@Classpath
44+
@InputArtifact
45+
public abstract Provider<FileSystemLocation> getInputArtifact();
46+
47+
@Override
48+
public void transform(@NotNull TransformOutputs outputs) {
49+
File inputFile = getInputArtifact().get().getAsFile();
50+
51+
if (Pattern.matches(JAR_FILE_TO_PATCH, inputFile.getName())) {
52+
System.out.println("Patching " + inputFile.getName());
53+
File outputFile = outputs.file(inputFile.getName().replace(".jar", "-patched.jar"));
54+
Utils.patchJar(inputFile, outputFile, CLASS_PATCHERS, true);
55+
} else {
56+
System.out.println("Skipping " + inputFile.getName());
57+
outputs.file(getInputArtifact());
58+
}
59+
}
60+
61+
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
/*
2+
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
3+
* or more contributor license agreements. Licensed under the "Elastic License
4+
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
5+
* Public License v 1"; you may not use this file except in compliance with, at
6+
* your election, the "Elastic License 2.0", the "GNU Affero General Public
7+
* License v3.0 only", or the "Server Side Public License, v 1".
8+
*/
9+
10+
package org.elasticsearch.gradle.internal.dependencies.patches.azurecore;
11+
12+
import org.elasticsearch.gradle.internal.dependencies.patches.MethodReplacement;
13+
import org.objectweb.asm.ClassVisitor;
14+
import org.objectweb.asm.MethodVisitor;
15+
import org.objectweb.asm.Opcodes;
16+
17+
class ImplUtilsPatcher extends ClassVisitor {
18+
ImplUtilsPatcher(ClassVisitor classVisitor) {
19+
super(Opcodes.ASM9, classVisitor);
20+
}
21+
22+
public MethodVisitor visitMethod(int access, String name, String descriptor, String signature, String[] exceptions) {
23+
MethodVisitor mv = super.visitMethod(access, name, descriptor, signature, exceptions);
24+
// `addShutdownHookSafely` invokes `java.lang.Runtime.addShutdownHook`, which is forbidden (i.e. it will throw an Entitlements error).
25+
// We replace the method body here with `return null`.
26+
if (name.equals("addShutdownHookSafely")) {
27+
return new MethodReplacement(mv, () -> {
28+
mv.visitInsn(Opcodes.ACONST_NULL);
29+
mv.visitInsn(Opcodes.ARETURN);
30+
});
31+
}
32+
return mv;
33+
}
34+
}

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/dependencies/patches/hdfs/ShellPatcher.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.gradle.internal.dependencies.patches.hdfs;
1111

12+
import org.elasticsearch.gradle.internal.dependencies.patches.MethodReplacement;
1213
import org.objectweb.asm.ClassVisitor;
1314
import org.objectweb.asm.ClassWriter;
1415
import org.objectweb.asm.MethodVisitor;

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/dependencies/patches/hdfs/ShutdownHookManagerPatcher.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
package org.elasticsearch.gradle.internal.dependencies.patches.hdfs;
1111

12+
import org.elasticsearch.gradle.internal.dependencies.patches.MethodReplacement;
1213
import org.objectweb.asm.ClassVisitor;
1314
import org.objectweb.asm.ClassWriter;
1415
import org.objectweb.asm.MethodVisitor;

distribution/docker/build.gradle

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ dependencies {
117117
log4jConfig project(path: ":distribution", configuration: 'log4jConfig')
118118
tini "krallin:tini:0.19.0:${tiniArch}"
119119
allPlugins project(path: ':plugins', configuration: 'allPlugins')
120+
allPlugins project(path: ':x-pack:extras:plugins', configuration: 'allPlugins')
120121
filebeat_aarch64 "beats:filebeat:${VersionProperties.elasticsearch}:[email protected]"
121122
filebeat_x86_64 "beats:filebeat:${VersionProperties.elasticsearch}:[email protected]"
122123
filebeat_fips_aarch64 "beats:filebeat-fips:${VersionProperties.elasticsearch}:[email protected]"

docs/changelog/128293.yaml

Lines changed: 0 additions & 5 deletions
This file was deleted.

docs/changelog/128396.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 128396
2+
summary: Delegated authorization using Microsoft Graph (SDK)
3+
area: Authorization
4+
type: feature
5+
issues: []

docs/changelog/129150.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 129150
2+
summary: Add `none` chunking strategy to disable automatic chunking for inference
3+
endpoints
4+
area: Machine Learning
5+
type: feature
6+
issues: []

docs/reference/elasticsearch/mapping-reference/semantic-text.md

Lines changed: 50 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -117,15 +117,16 @@ If specified, these will override the chunking settings set in the {{infer-cap}}
117117
endpoint associated with `inference_id`.
118118
If chunking settings are updated, they will not be applied to existing documents
119119
until they are reindexed.
120+
To completely disable chunking, use the `none` chunking strategy.
120121

121122
**Valid values for `chunking_settings`**:
122123

123124
`strategy`
124-
: Indicates the type of chunking strategy to use. Valid values are `word` or
125+
: Indicates the type of chunking strategy to use. Valid values are `none`, `word` or
125126
`sentence`. Required.
126127

127128
`max_chunk_size`
128-
: The maximum number of works in a chunk. Required.
129+
: The maximum number of words in a chunk. Required for `word` and `sentence` strategies.
129130

130131
`overlap`
131132
: The number of overlapping words allowed in chunks. This cannot be defined as
@@ -136,6 +137,12 @@ until they are reindexed.
136137
: The number of overlapping sentences allowed in chunks. Valid values are `0`
137138
or `1`. Required for `sentence` type chunking settings.
138139

140+
::::{warning}
141+
If the input exceeds the maximum token limit of the underlying model, some services (such as OpenAI) may return an
142+
error. In contrast, the `elastic` and `elasticsearch` services will automatically truncate the input to fit within the
143+
model's limit.
144+
::::
145+
139146
## {{infer-cap}} endpoint validation [infer-endpoint-validation]
140147

141148
The `inference_id` will not be validated when the mapping is created, but when
@@ -166,10 +173,49 @@ For more details on chunking and how to configure chunking settings,
166173
see [Configuring chunking](https://www.elastic.co/docs/api/doc/elasticsearch/group/endpoint-inference)
167174
in the Inference API documentation.
168175

176+
You can pre-chunk the input by sending it to Elasticsearch as an array of strings.
177+
Example:
178+
179+
```console
180+
PUT test-index
181+
{
182+
"mappings": {
183+
"properties": {
184+
"my_semantic_field": {
185+
"type": "semantic_text",
186+
"chunking_settings": {
187+
"strategy": "none" <1>
188+
}
189+
}
190+
}
191+
}
192+
}
193+
```
194+
195+
1. Disable chunking on `my_semantic_field`.
196+
197+
```console
198+
PUT test-index/_doc/1
199+
{
200+
"my_semantic_field": ["my first chunk", "my second chunk", ...] <1>
201+
...
202+
}
203+
```
204+
205+
1. The text is pre-chunked and provided as an array of strings.
206+
Each element in the array represents a single chunk that will be sent directly to the inference service without further chunking.
207+
208+
**Important considerations**:
209+
210+
* When providing pre-chunked input, ensure that you set the chunking strategy to `none` to avoid additional processing.
211+
* Each chunk should be sized carefully, staying within the token limit of the inference service and the underlying model.
212+
* If a chunk exceeds the model's token limit, the behavior depends on the service:
213+
* Some services (such as OpenAI) will return an error.
214+
* Others (such as `elastic` and `elasticsearch`) will automatically truncate the input.
215+
169216
Refer
170217
to [this tutorial](docs-content://solutions/search/semantic-search/semantic-search-semantic-text.md)
171-
to learn more about semantic search using `semantic_text` and the `semantic`
172-
query.
218+
to learn more about semantic search using `semantic_text`.
173219

174220
## Extracting Relevant Fragments from Semantic Text [semantic-text-highlighting]
175221

0 commit comments

Comments
 (0)