Skip to content

Commit 8edbe11

Browse files
committed
Merge branch 'main' into lucene_snapshot
2 parents 64e66e8 + 6d3abe5 commit 8edbe11

File tree

42 files changed

+1348
-957
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

42 files changed

+1348
-957
lines changed

docs/changelog/113900.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 113900
2+
summary: Fix BWC for file-settings based role mappings
3+
area: Authentication
4+
type: bug
5+
issues: []

docs/reference/ingest/processors/inference.asciidoc

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
169169
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
170170
=======
171171
172+
`deberta_v2`::::
173+
(Optional, object)
174+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
175+
+
176+
.Properties of deberta_v2
177+
[%collapsible%open]
178+
=======
179+
`truncate`::::
180+
(Optional, string)
181+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
182+
=======
183+
172184
`roberta`::::
173185
(Optional, object)
174186
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -224,6 +236,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
224236
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
225237
=======
226238
239+
`deberta_v2`::::
240+
(Optional, object)
241+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
242+
+
243+
.Properties of deberta_v2
244+
[%collapsible%open]
245+
=======
246+
`truncate`::::
247+
(Optional, string)
248+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
249+
=======
250+
227251
`roberta`::::
228252
(Optional, object)
229253
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -304,6 +328,23 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
304328
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
305329
=======
306330
331+
`deberta_v2`::::
332+
(Optional, object)
333+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
334+
+
335+
.Properties of deberta_v2
336+
[%collapsible%open]
337+
=======
338+
`span`::::
339+
(Optional, integer)
340+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
341+
342+
`truncate`::::
343+
(Optional, string)
344+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
345+
=======
346+
347+
307348
`roberta`::::
308349
(Optional, object)
309350
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -363,6 +404,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
363404
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
364405
=======
365406
407+
`deberta_v2`::::
408+
(Optional, object)
409+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
410+
+
411+
.Properties of deberta_v2
412+
[%collapsible%open]
413+
=======
414+
`truncate`::::
415+
(Optional, string)
416+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
417+
=======
418+
366419
`roberta`::::
367420
(Optional, object)
368421
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -424,6 +477,22 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
424477
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
425478
=======
426479
480+
`deberta_v2`::::
481+
(Optional, object)
482+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
483+
+
484+
.Properties of deberta_v2
485+
[%collapsible%open]
486+
=======
487+
`span`::::
488+
(Optional, integer)
489+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
490+
491+
`truncate`::::
492+
(Optional, string)
493+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
494+
=======
495+
427496
`roberta`::::
428497
(Optional, object)
429498
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]
@@ -515,6 +584,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
515584
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
516585
=======
517586
587+
`deberta_v2`::::
588+
(Optional, object)
589+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
590+
+
591+
.Properties of deberta_v2
592+
[%collapsible%open]
593+
=======
594+
`truncate`::::
595+
(Optional, string)
596+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
597+
=======
598+
518599
`roberta`::::
519600
(Optional, object)
520601
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]

docs/reference/ml/ml-shared.asciidoc

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -988,6 +988,7 @@ values are
988988
+
989989
--
990990
* `bert`: Use for BERT-style models
991+
* `deberta_v2`: Use for DeBERTa v2 and v3-style models
991992
* `mpnet`: Use for MPNet-style models
992993
* `roberta`: Use for RoBERTa-style and BART-style models
993994
* experimental:[] `xlm_roberta`: Use for XLMRoBERTa-style models
@@ -1037,6 +1038,19 @@ sequence. Therefore, do not use `second` in this case.
10371038

10381039
end::inference-config-nlp-tokenization-truncate[]
10391040

1041+
tag::inference-config-nlp-tokenization-truncate-deberta-v2[]
1042+
Indicates how tokens are truncated when they exceed `max_sequence_length`.
1043+
The default value is `first`.
1044+
+
1045+
--
1046+
* `balanced`: One or both of the first and second sequences may be truncated so as to balance the tokens included from both sequences.
1047+
* `none`: No truncation occurs; the inference request receives an error.
1048+
* `first`: Only the first sequence is truncated.
1049+
* `second`: Only the second sequence is truncated. If there is just one sequence, that sequence is truncated.
1050+
--
1051+
1052+
end::inference-config-nlp-tokenization-truncate-deberta-v2[]
1053+
10401054
tag::inference-config-nlp-tokenization-bert-with-special-tokens[]
10411055
Tokenize with special tokens. The tokens typically included in BERT-style tokenization are:
10421056
+
@@ -1050,10 +1064,23 @@ tag::inference-config-nlp-tokenization-bert-ja-with-special-tokens[]
10501064
Tokenize with special tokens if `true`.
10511065
end::inference-config-nlp-tokenization-bert-ja-with-special-tokens[]
10521066

1067+
tag::inference-config-nlp-tokenization-deberta-v2[]
1068+
DeBERTa-style tokenization is to be performed with the enclosed settings.
1069+
end::inference-config-nlp-tokenization-deberta-v2[]
1070+
10531071
tag::inference-config-nlp-tokenization-max-sequence-length[]
10541072
Specifies the maximum number of tokens allowed to be output by the tokenizer.
10551073
end::inference-config-nlp-tokenization-max-sequence-length[]
10561074

1075+
tag::inference-config-nlp-tokenization-deberta-v2-with-special-tokens[]
1076+
Tokenize with special tokens. The tokens typically included in DeBERTa-style tokenization are:
1077+
+
1078+
--
1079+
* `[CLS]`: The first token of the sequence being classified.
1080+
* `[SEP]`: Indicates sequence separation and sequence end.
1081+
--
1082+
end::inference-config-nlp-tokenization-deberta-v2-with-special-tokens[]
1083+
10571084
tag::inference-config-nlp-tokenization-roberta[]
10581085
RoBERTa-style tokenization is to be performed with the enclosed settings.
10591086
end::inference-config-nlp-tokenization-roberta[]

docs/reference/ml/trained-models/apis/infer-trained-model.asciidoc

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,18 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
137137
(Optional, string)
138138
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate]
139139
=======
140+
`deberta_v2`::::
141+
(Optional, object)
142+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
143+
+
144+
.Properties of deberta_v2
145+
[%collapsible%open]
146+
=======
147+
`truncate`::::
148+
(Optional, string)
149+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
150+
=======
151+
140152
`roberta`::::
141153
(Optional, object)
142154
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]

docs/reference/ml/trained-models/apis/put-trained-models.asciidoc

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -773,6 +773,37 @@ include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenizatio
773773
(Optional, boolean)
774774
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-bert-with-special-tokens]
775775
====
776+
`deberta_v2`::
777+
(Optional, object)
778+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2]
779+
+
780+
.Properties of deberta_v2
781+
[%collapsible%open]
782+
====
783+
`do_lower_case`:::
784+
(Optional, boolean)
785+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-do-lower-case]
786+
+
787+
--
788+
Defaults to `false`.
789+
--
790+
791+
`max_sequence_length`:::
792+
(Optional, integer)
793+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-max-sequence-length]
794+
795+
`span`:::
796+
(Optional, integer)
797+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-span]
798+
799+
`truncate`:::
800+
(Optional, string)
801+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-truncate-deberta-v2]
802+
803+
`with_special_tokens`:::
804+
(Optional, boolean)
805+
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-deberta-v2-with-special-tokens]
806+
====
776807
`roberta`::
777808
(Optional, object)
778809
include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-nlp-tokenization-roberta]

docs/reference/setup/install.asciidoc

Lines changed: 23 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -76,27 +76,29 @@ Docker container images may be downloaded from the Elastic Docker Registry.
7676
[[jvm-version]]
7777
=== Java (JVM) Version
7878

79-
{es} is built using Java, and includes a bundled version of
80-
https://openjdk.java.net[OpenJDK] from the JDK maintainers (GPLv2+CE) within
81-
each distribution. The bundled JVM is the recommended JVM.
82-
83-
To use your own version of Java, set the `ES_JAVA_HOME` environment variable.
84-
If you must use a version of Java that is different from the bundled JVM, it is
85-
best to use the latest release of a link:/support/matrix[supported]
86-
https://www.oracle.com/technetwork/java/eol-135779.html[LTS version of Java].
87-
{es} is closely coupled to certain OpenJDK-specific features, so it may not
88-
work correctly with other JVMs. {es} will refuse to start if a known-bad
89-
version of Java is used.
90-
91-
If you use a JVM other than the bundled one, you are responsible for reacting
92-
to announcements related to its security issues and bug fixes, and must
93-
yourself determine whether each update is necessary or not. In contrast, the
94-
bundled JVM is treated as an integral part of {es}, which means that Elastic
95-
takes responsibility for keeping it up to date. Security issues and bugs within
96-
the bundled JVM are treated as if they were within {es} itself.
97-
98-
The bundled JVM is located within the `jdk` subdirectory of the {es} home
99-
directory. You may remove this directory if using your own JVM.
79+
{es} is built using Java, and includes a bundled version of https://openjdk.java.net[OpenJDK] within each distribution. We strongly
80+
recommend using the bundled JVM in all installations of {es}.
81+
82+
The bundled JVM is treated the same as any other dependency of {es} in terms of support and maintenance. This means that Elastic takes
83+
responsibility for keeping it up to date, and reacts to security issues and bug reports as needed to address vulnerabilities and other bugs
84+
in {es}. Elastic's support of the bundled JVM is subject to Elastic's https://www.elastic.co/support_policy[support policy] and
85+
https://www.elastic.co/support/eol[end-of-life schedule] and is independent of the support policy and end-of-life schedule offered by the
86+
original supplier of the JVM. Elastic does not support using the bundled JVM for purposes other than running {es}.
87+
88+
TIP: {es} uses only a subset of the features offered by the JVM. Bugs and security issues in the bundled JVM often relate to features that
89+
{es} does not use. Such issues do not apply to {es}. Elastic analyzes reports of security vulnerabilities in all its dependencies, including
90+
in the bundled JVM, and will issue an https://www.elastic.co/community/security[Elastic Security Advisory] if such an advisory is needed.
91+
92+
If you decide to run {es} using a version of Java that is different from the bundled one, prefer to use the latest release of an
93+
https://www.oracle.com/technetwork/java/eol-135779.html[LTS version of Java] which is link:/support/matrix[listed in the support matrix].
94+
Although such a configuration is supported, if you encounter a security issue or other bug in your chosen JVM then Elastic may not be able
95+
to help unless the issue is also present in the bundled JVM. Instead, you must seek assistance directly from the supplier of your chosen
96+
JVM. You must also take responsibility for reacting to security and bug announcements from the supplier of your chosen JVM. {es} may not
97+
perform optimally if using a JVM other than the bundled one. {es} is closely coupled to certain OpenJDK-specific features, so it may not
98+
work correctly with JVMs that are not OpenJDK. {es} will refuse to start if you attempt to use a known-bad JVM version.
99+
100+
To use your own version of Java, set the `ES_JAVA_HOME` environment variable to the path to your own JVM installation. The bundled JVM is
101+
located within the `jdk` subdirectory of the {es} home directory. You may remove this directory if using your own JVM.
100102

101103
[discrete]
102104
[[jvm-agents]]

libs/h3/src/main/java/org/elasticsearch/h3/CellBoundary.java

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -22,36 +22,52 @@
2222
*/
2323
package org.elasticsearch.h3;
2424

25+
import java.util.Arrays;
26+
import java.util.Objects;
27+
2528
/**
2629
* cell boundary points as {@link LatLng}
2730
*/
2831
public final class CellBoundary {
29-
3032
/** Maximum number of cell boundary vertices; worst case is pentagon:
3133
* 5 original verts + 5 edge crossings
3234
*/
33-
private static final int MAX_CELL_BNDRY_VERTS = 10;
35+
static final int MAX_CELL_BNDRY_VERTS = 10;
3436
/** How many points it holds */
35-
private int numVertext;
37+
private final int numPoints;
3638
/** The actual points */
37-
private final LatLng[] points = new LatLng[MAX_CELL_BNDRY_VERTS];
38-
39-
CellBoundary() {}
39+
private final LatLng[] points;
4040

41-
void add(LatLng point) {
42-
points[numVertext++] = point;
41+
CellBoundary(LatLng[] points, int numPoints) {
42+
this.points = points;
43+
this.numPoints = numPoints;
4344
}
4445

4546
/** Number of points in this boundary */
4647
public int numPoints() {
47-
return numVertext;
48+
return numPoints;
4849
}
4950

5051
/** Return the point at the given position*/
5152
public LatLng getLatLon(int i) {
52-
if (i >= numVertext) {
53-
throw new IndexOutOfBoundsException();
54-
}
53+
assert i >= 0 && i < numPoints;
5554
return points[i];
5655
}
56+
57+
@Override
58+
public boolean equals(Object o) {
59+
if (this == o) {
60+
return true;
61+
}
62+
if (o == null || getClass() != o.getClass()) {
63+
return false;
64+
}
65+
final CellBoundary that = (CellBoundary) o;
66+
return numPoints == that.numPoints && Arrays.equals(points, that.points);
67+
}
68+
69+
@Override
70+
public int hashCode() {
71+
return Objects.hash(numPoints, Arrays.hashCode(points));
72+
}
5773
}

libs/h3/src/main/java/org/elasticsearch/h3/Constants.java

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,6 @@ final class Constants {
3434
* 2.0 * PI
3535
*/
3636
public static final double M_2PI = 2.0 * Math.PI;
37-
/**
38-
* max H3 resolution; H3 version 1 has 16 resolutions, numbered 0 through 15
39-
*/
40-
public static int MAX_H3_RES = 15;
4137
/**
4238
* The number of H3 base cells
4339
*/

0 commit comments

Comments
 (0)