Skip to content

Commit 1d5380d

Browse files
Merge branch 'main' into fixTrainedModelStatsWait
2 parents ec345fa + ce3c354 commit 1d5380d

File tree

40 files changed

+791
-576
lines changed

40 files changed

+791
-576
lines changed

build-tools-internal/src/main/groovy/elasticsearch.ide.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,15 +137,15 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') {
137137
}
138138
}
139139

140-
// modifies the idea module config to enable preview features on 'elasticsearch-native' module
140+
// modifies the idea module config to enable preview features on ':libs:native' module
141141
tasks.register("enablePreviewFeatures") {
142142
group = 'ide'
143143
description = 'Enables preview features on native library module'
144144
dependsOn tasks.named("enableExternalConfiguration")
145145

146146
doLast {
147147
['main', 'test'].each { sourceSet ->
148-
modifyXml(".idea/modules/libs/native/elasticsearch.libs.elasticsearch-native.${sourceSet}.iml") { xml ->
148+
modifyXml(".idea/modules/libs/native/elasticsearch.libs.${project.project(':libs:native').name}.${sourceSet}.iml") { xml ->
149149
xml.component.find { it.'@name' == 'NewModuleRootManager' }?.'@LANGUAGE_LEVEL' = 'JDK_21_PREVIEW'
150150
}
151151
}

build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ResolveAllDependencies.java

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,11 @@
1212
import org.elasticsearch.gradle.VersionProperties;
1313
import org.gradle.api.DefaultTask;
1414
import org.gradle.api.artifacts.Configuration;
15+
import org.gradle.api.artifacts.FileCollectionDependency;
16+
import org.gradle.api.artifacts.component.ModuleComponentIdentifier;
1517
import org.gradle.api.file.FileCollection;
1618
import org.gradle.api.model.ObjectFactory;
19+
import org.gradle.api.provider.ProviderFactory;
1720
import org.gradle.api.tasks.InputFiles;
1821
import org.gradle.api.tasks.Internal;
1922
import org.gradle.api.tasks.TaskAction;
@@ -26,9 +29,6 @@
2629

2730
import javax.inject.Inject;
2831

29-
import static org.elasticsearch.gradle.DistributionDownloadPlugin.DISTRO_EXTRACTED_CONFIG_PREFIX;
30-
import static org.elasticsearch.gradle.internal.test.rest.compat.compat.LegacyYamlRestCompatTestPlugin.BWC_MINOR_CONFIG_NAME;
31-
3232
public abstract class ResolveAllDependencies extends DefaultTask {
3333

3434
private boolean resolveJavaToolChain = false;
@@ -37,18 +37,28 @@ public abstract class ResolveAllDependencies extends DefaultTask {
3737
protected abstract JavaToolchainService getJavaToolchainService();
3838

3939
private final ObjectFactory objectFactory;
40+
private final ProviderFactory providerFactory;
4041

4142
private Collection<Configuration> configs;
4243

4344
@Inject
44-
public ResolveAllDependencies(ObjectFactory objectFactory) {
45+
public ResolveAllDependencies(ObjectFactory objectFactory, ProviderFactory providerFactory) {
4546
this.objectFactory = objectFactory;
47+
this.providerFactory = providerFactory;
4648
}
4749

4850
@InputFiles
4951
public FileCollection getResolvedArtifacts() {
50-
return objectFactory.fileCollection()
51-
.from(configs.stream().filter(ResolveAllDependencies::canBeResolved).collect(Collectors.toList()));
52+
return objectFactory.fileCollection().from(configs.stream().filter(ResolveAllDependencies::canBeResolved).map(c -> {
53+
// Make a copy of the configuration, omitting file collection dependencies to avoid building project artifacts
54+
Configuration copy = c.copyRecursive(d -> d instanceof FileCollectionDependency == false);
55+
copy.setCanBeConsumed(false);
56+
return copy;
57+
})
58+
// Include only module dependencies, ignoring things like project dependencies so we don't unnecessarily build stuff
59+
.map(c -> c.getIncoming().artifactView(v -> v.lenient(true).componentFilter(i -> i instanceof ModuleComponentIdentifier)))
60+
.map(artifactView -> providerFactory.provider(artifactView::getFiles))
61+
.collect(Collectors.toList()));
5262
}
5363

5464
@TaskAction
@@ -95,8 +105,8 @@ private static boolean canBeResolved(Configuration configuration) {
95105
return false;
96106
}
97107
}
98-
return configuration.getName().startsWith(DISTRO_EXTRACTED_CONFIG_PREFIX) == false
99-
&& configuration.getName().equals(BWC_MINOR_CONFIG_NAME) == false;
108+
109+
return true;
100110
}
101111

102112
}

docs/changelog/115624.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
pr: 115624
2+
summary: "ES|QL: fix LIMIT pushdown past MV_EXPAND"
3+
area: ES|QL
4+
type: bug
5+
issues:
6+
- 102084
7+
- 102061

docs/changelog/115923.yaml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
pr: 115923
2+
summary: Apply more strict parsing of actions in bulk API
3+
area: Indices APIs
4+
type: breaking
5+
issues: [ ]
6+
breaking:
7+
title: Apply more strict parsing of actions in bulk API
8+
area: REST API
9+
details: >-
10+
Previously, the following classes of malformed input were deprecated but not rejected in the action lines of a
11+
bulk request: missing closing brace; additional keys after the action (which were ignored); additional data after
12+
the closing brace (which was ignored). They will now be considered errors and rejected.
13+
impact: >-
14+
Users must provide well-formed input when using the bulk API. (They can request REST API compatibility with v8 to
15+
get the previous behaviour back as an interim measure.)
16+
notable: false

docs/reference/query-dsl/script-score-query.asciidoc

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,10 +62,17 @@ multiplied by `boost` to produce final documents' scores. Defaults to `1.0`.
6262
===== Use relevance scores in a script
6363

6464
Within a script, you can
65-
{ref}/modules-scripting-fields.html#scripting-score[access]
65+
{ref}/modules-scripting-fields.html#scripting-score[access]
6666
the `_score` variable which represents the current relevance score of a
6767
document.
6868

69+
[[script-score-access-term-statistics]]
70+
===== Use term statistics in a script
71+
72+
Within a script, you can
73+
{ref}/modules-scripting-fields.html#scripting-term-statistics[access]
74+
the `_termStats` variable which provides statistical information about the terms used in the child query of the `script_score` query.
75+
6976
[[script-score-predefined-functions]]
7077
===== Predefined functions
7178
You can use any of the available {painless}/painless-contexts.html[painless
@@ -147,7 +154,7 @@ updated since update operations also update the value of the `_seq_no` field.
147154

148155
[[decay-functions-numeric-fields]]
149156
====== Decay functions for numeric fields
150-
You can read more about decay functions
157+
You can read more about decay functions
151158
{ref}/query-dsl-function-score-query.html#function-decay[here].
152159

153160
* `double decayNumericLinear(double origin, double scale, double offset, double decay, double docValue)`
@@ -233,7 +240,7 @@ The `script_score` query calculates the score for
233240
every matching document, or hit. There are faster alternative query types that
234241
can efficiently skip non-competitive hits:
235242

236-
* If you want to boost documents on some static fields, use the
243+
* If you want to boost documents on some static fields, use the
237244
<<query-dsl-rank-feature-query, `rank_feature`>> query.
238245
* If you want to boost documents closer to a date or geographic point, use the
239246
<<query-dsl-distance-feature-query, `distance_feature`>> query.

docs/reference/reranking/learning-to-rank-model-training.asciidoc

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,21 @@ Feature extractors are defined using templated queries. https://eland.readthedoc
3838
from eland.ml.ltr import QueryFeatureExtractor
3939
4040
feature_extractors=[
41-
# We want to use the score of the match query for the title field as a feature:
41+
# We want to use the BM25 score of the match query for the title field as a feature:
4242
QueryFeatureExtractor(
4343
feature_name="title_bm25",
4444
query={"match": {"title": "{{query}}"}}
4545
),
46+
# We want to use the number of matched terms in the title field as a feature:
47+
QueryFeatureExtractor(
48+
feature_name="title_matched_term_count",
49+
query={
50+
"script_score": {
51+
"query": {"match": {"title": "{{query}}"}},
52+
"script": {"source": "return _termStats.matchedTermsCount();"},
53+
}
54+
},
55+
),
4656
# We can use a script_score query to get the value
4757
# of the field rating directly as a feature:
4858
QueryFeatureExtractor(
@@ -54,26 +64,29 @@ feature_extractors=[
5464
}
5565
},
5666
),
57-
# We can execute a script on the value of the query
58-
# and use the return value as a feature:
59-
QueryFeatureExtractor(
60-
feature_name="query_length",
67+
# We extract the number of unique terms in the query as a feature.
68+
QueryFeatureExtractor(
69+
feature_name="query_term_count",
6170
query={
6271
"script_score": {
63-
"query": {"match_all": {}},
64-
"script": {
65-
"source": "return params['query'].splitOnToken(' ').length;",
66-
"params": {
67-
"query": "{{query}}",
68-
}
69-
},
72+
"query": {"match": {"title": "{{query}}"}},
73+
"script": {"source": "return _termStats.uniqueTermsCount();"},
7074
}
7175
},
7276
),
7377
]
7478
----
7579
// NOTCONSOLE
7680

81+
[NOTE]
82+
.Term statistics as features
83+
===================================================
84+
85+
It is very common for an LTR model to leverage raw term statistics as features.
86+
To extract this information, you can use the {ref}/modules-scripting-fields.html#scripting-term-statistics[term statistics feature] provided as part of the <<query-dsl-script-score-query,`script_score`>> query.
87+
88+
===================================================
89+
7790
Once the feature extractors have been defined, they are wrapped in an `eland.ml.ltr.LTRModelConfig` object for use in later training steps:
7891

7992
[source,python]

docs/reference/reranking/learning-to-rank-search-usage.asciidoc

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,3 @@ When exposing pagination to users, `window_size` should remain constant as each
6161
====== Negative scores
6262

6363
Depending on how your model is trained, it’s possible that the model will return negative scores for documents. While negative scores are not allowed from first-stage retrieval and ranking, it is possible to use them in the LTR rescorer.
64-
65-
[discrete]
66-
[[learning-to-rank-rescorer-limitations-term-statistics]]
67-
====== Term statistics as features
68-
69-
We do not currently support term statistics as features, however future releases will introduce this capability.
70-

docs/reference/scripting/fields.asciidoc

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,79 @@ GET my-index-000001/_search
8080
}
8181
-------------------------------------
8282

83+
[discrete]
84+
[[scripting-term-statistics]]
85+
=== Accessing term statistics of a document within a script
86+
87+
Scripts used in a <<query-dsl-script-score-query,`script_score`>> query have access to the `_termStats` variable which provides statistical information about the terms in the child query.
88+
89+
In the following example, `_termStats` is used within a <<query-dsl-script-score-query,`script_score`>> query to retrieve the average term frequency for the terms `quick`, `brown`, and `fox` in the `text` field:
90+
91+
[source,console]
92+
-------------------------------------
93+
PUT my-index-000001/_doc/1?refresh
94+
{
95+
"text": "quick brown fox"
96+
}
97+
98+
PUT my-index-000001/_doc/2?refresh
99+
{
100+
"text": "quick fox"
101+
}
102+
103+
GET my-index-000001/_search
104+
{
105+
"query": {
106+
"script_score": {
107+
"query": { <1>
108+
"match": {
109+
"text": "quick brown fox"
110+
}
111+
},
112+
"script": {
113+
"source": "_termStats.termFreq().getAverage()" <2>
114+
}
115+
}
116+
}
117+
}
118+
-------------------------------------
119+
120+
<1> Child query used to infer the field and the terms considered in term statistics.
121+
122+
<2> The script calculates the average term frequency for the terms in the query using `_termStats`.
123+
124+
`_termStats` provides access to the following functions for working with term statistics:
125+
126+
- `uniqueTermsCount`: Returns the total number of unique terms in the query. This value is the same across all documents.
127+
- `matchedTermsCount`: Returns the count of query terms that matched within the current document.
128+
- `docFreq`: Provides document frequency statistics for the terms in the query, indicating how many documents contain each term. This value is consistent across all documents.
129+
- `totalTermFreq`: Provides the total frequency of terms across all documents, representing how often each term appears in the entire corpus. This value is consistent across all documents.
130+
- `termFreq`: Returns the frequency of query terms within the current document, showing how often each term appears in that document.
131+
132+
[NOTE]
133+
.Functions returning aggregated statistics
134+
===================================================
135+
136+
The `docFreq`, `termFreq` and `totalTermFreq` functions return objects that represent statistics across all terms of the child query.
137+
138+
The returned statistics objects support the following methods:
139+
140+
`getAverage()`: Returns the average value of the metric.
141+
`getMin()`: Returns the minimum value of the metric.
142+
`getMax()`: Returns the maximum value of the metric.
143+
`getSum()`: Returns the sum of the metric values.
144+
`getCount()`: Returns the count of terms included in the metric calculation.
145+
146+
===================================================
147+
148+
149+
[NOTE]
150+
.Painless language required
151+
===================================================
152+
153+
The `_termStats` variable is only available when using the <<modules-scripting-painless, Painless>> scripting language.
154+
155+
===================================================
83156

84157
[discrete]
85158
[[modules-scripting-doc-vals]]

gradle/verification-metadata.xml

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -579,11 +579,6 @@
579579
<sha256 value="c8fb4839054d280b3033f800d1f5a97de2f028eb8ba2eb458ad287e536f3f25f" origin="Generated by Gradle"/>
580580
</artifact>
581581
</component>
582-
<component group="com.google.crypto.tink" name="tink" version="1.14.0">
583-
<artifact name="tink-1.14.0.jar">
584-
<sha256 value="47b2248705e0c9771bc259f22465a79655c1296e2d47aaee852adb7cdacb6198" origin="Generated by Gradle"/>
585-
</artifact>
586-
</component>
587582
<component group="com.google.errorprone" name="error_prone_annotations" version="2.11.0">
588583
<artifact name="error_prone_annotations-2.11.0.jar">
589584
<sha256 value="721cb91842b46fa056847d104d5225c8b8e1e8b62263b993051e1e5a0137b7ec" origin="Generated by Gradle"/>
@@ -759,11 +754,6 @@
759754
<sha256 value="8540247fad9e06baefa8fb45eb313802d019f485f14300e0f9d6b556ed88e753" origin="Generated by Gradle"/>
760755
</artifact>
761756
</component>
762-
<component group="com.google.protobuf" name="protobuf-java" version="4.27.0">
763-
<artifact name="protobuf-java-4.27.0.jar">
764-
<sha256 value="9072e60fe66cff5d6c0f11a1df21d8f3e4b29b5ee782b45c3fc75f59fbe2b839" origin="Generated by Gradle"/>
765-
</artifact>
766-
</component>
767757
<component group="com.google.protobuf" name="protobuf-java-util" version="3.25.5">
768758
<artifact name="protobuf-java-util-3.25.5.jar">
769759
<sha256 value="dacc58b2c3d2fa8d4bddc1acb881e78d6cf7c137dd78bc1d67f6aca732436a8d" origin="Generated by Gradle"/>

modules/repository-azure/build.gradle

Lines changed: 11 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -63,8 +63,12 @@ dependencies {
6363
api "com.github.stephenc.jcip:jcip-annotations:1.0-1"
6464
api "com.nimbusds:content-type:2.3"
6565
api "com.nimbusds:lang-tag:1.7"
66-
api "com.nimbusds:nimbus-jose-jwt:9.37.3"
67-
api "com.nimbusds:oauth2-oidc-sdk:11.9.1"
66+
api("com.nimbusds:nimbus-jose-jwt:9.37.3"){
67+
exclude group: 'com.google.crypto.tink', module: 'tink' // it's an optional dependency on which we don't rely
68+
}
69+
api("com.nimbusds:oauth2-oidc-sdk:11.9.1"){
70+
exclude group: 'com.google.crypto.tink', module: 'tink' // it's an optional dependency on which we don't rely
71+
}
6872
api "jakarta.activation:jakarta.activation-api:1.2.1"
6973
api "jakarta.xml.bind:jakarta.xml.bind-api:2.3.3"
7074
api "net.java.dev.jna:jna-platform:${versions.jna}" // Maven says 5.14.0 but this aligns with the Elasticsearch-wide version
@@ -74,8 +78,6 @@ dependencies {
7478
api "org.codehaus.woodstox:stax2-api:4.2.2"
7579
api "org.ow2.asm:asm:9.3"
7680

77-
runtimeOnly "com.google.crypto.tink:tink:1.14.0"
78-
runtimeOnly "com.google.protobuf:protobuf-java:4.27.0"
7981
runtimeOnly "com.google.code.gson:gson:2.11.0"
8082
runtimeOnly "org.cryptomator:siv-mode:1.5.2"
8183

@@ -175,13 +177,11 @@ tasks.named("thirdPartyAudit").configure {
175177
// 'org.slf4j.ext.EventData' - bring back when https://github.com/elastic/elasticsearch/issues/93714 is done
176178

177179
// Optional dependency of tink
178-
'com.google.api.client.http.HttpHeaders',
179-
'com.google.api.client.http.HttpRequest',
180-
'com.google.api.client.http.HttpRequestFactory',
181-
'com.google.api.client.http.HttpResponse',
182-
'com.google.api.client.http.HttpTransport',
183-
'com.google.api.client.http.javanet.NetHttpTransport',
184-
'com.google.api.client.http.javanet.NetHttpTransport$Builder',
180+
'com.google.crypto.tink.subtle.Ed25519Sign',
181+
'com.google.crypto.tink.subtle.Ed25519Sign$KeyPair',
182+
'com.google.crypto.tink.subtle.Ed25519Verify',
183+
'com.google.crypto.tink.subtle.X25519',
184+
'com.google.crypto.tink.subtle.XChaCha20Poly1305',
185185

186186
// Optional dependency of nimbus-jose-jwt and oauth2-oidc-sdk
187187
'org.bouncycastle.asn1.pkcs.PrivateKeyInfo',
@@ -253,14 +253,6 @@ tasks.named("thirdPartyAudit").configure {
253253
'javax.activation.MailcapCommandMap',
254254
'javax.activation.MimetypesFileTypeMap',
255255
'reactor.core.publisher.Traces$SharedSecretsCallSiteSupplierFactory$TracingException',
256-
257-
'com.google.protobuf.MessageSchema',
258-
'com.google.protobuf.UnsafeUtil',
259-
'com.google.protobuf.UnsafeUtil$1',
260-
'com.google.protobuf.UnsafeUtil$Android32MemoryAccessor',
261-
'com.google.protobuf.UnsafeUtil$Android64MemoryAccessor',
262-
'com.google.protobuf.UnsafeUtil$JvmMemoryAccessor',
263-
'com.google.protobuf.UnsafeUtil$MemoryAccessor',
264256
)
265257
}
266258

0 commit comments

Comments
 (0)