Skip to content

Commit 6bd1bd2

Browse files
authored
Merge branch 'main' into feature-update-fleet-mappings-version
2 parents 0c11eb6 + 84f233a commit 6bd1bd2

File tree

33 files changed

+894
-324
lines changed

33 files changed

+894
-324
lines changed

.ci/init.gradle

Lines changed: 2 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -1,95 +1,3 @@
1-
import com.bettercloud.vault.VaultConfig
2-
import com.bettercloud.vault.Vault
3-
4-
initscript {
5-
repositories {
6-
mavenCentral()
7-
}
8-
dependencies {
9-
classpath 'com.bettercloud:vault-java-driver:4.1.0'
10-
}
11-
}
12-
13-
boolean USE_ARTIFACTORY = false
14-
15-
if (System.getenv('VAULT_ADDR') == null) {
16-
// When trying to reproduce errors outside of CI, it can be useful to allow this to just return rather than blowing up
17-
if (System.getenv('CI') == null) {
18-
return
19-
}
20-
21-
throw new GradleException("You must set the VAULT_ADDR environment variable to use this init script.")
22-
}
23-
24-
if (System.getenv('VAULT_ROLE_ID') == null && System.getenv('VAULT_SECRET_ID') == null && System.getenv('VAULT_TOKEN') == null) {
25-
// When trying to reproduce errors outside of CI, it can be useful to allow this to just return rather than blowing up
26-
if (System.getenv('CI') == null) {
27-
return
28-
}
29-
30-
throw new GradleException("You must set either the VAULT_ROLE_ID and VAULT_SECRET_ID environment variables, " +
31-
"or the VAULT_TOKEN environment variable to use this init script.")
32-
}
33-
34-
final String vaultPathPrefix = System.getenv('VAULT_ADDR') ==~ /.+vault-ci.+\.dev.*/ ? "secret/ci/elastic-elasticsearch/migrated" : "secret/elasticsearch-ci"
35-
36-
final String vaultToken = System.getenv('VAULT_TOKEN') ?: new Vault(
37-
new VaultConfig()
38-
.address(System.env.VAULT_ADDR)
39-
.engineVersion(1)
40-
.build()
41-
)
42-
.withRetries(5, 1000)
43-
.auth()
44-
.loginByAppRole("approle", System.env.VAULT_ROLE_ID, System.env.VAULT_SECRET_ID)
45-
.getAuthClientToken()
46-
47-
final Vault vault = new Vault(
48-
new VaultConfig()
49-
.address(System.env.VAULT_ADDR)
50-
.engineVersion(1)
51-
.token(vaultToken)
52-
.build()
53-
)
54-
.withRetries(5, 1000)
55-
56-
57-
if (USE_ARTIFACTORY) {
58-
final Map<String, String> artifactoryCredentials = vault.logical()
59-
.read("${vaultPathPrefix}/artifactory.elstc.co")
60-
.getData()
61-
logger.info("Using elastic artifactory repos")
62-
Closure configCache = {
63-
return {
64-
name "artifactory-gradle-release"
65-
url "https://artifactory.elstc.co/artifactory/gradle-release"
66-
credentials {
67-
username artifactoryCredentials.get("username")
68-
password artifactoryCredentials.get("token")
69-
}
70-
}
71-
}
72-
settingsEvaluated { settings ->
73-
settings.pluginManagement {
74-
repositories {
75-
maven configCache()
76-
}
77-
}
78-
}
79-
projectsLoaded {
80-
allprojects {
81-
buildscript {
82-
repositories {
83-
maven configCache()
84-
}
85-
}
86-
repositories {
87-
maven configCache()
88-
}
89-
}
90-
}
91-
}
92-
931
gradle.settingsEvaluated { settings ->
942
settings.pluginManager.withPlugin("com.gradle.develocity") {
953
settings.develocity {
@@ -98,14 +6,10 @@ gradle.settingsEvaluated { settings ->
986
}
997
}
1008

101-
1029
final String buildCacheUrl = System.getProperty('org.elasticsearch.build.cache.url')
10310
final boolean buildCachePush = Boolean.valueOf(System.getProperty('org.elasticsearch.build.cache.push', 'false'))
10411

10512
if (buildCacheUrl) {
106-
final Map<String, String> buildCacheCredentials = System.getenv("GRADLE_BUILD_CACHE_USERNAME") ? [:] : vault.logical()
107-
.read("${vaultPathPrefix}/gradle-build-cache")
108-
.getData()
10913
gradle.settingsEvaluated { settings ->
11014
settings.buildCache {
11115
local {
@@ -116,11 +20,10 @@ if (buildCacheUrl) {
11620
url = buildCacheUrl
11721
push = buildCachePush
11822
credentials {
119-
username = System.getenv("GRADLE_BUILD_CACHE_USERNAME") ?: buildCacheCredentials.get("username")
120-
password = System.getenv("GRADLE_BUILD_CACHE_PASSWORD") ?: buildCacheCredentials.get("password")
23+
username = System.getenv("GRADLE_BUILD_CACHE_USERNAME")
24+
password = System.getenv("GRADLE_BUILD_CACHE_PASSWORD")
12125
}
12226
}
12327
}
12428
}
12529
}
126-

docs/changelog/118931.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
pr: 118931
2+
summary: Add a `LicenseAware` interface for licensed Nodes
3+
area: ES|QL
4+
type: enhancement
5+
issues:
6+
- 117405

docs/changelog/118941.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
pr: 118941
2+
summary: Allow archive and searchable snapshots indices in N-2 version
3+
area: Recovery
4+
type: enhancement
5+
issues: []
Lines changed: 88 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -1,103 +1,149 @@
11
[[task-queue-backlog]]
2-
=== Task queue backlog
2+
=== Backlogged task queue
33

4-
A backlogged task queue can prevent tasks from completing and put the cluster
5-
into an unhealthy state. Resource constraints, a large number of tasks being
6-
triggered at once, and long running tasks can all contribute to a backlogged
7-
task queue.
4+
*******************************
5+
*Product:* Elasticsearch +
6+
*Deployment type:* Elastic Cloud Enterprise, Elastic Cloud Hosted, Elastic Cloud on Kubernetes, Elastic Self-Managed +
7+
*Versions:* All
8+
*******************************
9+
10+
A backlogged task queue can prevent tasks from completing and lead to an
11+
unhealthy cluster state. Contributing factors include resource constraints,
12+
a large number of tasks triggered at once, and long-running tasks.
813

914
[discrete]
1015
[[diagnose-task-queue-backlog]]
11-
==== Diagnose a task queue backlog
16+
==== Diagnose a backlogged task queue
17+
18+
To identify the cause of the backlog, try these diagnostic actions.
1219

13-
**Check the thread pool status**
20+
* <<diagnose-task-queue-thread-pool>>
21+
* <<diagnose-task-queue-hot-thread>>
22+
* <<diagnose-task-queue-long-running-node-tasks>>
23+
* <<diagnose-task-queue-long-running-cluster-tasks>>
24+
25+
[discrete]
26+
[[diagnose-task-queue-thread-pool]]
27+
===== Check the thread pool status
1428

1529
A <<high-cpu-usage,depleted thread pool>> can result in
1630
<<rejected-requests,rejected requests>>.
1731

18-
Thread pool depletion might be restricted to a specific <<data-tiers,data tier>>. If <<hotspotting,hot spotting>> is occurring, one node might experience depletion faster than other nodes, leading to performance issues and a growing task backlog.
19-
20-
You can use the <<cat-thread-pool,cat thread pool API>> to see the number of
21-
active threads in each thread pool and how many tasks are queued, how many
22-
have been rejected, and how many have completed.
32+
Use the <<cat-thread-pool,cat thread pool API>> to monitor
33+
active threads, queued tasks, rejections, and completed tasks:
2334

2435
[source,console]
2536
----
2637
GET /_cat/thread_pool?v&s=t,n&h=type,name,node_name,active,queue,rejected,completed
2738
----
2839

29-
The `active` and `queue` statistics are instantaneous while the `rejected` and
30-
`completed` statistics are cumulative from node startup.
40+
* Look for high `active` and `queue` metrics, which indicate potential bottlenecks
41+
and opportunities to <<reduce-cpu-usage,reduce CPU usage>>.
42+
* Determine whether thread pool issues are specific to a <<data-tiers,data tier>>.
43+
* Check whether a specific node's thread pool is depleting faster than others. This
44+
might indicate <<resolve-task-queue-backlog-hotspotting,hot spotting>>.
3145

32-
**Inspect the hot threads on each node**
46+
[discrete]
47+
[[diagnose-task-queue-hot-thread]]
48+
===== Inspect hot threads on each node
3349

34-
If a particular thread pool queue is backed up, you can periodically poll the
35-
<<cluster-nodes-hot-threads,Nodes hot threads>> API to determine if the thread
36-
has sufficient resources to progress and gauge how quickly it is progressing.
50+
If a particular thread pool queue is backed up, periodically poll the
51+
<<cluster-nodes-hot-threads,nodes hot threads API>> to gauge the thread's
52+
progression and ensure it has sufficient resources:
3753

3854
[source,console]
3955
----
4056
GET /_nodes/hot_threads
4157
----
4258

43-
**Look for long running node tasks**
59+
Although the hot threads API response does not list the specific tasks running on a thread,
60+
it provides a summary of the thread's activities. You can correlate a hot threads response
61+
with a <<tasks,task management API response>> to identify any overlap with specific tasks. For
62+
example, if the hot threads response indicates the thread is `performing a search query`, you can
63+
<<diagnose-task-queue-long-running-node-tasks,check for long-running search tasks>> using the task management API.
64+
65+
[discrete]
66+
[[diagnose-task-queue-long-running-node-tasks]]
67+
===== Identify long-running node tasks
4468

45-
Long-running tasks can also cause a backlog. You can use the <<tasks,task
46-
management>> API to get information about the node tasks that are running.
47-
Check the `running_time_in_nanos` to identify tasks that are taking an
48-
excessive amount of time to complete.
69+
Long-running tasks can also cause a backlog. Use the <<tasks,task
70+
management API>> to check for excessive `running_time_in_nanos` values:
4971

5072
[source,console]
5173
----
5274
GET /_tasks?pretty=true&human=true&detailed=true
5375
----
5476

55-
If a particular `action` is suspected, you can filter the tasks further. The most common long-running tasks are <<docs-bulk,bulk index>>- or search-related.
77+
You can filter on a specific `action`, such as <<docs-bulk,bulk indexing>> or search-related tasks.
78+
These tend to be long-running.
5679

57-
* Filter for <<docs-bulk,bulk index>> actions:
80+
* Filter on <<docs-bulk,bulk index>> actions:
5881
+
5982
[source,console]
6083
----
6184
GET /_tasks?human&detailed&actions=indices:data/write/bulk
6285
----
6386

64-
* Filter for search actions:
87+
* Filter on search actions:
6588
+
6689
[source,console]
6790
----
6891
GET /_tasks?human&detailed&actions=indices:data/read/search
6992
----
7093

71-
The API response may contain additional tasks columns, including `description` and `header`, which provides the task parameters, target, and requestor. You can use this information to perform further diagnosis.
94+
Long-running tasks might need to be <<resolve-task-queue-backlog-stuck-tasks,canceled>>.
7295

73-
**Look for long running cluster tasks**
96+
[discrete]
97+
[[diagnose-task-queue-long-running-cluster-tasks]]
98+
===== Look for long-running cluster tasks
7499

75-
A task backlog might also appear as a delay in synchronizing the cluster state. You
76-
can use the <<cluster-pending,cluster pending tasks API>> to get information
77-
about the pending cluster state sync tasks that are running.
100+
Use the <<cluster-pending,cluster pending tasks API>> to identify delays
101+
in cluster state synchronization:
78102

79103
[source,console]
80104
----
81105
GET /_cluster/pending_tasks
82106
----
83107

84-
Check the `timeInQueue` to identify tasks that are taking an excessive amount
85-
of time to complete.
108+
Tasks with a high `timeInQueue` value are likely contributing to the backlog and might
109+
need to be <<resolve-task-queue-backlog-stuck-tasks,canceled>>.
86110

87111
[discrete]
88112
[[resolve-task-queue-backlog]]
89-
==== Resolve a task queue backlog
113+
==== Recommendations
114+
115+
After identifying problematic threads and tasks, resolve the issue by increasing resources or canceling tasks.
90116

91-
**Increase available resources**
117+
[discrete]
118+
[[resolve-task-queue-backlog-resources]]
119+
===== Increase available resources
92120

93-
If tasks are progressing slowly and the queue is backing up,
94-
you might need to take steps to <<reduce-cpu-usage>>.
121+
If tasks are progressing slowly, try <<reduce-cpu-usage,reducing CPU usage>>.
95122

96-
In some cases, increasing the thread pool size might help.
97-
For example, the `force_merge` thread pool defaults to a single thread.
123+
In some cases, you might need to increase the thread pool size. For example, the `force_merge` thread pool defaults to a single thread.
98124
Increasing the size to 2 might help reduce a backlog of force merge requests.
99125

100-
**Cancel stuck tasks**
126+
[discrete]
127+
[[resolve-task-queue-backlog-stuck-tasks]]
128+
===== Cancel stuck tasks
129+
130+
If an active task's <<diagnose-task-queue-hot-thread,hot thread>> shows no progress, consider <<task-cancellation,canceling the task>>.
131+
132+
[discrete]
133+
[[resolve-task-queue-backlog-hotspotting]]
134+
===== Address hot spotting
135+
136+
If a specific node's thread pool is depleting faster than others, try addressing
137+
uneven node resource utilization, also known as hot spotting.
138+
For details on actions you can take, such as rebalancing shards, see <<hotspotting>>.
139+
140+
[discrete]
141+
==== Resources
142+
143+
Related symptoms:
144+
145+
* <<high-cpu-usage>>
146+
* <<rejected-requests>>
147+
* <<hotspotting>>
101148

102-
If you find the active task's hot thread isn't progressing and there's a backlog,
103-
consider canceling the task.
149+
// TODO add link to standard Additional resources when that topic exists

muted-tests.yml

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -298,9 +298,16 @@ tests:
298298
issue: https://github.com/elastic/elasticsearch/issues/116777
299299
- class: org.elasticsearch.xpack.security.authc.ldap.ActiveDirectoryRunAsIT
300300
issue: https://github.com/elastic/elasticsearch/issues/115727
301-
- class: org.elasticsearch.cluster.coordination.NodeJoinExecutorTests
302-
method: testSuccess
303-
issue: https://github.com/elastic/elasticsearch/issues/119052
301+
- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosAuthenticationIT
302+
issue: https://github.com/elastic/elasticsearch/issues/118414
303+
- class: org.elasticsearch.xpack.esql.qa.multi_node.EsqlClientYamlIT
304+
issue: https://github.com/elastic/elasticsearch/issues/119086
305+
- class: org.elasticsearch.xpack.spatial.index.query.ShapeQueryBuilderOverShapeTests
306+
method: testToQuery
307+
issue: https://github.com/elastic/elasticsearch/issues/119090
308+
- class: org.elasticsearch.xpack.spatial.index.query.GeoShapeQueryBuilderGeoShapeTests
309+
method: testToQuery
310+
issue: https://github.com/elastic/elasticsearch/issues/119091
304311

305312
# Examples:
306313
#

0 commit comments

Comments
 (0)