Commit 0153756

Merge branch 'main' into fix_128774_for_floating_point_comparisons
2 parents: 56ea498 + fb87484

File tree

57 files changed: +1571 additions, -350 deletions


docs/changelog/128736.yaml

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+pr: 128736
+summary: Add `index.lifecycle.skip` index-scoped setting to instruct ILM to skip processing specific indices
+area: ILM+SLM
+type: enhancement
+issues: []

docs/changelog/128750.yaml

Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+pr: 128750
+summary: Fix conversion of a Lucene wildcard pattern to a regexp
+area: ES|QL
+type: bug
+issues:
+ - 128677
+ - 128676

docs/changelog/128848.yaml

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+pr: 128848
+summary: Add `bucketedSort` based on int
+area: Search
+type: enhancement
+issues: []

docs/changelog/128870.yaml

Lines changed: 5 additions & 0 deletions

@@ -0,0 +1,5 @@
+pr: 128870
+summary: Check `TooComplex` exception for `HasPrivileges` body
+area: Authorization
+type: enhancement
+issues: []

modules/repository-azure/src/main/java/org/elasticsearch/repositories/azure/AzureBlobStore.java

Lines changed: 88 additions & 63 deletions

@@ -66,6 +66,7 @@
 import org.elasticsearch.common.unit.ByteSizeUnit;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.util.BigArrays;
+import org.elasticsearch.core.Assertions;
 import org.elasticsearch.core.CheckedConsumer;
 import org.elasticsearch.core.IOUtils;
 import org.elasticsearch.core.Nullable;
@@ -74,6 +75,7 @@
 import org.elasticsearch.repositories.azure.AzureRepository.Repository;
 import org.elasticsearch.repositories.blobstore.ChunkedBlobOutputStream;
 import org.elasticsearch.rest.RestStatus;
+import org.elasticsearch.threadpool.ThreadPool;

 import java.io.FilterInputStream;
 import java.io.IOException;
@@ -101,6 +103,7 @@
 import java.util.concurrent.TimeUnit;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
 import java.util.concurrent.atomic.LongAdder;
 import java.util.function.BiPredicate;
 import java.util.stream.Collectors;
@@ -507,7 +510,11 @@ void writeBlobAtomic(
             return asyncClient.commitBlockList(
                 multiParts.stream().map(MultiPart::blockId).toList(),
                 failIfAlreadyExists == false
-            ).doOnSuccess(unused -> logger.debug("{}: all {} parts committed", blobName, multiParts.size()));
+            )
+                .doOnSuccess(unused -> logger.debug("{}: all {} parts committed", blobName, multiParts.size()))
+                // Note: non-committed uploaded blocks will be deleted by Azure after a week
+                // (see https://docs.microsoft.com/en-us/rest/api/storageservices/put-block#remarks)
+                .doOnError(e -> logger.error(() -> format("%s: failed to commit %d parts", blobName, multiParts.size()), e));
         })
             .block();
     }
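The point of this hunk is that committing the block list now logs failures as well as successes, by adding a doOnError side effect to the reactive chain. As a minimal, standalone sketch of that Reactor pattern (class name and values are invented here, only reactor-core is assumed, and this is not the repository code), both side-effect operators observe the outcome without altering it:

import reactor.core.publisher.Mono;

public class SideEffectLoggingSketch {
    public static void main(String[] args) {
        // A Mono standing in for the commitBlockList call; "commit-result" is a placeholder value.
        Mono<String> commit = Mono.fromCallable(() -> "commit-result")
            // Runs when the Mono completes successfully; the result passes through unchanged.
            .doOnSuccess(result -> System.out.println("all parts committed: " + result))
            // Runs only when the Mono terminates with an error; the error still propagates downstream.
            .doOnError(e -> System.err.println("failed to commit parts: " + e));

        // block() subscribes and waits for the result, mirroring the synchronous .block() call above.
        System.out.println(commit.block());
    }
}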
@@ -562,12 +569,13 @@ private static Mono<String> stageBlock(
             multiPart.blockOffset()
         );
         try {
-            var stream = toSynchronizedInputStream(blobName, provider.apply(multiPart.blockOffset(), multiPart.blockSize()), multiPart);
+            final var stream = provider.apply(multiPart.blockOffset(), multiPart.blockSize());
+            assert stream.markSupported() : "provided input stream must support mark and reset";
             boolean success = false;
             try {
                 var stageBlock = asyncClient.stageBlock(
                     multiPart.blockId(),
-                    toFlux(stream, multiPart.blockSize(), DEFAULT_UPLOAD_BUFFERS_SIZE),
+                    toFlux(wrapInputStream(blobName, stream, multiPart), multiPart.blockSize(), DEFAULT_UPLOAD_BUFFERS_SIZE),
                     multiPart.blockSize()
                 ).doOnSuccess(unused -> {
                     logger.debug(() -> format("%s: part [%s] of size [%s] uploaded", blobName, multiPart.part(), multiPart.blockSize()));
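This hunk asserts that the provided stream supports mark/reset because toFlux() (further below) marks the stream up front and rewinds it if a block upload has to be retried. A rough, self-contained illustration of that mark-before-upload, reset-on-retry idea (all names and the simulated failure are invented for this sketch; it is not the Azure code path):

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;

public class MarkResetRetrySketch {

    // Pretend upload that consumes the stream and optionally fails; purely illustrative.
    static void uploadOnce(InputStream in, boolean fail) throws IOException {
        byte[] buffer = new byte[4];
        while (in.read(buffer) != -1) {
            // consume the chunk
        }
        if (fail) {
            throw new IOException("simulated transient failure");
        }
    }

    public static void main(String[] args) throws IOException {
        InputStream part = new ByteArrayInputStream("example part data".getBytes(StandardCharsets.UTF_8));
        assert part.markSupported();  // same precondition the stageBlock change asserts
        part.mark(Integer.MAX_VALUE); // remember the start of this part before the first attempt

        try {
            uploadOnce(part, true);   // first attempt consumes the stream and fails
        } catch (IOException e) {
            part.reset();             // rewind to the mark so the retry re-reads the same bytes
            uploadOnce(part, false);  // retry succeeds with the identical part content
        }
        System.out.println("part uploaded after retry");
    }
}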
@@ -760,88 +768,106 @@ public synchronized int read() throws IOException {
             // we read the input stream (i.e. when it's rate limited)
         }

-    private static InputStream toSynchronizedInputStream(String blobName, InputStream delegate, MultiPart multipart) {
-        assert delegate.markSupported() : "An InputStream with mark support was expected";
-        // We need to introduce a read barrier in order to provide visibility for the underlying
-        // input stream state as the input stream can be read from different threads.
-        // TODO See if this is still needed
+    /**
+     * Wraps an {@link InputStream} to assert that it is read only by a single thread at a time and to add log traces.
+     */
+    private static InputStream wrapInputStream(final String blobName, final InputStream delegate, final MultiPart multipart) {
         return new FilterInputStream(delegate) {

+            private final AtomicReference<Thread> currentThread = Assertions.ENABLED ? new AtomicReference<>() : null;
             private final boolean isTraceEnabled = logger.isTraceEnabled();

             @Override
-            public synchronized int read(byte[] b, int off, int len) throws IOException {
-                var result = super.read(b, off, len);
-                if (isTraceEnabled) {
-                    logger.trace("{} reads {} bytes from {} part {}", Thread.currentThread(), result, blobName, multipart.part());
-                }
-                return result;
-            }
-
-            @Override
-            public synchronized int read() throws IOException {
-                var result = super.read();
-                if (isTraceEnabled) {
-                    logger.trace("{} reads {} byte from {} part {}", Thread.currentThread(), result, blobName, multipart.part());
-                }
-                return result;
-            }
-
-            @Override
-            public synchronized void mark(int readlimit) {
-                if (isTraceEnabled) {
-                    logger.trace("{} marks stream {} part {}", Thread.currentThread(), blobName, multipart.part());
-                }
-                super.mark(readlimit);
-            }
-
-            @Override
-            public synchronized void reset() throws IOException {
-                if (isTraceEnabled) {
-                    logger.trace("{} resets stream {} part {}", Thread.currentThread(), blobName, multipart.part());
+            public int read(byte[] b, int off, int len) throws IOException {
+                assert assertThread(null, Thread.currentThread());
+                assert ThreadPool.assertCurrentThreadPool(AzureRepositoryPlugin.REPOSITORY_THREAD_POOL_NAME);
+                try {
+                    var result = super.read(b, off, len);
+                    if (isTraceEnabled) {
+                        logger.trace("{} reads {} bytes from {} part {}", Thread.currentThread(), result, blobName, multipart.part());
+                    }
+                    return result;
+                } finally {
+                    assert assertThread(Thread.currentThread(), null);
                 }
-                super.reset();
             }

             @Override
-            public synchronized void close() throws IOException {
-                if (isTraceEnabled) {
-                    logger.trace("{} closes stream {} part {}", Thread.currentThread(), blobName, multipart.part());
+            public int read() throws IOException {
+                assert assertThread(null, Thread.currentThread());
+                assert ThreadPool.assertCurrentThreadPool(AzureRepositoryPlugin.REPOSITORY_THREAD_POOL_NAME);
+                try {
+                    var result = super.read();
+                    if (isTraceEnabled) {
+                        logger.trace("{} reads {} byte from {} part {}", Thread.currentThread(), result, blobName, multipart.part());
+                    }
+                    return result;
+                } finally {
+                    assert assertThread(Thread.currentThread(), null);
                 }
-                super.close();
             }

-            @Override
-            public String toString() {
-                return blobName + " part [" + multipart.part() + "] of size [" + multipart.blockSize() + ']';
+            private boolean assertThread(Thread current, Thread updated) {
+                final Thread witness = currentThread.compareAndExchange(current, updated);
+                assert witness == current
+                    : "Unable to set current thread to ["
+                        + updated
+                        + "]: expected thread ["
+                        + current
+                        + "] to be the thread currently accessing the input stream for reading, but thread "
+                        + witness
+                        + " is already reading "
+                        + blobName
+                        + " part "
+                        + multipart.part();
+                return true;
             }
         };
     }

-    private static Flux<ByteBuffer> toFlux(InputStream stream, long length, int chunkSize) {
-        assert stream.markSupported() : "An InputStream with mark support was expected";
-        // We need to mark the InputStream as it's possible that we need to retry for the same chunk
+    /**
+     * Converts an input stream to a Flux of ByteBuffer. This method also checks that the stream has provided the expected number of bytes.
+     *
+     * @param stream the input stream that needs to be converted
+     * @param length the expected length in bytes of the input stream
+     * @param byteBufferSize the size of the ByteBuffers to be created
+     **/
+    private static Flux<ByteBuffer> toFlux(InputStream stream, long length, final int byteBufferSize) {
+        assert stream.markSupported() : "input stream must support mark and reset";
+        // always marks the input stream in case it needs to be retried
         stream.mark(Integer.MAX_VALUE);
+        // defer the creation of the flux until it is subscribed
        return Flux.defer(() -> {
-            // TODO Code in this Flux.defer() can be concurrently executed by multiple threads?
             try {
                 stream.reset();
             } catch (IOException e) {
-                throw new RuntimeException(e);
+                // Flux.defer() catches and propagates the exception
+                throw new UncheckedIOException(e);
             }
+            // the number of bytes read is updated in a thread pool (repository_azure) and later compared to the expected length in another
+            // thread pool (azure_event_loop), so we need this to be atomic.
             final var bytesRead = new AtomicLong(0L);
-            // This flux is subscribed by a downstream operator that finally queues the
-            // buffers into netty output queue. Sadly we are not able to get a signal once
-            // the buffer has been flushed, so we have to allocate those and let the GC to
-            // reclaim them (see MonoSendMany). Additionally, that very same operator requests
-            // 128 elements (that's hardcoded) once it's subscribed (later on, it requests
-            // by 64 elements), that's why we provide 64kb buffers.

-            // length is at most 100MB so it's safe to cast back to an integer in this case
-            final int parts = (int) length / chunkSize;
-            final long remaining = length % chunkSize;
-            return Flux.range(0, remaining == 0 ? parts : parts + 1).map(i -> i * chunkSize).concatMap(pos -> Mono.fromCallable(() -> {
-                long count = pos + chunkSize > length ? length - pos : chunkSize;
+            assert length <= ByteSizeValue.ofMb(100L).getBytes() : length;
+            // length is at most 100MB so it's safe to cast back to an integer
+            final int parts = Math.toIntExact(length / byteBufferSize);
+            final long remaining = length % byteBufferSize;
+
+            // This flux is subscribed by a downstream subscriber (reactor.netty.channel.MonoSendMany) that queues the buffers into netty
+            // output queue. Sadly we are not able to get a signal once the buffer has been flushed, so we have to allocate those and let
+            // the GC to reclaim them. Additionally, the MonoSendMany subscriber requests 128 elements from the flux when it subscribes to
+            // it. This 128 value is hardcoded in reactor.netty.channel.MonoSend.MAX_SIZE). After 128 byte buffers have been published by
+            // the flux, the MonoSendMany subscriber requests 64 more byte buffers (see reactor.netty.channel.MonoSend.REFILL_SIZE) and so
+            // on.
+            //
+            // So this flux instantiates 128 ByteBuffer objects of DEFAULT_UPLOAD_BUFFERS_SIZE bytes in heap every time the NettyOutbound in
+            // the Azure's Netty event loop requests byte buffers to write to the network channel. That represents 128 * 64kb = 8 mb per
+            // flux which is aligned with BlobAsyncClient.BLOB_DEFAULT_HTBB_UPLOAD_BLOCK_SIZE. The creation of the ByteBuffer objects are
+            // forked to the repository_azure thread pool, which has a maximum of 15 threads (most of the time, can be less than that for
+            // nodes with less than 750mb heap). It means that max. 15 * 8 = 120mb bytes are allocated on heap at a time here (omitting the
+            // ones already created and pending garbage collection).
+            return Flux.range(0, remaining == 0 ? parts : parts + 1).map(i -> i * byteBufferSize).concatMap(pos -> Mono.fromCallable(() -> {
+                long count = pos + byteBufferSize > length ? length - pos : byteBufferSize;
                 int numOfBytesRead = 0;
                 int offset = 0;
                 int len = (int) count;
@@ -867,9 +893,8 @@ private static Flux<ByteBuffer> toFlux(InputStream stream, long length, int chun
                 );
             }
         });
-        // We need to subscribe on a different scheduler to avoid blocking the io threads when we read the input stream
+        // subscribe on a different scheduler to avoid blocking the network io threads when reading bytes from disk
         }).subscribeOn(Schedulers.elastic());
-
     }

     /**
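The rewritten toFlux() documents how a block of at most 100 MB is sliced into byteBufferSize-sized buffers. A small self-contained sketch of just that parts / remaining / count arithmetic (hypothetical class and example values, not repository code) makes the slicing concrete:

import java.util.ArrayList;
import java.util.List;

public class ChunkMathSketch {

    // Returns the size of each buffer needed to cover `length` bytes using buffers of `byteBufferSize`,
    // mirroring the parts/remaining/count arithmetic in toFlux().
    static List<Long> chunkSizes(long length, int byteBufferSize) {
        final int parts = Math.toIntExact(length / byteBufferSize); // number of full buffers
        final long remaining = length % byteBufferSize;             // trailing partial buffer, if any
        final int total = remaining == 0 ? parts : parts + 1;

        List<Long> sizes = new ArrayList<>();
        for (int i = 0; i < total; i++) {
            long pos = (long) i * byteBufferSize;
            // the last buffer may be short; all others are exactly byteBufferSize
            long count = pos + byteBufferSize > length ? length - pos : byteBufferSize;
            sizes.add(count);
        }
        return sizes;
    }

    public static void main(String[] args) {
        System.out.println(chunkSizes(150 * 1024, 64 * 1024));
    }
}

Running it for a 150 KB part with 64 KB buffers prints [65536, 65536, 22528], i.e. two full buffers plus a short trailing one, which is the same shape the Flux.range(...).concatMap(...) pipeline produces.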

muted-tests.yml

Lines changed: 3 additions & 36 deletions

@@ -489,42 +489,6 @@ tests:
 - class: org.elasticsearch.packaging.test.DockerTests
   method: test085EnvironmentVariablesAreRespectedUnderDockerExec
   issue: https://github.com/elastic/elasticsearch/issues/128115
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLikeTests
-  method: testEvaluateInManyThreads {TestCase=100 random code points matches self case insensitive with keyword}
-  issue: https://github.com/elastic/elasticsearch/issues/128676
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLikeTests
-  method: testEvaluateInManyThreads {TestCase=100 random code points matches self case insensitive with text}
-  issue: https://github.com/elastic/elasticsearch/issues/128677
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.RLikeTests
-  method: testEvaluateInManyThreads {TestCase=100 random code points matches self case insensitive with text}
-  issue: https://github.com/elastic/elasticsearch/issues/128705
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.RLikeTests
-  method: testEvaluateInManyThreads {TestCase=100 random code points matches self case insensitive with keyword}
-  issue: https://github.com/elastic/elasticsearch/issues/128706
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.RLikeTests
-  method: testCrankyEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with text}
-  issue: https://github.com/elastic/elasticsearch/issues/128710
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.RLikeTests
-  method: testCrankyEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with keyword}
-  issue: https://github.com/elastic/elasticsearch/issues/128711
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLikeTests
-  method: testCrankyEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with text}
-  issue: https://github.com/elastic/elasticsearch/issues/128712
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLikeTests
-  method: testCrankyEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with keyword}
-  issue: https://github.com/elastic/elasticsearch/issues/128713
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLikeTests
-  method: testEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with text}
-  issue: https://github.com/elastic/elasticsearch/issues/128714
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.WildcardLikeTests
-  method: testEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with keyword}
-  issue: https://github.com/elastic/elasticsearch/issues/128715
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.RLikeTests
-  method: testEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with keyword}
-  issue: https://github.com/elastic/elasticsearch/issues/128716
-- class: org.elasticsearch.xpack.esql.expression.function.scalar.string.RLikeTests
-  method: testEvaluateBlockWithNulls {TestCase=100 random code points matches self case insensitive with text}
-  issue: https://github.com/elastic/elasticsearch/issues/128717
 - class: org.elasticsearch.compute.operator.LimitOperatorTests
   method: testEarlyTermination
   issue: https://github.com/elastic/elasticsearch/issues/128721
@@ -561,6 +525,9 @@ tests:
 - class: org.elasticsearch.upgrades.IndexSortUpgradeIT
   method: testIndexSortForNumericTypes {upgradedNodes=2}
   issue: https://github.com/elastic/elasticsearch/issues/128863
+- class: org.elasticsearch.packaging.test.DockerTests
+  method: test150MachineDependentHeap
+  issue: https://github.com/elastic/elasticsearch/issues/128120

 # Examples:
 #

plugins/repository-hdfs/build.gradle

Lines changed: 16 additions & 6 deletions

@@ -7,26 +7,36 @@
  * License v3.0 only", or the "Server Side Public License, v 1".
  */

-import org.elasticsearch.gradle.internal.test.RestIntegTestTask
+
 import org.elasticsearch.gradle.OS
+import org.elasticsearch.gradle.internal.test.RestIntegTestTask

 apply plugin: 'elasticsearch.internal-java-rest-test'
 apply plugin: 'elasticsearch.internal-yaml-rest-test'

 esplugin {
   description = 'The HDFS repository plugin adds support for Hadoop Distributed File-System (HDFS) repositories.'
-  classname ='org.elasticsearch.repositories.hdfs.HdfsPlugin'
+  classname = 'org.elasticsearch.repositories.hdfs.HdfsPlugin'
 }

 versions << [
   'hadoop': '3.4.1'
 ]

 def patched = Attribute.of('patched', Boolean)
+def hdfsVersionAttr = Attribute.of('hdfs.major.version', Integer)

 configurations {
-  hdfsFixture2
-  hdfsFixture3
+  hdfsFixture2 {
+    attributes {
+      attribute(hdfsVersionAttr, 2)
+    }
+  }
+  hdfsFixture3 {
+    attributes {
+      attribute(hdfsVersionAttr, 3)
+    }
+  }
   compileClasspath {
     attributes {
       attribute(patched, true)
@@ -82,8 +92,8 @@ dependencies {
   yamlRestTestRuntimeOnly "commons-cli:commons-cli:1.5.0"
   yamlRestTestRuntimeOnly "org.apache.logging.log4j:log4j-1.2-api:${versions.log4j}"

-  hdfsFixture2 project(path: ':test:fixtures:hdfs-fixture', configuration: 'shadowedHdfs2')
-  hdfsFixture3 project(path: ':test:fixtures:hdfs-fixture', configuration: 'shadow')
+  hdfsFixture2 project(':test:fixtures:hdfs-fixture')
+  hdfsFixture3 project(':test:fixtures:hdfs-fixture')

   attributesSchema {
     attribute(patched)
