From 8de8f1ff425b0a3bfdecfed3420a7968948f35c1 Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Thu, 14 Aug 2025 14:06:37 +0100 Subject: [PATCH 1/9] CPS search should not use `skip_unavailable` --- .../pull-request/performance-benchmark.yml | 3 + .buildkite/pull-requests.json | 2 +- .../ExponentialHistogramGenerationBench.java | 3 +- .../ExponentialHistogramMergeBench.java | 14 +- .../vector/PackAsBinaryBenchmark.java | 9 + .../vector/TransposeHalfByteBenchmark.java | 9 + catalog-info.yaml | 36 ++ docs/changelog/132675.yaml | 5 + docs/changelog/132765.yaml | 5 + docs/changelog/132833.yaml | 5 + docs/changelog/132950.yaml | 5 + .../functions/description/v_magnitude.md | 6 + .../functions/examples/v_magnitude.md | 24 + .../_snippets/functions/layout/v_magnitude.md | 27 + .../functions/parameters/v_magnitude.md | 7 + .../esql/images/functions/v_magnitude.svg | 1 + .../definition/functions/v_magnitude.json | 12 + .../esql/kibana/docs/functions/v_magnitude.md | 10 + libs/exponential-histogram/build.gradle | 3 + .../licenses/lucene-core-LICENSE.txt | 475 ++++++++++++++++++ .../licenses/lucene-core-NOTICE.txt | 192 +++++++ .../BucketArrayIterator.java | 79 +++ .../exponentialhistogram/DownscaleStats.java | 4 + .../EmptyExponentialHistogram.java | 84 ++++ .../ExponentialHistogram.java | 51 +- .../ExponentialHistogramCircuitBreaker.java | 41 ++ .../ExponentialHistogramGenerator.java | 72 ++- .../ExponentialHistogramMerger.java | 107 ++-- .../FixedCapacityExponentialHistogram.java | 90 ++-- .../RamEstimationUtil.java | 44 ++ .../ReleasableExponentialHistogram.java | 37 ++ .../exponentialhistogram/ZeroBucket.java | 4 + .../DownscaleStatsTests.java | 5 +- .../ExponentialHistogramGeneratorTests.java | 51 +- .../ExponentialHistogramMergerTests.java | 95 +++- .../ExponentialHistogramTestCase.java | 75 +++ ...ixedCapacityExponentialHistogramTests.java | 17 +- .../QuantileAccuracyTests.java | 17 +- .../RamEstimationUtilTests.java | 48 ++ .../exponentialhistogram/ZeroBucketTests.java | 4 +- .../elasticsearch/simdvec/ESVectorUtil.java | 31 ++ .../DefaultESVectorUtilSupport.java | 83 +++ .../vectorization/ESVectorUtilSupport.java | 4 + .../PanamaESVectorUtilSupport.java | 180 +++++++ .../simdvec/ESVectorUtilTests.java | 29 ++ .../mapper/extras/ScaledFloatFieldMapper.java | 7 +- muted-tests.yml | 57 ++- .../org/elasticsearch/search/CCSDuelIT.java | 2 - ...rchiveGenerateInitialCredentialsTests.java | 2 - .../test/ingest/80_ingest_simulate.yml | 128 +++++ .../elasticsearch/test/knn/CmdLineArgs.java | 19 +- .../test/knn/KnnIndexTester.java | 19 +- .../elasticsearch/test/knn/KnnSearcher.java | 11 +- .../rest-api-spec/api/inference.put_ai21.json | 35 ++ .../api/inference.put_llama.json | 35 ++ rest-api-spec/src/main/resources/schema.json | 12 +- .../cluster/ClusterInfoServiceIT.java | 188 ++++++- .../index/shard/IndexShardIT.java | 8 +- .../indices/recovery/DanglingIndicesIT.java | 1 - .../ccs/CpsDoesNotUseSkipUnavailableIT.java | 129 +++++ .../org/elasticsearch/TransportVersions.java | 1 + ...ortNodeUsageStatsForThreadPoolsAction.java | 5 +- .../bulk/TransportSimulateBulkAction.java | 34 +- .../action/ingest/SimulateIndexResponse.java | 30 +- .../action/search/SearchResponse.java | 12 +- .../action/search/TransportSearchAction.java | 61 +-- .../elasticsearch/cluster/ClusterModule.java | 2 + .../cluster/InternalClusterInfoService.java | 6 +- .../ShardMovementWriteLoadSimulator.java | 27 +- .../WriteLoadConstraintSettings.java | 41 +- .../allocator/DesiredBalanceMetrics.java | 112 +++-- 
.../allocator/DesiredBalanceReconciler.java | 20 +- .../DesiredBalanceShardsAllocator.java | 4 + .../decider/WriteLoadConstraintDecider.java | 124 +++++ ...utionTimeTrackingEsThreadPoolExecutor.java | 21 +- .../elasticsearch/index/IndexVersions.java | 1 + .../tsdb/es819/BulkNumericDocValues.java | 27 - .../es819/ES819TSDBDocValuesProducer.java | 121 ++++- .../index/codec/vectors/BQSpaceUtils.java | 45 +- .../index/codec/vectors/BQVectorUtils.java | 27 +- .../index/codec/vectors/IVFVectorsFormat.java | 4 +- .../index/codec/vectors/IVFVectorsReader.java | 26 +- .../index/fieldvisitor/StoredFieldLoader.java | 20 +- .../elasticsearch/index/get/GetResult.java | 2 +- .../index/get/ShardGetService.java | 3 +- .../mapper/AbstractGeometryFieldMapper.java | 6 +- .../index/mapper/BlockDocValuesReader.java | 14 +- .../index/mapper/BlockLoader.java | 18 +- .../index/mapper/BooleanFieldMapper.java | 6 +- .../index/mapper/DateFieldMapper.java | 6 +- .../FallbackSyntheticSourceBlockLoader.java | 88 ++-- .../mapper/IgnoredSourceFieldMapper.java | 277 +++++++++- .../index/mapper/IpFieldMapper.java | 6 +- .../index/mapper/KeywordFieldMapper.java | 6 +- .../index/mapper/MapperFeatures.java | 4 +- .../elasticsearch/index/mapper/Mapping.java | 8 + .../index/mapper/MappingLookup.java | 2 +- .../index/mapper/NestedObjectMapper.java | 7 +- .../index/mapper/NumberFieldMapper.java | 94 +++- .../index/mapper/SourceLoader.java | 52 +- .../index/mapper/TextFieldMapper.java | 10 +- .../vectors/DenseVectorFieldMapper.java | 35 +- .../index/query/SearchExecutionContext.java | 4 +- .../org/elasticsearch/search/SearchHit.java | 2 +- .../fetch/PreloadedFieldLookupProvider.java | 22 +- .../vectors/AbstractIVFKnnVectorQuery.java | 61 ++- ...sifyingChildrenIVFKnnFloatVectorQuery.java | 6 +- .../vectors/IVFKnnFloatVectorQuery.java | 21 +- .../search/vectors/IVFKnnSearchStrategy.java | 14 +- .../elasticsearch/threadpool/ThreadPool.java | 17 + .../transport/RemoteClusterService.java | 14 + .../TransportSimulateBulkActionTests.java | 13 +- .../ingest/SimulateIndexResponseTests.java | 56 ++- .../cluster/ClusterModuleTests.java | 2 + ...rnalClusterInfoServiceSchedulingTests.java | 4 +- .../WriteLoadConstraintDeciderTests.java | 306 +++++++++++ ...TimeTrackingEsThreadPoolExecutorTests.java | 14 +- .../es819/ES819TSDBDocValuesFormatTests.java | 179 ++++++- .../index/mapper/DateFieldMapperTests.java | 9 +- ...edSourceFieldMapperConfigurationTests.java | 8 +- .../mapper/IgnoredSourceFieldMapperTests.java | 81 +-- .../index/mapper/LongFieldMapperTests.java | 2 - .../TextFieldBlockLoaderTests.java | 2 +- .../TextFieldWithParentBlockLoaderTests.java | 55 +- .../vectors/DenseVectorFieldMapperTests.java | 8 +- .../ingest/RestSimulateIngestActionTests.java | 23 +- .../AbstractIVFKnnVectorQueryTestCase.java | 7 +- ...ngChildrenIVFKnnFloatVectorQueryTests.java | 2 +- .../vectors/IVFKnnFloatVectorQueryTests.java | 4 +- .../datageneration/FieldType.java | 6 +- .../DefaultMappingParametersHandler.java | 25 +- .../DefaultObjectGenerationHandler.java | 11 + .../datasource/MultifieldAddonHandler.java | 97 ++++ .../leaf/MatchOnlyTextFieldDataGenerator.java | 28 ++ .../matchers/source/FieldSpecificMatcher.java | 25 +- .../queries/LeafQueryGenerator.java | 38 +- .../queries/MappingPredicates.java | 64 +++ .../queries/QueryGenerator.java | 56 +-- .../index/mapper/MapperServiceTestCase.java | 3 +- .../index/mapper/MapperTestCase.java | 5 +- .../NativeArrayIntegrationTestCase.java | 19 +- .../elasticsearch/test/ESIntegTestCase.java | 39 ++ 
.../test/WildcardFieldMaskingReader.java | 80 +++ .../analytics/ttest/TTestAggregatorTests.java | 1 - .../DocumentSubsetBitsetCache.java | 45 +- .../accesscontrol/FieldSubsetReader.java | 27 +- .../accesscontrol/FieldSubsetReaderTests.java | 15 +- .../xpack/deprecation/DeprecationHttpIT.java | 1 - .../xpack/eql/EsEQLCorrectnessIT.java | 2 - .../read/ComputeBlockLoaderFactory.java | 7 - .../read/DelegatingBlockLoaderFactory.java | 27 + .../read/TimeSeriesExtractFieldOperator.java | 6 - .../vector-cosine-similarity.csv-spec | 13 + .../resources/vector-dot-product.csv-spec | 46 +- .../main/resources/vector-l1-norm.csv-spec | 45 +- .../main/resources/vector-l2-norm.csv-spec | 39 +- .../main/resources/vector-magnitude.csv-spec | 87 ++++ .../action/CrossClusterCancellationIT.java | 21 - .../esql/action/CrossClusterEnrichIT.java | 7 +- .../esql/action/EsqlActionBreakerIT.java | 4 - ...sqlPluginWithEnterpriseOrTrialLicense.java | 5 - ...uginWithNonEnterpriseOrExpiredLicense.java | 5 - .../spatial/SpatialNoLicenseTestCase.java | 5 - .../vector/VectorSimilarityFunctionsIT.java | 40 +- .../xpack/esql/action/EsqlCapabilities.java | 12 +- .../xpack/esql/action/EsqlExecutionInfo.java | 24 +- .../esql/analysis/PlanCheckerProvider.java | 27 + .../xpack/esql/analysis/Verifier.java | 20 +- .../xpack/esql/execution/PlanExecutor.java | 9 +- .../function/EsqlFunctionRegistry.java | 4 +- .../expression/function/vector/Magnitude.java | 180 +++++++ .../vector/VectorSimilarityFunction.java | 15 +- .../function/vector/VectorWritables.java | 3 + .../logical/ReplaceLimitAndSortAsTopN.java | 2 +- .../xpack/esql/plan/logical/Aggregate.java | 8 +- .../xpack/esql/plan/logical/ChangePoint.java | 7 +- .../xpack/esql/plan/logical/Enrich.java | 67 ++- .../xpack/esql/plan/logical/ExecutesOn.java | 41 ++ .../xpack/esql/plan/logical/Fork.java | 2 +- .../plan/logical/inference/Completion.java | 3 +- .../esql/plan/logical/inference/Rerank.java | 3 +- .../xpack/esql/plan/logical/join/Join.java | 44 +- .../esql/plan/logical/join/LookupJoin.java | 27 +- .../esql/plugin/ClusterComputeHandler.java | 4 +- .../xpack/esql/plugin/EsqlPlugin.java | 21 +- .../xpack/esql/session/EsqlCCSUtils.java | 18 +- .../xpack/esql/session/EsqlSession.java | 167 +++--- .../xpack/esql/analysis/AnalyzerTests.java | 25 + .../xpack/esql/analysis/VerifierTests.java | 210 +------- ...tractVectorSimilarityFunctionTestCase.java | 34 +- .../vector/AbstractVectorTestCase.java | 37 ++ .../function/vector/MagnitudeTests.java | 76 +++ .../optimizer/LogicalPlanOptimizerTests.java | 1 - .../optimizer/OptimizerVerificationTests.java | 327 ++++++++++++ .../optimizer/PhysicalPlanOptimizerTests.java | 7 +- .../esql/plugin/ClusterRequestTests.java | 2 +- .../DataNodeRequestSerializationTests.java | 2 +- .../xpack/esql/session/EsqlCCSUtilsTests.java | 33 -- .../telemetry/PlanExecutorMetricsTests.java | 9 +- x-pack/plugin/logsdb/build.gradle | 8 +- .../xpack/logsdb/qa/DataGenerationHelper.java | 4 +- ...ardVersusLogsIndexModeChallengeRestIT.java | 1 + .../rest-api-spec/test/20_ignored_source.yml | 38 +- ...synthetic_source_mixed_disabled_fields.yml | 129 +++++ .../unsignedlong/UnsignedLongFieldMapper.java | 7 +- .../UnsignedLongFieldMapperTests.java | 1 - .../ml/integration/InferenceIngestIT.java | 2 - .../integration/BasicDistributedJobsIT.java | 1 - .../transport/filter/IPFilterTests.java | 1 - .../xpack/sql/qa/jdbc/FetchSizeTestCase.java | 3 - .../xpack/sql/qa/jdbc/JdbcErrorsTestCase.java | 2 - .../qa/jdbc/PreparedStatementTestCase.java | 2 - 
.../xpack/sql/qa/jdbc/ResultSetTestCase.java | 6 - .../xpack/sql/qa/security/JdbcSecurityIT.java | 1 - .../test/esql/190_lookup_join.yml | 16 + .../rest-api-spec/test/esql/230_folding.yml | 7 +- .../SmokeTestWatcherTestSuiteIT.java | 1 - .../authc/oidc/OpenIdConnectAuthIT.java | 1 - .../upgrades/TransformSurvivesUpgradeIT.java | 1 - 219 files changed, 6192 insertions(+), 1564 deletions(-) create mode 100644 .buildkite/pipelines/pull-request/performance-benchmark.yml create mode 100644 docs/changelog/132675.yaml create mode 100644 docs/changelog/132765.yaml create mode 100644 docs/changelog/132833.yaml create mode 100644 docs/changelog/132950.yaml create mode 100644 docs/reference/query-languages/esql/_snippets/functions/description/v_magnitude.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/examples/v_magnitude.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/layout/v_magnitude.md create mode 100644 docs/reference/query-languages/esql/_snippets/functions/parameters/v_magnitude.md create mode 100644 docs/reference/query-languages/esql/images/functions/v_magnitude.svg create mode 100644 docs/reference/query-languages/esql/kibana/definition/functions/v_magnitude.json create mode 100644 docs/reference/query-languages/esql/kibana/docs/functions/v_magnitude.md create mode 100644 libs/exponential-histogram/licenses/lucene-core-LICENSE.txt create mode 100644 libs/exponential-histogram/licenses/lucene-core-NOTICE.txt create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketArrayIterator.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramCircuitBreaker.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/RamEstimationUtil.java create mode 100644 libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ReleasableExponentialHistogram.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramTestCase.java create mode 100644 libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RamEstimationUtilTests.java create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_ai21.json create mode 100644 rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_llama.json create mode 100644 server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java create mode 100644 server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java delete mode 100644 server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java create mode 100644 server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java create mode 100644 test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java create mode 100644 test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java create mode 100644 test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java create mode 100644 test/framework/src/main/java/org/elasticsearch/test/WildcardFieldMaskingReader.java create mode 
100644 x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/PlanCheckerProvider.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java create mode 100644 x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ExecutesOn.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorTestCase.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/MagnitudeTests.java create mode 100644 x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/OptimizerVerificationTests.java create mode 100644 x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml diff --git a/.buildkite/pipelines/pull-request/performance-benchmark.yml b/.buildkite/pipelines/pull-request/performance-benchmark.yml new file mode 100644 index 0000000000000..a08905513a37c --- /dev/null +++ b/.buildkite/pipelines/pull-request/performance-benchmark.yml @@ -0,0 +1,3 @@ +steps: + - label: ":pipeline: TODO" + command: echo TODO diff --git a/.buildkite/pull-requests.json b/.buildkite/pull-requests.json index cd87f80375d85..9d383ddd72ddc 100644 --- a/.buildkite/pull-requests.json +++ b/.buildkite/pull-requests.json @@ -18,7 +18,7 @@ }, { "enabled": true, - "pipeline_slug": "elasticsearch-performance-esbench-pr", + "pipeline_slug": "elasticsearch-pull-request-performance-benchmark", "allow_org_users": true, "allowed_repo_permissions": [ "admin", diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java index f4c6e5315ed30..121789210eab4 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramGenerationBench.java @@ -9,6 +9,7 @@ package org.elasticsearch.benchmark.exponentialhistogram; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker; import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -59,7 +60,7 @@ public class ExponentialHistogramGenerationBench { @Setup public void setUp() { random = ThreadLocalRandom.current(); - histoGenerator = new ExponentialHistogramGenerator(bucketCount); + histoGenerator = ExponentialHistogramGenerator.create(bucketCount, ExponentialHistogramCircuitBreaker.noop()); DoubleSupplier nextRandom = () -> distribution.equals("GAUSSIAN") ? 
random.nextGaussian() : random.nextDouble(); diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java index 36e56e12a9190..b638ff140bf19 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/exponentialhistogram/ExponentialHistogramMergeBench.java @@ -11,6 +11,7 @@ import org.elasticsearch.exponentialhistogram.BucketIterator; import org.elasticsearch.exponentialhistogram.ExponentialHistogram; +import org.elasticsearch.exponentialhistogram.ExponentialHistogramCircuitBreaker; import org.elasticsearch.exponentialhistogram.ExponentialHistogramGenerator; import org.elasticsearch.exponentialhistogram.ExponentialHistogramMerger; import org.openjdk.jmh.annotations.Benchmark; @@ -56,13 +57,14 @@ public class ExponentialHistogramMergeBench { @Setup public void setUp() { random = ThreadLocalRandom.current(); - histoMerger = new ExponentialHistogramMerger(bucketCount); + ExponentialHistogramCircuitBreaker breaker = ExponentialHistogramCircuitBreaker.noop(); + histoMerger = ExponentialHistogramMerger.create(bucketCount, breaker); - ExponentialHistogramGenerator initial = new ExponentialHistogramGenerator(bucketCount); + ExponentialHistogramGenerator initialGenerator = ExponentialHistogramGenerator.create(bucketCount, breaker); for (int j = 0; j < bucketCount; j++) { - initial.add(Math.pow(1.001, j)); + initialGenerator.add(Math.pow(1.001, j)); } - ExponentialHistogram initialHisto = initial.get(); + ExponentialHistogram initialHisto = initialGenerator.getAndClear(); int cnt = getBucketCount(initialHisto); if (cnt < bucketCount) { throw new IllegalArgumentException("Expected bucket count to be " + bucketCount + ", but was " + cnt); @@ -72,14 +74,14 @@ public void setUp() { int dataPointSize = (int) Math.round(bucketCount * mergedHistoSizeFactor); for (int i = 0; i < toMerge.length; i++) { - ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(dataPointSize); + ExponentialHistogramGenerator generator = ExponentialHistogramGenerator.create(dataPointSize, breaker); int bucketIndex = 0; for (int j = 0; j < dataPointSize; j++) { bucketIndex += 1 + random.nextInt(bucketCount) % (Math.max(1, bucketCount / dataPointSize)); generator.add(Math.pow(1.001, bucketIndex)); } - toMerge[i] = generator.get(); + toMerge[i] = generator.getAndClear(); cnt = getBucketCount(toMerge[i]); if (cnt < dataPointSize) { throw new IllegalArgumentException("Expected bucket count to be " + dataPointSize + ", but was " + cnt); diff --git a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java index babfcfcc84745..38ab305a01441 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/PackAsBinaryBenchmark.java @@ -82,4 +82,13 @@ public void packAsBinaryLegacy(Blackhole bh) { bh.consume(packed); } } + + @Benchmark + @Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" }) + public void packAsBinaryPanama(Blackhole bh) { + for (int i = 0; i < numVectors; i++) { + BQVectorUtils.packAsBinary(qVectors[i], packed); + bh.consume(packed); + } + } } diff --git 
a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/TransposeHalfByteBenchmark.java b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/TransposeHalfByteBenchmark.java index ce2341f3442ff..b612e35d37292 100644 --- a/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/TransposeHalfByteBenchmark.java +++ b/benchmarks/src/main/java/org/elasticsearch/benchmark/vector/TransposeHalfByteBenchmark.java @@ -83,4 +83,13 @@ public void transposeHalfByteLegacy(Blackhole bh) { bh.consume(packed); } } + + @Benchmark + @Fork(jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" }) + public void transposeHalfBytePanama(Blackhole bh) { + for (int i = 0; i < numVectors; i++) { + BQSpaceUtils.transposeHalfByte(qVectors[i], packed); + bh.consume(packed); + } + } } diff --git a/catalog-info.yaml b/catalog-info.yaml index 0768e1670666f..9e98f56811220 100644 --- a/catalog-info.yaml +++ b/catalog-info.yaml @@ -301,3 +301,39 @@ spec: Daily: branch: main cronline: "@daily" +--- +# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/e57ee3bed7a6f73077a3f55a38e76e40ec87a7cf/rre.schema.json +apiVersion: backstage.io/v1alpha1 +kind: Resource +metadata: + name: buildkite-pipeline-elasticsearch-pull-request-performance-benchmark + description: Elasticsearch pull request performance benchmark + links: + - title: Pipeline + url: https://buildkite.com/elastic/elasticsearch-pull-request-performance-benchmark +spec: + type: buildkite-pipeline + system: buildkite + owner: group:elasticsearch-team + implementation: + apiVersion: buildkite.elastic.dev/v1 + kind: Pipeline + metadata: + description: ":elasticsearch: Runs performance benchmark in the PR context" + name: elasticsearch / pull-request / performance-benchmark + spec: + repository: elastic/elasticsearch + pipeline_file: .buildkite/pipelines/pull-request/performance-benchmark.yml + env: + ELASTIC_PR_COMMENTS_ENABLED: "true" + ELASTIC_SLACK_NOTIFICATIONS_ENABLED: "true" + SLACK_NOTIFICATIONS_CHANNEL: "#es-perf-build" + SLACK_NOTIFICATIONS_ALL_BRANCHES: "false" # TODO: change to true later + teams: + elasticsearch-team: {} + ml-core: {} + everyone: + access_level: BUILD_AND_READ + provider_settings: + build_pull_requests: true + trigger_mode: none diff --git a/docs/changelog/132675.yaml b/docs/changelog/132675.yaml new file mode 100644 index 0000000000000..a451a27334be7 --- /dev/null +++ b/docs/changelog/132675.yaml @@ -0,0 +1,5 @@ +pr: 132675 +summary: Add second max queue latency stat to `ClusterInfo` +area: Allocation +type: enhancement +issues: [] diff --git a/docs/changelog/132765.yaml b/docs/changelog/132765.yaml new file mode 100644 index 0000000000000..1b019e224c0ae --- /dev/null +++ b/docs/changelog/132765.yaml @@ -0,0 +1,5 @@ +pr: 132765 +summary: Implement `v_magnitude` function +area: ES|QL +type: feature +issues: [132768] diff --git a/docs/changelog/132833.yaml b/docs/changelog/132833.yaml new file mode 100644 index 0000000000000..e8b79035f78db --- /dev/null +++ b/docs/changelog/132833.yaml @@ -0,0 +1,5 @@ +pr: 132833 +summary: Adding simulate ingest effective mapping +area: Ingest Node +type: enhancement +issues: [] diff --git a/docs/changelog/132950.yaml b/docs/changelog/132950.yaml new file mode 100644 index 0000000000000..cba79800d2dfc --- /dev/null +++ b/docs/changelog/132950.yaml @@ -0,0 +1,5 @@ +pr: 132950 +summary: Speed up loading keyword fields with index sorts +area: "ES|QL" +type: enhancement +issues: [] diff --git 
a/docs/reference/query-languages/esql/_snippets/functions/description/v_magnitude.md b/docs/reference/query-languages/esql/_snippets/functions/description/v_magnitude.md new file mode 100644 index 0000000000000..5b66acddf19c5 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/description/v_magnitude.md @@ -0,0 +1,6 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Description** + +Calculates the magnitude of a dense_vector. + diff --git a/docs/reference/query-languages/esql/_snippets/functions/examples/v_magnitude.md b/docs/reference/query-languages/esql/_snippets/functions/examples/v_magnitude.md new file mode 100644 index 0000000000000..c9bed2cbc864e --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/examples/v_magnitude.md @@ -0,0 +1,24 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +**Example** + +```esql + from colors + | eval magnitude = v_magnitude(rgb_vector) + | sort magnitude desc, color asc +``` + +| color:text | magnitude:double | +| --- | --- | +| white | 441.6729431152344 | +| snow | 435.9185791015625 | +| azure | 433.1858825683594 | +| ivory | 433.1858825683594 | +| mint cream | 433.0704345703125 | +| sea shell | 426.25579833984375 | +| honeydew | 424.5291442871094 | +| old lace | 420.6352233886719 | +| corn silk | 418.2451477050781 | +| linen | 415.93267822265625 | + + diff --git a/docs/reference/query-languages/esql/_snippets/functions/layout/v_magnitude.md b/docs/reference/query-languages/esql/_snippets/functions/layout/v_magnitude.md new file mode 100644 index 0000000000000..2d2e8ae1fc0e0 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/layout/v_magnitude.md @@ -0,0 +1,27 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +## `V_MAGNITUDE` [esql-v_magnitude] +```{applies_to} +stack: development +serverless: preview +``` + +**Syntax** + +:::{image} ../../../images/functions/v_magnitude.svg +:alt: Embedded +:class: text-center +::: + + +:::{include} ../parameters/v_magnitude.md +::: + +:::{include} ../description/v_magnitude.md +::: + +:::{include} ../types/v_magnitude.md +::: + +:::{include} ../examples/v_magnitude.md +::: diff --git a/docs/reference/query-languages/esql/_snippets/functions/parameters/v_magnitude.md b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_magnitude.md new file mode 100644 index 0000000000000..5a7cf14ed7137 --- /dev/null +++ b/docs/reference/query-languages/esql/_snippets/functions/parameters/v_magnitude.md @@ -0,0 +1,7 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. 
+ +**Parameters** + +`input` +: dense_vector for which to compute the magnitude + diff --git a/docs/reference/query-languages/esql/images/functions/v_magnitude.svg b/docs/reference/query-languages/esql/images/functions/v_magnitude.svg new file mode 100644 index 0000000000000..7b32eee3f3d65 --- /dev/null +++ b/docs/reference/query-languages/esql/images/functions/v_magnitude.svg @@ -0,0 +1 @@ +V_MAGNITUDE(input) \ No newline at end of file diff --git a/docs/reference/query-languages/esql/kibana/definition/functions/v_magnitude.json b/docs/reference/query-languages/esql/kibana/definition/functions/v_magnitude.json new file mode 100644 index 0000000000000..2835d403e656e --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/definition/functions/v_magnitude.json @@ -0,0 +1,12 @@ +{ + "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it.", + "type" : "scalar", + "name" : "v_magnitude", + "description" : "Calculates the magnitude of a dense_vector.", + "signatures" : [ ], + "examples" : [ + " from colors\n | eval magnitude = v_magnitude(rgb_vector)\n | sort magnitude desc, color asc" + ], + "preview" : true, + "snapshot_only" : true +} diff --git a/docs/reference/query-languages/esql/kibana/docs/functions/v_magnitude.md b/docs/reference/query-languages/esql/kibana/docs/functions/v_magnitude.md new file mode 100644 index 0000000000000..236f4880eda49 --- /dev/null +++ b/docs/reference/query-languages/esql/kibana/docs/functions/v_magnitude.md @@ -0,0 +1,10 @@ +% This is generated by ESQL's AbstractFunctionTestCase. Do not edit it. See ../README.md for how to regenerate it. + +### V MAGNITUDE +Calculates the magnitude of a dense_vector. + +```esql + from colors + | eval magnitude = v_magnitude(rgb_vector) + | sort magnitude desc, color asc +``` diff --git a/libs/exponential-histogram/build.gradle b/libs/exponential-histogram/build.gradle index 08628d0f20875..3b163f42f1015 100644 --- a/libs/exponential-histogram/build.gradle +++ b/libs/exponential-histogram/build.gradle @@ -12,6 +12,9 @@ apply plugin: 'elasticsearch.build' dependencies { + api project(':libs:core') + api "org.apache.lucene:lucene-core:${versions.lucene}" + testImplementation(project(":test:framework")) testImplementation('ch.obermuhlner:big-math:2.3.2') testImplementation('org.apache.commons:commons-math3:3.6.1') diff --git a/libs/exponential-histogram/licenses/lucene-core-LICENSE.txt b/libs/exponential-histogram/licenses/lucene-core-LICENSE.txt new file mode 100644 index 0000000000000..28b134f5f8e4d --- /dev/null +++ b/libs/exponential-histogram/licenses/lucene-core-LICENSE.txt @@ -0,0 +1,475 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. 
This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+ + + +Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was +derived from unicode conversion examples available at +http://www.unicode.org/Public/PROGRAMS/CVTUTF. Here is the copyright +from those sources: + +/* + * Copyright 2001-2004 Unicode, Inc. + * + * Disclaimer + * + * This source code is provided as is by Unicode, Inc. No claims are + * made as to fitness for any particular purpose. No warranties of any + * kind are expressed or implied. The recipient agrees to determine + * applicability of information provided. If this file has been + * purchased on magnetic or optical media from Unicode, Inc., the + * sole remedy for any claim will be exchange of defective media + * within 90 days of receipt. + * + * Limitations on Rights to Redistribute This Code + * + * Unicode, Inc. hereby grants the right to freely use the information + * supplied in this file in the creation of products supporting the + * Unicode Standard, and to make copies of this file in any form + * for internal or external distribution as long as this notice + * remains attached. + */ + + +Some code in core/src/java/org/apache/lucene/util/ArrayUtil.java was +derived from Python 2.4.2 sources available at +http://www.python.org. Full license is here: + + http://www.python.org/download/releases/2.4.2/license/ + +Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was +derived from Python 3.1.2 sources available at +http://www.python.org. Full license is here: + + http://www.python.org/download/releases/3.1.2/license/ + +Some code in core/src/java/org/apache/lucene/util/automaton was +derived from Brics automaton sources available at +www.brics.dk/automaton/. Here is the copyright from those sources: + +/* + * Copyright (c) 2001-2009 Anders Moeller + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +The levenshtein automata tables in core/src/java/org/apache/lucene/util/automaton +were automatically generated with the moman/finenight FSA package. 
+Here is the copyright for those sources: + +# Copyright (c) 2010, Jean-Philippe Barrette-LaPierre, +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. + +Some code in core/src/java/org/apache/lucene/util/UnicodeUtil.java was +derived from ICU (http://www.icu-project.org) +The full license is available here: + http://source.icu-project.org/repos/icu/icu/trunk/license.html + +/* + * Copyright (C) 1999-2010, International Business Machines + * Corporation and others. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, and/or sell copies of the + * Software, and to permit persons to whom the Software is furnished to do so, + * provided that the above copyright notice(s) and this permission notice appear + * in all copies of the Software and that both the above copyright notice(s) and + * this permission notice appear in supporting documentation. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE + * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR + * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization of the + * copyright holder. + */ + +The following license applies to the Snowball stemmers: + +Copyright (c) 2001, Dr Martin Porter +Copyright (c) 2002, Richard Boulton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The following license applies to the KStemmer: + +Copyright © 2003, +Center for Intelligent Information Retrieval, +University of Massachusetts, Amherst. +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this +list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. The names "Center for Intelligent Information Retrieval" and +"University of Massachusetts" must not be used to endorse or promote products +derived from this software without prior written permission. To obtain +permission, contact info@ciir.cs.umass.edu. + +THIS SOFTWARE IS PROVIDED BY UNIVERSITY OF MASSACHUSETTS AND OTHER CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +SUCH DAMAGE. + +The following license applies to the Morfologik project: + +Copyright (c) 2006 Dawid Weiss +Copyright (c) 2007-2011 Dawid Weiss, Marcin Miłkowski +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. 
+ + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name of Morfologik nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +--- + +The dictionary comes from Morfologik project. Morfologik uses data from +Polish ispell/myspell dictionary hosted at http://www.sjp.pl/slownik/en/ and +is licenced on the terms of (inter alia) LGPL and Creative Commons +ShareAlike. The part-of-speech tags were added in Morfologik project and +are not found in the data from sjp.pl. The tagset is similar to IPI PAN +tagset. + +--- + +The following license applies to the Morfeusz project, +used by org.apache.lucene.analysis.morfologik. + +BSD-licensed dictionary of Polish (SGJP) +http://sgjp.pl/morfeusz/ + +Copyright © 2011 Zygmunt Saloni, Włodzimierz Gruszczyński, + Marcin Woliński, Robert Wołosz + +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the + distribution. + +THIS SOFTWARE IS PROVIDED BY COPYRIGHT HOLDERS “AS IS” AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL COPYRIGHT HOLDERS OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR +BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN +IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/libs/exponential-histogram/licenses/lucene-core-NOTICE.txt b/libs/exponential-histogram/licenses/lucene-core-NOTICE.txt new file mode 100644 index 0000000000000..1a1d51572432a --- /dev/null +++ b/libs/exponential-histogram/licenses/lucene-core-NOTICE.txt @@ -0,0 +1,192 @@ +Apache Lucene +Copyright 2014 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). 
+ +Includes software from other Apache Software Foundation projects, +including, but not limited to: + - Apache Ant + - Apache Jakarta Regexp + - Apache Commons + - Apache Xerces + +ICU4J, (under analysis/icu) is licensed under an MIT styles license +and Copyright (c) 1995-2008 International Business Machines Corporation and others + +Some data files (under analysis/icu/src/data) are derived from Unicode data such +as the Unicode Character Database. See http://unicode.org/copyright.html for more +details. + +Brics Automaton (under core/src/java/org/apache/lucene/util/automaton) is +BSD-licensed, created by Anders Møller. See http://www.brics.dk/automaton/ + +The levenshtein automata tables (under core/src/java/org/apache/lucene/util/automaton) were +automatically generated with the moman/finenight FSA library, created by +Jean-Philippe Barrette-LaPierre. This library is available under an MIT license, +see http://sites.google.com/site/rrettesite/moman and +http://bitbucket.org/jpbarrette/moman/overview/ + +The class org.apache.lucene.util.WeakIdentityMap was derived from +the Apache CXF project and is Apache License 2.0. + +The Google Code Prettify is Apache License 2.0. +See http://code.google.com/p/google-code-prettify/ + +JUnit (junit-4.10) is licensed under the Common Public License v. 1.0 +See http://junit.sourceforge.net/cpl-v10.html + +This product includes code (JaspellTernarySearchTrie) from Java Spelling Checkin +g Package (jaspell): http://jaspell.sourceforge.net/ +License: The BSD License (http://www.opensource.org/licenses/bsd-license.php) + +The snowball stemmers in + analysis/common/src/java/net/sf/snowball +were developed by Martin Porter and Richard Boulton. +The snowball stopword lists in + analysis/common/src/resources/org/apache/lucene/analysis/snowball +were developed by Martin Porter and Richard Boulton. +The full snowball package is available from + http://snowball.tartarus.org/ + +The KStem stemmer in + analysis/common/src/org/apache/lucene/analysis/en +was developed by Bob Krovetz and Sergio Guzman-Lara (CIIR-UMass Amherst) +under the BSD-license. + +The Arabic,Persian,Romanian,Bulgarian, Hindi and Bengali analyzers (common) come with a default +stopword list that is BSD-licensed created by Jacques Savoy. These files reside in: +analysis/common/src/resources/org/apache/lucene/analysis/ar/stopwords.txt, +analysis/common/src/resources/org/apache/lucene/analysis/fa/stopwords.txt, +analysis/common/src/resources/org/apache/lucene/analysis/ro/stopwords.txt, +analysis/common/src/resources/org/apache/lucene/analysis/bg/stopwords.txt, +analysis/common/src/resources/org/apache/lucene/analysis/hi/stopwords.txt, +analysis/common/src/resources/org/apache/lucene/analysis/bn/stopwords.txt +See http://members.unine.ch/jacques.savoy/clef/index.html. + +The German,Spanish,Finnish,French,Hungarian,Italian,Portuguese,Russian and Swedish light stemmers +(common) are based on BSD-licensed reference implementations created by Jacques Savoy and +Ljiljana Dolamic. 
These files reside in: +analysis/common/src/java/org/apache/lucene/analysis/de/GermanLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/de/GermanMinimalStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/es/SpanishLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/fi/FinnishLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/fr/FrenchMinimalStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/hu/HungarianLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/it/ItalianLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/pt/PortugueseLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/ru/RussianLightStemmer.java +analysis/common/src/java/org/apache/lucene/analysis/sv/SwedishLightStemmer.java + +The Stempel analyzer (stempel) includes BSD-licensed software developed +by the Egothor project http://egothor.sf.net/, created by Leo Galambos, Martin Kvapil, +and Edmond Nolan. + +The Polish analyzer (stempel) comes with a default +stopword list that is BSD-licensed created by the Carrot2 project. The file resides +in stempel/src/resources/org/apache/lucene/analysis/pl/stopwords.txt. +See http://project.carrot2.org/license.html. + +The SmartChineseAnalyzer source code (smartcn) was +provided by Xiaoping Gao and copyright 2009 by www.imdict.net. + +WordBreakTestUnicode_*.java (under modules/analysis/common/src/test/) +is derived from Unicode data such as the Unicode Character Database. +See http://unicode.org/copyright.html for more details. + +The Morfologik analyzer (morfologik) includes BSD-licensed software +developed by Dawid Weiss and Marcin Miłkowski (http://morfologik.blogspot.com/). + +Morfologik uses data from Polish ispell/myspell dictionary +(http://www.sjp.pl/slownik/en/) licenced on the terms of (inter alia) +LGPL and Creative Commons ShareAlike. + +Morfologic includes data from BSD-licensed dictionary of Polish (SGJP) +(http://sgjp.pl/morfeusz/) + +Servlet-api.jar and javax.servlet-*.jar are under the CDDL license, the original +source code for this can be found at http://www.eclipse.org/jetty/downloads.php + +=========================================================================== +Kuromoji Japanese Morphological Analyzer - Apache Lucene Integration +=========================================================================== + +This software includes a binary and/or source version of data from + + mecab-ipadic-2.7.0-20070801 + +which can be obtained from + + http://atilika.com/releases/mecab-ipadic/mecab-ipadic-2.7.0-20070801.tar.gz + +or + + http://jaist.dl.sourceforge.net/project/mecab/mecab-ipadic/2.7.0-20070801/mecab-ipadic-2.7.0-20070801.tar.gz + +=========================================================================== +mecab-ipadic-2.7.0-20070801 Notice +=========================================================================== + +Nara Institute of Science and Technology (NAIST), +the copyright holders, disclaims all warranties with regard to this +software, including all implied warranties of merchantability and +fitness, in no event shall NAIST be liable for +any special, indirect or consequential damages or any damages +whatsoever resulting from loss of use, data or profits, whether in an +action of contract, negligence or other tortuous action, arising out +of or in connection with the use or performance of this software. 
+ +A large portion of the dictionary entries +originate from ICOT Free Software. The following conditions for ICOT +Free Software applies to the current dictionary as well. + +Each User may also freely distribute the Program, whether in its +original form or modified, to any third party or parties, PROVIDED +that the provisions of Section 3 ("NO WARRANTY") will ALWAYS appear +on, or be attached to, the Program, which is distributed substantially +in the same form as set out herein and that such intended +distribution, if actually made, will neither violate or otherwise +contravene any of the laws and regulations of the countries having +jurisdiction over the User or the intended distribution itself. + +NO WARRANTY + +The program was produced on an experimental basis in the course of the +research and development conducted during the project and is provided +to users as so produced on an experimental basis. Accordingly, the +program is provided without any warranty whatsoever, whether express, +implied, statutory or otherwise. The term "warranty" used herein +includes, but is not limited to, any warranty of the quality, +performance, merchantability and fitness for a particular purpose of +the program and the nonexistence of any infringement or violation of +any right of any third party. + +Each user of the program will agree and understand, and be deemed to +have agreed and understood, that there is no warranty whatsoever for +the program and, accordingly, the entire risk arising from or +otherwise connected with the program is assumed by the user. + +Therefore, neither ICOT, the copyright holder, or any other +organization that participated in or was otherwise related to the +development of the program and their respective officials, directors, +officers and other employees shall be held liable for any and all +damages, including, without limitation, general, special, incidental +and consequential damages, arising out of or otherwise in connection +with the use or inability to use the program or any product, material +or result produced or otherwise obtained by using the program, +regardless of whether they have been advised of, or otherwise had +knowledge of, the possibility of such damages at any time during the +project or thereafter. Each user will be deemed to have agreed to the +foregoing by his or her commencement of use of the program. The term +"use" as used herein includes, but is not limited to, the use, +modification, copying and distribution of the program and the +production of secondary products from the program. + +In the case where the program, whether in its original form or +modified, was distributed or delivered to or received by a user from +any person, organization or entity other than ICOT, unless it makes or +grants independently of ICOT any specific warranty to the user in +writing, such person, organization or entity, will also be exempted +from and not be held liable to the user for any such damages as noted +above as far as the program is concerned. diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketArrayIterator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketArrayIterator.java new file mode 100644 index 0000000000000..8ddd259bea739 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/BucketArrayIterator.java @@ -0,0 +1,79 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. 
+ * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +class BucketArrayIterator implements CopyableBucketIterator { + + private final int scale; + private final long[] bucketCounts; + private final long[] bucketIndices; + + private int currentSlot; + private final int limit; + + BucketArrayIterator(int scale, long[] bucketCounts, long[] bucketIndices, int startSlot, int limit) { + this.scale = scale; + this.bucketCounts = bucketCounts; + this.bucketIndices = bucketIndices; + this.currentSlot = startSlot; + this.limit = limit; + } + + @Override + public boolean hasNext() { + return currentSlot < limit; + } + + @Override + public long peekCount() { + ensureEndNotReached(); + return bucketCounts[currentSlot]; + } + + @Override + public long peekIndex() { + ensureEndNotReached(); + return bucketIndices[currentSlot]; + } + + @Override + public void advance() { + ensureEndNotReached(); + currentSlot++; + } + + @Override + public int scale() { + return scale; + } + + @Override + public CopyableBucketIterator copy() { + return new BucketArrayIterator(scale, bucketCounts, bucketIndices, currentSlot, limit); + } + + private void ensureEndNotReached() { + if (hasNext() == false) { + throw new IllegalStateException("Iterator has no more buckets"); + } + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java index 30926ed5865b4..4cf8f6f89d18f 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/DownscaleStats.java @@ -21,6 +21,8 @@ package org.elasticsearch.exponentialhistogram; +import org.apache.lucene.util.RamUsageEstimator; + import java.util.Arrays; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; @@ -33,6 +35,8 @@ */ class DownscaleStats { + static final long SIZE = RamUsageEstimator.shallowSizeOf(DownscaleStats.class) + RamEstimationUtil.estimateIntArray(MAX_INDEX_BITS); + // collapsedBucketCount[i] stores the number of additional // collapsed buckets when increasing the scale by (i+1) instead of just by (i) int[] collapsedBucketCount = new int[MAX_INDEX_BITS]; diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java new file mode 100644 index 0000000000000..f2d21f0861065 --- /dev/null 
+++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/EmptyExponentialHistogram.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import java.util.OptionalLong; + +class EmptyExponentialHistogram implements ReleasableExponentialHistogram { + + static final EmptyExponentialHistogram INSTANCE = new EmptyExponentialHistogram(); + + /** + * The default empty histogram always has MAX_SCALE to not cause accidental downscaling + * when combining with other histograms. + */ + private static final int SCALE = ExponentialHistogram.MAX_SCALE; + + private static class EmptyBuckets implements Buckets { + + private static final EmptyBuckets INSTANCE = new EmptyBuckets(); + private static final CopyableBucketIterator EMPTY_ITERATOR = new BucketArrayIterator(SCALE, new long[0], new long[0], 0, 0); + + @Override + public CopyableBucketIterator iterator() { + return EMPTY_ITERATOR; + } + + @Override + public OptionalLong maxBucketIndex() { + return OptionalLong.empty(); + } + + @Override + public long valueCount() { + return 0; + } + } + + @Override + public void close() {} + + @Override + public int scale() { + return SCALE; + } + + @Override + public ZeroBucket zeroBucket() { + return ZeroBucket.minimalEmpty(); + } + + @Override + public Buckets positiveBuckets() { + return EmptyBuckets.INSTANCE; + } + + @Override + public Buckets negativeBuckets() { + return EmptyBuckets.INSTANCE; + } + + @Override + public long ramBytesUsed() { + return 0; + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java index 38583d05c8d6f..f4603f3fe679c 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogram.java @@ -21,6 +21,8 @@ package org.elasticsearch.exponentialhistogram; +import org.apache.lucene.util.Accountable; + import java.util.Iterator; import java.util.List; import java.util.OptionalLong; @@ -43,7 +45,7 @@ * Additionally, all algorithms assume that samples within a bucket are located at a single point: the point of least relative error * (see {@link ExponentialScaleUtils#getPointOfLeastRelativeError(long, int)}). 
*/ -public interface ExponentialHistogram { +public interface ExponentialHistogram extends Accountable { // TODO(b/128622): support min/max/sum/count storage and merging. // TODO(b/128622): Add special positive and negative infinity buckets @@ -114,47 +116,64 @@ interface Buckets { } + static ExponentialHistogram empty() { + return EmptyExponentialHistogram.INSTANCE; + } + /** * Creates a histogram representing the distribution of the given values with at most the given number of buckets. * If the given {@code maxBucketCount} is greater than or equal to the number of values, the resulting histogram will have a * relative error of less than {@code 2^(2^-MAX_SCALE) - 1}. * * @param maxBucketCount the maximum number of buckets + * @param breaker the circuit breaker to use to limit memory allocations * @param values the values to be added to the histogram - * @return a new {@link ExponentialHistogram} + * @return a new {@link ReleasableExponentialHistogram} */ - static ExponentialHistogram create(int maxBucketCount, double... values) { - ExponentialHistogramGenerator generator = new ExponentialHistogramGenerator(maxBucketCount); - for (double val : values) { - generator.add(val); + static ReleasableExponentialHistogram create(int maxBucketCount, ExponentialHistogramCircuitBreaker breaker, double... values) { + try (ExponentialHistogramGenerator generator = ExponentialHistogramGenerator.create(maxBucketCount, breaker)) { + for (double val : values) { + generator.add(val); + } + return generator.getAndClear(); } - return generator.get(); } /** * Merges the provided exponential histograms to a new, single histogram with at most the given amount of buckets. * * @param maxBucketCount the maximum number of buckets the result histogram is allowed to have - * @param histograms teh histograms to merge + * @param breaker the circuit breaker to use to limit memory allocations + * @param histograms the histograms to merge * @return the merged histogram */ - static ExponentialHistogram merge(int maxBucketCount, Iterator<ExponentialHistogram> histograms) { - ExponentialHistogramMerger merger = new ExponentialHistogramMerger(maxBucketCount); - while (histograms.hasNext()) { - merger.add(histograms.next()); + static ReleasableExponentialHistogram merge( + int maxBucketCount, + ExponentialHistogramCircuitBreaker breaker, + Iterator<ExponentialHistogram> histograms + ) { + try (ExponentialHistogramMerger merger = ExponentialHistogramMerger.create(maxBucketCount, breaker)) { + while (histograms.hasNext()) { + merger.add(histograms.next()); + } + return merger.getAndClear(); } - return merger.get(); } /** * Merges the provided exponential histograms to a new, single histogram with at most the given amount of buckets. * * @param maxBucketCount the maximum number of buckets the result histogram is allowed to have - * @param histograms teh histograms to merge + * @param breaker the circuit breaker to use to limit memory allocations + * @param histograms the histograms to merge + * @return the merged histogram */ - static ExponentialHistogram merge(int maxBucketCount, ExponentialHistogram... histograms) { - return merge(maxBucketCount, List.of(histograms).iterator()); + static ReleasableExponentialHistogram merge( + int maxBucketCount, + ExponentialHistogramCircuitBreaker breaker, + ExponentialHistogram...
histograms + ) { + return merge(maxBucketCount, breaker, List.of(histograms).iterator()); } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramCircuitBreaker.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramCircuitBreaker.java new file mode 100644 index 0000000000000..8bf21fd9e57f7 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramCircuitBreaker.java @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +/** + * Interface for a memory-allocation circuit breaker used for {@link ReleasableExponentialHistogram}s. + */ +public interface ExponentialHistogramCircuitBreaker { + + /** + * Adjusts the circuit breaker, potentially throwing an exception if the limit is exceeded. + * Guaranteed to never cause an exception when called with a negative number to reduce the breaker count. + * + * @param bytesAllocated the number of bytes allocated, or a negative value if deallocated + */ + void adjustBreaker(long bytesAllocated); + + static ExponentialHistogramCircuitBreaker noop() { + return bytesAllocated -> {}; + } + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java index 13f2fa4215a36..ef4e7f57dba44 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGenerator.java @@ -21,6 +21,11 @@ package org.elasticsearch.exponentialhistogram; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.core.Releasable; +import org.elasticsearch.core.Releasables; + import java.util.Arrays; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.computeIndex; @@ -32,42 +37,64 @@ * If the number of values is less than or equal to the bucket capacity, the resulting histogram is guaranteed * to represent the exact raw values with a relative error less than {@code 2^(2^-MAX_SCALE) - 1}. 
*/ -public class ExponentialHistogramGenerator { +public class ExponentialHistogramGenerator implements Accountable, Releasable { + + private static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ExponentialHistogramGenerator.class); // Merging individual values into a histogram would be way too slow with our sparse, array-backed histogram representation. // Therefore, for a bucket capacity of c, we first buffer c raw values to be inserted. // We then turn those into an "exact" histogram, which in turn we merge with our actual result accumulator. // This yields an amortized runtime of O(log(c)). private final double[] rawValueBuffer; - int valueCount; + private int valueCount; private final ExponentialHistogramMerger resultMerger; private final FixedCapacityExponentialHistogram valueBuffer; - private boolean isFinished = false; + private final ExponentialHistogramCircuitBreaker circuitBreaker; + private boolean closed = false; /** * Creates a new instance with the specified maximum number of buckets. * * @param maxBucketCount the maximum number of buckets for the generated histogram + * @param circuitBreaker the circuit breaker to use to limit memory allocations */ - public ExponentialHistogramGenerator(int maxBucketCount) { + public static ExponentialHistogramGenerator create(int maxBucketCount, ExponentialHistogramCircuitBreaker circuitBreaker) { + long size = estimateBaseSize(maxBucketCount); + circuitBreaker.adjustBreaker(size); + try { + return new ExponentialHistogramGenerator(maxBucketCount, circuitBreaker); + } catch (RuntimeException e) { + circuitBreaker.adjustBreaker(-size); + throw e; + } + } + + private ExponentialHistogramGenerator(int maxBucketCount, ExponentialHistogramCircuitBreaker circuitBreaker) { + this.circuitBreaker = circuitBreaker; rawValueBuffer = new double[maxBucketCount]; valueCount = 0; - valueBuffer = new FixedCapacityExponentialHistogram(maxBucketCount); - resultMerger = new ExponentialHistogramMerger(maxBucketCount); + FixedCapacityExponentialHistogram buffer = null; + ExponentialHistogramMerger merger = null; + try { + buffer = FixedCapacityExponentialHistogram.create(maxBucketCount, circuitBreaker); + merger = ExponentialHistogramMerger.create(maxBucketCount, circuitBreaker); + } catch (RuntimeException e) { + Releasables.close(buffer, merger); + throw e; + } + this.valueBuffer = buffer; + this.resultMerger = merger; } /** * Adds the given value to the histogram. - * Must not be called after {@link #get()} has been called. 
* * @param value the value to add */ public void add(double value) { - if (isFinished) { - throw new IllegalStateException("get() has already been called"); - } + assert closed == false : "ExponentialHistogramGenerator has already been closed"; if (valueCount == rawValueBuffer.length) { mergeValuesToHistogram(); } @@ -80,10 +107,9 @@ public void add(double value) { * * @return the histogram representing the distribution of all accumulated values */ - public ExponentialHistogram get() { - isFinished = true; + public ReleasableExponentialHistogram getAndClear() { mergeValuesToHistogram(); - return resultMerger.get(); + return resultMerger.getAndClear(); } private void mergeValuesToHistogram() { @@ -135,4 +161,24 @@ private void mergeValuesToHistogram() { valueCount = 0; } + private static long estimateBaseSize(int numBuckets) { + return SHALLOW_SIZE + RamEstimationUtil.estimateDoubleArray(numBuckets); + }; + + @Override + public long ramBytesUsed() { + return estimateBaseSize(rawValueBuffer.length) + resultMerger.ramBytesUsed() + valueBuffer.ramBytesUsed(); + } + + @Override + public void close() { + if (closed) { + assert false : "ExponentialHistogramGenerator closed multiple times"; + } else { + closed = true; + resultMerger.close(); + valueBuffer.close(); + circuitBreaker.adjustBreaker(-estimateBaseSize(rawValueBuffer.length)); + } + } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java index b00ad053837d9..a92fb376abc9a 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMerger.java @@ -21,6 +21,11 @@ package org.elasticsearch.exponentialhistogram; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.RamUsageEstimator; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Releasable; + import java.util.OptionalLong; import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.getMaximumScaleIncrease; @@ -29,59 +34,93 @@ * Allows accumulating multiple {@link ExponentialHistogram} into a single one * while keeping the bucket count in the result below a given limit. */ -public class ExponentialHistogramMerger { +public class ExponentialHistogramMerger implements Accountable, Releasable { + + private static final long BASE_SIZE = RamUsageEstimator.shallowSizeOfInstance(ExponentialHistogramMerger.class) + DownscaleStats.SIZE; // Our algorithm is not in-place, therefore we use two histograms and ping-pong between them + @Nullable private FixedCapacityExponentialHistogram result; + @Nullable private FixedCapacityExponentialHistogram buffer; + private final int bucketLimit; + private final int maxScale; + private final DownscaleStats downscaleStats; - private boolean isFinished; + private final ExponentialHistogramCircuitBreaker circuitBreaker; + private boolean closed = false; /** * Creates a new instance with the specified bucket limit. 
* * @param bucketLimit the maximum number of buckets the result histogram is allowed to have + * @param circuitBreaker the circuit breaker to use to limit memory allocations */ - public ExponentialHistogramMerger(int bucketLimit) { - downscaleStats = new DownscaleStats(); - result = new FixedCapacityExponentialHistogram(bucketLimit); - buffer = new FixedCapacityExponentialHistogram(bucketLimit); + public static ExponentialHistogramMerger create(int bucketLimit, ExponentialHistogramCircuitBreaker circuitBreaker) { + circuitBreaker.adjustBreaker(BASE_SIZE); + return new ExponentialHistogramMerger(bucketLimit, circuitBreaker); + } + + private ExponentialHistogramMerger(int bucketLimit, ExponentialHistogramCircuitBreaker circuitBreaker) { + this(bucketLimit, ExponentialHistogram.MAX_SCALE, circuitBreaker); } // Only intended for testing, using this in production means an unnecessary reduction of precision - private ExponentialHistogramMerger(int bucketLimit, int minScale) { - this(bucketLimit); - result.resetBuckets(minScale); - buffer.resetBuckets(minScale); + private ExponentialHistogramMerger(int bucketLimit, int maxScale, ExponentialHistogramCircuitBreaker circuitBreaker) { + this.bucketLimit = bucketLimit; + this.maxScale = maxScale; + this.circuitBreaker = circuitBreaker; + downscaleStats = new DownscaleStats(); } - static ExponentialHistogramMerger createForTesting(int bucketLimit, int minScale) { - return new ExponentialHistogramMerger(bucketLimit, minScale); + static ExponentialHistogramMerger createForTesting(int bucketLimit, int maxScale, ExponentialHistogramCircuitBreaker circuitBreaker) { + circuitBreaker.adjustBreaker(BASE_SIZE); + return new ExponentialHistogramMerger(bucketLimit, maxScale, circuitBreaker); } - /** - * Merges the given histogram into the current result. - * Must not be called after {@link #get()} has been called. - * - * @param toAdd the histogram to merge - */ - public void add(ExponentialHistogram toAdd) { - if (isFinished) { - throw new IllegalStateException("get() has already been called"); + @Override + public void close() { + if (closed) { + assert false : "ExponentialHistogramMerger closed multiple times"; + } else { + closed = true; + if (result != null) { + result.close(); + result = null; + } + if (buffer != null) { + buffer.close(); + buffer = null; + } + circuitBreaker.adjustBreaker(-BASE_SIZE); } - doMerge(toAdd); + } + + @Override + public long ramBytesUsed() { + long size = BASE_SIZE; + if (result != null) { + size += result.ramBytesUsed(); + } + if (buffer != null) { + size += buffer.ramBytesUsed(); + } + return size; } /** - * Returns the merged histogram. + * Returns the merged histogram and clears this merger. + * The caller takes ownership of the returned histogram and must ensure that {@link #close()} is called. * * @return the merged histogram */ - public ExponentialHistogram get() { - isFinished = true; - return result; + public ReleasableExponentialHistogram getAndClear() { + assert closed == false : "ExponentialHistogramMerger already closed"; + ReleasableExponentialHistogram retVal = (result == null) ? ReleasableExponentialHistogram.empty() : result; + result = null; + return retVal; } // TODO(b/128622): this algorithm is very efficient if b has roughly as many buckets as a @@ -90,9 +129,14 @@ public ExponentialHistogram get() { // then in O(log(n)) turn them into a single, merged histogram. 
// (n is the number of buffered buckets) - private void doMerge(ExponentialHistogram b) { - - ExponentialHistogram a = result; + /** + * Merges the given histogram into the current result. + * + * @param toAdd the histogram to merge + */ + public void add(ExponentialHistogram toAdd) { + ExponentialHistogram a = result == null ? ExponentialHistogram.empty() : result; + ExponentialHistogram b = toAdd; CopyableBucketIterator posBucketsA = a.positiveBuckets().iterator(); CopyableBucketIterator negBucketsA = a.negativeBuckets().iterator(); @@ -102,12 +146,15 @@ private void doMerge(ExponentialHistogram b) { ZeroBucket zeroBucket = a.zeroBucket().merge(b.zeroBucket()); zeroBucket = zeroBucket.collapseOverlappingBucketsForAll(posBucketsA, negBucketsA, posBucketsB, negBucketsB); + if (buffer == null) { + buffer = FixedCapacityExponentialHistogram.create(bucketLimit, circuitBreaker); + } buffer.setZeroBucket(zeroBucket); // We attempt to bring everything to the scale of A. // This might involve increasing the scale for B, which would increase its indices. // We need to ensure that we do not exceed MAX_INDEX / MIN_INDEX in this case. - int targetScale = a.scale(); + int targetScale = Math.min(maxScale, a.scale()); if (targetScale > b.scale()) { if (negBucketsB.hasNext()) { long smallestIndex = negBucketsB.peekIndex(); diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java index 5dde66d4a46ec..255206cad8e81 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogram.java @@ -21,6 +21,8 @@ package org.elasticsearch.exponentialhistogram; +import org.apache.lucene.util.RamUsageEstimator; + import java.util.OptionalLong; /** @@ -29,7 +31,10 @@ * Consumers must ensure that if the histogram is mutated, all previously acquired {@link BucketIterator} * instances are no longer used. */ -final class FixedCapacityExponentialHistogram implements ExponentialHistogram { +final class FixedCapacityExponentialHistogram implements ReleasableExponentialHistogram { + + static final long BASE_SIZE = RamUsageEstimator.shallowSizeOfInstance(FixedCapacityExponentialHistogram.class) + ZeroBucket.SHALLOW_SIZE + + 2 * Buckets.SHALLOW_SIZE; // These arrays represent both the positive and the negative buckets. // To avoid confusion, we refer to positions within the array as "slots" instead of indices in this file @@ -48,13 +53,22 @@ final class FixedCapacityExponentialHistogram implements ExponentialHistogram { private final Buckets positiveBuckets = new Buckets(true); + private final ExponentialHistogramCircuitBreaker circuitBreaker; + private boolean closed = false; + + static FixedCapacityExponentialHistogram create(int bucketCapacity, ExponentialHistogramCircuitBreaker circuitBreaker) { + circuitBreaker.adjustBreaker(estimateSize(bucketCapacity)); + return new FixedCapacityExponentialHistogram(bucketCapacity, circuitBreaker); + } + /** * Creates an empty histogram with the given capacity and a {@link ZeroBucket#minimalEmpty()} zero bucket. * The scale is initialized to the maximum possible precision ({@link #MAX_SCALE}). * * @param bucketCapacity the maximum total number of positive and negative buckets this histogram can hold. 
*/ - FixedCapacityExponentialHistogram(int bucketCapacity) { + private FixedCapacityExponentialHistogram(int bucketCapacity, ExponentialHistogramCircuitBreaker circuitBreaker) { + this.circuitBreaker = circuitBreaker; bucketIndices = new long[bucketCapacity]; bucketCounts = new long[bucketCapacity]; reset(); @@ -142,8 +156,29 @@ public ExponentialHistogram.Buckets positiveBuckets() { return positiveBuckets; } + @Override + public void close() { + if (closed) { + assert false : "FixedCapacityExponentialHistogram closed multiple times"; + } else { + closed = true; + circuitBreaker.adjustBreaker(-ramBytesUsed()); + } + } + + static long estimateSize(int bucketCapacity) { + return BASE_SIZE + 2 * RamEstimationUtil.estimateLongArray(bucketCapacity); + } + + @Override + public long ramBytesUsed() { + return estimateSize(bucketIndices.length); + } + private class Buckets implements ExponentialHistogram.Buckets { + static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(Buckets.class); + private final boolean isPositive; private int numBuckets; private int cachedValueSumForNumBuckets; @@ -186,7 +221,7 @@ boolean tryAddBucket(long index, long count) { @Override public CopyableBucketIterator iterator() { int start = startSlot(); - return new BucketArrayIterator(start, start + numBuckets); + return new BucketArrayIterator(bucketScale, bucketCounts, bucketIndices, start, start + numBuckets); } @Override @@ -209,53 +244,4 @@ public long valueCount() { } } - private class BucketArrayIterator implements CopyableBucketIterator { - - int currentSlot; - final int limit; - - private BucketArrayIterator(int startSlot, int limit) { - this.currentSlot = startSlot; - this.limit = limit; - } - - @Override - public boolean hasNext() { - return currentSlot < limit; - } - - @Override - public long peekCount() { - ensureEndNotReached(); - return bucketCounts[currentSlot]; - } - - @Override - public long peekIndex() { - ensureEndNotReached(); - return bucketIndices[currentSlot]; - } - - @Override - public void advance() { - ensureEndNotReached(); - currentSlot++; - } - - @Override - public int scale() { - return FixedCapacityExponentialHistogram.this.scale(); - } - - @Override - public CopyableBucketIterator copy() { - return new BucketArrayIterator(currentSlot, limit); - } - - private void ensureEndNotReached() { - if (hasNext() == false) { - throw new IllegalStateException("Iterator has no more buckets"); - } - } - } } diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/RamEstimationUtil.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/RamEstimationUtil.java new file mode 100644 index 0000000000000..25f5eb69d31f4 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/RamEstimationUtil.java @@ -0,0 +1,44 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import org.apache.lucene.util.RamUsageEstimator; + +class RamEstimationUtil { + + private static long estimatedArraySize(int arrayLength, int bytesPerElement) { + return RamUsageEstimator.alignObjectSize(RamUsageEstimator.NUM_BYTES_ARRAY_HEADER + ((long) arrayLength) * bytesPerElement); + } + + static long estimateLongArray(int length) { + return estimatedArraySize(length, Long.BYTES); + } + + static long estimateIntArray(int length) { + return estimatedArraySize(length, Integer.BYTES); + } + + static long estimateDoubleArray(int length) { + return estimatedArraySize(length, Double.BYTES); + } + +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ReleasableExponentialHistogram.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ReleasableExponentialHistogram.java new file mode 100644 index 0000000000000..7bf1ad5e394e6 --- /dev/null +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ReleasableExponentialHistogram.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import org.elasticsearch.core.Releasable; + +/** + * A histogram which participates in the {@link ExponentialHistogramCircuitBreaker} and therefore requires proper releasing. + */ +public interface ReleasableExponentialHistogram extends ExponentialHistogram, Releasable { + + /** + * @return an empty singleton, which does not allocate any memory and therefore {@link #close()} is a no-op. 
+ */ + static ReleasableExponentialHistogram empty() { + return EmptyExponentialHistogram.INSTANCE; + } +} diff --git a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java index 9ea75f3a82c27..1341f283c5487 100644 --- a/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java +++ b/libs/exponential-histogram/src/main/java/org/elasticsearch/exponentialhistogram/ZeroBucket.java @@ -21,6 +21,8 @@ package org.elasticsearch.exponentialhistogram; +import org.apache.lucene.util.RamUsageEstimator; + import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_SCALE; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_SCALE; @@ -41,6 +43,8 @@ */ public record ZeroBucket(long index, int scale, long count) { + public static final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(ZeroBucket.class); + // A singleton for an empty zero bucket with the smallest possible threshold. private static final ZeroBucket MINIMAL_EMPTY = new ZeroBucket(MIN_INDEX, MIN_SCALE, 0); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java index 63f0b8a301cfe..d32e1d3a43c9a 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/DownscaleStatsTests.java @@ -21,8 +21,6 @@ package org.elasticsearch.exponentialhistogram; -import org.elasticsearch.test.ESTestCase; - import java.util.ArrayList; import java.util.Collection; import java.util.HashSet; @@ -33,10 +31,9 @@ import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MAX_INDEX_BITS; import static org.elasticsearch.exponentialhistogram.ExponentialHistogram.MIN_INDEX; -import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; -public class DownscaleStatsTests extends ESTestCase { +public class DownscaleStatsTests extends ExponentialHistogramTestCase { public void testExponential() { long[] values = IntStream.range(0, 100).mapToLong(i -> (long) Math.min(MAX_INDEX, Math.pow(1.1, i))).distinct().toArray(); diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java index 337cfbd69033e..aa9746e22d54c 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramGeneratorTests.java @@ -21,17 +21,19 @@ package org.elasticsearch.exponentialhistogram; -import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; +import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.greaterThanOrEqualTo; 
import static org.hamcrest.Matchers.lessThanOrEqualTo; -public class ExponentialHistogramGeneratorTests extends ESTestCase { +public class ExponentialHistogramGeneratorTests extends ExponentialHistogramTestCase { public void testVeryLargeValue() { double value = Double.MAX_VALUE / 10; - ExponentialHistogram histo = ExponentialHistogram.create(1, value); - + ExponentialHistogram histo = createAutoReleasedHistogram(1, value); long index = histo.positiveBuckets().iterator().peekIndex(); int scale = histo.scale(); @@ -42,4 +44,45 @@ public void testVeryLargeValue() { assertThat("Upper bucket boundary should be greater than value", upperBound, greaterThanOrEqualTo(value)); } + public void testCircuitBreakerTripDuringConstruction() { + for (int allowedAllocations = 0; allowedAllocations < 5; allowedAllocations++) { + TrippingCircuitBreaker breaker = new TrippingCircuitBreaker(allowedAllocations); + + try (ReleasableExponentialHistogram histo = ExponentialHistogram.create(100, breaker, 1.0, 2.0, 3.0)) { + assertThat(breaker.getUsed(), greaterThan(0L)); + assertThat(breaker.getUsed(), equalTo(histo.ramBytesUsed())); + } catch (DummyCircuitBreakerTripException dummyTrip) {} + + assertThat(breaker.getUsed(), equalTo(0L)); + } + } + + private static class DummyCircuitBreakerTripException extends RuntimeException {} + + static class TrippingCircuitBreaker implements ExponentialHistogramCircuitBreaker { + + private final CircuitBreaker esBreaker = newLimitedBreaker(ByteSizeValue.ofMb(100)); + private int allocationsLeftUntilTrip; + + TrippingCircuitBreaker(int allocationsUntilTrip) { + this.allocationsLeftUntilTrip = allocationsUntilTrip; + } + + @Override + public void adjustBreaker(long bytesAllocated) { + if (bytesAllocated > 0) { + if (allocationsLeftUntilTrip > 0) { + allocationsLeftUntilTrip--; + } else { + throw new DummyCircuitBreakerTripException(); + } + } + esBreaker.addWithoutBreaking(bytesAllocated); + } + + public long getUsed() { + return esBreaker.getUsed(); + } + } + } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java index 9d46798d1a627..5113b73653641 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramMergerTests.java @@ -21,7 +21,8 @@ package org.elasticsearch.exponentialhistogram; -import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; import java.util.ArrayList; import java.util.Arrays; @@ -35,14 +36,16 @@ import static org.elasticsearch.exponentialhistogram.ExponentialScaleUtils.adjustScale; import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; -public class ExponentialHistogramMergerTests extends ESTestCase { +public class ExponentialHistogramMergerTests extends ExponentialHistogramTestCase { public void testZeroThresholdCollapsesOverlappingBuckets() { - FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100); + + FixedCapacityExponentialHistogram first = createAutoReleasedHistogram(100); first.setZeroBucket(new ZeroBucket(2.0001, 10)); - FixedCapacityExponentialHistogram second = new 
FixedCapacityExponentialHistogram(100); + FixedCapacityExponentialHistogram second = createAutoReleasedHistogram(100); first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(0, 1, false); // bucket (-2, 1] first.tryAddBucket(1, 1, false); // bucket (-4, 2] @@ -72,7 +75,7 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { assertThat(posBuckets.hasNext(), equalTo(false)); // ensure buckets of the accumulated histogram are collapsed too if needed - FixedCapacityExponentialHistogram third = new FixedCapacityExponentialHistogram(100); + FixedCapacityExponentialHistogram third = createAutoReleasedHistogram(100); third.setZeroBucket(new ZeroBucket(45.0, 1)); mergeResult = mergeWithMinimumScale(100, 0, mergeResult, third); @@ -83,12 +86,12 @@ public void testZeroThresholdCollapsesOverlappingBuckets() { } public void testEmptyZeroBucketIgnored() { - FixedCapacityExponentialHistogram first = new FixedCapacityExponentialHistogram(100); + FixedCapacityExponentialHistogram first = createAutoReleasedHistogram(100); first.setZeroBucket(new ZeroBucket(2.0, 10)); first.resetBuckets(0); // scale 0 means base 2 first.tryAddBucket(2, 42L, true); // bucket (4, 8] - FixedCapacityExponentialHistogram second = new FixedCapacityExponentialHistogram(100); + FixedCapacityExponentialHistogram second = createAutoReleasedHistogram(100); second.setZeroBucket(new ZeroBucket(100.0, 0)); ExponentialHistogram mergeResult = mergeWithMinimumScale(100, 0, first, second); @@ -109,22 +112,20 @@ public void testUpscalingDoesNotExceedIndexLimits() { boolean isPositive = i % 2 == 0; boolean useMinIndex = i > 1; - FixedCapacityExponentialHistogram histo = new FixedCapacityExponentialHistogram(2); + FixedCapacityExponentialHistogram histo = createAutoReleasedHistogram(2); histo.resetBuckets(20); long index = useMinIndex ? 
MIN_INDEX / 2 : MAX_INDEX / 2; histo.tryAddBucket(index, 1, isPositive); - ExponentialHistogramMerger merger = new ExponentialHistogramMerger(100); - merger.add(histo); - ExponentialHistogram result = merger.get(); - - assertThat(result.scale(), equalTo(21)); - if (isPositive) { - assertThat(result.positiveBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1))); - } else { - assertThat(result.negativeBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1))); + try (ReleasableExponentialHistogram result = ExponentialHistogram.merge(100, breaker(), histo)) { + assertThat(result.scale(), equalTo(21)); + if (isPositive) { + assertThat(result.positiveBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1))); + } else { + assertThat(result.negativeBuckets().iterator().peekIndex(), equalTo(adjustScale(index, 20, 1))); + } } } } @@ -138,17 +139,54 @@ public void testMergeOrderIndependence() { .boxed() .collect(Collectors.toCollection(ArrayList::new)); - ExponentialHistogram reference = ExponentialHistogram.create(20, values.stream().mapToDouble(Double::doubleValue).toArray()); + ReleasableExponentialHistogram reference = ExponentialHistogram.create( + 20, + breaker(), + values.stream().mapToDouble(Double::doubleValue).toArray() + ); + autoReleaseOnTestEnd(reference); for (int i = 0; i < 100; i++) { Collections.shuffle(values, random()); - ExponentialHistogram shuffled = ExponentialHistogram.create(20, values.stream().mapToDouble(Double::doubleValue).toArray()); + double[] vals = values.stream().mapToDouble(Double::doubleValue).toArray(); + try (ReleasableExponentialHistogram shuffled = ExponentialHistogram.create(20, breaker(), vals)) { + assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale())); + assertThat("Expected same zero-bucket", shuffled.zeroBucket(), equalTo(reference.zeroBucket())); + assertBucketsEqual(shuffled.negativeBuckets(), reference.negativeBuckets()); + assertBucketsEqual(shuffled.positiveBuckets(), reference.positiveBuckets()); + } + } + } + + public void testMemoryAccounting() { + CircuitBreaker esBreaker = newLimitedBreaker(ByteSizeValue.ofMb(100)); + try (ExponentialHistogramMerger merger = ExponentialHistogramMerger.create(100, breaker(esBreaker))) { + + long emptyMergerSize = merger.ramBytesUsed(); + assertThat(emptyMergerSize, greaterThan(0L)); + assertThat(esBreaker.getUsed(), equalTo(emptyMergerSize)); - assertThat("Expected same scale", shuffled.scale(), equalTo(reference.scale())); - assertThat("Expected same zero-bucket", shuffled.zeroBucket(), equalTo(reference.zeroBucket())); - assertBucketsEqual(shuffled.negativeBuckets(), reference.negativeBuckets()); - assertBucketsEqual(shuffled.positiveBuckets(), reference.positiveBuckets()); + merger.add(createAutoReleasedHistogram(10, 1.0, 2.0, 3.0)); + + long singleBufferSize = merger.ramBytesUsed(); + assertThat(singleBufferSize, greaterThan(emptyMergerSize)); + assertThat(esBreaker.getUsed(), equalTo(singleBufferSize)); + + merger.add(createAutoReleasedHistogram(10, 1.0, 2.0, 3.0)); + + long doubleBufferSize = merger.ramBytesUsed(); + assertThat(doubleBufferSize, greaterThan(singleBufferSize)); + assertThat(esBreaker.getUsed(), equalTo(doubleBufferSize)); + + ReleasableExponentialHistogram result = merger.getAndClear(); + + assertThat(merger.ramBytesUsed(), equalTo(singleBufferSize)); + assertThat(esBreaker.getUsed(), equalTo(doubleBufferSize)); + + result.close(); + assertThat(esBreaker.getUsed(), equalTo(singleBufferSize)); } + assertThat(esBreaker.getUsed(), 
equalTo(0L)); } private void assertBucketsEqual(ExponentialHistogram.Buckets bucketsA, ExponentialHistogram.Buckets bucketsB) { @@ -164,10 +202,13 @@ private void assertBucketsEqual(ExponentialHistogram.Buckets bucketsA, Exponenti } } - private static ExponentialHistogram mergeWithMinimumScale(int bucketCount, int scale, ExponentialHistogram... histograms) { - ExponentialHistogramMerger merger = ExponentialHistogramMerger.createForTesting(bucketCount, scale); - Arrays.stream(histograms).forEach(merger::add); - return merger.get(); + private ExponentialHistogram mergeWithMinimumScale(int bucketCount, int scale, ExponentialHistogram... histograms) { + try (ExponentialHistogramMerger merger = ExponentialHistogramMerger.createForTesting(bucketCount, scale, breaker())) { + Arrays.stream(histograms).forEach(merger::add); + ReleasableExponentialHistogram result = merger.getAndClear(); + autoReleaseOnTestEnd(result); + return result; + } } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramTestCase.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramTestCase.java new file mode 100644 index 0000000000000..c13c4d2608295 --- /dev/null +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ExponentialHistogramTestCase.java @@ -0,0 +1,75 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; +import org.elasticsearch.core.Releasables; +import org.elasticsearch.test.ESTestCase; +import org.junit.After; + +import java.util.ArrayList; + +public abstract class ExponentialHistogramTestCase extends ESTestCase { + + private final ArrayList releaseBeforeEnd = new ArrayList<>(); + + /** + * Release all histograms created via {@link #createAutoReleasedHistogram(int)} + * before {@link ESTestCase} checks for unreleased bytes. 
+ */ + @After + public void releaseHistograms() { + Releasables.close(releaseBeforeEnd); + releaseBeforeEnd.clear(); + } + + ExponentialHistogramCircuitBreaker breaker(CircuitBreaker esBreaker) { + return bytesAllocated -> { + if (bytesAllocated > 0) { + esBreaker.addEstimateBytesAndMaybeBreak(bytesAllocated, "exponential-histo-test-case"); + } else { + esBreaker.addWithoutBreaking(bytesAllocated); + } + }; + } + + ExponentialHistogramCircuitBreaker breaker() { + return breaker(newLimitedBreaker(ByteSizeValue.ofMb(100))); + } + + void autoReleaseOnTestEnd(ReleasableExponentialHistogram toRelease) { + releaseBeforeEnd.add(toRelease); + } + + FixedCapacityExponentialHistogram createAutoReleasedHistogram(int numBuckets) { + FixedCapacityExponentialHistogram result = FixedCapacityExponentialHistogram.create(numBuckets, breaker()); + releaseBeforeEnd.add(result); + return result; + } + + ExponentialHistogram createAutoReleasedHistogram(int numBuckets, double... values) { + ReleasableExponentialHistogram result = ExponentialHistogram.create(numBuckets, breaker(), values); + releaseBeforeEnd.add(result); + return result; + } +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java index 6701bee418299..7f98c3e2d4e74 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/FixedCapacityExponentialHistogramTests.java @@ -21,15 +21,17 @@ package org.elasticsearch.exponentialhistogram; -import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.common.breaker.CircuitBreaker; +import org.elasticsearch.common.unit.ByteSizeValue; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThan; -public class FixedCapacityExponentialHistogramTests extends ESTestCase { +public class FixedCapacityExponentialHistogramTests extends ExponentialHistogramTestCase { public void testValueCountUpdatedCorrectly() { - FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(100); + FixedCapacityExponentialHistogram histogram = createAutoReleasedHistogram(100); assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L)); assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L)); @@ -57,4 +59,13 @@ public void testValueCountUpdatedCorrectly() { assertThat(histogram.negativeBuckets().valueCount(), equalTo(0L)); assertThat(histogram.positiveBuckets().valueCount(), equalTo(0L)); } + + public void testMemoryAccounting() { + CircuitBreaker esBreaker = newLimitedBreaker(ByteSizeValue.ofMb(100)); + try (FixedCapacityExponentialHistogram histogram = FixedCapacityExponentialHistogram.create(100, breaker(esBreaker))) { + assertThat(histogram.ramBytesUsed(), greaterThan(2 * RamEstimationUtil.estimateLongArray(100))); + assertThat(esBreaker.getUsed(), equalTo(histogram.ramBytesUsed())); + } + assertThat(esBreaker.getUsed(), equalTo(0L)); + } } diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java index 3f1ffaffda30b..cec0d1f0f0ff7 100644 --- 
a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/QuantileAccuracyTests.java @@ -30,7 +30,6 @@ import org.apache.commons.math3.distribution.UniformRealDistribution; import org.apache.commons.math3.distribution.WeibullDistribution; import org.apache.commons.math3.random.Well19937c; -import org.elasticsearch.test.ESTestCase; import java.util.Arrays; import java.util.HashSet; @@ -47,7 +46,7 @@ import static org.hamcrest.Matchers.lessThanOrEqualTo; import static org.hamcrest.Matchers.notANumber; -public class QuantileAccuracyTests extends ESTestCase { +public class QuantileAccuracyTests extends ExponentialHistogramTestCase { public static final double[] QUANTILES_TO_TEST = { 0, 0.0000001, 0.01, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99, 0.999999, 1.0 }; @@ -57,7 +56,7 @@ private static int randomBucketCount() { } public void testNoNegativeZeroReturned() { - FixedCapacityExponentialHistogram histogram = new FixedCapacityExponentialHistogram(2); + FixedCapacityExponentialHistogram histogram = createAutoReleasedHistogram(2); histogram.resetBuckets(MAX_SCALE); // add a single, negative bucket close to zero histogram.tryAddBucket(MIN_INDEX, 3, false); @@ -100,7 +99,7 @@ public void testBasicSmall() { } public void testPercentileOverlapsZeroBucket() { - ExponentialHistogram histo = ExponentialHistogram.create(9, -3.0, -2, -1, 0, 0, 0, 1, 2, 3); + ExponentialHistogram histo = createAutoReleasedHistogram(9, -3.0, -2, -1, 0, 0, 0, 1, 2, 3); assertThat(ExponentialHistogramQuantile.getQuantile(histo, 8.0 / 16.0), equalTo(0.0)); assertThat(ExponentialHistogramQuantile.getQuantile(histo, 7.0 / 16.0), equalTo(0.0)); assertThat(ExponentialHistogramQuantile.getQuantile(histo, 9.0 / 16.0), equalTo(0.0)); @@ -154,14 +153,14 @@ public void testExplicitSkewedData() { } public void testEmptyHistogram() { - ExponentialHistogram histo = ExponentialHistogram.create(1); + ExponentialHistogram histo = ExponentialHistogram.empty(); for (double q : QUANTILES_TO_TEST) { assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), notANumber()); } } public void testSingleValueHistogram() { - ExponentialHistogram histo = ExponentialHistogram.create(1, 42.0); + ExponentialHistogram histo = createAutoReleasedHistogram(1, 42.0); for (double q : QUANTILES_TO_TEST) { assertThat(ExponentialHistogramQuantile.getQuantile(histo, q), closeTo(42, 0.0000001)); } @@ -232,12 +231,12 @@ private static double[] generateSamples(RealDistribution distribution, int sampl } private double testQuantileAccuracy(double[] values, int bucketCount) { - // Create histogram - ExponentialHistogram histogram = ExponentialHistogram.create(bucketCount, values); + ExponentialHistogram histogram = createAutoReleasedHistogram(bucketCount, values); + Arrays.sort(values); - double allowedError = getMaximumRelativeError(values, bucketCount); double maxError = 0; + double allowedError = getMaximumRelativeError(values, bucketCount); // Compare histogram quantiles with exact quantiles for (double q : QUANTILES_TO_TEST) { diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RamEstimationUtilTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RamEstimationUtilTests.java new file mode 100644 index 0000000000000..2bb17ae264270 --- /dev/null +++ 
b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/RamEstimationUtilTests.java @@ -0,0 +1,48 @@ +/* + * Copyright Elasticsearch B.V., and/or licensed to Elasticsearch B.V. + * under one or more license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright + * ownership. Elasticsearch B.V. licenses this file to you under + * the Apache License, Version 2.0 (the "License"); you may + * not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + * + * This file is based on a modification of https://github.com/open-telemetry/opentelemetry-java which is licensed under the Apache 2.0 License. + */ + +package org.elasticsearch.exponentialhistogram; + +import org.apache.lucene.util.RamUsageEstimator; + +import static org.hamcrest.Matchers.equalTo; + +public class RamEstimationUtilTests extends ExponentialHistogramTestCase { + + public void testLongArrayEstimation() { + assertThat(RamEstimationUtil.estimateLongArray(0), equalTo(RamUsageEstimator.sizeOf(new long[0]))); + assertThat(RamEstimationUtil.estimateLongArray(1), equalTo(RamUsageEstimator.sizeOf(new long[1]))); + assertThat(RamEstimationUtil.estimateLongArray(1000), equalTo(RamUsageEstimator.sizeOf(new long[1000]))); + } + + public void testDoubleArrayEstimation() { + assertThat(RamEstimationUtil.estimateDoubleArray(0), equalTo(RamUsageEstimator.sizeOf(new double[0]))); + assertThat(RamEstimationUtil.estimateDoubleArray(1), equalTo(RamUsageEstimator.sizeOf(new double[1]))); + assertThat(RamEstimationUtil.estimateDoubleArray(1000), equalTo(RamUsageEstimator.sizeOf(new double[1000]))); + } + + public void testIntArrayEstimation() { + assertThat(RamEstimationUtil.estimateIntArray(0), equalTo(RamUsageEstimator.sizeOf(new int[0]))); + assertThat(RamEstimationUtil.estimateIntArray(1), equalTo(RamUsageEstimator.sizeOf(new int[1]))); + assertThat(RamEstimationUtil.estimateIntArray(1000), equalTo(RamUsageEstimator.sizeOf(new int[1000]))); + } + +} diff --git a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java index 43873fba53ec7..fdea89d0421c5 100644 --- a/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java +++ b/libs/exponential-histogram/src/test/java/org/elasticsearch/exponentialhistogram/ZeroBucketTests.java @@ -21,11 +21,9 @@ package org.elasticsearch.exponentialhistogram; -import org.elasticsearch.test.ESTestCase; - import static org.hamcrest.Matchers.equalTo; -public class ZeroBucketTests extends ESTestCase { +public class ZeroBucketTests extends ExponentialHistogramTestCase { public void testMinimalBucketHasZeroThreshold() { assertThat(ZeroBucket.minimalWithCount(42).zeroThreshold(), equalTo(0.0)); diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java index a02370a89f931..c083f1c92a4fd 100644 --- 
a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java +++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/ESVectorUtil.java @@ -368,4 +368,35 @@ public static void soarDistanceBulk( } IMPL.soarDistanceBulk(v1, c0, c1, c2, c3, originalResidual, soarLambda, rnorm, distances); } + + /** + * Packs the provided int array populated with "0" and "1" values into a byte array. + * + * @param vector the int array to pack, must contain only "0" and "1" values. + * @param packed the byte array to store the packed result, must be large enough to hold the packed data. + */ + public static void packAsBinary(int[] vector, byte[] packed) { + if (packed.length * Byte.SIZE < vector.length) { + throw new IllegalArgumentException("packed array is too small: " + packed.length * Byte.SIZE + " < " + vector.length); + } + IMPL.packAsBinary(vector, packed); + } + + /** + * The idea here is to organize the query vector bits such that the first bit + * of every dimension is in the first set dimensions bits, or (dimensions/8) bytes. The second, + * third, and fourth bits are in the second, third, and fourth set of dimensions bits, + * respectively. This allows for direct bitwise comparisons with the stored index vectors through + * summing the bitwise results with the relative required bit shifts. + * + * @param q the query vector, assumed to be half-byte quantized with values between 0 and 15 + * @param quantQueryByte the byte array to store the transposed query vector. + * + **/ + public static void transposeHalfByte(int[] q, byte[] quantQueryByte) { + if (quantQueryByte.length * Byte.SIZE < 4 * q.length) { + throw new IllegalArgumentException("packed array is too small: " + quantQueryByte.length * Byte.SIZE + " < " + 4 * q.length); + } + IMPL.transposeHalfByte(q, quantQueryByte); + } } diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java index 12abda2506252..c78970a0c8794 100644 --- a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java +++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/DefaultESVectorUtilSupport.java @@ -320,4 +320,87 @@ public void soarDistanceBulk( distances[2] = soarDistance(v1, c2, originalResidual, soarLambda, rnorm); distances[3] = soarDistance(v1, c3, originalResidual, soarLambda, rnorm); } + + @Override + public void packAsBinary(int[] vector, byte[] packed) { + packAsBinaryImpl(vector, packed); + } + + public static void packAsBinaryImpl(int[] vector, byte[] packed) { + int limit = vector.length - 7; + int i = 0; + int index = 0; + for (; i < limit; i += 8, index++) { + assert vector[i] == 0 || vector[i] == 1; + assert vector[i + 1] == 0 || vector[i + 1] == 1; + assert vector[i + 2] == 0 || vector[i + 2] == 1; + assert vector[i + 3] == 0 || vector[i + 3] == 1; + assert vector[i + 4] == 0 || vector[i + 4] == 1; + assert vector[i + 5] == 0 || vector[i + 5] == 1; + assert vector[i + 6] == 0 || vector[i + 6] == 1; + assert vector[i + 7] == 0 || vector[i + 7] == 1; + int result = vector[i] << 7 | (vector[i + 1] << 6) | (vector[i + 2] << 5) | (vector[i + 3] << 4) | (vector[i + 4] << 3) + | (vector[i + 5] << 2) | (vector[i + 6] << 1) | (vector[i + 7]); + packed[index] = (byte) result; + } + if (i == vector.length) { + return; + } + byte result = 0; + for (int j = 7; j >= 0 && i < vector.length; i++, j--) { + 
assert vector[i] == 0 || vector[i] == 1; + result |= (byte) ((vector[i] & 1) << j); + } + packed[index] = result; + } + + @Override + public void transposeHalfByte(int[] q, byte[] quantQueryByte) { + transposeHalfByteImpl(q, quantQueryByte); + } + + public static void transposeHalfByteImpl(int[] q, byte[] quantQueryByte) { + int limit = q.length - 7; + int i = 0; + int index = 0; + for (; i < limit; i += 8, index++) { + assert q[i] >= 0 && q[i] <= 15; + assert q[i + 1] >= 0 && q[i + 1] <= 15; + assert q[i + 2] >= 0 && q[i + 2] <= 15; + assert q[i + 3] >= 0 && q[i + 3] <= 15; + assert q[i + 4] >= 0 && q[i + 4] <= 15; + assert q[i + 5] >= 0 && q[i + 5] <= 15; + assert q[i + 6] >= 0 && q[i + 6] <= 15; + assert q[i + 7] >= 0 && q[i + 7] <= 15; + int lowerByte = (q[i] & 1) << 7 | (q[i + 1] & 1) << 6 | (q[i + 2] & 1) << 5 | (q[i + 3] & 1) << 4 | (q[i + 4] & 1) << 3 | (q[i + + 5] & 1) << 2 | (q[i + 6] & 1) << 1 | (q[i + 7] & 1); + int lowerMiddleByte = ((q[i] >> 1) & 1) << 7 | ((q[i + 1] >> 1) & 1) << 6 | ((q[i + 2] >> 1) & 1) << 5 | ((q[i + 3] >> 1) & 1) + << 4 | ((q[i + 4] >> 1) & 1) << 3 | ((q[i + 5] >> 1) & 1) << 2 | ((q[i + 6] >> 1) & 1) << 1 | ((q[i + 7] >> 1) & 1); + int upperMiddleByte = ((q[i] >> 2) & 1) << 7 | ((q[i + 1] >> 2) & 1) << 6 | ((q[i + 2] >> 2) & 1) << 5 | ((q[i + 3] >> 2) & 1) + << 4 | ((q[i + 4] >> 2) & 1) << 3 | ((q[i + 5] >> 2) & 1) << 2 | ((q[i + 6] >> 2) & 1) << 1 | ((q[i + 7] >> 2) & 1); + int upperByte = ((q[i] >> 3) & 1) << 7 | ((q[i + 1] >> 3) & 1) << 6 | ((q[i + 2] >> 3) & 1) << 5 | ((q[i + 3] >> 3) & 1) << 4 + | ((q[i + 4] >> 3) & 1) << 3 | ((q[i + 5] >> 3) & 1) << 2 | ((q[i + 6] >> 3) & 1) << 1 | ((q[i + 7] >> 3) & 1); + quantQueryByte[index] = (byte) lowerByte; + quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; + quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; + quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; + } + if (i == q.length) { + return; // all done + } + int lowerByte = 0; + int lowerMiddleByte = 0; + int upperMiddleByte = 0; + int upperByte = 0; + for (int j = 7; i < q.length; j--, i++) { + lowerByte |= (q[i] & 1) << j; + lowerMiddleByte |= ((q[i] >> 1) & 1) << j; + upperMiddleByte |= ((q[i] >> 2) & 1) << j; + upperByte |= ((q[i] >> 3) & 1) << j; + } + quantQueryByte[index] = (byte) lowerByte; + quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; + quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; + quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; + } } diff --git a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java index 895105a452b0c..08c256051661e 100644 --- a/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java +++ b/libs/simdvec/src/main/java/org/elasticsearch/simdvec/internal/vectorization/ESVectorUtilSupport.java @@ -63,4 +63,8 @@ void soarDistanceBulk( float rnorm, float[] distances ); + + void packAsBinary(int[] vector, byte[] packed); + + void transposeHalfByte(int[] q, byte[] quantQueryByte); } diff --git a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java index 2a5f633d51b78..62637a621cd0b 100644 --- 
a/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java +++ b/libs/simdvec/src/main21/java/org/elasticsearch/simdvec/internal/vectorization/PanamaESVectorUtilSupport.java @@ -22,8 +22,11 @@ import org.apache.lucene.util.Constants; import static jdk.incubator.vector.VectorOperators.ADD; +import static jdk.incubator.vector.VectorOperators.ASHR; +import static jdk.incubator.vector.VectorOperators.LSHL; import static jdk.incubator.vector.VectorOperators.MAX; import static jdk.incubator.vector.VectorOperators.MIN; +import static jdk.incubator.vector.VectorOperators.OR; public final class PanamaESVectorUtilSupport implements ESVectorUtilSupport { @@ -942,4 +945,181 @@ public void soarDistanceBulk( distances[2] = dsq2 + soarLambda * proj2 * proj2 / rnorm; distances[3] = dsq3 + soarLambda * proj3 * proj3 / rnorm; } + + private static final VectorSpecies INT_SPECIES_128 = IntVector.SPECIES_128; + private static final IntVector SHIFTS_256; + private static final IntVector HIGH_SHIFTS_128; + private static final IntVector LOW_SHIFTS_128; + static { + final int[] shifts = new int[] { 7, 6, 5, 4, 3, 2, 1, 0 }; + if (VECTOR_BITSIZE == 128) { + HIGH_SHIFTS_128 = IntVector.fromArray(INT_SPECIES_128, shifts, 0); + LOW_SHIFTS_128 = IntVector.fromArray(INT_SPECIES_128, shifts, INT_SPECIES_128.length()); + SHIFTS_256 = null; + } else { + SHIFTS_256 = IntVector.fromArray(INT_SPECIES_256, shifts, 0); + HIGH_SHIFTS_128 = null; + LOW_SHIFTS_128 = null; + } + } + private static final int[] SHIFTS = new int[] { 7, 6, 5, 4, 3, 2, 1, 0 }; + + @Override + public void packAsBinary(int[] vector, byte[] packed) { + // 128 / 32 == 4 + if (vector.length >= 8 && HAS_FAST_INTEGER_VECTORS) { + // TODO: can we optimize for >= 512? + if (VECTOR_BITSIZE >= 256) { + packAsBinary256(vector, packed); + return; + } else if (VECTOR_BITSIZE == 128) { + packAsBinary128(vector, packed); + return; + } + } + DefaultESVectorUtilSupport.packAsBinaryImpl(vector, packed); + } + + private void packAsBinary256(int[] vector, byte[] packed) { + final int limit = INT_SPECIES_256.loopBound(vector.length); + int i = 0; + int index = 0; + for (; i < limit; i += INT_SPECIES_256.length(), index++) { + IntVector v = IntVector.fromArray(INT_SPECIES_256, vector, i); + int result = v.lanewise(LSHL, SHIFTS_256).reduceLanes(OR); + packed[index] = (byte) result; + } + if (i == vector.length) { + return; // all done + } + byte result = 0; + for (int j = 7; j >= 0 && i < vector.length; i++, j--) { + assert vector[i] == 0 || vector[i] == 1; + result |= (byte) ((vector[i] & 1) << j); + } + packed[index] = result; + } + + private void packAsBinary128(int[] vector, byte[] packed) { + final int limit = INT_SPECIES_128.loopBound(vector.length) - INT_SPECIES_128.length(); + int i = 0; + int index = 0; + for (; i < limit; i += 2 * INT_SPECIES_128.length(), index++) { + IntVector v = IntVector.fromArray(INT_SPECIES_128, vector, i); + var v1 = v.lanewise(LSHL, HIGH_SHIFTS_128); + v = IntVector.fromArray(INT_SPECIES_128, vector, i + INT_SPECIES_128.length()); + var v2 = v.lanewise(LSHL, LOW_SHIFTS_128); + int result = v1.lanewise(OR, v2).reduceLanes(OR); + packed[index] = (byte) result; + } + if (i == vector.length) { + return; // all done + } + byte result = 0; + for (int j = 7; j >= 0 && i < vector.length; i++, j--) { + assert vector[i] == 0 || vector[i] == 1; + result |= (byte) ((vector[i] & 1) << j); + } + packed[index] = result; + } + + @Override + public void transposeHalfByte(int[] q, byte[] quantQueryByte) { + 
// 128 / 32 == 4 + if (q.length >= 8 && HAS_FAST_INTEGER_VECTORS) { + if (VECTOR_BITSIZE >= 256) { + transposeHalfByte256(q, quantQueryByte); + return; + } else if (VECTOR_BITSIZE == 128) { + transposeHalfByte128(q, quantQueryByte); + return; + } + } + DefaultESVectorUtilSupport.transposeHalfByteImpl(q, quantQueryByte); + } + + private void transposeHalfByte256(int[] q, byte[] quantQueryByte) { + final int limit = INT_SPECIES_256.loopBound(q.length); + int i = 0; + int index = 0; + for (; i < limit; i += INT_SPECIES_256.length(), index++) { + IntVector v = IntVector.fromArray(INT_SPECIES_256, q, i); + + int lowerByte = v.and(1).lanewise(LSHL, SHIFTS_256).reduceLanes(VectorOperators.OR); + int lowerMiddleByte = v.lanewise(ASHR, 1).and(1).lanewise(LSHL, SHIFTS_256).reduceLanes(VectorOperators.OR); + int upperMiddleByte = v.lanewise(ASHR, 2).and(1).lanewise(LSHL, SHIFTS_256).reduceLanes(VectorOperators.OR); + int upperByte = v.lanewise(ASHR, 3).and(1).lanewise(LSHL, SHIFTS_256).reduceLanes(VectorOperators.OR); + + quantQueryByte[index] = (byte) lowerByte; + quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; + quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; + quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; + + } + if (i == q.length) { + return; // all done + } + int lowerByte = 0; + int lowerMiddleByte = 0; + int upperMiddleByte = 0; + int upperByte = 0; + for (int j = 7; i < q.length; j--, i++) { + lowerByte |= (q[i] & 1) << j; + lowerMiddleByte |= ((q[i] >> 1) & 1) << j; + upperMiddleByte |= ((q[i] >> 2) & 1) << j; + upperByte |= ((q[i] >> 3) & 1) << j; + } + quantQueryByte[index] = (byte) lowerByte; + quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; + quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; + quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; + } + + private void transposeHalfByte128(int[] q, byte[] quantQueryByte) { + final int limit = INT_SPECIES_128.loopBound(q.length) - INT_SPECIES_128.length(); + int i = 0; + int index = 0; + for (; i < limit; i += 2 * INT_SPECIES_128.length(), index++) { + IntVector v = IntVector.fromArray(INT_SPECIES_128, q, i); + + var lowerByteHigh = v.and(1).lanewise(LSHL, HIGH_SHIFTS_128); + var lowerMiddleByteHigh = v.lanewise(ASHR, 1).and(1).lanewise(LSHL, HIGH_SHIFTS_128); + var upperMiddleByteHigh = v.lanewise(ASHR, 2).and(1).lanewise(LSHL, HIGH_SHIFTS_128); + var upperByteHigh = v.lanewise(ASHR, 3).and(1).lanewise(LSHL, HIGH_SHIFTS_128); + + v = IntVector.fromArray(INT_SPECIES_128, q, i + INT_SPECIES_128.length()); + var lowerByteLow = v.and(1).lanewise(LSHL, LOW_SHIFTS_128); + var lowerMiddleByteLow = v.lanewise(ASHR, 1).and(1).lanewise(LSHL, LOW_SHIFTS_128); + var upperMiddleByteLow = v.lanewise(ASHR, 2).and(1).lanewise(LSHL, LOW_SHIFTS_128); + var upperByteLow = v.lanewise(ASHR, 3).and(1).lanewise(LSHL, LOW_SHIFTS_128); + + int lowerByte = lowerByteHigh.lanewise(OR, lowerByteLow).reduceLanes(OR); + int lowerMiddleByte = lowerMiddleByteHigh.lanewise(OR, lowerMiddleByteLow).reduceLanes(OR); + int upperMiddleByte = upperMiddleByteHigh.lanewise(OR, upperMiddleByteLow).reduceLanes(OR); + int upperByte = upperByteHigh.lanewise(OR, upperByteLow).reduceLanes(OR); + + quantQueryByte[index] = (byte) lowerByte; + quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; + quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; + quantQueryByte[index + 3 * 
quantQueryByte.length / 4] = (byte) upperByte; + + } + if (i == q.length) { + return; // all done + } + int lowerByte = 0; + int lowerMiddleByte = 0; + int upperMiddleByte = 0; + int upperByte = 0; + for (int j = 7; i < q.length; j--, i++) { + lowerByte |= (q[i] & 1) << j; + lowerMiddleByte |= ((q[i] >> 1) & 1) << j; + upperMiddleByte |= ((q[i] >> 2) & 1) << j; + upperByte |= ((q[i] >> 3) & 1) << j; + } + quantQueryByte[index] = (byte) lowerByte; + quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; + quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; + quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; + } } diff --git a/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java index b51fc25fab9f1..24aff1107d7e7 100644 --- a/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java +++ b/libs/simdvec/src/test/java/org/elasticsearch/simdvec/ESVectorUtilTests.java @@ -9,6 +9,7 @@ package org.elasticsearch.simdvec; +import org.elasticsearch.index.codec.vectors.BQVectorUtils; import org.elasticsearch.index.codec.vectors.OptimizedScalarQuantizer; import org.elasticsearch.simdvec.internal.vectorization.BaseVectorizationTests; import org.elasticsearch.simdvec.internal.vectorization.ESVectorizationProvider; @@ -355,6 +356,34 @@ public void testSoarDistanceBulk() { assertArrayEquals(expectedDistances, panamaDistances, deltaEps); } + public void testPackAsBinary() { + int dims = randomIntBetween(16, 2048); + int[] toPack = new int[dims]; + for (int i = 0; i < dims; i++) { + toPack[i] = randomInt(1); + } + int length = BQVectorUtils.discretize(dims, 64) / 8; + byte[] packed = new byte[length]; + byte[] packedLegacy = new byte[length]; + defaultedProvider.getVectorUtilSupport().packAsBinary(toPack, packedLegacy); + defOrPanamaProvider.getVectorUtilSupport().packAsBinary(toPack, packed); + assertArrayEquals(packedLegacy, packed); + } + + public void testTransposeHalfByte() { + int dims = randomIntBetween(16, 2048); + int[] toPack = new int[dims]; + for (int i = 0; i < dims; i++) { + toPack[i] = randomInt(15); + } + int length = 4 * BQVectorUtils.discretize(dims, 64) / 8; + byte[] packed = new byte[length]; + byte[] packedLegacy = new byte[length]; + defaultedProvider.getVectorUtilSupport().transposeHalfByte(toPack, packedLegacy); + defOrPanamaProvider.getVectorUtilSupport().transposeHalfByte(toPack, packed); + assertArrayEquals(packedLegacy, packed); + } + private float[] generateRandomVector(int size) { float[] vector = new float[size]; for (int i = 0; i < size; ++i) { diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java index 5bf5295e7adde..eb3372e9b6e85 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/ScaledFloatFieldMapper.java @@ -39,6 +39,7 @@ import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.IgnoreMalformedStoredValues; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.MapperBuilderContext; import 
org.elasticsearch.index.mapper.NumberFieldMapper; import org.elasticsearch.index.mapper.SimpleMappedFieldType; @@ -380,7 +381,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { } // Multi fields don't have fallback synthetic source. if (isSyntheticSource && blContext.parentField(name()) == null) { - return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) { + return new FallbackSyntheticSourceBlockLoader( + fallbackSyntheticSourceBlockLoaderReader(), + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.doubles(expectedCount); diff --git a/muted-tests.yml b/muted-tests.yml index a22f4a890ca2d..6d1a2a1d45b6d 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -468,9 +468,6 @@ tests: - class: org.elasticsearch.xpack.ml.integration.TextEmbeddingQueryIT method: testHybridSearch issue: https://github.com/elastic/elasticsearch/issues/132703 -- class: org.elasticsearch.test.rest.ClientYamlTestSuiteIT - method: test {yaml=indices.simulate_index_template/10_basic/Simulate index template with data stream with mapping and setting overrides} - issue: https://github.com/elastic/elasticsearch/issues/132702 - class: org.elasticsearch.xpack.ml.integration.RevertModelSnapshotIT method: testRevertModelSnapshot issue: https://github.com/elastic/elasticsearch/issues/132733 @@ -501,27 +498,51 @@ tests: - class: org.elasticsearch.test.rest.yaml.RcsCcsCommonYamlTestSuiteIT method: test {p0=search/160_exists_query/Test exists query on keyword field in empty index} issue: https://github.com/elastic/elasticsearch/issues/132829 -- class: org.elasticsearch.xpack.esql.CsvTests - method: test {csv-spec:floats.EqualToMultivalue} - issue: https://github.com/elastic/elasticsearch/issues/132852 -- class: org.elasticsearch.xpack.esql.CsvTests - method: test {csv-spec:string.MvStringEquals} - issue: https://github.com/elastic/elasticsearch/issues/132856 -- class: org.elasticsearch.xpack.esql.CsvTests - method: test {csv-spec:string.LengthOfText} - issue: https://github.com/elastic/elasticsearch/issues/132857 -- class: org.elasticsearch.xpack.esql.CsvTests - method: test {csv-spec:floats.InMultivalue} - issue: https://github.com/elastic/elasticsearch/issues/132859 -- class: org.elasticsearch.xpack.esql.CsvTests - method: test {csv-spec:ip.CdirMatchEqualsInsOrs} - issue: https://github.com/elastic/elasticsearch/issues/132860 - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT method: test {p0=search.vectors/40_knn_search_cosine/kNN search only regular query} issue: https://github.com/elastic/elasticsearch/issues/132890 - class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT method: test {p0=search/410_named_queries/named_queries_with_score} issue: https://github.com/elastic/elasticsearch/issues/132906 +- class: org.elasticsearch.packaging.test.ArchiveGenerateInitialCredentialsTests + method: test40VerifyAutogeneratedCredentials + issue: https://github.com/elastic/elasticsearch/issues/132877 +- class: org.elasticsearch.packaging.test.ArchiveGenerateInitialCredentialsTests + method: test50CredentialAutogenerationOnlyOnce + issue: https://github.com/elastic/elasticsearch/issues/132878 +- class: org.elasticsearch.upgrades.TransformSurvivesUpgradeIT + method: testTransformRollingUpgrade + issue: https://github.com/elastic/elasticsearch/issues/132892 +- class: 
org.elasticsearch.index.mapper.LongFieldMapperTests + method: testFetchCoerced + issue: https://github.com/elastic/elasticsearch/issues/132893 +- class: org.elasticsearch.xpack.eql.planner.QueryTranslatorTests + method: testMatchOptimization + issue: https://github.com/elastic/elasticsearch/issues/132894 +- class: org.elasticsearch.xpack.deprecation.DeprecationHttpIT + method: testUniqueDeprecationResponsesMergedTogether + issue: https://github.com/elastic/elasticsearch/issues/132895 +- class: org.elasticsearch.search.CCSDuelIT + method: testTermsAggs + issue: https://github.com/elastic/elasticsearch/issues/132879 +- class: org.elasticsearch.search.CCSDuelIT + method: testTermsAggsWithProfile + issue: https://github.com/elastic/elasticsearch/issues/132880 +- class: org.elasticsearch.index.mapper.LongFieldMapperTests + method: testFetchMany + issue: https://github.com/elastic/elasticsearch/issues/132948 +- class: org.elasticsearch.index.mapper.LongFieldMapperTests + method: testFetch + issue: https://github.com/elastic/elasticsearch/issues/132956 +- class: org.elasticsearch.cluster.ClusterInfoServiceIT + method: testMaxQueueLatenciesInClusterInfo + issue: https://github.com/elastic/elasticsearch/issues/132957 +- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT + method: test {p0=search/400_synthetic_source/_doc_count} + issue: https://github.com/elastic/elasticsearch/issues/132965 +- class: org.elasticsearch.index.mapper.LongFieldMapperTests + method: testSyntheticSourceWithTranslogSnapshot + issue: https://github.com/elastic/elasticsearch/issues/132964 # Examples: # diff --git a/qa/multi-cluster-search/src/test/java/org/elasticsearch/search/CCSDuelIT.java b/qa/multi-cluster-search/src/test/java/org/elasticsearch/search/CCSDuelIT.java index 715a009692bf5..5b4ff08d8dfc7 100644 --- a/qa/multi-cluster-search/src/test/java/org/elasticsearch/search/CCSDuelIT.java +++ b/qa/multi-cluster-search/src/test/java/org/elasticsearch/search/CCSDuelIT.java @@ -670,7 +670,6 @@ public void testFieldCollapsingSortByField() throws Exception { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/40005") public void testTermsAggs() throws Exception { assumeMultiClusterSetup(); { @@ -685,7 +684,6 @@ public void testTermsAggs() throws Exception { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/40005") public void testTermsAggsWithProfile() throws Exception { assumeMultiClusterSetup(); { diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveGenerateInitialCredentialsTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveGenerateInitialCredentialsTests.java index f155b05d9552e..6f5d629b8a6d7 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveGenerateInitialCredentialsTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ArchiveGenerateInitialCredentialsTests.java @@ -78,7 +78,6 @@ public void test30NoAutogenerationWhenDaemonized() throws Exception { stopElasticsearch(); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/84407") public void test40VerifyAutogeneratedCredentials() throws Exception { /* Windows issue awaits fix: https://github.com/elastic/elasticsearch/issues/49340 */ assumeTrue("expect command isn't on Windows", distribution.platform != Distribution.Platform.WINDOWS); @@ -97,7 +96,6 @@ public void test40VerifyAutogeneratedCredentials() throws Exception { stopElasticsearch(); } - @AwaitsFix(bugUrl = 
"https://github.com/elastic/elasticsearch/issues/84407") public void test50CredentialAutogenerationOnlyOnce() throws Exception { /* Windows issue awaits fix: https://github.com/elastic/elasticsearch/issues/49340 */ assumeTrue("expect command isn't on Windows", distribution.platform != Distribution.Platform.WINDOWS); diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml index e1c200b23576b..664a76ef6778c 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/80_ingest_simulate.yml @@ -2182,3 +2182,131 @@ setup: - match: { docs.0.doc._index: "test" } - match: { docs.0.doc._source.foo: "bar" } - match: { docs.0.doc.error.type: "document_parsing_exception" } + +--- +"Test effective mapping": + + # This creates two templates, where the first reroutes to the second. Then we simulate ingesting and make sure that + # the effective_mapping is for the index where the document eventually would land. Also, the second index is really + # a data stream, so we expect to see a @timestamp field. + + - skip: + features: + - headers + - allowed_warnings + + - do: + headers: + Content-Type: application/json + ingest.put_pipeline: + id: "reroute-pipeline" + body: > + { + "processors": [ + { + "reroute": { + "destination": "second-index" + } + } + ] + } + - match: { acknowledged: true } + + - do: + allowed_warnings: + - "index template [first-index-template] has index patterns [first-index*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [first-index-template] will take precedence during new index creation" + indices.put_index_template: + name: first-index-template + body: + index_patterns: first-index* + template: + settings: + default_pipeline: "reroute-pipeline" + mappings: + dynamic: strict + properties: + foo: + type: text + + - do: + allowed_warnings: + - "index template [second-index-template] has index patterns [second-index*] matching patterns from existing older templates [global] with patterns (global => [*]); this template [second-index-template] will take precedence during new index creation" + indices.put_index_template: + name: second-index-template + body: + index_patterns: second-index* + template: + mappings: + dynamic: strict + properties: + bar: + type: text + + - do: + indices.put_index_template: + name: second-index-template + body: + index_patterns: second-index* + template: + lifecycle: + data_retention: "7d" + mappings: + dynamic: strict + properties: + bar: + type: text + data_stream: {} + + - do: + indices.create_data_stream: + name: second-index + - is_true: acknowledged + + - do: + cluster.health: + wait_for_status: yellow + + - do: + indices.put_data_stream_mappings: + name: second-index + body: + properties: + foo: + type: boolean + + - match: { data_streams.0.applied_to_data_stream: true } + + # Here is the meat of the test. We simulate ingesting into first-index, knowing it will be rerouted to second-index, + # which is actually a data stream. So we expect the effective_mapping to contain the fields from second-index + # (including the implicit @timestamp field) and not second-index. 
Plus, it ought to include fields from the + # mapping_addition that we pass in. + - do: + headers: + Content-Type: application/json + simulate.ingest: + body: > + { + "docs": [ + { + "_index": "first-index", + "_id": "id", + "_source": { + "foo": "bar" + } + } + ], + "mapping_addition": { + "dynamic": "strict", + "properties": { + "baz": { + "type": "keyword" + } + } + } + } + - length: { docs: 1 } + - match: { docs.0.doc._index: "second-index" } + - not_exists: docs.0.doc.effective_mapping._doc.properties.foo + - match: { docs.0.doc.effective_mapping._doc.properties.@timestamp.type: "date" } + - match: { docs.0.doc.effective_mapping._doc.properties.bar.type: "text" } + - match: { docs.0.doc.effective_mapping._doc.properties.baz.type: "keyword" } diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java index 05abbb8af7f12..50c8fd107c444 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/CmdLineArgs.java @@ -36,7 +36,7 @@ record CmdLineArgs( KnnIndexTester.IndexType indexType, int numCandidates, int k, - int[] nProbes, + double[] visitPercentages, int ivfClusterSize, int overSamplingFactor, int hnswM, @@ -63,7 +63,8 @@ record CmdLineArgs( static final ParseField INDEX_TYPE_FIELD = new ParseField("index_type"); static final ParseField NUM_CANDIDATES_FIELD = new ParseField("num_candidates"); static final ParseField K_FIELD = new ParseField("k"); - static final ParseField N_PROBE_FIELD = new ParseField("n_probe"); + // static final ParseField N_PROBE_FIELD = new ParseField("n_probe"); + static final ParseField VISIT_PERCENTAGE_FIELD = new ParseField("visit_percentage"); static final ParseField IVF_CLUSTER_SIZE_FIELD = new ParseField("ivf_cluster_size"); static final ParseField OVER_SAMPLING_FACTOR_FIELD = new ParseField("over_sampling_factor"); static final ParseField HNSW_M_FIELD = new ParseField("hnsw_m"); @@ -97,7 +98,8 @@ static CmdLineArgs fromXContent(XContentParser parser) throws IOException { PARSER.declareString(Builder::setIndexType, INDEX_TYPE_FIELD); PARSER.declareInt(Builder::setNumCandidates, NUM_CANDIDATES_FIELD); PARSER.declareInt(Builder::setK, K_FIELD); - PARSER.declareIntArray(Builder::setNProbe, N_PROBE_FIELD); + // PARSER.declareIntArray(Builder::setNProbe, N_PROBE_FIELD); + PARSER.declareDoubleArray(Builder::setVisitPercentages, VISIT_PERCENTAGE_FIELD); PARSER.declareInt(Builder::setIvfClusterSize, IVF_CLUSTER_SIZE_FIELD); PARSER.declareInt(Builder::setOverSamplingFactor, OVER_SAMPLING_FACTOR_FIELD); PARSER.declareInt(Builder::setHnswM, HNSW_M_FIELD); @@ -132,7 +134,8 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(INDEX_TYPE_FIELD.getPreferredName(), indexType.name().toLowerCase(Locale.ROOT)); builder.field(NUM_CANDIDATES_FIELD.getPreferredName(), numCandidates); builder.field(K_FIELD.getPreferredName(), k); - builder.field(N_PROBE_FIELD.getPreferredName(), nProbes); + // builder.field(N_PROBE_FIELD.getPreferredName(), nProbes); + builder.field(VISIT_PERCENTAGE_FIELD.getPreferredName(), visitPercentages); builder.field(IVF_CLUSTER_SIZE_FIELD.getPreferredName(), ivfClusterSize); builder.field(OVER_SAMPLING_FACTOR_FIELD.getPreferredName(), overSamplingFactor); builder.field(HNSW_M_FIELD.getPreferredName(), hnswM); @@ -165,7 +168,7 @@ static class Builder { private KnnIndexTester.IndexType indexType = KnnIndexTester.IndexType.HNSW; private int 
numCandidates = 1000; private int k = 10; - private int[] nProbes = new int[] { 10 }; + private double[] visitPercentages = new double[] { 1.0 }; private int ivfClusterSize = 1000; private int overSamplingFactor = 1; private int hnswM = 16; @@ -223,8 +226,8 @@ public Builder setK(int k) { return this; } - public Builder setNProbe(List nProbes) { - this.nProbes = nProbes.stream().mapToInt(Integer::intValue).toArray(); + public Builder setVisitPercentages(List visitPercentages) { + this.visitPercentages = visitPercentages.stream().mapToDouble(Double::doubleValue).toArray(); return this; } @@ -330,7 +333,7 @@ public CmdLineArgs build() { indexType, numCandidates, k, - nProbes, + visitPercentages, ivfClusterSize, overSamplingFactor, hnswM, diff --git a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java index def4e3c14c6dc..ac4d1f948e4df 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnIndexTester.java @@ -191,9 +191,9 @@ public static void main(String[] args) throws Exception { FormattedResults formattedResults = new FormattedResults(); for (CmdLineArgs cmdLineArgs : cmdLineArgsList) { - int[] nProbes = cmdLineArgs.indexType().equals(IndexType.IVF) && cmdLineArgs.numQueries() > 0 - ? cmdLineArgs.nProbes() - : new int[] { 0 }; + double[] visitPercentages = cmdLineArgs.indexType().equals(IndexType.IVF) && cmdLineArgs.numQueries() > 0 + ? cmdLineArgs.visitPercentages() + : new double[] { 0 }; String indexType = cmdLineArgs.indexType().name().toLowerCase(Locale.ROOT); Results indexResults = new Results( cmdLineArgs.docVectors().get(0).getFileName().toString(), @@ -201,8 +201,8 @@ public static void main(String[] args) throws Exception { cmdLineArgs.numDocs(), cmdLineArgs.filterSelectivity() ); - Results[] results = new Results[nProbes.length]; - for (int i = 0; i < nProbes.length; i++) { + Results[] results = new Results[visitPercentages.length]; + for (int i = 0; i < visitPercentages.length; i++) { results[i] = new Results( cmdLineArgs.docVectors().get(0).getFileName().toString(), indexType, @@ -240,8 +240,7 @@ public static void main(String[] args) throws Exception { numSegments(indexPath, indexResults); if (cmdLineArgs.queryVectors() != null && cmdLineArgs.numQueries() > 0) { for (int i = 0; i < results.length; i++) { - int nProbe = nProbes[i]; - KnnSearcher knnSearcher = new KnnSearcher(indexPath, cmdLineArgs, nProbe); + KnnSearcher knnSearcher = new KnnSearcher(indexPath, cmdLineArgs, visitPercentages[i]); knnSearcher.runSearch(results[i], cmdLineArgs.earlyTermination()); } } @@ -293,7 +292,7 @@ public String toString() { String[] searchHeaders = { "index_name", "index_type", - "n_probe", + "visit_percentage(%)", "latency(ms)", "net_cpu_time(ms)", "avg_cpu_count", @@ -324,7 +323,7 @@ public String toString() { queryResultsArray[i] = new String[] { queryResult.indexName, queryResult.indexType, - Integer.toString(queryResult.nProbe), + String.format(Locale.ROOT, "%.2f", queryResult.visitPercentage), String.format(Locale.ROOT, "%.2f", queryResult.avgLatency), String.format(Locale.ROOT, "%.2f", queryResult.netCpuTimeMS), String.format(Locale.ROOT, "%.2f", queryResult.avgCpuCount), @@ -400,7 +399,7 @@ static class Results { long indexTimeMS; long forceMergeTimeMS; int numSegments; - int nProbe; + double visitPercentage; double avgLatency; double qps; double avgRecall; diff --git 
a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java index bb13dd75a4d9e..4b41a2664aa97 100644 --- a/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java +++ b/qa/vector/src/main/java/org/elasticsearch/test/knn/KnnSearcher.java @@ -107,7 +107,7 @@ class KnnSearcher { private final float selectivity; private final int topK; private final int efSearch; - private final int nProbe; + private final double visitPercentage; private final KnnIndexTester.IndexType indexType; private int dim; private final VectorSimilarityFunction similarityFunction; @@ -116,7 +116,7 @@ class KnnSearcher { private final int searchThreads; private final int numSearchers; - KnnSearcher(Path indexPath, CmdLineArgs cmdLineArgs, int nProbe) { + KnnSearcher(Path indexPath, CmdLineArgs cmdLineArgs, double visitPercentage) { this.docPath = cmdLineArgs.docVectors(); this.indexPath = indexPath; this.queryPath = cmdLineArgs.queryVectors(); @@ -131,7 +131,7 @@ class KnnSearcher { throw new IllegalArgumentException("numQueryVectors must be > 0"); } this.efSearch = cmdLineArgs.numCandidates(); - this.nProbe = nProbe; + this.visitPercentage = visitPercentage; this.indexType = cmdLineArgs.indexType(); this.searchThreads = cmdLineArgs.searchThreads(); this.numSearchers = cmdLineArgs.numSearchers(); @@ -298,7 +298,7 @@ void runSearch(KnnIndexTester.Results finalResults, boolean earlyTermination) th } logger.info("checking results"); int[][] nn = getOrCalculateExactNN(offsetByteSize, filterQuery); - finalResults.nProbe = indexType == KnnIndexTester.IndexType.IVF ? nProbe : 0; + finalResults.visitPercentage = indexType == KnnIndexTester.IndexType.IVF ? visitPercentage : 0; finalResults.avgRecall = checkResults(resultIds, nn, topK); finalResults.qps = (1000f * numQueryVectors) / elapsed; finalResults.avgLatency = (float) elapsed / numQueryVectors; @@ -424,7 +424,8 @@ TopDocs doVectorQuery(float[] vector, IndexSearcher searcher, Query filterQuery, } int efSearch = Math.max(topK, this.efSearch); if (indexType == KnnIndexTester.IndexType.IVF) { - knnQuery = new IVFKnnFloatVectorQuery(VECTOR_FIELD, vector, topK, efSearch, filterQuery, nProbe); + float visitRatio = (float) (visitPercentage / 100); + knnQuery = new IVFKnnFloatVectorQuery(VECTOR_FIELD, vector, topK, efSearch, filterQuery, visitRatio); } else { knnQuery = new ESKnnFloatVectorQuery( VECTOR_FIELD, diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_ai21.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_ai21.json new file mode 100644 index 0000000000000..6d59087402068 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_ai21.json @@ -0,0 +1,35 @@ +{ + "inference.put_ai21": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-ai21.html", + "description": "Configure a AI21 inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{ai21_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "ai21_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git 
a/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_llama.json b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_llama.json new file mode 100644 index 0000000000000..5551f655cb616 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/inference.put_llama.json @@ -0,0 +1,35 @@ +{ + "inference.put_llama": { + "documentation": { + "url": "https://www.elastic.co/guide/en/elasticsearch/reference/current/infer-service-llama.html", + "description": "Configure a Llama inference endpoint" + }, + "stability": "stable", + "visibility": "public", + "headers": { + "accept": ["application/json"], + "content_type": ["application/json"] + }, + "url": { + "paths": [ + { + "path": "/_inference/{task_type}/{llama_inference_id}", + "methods": ["PUT"], + "parts": { + "task_type": { + "type": "string", + "description": "The task type" + }, + "llama_inference_id": { + "type": "string", + "description": "The inference ID" + } + } + } + ] + }, + "body": { + "description": "The inference endpoint's task and service settings" + } + } +} diff --git a/rest-api-spec/src/main/resources/schema.json b/rest-api-spec/src/main/resources/schema.json index 02fdd920f9216..24b75aabefa89 100644 --- a/rest-api-spec/src/main/resources/schema.json +++ b/rest-api-spec/src/main/resources/schema.json @@ -6,12 +6,12 @@ "Api": { "type": "object", "propertyNames": { - "pattern": "^(?:[a-z]+?(_[a-z]+)*(\\.[a-z]+?(_[a-z]+)*)?|_internal\\.[a-z]+?(_[a-z]+)*)$" + "pattern": "^(?:[a-z][a-z0-9]*?(_[a-z0-9]+)*(\\.[a-z][a-z0-9]*?(_[a-z0-9]+)*)?|_internal\\.[a-z][a-z0-9]*?(_[a-z0-9]+)*)$" }, "minProperties": 1, "maxProperties": 1, "patternProperties": { - "^(?:[a-z]+?(_[a-z]+)*(\\.[a-z]+?(_[a-z]+)*)?|_internal\\.[a-z]+?(_[a-z]+)*)$": { + "^(?:[a-z][a-z0-9]*?(_[a-z0-9]+)*(\\.[a-z][a-z0-9]*?(_[a-z0-9]+)*)?|_internal\\.[a-z][a-z0-9]*?(_[a-z0-9]+)*)$": { "$ref": "#/definitions/Components" } }, @@ -135,10 +135,10 @@ "type": "object", "additionalProperties": true, "propertyNames": { - "pattern": "^_?[a-z]+?(_[a-z]+)*$" + "pattern": "^_?[a-z][a-z0-9]*?(_[a-z0-9]+)*$" }, "patternProperties": { - "^_?[a-z]+?(_[a-z]+)*$": { + "^_?[a-z][a-z0-9]*?(_[a-z0-9]+)*$": { "$ref": "#/definitions/ParamPart" } }, @@ -232,10 +232,10 @@ "type": "object", "additionalProperties": true, "propertyNames": { - "pattern": "^_?[a-z]+?(_[a-z]+)*$" + "pattern": "^_?[a-z][a-z0-9]*?(_[a-z0-9]+)*$" }, "patternProperties": { - "^_?[a-z]+?(_[a-z]+)*$": { + "^_?[a-z][a-z0-9]*?(_[a-z0-9]+)*$": { "$ref": "#/definitions/ParamPart" } }, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java index 6fd3133686b64..e138a2d0b5f6d 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/cluster/ClusterInfoServiceIT.java @@ -27,6 +27,7 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.concurrent.TaskExecutionTimeTrackingEsThreadPoolExecutor; import org.elasticsearch.core.TimeValue; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.shard.IndexShard; @@ -59,6 +60,8 @@ import static java.util.Collections.emptySet; import static java.util.Collections.singletonList; import static java.util.Collections.unmodifiableSet; +import static 
org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS; +import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.util.set.Sets.newHashSet; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.hamcrest.CoreMatchers.equalTo; @@ -344,8 +347,12 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() { var settings = Settings.builder() .put( WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(), - WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + randomBoolean() + ? WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + : WriteLoadConstraintSettings.WriteLoadDeciderStatus.LOW_THRESHOLD_ONLY ) + // Manually control cluster info refreshes + .put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), "60m") .build(); var masterName = internalCluster().startMasterOnlyNode(settings); var dataNodeName = internalCluster().startDataOnlyNode(settings); @@ -369,11 +376,8 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() { } ); - // Do some writes to create some write thread pool activity. - final String indexName = randomIdentifier(); - for (int i = 0; i < randomIntBetween(1, 1000); i++) { - index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar")); - } + // Generate some writes to get some non-zero write thread pool stats. + doALotOfDataNodeWrites(); // Force a refresh of the ClusterInfo state to collect fresh info from the data nodes. final InternalClusterInfoService masterClusterInfoService = asInstanceOf( @@ -387,7 +391,7 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() { final Map usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools(); logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools); - assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collectedg + assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected var dataNodeId = getNodeId(dataNodeName); var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId); assertNotNull(nodeUsageStatsForThreadPool); @@ -400,4 +404,174 @@ public void testClusterInfoIncludesNodeUsageStatsForThreadPools() { assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f)); assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L)); } + + /** + * The {@link TransportNodeUsageStatsForThreadPoolsAction} returns the max value of two kinds of queue latencies: + * {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#getMaxQueueLatencyMillisSinceLastPollAndReset()} and + * {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#peekMaxQueueLatencyInQueueMillis()}. The latter looks at currently queued tasks, + * and the former tracks the queue latency of tasks when they are taken off of the queue to start execution. 
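+     * In practice this means that a refresh taken while tasks are still sitting in the queue reports the peeked in-queue
+     * latency, a refresh taken after the queue has drained reports the largest latency observed as tasks were dequeued since
+     * the previous poll, and once neither source has anything left to report the maximum falls back to zero. The three
+     * assertion blocks below verify exactly that sequence.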
+ */ + public void testMaxQueueLatenciesInClusterInfo() throws Exception { + var settings = Settings.builder() + .put( + WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(), + WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + ) + // Manually control cluster info refreshes + .put(InternalClusterInfoService.INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), "60m") + .build(); + var masterName = internalCluster().startMasterOnlyNode(settings); + var dataNodeName = internalCluster().startDataOnlyNode(settings); + ensureStableCluster(2); + assertEquals(internalCluster().getMasterName(), masterName); + assertNotEquals(internalCluster().getMasterName(), dataNodeName); + logger.info("---> master node: " + masterName + ", data node: " + dataNodeName); + + // Block indexing on the data node by submitting write thread pool tasks equal to the number of write threads. + var barrier = blockDataNodeIndexing(dataNodeName); + try { + // Arbitrary number of tasks, which will queue because all the write threads are occupied already, greater than one: only + // strictly need a single task to occupy the queue. + int numberOfTasks = randomIntBetween(1, 5); + Thread[] threadsToJoin = new Thread[numberOfTasks]; + String indexName = randomIdentifier(); + createIndex( + indexName, + // NB: Set 0 replicas so that there aren't any stray GlobalCheckpointSyncAction tasks on the write thread pool. + Settings.builder().put(SETTING_NUMBER_OF_SHARDS, randomIntBetween(1, 5)).put(SETTING_NUMBER_OF_REPLICAS, 0).build() + ); + for (int i = 0; i < numberOfTasks; ++i) { + threadsToJoin[i] = startParallelSingleWrite(indexName); + } + + // Reach into the data node's write thread pool to check that tasks have reached the queue. + var dataNodeThreadPool = internalCluster().getInstance(ThreadPool.class, dataNodeName); + var writeExecutor = dataNodeThreadPool.executor(ThreadPool.Names.WRITE); + assert writeExecutor instanceof TaskExecutionTimeTrackingEsThreadPoolExecutor; + var trackingWriteExecutor = (TaskExecutionTimeTrackingEsThreadPoolExecutor) writeExecutor; + assertBusy( + // Wait for the parallel threads' writes to get queued in the write thread pool. + () -> assertThat( + "Write thread pool dump: " + trackingWriteExecutor, + trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(), + greaterThan(0L) + ) + ); + + // Force a refresh of the ClusterInfo state to collect fresh info from the data node. 
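+            // (ClusterInfoServiceUtils.refresh collects the stats synchronously, so the ClusterInfo returned below already
+            // reflects the tasks queued above.)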
+ final InternalClusterInfoService masterClusterInfoService = asInstanceOf( + InternalClusterInfoService.class, + internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class) + ); + final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService); + + // Since tasks are actively queued right now, #peekMaxQueueLatencyInQueue, which is called from the + // TransportNodeUsageStatsForThreadPoolsAction that a ClusterInfoService refresh initiates, should return a max queue + // latency > 0; + { + final Map usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools(); + logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools); + assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data node should be collected + var dataNodeId = getNodeId(dataNodeName); + var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId); + assertNotNull(nodeUsageStatsForThreadPool); + logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool); + + assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId()); + var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE); + assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats); + assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0)); + assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThanOrEqualTo(0f)); + assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThan(0L)); + } + + // Now release the data node's indexing, and drain the queued tasks. Max queue latency of executed (not queued) tasks is reset + // by each TransportNodeUsageStatsForThreadPoolsAction call (#getMaxQueueLatencyMillisSinceLastPollAndReset), so the new queue + // latencies will be present in the next call. There will be nothing in the queue to peek at now, so the result of the max + // queue latency result in TransportNodeUsageStatsForThreadPoolsAction will reflect + // #getMaxQueueLatencyMillisSinceLastPollAndReset and not #peekMaxQueueLatencyInQueue. + barrier.await(); + for (int i = 0; i < numberOfTasks; ++i) { + threadsToJoin[i].join(); + } + assertThat( + "Unexpectedly found a task queued for the write thread pool. 
Write thread pool dump: " + trackingWriteExecutor, + trackingWriteExecutor.peekMaxQueueLatencyInQueueMillis(), + equalTo(0L) + ); + + final ClusterInfo nextClusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService); + { + final Map usageStatsForThreadPools = nextClusterInfo + .getNodeUsageStatsForThreadPools(); + logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools); + assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected + var dataNodeId = getNodeId(dataNodeName); + var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId); + assertNotNull(nodeUsageStatsForThreadPool); + logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool); + + assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId()); + var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE); + assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats); + assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0)); + assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), greaterThan(0f)); + assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), greaterThanOrEqualTo(0L)); + } + } finally { + // Ensure that the write threads have been released by signalling an interrupt on any callers waiting on the barrier. If the + // callers have already all been successfully released, then there will be nothing left to interrupt. + logger.info("---> Ensuring release of the barrier on write thread pool tasks"); + barrier.reset(); + } + + // Now that there's nothing in the queue, and no activity since the last ClusterInfo refresh, the max latency returned should be + // zero. Verify this. + final InternalClusterInfoService masterClusterInfoService = asInstanceOf( + InternalClusterInfoService.class, + internalCluster().getCurrentMasterNodeInstance(ClusterInfoService.class) + ); + final ClusterInfo clusterInfo = ClusterInfoServiceUtils.refresh(masterClusterInfoService); + { + final Map usageStatsForThreadPools = clusterInfo.getNodeUsageStatsForThreadPools(); + logger.info("---> Thread pool usage stats reported by data nodes to the master: " + usageStatsForThreadPools); + assertThat(usageStatsForThreadPools.size(), equalTo(1)); // only stats from data nodes should be collected + var dataNodeId = getNodeId(dataNodeName); + var nodeUsageStatsForThreadPool = usageStatsForThreadPools.get(dataNodeId); + assertNotNull(nodeUsageStatsForThreadPool); + logger.info("---> Data node's thread pool stats: " + nodeUsageStatsForThreadPool); + + assertEquals(dataNodeId, nodeUsageStatsForThreadPool.nodeId()); + var writeThreadPoolStats = nodeUsageStatsForThreadPool.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE); + assertNotNull("Expected to find stats for the WRITE thread pool", writeThreadPoolStats); + assertThat(writeThreadPoolStats.totalThreadPoolThreads(), greaterThan(0)); + assertThat(writeThreadPoolStats.averageThreadPoolUtilization(), equalTo(0f)); + assertThat(writeThreadPoolStats.maxThreadPoolQueueLatencyMillis(), equalTo(0L)); + } + } + + /** + * Do some writes to create some write thread pool activity. 
+ */ + private void doALotOfDataNodeWrites() { + final String indexName = randomIdentifier(); + final int randomInt = randomIntBetween(500, 1000); + for (int i = 0; i < randomInt; i++) { + index(indexName, Integer.toString(i), Collections.singletonMap("foo", "bar")); + } + } + + /** + * Starts a single index request on a parallel thread and returns the reference so {@link Thread#join()} can be called eventually. + */ + private Thread startParallelSingleWrite(String indexName) { + Thread running = new Thread(() -> doSingleWrite(indexName)); + running.start(); + return running; + } + + private void doSingleWrite(String indexName) { + final int randomId = randomIntBetween(500, 1000); + index(indexName, Integer.toString(randomId), Collections.singletonMap("foo", "bar")); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java index f752650e3f3d7..a7821b5bf1f13 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/index/shard/IndexShardIT.java @@ -319,7 +319,9 @@ public void testNodeWriteLoadsArePresent() { Settings.builder() .put( WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(), - WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + randomBoolean() + ? WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + : WriteLoadConstraintSettings.WriteLoadDeciderStatus.LOW_THRESHOLD_ONLY ) .build() ); @@ -376,7 +378,9 @@ public void testShardWriteLoadsArePresent() { Settings.builder() .put( WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(), - WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + randomBoolean() + ? WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + : WriteLoadConstraintSettings.WriteLoadDeciderStatus.LOW_THRESHOLD_ONLY ) .build() ); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java index 50a5155d8ad4a..ce2141f96c418 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/indices/recovery/DanglingIndicesIT.java @@ -176,7 +176,6 @@ public void testMustAcceptDataLossToImportDanglingIndex() throws Exception { * other will be considered dangling, and can therefore be listed and * deleted through the API */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/108288") public void testDanglingIndexCanBeDeleted() throws Exception { final Settings settings = buildSettings(1, true); internalCluster().startNodes(3, settings); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java new file mode 100644 index 0000000000000..a49c624fb6edb --- /dev/null +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java @@ -0,0 +1,129 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License
+ * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
+ * Public License v 1"; you may not use this file except in compliance with, at
+ * your election, the "Elastic License 2.0", the "GNU Affero General Public
+ * License v3.0 only", or the "Server Side Public License, v 1".
+ */
+
+package org.elasticsearch.search.ccs;
+
+import org.elasticsearch.action.search.SearchRequest;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.TransportSearchAction;
+import org.elasticsearch.common.settings.Setting;
+import org.elasticsearch.common.settings.Settings;
+import org.elasticsearch.common.util.CollectionUtils;
+import org.elasticsearch.plugins.ClusterPlugin;
+import org.elasticsearch.plugins.Plugin;
+import org.elasticsearch.test.AbstractMultiClustersTestCase;
+import org.elasticsearch.transport.ConnectTransportException;
+import org.elasticsearch.transport.RemoteTransportException;
+import org.hamcrest.Matchers;
+
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ExecutionException;
+
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+
+public class CpsDoesNotUseSkipUnavailableIT extends AbstractMultiClustersTestCase {
+    private static final String LINKED_CLUSTER_1 = "cluster-a";
+
+    public static class CpsPlugin extends Plugin implements ClusterPlugin {
+        @Override
+        public List<Setting<?>> getSettings() {
+            return List.of(CpsEnableSetting);
+        }
+    }
+
+    private static final Setting<String> CpsEnableSetting = Setting.simpleString(
+        "serverless.cross_project.enabled",
+        Setting.Property.NodeScope
+    );
+
+    @Override
+    protected List<String> remoteClusterAlias() {
+        return List.of(LINKED_CLUSTER_1);
+    }
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins(String clusterAlias) {
+        return CollectionUtils.appendToCopy(super.nodePlugins(clusterAlias), CpsPlugin.class);
+    }
+
+    @Override
+    protected Settings nodeSettings() {
+        return Settings.builder().put(super.nodeSettings()).put("serverless.cross_project.enabled", "true").build();
+    }
+
+    @Override
+    protected Map<String, Boolean> skipUnavailableForRemoteClusters() {
+        // Setting skip_unavailable=false results in a fatal error when the linked cluster is not available.
+        return Map.of(LINKED_CLUSTER_1, false);
+    }
+
+    public void testCpsShouldNotUseSkipUnavailable() {
+        // Add some dummy data to prove we are communicating fine with the remote.
+        assertAcked(client(LINKED_CLUSTER_1).admin().indices().prepareCreate("test-index"));
+        client(LINKED_CLUSTER_1).prepareIndex("test-index").setSource("sample-field", "sample-value").get();
+        client(LINKED_CLUSTER_1).admin().indices().prepareRefresh("test-index").get();
+
+        // Shut down the linked cluster we'd be targeting in the search.
+        try {
+            cluster(LINKED_CLUSTER_1).close();
+        } catch (Exception e) {
+            throw new AssertionError(e);
+        }
+
+        /*
+         * Under normal circumstances, we should get a fatal error when skip_unavailable=false for a linked cluster
+         * and that cluster is targeted in a search op. However, in a CPS environment, setting
+         * allow_partial_search_results=true should not result in a fatal error.
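+         * For illustration, the request built by getSearchRequest() below targets both the origin and the linked
+         * cluster via the index expressions "*" and "*:*"; it is roughly the REST equivalent of a _search call that
+         * sets allow_partial_search_results=true in the request body.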
+ */ + { + var searchRequest = getSearchRequest(true); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + var result = safeGet(client().execute(TransportSearchAction.TYPE, searchRequest)); + + var originCluster = result.getClusters().getCluster(LOCAL_CLUSTER); + assertThat(originCluster.getStatus(), Matchers.is(SearchResponse.Cluster.Status.SUCCESSFUL)); + + var linkedCluster = result.getClusters().getCluster(LINKED_CLUSTER_1); + assertThat(linkedCluster.getStatus(), Matchers.is(SearchResponse.Cluster.Status.SKIPPED)); + + var linkedClusterFailures = result.getClusters().getCluster(LINKED_CLUSTER_1).getFailures(); + assertThat(linkedClusterFailures.size(), Matchers.is(1)); + // Failure is something along the lines of shard failure and is caused by a connection error. + assertThat( + linkedClusterFailures.getFirst().getCause(), + Matchers.anyOf(Matchers.instanceOf(RemoteTransportException.class), Matchers.instanceOf(ConnectTransportException.class)) + ); + } + + /* + * Previously, we did not get a fatal error even when skip_unavailable=false for the linked cluster. + * Now, we disable partial results and expect a fatal error. This proves that in CPS environment, + * search uses allow_partial_search_results and not skip_unavailable. + */ + { + var searchRequest = getSearchRequest(false); + searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); + var ae = expectThrows(AssertionError.class, () -> safeGet(client().execute(TransportSearchAction.TYPE, searchRequest))); + assertThat(ae.getCause(), Matchers.instanceOf(ExecutionException.class)); + assertThat( + ae.getCause().getCause(), + Matchers.anyOf(Matchers.instanceOf(RemoteTransportException.class), Matchers.instanceOf(ConnectTransportException.class)) + ); + } + } + + private SearchRequest getSearchRequest(boolean allowPartialResults) { + // Include both origin and linked cluster in the search op. + var searchRequest = new SearchRequest("*", "*:*"); + searchRequest.allowPartialSearchResults(allowPartialResults); + return searchRequest; + } +} diff --git a/server/src/main/java/org/elasticsearch/TransportVersions.java b/server/src/main/java/org/elasticsearch/TransportVersions.java index baf301147c5fd..f5283510bd1c9 100644 --- a/server/src/main/java/org/elasticsearch/TransportVersions.java +++ b/server/src/main/java/org/elasticsearch/TransportVersions.java @@ -364,6 +364,7 @@ static TransportVersion def(int id) { public static final TransportVersion EXTENDED_SNAPSHOT_STATS_IN_NODE_INFO = def(9_137_0_00); public static final TransportVersion SIMULATE_INGEST_MAPPING_MERGE_TYPE = def(9_138_0_00); public static final TransportVersion ESQL_LOOKUP_JOIN_ON_MANY_FIELDS = def(9_139_0_00); + public static final TransportVersion SIMULATE_INGEST_EFFECTIVE_MAPPING = def(9_140_0_00); /* * STOP! READ THIS FIRST! 
No, really, diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java index 29bc8efbbb192..93a5c6f7dad88 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/usage/TransportNodeUsageStatsForThreadPoolsAction.java @@ -104,7 +104,10 @@ protected NodeUsageStatsForThreadPoolsAction.NodeResponse nodeOperation( (float) trackingForWriteExecutor.pollUtilization( TaskExecutionTimeTrackingEsThreadPoolExecutor.UtilizationTrackingPurpose.ALLOCATION ), - trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset() + Math.max( + trackingForWriteExecutor.getMaxQueueLatencyMillisSinceLastPollAndReset(), + trackingForWriteExecutor.peekMaxQueueLatencyInQueueMillis() + ) ); Map perThreadPool = new HashMap<>(); diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java index 338ab8f1e7b14..b52f5447b9311 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java @@ -35,7 +35,6 @@ import org.elasticsearch.common.util.concurrent.AtomicArray; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Nullable; -import org.elasticsearch.core.Tuple; import org.elasticsearch.features.FeatureService; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettingProvider; @@ -144,14 +143,13 @@ protected void doInternalExecute( DocWriteRequest docRequest = bulkRequest.requests.get(i); assert docRequest instanceof IndexRequest : "TransportSimulateBulkAction should only ever be called with IndexRequests"; IndexRequest request = (IndexRequest) docRequest; - Tuple, Exception> validationResult = validateMappings( + ValidationResult validationResult = validateMappings( componentTemplateSubstitutions, indexTemplateSubstitutions, mappingAddition, request, mappingMergeReason ); - Exception mappingValidationException = validationResult.v2(); responses.set( i, BulkItemResponse.success( @@ -164,8 +162,9 @@ protected void doInternalExecute( request.source(), request.getContentType(), request.getExecutedPipelines(), - validationResult.v1(), - mappingValidationException + validationResult.ignoredFields, + validationResult.validationException, + validationResult.effectiveMapping ) ) ); @@ -193,7 +192,7 @@ private MapperService.MergeReason getMergeReason(String mergeType) { * @return a Tuple containing: (1) in v1 the names of any fields that would be ignored upon indexing and (2) in v2 the mapping * exception if the source does not match the mappings, otherwise null */ - private Tuple, Exception> validateMappings( + private ValidationResult validateMappings( Map componentTemplateSubstitutions, Map indexTemplateSubstitutions, Map mappingAddition, @@ -211,6 +210,7 @@ private Tuple, Exception> validateMappings( ); ProjectMetadata project = projectResolver.getProjectMetadata(clusterService.state()); + CompressedXContent effectiveMapping = null; Exception mappingValidationException = null; Collection ignoredFields = List.of(); IndexAbstraction indexAbstraction = 
project.getIndicesLookup().get(request.index()); @@ -222,8 +222,8 @@ private Tuple, Exception> validateMappings( */ IndexMetadata imd = project.getIndexSafe(indexAbstraction.getWriteIndex(request, project)); CompressedXContent mappings = Optional.ofNullable(imd.mapping()).map(MappingMetadata::source).orElse(null); - CompressedXContent mergedMappings = mappingAddition == null ? null : mergeMappings(mappings, mappingAddition); - ignoredFields = validateUpdatedMappingsFromIndexMetadata(imd, mergedMappings, request, sourceToParse, mappingMergeReason); + effectiveMapping = mappingAddition == null ? null : mergeMappings(mappings, mappingAddition); + ignoredFields = validateUpdatedMappingsFromIndexMetadata(imd, effectiveMapping, request, sourceToParse, mappingMergeReason); } else { /* * The index did not exist, or we have component template substitutions, so we put together the mappings from existing @@ -281,8 +281,8 @@ private Tuple, Exception> validateMappings( indexSettingProviders ); CompressedXContent mappings = template.mappings(); - CompressedXContent mergedMappings = mergeMappings(mappings, mappingAddition); - ignoredFields = validateUpdatedMappings(mappings, mergedMappings, request, sourceToParse, mappingMergeReason); + effectiveMapping = mergeMappings(mappings, mappingAddition); + ignoredFields = validateUpdatedMappings(mappings, effectiveMapping, request, sourceToParse, mappingMergeReason); } else { List matchingTemplates = findV1Templates(simulatedProjectMetadata, request.index(), false); if (matchingTemplates.isEmpty() == false) { @@ -295,23 +295,27 @@ private Tuple, Exception> validateMappings( matchingTemplates.stream().map(IndexTemplateMetadata::getMappings).collect(toList()), xContentRegistry ); - final CompressedXContent combinedMappings = mergeMappings(new CompressedXContent(mappingsMap), mappingAddition); - ignoredFields = validateUpdatedMappings(null, combinedMappings, request, sourceToParse, mappingMergeReason); + effectiveMapping = mergeMappings(new CompressedXContent(mappingsMap), mappingAddition); + ignoredFields = validateUpdatedMappings(null, effectiveMapping, request, sourceToParse, mappingMergeReason); } else { /* * The index matched no templates and had no mapping of its own. If there were component template substitutions * or index template substitutions, they didn't match anything. So just apply the mapping addition if it exists, * and validate. 
*/ - final CompressedXContent combinedMappings = mergeMappings(null, mappingAddition); - ignoredFields = validateUpdatedMappings(null, combinedMappings, request, sourceToParse, mappingMergeReason); + effectiveMapping = mergeMappings(null, mappingAddition); + ignoredFields = validateUpdatedMappings(null, effectiveMapping, request, sourceToParse, mappingMergeReason); } } } } catch (Exception e) { mappingValidationException = e; } - return Tuple.tuple(ignoredFields, mappingValidationException); + return new ValidationResult(effectiveMapping, mappingValidationException, ignoredFields); + } + + private record ValidationResult(CompressedXContent effectiveMapping, Exception validationException, Collection ignoredFields) { + } /* diff --git a/server/src/main/java/org/elasticsearch/action/ingest/SimulateIndexResponse.java b/server/src/main/java/org/elasticsearch/action/ingest/SimulateIndexResponse.java index 307996a4c72cb..1cad5d138f65e 100644 --- a/server/src/main/java/org/elasticsearch/action/ingest/SimulateIndexResponse.java +++ b/server/src/main/java/org/elasticsearch/action/ingest/SimulateIndexResponse.java @@ -14,6 +14,7 @@ import org.elasticsearch.action.bulk.IndexDocFailureStoreStatus; import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.xcontent.XContentHelper; @@ -26,6 +27,7 @@ import java.io.IOException; import java.util.Collection; import java.util.List; +import java.util.Map; /** * This is an IndexResponse that is specifically for simulate requests. Unlike typical IndexResponses, we need to include the original @@ -37,6 +39,7 @@ public class SimulateIndexResponse extends IndexResponse { private final XContentType sourceXContentType; private final Collection ignoredFields; private final Exception exception; + private final CompressedXContent effectiveMapping; @SuppressWarnings("this-escape") public SimulateIndexResponse(StreamInput in) throws IOException { @@ -54,6 +57,15 @@ public SimulateIndexResponse(StreamInput in) throws IOException { } else { this.ignoredFields = List.of(); } + if (in.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_INGEST_EFFECTIVE_MAPPING)) { + if (in.readBoolean()) { + this.effectiveMapping = CompressedXContent.readCompressedString(in); + } else { + this.effectiveMapping = null; + } + } else { + effectiveMapping = null; + } } @SuppressWarnings("this-escape") @@ -65,7 +77,8 @@ public SimulateIndexResponse( XContentType sourceXContentType, List pipelines, Collection ignoredFields, - @Nullable Exception exception + @Nullable Exception exception, + @Nullable CompressedXContent effectiveMapping ) { // We don't actually care about most of the IndexResponse fields: super( @@ -83,6 +96,7 @@ public SimulateIndexResponse( setShardInfo(ShardInfo.EMPTY); this.ignoredFields = ignoredFields; this.exception = exception; + this.effectiveMapping = effectiveMapping; } @Override @@ -108,6 +122,14 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) t ElasticsearchException.generateThrowableXContent(builder, params, exception); builder.endObject(); } + if (effectiveMapping == null) { + builder.field("effective_mapping", Map.of()); + } else { + builder.field( + "effective_mapping", + XContentHelper.convertToMap(effectiveMapping.uncompressed(), true, builder.contentType()).v2() + ); + } 
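+        // Illustrative example of the resulting response fragment: "effective_mapping": {} when no mapping applies,
+        // or the merged mapping rendered as a JSON object (e.g. {"properties": {"foo": {"type": "keyword"}}}).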
 return builder;
     }
 
@@ -127,6 +149,12 @@ public void writeTo(StreamOutput out) throws IOException {
         if (out.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_IGNORED_FIELDS)) {
             out.writeStringCollection(ignoredFields);
         }
+        if (out.getTransportVersion().onOrAfter(TransportVersions.SIMULATE_INGEST_EFFECTIVE_MAPPING)) {
+            out.writeBoolean(effectiveMapping != null);
+            if (effectiveMapping != null) {
+                effectiveMapping.writeTo(out);
+            }
+        }
     }
 
     public Exception getException() {
diff --git a/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java b/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java
index 1f676f29e446e..7b82116e5b447 100644
--- a/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java
+++ b/server/src/main/java/org/elasticsearch/action/search/SearchResponse.java
@@ -508,14 +508,16 @@ public static final class Clusters implements ToXContentFragment, Writeable {
          * @param localIndices The localIndices to be searched - null if no local indices are to be searched
          * @param remoteClusterIndices mapping of clusterAlias -> OriginalIndices for each remote cluster
          * @param ccsMinimizeRoundtrips whether minimizing roundtrips for the CCS
-         * @param skipUnavailablePredicate given a cluster alias, returns true if that cluster is skip_unavailable=true
-         *                                 and false otherwise
+         * @param skipOnFailurePredicate given a cluster alias, returns true if that cluster is marked as skippable
+         *                               and false otherwise. A cluster is considered skippable either when we are in
+         *                               a CPS environment and allow_partial_search_results=true, or when
+         *                               skip_unavailable=true.
          */
         public Clusters(
             @Nullable OriginalIndices localIndices,
             Map<String, OriginalIndices> remoteClusterIndices,
             boolean ccsMinimizeRoundtrips,
-            Predicate<String> skipUnavailablePredicate
+            Predicate<String> skipOnFailurePredicate
         ) {
             assert remoteClusterIndices.size() > 0 : "At least one remote cluster must be passed into this Cluster constructor";
             this.total = remoteClusterIndices.size() + (localIndices == null ?
0 : 1); @@ -531,8 +533,8 @@ public Clusters( } for (Map.Entry remote : remoteClusterIndices.entrySet()) { String clusterAlias = remote.getKey(); - boolean skipUnavailable = skipUnavailablePredicate.test(clusterAlias); - Cluster c = new Cluster(clusterAlias, String.join(",", remote.getValue().indices()), skipUnavailable); + boolean skipOnFailure = skipOnFailurePredicate.test(clusterAlias); + Cluster c = new Cluster(clusterAlias, String.join(",", remote.getValue().indices()), skipOnFailure); m.put(clusterAlias, c); } this.clusterInfo = m; diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index bf85075781bc8..86a97405ca549 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -422,7 +422,7 @@ void executeRequest( resolvedIndices.getLocalIndices(), resolvedIndices.getRemoteClusterIndices(), true, - remoteClusterService::isSkipUnavailable + (clusterAlias) -> remoteClusterService.shouldSkipOnFailure(clusterAlias, rewritten.allowPartialSearchResults()) ); if (resolvedIndices.getLocalIndices() == null) { // Notify the progress listener that a CCS with minimize_roundtrips is happening remote-only (no local shards) @@ -458,7 +458,7 @@ void executeRequest( resolvedIndices.getLocalIndices(), resolvedIndices.getRemoteClusterIndices(), false, - remoteClusterService::isSkipUnavailable + (clusterAlias) -> remoteClusterService.shouldSkipOnFailure(clusterAlias, rewritten.allowPartialSearchResults()) ); // TODO: pass parentTaskId @@ -697,7 +697,7 @@ static void ccsRemoteReduce( // and we directly perform final reduction in the remote cluster Map.Entry entry = resolvedIndices.getRemoteClusterIndices().entrySet().iterator().next(); String clusterAlias = entry.getKey(); - boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias); + boolean shouldSkipOnFailure = remoteClusterService.shouldSkipOnFailure(clusterAlias, searchRequest.allowPartialSearchResults()); OriginalIndices indices = entry.getValue(); SearchRequest ccsSearchRequest = SearchRequest.subSearchRequest( parentTaskId, @@ -713,7 +713,7 @@ static void ccsRemoteReduce( @Override public void onResponse(SearchResponse searchResponse) { // overwrite the existing cluster entry with the updated one - ccsClusterInfoUpdate(searchResponse, clusters, clusterAlias, skipUnavailable); + ccsClusterInfoUpdate(searchResponse, clusters, clusterAlias, shouldSkipOnFailure); Map profileResults = searchResponse.getProfileResults(); SearchProfileResults profile = profileResults == null || profileResults.isEmpty() ? 
null @@ -744,9 +744,9 @@ public void onResponse(SearchResponse searchResponse) { @Override public void onFailure(Exception e) { ShardSearchFailure failure = new ShardSearchFailure(e); - logCCSError(failure, clusterAlias, skipUnavailable); - ccsClusterInfoUpdate(failure, clusters, clusterAlias, skipUnavailable); - if (skipUnavailable) { + logCCSError(failure, clusterAlias, shouldSkipOnFailure); + ccsClusterInfoUpdate(failure, clusters, clusterAlias, shouldSkipOnFailure); + if (shouldSkipOnFailure) { ActionListener.respondAndRelease(listener, SearchResponse.empty(timeProvider::buildTookInMillis, clusters)); } else { listener.onFailure(wrapRemoteClusterFailure(clusterAlias, e)); @@ -768,7 +768,7 @@ public void onFailure(Exception e) { remoteClusterService.maybeEnsureConnectedAndGetConnection( clusterAlias, - shouldEstablishConnection(forceConnectTimeoutSecs, skipUnavailable), + shouldEstablishConnection(forceConnectTimeoutSecs, shouldSkipOnFailure), connectionListener ); } else { @@ -785,7 +785,10 @@ public void onFailure(Exception e) { final CountDown countDown = new CountDown(totalClusters); for (Map.Entry entry : resolvedIndices.getRemoteClusterIndices().entrySet()) { String clusterAlias = entry.getKey(); - boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias); + boolean shouldSkipOnFailure = remoteClusterService.shouldSkipOnFailure( + clusterAlias, + searchRequest.allowPartialSearchResults() + ); OriginalIndices indices = entry.getValue(); SearchRequest ccsSearchRequest = SearchRequest.subSearchRequest( parentTaskId, @@ -797,7 +800,7 @@ public void onFailure(Exception e) { ); ActionListener ccsListener = createCCSListener( clusterAlias, - skipUnavailable, + shouldSkipOnFailure, countDown, exceptions, searchResponseMerger, @@ -826,7 +829,7 @@ public void onFailure(Exception e) { remoteClusterService.maybeEnsureConnectedAndGetConnection( clusterAlias, - shouldEstablishConnection(forceConnectTimeoutSecs, skipUnavailable), + shouldEstablishConnection(forceConnectTimeoutSecs, shouldSkipOnFailure), connectionListener ); } @@ -903,10 +906,10 @@ static void collectSearchShards( final AtomicReference exceptions = new AtomicReference<>(); for (Map.Entry entry : remoteIndicesByCluster.entrySet()) { final String clusterAlias = entry.getKey(); - boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias); + boolean shouldSkipOnFailure = remoteClusterService.shouldSkipOnFailure(clusterAlias, allowPartialResults); CCSActionListener> singleListener = new CCSActionListener<>( clusterAlias, - skipUnavailable, + shouldSkipOnFailure, responsesCountDown, exceptions, clusters, @@ -975,7 +978,7 @@ Map createFinalResponse() { remoteClusterService.maybeEnsureConnectedAndGetConnection( clusterAlias, - shouldEstablishConnection(forceConnectTimeoutSecs, skipUnavailable), + shouldEstablishConnection(forceConnectTimeoutSecs, shouldSkipOnFailure), connectionListener ); } @@ -986,7 +989,7 @@ Map createFinalResponse() { */ private static ActionListener createCCSListener( String clusterAlias, - boolean skipUnavailable, + boolean shouldSkipOnFailure, CountDown countDown, AtomicReference exceptions, SearchResponseMerger searchResponseMerger, @@ -996,7 +999,7 @@ private static ActionListener createCCSListener( ) { return new CCSActionListener<>( clusterAlias, - skipUnavailable, + shouldSkipOnFailure, countDown, exceptions, clusters, @@ -1004,7 +1007,7 @@ private static ActionListener createCCSListener( ) { @Override void innerOnResponse(SearchResponse searchResponse) { - 
ccsClusterInfoUpdate(searchResponse, clusters, clusterAlias, skipUnavailable); + ccsClusterInfoUpdate(searchResponse, clusters, clusterAlias, shouldSkipOnFailure); searchResponseMerger.add(searchResponse); progressListener.notifyClusterResponseMinimizeRoundtrips(clusterAlias, searchResponse); } @@ -1029,11 +1032,11 @@ static void ccsClusterInfoUpdate( ShardSearchFailure failure, SearchResponse.Clusters clusters, String clusterAlias, - boolean skipUnavailable + boolean shouldSkipOnFailure ) { clusters.swapCluster(clusterAlias, (k, v) -> { SearchResponse.Cluster.Status status; - if (skipUnavailable) { + if (shouldSkipOnFailure) { status = SearchResponse.Cluster.Status.SKIPPED; } else { status = SearchResponse.Cluster.Status.FAILED; @@ -1056,7 +1059,7 @@ private static void ccsClusterInfoUpdate( SearchResponse searchResponse, SearchResponse.Clusters clusters, String clusterAlias, - boolean skipUnavailable + boolean shouldSkipOnFailure ) { /* * Cluster Status logic: @@ -1070,7 +1073,7 @@ private static void ccsClusterInfoUpdate( SearchResponse.Cluster.Status status; int totalShards = searchResponse.getTotalShards(); if (totalShards > 0 && searchResponse.getFailedShards() >= totalShards) { - if (skipUnavailable) { + if (shouldSkipOnFailure) { status = SearchResponse.Cluster.Status.SKIPPED; } else { status = SearchResponse.Cluster.Status.FAILED; @@ -1762,7 +1765,7 @@ private static void failIfOverShardCountLimit(ClusterService clusterService, int */ abstract static class CCSActionListener implements ActionListener { protected final String clusterAlias; - protected final boolean skipUnavailable; + protected final boolean skipOnFailure; private final CountDown countDown; private final AtomicReference exceptions; protected final SearchResponse.Clusters clusters; @@ -1773,14 +1776,14 @@ abstract static class CCSActionListener implements Acti */ CCSActionListener( String clusterAlias, - boolean skipUnavailable, + boolean skipOnFailure, CountDown countDown, AtomicReference exceptions, SearchResponse.Clusters clusters, ActionListener originalListener ) { this.clusterAlias = clusterAlias; - this.skipUnavailable = skipUnavailable; + this.skipOnFailure = skipOnFailure; this.countDown = countDown; this.exceptions = exceptions; this.clusters = clusters; @@ -1801,9 +1804,9 @@ public final void onResponse(Response response) { @Override public final void onFailure(Exception e) { ShardSearchFailure f = new ShardSearchFailure(e); - logCCSError(f, clusterAlias, skipUnavailable); + logCCSError(f, clusterAlias, skipOnFailure); SearchResponse.Cluster cluster = clusters.getCluster(clusterAlias); - if (skipUnavailable && ExceptionsHelper.isTaskCancelledException(e) == false) { + if (skipOnFailure && ExceptionsHelper.isTaskCancelledException(e) == false) { if (cluster != null) { ccsClusterInfoUpdate(f, clusters, clusterAlias, true); } @@ -1859,9 +1862,9 @@ protected void releaseResponse(FinalResponse response) {} * causes of shard failures. 
* @param f ShardSearchFailure to log * @param clusterAlias cluster on which the failure occurred - * @param skipUnavailable the skip_unavailable setting of the cluster with the search error + * @param shouldSkipOnFailure the skip_unavailable setting of the cluster with the search error */ - private static void logCCSError(ShardSearchFailure f, String clusterAlias, boolean skipUnavailable) { + private static void logCCSError(ShardSearchFailure f, String clusterAlias, boolean shouldSkipOnFailure) { String errorInfo; try { errorInfo = Strings.toString(f.toXContent(XContentFactory.jsonBuilder(), ToXContent.EMPTY_PARAMS)); @@ -1872,7 +1875,7 @@ private static void logCCSError(ShardSearchFailure f, String clusterAlias, boole logger.debug( "CCS remote cluster failure. Cluster [{}]. skip_unavailable: [{}]. Error: {}", clusterAlias, - skipUnavailable, + shouldSkipOnFailure, errorInfo ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java index 5633bd8b89e1e..1d3b79a0dc1af 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterModule.java @@ -66,6 +66,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.WriteLoadConstraintDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.NamedWriteable; import org.elasticsearch.common.io.stream.NamedWriteableRegistry.Entry; @@ -446,6 +447,7 @@ public static Collection createAllocationDeciders( addAllocationDecider(deciders, new SnapshotInProgressAllocationDecider()); addAllocationDecider(deciders, new RestoreInProgressAllocationDecider()); addAllocationDecider(deciders, new NodeShutdownAllocationDecider()); + addAllocationDecider(deciders, new WriteLoadConstraintDecider(clusterSettings)); addAllocationDecider(deciders, new NodeReplacementAllocationDecider()); addAllocationDecider(deciders, new FilterAllocationDecider(settings, clusterSettings)); addAllocationDecider(deciders, new SameShardAllocationDecider(clusterSettings)); diff --git a/server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java b/server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java index 6de0640f4422f..bd947c117f0e5 100644 --- a/server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java +++ b/server/src/main/java/org/elasticsearch/cluster/InternalClusterInfoService.java @@ -219,7 +219,7 @@ void execute() { logger.trace("starting async refresh"); try (var ignoredRefs = fetchRefs) { - maybeFetchIndicesStats(diskThresholdEnabled || writeLoadConstraintEnabled == WriteLoadDeciderStatus.ENABLED); + maybeFetchIndicesStats(diskThresholdEnabled || writeLoadConstraintEnabled.atLeastLowThresholdEnabled()); maybeFetchNodeStats(diskThresholdEnabled || estimatedHeapThresholdEnabled); maybeFetchNodesEstimatedHeapUsage(estimatedHeapThresholdEnabled); maybeFetchNodesUsageStatsForThreadPools(writeLoadConstraintEnabled); @@ -262,7 +262,7 @@ private void maybeFetchNodesEstimatedHeapUsage(boolean shouldFetch) { } private void maybeFetchNodesUsageStatsForThreadPools(WriteLoadDeciderStatus writeLoadConstraintEnabled) { - if 
(writeLoadConstraintEnabled != WriteLoadDeciderStatus.DISABLED) { + if (writeLoadConstraintEnabled.atLeastLowThresholdEnabled()) { try (var ignored = threadPool.getThreadContext().clearTraceContext()) { fetchNodesUsageStatsForThreadPools(); } @@ -313,7 +313,7 @@ private void fetchIndicesStats() { // This returns the shard sizes on disk indicesStatsRequest.store(true); } - if (writeLoadConstraintEnabled == WriteLoadDeciderStatus.ENABLED) { + if (writeLoadConstraintEnabled.atLeastLowThresholdEnabled()) { // This returns the shard write-loads indicesStatsRequest.indexing(true); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/ShardMovementWriteLoadSimulator.java b/server/src/main/java/org/elasticsearch/cluster/routing/ShardMovementWriteLoadSimulator.java index dda98184b5f5a..a807e3d9b6427 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/ShardMovementWriteLoadSimulator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/ShardMovementWriteLoadSimulator.java @@ -88,11 +88,32 @@ private static NodeUsageStatsForThreadPools.ThreadPoolUsageStats replaceWritePoo .get(ThreadPool.Names.WRITE); return new NodeUsageStatsForThreadPools.ThreadPoolUsageStats( writeThreadPoolStats.totalThreadPoolThreads(), - (float) Math.max( - (writeThreadPoolStats.averageThreadPoolUtilization() + (writeLoadDelta / writeThreadPoolStats.totalThreadPoolThreads())), - 0.0 + updateNodeUtilizationWithShardMovements( + writeThreadPoolStats.averageThreadPoolUtilization(), + (float) writeLoadDelta, + writeThreadPoolStats.totalThreadPoolThreads() ), writeThreadPoolStats.maxThreadPoolQueueLatencyMillis() ); } + + /** + * The {@code nodeUtilization} is the average utilization per thread for some duration of time. The {@code shardWriteLoadDelta} is the + * sum of shards' total execution time. Dividing the shards total execution time by the number of threads provides the average + * utilization of each write thread for those shards. The change in shard load can then be added to the node utilization. + * + * @param nodeUtilization The current node-level write load percent utilization. + * @param shardWriteLoadDelta The change in shard(s) execution time across all threads. This can be positive or negative depending on + * whether shards were moved onto the node or off of the node. + * @param numberOfWriteThreads The number of threads available in the node's write thread pool. + * @return The new node-level write load percent utilization after adding the shard write load delta. 
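+     *
+     * For example (illustrative numbers only): moving a shard with write load 2.0 onto a node with 8 write threads
+     * and a current utilization of 0.50 yields 0.50 + (2.0 / 8) = 0.75, while moving the same shard off of the node
+     * (a delta of -2.0) yields 0.25; the result is floored at 0.0.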
+     */
+    public static float updateNodeUtilizationWithShardMovements(
+        float nodeUtilization,
+        float shardWriteLoadDelta,
+        int numberOfWriteThreads
+    ) {
+        float newNodeUtilization = nodeUtilization + (shardWriteLoadDelta / numberOfWriteThreads);
+        return (float) Math.max(newNodeUtilization, 0.0);
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintSettings.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintSettings.java
index 23e1cb563f9fd..3ee0702b13192 100644
--- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintSettings.java
+++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/WriteLoadConstraintSettings.java
@@ -30,13 +30,30 @@ public enum WriteLoadDeciderStatus {
          */
         DISABLED,
         /**
-         * Only the low-threshold is enabled (write-load will not trigger rebalance)
+         * Only the low threshold is enabled: try to avoid allocating to a node exceeding
+         * {@link #WRITE_LOAD_DECIDER_HIGH_UTILIZATION_THRESHOLD_SETTING}. Write-load hot-spots will not trigger rebalancing.
          */
-        LOW_ONLY,
+        LOW_THRESHOLD_ONLY,
         /**
-         * The decider is enabled
+         * All write load decider functionality is turned on.
          */
-        ENABLED
+        ENABLED;
+
+        public boolean fullyEnabled() {
+            return this == ENABLED;
+        }
+
+        public boolean notFullyEnabled() {
+            return this != ENABLED;
+        }
+
+        public boolean atLeastLowThresholdEnabled() {
+            return this != DISABLED;
+        }
+
+        public boolean disabled() {
+            return this == DISABLED;
+        }
     }
 
     public static final Setting<WriteLoadDeciderStatus> WRITE_LOAD_DECIDER_ENABLED_SETTING = Setting.enumSetting(
@@ -102,10 +119,16 @@ public enum WriteLoadDeciderStatus {
     WriteLoadDeciderStatus writeLoadDeciderStatus;
     TimeValue writeLoadDeciderRerouteIntervalSetting;
+    double writeThreadPoolHighUtilizationThresholdSetting;
 
-    WriteLoadConstraintSettings(ClusterSettings clusterSettings) {
+    public WriteLoadConstraintSettings(ClusterSettings clusterSettings) {
         clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_ENABLED_SETTING, this::setWriteLoadConstraintEnabled);
         clusterSettings.initializeAndWatch(WRITE_LOAD_DECIDER_REROUTE_INTERVAL_SETTING, this::setWriteLoadDeciderRerouteIntervalSetting);
+        clusterSettings.initializeAndWatch(
+            WRITE_LOAD_DECIDER_HIGH_UTILIZATION_THRESHOLD_SETTING,
+            this::setWriteThreadPoolHighUtilizationThresholdSetting
+        );
+
     };
 
     private void setWriteLoadConstraintEnabled(WriteLoadDeciderStatus status) {
@@ -120,7 +143,15 @@ public TimeValue getWriteLoadDeciderRerouteIntervalSetting() {
         return this.writeLoadDeciderRerouteIntervalSetting;
     }
 
+    public double getWriteThreadPoolHighUtilizationThresholdSetting() {
+        return this.writeThreadPoolHighUtilizationThresholdSetting;
+    }
+
     private void setWriteLoadDeciderRerouteIntervalSetting(TimeValue timeValue) {
         this.writeLoadDeciderRerouteIntervalSetting = timeValue;
     }
+
+    private void setWriteThreadPoolHighUtilizationThresholdSetting(RatioValue percent) {
+        this.writeThreadPoolHighUtilizationThresholdSetting = percent.getAsRatio();
+    }
 }
diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java
index 5771c27c5d5ab..158b4941781f4 100644
--- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java
+++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceMetrics.java
@@ -10,6 +10,7
@@ package org.elasticsearch.cluster.routing.allocation.allocator; import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.NodeAllocationStatsAndWeightsCalculator.NodeAllocationStatsAndWeight; import org.elasticsearch.cluster.routing.allocation.decider.AllocationDeciders; import org.elasticsearch.telemetry.metric.DoubleWithAttributes; @@ -20,6 +21,7 @@ import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; +import java.util.function.ToLongFunction; /** * Maintains balancer metrics and makes them accessible to the {@link MeterRegistry} and APM reporting. Metrics are updated @@ -31,12 +33,63 @@ public class DesiredBalanceMetrics { /** * @param unassignedShards Shards that are not assigned to any node. + * @param allocationStatsByRole A breakdown of the allocations stats by {@link ShardRouting.Role} + */ + public record AllocationStats(long unassignedShards, Map allocationStatsByRole) { + + public AllocationStats(long unassignedShards, long totalAllocations, long undesiredAllocationsExcludingShuttingDownNodes) { + this( + unassignedShards, + Map.of(ShardRouting.Role.DEFAULT, new RoleAllocationStats(totalAllocations, undesiredAllocationsExcludingShuttingDownNodes)) + ); + } + + public long totalAllocations() { + return allocationStatsByRole.values().stream().mapToLong(RoleAllocationStats::totalAllocations).sum(); + } + + public long undesiredAllocationsExcludingShuttingDownNodes() { + return allocationStatsByRole.values() + .stream() + .mapToLong(RoleAllocationStats::undesiredAllocationsExcludingShuttingDownNodes) + .sum(); + } + + /** + * Return the ratio of undesired allocations to the total number of allocations. + * + * @return a value in [0.0, 1.0] + */ + public double undesiredAllocationsRatio() { + final long totalAllocations = totalAllocations(); + if (totalAllocations == 0) { + return 0; + } + return undesiredAllocationsExcludingShuttingDownNodes() / (double) totalAllocations; + } + } + + /** * @param totalAllocations Shards that are assigned to a node. * @param undesiredAllocationsExcludingShuttingDownNodes Shards that are assigned to a node but must move to alleviate a resource * constraint per the {@link AllocationDeciders}. Excludes shards that must move * because of a node shutting down. */ - public record AllocationStats(long unassignedShards, long totalAllocations, long undesiredAllocationsExcludingShuttingDownNodes) {} + public record RoleAllocationStats(long totalAllocations, long undesiredAllocationsExcludingShuttingDownNodes) { + public static final RoleAllocationStats EMPTY = new RoleAllocationStats(0L, 0L); + + /** + * Return the ratio of undesired allocations to the total number of allocations. 
+ * + * @return a value in [0.0, 1.0] + */ + public double undesiredAllocationsRatio() { + if (totalAllocations == 0) { + return 0.0; + } + return undesiredAllocationsExcludingShuttingDownNodes / (double) totalAllocations; + } + } public record NodeWeightStats(long shardCount, double diskUsageInBytes, double writeLoad, double nodeWeight) { public static final NodeWeightStats ZERO = new NodeWeightStats(0, 0, 0, 0); @@ -47,7 +100,7 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w public static final String UNASSIGNED_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.unassigned.current"; /** See {@link #totalAllocations} */ public static final String TOTAL_SHARDS_METRIC_NAME = "es.allocator.desired_balance.shards.current"; - /** See {@link #undesiredAllocationsExcludingShuttingDownNodes} */ + /** See {@link #undesiredAllocations} */ public static final String UNDESIRED_ALLOCATION_COUNT_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.current"; /** {@link #UNDESIRED_ALLOCATION_COUNT_METRIC_NAME} / {@link #TOTAL_SHARDS_METRIC_NAME} */ public static final String UNDESIRED_ALLOCATION_RATIO_METRIC_NAME = "es.allocator.desired_balance.allocations.undesired.ratio"; @@ -71,25 +124,14 @@ public record NodeWeightStats(long shardCount, double diskUsageInBytes, double w public static final String CURRENT_NODE_FORECASTED_DISK_USAGE_METRIC_NAME = "es.allocator.allocations.node.forecasted_disk_usage_bytes.current"; - public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats(-1, -1, -1); + public static final AllocationStats EMPTY_ALLOCATION_STATS = new AllocationStats(0, Map.of()); private volatile boolean nodeIsMaster = false; /** - * Number of unassigned shards during last reconciliation - */ - private volatile long unassignedShards; - - /** - * Total number of assigned shards during last reconciliation + * The stats from the most recent reconciliation */ - private volatile long totalAllocations; - - /** - * Number of assigned shards during last reconciliation that are not allocated on a desired node and need to be moved. - * This excludes shards that must be reassigned due to a shutting down node. 
- */ - private volatile long undesiredAllocationsExcludingShuttingDownNodes; + private volatile AllocationStats lastReconciliationAllocationStats = EMPTY_ALLOCATION_STATS; private final AtomicReference> weightStatsPerNodeRef = new AtomicReference<>(Map.of()); private final AtomicReference> allocationStatsPerNodeRef = new AtomicReference<>( @@ -104,9 +146,7 @@ public void updateMetrics( assert allocationStats != null : "allocation stats cannot be null"; assert weightStatsPerNode != null : "node balance weight stats cannot be null"; if (allocationStats != EMPTY_ALLOCATION_STATS) { - this.unassignedShards = allocationStats.unassignedShards; - this.totalAllocations = allocationStats.totalAllocations; - this.undesiredAllocationsExcludingShuttingDownNodes = allocationStats.undesiredAllocationsExcludingShuttingDownNodes; + this.lastReconciliationAllocationStats = allocationStats; } weightStatsPerNodeRef.set(weightStatsPerNode); allocationStatsPerNodeRef.set(nodeAllocationStats); @@ -205,19 +245,23 @@ public void setNodeIsMaster(boolean nodeIsMaster) { } public long unassignedShards() { - return unassignedShards; + return lastReconciliationAllocationStats.unassignedShards(); } public long totalAllocations() { - return totalAllocations; + return lastReconciliationAllocationStats.totalAllocations(); } public long undesiredAllocations() { - return undesiredAllocationsExcludingShuttingDownNodes; + return lastReconciliationAllocationStats.undesiredAllocationsExcludingShuttingDownNodes(); + } + + public AllocationStats allocationStats() { + return lastReconciliationAllocationStats; } private List getUnassignedShardsMetrics() { - return getIfPublishing(unassignedShards); + return getIfPublishing(AllocationStats::unassignedShards); } private List getDesiredBalanceNodeWeightMetrics() { @@ -346,25 +390,25 @@ private Map getNodeAttributes(DiscoveryNode node) { } private List getTotalAllocationsMetrics() { - return getIfPublishing(totalAllocations); + return getIfPublishing(AllocationStats::totalAllocations); } private List getUndesiredAllocationsExcludingShuttingDownNodesMetrics() { - return getIfPublishing(undesiredAllocationsExcludingShuttingDownNodes); + return getIfPublishing(AllocationStats::undesiredAllocationsExcludingShuttingDownNodes); } - private List getIfPublishing(long value) { - if (nodeIsMaster) { - return List.of(new LongWithAttributes(value)); + private List getIfPublishing(ToLongFunction value) { + var currentStats = lastReconciliationAllocationStats; + if (nodeIsMaster && currentStats != EMPTY_ALLOCATION_STATS) { + return List.of(new LongWithAttributes(value.applyAsLong(currentStats))); } return List.of(); } private List getUndesiredAllocationsRatioMetrics() { - if (nodeIsMaster) { - var total = totalAllocations; - var undesired = undesiredAllocationsExcludingShuttingDownNodes; - return List.of(new DoubleWithAttributes(total != 0 ? (double) undesired / total : 0.0)); + var currentStats = lastReconciliationAllocationStats; + if (nodeIsMaster && currentStats != EMPTY_ALLOCATION_STATS) { + return List.of(new DoubleWithAttributes(currentStats.undesiredAllocationsRatio())); } return List.of(); } @@ -374,9 +418,7 @@ private List getUndesiredAllocationsRatioMetrics() { * This is best-effort because it is possible for {@link #updateMetrics} to race with this method. 
*/ public void zeroAllMetrics() { - unassignedShards = 0; - totalAllocations = 0; - undesiredAllocationsExcludingShuttingDownNodes = 0; + lastReconciliationAllocationStats = EMPTY_ALLOCATION_STATS; weightStatsPerNodeRef.set(Map.of()); allocationStatsPerNodeRef.set(Map.of()); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java index 8e69d72777f04..c55e2a23ab8fa 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceReconciler.java @@ -9,6 +9,9 @@ package org.elasticsearch.cluster.routing.allocation.allocator; +import com.carrotsearch.hppc.ObjectLongHashMap; +import com.carrotsearch.hppc.ObjectLongMap; + import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.lucene.util.ArrayUtil; @@ -40,6 +43,7 @@ import java.util.function.BiFunction; import java.util.stream.Collectors; import java.util.stream.IntStream; +import java.util.stream.StreamSupport; import static org.elasticsearch.cluster.metadata.SingleNodeShutdownMetadata.Type.REPLACE; import static org.elasticsearch.cluster.routing.ExpectedShardSizeEstimator.getExpectedShardSize; @@ -525,6 +529,8 @@ private DesiredBalanceMetrics.AllocationStats balance() { int unassignedShards = routingNodes.unassigned().size() + routingNodes.unassigned().ignored().size(); int totalAllocations = 0; int undesiredAllocationsExcludingShuttingDownNodes = 0; + final ObjectLongMap totalAllocationsByRole = new ObjectLongHashMap<>(); + final ObjectLongMap undesiredAllocationsExcludingShuttingDownNodesByRole = new ObjectLongHashMap<>(); // Iterate over all started shards and try to move any which are on undesired nodes. In the presence of throttling shard // movements, the goal of this iteration order is to achieve a fairer movement of shards from the nodes that are offloading the @@ -533,6 +539,7 @@ private DesiredBalanceMetrics.AllocationStats balance() { final var shardRouting = iterator.next(); totalAllocations++; + totalAllocationsByRole.addTo(shardRouting.role(), 1); if (shardRouting.started() == false) { // can only rebalance started shards @@ -553,6 +560,7 @@ private DesiredBalanceMetrics.AllocationStats balance() { if (allocation.metadata().nodeShutdowns().contains(shardRouting.currentNodeId()) == false) { // shard is not on a shutting down node, nor is it on a desired node per the previous check. 
undesiredAllocationsExcludingShuttingDownNodes++; + undesiredAllocationsExcludingShuttingDownNodesByRole.addTo(shardRouting.role(), 1); } if (allocation.deciders().canRebalance(allocation).type() != Decision.Type.YES) { @@ -594,8 +602,16 @@ private DesiredBalanceMetrics.AllocationStats balance() { maybeLogUndesiredAllocationsWarning(totalAllocations, undesiredAllocationsExcludingShuttingDownNodes, routingNodes.size()); return new DesiredBalanceMetrics.AllocationStats( unassignedShards, - totalAllocations, - undesiredAllocationsExcludingShuttingDownNodes + StreamSupport.stream(totalAllocationsByRole.spliterator(), false) + .collect( + Collectors.toUnmodifiableMap( + lc -> lc.key, + lc -> new DesiredBalanceMetrics.RoleAllocationStats( + totalAllocationsByRole.get(lc.key), + undesiredAllocationsExcludingShuttingDownNodesByRole.get(lc.key) + ) + ) + ) ); } diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java index 515da761d8696..e0b927a84519c 100644 --- a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/allocator/DesiredBalanceShardsAllocator.java @@ -438,6 +438,10 @@ public DesiredBalanceStats getStats() { ); } + public DesiredBalanceMetrics.AllocationStats getAllocationStats() { + return desiredBalanceMetrics.allocationStats(); + } + private void onNoLongerMaster() { if (indexGenerator.getAndSet(-1) != -1) { currentDesiredBalanceRef.set(DesiredBalance.NOT_MASTER); diff --git a/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java new file mode 100644 index 0000000000000..ef24760f02a6b --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDecider.java @@ -0,0 +1,124 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.cluster.NodeUsageStatsForThreadPools.ThreadPoolUsageStats; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.ShardMovementWriteLoadSimulator; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.core.Strings; +import org.elasticsearch.threadpool.ThreadPool; + +/** + * Decides whether shards can be allocated to cluster nodes, or can remain on cluster nodes, based on the target node's current write thread + * pool usage stats and any candidate shard's write load estimate. + */ +public class WriteLoadConstraintDecider extends AllocationDecider { + private static final Logger logger = LogManager.getLogger(WriteLoadConstraintDecider.class); + + public static final String NAME = "write_load"; + + private final WriteLoadConstraintSettings writeLoadConstraintSettings; + + public WriteLoadConstraintDecider(ClusterSettings clusterSettings) { + this.writeLoadConstraintSettings = new WriteLoadConstraintSettings(clusterSettings); + } + + @Override + public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + if (writeLoadConstraintSettings.getWriteLoadConstraintEnabled().disabled()) { + return Decision.single(Decision.Type.YES, NAME, "Decider is disabled"); + } + + // Check whether the shard being relocated has any write load estimate. If it does not, then this decider has no opinion. + var allShardWriteLoads = allocation.clusterInfo().getShardWriteLoads(); + var shardWriteLoad = allShardWriteLoads.get(shardRouting.shardId()); + if (shardWriteLoad == null || shardWriteLoad == 0) { + return Decision.single(Decision.Type.YES, NAME, "Shard has no estimated write load. Decider takes no action."); + } + + var allNodeUsageStats = allocation.clusterInfo().getNodeUsageStatsForThreadPools(); + var nodeUsageStatsForThreadPools = allNodeUsageStats.get(node.nodeId()); + if (nodeUsageStatsForThreadPools == null) { + // No node-level thread pool usage stats were reported for this node. Let's assume this is OK and that the simulator will handle + // setting a node-level write load for this node after this shard is assigned. + return Decision.single(Decision.Type.YES, NAME, "The node has no write load estimate. Decider takes no action."); + } + + assert nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().isEmpty() == false; + assert nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE) != null; + var nodeWriteThreadPoolStats = nodeUsageStatsForThreadPools.threadPoolUsageStatsMap().get(ThreadPool.Names.WRITE); + var nodeWriteThreadPoolLoadThreshold = writeLoadConstraintSettings.getWriteThreadPoolHighUtilizationThresholdSetting(); + if (nodeWriteThreadPoolStats.averageThreadPoolUtilization() >= nodeWriteThreadPoolLoadThreshold) { + // The node's write thread pool usage stats already show high utilization above the threshold for accepting new shards. + String explain = Strings.format( + "Node [%s] with write thread pool utilization [%.2f] already exceeds the high utilization threshold of [%f]. 
Cannot " + + "allocate shard [%s] to node without risking increased write latencies.", + node.nodeId(), + nodeWriteThreadPoolStats.averageThreadPoolUtilization(), + nodeWriteThreadPoolLoadThreshold, + shardRouting.shardId() + ); + logger.debug(explain); + return Decision.single(Decision.Type.NO, NAME, explain); + } + + if (calculateShardMovementChange(nodeWriteThreadPoolStats, shardWriteLoad) >= nodeWriteThreadPoolLoadThreshold) { + // The node's write thread pool usage would be raised above the high utilization threshold with assignment of the new shard. + // This could lead to a hot spot on this node and is undesirable. + String explain = Strings.format( + "The high utilization threshold of [%f] would be exceeded on node [%s] with utilization [%.2f] if shard [%s] with " + + "estimated additional utilisation [%.5f] (write load [%.5f] / threads [%d]) were assigned to it. Cannot allocate " + + "shard to node without risking increased write latencies.", + nodeWriteThreadPoolLoadThreshold, + node.nodeId(), + nodeWriteThreadPoolStats.averageThreadPoolUtilization(), + shardRouting.shardId(), + shardWriteLoad / nodeWriteThreadPoolStats.totalThreadPoolThreads(), + shardWriteLoad, + nodeWriteThreadPoolStats.totalThreadPoolThreads() + ); + logger.debug(explain); + return Decision.single(Decision.Type.NO, NAME, explain); + } + + return Decision.YES; + } + + @Override + public Decision canRemain(IndexMetadata indexMetadata, ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { + if (writeLoadConstraintSettings.getWriteLoadConstraintEnabled().notFullyEnabled()) { + return Decision.single(Decision.Type.YES, NAME, "canRemain() is not enabled"); + } + + // TODO: implement + + return Decision.single(Decision.Type.YES, NAME, "canRemain() is not yet implemented"); + } + + /** + * Calculates the change to the node's write thread pool utilization percentage if the shard is added to the node. + * Returns the percent thread pool utilization change. + */ + private float calculateShardMovementChange(ThreadPoolUsageStats nodeWriteThreadPoolStats, double shardWriteLoad) { + assert shardWriteLoad > 0; + return ShardMovementWriteLoadSimulator.updateNodeUtilizationWithShardMovements( + nodeWriteThreadPoolStats.averageThreadPoolUtilization(), + (float) shardWriteLoad, + nodeWriteThreadPoolStats.totalThreadPoolThreads() + ); + } +} diff --git a/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java b/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java index 20f480af172bd..e37f411789fd0 100644 --- a/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java +++ b/server/src/main/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutor.java @@ -151,7 +151,8 @@ public int getCurrentQueueSize() { * Returns the max queue latency seen since the last time that this method was called. Every call will reset the max seen back to zero. * Latencies are only observed as tasks are taken off of the queue. This means that tasks in the queue will not contribute to the max * latency until they are unqueued and handed to a thread to execute. To see the latency of tasks still in the queue, use - * {@link #peekMaxQueueLatencyInQueue}. If there have been no tasks in the queue since the last call, then zero latency is returned. + * {@link #peekMaxQueueLatencyInQueueMillis}. 
If there have been no tasks in the queue since the last call, then zero latency is + * returned. */ public long getMaxQueueLatencyMillisSinceLastPollAndReset() { if (trackMaxQueueLatency == false) { @@ -164,23 +165,29 @@ public long getMaxQueueLatencyMillisSinceLastPollAndReset() { * Returns the queue latency of the next task to be executed that is still in the task queue. Essentially peeks at the front of the * queue and calculates how long it has been there. Returns zero if there is no queue. */ - public long peekMaxQueueLatencyInQueue() { + public long peekMaxQueueLatencyInQueueMillis() { if (trackMaxQueueLatency == false) { return 0; } + var queue = getQueue(); - if (queue.isEmpty()) { + assert queue instanceof LinkedTransferQueue || queue instanceof SizeBlockingQueue + : "Not the type of queue expected: " + queue.getClass(); + var linkedTransferOrSizeBlockingQueue = queue instanceof LinkedTransferQueue + ? (LinkedTransferQueue) queue + : (SizeBlockingQueue) queue; + + var task = linkedTransferOrSizeBlockingQueue.peek(); + if (task == null) { + // There's nothing in the queue right now. return 0; } - assert queue instanceof LinkedTransferQueue : "Not the type of queue expected: " + queue.getClass(); - var linkedTransferQueue = (LinkedTransferQueue) queue; - var task = linkedTransferQueue.peek(); assert task instanceof WrappedRunnable : "Not the type of task expected: " + task.getClass(); var wrappedTask = ((WrappedRunnable) task).unwrap(); assert wrappedTask instanceof TimedRunnable : "Not the type of task expected: " + task.getClass(); var timedTask = (TimedRunnable) wrappedTask; - return timedTask.getTimeSinceCreationNanos(); + return TimeUnit.NANOSECONDS.toMillis(timedTask.getTimeSinceCreationNanos()); } /** diff --git a/server/src/main/java/org/elasticsearch/index/IndexVersions.java b/server/src/main/java/org/elasticsearch/index/IndexVersions.java index 2e464afa72b76..57fcc2bc763be 100644 --- a/server/src/main/java/org/elasticsearch/index/IndexVersions.java +++ b/server/src/main/java/org/elasticsearch/index/IndexVersions.java @@ -180,6 +180,7 @@ private static Version parseUnchecked(String version) { public static final IndexVersion SPARSE_VECTOR_PRUNING_INDEX_OPTIONS_SUPPORT = def(9_031_0_00, Version.LUCENE_10_2_2); public static final IndexVersion DEFAULT_DENSE_VECTOR_TO_BBQ_HNSW = def(9_032_0_00, Version.LUCENE_10_2_2); public static final IndexVersion MATCH_ONLY_TEXT_STORED_AS_BYTES = def(9_033_0_00, Version.LUCENE_10_2_2); + public static final IndexVersion IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF = def(9_034_0_00, Version.LUCENE_10_2_2); /* * STOP! READ THIS FIRST! No, really, diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java deleted file mode 100644 index 9ac2e636810b5..0000000000000 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/BulkNumericDocValues.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one - * or more contributor license agreements. Licensed under the "Elastic License - * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side - * Public License v 1"; you may not use this file except in compliance with, at - * your election, the "Elastic License 2.0", the "GNU Affero General Public - * License v3.0 only", or the "Server Side Public License, v 1". 
- */ - -package org.elasticsearch.index.codec.tsdb.es819; - -import org.apache.lucene.index.NumericDocValues; -import org.elasticsearch.index.mapper.BlockLoader; - -import java.io.IOException; - -/** - * An es819 doc values specialization that allows bulk loading of values that is optimized in the context of compute engine. - */ -public abstract class BulkNumericDocValues extends NumericDocValues { - - /** - * Reads the values of all documents in {@code docs}. - */ - public abstract BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException; - -} diff --git a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java index 163e4c729bb95..ff875b4ef1c8a 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java +++ b/server/src/main/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesProducer.java @@ -53,6 +53,7 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { final IntObjectHashMap numerics; + private int primarySortFieldNumber = -1; final IntObjectHashMap binaries; final IntObjectHashMap sorted; final IntObjectHashMap sortedSets; @@ -91,7 +92,14 @@ final class ES819TSDBDocValuesProducer extends DocValuesProducer { ); readFields(in, state.fieldInfos); - + final var indexSort = state.segmentInfo.getIndexSort(); + if (indexSort != null && indexSort.getSort().length > 0) { + var primarySortField = indexSort.getSort()[0]; + var sortField = state.fieldInfos.fieldInfo(primarySortField.getField()); + if (sortField != null) { + primarySortFieldNumber = sortField.number; + } + } } catch (Throwable exception) { priorE = exception; } finally { @@ -333,10 +341,10 @@ public boolean advanceExact(int target) throws IOException { @Override public SortedDocValues getSorted(FieldInfo field) throws IOException { SortedEntry entry = sorted.get(field.number); - return getSorted(entry); + return getSorted(entry, field.number == primarySortFieldNumber); } - private SortedDocValues getSorted(SortedEntry entry) throws IOException { + private SortedDocValues getSorted(SortedEntry entry, boolean valuesSorted) throws IOException { final NumericDocValues ords = getNumeric(entry.ordsEntry, entry.termsDictEntry.termsDictSize); return new BaseSortedDocValues(entry) { @@ -369,10 +377,29 @@ public int advance(int target) throws IOException { public long cost() { return ords.cost(); } + + @Override + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { + if (valuesSorted && ords instanceof BaseDenseNumericValues denseOrds) { + int firstDoc = docs.get(offset); + denseOrds.advanceExact(firstDoc); + long startValue = denseOrds.longValue(); + final int docCount = docs.count(); + int lastDoc = docs.get(docCount - 1); + long lastValue = denseOrds.lookAheadValueAt(lastDoc); + if (lastValue == startValue) { + BytesRef b = lookupOrd(Math.toIntExact(startValue)); + return factory.constantBytes(BytesRef.deepCopyOf(b), docCount - offset); + } + // TODO: Since ordinals are sorted, start at 0 (offset by startValue), scan until lastValue, + // then fill remaining positions with lastValue. 
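The tryRead shortcut above relies on the primary index sort: ordinals of the sort field are non-decreasing in doc order, so if the first and last requested documents carry the same ordinal, every document between them must as well, and the whole range can be answered with a single constant block. A tiny sketch of that invariant under those assumptions (names are hypothetical, not the ES819TSDBDocValuesProducer code):

// Illustrative only: the "first equals last implies constant range" reasoning.
final class SortedRangeShortcut {

    /** For a non-decreasing sequence, the slice [from, to] is constant iff its endpoints are equal. */
    static boolean isConstantRange(long[] nonDecreasingOrdinals, int from, int to) {
        return nonDecreasingOrdinals[from] == nonDecreasingOrdinals[to];
    }

    public static void main(String[] args) {
        long[] ords = { 3, 3, 3, 3, 4, 4, 7 }; // sorted, as for the primary index sort field
        System.out.println(isConstantRange(ords, 0, 3)); // true  -> emit one constant block
        System.out.println(isConstantRange(ords, 2, 5)); // false -> fall back to per-doc reads
    }
}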
+ } + return null; + } }; } - abstract class BaseSortedDocValues extends SortedDocValues { + abstract class BaseSortedDocValues extends SortedDocValues implements BlockLoader.OptionalColumnAtATimeReader { final SortedEntry entry; final TermsEnum termsEnum; @@ -406,6 +433,15 @@ public int lookupTerm(BytesRef key) throws IOException { public TermsEnum termsEnum() throws IOException { return new TermsDict(entry.termsDictEntry, data, merging); } + + @Override + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { + return null; + } + } + + abstract static class BaseDenseNumericValues extends NumericDocValues implements BlockLoader.OptionalColumnAtATimeReader { + abstract long lookAheadValueAt(int targetDoc) throws IOException; } abstract static class BaseSortedSetDocValues extends SortedSetDocValues { @@ -695,7 +731,7 @@ public SortedNumericDocValues getSortedNumeric(FieldInfo field) throws IOExcepti public SortedSetDocValues getSortedSet(FieldInfo field) throws IOException { SortedSetEntry entry = sortedSets.get(field.number); if (entry.singleValueEntry != null) { - return DocValues.singleton(getSorted(entry.singleValueEntry)); + return DocValues.singleton(getSorted(entry.singleValueEntry, field.number == primarySortFieldNumber)); } SortedNumericEntry ordsEntry = entry.ordsEntry; @@ -1047,7 +1083,7 @@ private NumericDocValues getNumeric(NumericEntry entry, long maxOrd) throws IOEx // Special case for maxOrd 1, no need to read blocks and use ordinal 0 as only value if (entry.docsWithFieldOffset == -1) { // Special case when all docs have a value - return new NumericDocValues() { + return new BaseDenseNumericValues() { private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc; private int doc = -1; @@ -1086,6 +1122,17 @@ public boolean advanceExact(int target) { public long cost() { return maxDoc; } + + @Override + long lookAheadValueAt(int targetDoc) throws IOException { + return 0L; // Only one ordinal! + } + + @Override + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) + throws IOException { + return null; + } }; } else { final IndexedDISI disi = new IndexedDISI( @@ -1141,13 +1188,17 @@ public long longValue() { final int bitsPerOrd = maxOrd >= 0 ? 
PackedInts.bitsRequired(maxOrd - 1) : -1; if (entry.docsWithFieldOffset == -1) { // dense - return new BulkNumericDocValues() { + return new BaseDenseNumericValues() { private final int maxDoc = ES819TSDBDocValuesProducer.this.maxDoc; private int doc = -1; private final TSDBDocValuesEncoder decoder = new TSDBDocValuesEncoder(ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE); private long currentBlockIndex = -1; private final long[] currentBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + // lookahead block + private long lookaheadBlockIndex = -1; + private long[] lookaheadBlock; + private IndexInput lookaheadData = null; @Override public int docID() { @@ -1183,24 +1234,28 @@ public long longValue() throws IOException { final int index = doc; final int blockIndex = index >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; final int blockInIndex = index & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; - if (blockIndex != currentBlockIndex) { - assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; - // no need to seek if the loading block is the next block - if (currentBlockIndex + 1 != blockIndex) { - valuesData.seek(indexReader.get(blockIndex)); - } - currentBlockIndex = blockIndex; - if (maxOrd >= 0) { - decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); - } else { - decoder.decode(valuesData, currentBlock); - } + if (blockIndex == currentBlockIndex) { + return currentBlock[blockInIndex]; + } + if (blockIndex == lookaheadBlockIndex) { + return lookaheadBlock[blockInIndex]; + } + assert blockIndex > currentBlockIndex : blockIndex + " < " + currentBlockIndex; + // no need to seek if the loading block is the next block + if (currentBlockIndex + 1 != blockIndex) { + valuesData.seek(indexReader.get(blockIndex)); + } + currentBlockIndex = blockIndex; + if (maxOrd >= 0) { + decoder.decodeOrdinals(valuesData, currentBlock, bitsPerOrd); + } else { + decoder.decode(valuesData, currentBlock); } return currentBlock[blockInIndex]; } @Override - public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { + public BlockLoader.Block tryRead(BlockLoader.BlockFactory factory, BlockLoader.Docs docs, int offset) throws IOException { assert maxOrd == -1 : "unexpected maxOrd[" + maxOrd + "]"; final int docsCount = docs.count(); doc = docs.get(docsCount - 1); @@ -1238,6 +1293,32 @@ public BlockLoader.Block read(BlockLoader.BlockFactory factory, BlockLoader.Docs } } + @Override + long lookAheadValueAt(int targetDoc) throws IOException { + final int blockIndex = targetDoc >>> ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SHIFT; + final int valueIndex = targetDoc & ES819TSDBDocValuesFormat.NUMERIC_BLOCK_MASK; + if (blockIndex == currentBlockIndex) { + return currentBlock[valueIndex]; + } + // load data to the lookahead block + if (lookaheadBlockIndex != blockIndex) { + if (lookaheadBlock == null) { + lookaheadBlock = new long[ES819TSDBDocValuesFormat.NUMERIC_BLOCK_SIZE]; + lookaheadData = data.slice("look_ahead_values", entry.valuesOffset, entry.valuesLength); + } + if (lookaheadBlockIndex + 1 != blockIndex) { + lookaheadData.seek(indexReader.get(blockIndex)); + } + if (maxOrd == -1L) { + decoder.decode(lookaheadData, lookaheadBlock); + } else { + decoder.decodeOrdinals(lookaheadData, lookaheadBlock, bitsPerOrd); + } + lookaheadBlockIndex = blockIndex; + } + return lookaheadBlock[valueIndex]; + } + static boolean isDense(int firstDocId, int lastDocId, int length) { // This does not detect duplicate docids (e.g [1, 1, 2, 
4] would be detected as dense), // this can happen with enrich or lookup. However this codec isn't used for enrich / lookup. diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BQSpaceUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/BQSpaceUtils.java index 06c96e5a2c176..bb26357cb6990 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/BQSpaceUtils.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/BQSpaceUtils.java @@ -19,6 +19,8 @@ */ package org.elasticsearch.index.codec.vectors; +import org.elasticsearch.simdvec.ESVectorUtil; + /** Utility class for quantization calculations */ public class BQSpaceUtils { @@ -117,48 +119,7 @@ public static void transposeHalfByteLegacy(byte[] q, byte[] quantQueryByte) { * @param quantQueryByte the byte array to store the transposed query vector * */ public static void transposeHalfByte(int[] q, byte[] quantQueryByte) { - int limit = q.length - 7; - int i = 0; - int index = 0; - for (; i < limit; i += 8, index++) { - assert q[i] >= 0 && q[i] <= 15; - assert q[i + 1] >= 0 && q[i + 1] <= 15; - assert q[i + 2] >= 0 && q[i + 2] <= 15; - assert q[i + 3] >= 0 && q[i + 3] <= 15; - assert q[i + 4] >= 0 && q[i + 4] <= 15; - assert q[i + 5] >= 0 && q[i + 5] <= 15; - assert q[i + 6] >= 0 && q[i + 6] <= 15; - assert q[i + 7] >= 0 && q[i + 7] <= 15; - int lowerByte = (q[i] & 1) << 7 | (q[i + 1] & 1) << 6 | (q[i + 2] & 1) << 5 | (q[i + 3] & 1) << 4 | (q[i + 4] & 1) << 3 | (q[i - + 5] & 1) << 2 | (q[i + 6] & 1) << 1 | (q[i + 7] & 1); - int lowerMiddleByte = ((q[i] >> 1) & 1) << 7 | ((q[i + 1] >> 1) & 1) << 6 | ((q[i + 2] >> 1) & 1) << 5 | ((q[i + 3] >> 1) & 1) - << 4 | ((q[i + 4] >> 1) & 1) << 3 | ((q[i + 5] >> 1) & 1) << 2 | ((q[i + 6] >> 1) & 1) << 1 | ((q[i + 7] >> 1) & 1); - int upperMiddleByte = ((q[i] >> 2) & 1) << 7 | ((q[i + 1] >> 2) & 1) << 6 | ((q[i + 2] >> 2) & 1) << 5 | ((q[i + 3] >> 2) & 1) - << 4 | ((q[i + 4] >> 2) & 1) << 3 | ((q[i + 5] >> 2) & 1) << 2 | ((q[i + 6] >> 2) & 1) << 1 | ((q[i + 7] >> 2) & 1); - int upperByte = ((q[i] >> 3) & 1) << 7 | ((q[i + 1] >> 3) & 1) << 6 | ((q[i + 2] >> 3) & 1) << 5 | ((q[i + 3] >> 3) & 1) << 4 - | ((q[i + 4] >> 3) & 1) << 3 | ((q[i + 5] >> 3) & 1) << 2 | ((q[i + 6] >> 3) & 1) << 1 | ((q[i + 7] >> 3) & 1); - quantQueryByte[index] = (byte) lowerByte; - quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; - quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; - quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; - } - if (i == q.length) { - return; // all done - } - int lowerByte = 0; - int lowerMiddleByte = 0; - int upperMiddleByte = 0; - int upperByte = 0; - for (int j = 7; i < q.length; j--, i++) { - lowerByte |= (q[i] & 1) << j; - lowerMiddleByte |= ((q[i] >> 1) & 1) << j; - upperMiddleByte |= ((q[i] >> 2) & 1) << j; - upperByte |= ((q[i] >> 3) & 1) << j; - } - quantQueryByte[index] = (byte) lowerByte; - quantQueryByte[index + quantQueryByte.length / 4] = (byte) lowerMiddleByte; - quantQueryByte[index + quantQueryByte.length / 2] = (byte) upperMiddleByte; - quantQueryByte[index + 3 * quantQueryByte.length / 4] = (byte) upperByte; + ESVectorUtil.transposeHalfByte(q, quantQueryByte); } /** diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java index f2ef2b05541f8..cba55f8a7e942 100644 --- 
a/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/BQVectorUtils.java @@ -22,6 +22,7 @@ import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BitUtil; import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.simdvec.ESVectorUtil; /** Utility class for vector quantization calculations */ public class BQVectorUtils { @@ -55,31 +56,7 @@ public static void packAsBinaryLegacy(int[] vector, byte[] packed) { } public static void packAsBinary(int[] vector, byte[] packed) { - int limit = vector.length - 7; - int i = 0; - int index = 0; - for (; i < limit; i += 8, index++) { - assert vector[i] == 0 || vector[i] == 1; - assert vector[i + 1] == 0 || vector[i + 1] == 1; - assert vector[i + 2] == 0 || vector[i + 2] == 1; - assert vector[i + 3] == 0 || vector[i + 3] == 1; - assert vector[i + 4] == 0 || vector[i + 4] == 1; - assert vector[i + 5] == 0 || vector[i + 5] == 1; - assert vector[i + 6] == 0 || vector[i + 6] == 1; - assert vector[i + 7] == 0 || vector[i + 7] == 1; - int result = vector[i] << 7 | (vector[i + 1] << 6) | (vector[i + 2] << 5) | (vector[i + 3] << 4) | (vector[i + 4] << 3) - | (vector[i + 5] << 2) | (vector[i + 6] << 1) | (vector[i + 7]); - packed[index] = (byte) result; - } - if (i == vector.length) { - return; - } - byte result = 0; - for (int j = 7; j >= 0 && i < vector.length; i++, j--) { - assert vector[i] == 0 || vector[i] == 1; - result |= (byte) ((vector[i] & 1) << j); - } - packed[index] = result; + ESVectorUtil.packAsBinary(vector, packed); } public static int discretize(int value, int bucket) { diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java index aa8921cee24c4..73cf4adb804ba 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsFormat.java @@ -60,8 +60,8 @@ public class IVFVectorsFormat extends KnnVectorsFormat { ); // This dynamically sets the cluster probe based on the `k` requested and the number of clusters. - // useful when searching with 'efSearch' type parameters instead of requiring a specific nprobe. - public static final int DYNAMIC_NPROBE = -1; + // useful when searching with 'efSearch' type parameters instead of requiring a specific ratio. + public static final float DYNAMIC_VISIT_RATIO = 0.0f; public static final int DEFAULT_VECTORS_PER_CLUSTER = 384; public static final int MIN_VECTORS_PER_CLUSTER = 64; public static final int MAX_VECTORS_PER_CLUSTER = 1 << 16; // 65536 diff --git a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java index 0043f78590ac1..08bb87e5e5c12 100644 --- a/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java +++ b/server/src/main/java/org/elasticsearch/index/codec/vectors/IVFVectorsReader.java @@ -35,7 +35,7 @@ import java.io.IOException; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.SIMILARITY_FUNCTIONS; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; +import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_VISIT_RATIO; /** * Reader for IVF vectors. This reader is used to read the IVF vectors from the index. 
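For reference, the packing the removed loop above performed, and that ESVectorUtil.packAsBinary now provides, maps eight 0/1 ints onto one output byte with the first element of each group landing in the most significant bit. A small scalar sketch of that layout (class and method names are illustrative, not the ESVectorUtil API):

// Illustrative only: MSB-first bit packing of a 0/1 vector, without the production asserts.
final class BinaryPacking {

    static void packAsBinary(int[] bits, byte[] packed) {
        for (int i = 0; i < bits.length; i++) {
            if (bits[i] == 1) {
                packed[i >> 3] |= (byte) (1 << (7 - (i & 7))); // element 0 of a group maps to bit 7 of the byte
            }
        }
    }

    public static void main(String[] args) {
        int[] bits = { 1, 0, 1, 1, 0, 0, 0, 1 };
        byte[] packed = new byte[1];
        packAsBinary(bits, packed);
        System.out.printf("%02x%n", packed[0] & 0xff); // b1, i.e. 1011_0001
    }
}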
@@ -222,25 +222,28 @@ public final void search(String field, float[] target, KnnCollector knnCollector percentFiltered = Math.max(0f, Math.min(1f, (float) bitSet.approximateCardinality() / bitSet.length())); } int numVectors = rawVectorsReader.getFloatVectorValues(field).size(); - int nProbe = DYNAMIC_NPROBE; + float visitRatio = DYNAMIC_VISIT_RATIO; // Search strategy may be null if this is being called from checkIndex (e.g. from a test) if (knnCollector.getSearchStrategy() instanceof IVFKnnSearchStrategy ivfSearchStrategy) { - nProbe = ivfSearchStrategy.getNProbe(); + visitRatio = ivfSearchStrategy.getVisitRatio(); } FieldEntry entry = fields.get(fieldInfo.number); - if (nProbe == DYNAMIC_NPROBE) { + if (visitRatio == DYNAMIC_VISIT_RATIO) { // empirically based, and a good dynamic to get decent recall while scaling a la "efSearch" - // scaling by the number of centroids vs. the nearest neighbors requested + // scaling by the number of vectors vs. the nearest neighbors requested // not perfect, but a comparative heuristic. - // we might want to utilize the total vector count as well, but this is a good start - nProbe = (int) Math.round(Math.log10(entry.numCentroids) * Math.sqrt(knnCollector.k())); - // clip to be between 1 and the number of centroids - nProbe = Math.max(Math.min(nProbe, entry.numCentroids), 1); + // TODO: we might want to consider the density of the centroids as experiments shows that for fewer vectors per centroid, + // the least vectors we need to score to get a good recall. + float estimated = Math.round(Math.log10(numVectors) * Math.log10(numVectors) * (knnCollector.k())); + // clip so we visit at least one vector + visitRatio = estimated / numVectors; } + // we account for soar vectors here. We can potentially visit a vector twice so we multiply by 2 here. + long maxVectorVisited = (long) (2.0 * visitRatio * numVectors); CentroidIterator centroidIterator = getCentroidIterator(fieldInfo, entry.numCentroids, entry.centroidSlice(ivfCentroids), target); PostingVisitor scorer = getPostingVisitor(fieldInfo, entry.postingListSlice(ivfClusters), target, acceptDocs); - int centroidsVisited = 0; + long expectedDocs = 0; long actualDocs = 0; // initially we visit only the "centroids to search" @@ -248,8 +251,7 @@ public final void search(String field, float[] target, KnnCollector knnCollector // TODO do we need to handle nested doc counts similarly to how we handle // filtering? E.g. keep exploring until we hit an expected number of parent documents vs. child vectors? while (centroidIterator.hasNext() - && (centroidsVisited < nProbe || knnCollector.minCompetitiveSimilarity() == Float.NEGATIVE_INFINITY)) { - ++centroidsVisited; + && (maxVectorVisited > actualDocs || knnCollector.minCompetitiveSimilarity() == Float.NEGATIVE_INFINITY)) { // todo do we actually need to know the score??? 
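To make the new dynamic visit ratio above concrete: with 1,000,000 vectors and k = 10, estimated = round(log10(1e6) * log10(1e6) * 10) = 360, so visitRatio = 0.00036 and, after the factor of 2 that allows for SOAR duplicates, at most 720 vectors are scored before the collector's minimum competitive similarity takes over. A sketch of the same arithmetic (hypothetical class, not the IVFVectorsReader API):

// Illustrative only: reproduces the dynamic visit-ratio arithmetic from the hunk above.
final class VisitRatioEstimate {

    static long maxVectorsVisited(int numVectors, int k) {
        float estimated = Math.round(Math.log10(numVectors) * Math.log10(numVectors) * k);
        float visitRatio = estimated / numVectors;
        return (long) (2.0 * visitRatio * numVectors); // x2: a vector may be visited twice via SOAR postings
    }

    public static void main(String[] args) {
        System.out.println(maxVectorsVisited(1_000_000, 10)); // 720
        System.out.println(maxVectorsVisited(100_000, 100));  // 5000
    }
}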
long offset = centroidIterator.nextPostingListOffset(); // todo do we need direct access to the raw centroid???, this is used for quantizing, maybe hydrating and quantizing diff --git a/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java b/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java index a02a8da9e629e..0d3c314cee352 100644 --- a/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java +++ b/server/src/main/java/org/elasticsearch/index/fieldvisitor/StoredFieldLoader.java @@ -9,12 +9,14 @@ package org.elasticsearch.index.fieldvisitor; +import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.StoredFields; import org.elasticsearch.common.CheckedBiConsumer; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.index.SequentialStoredFieldsLeafReader; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.search.fetch.StoredFieldsSpec; import java.io.IOException; @@ -203,9 +205,25 @@ private static class ReaderStoredFieldLoader implements LeafStoredFieldLoader { private final CustomFieldsVisitor visitor; private int doc = -1; + private static CustomFieldsVisitor getFieldsVisitor(Set fields, boolean loadSource) { + if (fields.contains(IgnoredSourceFieldMapper.NAME)) { + return new CustomFieldsVisitor(fields, loadSource) { + @Override + public Status needsField(FieldInfo fieldInfo) { + if (fieldInfo.name.startsWith(IgnoredSourceFieldMapper.NAME)) { + return Status.YES; + } + return super.needsField(fieldInfo); + } + }; + } + + return new CustomFieldsVisitor(fields, loadSource); + } + ReaderStoredFieldLoader(CheckedBiConsumer reader, boolean loadSource, Set fields) { this.reader = reader; - this.visitor = new CustomFieldsVisitor(fields, loadSource); + this.visitor = getFieldsVisitor(fields, loadSource); } @Override diff --git a/server/src/main/java/org/elasticsearch/index/get/GetResult.java b/server/src/main/java/org/elasticsearch/index/get/GetResult.java index b8c842eefb836..e7531da294489 100644 --- a/server/src/main/java/org/elasticsearch/index/get/GetResult.java +++ b/server/src/main/java/org/elasticsearch/index/get/GetResult.java @@ -244,7 +244,7 @@ public XContentBuilder toXContentEmbedded(XContentBuilder builder, Params params for (DocumentField field : metaFields.values()) { // TODO: can we avoid having an exception here? - if (field.getName().equals(IgnoredFieldMapper.NAME) || field.getName().equals(IgnoredSourceFieldMapper.NAME)) { + if (field.getName().equals(IgnoredFieldMapper.NAME) || field.getName().startsWith(IgnoredSourceFieldMapper.NAME)) { builder.field(field.getName(), field.getValues()); } else { builder.field(field.getName(), field.getValue()); diff --git a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java index 1e553e913379c..ec0ad1acd917f 100644 --- a/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/server/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -322,7 +322,8 @@ private GetResult innerGetFetch( ? 
new SourceLoader.Synthetic( sourceFilter, () -> mappingLookup.getMapping().syntheticFieldLoader(sourceFilter), - mapperMetrics.sourceFieldMetrics() + mapperMetrics.sourceFieldMetrics(), + mappingLookup.getMapping().ignoredSourceFormat() ) : mappingLookup.newSourceLoader(sourceFilter, mapperMetrics.sourceFieldMetrics()); StoredFieldLoader storedFieldLoader = buildStoredFieldLoader(storedFieldSet, fetchSourceContext, loader); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java index 894c053e3c0c0..4733e610a148f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/AbstractGeometryFieldMapper.java @@ -194,7 +194,11 @@ protected BlockLoader blockLoaderFromSource(BlockLoaderContext blContext) { protected abstract Object nullValueAsSource(T nullValue); protected BlockLoader blockLoaderFromFallbackSyntheticSource(BlockLoaderContext blContext) { - return new FallbackSyntheticSourceBlockLoader(new GeometriesFallbackSyntheticSourceReader(), name()) { + return new FallbackSyntheticSourceBlockLoader( + new GeometriesFallbackSyntheticSourceReader(), + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.bytesRefs(expectedCount); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java index 017e713fe09fe..64d54cc47fdb1 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockDocValuesReader.java @@ -22,7 +22,6 @@ import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.ByteArrayStreamInput; import org.elasticsearch.index.IndexVersion; -import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues; import org.elasticsearch.index.mapper.BlockLoader.BlockFactory; import org.elasticsearch.index.mapper.BlockLoader.BooleanBuilder; import org.elasticsearch.index.mapper.BlockLoader.Builder; @@ -133,8 +132,11 @@ static class SingletonLongs extends BlockDocValuesReader { @Override public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException { - if (numericDocValues instanceof BulkNumericDocValues bulkDv) { - return bulkDv.read(factory, docs, offset); + if (numericDocValues instanceof BlockLoader.OptionalColumnAtATimeReader direct) { + BlockLoader.Block result = direct.tryRead(factory, docs, offset); + if (result != null) { + return result; + } } try (BlockLoader.LongBuilder builder = factory.longsFromDocValues(docs.count() - offset)) { int lastDoc = -1; @@ -748,6 +750,12 @@ public BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throw if (docs.count() - offset == 1) { return readSingleDoc(factory, docs.get(offset)); } + if (ordinals instanceof BlockLoader.OptionalColumnAtATimeReader direct) { + BlockLoader.Block block = direct.tryRead(factory, docs, offset); + if (block != null) { + return block; + } + } try (var builder = factory.singletonOrdinalsBuilder(ordinals, docs.count() - offset)) { for (int i = offset; i < docs.count(); i++) { int doc = docs.get(i); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java 
b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java index 36c0b8bfb062f..601379c37823e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BlockLoader.java @@ -13,6 +13,7 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.core.Nullable; import org.elasticsearch.core.Releasable; import org.elasticsearch.search.fetch.StoredFieldsSpec; import org.elasticsearch.search.lookup.Source; @@ -46,6 +47,22 @@ interface ColumnAtATimeReader extends Reader { BlockLoader.Block read(BlockFactory factory, Docs docs, int offset) throws IOException; } + /** + * An interface for readers that attempt to load all document values in a column-at-a-time fashion. + *

+ * Unlike {@link ColumnAtATimeReader}, implementations may return {@code null} if they are unable + * to load the requested values, for example due to unsupported underlying data. + * This allows callers to optimistically try optimized loading strategies first, and fall back if necessary. + */ + interface OptionalColumnAtATimeReader { + /** + * Attempts to read the values of all documents in {@code docs} + * Returns {@code null} if unable to load the values. + */ + @Nullable + BlockLoader.Block tryRead(BlockFactory factory, Docs docs, int offset) throws IOException; + } + interface RowStrideReader extends Reader { /** * Reads the values of the given document into the builder. @@ -549,6 +566,5 @@ interface AggregateMetricDoubleBuilder extends Builder { DoubleBuilder sum(); IntBuilder count(); - } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java index 2843900c564c7..c0fb9acaf1986 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/BooleanFieldMapper.java @@ -352,7 +352,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { // Multi fields don't have fallback synthetic source. if (isSyntheticSource && blContext.parentField(name()) == null) { - return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) { + return new FallbackSyntheticSourceBlockLoader( + fallbackSyntheticSourceBlockLoaderReader(), + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.booleans(expectedCount); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java index 76d6dbb941409..fdb8bad484d74 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/DateFieldMapper.java @@ -1018,7 +1018,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { // Multi fields don't have fallback synthetic source. 
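The contract of the OptionalColumnAtATimeReader introduced above is try-then-fall-back: a reader that cannot serve the request simply returns null and the caller drops down to its per-document path, which is exactly how BlockDocValuesReader now uses it. A minimal sketch of a caller under that contract (all names here are hypothetical):

// Illustrative only: the optimistic try-then-fallback shape used by the new interface.
import java.io.IOException;
import java.util.function.IntUnaryOperator;

final class OptimisticColumnRead {

    interface OptionalColumnReader {
        /** Returns the whole column for the requested docs, or null if this reader cannot. */
        long[] tryRead(int[] docs) throws IOException;
    }

    static long[] readColumn(OptionalColumnReader fastPath, int[] docs, IntUnaryOperator perDocRead) throws IOException {
        long[] block = fastPath.tryRead(docs);
        if (block != null) {
            return block; // optimized bulk load succeeded
        }
        long[] values = new long[docs.length]; // fall back to one-value-at-a-time reads
        for (int i = 0; i < docs.length; i++) {
            values[i] = perDocRead.applyAsInt(docs[i]);
        }
        return values;
    }

    public static void main(String[] args) throws IOException {
        int[] docs = { 1, 5, 9 };
        long[] values = readColumn(d -> null, docs, doc -> doc * 10); // fast path declines, fallback runs
        System.out.println(java.util.Arrays.toString(values)); // [10, 50, 90]
    }
}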
if (isSyntheticSource && blContext.parentField(name()) == null) { - return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) { + return new FallbackSyntheticSourceBlockLoader( + fallbackSyntheticSourceBlockLoaderReader(), + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.longs(expectedCount); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java index c709c891e4ad4..a05eac9a72b9f 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/FallbackSyntheticSourceBlockLoader.java @@ -17,13 +17,13 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.Set; import java.util.Stack; +import java.util.stream.Collectors; /** * Block loader for fields that use fallback synthetic source implementation. @@ -39,10 +39,19 @@ public abstract class FallbackSyntheticSourceBlockLoader implements BlockLoader { private final Reader reader; private final String fieldName; - - protected FallbackSyntheticSourceBlockLoader(Reader reader, String fieldName) { + private final Set fieldPaths; + private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat; + + protected FallbackSyntheticSourceBlockLoader( + Reader reader, + String fieldName, + IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat + ) { + assert ignoredSourceFormat != IgnoredSourceFieldMapper.IgnoredSourceFormat.NO_IGNORED_SOURCE; this.reader = reader; this.fieldName = fieldName; + this.ignoredSourceFormat = ignoredSourceFormat; + this.fieldPaths = splitIntoFieldPaths(fieldName); } @Override @@ -52,12 +61,19 @@ public ColumnAtATimeReader columnAtATimeReader(LeafReaderContext context) throws @Override public RowStrideReader rowStrideReader(LeafReaderContext context) throws IOException { - return new IgnoredSourceRowStrideReader<>(fieldName, reader); + return new IgnoredSourceRowStrideReader<>(fieldName, fieldPaths, reader, ignoredSourceFormat); } @Override public StoredFieldsSpec rowStrideStoredFieldSpec() { - return new StoredFieldsSpec(false, false, Set.of(IgnoredSourceFieldMapper.NAME)); + Set ignoredFieldNames; + if (ignoredSourceFormat == IgnoredSourceFieldMapper.IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE) { + ignoredFieldNames = fieldPaths.stream().map(IgnoredSourceFieldMapper::ignoredFieldName).collect(Collectors.toSet()); + } else { + ignoredFieldNames = Set.of(IgnoredSourceFieldMapper.NAME); + } + + return new StoredFieldsSpec(false, false, ignoredFieldNames); } @Override @@ -70,49 +86,51 @@ public SortedSetDocValues ordinals(LeafReaderContext context) throws IOException throw new UnsupportedOperationException(); } + public static Set splitIntoFieldPaths(String fieldName) { + var paths = new HashSet(); + paths.add("_doc"); + var current = new StringBuilder(); + for (var part : fieldName.split("\\.")) { + if (current.isEmpty() == false) { + current.append('.'); + } + current.append(part); + paths.add(current.toString()); + } + return paths; + } + private static class IgnoredSourceRowStrideReader implements RowStrideReader { - // 
Contains name of the field and all its parents - private final Set fieldNames; private final String fieldName; + // Contains name of the field and all its parents + private final Set fieldPaths; private final Reader reader; - - IgnoredSourceRowStrideReader(String fieldName, Reader reader) { + private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat; + + IgnoredSourceRowStrideReader( + String fieldName, + Set fieldPaths, + Reader reader, + IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat + ) { this.fieldName = fieldName; + this.fieldPaths = fieldPaths; this.reader = reader; - this.fieldNames = new HashSet<>() { - { - add("_doc"); - } - }; - - var current = new StringBuilder(); - for (String part : fieldName.split("\\.")) { - if (current.isEmpty() == false) { - current.append('.'); - } - current.append(part); - fieldNames.add(current.toString()); - } - + this.ignoredSourceFormat = ignoredSourceFormat; } @Override public void read(int docId, StoredFields storedFields, Builder builder) throws IOException { - var ignoredSource = storedFields.storedFields().get(IgnoredSourceFieldMapper.NAME); - if (ignoredSource == null) { + Map> valuesForFieldAndParents = ignoredSourceFormat.loadSingleIgnoredField( + fieldPaths, + storedFields.storedFields() + ); + + if (valuesForFieldAndParents.isEmpty()) { builder.appendNull(); return; } - Map> valuesForFieldAndParents = new HashMap<>(); - - for (Object value : ignoredSource) { - IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); - if (fieldNames.contains(nameValue.name())) { - valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); - } - } - // TODO figure out how to handle XContentDataHelper#voidValue() var blockValues = new ArrayList(); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java index 812192d79cdce..2809a5e802433 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapper.java @@ -11,21 +11,34 @@ import org.apache.lucene.document.StoredField; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.io.stream.BytesStreamOutput; +import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.util.ByteUtils; +import org.elasticsearch.common.util.FeatureFlag; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.Tuple; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.index.IndexSettings; +import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.query.SearchExecutionContext; +import org.elasticsearch.search.lookup.SourceFilter; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.XContentType; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.Collection; import java.util.Collections; +import java.util.HashMap; +import java.util.List; import java.util.Map; 
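splitIntoFieldPaths above expands a dotted field name into the set of paths an ignored value could have been recorded under: the synthetic root _doc, each parent object, and the leaf itself; with the per-field format these paths then map, via ignoredFieldName, onto per-field stored fields such as _ignored_source.foo.bar requested by rowStrideStoredFieldSpec. A quick sketch of the expansion (hypothetical class name):

// Illustrative only: same expansion as FallbackSyntheticSourceBlockLoader#splitIntoFieldPaths.
import java.util.LinkedHashSet;
import java.util.Set;

final class FieldPaths {

    static Set<String> splitIntoFieldPaths(String fieldName) {
        Set<String> paths = new LinkedHashSet<>();
        paths.add("_doc"); // root-level ignored source entries
        StringBuilder current = new StringBuilder();
        for (String part : fieldName.split("\\.")) {
            if (current.length() > 0) {
                current.append('.');
            }
            current.append(part);
            paths.add(current.toString());
        }
        return paths;
    }

    public static void main(String[] args) {
        // [_doc, foo, foo.bar, foo.bar.baz]: the field itself plus every parent it may have been captured under
        System.out.println(splitIntoFieldPaths("foo.bar.baz"));
    }
}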
import java.util.Set; import java.util.stream.Stream; @@ -62,6 +75,8 @@ public class IgnoredSourceFieldMapper extends MetadataFieldMapper { "mapper.ignored_source.always_store_object_arrays_in_nested" ); + public static final FeatureFlag IGNORED_SOURCE_FIELDS_PER_ENTRY_FF = new FeatureFlag("ignored_source_fields_per_entry"); + /* Setting to disable encoding and writing values for this field. This is needed to unblock index functionality in case there is a bug on this code path. @@ -159,8 +174,46 @@ public void postParse(DocumentParserContext context) { return; } - for (NameValue nameValue : context.getIgnoredFieldValues()) { - nameValue.doc().add(new StoredField(NAME, encode(nameValue))); + ignoredSourceFormat(context.indexSettings().getIndexVersionCreated()).writeIgnoredFields(context.getIgnoredFieldValues()); + } + + public static String ignoredFieldName(String fieldName) { + return NAME + "." + fieldName; + } + + static BytesRef encodeMultipleValuesForField(List values) { + assert values.isEmpty() == false; + try { + BytesStreamOutput stream = new BytesStreamOutput(); + stream.writeVInt(values.size()); + String fieldName = values.getFirst().name; + stream.writeString(fieldName); + for (var value : values) { + assert fieldName.equals(value.name); + stream.writeVInt(value.parentOffset); + stream.writeBytesRef(value.value); + } + return stream.bytes().toBytesRef(); + } catch (IOException e) { + throw new ElasticsearchException("Failed to encode _ignored_source", e); + } + } + + static List decodeMultipleValuesForField(BytesRef value) { + try { + StreamInput stream = new BytesArray(value).streamInput(); + var count = stream.readVInt(); + assert count >= 1; + String fieldName = stream.readString(); + List values = new ArrayList<>(count); + for (int i = 0; i < count; i++) { + int parentOffset = stream.readVInt(); + BytesRef valueBytes = stream.readBytesRef(); + values.add(new NameValue(fieldName, parentOffset, valueBytes, null)); + } + return values; + } catch (IOException e) { + throw new ElasticsearchException("Failed to decode _ignored_source", e); } } @@ -202,6 +255,180 @@ public static Set ensureLoaded(Set fieldsToLoadForSyntheticSourc return fieldsToLoadForSyntheticSource; } + public enum IgnoredSourceFormat { + NO_IGNORED_SOURCE { + @Override + public Map> loadAllIgnoredFields( + SourceFilter filter, + Map> storedFields + ) { + return Map.of(); + } + + @Override + public Map> loadSingleIgnoredField( + Set fieldPaths, + Map> storedFields + ) { + return Map.of(); + } + + @Override + public void writeIgnoredFields(Collection ignoredFieldValues) { + assert false : "cannot write " + ignoredFieldValues.size() + " values with format NO_IGNORED_SOURCE"; + } + }, + SINGLE_IGNORED_SOURCE { + @Override + public Map> loadAllIgnoredFields( + SourceFilter filter, + Map> storedFields + ) { + Map> objectsWithIgnoredFields = null; + List storedValues = storedFields.get(IgnoredSourceFieldMapper.NAME); + if (storedValues != null) { + for (Object value : storedValues) { + if (objectsWithIgnoredFields == null) { + objectsWithIgnoredFields = new HashMap<>(); + } + IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); + if (filter != null + && filter.isPathFiltered(nameValue.name(), XContentDataHelper.isEncodedObject(nameValue.value()))) { + // This path is filtered by the include/exclude rules + continue; + } + objectsWithIgnoredFields.computeIfAbsent(nameValue.getParentFieldName(), k -> new ArrayList<>()).add(nameValue); + } + } + return objectsWithIgnoredFields; + } + + 
@Override + public Map> loadSingleIgnoredField( + Set fieldPaths, + Map> storedFields + ) { + Map> valuesForFieldAndParents = new HashMap<>(); + var ignoredSource = storedFields.get(IgnoredSourceFieldMapper.NAME); + if (ignoredSource != null) { + for (Object value : ignoredSource) { + IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); + if (fieldPaths.contains(nameValue.name())) { + valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); + } + } + } + return valuesForFieldAndParents; + } + + @Override + public void writeIgnoredFields(Collection ignoredFieldValues) { + for (NameValue nameValue : ignoredFieldValues) { + nameValue.doc().add(new StoredField(NAME, encode(nameValue))); + } + } + }, + PER_FIELD_IGNORED_SOURCE { + @Override + public Map> loadAllIgnoredFields( + SourceFilter filter, + Map> storedFields + ) { + Map> objectsWithIgnoredFields = null; + for (Map.Entry> e : storedFields.entrySet()) { + if (e.getKey().startsWith(IgnoredSourceFieldMapper.NAME)) { + assert e.getValue().size() == 1; + + Object value = e.getValue().getFirst(); + if (objectsWithIgnoredFields == null) { + objectsWithIgnoredFields = new HashMap<>(); + } + List nameValues = IgnoredSourceFieldMapper.decodeMultipleValuesForField( + (BytesRef) value + ); + + for (var nameValue : nameValues) { + if (filter != null + && filter.isPathFiltered(nameValue.name(), XContentDataHelper.isEncodedObject(nameValue.value()))) { + // This path is filtered by the include/exclude rules + continue; + } + objectsWithIgnoredFields.computeIfAbsent(nameValue.getParentFieldName(), k -> new ArrayList<>()).add(nameValue); + } + } + } + return objectsWithIgnoredFields; + } + + @Override + public Map> loadSingleIgnoredField( + Set fieldPaths, + Map> storedFields + ) { + Map> valuesForFieldAndParents = new HashMap<>(); + for (var parentPath : fieldPaths) { + var ignoredSource = storedFields.get(IgnoredSourceFieldMapper.ignoredFieldName(parentPath)); + if (ignoredSource == null) { + continue; + } + assert ignoredSource.size() == 1; + + List nameValues = IgnoredSourceFieldMapper.decodeMultipleValuesForField( + (BytesRef) ignoredSource.getFirst() + ); + + for (var nameValue : nameValues) { + assert fieldPaths.contains(nameValue.name()); + valuesForFieldAndParents.computeIfAbsent(nameValue.name(), k -> new ArrayList<>()).add(nameValue); + } + } + + return valuesForFieldAndParents; + } + + @Override + public void writeIgnoredFields(Collection ignoredFieldValues) { + Map>> entriesMap = new HashMap<>(); + + for (NameValue nameValue : ignoredFieldValues) { + String fieldName = ignoredFieldName(nameValue.name()); + entriesMap.computeIfAbsent(nameValue.doc(), d -> new HashMap<>()) + .computeIfAbsent(fieldName, n -> new ArrayList<>()) + .add(nameValue); + } + + for (var docEntry : entriesMap.entrySet()) { + for (var fieldEntry : docEntry.getValue().entrySet()) { + docEntry.getKey().add(new StoredField(fieldEntry.getKey(), encodeMultipleValuesForField(fieldEntry.getValue()))); + } + } + } + }; + + public abstract Map> loadAllIgnoredFields( + SourceFilter filter, + Map> storedFields + ); + + public abstract Map> loadSingleIgnoredField( + Set fieldPaths, + Map> storedFields + ); + + public abstract void writeIgnoredFields(Collection ignoredFieldValues); + } + + public IgnoredSourceFormat ignoredSourceFormat() { + return ignoredSourceFormat(indexSettings.getIndexVersionCreated()); + } + + public static IgnoredSourceFormat ignoredSourceFormat(IndexVersion indexCreatedVersion) { + 
return indexCreatedVersion.onOrAfter(IndexVersions.IGNORED_SOURCE_FIELDS_PER_ENTRY_WITH_FF) + && IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled() + ? IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE + : IgnoredSourceFormat.SINGLE_IGNORED_SOURCE; + } + @Override protected SyntheticSourceSupport syntheticSourceSupport() { // This loader controls if this field is loaded in scope of synthetic source constructions. @@ -248,7 +475,11 @@ public void reset() { }); } - public record MappedNameValue(NameValue nameValue, XContentType type, Map map) {} + public record MappedNameValue(NameValue nameValue, XContentType type, Map map) { + public MappedNameValue withMap(Map map) { + return new MappedNameValue(new NameValue(nameValue.name, nameValue.parentOffset, null, nameValue.doc), type, map); + } + } /** * Parses the passed byte array as a NameValue and converts its decoded value to a map of maps that corresponds to the field-value @@ -261,6 +492,20 @@ public record MappedNameValue(NameValue nameValue, XContentType type, Map decodeAsMapMultipleFieldValues(byte[] value) throws IOException { + BytesRef bytes = new BytesRef(value); + List nameValues = decodeMultipleValuesForField(bytes); + List mappedValues = new ArrayList<>(nameValues.size()); + for (var nameValue : nameValues) { + mappedValues.add(nameValueToMapped(nameValue)); + } + return mappedValues; + } + + private static MappedNameValue nameValueToMapped(NameValue nameValue) throws IOException { XContentBuilder xContentBuilder = XContentBuilder.builder(XContentDataHelper.getXContentType(nameValue.value()).xContent()); xContentBuilder.startObject().field(nameValue.name()); XContentDataHelper.decodeAndWrite(xContentBuilder, nameValue.value()); @@ -271,15 +516,28 @@ public static MappedNameValue decodeAsMap(byte[] value) throws IOException { /** * Clones the passed NameValue, using the passed map to produce its value. - * @param mappedNameValue containing the NameValue to clone - * @param map containing a simple field-value pair, or a deeper field-value subtree for objects and arrays with fields + * @param mappedNameValue containing the NameValue to clone and the map containing a simple field-value pair, or a deeper + * field-value subtree for objects and arrays with fields * @return a byte array containing the encoding form of the cloned NameValue * @throws IOException */ - public static byte[] encodeFromMap(MappedNameValue mappedNameValue, Map map) throws IOException { + public static byte[] encodeFromMap(MappedNameValue mappedNameValue) throws IOException { + return IgnoredSourceFieldMapper.encode(mappedToNameValue(mappedNameValue)); + } + + public static byte[] encodeFromMapMultipleFieldValues(List filteredValues) throws IOException { + List filteredNameValues = new ArrayList<>(filteredValues.size()); + for (var filteredValue : filteredValues) { + filteredNameValues.add(mappedToNameValue(filteredValue)); + } + var encoded = encodeMultipleValuesForField(filteredNameValues); + return ArrayUtil.copyOfSubArray(encoded.bytes, encoded.offset, encoded.length); + } + + private static IgnoredSourceFieldMapper.NameValue mappedToNameValue(MappedNameValue mappedNameValue) throws IOException { // The first entry is the field name, we skip to get to the value to encode. - assert map.size() == 1; - Object content = map.values().iterator().next(); + assert mappedNameValue.map.size() == 1; + Object content = mappedNameValue.map.values().iterator().next(); // Check if the field contains a single value or an object. 
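The per-field format above serializes all ignored values of one field, for one document, into a single stored-field entry: a count, the field name written once, then a parentOffset and the raw value bytes for each entry. A hedged round-trip sketch of that shape, using plain DataOutputStream/DataInputStream in place of the BytesStreamOutput/StreamInput and vInt encodings the mapper actually uses:

// Illustrative only: approximates the per-field _ignored_source entry layout
// (count, field name once, then parentOffset plus value bytes per entry).
import java.io.*;
import java.util.ArrayList;
import java.util.List;

final class PerFieldIgnoredSourceCodec {

    record Value(int parentOffset, byte[] bytes) {}

    static byte[] encode(String fieldName, List<Value> values) throws IOException {
        ByteArrayOutputStream out = new ByteArrayOutputStream();
        DataOutputStream data = new DataOutputStream(out);
        data.writeInt(values.size());      // vInt in the real encoding
        data.writeUTF(fieldName);          // field name stored once for all values
        for (Value v : values) {
            data.writeInt(v.parentOffset());
            data.writeInt(v.bytes().length);
            data.write(v.bytes());
        }
        return out.toByteArray();
    }

    static List<Value> decode(byte[] encoded, StringBuilder fieldNameOut) throws IOException {
        DataInputStream data = new DataInputStream(new ByteArrayInputStream(encoded));
        int count = data.readInt();
        fieldNameOut.append(data.readUTF());
        List<Value> values = new ArrayList<>(count);
        for (int i = 0; i < count; i++) {
            int parentOffset = data.readInt();
            byte[] bytes = new byte[data.readInt()];
            data.readFully(bytes);
            values.add(new Value(parentOffset, bytes));
        }
        return values;
    }

    public static void main(String[] args) throws IOException {
        byte[] blob = encode("foo.bar", List.of(new Value(0, "v1".getBytes()), new Value(4, "v2".getBytes())));
        StringBuilder name = new StringBuilder();
        System.out.println(decode(blob, name).size() + " values for " + name); // 2 values for foo.bar
    }
}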
@SuppressWarnings("unchecked") @@ -289,12 +547,11 @@ public static byte[] encodeFromMap(MappedNameValue mappedNameValue, Map values, BlockLoader.Builder blockBuil } }; - return new FallbackSyntheticSourceBlockLoader(reader, name()) { + return new FallbackSyntheticSourceBlockLoader( + reader, + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.bytesRefs(expectedCount); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java index e26c969f7d495..5524285d322ad 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/KeywordFieldMapper.java @@ -802,7 +802,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { // Multi fields don't have fallback synthetic source. if (isSyntheticSource && blContext.parentField(name()) == null) { - return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) { + return new FallbackSyntheticSourceBlockLoader( + fallbackSyntheticSourceBlockLoaderReader(), + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.bytesRefs(expectedCount); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java index 7ba2dfb9a69f5..284a2de238f1e 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MapperFeatures.java @@ -47,6 +47,7 @@ public class MapperFeatures implements FeatureSpecification { static final NodeFeature BBQ_DISK_SUPPORT = new NodeFeature("mapper.bbq_disk_support"); static final NodeFeature SEARCH_LOAD_PER_SHARD = new NodeFeature("mapper.search_load_per_shard"); static final NodeFeature PATTERNED_TEXT = new NodeFeature("mapper.patterned_text"); + static final NodeFeature IGNORED_SOURCE_FIELDS_PER_ENTRY = new NodeFeature("mapper.ignored_source_fields_per_entry"); @Override public Set getTestFeatures() { @@ -80,7 +81,8 @@ public Set getTestFeatures() { BBQ_DISK_SUPPORT, SEARCH_LOAD_PER_SHARD, SPARSE_VECTOR_INDEX_OPTIONS_FEATURE, - PATTERNED_TEXT + PATTERNED_TEXT, + IGNORED_SOURCE_FIELDS_PER_ENTRY ); } } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java b/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java index 4a3e67455e603..24de538bab81a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/Mapping.java @@ -152,6 +152,14 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader(@Nullable SourceFi return root.syntheticFieldLoader(filter, mappers, false); } + public IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat() { + IgnoredSourceFieldMapper isfm = (IgnoredSourceFieldMapper) metadataMappersByName.get(IgnoredSourceFieldMapper.NAME); + if (isfm == null) { + return IgnoredSourceFieldMapper.IgnoredSourceFormat.NO_IGNORED_SOURCE; + } + return isfm.ignoredSourceFormat(); + } + /** * Merges a new mapping into the existing one. 
* diff --git a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java index 08314f2282b5f..64461d0fd2fd5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/MappingLookup.java @@ -494,7 +494,7 @@ public boolean isSourceSynthetic() { */ public SourceLoader newSourceLoader(@Nullable SourceFilter filter, SourceFieldMetrics metrics) { if (isSourceSynthetic()) { - return new SourceLoader.Synthetic(filter, () -> mapping.syntheticFieldLoader(filter), metrics); + return new SourceLoader.Synthetic(filter, () -> mapping.syntheticFieldLoader(filter), metrics, mapping.ignoredSourceFormat()); } var syntheticVectorsLoader = mapping.syntheticVectorsLoader(filter); if (syntheticVectorsLoader != null) { diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java index b3a80dee26a85..7cfa7ef1b7988 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NestedObjectMapper.java @@ -448,7 +448,12 @@ SourceLoader.SyntheticFieldLoader syntheticFieldLoader(SourceFilter filter, Coll return SourceLoader.SyntheticFieldLoader.NOTHING; } - SourceLoader sourceLoader = new SourceLoader.Synthetic(filter, () -> super.syntheticFieldLoader(filter, mappers, true), NOOP); + SourceLoader sourceLoader = new SourceLoader.Synthetic( + filter, + () -> super.syntheticFieldLoader(filter, mappers, true), + NOOP, + IgnoredSourceFieldMapper.ignoredSourceFormat(indexSettings.getIndexVersionCreated()) + ); // Some synthetic source use cases require using _ignored_source field var requiredStoredFields = IgnoredSourceFieldMapper.ensureLoaded(sourceLoader.requiredStoredFields(), indexSettings); // force sequential access since nested fields are indexed per block diff --git a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java index 8711d39a23c08..fe3dac96541d2 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/NumberFieldMapper.java @@ -492,8 +492,13 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { - return floatingPointBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce); + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { + return floatingPointBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce, blContext); } }, FLOAT("float", NumericType.FLOAT) { @@ -681,8 +686,13 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { - return floatingPointBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce); + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { + return 
floatingPointBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce, blContext); } }, DOUBLE("double", NumericType.DOUBLE) { @@ -836,8 +846,13 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { - return floatingPointBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce); + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { + return floatingPointBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce, blContext); } }, BYTE("byte", NumericType.BYTE) { @@ -959,8 +974,13 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { - return integerBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce); + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { + return integerBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce, blContext); } private boolean isOutOfRange(Object value) { @@ -1082,8 +1102,13 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { - return integerBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce); + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { + return integerBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce, blContext); } private boolean isOutOfRange(Object value) { @@ -1279,8 +1304,13 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { - return integerBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce); + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { + return integerBlockLoaderFromFallbackSyntheticSource(this, fieldName, nullValue, coerce, blContext); } }, LONG("long", NumericType.LONG) { @@ -1436,7 +1466,12 @@ BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSo } @Override - BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce) { + BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ) { var reader = new NumberFallbackSyntheticSourceReader(this, nullValue, coerce) { @Override public void writeToBlock(List values, BlockLoader.Builder blockBuilder) { @@ -1447,7 +1482,11 @@ public void writeToBlock(List values, BlockLoader.Builder blockBuilder) } }; - return new FallbackSyntheticSourceBlockLoader(reader, fieldName) { + return new FallbackSyntheticSourceBlockLoader( + reader, + fieldName, + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { 
@Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.longs(expectedCount); @@ -1735,14 +1774,20 @@ public void writeValue(XContentBuilder b, long value) throws IOException { abstract BlockLoader blockLoaderFromSource(SourceValueFetcher sourceValueFetcher, BlockSourceReader.LeafIteratorLookup lookup); - abstract BlockLoader blockLoaderFromFallbackSyntheticSource(String fieldName, Number nullValue, boolean coerce); + abstract BlockLoader blockLoaderFromFallbackSyntheticSource( + String fieldName, + Number nullValue, + boolean coerce, + MappedFieldType.BlockLoaderContext blContext + ); // All values that fit into integer are returned as integers private static BlockLoader integerBlockLoaderFromFallbackSyntheticSource( NumberType type, String fieldName, Number nullValue, - boolean coerce + boolean coerce, + MappedFieldType.BlockLoaderContext blContext ) { var reader = new NumberFallbackSyntheticSourceReader(type, nullValue, coerce) { @Override @@ -1754,7 +1799,11 @@ public void writeToBlock(List values, BlockLoader.Builder blockBuilder) } }; - return new FallbackSyntheticSourceBlockLoader(reader, fieldName) { + return new FallbackSyntheticSourceBlockLoader( + reader, + fieldName, + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.ints(expectedCount); @@ -1767,7 +1816,8 @@ private static BlockLoader floatingPointBlockLoaderFromFallbackSyntheticSource( NumberType type, String fieldName, Number nullValue, - boolean coerce + boolean coerce, + MappedFieldType.BlockLoaderContext blContext ) { var reader = new NumberFallbackSyntheticSourceReader(type, nullValue, coerce) { @Override @@ -1779,7 +1829,11 @@ public void writeToBlock(List values, BlockLoader.Builder blockBuilder) } }; - return new FallbackSyntheticSourceBlockLoader(reader, fieldName) { + return new FallbackSyntheticSourceBlockLoader( + reader, + fieldName, + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.doubles(expectedCount); @@ -1975,7 +2029,7 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { // Multi fields don't have fallback synthetic source. 
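All of the NumberFieldMapper hunks above make the same mechanical change: BlockLoaderContext is threaded into blockLoaderFromFallbackSyntheticSource purely so the fallback loader can resolve the ignored-source format from the version the index was created with. A condensed sketch of that shared shape, assuming the nested Reader type and shown only for the integer case:

    static FallbackSyntheticSourceBlockLoader intFallbackLoader(
        FallbackSyntheticSourceBlockLoader.Reader<?> reader, // stands for the per-type NumberFallbackSyntheticSourceReader
        String fieldName,
        MappedFieldType.BlockLoaderContext blContext
    ) {
        return new FallbackSyntheticSourceBlockLoader(
            reader,
            fieldName,
            // the new third argument: per-field vs. single _ignored_source, derived from the index version
            IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated())
        ) {
            @Override
            public Builder builder(BlockFactory factory, int expectedCount) {
                return factory.ints(expectedCount); // integer types build int blocks; longs and doubles differ
            }
        };
    }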
if (isSyntheticSource && blContext.parentField(name()) == null) { - return type.blockLoaderFromFallbackSyntheticSource(name(), nullValue, coerce); + return type.blockLoaderFromFallbackSyntheticSource(name(), nullValue, coerce, blContext); } BlockSourceReader.LeafIteratorLookup lookup = hasDocValues() == false && (isStored() || isIndexed()) diff --git a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java index d3380a9a8f05b..8efe6219b059a 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/SourceLoader.java @@ -24,7 +24,6 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.util.ArrayList; -import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -120,6 +119,7 @@ class Synthetic implements SourceLoader { private final Supplier syntheticFieldLoaderLeafSupplier; private final Set requiredStoredFields; private final SourceFieldMetrics metrics; + private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat; /** * Creates a {@link SourceLoader} to reconstruct {@code _source} from doc values anf stored fields. @@ -127,7 +127,12 @@ class Synthetic implements SourceLoader { * @param fieldLoaderSupplier A supplier to create {@link SyntheticFieldLoader}, one for each leaf. * @param metrics Metrics for profiling. */ - public Synthetic(@Nullable SourceFilter filter, Supplier fieldLoaderSupplier, SourceFieldMetrics metrics) { + public Synthetic( + @Nullable SourceFilter filter, + Supplier fieldLoaderSupplier, + SourceFieldMetrics metrics, + IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat + ) { this.syntheticFieldLoaderLeafSupplier = fieldLoaderSupplier; this.requiredStoredFields = syntheticFieldLoaderLeafSupplier.get() .storedFieldLoaders() @@ -135,6 +140,7 @@ public Synthetic(@Nullable SourceFilter filter, Supplier f .collect(Collectors.toSet()); this.metrics = metrics; this.filter = filter; + this.ignoredSourceFormat = ignoredSourceFormat; } @Override @@ -150,7 +156,10 @@ public Set requiredStoredFields() { @Override public Leaf leaf(LeafReader reader, int[] docIdsInLeaf) throws IOException { SyntheticFieldLoader loader = syntheticFieldLoaderLeafSupplier.get(); - return new LeafWithMetrics(new SyntheticLeaf(filter, loader, loader.docValuesLoader(reader, docIdsInLeaf)), metrics); + return new LeafWithMetrics( + new SyntheticLeaf(filter, loader, loader.docValuesLoader(reader, docIdsInLeaf), ignoredSourceFormat), + metrics + ); } private record LeafWithMetrics(Leaf leaf, SourceFieldMetrics metrics) implements Leaf { @@ -183,14 +192,21 @@ private static class SyntheticLeaf implements Leaf { private final SyntheticFieldLoader loader; private final SyntheticFieldLoader.DocValuesLoader docValuesLoader; private final Map storedFieldLoaders; - - private SyntheticLeaf(SourceFilter filter, SyntheticFieldLoader loader, SyntheticFieldLoader.DocValuesLoader docValuesLoader) { + private final IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat; + + private SyntheticLeaf( + SourceFilter filter, + SyntheticFieldLoader loader, + SyntheticFieldLoader.DocValuesLoader docValuesLoader, + IgnoredSourceFieldMapper.IgnoredSourceFormat ignoredSourceFormat + ) { this.filter = filter; this.loader = loader; this.docValuesLoader = docValuesLoader; this.storedFieldLoaders = Map.copyOf( 
loader.storedFieldLoaders().collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)) ); + this.ignoredSourceFormat = ignoredSourceFormat; } @Override @@ -203,29 +219,19 @@ public Source source(LeafStoredFieldLoader storedFieldLoader, int docId) throws @Override public void write(LeafStoredFieldLoader storedFieldLoader, int docId, XContentBuilder b) throws IOException { - // Maps the names of existing objects to lists of ignored fields they contain. - Map> objectsWithIgnoredFields = null; - for (Map.Entry> e : storedFieldLoader.storedFields().entrySet()) { - SyntheticFieldLoader.StoredFieldLoader loader = storedFieldLoaders.get(e.getKey()); + SourceLoader.SyntheticFieldLoader.StoredFieldLoader loader = storedFieldLoaders.get(e.getKey()); if (loader != null) { loader.load(e.getValue()); } - if (IgnoredSourceFieldMapper.NAME.equals(e.getKey())) { - for (Object value : e.getValue()) { - if (objectsWithIgnoredFields == null) { - objectsWithIgnoredFields = new HashMap<>(); - } - IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value); - if (filter != null - && filter.isPathFiltered(nameValue.name(), XContentDataHelper.isEncodedObject(nameValue.value()))) { - // This path is filtered by the include/exclude rules - continue; - } - objectsWithIgnoredFields.computeIfAbsent(nameValue.getParentFieldName(), k -> new ArrayList<>()).add(nameValue); - } - } } + + // Maps the names of existing objects to lists of ignored fields they contain. + Map> objectsWithIgnoredFields = ignoredSourceFormat.loadAllIgnoredFields( + filter, + storedFieldLoader.storedFields() + ); + if (objectsWithIgnoredFields != null) { loader.setIgnoredValues(objectsWithIgnoredFields); } diff --git a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java index 3d2b89f5a1d48..7b69bd1841312 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/TextFieldMapper.java @@ -1071,14 +1071,14 @@ protected String delegatingTo() { // The parent might, but we don't have enough context here to figure this out. // So we bail. 
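The SyntheticLeaf.write hunk above replaces the inlined _ignored_source grouping loop with a call to ignoredSourceFormat.loadAllIgnoredFields. For review, here is a sketch of what the single-field format is expected to do, reconstructed from the removed loop; the method shape and the null return for the empty case are assumptions, not copied from the new enum implementation.

    // Reconstructed from the removed inline logic in write(): group ignored values by parent object,
    // skipping paths excluded by the source filter, and return null when nothing was ignored.
    Map<String, List<IgnoredSourceFieldMapper.NameValue>> loadAllIgnoredFields(
        SourceFilter filter,
        Map<String, List<Object>> storedFields
    ) {
        Map<String, List<IgnoredSourceFieldMapper.NameValue>> objectsWithIgnoredFields = null;
        for (Object value : storedFields.getOrDefault(IgnoredSourceFieldMapper.NAME, List.of())) {
            IgnoredSourceFieldMapper.NameValue nameValue = IgnoredSourceFieldMapper.decode(value);
            if (filter != null && filter.isPathFiltered(nameValue.name(), XContentDataHelper.isEncodedObject(nameValue.value()))) {
                continue; // path excluded by the include/exclude rules
            }
            if (objectsWithIgnoredFields == null) {
                objectsWithIgnoredFields = new HashMap<>();
            }
            objectsWithIgnoredFields.computeIfAbsent(nameValue.getParentFieldName(), k -> new ArrayList<>()).add(nameValue);
        }
        return objectsWithIgnoredFields;
    }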
if (isSyntheticSource && syntheticSourceDelegate == null && parentField == null) { - return fallbackSyntheticSourceBlockLoader(); + return fallbackSyntheticSourceBlockLoader(blContext); } SourceValueFetcher fetcher = SourceValueFetcher.toString(blContext.sourcePaths(name())); return new BlockSourceReader.BytesRefsBlockLoader(fetcher, blockReaderDisiLookup(blContext)); } - FallbackSyntheticSourceBlockLoader fallbackSyntheticSourceBlockLoader() { + FallbackSyntheticSourceBlockLoader fallbackSyntheticSourceBlockLoader(BlockLoaderContext blContext) { var reader = new FallbackSyntheticSourceBlockLoader.SingleValueReader(null) { @Override public void convertValue(Object value, List accumulator) { @@ -1106,7 +1106,11 @@ public void writeToBlock(List values, BlockLoader.Builder blockBuilder } }; - return new FallbackSyntheticSourceBlockLoader(reader, name()) { + return new FallbackSyntheticSourceBlockLoader( + reader, + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.bytesRefs(expectedCount); diff --git a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java index 4edd6475b890d..cde64f54c80d5 100644 --- a/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java +++ b/server/src/main/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapper.java @@ -1693,18 +1693,22 @@ public DenseVectorIndexOptions parseIndexOptions(String fieldName, Map 100d) { throw new IllegalArgumentException( - "default_n_probe must be at least 1 or exactly -1, got: " + nProbe + " for field [" + fieldName + "]" + "default_visit_percentage must be between 0.0 and 100.0, got: " + + visitPercentage + + " for field [" + + fieldName + + "]" ); } } MappingParser.checkNoRemainingFields(fieldName, indexOptionsMap); - return new BBQIVFIndexOptions(clusterSize, nProbe, rescoreVector); + return new BBQIVFIndexOptions(clusterSize, visitPercentage, rescoreVector); } @Override @@ -2297,12 +2301,12 @@ public boolean validateDimension(int dim, boolean throwOnError) { static class BBQIVFIndexOptions extends QuantizedIndexOptions { final int clusterSize; - final int defaultNProbe; + final double defaultVisitPercentage; - BBQIVFIndexOptions(int clusterSize, int defaultNProbe, RescoreVector rescoreVector) { + BBQIVFIndexOptions(int clusterSize, double defaultVisitPercentage, RescoreVector rescoreVector) { super(VectorIndexType.BBQ_DISK, rescoreVector); this.clusterSize = clusterSize; - this.defaultNProbe = defaultNProbe; + this.defaultVisitPercentage = defaultVisitPercentage; } @Override @@ -2320,13 +2324,13 @@ public boolean updatableTo(DenseVectorIndexOptions update) { boolean doEquals(DenseVectorIndexOptions other) { BBQIVFIndexOptions that = (BBQIVFIndexOptions) other; return clusterSize == that.clusterSize - && defaultNProbe == that.defaultNProbe + && defaultVisitPercentage == that.defaultVisitPercentage && Objects.equals(rescoreVector, that.rescoreVector); } @Override int doHashCode() { - return Objects.hash(clusterSize, defaultNProbe, rescoreVector); + return Objects.hash(clusterSize, defaultVisitPercentage, rescoreVector); } @Override @@ -2339,7 +2343,7 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.startObject(); builder.field("type", type); builder.field("cluster_size", 
clusterSize); - builder.field("default_n_probe", defaultNProbe); + builder.field("default_visit_percentage", defaultVisitPercentage); if (rescoreVector != null) { rescoreVector.toXContent(builder, params); } @@ -2736,6 +2740,7 @@ private Query createKnnFloatQuery( .add(filter, BooleanClause.Occur.FILTER) .build(); } else if (indexOptions instanceof BBQIVFIndexOptions bbqIndexOptions) { + float defaultVisitRatio = (float) (bbqIndexOptions.defaultVisitPercentage / 100d); knnQuery = parentFilter != null ? new DiversifyingChildrenIVFKnnFloatVectorQuery( name(), @@ -2744,9 +2749,9 @@ private Query createKnnFloatQuery( numCands, filter, parentFilter, - bbqIndexOptions.defaultNProbe + defaultVisitRatio ) - : new IVFKnnFloatVectorQuery(name(), queryVector, adjustedK, numCands, filter, bbqIndexOptions.defaultNProbe); + : new IVFKnnFloatVectorQuery(name(), queryVector, adjustedK, numCands, filter, defaultVisitRatio); } else { knnQuery = parentFilter != null ? new ESDiversifyingChildrenFloatKnnVectorQuery( diff --git a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java index b2c0cdab8d16e..56e136801e128 100644 --- a/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java +++ b/server/src/main/java/org/elasticsearch/index/query/SearchExecutionContext.java @@ -36,6 +36,7 @@ import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.DocumentParsingException; import org.elasticsearch.index.mapper.FieldMapper; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MappedFieldType.FielddataOperation; import org.elasticsearch.index.mapper.Mapper; @@ -446,7 +447,8 @@ public SourceLoader newSourceLoader(@Nullable SourceFilter filter, boolean force return new SourceLoader.Synthetic( filter, () -> mappingLookup.getMapping().syntheticFieldLoader(null), - mapperMetrics.sourceFieldMetrics() + mapperMetrics.sourceFieldMetrics(), + IgnoredSourceFieldMapper.ignoredSourceFormat(indexSettings.getIndexVersionCreated()) ); } return mappingLookup.newSourceLoader(filter, mapperMetrics.sourceFieldMetrics()); diff --git a/server/src/main/java/org/elasticsearch/search/SearchHit.java b/server/src/main/java/org/elasticsearch/search/SearchHit.java index a9c8e01fa32ac..b16c00033292b 100644 --- a/server/src/main/java/org/elasticsearch/search/SearchHit.java +++ b/server/src/main/java/org/elasticsearch/search/SearchHit.java @@ -878,7 +878,7 @@ public XContentBuilder toInnerXContent(XContentBuilder builder, Params params) t } // _ignored is the only multi-valued meta field // TODO: can we avoid having an exception here? 
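The DenseVectorFieldMapper hunks above replace the integer default_n_probe index option with default_visit_percentage, validated to lie between 0.0 and 100.0 and converted to a 0.0 to 1.0 ratio when the kNN query is built. A small illustrative helper, with the percentage passed in directly rather than read from BBQIVFIndexOptions:

    // Illustrative only: a default_visit_percentage of 5.0 in the mapping becomes a 0.05 visit ratio on the query.
    static IVFKnnFloatVectorQuery bbqIvfQuery(
        String field, float[] queryVector, int k, int numCands, Query filter, double defaultVisitPercentage
    ) {
        float visitRatio = (float) (defaultVisitPercentage / 100d); // e.g. 5.0 -> 0.05f
        return new IVFKnnFloatVectorQuery(field, queryVector, k, numCands, filter, visitRatio);
    }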
- if (IgnoredFieldMapper.NAME.equals(field.getName()) || IgnoredSourceFieldMapper.NAME.equals(field.getName())) { + if (IgnoredFieldMapper.NAME.equals(field.getName()) || field.getName().startsWith(IgnoredSourceFieldMapper.NAME)) { builder.field(field.getName(), field.getValues()); } else { builder.field(field.getName(), field.getValue()); diff --git a/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java b/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java index b67a3ff60f196..d4f44f31d6b36 100644 --- a/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java +++ b/server/src/main/java/org/elasticsearch/search/fetch/PreloadedFieldLookupProvider.java @@ -12,6 +12,7 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.util.SetOnce; import org.elasticsearch.index.mapper.IdFieldMapper; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.search.lookup.FieldLookup; import org.elasticsearch.search.lookup.LeafFieldLookupProvider; @@ -21,6 +22,7 @@ import java.util.Map; import java.util.Set; import java.util.function.Supplier; +import java.util.stream.Collectors; /** * Makes pre-loaded stored fields available via a LeafSearchLookup. @@ -45,6 +47,16 @@ public void populateFieldLookup(FieldLookup fieldLookup, int doc) throws IOExcep fieldLookup.setValues(Collections.singletonList(id)); return; } + if (field.equals(IgnoredSourceFieldMapper.NAME)) { + fieldLookup.setValues( + preloadedStoredFieldValues.entrySet() + .stream() + .filter(entry -> entry.getKey().startsWith(IgnoredSourceFieldMapper.NAME)) + .flatMap(entry -> entry.getValue().stream()) + .toList() + ); + return; + } if (preloadedStoredFieldNames.get().contains(field)) { fieldLookup.setValues(preloadedStoredFieldValues.get(field)); return; @@ -61,11 +73,17 @@ void setPreloadedStoredFieldNames(Set preloadedStoredFieldNames) { } void setPreloadedStoredFieldValues(String id, Map> preloadedStoredFieldValues) { - assert preloadedStoredFieldNames.get().containsAll(preloadedStoredFieldValues.keySet()) + assert preloadedStoredFieldNames.get() + .containsAll( + preloadedStoredFieldValues.keySet() + .stream() + .filter(it -> it.startsWith(IgnoredSourceFieldMapper.NAME) == false) + .collect(Collectors.toSet()) + ) : "Provided stored field that was not expected to be preloaded? 
" + preloadedStoredFieldValues.keySet() + " - " - + preloadedStoredFieldNames; + + preloadedStoredFieldNames.get(); this.preloadedStoredFieldValues = preloadedStoredFieldValues; this.id = id; } diff --git a/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java b/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java index 16b32c46972bc..50d94541fe666 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQuery.java @@ -11,6 +11,7 @@ import com.carrotsearch.hppc.IntHashSet; +import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -50,29 +51,27 @@ abstract class AbstractIVFKnnVectorQuery extends Query implements QueryProfilerP static final TopDocs NO_RESULTS = TopDocsCollector.EMPTY_TOPDOCS; protected final String field; - protected final int nProbe; + protected final float providedVisitRatio; protected final int k; protected final int numCands; protected final Query filter; - protected final KnnSearchStrategy searchStrategy; protected int vectorOpsCount; - protected AbstractIVFKnnVectorQuery(String field, int nProbe, int k, int numCands, Query filter) { + protected AbstractIVFKnnVectorQuery(String field, float visitRatio, int k, int numCands, Query filter) { if (k < 1) { throw new IllegalArgumentException("k must be at least 1, got: " + k); } - if (nProbe < 1 && nProbe != -1) { - throw new IllegalArgumentException("nProbe must be at least 1 or exactly -1, got: " + nProbe); + if (visitRatio < 0.0f || visitRatio > 1.0f) { + throw new IllegalArgumentException("visitRatio must be between 0.0 and 1.0 (both inclusive), got: " + visitRatio); } if (numCands < k) { throw new IllegalArgumentException("numCands must be at least k, got: " + numCands); } this.field = field; - this.nProbe = nProbe; + this.providedVisitRatio = visitRatio; this.k = k; this.filter = filter; this.numCands = numCands; - this.searchStrategy = new IVFKnnSearchStrategy(nProbe); } @Override @@ -90,12 +89,12 @@ public boolean equals(Object o) { return k == that.k && Objects.equals(field, that.field) && Objects.equals(filter, that.filter) - && Objects.equals(nProbe, that.nProbe); + && Objects.equals(providedVisitRatio, that.providedVisitRatio); } @Override public int hashCode() { - return Objects.hash(field, k, filter, nProbe); + return Objects.hash(field, k, filter, providedVisitRatio); } @Override @@ -116,16 +115,39 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException { } else { filterWeight = null; } + // we request numCands as we are using it as an approximation measure // we need to ensure we are getting at least 2*k results to ensure we cover overspill duplicates - // TODO move the logic for automatically adjusting percentages/nprobe to the query, so we can only pass + // TODO move the logic for automatically adjusting percentages to the query, so we can only pass // 2k to the collector. 
- KnnCollectorManager knnCollectorManager = getKnnCollectorManager(Math.max(Math.round(2f * k), numCands), indexSearcher); + KnnCollectorManager knnCollectorManager = getKnnCollectorManager(Math.round(2f * k), indexSearcher); TaskExecutor taskExecutor = indexSearcher.getTaskExecutor(); List leafReaderContexts = reader.leaves(); + + assert this instanceof IVFKnnFloatVectorQuery; + int totalVectors = 0; + for (LeafReaderContext leafReaderContext : leafReaderContexts) { + LeafReader leafReader = leafReaderContext.reader(); + FloatVectorValues floatVectorValues = leafReader.getFloatVectorValues(field); + if (floatVectorValues != null) { + totalVectors += floatVectorValues.size(); + } + } + + final float visitRatio; + if (providedVisitRatio == 0.0f) { + // dynamically set the percentage + float expected = (float) Math.round( + Math.log10(totalVectors) * Math.log10(totalVectors) * (Math.min(10_000, Math.max(numCands, 5 * k))) + ); + visitRatio = expected / totalVectors; + } else { + visitRatio = providedVisitRatio; + } + List> tasks = new ArrayList<>(leafReaderContexts.size()); for (LeafReaderContext context : leafReaderContexts) { - tasks.add(() -> searchLeaf(context, filterWeight, knnCollectorManager)); + tasks.add(() -> searchLeaf(context, filterWeight, knnCollectorManager, visitRatio)); } TopDocs[] perLeafResults = taskExecutor.invokeAll(tasks).toArray(TopDocs[]::new); @@ -138,8 +160,9 @@ public Query rewrite(IndexSearcher indexSearcher) throws IOException { return new KnnScoreDocQuery(topK.scoreDocs, reader); } - private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager) throws IOException { - TopDocs results = getLeafResults(ctx, filterWeight, knnCollectorManager); + private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager, float visitRatio) + throws IOException { + TopDocs results = getLeafResults(ctx, filterWeight, knnCollectorManager, visitRatio); IntHashSet dedup = new IntHashSet(results.scoreDocs.length * 4 / 3); int deduplicateCount = 0; for (ScoreDoc scoreDoc : results.scoreDocs) { @@ -159,12 +182,13 @@ private TopDocs searchLeaf(LeafReaderContext ctx, Weight filterWeight, KnnCollec return new TopDocs(results.totalHits, deduplicatedScoreDocs); } - TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager) throws IOException { + TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, KnnCollectorManager knnCollectorManager, float visitRatio) + throws IOException { final LeafReader reader = ctx.reader(); final Bits liveDocs = reader.getLiveDocs(); if (filterWeight == null) { - return approximateSearch(ctx, liveDocs, Integer.MAX_VALUE, knnCollectorManager); + return approximateSearch(ctx, liveDocs, Integer.MAX_VALUE, knnCollectorManager, visitRatio); } Scorer scorer = filterWeight.scorer(ctx); @@ -174,14 +198,15 @@ TopDocs getLeafResults(LeafReaderContext ctx, Weight filterWeight, KnnCollectorM BitSet acceptDocs = createBitSet(scorer.iterator(), liveDocs, reader.maxDoc()); final int cost = acceptDocs.cardinality(); - return approximateSearch(ctx, acceptDocs, cost + 1, knnCollectorManager); + return approximateSearch(ctx, acceptDocs, cost + 1, knnCollectorManager, visitRatio); } abstract TopDocs approximateSearch( LeafReaderContext context, Bits acceptDocs, int visitedLimit, - KnnCollectorManager knnCollectorManager + KnnCollectorManager knnCollectorManager, + float visitRatio ) throws IOException; protected 
KnnCollectorManager getKnnCollectorManager(int k, IndexSearcher searcher) { diff --git a/server/src/main/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQuery.java b/server/src/main/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQuery.java index 3b665f3ccf1d3..5df47af26a0f6 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQuery.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQuery.java @@ -29,7 +29,7 @@ public class DiversifyingChildrenIVFKnnFloatVectorQuery extends IVFKnnFloatVecto * @param numCands the number of nearest neighbors to gather per shard * @param childFilter the filter to apply to the results * @param parentsFilter bitset producer for the parent documents - * @param nProbe the number of probes to use for the IVF search strategy + * @param visitRatio the ratio of documents to be scored for the IVF search strategy */ public DiversifyingChildrenIVFKnnFloatVectorQuery( String field, @@ -38,9 +38,9 @@ public DiversifyingChildrenIVFKnnFloatVectorQuery( int numCands, Query childFilter, BitSetProducer parentsFilter, - int nProbe + float visitRatio ) { - super(field, query, k, numCands, childFilter, nProbe); + super(field, query, k, numCands, childFilter, visitRatio); this.parentsFilter = parentsFilter; } diff --git a/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java b/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java index a1168f82230c4..30b37b11005b3 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQuery.java @@ -15,6 +15,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.knn.KnnCollectorManager; +import org.apache.lucene.search.knn.KnnSearchStrategy; import org.apache.lucene.util.Bits; import java.io.IOException; @@ -32,10 +33,10 @@ public class IVFKnnFloatVectorQuery extends AbstractIVFKnnVectorQuery { * @param k the number of nearest neighbors to return * @param numCands the number of nearest neighbors to gather per shard * @param filter the filter to apply to the results - * @param nProbe the number of probes to use for the IVF search strategy + * @param visitRatio the ratio of vectors to score for the IVF search strategy */ - public IVFKnnFloatVectorQuery(String field, float[] query, int k, int numCands, Query filter, int nProbe) { - super(field, nProbe, k, numCands, filter); + public IVFKnnFloatVectorQuery(String field, float[] query, int k, int numCands, Query filter, float visitRatio) { + super(field, visitRatio, k, numCands, filter); this.query = query; } @@ -77,19 +78,21 @@ protected TopDocs approximateSearch( LeafReaderContext context, Bits acceptDocs, int visitedLimit, - KnnCollectorManager knnCollectorManager + KnnCollectorManager knnCollectorManager, + float visitRatio ) throws IOException { - KnnCollector knnCollector = knnCollectorManager.newCollector(visitedLimit, searchStrategy, context); - if (knnCollector == null) { - return NO_RESULTS; - } LeafReader reader = context.reader(); FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field); if (floatVectorValues == null) { FloatVectorValues.checkField(reader, field); return NO_RESULTS; } - if (Math.min(knnCollector.k(), floatVectorValues.size()) == 0) { + if (floatVectorValues.size() == 0) { + 
return NO_RESULTS; + } + KnnSearchStrategy strategy = new IVFKnnSearchStrategy(visitRatio); + KnnCollector knnCollector = knnCollectorManager.newCollector(visitedLimit, strategy, context); + if (knnCollector == null) { return NO_RESULTS; } reader.searchNearestVectors(field, query, knnCollector, acceptDocs); diff --git a/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnSearchStrategy.java b/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnSearchStrategy.java index eb630ea94f44f..30fe9c5ae24a6 100644 --- a/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnSearchStrategy.java +++ b/server/src/main/java/org/elasticsearch/search/vectors/IVFKnnSearchStrategy.java @@ -13,14 +13,14 @@ import java.util.Objects; public class IVFKnnSearchStrategy extends KnnSearchStrategy { - private final int nProbe; + private final float visitRatio; - IVFKnnSearchStrategy(int nProbe) { - this.nProbe = nProbe; + IVFKnnSearchStrategy(float visitRatio) { + this.visitRatio = visitRatio; } - public int getNProbe() { - return nProbe; + public float getVisitRatio() { + return visitRatio; } @Override @@ -28,12 +28,12 @@ public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; IVFKnnSearchStrategy that = (IVFKnnSearchStrategy) o; - return nProbe == that.nProbe; + return visitRatio == that.visitRatio; } @Override public int hashCode() { - return Objects.hashCode(nProbe); + return Objects.hashCode(visitRatio); } @Override diff --git a/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java b/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java index 58ac4635b2a4e..2a8c42c95cd0e 100644 --- a/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java +++ b/server/src/main/java/org/elasticsearch/threadpool/ThreadPool.java @@ -1024,6 +1024,23 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws return builder; } + @Override + public String toString() { + return "Info[name=" + + name + + ",type=" + + type + + ",min=" + + min + + ",max=" + + max + + ",keepAlive=" + + keepAlive + + ",queueSize=" + + queueSize + + "]"; + } + } /** diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index bb91c4f6cbab2..e281b6d905229 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -160,6 +160,7 @@ public boolean isRemoteClusterServerEnabled() { private final Map> remoteClusters; private final RemoteClusterCredentialsManager remoteClusterCredentialsManager; private final ProjectResolver projectResolver; + private final boolean inSkippableContext; @FixForMultiProject(description = "Inject the ProjectResolver instance.") RemoteClusterService(Settings settings, TransportService transportService) { @@ -177,6 +178,7 @@ public boolean isRemoteClusterServerEnabled() { if (remoteClusterServerEnabled) { registerRemoteClusterHandshakeRequestHandler(transportService); } + this.inSkippableContext = settings.getAsBoolean("serverless.cross_project.enabled", false); } /** @@ -293,6 +295,18 @@ public boolean isSkipUnavailable(String clusterAlias) { return getRemoteClusterConnection(clusterAlias).isSkipUnavailable(); } + /** + * Returns whether we're in a skippable context. 
Skippable context is true when either in CPS environment + * or skip_unavailable is set to true for the specified cluster. + * @param clusterAlias Name of the cluster + * @param allowPartialSearchResults If partial results can be served for the search request. + * @return boolean + */ + public boolean shouldSkipOnFailure(String clusterAlias, Boolean allowPartialSearchResults) { + return (inSkippableContext && (allowPartialSearchResults != null && allowPartialSearchResults)) + || getRemoteClusterConnection(clusterAlias).isSkipUnavailable(); + } + public Transport.Connection getConnection(String cluster) { return getRemoteClusterConnection(cluster).getConnection(); } diff --git a/server/src/test/java/org/elasticsearch/action/bulk/TransportSimulateBulkActionTests.java b/server/src/test/java/org/elasticsearch/action/bulk/TransportSimulateBulkActionTests.java index ca275d284f7e3..8296d52900c98 100644 --- a/server/src/test/java/org/elasticsearch/action/bulk/TransportSimulateBulkActionTests.java +++ b/server/src/test/java/org/elasticsearch/action/bulk/TransportSimulateBulkActionTests.java @@ -185,7 +185,8 @@ public void onResponse(BulkResponse response) { "_index": "%s", "_version": -3, "_source": %s, - "executed_pipelines": [%s] + "executed_pipelines": [%s], + "effective_mapping":{} }""", indexRequest.id(), indexRequest.index(), @@ -319,7 +320,8 @@ public void onResponse(BulkResponse response) { "_version": -3, "_source": %s, "executed_pipelines": [%s], - "error":{"type":"exception","reason":"invalid mapping"} + "error":{"type":"exception","reason":"invalid mapping"}, + "effective_mapping":{"_doc":{"dynamic":"strict"}} }""", indexRequest.id(), indexName, @@ -346,7 +348,8 @@ public void onResponse(BulkResponse response) { "_index": "%s", "_version": -3, "_source": %s, - "executed_pipelines": [%s] + "executed_pipelines": [%s], + "effective_mapping":{"_doc":{"dynamic":"strict"}} }""", indexRequest.id(), indexName, @@ -373,7 +376,9 @@ public void onFailure(Exception e) { }; when(indicesService.withTempIndexService(any(), any())).thenAnswer((Answer) invocation -> { IndexMetadata imd = invocation.getArgument(0); - if (indicesWithInvalidMappings.contains(imd.getIndex().getName())) { + if (indicesWithInvalidMappings.contains(imd.getIndex().getName()) + // We only want to throw exceptions inside TransportSimulateBulkAction: + && invocation.getArgument(1).getClass().getSimpleName().contains(TransportSimulateBulkAction.class.getSimpleName())) { throw new ElasticsearchException("invalid mapping"); } else { // we don't actually care what is returned, as long as no exception is thrown the request is considered valid: diff --git a/server/src/test/java/org/elasticsearch/action/ingest/SimulateIndexResponseTests.java b/server/src/test/java/org/elasticsearch/action/ingest/SimulateIndexResponseTests.java index f3ec4fc1ac1c3..5136a5c3bd01a 100644 --- a/server/src/test/java/org/elasticsearch/action/ingest/SimulateIndexResponseTests.java +++ b/server/src/test/java/org/elasticsearch/action/ingest/SimulateIndexResponseTests.java @@ -13,6 +13,7 @@ import org.elasticsearch.action.index.IndexResponse; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.RandomObjects; @@ -25,6 +26,7 @@ import java.util.List; import java.util.stream.Collectors; +import static 
org.elasticsearch.cluster.metadata.ComponentTemplateTests.randomMappings; import static org.hamcrest.Matchers.equalTo; public class SimulateIndexResponseTests extends ESTestCase { @@ -49,6 +51,7 @@ public void testToXContent() throws IOException { XContentType.JSON, pipelines, List.of(), + null, null ); @@ -61,7 +64,8 @@ public void testToXContent() throws IOException { "_index": "%s", "_version": %d, "_source": %s, - "executed_pipelines": [%s] + "executed_pipelines": [%s], + "effective_mapping": {} }""", id, index, @@ -81,7 +85,8 @@ public void testToXContent() throws IOException { XContentType.JSON, pipelines, List.of(), - new ElasticsearchException("Some failure") + new ElasticsearchException("Some failure"), + null ); assertEquals( @@ -94,7 +99,8 @@ public void testToXContent() throws IOException { "_version": %d, "_source": %s, "executed_pipelines": [%s], - "error":{"type":"exception","reason":"Some failure"} + "error":{"type":"exception","reason":"Some failure"}, + "effective_mapping": {} }""", id, index, @@ -114,6 +120,7 @@ public void testToXContent() throws IOException { XContentType.JSON, pipelines, List.of("abc", "def"), + null, null ); @@ -127,7 +134,8 @@ public void testToXContent() throws IOException { "_version": %d, "_source": %s, "executed_pipelines": [%s], - "ignored_fields": [{"field": "abc"}, {"field": "def"}] + "ignored_fields": [{"field": "abc"}, {"field": "def"}], + "effective_mapping": {} }""", id, index, @@ -138,6 +146,39 @@ public void testToXContent() throws IOException { ), Strings.toString(indexResponseWithIgnoredFields) ); + + SimulateIndexResponse responseWithEffectiveMapping = new SimulateIndexResponse( + id, + index, + version, + sourceBytes, + XContentType.JSON, + pipelines, + List.of(), + null, + new CompressedXContent("{\"properties\":{\"foo\":{\"type\":\"keyword\"}}}") + ); + assertEquals( + XContentHelper.stripWhitespace( + Strings.format( + """ + { + "_id": "%s", + "_index": "%s", + "_version": %d, + "_source": %s, + "executed_pipelines": [%s], + "effective_mapping": {"properties": {"foo": {"type": "keyword"}}} + }""", + id, + index, + version, + source, + pipelines.stream().map(pipeline -> "\"" + pipeline + "\"").collect(Collectors.joining(",")) + ) + ), + Strings.toString(responseWithEffectiveMapping) + ); } public void testSerialization() throws IOException { @@ -171,7 +212,12 @@ private static SimulateIndexResponse randomIndexResponse() { xContentType, pipelines, randomList(0, 20, () -> randomAlphaOfLength(15)), - randomBoolean() ? null : new ElasticsearchException("failed") + randomBoolean() ? null : new ElasticsearchException("failed"), + randomEffectiveMapping() ); } + + private static CompressedXContent randomEffectiveMapping() { + return randomBoolean() ? 
null : randomMappings(); + } } diff --git a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java index 2908bff995340..70df5f78615ca 100644 --- a/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/ClusterModuleTests.java @@ -36,6 +36,7 @@ import org.elasticsearch.cluster.routing.allocation.decider.ShardsLimitAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.SnapshotInProgressAllocationDecider; import org.elasticsearch.cluster.routing.allocation.decider.ThrottlingAllocationDecider; +import org.elasticsearch.cluster.routing.allocation.decider.WriteLoadConstraintDecider; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.IndexScopedSettings; @@ -278,6 +279,7 @@ public void testAllocationDeciderOrder() { SnapshotInProgressAllocationDecider.class, RestoreInProgressAllocationDecider.class, NodeShutdownAllocationDecider.class, + WriteLoadConstraintDecider.class, NodeReplacementAllocationDecider.class, FilterAllocationDecider.class, SameShardAllocationDecider.class, diff --git a/server/src/test/java/org/elasticsearch/cluster/InternalClusterInfoServiceSchedulingTests.java b/server/src/test/java/org/elasticsearch/cluster/InternalClusterInfoServiceSchedulingTests.java index 72eb5a6a3b764..1e91f69f47573 100644 --- a/server/src/test/java/org/elasticsearch/cluster/InternalClusterInfoServiceSchedulingTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/InternalClusterInfoServiceSchedulingTests.java @@ -62,7 +62,9 @@ public void testScheduling() { .put(InternalClusterInfoService.CLUSTER_ROUTING_ALLOCATION_ESTIMATED_HEAP_THRESHOLD_DECIDER_ENABLED.getKey(), true) .put( WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(), - WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + randomBoolean() + ? WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + : WriteLoadConstraintSettings.WriteLoadDeciderStatus.LOW_THRESHOLD_ONLY ); if (randomBoolean()) { settingsBuilder.put(INTERNAL_CLUSTER_INFO_UPDATE_INTERVAL_SETTING.getKey(), randomIntBetween(10000, 60000) + "ms"); diff --git a/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java new file mode 100644 index 0000000000000..12bfd8a0a4789 --- /dev/null +++ b/server/src/test/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderTests.java @@ -0,0 +1,306 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.cluster.routing.allocation.decider; + +import org.elasticsearch.action.support.replication.ClusterStateCreationUtils; +import org.elasticsearch.cluster.ClusterInfo; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ESAllocationTestCase; +import org.elasticsearch.cluster.NodeUsageStatsForThreadPools; +import org.elasticsearch.cluster.NodeUsageStatsForThreadPools.ThreadPoolUsageStats; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.routing.RoutingNode; +import org.elasticsearch.cluster.routing.RoutingNodes; +import org.elasticsearch.cluster.routing.RoutingNodesHelper; +import org.elasticsearch.cluster.routing.ShardRouting; +import org.elasticsearch.cluster.routing.ShardRoutingState; +import org.elasticsearch.cluster.routing.TestShardRouting; +import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; +import org.elasticsearch.cluster.routing.allocation.WriteLoadConstraintSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.shard.ShardId; +import org.elasticsearch.threadpool.ThreadPool; + +import java.util.HashMap; + +import static org.elasticsearch.common.settings.ClusterSettings.createBuiltInClusterSettings; + +public class WriteLoadConstraintDeciderTests extends ESAllocationTestCase { + + /** + * Test the write load decider behavior when disabled + */ + public void testWriteLoadDeciderDisabled() { + String indexName = "test-index"; + var testHarness = createClusterStateAndRoutingAllocation(indexName); + + // The write load decider is disabled by default. + + var writeLoadDecider = createWriteLoadConstraintDecider(Settings.builder().build()); + + assertEquals( + Decision.Type.YES, + writeLoadDecider.canAllocate( + testHarness.shardRouting2, + testHarness.exceedingThresholdRoutingNode, + testHarness.routingAllocation + ).type() + ); + assertEquals( + Decision.Type.YES, + writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation) + .type() + ); + assertEquals( + Decision.Type.YES, + writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation) + .type() + ); + assertEquals( + Decision.Type.YES, + writeLoadDecider.canAllocate( + testHarness.thirdRoutingNoWriteLoad, + testHarness.exceedingThresholdRoutingNode, + testHarness.routingAllocation + ).type() + ); + + assertEquals( + Decision.Type.YES, + writeLoadDecider.canRemain( + testHarness.clusterState.metadata().getProject().index(indexName), + testHarness.shardRouting1, + testHarness.exceedingThresholdRoutingNode, + testHarness.routingAllocation + ).type() + ); + } + + /** + * Test the {@link WriteLoadConstraintDecider#canAllocate} implementation. + */ + public void testWriteLoadDeciderCanAllocate() { + String indexName = "test-index"; + var testHarness = createClusterStateAndRoutingAllocation(indexName); + + var writeLoadDecider = createWriteLoadConstraintDecider( + Settings.builder() + .put( + WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(), + randomBoolean() + ? 
WriteLoadConstraintSettings.WriteLoadDeciderStatus.ENABLED + : WriteLoadConstraintSettings.WriteLoadDeciderStatus.LOW_THRESHOLD_ONLY + ) + .build() + ); + assertEquals( + "Assigning a new shard to a node that is above the threshold should fail", + Decision.Type.NO, + writeLoadDecider.canAllocate( + testHarness.shardRouting2, + testHarness.exceedingThresholdRoutingNode, + testHarness.routingAllocation + ).type() + ); + assertEquals( + "Assigning a new shard to a node that has capacity should succeed", + Decision.Type.YES, + writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.belowThresholdRoutingNode, testHarness.routingAllocation) + .type() + ); + assertEquals( + "Assigning a new shard without a write load estimate should _not_ be blocked by lack of capacity", + Decision.Type.YES, + writeLoadDecider.canAllocate( + testHarness.thirdRoutingNoWriteLoad, + testHarness.exceedingThresholdRoutingNode, + testHarness.routingAllocation + ).type() + ); + assertEquals( + "Assigning a new shard that would cause the node to exceed capacity should fail", + Decision.Type.NO, + writeLoadDecider.canAllocate(testHarness.shardRouting1, testHarness.nearThresholdRoutingNode, testHarness.routingAllocation) + .type() + ); + } + + /** + * Carries all the cluster state objects needed for testing after {@link #createClusterStateAndRoutingAllocation} sets them up. + */ + private record TestHarness( + ClusterState clusterState, + RoutingAllocation routingAllocation, + RoutingNode exceedingThresholdRoutingNode, + RoutingNode belowThresholdRoutingNode, + RoutingNode nearThresholdRoutingNode, + ShardRouting shardRouting1, + ShardRouting shardRouting2, + ShardRouting thirdRoutingNoWriteLoad + ) {} + + /** + * Creates all the cluster state and objects needed to test the {@link WriteLoadConstraintDecider}. + */ + private TestHarness createClusterStateAndRoutingAllocation(String indexName) { + /** + * Create the ClusterState for multiple nodes and multiple index shards. + */ + + ClusterState clusterState = ClusterStateCreationUtils.stateWithAssignedPrimariesAndReplicas(new String[] { indexName }, 3, 1); + // The number of data nodes the util method above creates is numberOfReplicas+1, and three data nodes are needed for this test. + assertEquals(3, clusterState.nodes().size()); + assertEquals(1, clusterState.metadata().getTotalNumberOfIndices()); + + /** + * Fetch references to the nodes and index shards from the generated ClusterState, so the ClusterInfo can be created from them. 
+ */ + + var discoveryNodeIterator = clusterState.nodes().iterator(); + assertTrue(discoveryNodeIterator.hasNext()); + var exceedingThresholdDiscoveryNode = discoveryNodeIterator.next(); + assertTrue(discoveryNodeIterator.hasNext()); + var belowThresholdDiscoveryNode2 = discoveryNodeIterator.next(); + assertTrue(discoveryNodeIterator.hasNext()); + var nearThresholdDiscoveryNode3 = discoveryNodeIterator.next(); + assertFalse(discoveryNodeIterator.hasNext()); + + var indexIterator = clusterState.metadata().indicesAllProjects().iterator(); + assertTrue(indexIterator.hasNext()); + IndexMetadata testIndexMetadata = indexIterator.next(); + assertFalse(indexIterator.hasNext()); + Index testIndex = testIndexMetadata.getIndex(); + assertEquals(3, testIndexMetadata.getNumberOfShards()); + ShardId testShardId1 = new ShardId(testIndex, 0); + ShardId testShardId2 = new ShardId(testIndex, 1); + ShardId testShardId3NoWriteLoad = new ShardId(testIndex, 2); + + /** + * Create a ClusterInfo that includes the node and shard level write load estimates for a variety of node capacity situations. + */ + + var nodeThreadPoolStatsWithWriteExceedingThreshold = createNodeUsageStatsForThreadPools( + exceedingThresholdDiscoveryNode, + 8, + 0.99f, + 0 + ); + var nodeThreadPoolStatsWithWriteBelowThreshold = createNodeUsageStatsForThreadPools(belowThresholdDiscoveryNode2, 8, 0.50f, 0); + var nodeThreadPoolStatsWithWriteNearThreshold = createNodeUsageStatsForThreadPools(nearThresholdDiscoveryNode3, 8, 0.89f, 0); + + // Create a map of usage per node. + var nodeIdToNodeUsageStatsForThreadPools = new HashMap(); + nodeIdToNodeUsageStatsForThreadPools.put(exceedingThresholdDiscoveryNode.getId(), nodeThreadPoolStatsWithWriteExceedingThreshold); + nodeIdToNodeUsageStatsForThreadPools.put(belowThresholdDiscoveryNode2.getId(), nodeThreadPoolStatsWithWriteBelowThreshold); + nodeIdToNodeUsageStatsForThreadPools.put(nearThresholdDiscoveryNode3.getId(), nodeThreadPoolStatsWithWriteNearThreshold); + + // Create a map of usage per shard. + var shardIdToWriteLoadEstimate = new HashMap(); + shardIdToWriteLoadEstimate.put(testShardId1, 0.5); + shardIdToWriteLoadEstimate.put(testShardId2, 0.5); + shardIdToWriteLoadEstimate.put(testShardId3NoWriteLoad, 0d); + + ClusterInfo clusterInfo = ClusterInfo.builder() + .nodeUsageStatsForThreadPools(nodeIdToNodeUsageStatsForThreadPools) + .shardWriteLoads(shardIdToWriteLoadEstimate) + .build(); + + /** + * Create the RoutingAllocation from the ClusterState and ClusterInfo above, and set up the other input for the WriteLoadDecider. 
+ */ + + var routingAllocation = new RoutingAllocation( + null, + RoutingNodes.immutable(clusterState.globalRoutingTable(), clusterState.nodes()), + clusterState, + clusterInfo, + null, + System.nanoTime() + ); + + ShardRouting shardRouting1 = TestShardRouting.newShardRouting( + testShardId1, + exceedingThresholdDiscoveryNode.getId(), + null, + true, + ShardRoutingState.STARTED + ); + ShardRouting shardRouting2 = TestShardRouting.newShardRouting( + testShardId2, + belowThresholdDiscoveryNode2.getId(), + null, + true, + ShardRoutingState.STARTED + ); + ShardRouting thirdRoutingNoWriteLoad = TestShardRouting.newShardRouting( + testShardId3NoWriteLoad, + belowThresholdDiscoveryNode2.getId(), + null, + true, + ShardRoutingState.STARTED + ); + + RoutingNode exceedingThresholdRoutingNode = RoutingNodesHelper.routingNode( + exceedingThresholdDiscoveryNode.getId(), + exceedingThresholdDiscoveryNode, + shardRouting1 + ); + RoutingNode belowThresholdRoutingNode = RoutingNodesHelper.routingNode( + belowThresholdDiscoveryNode2.getId(), + belowThresholdDiscoveryNode2, + shardRouting2 + ); + RoutingNode nearThresholdRoutingNode = RoutingNodesHelper.routingNode( + nearThresholdDiscoveryNode3.getId(), + nearThresholdDiscoveryNode3, + new ShardRouting[] {} + ); + + return new TestHarness( + clusterState, + routingAllocation, + exceedingThresholdRoutingNode, + belowThresholdRoutingNode, + nearThresholdRoutingNode, + shardRouting1, + shardRouting2, + thirdRoutingNoWriteLoad + ); + } + + private WriteLoadConstraintDecider createWriteLoadConstraintDecider(Settings settings) { + return new WriteLoadConstraintDecider(createBuiltInClusterSettings(settings)); + } + + /** + * Helper to create a {@link NodeUsageStatsForThreadPools} for the given node with the given WRITE thread pool usage stats. + */ + private NodeUsageStatsForThreadPools createNodeUsageStatsForThreadPools( + DiscoveryNode discoveryNode, + int totalWriteThreadPoolThreads, + float averageWriteThreadPoolUtilization, + long averageWriteThreadPoolQueueLatencyMillis + ) { + + // Create thread pool usage stats map for node1. + var writeThreadPoolUsageStats = new ThreadPoolUsageStats( + totalWriteThreadPoolThreads, + averageWriteThreadPoolUtilization, + averageWriteThreadPoolQueueLatencyMillis + ); + var threadPoolUsageMap = new HashMap(); + threadPoolUsageMap.put(ThreadPool.Names.WRITE, writeThreadPoolUsageStats); + + // Create the node's thread pool usage map + return new NodeUsageStatsForThreadPools(discoveryNode.getId(), threadPoolUsageMap); + } +} diff --git a/server/src/test/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutorTests.java b/server/src/test/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutorTests.java index b4b33d1265bcb..408050b01453d 100644 --- a/server/src/test/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutorTests.java +++ b/server/src/test/java/org/elasticsearch/common/util/concurrent/TaskExecutionTimeTrackingEsThreadPoolExecutorTests.java @@ -95,7 +95,7 @@ public void testExecutionEWMACalculation() throws Exception { /** * Verifies that we can peek at the task in front of the task queue to fetch the duration that the oldest task has been queued. - * Tests {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#peekMaxQueueLatencyInQueue}. + * Tests {@link TaskExecutionTimeTrackingEsThreadPoolExecutor#peekMaxQueueLatencyInQueueMillis}. 
*/ public void testFrontOfQueueLatency() throws Exception { ThreadContext context = new ThreadContext(Settings.EMPTY); @@ -135,7 +135,7 @@ public void testFrontOfQueueLatency() throws Exception { logger.info("--> executor: {}", executor); // Check that the peeking at a non-existence queue returns zero. - assertEquals("Zero should be returned when there is no queue", 0, executor.peekMaxQueueLatencyInQueue()); + assertEquals("Zero should be returned when there is no queue", 0, executor.peekMaxQueueLatencyInQueueMillis()); // Submit two tasks, into the thread pool with a single worker thread. The second one will be queued (because the pool only has // one thread) and can be peeked at. @@ -143,10 +143,10 @@ public void testFrontOfQueueLatency() throws Exception { executor.execute(() -> {}); waitForTimeToElapse(); - var frontOfQueueDuration = executor.peekMaxQueueLatencyInQueue(); + var frontOfQueueDuration = executor.peekMaxQueueLatencyInQueueMillis(); assertThat("Expected a task to be queued", frontOfQueueDuration, greaterThan(0L)); waitForTimeToElapse(); - var updatedFrontOfQueueDuration = executor.peekMaxQueueLatencyInQueue(); + var updatedFrontOfQueueDuration = executor.peekMaxQueueLatencyInQueueMillis(); assertThat( "Expected a second peek to report a longer duration", updatedFrontOfQueueDuration, @@ -156,7 +156,7 @@ public void testFrontOfQueueLatency() throws Exception { // Release the first task that's running, and wait for the second to start -- then it is ensured that the queue will be empty. safeAwait(barrier); safeAwait(barrier); - assertEquals("Queue should be emptied", 0, executor.peekMaxQueueLatencyInQueue()); + assertEquals("Queue should be emptied", 0, executor.peekMaxQueueLatencyInQueueMillis()); } finally { ThreadPool.terminate(executor, 10, TimeUnit.SECONDS); } @@ -463,8 +463,8 @@ long getQueueTimeNanos() { */ private static void waitForTimeToElapse() throws InterruptedException { final var startNanoTime = System.nanoTime(); - while ((System.nanoTime() - startNanoTime) < 1) { - Thread.sleep(Duration.ofNanos(1)); + while (TimeUnit.MILLISECONDS.convert(System.nanoTime() - startNanoTime, TimeUnit.NANOSECONDS) < 1) { + Thread.sleep(Duration.ofMillis(1)); } } } diff --git a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java index 6eb291c386885..28a7f08bb8d27 100644 --- a/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java +++ b/server/src/test/java/org/elasticsearch/index/codec/tsdb/es819/ES819TSDBDocValuesFormatTests.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.LogByteSizeMergePolicy; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; @@ -31,23 +32,32 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSortField; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.elasticsearch.cluster.metadata.DataStream; import org.elasticsearch.common.Randomness; +import org.elasticsearch.common.lucene.BytesRefs; import org.elasticsearch.common.util.CollectionUtils; import org.elasticsearch.index.codec.Elasticsearch900Lucene101Codec; import 
org.elasticsearch.index.codec.tsdb.ES87TSDBDocValuesFormatTests; +import org.elasticsearch.index.mapper.BlockLoader; import org.elasticsearch.index.mapper.TestBlock; import org.elasticsearch.test.ESTestCase; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Locale; +import java.util.Map; +import java.util.Set; import java.util.function.Supplier; import java.util.stream.IntStream; +import static org.hamcrest.Matchers.equalTo; + public class ES819TSDBDocValuesFormatTests extends ES87TSDBDocValuesFormatTests { private final Codec codec = new Elasticsearch900Lucene101Codec() { @@ -743,9 +753,9 @@ public void testBulkLoading() throws Exception { try (var reader = DirectoryReader.open(iw)) { int gaugeIndex = numDocs; for (var leaf : reader.leaves()) { - var timestampDV = getBulkNumericDocValues(leaf.reader(), timestampField); - var counterDV = getBulkNumericDocValues(leaf.reader(), counterField); - var gaugeDV = getBulkNumericDocValues(leaf.reader(), gaugeField); + var timestampDV = getColumnAtTimeReader(leaf.reader(), timestampField); + var counterDV = getColumnAtTimeReader(leaf.reader(), counterField); + var gaugeDV = getColumnAtTimeReader(leaf.reader(), gaugeField); int maxDoc = leaf.reader().maxDoc(); for (int i = 0; i < maxDoc;) { int size = Math.max(1, random().nextInt(0, maxDoc - i)); @@ -753,7 +763,8 @@ public void testBulkLoading() throws Exception { { // bulk loading timestamp: - var block = (TestBlock) timestampDV.read(factory, docs, 0); + var block = (TestBlock) timestampDV.tryRead(factory, docs, 0); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualTimestamp = (long) block.get(j); @@ -764,7 +775,8 @@ public void testBulkLoading() throws Exception { } { // bulk loading counter field: - var block = (TestBlock) counterDV.read(factory, docs, 0); + var block = (TestBlock) counterDV.tryRead(factory, docs, 0); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualCounter = (long) block.get(j); @@ -775,7 +787,8 @@ public void testBulkLoading() throws Exception { } { // bulk loading gauge field: - var block = (TestBlock) gaugeDV.read(factory, docs, 0); + var block = (TestBlock) gaugeDV.tryRead(factory, docs, 0); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualGauge = (long) block.get(j); @@ -803,15 +816,16 @@ public void testBulkLoading() throws Exception { int size = maxDoc - randomOffset; int gaugeIndex = size; - var timestampDV = getBulkNumericDocValues(leafReader, timestampField); - var counterDV = getBulkNumericDocValues(leafReader, counterField); - var gaugeDV = getBulkNumericDocValues(leafReader, gaugeField); + var timestampDV = getColumnAtTimeReader(leafReader, timestampField); + var counterDV = getColumnAtTimeReader(leafReader, counterField); + var gaugeDV = getColumnAtTimeReader(leafReader, gaugeField); var docs = TestBlock.docs(IntStream.range(0, maxDoc).toArray()); { // bulk loading timestamp: - var block = (TestBlock) timestampDV.read(blockFactory, docs, randomOffset); + var block = (TestBlock) timestampDV.tryRead(blockFactory, docs, randomOffset); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualTimestamp = (long) block.get(j); @@ -822,7 +836,8 @@ public void testBulkLoading() throws Exception { } { // bulk loading 
counter field: - var block = (TestBlock) counterDV.read(factory, docs, randomOffset); + var block = (TestBlock) counterDV.tryRead(factory, docs, randomOffset); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualCounter = (long) block.get(j); @@ -833,7 +848,8 @@ public void testBulkLoading() throws Exception { } { // bulk loading gauge field: - var block = (TestBlock) gaugeDV.read(factory, docs, randomOffset); + var block = (TestBlock) gaugeDV.tryRead(factory, docs, randomOffset); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualGauge = (long) block.get(j); @@ -847,16 +863,17 @@ public void testBulkLoading() throws Exception { size = docs.count(); // Test against values loaded using normal doc value apis: long[] expectedCounters = new long[size]; - counterDV = getBulkNumericDocValues(leafReader, counterField); + counterDV = getColumnAtTimeReader(leafReader, counterField); for (int i = 0; i < docs.count(); i++) { int docId = docs.get(i); counterDV.advanceExact(docId); expectedCounters[i] = counterDV.longValue(); } - counterDV = getBulkNumericDocValues(leafReader, counterField); + counterDV = getColumnAtTimeReader(leafReader, counterField); { // bulk loading counter field: - var block = (TestBlock) counterDV.read(factory, docs, 0); + var block = (TestBlock) counterDV.tryRead(factory, docs, 0); + assertNotNull(block); assertEquals(size, block.size()); for (int j = 0; j < block.size(); j++) { long actualCounter = (long) block.get(j); @@ -920,9 +937,9 @@ public void testBulkLoadingWithSparseDocs() throws Exception { false ); assertEquals(numDocsPerQValue, topDocs.totalHits.value()); - var timestampDV = getBulkNumericDocValues(leafReader, timestampField); + var timestampDV = getColumnAtTimeReader(leafReader, timestampField); long[] expectedTimestamps = new long[numDocsPerQValue]; - var counterDV = getBulkNumericDocValues(leafReader, counterField); + var counterDV = getColumnAtTimeReader(leafReader, counterField); long[] expectedCounters = new long[numDocsPerQValue]; int[] docIds = new int[numDocsPerQValue]; for (int i = 0; i < topDocs.scoreDocs.length; i++) { @@ -938,8 +955,9 @@ public void testBulkLoadingWithSparseDocs() throws Exception { var docs = TestBlock.docs(docIds); { - timestampDV = getBulkNumericDocValues(leafReader, timestampField); - var block = (TestBlock) timestampDV.read(factory, docs, 0); + timestampDV = getColumnAtTimeReader(leafReader, timestampField); + var block = (TestBlock) timestampDV.tryRead(factory, docs, 0); + assertNotNull(block); assertEquals(numDocsPerQValue, block.size()); for (int j = 0; j < block.size(); j++) { long actualTimestamp = (long) block.get(j); @@ -948,8 +966,9 @@ public void testBulkLoadingWithSparseDocs() throws Exception { } } { - counterDV = getBulkNumericDocValues(leafReader, counterField); - var block = (TestBlock) counterDV.read(factory, docs, 0); + counterDV = getColumnAtTimeReader(leafReader, counterField); + var block = (TestBlock) counterDV.tryRead(factory, docs, 0); + assertNotNull(block); assertEquals(numDocsPerQValue, block.size()); for (int j = 0; j < block.size(); j++) { long actualCounter = (long) block.get(j); @@ -962,8 +981,122 @@ public void testBulkLoadingWithSparseDocs() throws Exception { } } - private static BulkNumericDocValues getBulkNumericDocValues(LeafReader leafReader, String counterField) throws IOException { - return (BulkNumericDocValues) 
DocValues.unwrapSingleton(leafReader.getSortedNumericDocValues(counterField)); + public void testLoadKeywordFieldWithIndexSorts() throws IOException { + String primaryField = "sorted_first"; + String secondField = "sorted_second"; + String unsortedField = "no_sort"; + String sparseField = "sparse"; + var config = new IndexWriterConfig(); + config.setIndexSort(new Sort(new SortField(primaryField, SortField.Type.STRING, false))); + config.setMergePolicy(new LogByteSizeMergePolicy()); + config.setCodec(getCodec()); + Map hostnames = new HashMap<>(); + try (Directory dir = newDirectory(); IndexWriter writer = new IndexWriter(dir, config)) { + int numDocs = ESTestCase.randomIntBetween(100, 5000); + for (int i = 0; i < numDocs; i++) { + hostnames.put(i, "h" + random().nextInt(10)); + } + List ids = new ArrayList<>(hostnames.keySet()); + Randomness.shuffle(ids); + Set sparseIds = new HashSet<>(ESTestCase.randomSubsetOf(ESTestCase.between(1, ids.size() / 2), ids)); + for (Integer id : ids) { + var d = new Document(); + String hostname = hostnames.get(id); + d.add(new NumericDocValuesField("id", id)); + d.add(new SortedDocValuesField(primaryField, new BytesRef(hostname))); + d.add(new SortedDocValuesField(secondField, new BytesRef(hostname))); + d.add(new SortedDocValuesField(unsortedField, new BytesRef(hostname))); + if (sparseIds.contains(id)) { + d.add(new SortedDocValuesField(sparseField, new BytesRef(hostname))); + } + writer.addDocument(d); + if (random().nextInt(100) < 10) { + writer.flush(); + } + } + for (int iter = 0; iter < 2; iter++) { + var factory = TestBlock.factory(); + try (DirectoryReader reader = DirectoryReader.open(writer)) { + for (LeafReaderContext leaf : reader.leaves()) { + BlockLoader.Docs docs = new BlockLoader.Docs() { + @Override + public int count() { + return leaf.reader().maxDoc(); + } + + @Override + public int get(int i) { + return i; + } + }; + var idReader = ESTestCase.asInstanceOf( + BlockLoader.OptionalColumnAtATimeReader.class, + leaf.reader().getNumericDocValues("id") + ); + TestBlock idBlock = (TestBlock) idReader.tryRead(factory, docs, 0); + assertNotNull(idBlock); + var reader2 = ESTestCase.asInstanceOf( + BlockLoader.OptionalColumnAtATimeReader.class, + leaf.reader().getSortedDocValues(secondField) + ); + assertNull(reader2.tryRead(factory, docs, 0)); + var reader3 = ESTestCase.asInstanceOf( + BlockLoader.OptionalColumnAtATimeReader.class, + leaf.reader().getSortedDocValues(unsortedField) + ); + assertNull(reader3.tryRead(factory, docs, 0)); + for (int offset = 0; offset < idBlock.size(); offset += ESTestCase.between(1, numDocs)) { + int start = offset; + var reader1 = ESTestCase.asInstanceOf( + BlockLoader.OptionalColumnAtATimeReader.class, + leaf.reader().getSortedDocValues(primaryField) + ); + while (start < idBlock.size()) { + int end = start + random().nextInt(idBlock.size() - start); + TestBlock hostBlock = (TestBlock) reader1.tryRead(factory, new BlockLoader.Docs() { + @Override + public int count() { + return end + 1; + } + + @Override + public int get(int docId) { + return docId; + } + }, start); + Set seenValues = new HashSet<>(); + for (int p = start; p <= end; p++) { + String hostName = hostnames.get(((Number) idBlock.get(p)).intValue()); + seenValues.add(hostName); + } + if (seenValues.size() == 1) { + assertNotNull(hostBlock); + assertThat(hostBlock.size(), equalTo(end - start + 1)); + for (int i = 0; i < hostBlock.size(); i++) { + String actualHostName = BytesRefs.toString(hostBlock.get(i)); + assertThat(actualHostName, 
equalTo(hostnames.get(((Number) idBlock.get(i + start)).intValue()))); + } + } else { + assertNull(hostBlock); + } + if (start == idBlock.size() - 1) { + break; + } + start = end + ESTestCase.between(0, 10); + } + } + writer.forceMerge(1); + } + } + } + } + } + + private static ES819TSDBDocValuesProducer.BaseDenseNumericValues getColumnAtTimeReader(LeafReader leafReader, String counterField) + throws IOException { + return (ES819TSDBDocValuesProducer.BaseDenseNumericValues) DocValues.unwrapSingleton( + leafReader.getSortedNumericDocValues(counterField) + ); } private IndexWriterConfig getTimeSeriesIndexWriterConfig(String hostnameField, String timestampField) { diff --git a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java index f0e384d1cf272..6342ff1ddad9d 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/DateFieldMapperTests.java @@ -31,7 +31,6 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues; import org.elasticsearch.index.codec.tsdb.es819.ES819TSDBDocValuesFormat; import org.elasticsearch.index.mapper.DateFieldMapper.DateFieldType; import org.elasticsearch.script.DateFieldScript; @@ -849,7 +848,7 @@ public void testSingletonLongBulkBlockReadingManyValues() throws Exception { { // One big doc block var columnReader = (BlockDocValuesReader.SingletonLongs) blockLoader.columnAtATimeReader(context); - assertThat(columnReader.numericDocValues, instanceOf(BulkNumericDocValues.class)); + assertThat(columnReader.numericDocValues, instanceOf(BlockLoader.OptionalColumnAtATimeReader.class)); var docBlock = TestBlock.docs(IntStream.range(from, to).toArray()); var block = (TestBlock) columnReader.read(TestBlock.factory(), docBlock, 0); assertThat(block.size(), equalTo(to - from)); @@ -861,7 +860,7 @@ public void testSingletonLongBulkBlockReadingManyValues() throws Exception { // Smaller doc blocks int docBlockSize = 1000; var columnReader = (BlockDocValuesReader.SingletonLongs) blockLoader.columnAtATimeReader(context); - assertThat(columnReader.numericDocValues, instanceOf(BulkNumericDocValues.class)); + assertThat(columnReader.numericDocValues, instanceOf(BlockLoader.OptionalColumnAtATimeReader.class)); for (int i = from; i < to; i += docBlockSize) { var docBlock = TestBlock.docs(IntStream.range(i, i + docBlockSize).toArray()); var block = (TestBlock) columnReader.read(TestBlock.factory(), docBlock, 0); @@ -875,7 +874,7 @@ public void testSingletonLongBulkBlockReadingManyValues() throws Exception { { // One smaller doc block: var columnReader = (BlockDocValuesReader.SingletonLongs) blockLoader.columnAtATimeReader(context); - assertThat(columnReader.numericDocValues, instanceOf(BulkNumericDocValues.class)); + assertThat(columnReader.numericDocValues, instanceOf(BlockLoader.OptionalColumnAtATimeReader.class)); var docBlock = TestBlock.docs(IntStream.range(1010, 2020).toArray()); var block = (TestBlock) columnReader.read(TestBlock.factory(), docBlock, 0); assertThat(block.size(), equalTo(1010)); @@ -887,7 +886,7 @@ public void testSingletonLongBulkBlockReadingManyValues() throws Exception { { // Read two tiny blocks: var columnReader = (BlockDocValuesReader.SingletonLongs) blockLoader.columnAtATimeReader(context); - 
assertThat(columnReader.numericDocValues, instanceOf(BulkNumericDocValues.class)); + assertThat(columnReader.numericDocValues, instanceOf(BlockLoader.OptionalColumnAtATimeReader.class)); var docBlock = TestBlock.docs(IntStream.range(32, 64).toArray()); var block = (TestBlock) columnReader.read(TestBlock.factory(), docBlock, 0); assertThat(block.size(), equalTo(32)); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperConfigurationTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperConfigurationTests.java index 1ba5f423d4b03..222d8e92b269f 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperConfigurationTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperConfigurationTests.java @@ -52,7 +52,7 @@ public void testDisableIgnoredSourceRead() throws IOException { var doc = mapperService.documentMapper().parse(source(inputDocument)); // Field was written. - assertNotNull(doc.docs().get(0).getField(IgnoredSourceFieldMapper.NAME)); + assertTrue(doc.docs().get(0).getFields().stream().anyMatch(it -> it.name().startsWith(IgnoredSourceFieldMapper.NAME))); String syntheticSource = syntheticSource(mapperService.documentMapper(), inputDocument); // Values are not loaded. @@ -64,7 +64,7 @@ public void testDisableIgnoredSourceRead() throws IOException { doc = mapperService.documentMapper().parse(source(inputDocument)); // Field was written. - assertNotNull(doc.docs().get(0).getField(IgnoredSourceFieldMapper.NAME)); + assertTrue(doc.docs().get(0).getFields().stream().anyMatch(it -> it.name().startsWith(IgnoredSourceFieldMapper.NAME))); syntheticSource = syntheticSource(mapperService.documentMapper(), inputDocument); // Values are loaded. @@ -104,7 +104,7 @@ public void testDisableIgnoredSourceWrite() throws IOException { var doc = mapperService.documentMapper().parse(source(inputDocument)); // Field is not written. - assertNull(doc.docs().get(0).getField(IgnoredSourceFieldMapper.NAME)); + assertTrue(doc.docs().get(0).getFields().stream().noneMatch(it -> it.name().startsWith(IgnoredSourceFieldMapper.NAME))); String syntheticSource = syntheticSource(mapperService.documentMapper(), inputDocument); // Values are not loaded. @@ -116,7 +116,7 @@ public void testDisableIgnoredSourceWrite() throws IOException { doc = mapperService.documentMapper().parse(source(inputDocument)); // Field was written. - assertNotNull(doc.docs().get(0).getField(IgnoredSourceFieldMapper.NAME)); + assertTrue(doc.docs().get(0).getFields().stream().anyMatch(it -> it.name().startsWith(IgnoredSourceFieldMapper.NAME))); syntheticSource = syntheticSource(mapperService.documentMapper(), inputDocument); // Values are loaded. 
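/*
 * The doc-values and date-field tests above exercise BlockLoader.OptionalColumnAtATimeReader#tryRead,
 * which either returns a whole block of values or returns null when the column cannot be served in
 * bulk (for example, a sorted doc-values field that is not the primary index sort, or a doc range
 * spanning more than one distinct value), leaving the caller to fall back to per-document reads.
 * A minimal, self-contained sketch of that optional bulk-read contract in plain Java follows; the
 * interface and class names are illustrative stand-ins, not the Elasticsearch types.
 */
import java.util.Arrays;

/** Illustrative stand-in for an optional column-at-a-time reader: bulk read or null. */
interface OptionalColumnReader {
    /** Returns all requested values as one block, or null if a bulk read is not possible. */
    long[] tryRead(int[] docIds);
}

/** Per-document fallback reader. */
interface RowReader {
    long read(int docId);
}

public class OptionalBulkReadSketch {
    /** Caller-side pattern mirrored from the tests: try the bulk path first, fall back otherwise. */
    static long[] load(int[] docIds, OptionalColumnReader bulk, RowReader fallback) {
        long[] block = bulk.tryRead(docIds);
        if (block != null) {
            return block; // bulk path succeeded
        }
        long[] values = new long[docIds.length];
        for (int i = 0; i < docIds.length; i++) {
            values[i] = fallback.read(docIds[i]); // row-at-a-time fallback
        }
        return values;
    }

    public static void main(String[] args) {
        long[] column = { 10, 20, 30, 40 };
        // A bulk reader that only supports dense, ascending doc-id ranges and declines anything else.
        OptionalColumnReader bulk = docIds -> {
            for (int i = 1; i < docIds.length; i++) {
                if (docIds[i] != docIds[i - 1] + 1) {
                    return null;
                }
            }
            return Arrays.copyOfRange(column, docIds[0], docIds[0] + docIds.length);
        };
        RowReader fallback = docId -> column[docId];
        System.out.println(Arrays.toString(load(new int[] { 1, 2, 3 }, bulk, fallback))); // [20, 30, 40]
        System.out.println(Arrays.toString(load(new int[] { 0, 2 }, bulk, fallback)));    // [10, 30]
    }
}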
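/*
 * The configuration tests above stop looking up a single stored field named IgnoredSourceFieldMapper.NAME
 * ("_ignored_source") and instead accept any stored field whose name starts with that prefix, because with
 * the per-entry feature flag each ignored value may be kept under its own derived field name. A small,
 * self-contained sketch of the prefix-based check follows; the StoredField type and the per-entry name
 * format used in main() are assumptions for illustration only (the exact naming scheme produced by
 * IgnoredSourceFieldMapper.ignoredFieldName is not shown here).
 */
import java.util.List;

public class IgnoredSourcePrefixCheckSketch {
    /** Stand-in for a stored Lucene field; only the name matters for this check. */
    record StoredField(String name) {}

    static final String IGNORED_SOURCE = "_ignored_source";

    /** Mirrors the assertion style in the tests: does any stored field carry ignored source? */
    static boolean hasIgnoredSourceEntry(List<StoredField> fields) {
        return fields.stream().anyMatch(f -> f.name().startsWith(IGNORED_SOURCE));
    }

    public static void main(String[] args) {
        List<StoredField> withEntry = List.of(new StoredField("my_value"), new StoredField("_ignored_source.my_value"));
        List<StoredField> without = List.of(new StoredField("my_value"));
        System.out.println(hasIgnoredSourceEntry(withEntry)); // true
        System.out.println(hasIgnoredSourceEntry(without));   // false
    }
}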
diff --git a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java index 423811c9e22a2..3484852a481e6 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/IgnoredSourceFieldMapperTests.java @@ -10,16 +10,16 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.util.ArrayUtil; import org.elasticsearch.common.Strings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.Nullable; import org.elasticsearch.search.lookup.SourceFilter; -import org.elasticsearch.test.FieldMaskingReader; +import org.elasticsearch.test.WildcardFieldMaskingReader; import org.elasticsearch.xcontent.XContentBuilder; import org.elasticsearch.xcontent.json.JsonXContent; import org.hamcrest.Matchers; -import org.junit.Before; import java.io.IOException; import java.math.BigInteger; @@ -28,7 +28,6 @@ import java.util.Locale; import java.util.Map; import java.util.Set; -import java.util.TreeSet; public class IgnoredSourceFieldMapperTests extends MapperServiceTestCase { private DocumentMapper getDocumentMapperWithFieldLimit() throws IOException { @@ -343,8 +342,14 @@ public void testIgnoredArray() throws IOException { public void testEncodeFieldToMap() throws IOException { String value = randomAlphaOfLength(5); ParsedDocument parsedDocument = getParsedDocumentWithFieldLimit(b -> b.field("my_value", value)); - byte[] bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.NAME).binaryValue().bytes; - IgnoredSourceFieldMapper.MappedNameValue mappedNameValue = IgnoredSourceFieldMapper.decodeAsMap(bytes); + IgnoredSourceFieldMapper.MappedNameValue mappedNameValue; + if (IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled()) { + byte[] bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.ignoredFieldName("my_value")).binaryValue().bytes; + mappedNameValue = IgnoredSourceFieldMapper.decodeAsMapMultipleFieldValues(bytes).getFirst(); + } else { + byte[] bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.NAME).binaryValue().bytes; + mappedNameValue = IgnoredSourceFieldMapper.decodeAsMap(bytes); + } assertEquals("my_value", mappedNameValue.nameValue().name()); assertEquals(value, mappedNameValue.map().get("my_value")); } @@ -355,11 +360,23 @@ public void testEncodeObjectToMapAndDecode() throws IOException { ParsedDocument parsedDocument = getParsedDocumentWithFieldLimit( b -> { b.startObject("my_object").field("my_value", value).endObject(); } ); - byte[] bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.NAME).binaryValue().bytes; - IgnoredSourceFieldMapper.MappedNameValue mappedNameValue = IgnoredSourceFieldMapper.decodeAsMap(bytes); + byte[] bytes; + IgnoredSourceFieldMapper.MappedNameValue mappedNameValue; + if (IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled()) { + var byteRef = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.ignoredFieldName("my_object")).binaryValue(); + bytes = ArrayUtil.copyOfSubArray(byteRef.bytes, byteRef.offset, byteRef.length); + mappedNameValue = IgnoredSourceFieldMapper.decodeAsMapMultipleFieldValues(bytes).getFirst(); + } else { + bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.NAME).binaryValue().bytes; + 
mappedNameValue = IgnoredSourceFieldMapper.decodeAsMap(bytes); + } assertEquals("my_object", mappedNameValue.nameValue().name()); assertEquals(value, ((Map) mappedNameValue.map().get("my_object")).get("my_value")); - assertArrayEquals(bytes, IgnoredSourceFieldMapper.encodeFromMap(mappedNameValue, mappedNameValue.map())); + if (IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled()) { + assertArrayEquals(bytes, IgnoredSourceFieldMapper.encodeFromMapMultipleFieldValues(List.of(mappedNameValue))); + } else { + assertArrayEquals(bytes, IgnoredSourceFieldMapper.encodeFromMap(mappedNameValue)); + } } public void testEncodeArrayToMapAndDecode() throws IOException { @@ -369,11 +386,23 @@ public void testEncodeArrayToMapAndDecode() throws IOException { b.startObject().field("int_value", 20).endObject(); b.endArray(); }); - byte[] bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.NAME).binaryValue().bytes; - IgnoredSourceFieldMapper.MappedNameValue mappedNameValue = IgnoredSourceFieldMapper.decodeAsMap(bytes); + byte[] bytes; + IgnoredSourceFieldMapper.MappedNameValue mappedNameValue; + if (IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled()) { + var byteRef = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.ignoredFieldName("my_array")).binaryValue(); + bytes = ArrayUtil.copyOfSubArray(byteRef.bytes, byteRef.offset, byteRef.length); + mappedNameValue = IgnoredSourceFieldMapper.decodeAsMapMultipleFieldValues(bytes).getFirst(); + } else { + bytes = parsedDocument.rootDoc().getField(IgnoredSourceFieldMapper.NAME).binaryValue().bytes; + mappedNameValue = IgnoredSourceFieldMapper.decodeAsMap(bytes); + } assertEquals("my_array", mappedNameValue.nameValue().name()); assertThat((List) mappedNameValue.map().get("my_array"), Matchers.contains(Map.of("int_value", 10), Map.of("int_value", 20))); - assertArrayEquals(bytes, IgnoredSourceFieldMapper.encodeFromMap(mappedNameValue, mappedNameValue.map())); + if (IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled()) { + assertArrayEquals(bytes, IgnoredSourceFieldMapper.encodeFromMapMultipleFieldValues(List.of(mappedNameValue))); + } else { + assertArrayEquals(bytes, IgnoredSourceFieldMapper.encodeFromMap(mappedNameValue)); + } } public void testMultipleIgnoredFieldsRootObject() throws IOException { @@ -769,8 +798,6 @@ public void testIndexStoredArraySourceSingleLeafElementAndNull() throws IOExcept } public void testIndexStoredArraySourceSingleLeafElementInObjectArray() throws IOException { - roundtripMaskedFields.add("path.int_value.offsets"); - DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(mapping(b -> { b.startObject("path").field("synthetic_source_keep", "none").startObject("properties"); { @@ -857,8 +884,6 @@ public void testIndexStoredArraySourceRootObjectArray() throws IOException { } public void testIndexStoredArraySourceRootObjectArrayWithBypass() throws IOException { - roundtripMaskedFields.add("path.int_value.offsets"); - DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(mapping(b -> { b.startObject("path"); { @@ -911,8 +936,6 @@ public void testIndexStoredArraySourceNestedValueArray() throws IOException { } public void testIndexStoredArraySourceNestedValueArrayDisabled() throws IOException { - roundtripMaskedFields.add("path.obj.foo.offsets"); - DocumentMapper documentMapper = createMapperServiceWithStoredArraySource(mapping(b -> { b.startObject("path"); { @@ -1143,8 +1166,6 @@ public void testNestedArray() throws 
IOException { } public void testConflictingFieldNameAfterArray() throws IOException { - roundtripMaskedFields.add("path.to.id.offsets"); - DocumentMapper documentMapper = createSytheticSourceMapperService(mapping(b -> { b.startObject("path").startObject("properties"); { @@ -2468,15 +2489,19 @@ public void testSingleDeepIgnoredField() throws IOException { assertEquals("{\"top\":{\"level1\":{\"level2\":{\"n\":25}}}}", syntheticSource); } - private Set roundtripMaskedFields; - - @Before - public void resetRoundtripMaskedFields() { - roundtripMaskedFields = new TreeSet<>( - Set.of(SourceFieldMapper.RECOVERY_SOURCE_NAME, IgnoredSourceFieldMapper.NAME, SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME) - ); + private static String getIgnoredSourceFieldMask() { + return IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled() + ? IgnoredSourceFieldMapper.ignoredFieldName("*") + : IgnoredSourceFieldMapper.NAME; } + private final Set roundtripMaskedFields = Set.of( + SourceFieldMapper.RECOVERY_SOURCE_NAME, + SourceFieldMapper.RECOVERY_SOURCE_SIZE_NAME, + getIgnoredSourceFieldMask(), + "*.offsets" + ); + protected void validateRoundTripReader(String syntheticSource, DirectoryReader reader, DirectoryReader roundTripReader) throws IOException { // We exclude ignored source field since in some cases it contains an exact copy of a part of document source. @@ -2484,8 +2509,8 @@ protected void validateRoundTripReader(String syntheticSource, DirectoryReader r // and since the copy is exact, contents of ignored source are different. assertReaderEquals( "round trip " + syntheticSource, - new FieldMaskingReader(roundtripMaskedFields, reader), - new FieldMaskingReader(roundtripMaskedFields, roundTripReader) + new WildcardFieldMaskingReader(roundtripMaskedFields, reader), + new WildcardFieldMaskingReader(roundtripMaskedFields, roundTripReader) ); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/LongFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/LongFieldMapperTests.java index 0cdce5f1aa28a..e531034f0725e 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/LongFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/LongFieldMapperTests.java @@ -111,11 +111,9 @@ protected Number randomNumber() { if (randomBoolean()) { return randomDouble(); } - assumeFalse("https://github.com/elastic/elasticsearch/issues/70585", true); return randomDoubleBetween(Long.MIN_VALUE, Long.MAX_VALUE, true); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/70585") public void testFetchCoerced() throws IOException { assertFetch(randomFetchTestMapper(), "field", 3.783147882954537E18, randomFetchTestFormat()); } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java index ce5482b15b0ee..d78c7eba351e2 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldBlockLoaderTests.java @@ -41,7 +41,7 @@ public static Object expectedValue(Map fieldMapping, Object valu var fields = (Map) fieldMapping.get("fields"); if (fields != null) { - var keywordMultiFieldMapping = (Map) fields.get("kwd"); + var keywordMultiFieldMapping = (Map) fields.get("subfield_keyword"); Object normalizer = fields.get("normalizer"); boolean docValues = 
hasDocValues(keywordMultiFieldMapping, true); boolean store = keywordMultiFieldMapping.getOrDefault("store", false).equals(true); diff --git a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java index 6343aeea2d9de..c74e133611071 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/blockloader/TextFieldWithParentBlockLoaderTests.java @@ -15,9 +15,7 @@ import org.elasticsearch.datageneration.FieldType; import org.elasticsearch.datageneration.MappingGenerator; import org.elasticsearch.datageneration.Template; -import org.elasticsearch.datageneration.datasource.DataSourceHandler; -import org.elasticsearch.datageneration.datasource.DataSourceRequest; -import org.elasticsearch.datageneration.datasource.DataSourceResponse; +import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler; import org.elasticsearch.index.mapper.BlockLoaderTestCase; import org.elasticsearch.index.mapper.BlockLoaderTestRunner; import org.elasticsearch.index.mapper.MapperServiceTestCase; @@ -49,51 +47,8 @@ public TextFieldWithParentBlockLoaderTests(BlockLoaderTestCase.Params params) { // of text multi field in a keyword field. public void testBlockLoaderOfParentField() throws IOException { var template = new Template(Map.of("parent", new Template.Leaf("parent", FieldType.KEYWORD.toString()))); - var specification = buildSpecification(List.of(new DataSourceHandler() { - @Override - public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) { - // This is a bit tricky meta-logic. - // We want to customize mapping but to do this we need the mapping for the same field type - // so we use name to untangle this. - if (request.fieldName().equals("parent") == false) { - return null; - } - - return new DataSourceResponse.LeafMappingParametersGenerator(() -> { - var dataSource = request.dataSource(); - - var keywordParentMapping = dataSource.get( - new DataSourceRequest.LeafMappingParametersGenerator( - dataSource, - "_field", - FieldType.KEYWORD.toString(), - request.eligibleCopyToFields(), - request.dynamicMapping() - ) - ).mappingGenerator().get(); - - var textMultiFieldMapping = dataSource.get( - new DataSourceRequest.LeafMappingParametersGenerator( - dataSource, - "_field", - FieldType.TEXT.toString(), - request.eligibleCopyToFields(), - request.dynamicMapping() - ) - ).mappingGenerator().get(); - - // we don't need this here - keywordParentMapping.remove("copy_to"); - - textMultiFieldMapping.put("type", "text"); - textMultiFieldMapping.remove("fields"); - - keywordParentMapping.put("fields", Map.of("mf", textMultiFieldMapping)); - - return keywordParentMapping; - }); - } - })); + var specification = buildSpecification(List.of(new MultifieldAddonHandler(Map.of(FieldType.KEYWORD, List.of(FieldType.TEXT)), 1f))); + var mapping = new MappingGenerator(specification).generate(template); var fieldMapping = mapping.lookup().get("parent"); @@ -106,7 +61,7 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques ? 
createSytheticSourceMapperService(mappingXContent) : createMapperService(mappingXContent); - runner.runTest(mapperService, document, expected, "parent.mf"); + runner.runTest(mapperService, document, expected, "parent.subfield_text"); } @SuppressWarnings("unchecked") @@ -123,7 +78,7 @@ private Object expected(Map fieldMapping, Object value, BlockLoa } // we are using block loader of the text field itself - var textFieldMapping = (Map) ((Map) fieldMapping.get("fields")).get("mf"); + var textFieldMapping = (Map) ((Map) fieldMapping.get("fields")).get("subfield_text"); return TextFieldBlockLoaderTests.expectedValue(textFieldMapping, value, params, testContext); } } diff --git a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java index 02ef40eeda0ca..09d1ad47a1083 100644 --- a/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java +++ b/server/src/test/java/org/elasticsearch/index/mapper/vectors/DenseVectorFieldMapperTests.java @@ -65,7 +65,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN; import static org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase.randomNormalizedVector; -import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_NPROBE; +import static org.elasticsearch.index.codec.vectors.IVFVectorsFormat.DYNAMIC_VISIT_RATIO; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.DEFAULT_OVERSAMPLE; import static org.elasticsearch.index.mapper.vectors.DenseVectorFieldMapper.IVF_FORMAT; import static org.hamcrest.Matchers.containsString; @@ -1514,7 +1514,7 @@ public void testIVFParsing() throws IOException { .getIndexOptions(); assertEquals(3.0F, indexOptions.rescoreVector.oversample(), 0.0F); assertEquals(IVFVectorsFormat.DEFAULT_VECTORS_PER_CLUSTER, indexOptions.clusterSize); - assertEquals(DYNAMIC_NPROBE, indexOptions.defaultNProbe); + assertEquals(DYNAMIC_VISIT_RATIO, indexOptions.defaultVisitPercentage, 0.0); } { DocumentMapper mapperService = createDocumentMapper(fieldMapping(b -> { @@ -1525,7 +1525,7 @@ public void testIVFParsing() throws IOException { b.startObject("index_options"); b.field("type", "bbq_disk"); b.field("cluster_size", 1000); - b.field("default_n_probe", 10); + b.field("default_visit_percentage", 5.0); b.field(DenseVectorFieldMapper.RescoreVector.NAME, Map.of("oversample", 2.0f)); b.endObject(); })); @@ -1536,7 +1536,7 @@ public void testIVFParsing() throws IOException { .getIndexOptions(); assertEquals(2F, indexOptions.rescoreVector.oversample(), 0.0F); assertEquals(1000, indexOptions.clusterSize); - assertEquals(10, indexOptions.defaultNProbe); + assertEquals(5.0, indexOptions.defaultVisitPercentage, 0.0); } } diff --git a/server/src/test/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestActionTests.java b/server/src/test/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestActionTests.java index ac6c66a13b507..d13ee2f48e6dd 100644 --- a/server/src/test/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestActionTests.java +++ b/server/src/test/java/org/elasticsearch/rest/action/ingest/RestSimulateIngestActionTests.java @@ -15,6 +15,7 @@ import org.elasticsearch.action.bulk.BulkResponse; import org.elasticsearch.action.ingest.SimulateIndexResponse; import 
org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.rest.AbstractRestChannel; import org.elasticsearch.rest.RestResponse; @@ -23,6 +24,7 @@ import org.elasticsearch.xcontent.NamedXContentRegistry; import org.elasticsearch.xcontent.XContentType; +import java.io.IOException; import java.nio.ByteBuffer; import java.nio.charset.StandardCharsets; import java.util.List; @@ -157,9 +159,9 @@ private void testInputJsonConvertsToOutputJson(String inputJson, String expected public void testSimulateIngestRestToXContentListener() throws Exception { // First, make sure it works with success responses: BulkItemResponse[] responses = new BulkItemResponse[3]; - responses[0] = getSuccessBulkItemResponse("123", "{\"foo\": \"bar\"}"); + responses[0] = getSuccessBulkItemResponse("123", "{\"foo\": \"bar\"}", false); responses[1] = getFailureBulkItemResponse("678", "This has failed"); - responses[2] = getSuccessBulkItemResponse("456", "{\"bar\": \"baz\"}"); + responses[2] = getSuccessBulkItemResponse("456", "{\"bar\": \"baz\"}", true); BulkResponse bulkResponse = new BulkResponse(responses, randomLongBetween(0, 50000)); String expectedXContent = """ { @@ -183,7 +185,8 @@ public void testSimulateIngestRestToXContentListener() throws Exception { { "field" : "def" } - ] + ], + "effective_mapping" : { } } }, { @@ -215,7 +218,14 @@ public void testSimulateIngestRestToXContentListener() throws Exception { { "field" : "def" } - ] + ], + "effective_mapping" : { + "properties" : { + "foo" : { + "type" : "keyword" + } + } + } } } ] @@ -231,7 +241,7 @@ private BulkItemResponse getFailureBulkItemResponse(String id, String failureMes ); } - private BulkItemResponse getSuccessBulkItemResponse(String id, String source) { + private BulkItemResponse getSuccessBulkItemResponse(String id, String source, boolean hasMapping) throws IOException { ByteBuffer[] sourceByteBuffer = new ByteBuffer[1]; sourceByteBuffer[0] = ByteBuffer.wrap(source.getBytes(StandardCharsets.UTF_8)); return BulkItemResponse.success( @@ -245,7 +255,8 @@ private BulkItemResponse getSuccessBulkItemResponse(String id, String source) { XContentType.JSON, List.of("pipeline1", "pipeline2"), List.of("abc", "def"), - null + null, + hasMapping ? 
new CompressedXContent("{\"properties\":{\"foo\":{\"type\":\"keyword\"}}}") : null ) ); } diff --git a/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java b/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java index e602f9098b602..71583ce813154 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/AbstractIVFKnnVectorQueryTestCase.java @@ -101,10 +101,10 @@ public void setUp() throws Exception { format = new IVFVectorsFormat(128, 4); } - abstract AbstractIVFKnnVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter, int nProbe); + abstract AbstractIVFKnnVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter, float visitRatio); final AbstractIVFKnnVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter) { - return getKnnVectorQuery(field, query, k, queryFilter, 10); + return getKnnVectorQuery(field, query, k, queryFilter, 0.05f); } final AbstractIVFKnnVectorQuery getKnnVectorQuery(String field, float[] query, int k) { @@ -275,7 +275,8 @@ public void testNonVectorField() throws IOException { /** Test bad parameters */ public void testIllegalArguments() throws IOException { expectThrows(IllegalArgumentException.class, () -> getKnnVectorQuery("xx", new float[] { 1 }, 0)); - expectThrows(IllegalArgumentException.class, () -> getKnnVectorQuery("xx", new float[] { 1 }, 1, null, 0)); + expectThrows(IllegalArgumentException.class, () -> getKnnVectorQuery("xx", new float[] { 1 }, 1, null, -1)); + expectThrows(IllegalArgumentException.class, () -> getKnnVectorQuery("xx", new float[] { 1 }, 1, null, 2)); } public void testDifferentReader() throws IOException { diff --git a/server/src/test/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQueryTests.java b/server/src/test/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQueryTests.java index edfe597cf961b..95581ca19653b 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/DiversifyingChildrenIVFKnnFloatVectorQueryTests.java @@ -18,7 +18,7 @@ public class DiversifyingChildrenIVFKnnFloatVectorQueryTests extends AbstractDiv @Override Query getDiversifyingChildrenKnnQuery(String fieldName, float[] queryVector, Query childFilter, int k, BitSetProducer parentBitSet) { - return new DiversifyingChildrenIVFKnnFloatVectorQuery(fieldName, queryVector, k, k, childFilter, parentBitSet, -1); + return new DiversifyingChildrenIVFKnnFloatVectorQuery(fieldName, queryVector, k, k, childFilter, parentBitSet, 0); } @Override diff --git a/server/src/test/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQueryTests.java b/server/src/test/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQueryTests.java index 2c57b6958f9ca..7de22ec3c7fa0 100644 --- a/server/src/test/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQueryTests.java +++ b/server/src/test/java/org/elasticsearch/search/vectors/IVFKnnFloatVectorQueryTests.java @@ -26,8 +26,8 @@ public class IVFKnnFloatVectorQueryTests extends AbstractIVFKnnVectorQueryTestCase { @Override - IVFKnnFloatVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter, int nProbe) { - return new IVFKnnFloatVectorQuery(field, query, k, k, queryFilter, 
nProbe); + IVFKnnFloatVectorQuery getKnnVectorQuery(String field, float[] query, int k, Query queryFilter, float visitRatio) { + return new IVFKnnFloatVectorQuery(field, query, k, k, queryFilter, visitRatio); } @Override diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java b/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java index eab2149019204..ef6cc21e90d8a 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/FieldType.java @@ -23,6 +23,7 @@ import org.elasticsearch.datageneration.fields.leaf.IpFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.KeywordFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.LongFieldDataGenerator; +import org.elasticsearch.datageneration.fields.leaf.MatchOnlyTextFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.ScaledFloatFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.ShortFieldDataGenerator; import org.elasticsearch.datageneration.fields.leaf.TextFieldDataGenerator; @@ -50,7 +51,8 @@ public enum FieldType { TEXT("text"), IP("ip"), CONSTANT_KEYWORD("constant_keyword"), - WILDCARD("wildcard"); + WILDCARD("wildcard"), + MATCH_ONLY_TEXT("match_only_text"); private final String name; @@ -78,6 +80,7 @@ public FieldDataGenerator generator(String fieldName, DataSource dataSource) { case IP -> new IpFieldDataGenerator(dataSource); case CONSTANT_KEYWORD -> new ConstantKeywordFieldDataGenerator(); case WILDCARD -> new WildcardFieldDataGenerator(dataSource); + case MATCH_ONLY_TEXT -> new MatchOnlyTextFieldDataGenerator(dataSource); }; } @@ -101,6 +104,7 @@ public static FieldType tryParse(String name) { case "ip" -> FieldType.IP; case "constant_keyword" -> FieldType.CONSTANT_KEYWORD; case "wildcard" -> FieldType.WILDCARD; + case "match_only_text" -> FieldType.MATCH_ONLY_TEXT; default -> null; }; } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java index 2e234f8aec41c..8e759946f2ee4 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultMappingParametersHandler.java @@ -44,10 +44,11 @@ public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceReques case BOOLEAN -> booleanMapping(); case DATE -> dateMapping(); case GEO_POINT -> geoPointMapping(); - case TEXT -> textMapping(request); + case TEXT -> textMapping(); case IP -> ipMapping(); case CONSTANT_KEYWORD -> constantKeywordMapping(); case WILDCARD -> wildcardMapping(); + case MATCH_ONLY_TEXT -> matchOnlyTextMapping(); }); } @@ -96,8 +97,8 @@ private Supplier> keywordMapping(DataSourceRequest.LeafMappi } } - if (ESTestCase.randomDouble() <= 0.2) { - mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100)); + if (ESTestCase.randomDouble() <= 0.3) { + mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); @@ -196,21 +197,13 @@ private Supplier> geoPointMapping() { }; } - private Supplier> textMapping(DataSourceRequest.LeafMappingParametersGenerator request) { + private Supplier> textMapping() 
{ return () -> { var mapping = new HashMap(); mapping.put("store", ESTestCase.randomBoolean()); mapping.put("index", ESTestCase.randomBoolean()); - if (ESTestCase.randomDouble() <= 0.1) { - var keywordMultiFieldMapping = keywordMapping(request).get(); - keywordMultiFieldMapping.put("type", "keyword"); - keywordMultiFieldMapping.remove("copy_to"); - - mapping.put("fields", Map.of("kwd", keywordMultiFieldMapping)); - } - return mapping; }; } @@ -247,8 +240,8 @@ private Supplier> wildcardMapping() { return () -> { var mapping = new HashMap(); - if (ESTestCase.randomDouble() <= 0.2) { - mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 100)); + if (ESTestCase.randomDouble() <= 0.3) { + mapping.put("ignore_above", ESTestCase.randomIntBetween(1, 50)); } if (ESTestCase.randomDouble() <= 0.2) { mapping.put("null_value", ESTestCase.randomAlphaOfLengthBetween(0, 10)); @@ -258,6 +251,10 @@ private Supplier> wildcardMapping() { }; } + private Supplier> matchOnlyTextMapping() { + return HashMap::new; + } + public static HashMap commonMappingParameters() { var map = new HashMap(); map.put("store", ESTestCase.randomBoolean()); diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java index bf660779186ca..0938e59903099 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/DefaultObjectGenerationHandler.java @@ -20,6 +20,14 @@ import static org.elasticsearch.test.ESTestCase.randomRealisticUnicodeOfCodepointLengthBetween; public class DefaultObjectGenerationHandler implements DataSourceHandler { + + /** + * Field names will not be generated which start with `_reserved_`. Handlers can safely + * create field names starting with this prefix without the concern of randomly generated + * fields having the same name. + */ + public static final String RESERVED_FIELD_NAME_PREFIX = "_reserved_"; + @Override public DataSourceResponse.ChildFieldGenerator handle(DataSourceRequest.ChildFieldGenerator request) { return new DataSourceResponse.ChildFieldGenerator() { @@ -57,6 +65,9 @@ public String generateFieldName() { if (fieldName.indexOf('.') != -1) { continue; } + if (fieldName.startsWith(RESERVED_FIELD_NAME_PREFIX)) { + continue; + } return fieldName; } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java new file mode 100644 index 0000000000000..886629beaf9d2 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/datasource/MultifieldAddonHandler.java @@ -0,0 +1,97 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". 
+ */ + +package org.elasticsearch.datageneration.datasource; + +import org.elasticsearch.datageneration.FieldType; +import org.elasticsearch.test.ESTestCase; + +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class MultifieldAddonHandler implements DataSourceHandler { + + private static final String PLACEHOLDER = DefaultObjectGenerationHandler.RESERVED_FIELD_NAME_PREFIX + "multifield"; + private static final float DEFAULT_CHANCE_OF_CHILD_FIELD = 0.5f; + private final Map> subfieldTypes; + private final float chanceOfChildField; + + private static final List STRING_TYPES = List.of( + FieldType.TEXT, + FieldType.KEYWORD, + FieldType.MATCH_ONLY_TEXT, + FieldType.WILDCARD + ); + public static MultifieldAddonHandler STRING_TYPE_HANDLER = new MultifieldAddonHandler( + STRING_TYPES.stream().collect(Collectors.toMap(t -> t, t -> STRING_TYPES.stream().filter(s -> s != t).toList())) + ); + + public MultifieldAddonHandler(Map> subfieldTypes, float chanceOfChildField) { + this.subfieldTypes = subfieldTypes; + this.chanceOfChildField = chanceOfChildField; + } + + public MultifieldAddonHandler(Map> subfieldTypes) { + this(subfieldTypes, DEFAULT_CHANCE_OF_CHILD_FIELD); + } + + @Override + public DataSourceResponse.LeafMappingParametersGenerator handle(DataSourceRequest.LeafMappingParametersGenerator request) { + + // Need to delegate creation of the same type of field to other handlers. So skip request + // if it's for the placeholder name used when creating the child and parent fields. + if (request.fieldName().equals(PLACEHOLDER)) { + return null; + } + + FieldType parentType = FieldType.tryParse(request.fieldType()); + List childTypes = subfieldTypes.get(parentType); + if (childTypes == null) { + return null; + } + + return new DataSourceResponse.LeafMappingParametersGenerator(() -> { + assert parentType != null; + var parent = getMappingForType(parentType, request); + if (ESTestCase.randomFloat() > chanceOfChildField) { + return parent; + } + + var childType = ESTestCase.randomFrom(childTypes); + var child = getChildMappingForType(childType, request); + + child.put("type", childType.toString()); + String childName = "subfield_" + childType; + parent.put("fields", Map.of(childName, child)); + return parent; + }); + } + + private static Map getChildMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) { + Map mapping = getMappingForType(type, request); + mapping.remove("copy_to"); + return mapping; + } + + private static Map getMappingForType(FieldType type, DataSourceRequest.LeafMappingParametersGenerator request) { + return request.dataSource() + .get( + new DataSourceRequest.LeafMappingParametersGenerator( + request.dataSource(), + PLACEHOLDER, + type.toString(), + request.eligibleCopyToFields(), + request.dynamicMapping() + ) + ) + .mappingGenerator() + .get(); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java new file mode 100644 index 0000000000000..f4493fd9b4ee9 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/fields/leaf/MatchOnlyTextFieldDataGenerator.java @@ -0,0 +1,28 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.datageneration.fields.leaf; + +import org.elasticsearch.datageneration.FieldDataGenerator; +import org.elasticsearch.datageneration.datasource.DataSource; + +import java.util.Map; + +public class MatchOnlyTextFieldDataGenerator implements FieldDataGenerator { + private final FieldDataGenerator textGenerator; + + public MatchOnlyTextFieldDataGenerator(DataSource dataSource) { + this.textGenerator = new TextFieldDataGenerator(dataSource); + } + + @Override + public Object generateValue(Map fieldMapping) { + return textGenerator.generateValue(fieldMapping); + } +} diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java index 7adf98ef9d6ee..1503cbd8e9adb 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/matchers/source/FieldSpecificMatcher.java @@ -63,6 +63,7 @@ static Map matchers( put("shape", new ExactMatcher("shape", actualMappings, actualSettings, expectedMappings, expectedSettings)); put("geo_point", new GeoPointMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("text", new TextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); + put("match_only_text", new MatchOnlyTextMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("ip", new IpMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("constant_keyword", new ConstantKeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); put("wildcard", new WildcardMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings)); @@ -621,6 +622,10 @@ class TextMatcher implements FieldSpecificMatcher { this.expectedSettings = expectedSettings; } + public String type() { + return "text"; + } + @Override @SuppressWarnings("unchecked") public MatchResult match( @@ -643,7 +648,7 @@ public MatchResult match( if (multiFields != null) { var keywordMatcher = new KeywordMatcher(actualMappings, actualSettings, expectedMappings, expectedSettings); - var keywordFieldMapping = (Map) multiFields.get("kwd"); + var keywordFieldMapping = (Map) multiFields.get("subfield_keyword"); var keywordMatchResult = keywordMatcher.match(actual, expected, keywordFieldMapping, keywordFieldMapping); if (keywordMatchResult.isMatch()) { return MatchResult.match(); @@ -656,7 +661,7 @@ public MatchResult match( actualSettings, expectedMappings, expectedSettings, - "Values of type [text] don't match, " + prettyPrintCollections(actual, expected) + "Values of type [" + type() + "] don't match, " + prettyPrintCollections(actual, expected) ) ); } @@ -670,6 +675,22 @@ private Set normalize(List values) { } } + class MatchOnlyTextMatcher extends TextMatcher { + MatchOnlyTextMatcher( + XContentBuilder actualMappings, + Settings.Builder actualSettings, + XContentBuilder expectedMappings, + Settings.Builder expectedSettings + ) { + super(actualMappings, actualSettings, 
expectedMappings, expectedSettings); + } + + @Override + public String type() { + return "match_only_text"; + } + } + class IpMatcher extends GenericMappingAwareMatcher { IpMatcher( XContentBuilder actualMappings, diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java index be26db580edf6..cb102e185c9b5 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/LeafQueryGenerator.java @@ -12,10 +12,7 @@ import org.elasticsearch.datageneration.FieldType; import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import org.elasticsearch.test.ESTestCase; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; @@ -28,7 +25,7 @@ public interface LeafQueryGenerator { * @param type the type to build a query for * @return a generator that can build queries for this type */ - static LeafQueryGenerator buildForType(String type) { + static LeafQueryGenerator buildForType(String type, MappingPredicates mappingPredicates) { LeafQueryGenerator noQueries = (Map fieldMapping, String path, Object value) -> List.of(); FieldType fieldType = FieldType.tryParse(type); @@ -38,8 +35,9 @@ static LeafQueryGenerator buildForType(String type) { return switch (fieldType) { case KEYWORD -> new KeywordQueryGenerator(); - case TEXT -> new TextQueryGenerator(); case WILDCARD -> new WildcardQueryGenerator(); + case TEXT -> new TextQueryGenerator(); + case MATCH_ONLY_TEXT -> new MatchOnlyTextQueryGenerator(mappingPredicates); default -> noQueries; }; } @@ -53,13 +51,14 @@ public List generate(Map fieldMapping, String path return List.of(); } } - return List.of(QueryBuilders.termQuery(path, value)); + return List.of(QueryBuilders.termQuery(path, value), QueryBuilders.matchQuery(path, value)); } } class WildcardQueryGenerator implements LeafQueryGenerator { public List generate(Map fieldMapping, String path, Object value) { - // Queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144 + // TODO remove when fixed + // queries with emojis can currently fail due to https://github.com/elastic/elasticsearch/issues/132144 if (containsHighSurrogates((String) value)) { return List.of(); } @@ -76,25 +75,20 @@ public List generate(Map fieldMapping, String path } } - var results = new ArrayList(); - results.add(QueryBuilders.matchQuery(path, value)); - var phraseQuery = buildPhraseQuery(path, (String) value); - if (phraseQuery != null) { - results.add(phraseQuery); - } - return results; + return List.of(QueryBuilders.matchQuery(path, value), QueryBuilders.matchPhraseQuery(path, value)); } + } + + record MatchOnlyTextQueryGenerator(MappingPredicates mappingPredicates) implements LeafQueryGenerator { - private static QueryBuilder buildPhraseQuery(String path, String value) { - var tokens = Arrays.asList(value.split("[^a-zA-Z0-9]")); - if (tokens.isEmpty()) { - return null; + public List generate(Map fieldMapping, String path, Object value) { + // TODO remove when fixed + // match_only_text in nested context fails for synthetic source https://github.com/elastic/elasticsearch/issues/132352 + if (mappingPredicates.inNestedContext(path)) { + return List.of(QueryBuilders.matchQuery(path, value)); } - int low = ESTestCase.randomIntBetween(0, 
tokens.size() - 1); - int hi = ESTestCase.randomIntBetween(low + 1, tokens.size()); - var phrase = String.join(" ", tokens.subList(low, hi)); - return QueryBuilders.matchPhraseQuery(path, phrase); + return List.of(QueryBuilders.matchQuery(path, value), QueryBuilders.matchPhraseQuery(path, value)); } } diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java new file mode 100644 index 0000000000000..a1500e6612c0f --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/MappingPredicates.java @@ -0,0 +1,64 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.datageneration.queries; + +import org.elasticsearch.datageneration.Mapping; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Map; + +public class MappingPredicates { + + private final Mapping mapping; + + public MappingPredicates(Mapping mapping) { + this.mapping = mapping; + } + + record PathMapping(String path, Map mapping) {} + + private List getPathMapping(String path) { + String[] parts = path.split("\\."); + var result = new ArrayList(); + for (int i = 0; i < parts.length; i++) { + var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); + Map fieldMapping = mapping.lookup().get(pathToHere); + if (fieldMapping == null) { + break; + } + result.add(new PathMapping(pathToHere, fieldMapping)); + } + return result; + } + + public List getNestedPathPrefixes(String fullPath) { + return getPathMapping(fullPath).stream().filter(pm -> "nested".equals(pm.mapping().get("type"))).map(PathMapping::path).toList(); + } + + public boolean inNestedContext(String fullPath) { + return getPathMapping(fullPath).stream().anyMatch(pm -> "nested".equals(pm.mapping().get("type"))); + } + + @SuppressWarnings("unchecked") + public boolean isRuntimeField(String path) { + var topLevelMapping = (Map) mapping.raw().get("_doc"); + boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic")); + for (var pm : getPathMapping(path)) { + if (pm.mapping().containsKey("dynamic")) { + // lower down dynamic definitions override higher up behavior + inRuntimeContext = "runtime".equals(pm.mapping().get("dynamic")); + } + } + return inRuntimeContext; + } + +} diff --git a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java index 9db0b628f85da..7630d810acdc5 100644 --- a/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java +++ b/test/framework/src/main/java/org/elasticsearch/datageneration/queries/QueryGenerator.java @@ -14,17 +14,16 @@ import org.elasticsearch.index.query.QueryBuilder; import org.elasticsearch.index.query.QueryBuilders; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; -import java.util.Map; public class QueryGenerator { private final Mapping mapping; + private final 
MappingPredicates mappingPredicates; public QueryGenerator(Mapping mapping) { this.mapping = mapping; + this.mappingPredicates = new MappingPredicates(mapping); } public List generateQueries(String type, String path, Object value) { @@ -33,66 +32,21 @@ public List generateQueries(String type, String path, Object value return List.of(); } // Can handle dynamically mapped fields, but not runtime fields - if (isRuntimeField(path)) { + if (mappingPredicates.isRuntimeField(path)) { return List.of(); } - var leafQueryGenerator = LeafQueryGenerator.buildForType(type); + var leafQueryGenerator = LeafQueryGenerator.buildForType(type, mappingPredicates); var fieldMapping = mapping.lookup().get(path); var leafQueries = leafQueryGenerator.generate(fieldMapping, path, value); return leafQueries.stream().map(q -> wrapInNestedQuery(path, q)).toList(); } private QueryBuilder wrapInNestedQuery(String path, QueryBuilder leafQuery) { - String[] parts = path.split("\\."); - List nestedPaths = getNestedPathPrefixes(parts); + List nestedPaths = mappingPredicates.getNestedPathPrefixes(path); QueryBuilder query = leafQuery; for (String nestedPath : nestedPaths.reversed()) { query = QueryBuilders.nestedQuery(nestedPath, query, ScoreMode.Max); } return query; } - - @SuppressWarnings("unchecked") - private List getNestedPathPrefixes(String[] path) { - Map mapping = this.mapping.raw(); - mapping = (Map) mapping.get("_doc"); - mapping = (Map) mapping.get("properties"); - - var result = new ArrayList(); - for (int i = 0; i < path.length - 1; i++) { - var field = path[i]; - mapping = (Map) mapping.get(field); - - // dynamic field - if (mapping == null) { - break; - } - - boolean nested = "nested".equals(mapping.get("type")); - if (nested) { - result.add(String.join(".", Arrays.copyOfRange(path, 0, i + 1))); - } - mapping = (Map) mapping.get("properties"); - } - return result; - } - - @SuppressWarnings("unchecked") - private boolean isRuntimeField(String path) { - String[] parts = path.split("\\."); - var topLevelMapping = (Map) mapping.raw().get("_doc"); - boolean inRuntimeContext = "runtime".equals(topLevelMapping.get("dynamic")); - for (int i = 0; i < parts.length - 1; i++) { - var pathToHere = String.join(".", Arrays.copyOfRange(parts, 0, i + 1)); - Map fieldMapping = mapping.lookup().get(pathToHere); - if (fieldMapping == null) { - break; - } - if (fieldMapping.containsKey("dynamic")) { - // lower down dynamic definitions override higher up behavior - inRuntimeContext = "runtime".equals(fieldMapping.get("dynamic")); - } - } - return inRuntimeContext; - } } diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java index 71452917465a7..45370ea6358fa 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperServiceTestCase.java @@ -870,7 +870,8 @@ protected static String syntheticSource(DocumentMapper mapper, SourceFilter filt SourceLoader sourceLoader = new SourceLoader.Synthetic( filter, () -> mapper.mapping().syntheticFieldLoader(filter), - SourceFieldMetrics.NOOP + SourceFieldMetrics.NOOP, + mapper.mapping().ignoredSourceFormat() ); var sourceLeafLoader = sourceLoader.leaf(getOnlyLeafReader(reader), docIds); var storedFieldLoader = StoredFieldLoader.create(false, sourceLoader.requiredStoredFields()) diff --git 
a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java index 04e158a703af2..e078e33d51b18 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/MapperTestCase.java @@ -43,7 +43,6 @@ import org.elasticsearch.index.IndexSettings; import org.elasticsearch.index.IndexVersion; import org.elasticsearch.index.IndexVersions; -import org.elasticsearch.index.codec.tsdb.es819.BulkNumericDocValues; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.LuceneSyntheticSourceChangesSnapshot; import org.elasticsearch.index.fielddata.FieldDataContext; @@ -1541,7 +1540,7 @@ public void testSingletonLongBulkBlockReading() throws IOException { LeafReaderContext context = reader.leaves().get(0); var blockLoader = mapperService.fieldType("field").blockLoader(mockBlockContext); var columnReader = (BlockDocValuesReader.SingletonLongs) blockLoader.columnAtATimeReader(context); - assertThat(columnReader.numericDocValues, instanceOf(BulkNumericDocValues.class)); + assertThat(columnReader.numericDocValues, instanceOf(BlockLoader.OptionalColumnAtATimeReader.class)); var docBlock = TestBlock.docs(IntStream.range(0, 3).toArray()); var block = (TestBlock) columnReader.read(TestBlock.factory(), docBlock, 0); for (int i = 0; i < block.size(); i++) { @@ -1566,7 +1565,7 @@ public void testSingletonLongBulkBlockReading() throws IOException { LeafReaderContext context = reader.leaves().get(0); var blockLoader = mapperService.fieldType("field").blockLoader(mockBlockContext); var columnReader = (BlockDocValuesReader.SingletonLongs) blockLoader.columnAtATimeReader(context); - assertThat(columnReader.numericDocValues, not(instanceOf(BulkNumericDocValues.class))); + assertThat(columnReader.numericDocValues, not(instanceOf(BlockLoader.OptionalColumnAtATimeReader.class))); var docBlock = TestBlock.docs(IntStream.range(0, 3).toArray()); var block = (TestBlock) columnReader.read(TestBlock.factory(), docBlock, 0); assertThat(block.get(0), equalTo(expectedSampleValues[0])); diff --git a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java index 4b54a09135e7c..1ba51a91e9203 100644 --- a/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/index/mapper/NativeArrayIntegrationTestCase.java @@ -228,7 +228,14 @@ public void testSynthesizeRandomArrayInNestedContext() throws Exception { var reader = searcher.getDirectoryReader(); var document = reader.storedFields().document(0); Set storedFieldNames = new LinkedHashSet<>(document.getFields().stream().map(IndexableField::name).toList()); - assertThat(storedFieldNames, contains("_ignored_source")); + assertThat( + storedFieldNames, + contains( + IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled() + ? 
IgnoredSourceFieldMapper.ignoredFieldName("parent.field") + : IgnoredSourceFieldMapper.NAME + ) + ); assertThat(FieldInfos.getMergedFieldInfos(reader).fieldInfo("parent.field.offsets"), nullValue()); } } @@ -368,7 +375,15 @@ protected void verifySyntheticObjectArray(List> documents) throws var document = reader.storedFields().document(i); // Verify that there is ignored source because of leaf array being wrapped by object array: List storedFieldNames = document.getFields().stream().map(IndexableField::name).toList(); - assertThat(storedFieldNames, contains("_id", "_ignored_source")); + assertThat( + storedFieldNames, + contains( + "_id", + IgnoredSourceFieldMapper.IGNORED_SOURCE_FIELDS_PER_ENTRY_FF.isEnabled() + ? IgnoredSourceFieldMapper.ignoredFieldName("object") + : IgnoredSourceFieldMapper.NAME + ) + ); // Verify that there is no offset field: LeafReader leafReader = reader.leaves().get(0).reader(); diff --git a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java index 5c2f9646f03b3..7b36165a4f5a3 100644 --- a/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java +++ b/test/framework/src/main/java/org/elasticsearch/test/ESIntegTestCase.java @@ -163,6 +163,7 @@ import org.elasticsearch.test.disruption.ServiceDisruptionScheme; import org.elasticsearch.test.store.MockFSIndexStore; import org.elasticsearch.test.transport.MockTransportService; +import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportInterceptor; import org.elasticsearch.transport.TransportRequest; import org.elasticsearch.transport.TransportRequestHandler; @@ -204,11 +205,14 @@ import java.util.Optional; import java.util.Random; import java.util.Set; +import java.util.concurrent.BrokenBarrierException; import java.util.concurrent.Callable; import java.util.concurrent.CountDownLatch; +import java.util.concurrent.CyclicBarrier; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.TimeUnit; +import java.util.concurrent.TimeoutException; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; @@ -2915,4 +2919,39 @@ protected static void deletePipeline(String id) { ) ); } + + /** + * Submits as many tasks to the given data node's write thread pool as there are write threads. These tasks will wait on the barrier + * that is returned, which waits for total-write-threads + 1 callers. The caller can release the tasks by calling + * {@code barrier.await()} or interrupt them with {@code barrier.reset()}. + */ + public CyclicBarrier blockDataNodeIndexing(String dataNodeName) { + // Block the executor workers to simulate long-running write tasks + var threadpool = internalCluster().getInstance(ThreadPool.class, dataNodeName); + var executor = threadpool.executor(ThreadPool.Names.WRITE); + final var executorInfo = threadpool.info(ThreadPool.Names.WRITE); + final var executorThreads = executorInfo.getMax(); + var barrier = new CyclicBarrier(executorThreads + 1); + for (int i = 0; i < executorThreads; i++) { + executor.execute(() -> longAwait(barrier)); + } + logger.info( + "---> Submitted [" + + executorThreads + + "] tasks to the write thread pool that will wait on a barrier until released. 
Write thread pool info: " + + executorInfo + ); + return barrier; + } + + private static void longAwait(CyclicBarrier barrier) { + try { + barrier.await(30, TimeUnit.SECONDS); + } catch (BrokenBarrierException | TimeoutException e) { + throw new AssertionError(e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + throw new AssertionError(e); + } + } } diff --git a/test/framework/src/main/java/org/elasticsearch/test/WildcardFieldMaskingReader.java b/test/framework/src/main/java/org/elasticsearch/test/WildcardFieldMaskingReader.java new file mode 100644 index 0000000000000..46318b46448a9 --- /dev/null +++ b/test/framework/src/main/java/org/elasticsearch/test/WildcardFieldMaskingReader.java @@ -0,0 +1,80 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the "Elastic License + * 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side + * Public License v 1"; you may not use this file except in compliance with, at + * your election, the "Elastic License 2.0", the "GNU Affero General Public + * License v3.0 only", or the "Server Side Public License, v 1". + */ + +package org.elasticsearch.test; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.FilterDirectoryReader; +import org.apache.lucene.index.FilterLeafReader; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.tests.index.FieldFilterLeafReader; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.CharacterRunAutomaton; +import org.elasticsearch.common.xcontent.support.XContentMapValues; + +import java.io.IOException; +import java.util.Set; +import java.util.TreeSet; + +public class WildcardFieldMaskingReader extends FilterDirectoryReader { + private final Set patterns; + + public WildcardFieldMaskingReader(String pattern, DirectoryReader in) throws IOException { + this(Set.of(pattern), in); + } + + public WildcardFieldMaskingReader(Set patterns, DirectoryReader in) throws IOException { + super(in, new FilterDirectoryReader.SubReaderWrapper() { + @Override + public LeafReader wrap(LeafReader reader) { + var matcher = XContentMapValues.compileAutomaton( + patterns.toArray(String[]::new), + new CharacterRunAutomaton(Automata.makeAnyString()) + ); + Set fields = new TreeSet<>(); + + for (var fieldInfo : reader.getFieldInfos()) { + String fieldName = fieldInfo.name; + if (matcher.run(fieldName)) { + fields.add(fieldName); + } + } + + return new FilterLeafReader(new FieldFilterLeafReader(reader, fields, true)) { + // FieldFilterLeafReader does not forward cache helpers + // since it considers it is illegal because of the fact + // that it changes the content of the index. 
However we + // want this behavior for tests, and security plugins + // are careful to only use the cache when it's valid + + @Override + public CacheHelper getReaderCacheHelper() { + return reader.getReaderCacheHelper(); + } + + @Override + public CacheHelper getCoreCacheHelper() { + return reader.getCoreCacheHelper(); + } + }; + } + }); + this.patterns = patterns; + } + + @Override + protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException { + return new WildcardFieldMaskingReader(patterns, in); + } + + @Override + public CacheHelper getReaderCacheHelper() { + return in.getReaderCacheHelper(); + } +} diff --git a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/ttest/TTestAggregatorTests.java b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/ttest/TTestAggregatorTests.java index 14898647c8e4b..167530e5a6b01 100644 --- a/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/ttest/TTestAggregatorTests.java +++ b/x-pack/plugin/analytics/src/test/java/org/elasticsearch/xpack/analytics/ttest/TTestAggregatorTests.java @@ -440,7 +440,6 @@ public void testEmptyBucket() throws IOException { }, new AggTestConfig(histogram, fieldType1, fieldType2, fieldTypePart)); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/54365") public void testFormatter() throws IOException { TTestType tTestType = randomFrom(TTestType.values()); MappedFieldType fieldType1 = new NumberFieldMapper.NumberFieldType("a", NumberFieldMapper.NumberType.INTEGER); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java index 18c13860efd6a..8bd64086a5c4e 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/DocumentSubsetBitsetCache.java @@ -151,8 +151,8 @@ public DocumentSubsetBitsetCache(Settings settings, ThreadPool threadPool) { } @Override - public void onClose(IndexReader.CacheKey ownerCoreCacheKey) { - final Set keys = keysByIndex.remove(ownerCoreCacheKey); + public void onClose(IndexReader.CacheKey indexKey) { + final Set keys = keysByIndex.remove(indexKey); if (keys != null) { // Because this Set has been removed from the map, and the only update to the set is performed in a // Map#compute call, it should not be possible to get a concurrent modification here. 
@@ -164,10 +164,10 @@ public void onClose(IndexReader.CacheKey ownerCoreCacheKey) { * Cleanup (synchronize) the internal state when an object is removed from the primary cache */ private void onCacheEviction(RemovalNotification notification) { - final BitsetCacheKey bitsetKey = notification.getKey(); - final IndexReader.CacheKey indexKey = bitsetKey.index; - if (keysByIndex.getOrDefault(indexKey, Set.of()).contains(bitsetKey) == false) { - // If the bitsetKey isn't in the lookup map, then there's nothing to synchronize + final BitsetCacheKey cacheKey = notification.getKey(); + final IndexReader.CacheKey indexKey = cacheKey.indexKey; + if (keysByIndex.getOrDefault(indexKey, Set.of()).contains(cacheKey) == false) { + // If the cacheKey isn't in the lookup map, then there's nothing to synchronize return; } // We push this to a background thread, so that it reduces the risk of blocking searches, but also so that the lock management is @@ -177,9 +177,9 @@ private void onCacheEviction(RemovalNotification notific cleanupExecutor.submit(() -> { try (ReleasableLock ignored = cacheEvictionLock.acquire()) { // it's possible for the key to be back in the cache if it was immediately repopulated after it was evicted, so check - if (bitsetCache.get(bitsetKey) == null) { + if (bitsetCache.get(cacheKey) == null) { // key is no longer in the cache, make sure it is no longer in the lookup map either. - Optional.ofNullable(keysByIndex.get(indexKey)).ifPresent(set -> set.remove(bitsetKey)); + Optional.ofNullable(keysByIndex.get(indexKey)).ifPresent(set -> set.remove(cacheKey)); } } }); @@ -325,12 +325,17 @@ public Map usageStats() { private static final class BitsetCacheKey { - final IndexReader.CacheKey index; + final IndexReader.CacheKey indexKey; final Query query; + final int hashCode; - private BitsetCacheKey(IndexReader.CacheKey index, Query query) { - this.index = index; + private BitsetCacheKey(IndexReader.CacheKey indexKey, Query query) { + this.indexKey = indexKey; this.query = query; + // compute the hashCode eagerly, since it's used multiple times in the cache implementation anyway -- the query here will + // be a ConstantScoreQuery around a BooleanQuery, and BooleanQuery already *lazily* caches the hashCode, so this isn't + // altogether that much faster in reality, but it makes it more explicit here that we're doing this + this.hashCode = computeHashCode(); } @Override @@ -342,17 +347,23 @@ public boolean equals(Object other) { return false; } final BitsetCacheKey that = (BitsetCacheKey) other; - return Objects.equals(this.index, that.index) && Objects.equals(this.query, that.query); + return Objects.equals(this.indexKey, that.indexKey) && Objects.equals(this.query, that.query); + } + + private int computeHashCode() { + int result = indexKey.hashCode(); + result = 31 * result + query.hashCode(); + return result; } @Override public int hashCode() { - return Objects.hash(index, query); + return hashCode; } @Override public String toString() { - return getClass().getSimpleName() + "(" + index + "," + query + ")"; + return getClass().getSimpleName() + "(" + indexKey + "," + query + ")"; } } @@ -362,15 +373,15 @@ public String toString() { */ void verifyInternalConsistency() { this.bitsetCache.keys().forEach(bck -> { - final Set set = this.keysByIndex.get(bck.index); + final Set set = this.keysByIndex.get(bck.indexKey); if (set == null) { throw new IllegalStateException( - "Key [" + bck + "] is in the cache, but there is no entry for [" + bck.index + "] in the lookup map" + "Key [" + bck + "] is in 
the cache, but there is no entry for [" + bck.indexKey + "] in the lookup map" ); } if (set.contains(bck) == false) { throw new IllegalStateException( - "Key [" + bck + "] is in the cache, but the lookup entry for [" + bck.index + "] does not contain that key" + "Key [" + bck + "] is in the cache, but the lookup entry for [" + bck.indexKey + "] does not contain that key" ); } }); diff --git a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java index 908f58c5f9147..adf12490a7d90 100644 --- a/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java +++ b/x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReader.java @@ -396,12 +396,37 @@ public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { if (topValue instanceof Map || topValue instanceof List) { // The field contains an object or an array, reconstruct it from the transformed map in case // any subfield has been filtered out. - visitor.binaryField(fieldInfo, IgnoredSourceFieldMapper.encodeFromMap(mappedNameValue, transformedField)); + visitor.binaryField(fieldInfo, IgnoredSourceFieldMapper.encodeFromMap(mappedNameValue.withMap(transformedField))); } else { // The field contains a leaf value, and it hasn't been filtered out. It is safe to propagate the original value. visitor.binaryField(fieldInfo, value); } } + } else if (fieldInfo.name.startsWith(IgnoredSourceFieldMapper.NAME)) { + List mappedNameValues = IgnoredSourceFieldMapper.decodeAsMapMultipleFieldValues( + value + ); + List filteredNameValues = new ArrayList<>(mappedNameValues.size()); + boolean didFilter = false; + for (var mappedNameValue : mappedNameValues) { + Map transformedField = filter(mappedNameValue.map(), filter, 0); + if (transformedField.isEmpty()) { + didFilter = true; + continue; + } + var topValue = mappedNameValue.map().values().iterator().next(); + if (topValue instanceof Map || topValue instanceof List) { + didFilter = true; + } + filteredNameValues.add(mappedNameValue.withMap(transformedField)); + } + if (didFilter) { + if (filteredNameValues.isEmpty() == false) { + visitor.binaryField(fieldInfo, IgnoredSourceFieldMapper.encodeFromMapMultipleFieldValues(filteredNameValues)); + } + } else { + visitor.binaryField(fieldInfo, value); + } } else { visitor.binaryField(fieldInfo, value); } diff --git a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java index 86f3e718c90b1..0a072a685fd1a 100644 --- a/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java +++ b/x-pack/plugin/core/src/test/java/org/elasticsearch/xpack/core/security/authz/accesscontrol/FieldSubsetReaderTests.java @@ -65,6 +65,7 @@ import org.elasticsearch.common.xcontent.XContentHelper; import org.elasticsearch.core.IOUtils; import org.elasticsearch.index.IndexVersion; +import org.elasticsearch.index.IndexVersions; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.FieldNamesFieldMapper; import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; @@ -684,7 +685,9 @@ public void 
testSourceFilteringIntegration() throws Exception { } public void testIgnoredSourceFilteringIntegration() throws Exception { + IndexVersion indexVersion = randomBoolean() ? getVersion() : IndexVersions.MATCH_ONLY_TEXT_STORED_AS_BYTES; DocumentMapper mapper = createMapperService( + indexVersion, Settings.builder() .put("index.mapping.total_fields.limit", 1) .put("index.mapping.total_fields.ignore_dynamic_beyond_limit", true) @@ -708,8 +711,14 @@ public void testIgnoredSourceFilteringIntegration() throws Exception { iw.addDocuments(doc.docs()); iw.close(); + String ignoredSourceFieldPattern = IgnoredSourceFieldMapper.ignoredSourceFormat( + indexVersion + ) == IgnoredSourceFieldMapper.IgnoredSourceFormat.PER_FIELD_IGNORED_SOURCE + ? IgnoredSourceFieldMapper.ignoredFieldName("*") + : IgnoredSourceFieldMapper.NAME; + { - Automaton automaton = Automatons.patterns(Arrays.asList("fieldA", IgnoredSourceFieldMapper.NAME)); + Automaton automaton = Automatons.patterns(Arrays.asList("fieldA", ignoredSourceFieldPattern)); try ( DirectoryReader indexReader = FieldSubsetReader.wrap( wrapInMockESDirectoryReader(DirectoryReader.open(directory)), @@ -740,7 +749,7 @@ public void testIgnoredSourceFilteringIntegration() throws Exception { } { - Automaton automaton = Automatons.patterns(Arrays.asList("obj.fieldC", IgnoredSourceFieldMapper.NAME)); + Automaton automaton = Automatons.patterns(Arrays.asList("obj.fieldC", ignoredSourceFieldPattern)); try ( DirectoryReader indexReader = FieldSubsetReader.wrap( wrapInMockESDirectoryReader(DirectoryReader.open(directory)), @@ -772,7 +781,7 @@ public void testIgnoredSourceFilteringIntegration() throws Exception { } { - Automaton automaton = Automatons.patterns(Arrays.asList("arr.fieldD", IgnoredSourceFieldMapper.NAME)); + Automaton automaton = Automatons.patterns(Arrays.asList("arr.fieldD", ignoredSourceFieldPattern)); try ( DirectoryReader indexReader = FieldSubsetReader.wrap( wrapInMockESDirectoryReader(DirectoryReader.open(directory)), diff --git a/x-pack/plugin/deprecation/qa/src/javaRestTest/java/org/elasticsearch/xpack/deprecation/DeprecationHttpIT.java b/x-pack/plugin/deprecation/qa/src/javaRestTest/java/org/elasticsearch/xpack/deprecation/DeprecationHttpIT.java index df17c779a15df..389fd7d6843f7 100644 --- a/x-pack/plugin/deprecation/qa/src/javaRestTest/java/org/elasticsearch/xpack/deprecation/DeprecationHttpIT.java +++ b/x-pack/plugin/deprecation/qa/src/javaRestTest/java/org/elasticsearch/xpack/deprecation/DeprecationHttpIT.java @@ -179,7 +179,6 @@ private Response cleanupSettings() throws IOException { /** * Attempts to do a scatter/gather request that expects unique responses per sub-request. 
*/ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/19222") public void testUniqueDeprecationResponsesMergedTogether() throws IOException { final String[] indices = new String[randomIntBetween(2, 5)]; diff --git a/x-pack/plugin/eql/qa/correctness/src/javaRestTest/java/org/elasticsearch/xpack/eql/EsEQLCorrectnessIT.java b/x-pack/plugin/eql/qa/correctness/src/javaRestTest/java/org/elasticsearch/xpack/eql/EsEQLCorrectnessIT.java index e46082797ecfd..214c83a508e95 100644 --- a/x-pack/plugin/eql/qa/correctness/src/javaRestTest/java/org/elasticsearch/xpack/eql/EsEQLCorrectnessIT.java +++ b/x-pack/plugin/eql/qa/correctness/src/javaRestTest/java/org/elasticsearch/xpack/eql/EsEQLCorrectnessIT.java @@ -13,7 +13,6 @@ import org.apache.http.HttpHost; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.lucene.tests.util.LuceneTestCase.AwaitsFix; import org.apache.lucene.tests.util.TimeUnits; import org.elasticsearch.client.HttpAsyncResponseConsumerFactory; import org.elasticsearch.client.Request; @@ -47,7 +46,6 @@ @TimeoutSuite(millis = 30 * TimeUnits.MINUTE) @TestLogging(value = "org.elasticsearch.xpack.eql.EsEQLCorrectnessIT:INFO", reason = "Log query execution time") -@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/112572") public class EsEQLCorrectnessIT extends ESRestTestCase { private static final String PARAM_FORMATTING = "%1$s"; diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/ComputeBlockLoaderFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/ComputeBlockLoaderFactory.java index 20e7ffc4ca2cb..9c895d8e705c2 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/ComputeBlockLoaderFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/ComputeBlockLoaderFactory.java @@ -7,10 +7,8 @@ package org.elasticsearch.compute.lucene.read; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; -import org.elasticsearch.compute.data.BytesRefBlock; import org.elasticsearch.core.Releasable; class ComputeBlockLoaderFactory extends DelegatingBlockLoaderFactory implements Releasable { @@ -35,9 +33,4 @@ public void close() { nullBlock.close(); } } - - @Override - public BytesRefBlock constantBytes(BytesRef value, int count) { - return factory.newConstantBytesRefBlockWith(value, count); - } } diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java index 5b23aceda7db1..814a5c1a5c8a3 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/DelegatingBlockLoaderFactory.java @@ -9,9 +9,15 @@ import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; +import org.elasticsearch.compute.data.BytesRefBlock; +import org.elasticsearch.compute.data.BytesRefVector; import org.elasticsearch.compute.data.ElementType; +import org.elasticsearch.compute.data.IntVector; +import 
org.elasticsearch.compute.data.OrdinalBytesRefVector; +import org.elasticsearch.core.Releasables; import org.elasticsearch.index.mapper.BlockLoader; public abstract class DelegatingBlockLoaderFactory implements BlockLoader.BlockFactory { @@ -41,6 +47,27 @@ public BlockLoader.BytesRefBuilder bytesRefs(int expectedCount) { return factory.newBytesRefBlockBuilder(expectedCount); } + @Override + public BytesRefBlock constantBytes(BytesRef value, int count) { + if (count == 1) { + return factory.newConstantBytesRefBlockWith(value, count); + } + BytesRefVector dict = null; + IntVector ordinals = null; + boolean success = false; + try { + dict = factory.newConstantBytesRefVector(value, 1); + ordinals = factory.newConstantIntVector(0, count); + var result = new OrdinalBytesRefVector(ordinals, dict).asBlock(); + success = true; + return result; + } finally { + if (success == false) { + Releasables.closeExpectNoException(dict, ordinals); + } + } + } + @Override public BlockLoader.DoubleBuilder doublesFromDocValues(int expectedCount) { return factory.newDoubleBlockBuilder(expectedCount).mvOrdering(Block.MvOrdering.SORTED_ASCENDING); diff --git a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java index e197861e9b701..26a4966bb8e34 100644 --- a/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java +++ b/x-pack/plugin/esql/compute/src/main/java/org/elasticsearch/compute/lucene/read/TimeSeriesExtractFieldOperator.java @@ -10,7 +10,6 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedDocValues; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.compute.data.Block; import org.elasticsearch.compute.data.BlockFactory; import org.elasticsearch.compute.data.BytesRefBlock; @@ -202,11 +201,6 @@ public BlockLoader.Block constantNulls(int count) { throw new UnsupportedOperationException("must not be used by column readers"); } - @Override - public BlockLoader.Block constantBytes(BytesRef value, int count) { - throw new UnsupportedOperationException("must not be used by column readers"); - } - @Override public BlockLoader.SingletonOrdinalsBuilder singletonOrdinalsBuilder(SortedDocValues ordinals, int count) { throw new UnsupportedOperationException("must not be used by column readers"); diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec index d9e1ff408c739..46d80609a06bf 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-cosine-similarity.csv-spec @@ -75,6 +75,19 @@ similarity:double avg:double | min:double | max:double 0.832 | 0.5 | 1.0 +; + +similarityWithNull +required_capability: cosine_vector_similarity_function +required_capability: vector_similarity_functions_support_null + +from colors +| eval similarity = v_cosine(rgb_vector, null) +| stats total_null = count(*) where similarity is null +; + +total_null:long +59 ; # TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec 
b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec index 65bc4b9a365ce..b6d32b5ae651b 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-dot-product.csv-spec @@ -27,15 +27,15 @@ old lace | 60563.0 // end::vector-dot-product-result[] ; - similarityAsPartOfExpression - required_capability: dot_product_vector_similarity_function - - from colors - | eval score = round((1 + v_dot_product(rgb_vector, [0, 255, 255]) / 2), 3) - | sort score desc, color asc - | limit 10 - | keep color, score - ; +similarityAsPartOfExpression +required_capability: dot_product_vector_similarity_function + +from colors +| eval score = round((1 + v_dot_product(rgb_vector, [0, 255, 255]) / 2), 3) +| sort score desc, color asc +| limit 10 +| keep color, score +; color:text | score:double azure | 32513.75 @@ -62,18 +62,32 @@ similarity:double 4.5 ; - similarityWithStats - required_capability: dot_product_vector_similarity_function - - from colors - | eval similarity = round(v_dot_product(rgb_vector, [0, 255, 255]), 3) - | stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) - ; +similarityWithStats +required_capability: dot_product_vector_similarity_function + +from colors +| eval similarity = round(v_dot_product(rgb_vector, [0, 255, 255]), 3) +| stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) +; avg:double | min:double | max:double 39519.017 | 0.5 | 65025.5 ; +similarityWithNull +required_capability: dot_product_vector_similarity_function +required_capability: vector_similarity_functions_support_null + +from colors +| eval similarity = v_dot_product(rgb_vector, null) +| stats total_null = count(*) where similarity is null +; + +total_null:long +59 +; + + # TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector similarityWithRow-Ignore required_capability: dot_product_vector_similarity_function diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec index 4a7b4e004d117..53f550dd4fe1f 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l1-norm.csv-spec @@ -27,15 +27,15 @@ gold | 550.0 // end::vector-l1-norm-result[] ; - similarityAsPartOfExpression - required_capability: l1_norm_vector_similarity_function - - from colors - | eval score = round((1 + v_l1_norm(rgb_vector, [0, 255, 255]) / 2), 3) - | sort score desc, color asc - | limit 10 - | keep color, score - ; +similarityAsPartOfExpression +required_capability: l1_norm_vector_similarity_function + +from colors +| eval score = round((1 + v_l1_norm(rgb_vector, [0, 255, 255]) / 2), 3) +| sort score desc, color asc +| limit 10 +| keep color, score +; color:text | score:double red | 383.5 @@ -62,18 +62,31 @@ similarity:double 3.0 ; - similarityWithStats - required_capability: l1_norm_vector_similarity_function - - from colors - | eval similarity = round(v_l1_norm(rgb_vector, [0, 255, 255]), 3) - | stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) - ; +similarityWithStats +required_capability: l1_norm_vector_similarity_function + +from colors +| eval similarity = round(v_l1_norm(rgb_vector, [0, 255, 255]), 3) +| stats avg = round(avg(similarity), 3), min = min(similarity), max = 
max(similarity) +; avg:double | min:double | max:double 391.254 | 0.0 | 765.0 ; +similarityWithNull +required_capability: l1_norm_vector_similarity_function +required_capability: vector_similarity_functions_support_null + +from colors +| eval similarity = v_l1_norm(rgb_vector, null) +| stats total_null = count(*) where similarity is null +; + +total_null:long +59 +; + # TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector similarityWithRow-Ignore required_capability: l1_norm_vector_similarity_function diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec index c623a21ca6885..03a094ed93cad 100644 --- a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-l2-norm.csv-spec @@ -30,12 +30,12 @@ tomato | 351.0227966308594 similarityAsPartOfExpression required_capability: l2_norm_vector_similarity_function - from colors - | eval score = round((1 + v_l2_norm(rgb_vector, [0, 255, 255]) / 2), 3) - | sort score desc, color asc - | limit 10 - | keep color, score - ; +from colors +| eval score = round((1 + v_l2_norm(rgb_vector, [0, 255, 255]) / 2), 3) +| sort score desc, color asc +| limit 10 +| keep color, score +; color:text | score:double red | 221.836 @@ -62,18 +62,31 @@ similarity:double 1.732 ; - similarityWithStats - required_capability: l2_norm_vector_similarity_function - - from colors - | eval similarity = round(v_l2_norm(rgb_vector, [0, 255, 255]), 3) - | stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) - ; +similarityWithStats +required_capability: l2_norm_vector_similarity_function + +from colors +| eval similarity = round(v_l2_norm(rgb_vector, [0, 255, 255]), 3) +| stats avg = round(avg(similarity), 3), min = min(similarity), max = max(similarity) +; avg:double | min:double | max:double 274.974 | 0.0 | 441.673 ; +similarityWithNull +required_capability: l2_norm_vector_similarity_function +required_capability: vector_similarity_functions_support_null + +from colors +| eval similarity = v_l2_norm(rgb_vector, null) +| stats total_null = count(*) where similarity is null +; + +total_null:long +59 +; + # TODO Need to implement a conversion function to convert a non-foldable row to a dense_vector similarityWithRow-Ignore required_capability: l2_norm_vector_similarity_function diff --git a/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec new file mode 100644 index 0000000000000..c670cb9ec678e --- /dev/null +++ b/x-pack/plugin/esql/qa/testFixtures/src/main/resources/vector-magnitude.csv-spec @@ -0,0 +1,87 @@ + # Tests for v_magnitude scalar function + + magnitudeWithVectorField + required_capability: magnitude_scalar_vector_function + +// tag::vector-magnitude[] + from colors + | eval magnitude = v_magnitude(rgb_vector) + | sort magnitude desc, color asc +// end::vector-magnitude[] + | limit 10 + | keep color, magnitude + ; + +// tag::vector-magnitude-result[] +color:text | magnitude:double +white | 441.6729431152344 +snow | 435.9185791015625 +azure | 433.1858825683594 +ivory | 433.1858825683594 +mint cream | 433.0704345703125 +sea shell | 426.25579833984375 +honeydew | 424.5291442871094 +old lace | 420.6352233886719 +corn silk | 418.2451477050781 +linen | 415.93267822265625 +// 
end::vector-magnitude-result[] +; + + magnitudeAsPartOfExpression + required_capability: magnitude_scalar_vector_function + + from colors + | eval score = round((1 + v_magnitude(rgb_vector) / 2), 3) + | sort score desc, color asc + | limit 10 + | keep color, score + ; + +color:text | score:double +white | 221.836 +snow | 218.959 +azure | 217.593 +ivory | 217.593 +mint cream | 217.535 +sea shell | 214.128 +honeydew | 213.265 +old lace | 211.318 +corn silk | 210.123 +linen | 208.966 +; + +magnitudeWithLiteralVectors +required_capability: magnitude_scalar_vector_function + +row a = 1 +| eval magnitude = round(v_magnitude([1, 2, 3]), 3) +| keep magnitude +; + +magnitude:double +3.742 +; + + magnitudeWithStats + required_capability: magnitude_scalar_vector_function + + from colors + | eval magnitude = round(v_magnitude(rgb_vector), 3) + | stats avg = round(avg(magnitude), 3), min = min(magnitude), max = max(magnitude) + ; + +avg:double | min:double | max:double +313.692 | 0.0 | 441.673 +; + +magnitudeWithNull +required_capability: magnitude_scalar_vector_function + +row a = 1 +| eval magnitude = v_magnitude(null) +| keep magnitude +; + +magnitude:double +null +; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterCancellationIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterCancellationIT.java index 191c44cec51d8..866ea0d6f32b0 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterCancellationIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterCancellationIT.java @@ -81,27 +81,6 @@ private void createRemoteIndex(int numDocs) throws Exception { bulk.get(); } - private void createLocalIndex(int numDocs) throws Exception { - XContentBuilder mapping = JsonXContent.contentBuilder().startObject(); - mapping.startObject("runtime"); - { - mapping.startObject("const"); - { - mapping.field("type", "long"); - mapping.startObject("script").field("source", "").field("lang", "pause").endObject(); - } - mapping.endObject(); - } - mapping.endObject(); - mapping.endObject(); - client(LOCAL_CLUSTER).admin().indices().prepareCreate("test").setMapping(mapping).get(); - BulkRequestBuilder bulk = client(LOCAL_CLUSTER).prepareBulk("test").setRefreshPolicy(WriteRequest.RefreshPolicy.IMMEDIATE); - for (int i = 0; i < numDocs; i++) { - bulk.add(new IndexRequest().source("foo", i)); - } - bulk.get(); - } - public void testCancel() throws Exception { createRemoteIndex(between(10, 100)); EsqlQueryRequest request = EsqlQueryRequest.syncEsqlQueryRequest(); diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java index d48f8af9c97e3..631e3575d2f60 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/CrossClusterEnrichIT.java @@ -405,7 +405,7 @@ public void testAggThenEnrichRemote() { | sort vendor """, enrichHosts(Enrich.Mode.ANY), enrichVendors(Enrich.Mode.REMOTE)); var error = expectThrows(VerificationException.class, () -> runQuery(query, randomBoolean()).close()); - assertThat(error.getMessage(), containsString("ENRICH with remote policy can't be executed after 
STATS")); + assertThat(error.getMessage(), containsString("ENRICH with remote policy can't be executed after [stats c = COUNT(*) by os]@4:3")); } public void testEnrichCoordinatorThenEnrichRemote() { @@ -417,10 +417,7 @@ public void testEnrichCoordinatorThenEnrichRemote() { | sort vendor """, enrichHosts(Enrich.Mode.COORDINATOR), enrichVendors(Enrich.Mode.REMOTE)); var error = expectThrows(VerificationException.class, () -> runQuery(query, randomBoolean()).close()); - assertThat( - error.getMessage(), - containsString("ENRICH with remote policy can't be executed after another ENRICH with coordinator policy") - ); + assertThat(error.getMessage(), containsString("ENRICH with remote policy can't be executed after [ENRICH _COORDINATOR")); } private static void assertCCSExecutionInfoDetails(EsqlExecutionInfo executionInfo) { diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionBreakerIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionBreakerIT.java index a8b687cde48ca..7a2cccdf680b3 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionBreakerIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlActionBreakerIT.java @@ -87,10 +87,6 @@ protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { } public static class EsqlTestPluginWithMockBlockFactory extends EsqlPlugin { - public EsqlTestPluginWithMockBlockFactory(Settings settings) { - super(settings); - } - @Override protected BlockFactoryProvider blockFactoryProvider( CircuitBreaker breaker, diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithEnterpriseOrTrialLicense.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithEnterpriseOrTrialLicense.java index 79359229e2b16..34d09fc541572 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithEnterpriseOrTrialLicense.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithEnterpriseOrTrialLicense.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.action; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.license.License; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.license.internal.XPackLicenseStatus; @@ -20,10 +19,6 @@ * that require an Enteprise (or Trial) license. 
*/ public class EsqlPluginWithEnterpriseOrTrialLicense extends EsqlPlugin { - public EsqlPluginWithEnterpriseOrTrialLicense(Settings settings) { - super(settings); - } - protected XPackLicenseState getLicenseState() { License.OperationMode operationMode = randomFrom(License.OperationMode.ENTERPRISE, License.OperationMode.TRIAL); return new XPackLicenseState(() -> System.currentTimeMillis(), new XPackLicenseStatus(operationMode, true, "Test license expired")); diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithNonEnterpriseOrExpiredLicense.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithNonEnterpriseOrExpiredLicense.java index 4f942173a1b26..46c3f3f6204cd 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithNonEnterpriseOrExpiredLicense.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/action/EsqlPluginWithNonEnterpriseOrExpiredLicense.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.action; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.license.License; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.license.internal.XPackLicenseStatus; @@ -23,10 +22,6 @@ * - an expired enterprise or trial license */ public class EsqlPluginWithNonEnterpriseOrExpiredLicense extends EsqlPlugin { - public EsqlPluginWithNonEnterpriseOrExpiredLicense(Settings settings) { - super(settings); - } - protected XPackLicenseState getLicenseState() { License.OperationMode operationMode; boolean active; diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialNoLicenseTestCase.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialNoLicenseTestCase.java index 4ccbf4dd9164e..c3a770ed375e7 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialNoLicenseTestCase.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/spatial/SpatialNoLicenseTestCase.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.spatial; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.license.License; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.license.internal.XPackLicenseStatus; @@ -50,10 +49,6 @@ private static XPackLicenseState getLicenseState() { * This is used to test the behavior of spatial functions when no valid license is present. 
*/ public static class TestEsqlPlugin extends EsqlPlugin { - public TestEsqlPlugin(Settings settings) { - super(settings); - } - protected XPackLicenseState getLicenseState() { return SpatialNoLicenseTestCase.getLicenseState(); } diff --git a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java index ccde2623fddea..2d85e3bd7f93c 100644 --- a/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java +++ b/x-pack/plugin/esql/src/internalClusterTest/java/org/elasticsearch/xpack/esql/vector/VectorSimilarityFunctionsIT.java @@ -102,10 +102,13 @@ public void testSimilarityBetweenVectors() { float[] left = readVector((List) values.get(0)); float[] right = readVector((List) values.get(1)); Double similarity = (Double) values.get(2); - - assertNotNull(similarity); - float expectedSimilarity = similarityFunction.calculateSimilarity(left, right); - assertEquals(expectedSimilarity, similarity, 0.0001); + if (left == null || right == null) { + assertNull(similarity); + } else { + assertNotNull(similarity); + float expectedSimilarity = similarityFunction.calculateSimilarity(left, right); + assertEquals(expectedSimilarity, similarity, 0.0001); + } }); } } @@ -124,10 +127,13 @@ public void testSimilarityBetweenConstantVectorAndField() { valuesList.forEach(values -> { float[] left = readVector((List) values.get(0)); Double similarity = (Double) values.get(1); - - assertNotNull(similarity); - float expectedSimilarity = similarityFunction.calculateSimilarity(left, randomVector); - assertEquals(expectedSimilarity, similarity, 0.0001); + if (left == null) { + assertNull(similarity); + } else { + assertNotNull(similarity); + float expectedSimilarity = similarityFunction.calculateSimilarity(left, randomVector); + assertEquals(expectedSimilarity, similarity, 0.0001); + } }); } } @@ -159,13 +165,20 @@ public void testSimilarityBetweenConstantVectors() { assertEquals(1, valuesList.size()); Double similarity = (Double) valuesList.get(0).get(0); - assertNotNull(similarity); - float expectedSimilarity = similarityFunction.calculateSimilarity(vectorLeft, vectorRight); - assertEquals(expectedSimilarity, similarity, 0.0001); + if (vectorLeft == null || vectorRight == null) { + assertNull(similarity); + } else { + assertNotNull(similarity); + float expectedSimilarity = similarityFunction.calculateSimilarity(vectorLeft, vectorRight); + assertEquals(expectedSimilarity, similarity, 0.0001); + } } } private static float[] readVector(List leftVector) { + if (leftVector == null) { + return null; + } float[] leftScratch = new float[leftVector.size()]; for (int i = 0; i < leftVector.size(); i++) { leftScratch[i] = leftVector.get(i); @@ -194,6 +207,9 @@ public void setup() throws IOException { private List randomVector() { assert numDims != 0 : "numDims must be set before calling randomVector()"; + if (rarely()) { + return null; + } List vector = new ArrayList<>(numDims); for (int j = 0; j < numDims; j++) { vector.add(randomFloat()); @@ -203,7 +219,7 @@ private List randomVector() { private float[] randomVectorArray() { assert numDims != 0 : "numDims must be set before calling randomVectorArray()"; - return randomVectorArray(numDims); + return rarely() ? 
null : randomVectorArray(numDims); } private static float[] randomVectorArray(int dimensions) { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java index b71f8f3fe83fc..a397a84343e43 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlCapabilities.java @@ -1356,10 +1356,20 @@ public enum Cap { */ CORRECT_SKIPPED_SHARDS_COUNT, + /* + * Support for calculating the scalar vector magnitude. + */ + MAGNITUDE_SCALAR_VECTOR_FUNCTION(Build.current().isSnapshot()), + /** * Byte elements dense vector field type support. */ - DENSE_VECTOR_FIELD_TYPE_BYTE_ELEMENTS(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG); + DENSE_VECTOR_FIELD_TYPE_BYTE_ELEMENTS(EsqlCorePlugin.DENSE_VECTOR_FEATURE_FLAG), + + /** + * Support null elements on vector similarity functions + */ + VECTOR_SIMILARITY_FUNCTIONS_SUPPORT_NULL; private final boolean enabled; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java index 1596bd3f64d91..f115d0ac8bf2c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/action/EsqlExecutionInfo.java @@ -71,7 +71,7 @@ public class EsqlExecutionInfo implements ChunkedToXContentObject, Writeable { private final boolean includeCCSMetadata; // fields that are not Writeable since they are only needed on the primary CCS coordinator - private final transient Predicate skipUnavailablePredicate; + private final transient Predicate skipOnFailurePredicate; // Predicate to determine if we should skip a cluster on failure private volatile boolean isPartial; // Does this request have partial results? private transient volatile boolean isStopped; // Have we received stop command? @@ -81,17 +81,18 @@ public class EsqlExecutionInfo implements ChunkedToXContentObject, Writeable { private transient TimeSpan planningTimeSpan; // time elapsed since start of query to calling ComputeService.execute private TimeValue overallTook; + // This is only used in tests. public EsqlExecutionInfo(boolean includeCCSMetadata) { - this(Predicates.always(), includeCCSMetadata); // default all clusters to skip_unavailable=true + this(Predicates.always(), includeCCSMetadata); // default all clusters to being skippable on failure } /** - * @param skipUnavailablePredicate provide lookup for whether a given cluster has skip_unavailable set to true or false + * @param skipOnPlanTimeFailurePredicate Decides whether we should skip a cluster that fails during the planning phase.
* @param includeCCSMetadata (user defined setting) whether to include the CCS metadata in the HTTP response */ - public EsqlExecutionInfo(Predicate skipUnavailablePredicate, boolean includeCCSMetadata) { + public EsqlExecutionInfo(Predicate skipOnPlanTimeFailurePredicate, boolean includeCCSMetadata) { this.clusterInfo = new ConcurrentHashMap<>(); - this.skipUnavailablePredicate = skipUnavailablePredicate; + this.skipOnFailurePredicate = skipOnPlanTimeFailurePredicate; this.includeCCSMetadata = includeCCSMetadata; this.relativeStart = TimeSpan.start(); } @@ -102,7 +103,7 @@ public EsqlExecutionInfo(Predicate skipUnavailablePredicate, boolean inc EsqlExecutionInfo(ConcurrentMap clusterInfo, boolean includeCCSMetadata) { this.clusterInfo = clusterInfo; this.includeCCSMetadata = includeCCSMetadata; - this.skipUnavailablePredicate = Predicates.always(); + this.skipOnFailurePredicate = Predicates.always(); this.relativeStart = null; } @@ -111,7 +112,7 @@ public EsqlExecutionInfo(StreamInput in) throws IOException { this.clusterInfo = in.readMapValues(EsqlExecutionInfo.Cluster::new, Cluster::getClusterAlias, ConcurrentHashMap::new); this.includeCCSMetadata = in.getTransportVersion().onOrAfter(TransportVersions.V_8_16_0) ? in.readBoolean() : false; this.isPartial = in.getTransportVersion().onOrAfter(TransportVersions.ESQL_RESPONSE_PARTIAL) ? in.readBoolean() : false; - this.skipUnavailablePredicate = Predicates.always(); + this.skipOnFailurePredicate = Predicates.always(); this.relativeStart = null; if (in.getTransportVersion().onOrAfter(TransportVersions.ESQL_QUERY_PLANNING_DURATION) || in.getTransportVersion().isPatchFrom(TransportVersions.ESQL_QUERY_PLANNING_DURATION_8_19)) { @@ -200,15 +201,16 @@ public Set clusterAliases() { } /** - * @param clusterAlias to lookup skip_unavailable from - * @return skip_unavailable setting (true/false) + * @param clusterAlias to check if we should skip this cluster on failure + * @return whether it's OK to skip the cluster on failure. * @throws NoSuchRemoteClusterException if clusterAlias is unknown to this node's RemoteClusterService */ - public boolean isSkipUnavailable(String clusterAlias) { + public boolean shouldSkipOnFailure(String clusterAlias) { if (RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY.equals(clusterAlias)) { + // local cluster is not skippable for now return false; } - return skipUnavailablePredicate.test(clusterAlias); + return skipOnFailurePredicate.test(clusterAlias); } public boolean isCrossClusterSearch() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/PlanCheckerProvider.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/PlanCheckerProvider.java new file mode 100644 index 0000000000000..04096a8db7e6e --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/PlanCheckerProvider.java @@ -0,0 +1,27 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.analysis; + +import org.elasticsearch.cluster.project.ProjectResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.xpack.esql.common.Failures; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; + +import java.util.List; +import java.util.function.BiConsumer; + +/** + * SPI provider interface for supplying additional ESQL plan checks to be performed during verification. + */ +public interface PlanCheckerProvider { + /** + * Build a list of checks to perform on the plan. Each one is called once per + * {@link LogicalPlan} node in the plan. + */ + List> checkers(ProjectResolver projectResolver, ClusterService clusterService); +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java index 9db9e1cabfa14..30fa9e8609b3c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/analysis/Verifier.java @@ -7,7 +7,6 @@ package org.elasticsearch.xpack.esql.analysis; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.xpack.esql.LicenseAware; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; @@ -56,31 +55,22 @@ * step does type resolution and fails queries based on invalid type expressions. */ public class Verifier { - public interface ExtraCheckers { - /** - * Build a list of checks to perform on the plan. Each one is called once per - * {@link LogicalPlan} node in the plan. - */ - List> extra(Settings settings); - } /** * Extra plan verification checks defined in plugins. 
*/ - private final List extraCheckers; + private final List> extraCheckers; private final Metrics metrics; private final XPackLicenseState licenseState; - private final Settings settings; public Verifier(Metrics metrics, XPackLicenseState licenseState) { - this(metrics, licenseState, Collections.emptyList(), Settings.EMPTY); + this(metrics, licenseState, Collections.emptyList()); } - public Verifier(Metrics metrics, XPackLicenseState licenseState, List extraCheckers, Settings settings) { + public Verifier(Metrics metrics, XPackLicenseState licenseState, List> extraCheckers) { this.metrics = metrics; this.licenseState = licenseState; this.extraCheckers = extraCheckers; - this.settings = settings; } /** @@ -104,9 +94,7 @@ Collection verify(LogicalPlan plan, BitSet partialMetrics) { // collect plan checkers var planCheckers = planCheckers(plan); - for (ExtraCheckers e : extraCheckers) { - planCheckers.addAll(e.extra(settings)); - } + planCheckers.addAll(extraCheckers); // Concrete verifications plan.forEachDown(p -> { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java index 414e1f372ea3f..2043176f24a29 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/execution/PlanExecutor.java @@ -8,7 +8,6 @@ package org.elasticsearch.xpack.esql.execution; import org.elasticsearch.action.ActionListener; -import org.elasticsearch.common.settings.Settings; import org.elasticsearch.indices.IndicesExpressionGrouper; import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.telemetry.metric.MeterRegistry; @@ -16,6 +15,7 @@ import org.elasticsearch.xpack.esql.action.EsqlQueryRequest; import org.elasticsearch.xpack.esql.analysis.PreAnalyzer; import org.elasticsearch.xpack.esql.analysis.Verifier; +import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.FoldContext; import org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolver; import org.elasticsearch.xpack.esql.expression.function.EsqlFunctionRegistry; @@ -23,6 +23,7 @@ import org.elasticsearch.xpack.esql.optimizer.LogicalPlanOptimizer; import org.elasticsearch.xpack.esql.optimizer.LogicalPlanPreOptimizer; import org.elasticsearch.xpack.esql.optimizer.LogicalPreOptimizerContext; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.planner.mapper.Mapper; import org.elasticsearch.xpack.esql.plugin.TransportActionServices; import org.elasticsearch.xpack.esql.querylog.EsqlQueryLog; @@ -36,6 +37,7 @@ import org.elasticsearch.xpack.esql.telemetry.QueryMetric; import java.util.List; +import java.util.function.BiConsumer; import static org.elasticsearch.action.ActionListener.wrap; @@ -55,15 +57,14 @@ public PlanExecutor( MeterRegistry meterRegistry, XPackLicenseState licenseState, EsqlQueryLog queryLog, - List extraCheckers, - Settings settings + List> extraCheckers ) { this.indexResolver = indexResolver; this.preAnalyzer = new PreAnalyzer(); this.functionRegistry = new EsqlFunctionRegistry(); this.mapper = new Mapper(); this.metrics = new Metrics(functionRegistry); - this.verifier = new Verifier(metrics, licenseState, extraCheckers, settings); + this.verifier = new Verifier(metrics, licenseState, extraCheckers); this.planTelemetryManager = new PlanTelemetryManager(meterRegistry); this.queryLog = 
queryLog; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java index 0eca67f625121..f52c51cf8d3f9 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/EsqlFunctionRegistry.java @@ -188,6 +188,7 @@ import org.elasticsearch.xpack.esql.expression.function.vector.Knn; import org.elasticsearch.xpack.esql.expression.function.vector.L1Norm; import org.elasticsearch.xpack.esql.expression.function.vector.L2Norm; +import org.elasticsearch.xpack.esql.expression.function.vector.Magnitude; import org.elasticsearch.xpack.esql.parser.ParsingException; import org.elasticsearch.xpack.esql.session.Configuration; @@ -503,7 +504,8 @@ private static FunctionDefinition[][] snapshotFunctions() { def(CosineSimilarity.class, CosineSimilarity::new, "v_cosine"), def(DotProduct.class, DotProduct::new, "v_dot_product"), def(L1Norm.class, L1Norm::new, "v_l1_norm"), - def(L2Norm.class, L2Norm::new, "v_l2_norm") } }; + def(L2Norm.class, L2Norm::new, "v_l2_norm"), + def(Magnitude.class, Magnitude::new, "v_magnitude") } }; } public EsqlFunctionRegistry snapshotRegistry() { diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java new file mode 100644 index 0000000000000..56d1cc0d31b8d --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/Magnitude.java @@ -0,0 +1,180 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import org.apache.lucene.util.VectorUtil; +import org.elasticsearch.common.io.stream.NamedWriteableRegistry; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.compute.data.Block; +import org.elasticsearch.compute.data.FloatBlock; +import org.elasticsearch.compute.data.Page; +import org.elasticsearch.compute.operator.DriverContext; +import org.elasticsearch.compute.operator.EvalOperator; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.expression.FoldContext; +import org.elasticsearch.xpack.esql.core.expression.TypeResolutions; +import org.elasticsearch.xpack.esql.core.expression.function.scalar.UnaryScalarFunction; +import org.elasticsearch.xpack.esql.core.tree.NodeInfo; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.core.type.DataType; +import org.elasticsearch.xpack.esql.evaluator.mapper.EvaluatorMapper; +import org.elasticsearch.xpack.esql.expression.function.Example; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesTo; +import org.elasticsearch.xpack.esql.expression.function.FunctionAppliesToLifecycle; +import org.elasticsearch.xpack.esql.expression.function.FunctionInfo; +import org.elasticsearch.xpack.esql.expression.function.Param; + +import java.io.IOException; + +import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; + +public class Magnitude extends UnaryScalarFunction implements EvaluatorMapper, VectorFunction { + + public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( + Expression.class, + "Magnitude", + Magnitude::new + ); + static final ScalarEvaluatorFunction SCALAR_FUNCTION = Magnitude::calculateScalar; + + @FunctionInfo( + returnType = "double", + preview = true, + description = "Calculates the magnitude of a dense_vector.", + examples = { @Example(file = "vector-magnitude", tag = "vector-magnitude") }, + appliesTo = { @FunctionAppliesTo(lifeCycle = FunctionAppliesToLifecycle.DEVELOPMENT) } + ) + public Magnitude( + Source source, + @Param(name = "input", type = { "dense_vector" }, description = "dense_vector for which to compute the magnitude") Expression input + ) { + super(source, input); + } + + private Magnitude(StreamInput in) throws IOException { + super(in); + } + + @Override + protected UnaryScalarFunction replaceChild(Expression newChild) { + return new Magnitude(source(), newChild); + } + + @Override + protected NodeInfo info() { + return NodeInfo.create(this, Magnitude::new, field()); + } + + @Override + public String getWriteableName() { + return ENTRY.name; + } + + public static float calculateScalar(float[] scratch) { + return (float) Math.sqrt(VectorUtil.dotProduct(scratch, scratch)); + } + + @Override + public DataType dataType() { + return DataType.DOUBLE; + } + + @Override + protected TypeResolution resolveType() { + if (childrenResolved() == false) { + return new TypeResolution("Unresolved children"); + } + + return isType(field(), dt -> dt == DENSE_VECTOR, sourceText(), TypeResolutions.ParamOrdinal.FIRST, "dense_vector"); + } + + /** + * Functional interface for evaluating the scalar value of the underlying float array. 
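+ * For example, {@code Magnitude.calculateScalar(new float[] { 1f, 2f, 3f })} returns {@code (float) Math.sqrt(14)}, roughly 3.7417f.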
+ */ + @FunctionalInterface + public interface ScalarEvaluatorFunction { + float calculateScalar(float[] scratch); + } + + @Override + public Object fold(FoldContext ctx) { + return EvaluatorMapper.super.fold(source(), ctx); + } + + @Override + public final EvalOperator.ExpressionEvaluator.Factory toEvaluator(EvaluatorMapper.ToEvaluator toEvaluator) { + return new ScalarEvaluatorFactory(toEvaluator.apply(field()), SCALAR_FUNCTION, getClass().getSimpleName() + "Evaluator"); + } + + private record ScalarEvaluatorFactory( + EvalOperator.ExpressionEvaluator.Factory child, + ScalarEvaluatorFunction scalarFunction, + String evaluatorName + ) implements EvalOperator.ExpressionEvaluator.Factory { + + @Override + public EvalOperator.ExpressionEvaluator get(DriverContext context) { + // TODO check whether to use this custom evaluator or reuse / define an existing one + return new EvalOperator.ExpressionEvaluator() { + @Override + public Block eval(Page page) { + try (FloatBlock block = (FloatBlock) child.get(context).eval(page);) { + int positionCount = page.getPositionCount(); + int dimensions = 0; + // Get the first non-empty vector to calculate the dimension + for (int p = 0; p < positionCount; p++) { + if (block.getValueCount(p) != 0) { + dimensions = block.getValueCount(p); + break; + } + } + if (dimensions == 0) { + return context.blockFactory().newConstantFloatBlockWith(0F, 0); + } + + float[] scratch = new float[dimensions]; + try (var builder = context.blockFactory().newDoubleBlockBuilder(positionCount * dimensions)) { + for (int p = 0; p < positionCount; p++) { + int dims = block.getValueCount(p); + if (dims == 0) { + // A null value for the vector, by default append null as result. + builder.appendNull(); + continue; + } + readFloatArray(block, block.getFirstValueIndex(p), dimensions, scratch); + float result = scalarFunction.calculateScalar(scratch); + builder.appendDouble(result); + } + return builder.build(); + } + } + } + + @Override + public String toString() { + return evaluatorName() + "[child=" + child + "]"; + } + + @Override + public void close() {} + }; + } + + private static void readFloatArray(FloatBlock block, int position, int dimensions, float[] scratch) { + for (int i = 0; i < dimensions; i++) { + scratch[i] = block.getFloat(position + i); + } + } + + @Override + public String toString() { + return evaluatorName() + "[child=" + child + "]"; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java index fc27ae2d876e8..69dcaa17368dc 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorSimilarityFunction.java @@ -9,7 +9,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.compute.data.Block; -import org.elasticsearch.compute.data.DoubleVector; +import org.elasticsearch.compute.data.DoubleBlock; import org.elasticsearch.compute.data.FloatBlock; import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.DriverContext; @@ -27,7 +27,6 @@ import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.FIRST; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.ParamOrdinal.SECOND; -import static 
org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isNotNull; import static org.elasticsearch.xpack.esql.core.expression.TypeResolutions.isType; import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; @@ -59,9 +58,7 @@ protected TypeResolution resolveType() { } private TypeResolution checkDenseVectorParam(Expression param, TypeResolutions.ParamOrdinal paramOrdinal) { - return isNotNull(param, sourceText(), paramOrdinal).and( - isType(param, dt -> dt == DENSE_VECTOR, sourceText(), paramOrdinal, "dense_vector") - ); + return isType(param, dt -> dt == DENSE_VECTOR, sourceText(), paramOrdinal, "dense_vector"); } /** @@ -124,14 +121,14 @@ public Block eval(Page page) { float[] leftScratch = new float[dimensions]; float[] rightScratch = new float[dimensions]; - try (DoubleVector.Builder builder = context.blockFactory().newDoubleVectorBuilder(positionCount * dimensions)) { + try (DoubleBlock.Builder builder = context.blockFactory().newDoubleBlockBuilder(positionCount * dimensions)) { for (int p = 0; p < positionCount; p++) { int dimsLeft = leftBlock.getValueCount(p); int dimsRight = rightBlock.getValueCount(p); if (dimsLeft == 0 || dimsRight == 0) { - // A null value on the left or right vector. Similarity is 0 - builder.appendDouble(0.0); + // A null value on the left or right vector. Similarity is null + builder.appendNull(); continue; } else if (dimsLeft != dimsRight) { throw new EsqlClientException( @@ -145,7 +142,7 @@ public Block eval(Page page) { float result = similarityFunction.calculateSimilarity(leftScratch, rightScratch); builder.appendDouble(result); } - return builder.build().asBlock(); + return builder.build(); } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java index 4a1a2ec9386ae..a0897792482d8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/expression/function/vector/VectorWritables.java @@ -42,6 +42,9 @@ public static List getNamedWritables() { if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { entries.add(L2Norm.ENTRY); } + if (EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION.isEnabled()) { + entries.add(Magnitude.ENTRY); + } return Collections.unmodifiableList(entries); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceLimitAndSortAsTopN.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceLimitAndSortAsTopN.java index 7d44fa1fda5a2..dfc1a26ae980c 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceLimitAndSortAsTopN.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/optimizer/rules/logical/ReplaceLimitAndSortAsTopN.java @@ -18,7 +18,7 @@ public final class ReplaceLimitAndSortAsTopN extends OptimizerRules.OptimizerRul protected LogicalPlan rule(Limit plan) { LogicalPlan p = plan; if (plan.child() instanceof OrderBy o) { - p = new TopN(plan.source(), o.child(), o.order(), plan.limit()); + p = new TopN(o.source(), o.child(), o.order(), plan.limit()); } return p; } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Aggregate.java 
b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Aggregate.java index 794957dc473eb..5e265c30c57ba 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Aggregate.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Aggregate.java @@ -45,7 +45,13 @@ import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; import static org.elasticsearch.xpack.esql.plan.logical.Filter.checkFilterConditionDataType; -public class Aggregate extends UnaryPlan implements PostAnalysisVerificationAware, TelemetryAware, SortAgnostic, PipelineBreaker { +public class Aggregate extends UnaryPlan + implements + PostAnalysisVerificationAware, + TelemetryAware, + SortAgnostic, + PipelineBreaker, + ExecutesOn.Coordinator { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( LogicalPlan.class, "Aggregate", diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ChangePoint.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ChangePoint.java index 82bd5b1f69bfe..6338e32c6f5a8 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ChangePoint.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ChangePoint.java @@ -44,7 +44,12 @@ * enforced by the Limit in the surrogate plan. */ @SupportsObservabilityTier(tier = COMPLETE) -public class ChangePoint extends UnaryPlan implements SurrogateLogicalPlan, PostAnalysisVerificationAware, LicenseAware { +public class ChangePoint extends UnaryPlan + implements + SurrogateLogicalPlan, + PostAnalysisVerificationAware, + LicenseAware, + ExecutesOn.Coordinator { private final Attribute value; private final Attribute key; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java index af56345438c21..ef268ff6b7964 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Enrich.java @@ -17,7 +17,7 @@ import org.elasticsearch.index.IndexMode; import org.elasticsearch.transport.RemoteClusterAware; import org.elasticsearch.xpack.core.enrich.EnrichPolicy; -import org.elasticsearch.xpack.esql.capabilities.PostAnalysisPlanVerificationAware; +import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; import org.elasticsearch.xpack.esql.capabilities.TelemetryAware; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.capabilities.Resolvables; @@ -34,7 +34,6 @@ import org.elasticsearch.xpack.esql.index.EsIndex; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.GeneratingPlan; -import org.elasticsearch.xpack.esql.plan.logical.join.LookupJoin; import java.io.IOException; import java.util.ArrayList; @@ -44,14 +43,19 @@ import java.util.Map; import java.util.Objects; import java.util.Set; -import java.util.function.BiConsumer; import static org.elasticsearch.xpack.esql.common.Failure.fail; import static org.elasticsearch.xpack.esql.core.expression.Expressions.asAttributes; import static org.elasticsearch.xpack.esql.expression.Foldables.literalValueOf; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; 
-public class Enrich extends UnaryPlan implements GeneratingPlan, PostAnalysisPlanVerificationAware, TelemetryAware, SortAgnostic { +public class Enrich extends UnaryPlan + implements + GeneratingPlan, + PostOptimizationVerificationAware, + TelemetryAware, + SortAgnostic, + ExecutesOn { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry( LogicalPlan.class, "Enrich", @@ -68,6 +72,16 @@ public class Enrich extends UnaryPlan implements GeneratingPlan, PostAna private final Mode mode; + @Override + public ExecuteLocation executesOn() { + if (mode == Mode.REMOTE) { + return ExecuteLocation.REMOTE; + } else if (mode == Mode.COORDINATOR) { + return ExecuteLocation.COORDINATOR; + } + return ExecuteLocation.ANY; + } + public enum Mode { ANY, COORDINATOR, @@ -284,11 +298,6 @@ public int hashCode() { return Objects.hash(super.hashCode(), mode, policyName, matchField, policy, concreteIndices, enrichFields); } - @Override - public BiConsumer postAnalysisPlanVerification() { - return Enrich::checkRemoteEnrich; - } - /** * Ensure that no remote enrich is allowed after a reduction or an enrich with coordinator mode. *

@@ -299,36 +308,24 @@ public BiConsumer postAnalysisPlanVerification() { * In that case, users have to write it as `FROM test | ENRICH _remote: | ORDER @timestamp | LIMIT 10`, * which is equivalent to bringing all data to the coordinating cluster. * We might consider implementing the actual remote enrich on the coordinating cluster, however, this requires - * retaining the originating cluster and restructing pages for routing, which might be complicated. - */ - private static void checkRemoteEnrich(LogicalPlan plan, Failures failures) { - // First look for remote ENRICH, and then look at its children. Going over the whole plan once is trickier as remote ENRICHs can be - // in separate FORK branches which are valid by themselves. - plan.forEachUp(Enrich.class, enrich -> checkForPlansForbiddenBeforeRemoteEnrich(enrich, failures)); - } - - /** - * For a given remote {@link Enrich}, check if there are any forbidden plans upstream. + * retaining the originating cluster and restructuring pages for routing, which might be complicated. */ - private static void checkForPlansForbiddenBeforeRemoteEnrich(Enrich enrich, Failures failures) { - if (enrich.mode != Mode.REMOTE) { - return; - } + private void checkForPlansForbiddenBeforeRemoteEnrich(Failures failures) { + Set fails = new HashSet<>(); - Set badCommands = new HashSet<>(); - - enrich.forEachUp(LogicalPlan.class, u -> { - if (u instanceof Aggregate) { - badCommands.add("STATS"); - } else if (u instanceof Enrich upstreamEnrich && upstreamEnrich.mode() == Enrich.Mode.COORDINATOR) { - badCommands.add("another ENRICH with coordinator policy"); - } else if (u instanceof LookupJoin) { - badCommands.add("LOOKUP JOIN"); - } else if (u instanceof Fork) { - badCommands.add("FORK"); + this.forEachUp(LogicalPlan.class, u -> { + if (u instanceof ExecutesOn ex && ex.executesOn() == ExecuteLocation.COORDINATOR) { + fails.add(u.source()); } }); - badCommands.forEach(c -> failures.add(fail(enrich, "ENRICH with remote policy can't be executed after " + c))); + fails.forEach(f -> failures.add(fail(this, "ENRICH with remote policy can't be executed after [" + f.text() + "]" + f.source()))); + } + + @Override + public void postOptimizationVerification(Failures failures) { + if (this.mode == Mode.REMOTE) { + checkForPlansForbiddenBeforeRemoteEnrich(failures); + } } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ExecutesOn.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ExecutesOn.java new file mode 100644 index 0000000000000..899107f3b1bb4 --- /dev/null +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/ExecutesOn.java @@ -0,0 +1,41 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.plan.logical; + +/** + * Mark nodes that execute only in a specific way, either on the coordinator or on a remote node. + */ +public interface ExecutesOn { + enum ExecuteLocation { + COORDINATOR, + REMOTE, + ANY; // Can be executed on either coordinator or remote nodes + } + + ExecuteLocation executesOn(); + + /** + * Executes on the remote nodes only (note that may include coordinator, but not on the aggregation stage). 
+ */ + interface Remote extends ExecutesOn { + @Override + default ExecuteLocation executesOn() { + return ExecuteLocation.REMOTE; + } + } + + /** + * Executes on the coordinator only. Can not be run on remote nodes. + */ + interface Coordinator extends ExecutesOn { + @Override + default ExecuteLocation executesOn() { + return ExecuteLocation.COORDINATOR; + } + } +} diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Fork.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Fork.java index 1266ccbb1e962..09719774f8d78 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Fork.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/Fork.java @@ -33,7 +33,7 @@ * A Fork is a n-ary {@code Plan} where each child is a sub plan, e.g. * {@code FORK [WHERE content:"fox" ] [WHERE content:"dog"] } */ -public class Fork extends LogicalPlan implements PostAnalysisPlanVerificationAware, TelemetryAware, PipelineBreaker { +public class Fork extends LogicalPlan implements PostAnalysisPlanVerificationAware, TelemetryAware, ExecutesOn.Coordinator { public static final String FORK_FIELD = "_fork"; public static final int MAX_BRANCHES = 8; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Completion.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Completion.java index 191664bea9a81..ffb7ccfbe4798 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Completion.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Completion.java @@ -23,6 +23,7 @@ import org.elasticsearch.xpack.esql.core.tree.Source; import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; +import org.elasticsearch.xpack.esql.plan.logical.ExecutesOn; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import java.io.IOException; @@ -33,7 +34,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.TEXT; import static org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Completion extends InferencePlan implements TelemetryAware, PostAnalysisVerificationAware { +public class Completion extends InferencePlan implements TelemetryAware, PostAnalysisVerificationAware, ExecutesOn.Coordinator { public static final String DEFAULT_OUTPUT_FIELD_NAME = "completion"; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Rerank.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Rerank.java index 6f86138397fa6..a63b70cd6cc57 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Rerank.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/inference/Rerank.java @@ -26,6 +26,7 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.logical.Eval; +import org.elasticsearch.xpack.esql.plan.logical.ExecutesOn; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; @@ -37,7 +38,7 @@ import static org.elasticsearch.xpack.esql.common.Failure.fail; import static 
org.elasticsearch.xpack.esql.expression.NamedExpressions.mergeOutputAttributes; -public class Rerank extends InferencePlan implements PostAnalysisVerificationAware, TelemetryAware { +public class Rerank extends InferencePlan implements PostAnalysisVerificationAware, TelemetryAware, ExecutesOn.Coordinator { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Rerank", Rerank::new); public static final String DEFAULT_INFERENCE_ID = ".rerank-v1-elasticsearch"; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java index 2c7b1a399b3f2..2f217df1468a6 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/Join.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.xpack.esql.capabilities.PostAnalysisVerificationAware; +import org.elasticsearch.xpack.esql.capabilities.PostOptimizationVerificationAware; import org.elasticsearch.xpack.esql.common.Failures; import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.expression.AttributeSet; @@ -21,14 +22,19 @@ import org.elasticsearch.xpack.esql.core.type.DataType; import org.elasticsearch.xpack.esql.io.stream.PlanStreamInput; import org.elasticsearch.xpack.esql.plan.logical.BinaryPlan; +import org.elasticsearch.xpack.esql.plan.logical.ExecutesOn; +import org.elasticsearch.xpack.esql.plan.logical.Limit; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; +import org.elasticsearch.xpack.esql.plan.logical.PipelineBreaker; import org.elasticsearch.xpack.esql.plan.logical.SortAgnostic; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashSet; import java.util.List; import java.util.Objects; +import java.util.Set; import static org.elasticsearch.xpack.esql.common.Failure.fail; import static org.elasticsearch.xpack.esql.core.type.DataType.AGGREGATE_METRIC_DOUBLE; @@ -56,7 +62,7 @@ import static org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.LEFT; import static org.elasticsearch.xpack.esql.type.EsqlDataTypeConverter.commonType; -public class Join extends BinaryPlan implements PostAnalysisVerificationAware, SortAgnostic { +public class Join extends BinaryPlan implements PostAnalysisVerificationAware, SortAgnostic, ExecutesOn, PostOptimizationVerificationAware { public static final NamedWriteableRegistry.Entry ENTRY = new NamedWriteableRegistry.Entry(LogicalPlan.class, "Join", Join::new); public static final DataType[] UNSUPPORTED_TYPES = { TEXT, @@ -309,4 +315,40 @@ private static boolean comparableTypes(Attribute left, Attribute right) { public boolean isRemote() { return isRemote; } + + @Override + public ExecuteLocation executesOn() { + return isRemote ? 
ExecuteLocation.REMOTE : ExecuteLocation.COORDINATOR; + } + + private void checkRemoteJoin(Failures failures) { + Set fails = new HashSet<>(); + + var myself = this; + this.forEachUp(LogicalPlan.class, u -> { + if (u == myself) { + return; // skip myself + } + if (u instanceof Limit) { + // Limit is ok because it can be moved in by the optimizer + // We check LIMITs in LookupJoin pre-optimization so they are still not allowed there + return; + } + if (u instanceof PipelineBreaker || (u instanceof ExecutesOn ex && ex.executesOn() == ExecuteLocation.COORDINATOR)) { + fails.add(u.source()); + } + }); + + fails.forEach( + f -> failures.add(fail(this, "LOOKUP JOIN with remote indices can't be executed after [" + f.text() + "]" + f.source())) + ); + + } + + @Override + public void postOptimizationVerification(Failures failures) { + if (isRemote()) { + checkRemoteJoin(failures); + } + } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java index 16ff5fba7bbd8..20913e0e27ce7 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plan/logical/join/LookupJoin.java @@ -13,14 +13,11 @@ import org.elasticsearch.xpack.esql.core.expression.Attribute; import org.elasticsearch.xpack.esql.core.tree.NodeInfo; import org.elasticsearch.xpack.esql.core.tree.Source; -import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.plan.logical.Limit; import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.PipelineBreaker; import org.elasticsearch.xpack.esql.plan.logical.SurrogateLogicalPlan; -import org.elasticsearch.xpack.esql.plan.logical.UnaryPlan; import org.elasticsearch.xpack.esql.plan.logical.join.JoinTypes.UsingJoinType; -import java.util.LinkedList; import java.util.List; import static java.util.Collections.emptyList; @@ -30,7 +27,7 @@ /** * Lookup join - specialized LEFT (OUTER) JOIN between the main left side and a lookup index (index_mode = lookup) on the right. 
*/ -public class LookupJoin extends Join implements SurrogateLogicalPlan, PostAnalysisVerificationAware, TelemetryAware { +public class LookupJoin extends Join implements SurrogateLogicalPlan, TelemetryAware, PostAnalysisVerificationAware { public LookupJoin(Source source, LogicalPlan left, LogicalPlan right, List joinFields, boolean isRemote) { this(source, left, right, new UsingJoinType(LEFT, joinFields), emptyList(), emptyList(), emptyList(), isRemote); @@ -104,21 +101,11 @@ public void postAnalysisVerification(Failures failures) { } private void checkRemoteJoin(Failures failures) { - List fails = new LinkedList<>(); - - this.forEachUp(UnaryPlan.class, u -> { - if (u instanceof PipelineBreaker) { - fails.add(u.source()); - } - if (u instanceof Enrich enrich && enrich.mode() == Enrich.Mode.COORDINATOR) { - fails.add(u.source()); - } + // Check only for LIMITs, Join will check the rest post-optimization + this.forEachUp(Limit.class, f -> { + failures.add( + fail(this, "LOOKUP JOIN with remote indices can't be executed after [" + f.source().text() + "]" + f.source().source()) + ); }); - - fails.forEach( - f -> failures.add(fail(this, "LOOKUP JOIN with remote indices can't be executed after [" + f.text() + "]" + f.source())) - ); - } - } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ClusterComputeHandler.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ClusterComputeHandler.java index 4e8a89d024b71..e93cfc45ea972 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ClusterComputeHandler.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/ClusterComputeHandler.java @@ -85,7 +85,7 @@ void startComputeOnRemoteCluster( final AtomicReference finalResponse = new AtomicReference<>(); listener = listener.delegateResponse((l, e) -> { final boolean receivedResults = finalResponse.get() != null || pagesFetched.get(); - if (EsqlCCSUtils.shouldIgnoreRuntimeError(executionInfo, clusterAlias, e) + if (executionInfo.shouldSkipOnFailure(clusterAlias) || (configuration.allowPartialResults() && EsqlCCSUtils.canAllowPartial(e))) { EsqlCCSUtils.markClusterWithFinalStateAndNoShards( executionInfo, @@ -107,7 +107,7 @@ void startComputeOnRemoteCluster( listener.delegateFailure((l, unused) -> { final CancellableTask groupTask; final Runnable onGroupFailure; - boolean failFast = executionInfo.isSkipUnavailable(clusterAlias) == false && configuration.allowPartialResults() == false; + boolean failFast = executionInfo.shouldSkipOnFailure(clusterAlias) == false && configuration.allowPartialResults() == false; if (failFast) { groupTask = rootTask; onGroupFailure = cancelQueryOnFailure; diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java index 776874fbf90f6..64e205e68d6fe 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/EsqlPlugin.java @@ -67,7 +67,8 @@ import org.elasticsearch.xpack.esql.action.RestEsqlListQueriesAction; import org.elasticsearch.xpack.esql.action.RestEsqlQueryAction; import org.elasticsearch.xpack.esql.action.RestEsqlStopAsyncAction; -import org.elasticsearch.xpack.esql.analysis.Verifier; +import org.elasticsearch.xpack.esql.analysis.PlanCheckerProvider; +import org.elasticsearch.xpack.esql.common.Failures; import 
org.elasticsearch.xpack.esql.enrich.EnrichLookupOperator; import org.elasticsearch.xpack.esql.enrich.LookupFromIndexOperator; import org.elasticsearch.xpack.esql.execution.PlanExecutor; @@ -75,6 +76,7 @@ import org.elasticsearch.xpack.esql.io.stream.ExpressionQueryBuilder; import org.elasticsearch.xpack.esql.io.stream.PlanStreamWrapperQueryBuilder; import org.elasticsearch.xpack.esql.plan.PlanWritables; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; import org.elasticsearch.xpack.esql.planner.PhysicalSettings; import org.elasticsearch.xpack.esql.querydsl.query.SingleValueQuery; import org.elasticsearch.xpack.esql.querylog.EsqlQueryLog; @@ -85,6 +87,7 @@ import java.util.Collection; import java.util.List; import java.util.Objects; +import java.util.function.BiConsumer; import java.util.function.Predicate; import java.util.function.Supplier; @@ -184,12 +187,7 @@ public class EsqlPlugin extends Plugin implements ActionPlugin, ExtensiblePlugin Setting.Property.Dynamic ); - private final List extraCheckers = new ArrayList<>(); - private final Settings settings; - - public EsqlPlugin(Settings settings) { - this.settings = settings; - } + private final List extraCheckerProviders = new ArrayList<>(); @Override public Collection createComponents(PluginServices services) { @@ -203,14 +201,17 @@ public Collection createComponents(PluginServices services) { BigArrays bigArrays = services.indicesService().getBigArrays().withCircuitBreaking(); var blockFactoryProvider = blockFactoryProvider(circuitBreaker, bigArrays, maxPrimitiveArrayBlockSize); setupSharedSecrets(); + List> extraCheckers = extraCheckerProviders.stream() + .flatMap(p -> p.checkers(services.projectResolver(), services.clusterService()).stream()) + .toList(); + return List.of( new PlanExecutor( new IndexResolver(services.client()), services.telemetryProvider().getMeterRegistry(), getLicenseState(), new EsqlQueryLog(services.clusterService().getClusterSettings(), services.slowLogFieldProvider()), - extraCheckers, - settings + extraCheckers ), new ExchangeService( services.clusterService().getSettings(), @@ -349,6 +350,6 @@ public List> getExecutorBuilders(Settings settings) { @Override public void loadExtensions(ExtensionLoader loader) { - extraCheckers.addAll(loader.loadExtensions(Verifier.ExtraCheckers.class)); + extraCheckerProviders.addAll(loader.loadExtensions(PlanCheckerProvider.class)); } } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtils.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtils.java index 80c08d145d9bb..ce8915af0fc69 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtils.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtils.java @@ -109,8 +109,9 @@ static boolean returnSuccessWithEmptyResult(EsqlExecutionInfo executionInfo, Exc if (e instanceof NoClustersToSearchException || ExceptionsHelper.isRemoteUnavailableException(e)) { for (String clusterAlias : executionInfo.clusterAliases()) { - if (executionInfo.isSkipUnavailable(clusterAlias) == false - && clusterAlias.equals(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY) == false) { + // Check if we have any remotes that can't be skipped on failure. 
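+ // If such a cluster exists, the original failure is propagated instead of being converted into an empty successful result.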
+ if (clusterAlias.equals(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY) == false + && executionInfo.shouldSkipOnFailure(clusterAlias) == false) { return false; } } @@ -227,7 +228,7 @@ static void updateExecutionInfoWithClustersWithNoMatchingIndices( "Unknown index [%s]", (c.equals(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY) ? indexExpression : c + ":" + indexExpression) ); - if (executionInfo.isSkipUnavailable(c) == false || usedFilter) { + if (executionInfo.shouldSkipOnFailure(c) == false || usedFilter) { if (fatalErrorMessage == null) { fatalErrorMessage = error; } else { @@ -239,7 +240,7 @@ static void updateExecutionInfoWithClustersWithNoMatchingIndices( markClusterWithFinalStateAndNoShards( executionInfo, c, - executionInfo.isSkipUnavailable(c) ? Cluster.Status.SKIPPED : Cluster.Status.FAILED, + executionInfo.shouldSkipOnFailure(c) ? Cluster.Status.SKIPPED : Cluster.Status.FAILED, new VerificationException(error) ); } @@ -344,7 +345,7 @@ public static void initCrossClusterState( final String indexExpr = Strings.arrayToCommaDelimitedString(entry.getValue().indices()); executionInfo.swapCluster(clusterAlias, (k, v) -> { assert v == null : "No cluster for " + clusterAlias + " should have been added to ExecutionInfo yet"; - return new EsqlExecutionInfo.Cluster(clusterAlias, indexExpr, executionInfo.isSkipUnavailable(clusterAlias)); + return new EsqlExecutionInfo.Cluster(clusterAlias, indexExpr, executionInfo.shouldSkipOnFailure(clusterAlias)); }); } @@ -389,13 +390,6 @@ public static void markClusterWithFinalStateAndNoShards( }); } - /** - * We will ignore the error if it's remote unavailable and the cluster is marked to skip unavailable. - */ - public static boolean shouldIgnoreRuntimeError(EsqlExecutionInfo executionInfo, String clusterAlias, Exception e) { - return executionInfo.isSkipUnavailable(clusterAlias); - } - /** * Check whether this exception can be tolerated when partial results are on, or should be treated as fatal. * @return true if the exception can be tolerated, false if it should be treated as fatal. 
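The change above replaces every per-cluster `skip_unavailable` lookup in EsqlCCSUtils with the `shouldSkipOnFailure` predicate carried by the execution info. A minimal, self-contained sketch of that decision follows; it is illustrative only, and the `ClusterStatus` enum, the `classify` method, and the use of an empty string for the local cluster alias are assumptions made for this example rather than part of the patch.

import java.util.function.Predicate;

/**
 * Illustrative sketch of the skip-on-failure decision: the local cluster is never
 * skippable, and a remote cluster is skipped on failure only when the predicate
 * supplied to the execution info allows it; otherwise the failure is fatal.
 */
class SkipOnFailureSketch {
    enum ClusterStatus { SKIPPED, FAILED }

    static ClusterStatus classify(String clusterAlias, Predicate<String> skipOnFailure) {
        boolean isLocal = clusterAlias.isEmpty(); // stand-in for LOCAL_CLUSTER_GROUP_KEY
        if (isLocal == false && skipOnFailure.test(clusterAlias)) {
            return ClusterStatus.SKIPPED;
        }
        return ClusterStatus.FAILED;
    }

    public static void main(String[] args) {
        Predicate<String> skipOnFailure = "remote_a"::equals;
        System.out.println(classify("remote_a", skipOnFailure)); // SKIPPED
        System.out.println(classify("remote_b", skipOnFailure)); // FAILED
        System.out.println(classify("", skipOnFailure));         // FAILED (local cluster)
    }
}

In the patch itself this decision surfaces as the SKIPPED or FAILED status passed to markClusterWithFinalStateAndNoShards in the hunks above.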
diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java index a98b0f3c52735..af9b8e0e4dac1 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/session/EsqlSession.java @@ -20,7 +20,6 @@ import org.elasticsearch.compute.data.Page; import org.elasticsearch.compute.operator.DriverCompletionInfo; import org.elasticsearch.compute.operator.FailureCollector; -import org.elasticsearch.core.CheckedFunction; import org.elasticsearch.core.Releasables; import org.elasticsearch.index.IndexMode; import org.elasticsearch.index.mapper.IndexModeFieldMapper; @@ -332,7 +331,7 @@ static void handleFieldCapsFailures( assert cluster.getStatus() != EsqlExecutionInfo.Cluster.Status.SUCCESSFUL : "can't mark a cluster success with failures"; continue; } - if (allowPartialResults == false && executionInfo.isSkipUnavailable(clusterAlias) == false) { + if (allowPartialResults == false && executionInfo.shouldSkipOnFailure(clusterAlias) == false) { for (FieldCapabilitiesFailure failure : e.getValue()) { failureCollector.unwrapAndCollect(failure.getException()); } @@ -372,66 +371,22 @@ public void analyzedPlan( return; } - CheckedFunction analyzeAction = (l) -> { - handleFieldCapsFailures(configuration.allowPartialResults(), executionInfo, l.indices.failures()); - Analyzer analyzer = new Analyzer( - new AnalyzerContext(configuration, functionRegistry, l.indices, l.lookupIndices, l.enrichResolution, l.inferenceResolution), - verifier - ); - LogicalPlan plan = analyzer.analyze(parsed); - plan.setAnalyzed(); - return plan; - }; - PreAnalyzer.PreAnalysis preAnalysis = preAnalyzer.preAnalyze(parsed); var unresolvedPolicies = preAnalysis.enriches.stream().map(EnrichPolicyResolver.UnresolvedPolicy::from).collect(toSet()); EsqlCCSUtils.initCrossClusterState(indicesExpressionGrouper, verifier.licenseState(), preAnalysis.indices, executionInfo); - var listener = SubscribableListener.newForked( - l -> enrichPolicyResolver.resolvePolicies(unresolvedPolicies, executionInfo, l) - ) + var listener = SubscribableListener. 
// + newForked(l -> enrichPolicyResolver.resolvePolicies(unresolvedPolicies, executionInfo, l)) .andThenApply(enrichResolution -> FieldNameUtils.resolveFieldNames(parsed, enrichResolution)) .andThen((l, preAnalysisResult) -> resolveInferences(parsed, preAnalysisResult, l)); // first resolve the lookup indices, then the main indices for (var index : preAnalysis.lookupIndices) { listener = listener.andThen((l, preAnalysisResult) -> preAnalyzeLookupIndex(index, preAnalysisResult, executionInfo, l)); } - listener.andThen((l, result) -> { - // resolve the main indices - preAnalyzeMainIndices(preAnalysis, executionInfo, result, requestFilter, l); - }).andThen((l, result) -> { - // TODO in follow-PR (for skip_unavailable handling of missing concrete indexes) add some tests for - // invalid index resolution to updateExecutionInfo - // If we run out of clusters to search due to unavailability we can stop the analysis right here - if (result.indices.isValid() && allCCSClustersSkipped(executionInfo, result, logicalPlanListener)) return; - // whatever tuple we have here (from CCS-special handling or from the original pre-analysis), pass it on to the next step - l.onResponse(result); - }).andThen((l, result) -> { - // first attempt (maybe the only one) at analyzing the plan - analyzeAndMaybeRetry(analyzeAction, requestFilter, result, executionInfo, logicalPlanListener, l); - }).andThen((l, result) -> { - assert requestFilter != null : "The second pre-analysis shouldn't take place when there is no index filter in the request"; - - // here the requestFilter is set to null, performing the pre-analysis after the first step failed - preAnalyzeMainIndices(preAnalysis, executionInfo, result, null, l); - }).andThen((l, result) -> { - assert requestFilter != null : "The second analysis shouldn't take place when there is no index filter in the request"; - LOGGER.debug("Analyzing the plan (second attempt, without filter)"); - LogicalPlan plan; - try { - // the order here is tricky - if the cluster has been filtered and later became unavailable, - // do we want to declare it successful or skipped? For now, unavailability takes precedence. - EsqlCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, result.indices.failures()); - EsqlCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, result.indices, false); - plan = analyzeAction.apply(result); - } catch (Exception e) { - l.onFailure(e); - return; - } - LOGGER.debug("Analyzed plan (second attempt, without filter):\n{}", plan); - l.onResponse(plan); - }).addListener(logicalPlanListener); + listener.andThen((l, result) -> preAnalyzeMainIndices(preAnalysis, executionInfo, result, requestFilter, l)) + .andThen((l, result) -> analyzeWithRetry(parsed, requestFilter, preAnalysis, executionInfo, result, l)) + .addListener(logicalPlanListener); } private void preAnalyzeLookupIndex( @@ -475,7 +430,7 @@ private void preAnalyzeLookupIndex( private void skipClusterOrError(String clusterAlias, EsqlExecutionInfo executionInfo, String message) { VerificationException error = new VerificationException(message); // If we can, skip the cluster and mark it as such - if (executionInfo.isSkipUnavailable(clusterAlias)) { + if (executionInfo.shouldSkipOnFailure(clusterAlias)) { EsqlCCSUtils.markClusterWithFinalStateAndNoShards(executionInfo, clusterAlias, EsqlExecutionInfo.Cluster.Status.SKIPPED, error); } else { throw error; @@ -699,42 +654,28 @@ private void preAnalyzeMainIndices( } } - /** - * Check if there are any clusters to search. 
- * - * @return true if there are no clusters to search, false otherwise - */ - private boolean allCCSClustersSkipped( + private void analyzeWithRetry( + LogicalPlan parsed, + QueryBuilder requestFilter, + PreAnalyzer.PreAnalysis preAnalysis, EsqlExecutionInfo executionInfo, PreAnalysisResult result, - ActionListener logicalPlanListener + ActionListener listener ) { - IndexResolution indexResolution = result.indices; - EsqlCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, indexResolution.failures()); - if (executionInfo.isCrossClusterSearch() - && executionInfo.getClusterStates(EsqlExecutionInfo.Cluster.Status.RUNNING).findAny().isEmpty()) { - // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel Exception - // to let the LogicalPlanActionListener decide how to proceed - LOGGER.debug("No more clusters to search, ending analysis stage"); - logicalPlanListener.onFailure(new NoClustersToSearchException()); - return true; + if (result.indices.isValid()) { + EsqlCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, result.indices.failures()); + if (executionInfo.isCrossClusterSearch() + && executionInfo.getClusterStates(EsqlExecutionInfo.Cluster.Status.RUNNING).findAny().isEmpty()) { + // for a CCS, if all clusters have been marked as SKIPPED, nothing to search so send a sentinel Exception + // to let the LogicalPlanActionListener decide how to proceed + LOGGER.debug("No more clusters to search, ending analysis stage"); + listener.onFailure(new NoClustersToSearchException()); + return; + } } - return false; - } - - private static void analyzeAndMaybeRetry( - CheckedFunction analyzeAction, - QueryBuilder requestFilter, - PreAnalysisResult result, - EsqlExecutionInfo executionInfo, - ActionListener logicalPlanListener, - ActionListener l - ) { - LogicalPlan plan = null; - var filterPresentMessage = requestFilter == null ? "without" : "with"; - var attemptMessage = requestFilter == null ? "the only" : "first"; - LOGGER.debug("Analyzing the plan ({} attempt, {} filter)", attemptMessage, filterPresentMessage); + var description = requestFilter == null ? "the only attempt without filter" : "first attempt with filter"; + LOGGER.debug("Analyzing the plan ({})", description); try { if (result.indices.isValid() || requestFilter != null) { @@ -742,32 +683,35 @@ private static void analyzeAndMaybeRetry( // when the resolution result is not valid for a different reason. EsqlCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, result.indices, requestFilter != null); } - plan = analyzeAction.apply(result); - } catch (Exception e) { - if (e instanceof VerificationException ve) { - LOGGER.debug( - "Analyzing the plan ({} attempt, {} filter) failed with {}", - attemptMessage, - filterPresentMessage, - ve.getDetailedMessage() - ); - if (requestFilter == null) { - // if the initial request didn't have a filter, then just pass the exception back to the user - logicalPlanListener.onFailure(ve); - } else { - // interested only in a VerificationException, but this time we are taking out the index filter - // to try and make the index resolution work without any index filtering. In the next step... 
to be continued - l.onResponse(result); - } + LogicalPlan plan = analyzedPlan(parsed, result, executionInfo); + LOGGER.debug("Analyzed plan ({}):\n{}", description, plan); + // the analysis succeeded from the first attempt, irrespective if it had a filter or not, just continue with the planning + listener.onResponse(plan); + } catch (VerificationException ve) { + LOGGER.debug("Analyzing the plan ({}) failed with {}", description, ve.getDetailedMessage()); + if (requestFilter == null) { + // if the initial request didn't have a filter, then just pass the exception back to the user + listener.onFailure(ve); } else { - // if the query failed with any other type of exception, then just pass the exception back to the user - logicalPlanListener.onFailure(e); + // retrying and make the index resolution work without any index filtering. + preAnalyzeMainIndices(preAnalysis, executionInfo, result, null, listener.delegateFailure((l, r) -> { + LOGGER.debug("Analyzing the plan (second attempt, without filter)"); + try { + // the order here is tricky - if the cluster has been filtered and later became unavailable, + // do we want to declare it successful or skipped? For now, unavailability takes precedence. + EsqlCCSUtils.updateExecutionInfoWithUnavailableClusters(executionInfo, r.indices.failures()); + EsqlCCSUtils.updateExecutionInfoWithClustersWithNoMatchingIndices(executionInfo, r.indices, false); + LogicalPlan plan = analyzedPlan(parsed, r, executionInfo); + LOGGER.debug("Analyzed plan (second attempt without filter):\n{}", plan); + l.onResponse(plan); + } catch (Exception e) { + l.onFailure(e); + } + })); } - return; + } catch (Exception e) { + listener.onFailure(e); } - LOGGER.debug("Analyzed plan ({} attempt, {} filter):\n{}", attemptMessage, filterPresentMessage, plan); - // the analysis succeeded from the first attempt, irrespective if it had a filter or not, just continue with the planning - logicalPlanListener.onResponse(plan); } private void resolveInferences(LogicalPlan plan, PreAnalysisResult preAnalysisResult, ActionListener l) { @@ -792,6 +736,17 @@ private PhysicalPlan logicalPlanToPhysicalPlan(LogicalPlan optimizedPlan, EsqlQu return EstimatesRowSize.estimateRowSize(0, physicalPlan); } + private LogicalPlan analyzedPlan(LogicalPlan parsed, PreAnalysisResult r, EsqlExecutionInfo executionInfo) throws Exception { + handleFieldCapsFailures(configuration.allowPartialResults(), executionInfo, r.indices.failures()); + Analyzer analyzer = new Analyzer( + new AnalyzerContext(configuration, functionRegistry, r.indices, r.lookupIndices, r.enrichResolution, r.inferenceResolution), + verifier + ); + LogicalPlan plan = analyzer.analyze(parsed); + plan.setAnalyzed(); + return plan; + } + public LogicalPlan optimizedPlan(LogicalPlan logicalPlan) { if (logicalPlan.preOptimized() == false) { throw new IllegalStateException("Expected pre-optimized plan"); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java index e040067458408..7307285ec37a7 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/AnalyzerTests.java @@ -59,6 +59,7 @@ import org.elasticsearch.xpack.esql.expression.function.scalar.string.Concat; import org.elasticsearch.xpack.esql.expression.function.scalar.string.Substring; import 
org.elasticsearch.xpack.esql.expression.function.vector.Knn; +import org.elasticsearch.xpack.esql.expression.function.vector.Magnitude; import org.elasticsearch.xpack.esql.expression.function.vector.VectorSimilarityFunction; import org.elasticsearch.xpack.esql.expression.predicate.operator.arithmetic.Add; import org.elasticsearch.xpack.esql.expression.predicate.operator.comparison.Equals; @@ -2432,6 +2433,30 @@ private void checkNoDenseVectorFailsSimilarityFunction(String similarityFunction ); } + public void testMagnitudePlanWithDenseVectorImplicitCasting() { + var plan = analyze(String.format(Locale.ROOT, """ + from test | eval scalar = v_magnitude([1, 2, 3]) + """), "mapping-dense_vector.json"); + + var limit = as(plan, Limit.class); + var eval = as(limit.child(), Eval.class); + var alias = as(eval.fields().get(0), Alias.class); + assertEquals("scalar", alias.name()); + var scalar = as(alias.child(), Magnitude.class); + var child = as(scalar.field(), Literal.class); + assertThat(child.dataType(), is(DENSE_VECTOR)); + assertThat(child.value(), equalTo(List.of(1.0f, 2.0f, 3.0f))); + } + + public void testNoDenseVectorFailsForMagnitude() { + var query = String.format(Locale.ROOT, "row a = 1 | eval scalar = v_magnitude(0.342)"); + VerificationException error = expectThrows(VerificationException.class, () -> analyze(query)); + assertThat( + error.getMessage(), + containsString("first argument of [v_magnitude(0.342)] must be [dense_vector], found value [0.342] type [double]") + ); + } + public void testRateRequiresCounterTypes() { assumeTrue("rate requires snapshot builds", Build.current().isSnapshot()); Analyzer analyzer = analyzer(tsdbIndexResolution()); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java index 32b4ccb768efe..c43f875327872 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/analysis/VerifierTests.java @@ -29,7 +29,6 @@ import org.elasticsearch.xpack.esql.parser.ParsingException; import org.elasticsearch.xpack.esql.parser.QueryParam; import org.elasticsearch.xpack.esql.parser.QueryParams; -import org.elasticsearch.xpack.esql.plan.logical.Enrich; import java.util.ArrayList; import java.util.LinkedHashMap; @@ -39,13 +38,9 @@ import java.util.Map; import java.util.Set; -import static org.elasticsearch.xpack.core.enrich.EnrichPolicy.MATCH_TYPE; import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_CFG; -import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; import static org.elasticsearch.xpack.esql.EsqlTestUtils.paramAsConstant; import static org.elasticsearch.xpack.esql.EsqlTestUtils.withDefaultLimitWarning; -import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultLookupResolution; -import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadEnrichPolicyResolution; import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; import static org.elasticsearch.xpack.esql.core.type.DataType.BOOLEAN; import static org.elasticsearch.xpack.esql.core.type.DataType.CARTESIAN_POINT; @@ -2272,43 +2267,6 @@ public void testFullTextFunctionsInStats() { } } - public void testRemoteLookupJoinWithPipelineBreaker() { - assumeTrue("Remote LOOKUP JOIN not enabled", EsqlCapabilities.Cap.ENABLE_LOOKUP_JOIN_ON_REMOTE.isEnabled()); - var analyzer = 
AnalyzerTestUtils.analyzer(loadMapping("mapping-default.json", "test,remote:test")); - assertEquals( - "1:92: LOOKUP JOIN with remote indices can't be executed after [STATS c = COUNT(*) by languages]@1:25", - error( - "FROM test,remote:test | STATS c = COUNT(*) by languages " - + "| EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code", - analyzer - ) - ); - - assertEquals( - "1:72: LOOKUP JOIN with remote indices can't be executed after [SORT emp_no]@1:25", - error( - "FROM test,remote:test | SORT emp_no | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code", - analyzer - ) - ); - - assertEquals( - "1:68: LOOKUP JOIN with remote indices can't be executed after [LIMIT 2]@1:25", - error( - "FROM test,remote:test | LIMIT 2 | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code", - analyzer - ) - ); - assertEquals( - "1:96: LOOKUP JOIN with remote indices can't be executed after [ENRICH _coordinator:languages_coord]@1:58", - error( - "FROM test,remote:test | EVAL language_code = languages | ENRICH _coordinator:languages_coord " - + "| LOOKUP JOIN languages_lookup ON language_code", - analyzer - ) - ); - } - public void testRemoteLookupJoinIsSnapshot() { // TODO: remove when we allow remote joins in release builds assumeTrue("Remote LOOKUP JOIN not enabled", EsqlCapabilities.Cap.ENABLE_LOOKUP_JOIN_ON_REMOTE.isEnabled()); @@ -2325,148 +2283,6 @@ public void testRemoteLookupJoinIsDisabled() { assertThat(e.getMessage(), containsString("remote clusters are not supported with LOOKUP JOIN")); } - public void testRemoteEnrichAfterLookupJoin() { - EnrichResolution enrichResolution = new EnrichResolution(); - loadEnrichPolicyResolution( - enrichResolution, - Enrich.Mode.REMOTE, - MATCH_TYPE, - "languages", - "language_code", - "languages_idx", - "mapping-languages.json" - ); - var analyzer = AnalyzerTestUtils.analyzer( - loadMapping("mapping-default.json", "test"), - defaultLookupResolution(), - enrichResolution, - TEST_VERIFIER - ); - - String lookupCommand = randomBoolean() ? 
"LOOKUP JOIN test_lookup ON languages" : "LOOKUP JOIN languages_lookup ON language_code"; - - query(Strings.format(""" - FROM test - | EVAL language_code = languages - | ENRICH _remote:languages ON language_code - | %s - """, lookupCommand), analyzer); - - String err = error(Strings.format(""" - FROM test - | EVAL language_code = languages - | %s - | ENRICH _remote:languages ON language_code - """, lookupCommand), analyzer); - assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after LOOKUP JOIN")); - - err = error(Strings.format(""" - FROM test - | EVAL language_code = languages - | %s - | ENRICH _remote:languages ON language_code - | %s - """, lookupCommand, lookupCommand), analyzer); - assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after LOOKUP JOIN")); - - err = error(Strings.format(""" - FROM test - | EVAL language_code = languages - | %s - | EVAL x = 1 - | MV_EXPAND language_code - | ENRICH _remote:languages ON language_code - """, lookupCommand), analyzer); - assertThat(err, containsString("6:3: ENRICH with remote policy can't be executed after LOOKUP JOIN")); - } - - public void testRemoteEnrichAfterCoordinatorOnlyPlans() { - EnrichResolution enrichResolution = new EnrichResolution(); - loadEnrichPolicyResolution( - enrichResolution, - Enrich.Mode.REMOTE, - MATCH_TYPE, - "languages", - "language_code", - "languages_idx", - "mapping-languages.json" - ); - loadEnrichPolicyResolution( - enrichResolution, - Enrich.Mode.COORDINATOR, - MATCH_TYPE, - "languages", - "language_code", - "languages_idx", - "mapping-languages.json" - ); - var analyzer = AnalyzerTestUtils.analyzer( - loadMapping("mapping-default.json", "test"), - defaultLookupResolution(), - enrichResolution, - TEST_VERIFIER - ); - - query(""" - FROM test - | EVAL language_code = languages - | ENRICH _remote:languages ON language_code - | STATS count(*) BY language_name - """, analyzer); - - String err = error(""" - FROM test - | EVAL language_code = languages - | STATS count(*) BY language_code - | ENRICH _remote:languages ON language_code - """, analyzer); - assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after STATS")); - - err = error(""" - FROM test - | EVAL language_code = languages - | STATS count(*) BY language_code - | EVAL x = 1 - | MV_EXPAND language_code - | ENRICH _remote:languages ON language_code - """, analyzer); - assertThat(err, containsString("6:3: ENRICH with remote policy can't be executed after STATS")); - - query(""" - FROM test - | EVAL language_code = languages - | ENRICH _remote:languages ON language_code - | ENRICH _coordinator:languages ON language_code - """, analyzer); - - err = error(""" - FROM test - | EVAL language_code = languages - | ENRICH _coordinator:languages ON language_code - | ENRICH _remote:languages ON language_code - """, analyzer); - assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after another ENRICH with coordinator policy")); - - err = error(""" - FROM test - | EVAL language_code = languages - | ENRICH _coordinator:languages ON language_code - | EVAL x = 1 - | MV_EXPAND language_name - | DISSECT language_name "%{foo}" - | ENRICH _remote:languages ON language_code - """, analyzer); - assertThat(err, containsString("7:3: ENRICH with remote policy can't be executed after another ENRICH with coordinator policy")); - - err = error(""" - FROM test - | FORK (WHERE languages == 1) (WHERE languages == 2) - | EVAL language_code = languages - | ENRICH 
_remote:languages ON language_code - """, analyzer); - assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after FORK")); - } - private void checkFullTextFunctionsInStats(String functionInvocation) { query("from test | stats c = max(id) where " + functionInvocation, fullTextAnalyzer); query("from test | stats c = max(id) where " + functionInvocation + " or length(title) > 10", fullTextAnalyzer); @@ -2484,28 +2300,28 @@ private void checkFullTextFunctionsInStats(String functionInvocation) { public void testVectorSimilarityFunctionsNullArgs() throws Exception { if (EsqlCapabilities.Cap.COSINE_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkVectorSimilarityFunctionsNullArgs("v_cosine(null, vector)", "first"); - checkVectorSimilarityFunctionsNullArgs("v_cosine(vector, null)", "second"); + checkVectorFunctionsNullArgs("v_cosine(null, vector)"); + checkVectorFunctionsNullArgs("v_cosine(vector, null)"); } if (EsqlCapabilities.Cap.DOT_PRODUCT_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkVectorSimilarityFunctionsNullArgs("v_dot_product(null, vector)", "first"); - checkVectorSimilarityFunctionsNullArgs("v_dot_product(vector, null)", "second"); + checkVectorFunctionsNullArgs("v_dot_product(null, vector)"); + checkVectorFunctionsNullArgs("v_dot_product(vector, null)"); } if (EsqlCapabilities.Cap.L1_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkVectorSimilarityFunctionsNullArgs("v_l1_norm(null, vector)", "first"); - checkVectorSimilarityFunctionsNullArgs("v_l1_norm(vector, null)", "second"); + checkVectorFunctionsNullArgs("v_l1_norm(null, vector)"); + checkVectorFunctionsNullArgs("v_l1_norm(vector, null)"); } if (EsqlCapabilities.Cap.L2_NORM_VECTOR_SIMILARITY_FUNCTION.isEnabled()) { - checkVectorSimilarityFunctionsNullArgs("v_l2_norm(null, vector)", "first"); - checkVectorSimilarityFunctionsNullArgs("v_l2_norm(vector, null)", "second"); + checkVectorFunctionsNullArgs("v_l2_norm(null, vector)"); + checkVectorFunctionsNullArgs("v_l2_norm(vector, null)"); + } + if (EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION.isEnabled()) { + checkVectorFunctionsNullArgs("v_magnitude(null)"); } } - private void checkVectorSimilarityFunctionsNullArgs(String functionInvocation, String argOrdinal) throws Exception { - assertThat( - error("from test | eval similarity = " + functionInvocation, fullTextAnalyzer), - containsString(argOrdinal + " argument of [" + functionInvocation + "] cannot be null, received [null]") - ); + private void checkVectorFunctionsNullArgs(String functionInvocation) throws Exception { + query("from test | eval similarity = " + functionInvocation, fullTextAnalyzer); } private void query(String query) { diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorSimilarityFunctionTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorSimilarityFunctionTestCase.java index 329eba63046f4..6b0faaaf6d53e 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorSimilarityFunctionTestCase.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorSimilarityFunctionTestCase.java @@ -10,9 +10,7 @@ import com.carrotsearch.randomizedtesting.annotations.Name; import org.elasticsearch.xpack.esql.action.EsqlCapabilities; -import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; import 
org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; -import org.hamcrest.Matcher; import org.junit.Before; import java.util.ArrayList; @@ -23,7 +21,7 @@ import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; import static org.hamcrest.Matchers.equalTo; -public abstract class AbstractVectorSimilarityFunctionTestCase extends AbstractScalarFunctionTestCase { +public abstract class AbstractVectorSimilarityFunctionTestCase extends AbstractVectorTestCase { protected AbstractVectorSimilarityFunctionTestCase(@Name("TestCase") Supplier testCaseSupplier) { this.testCase = testCaseSupplier.get(); @@ -69,34 +67,4 @@ protected static Iterable similarityParameters( return parameterSuppliersFromTypedData(suppliers); } - - private static float[] listToFloatArray(List floatList) { - float[] floatArray = new float[floatList.size()]; - for (int i = 0; i < floatList.size(); i++) { - floatArray[i] = floatList.get(i); - } - return floatArray; - } - - protected double calculateSimilarity(List left, List right) { - return 0; - } - - /** - * @return A random dense vector for testing - * @param dimensions - */ - private static List randomDenseVector(int dimensions) { - List vector = new ArrayList<>(); - for (int i = 0; i < dimensions; i++) { - vector.add(randomFloat()); - } - return vector; - } - - @Override - protected Matcher allNullsMatcher() { - // A null value on the left or right vector. Similarity is 0 - return equalTo(0.0); - } } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorTestCase.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorTestCase.java new file mode 100644 index 0000000000000..ddddcec21ea30 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/AbstractVectorTestCase.java @@ -0,0 +1,37 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import org.elasticsearch.xpack.esql.expression.function.AbstractScalarFunctionTestCase; + +import java.util.ArrayList; +import java.util.List; + +public abstract class AbstractVectorTestCase extends AbstractScalarFunctionTestCase { + + protected static float[] listToFloatArray(List floatList) { + float[] floatArray = new float[floatList.size()]; + for (int i = 0; i < floatList.size(); i++) { + floatArray[i] = floatList.get(i); + } + return floatArray; + } + + /** + * @return A random dense vector for testing + * @param dimensions + */ + protected static List randomDenseVector(int dimensions) { + List vector = new ArrayList<>(); + for (int i = 0; i < dimensions; i++) { + vector.add(randomFloat()); + } + return vector; + } + +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/MagnitudeTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/MagnitudeTests.java new file mode 100644 index 0000000000000..651130a2c1be1 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/expression/function/vector/MagnitudeTests.java @@ -0,0 +1,76 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. + */ + +package org.elasticsearch.xpack.esql.expression.function.vector; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; + +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.core.expression.Expression; +import org.elasticsearch.xpack.esql.core.tree.Source; +import org.elasticsearch.xpack.esql.expression.function.FunctionName; +import org.elasticsearch.xpack.esql.expression.function.TestCaseSupplier; +import org.junit.Before; + +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static org.elasticsearch.xpack.esql.core.type.DataType.DENSE_VECTOR; +import static org.elasticsearch.xpack.esql.core.type.DataType.DOUBLE; +import static org.hamcrest.Matchers.equalTo; + +@FunctionName("v_magnitude") +public class MagnitudeTests extends AbstractVectorTestCase { + + public MagnitudeTests(@Name("TestCase") Supplier testCaseSupplier) { + this.testCase = testCaseSupplier.get(); + } + + @ParametersFactory + public static Iterable parameters() { + return scalarParameters(Magnitude.class.getSimpleName(), Magnitude.SCALAR_FUNCTION); + } + + protected EsqlCapabilities.Cap capability() { + return EsqlCapabilities.Cap.MAGNITUDE_SCALAR_VECTOR_FUNCTION; + } + + @Override + protected Expression build(Source source, List args) { + return new Magnitude(source, args.get(0)); + } + + @Before + public void checkCapability() { + assumeTrue("Scalar function is not enabled", capability().isEnabled()); + } + + protected static Iterable scalarParameters(String className, Magnitude.ScalarEvaluatorFunction scalarFunction) { + + final String evaluatorName = className + "Evaluator" + "[child=Attribute[channel=0]]"; + + List suppliers = new ArrayList<>(); + + // Basic test with a dense vector. 
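
The supplier below compares the evaluator output against Magnitude.SCALAR_FUNCTION on a random dense vector. As a point of reference, the magnitude here is presumably the Euclidean (L2) norm (the analyzer test earlier in this patch pushes [1, 2, 3] through v_magnitude), so a plain-Java cross-check of that assumed semantics looks like:

final class MagnitudeSketch {

    // Euclidean (L2) norm: the square root of the sum of squared components.
    static double magnitude(float[] vector) {
        double sumOfSquares = 0.0;
        for (float component : vector) {
            sumOfSquares += (double) component * component;
        }
        return Math.sqrt(sumOfSquares);
    }

    public static void main(String[] args) {
        // For [1, 2, 3] the expected result is sqrt(14), roughly 3.7417.
        System.out.println(magnitude(new float[] { 1f, 2f, 3f }));
    }
}
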
+ suppliers.add(new TestCaseSupplier(List.of(DENSE_VECTOR), () -> { + int dimensions = between(64, 128); + List input = randomDenseVector(dimensions); + float[] array = listToFloatArray(input); + double expected = scalarFunction.calculateScalar(array); + return new TestCaseSupplier.TestCase( + List.of(new TestCaseSupplier.TypedData(input, DENSE_VECTOR, "vector")), + evaluatorName, + DOUBLE, + equalTo(expected) + ); + })); + + return parameterSuppliersFromTypedData(suppliers); + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java index 830e4bd546c25..e3f13fc331cdc 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/LogicalPlanOptimizerTests.java @@ -6642,7 +6642,6 @@ public void testSimplifyComparisonArithmeticWithDisjunction() { doTestSimplifyComparisonArithmetics("12 * (-integer - 5) >= -120 OR integer < 5", "integer", LTE, 5); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/108388") public void testSimplifyComparisonArithmeticWithFloatsAndDirectionChange() { doTestSimplifyComparisonArithmetics("float / -2 < 4", "float", GT, -8d); doTestSimplifyComparisonArithmetics("float * -2 < 4", "float", GT, -2d); diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/OptimizerVerificationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/OptimizerVerificationTests.java new file mode 100644 index 0000000000000..fcfe4a0a2a455 --- /dev/null +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/OptimizerVerificationTests.java @@ -0,0 +1,327 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0; you may not use this file except in compliance with the Elastic License + * 2.0. 
+ */ + +package org.elasticsearch.xpack.esql.optimizer; + +import org.elasticsearch.common.Strings; +import org.elasticsearch.xpack.esql.EsqlTestUtils; +import org.elasticsearch.xpack.esql.VerificationException; +import org.elasticsearch.xpack.esql.action.EsqlCapabilities; +import org.elasticsearch.xpack.esql.analysis.Analyzer; +import org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils; +import org.elasticsearch.xpack.esql.analysis.EnrichResolution; +import org.elasticsearch.xpack.esql.parser.QueryParam; +import org.elasticsearch.xpack.esql.plan.logical.Enrich; +import org.elasticsearch.xpack.esql.plan.logical.LogicalPlan; + +import java.util.ArrayList; +import java.util.List; + +import static org.elasticsearch.xpack.core.enrich.EnrichPolicy.MATCH_TYPE; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.TEST_VERIFIER; +import static org.elasticsearch.xpack.esql.EsqlTestUtils.paramAsConstant; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.defaultLookupResolution; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadEnrichPolicyResolution; +import static org.elasticsearch.xpack.esql.analysis.AnalyzerTestUtils.loadMapping; +import static org.hamcrest.Matchers.containsString; +import static org.hamcrest.Matchers.instanceOf; + +public class OptimizerVerificationTests extends AbstractLogicalPlanOptimizerTests { + + private LogicalPlan plan(String query, Analyzer analyzer) { + var analyzed = analyzer.analyze(parser.createStatement(query, EsqlTestUtils.TEST_CFG)); + return logicalOptimizer.optimize(analyzed); + } + + private String error(String query, Analyzer analyzer, Object... params) { + List parameters = new ArrayList<>(); + for (Object param : params) { + if (param == null) { + parameters.add(paramAsConstant(null, null)); + } else if (param instanceof String) { + parameters.add(paramAsConstant(null, param)); + } else if (param instanceof Number) { + parameters.add(paramAsConstant(null, param)); + } else { + throw new IllegalArgumentException("VerifierTests don't support params of type " + param.getClass()); + } + } + Throwable e = expectThrows( + VerificationException.class, + "Expected error for query [" + query + "] but no error was raised", + () -> plan(query, analyzer) + ); + assertThat(e, instanceOf(VerificationException.class)); + + String message = e.getMessage(); + assertTrue(message.startsWith("Found ")); + + String pattern = "\nline "; + int index = message.indexOf(pattern); + return message.substring(index + pattern.length()); + } + + public void testRemoteEnrichAfterCoordinatorOnlyPlans() { + EnrichResolution enrichResolution = new EnrichResolution(); + loadEnrichPolicyResolution( + enrichResolution, + Enrich.Mode.REMOTE, + MATCH_TYPE, + "languages", + "language_code", + "languages_idx", + "mapping-languages.json" + ); + loadEnrichPolicyResolution( + enrichResolution, + Enrich.Mode.COORDINATOR, + MATCH_TYPE, + "languages", + "language_code", + "languages_idx", + "mapping-languages.json" + ); + var analyzer = AnalyzerTestUtils.analyzer( + loadMapping("mapping-default.json", "test"), + defaultLookupResolution(), + enrichResolution, + TEST_VERIFIER + ); + + String err; + + plan(""" + FROM test + | EVAL language_code = languages + | ENRICH _remote:languages ON language_code + | STATS count(*) BY language_name + """, analyzer); + + plan(""" + FROM test + | LIMIT 10 + | EVAL language_code = languages + | ENRICH _remote:languages ON language_code + | STATS count(*) BY language_name + """, analyzer); + + plan(""" + FROM test + | 
EVAL language_code = languages + | ENRICH _remote:languages ON language_code + | STATS count(*) BY language_name + | LIMIT 10 + """, analyzer); + + err = error(""" + FROM test + | EVAL language_code = languages + | STATS count(*) BY language_code + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after [STATS count(*) BY language_code]@3:3")); + + err = error(""" + FROM test + | EVAL language_code = languages + | INLINESTATS count(*) BY language_code + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat( + err, + containsString("4:3: ENRICH with remote policy can't be executed after [INLINESTATS count(*) BY language_code]@3:3") + ); + + err = error(""" + FROM test + | EVAL language_code = languages + | STATS count(*) BY language_code + | EVAL x = 1 + | MV_EXPAND language_code + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat(err, containsString("6:3: ENRICH with remote policy can't be executed after [STATS count(*) BY language_code]@3:3")); + + // Coordinator after remote is OK + plan(""" + FROM test + | EVAL language_code = languages + | ENRICH _remote:languages ON language_code + | ENRICH _coordinator:languages ON language_code + """, analyzer); + + err = error(""" + FROM test + | EVAL language_code = languages + | ENRICH _coordinator:languages ON language_code + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat( + err, + containsString("4:3: ENRICH with remote policy can't be executed after [ENRICH _coordinator:languages ON language_code]@3:3") + ); + + err = error(""" + FROM test + | EVAL language_code = languages + | ENRICH _coordinator:languages ON language_code + | EVAL x = 1 + | MV_EXPAND language_name + | DISSECT language_name "%{foo}" + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat( + err, + containsString("7:3: ENRICH with remote policy can't be executed after [ENRICH _coordinator:languages ON language_code]@3:3") + ); + + err = error(""" + FROM test + | FORK (WHERE languages == 1) (WHERE languages == 2) + | EVAL language_code = languages + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat( + err, + containsString( + "4:3: ENRICH with remote policy can't be executed after [FORK (WHERE languages == 1) (WHERE languages == 2)]@2:3" + ) + ); + + err = error(""" + FROM test + | COMPLETION language_code = "some prompt" WITH { "inference_id" : "completion-inference-id" } + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat( + err, + containsString( + "ENRICH with remote policy can't be executed after " + + "[COMPLETION language_code = \"some prompt\" WITH { \"inference_id\" : \"completion-inference-id\" }]@2:3" + ) + ); + + err = error(""" + FROM test + | RERANK language_code="test" ON languages WITH { "inference_id" : "reranking-inference-id" } + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat( + err, + containsString( + "ENRICH with remote policy can't be executed after " + + "[RERANK language_code=\"test\" ON languages WITH { \"inference_id\" : \"reranking-inference-id\" }]@2:3" + ) + ); + + err = error(""" + FROM test + | CHANGE_POINT salary ON languages + | EVAL language_code = languages + | ENRICH _remote:languages ON language_code + """, analyzer); + assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after [CHANGE_POINT salary ON languages]@2:3")); + } + + public void 
testRemoteEnrichAfterLookupJoin() { + EnrichResolution enrichResolution = new EnrichResolution(); + loadEnrichPolicyResolution( + enrichResolution, + Enrich.Mode.REMOTE, + MATCH_TYPE, + "languages", + "language_code", + "languages_idx", + "mapping-languages.json" + ); + var analyzer = AnalyzerTestUtils.analyzer( + loadMapping("mapping-default.json", "test"), + defaultLookupResolution(), + enrichResolution, + TEST_VERIFIER + ); + + String lookupCommand = randomBoolean() ? "LOOKUP JOIN test_lookup ON languages" : "LOOKUP JOIN languages_lookup ON language_code"; + + plan(Strings.format(""" + FROM test + | EVAL language_code = languages + | ENRICH _remote:languages ON language_code + | %s + """, lookupCommand), analyzer); + + String err = error(Strings.format(""" + FROM test + | EVAL language_code = languages + | %s + | ENRICH _remote:languages ON language_code + """, lookupCommand), analyzer); + assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after [" + lookupCommand + "]@3:3")); + + err = error(Strings.format(""" + FROM test + | EVAL language_code = languages + | %s + | ENRICH _remote:languages ON language_code + | %s + """, lookupCommand, lookupCommand), analyzer); + assertThat(err, containsString("4:3: ENRICH with remote policy can't be executed after [" + lookupCommand + "]@3:3")); + + err = error(Strings.format(""" + FROM test + | EVAL language_code = languages + | %s + | EVAL x = 1 + | MV_EXPAND language_code + | ENRICH _remote:languages ON language_code + """, lookupCommand), analyzer); + assertThat(err, containsString("6:3: ENRICH with remote policy can't be executed after [" + lookupCommand + "]@3:3")); + } + + public void testRemoteLookupJoinWithPipelineBreaker() { + assumeTrue("Remote LOOKUP JOIN not enabled", EsqlCapabilities.Cap.ENABLE_LOOKUP_JOIN_ON_REMOTE.isEnabled()); + var analyzer = AnalyzerTestUtils.analyzer(loadMapping("mapping-default.json", "test,remote:test")); + assertEquals( + "1:92: LOOKUP JOIN with remote indices can't be executed after [STATS c = COUNT(*) by languages]@1:25", + error( + "FROM test,remote:test | STATS c = COUNT(*) by languages " + + "| EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code", + analyzer + ) + ); + + assertEquals( + "1:72: LOOKUP JOIN with remote indices can't be executed after [SORT emp_no]@1:25", + error( + "FROM test,remote:test | SORT emp_no | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code", + analyzer + ) + ); + + assertEquals( + "1:68: LOOKUP JOIN with remote indices can't be executed after [LIMIT 2]@1:25", + error( + "FROM test,remote:test | LIMIT 2 | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code", + analyzer + ) + ); + + assertEquals( + "1:96: LOOKUP JOIN with remote indices can't be executed after [ENRICH _coordinator:languages_coord]@1:58", + error( + "FROM test,remote:test | EVAL language_code = languages | ENRICH _coordinator:languages_coord " + + "| LOOKUP JOIN languages_lookup ON language_code", + analyzer + ) + ); + + plan("FROM test,remote:test | EVAL language_code = languages | LOOKUP JOIN languages_lookup ON language_code | LIMIT 2", analyzer); + + // Since FORK, RERANK, COMPLETION and CHANGE_POINT are not supported on remote indices, we can't check them here against the remote + // LOOKUP JOIN + } +} diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java 
b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java index 3464e2e74217a..5ff6687145798 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/optimizer/PhysicalPlanOptimizerTests.java @@ -7004,7 +7004,10 @@ public void testAggThenEnrichRemote() { | eval employee_id = to_str(emp_no) | ENRICH _remote:departments """)); - assertThat(error.getMessage(), containsString("line 4:3: ENRICH with remote policy can't be executed after STATS")); + assertThat( + error.getMessage(), + containsString("line 4:3: ENRICH with remote policy can't be executed after [STATS size=count(*) BY emp_no]@2:3") + ); } public void testEnrichBeforeLimit() { @@ -7354,7 +7357,7 @@ public void testRejectRemoteEnrichAfterCoordinatorEnrich() { """)); assertThat( error.getMessage(), - containsString("ENRICH with remote policy can't be executed after another ENRICH with coordinator policy") + containsString("ENRICH with remote policy can't be executed after [ENRICH _coordinator:departments]@3:3") ); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java index 65587cf4d6876..cba3c3a7556e0 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/ClusterRequestTests.java @@ -57,7 +57,7 @@ protected Writeable.Reader instanceReader() { protected NamedWriteableRegistry getNamedWriteableRegistry() { List writeables = new ArrayList<>(); writeables.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); - writeables.addAll(new EsqlPlugin(Settings.EMPTY).getNamedWriteables()); + writeables.addAll(new EsqlPlugin().getNamedWriteables()); return new NamedWriteableRegistry(writeables); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSerializationTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSerializationTests.java index fe0e028db4c9c..1a1d981ca0ba1 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSerializationTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/plugin/DataNodeRequestSerializationTests.java @@ -60,7 +60,7 @@ protected Writeable.Reader instanceReader() { protected NamedWriteableRegistry getNamedWriteableRegistry() { List writeables = new ArrayList<>(); writeables.addAll(new SearchModule(Settings.EMPTY, List.of()).getNamedWriteables()); - writeables.addAll(new EsqlPlugin(Settings.EMPTY).getNamedWriteables()); + writeables.addAll(new EsqlPlugin().getNamedWriteables()); return new NamedWriteableRegistry(writeables); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtilsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtilsTests.java index dc7cf37559bfb..a12d26f48b608 100644 --- a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtilsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/session/EsqlCCSUtilsTests.java @@ -8,7 +8,6 @@ package org.elasticsearch.xpack.esql.session; import org.apache.lucene.index.CorruptIndexException; -import 
org.elasticsearch.ElasticsearchException; import org.elasticsearch.ElasticsearchStatusException; import org.elasticsearch.action.OriginalIndices; import org.elasticsearch.action.fieldcaps.FieldCapabilitiesFailure; @@ -21,7 +20,6 @@ import org.elasticsearch.license.XPackLicenseState; import org.elasticsearch.license.internal.XPackLicenseStatus; import org.elasticsearch.rest.RestStatus; -import org.elasticsearch.tasks.TaskCancelledException; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.transport.ConnectTransportException; import org.elasticsearch.transport.NoSeedNodeLeftException; @@ -49,13 +47,11 @@ import static org.elasticsearch.xpack.esql.core.tree.Source.EMPTY; import static org.elasticsearch.xpack.esql.session.EsqlCCSUtils.initCrossClusterState; -import static org.elasticsearch.xpack.esql.session.EsqlCCSUtils.shouldIgnoreRuntimeError; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThanOrEqualTo; import static org.hamcrest.Matchers.hasSize; -import static org.hamcrest.Matchers.is; public class EsqlCCSUtilsTests extends ESTestCase { @@ -766,35 +762,6 @@ private void assertLicenseCheckFails( assertThat(e.status(), equalTo(RestStatus.BAD_REQUEST)); } - public void testShouldIgnoreRuntimeError() { - Predicate skipUnPredicate = s -> s.equals(REMOTE1_ALIAS); - - EsqlExecutionInfo executionInfo = new EsqlExecutionInfo(skipUnPredicate, true); - executionInfo.swapCluster(LOCAL_CLUSTER_ALIAS, (k, v) -> new EsqlExecutionInfo.Cluster(LOCAL_CLUSTER_ALIAS, "logs*", false)); - executionInfo.swapCluster(REMOTE1_ALIAS, (k, v) -> new EsqlExecutionInfo.Cluster(REMOTE1_ALIAS, "*", true)); - executionInfo.swapCluster(REMOTE2_ALIAS, (k, v) -> new EsqlExecutionInfo.Cluster(REMOTE2_ALIAS, "mylogs1,mylogs2,logs*", false)); - - // remote1: skip_unavailable=true, so should ignore connect errors, but not others - assertThat( - shouldIgnoreRuntimeError(executionInfo, REMOTE1_ALIAS, new IllegalStateException("Unable to open any connections")), - is(true) - ); - assertThat(shouldIgnoreRuntimeError(executionInfo, REMOTE1_ALIAS, new TaskCancelledException("task cancelled")), is(true)); - assertThat(shouldIgnoreRuntimeError(executionInfo, REMOTE1_ALIAS, new ElasticsearchException("something is wrong")), is(true)); - // remote2: skip_unavailable=false, so should not ignore any errors - assertThat( - shouldIgnoreRuntimeError(executionInfo, REMOTE2_ALIAS, new IllegalStateException("Unable to open any connections")), - is(false) - ); - assertThat(shouldIgnoreRuntimeError(executionInfo, REMOTE2_ALIAS, new TaskCancelledException("task cancelled")), is(false)); - // same for local - assertThat( - shouldIgnoreRuntimeError(executionInfo, LOCAL_CLUSTER_ALIAS, new IllegalStateException("Unable to open any connections")), - is(false) - ); - assertThat(shouldIgnoreRuntimeError(executionInfo, LOCAL_CLUSTER_ALIAS, new TaskCancelledException("task cancelled")), is(false)); - } - private XPackLicenseStatus activeLicenseStatus(License.OperationMode operationMode) { return new XPackLicenseStatus(operationMode, true, null); } diff --git a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/telemetry/PlanExecutorMetricsTests.java b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/telemetry/PlanExecutorMetricsTests.java index 2d8151d8fc2a1..752e61c240cd5 100644 --- 
a/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/telemetry/PlanExecutorMetricsTests.java +++ b/x-pack/plugin/esql/src/test/java/org/elasticsearch/xpack/esql/telemetry/PlanExecutorMetricsTests.java @@ -150,14 +150,7 @@ public void testFailedMetric() { return null; }).when(esqlClient).execute(eq(EsqlResolveFieldsAction.TYPE), any(), any()); - var planExecutor = new PlanExecutor( - indexResolver, - MeterRegistry.NOOP, - new XPackLicenseState(() -> 0L), - mockQueryLog(), - List.of(), - Settings.EMPTY - ); + var planExecutor = new PlanExecutor(indexResolver, MeterRegistry.NOOP, new XPackLicenseState(() -> 0L), mockQueryLog(), List.of()); var enrichResolver = mockEnrichResolver(); var request = new EsqlQueryRequest(); diff --git a/x-pack/plugin/logsdb/build.gradle b/x-pack/plugin/logsdb/build.gradle index aebb860f9d5c3..4b0f98b5e17bc 100644 --- a/x-pack/plugin/logsdb/build.gradle +++ b/x-pack/plugin/logsdb/build.gradle @@ -15,7 +15,7 @@ apply plugin: 'elasticsearch.internal-yaml-rest-test' esplugin { name = 'logsdb' description = 'A plugin for logsdb related functionality' - classname ='org.elasticsearch.xpack.logsdb.LogsDBPlugin' + classname = 'org.elasticsearch.xpack.logsdb.LogsDBPlugin' extendedPlugins = ['x-pack-core'] } base { @@ -43,4 +43,10 @@ tasks.named("javaRestTest").configure { tasks.named('yamlRestTest') { usesDefaultDistribution("Requires a bunch of xpack plugins") + + if (buildParams.snapshotBuild == false) { + systemProperty 'tests.rest.blacklist', [ + '20_ignored_source/*' + ].join(',') + } } diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java index 86c435ba2a4e8..257689c8aa558 100644 --- a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java +++ b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/DataGenerationHelper.java @@ -18,6 +18,7 @@ import org.elasticsearch.datageneration.datasource.DataSourceHandler; import org.elasticsearch.datageneration.datasource.DataSourceRequest; import org.elasticsearch.datageneration.datasource.DataSourceResponse; +import org.elasticsearch.datageneration.datasource.MultifieldAddonHandler; import org.elasticsearch.datageneration.fields.PredefinedField; import org.elasticsearch.index.mapper.Mapper; import org.elasticsearch.test.ESTestCase; @@ -105,7 +106,8 @@ public DataSourceResponse.FieldTypeGenerator.FieldTypeInfo get() { } }); } - })); + })) + .withDataSourceHandlers(List.of(MultifieldAddonHandler.STRING_TYPE_HANDLER)); // Customize builder if necessary builderConfigurator.accept(specificationBuilder); diff --git a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java index db48748bba302..c4eb399a39243 100644 --- a/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java +++ b/x-pack/plugin/logsdb/src/javaRestTest/java/org/elasticsearch/xpack/logsdb/qa/StandardVersusLogsIndexModeChallengeRestIT.java @@ -143,6 +143,7 @@ public void testMatchAllQuery() throws IOException { assertTrue(matchResult.getMessage(), matchResult.isMatch()); } + @SuppressWarnings("unchecked") public void testRandomQueries() throws IOException { int 
numberOfDocuments = ESTestCase.randomIntBetween(10, 50); final List documents = generateDocuments(numberOfDocuments); diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/20_ignored_source.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/20_ignored_source.yml index 61d3c7c8971e0..023c59a576ec3 100644 --- a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/20_ignored_source.yml +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/20_ignored_source.yml @@ -3,7 +3,7 @@ setup: - skip: features: headers - requires: - cluster_features: [ "mapper.source.mode_from_index_setting" ] + cluster_features: [ "mapper.source.mode_from_index_setting" , "mapper.ignored_source_fields_per_entry" ] reason: "Source mode configured through index setting" - do: @@ -79,7 +79,7 @@ setup: stored_fields: [ _ignored_source ] - match: { hits.total.value: 1 } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } --- "fetch fields by name": @@ -96,7 +96,7 @@ setup: fields: [ _ignored_source ] - match: { hits.total.value: 1 } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } --- "fields and stored fields combination": @@ -113,11 +113,11 @@ setup: stored_fields: [ _ignored_source ] fields: [ _ignored_source ] query: - match_all: {} + match_all: { } - match: { hits.total.value: 1 } - match: { hits.hits.0.fields.object: null } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } --- "wildcard fields and stored fields combination": @@ -151,7 +151,7 @@ setup: fields: [ object ] - match: { hits.total.value: 1 } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } - match: { hits.hits.0.fields: null } --- @@ -170,7 +170,7 @@ setup: fields: [ _ignored_source ] - match: { hits.total.value: 1 } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } --- "ignored source via fields and wildcard stored fields": @@ -188,7 +188,7 @@ setup: fields: [ _ignored_source ] - match: { hits.total.value: 1 } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } --- "wildcard fields and ignored source via stored fields": @@ -206,12 +206,12 @@ setup: fields: [ "*" ] - match: { hits.total.value: 1 } - - match: { hits.hits.0._ignored_source.0: !!binary "BgAAAG9iamVjdHktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } + - match: { hits.hits.0._ignored_source.0: !!binary "AQZvYmplY3QAG3ktLS0KbmFtZTogImZvbyIKdmFsdWU6IDEwCg==" } --- ignored source array via fields: - requires: - cluster_features: [mapper.ignored_source_as_top_level_metadata_array_field] + cluster_features: [ 
mapper.ignored_source_as_top_level_metadata_array_field ] reason: requires returning the _ignored_source field as a top level array metadata field - do: @@ -248,21 +248,21 @@ ignored source array via fields: body: fields: [ "_ignored_source" ] query: - match_all: {} + match_all: { } - match: { hits.total.value: 1 } - match: { hits.hits.0._source.name: "foo" } - match: { hits.hits.0._source.value: 1 } - match: { hits.hits.0._source.id: "f5t7-66gt" } - - match: { hits.hits.0._ignored: [ "id", "value" ]} + - match: { hits.hits.0._ignored: [ "id", "value" ] } - length: { hits.hits.0._ignored_source: 2 } - - match: { hits.hits.0._ignored_source.0: !!binary "AgAAAGlkU2Y1dDctNjZndA==" } # `id` field - - match: { hits.hits.0._ignored_source.1: !!binary "BQAAAHZhbHVlSQEAAAA=" } # `value` field + - match: { hits.hits.0._ignored_source.0: !!binary "AQJpZAAKU2Y1dDctNjZndA==" } # `id` field + - match: { hits.hits.0._ignored_source.1: !!binary "AQV2YWx1ZQAFSQEAAAA=" } # `value` field --- ignored source array via stored_fields: - requires: - cluster_features: [mapper.ignored_source_as_top_level_metadata_array_field] + cluster_features: [ mapper.ignored_source_as_top_level_metadata_array_field ] reason: requires returning the _ignored_source field as a top level array metadata field - do: @@ -301,13 +301,13 @@ ignored source array via stored_fields: # a wildcard request would not include it. stored_fields: [ "_ignored_source", "_source" ] query: - match_all: {} + match_all: { } - match: { hits.total.value: 1 } - match: { hits.hits.0._source.name: "foo" } - match: { hits.hits.0._source.value: 1 } - match: { hits.hits.0._source.id: "f5t7-66gt" } - - match: { hits.hits.0._ignored: [ "id", "value" ]} + - match: { hits.hits.0._ignored: [ "id", "value" ] } - length: { hits.hits.0._ignored_source: 2 } - - match: { hits.hits.0._ignored_source.0: !!binary "AgAAAGlkU2Y1dDctNjZndA==" } # `id` field - - match: { hits.hits.0._ignored_source.1: !!binary "BQAAAHZhbHVlSQEAAAA=" } # `value` field + - match: { hits.hits.0._ignored_source.0: !!binary "AQJpZAAKU2Y1dDctNjZndA==" } # `id` field + - match: { hits.hits.0._ignored_source.1: !!binary "AQV2YWx1ZQAFSQEAAAA=" } # `value` field diff --git a/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml new file mode 100644 index 0000000000000..49a2a05154fdc --- /dev/null +++ b/x-pack/plugin/logsdb/src/yamlRestTest/resources/rest-api-spec/test/53_esql_synthetic_source_mixed_disabled_fields.yml @@ -0,0 +1,129 @@ +--- +setup: + - requires: + test_runner_features: allowed_warnings_regex + + - do: + indices.create: + index: my-index + body: + settings: + index: + mode: logsdb + mappings: + properties: + "@timestamp": + type: date + host.name: + type: keyword + agent_id: + type: keyword + doc_values: false + store: false + process_id: + type: integer + doc_values: false + store: false + http_method: + type: keyword + doc_values: false + store: true + is_https: + type: boolean + doc_values: false + store: false + location: + type: geo_point + doc_values: false + store: false + message: + type: text + store: false + fields: + raw: + type: keyword + + - do: + bulk: + index: my-index + refresh: true + body: + - { "index": { } } + - { "@timestamp": "2024-02-12T10:30:00Z", "host.name": "foo", "agent_id": "darth-vader", "process_id": 101, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, 
"lon": -74.0060 }, "message": "No, I am your father." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:31:00Z", "host.name": "bar", "agent_id": "yoda", "process_id": 102, "http_method": "PUT", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "Do. Or do not. There is no try." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:32:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 103, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "May the force be with you." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:33:00Z", "host.name": "baz", "agent_id": "darth-vader", "process_id": 102, "http_method": "POST", "is_https": true, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "I find your lack of faith disturbing." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:34:00Z", "host.name": "baz", "agent_id": "yoda", "process_id": 104, "http_method": "POST", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "Wars not make one great." } + - { "index": { } } + - { "@timestamp": "2024-02-12T10:35:00Z", "host.name": "foo", "agent_id": "obi-wan", "process_id": 105, "http_method": "GET", "is_https": false, "location": { "lat": 40.7128, "lon": -74.0060 }, "message": "That's no moon. It's a space station." } + +--- +teardown: + - do: + indices.delete: + index: my-index + +--- +"Simple from": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | LIMIT 1' + + - match: { columns.0.name: "@timestamp" } + - match: { columns.0.type: "date" } + - match: { columns.1.name: "agent_id" } + - match: { columns.1.type: "keyword" } + - match: { columns.2.name: "host.name" } + - match: { columns.2.type: "keyword" } + - match: { columns.3.name: "http_method" } + - match: { columns.3.type: "keyword" } + - match: { columns.4.name: "is_https" } + - match: { columns.4.type: "boolean" } + - match: { columns.5.name: "location" } + - match: { columns.5.type: "geo_point" } + - match: { columns.6.name: "message" } + - match: { columns.6.type: "text" } + - match: { columns.7.name: "message.raw" } + - match: { columns.7.type: "keyword" } + - match: { columns.8.name: "process_id" } + - match: { columns.8.type: "integer" } + + - match: { values.0.0: "2024-02-12T10:31:00.000Z" } + - match: { values.0.1: "yoda" } + - match: { values.0.2: "bar" } + - match: { values.0.3: "PUT" } + - match: { values.0.4: false } + - match: { values.0.5: "POINT (-74.006 40.7128)" } + - match: { values.0.6: "Do. Or do not. There is no try." } + - match: { values.0.7: "Do. Or do not. There is no try." 
} + - match: { values.0.8: 102 } + +--- +"Simple from keyword fields": + - do: + esql.query: + body: + query: 'FROM my-index | SORT host.name, @timestamp | KEEP agent_id, http_method | LIMIT 10' + + - match: { columns.0.name: "agent_id" } + - match: { columns.0.type: "keyword" } + - match: { columns.1.name: "http_method" } + - match: { columns.1.type: "keyword" } + + - match: { values.0.0: "yoda" } + - match: { values.0.1: "PUT" } + - match: { values.1.0: "darth-vader" } + - match: { values.1.1: "POST" } + - match: { values.2.0: "yoda" } + - match: { values.2.1: "POST" } + - match: { values.3.0: "darth-vader" } + - match: { values.3.1: "GET" } + - match: { values.4.0: "obi-wan" } + - match: { values.4.1: "GET" } + - match: { values.5.0: "obi-wan" } + - match: { values.5.1: "GET" } diff --git a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java index 5061c8e303514..2962278117195 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java +++ b/x-pack/plugin/mapper-unsigned-long/src/main/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapper.java @@ -34,6 +34,7 @@ import org.elasticsearch.index.mapper.FallbackSyntheticSourceBlockLoader; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.IgnoreMalformedStoredValues; +import org.elasticsearch.index.mapper.IgnoredSourceFieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperBuilderContext; import org.elasticsearch.index.mapper.MapperParsingException; @@ -381,7 +382,11 @@ public BlockLoader blockLoader(BlockLoaderContext blContext) { } // Multi fields don't have fallback synthetic source. 
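
The mapper change below threads the index-created version into FallbackSyntheticSourceBlockLoader so the loader can choose the matching _ignored_source encoding. A stripped-down sketch of that kind of version-gated format selection, with hypothetical names and version numbers (the real lookup lives in IgnoredSourceFieldMapper.ignoredSourceFormat), could be:

final class IgnoredSourceFormatSketch {

    enum Format { LEGACY_SINGLE_FIELD, PER_ENTRY_WITH_FIELD_NAME }

    // Readers pick the encoding based on the version the index was created with, so
    // documents written by older nodes keep being decoded with the format they used.
    static Format formatFor(int indexCreatedVersionId, int perEntryFormatIntroducedIn) {
        return indexCreatedVersionId >= perEntryFormatIntroducedIn
            ? Format.PER_ENTRY_WITH_FIELD_NAME
            : Format.LEGACY_SINGLE_FIELD;
    }

    public static void main(String[] args) {
        System.out.println(formatFor(9_100, 9_000));  // PER_ENTRY_WITH_FIELD_NAME
        System.out.println(formatFor(8_500, 9_000));  // LEGACY_SINGLE_FIELD
    }
}
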
if (isSyntheticSource && blContext.parentField(name()) == null) { - return new FallbackSyntheticSourceBlockLoader(fallbackSyntheticSourceBlockLoaderReader(), name()) { + return new FallbackSyntheticSourceBlockLoader( + fallbackSyntheticSourceBlockLoaderReader(), + name(), + IgnoredSourceFieldMapper.ignoredSourceFormat(blContext.indexSettings().getIndexVersionCreated()) + ) { @Override public Builder builder(BlockFactory factory, int expectedCount) { return factory.longs(expectedCount); diff --git a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java index 538d8efe97446..a679f14680e4e 100644 --- a/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java +++ b/x-pack/plugin/mapper-unsigned-long/src/test/java/org/elasticsearch/xpack/unsignedlong/UnsignedLongFieldMapperTests.java @@ -432,7 +432,6 @@ protected Number randomNumber() { if (randomBoolean()) { return randomDouble(); } - assumeFalse("https://github.com/elastic/elasticsearch/issues/70585", true); return randomDoubleBetween(0L, Long.MAX_VALUE, true); } diff --git a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceIngestIT.java b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceIngestIT.java index 92d8774cd1842..0544534501ab2 100644 --- a/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceIngestIT.java +++ b/x-pack/plugin/ml/qa/native-multi-node-tests/src/javaRestTest/java/org/elasticsearch/xpack/ml/integration/InferenceIngestIT.java @@ -283,7 +283,6 @@ public void assertStatsWithCacheMisses(String modelId, int inferenceCount) throw assertThat(stats.toString(), (Integer) XContentMapValues.extractValue("inference_stats.cache_miss_count", stats), greaterThan(0)); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105955") public void testSimulate() throws IOException { String classificationModelId = "test_classification_simulate"; putModel(classificationModelId, CLASSIFICATION_CONFIG); @@ -388,7 +387,6 @@ public void testSimulate() throws IOException { assertThat(responseString, containsString("Could not find trained model [test_classification_missing]")); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105955") public void testSimulateWithDefaultMappedField() throws IOException { String classificationModelId = "test_classification_default_mapped_field"; putModel(classificationModelId, CLASSIFICATION_CONFIG); diff --git a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/BasicDistributedJobsIT.java b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/BasicDistributedJobsIT.java index b9f674a6ef624..842ed7a4a2a2e 100644 --- a/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/BasicDistributedJobsIT.java +++ b/x-pack/plugin/ml/src/internalClusterTest/java/org/elasticsearch/xpack/ml/integration/BasicDistributedJobsIT.java @@ -263,7 +263,6 @@ public void testDedicatedMlNode() throws Exception { }); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/102657") public void testMaxConcurrentJobAllocations() throws Exception { int numMlNodes 
= 2; internalCluster().ensureAtMostNumDataNodes(0); diff --git a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/filter/IPFilterTests.java b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/filter/IPFilterTests.java index b2f0695602061..491323d85d20f 100644 --- a/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/filter/IPFilterTests.java +++ b/x-pack/plugin/security/src/test/java/org/elasticsearch/xpack/security/transport/filter/IPFilterTests.java @@ -288,7 +288,6 @@ public void testThatAllAddressesAreAllowedWhenLicenseDisablesSecurity() { assertAddressIsDeniedForProfile("default", "8.8.8.8"); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/62298") public void testThatNodeStartsWithIPFilterDisabled() throws Exception { Settings settings = Settings.builder() .put("path.home", createTempDir()) diff --git a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/FetchSizeTestCase.java b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/FetchSizeTestCase.java index ec20cc3c64104..b8af2ae44623a 100644 --- a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/FetchSizeTestCase.java +++ b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/FetchSizeTestCase.java @@ -89,7 +89,6 @@ public void testScroll() throws SQLException { * Test for {@code SELECT} that is implemented as a scroll query. * In this test we don't retrieve all records and rely on close() to clean the cursor */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testIncompleteScroll() throws SQLException { try (Connection c = esJdbc(); Statement s = c.createStatement()) { s.setFetchSize(4); @@ -153,7 +152,6 @@ public void testScrollWithDatetimeAndTimezoneParam() throws IOException, SQLExce /** * Test for {@code SELECT} that is implemented as an aggregation. */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testAggregation() throws SQLException { try (Connection c = esJdbc(); Statement s = c.createStatement()) { s.setFetchSize(4); @@ -172,7 +170,6 @@ public void testAggregation() throws SQLException { /** * Test for nested documents. 
*/ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testNestedDocuments() throws SQLException { try (Connection c = esJdbc(); Statement s = c.createStatement()) { s.setFetchSize(5); diff --git a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/JdbcErrorsTestCase.java b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/JdbcErrorsTestCase.java index bd49ef0f6b39d..e962f35be2a94 100644 --- a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/JdbcErrorsTestCase.java +++ b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/JdbcErrorsTestCase.java @@ -78,7 +78,6 @@ public void testSelectProjectScoreInAggContext() throws IOException, SQLExceptio } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testSelectOrderByScoreInAggContext() throws IOException, SQLException { index("test", body -> body.field("foo", 1)); try (Connection c = esJdbc()) { @@ -112,7 +111,6 @@ public void testSelectScoreSubField() throws IOException, SQLException { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testHardLimitForSortOnAggregate() throws IOException, SQLException { index("test", body -> body.field("a", 1).field("b", 2)); try (Connection c = esJdbc()) { diff --git a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/PreparedStatementTestCase.java b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/PreparedStatementTestCase.java index 108a5cd0bc7c6..cb149703a6b3e 100644 --- a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/PreparedStatementTestCase.java +++ b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/PreparedStatementTestCase.java @@ -309,7 +309,6 @@ public void testWildcardField() throws IOException, SQLException { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testConstantKeywordField() throws IOException, SQLException { String mapping = """ "properties":{"id":{"type":"integer"},"text":{"type":"constant_keyword"}}"""; @@ -377,7 +376,6 @@ public void testTooMayParameters() throws IOException, SQLException { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testStringEscaping() throws SQLException { try (Connection connection = esJdbc()) { try (PreparedStatement statement = connection.prepareStatement("SELECT ?, ?, ?, ?")) { diff --git a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ResultSetTestCase.java b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ResultSetTestCase.java index 20fb342aa4b38..21bba56e91d28 100644 --- a/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ResultSetTestCase.java +++ b/x-pack/plugin/sql/qa/jdbc/src/main/java/org/elasticsearch/xpack/sql/qa/jdbc/ResultSetTestCase.java @@ -845,7 +845,6 @@ public void testGettingValidNumbersWithCastingFromUnsignedLong() throws IOExcept } // Double values testing - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testGettingValidDoubleWithoutCasting() throws IOException, SQLException { List doubleTestValues = createTestDataForNumericValueTests(ESTestCase::randomDouble); double random1 = doubleTestValues.get(0); @@ -1158,7 +1157,6 @@ public void 
testGettingValidBigDecimalFromFloatWithoutCasting() throws IOExcepti ); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testGettingValidBigDecimalFromDoubleWithoutCasting() throws IOException, SQLException { List doubleTestValues = createTestDataForNumericValueTests(ESTestCase::randomDouble); doWithQuery( @@ -1406,7 +1404,6 @@ public void testGettingDateWithoutCalendarWithNanos() throws Exception { }); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testGettingDateWithCalendar() throws Exception { long randomLongDate = randomMillisUpToYear9999(); setupDataForDateTimeTests(randomLongDate); @@ -1436,7 +1433,6 @@ public void testGettingDateWithCalendar() throws Exception { }); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testGettingDateWithCalendarWithNanos() throws Exception { assumeTrue( "Driver version [" + JDBC_DRIVER_VERSION + "] doesn't support DATETIME with nanosecond resolution]", @@ -1600,7 +1596,6 @@ public void testGettingTimestampWithoutCalendar() throws Exception { }); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testGettingTimestampWithoutCalendarWithNanos() throws Exception { assumeTrue( "Driver version [" + JDBC_DRIVER_VERSION + "] doesn't support DATETIME with nanosecond resolution]", @@ -1933,7 +1928,6 @@ public void testGetTimeType() throws IOException, SQLException { }); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testValidGetObjectCalls() throws IOException, SQLException { createIndexWithMapping("test"); updateMappingForNumericValuesTests("test"); diff --git a/x-pack/plugin/sql/qa/server/security/src/test/java/org/elasticsearch/xpack/sql/qa/security/JdbcSecurityIT.java b/x-pack/plugin/sql/qa/server/security/src/test/java/org/elasticsearch/xpack/sql/qa/security/JdbcSecurityIT.java index 6a46346f627ac..0e0c2bc8d78b4 100644 --- a/x-pack/plugin/sql/qa/server/security/src/test/java/org/elasticsearch/xpack/sql/qa/security/JdbcSecurityIT.java +++ b/x-pack/plugin/sql/qa/server/security/src/test/java/org/elasticsearch/xpack/sql/qa/security/JdbcSecurityIT.java @@ -345,7 +345,6 @@ public void testMetadataGetColumnsSingleFieldExcepted() throws Exception { } } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/105840") public void testMetadataGetColumnsDocumentExcluded() throws Exception { createUser("no_3s", "read_test_without_c_3"); diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/190_lookup_join.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/190_lookup_join.yml index 7c1606b90258c..d01ae6e3c44a1 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/190_lookup_join.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/190_lookup_join.yml @@ -276,7 +276,23 @@ lookup-no-key: - match: { error.type: "verification_exception" } - contains: { error.reason: "Unknown column [key] in right side of join" } +--- +lookup-no-key-only-key: + - requires: + capabilities: + - method: POST + path: /_query + parameters: [ ] + capabilities: [ enable_lookup_join_on_remote ] + reason: "recent lookup join fix" + - do: + esql.query: + body: + query: 'FROM test | LOOKUP JOIN test-lookup-no-key ON key | KEEP key' + catch: "bad_request" + - match: { error.type: "verification_exception" } + - contains: { error.reason: "Unknown column [key] in right side of 
join" } --- basic join on two fields: - requires: diff --git a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/230_folding.yml b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/230_folding.yml index 83236dff9163c..5bf382d67578f 100644 --- a/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/230_folding.yml +++ b/x-pack/plugin/src/yamlRestTest/resources/rest-api-spec/test/esql/230_folding.yml @@ -1,8 +1,13 @@ --- setup: - requires: + capabilities: + - method: POST + path: /_query + parameters: [ ] + capabilities: [ dense_vector_field_type ] test_runner_features: [ capabilities, contains ] - reason: "make sure new functions run where supported only" + reason: "make sure new functions run where supported only, dense vector needed for index creation" - do: indices.create: index: employees diff --git a/x-pack/plugin/watcher/qa/rest/src/javaRestTest/java/org/elasticsearch/smoketest/SmokeTestWatcherTestSuiteIT.java b/x-pack/plugin/watcher/qa/rest/src/javaRestTest/java/org/elasticsearch/smoketest/SmokeTestWatcherTestSuiteIT.java index d201ee13a05c8..cdda27bc4af0b 100644 --- a/x-pack/plugin/watcher/qa/rest/src/javaRestTest/java/org/elasticsearch/smoketest/SmokeTestWatcherTestSuiteIT.java +++ b/x-pack/plugin/watcher/qa/rest/src/javaRestTest/java/org/elasticsearch/smoketest/SmokeTestWatcherTestSuiteIT.java @@ -46,7 +46,6 @@ protected Settings restAdminSettings() { return Settings.builder().put(ThreadContext.PREFIX + ".Authorization", token).build(); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/52453") public void testMonitorClusterHealth() throws Exception { final String watchId = "cluster_health_watch"; diff --git a/x-pack/qa/oidc-op-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthIT.java b/x-pack/qa/oidc-op-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthIT.java index e80773d572b03..cd37d86626333 100644 --- a/x-pack/qa/oidc-op-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthIT.java +++ b/x-pack/qa/oidc-op-tests/src/javaRestTest/java/org/elasticsearch/xpack/security/authc/oidc/OpenIdConnectAuthIT.java @@ -121,7 +121,6 @@ public void testAuthenticateWithCodeFlowAndClientPost() throws Exception { verifyElasticsearchAccessTokenForCodeFlow(tokens.v1()); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/109871") public void testAuthenticateWithCodeFlowAndClientJwtPost() throws Exception { final PrepareAuthResponse prepareAuthResponse = getRedirectedFromFacilitator(REALM_NAME_CLIENT_JWT_AUTH); final String redirectUri = authenticateAtOP(prepareAuthResponse.getAuthUri()); diff --git a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/TransformSurvivesUpgradeIT.java b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/TransformSurvivesUpgradeIT.java index 87a9911bd80b6..ee9954429df1f 100644 --- a/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/TransformSurvivesUpgradeIT.java +++ b/x-pack/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/TransformSurvivesUpgradeIT.java @@ -69,7 +69,6 @@ protected RestClient buildClient(Settings settings, HttpHost[] hosts) throws IOE * The purpose of this test is to ensure that when a transform is running through a rolling upgrade it * keeps working and does not fail */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/84283") public void testTransformRollingUpgrade() throws 
Exception { Request adjustLoggingLevels = new Request("PUT", "/_cluster/settings"); adjustLoggingLevels.setJsonEntity(""" From 814101f11780e04053184867606f9c780ec1a4ca Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Fri, 15 Aug 2025 12:31:59 +0100 Subject: [PATCH 2/9] Rename references to `skip_unavailable` --- .../action/search/TransportSearchAction.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java index 86a97405ca549..0233597033180 100644 --- a/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java +++ b/server/src/main/java/org/elasticsearch/action/search/TransportSearchAction.java @@ -1025,7 +1025,7 @@ protected void releaseResponse(SearchResponse searchResponse) { } /** - * Creates a new Cluster object using the {@link ShardSearchFailure} info and skip_unavailable + * Creates a new Cluster object using the {@link ShardSearchFailure} info and shouldSkipOnFailure * flag to set Status. Then it swaps it in the clusters CHM at key clusterAlias */ static void ccsClusterInfoUpdate( @@ -1063,8 +1063,8 @@ private static void ccsClusterInfoUpdate( ) { /* * Cluster Status logic: - * 1) FAILED if total_shards > 0 && all shards failed && skip_unavailable=false - * 2) SKIPPED if total_shards > 0 && all shards failed && skip_unavailable=true + * 1) FAILED if total_shards > 0 && all shards failed && shouldSkipOnFailure=false + * 2) SKIPPED if total_shards > 0 && all shards failed && shouldSkipOnFailure=true * 3) PARTIAL if it timed out * 4) PARTIAL if it at least one of the shards succeeded but not all * 5) SUCCESSFUL if no shards failed (and did not time out) @@ -1862,7 +1862,7 @@ protected void releaseResponse(FinalResponse response) {} * causes of shard failures. * @param f ShardSearchFailure to log * @param clusterAlias cluster on which the failure occurred - * @param shouldSkipOnFailure the skip_unavailable setting of the cluster with the search error + * @param shouldSkipOnFailure the shouldSkipOnFailure setting of the cluster with the search error */ private static void logCCSError(ShardSearchFailure f, String clusterAlias, boolean shouldSkipOnFailure) { String errorInfo; @@ -1873,7 +1873,7 @@ private static void logCCSError(ShardSearchFailure f, String clusterAlias, boole errorInfo = f.toString(); } logger.debug( - "CCS remote cluster failure. Cluster [{}]. skip_unavailable: [{}]. Error: {}", + "CCS remote cluster failure. Cluster [{}]. shouldSkipOnFailure: [{}]. 
Error: {}", clusterAlias, shouldSkipOnFailure, errorInfo From ca07c1c44652fe512f3c8a5c8324eefd092f5867 Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Fri, 15 Aug 2025 13:16:33 +0100 Subject: [PATCH 3/9] Free resource after test --- .../search/ccs/CpsDoesNotUseSkipUnavailableIT.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java index a49c624fb6edb..0af53fb6bb986 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java @@ -101,6 +101,8 @@ public void testCpsShouldNotUseSkipUnavailable() { linkedClusterFailures.getFirst().getCause(), Matchers.anyOf(Matchers.instanceOf(RemoteTransportException.class), Matchers.instanceOf(ConnectTransportException.class)) ); + + result.decRef(); } /* From 205a638640a59d708d16cbe24e3c8ca5a0389648 Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Mon, 18 Aug 2025 16:44:34 +0100 Subject: [PATCH 4/9] Add a TODO comment to explain why `serverless.cross_project.enabled` setting is used --- .../org/elasticsearch/transport/RemoteClusterService.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index e281b6d905229..016f348b1fab3 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -178,6 +178,10 @@ public boolean isRemoteClusterServerEnabled() { if (remoteClusterServerEnabled) { registerRemoteClusterHandshakeRequestHandler(transportService); } + /* + * TODO: This is not the right way to check if we're in CPS context and is more of a temporary measure since + * the functionality to do it the right way is not yet ready -- replace this code when it's ready. 
+ */ this.inSkippableContext = settings.getAsBoolean("serverless.cross_project.enabled", false); } From 30fd5be9ce840a7950e081f2d3bd003181f98dbb Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Tue, 19 Aug 2025 17:45:18 +0100 Subject: [PATCH 5/9] `isSkipUnavailable()` returns `Optional` --- .../TransportResolveClusterAction.java | 2 +- .../TransportFieldCapabilitiesAction.java | 2 +- .../transport/RemoteClusterService.java | 29 ++++++++++++------- .../transport/RemoteClusterServiceTests.java | 6 ++-- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java index 0e0406cc8e74c..4dae6c5c201e8 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java @@ -174,7 +174,7 @@ protected void doExecuteForked(Task task, ResolveClusterActionRequest request, A resolveClusterTask.ensureNotCancelled(); String clusterAlias = remoteIndices.getKey(); OriginalIndices originalIndices = remoteIndices.getValue(); - boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias); + boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias).orElse(true) == false; RemoteClusterClient remoteClusterClient = remoteClusterService.getRemoteClusterClient( clusterAlias, searchCoordinationExecutor, diff --git a/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java b/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java index dde4b81af94d2..9069e7439ad7a 100644 --- a/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java +++ b/server/src/main/java/org/elasticsearch/action/fieldcaps/TransportFieldCapabilitiesAction.java @@ -332,7 +332,7 @@ private void doExecuteForked( ); boolean ensureConnected = forceConnectTimeoutSecs != null - || transportService.getRemoteClusterService().isSkipUnavailable(clusterAlias) == false; + || transportService.getRemoteClusterService().isSkipUnavailable(clusterAlias).orElse(true) == false; transportService.getRemoteClusterService() .maybeEnsureConnectedAndGetConnection(clusterAlias, ensureConnected, connectionListener); } diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index 016f348b1fab3..3b73540a90f64 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -48,6 +48,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.Executor; @@ -160,7 +161,7 @@ public boolean isRemoteClusterServerEnabled() { private final Map> remoteClusters; private final RemoteClusterCredentialsManager remoteClusterCredentialsManager; private final ProjectResolver projectResolver; - private final boolean inSkippableContext; + private final boolean isCpsEnabled; @FixForMultiProject(description = "Inject the ProjectResolver instance.") RemoteClusterService(Settings settings, TransportService transportService) { @@ -182,7 
+183,7 @@ public boolean isRemoteClusterServerEnabled() { * TODO: This is not the right way to check if we're in CPS context and is more of a temporary measure since * the functionality to do it the right way is not yet ready -- replace this code when it's ready. */ - this.inSkippableContext = settings.getAsBoolean("serverless.cross_project.enabled", false); + this.isCpsEnabled = settings.getAsBoolean("serverless.cross_project.enabled", false); } /** @@ -293,22 +294,28 @@ void ensureConnected(String clusterAlias, ActionListener listener) { } /** - * Returns whether the cluster identified by the provided alias is configured to be skipped when unavailable + * Returns whether the cluster identified by the provided alias is configured to be skipped when unavailable. + * @param clusterAlias Name of the cluster + * @return A boolean optional that denotes if the cluster is configured to be skipped. In CPS-like environment, + * it returns an empty value where we default/fall back to true. */ - public boolean isSkipUnavailable(String clusterAlias) { - return getRemoteClusterConnection(clusterAlias).isSkipUnavailable(); + public Optional isSkipUnavailable(String clusterAlias) { + if (isCpsEnabled) { + return Optional.empty(); + } else { + return Optional.of(getRemoteClusterConnection(clusterAlias).isSkipUnavailable()); + } } /** - * Returns whether we're in a skippable context. Skippable context is true when either in CPS environment - * or skip_unavailable is set to true for the specified cluster. + * Signifies if an error can be skipped for the specified cluster based on skip_unavailable, or, + * allow_partial_search_results if in CPS-like environment. * @param clusterAlias Name of the cluster * @param allowPartialSearchResults If partial results can be served for the search request. 
* @return boolean */ public boolean shouldSkipOnFailure(String clusterAlias, Boolean allowPartialSearchResults) { - return (inSkippableContext && (allowPartialSearchResults != null && allowPartialSearchResults)) - || getRemoteClusterConnection(clusterAlias).isSkipUnavailable(); + return isSkipUnavailable(clusterAlias).orElseGet(() -> allowPartialSearchResults != null && allowPartialSearchResults); } public Transport.Connection getConnection(String cluster) { @@ -675,7 +682,9 @@ public RemoteClusterClient getRemoteClusterClient( return new RemoteClusterAwareClient(transportService, clusterAlias, responseExecutor, switch (disconnectedStrategy) { case RECONNECT_IF_DISCONNECTED -> true; case FAIL_IF_DISCONNECTED -> false; - case RECONNECT_UNLESS_SKIP_UNAVAILABLE -> transportService.getRemoteClusterService().isSkipUnavailable(clusterAlias) == false; + case RECONNECT_UNLESS_SKIP_UNAVAILABLE -> transportService.getRemoteClusterService() + .isSkipUnavailable(clusterAlias) + .orElse(true) == false; }); } diff --git a/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java b/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java index 4fb0cb097b9d2..cf6764db5b47f 100644 --- a/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java +++ b/server/src/test/java/org/elasticsearch/transport/RemoteClusterServiceTests.java @@ -1370,15 +1370,15 @@ public void testSkipUnavailable() { service.start(); service.acceptIncomingRequests(); - assertTrue(service.getRemoteClusterService().isSkipUnavailable("cluster1")); + assertTrue(service.getRemoteClusterService().isSkipUnavailable("cluster1").orElse(true)); if (randomBoolean()) { updateSkipUnavailable(service.getRemoteClusterService(), "cluster1", false); - assertFalse(service.getRemoteClusterService().isSkipUnavailable("cluster1")); + assertFalse(service.getRemoteClusterService().isSkipUnavailable("cluster1").orElse(true)); } updateSkipUnavailable(service.getRemoteClusterService(), "cluster1", true); - assertTrue(service.getRemoteClusterService().isSkipUnavailable("cluster1")); + assertTrue(service.getRemoteClusterService().isSkipUnavailable("cluster1").orElse(true)); } } } From 6254827638b2297484ec9f423c4a71b8bced75bb Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Tue, 19 Aug 2025 18:05:40 +0100 Subject: [PATCH 6/9] Fix build --- .../xpack/esql/enrich/EnrichPolicyResolver.java | 4 ++-- .../xpack/esql/plugin/TransportEsqlQueryAction.java | 5 ++++- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java index af008771f1068..f6a6b6065520f 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/enrich/EnrichPolicyResolver.java @@ -347,7 +347,7 @@ public void onFailure(Exception e) { } private void failIfSkipUnavailableFalse(Exception e, String cluster, ActionListener lookupListener) { - if (ExceptionsHelper.isRemoteUnavailableException(e) && remoteClusterService.isSkipUnavailable(cluster)) { + if (ExceptionsHelper.isRemoteUnavailableException(e) && remoteClusterService.isSkipUnavailable(cluster).orElse(true)) { lookupListener.onResponse(new LookupResponse(e)); } else { lookupListener.onFailure(e); @@ -465,7 +465,7 @@ protected Map availablePolicies() { protected void 
getRemoteConnection(String cluster, ActionListener listener) { remoteClusterService.maybeEnsureConnectedAndGetConnection( cluster, - remoteClusterService.isSkipUnavailable(cluster) == false, + remoteClusterService.isSkipUnavailable(cluster).orElse(true) == false, listener ); } diff --git a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java index a23154c218a61..43fd7e8dc8077 100644 --- a/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java +++ b/x-pack/plugin/esql/src/main/java/org/elasticsearch/xpack/esql/plugin/TransportEsqlQueryAction.java @@ -333,7 +333,10 @@ private EsqlExecutionInfo getOrCreateExecutionInfo(Task task, EsqlQueryRequest r } private EsqlExecutionInfo createEsqlExecutionInfo(EsqlQueryRequest request) { - return new EsqlExecutionInfo(clusterAlias -> remoteClusterService.isSkipUnavailable(clusterAlias), request.includeCCSMetadata()); + return new EsqlExecutionInfo( + clusterAlias -> remoteClusterService.isSkipUnavailable(clusterAlias).orElse(true), + request.includeCCSMetadata() + ); } private EsqlQueryResponse toResponse(Task task, EsqlQueryRequest request, Configuration configuration, Result result) { From adbc646668d1b46cfe3262935108031035982a31 Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Wed, 20 Aug 2025 11:19:48 +0100 Subject: [PATCH 7/9] Fix tests and address review comment --- .../indices/resolve/TransportResolveClusterAction.java | 2 +- .../org/elasticsearch/transport/RemoteClusterService.java | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java index 4dae6c5c201e8..7eb3670dccb53 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/resolve/TransportResolveClusterAction.java @@ -174,7 +174,7 @@ protected void doExecuteForked(Task task, ResolveClusterActionRequest request, A resolveClusterTask.ensureNotCancelled(); String clusterAlias = remoteIndices.getKey(); OriginalIndices originalIndices = remoteIndices.getValue(); - boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias).orElse(true) == false; + boolean skipUnavailable = remoteClusterService.isSkipUnavailable(clusterAlias).orElse(true); RemoteClusterClient remoteClusterClient = remoteClusterService.getRemoteClusterClient( clusterAlias, searchCoordinationExecutor, diff --git a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java index 3b73540a90f64..ef0989fb4d47f 100644 --- a/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java +++ b/server/src/main/java/org/elasticsearch/transport/RemoteClusterService.java @@ -161,7 +161,7 @@ public boolean isRemoteClusterServerEnabled() { private final Map> remoteClusters; private final RemoteClusterCredentialsManager remoteClusterCredentialsManager; private final ProjectResolver projectResolver; - private final boolean isCpsEnabled; + private final boolean canUseSkipUnavailable; @FixForMultiProject(description = "Inject the ProjectResolver instance.") RemoteClusterService(Settings 
settings, TransportService transportService) { @@ -183,7 +183,7 @@ public boolean isRemoteClusterServerEnabled() { * TODO: This is not the right way to check if we're in CPS context and is more of a temporary measure since * the functionality to do it the right way is not yet ready -- replace this code when it's ready. */ - this.isCpsEnabled = settings.getAsBoolean("serverless.cross_project.enabled", false); + this.canUseSkipUnavailable = settings.getAsBoolean("serverless.cross_project.enabled", false) == false; } /** @@ -300,7 +300,7 @@ void ensureConnected(String clusterAlias, ActionListener listener) { * it returns an empty value where we default/fall back to true. */ public Optional isSkipUnavailable(String clusterAlias) { - if (isCpsEnabled) { + if (canUseSkipUnavailable == false) { return Optional.empty(); } else { return Optional.of(getRemoteClusterConnection(clusterAlias).isSkipUnavailable()); From d57f0fac8e6b81d3b5081a8e81522876284e8240 Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Wed, 20 Aug 2025 12:57:39 +0100 Subject: [PATCH 8/9] Use `assertResponse()` in test --- .../ccs/CpsDoesNotUseSkipUnavailableIT.java | 38 ++++++++++--------- 1 file changed, 20 insertions(+), 18 deletions(-) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java index 0af53fb6bb986..6d6c5357d74c7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java @@ -28,6 +28,7 @@ import java.util.concurrent.ExecutionException; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; public class CpsDoesNotUseSkipUnavailableIT extends AbstractMultiClustersTestCase { private static final String LINKED_CLUSTER_1 = "cluster-a"; @@ -65,7 +66,7 @@ protected Map skipUnavailableForRemoteClusters() { return Map.of(LINKED_CLUSTER_1, false); } - public void testCpsShouldNotUseSkipUnavailable() { + public void testCpsShouldNotUseSkipUnavailable() throws Exception { // Add some dummy data to prove we are communicating fine with the remote. assertAcked(client(LINKED_CLUSTER_1).admin().indices().prepareCreate("test-index")); client(LINKED_CLUSTER_1).prepareIndex("test-index").setSource("sample-field", "sample-value").get(); @@ -86,23 +87,24 @@ public void testCpsShouldNotUseSkipUnavailable() { { var searchRequest = getSearchRequest(true); searchRequest.setCcsMinimizeRoundtrips(randomBoolean()); - var result = safeGet(client().execute(TransportSearchAction.TYPE, searchRequest)); - - var originCluster = result.getClusters().getCluster(LOCAL_CLUSTER); - assertThat(originCluster.getStatus(), Matchers.is(SearchResponse.Cluster.Status.SUCCESSFUL)); - - var linkedCluster = result.getClusters().getCluster(LINKED_CLUSTER_1); - assertThat(linkedCluster.getStatus(), Matchers.is(SearchResponse.Cluster.Status.SKIPPED)); - - var linkedClusterFailures = result.getClusters().getCluster(LINKED_CLUSTER_1).getFailures(); - assertThat(linkedClusterFailures.size(), Matchers.is(1)); - // Failure is something along the lines of shard failure and is caused by a connection error. 
- assertThat( - linkedClusterFailures.getFirst().getCause(), - Matchers.anyOf(Matchers.instanceOf(RemoteTransportException.class), Matchers.instanceOf(ConnectTransportException.class)) - ); - - result.decRef(); + assertResponse(client().execute(TransportSearchAction.TYPE, searchRequest), result -> { + var originCluster = result.getClusters().getCluster(LOCAL_CLUSTER); + assertThat(originCluster.getStatus(), Matchers.is(SearchResponse.Cluster.Status.SUCCESSFUL)); + + var linkedCluster = result.getClusters().getCluster(LINKED_CLUSTER_1); + assertThat(linkedCluster.getStatus(), Matchers.is(SearchResponse.Cluster.Status.SKIPPED)); + + var linkedClusterFailures = result.getClusters().getCluster(LINKED_CLUSTER_1).getFailures(); + assertThat(linkedClusterFailures.size(), Matchers.is(1)); + // Failure is something along the lines of shard failure and is caused by a connection error. + assertThat( + linkedClusterFailures.getFirst().getCause(), + Matchers.anyOf( + Matchers.instanceOf(RemoteTransportException.class), + Matchers.instanceOf(ConnectTransportException.class) + ) + ); + }); } /* From 06239ad45cdab0f8de5309bdd79099511500776f Mon Sep 17 00:00:00 2001 From: Pawan Kartik Date: Wed, 20 Aug 2025 14:55:38 +0100 Subject: [PATCH 9/9] Add TODO comment as per review comment --- .../elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java index 6d6c5357d74c7..5345975d43417 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/ccs/CpsDoesNotUseSkipUnavailableIT.java @@ -30,6 +30,7 @@ import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertResponse; +// TODO: Move this test to the Serverless repo once the IT framework is ready there. public class CpsDoesNotUseSkipUnavailableIT extends AbstractMultiClustersTestCase { private static final String LINKED_CLUSTER_1 = "cluster-a";
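A minimal, self-contained model of the contract that patches 5 and 7 converge on may help when reviewing the series: isSkipUnavailable() is assumed to return Optional<Boolean> (empty when serverless.cross_project.enabled is set, i.e. in a CPS-like environment), existing CCS callers fall back to the old default via orElse(true), and the search path instead defers to allow_partial_search_results through shouldSkipOnFailure(). The class and field names below are placeholders for illustration only; just the isSkipUnavailable/shouldSkipOnFailure semantics mirror the diff, and this is a sketch under those assumptions, not the actual RemoteClusterService implementation.

import java.util.Optional;

class SkipUnavailableSketch {
    // false when serverless.cross_project.enabled=true (placeholder for the patch's canUseSkipUnavailable field)
    private final boolean canUseSkipUnavailable;
    // what the per-cluster skip_unavailable setting would report (placeholder for the remote connection lookup)
    private final boolean connectionSkipUnavailable;

    SkipUnavailableSketch(boolean canUseSkipUnavailable, boolean connectionSkipUnavailable) {
        this.canUseSkipUnavailable = canUseSkipUnavailable;
        this.connectionSkipUnavailable = connectionSkipUnavailable;
    }

    // Empty Optional means "CPS-like environment": skip_unavailable is not consulted at all.
    Optional<Boolean> isSkipUnavailable() {
        return canUseSkipUnavailable ? Optional.of(connectionSkipUnavailable) : Optional.empty();
    }

    // Search decides whether to skip a failed linked cluster: outside CPS it follows
    // skip_unavailable; in CPS it follows allow_partial_search_results instead.
    boolean shouldSkipOnFailure(Boolean allowPartialSearchResults) {
        return isSkipUnavailable().orElseGet(() -> allowPartialSearchResults != null && allowPartialSearchResults);
    }

    public static void main(String[] args) {
        SkipUnavailableSketch cps = new SkipUnavailableSketch(false, false);
        System.out.println(cps.shouldSkipOnFailure(true));   // true: partial results allowed, so skip
        System.out.println(cps.shouldSkipOnFailure(false));  // false: the search fails instead of skipping
        SkipUnavailableSketch ccs = new SkipUnavailableSketch(true, true);
        System.out.println(ccs.shouldSkipOnFailure(null));   // true: skip_unavailable=true wins outside CPS
    }
}

Non-search callers in the diff (field caps, resolve cluster, enrich) follow the first pattern, isSkipUnavailable(clusterAlias).orElse(true), which keeps their behaviour unchanged outside CPS while treating a CPS deployment as skippable by default.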