diff --git a/.backportrc.json b/.backportrc.json index 59843f4d5f134..53275ddc8563d 100644 --- a/.backportrc.json +++ b/.backportrc.json @@ -1,9 +1,9 @@ { "upstream" : "elastic/elasticsearch", - "targetBranchChoices" : [ "main", "8.14", "8.13", "8.12", "8.11", "8.10", "8.9", "8.8", "8.7", "8.6", "8.5", "8.4", "8.3", "8.2", "8.1", "8.0", "7.17", "6.8" ], + "targetBranchChoices" : [ "main", "8.15", "8.14", "8.13", "8.12", "8.11", "8.10", "8.9", "8.8", "8.7", "8.6", "8.5", "8.4", "8.3", "8.2", "8.1", "8.0", "7.17", "6.8" ], "targetPRLabels" : [ "backport" ], "branchLabelMapping" : { - "^v8.15.0$" : "main", + "^v8.15.3$" : "main", "^v(\\d+).(\\d+).\\d+(?:-(?:alpha|beta|rc)\\d+)?$" : "$1.$2" } } \ No newline at end of file diff --git a/.buildkite/hooks/pre-command b/.buildkite/hooks/pre-command index b6b730fc3de8b..a47079a98e10d 100644 --- a/.buildkite/hooks/pre-command +++ b/.buildkite/hooks/pre-command @@ -105,3 +105,11 @@ EOF Agent information from gobld EOF fi + +# Amazon Linux 2 has DNS resolution issues with resource-based hostnames in EC2 +# We have many functional tests that try to lookup and resolve the hostname of the local machine in a particular way +# And they fail. This sets up a manual entry for the hostname in dnsmasq. +if [[ -f /etc/os-release ]] && grep -q '"Amazon Linux 2"' /etc/os-release; then + echo "$(hostname -i | cut -d' ' -f 2) $(hostname -f)." | sudo tee /etc/dnsmasq.hosts + sudo systemctl restart dnsmasq.service +fi diff --git a/.buildkite/pipelines/dra-workflow.yml b/.buildkite/pipelines/dra-workflow.yml index 32a2b7d22134a..bcc6c9c57d756 100644 --- a/.buildkite/pipelines/dra-workflow.yml +++ b/.buildkite/pipelines/dra-workflow.yml @@ -7,7 +7,7 @@ steps: image: family/elasticsearch-ubuntu-2204 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 + diskSizeGb: 350 - wait # The hadoop build depends on the ES artifact # So let's trigger the hadoop build any time we build a new staging artifact diff --git a/.buildkite/pipelines/intake.template.yml b/.buildkite/pipelines/intake.template.yml index 1a513971b2c10..f530f237113a9 100644 --- a/.buildkite/pipelines/intake.template.yml +++ b/.buildkite/pipelines/intake.template.yml @@ -7,7 +7,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - label: part1 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 @@ -17,7 +16,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part2 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 timeout_in_minutes: 300 @@ -26,7 +24,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part3 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 timeout_in_minutes: 300 @@ -35,7 +32,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part4 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 
timeout_in_minutes: 300 @@ -44,7 +40,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part5 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 timeout_in_minutes: 300 @@ -53,7 +48,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" @@ -67,7 +61,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat @@ -78,7 +71,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - trigger: elasticsearch-dra-workflow label: Trigger DRA snapshot workflow diff --git a/.buildkite/pipelines/intake.yml b/.buildkite/pipelines/intake.yml index 4124d4e550d11..afb7b7b522193 100644 --- a/.buildkite/pipelines/intake.yml +++ b/.buildkite/pipelines/intake.yml @@ -8,7 +8,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - label: part1 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 @@ -18,7 +17,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part2 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 timeout_in_minutes: 300 @@ -27,7 +25,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part3 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 timeout_in_minutes: 300 @@ -36,7 +33,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part4 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 timeout_in_minutes: 300 @@ -45,7 +41,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part5 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 timeout_in_minutes: 300 @@ -54,7 +49,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" @@ -62,13 +56,12 @@ steps: timeout_in_minutes: 300 matrix: setup: - BWC_VERSION: ["7.17.23", "8.14.2", "8.15.0"] + BWC_VERSION: ["7.17.25", "8.15.3"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat @@ -79,7 +72,6 @@ steps: image: family/elasticsearch-ubuntu-2004 
machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - trigger: elasticsearch-dra-workflow label: Trigger DRA snapshot workflow diff --git a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml index 1f69b8faa7ab4..8cf2a8aacbece 100644 --- a/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml +++ b/.buildkite/pipelines/lucene-snapshot/build-snapshot.yml @@ -15,7 +15,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait - trigger: "elasticsearch-lucene-snapshot-tests" build: diff --git a/.buildkite/pipelines/lucene-snapshot/run-tests.yml b/.buildkite/pipelines/lucene-snapshot/run-tests.yml index 49c3396488d82..c76c54a56494e 100644 --- a/.buildkite/pipelines/lucene-snapshot/run-tests.yml +++ b/.buildkite/pipelines/lucene-snapshot/run-tests.yml @@ -7,7 +7,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - wait: null - label: part1 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart1 @@ -17,7 +16,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part2 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart2 timeout_in_minutes: 300 @@ -26,7 +24,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part3 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart3 timeout_in_minutes: 300 @@ -35,7 +32,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part4 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart4 timeout_in_minutes: 300 @@ -44,7 +40,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: part5 command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true -Dorg.elasticsearch.build.cache.push=true -Dignore.tests.seed -Dscan.capture-task-input-files checkPart5 timeout_in_minutes: 300 @@ -53,7 +48,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: bwc-snapshots steps: - label: "{{matrix.BWC_VERSION}} / bwc-snapshots" @@ -70,7 +64,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: "{{matrix.BWC_VERSION}}" - label: rest-compat @@ -81,4 +74,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/periodic-packaging.bwc.template.yml b/.buildkite/pipelines/periodic-packaging.bwc.template.yml index 8a6fa2553b204..b06bc80d3535d 100644 --- a/.buildkite/pipelines/periodic-packaging.bwc.template.yml +++ b/.buildkite/pipelines/periodic-packaging.bwc.template.yml @@ -11,6 +11,5 @@ image: 
family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: $BWC_VERSION diff --git a/.buildkite/pipelines/periodic-packaging.template.yml b/.buildkite/pipelines/periodic-packaging.template.yml index 64c5fa5060e6c..14a2fd7ba1bc4 100644 --- a/.buildkite/pipelines/periodic-packaging.template.yml +++ b/.buildkite/pipelines/periodic-packaging.template.yml @@ -7,8 +7,6 @@ steps: matrix: setup: image: - - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 diff --git a/.buildkite/pipelines/periodic-packaging.yml b/.buildkite/pipelines/periodic-packaging.yml index 4217fc91bf0fd..70128211a39c0 100644 --- a/.buildkite/pipelines/periodic-packaging.yml +++ b/.buildkite/pipelines/periodic-packaging.yml @@ -8,8 +8,6 @@ steps: matrix: setup: image: - - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 @@ -46,7 +44,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.0.1 @@ -63,7 +60,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.1.1 @@ -80,7 +76,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.2.1 @@ -97,7 +92,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.3.2 @@ -114,7 +108,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.4.2 @@ -131,7 +124,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.5.2 @@ -148,7 +140,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.6.2 @@ -165,7 +156,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.7.1 @@ -182,7 +172,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.8.1 @@ -199,7 +188,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.9.3 @@ -216,7 +204,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.10.2 @@ -233,7 +220,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.11.2 @@ -250,7 +236,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.12.1 @@ -267,7 +252,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.13.4 @@ -284,7 +268,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.14.2 @@ -301,7 +284,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: 
custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.15.2 @@ -318,12 +300,11 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 7.16.3 - - label: "{{matrix.image}} / 7.17.23 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v7.17.23 + - label: "{{matrix.image}} / 7.17.25 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v7.17.25 timeout_in_minutes: 300 matrix: setup: @@ -335,9 +316,8 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: - BWC_VERSION: 7.17.23 + BWC_VERSION: 7.17.25 - label: "{{matrix.image}} / 8.0.1 / packaging-tests-upgrade" command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.0.1 @@ -352,7 +332,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.0.1 @@ -369,7 +348,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.1.3 @@ -386,7 +364,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.2.3 @@ -403,7 +380,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.3.3 @@ -420,7 +396,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.4.3 @@ -437,7 +412,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.5.3 @@ -454,7 +428,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.6.2 @@ -471,7 +444,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.7.1 @@ -488,7 +460,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.8.2 @@ -505,7 +476,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.9.2 @@ -522,7 +492,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.10.4 @@ -539,7 +508,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.11.4 @@ -556,7 +524,6 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.12.2 @@ -573,12 +540,11 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: 8.13.4 - - label: "{{matrix.image}} / 8.14.2 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true 
destructiveDistroUpgradeTest.v8.14.2 + - label: "{{matrix.image}} / 8.14.3 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.14.3 timeout_in_minutes: 300 matrix: setup: @@ -590,12 +556,11 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: - BWC_VERSION: 8.14.2 + BWC_VERSION: 8.14.3 - - label: "{{matrix.image}} / 8.15.0 / packaging-tests-upgrade" - command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.15.0 + - label: "{{matrix.image}} / 8.15.3 / packaging-tests-upgrade" + command: ./.ci/scripts/packaging-test.sh -Dbwc.checkout.align=true destructiveDistroUpgradeTest.v8.15.3 timeout_in_minutes: 300 matrix: setup: @@ -607,9 +572,8 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: - BWC_VERSION: 8.15.0 + BWC_VERSION: 8.15.3 - group: packaging-tests-windows steps: diff --git a/.buildkite/pipelines/periodic-platform-support.yml b/.buildkite/pipelines/periodic-platform-support.yml index 867ebe41ed6af..86e0623ba5b87 100644 --- a/.buildkite/pipelines/periodic-platform-support.yml +++ b/.buildkite/pipelines/periodic-platform-support.yml @@ -7,8 +7,6 @@ steps: matrix: setup: image: - - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 @@ -30,7 +28,6 @@ steps: localSsds: 1 localSsdInterface: nvme machineType: custom-32-98304 - diskSizeGb: 250 env: {} - group: platform-support-windows steps: diff --git a/.buildkite/pipelines/periodic.bwc.template.yml b/.buildkite/pipelines/periodic.bwc.template.yml index b22270dbf221c..43a0a7438d656 100644 --- a/.buildkite/pipelines/periodic.bwc.template.yml +++ b/.buildkite/pipelines/periodic.bwc.template.yml @@ -7,7 +7,6 @@ machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: $BWC_VERSION retry: diff --git a/.buildkite/pipelines/periodic.template.yml b/.buildkite/pipelines/periodic.template.yml index 87e30a0ea73ba..110226d44c90e 100644 --- a/.buildkite/pipelines/periodic.template.yml +++ b/.buildkite/pipelines/periodic.template.yml @@ -25,7 +25,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: example-plugins command: |- cd $$WORKSPACE/plugins/examples @@ -37,7 +36,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: java-fips-matrix steps: - label: "{{matrix.ES_RUNTIME_JAVA}} / {{matrix.GRADLE_TASK}} / java-fips-matrix" @@ -59,7 +57,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -76,7 +73,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -105,7 +101,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -126,7 +121,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: 
ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -162,7 +156,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / azure command: | export azure_storage_container=elasticsearch-ci-thirdparty @@ -177,7 +170,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / gcs command: | export google_storage_bucket=elasticsearch-ci-thirdparty @@ -192,7 +184,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / geoip command: | .ci/scripts/run-gradle.sh :modules:ingest-geoip:internalClusterTest -Dtests.jvm.argline="-Dgeoip_use_service=true" @@ -202,7 +193,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / s3 command: | export amazon_s3_bucket=elasticsearch-ci.us-west-2 @@ -217,7 +207,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: Upload Snyk Dependency Graph command: .ci/scripts/run-gradle.sh uploadSnykDependencyGraph -PsnykTargetReference=$BUILDKITE_BRANCH env: @@ -228,8 +217,7 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - if: build.branch == "main" || build.branch == "7.17" + if: build.branch == "main" || build.branch == "8.x" || build.branch == "7.17" - label: check-branch-consistency command: .ci/scripts/run-gradle.sh branchConsistency timeout_in_minutes: 15 @@ -237,7 +225,6 @@ steps: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-2 - diskSizeGb: 250 - label: check-branch-protection-rules command: .buildkite/scripts/branch-protection.sh timeout_in_minutes: 5 diff --git a/.buildkite/pipelines/periodic.yml b/.buildkite/pipelines/periodic.yml index 06e7ffbc8fb1c..f7796f51d624b 100644 --- a/.buildkite/pipelines/periodic.yml +++ b/.buildkite/pipelines/periodic.yml @@ -11,7 +11,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.0.1 retry: @@ -31,7 +30,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.1.1 retry: @@ -51,7 +49,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.2.1 retry: @@ -71,7 +68,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.3.2 retry: @@ -91,7 +87,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.4.2 retry: @@ -111,7 +106,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.5.2 retry: @@ -131,7 +125,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.6.2 retry: @@ -151,7 +144,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.7.1 retry: @@ -171,7 +163,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.8.1 retry: @@ -191,7 +182,6 @@ 
steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.9.3 retry: @@ -211,7 +201,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.10.2 retry: @@ -231,7 +220,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.11.2 retry: @@ -251,7 +239,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.12.1 retry: @@ -271,7 +258,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.13.4 retry: @@ -291,7 +277,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.14.2 retry: @@ -311,7 +296,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.15.2 retry: @@ -331,7 +315,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 7.16.3 retry: @@ -342,8 +325,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 7.17.23 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v7.17.23#bwcTest + - label: 7.17.25 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v7.17.25#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -351,9 +334,8 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: - BWC_VERSION: 7.17.23 + BWC_VERSION: 7.17.25 retry: automatic: - exit_status: "-1" @@ -371,7 +353,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.0.1 retry: @@ -391,7 +372,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.1.3 retry: @@ -411,7 +391,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.2.3 retry: @@ -431,7 +410,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.3.3 retry: @@ -451,7 +429,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.4.3 retry: @@ -471,7 +448,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.5.3 retry: @@ -491,7 +467,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.6.2 retry: @@ -511,7 +486,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.7.1 retry: @@ -531,7 +505,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.8.2 retry: @@ -551,7 +524,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.9.2 retry: @@ -571,7 +543,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.10.4 retry: @@ -591,7 +562,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.11.4 retry: @@ -611,7 +581,6 @@ steps: 
machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.12.2 retry: @@ -631,7 +600,6 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: BWC_VERSION: 8.13.4 retry: @@ -642,8 +610,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 8.14.2 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.14.2#bwcTest + - label: 8.14.3 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.14.3#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -651,9 +619,8 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: - BWC_VERSION: 8.14.2 + BWC_VERSION: 8.14.3 retry: automatic: - exit_status: "-1" @@ -662,8 +629,8 @@ steps: - signal_reason: agent_stop limit: 3 - - label: 8.15.0 / bwc - command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.15.0#bwcTest + - label: 8.15.3 / bwc + command: .ci/scripts/run-gradle.sh -Dbwc.checkout.align=true v8.15.3#bwcTest timeout_in_minutes: 300 agents: provider: gcp @@ -671,9 +638,8 @@ steps: machineType: n1-standard-32 buildDirectory: /dev/shm/bk preemptible: true - diskSizeGb: 250 env: - BWC_VERSION: 8.15.0 + BWC_VERSION: 8.15.3 retry: automatic: - exit_status: "-1" @@ -706,7 +672,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: example-plugins command: |- cd $$WORKSPACE/plugins/examples @@ -718,7 +683,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - group: java-fips-matrix steps: - label: "{{matrix.ES_RUNTIME_JAVA}} / {{matrix.GRADLE_TASK}} / java-fips-matrix" @@ -740,7 +704,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -751,13 +714,12 @@ steps: setup: ES_RUNTIME_JAVA: - openjdk17 - BWC_VERSION: ["7.17.23", "8.14.2", "8.15.0"] + BWC_VERSION: ["7.17.25", "8.15.3"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -786,7 +748,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" GRADLE_TASK: "{{matrix.GRADLE_TASK}}" @@ -801,13 +762,12 @@ steps: - openjdk21 - openjdk22 - openjdk23 - BWC_VERSION: ["7.17.23", "8.14.2", "8.15.0"] + BWC_VERSION: ["7.17.25", "8.15.3"] agents: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: ES_RUNTIME_JAVA: "{{matrix.ES_RUNTIME_JAVA}}" BWC_VERSION: "{{matrix.BWC_VERSION}}" @@ -843,7 +803,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / azure command: | export azure_storage_container=elasticsearch-ci-thirdparty @@ -858,7 +817,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / gcs command: | export google_storage_bucket=elasticsearch-ci-thirdparty @@ -873,7 +831,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: 
n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / geoip command: | .ci/scripts/run-gradle.sh :modules:ingest-geoip:internalClusterTest -Dtests.jvm.argline="-Dgeoip_use_service=true" @@ -883,7 +840,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: third-party / s3 command: | export amazon_s3_bucket=elasticsearch-ci.us-west-2 @@ -898,7 +854,6 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - label: Upload Snyk Dependency Graph command: .ci/scripts/run-gradle.sh uploadSnykDependencyGraph -PsnykTargetReference=$BUILDKITE_BRANCH env: @@ -909,8 +864,7 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-8 buildDirectory: /dev/shm/bk - diskSizeGb: 250 - if: build.branch == "main" || build.branch == "7.17" + if: build.branch == "main" || build.branch == "8.x" || build.branch == "7.17" - label: check-branch-consistency command: .ci/scripts/run-gradle.sh branchConsistency timeout_in_minutes: 15 @@ -918,7 +872,6 @@ steps: provider: gcp image: family/elasticsearch-ubuntu-2004 machineType: n2-standard-2 - diskSizeGb: 250 - label: check-branch-protection-rules command: .buildkite/scripts/branch-protection.sh timeout_in_minutes: 5 diff --git a/.buildkite/pipelines/pull-request/build-benchmark.yml b/.buildkite/pipelines/pull-request/build-benchmark.yml index 96330bee03638..8d3215b8393ce 100644 --- a/.buildkite/pipelines/pull-request/build-benchmark.yml +++ b/.buildkite/pipelines/pull-request/build-benchmark.yml @@ -22,4 +22,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots-windows.yml b/.buildkite/pipelines/pull-request/bwc-snapshots-windows.yml deleted file mode 100644 index d37bdf380f926..0000000000000 --- a/.buildkite/pipelines/pull-request/bwc-snapshots-windows.yml +++ /dev/null @@ -1,20 +0,0 @@ -config: - allow-labels: test-windows -steps: - - group: bwc-snapshots-windows - steps: - - label: "{{matrix.BWC_VERSION}} / bwc-snapshots-windows" - key: "bwc-snapshots-windows" - command: .\.buildkite\scripts\run-script.ps1 bash .buildkite/scripts/windows-run-gradle.sh - env: - GRADLE_TASK: "v{{matrix.BWC_VERSION}}#bwcTest" - timeout_in_minutes: 300 - matrix: - setup: - BWC_VERSION: $SNAPSHOT_BWC_VERSIONS - agents: - provider: gcp - image: family/elasticsearch-windows-2022 - machineType: custom-32-98304 - diskType: pd-ssd - diskSizeGb: 350 diff --git a/.buildkite/pipelines/pull-request/bwc-snapshots.yml b/.buildkite/pipelines/pull-request/bwc-snapshots.yml index 8f59e593b286f..5a9fc2d938ac0 100644 --- a/.buildkite/pipelines/pull-request/bwc-snapshots.yml +++ b/.buildkite/pipelines/pull-request/bwc-snapshots.yml @@ -18,4 +18,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: n1-standard-32 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/cloud-deploy.yml b/.buildkite/pipelines/pull-request/cloud-deploy.yml index 2932f874c5cf8..ce8e8206d51ff 100644 --- a/.buildkite/pipelines/pull-request/cloud-deploy.yml +++ b/.buildkite/pipelines/pull-request/cloud-deploy.yml @@ -11,4 +11,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/docs-check.yml b/.buildkite/pipelines/pull-request/docs-check.yml 
index 3bf1e43697a7c..2201eb2d1e4ea 100644 --- a/.buildkite/pipelines/pull-request/docs-check.yml +++ b/.buildkite/pipelines/pull-request/docs-check.yml @@ -12,4 +12,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/eql-correctness.yml b/.buildkite/pipelines/pull-request/eql-correctness.yml index d85827d10e886..8f7ca6942c0e9 100644 --- a/.buildkite/pipelines/pull-request/eql-correctness.yml +++ b/.buildkite/pipelines/pull-request/eql-correctness.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/example-plugins.yml b/.buildkite/pipelines/pull-request/example-plugins.yml index fb4a17fb214cb..18d0de6594980 100644 --- a/.buildkite/pipelines/pull-request/example-plugins.yml +++ b/.buildkite/pipelines/pull-request/example-plugins.yml @@ -16,4 +16,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/full-bwc.yml b/.buildkite/pipelines/pull-request/full-bwc.yml index c404069bd0e60..d3fa8eccaf7d9 100644 --- a/.buildkite/pipelines/pull-request/full-bwc.yml +++ b/.buildkite/pipelines/pull-request/full-bwc.yml @@ -13,4 +13,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/packaging-tests-unix.yml b/.buildkite/pipelines/pull-request/packaging-tests-unix.yml index d5c937aa4b5a2..e94baac8d9448 100644 --- a/.buildkite/pipelines/pull-request/packaging-tests-unix.yml +++ b/.buildkite/pipelines/pull-request/packaging-tests-unix.yml @@ -10,8 +10,6 @@ steps: matrix: setup: image: - - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 @@ -39,8 +37,6 @@ steps: matrix: setup: image: - - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 @@ -68,8 +64,6 @@ steps: matrix: setup: image: - - centos-7 - - debian-10 - debian-11 - opensuse-leap-15 - oraclelinux-7 diff --git a/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml b/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml index 970dafbb28647..c62cf23310422 100644 --- a/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml +++ b/.buildkite/pipelines/pull-request/packaging-upgrade-tests.yml @@ -18,6 +18,5 @@ steps: image: family/elasticsearch-{{matrix.image}} machineType: custom-16-32768 buildDirectory: /dev/shm/bk - diskSizeGb: 250 env: BWC_VERSION: $BWC_VERSION diff --git a/.buildkite/pipelines/pull-request/part-1-fips.yml b/.buildkite/pipelines/pull-request/part-1-fips.yml index 99544e7f5a80b..42f930c1bde9a 100644 --- a/.buildkite/pipelines/pull-request/part-1-fips.yml +++ b/.buildkite/pipelines/pull-request/part-1-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-1.yml b/.buildkite/pipelines/pull-request/part-1.yml index b4b9d5469ec41..3d467c6c41e43 100644 --- a/.buildkite/pipelines/pull-request/part-1.yml +++ b/.buildkite/pipelines/pull-request/part-1.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-2-fips.yml 
b/.buildkite/pipelines/pull-request/part-2-fips.yml index 36a9801547d78..6a3647ceb50ae 100644 --- a/.buildkite/pipelines/pull-request/part-2-fips.yml +++ b/.buildkite/pipelines/pull-request/part-2-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-2.yml b/.buildkite/pipelines/pull-request/part-2.yml index 12bd78cf895fd..43de69bbcd945 100644 --- a/.buildkite/pipelines/pull-request/part-2.yml +++ b/.buildkite/pipelines/pull-request/part-2.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-3-fips.yml b/.buildkite/pipelines/pull-request/part-3-fips.yml index 4a2df3026e782..cee3ea153acb9 100644 --- a/.buildkite/pipelines/pull-request/part-3-fips.yml +++ b/.buildkite/pipelines/pull-request/part-3-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-3.yml b/.buildkite/pipelines/pull-request/part-3.yml index 6991c05da85c6..12abae7634822 100644 --- a/.buildkite/pipelines/pull-request/part-3.yml +++ b/.buildkite/pipelines/pull-request/part-3.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-4-fips.yml b/.buildkite/pipelines/pull-request/part-4-fips.yml index 734f8af816895..11a50456ca4c0 100644 --- a/.buildkite/pipelines/pull-request/part-4-fips.yml +++ b/.buildkite/pipelines/pull-request/part-4-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-4.yml b/.buildkite/pipelines/pull-request/part-4.yml index 59f2f2898a590..af11f08953d07 100644 --- a/.buildkite/pipelines/pull-request/part-4.yml +++ b/.buildkite/pipelines/pull-request/part-4.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-5-fips.yml b/.buildkite/pipelines/pull-request/part-5-fips.yml index 801b812bb99c0..4e193ac751086 100644 --- a/.buildkite/pipelines/pull-request/part-5-fips.yml +++ b/.buildkite/pipelines/pull-request/part-5-fips.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/part-5.yml b/.buildkite/pipelines/pull-request/part-5.yml index c7e50631d1cdd..306ce7533d0ed 100644 --- a/.buildkite/pipelines/pull-request/part-5.yml +++ b/.buildkite/pipelines/pull-request/part-5.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/precommit.yml b/.buildkite/pipelines/pull-request/precommit.yml index 8d1458b1b60c8..f6548dfeed9b2 100644 --- a/.buildkite/pipelines/pull-request/precommit.yml +++ b/.buildkite/pipelines/pull-request/precommit.yml @@ -10,4 +10,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git 
a/.buildkite/pipelines/pull-request/rest-compatibility.yml b/.buildkite/pipelines/pull-request/rest-compatibility.yml index 16144a2a0780f..a69810e23d960 100644 --- a/.buildkite/pipelines/pull-request/rest-compatibility.yml +++ b/.buildkite/pipelines/pull-request/rest-compatibility.yml @@ -9,4 +9,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.buildkite/pipelines/pull-request/validate-changelogs.yml b/.buildkite/pipelines/pull-request/validate-changelogs.yml index 296ef11637118..9451d321a9b39 100644 --- a/.buildkite/pipelines/pull-request/validate-changelogs.yml +++ b/.buildkite/pipelines/pull-request/validate-changelogs.yml @@ -7,4 +7,3 @@ steps: image: family/elasticsearch-ubuntu-2004 machineType: custom-32-98304 buildDirectory: /dev/shm/bk - diskSizeGb: 250 diff --git a/.ci/bwcVersions b/.ci/bwcVersions index bce556e9fc352..72da5539e7dd6 100644 --- a/.ci/bwcVersions +++ b/.ci/bwcVersions @@ -16,7 +16,7 @@ BWC_VERSION: - "7.14.2" - "7.15.2" - "7.16.3" - - "7.17.23" + - "7.17.25" - "8.0.1" - "8.1.3" - "8.2.3" @@ -31,5 +31,5 @@ BWC_VERSION: - "8.11.4" - "8.12.2" - "8.13.4" - - "8.14.2" - - "8.15.0" + - "8.14.3" + - "8.15.3" diff --git a/.ci/snapshotBwcVersions b/.ci/snapshotBwcVersions index 5fc4b6c072899..e7779e3fc1ce8 100644 --- a/.ci/snapshotBwcVersions +++ b/.ci/snapshotBwcVersions @@ -1,4 +1,3 @@ BWC_VERSION: - - "7.17.23" - - "8.14.2" - - "8.15.0" + - "7.17.25" + - "8.15.3" diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS deleted file mode 100644 index 0f7e3073ed022..0000000000000 --- a/.github/CODEOWNERS +++ /dev/null @@ -1,68 +0,0 @@ -# GitHub CODEOWNERS definition -# Identify which groups will be pinged by changes to different parts of the codebase. 
-# For more info, see https://help.github.com/articles/about-codeowners/ - -# Stack Monitoring index templates -x-pack/plugin/core/template-resources/src/main/resources/monitoring-alerts-7.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats-mb.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-beats.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-ent-search-mb.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-es-mb.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-es.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-kibana-mb.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-kibana.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-logstash-mb.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-logstash.json @elastic/stack-monitoring -x-pack/plugin/core/template-resources/src/main/resources/monitoring-mb-ilm-policy.json @elastic/stack-monitoring -x-pack/plugin/monitoring/src/main/java/org/elasticsearch/xpack/monitoring/MonitoringTemplateRegistry.java @elastic/stack-monitoring - -# Fleet -x-pack/plugin/fleet/src/main/java/org/elasticsearch/xpack/fleet @elastic/fleet -x-pack/plugin/core/src/main/resources/fleet-* @elastic/fleet - -# Logstash -libs/logstash-bridge @elastic/logstash - -# Kibana Security -x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/KibanaOwnedReservedRoleDescriptors.java @elastic/kibana-security - -# APM Data index templates, etc. 
-x-pack/plugin/apm-data/src/main/resources @elastic/apm-server -x-pack/plugin/apm-data/src/yamlRestTest/resources @elastic/apm-server - -# Delivery -gradle @elastic/es-delivery -build-conventions @elastic/es-delivery -build-tools @elastic/es-delivery -build-tools-internal @elastic/es-delivery -*.gradle @elastic/es-delivery -.buildkite @elastic/es-delivery -.ci @elastic/es-delivery -.idea @elastic/es-delivery -distribution/src @elastic/es-delivery -distribution/packages/src @elastic/es-delivery -distribution/docker/src @elastic/es-delivery - -# Core/Infra -distribution/tools @elastic/es-core-infra -libs/core @elastic/es-core-infra -libs/logging @elastic/es-core-infra -libs/native @elastic/es-core-infra -libs/plugin-analysis-api @elastic/es-core-infra -libs/plugin-api @elastic/es-core-infra -libs/plugin-classloader @elastic/es-core-infra -libs/plugin-scanner @elastic/es-core-infra -libs/x-content @elastic/es-core-infra -modules/lang-expression @elastic/es-core-infra -modules/lang-mustache @elastic/es-core-infra -modules/lang-painless @elastic/es-core-infra -modules/rest-root @elastic/es-core-infra -modules/systemd @elastic/es-core-infra -server/src/main/java/org/elasticsearch/bootstrap @elastic/es-core-infra -server/src/main/java/org/elasticsearch/node @elastic/es-core-infra -server/src/main/java/org/elasticsearch/plugins @elastic/es-core-infra -server/src/main/java/org/elasticsearch/threadpool @elastic/es-core-infra - -# Security -x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/privilege @elastic/es-security -x-pack/plugin/core/src/main/java/org/elasticsearch/xpack/core/security/authz/store/ReservedRolesStore.java @elastic/es-security diff --git a/README.asciidoc b/README.asciidoc index dc27735d3c015..8d3c96c659896 100644 --- a/README.asciidoc +++ b/README.asciidoc @@ -1,6 +1,6 @@ = Elasticsearch -Elasticsearch is a distributed search and analytics engine optimized for speed and relevance on production-scale workloads. Elasticsearch is the foundation of Elastic's open Stack platform. Search in near real-time over massive datasets, perform vector searches, integrate with generative AI applications, and much more. +Elasticsearch is a distributed search and analytics engine, scalable data store and vector database optimized for speed and relevance on production-scale workloads. Elasticsearch is the foundation of Elastic's open Stack platform. Search in near real-time over massive datasets, perform vector searches, integrate with generative AI applications, and much more. Use cases enabled by Elasticsearch include: @@ -33,76 +33,124 @@ https://www.elastic.co/downloads/elasticsearch[elastic.co/downloads/elasticsearc === Run Elasticsearch locally //// -IMPORTANT: This content is replicated in the Elasticsearch guide. -If you make changes, you must also update setup/set-up-local-dev-deployment.asciidoc. -//// +IMPORTANT: This content is replicated in the Elasticsearch repo. See `run-elasticsearch-locally.asciidoc`. +Ensure both files are in sync. -To try out Elasticsearch on your own machine, we recommend using Docker -and running both Elasticsearch and Kibana. -Docker images are available from the https://www.docker.elastic.co[Elastic Docker registry]. +https://github.com/elastic/start-local is the source of truth. +//// -NOTE: Starting in Elasticsearch 8.0, security is enabled by default. 
-The first time you start Elasticsearch, TLS encryption is configured automatically, -a password is generated for the `elastic` user, -and a Kibana enrollment token is created so you can connect Kibana to your secured cluster. +[WARNING] +==== +DO NOT USE THESE INSTRUCTIONS FOR PRODUCTION DEPLOYMENTS. -For other installation options, see the -https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html[Elasticsearch installation documentation]. +This setup is intended for local development and testing only. +==== -**Start Elasticsearch** +Quickly set up Elasticsearch and Kibana in Docker for local development or testing, using the https://github.com/elastic/start-local?tab=readme-ov-file#-try-elasticsearch-and-kibana-locally[`start-local` script]. -. Install and start https://www.docker.com/products/docker-desktop[Docker -Desktop]. Go to **Preferences > Resources > Advanced** and set Memory to at least 4GB. +ℹ️ For more detailed information about the `start-local` setup, refer to the https://github.com/elastic/start-local[README on GitHub]. -. Start an Elasticsearch container: -+ ----- -docker network create elastic -docker pull docker.elastic.co/elasticsearch/elasticsearch:{version} <1> -docker run --name elasticsearch --net elastic -p 9200:9200 -p 9300:9300 -e "discovery.type=single-node" -t docker.elastic.co/elasticsearch/elasticsearch:{version} ----- -<1> Replace {version} with the version of Elasticsearch you want to run. -+ -When you start Elasticsearch for the first time, the generated `elastic` user password and -Kibana enrollment token are output to the terminal. -+ -NOTE: You might need to scroll back a bit in the terminal to view the password -and enrollment token. +==== Prerequisites + +- If you don't have Docker installed, https://www.docker.com/products/docker-desktop[download and install Docker Desktop] for your operating system. +- If you're using Microsoft Windows, then install https://learn.microsoft.com/en-us/windows/wsl/install[Windows Subsystem for Linux (WSL)]. -. Copy the generated password and enrollment token and save them in a secure -location. These values are shown only when you start Elasticsearch for the first time. -You'll use these to enroll Kibana with your Elasticsearch cluster and log in. +==== Trial license -**Start Kibana** +This setup comes with a one-month trial of the Elastic *Platinum* license. +After the trial period, the license reverts to *Free and open - Basic*. +Refer to https://www.elastic.co/subscriptions[Elastic subscriptions] for more information. -Kibana enables you to easily send requests to Elasticsearch and analyze, visualize, and manage data interactively. +==== Run `start-local` -. In a new terminal session, start Kibana and connect it to your Elasticsearch container: -+ +To set up Elasticsearch and Kibana locally, run the `start-local` script: + +[source,sh] ---- -docker pull docker.elastic.co/kibana/kibana:{version} <1> -docker run --name kibana --net elastic -p 5601:5601 docker.elastic.co/kibana/kibana:{version} +curl -fsSL https://elastic.co/start-local | sh ---- -<1> Replace {version} with the version of Kibana you want to run. -+ -When you start Kibana, a unique URL is output to your terminal. +// NOTCONSOLE + +This script creates an `elastic-start-local` folder containing configuration files and starts both Elasticsearch and Kibana using Docker. 
+ +After running the script, you can access Elastic services at the following endpoints: + +* *Elasticsearch*: http://localhost:9200 +* *Kibana*: http://localhost:5601 -. To access Kibana, open the generated URL in your browser. +The script generates a random password for the `elastic` user, which is displayed at the end of the installation and stored in the `.env` file. - .. Paste the enrollment token that you copied when starting - Elasticsearch and click the button to connect your Kibana instance with Elasticsearch. +[CAUTION] +==== +This setup is for local testing only. HTTPS is disabled, and Basic authentication is used for Elasticsearch. For security, Elasticsearch and Kibana are accessible only through `localhost`. +==== - .. Log in to Kibana as the `elastic` user with the password that was generated - when you started Elasticsearch. +==== API access -**Send requests to Elasticsearch** +An API key for Elasticsearch is generated and stored in the `.env` file as `ES_LOCAL_API_KEY`. +Use this key to connect to Elasticsearch with a https://www.elastic.co/guide/en/elasticsearch/client/index.html[programming language client] or the https://www.elastic.co/guide/en/elasticsearch/reference/current/rest-apis.html[REST API]. + +From the `elastic-start-local` folder, check the connection to Elasticsearch using `curl`: + +[source,sh] +---- +source .env +curl $ES_LOCAL_URL -H "Authorization: ApiKey ${ES_LOCAL_API_KEY}" +---- +// NOTCONSOLE + +=== Send requests to Elasticsearch You send data and other requests to Elasticsearch through REST APIs. You can interact with Elasticsearch using any client that sends HTTP requests, such as the https://www.elastic.co/guide/en/elasticsearch/client/index.html[Elasticsearch language clients] and https://curl.se[curl]. + +==== Using curl + +Here's an example curl command to create a new Elasticsearch index, using basic auth: + +[source,sh] +---- +curl -u elastic:$ELASTIC_PASSWORD \ + -X PUT \ + http://localhost:9200/my-new-index \ + -H 'Content-Type: application/json' +---- +// NOTCONSOLE + +==== Using a language client + +To connect to your local dev Elasticsearch cluster with a language client, you can use basic authentication with the `elastic` username and the password you set in the environment variable. + +You'll use the following connection details: + +* **Elasticsearch endpoint**: `http://localhost:9200` +* **Username**: `elastic` +* **Password**: `$ELASTIC_PASSWORD` (Value you set in the environment variable) + +For example, to connect with the Python `elasticsearch` client: + +[source,python] +---- +import os +from elasticsearch import Elasticsearch + +username = 'elastic' +password = os.getenv('ELASTIC_PASSWORD') # Value you set in the environment variable + +client = Elasticsearch( + "http://localhost:9200", + basic_auth=(username, password) +) + +print(client.info()) +---- + +==== Using the Dev Tools Console + Kibana's developer console provides an easy way to experiment and test requests. -To access the console, go to **Management > Dev Tools**. +To access the console, open Kibana, then go to **Management** > **Dev Tools**. 
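+ +For example, to check the health of your cluster, run the following request in the console (a minimal illustration; any Elasticsearch REST API request works here): + +[source,http] +---- +GET /_cluster/health +----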
**Add data** diff --git a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties index 515ab9d5f1822..e955ee28dd349 100644 --- a/build-tools-internal/gradle/wrapper/gradle-wrapper.properties +++ b/build-tools-internal/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip +distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/test/rest/LegacyYamlRestCompatTestPluginFuncTest.groovy b/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/test/rest/LegacyYamlRestCompatTestPluginFuncTest.groovy index b7c4908e39b62..737c448f23be6 100644 --- a/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/test/rest/LegacyYamlRestCompatTestPluginFuncTest.groovy +++ b/build-tools-internal/src/integTest/groovy/org/elasticsearch/gradle/internal/test/rest/LegacyYamlRestCompatTestPluginFuncTest.groovy @@ -55,8 +55,7 @@ class LegacyYamlRestCompatTestPluginFuncTest extends AbstractRestResourcesFuncTe def result = gradleRunner("yamlRestTestV${compatibleVersion}CompatTest", '--stacktrace').build() then: - // we set the task to be skipped if there are no matching tests in the compatibility test sourceSet - result.task(":yamlRestTestV${compatibleVersion}CompatTest").outcome == TaskOutcome.SKIPPED + result.task(":yamlRestTestV${compatibleVersion}CompatTest").outcome == TaskOutcome.NO_SOURCE result.task(':copyRestCompatApiTask').outcome == TaskOutcome.NO_SOURCE result.task(':copyRestCompatTestTask').outcome == TaskOutcome.NO_SOURCE result.task(transformTask).outcome == TaskOutcome.NO_SOURCE @@ -165,7 +164,7 @@ class LegacyYamlRestCompatTestPluginFuncTest extends AbstractRestResourcesFuncTe then: result.task(':check').outcome == TaskOutcome.UP_TO_DATE result.task(':checkRestCompat').outcome == TaskOutcome.UP_TO_DATE - result.task(":yamlRestTestV${compatibleVersion}CompatTest").outcome == TaskOutcome.SKIPPED + result.task(":yamlRestTestV${compatibleVersion}CompatTest").outcome == TaskOutcome.NO_SOURCE result.task(':copyRestCompatApiTask').outcome == TaskOutcome.NO_SOURCE result.task(':copyRestCompatTestTask').outcome == TaskOutcome.NO_SOURCE result.task(transformTask).outcome == TaskOutcome.NO_SOURCE diff --git a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle index 6cb22dad9bc79..82c511818d1ff 100644 --- a/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle +++ b/build-tools-internal/src/main/groovy/elasticsearch.ide.gradle @@ -167,9 +167,10 @@ if (providers.systemProperty('idea.active').getOrNull() == 'true') { vmParameters = [ '-ea', '-Djava.security.manager=allow', - '-Djava.locale.providers=SPI,COMPAT', + '-Djava.locale.providers=SPI,CLDR', '-Djava.library.path=' + testLibraryPath, '-Djna.library.path=' + testLibraryPath, + '-Dtests.testfeatures.enabled=true', // TODO: only open these for mockito when it is modularized '--add-opens=java.base/java.security.cert=ALL-UNNAMED', 
'--add-opens=java.base/java.nio.channels=ALL-UNNAMED', diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java index 7010ed92d4c57..4112d96c7296b 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/BwcSetupExtension.java @@ -26,7 +26,6 @@ import org.gradle.api.tasks.TaskProvider; import org.gradle.jvm.toolchain.JavaLanguageVersion; import org.gradle.jvm.toolchain.JavaToolchainService; -import org.gradle.jvm.toolchain.JvmVendorSpec; import java.io.File; import java.io.IOException; @@ -161,10 +160,8 @@ private static TaskProvider createRunBwcGradleTask( /** A convenience method for getting java home for a version of java and requiring that version for the given task to execute */ private static Provider getJavaHome(ObjectFactory objectFactory, JavaToolchainService toolChainService, final int version) { Property value = objectFactory.property(JavaLanguageVersion.class).value(JavaLanguageVersion.of(version)); - return toolChainService.launcherFor(javaToolchainSpec -> { - javaToolchainSpec.getLanguageVersion().value(value); - javaToolchainSpec.getVendor().set(JvmVendorSpec.ORACLE); - }).map(launcher -> launcher.getMetadata().getInstallationPath().getAsFile().getAbsolutePath()); + return toolChainService.launcherFor(javaToolchainSpec -> { javaToolchainSpec.getLanguageVersion().value(value); }) + .map(launcher -> launcher.getMetadata().getInstallationPath().getAsFile().getAbsolutePath()); } private static String readFromFile(File file) { diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java index 4f9498c8f33a6..a0b4a3d8d37c1 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchBuildCompletePlugin.java @@ -8,7 +8,7 @@ package org.elasticsearch.gradle.internal; -import com.gradle.scan.plugin.BuildScanExtension; +import com.gradle.develocity.agent.gradle.DevelocityConfiguration; import org.apache.commons.compress.archivers.tar.TarArchiveEntry; import org.apache.commons.compress.archivers.tar.TarArchiveOutputStream; @@ -64,7 +64,7 @@ public void apply(Project target) { File targetFile = target.file("build/" + buildNumber + ".tar.bz2"); File projectDir = target.getProjectDir(); File gradleWorkersDir = new File(target.getGradle().getGradleUserHomeDir(), "workers/"); - BuildScanExtension extension = target.getExtensions().getByType(BuildScanExtension.class); + DevelocityConfiguration extension = target.getExtensions().getByType(DevelocityConfiguration.class); File daemonsLogDir = new File(target.getGradle().getGradleUserHomeDir(), "daemon/" + target.getGradle().getGradleVersion()); getFlowScope().always(BuildFinishedFlowAction.class, spec -> { @@ -125,7 +125,7 @@ interface Parameters extends FlowParameters { ListProperty getFilteredFiles(); @Input - Property getBuildScan(); + Property getBuildScan(); } @@ -152,7 +152,13 @@ public void execute(BuildFinishedFlowAction.Parameters parameters) throws FileNo // So, if you change this such that the artifact will have a slash/directory in it, you'll need to update the 
logic // below as well pb.directory(uploadFileDir); - pb.start().waitFor(); + try { + // we are very generous here, as the upload can take + // a long time depending on its size + pb.start().waitFor(30, java.util.concurrent.TimeUnit.MINUTES); + } catch (InterruptedException e) { + System.out.println("Failed to upload buildkite artifact " + e.getMessage()); + } System.out.println("Generating buildscan link for artifact..."); @@ -198,7 +204,7 @@ public void execute(BuildFinishedFlowAction.Parameters parameters) throws FileNo + System.getenv("BUILDKITE_JOB_ID") + "/artifacts/" + artifactUuid; - parameters.getBuildScan().get().link("Artifact Upload", targetLink); + parameters.getBuildScan().get().getBuildScan().link("Artifact Upload", targetLink); } } catch (Exception e) { System.out.println("Failed to upload buildkite artifact " + e.getMessage()); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java index 689c8ddecb057..d823176fc5ae9 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/ElasticsearchTestBasePlugin.java @@ -92,7 +92,7 @@ public void execute(Task t) { mkdirs(test.getWorkingDir().toPath().resolve("temp").toFile()); // TODO remove once jvm.options are added to test system properties - test.systemProperty("java.locale.providers", "SPI,COMPAT"); + test.systemProperty("java.locale.providers", "SPI,CLDR"); } }); test.getJvmArgumentProviders().add(nonInputProperties); @@ -107,6 +107,7 @@ public void execute(Task t) { "-Xmx" + System.getProperty("tests.heap.size", "512m"), "-Xms" + System.getProperty("tests.heap.size", "512m"), "-Djava.security.manager=allow", + "-Dtests.testfeatures.enabled=true", "--add-opens=java.base/java.util=ALL-UNNAMED", // TODO: only open these for mockito when it is modularized "--add-opens=java.base/java.security.cert=ALL-UNNAMED", @@ -183,8 +184,8 @@ public void execute(Task t) { }); if (OS.current().equals(OS.WINDOWS) && System.getProperty("tests.timeoutSuite") == null) { - // override the suite timeout to 30 mins for windows, because it has the most inefficient filesystem known to man - test.systemProperty("tests.timeoutSuite", "2400000!"); + // override the suite timeout to 60 mins for windows, because it has the most inefficient filesystem known to man + test.systemProperty("tests.timeoutSuite", "3600000!"); } /* diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java index e9e75a711a8ff..03b8f19d10b13 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/EmbeddedProviderExtension.java @@ -43,17 +43,16 @@ void impl(String implName, Project implProject) { }); String manifestTaskName = "generate" + capitalName + "ProviderManifest"; - Provider generatedResourcesDir = project.getLayout().getBuildDirectory().dir("generated-resources"); + Provider generatedResourcesRoot = project.getLayout().getBuildDirectory().dir("generated-resources"); var generateProviderManifest = project.getTasks().register(manifestTaskName, GenerateProviderManifest.class);
generateProviderManifest.configure(t -> { - t.getManifestFile().set(generatedResourcesDir.map(d -> d.file("LISTING.TXT"))); + t.getManifestFile().set(generatedResourcesRoot.map(d -> d.dir(manifestTaskName).file("LISTING.TXT"))); t.getProviderImplClasspath().from(implConfig); }); - String implTaskName = "generate" + capitalName + "ProviderImpl"; var generateProviderImpl = project.getTasks().register(implTaskName, Sync.class); generateProviderImpl.configure(t -> { - t.into(generatedResourcesDir); + t.into(generatedResourcesRoot.map(d -> d.dir(implTaskName))); t.into("IMPL-JARS/" + implName, spec -> { spec.from(implConfig); spec.from(generateProviderManifest); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java index 42834928bafed..e61bbefc9a973 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/info/GlobalBuildInfoPlugin.java @@ -51,6 +51,7 @@ import java.util.Arrays; import java.util.List; import java.util.Locale; +import java.util.Optional; import java.util.Random; import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; @@ -97,24 +98,25 @@ public void apply(Project project) { JavaVersion minimumCompilerVersion = JavaVersion.toVersion(getResourceContents("/minimumCompilerVersion")); JavaVersion minimumRuntimeVersion = JavaVersion.toVersion(getResourceContents("/minimumRuntimeVersion")); - File runtimeJavaHome = findRuntimeJavaHome(); - boolean isRuntimeJavaHomeSet = Jvm.current().getJavaHome().equals(runtimeJavaHome) == false; + Optional selectedRuntimeJavaHome = findRuntimeJavaHome(); + File actualRuntimeJavaHome = selectedRuntimeJavaHome.orElse(Jvm.current().getJavaHome()); + boolean isRuntimeJavaHomeSet = selectedRuntimeJavaHome.isPresent(); GitInfo gitInfo = GitInfo.gitInfo(project.getRootDir()); BuildParams.init(params -> { params.reset(); - params.setRuntimeJavaHome(runtimeJavaHome); + params.setRuntimeJavaHome(actualRuntimeJavaHome); params.setJavaToolChainSpec(resolveToolchainSpecFromEnv()); params.setRuntimeJavaVersion( determineJavaVersion( "runtime java.home", - runtimeJavaHome, + actualRuntimeJavaHome, isRuntimeJavaHomeSet ? minimumRuntimeVersion : Jvm.current().getJavaVersion() ) ); params.setIsRuntimeJavaHomeSet(isRuntimeJavaHomeSet); - JvmInstallationMetadata runtimeJdkMetaData = metadataDetector.getMetadata(getJavaInstallation(runtimeJavaHome)); + JvmInstallationMetadata runtimeJdkMetaData = metadataDetector.getMetadata(getJavaInstallation(actualRuntimeJavaHome)); params.setRuntimeJavaDetails(formatJavaVendorDetails(runtimeJdkMetaData)); params.setJavaVersions(getAvailableJavaVersions()); params.setMinimumCompilerVersion(minimumCompilerVersion); @@ -298,19 +300,19 @@ private static void assertMinimumCompilerVersion(JavaVersion minimumCompilerVers } } - private File findRuntimeJavaHome() { + private Optional findRuntimeJavaHome() { String runtimeJavaProperty = System.getProperty("runtime.java"); if (runtimeJavaProperty != null) { - return resolveJavaHomeFromToolChainService(runtimeJavaProperty); + return Optional.of(resolveJavaHomeFromToolChainService(runtimeJavaProperty)); } String env = System.getenv("RUNTIME_JAVA_HOME"); if (env != null) { - return new File(env); + return Optional.of(new File(env)); } // fall back to tool chain if set. 
env = System.getenv("JAVA_TOOLCHAIN_HOME"); - return env == null ? Jvm.current().getJavaHome() : new File(env); + return env == null ? Optional.empty() : Optional.of(new File(env)); } @NotNull @@ -348,7 +350,6 @@ private File resolveJavaHomeFromToolChainService(String version) { Property value = objectFactory.property(JavaLanguageVersion.class).value(JavaLanguageVersion.of(version)); Provider javaLauncherProvider = toolChainService.launcherFor(javaToolchainSpec -> { javaToolchainSpec.getLanguageVersion().value(value); - javaToolchainSpec.getVendor().set(JvmVendorSpec.ORACLE); }); return javaLauncherProvider.get().getMetadata().getInstallationPath().getAsFile(); } diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java index 4263ef2b1f76f..489cff65976b1 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/precommit/ThirdPartyAuditTask.java @@ -193,6 +193,11 @@ public Set getMissingClassExcludes() { @SkipWhenEmpty public abstract ConfigurableFileCollection getJarsToScan(); + @Classpath + public FileCollection getClasspath() { + return classpath; + } + @TaskAction public void runThirdPartyAudit() throws IOException { Set jars = getJarsToScan().getFiles(); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java index 08abb02ea831e..ec79fe20492e1 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/ReleaseToolsPlugin.java @@ -52,7 +52,7 @@ public void apply(Project project) { project.getTasks().register("extractCurrentVersions", ExtractCurrentVersionsTask.class); project.getTasks().register("tagVersions", TagVersionsTask.class); - project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class); + project.getTasks().register("setCompatibleVersions", SetCompatibleVersionsTask.class, t -> t.setThisVersion(version)); final FileTree yamlFiles = projectDirectory.dir("docs/changelog") .getAsFileTree() diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java index 15e0a0cc345d5..17761e5183b31 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/release/SetCompatibleVersionsTask.java @@ -14,6 +14,7 @@ import com.github.javaparser.ast.expr.NameExpr; import com.github.javaparser.printer.lexicalpreservation.LexicalPreservingPrinter; +import org.elasticsearch.gradle.Version; import org.gradle.api.tasks.TaskAction; import org.gradle.api.tasks.options.Option; import org.gradle.initialization.layout.BuildLayout; @@ -28,6 +29,8 @@ public class SetCompatibleVersionsTask extends AbstractVersionsTask { + private Version thisVersion; + private Version releaseVersion; private Map versionIds = Map.of(); @Inject @@ -35,21 +38,35 @@ public SetCompatibleVersionsTask(BuildLayout layout) { 
super(layout); } + public void setThisVersion(Version version) { + thisVersion = version; + } + @Option(option = "version-id", description = "Version id used for the release. Of the form :.") public void versionIds(List version) { this.versionIds = splitVersionIds(version); } + @Option(option = "release", description = "The version being released") + public void releaseVersion(String version) { + releaseVersion = Version.fromString(version); + } + @TaskAction public void executeTask() throws IOException { if (versionIds.isEmpty()) { throw new IllegalArgumentException("No version ids specified"); } + + if (releaseVersion.getMajor() < thisVersion.getMajor()) { + // don't need to update CCS version - this is for a different major + return; + } + Integer transportVersion = versionIds.get(TRANSPORT_VERSION_TYPE); if (transportVersion == null) { throw new IllegalArgumentException("TransportVersion id not specified"); } - Path versionJava = rootDir.resolve(TRANSPORT_VERSIONS_FILE_PATH); CompilationUnit file = LexicalPreservingPrinter.setup(StaticJavaParser.parse(versionJava)); diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/rest/compat/compat/AbstractYamlRestCompatTestPlugin.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/rest/compat/compat/AbstractYamlRestCompatTestPlugin.java index c6320394ef5b9..e0581ebf67081 100644 --- a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/rest/compat/compat/AbstractYamlRestCompatTestPlugin.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/test/rest/compat/compat/AbstractYamlRestCompatTestPlugin.java @@ -35,6 +35,7 @@ import org.gradle.api.tasks.Sync; import org.gradle.api.tasks.TaskProvider; import org.gradle.api.tasks.testing.Test; +import org.gradle.language.jvm.tasks.ProcessResources; import java.io.File; import java.nio.file.Path; @@ -213,6 +214,17 @@ public void apply(Project project) { .named(RestResourcesPlugin.COPY_YAML_TESTS_TASK) .flatMap(CopyRestTestsTask::getOutputResourceDir); + // ensure we include other non rest spec related test resources + project.getTasks() + .withType(ProcessResources.class) + .named(yamlCompatTestSourceSet.getProcessResourcesTaskName()) + .configure(processResources -> { + processResources.from( + sourceSets.getByName(YamlRestTestPlugin.YAML_REST_TEST).getResources(), + spec -> { spec.exclude("rest-api-spec/**"); } + ); + }); + // setup the test task TaskProvider yamlRestCompatTestTask = registerTestTask(project, yamlCompatTestSourceSet); yamlRestCompatTestTask.configure(testTask -> { @@ -221,7 +233,7 @@ public void apply(Project project) { testTask.setTestClassesDirs( yamlTestSourceSet.getOutput().getClassesDirs().plus(yamlCompatTestSourceSet.getOutput().getClassesDirs()) ); - testTask.onlyIf("Compatibility tests are available", t -> yamlCompatTestSourceSet.getAllSource().isEmpty() == false); + testTask.onlyIf("Compatibility tests are available", t -> yamlCompatTestSourceSet.getOutput().isEmpty() == false); testTask.setClasspath( yamlCompatTestSourceSet.getRuntimeClasspath() // remove the "normal" api and tests diff --git a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java index b8cffae0189ce..913a15517f0af 100644 --- 
a/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java +++ b/build-tools-internal/src/main/java/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolver.java @@ -23,9 +23,12 @@ import java.util.Map; import java.util.Optional; +/** + * Resolves released Oracle JDKs that are EOL. + */ public abstract class ArchivedOracleJdkToolchainResolver extends AbstractCustomJavaToolchainResolver { - private static final Map ARCHIVED_BASE_VERSIONS = Maps.of(20, "20.0.2", 19, "19.0.2", 18, "18.0.2.1", 17, "17.0.7"); + private static final Map ARCHIVED_BASE_VERSIONS = Maps.of(20, "20.0.2", 19, "19.0.2", 18, "18.0.2.1"); @Override public Optional resolve(JavaToolchainRequest request) { diff --git a/build-tools-internal/src/main/resources/minimumGradleVersion b/build-tools-internal/src/main/resources/minimumGradleVersion index 83ea3179ddacc..2eb8a97206651 100644 --- a/build-tools-internal/src/main/resources/minimumGradleVersion +++ b/build-tools-internal/src/main/resources/minimumGradleVersion @@ -1 +1 @@ -8.8 \ No newline at end of file +8.10.1 \ No newline at end of file diff --git a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy index b7f08b6016679..dd6e7b324e745 100644 --- a/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy +++ b/build-tools-internal/src/test/groovy/org/elasticsearch/gradle/internal/toolchain/ArchivedOracleJdkToolchainResolverSpec.groovy @@ -40,12 +40,6 @@ class ArchivedOracleJdkToolchainResolverSpec extends AbstractToolchainResolverSp [18, ORACLE, LINUX, X86_64, "https://download.oracle.com/java/18/archive/jdk-18.0.2.1_linux-x64_bin.tar.gz"], [18, ORACLE, LINUX, AARCH64, "https://download.oracle.com/java/18/archive/jdk-18.0.2.1_linux-aarch64_bin.tar.gz"], [18, ORACLE, WINDOWS, X86_64, "https://download.oracle.com/java/18/archive/jdk-18.0.2.1_windows-x64_bin.zip"], - - [17, ORACLE, MAC_OS, X86_64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_macos-x64_bin.tar.gz"], - [17, ORACLE, MAC_OS, AARCH64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_macos-aarch64_bin.tar.gz"], - [17, ORACLE, LINUX, X86_64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_linux-x64_bin.tar.gz"], - [17, ORACLE, LINUX, AARCH64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_linux-aarch64_bin.tar.gz"], - [17, ORACLE, WINDOWS, X86_64, "https://download.oracle.com/java/17/archive/jdk-17.0.7_windows-x64_bin.zip"] ] } diff --git a/build-tools-internal/version.properties b/build-tools-internal/version.properties index 0fa6142789381..6823a73c8bb8b 100644 --- a/build-tools-internal/version.properties +++ b/build-tools-internal/version.properties @@ -1,4 +1,4 @@ -elasticsearch = 8.15.0 +elasticsearch = 8.15.3 lucene = 9.11.1 bundled_jdk_vendor = openjdk @@ -30,7 +30,7 @@ httpcore = 4.4.13 httpasyncclient = 4.1.5 commonslogging = 1.2 commonscodec = 1.15 -protobuf = 3.21.9 +protobuf = 3.25.5 # test dependencies randomizedrunner = 2.8.0 diff --git a/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java b/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java index 2bc4aa1a1be36..d4747c9a6c38e 100644 --- 
a/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java +++ b/build-tools/src/main/java/org/elasticsearch/gradle/DistributionDownloadPlugin.java @@ -14,6 +14,7 @@ import org.gradle.api.NamedDomainObjectContainer; import org.gradle.api.Plugin; import org.gradle.api.Project; +import org.gradle.api.artifacts.Configuration; import org.gradle.api.artifacts.dsl.DependencyHandler; import org.gradle.api.artifacts.repositories.IvyArtifactRepository; import org.gradle.api.artifacts.type.ArtifactTypeDefinition; @@ -42,8 +43,10 @@ public class DistributionDownloadPlugin implements Plugin { private static final String FAKE_SNAPSHOT_IVY_GROUP = "elasticsearch-distribution-snapshot"; private static final String DOWNLOAD_REPO_NAME = "elasticsearch-downloads"; private static final String SNAPSHOT_REPO_NAME = "elasticsearch-snapshots"; - public static final String DISTRO_EXTRACTED_CONFIG_PREFIX = "es_distro_extracted_"; - public static final String DISTRO_CONFIG_PREFIX = "es_distro_file_"; + + public static final String ES_DISTRO_CONFIG_PREFIX = "es_distro_"; + public static final String DISTRO_EXTRACTED_CONFIG_PREFIX = ES_DISTRO_CONFIG_PREFIX + "extracted_"; + public static final String DISTRO_CONFIG_PREFIX = ES_DISTRO_CONFIG_PREFIX + "file_"; private final ObjectFactory objectFactory; private NamedDomainObjectContainer distributionsContainer; @@ -51,6 +54,8 @@ public class DistributionDownloadPlugin implements Plugin { private Property dockerAvailability; + private boolean writingDependencies = false; + @Inject public DistributionDownloadPlugin(ObjectFactory objectFactory) { this.objectFactory = objectFactory; @@ -63,6 +68,7 @@ public void setDockerAvailability(Provider dockerAvailability) { @Override public void apply(Project project) { + writingDependencies = project.getGradle().getStartParameter().getWriteDependencyVerifications().isEmpty() == false; project.getDependencies().registerTransform(UnzipTransform.class, transformSpec -> { transformSpec.getFrom().attribute(ArtifactTypeDefinition.ARTIFACT_TYPE_ATTRIBUTE, ArtifactTypeDefinition.ZIP_TYPE); transformSpec.getTo().attribute(ArtifactTypeDefinition.ARTIFACT_TYPE_ATTRIBUTE, ArtifactTypeDefinition.DIRECTORY_TYPE); @@ -85,7 +91,6 @@ private void setupDistributionContainer(Project project) { var extractedConfiguration = project.getConfigurations().create(DISTRO_EXTRACTED_CONFIG_PREFIX + name); extractedConfiguration.getAttributes() .attribute(ArtifactTypeDefinition.ARTIFACT_TYPE_ATTRIBUTE, ArtifactTypeDefinition.DIRECTORY_TYPE); - var distribution = new ElasticsearchDistribution( name, objectFactory, @@ -94,16 +99,20 @@ private void setupDistributionContainer(Project project) { objectFactory.fileCollection().from(extractedConfiguration) ); - registerDistributionDependencies(project, distribution); + // when running with --write-dependency-verification to update dependency verification data, + // we do not register the dependencies as we ignore elasticsearch internal dependencies anyhow and + // want to reduce general resolution time + if (writingDependencies == false) { + registerDistributionDependencies(project, distribution); + } return distribution; }); project.getExtensions().add(CONTAINER_NAME, distributionsContainer); } private void registerDistributionDependencies(Project project, ElasticsearchDistribution distribution) { - project.getConfigurations() - .getByName(DISTRO_CONFIG_PREFIX + distribution.getName()) - .getDependencies() + Configuration distroConfig = 
project.getConfigurations().getByName(DISTRO_CONFIG_PREFIX + distribution.getName()); + distroConfig.getDependencies() .addLater( project.provider(() -> distribution.maybeFreeze()) .map( @@ -112,9 +121,9 @@ private void registerDistributionDependencies(Project project, ElasticsearchDist ) ); - project.getConfigurations() - .getByName(DISTRO_EXTRACTED_CONFIG_PREFIX + distribution.getName()) - .getDependencies() + Configuration extractedDistroConfig = project.getConfigurations() + .getByName(DISTRO_EXTRACTED_CONFIG_PREFIX + distribution.getName()); + extractedDistroConfig.getDependencies() .addAllLater( project.provider(() -> distribution.maybeFreeze()) .map( diff --git a/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java b/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java index a1da860abe26a..9593a281686e7 100644 --- a/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java +++ b/build-tools/src/main/java/org/elasticsearch/gradle/test/GradleTestPolicySetupPlugin.java @@ -22,6 +22,9 @@ public void apply(Project project) { test.systemProperty("tests.gradle", true); test.systemProperty("tests.task", test.getPath()); + // Flag is required for later Java versions since our tests use a custom security manager + test.jvmArgs("-Djava.security.manager=allow"); + SystemPropertyCommandLineArgumentProvider nonInputProperties = new SystemPropertyCommandLineArgumentProvider(); // don't track these as inputs since they contain absolute paths and break cache relocatability nonInputProperties.systemProperty("gradle.dist.lib", gradle.getGradleHomeDir().getAbsolutePath() + "/lib"); diff --git a/build.gradle b/build.gradle index 3869d21b49bfe..01fdace570ce0 100644 --- a/build.gradle +++ b/build.gradle @@ -19,6 +19,7 @@ import org.elasticsearch.gradle.internal.info.BuildParams import org.elasticsearch.gradle.util.GradleUtils import org.gradle.plugins.ide.eclipse.model.AccessRule import org.gradle.plugins.ide.eclipse.model.ProjectDependency +import org.elasticsearch.gradle.DistributionDownloadPlugin import java.nio.file.Files @@ -284,11 +285,16 @@ allprojects { } tasks.register('resolveAllDependencies', ResolveAllDependencies) { - configs = project.configurations + def ignoredPrefixes = [DistributionDownloadPlugin.ES_DISTRO_CONFIG_PREFIX, "jdbcDriver"] + configs = project.configurations.matching { config -> ignoredPrefixes.any { config.name.startsWith(it) } == false } resolveJavaToolChain = true if (project.path.contains("fixture")) { dependsOn tasks.withType(ComposePull) } + if (project.path.contains(":distribution:docker")) { + enabled = false + } + } plugins.withId('lifecycle-base') { diff --git a/distribution/docker/src/docker/Dockerfile b/distribution/docker/src/docker/Dockerfile index 32f35b05015b9..2a2a77a6df820 100644 --- a/distribution/docker/src/docker/Dockerfile +++ b/distribution/docker/src/docker/Dockerfile @@ -22,7 +22,7 @@ <% if (docker_base == 'iron_bank') { %> ARG BASE_REGISTRY=registry1.dso.mil ARG BASE_IMAGE=ironbank/redhat/ubi/ubi9 -ARG BASE_TAG=9.3 +ARG BASE_TAG=9.4 <% } %> ################################################################################ diff --git a/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml b/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml index 38ce16a413af2..f4364c5008c09 100644 --- a/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml +++ b/distribution/docker/src/docker/iron_bank/hardening_manifest.yaml @@ 
-14,7 +14,7 @@ tags: # Build args passed to Dockerfile ARGs args: BASE_IMAGE: "redhat/ubi/ubi9" - BASE_TAG: "9.3" + BASE_TAG: "9.4" # Docker image labels labels: diff --git a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java index 298b4671582b5..7b62f3fc4b63f 100644 --- a/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java +++ b/distribution/tools/server-cli/src/main/java/org/elasticsearch/server/cli/SystemJvmOptions.java @@ -11,6 +11,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.EsExecutors; import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.core.UpdateForV9; import java.io.File; import java.nio.file.Path; @@ -64,11 +65,7 @@ static List systemJvmOptions(Settings nodeSettings, final Map -------------------------- -<1> The `name` field uses the `standard` analyzer, and so support full text queries. +<1> The `name` field uses the `standard` analyzer, and so supports full text queries. <2> The `name.sort` field is an `icu_collation_keyword` field that will preserve the name as a single token doc values, and applies the German ``phonebook'' order. <3> An example query which searches the `name` field and sorts on the `name.sort` field. @@ -467,7 +467,7 @@ differences. `case_first`:: Possible values: `lower` or `upper`. Useful to control which case is sorted -first when case is not ignored for strength `tertiary`. The default depends on +first when the case is not ignored for strength `tertiary`. The default depends on the collation. `numeric`:: diff --git a/docs/plugins/analysis-kuromoji.asciidoc b/docs/plugins/analysis-kuromoji.asciidoc index 1f114e9ad9ed6..fa6229b9f20e8 100644 --- a/docs/plugins/analysis-kuromoji.asciidoc +++ b/docs/plugins/analysis-kuromoji.asciidoc @@ -86,7 +86,7 @@ The `kuromoji_iteration_mark` normalizes Japanese horizontal iteration marks `normalize_kanji`:: - Indicates whether kanji iteration marks should be normalize. Defaults to `true`. + Indicates whether kanji iteration marks should be normalized. Defaults to `true`. `normalize_kana`:: @@ -189,7 +189,7 @@ PUT kuromoji_sample + -- Additional expert user parameters `nbest_cost` and `nbest_examples` can be used -to include additional tokens that most likely according to the statistical model. +to include additional tokens that are most likely according to the statistical model. If both parameters are used, the largest number of both is applied. `nbest_cost`:: @@ -624,3 +624,123 @@ Which results in: } ] } -------------------------------------------------- + +[[analysis-kuromoji-hiragana-uppercase]] +==== `hiragana_uppercase` token filter + +The `hiragana_uppercase` token filter normalizes small letters (捨て仮名) in hiragana into standard letters. +This filter is useful if you want to search against old style Japanese text such as +patents, legal documents, contract policies, etc. 
+ +For example: + +[source,console] +-------------------------------------------------- +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "hiragana_uppercase" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "ちょっとまって" +} +-------------------------------------------------- + +Which results in: + +[source,console-result] +-------------------------------------------------- +{ + "tokens": [ + { + "token": "ちよつと", + "start_offset": 0, + "end_offset": 4, + "type": "word", + "position": 0 + }, + { + "token": "まつ", + "start_offset": 4, + "end_offset": 6, + "type": "word", + "position": 1 + }, + { + "token": "て", + "start_offset": 6, + "end_offset": 7, + "type": "word", + "position": 2 + } + ] +} +-------------------------------------------------- + +[[analysis-kuromoji-katakana-uppercase]] +==== `katakana_uppercase` token filter + +The `katakana_uppercase` token filter normalizes small letters (捨て仮名) in katakana into standard letters. +This filter is useful if you want to search against old style Japanese text such as +patents, legal documents, contract policies, etc. + +For example: + +[source,console] +-------------------------------------------------- +PUT kuromoji_sample +{ + "settings": { + "index": { + "analysis": { + "analyzer": { + "my_analyzer": { + "tokenizer": "kuromoji_tokenizer", + "filter": [ + "katakana_uppercase" + ] + } + } + } + } + } +} + +GET kuromoji_sample/_analyze +{ + "analyzer": "my_analyzer", + "text": "ストップウォッチ" +} +-------------------------------------------------- + +Which results in: + +[source,console-result] +-------------------------------------------------- +{ + "tokens": [ + { + "token": "ストツプウオツチ", + "start_offset": 0, + "end_offset": 8, + "type": "word", + "position": 0 + } + ] +} +-------------------------------------------------- diff --git a/docs/plugins/analysis-nori.asciidoc b/docs/plugins/analysis-nori.asciidoc index 1a3153fa3bea5..369268bcef0cd 100644 --- a/docs/plugins/analysis-nori.asciidoc +++ b/docs/plugins/analysis-nori.asciidoc @@ -447,7 +447,7 @@ Which responds with: The `nori_number` token filter normalizes Korean numbers to regular Arabic decimal numbers in half-width characters. -Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds punctuation. +Korean numbers are often written using a combination of Hangul and Arabic numbers with various kinds of punctuation. For example, 3.2천 means 3200. This filter does this kind of normalization and allows a search for 3200 to match 3.2천 in text, but can also be used to make range facets based on the normalized numbers and so on. diff --git a/docs/plugins/development/creating-classic-plugins.asciidoc b/docs/plugins/development/creating-classic-plugins.asciidoc index f3f62a11f2993..531de916f14fa 100644 --- a/docs/plugins/development/creating-classic-plugins.asciidoc +++ b/docs/plugins/development/creating-classic-plugins.asciidoc @@ -18,7 +18,7 @@ will refuse to start in the presence of plugins with the incorrect [discrete] ==== Classic plugin file structure -Classis plugins are ZIP files composed of JAR files and +Classic plugins are ZIP files composed of JAR files and <>, a Java properties file that describes the plugin. 
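
As a minimal sketch of that descriptor file (every value below is a placeholder for illustration, not taken from a real plugin), a classic plugin's properties file might look like:

[source,properties]
----
description=Custom analysis plugin
version=1.0.0
name=my-analysis-plugin
classname=org.example.MyAnalysisPlugin
java.version=17
elasticsearch.version=8.15.3
----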
diff --git a/docs/plugins/development/creating-stable-plugins.asciidoc b/docs/plugins/development/creating-stable-plugins.asciidoc index c9a8a1f6c7e2a..9f98774b5a761 100644 --- a/docs/plugins/development/creating-stable-plugins.asciidoc +++ b/docs/plugins/development/creating-stable-plugins.asciidoc @@ -1,8 +1,8 @@ [[creating-stable-plugins]] === Creating text analysis plugins with the stable plugin API -Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene -analyzers, token filters, character filters, and tokenizers]. +Text analysis plugins provide {es} with custom {ref}/analysis.html[Lucene +analyzers, token filters, character filters, and tokenizers]. [discrete] ==== The stable plugin API @@ -10,7 +10,7 @@ analyzers, token filters, character filters, and tokenizers]. Text analysis plugins can be developed against the stable plugin API. This API consists of the following dependencies: -* `plugin-api` - an API used by plugin developers to implement custom {es} +* `plugin-api` - an API used by plugin developers to implement custom {es} plugins. * `plugin-analysis-api` - an API used by plugin developers to implement analysis plugins and integrate them into {es}. @@ -18,7 +18,7 @@ plugins and integrate them into {es}. core Lucene analysis interfaces like `Tokenizer`, `Analyzer`, and `TokenStream`. For new versions of {es} within the same major version, plugins built against -this API do not need to be recompiled. Future versions of the API will be +this API do not need to be recompiled. Future versions of the API will be backwards compatible and plugins are binary compatible with future versions of {es}. In other words, once you have a working artifact, you can re-use it when you upgrade {es} to a new bugfix or minor version. @@ -48,9 +48,9 @@ require code changes. Stable plugins are ZIP files composed of JAR files and two metadata files: -* `stable-plugin-descriptor.properties` - a Java properties file that describes +* `stable-plugin-descriptor.properties` - a Java properties file that describes the plugin. Refer to <>. -* `named_components.json` - a JSON file mapping interfaces to key-value pairs +* `named_components.json` - a JSON file mapping interfaces to key-value pairs of component names and implementation classes. Note that only JAR files at the root of the plugin are added to the classpath @@ -65,7 +65,7 @@ you use this plugin. However, you don't need Gradle to create plugins. The {es} Github repository contains {es-repo}tree/main/plugins/examples/stable-analysis[an example analysis plugin]. -The example `build.gradle` build script provides a good starting point for +The example `build.gradle` build script provides a good starting point for developing your own plugin. [discrete] @@ -77,29 +77,29 @@ Plugins are written in Java, so you need to install a Java Development Kit [discrete] ===== Step by step -. Create a directory for your project. +. Create a directory for your project. . Copy the example `build.gradle` build script to your project directory. Note that this build script uses the `elasticsearch.stable-esplugin` gradle plugin to build your plugin. . Edit the `build.gradle` build script: -** Add a definition for the `pluginApiVersion` and matching `luceneVersion` -variables to the top of the file. You can find these versions in the -`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch +** Add a definition for the `pluginApiVersion` and matching `luceneVersion` +variables to the top of the file.
You can find these versions in the +`build-tools-internal/version.properties` file in the {es-repo}[Elasticsearch Github repository]. -** Edit the `name` and `description` in the `esplugin` section of the build -script. This will create the plugin descriptor file. If you're not using the -`elasticsearch.stable-esplugin` gradle plugin, refer to +** Edit the `name` and `description` in the `esplugin` section of the build +script. This will create the plugin descriptor file. If you're not using the +`elasticsearch.stable-esplugin` gradle plugin, refer to <> to create the file manually. ** Add module information. -** Ensure you have declared the following compile-time dependencies. These -dependencies are compile-time only because {es} will provide these libraries at +** Ensure you have declared the following compile-time dependencies. These +dependencies are compile-time only because {es} will provide these libraries at runtime. *** `org.elasticsearch.plugin:elasticsearch-plugin-api` *** `org.elasticsearch.plugin:elasticsearch-plugin-analysis-api` *** `org.apache.lucene:lucene-analysis-common` -** For unit testing, ensure these dependencies have also been added to the +** For unit testing, ensure these dependencies have also been added to the `build.gradle` script as `testImplementation` dependencies. -. Implement an interface from the analysis plugin API, annotating it with +. Implement an interface from the analysis plugin API, annotating it with `NamedComponent`. Refer to <> for an example. . You should now be able to assemble a plugin ZIP file by running: + @@ -107,22 +107,22 @@ runtime. ---- gradle bundlePlugin ---- -The resulting plugin ZIP file is written to the `build/distributions` +The resulting plugin ZIP file is written to the `build/distributions` directory. [discrete] ===== YAML REST tests -The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your -plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. +The Gradle `elasticsearch.yaml-rest-test` plugin enables testing of your +plugin using the {es-repo}blob/main/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/README.asciidoc[{es} yamlRestTest framework]. These tests use a YAML-formatted domain language to issue REST requests against -an internal {es} cluster that has your plugin installed, and to check the -results of those requests. The structure of a YAML REST test directory is as +an internal {es} cluster that has your plugin installed, and to check the +results of those requests. The structure of a YAML REST test directory is as follows: -* A test suite class, defined under `src/yamlRestTest/java`. This class should +* A test suite class, defined under `src/yamlRestTest/java`. This class should extend `ESClientYamlSuiteTestCase`. -* The YAML tests themselves should be defined under +* The YAML tests themselves should be defined under `src/yamlRestTest/resources/test/`. [[plugin-descriptor-file-stable]] diff --git a/docs/plugins/discovery-azure-classic.asciidoc b/docs/plugins/discovery-azure-classic.asciidoc index aa710a2fe7ef9..b8d37f024172c 100644 --- a/docs/plugins/discovery-azure-classic.asciidoc +++ b/docs/plugins/discovery-azure-classic.asciidoc @@ -148,7 +148,7 @@ Before starting, you need to have: -- You should follow http://azure.microsoft.com/en-us/documentation/articles/linux-use-ssh-key/[this guide] to learn -how to create or use existing SSH keys. If you have already did it, you can skip the following. 
+how to create or use existing SSH keys. If you have already done it, you can skip the following. Here is a description on how to generate SSH keys using `openssl`: diff --git a/docs/plugins/discovery-gce.asciidoc b/docs/plugins/discovery-gce.asciidoc index 2e8cff21208e0..0a2629b7f094b 100644 --- a/docs/plugins/discovery-gce.asciidoc +++ b/docs/plugins/discovery-gce.asciidoc @@ -478,7 +478,7 @@ discovery: seed_providers: gce -------------------------------------------------- -Replaces `project_id` and `zone` with your settings. +Replace `project_id` and `zone` with your settings. To run test: diff --git a/docs/plugins/integrations.asciidoc b/docs/plugins/integrations.asciidoc index 71f237692ad35..aff4aed0becd2 100644 --- a/docs/plugins/integrations.asciidoc +++ b/docs/plugins/integrations.asciidoc @@ -91,7 +91,7 @@ Integrations are not plugins, but are external tools or modules that make it eas Elasticsearch Grails plugin. * https://hibernate.org/search/[Hibernate Search] - Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transaction from the reference database. + Integration with Hibernate ORM, from the Hibernate team. Automatic synchronization of write operations, yet exposes full Elasticsearch capabilities for queries. Can return either Elasticsearch native or re-map queries back into managed entities loaded within transactions from the reference database. * https://github.com/spring-projects/spring-data-elasticsearch[Spring Data Elasticsearch]: Spring Data implementation for Elasticsearch @@ -104,7 +104,7 @@ Integrations are not plugins, but are external tools or modules that make it eas * https://pulsar.apache.org/docs/en/io-elasticsearch[Apache Pulsar]: The Elasticsearch Sink Connector is used to pull messages from Pulsar topics - and persist the messages to a index. + and persist the messages to an index. * https://micronaut-projects.github.io/micronaut-elasticsearch/latest/guide/index.html[Micronaut Elasticsearch Integration]: Integration of Micronaut with Elasticsearch diff --git a/docs/plugins/mapper-annotated-text.asciidoc b/docs/plugins/mapper-annotated-text.asciidoc index afe8ba41da9b8..e4141e98a2285 100644 --- a/docs/plugins/mapper-annotated-text.asciidoc +++ b/docs/plugins/mapper-annotated-text.asciidoc @@ -143,7 +143,7 @@ broader positional queries e.g. finding mentions of a `Guitarist` near to `strat WARNING: Any use of `=` signs in annotation values eg `[Prince](person=Prince)` will cause the document to be rejected with a parse failure. In future we hope to have a use for -the equals signs so wil actively reject documents that contain this today. +the equals signs so will actively reject documents that contain this today. [[annotated-text-synthetic-source]] ===== Synthetic `_source` diff --git a/docs/plugins/store-smb.asciidoc b/docs/plugins/store-smb.asciidoc index 8557ef868010f..da803b4f42022 100644 --- a/docs/plugins/store-smb.asciidoc +++ b/docs/plugins/store-smb.asciidoc @@ -10,7 +10,7 @@ include::install_remove.asciidoc[] ==== Working around a bug in Windows SMB and Java on windows When using a shared file system based on the SMB protocol (like Azure File Service) to store indices, the way Lucene -open index segment files is with a write only flag. 
This is the _correct_ way to open the files, as they will only be +opens index segment files is with a write only flag. This is the _correct_ way to open the files, as they will only be used for writes and allows different FS implementations to optimize for it. Sadly, in windows with SMB, this disables the cache manager, causing writes to be slow. This has been described in https://issues.apache.org/jira/browse/LUCENE-6176[LUCENE-6176], but it affects each and every Java program out there!. @@ -44,7 +44,7 @@ This can be configured for all indices by adding this to the `elasticsearch.yml` index.store.type: smb_nio_fs ---- -Note that setting will be applied for newly created indices. +Note that settings will be applied for newly created indices. It can also be set on a per-index basis at index creation time: diff --git a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc index 807ec93132d37..ded01237c23c8 100644 --- a/docs/reference/aggregations/bucket/composite-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/composite-aggregation.asciidoc @@ -156,7 +156,7 @@ GET /_search "type": "keyword", "script": """ emit(doc['timestamp'].value.dayOfWeekEnum - .getDisplayName(TextStyle.FULL, Locale.ROOT)) + .getDisplayName(TextStyle.FULL, Locale.ENGLISH)) """ } }, diff --git a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc index 3511ec9e63b02..ef62f263a54a8 100644 --- a/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc +++ b/docs/reference/aggregations/bucket/datehistogram-aggregation.asciidoc @@ -582,7 +582,7 @@ For example, the offset of `+19d` will result in buckets with names like `2022-0 Increasing the offset to `+20d`, each document will appear in a bucket for the previous month, with all bucket keys ending with the same day of the month, as normal. -However, further increasing to `+28d`, +However, further increasing to `+28d`, what used to be a February bucket has now become `"2022-03-01"`. [source,console,id=datehistogram-aggregation-offset-example-28d] @@ -819,7 +819,7 @@ POST /sales/_search?size=0 "runtime_mappings": { "date.day_of_week": { "type": "keyword", - "script": "emit(doc['date'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + "script": "emit(doc['date'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" } }, "aggs": { diff --git a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc index 3efb8f6de9b3e..e37118019a55c 100644 --- a/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-graph-tokenfilter.asciidoc @@ -85,45 +85,45 @@ Additional settings are: <> search analyzers to pick up changes to synonym files. Only to be used for search analyzers. * `expand` (defaults to `true`). -* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important -to note that only those synonym rules which cannot get parsed are ignored. 
For instance consider the following request: - -[source,console] --------------------------------------------------- -PUT /test_index -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "synonym": { - "tokenizer": "standard", - "filter": [ "my_stop", "synonym_graph" ] - } - }, - "filter": { - "my_stop": { - "type": "stop", - "stopwords": [ "bar" ] - }, - "synonym_graph": { - "type": "synonym_graph", - "lenient": true, - "synonyms": [ "foo, bar => baz" ] - } - } - } - } - } -} --------------------------------------------------- +Expands definitions for equivalent synonym rules. +See <>. +* `lenient` (defaults to `false`). +If `true` ignores errors while parsing the synonym configuration. +It is important to note that only those synonym rules which cannot get parsed are ignored. +See <> for an example of `lenient` behaviour for invalid synonym rules. + +[discrete] +[[synonym-graph-tokenizer-expand-equivalent-synonyms]] +===== `expand` equivalent synonym rules + +The `expand` parameter controls whether to expand equivalent synonym rules. +Consider a synonym defined like: + +`foo, bar, baz` + +Using `expand: true`, the synonym rule would be expanded into: -With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping -being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the -mapping is itself eliminated because it was a stop word. Similarly, if the mapping was "bar, foo, baz" and `expand` was -set to `false` no mapping would get added as when `expand=false` the target mapping is the first word. However, if -`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz` i.e, all mappings other than the -stop word. +``` +foo => foo +foo => bar +foo => baz +bar => foo +bar => bar +bar => baz +baz => foo +baz => bar +baz => baz +``` + +When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: + +``` +foo => foo +bar => foo +baz => foo +``` + +The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. [discrete] [[synonym-graph-tokenizer-ignore_case-deprecated]] @@ -160,12 +160,65 @@ Text will be processed first through filters preceding the synonym filter before {es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. -The synonym rules should not contain words that are removed by a filter that appears later in the chain (like a `stop` filter). -Removing a term from a synonym rule means there will be no matching for it at query time. - Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. 
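
As a minimal sketch of the `expand` setting described above (the index, analyzer, and filter names are placeholders), the following defines a `synonym_graph` filter that keeps only the first synonym as the canonical form:

[source,console]
----
PUT /my-index-000001
{
  "settings": {
    "analysis": {
      "filter": {
        "my_synonyms": {
          "type": "synonym_graph",
          "expand": false,
          "synonyms": [ "foo, bar, baz" ]
        }
      },
      "analyzer": {
        "my_analyzer": {
          "tokenizer": "standard",
          "filter": [ "lowercase", "my_synonyms" ]
        }
      }
    }
  }
}
----

With `expand` set to `false`, `bar` and `baz` are rewritten to `foo` at analysis time, which matches the `bar => foo` and `baz => foo` rules shown above.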
+ +[discrete] +[[synonym-graph-tokenizer-stop-token-filter]] +===== Synonyms and `stop` token filters + +Synonyms and <> interact with each other in the following ways: + +[discrete] +====== Stop token filter *before* synonym token filter + +Stop words will be removed from the synonym rule definition. +This can cause errors on the synonym rule. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + +For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For *equivalent synonym rules* like `foo, bar, baz` and `expand: true`, with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + +[discrete] +====== Stop token filter *after* synonym token filter + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz` which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc index 046cd297b5092..1658f016db60b 100644 --- a/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonym-tokenfilter.asciidoc @@ -73,47 +73,45 @@ Additional settings are: <> search analyzers to pick up changes to synonym files. Only to be used for search analyzers. * `expand` (defaults to `true`). -* `lenient` (defaults to `false`). If `true` ignores exceptions while parsing the synonym configuration. It is important -to note that only those synonym rules which cannot get parsed are ignored.
For instance consider the following request: - - -[source,console] --------------------------------------------------- -PUT /test_index -{ - "settings": { - "index": { - "analysis": { - "analyzer": { - "synonym": { - "tokenizer": "standard", - "filter": [ "my_stop", "synonym" ] - } - }, - "filter": { - "my_stop": { - "type": "stop", - "stopwords": [ "bar" ] - }, - "synonym": { - "type": "synonym", - "lenient": true, - "synonyms": [ "foo, bar => baz" ] - } - } - } - } - } -} --------------------------------------------------- +Expands definitions for equivalent synonym rules. +See <>. +* `lenient` (defaults to `false`). +If `true` ignores errors while parsing the synonym configuration. +It is important to note that only those synonym rules which cannot get parsed are ignored. +See <> for an example of `lenient` behaviour for invalid synonym rules. + +[discrete] +[[synonym-tokenizer-expand-equivalent-synonyms]] +===== `expand` equivalent synonym rules + +The `expand` parameter controls whether to expand equivalent synonym rules. +Consider a synonym defined like: + +`foo, bar, baz` + +Using `expand: true`, the synonym rule would be expanded into: -With the above request the word `bar` gets skipped but a mapping `foo => baz` is still added. However, if the mapping -being added was `foo, baz => bar` nothing would get added to the synonym list. This is because the target word for the -mapping is itself eliminated because it was a stop word. Similarly, if the mapping was "bar, foo, baz" and `expand` was -set to `false` no mapping would get added as when `expand=false` the target mapping is the first word. However, if -`expand=true` then the mappings added would be equivalent to `foo, baz => foo, baz` i.e, all mappings other than the -stop word. +``` +foo => foo +foo => bar +foo => baz +bar => foo +bar => bar +bar => baz +baz => foo +baz => bar +baz => baz +``` +When `expand` is set to `false`, the synonym rule is not expanded and the first synonym is treated as the canonical representation. The synonym would be equivalent to: + +``` +foo => foo +bar => foo +baz => foo +``` + +The `expand` parameter does not affect explicit synonym rules, like `foo, bar => baz`. [discrete] [[synonym-tokenizer-ignore_case-deprecated]] @@ -135,7 +133,7 @@ To apply synonyms, you will need to include a synonym token filters into an anal "my_analyzer": { "type": "custom", "tokenizer": "standard", - "filter": ["stemmer", "synonym_graph"] + "filter": ["stemmer", "synonym"] } } ---- @@ -148,10 +146,7 @@ Order is important for your token filters. Text will be processed first through filters preceding the synonym filter before being processed by the synonym filter. {es} will also use the token filters preceding the synonym filter in a tokenizer chain to parse the entries in a synonym file or synonym set. -In the above example, the synonyms graph token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. - -The synonym rules should not contain words that are removed by a filter that appears later in the chain (like a `stop` filter). -Removing a term from a synonym rule means there will be no matching for it at query time. +In the above example, the synonyms token filter is placed after a stemmer. The stemmer will also be applied to the synonym entries. Because entries in the synonym map cannot have stacked positions, some token filters may cause issues here. Token filters that produce multiple versions of a token may choose which version of the token to emit when parsing synonyms. 
@@ -159,3 +154,59 @@ For example, `asciifolding` will only produce the folded version of the token. Others, like `multiplexer`, `word_delimiter_graph` or `ngram` will throw an error. If you need to build analyzers that include both multi-token filters and synonym filters, consider using the <> filter, with the multi-token filters in one branch and the synonym filter in the other. + +[discrete] +[[synonym-tokenizer-stop-token-filter]] +===== Synonyms and `stop` token filters + +Synonyms and <> interact with each other in the following ways: + +[discrete] +====== Stop token filter *before* synonym token filter + +Stop words will be removed from the synonym rule definition. +This can cause errors on the synonym rule. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node's assigned shards) +==== + +For *explicit synonym rules* like `foo, bar => baz` with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the left hand side of the synonym rule. +- If `lenient` is set to `true`, the rule `foo => baz` will be added and `bar => baz` will be ignored. + +If the stop filter removed `baz` instead: + +- If `lenient` is set to `false`, an error will be raised as `baz` would be removed from the right hand side of the synonym rule. +- If `lenient` is set to `true`, the synonym will have no effect as the target word is removed. + +For *equivalent synonym rules* like `foo, bar, baz` and `expand: true`, with a stop filter that removes `bar`: + +- If `lenient` is set to `false`, an error will be raised as `bar` would be removed from the synonym rule. +- If `lenient` is set to `true`, the synonyms added would be equivalent to the following synonym rules, which do not contain the removed word: + +``` +foo => foo +foo => baz +baz => foo +baz => baz +``` + +[discrete] +====== Stop token filter *after* synonym token filter + +The stop filter will remove the terms from the resulting synonym expansion. + +For example, a synonym rule like `foo, bar => baz` and a stop filter that removes `baz` will get no matches for `foo` or `bar`, as both would get expanded to `baz`, which is removed by the stop filter. + +If the stop filter removed `foo` instead, then searching for `foo` would get expanded to `baz`, which is not removed by the stop filter, thus potentially providing matches for `baz`. diff --git a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc index 63dd72dade8d0..e780c24963312 100644 --- a/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc +++ b/docs/reference/analysis/tokenfilters/synonyms-format.asciidoc @@ -15,7 +15,7 @@ This format uses two different definitions: ipod, i-pod, i pod computer, pc, laptop ---- -* Explicit mappings: Matches a group of words to other words. Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: +* Explicit synonyms: Matches a group of words to other words.
Words on the left hand side of the rule definition are expanded into all the possibilities described on the right hand side. Example: + [source,synonyms] ---- diff --git a/docs/reference/api-conventions.asciidoc b/docs/reference/api-conventions.asciidoc index 25881b707d724..f8d925945401e 100644 --- a/docs/reference/api-conventions.asciidoc +++ b/docs/reference/api-conventions.asciidoc @@ -334,6 +334,7 @@ All REST API parameters (both request parameters and JSON body) support providing boolean "false" as the value `false` and boolean "true" as the value `true`. All other values will raise an error. +[[api-conventions-number-values]] [discrete] === Number Values diff --git a/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc b/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc index 090eda5ef5436..e4da2c45ee978 100644 --- a/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc +++ b/docs/reference/autoscaling/apis/autoscaling-apis.asciidoc @@ -4,7 +4,7 @@ NOTE: {cloud-only} -You can use the following APIs to perform autoscaling operations. +You can use the following APIs to perform {cloud}/ec-autoscaling.html[autoscaling operations]. [discrete] [[autoscaling-api-top-level]] diff --git a/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc b/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc index 608b7bd7cb903..190428485a003 100644 --- a/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc +++ b/docs/reference/autoscaling/apis/delete-autoscaling-policy.asciidoc @@ -7,7 +7,7 @@ NOTE: {cloud-only} -Delete autoscaling policy. +Delete {cloud}/ec-autoscaling.html[autoscaling] policy. [[autoscaling-delete-autoscaling-policy-request]] ==== {api-request-title} diff --git a/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc b/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc index 05724b9c48b6e..d635d8c8f7bd0 100644 --- a/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc +++ b/docs/reference/autoscaling/apis/get-autoscaling-capacity.asciidoc @@ -7,7 +7,7 @@ NOTE: {cloud-only} -Get autoscaling capacity. +Get {cloud}/ec-autoscaling.html[autoscaling] capacity. [[autoscaling-get-autoscaling-capacity-request]] ==== {api-request-title} diff --git a/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc b/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc index ad00d69d1aeb2..973eedcb361c9 100644 --- a/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc +++ b/docs/reference/autoscaling/apis/get-autoscaling-policy.asciidoc @@ -7,7 +7,7 @@ NOTE: {cloud-only} -Get autoscaling policy. +Get {cloud}/ec-autoscaling.html[autoscaling] policy. [[autoscaling-get-autoscaling-policy-request]] ==== {api-request-title} diff --git a/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc b/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc index ff79def51ebb9..e564f83411eb4 100644 --- a/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc +++ b/docs/reference/autoscaling/apis/put-autoscaling-policy.asciidoc @@ -7,7 +7,7 @@ NOTE: {cloud-only} -Creates or updates an autoscaling policy. +Creates or updates an {cloud}/ec-autoscaling.html[autoscaling] policy. 
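For orientation, a request of roughly this shape creates or updates a policy. The policy name, role, and decider configuration below are illustrative placeholders rather than recommended values.

[source,console]
----
PUT /_autoscaling/policy/my_autoscaling_policy <1>
{
  "roles": [ "data_hot" ],
  "deciders": {
    "proactive_storage": {
      "forecast_window": "30m"
    }
  }
}
----
<1> `my_autoscaling_policy` is a placeholder policy name, and the `proactive_storage` decider with a `forecast_window` of `30m` is shown only as an example configuration.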
[[autoscaling-put-autoscaling-policy-request]] ==== {api-request-title} diff --git a/docs/reference/autoscaling/deciders/fixed-decider.asciidoc b/docs/reference/autoscaling/deciders/fixed-decider.asciidoc index c46d1dffe2cc8..5a8b009d9f063 100644 --- a/docs/reference/autoscaling/deciders/fixed-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/fixed-decider.asciidoc @@ -6,7 +6,7 @@ experimental[] [WARNING] The fixed decider is intended for testing only. Do not use this decider in production. -The `fixed` decider responds with a fixed required capacity. It is not enabled +The {cloud}/ec-autoscaling.html[autoscaling] `fixed` decider responds with a fixed required capacity. It is not enabled by default but can be enabled for any policy by explicitly configuring it. ==== Configuration settings diff --git a/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc b/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc index 832cf330053aa..0fc9ad444a213 100644 --- a/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/frozen-existence-decider.asciidoc @@ -2,7 +2,7 @@ [[autoscaling-frozen-existence-decider]] === Frozen existence decider -The frozen existence decider (`frozen_existence`) ensures that once the first +The {cloud}/ec-autoscaling.html[autoscaling] frozen existence decider (`frozen_existence`) ensures that once the first index enters the frozen ILM phase, the frozen tier is scaled into existence. The frozen existence decider is enabled for all policies governing frozen data diff --git a/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc b/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc index ab11da04c8642..1977f95797ef0 100644 --- a/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/frozen-shards-decider.asciidoc @@ -2,7 +2,7 @@ [[autoscaling-frozen-shards-decider]] === Frozen shards decider -The frozen shards decider (`frozen_shards`) calculates the memory required to search +The {cloud}/ec-autoscaling.html[autoscaling] frozen shards decider (`frozen_shards`) calculates the memory required to search the current set of partially mounted indices in the frozen tier. Based on a required memory amount per shard, it calculates the necessary memory in the frozen tier. diff --git a/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc b/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc index 5a10f31f1365b..3a8e7cdb518b3 100644 --- a/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/frozen-storage-decider.asciidoc @@ -2,7 +2,7 @@ [[autoscaling-frozen-storage-decider]] === Frozen storage decider -The frozen storage decider (`frozen_storage`) calculates the local storage +The {cloud}/ec-autoscaling.html[autoscaling] frozen storage decider (`frozen_storage`) calculates the local storage required to search the current set of partially mounted indices based on a percentage of the total data set size of such indices. 
It signals that additional storage capacity is necessary when existing capacity is less than the diff --git a/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc b/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc index 26ced6ad7bb26..5432d96a47edb 100644 --- a/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/machine-learning-decider.asciidoc @@ -2,7 +2,7 @@ [[autoscaling-machine-learning-decider]] === Machine learning decider -The {ml} decider (`ml`) calculates the memory and CPU requirements to run {ml} +The {cloud}/ec-autoscaling.html[autoscaling] {ml} decider (`ml`) calculates the memory and CPU requirements to run {ml} jobs and trained models. The {ml} decider is enabled for policies governing `ml` nodes. diff --git a/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc b/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc index 763f1de96f6b9..33c989f3b12eb 100644 --- a/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/proactive-storage-decider.asciidoc @@ -2,7 +2,7 @@ [[autoscaling-proactive-storage-decider]] === Proactive storage decider -The proactive storage decider (`proactive_storage`) calculates the storage required to contain +The {cloud}/ec-autoscaling.html[autoscaling] proactive storage decider (`proactive_storage`) calculates the storage required to contain the current data set plus an estimated amount of expected additional data. The proactive storage decider is enabled for all policies governing nodes with the `data_hot` role. diff --git a/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc b/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc index 50897178a88de..7c38df75169fd 100644 --- a/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc +++ b/docs/reference/autoscaling/deciders/reactive-storage-decider.asciidoc @@ -2,7 +2,7 @@ [[autoscaling-reactive-storage-decider]] === Reactive storage decider -The reactive storage decider (`reactive_storage`) calculates the storage required to contain +The {cloud}/ec-autoscaling.html[autoscaling] reactive storage decider (`reactive_storage`) calculates the storage required to contain the current data set. It signals that additional storage capacity is necessary when existing capacity has been exceeded (reactively). diff --git a/docs/reference/autoscaling/index.asciidoc b/docs/reference/autoscaling/index.asciidoc index fbf1a9536973e..e70c464889419 100644 --- a/docs/reference/autoscaling/index.asciidoc +++ b/docs/reference/autoscaling/index.asciidoc @@ -4,7 +4,7 @@ NOTE: {cloud-only} -The autoscaling feature enables an operator to configure tiers of nodes that +The {cloud}/ec-autoscaling.html[autoscaling] feature enables an operator to configure tiers of nodes that self-monitor whether or not they need to scale based on an operator-defined policy. Then, via the autoscaling API, an Elasticsearch cluster can report whether or not it needs additional resources to meet the policy. 
For example, an diff --git a/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc b/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc index 9b15bcca3fc85..a6894a933b460 100644 --- a/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc +++ b/docs/reference/behavioral-analytics/apis/delete-analytics-collection.asciidoc @@ -17,7 +17,7 @@ PUT _application/analytics/my_analytics_collection //// -Removes an Analytics Collection and its associated data stream. +Removes a <> Collection and its associated data stream. [[delete-analytics-collection-request]] ==== {api-request-title} diff --git a/docs/reference/behavioral-analytics/apis/index.asciidoc b/docs/reference/behavioral-analytics/apis/index.asciidoc index 042b50259b1bb..692d3374f89f5 100644 --- a/docs/reference/behavioral-analytics/apis/index.asciidoc +++ b/docs/reference/behavioral-analytics/apis/index.asciidoc @@ -9,7 +9,7 @@ beta::[] --- -Use the following APIs to manage tasks and resources related to Behavioral Analytics: +Use the following APIs to manage tasks and resources related to <>: * <> * <> diff --git a/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc b/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc index 8d2491ff8a6ee..14511a1258278 100644 --- a/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc +++ b/docs/reference/behavioral-analytics/apis/list-analytics-collection.asciidoc @@ -24,7 +24,7 @@ DELETE _application/analytics/my_analytics_collection2 // TEARDOWN //// -Returns information about Analytics Collections. +Returns information about <> Collections. [[list-analytics-collection-request]] ==== {api-request-title} diff --git a/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc b/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc index 84d9cb5351799..f82717e22ed34 100644 --- a/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc +++ b/docs/reference/behavioral-analytics/apis/post-analytics-collection-event.asciidoc @@ -22,7 +22,7 @@ DELETE _application/analytics/my_analytics_collection // TEARDOWN //// -Post an event to an Analytics Collection. +Post an event to a <> Collection. [[post-analytics-collection-event-request]] ==== {api-request-title} diff --git a/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc b/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc index 48273fb3906c4..cbbab2ae3e26c 100644 --- a/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc +++ b/docs/reference/behavioral-analytics/apis/put-analytics-collection.asciidoc @@ -16,7 +16,7 @@ DELETE _application/analytics/my_analytics_collection // TEARDOWN //// -Creates an Analytics Collection. +Creates a <> Collection. [[put-analytics-collection-request]] ==== {api-request-title} diff --git a/docs/reference/cat/component-templates.asciidoc b/docs/reference/cat/component-templates.asciidoc index 596c86befd1b7..f87f400bb8885 100644 --- a/docs/reference/cat/component-templates.asciidoc +++ b/docs/reference/cat/component-templates.asciidoc @@ -6,8 +6,8 @@ [IMPORTANT] ==== -cat APIs are only intended for human consumption using the command line or {kib} -console. They are _not_ intended for use by applications. For application +cat APIs are only intended for human consumption using the command line or {kib} +console. 
They are _not_ intended for use by applications. For application consumption, use the <>. ==== @@ -82,8 +82,7 @@ PUT _component_template/my-template-2 "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } } diff --git a/docs/reference/cat/nodes.asciidoc b/docs/reference/cat/nodes.asciidoc index fc5b01f9234e3..5f329c00efd7f 100644 --- a/docs/reference/cat/nodes.asciidoc +++ b/docs/reference/cat/nodes.asciidoc @@ -50,16 +50,16 @@ Valid columns are: (Default) IP address, such as `127.0.1.1`. `heap.percent`, `hp`, `heapPercent`:: -(Default) Maximum configured heap, such as `7`. +(Default) Used percentage of total allocated Elasticsearch JVM heap, such as `7`. This reflects only the {es} process running within the operating system and is the most direct indicator of its JVM/heap/memory resource performance. `heap.max`, `hm`, `heapMax`:: -(Default) Total heap, such as `4gb`. +Total heap, such as `4gb`. `ram.percent`, `rp`, `ramPercent`:: -(Default) Used total memory percentage, such as `47`. +(Default) Used percentage of total operating system's memory, such as `47`. This reflects all processes running on operating system instead of only {es} and is not guaranteed to correlate to its performance. `file_desc.percent`, `fdp`, `fileDescriptorPercent`:: -(Default) Used file descriptors percentage, such as `1`. +Used file descriptors percentage, such as `1`. `node.role`, `r`, `role`, `nodeRole`:: (Default) Roles of the node. @@ -138,16 +138,16 @@ Used file descriptors, such as `123`. Maximum number of file descriptors, such as `1024`. `cpu`:: -Recent system CPU usage as percent, such as `12`. +(Default) Recent system CPU usage as percent, such as `12`. `load_1m`, `l`:: -Most recent load average, such as `0.22`. +(Default) Most recent load average, such as `0.22`. `load_5m`, `l`:: -Load average for the last five minutes, such as `0.78`. +(Default) Load average for the last five minutes, such as `0.78`. `load_15m`, `l`:: -Load average for the last fifteen minutes, such as `1.24`. +(Default) Load average for the last fifteen minutes, such as `1.24`. `uptime`, `u`:: Node uptime, such as `17.3m`. diff --git a/docs/reference/cat/recovery.asciidoc b/docs/reference/cat/recovery.asciidoc index 058f4e69ae8e3..c3292fc9971ee 100644 --- a/docs/reference/cat/recovery.asciidoc +++ b/docs/reference/cat/recovery.asciidoc @@ -39,7 +39,7 @@ The cat recovery API returns information about shard recoveries, both ongoing and completed. It is a more compact view of the JSON <> API. -include::{es-ref-dir}/indices/recovery.asciidoc[tag=shard-recovery-desc] +include::{es-ref-dir}/modules/shard-recovery-desc.asciidoc[] [[cat-recovery-path-params]] diff --git a/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc index 1c72fb8742b93..b510163bab50b 100644 --- a/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc +++ b/docs/reference/ccr/apis/auto-follow/delete-auto-follow-pattern.asciidoc @@ -5,7 +5,7 @@ Delete auto-follow pattern ++++ -Delete auto-follow patterns. +Delete {ccr} <>. 
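For example, a request of this shape deletes a pattern. The pattern name is a placeholder.

[source,console]
----
DELETE /_ccr/auto_follow/my_auto_follow_pattern <1>
----
<1> `my_auto_follow_pattern` is a placeholder for the name of an existing auto-follow pattern.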
[[ccr-delete-auto-follow-pattern-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc index 46ef288b05088..a2969e993ddfb 100644 --- a/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc +++ b/docs/reference/ccr/apis/auto-follow/get-auto-follow-pattern.asciidoc @@ -5,7 +5,7 @@ Get auto-follow pattern ++++ -Get auto-follow patterns. +Get {ccr} <>. [[ccr-get-auto-follow-pattern-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc index 1e64ab813e2ad..c5ae5a7b4af9d 100644 --- a/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc +++ b/docs/reference/ccr/apis/auto-follow/pause-auto-follow-pattern.asciidoc @@ -5,7 +5,7 @@ Pause auto-follow pattern ++++ -Pauses an auto-follow pattern. +Pauses a {ccr} <>. [[ccr-pause-auto-follow-pattern-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc index d08997068f705..6769f21ca5cef 100644 --- a/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc +++ b/docs/reference/ccr/apis/auto-follow/put-auto-follow-pattern.asciidoc @@ -5,7 +5,7 @@ Create auto-follow pattern ++++ -Creates an auto-follow pattern. +Creates a {ccr} <>. [[ccr-put-auto-follow-pattern-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc b/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc index 04da9b4a35ba0..a580bb3838f9b 100644 --- a/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc +++ b/docs/reference/ccr/apis/auto-follow/resume-auto-follow-pattern.asciidoc @@ -5,7 +5,7 @@ Resume auto-follow pattern ++++ -Resumes an auto-follow pattern. +Resumes a {ccr} <>. [[ccr-resume-auto-follow-pattern-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/ccr-apis.asciidoc b/docs/reference/ccr/apis/ccr-apis.asciidoc index 0c9f033639eda..ae94e1931af85 100644 --- a/docs/reference/ccr/apis/ccr-apis.asciidoc +++ b/docs/reference/ccr/apis/ccr-apis.asciidoc @@ -2,7 +2,7 @@ [[ccr-apis]] == {ccr-cap} APIs -You can use the following APIs to perform {ccr} operations. +You can use the following APIs to perform <> operations. [discrete] [[ccr-api-top-level]] diff --git a/docs/reference/ccr/apis/follow/get-follow-info.asciidoc b/docs/reference/ccr/apis/follow/get-follow-info.asciidoc index 68fd6e210f884..6c049d9c92b59 100644 --- a/docs/reference/ccr/apis/follow/get-follow-info.asciidoc +++ b/docs/reference/ccr/apis/follow/get-follow-info.asciidoc @@ -5,7 +5,7 @@ Get follower info ++++ -Retrieves information about all follower indices. +Retrieves information about all <> follower indices. [[ccr-get-follow-info-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc b/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc index 72224cc7f51f4..4892f86b3523d 100644 --- a/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc +++ b/docs/reference/ccr/apis/follow/get-follow-stats.asciidoc @@ -5,7 +5,7 @@ Get follower stats ++++ -Get follower stats. +Get <> follower stats. 
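As a sketch, stats for a single follower index can be requested as follows, where `follower_index` is a placeholder index name.

[source,console]
----
GET /follower_index/_ccr/stats <1>
----
<1> `follower_index` is a placeholder for the name of a follower index.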
[[ccr-get-follow-stats-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc b/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc index ea7e8640056bf..1917c08d6640d 100644 --- a/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc +++ b/docs/reference/ccr/apis/follow/post-forget-follower.asciidoc @@ -5,7 +5,7 @@ Forget follower ++++ -Removes the follower retention leases from the leader. +Removes the <> follower retention leases from the leader. [[ccr-post-forget-follower-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc b/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc index a4ab69aba8d84..6d4730d10efe6 100644 --- a/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc +++ b/docs/reference/ccr/apis/follow/post-pause-follow.asciidoc @@ -5,7 +5,7 @@ Pause follower ++++ -Pauses a follower index. +Pauses a <> follower index. [[ccr-post-pause-follow-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc b/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc index 47ba51a3fb8a0..b023a8cb5cb70 100644 --- a/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc +++ b/docs/reference/ccr/apis/follow/post-resume-follow.asciidoc @@ -5,7 +5,7 @@ Resume follower ++++ -Resumes a follower index. +Resumes a <> follower index. [[ccr-post-resume-follow-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/follow/post-unfollow.asciidoc b/docs/reference/ccr/apis/follow/post-unfollow.asciidoc index b96777b455d3b..dab11ef9e7a54 100644 --- a/docs/reference/ccr/apis/follow/post-unfollow.asciidoc +++ b/docs/reference/ccr/apis/follow/post-unfollow.asciidoc @@ -5,7 +5,7 @@ Unfollow ++++ -Converts a follower index to a regular index. +Converts a <> follower index to a regular index. [[ccr-post-unfollow-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/follow/put-follow.asciidoc b/docs/reference/ccr/apis/follow/put-follow.asciidoc index eb83e2a13dcf1..b7ae9ac987474 100644 --- a/docs/reference/ccr/apis/follow/put-follow.asciidoc +++ b/docs/reference/ccr/apis/follow/put-follow.asciidoc @@ -5,7 +5,7 @@ Create follower ++++ -Creates a follower index. +Creates a <> follower index. [[ccr-put-follow-request]] ==== {api-request-title} diff --git a/docs/reference/ccr/apis/get-ccr-stats.asciidoc b/docs/reference/ccr/apis/get-ccr-stats.asciidoc index 128df5e47c777..92e6bae0bdce8 100644 --- a/docs/reference/ccr/apis/get-ccr-stats.asciidoc +++ b/docs/reference/ccr/apis/get-ccr-stats.asciidoc @@ -6,7 +6,7 @@ Get {ccr-init} stats ++++ -Get {ccr} stats. +Get <> stats. [[ccr-get-stats-request]] ==== {api-request-title} diff --git a/docs/reference/cluster/allocation-explain.asciidoc b/docs/reference/cluster/allocation-explain.asciidoc index 0b0fde6546c29..7547dd74c5ecd 100644 --- a/docs/reference/cluster/allocation-explain.asciidoc +++ b/docs/reference/cluster/allocation-explain.asciidoc @@ -4,7 +4,7 @@ Cluster allocation explain ++++ -Provides an explanation for a shard's current allocation. +Provides an explanation for a shard's current <>. [source,console] ---- @@ -81,6 +81,7 @@ you might expect otherwise. ===== Unassigned primary shard +====== Conflicting settings The following request gets an allocation explanation for an unassigned primary shard. @@ -158,6 +159,56 @@ node. <5> The decider which led to the `no` decision for the node. 
<6> An explanation as to why the decider returned a `no` decision, with a helpful hint pointing to the setting that led to the decision. In this example, a newly created index has <> that requires that it only be allocated to a node named `nonexistent_node`, which does not exist, so the index is unable to allocate. +====== Maximum number of retries exceeded + +The following response contains an allocation explanation for an unassigned +primary shard that has reached the maximum number of allocation retry attempts. + +[source,js] +---- +{ + "index" : "my-index-000001", + "shard" : 0, + "primary" : true, + "current_state" : "unassigned", + "unassigned_info" : { + "at" : "2017-01-04T18:03:28.464Z", + "failed shard on node [mEKjwwzLT1yJVb8UxT6anw]: failed recovery, failure RecoveryFailedException", + "reason": "ALLOCATION_FAILED", + "failed_allocation_attempts": 5, + "last_allocation_status": "no", + }, + "can_allocate": "no", + "allocate_explanation": "cannot allocate because allocation is not permitted to any of the nodes", + "node_allocation_decisions" : [ + { + "node_id" : "3sULLVJrRneSg0EfBB-2Ew", + "node_name" : "node_t0", + "transport_address" : "127.0.0.1:9400", + "roles" : ["data_content", "data_hot"], + "node_decision" : "no", + "store" : { + "matching_size" : "4.2kb", + "matching_size_in_bytes" : 4325 + }, + "deciders" : [ + { + "decider": "max_retry", + "decision" : "NO", + "explanation": "shard has exceeded the maximum number of retries [5] on failed allocation attempts - manually call [/_cluster/reroute?retry_failed=true] to retry, [unassigned_info[[reason=ALLOCATION_FAILED], at[2024-07-30T21:04:12.166Z], failed_attempts[5], failed_nodes[[mEKjwwzLT1yJVb8UxT6anw]], delayed=false, details[failed shard on node [mEKjwwzLT1yJVb8UxT6anw]: failed recovery, failure RecoveryFailedException], allocation_status[deciders_no]]]" + } + ] + } + ] +} +---- +// NOTCONSOLE + +If decider message indicates a transient allocation issue, use +<> to retry allocation. + +====== No valid shard copy + The following response contains an allocation explanation for an unassigned primary shard that was previously allocated. @@ -184,6 +235,8 @@ TIP: If a shard is unassigned with an allocation status of `no_valid_shard_copy` ===== Unassigned replica shard +====== Allocation delayed + The following response contains an allocation explanation for a replica that's unassigned due to <>. @@ -241,8 +294,52 @@ unassigned due to <>. <2> The remaining delay before allocating the replica shard. <3> Information about the shard data found on a node. +====== Allocation throttled + +The following response contains an allocation explanation for a replica that's +queued to allocate but currently waiting on other queued shards. + +[source,js] +---- +{ + "index" : "my-index-000001", + "shard" : 0, + "primary" : false, + "current_state" : "unassigned", + "unassigned_info" : { + "reason" : "NODE_LEFT", + "at" : "2017-01-04T18:53:59.498Z", + "details" : "node_left[G92ZwuuaRY-9n8_tc-IzEg]", + "last_allocation_status" : "no_attempt" + }, + "can_allocate": "throttled", + "allocate_explanation": "Elasticsearch is currently busy with other activities. It expects to be able to allocate this shard when those activities finish. 
Please wait.", + "node_allocation_decisions" : [ + { + "node_id" : "3sULLVJrRneSg0EfBB-2Ew", + "node_name" : "node_t0", + "transport_address" : "127.0.0.1:9400", + "roles" : ["data_content", "data_hot"], + "node_decision" : "no", + "deciders" : [ + { + "decider": "throttling", + "decision": "THROTTLE", + "explanation": "reached the limit of incoming shard recoveries [2], cluster setting [cluster.routing.allocation.node_concurrent_incoming_recoveries=2] (can also be set via [cluster.routing.allocation.node_concurrent_recoveries])" + } + ] + } + ] +} +---- +// NOTCONSOLE + +This is a transient message that might appear when a large amount of shards are allocating. + ===== Assigned shard +====== Cannot remain on current node + The following response contains an allocation explanation for an assigned shard. The response indicates the shard is not allowed to remain on its current node and must be reallocated. @@ -295,6 +392,8 @@ and must be reallocated. <2> The deciders that factored into the decision of why the shard is not allowed to remain on its current node. <3> Whether the shard is allowed to be allocated to another node. +====== Must remain on current node + The following response contains an allocation explanation for a shard that must remain on its current node. Moving the shard to another node would not improve cluster balance. @@ -338,7 +437,7 @@ cluster balance. ===== No arguments If you call the API with no arguments, {es} retrieves an allocation explanation -for an arbitrary unassigned primary or replica shard. +for an arbitrary unassigned primary or replica shard, returning any unassigned primary shards first. [source,console] ---- diff --git a/docs/reference/cluster/delete-desired-balance.asciidoc b/docs/reference/cluster/delete-desired-balance.asciidoc index f81dcab011da4..c67834269e505 100644 --- a/docs/reference/cluster/delete-desired-balance.asciidoc +++ b/docs/reference/cluster/delete-desired-balance.asciidoc @@ -6,7 +6,7 @@ NOTE: {cloud-only} -Discards the current desired balance and computes a new desired balance starting from the current allocation of shards. +Discards the current <> and computes a new desired balance starting from the current allocation of shards. This can sometimes help {es} find a desired balance which needs fewer shard movements to achieve, especially if the cluster has experienced changes so substantial that the current desired balance is no longer optimal without {es} having detected that the current desired balance will take more shard movements to achieve than needed. However, this API diff --git a/docs/reference/cluster/get-desired-balance.asciidoc b/docs/reference/cluster/get-desired-balance.asciidoc index 3fd87dcfedc4f..74afdaa52daf1 100644 --- a/docs/reference/cluster/get-desired-balance.asciidoc +++ b/docs/reference/cluster/get-desired-balance.asciidoc @@ -8,7 +8,7 @@ NOTE: {cloud-only} Exposes: -* the desired balance computation and reconciliation stats +* the <> computation and reconciliation stats * balancing stats such as distribution of shards, disk and ingest forecasts across nodes and data tiers (based on the current cluster state) * routing table with each shard current and desired location diff --git a/docs/reference/commands/cli-jvm-options.asciidoc b/docs/reference/commands/cli-jvm-options.asciidoc index 546884f428c12..0428ead60b626 100644 --- a/docs/reference/commands/cli-jvm-options.asciidoc +++ b/docs/reference/commands/cli-jvm-options.asciidoc @@ -3,7 +3,7 @@ ==== JVM options CLI tools run with 64MB of heap. 
For most tools, this value is fine. However, if -needed this can be overriden by setting the `CLI_JAVA_OPTS` environment variable. +needed this can be overridden by setting the `CLI_JAVA_OPTS` environment variable. For example, the following increases the heap size used by the `pass:a[elasticsearch-{tool-name}]` tool to 1GB. diff --git a/docs/reference/connector/apis/connector-apis.asciidoc b/docs/reference/connector/apis/connector-apis.asciidoc index 41186ff6326f2..d0f30e57744cd 100644 --- a/docs/reference/connector/apis/connector-apis.asciidoc +++ b/docs/reference/connector/apis/connector-apis.asciidoc @@ -82,7 +82,7 @@ beta:[] preview::[] -*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Conector Protocol to communicate with {es}. +*Connector Service APIs* are a subset of Connector API endpoints, that represent framework-level operations defined in the https://github.com/elastic/connectors/blob/main/docs/CONNECTOR_PROTOCOL.md[Connector Protocol]. These APIs are not intended for direct connector management by users but are there to support the implementation of services that utilize the Connector Protocol to communicate with {es}. [TIP] ==== diff --git a/docs/reference/connector/apis/create-connector-api.asciidoc b/docs/reference/connector/apis/create-connector-api.asciidoc index 9bd49a3c5ef94..9564060cb07cb 100644 --- a/docs/reference/connector/apis/create-connector-api.asciidoc +++ b/docs/reference/connector/apis/create-connector-api.asciidoc @@ -115,7 +115,7 @@ PUT _connector/my-connector "name": "My Connector", "description": "My Connector to sync data to Elastic index from Google Drive", "service_type": "google_drive", - "language": "english" + "language": "en" } ---- diff --git a/docs/reference/data-streams/change-mappings-and-settings.asciidoc b/docs/reference/data-streams/change-mappings-and-settings.asciidoc index 076b315558b60..1290f289e5bbd 100644 --- a/docs/reference/data-streams/change-mappings-and-settings.asciidoc +++ b/docs/reference/data-streams/change-mappings-and-settings.asciidoc @@ -5,7 +5,7 @@ [[data-streams-change-mappings-and-settings]] === Change mappings and settings for a data stream -Each data stream has a <> +Each data stream has a <>. Mappings and index settings from this template are applied to new backing indices created for the stream. This includes the stream's first backing index, which is auto-generated when the stream is created.
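For context, a matching index template of the kind referred to above might look like the following sketch. The template name, index pattern, and field shown here are placeholders, and changes to such a template only affect backing indices created after the update.

[source,console]
----
PUT /_index_template/my-data-stream-template <1>
{
  "index_patterns": [ "my-data-stream*" ],
  "data_stream": { },
  "priority": 500,
  "template": {
    "settings": {
      "index.number_of_replicas": 1
    },
    "mappings": {
      "properties": {
        "message": { "type": "text" }
      }
    }
  }
}
----
<1> `my-data-stream-template`, the `my-data-stream*` pattern, and the `message` field are placeholders used only for this sketch.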
diff --git a/docs/reference/data-streams/data-streams.asciidoc b/docs/reference/data-streams/data-streams.asciidoc index 9c7137563caef..1484e21febdb3 100644 --- a/docs/reference/data-streams/data-streams.asciidoc +++ b/docs/reference/data-streams/data-streams.asciidoc @@ -157,4 +157,5 @@ include::set-up-a-data-stream.asciidoc[] include::use-a-data-stream.asciidoc[] include::change-mappings-and-settings.asciidoc[] include::tsds.asciidoc[] +include::logs.asciidoc[] include::lifecycle/index.asciidoc[] diff --git a/docs/reference/data-streams/downsampling-manual.asciidoc b/docs/reference/data-streams/downsampling-manual.asciidoc index 771a08d97d949..44ae77d072034 100644 --- a/docs/reference/data-streams/downsampling-manual.asciidoc +++ b/docs/reference/data-streams/downsampling-manual.asciidoc @@ -14,7 +14,7 @@ DELETE _ingest/pipeline/my-timestamp-pipeline // TEARDOWN //// -The recommended way to downsample a time series data stream (TSDS) is +The recommended way to <> a <> is <>. However, if you're not using ILM, you can downsample a TSDS manually. This guide shows you how, using typical Kubernetes cluster monitoring data. @@ -32,7 +32,7 @@ To test out manual downsampling, follow these steps: ==== Prerequisites * Refer to the <>. -* It is not possible to downsample a data stream directly, nor +* It is not possible to downsample a <> directly, nor multiple indices at once. It's only possible to downsample one time series index (TSDS backing index). * In order to downsample an index, it needs to be read-only. For a TSDS write diff --git a/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc index f20c949c2fbc8..315f7fa85e45f 100644 --- a/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc +++ b/docs/reference/data-streams/lifecycle/apis/delete-lifecycle.asciidoc @@ -4,7 +4,7 @@ Delete Data Stream Lifecycle ++++ -Deletes the lifecycle from a set of data streams. +Deletes the <> from a set of data streams. [[delete-lifecycle-api-prereqs]] ==== {api-prereq-title} diff --git a/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc index 7968bb78939e8..2b15886ebe192 100644 --- a/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc +++ b/docs/reference/data-streams/lifecycle/apis/explain-lifecycle.asciidoc @@ -4,7 +4,7 @@ Explain Data Stream Lifecycle ++++ -Retrieves the current data stream lifecycle status for one or more data stream backing indices. +Retrieves the current <> status for one or more data stream backing indices. [[explain-lifecycle-api-prereqs]] ==== {api-prereq-title} diff --git a/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc b/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc index a99fa19d9db8d..f48fa1eb52daa 100644 --- a/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc +++ b/docs/reference/data-streams/lifecycle/apis/get-lifecycle-stats.asciidoc @@ -4,7 +4,7 @@ Get Data Stream Lifecycle ++++ -Gets stats about the execution of data stream lifecycle. +Gets stats about the execution of <>. 
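A minimal sketch of the request, assuming the `_lifecycle/stats` endpoint documented on this page:

[source,console]
----
GET /_lifecycle/stats
----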
[[get-lifecycle-stats-api-prereqs]] ==== {api-prereq-title} diff --git a/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc index 331285af395b6..c83572a4e0795 100644 --- a/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc +++ b/docs/reference/data-streams/lifecycle/apis/get-lifecycle.asciidoc @@ -4,7 +4,7 @@ Get Data Stream Lifecycle ++++ -Gets the lifecycle of a set of data streams. +Gets the <> of a set of <>. [[get-lifecycle-api-prereqs]] ==== {api-prereq-title} diff --git a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc index 6bd157071f54e..c60c105e818ab 100644 --- a/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc +++ b/docs/reference/data-streams/lifecycle/apis/put-lifecycle.asciidoc @@ -4,7 +4,7 @@ Put Data Stream Lifecycle ++++ -Configures the data stream lifecycle for the targeted data streams. +Configures the data stream <> for the targeted <>. [[put-lifecycle-api-prereqs]] ==== {api-prereq-title} @@ -54,7 +54,7 @@ duration the document could be deleted. When empty, every document in this data `enabled`:: (Optional, boolean) -If defined, it turns data streqm lifecycle on/off (`true`/`false`) for this data stream. +If defined, it turns data stream lifecycle on/off (`true`/`false`) for this data stream. A data stream lifecycle that's disabled (`enabled: false`) will have no effect on the data stream. Defaults to `true`. diff --git a/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc b/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc index 5b2e2a1ec70a2..8d959d8f4ad84 100644 --- a/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc +++ b/docs/reference/data-streams/lifecycle/tutorial-migrate-data-stream-from-ilm-to-dsl.asciidoc @@ -2,8 +2,8 @@ [[tutorial-migrate-data-stream-from-ilm-to-dsl]] === Tutorial: Migrate ILM managed data stream to data stream lifecycle -In this tutorial we'll look at migrating an existing data stream from Index Lifecycle Management ({ilm-init}) to -data stream lifecycle. The existing {ilm-init} managed backing indices will continue +In this tutorial we'll look at migrating an existing data stream from <> to +<>. The existing {ilm-init} managed backing indices will continue to be managed by {ilm-init} until they age out and get deleted by {ilm-init}; however, the new backing indices will be managed by data stream lifecycle. This way, a data stream is gradually migrated away from being managed by {ilm-init} to diff --git a/docs/reference/data-streams/logs.asciidoc b/docs/reference/data-streams/logs.asciidoc new file mode 100644 index 0000000000000..e870289bcf7be --- /dev/null +++ b/docs/reference/data-streams/logs.asciidoc @@ -0,0 +1,52 @@ +[[logs-data-stream]] +== Logs data stream + +preview::[Logs data streams and the logsdb index mode are in tech preview and may be changed or removed in the future. Don't use logs data streams or logsdb index mode in production.] + +A logs data stream is a data stream type that stores log data more efficiently. + +In benchmarks, log data stored in a logs data stream used ~2.5 times less disk space than a regular data +stream. The exact impact will vary depending on your data set. + +The following features are enabled in a logs data stream: + +* <>, which omits storing the `_source` field. 
When the document source is requested, it is synthesized from document fields upon retrieval. + +* Index sorting. This yields a lower storage footprint. By default indices are sorted by `host.name` and `@timestamp` fields at index time. + +* More space efficient compression for fields with <> enabled. + +[discrete] +[[how-to-use-logsds]] +=== Create a logs data stream + +To create a logs data stream, set your index template `index.mode` to `logsdb`: + +[source,console] +---- +PUT _index_template/my-index-template +{ + "index_patterns": ["logs-*"], + "data_stream": { }, + "template": { + "settings": { + "index.mode": "logsdb" <1> + } + }, + "priority": 101 <2> +} +---- +// TEST + +<1> The index mode setting. +<2> The index template priority. By default, Elasticsearch ships with an index template with a `logs-*-*` pattern with a priority of 100. You need to define a priority higher than 100 to ensure that this index template gets selected over the default index template for the `logs-*-*` pattern. See the <> for more information. + +After the index template is created, new indices that use the template will be configured as a logs data stream. You can start indexing data and <>. + +//// +[source,console] +---- +DELETE _index_template/my-index-template +---- +// TEST[continued] +//// diff --git a/docs/reference/data-streams/modify-data-streams-api.asciidoc b/docs/reference/data-streams/modify-data-streams-api.asciidoc index f05e76e67c32f..2da869083df22 100644 --- a/docs/reference/data-streams/modify-data-streams-api.asciidoc +++ b/docs/reference/data-streams/modify-data-streams-api.asciidoc @@ -4,7 +4,7 @@ Modify data streams ++++ -Performs one or more data stream modification actions in a single atomic +Performs one or more <> modification actions in a single atomic operation. [source,console] diff --git a/docs/reference/data-streams/promote-data-stream-api.asciidoc b/docs/reference/data-streams/promote-data-stream-api.asciidoc index 281e9b549abcb..111c7a2256f8a 100644 --- a/docs/reference/data-streams/promote-data-stream-api.asciidoc +++ b/docs/reference/data-streams/promote-data-stream-api.asciidoc @@ -5,7 +5,7 @@ Promote data stream ++++ -The purpose of the promote data stream api is to turn +The purpose of the promote <> API is to turn a data stream that is replicated by CCR into a regular data stream. diff --git a/docs/reference/data-streams/tsds-reindex.asciidoc b/docs/reference/data-streams/tsds-reindex.asciidoc index ea4ba16df5c4a..9d6594db4e779 100644 --- a/docs/reference/data-streams/tsds-reindex.asciidoc +++ b/docs/reference/data-streams/tsds-reindex.asciidoc @@ -9,7 +9,7 @@ [[tsds-reindex-intro]] ==== Introduction -With reindexing, you can copy documents from an old time-series data stream (TSDS) to a new one. Data streams support +With reindexing, you can copy documents from an old <> to a new one. Data streams support reindexing in general, with a few <>. Still, time-series data streams introduce additional challenges due to tight control on the accepted timestamp range for each backing index they contain. Direct use of the reindex API would likely error out due to attempting to insert documents with timestamps that are diff --git a/docs/reference/data-streams/tsds.asciidoc b/docs/reference/data-streams/tsds.asciidoc index 460048d8ccbc9..de89fa1ca3f31 100644 --- a/docs/reference/data-streams/tsds.asciidoc +++ b/docs/reference/data-streams/tsds.asciidoc @@ -53,8 +53,9 @@ shard segments by `_tsid` and `@timestamp`. 
documents, the document `_id` is a hash of the document's dimensions and `@timestamp`. A TSDS doesn't support custom document `_id` values. + * A TSDS uses <>, and as a result is -subject to a number of <>. +subject to some <> and <> applied to the `_source` field. NOTE: A time series index can contain fields other than dimensions or metrics. diff --git a/docs/reference/eql/eql-apis.asciidoc b/docs/reference/eql/eql-apis.asciidoc index d3f591ccfe6c1..e8cc2b21492ae 100644 --- a/docs/reference/eql/eql-apis.asciidoc +++ b/docs/reference/eql/eql-apis.asciidoc @@ -1,7 +1,7 @@ [[eql-apis]] == EQL APIs -Event Query Language (EQL) is a query language for event-based time series data, +<> is a query language for event-based time series data, such as logs, metrics, and traces. For an overview of EQL and related tutorials, see <>. diff --git a/docs/reference/esql/esql-across-clusters.asciidoc b/docs/reference/esql/esql-across-clusters.asciidoc index 6231b4f4f0a69..d4e36cd41e046 100644 --- a/docs/reference/esql/esql-across-clusters.asciidoc +++ b/docs/reference/esql/esql-across-clusters.asciidoc @@ -49,11 +49,6 @@ Refer to <> for prerequisi [[esql-ccs-security-model-api-key]] ===== API key authentication -[NOTE] -==== -`ENRICH` is *not supported* in this version when using {esql} with the API key based security model. -==== - The following information pertains to using {esql} across clusters with the <>. You'll need to follow the steps on that page for the *full setup instructions*. This page only contains additional information specific to {esql}. API key based cross-cluster search (CCS) enables more granular control over allowed actions between clusters. @@ -66,6 +61,7 @@ You will need to: Using {esql} with the API key based security model requires some additional permissions that may not be needed when using the traditional query DSL based search. The following example API call creates a role that can query remote indices using {esql} when using the API key based security model. +The final privilege, `remote_cluster`, is required to allow remote enrich operations. [source,console] ---- @@ -84,7 +80,17 @@ POST /_security/role/remote1 "privileges": [ "read","read_cross_cluster" ], <4> "clusters" : ["my_remote_cluster"] <5> } - ] + ], + "remote_cluster": [ <6> + { + "privileges": [ + "monitor_enrich" + ], + "clusters": [ + "my_remote_cluster" + ] + } + ] } ---- @@ -95,6 +101,7 @@ POST /_security/role/remote1 <5> The remote clusters to which these privileges apply. This remote cluster must be configured with a <> and connected to the remote cluster before the remote index can be queried. Verify connection using the <> API. +<6> Required to allow remote enrichment. Without this, the user cannot read from the `.enrich` indices on the remote cluster. The `remote_cluster` security privilege was introduced in version *8.15.0*. You will then need a user or API key with the permissions you created above. The following example API call creates a user with the `remote1` role. @@ -109,6 +116,11 @@ POST /_security/user/remote_user Remember that all cross-cluster requests from the local cluster are bound by the cross cluster API key’s privileges, which are controlled by the remote cluster's administrator. +[TIP] +==== +Cross cluster API keys created in versions prior to 8.15.0 will need to replaced or updated to add the new permissions required for {esql} with ENRICH. 
+==== + [discrete] [[ccq-remote-cluster-setup]] ==== Remote cluster setup @@ -169,9 +181,11 @@ clusters, aiming to minimize computation or inter-cluster data transfer. Ensurin the policy exists with consistent data on both the local cluster and the remote clusters is critical for ES|QL to produce a consistent query result. -[NOTE] +[TIP] ==== -Enrich across clusters is *not supported* in this version when using {esql} with the <>. +Enrich in {esql} across clusters using the API key based security model was introduced in version *8.15.0*. +Cross cluster API keys created in versions prior to 8.15.0 will need to replaced or updated to use the new required permissions. +Refer to the example in the <> section. ==== In the following example, the enrich with `hosts` policy can be executed on diff --git a/docs/reference/esql/esql-apis.asciidoc b/docs/reference/esql/esql-apis.asciidoc index 686a71506bc14..8586cd1ae6bce 100644 --- a/docs/reference/esql/esql-apis.asciidoc +++ b/docs/reference/esql/esql-apis.asciidoc @@ -1,7 +1,7 @@ [[esql-apis]] == {esql} APIs -The {es} Query Language ({esql}) provides a powerful way to filter, transform, +The <> provides a powerful way to filter, transform, and analyze data stored in {es}, and in the future in other runtimes. For an overview of {esql} and related tutorials, see <>. diff --git a/docs/reference/esql/esql-async-query-delete-api.asciidoc b/docs/reference/esql/esql-async-query-delete-api.asciidoc index 90f8c06b9124a..5cad566f7f9c0 100644 --- a/docs/reference/esql/esql-async-query-delete-api.asciidoc +++ b/docs/reference/esql/esql-async-query-delete-api.asciidoc @@ -4,7 +4,7 @@ {esql} async query delete API ++++ -The {esql} async query delete API is used to manually delete an async query +The <> async query delete API is used to manually delete an async query by ID. If the query is still running, the query will be cancelled. Otherwise, the stored results are deleted. diff --git a/docs/reference/esql/esql-limitations.asciidoc b/docs/reference/esql/esql-limitations.asciidoc index 11e3fd7ae9883..8accc7550edbb 100644 --- a/docs/reference/esql/esql-limitations.asciidoc +++ b/docs/reference/esql/esql-limitations.asciidoc @@ -85,6 +85,11 @@ Some <> are not supported in all contexts: ** `cartesian_point` ** `cartesian_shape` +In addition, when <>, +it's possible for the same field to be mapped to multiple types. +These fields cannot be directly used in queries or returned in results, +unless they're <>. + [discrete] [[esql-_source-availability]] === _source availability diff --git a/docs/reference/esql/esql-multi-index.asciidoc b/docs/reference/esql/esql-multi-index.asciidoc new file mode 100644 index 0000000000000..25874a132d93d --- /dev/null +++ b/docs/reference/esql/esql-multi-index.asciidoc @@ -0,0 +1,175 @@ +[[esql-multi-index]] +=== Using {esql} to query multiple indices +++++ +Using {esql} to query multiple indices +++++ + +With {esql}, you can execute a single query across multiple indices, data streams, or aliases. +To do so, use wildcards and date arithmetic. The following example uses a comma-separated list and a wildcard: + +[source,esql] +---- +FROM employees-00001,other-employees-* +---- + +Use the format `:` to <>: + +[source,esql] +---- +FROM cluster_one:employees-00001,cluster_two:other-employees-* +---- + +[discrete] +[[esql-multi-index-invalid-mapping]] +=== Field type mismatches + +When querying multiple indices, data streams, or aliases, you might find that the same field is mapped to multiple different types. 
+For example, consider the two indices with the following field mappings: + +*index: events_ip* +``` +{ + "mappings": { + "properties": { + "@timestamp": { "type": "date" }, + "client_ip": { "type": "ip" }, + "event_duration": { "type": "long" }, + "message": { "type": "keyword" } + } + } +} +``` + +*index: events_keyword* +``` +{ + "mappings": { + "properties": { + "@timestamp": { "type": "date" }, + "client_ip": { "type": "keyword" }, + "event_duration": { "type": "long" }, + "message": { "type": "keyword" } + } + } +} +``` + +When you query each of these individually with a simple query like `FROM events_ip`, the results are provided with type-specific columns: + +[source.merge.styled,esql] +---- +FROM events_ip +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +@timestamp:date | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +|=== + +Note how the `client_ip` column is correctly identified as type `ip`, and all values are displayed. +However, if instead the query sources two conflicting indices with `FROM events_*`, the type of the `client_ip` column cannot be determined +and is reported as `unsupported` with all values returned as `null`. + +[[query-unsupported]] +[source.merge.styled,esql] +---- +FROM events_* +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +@timestamp:date | client_ip:unsupported | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | null | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | null | 5033755 | Connection error +2023-10-23T13:52:55.015Z | null | 8268153 | Connection error +2023-10-23T13:51:54.732Z | null | 725448 | Connection error +2023-10-23T13:33:34.937Z | null | 1232382 | Disconnected +2023-10-23T12:27:28.948Z | null | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | null | 3450233 | Connected to 10.1.0.3 +|=== + +In addition, if the query refers to this unsupported field directly, the query fails: + +[source.merge.styled,esql] +---- +FROM events_* +| SORT client_ip DESC +---- + +[source,bash] +---- +Cannot use field [client_ip] due to ambiguities being mapped as +[2] incompatible types: + [ip] in [events_ip], + [keyword] in [events_keyword] +---- + +[discrete] +[[esql-multi-index-union-types]] +=== Union types + +experimental::[] + +{esql} has a way to handle <>. When the same field is mapped to multiple types in multiple indices, +the type of the field is understood to be a _union_ of the various types in the index mappings. +As seen in the preceding examples, this _union type_ cannot be used in the results, +and cannot be referred to by the query -- except in `KEEP`, `DROP` or when it's passed to a type conversion function that accepts all the types in +the _union_ and converts the field to a single type. {esql} offers a suite of <> to achieve this. + +In the above examples, the query can use a command like `EVAL client_ip = TO_IP(client_ip)` to resolve +the union of `ip` and `keyword` to just `ip`. +You can also use the type-conversion syntax `EVAL client_ip = client_ip::IP`. +Alternatively, the query could use <> to convert all supported types into `KEYWORD`. + +For example, the <> that returned `client_ip:unsupported` with `null` values can be improved using the `TO_IP` function or the equivalent `field::ip` syntax. 
+These changes also resolve the error message. +As long as the only reference to the original field is to pass it to a conversion function that resolves the type ambiguity, no error results. + +[source.merge.styled,esql] +---- +FROM events_* +| EVAL client_ip = TO_IP(client_ip) +| KEEP @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +@timestamp:date | client_ip:ip | event_duration:long | message:keyword +2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +|=== + +[discrete] +[[esql-multi-index-index-metadata]] +=== Index metadata + +It can be helpful to know the particular index from which each row is sourced. +To get this information, use the <> option on the <> command. + +[source.merge.styled,esql] +---- +FROM events_* METADATA _index +| EVAL client_ip = TO_IP(client_ip) +| KEEP _index, @timestamp, client_ip, event_duration, message +| SORT @timestamp DESC +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +_index:keyword | @timestamp:date | client_ip:ip | event_duration:long | message:keyword +events_ip | 2023-10-23T13:55:01.543Z | 172.21.3.15 | 1756467 | Connected to 10.1.0.1 +events_ip | 2023-10-23T13:53:55.832Z | 172.21.3.15 | 5033755 | Connection error +events_ip | 2023-10-23T13:52:55.015Z | 172.21.3.15 | 8268153 | Connection error +events_keyword | 2023-10-23T13:51:54.732Z | 172.21.3.15 | 725448 | Connection error +events_keyword | 2023-10-23T13:33:34.937Z | 172.21.0.5 | 1232382 | Disconnected +events_keyword | 2023-10-23T12:27:28.948Z | 172.21.2.113 | 2764889 | Connected to 10.1.0.2 +events_keyword | 2023-10-23T12:15:03.360Z | 172.21.2.162 | 3450233 | Connected to 10.1.0.3 +|=== diff --git a/docs/reference/esql/esql-query-api.asciidoc b/docs/reference/esql/esql-query-api.asciidoc index 2cdd97ceab176..d1db21043a5b5 100644 --- a/docs/reference/esql/esql-query-api.asciidoc +++ b/docs/reference/esql/esql-query-api.asciidoc @@ -75,6 +75,13 @@ For syntax, refer to <>. (Optional, array) Values for parameters in the `query`. For syntax, refer to <>. +`profile`:: +(Optional, boolean) If provided and `true` the response will include an extra `profile` object +with information about how the query was executed. It provides insight into the performance +of each part of the query. This is for human debugging as the object's format might change at any time. +Think of this like https://www.postgresql.org/docs/current/sql-explain.html[EXPLAIN ANALYZE] or +https://en.wikipedia.org/wiki/Query_plan[EXPLAIN PLAN]. + `query`:: (Required, string) {esql} query to run. For syntax, refer to <>. @@ -97,6 +104,13 @@ Column `name` and `type` for each column returned in `values`. Each object is a Column `name` and `type` for each queried column. Each object is a single column. This is only returned if `drop_null_columns` is sent with the request. -`rows`:: +`values`:: (array of arrays) Values for the search results. + +`profile`:: +(object) +Profile describing the execution of the query. Only returned if `profile` was sent in the body. 
+The object itself is for human debugging and can change at any time. Think of this like +https://www.postgresql.org/docs/current/sql-explain.html[EXPLAIN ANALYZE] or +https://en.wikipedia.org/wiki/Query_plan[EXPLAIN PLAN]. diff --git a/docs/reference/esql/esql-rest.asciidoc b/docs/reference/esql/esql-rest.asciidoc index 5b90e96d7a734..b2493dc32a211 100644 --- a/docs/reference/esql/esql-rest.asciidoc +++ b/docs/reference/esql/esql-rest.asciidoc @@ -233,6 +233,7 @@ POST /_query } ---- // TEST[setup:library] +// TEST[skip:This can output a warning, and asciidoc doesn't support allowed_warnings] [discrete] [[esql-rest-params]] @@ -278,6 +279,47 @@ POST /_query ---- // TEST[setup:library] +The parameters can be named parameters or positional parameters. + +Named parameters use question mark placeholders (`?`) followed by a string. + +[source,console] +---- +POST /_query +{ + "query": """ + FROM library + | EVAL year = DATE_EXTRACT("year", release_date) + | WHERE page_count > ?page_count AND author == ?author + | STATS count = COUNT(*) by year + | WHERE count > ?count + | LIMIT 5 + """, + "params": [{"page_count" : 300}, {"author" : "Frank Herbert"}, {"count" : 0}] +} +---- +// TEST[setup:library] + +Positional parameters use question mark placeholders (`?`) followed by an +integer. + +[source,console] +---- +POST /_query +{ + "query": """ + FROM library + | EVAL year = DATE_EXTRACT("year", release_date) + | WHERE page_count > ?1 AND author == ?2 + | STATS count = COUNT(*) by year + | WHERE count > ?3 + | LIMIT 5 + """, + "params": [300, "Frank Herbert", 0] +} +---- +// TEST[setup:library] + [discrete] [[esql-rest-async-query]] ==== Running an async {esql} query diff --git a/docs/reference/esql/esql-using.asciidoc b/docs/reference/esql/esql-using.asciidoc index 3e045163069ec..d2e18bf1b91a3 100644 --- a/docs/reference/esql/esql-using.asciidoc +++ b/docs/reference/esql/esql-using.asciidoc @@ -12,6 +12,9 @@ and set up alerts. Using {esql} in {elastic-sec} to investigate events in Timeline, create detection rules, and build {esql} queries using Elastic AI Assistant. +<>:: +Using {esql} to query multiple indexes and resolve field type mismatches. + <>:: Using {esql} to query across multiple clusters. @@ -21,5 +24,6 @@ Using the <> to list and cancel {esql} queries. include::esql-rest.asciidoc[] include::esql-kibana.asciidoc[] include::esql-security-solution.asciidoc[] +include::esql-multi-index.asciidoc[] include::esql-across-clusters.asciidoc[] include::task-management.asciidoc[] diff --git a/docs/reference/esql/functions/description/locate.asciidoc b/docs/reference/esql/functions/description/locate.asciidoc index 60a6d435e37b6..b3f9d2a1ad78e 100644 --- a/docs/reference/esql/functions/description/locate.asciidoc +++ b/docs/reference/esql/functions/description/locate.asciidoc @@ -2,4 +2,4 @@ *Description* -Returns an integer that indicates the position of a keyword substring within another string +Returns an integer that indicates the position of a keyword substring within another string. Returns `0` if the substring cannot be found. Note that string positions start from `1`. diff --git a/docs/reference/esql/functions/description/mv_first.asciidoc b/docs/reference/esql/functions/description/mv_first.asciidoc index 99223e2c02d9f..13c433ce209d0 100644 --- a/docs/reference/esql/functions/description/mv_first.asciidoc +++ b/docs/reference/esql/functions/description/mv_first.asciidoc @@ -2,4 +2,10 @@ *Description* -Converts a multivalued expression into a single valued column containing the first value. 
This is most useful when reading from a function that emits multivalued columns in a known order like <>. The order that <> are read from underlying storage is not guaranteed. It is *frequently* ascending, but don't rely on that. If you need the minimum value use <> instead of `MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a performance benefit to `MV_FIRST`. +Converts a multivalued expression into a single valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like <>. + +The order that <> are read from +underlying storage is not guaranteed. It is *frequently* ascending, but don't +rely on that. If you need the minimum value use <> instead of +`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a +performance benefit to `MV_FIRST`. diff --git a/docs/reference/esql/functions/description/mv_last.asciidoc b/docs/reference/esql/functions/description/mv_last.asciidoc index 4b4b4336588d1..beba7b5a402c9 100644 --- a/docs/reference/esql/functions/description/mv_last.asciidoc +++ b/docs/reference/esql/functions/description/mv_last.asciidoc @@ -2,4 +2,10 @@ *Description* -Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <>. The order that <> are read from underlying storage is not guaranteed. It is *frequently* ascending, but don't rely on that. If you need the maximum value use <> instead of `MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a performance benefit to `MV_LAST`. +Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <>. + +The order that <> are read from +underlying storage is not guaranteed. It is *frequently* ascending, but don't +rely on that. If you need the maximum value use <> instead of +`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a +performance benefit to `MV_LAST`. diff --git a/docs/reference/esql/functions/description/mv_slice.asciidoc b/docs/reference/esql/functions/description/mv_slice.asciidoc index 24d3183b6f40e..98438ae097fe7 100644 --- a/docs/reference/esql/functions/description/mv_slice.asciidoc +++ b/docs/reference/esql/functions/description/mv_slice.asciidoc @@ -2,4 +2,8 @@ *Description* -Returns a subset of the multivalued field using the start and end index values. +Returns a subset of the multivalued field using the start and end index values. This is most useful when reading from a function that emits multivalued columns in a known order like <> or <>. + +The order that <> are read from +underlying storage is not guaranteed. It is *frequently* ascending, but don't +rely on that. diff --git a/docs/reference/esql/functions/kibana/definition/bucket.json b/docs/reference/esql/functions/kibana/definition/bucket.json index 7141ca4c27443..ae7305bcca60e 100644 --- a/docs/reference/esql/functions/kibana/definition/bucket.json +++ b/docs/reference/esql/functions/kibana/definition/bucket.json @@ -8,7 +8,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Numeric or date expression from which to derive buckets." 
}, @@ -20,13 +20,13 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Numeric or date expression from which to derive buckets." }, @@ -38,25 +38,25 @@ }, { "name" : "from", - "type" : "datetime", + "type" : "date", "optional" : true, "description" : "Start of the range. Can be a number or a date expressed as a string." }, { "name" : "to", - "type" : "datetime", + "type" : "date", "optional" : true, "description" : "End of the range. Can be a number or a date expressed as a string." } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Numeric or date expression from which to derive buckets." }, @@ -68,7 +68,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/case.json b/docs/reference/esql/functions/kibana/definition/case.json index 5959eed62d37b..27705cd3897f9 100644 --- a/docs/reference/esql/functions/kibana/definition/case.json +++ b/docs/reference/esql/functions/kibana/definition/case.json @@ -50,13 +50,13 @@ }, { "name" : "trueValue", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "The value that's returned when the corresponding condition is the first to evaluate to `true`. The default value is returned when no condition matches." } ], "variadic" : true, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/coalesce.json b/docs/reference/esql/functions/kibana/definition/coalesce.json index f00f471e63ecc..2459a4d51bb2d 100644 --- a/docs/reference/esql/functions/kibana/definition/coalesce.json +++ b/docs/reference/esql/functions/kibana/definition/coalesce.json @@ -74,19 +74,19 @@ "params" : [ { "name" : "first", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Expression to evaluate." }, { "name" : "rest", - "type" : "datetime", + "type" : "date", "optional" : true, "description" : "Other expression to evaluate." 
} ], "variadic" : true, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/date_diff.json b/docs/reference/esql/functions/kibana/definition/date_diff.json index 7995d3c6d32b6..d6589f041075d 100644 --- a/docs/reference/esql/functions/kibana/definition/date_diff.json +++ b/docs/reference/esql/functions/kibana/definition/date_diff.json @@ -14,13 +14,13 @@ }, { "name" : "startTimestamp", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "A string representing a start timestamp" }, { "name" : "endTimestamp", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "A string representing an end timestamp" } @@ -38,13 +38,13 @@ }, { "name" : "startTimestamp", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "A string representing a start timestamp" }, { "name" : "endTimestamp", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "A string representing an end timestamp" } diff --git a/docs/reference/esql/functions/kibana/definition/date_extract.json b/docs/reference/esql/functions/kibana/definition/date_extract.json index 75cedcc191b50..557f0e0a47e54 100644 --- a/docs/reference/esql/functions/kibana/definition/date_extract.json +++ b/docs/reference/esql/functions/kibana/definition/date_extract.json @@ -14,7 +14,7 @@ }, { "name" : "date", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Date expression. If `null`, the function returns `null`." } @@ -32,7 +32,7 @@ }, { "name" : "date", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Date expression. If `null`, the function returns `null`." } diff --git a/docs/reference/esql/functions/kibana/definition/date_format.json b/docs/reference/esql/functions/kibana/definition/date_format.json index 5e8587c046d70..7bd01d7f4ef31 100644 --- a/docs/reference/esql/functions/kibana/definition/date_format.json +++ b/docs/reference/esql/functions/kibana/definition/date_format.json @@ -14,7 +14,7 @@ }, { "name" : "date", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Date expression. If `null`, the function returns `null`." } @@ -32,7 +32,7 @@ }, { "name" : "date", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Date expression. If `null`, the function returns `null`." 
} diff --git a/docs/reference/esql/functions/kibana/definition/date_parse.json b/docs/reference/esql/functions/kibana/definition/date_parse.json index 890179143bef8..9400340750c2a 100644 --- a/docs/reference/esql/functions/kibana/definition/date_parse.json +++ b/docs/reference/esql/functions/kibana/definition/date_parse.json @@ -20,7 +20,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -38,7 +38,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -56,7 +56,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -74,7 +74,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" } ], "examples" : [ diff --git a/docs/reference/esql/functions/kibana/definition/date_trunc.json b/docs/reference/esql/functions/kibana/definition/date_trunc.json index 3d8658c496529..bd3f362d1670b 100644 --- a/docs/reference/esql/functions/kibana/definition/date_trunc.json +++ b/docs/reference/esql/functions/kibana/definition/date_trunc.json @@ -14,13 +14,13 @@ }, { "name" : "date", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Date expression" } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -32,13 +32,13 @@ }, { "name" : "date", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Date expression" } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" } ], "examples" : [ diff --git a/docs/reference/esql/functions/kibana/definition/greatest.json b/docs/reference/esql/functions/kibana/definition/greatest.json index 15c9f58d32d3e..92543b0bbbabb 100644 --- a/docs/reference/esql/functions/kibana/definition/greatest.json +++ b/docs/reference/esql/functions/kibana/definition/greatest.json @@ -35,6 +35,24 @@ "variadic" : true, "returnType" : "boolean" }, + { + "params" : [ + { + "name" : "first", + "type" : "date", + "optional" : false, + "description" : "First of the columns to evaluate." + }, + { + "name" : "rest", + "type" : "date", + "optional" : true, + "description" : "The rest of the columns to evaluate." + } + ], + "variadic" : true, + "returnType" : "date" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/kibana/definition/least.json b/docs/reference/esql/functions/kibana/definition/least.json index 0b922ad6ad3c2..a993b1df718ec 100644 --- a/docs/reference/esql/functions/kibana/definition/least.json +++ b/docs/reference/esql/functions/kibana/definition/least.json @@ -34,6 +34,24 @@ "variadic" : true, "returnType" : "boolean" }, + { + "params" : [ + { + "name" : "first", + "type" : "date", + "optional" : false, + "description" : "First of the columns to evaluate." + }, + { + "name" : "rest", + "type" : "date", + "optional" : true, + "description" : "The rest of the columns to evaluate." + } + ], + "variadic" : true, + "returnType" : "date" + }, { "params" : [ { diff --git a/docs/reference/esql/functions/kibana/definition/locate.json b/docs/reference/esql/functions/kibana/definition/locate.json index 13b7512e17def..a9ddc8c52368a 100644 --- a/docs/reference/esql/functions/kibana/definition/locate.json +++ b/docs/reference/esql/functions/kibana/definition/locate.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. 
See ../README.md for how to regenerate it.", "type" : "eval", "name" : "locate", - "description" : "Returns an integer that indicates the position of a keyword substring within another string", + "description" : "Returns an integer that indicates the position of a keyword substring within another string.\nReturns `0` if the substring cannot be found.\nNote that string positions start from `1`.", "signatures" : [ { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_append.json b/docs/reference/esql/functions/kibana/definition/mv_append.json index 8ee4e7297cc3a..3365226141f8f 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_append.json +++ b/docs/reference/esql/functions/kibana/definition/mv_append.json @@ -62,19 +62,19 @@ "params" : [ { "name" : "field1", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "" }, { "name" : "field2", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "" } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_count.json b/docs/reference/esql/functions/kibana/definition/mv_count.json index d414e5b957495..f125327314f4e 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_count.json +++ b/docs/reference/esql/functions/kibana/definition/mv_count.json @@ -44,7 +44,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression." } diff --git a/docs/reference/esql/functions/kibana/definition/mv_dedupe.json b/docs/reference/esql/functions/kibana/definition/mv_dedupe.json index 7ab287bc94d34..7d66e3dcc0b9b 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_dedupe.json +++ b/docs/reference/esql/functions/kibana/definition/mv_dedupe.json @@ -45,13 +45,13 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression." } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_first.json b/docs/reference/esql/functions/kibana/definition/mv_first.json index e3141e800e4ad..7edd2e4c065b6 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_first.json +++ b/docs/reference/esql/functions/kibana/definition/mv_first.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", "type" : "eval", "name" : "mv_first", - "description" : "Converts a multivalued expression into a single valued column containing the\nfirst value. This is most useful when reading from a function that emits\nmultivalued columns in a known order like <>.\n\nThe order that <> are read from\nunderlying storage is not guaranteed. It is *frequently* ascending, but don't\nrely on that. If you need the minimum value use <> instead of\n`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a\nperformance benefit to `MV_FIRST`.", + "description" : "Converts a multivalued expression into a single valued column containing the\nfirst value. This is most useful when reading from a function that emits\nmultivalued columns in a known order like <>.", "signatures" : [ { "params" : [ @@ -44,13 +44,13 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression." 
} ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_last.json b/docs/reference/esql/functions/kibana/definition/mv_last.json index e55d66dbf8b93..4161d39e6f80f 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_last.json +++ b/docs/reference/esql/functions/kibana/definition/mv_last.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../README.md for how to regenerate it.", "type" : "eval", "name" : "mv_last", - "description" : "Converts a multivalue expression into a single valued column containing the last\nvalue. This is most useful when reading from a function that emits multivalued\ncolumns in a known order like <>.\n\nThe order that <> are read from\nunderlying storage is not guaranteed. It is *frequently* ascending, but don't\nrely on that. If you need the maximum value use <> instead of\n`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a\nperformance benefit to `MV_LAST`.", + "description" : "Converts a multivalue expression into a single valued column containing the last\nvalue. This is most useful when reading from a function that emits multivalued\ncolumns in a known order like <>.", "signatures" : [ { "params" : [ @@ -44,13 +44,13 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression." } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_max.json b/docs/reference/esql/functions/kibana/definition/mv_max.json index 0783f6d6d5cbc..eb25369f78f77 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_max.json +++ b/docs/reference/esql/functions/kibana/definition/mv_max.json @@ -20,13 +20,13 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression." } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_min.json b/docs/reference/esql/functions/kibana/definition/mv_min.json index cc23df386356e..87ad94338492e 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_min.json +++ b/docs/reference/esql/functions/kibana/definition/mv_min.json @@ -20,13 +20,13 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression." } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_slice.json b/docs/reference/esql/functions/kibana/definition/mv_slice.json index 30d0e1179dc89..3a62cfb070b68 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_slice.json +++ b/docs/reference/esql/functions/kibana/definition/mv_slice.json @@ -2,7 +2,7 @@ "comment" : "This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. 
See ../README.md for how to regenerate it.", "type" : "eval", "name" : "mv_slice", - "description" : "Returns a subset of the multivalued field using the start and end index values.", + "description" : "Returns a subset of the multivalued field using the start and end index values.\nThis is most useful when reading from a function that emits multivalued columns\nin a known order like <> or <>.", "signatures" : [ { "params" : [ @@ -80,7 +80,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression. If `null`, the function returns `null`." }, @@ -98,7 +98,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/mv_sort.json b/docs/reference/esql/functions/kibana/definition/mv_sort.json index 28b4c9e8d6fea..d2bbd2c0fdbf4 100644 --- a/docs/reference/esql/functions/kibana/definition/mv_sort.json +++ b/docs/reference/esql/functions/kibana/definition/mv_sort.json @@ -26,7 +26,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Multivalue expression. If `null`, the function returns `null`." }, @@ -38,7 +38,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/definition/now.json b/docs/reference/esql/functions/kibana/definition/now.json index 9cdb4945afa2e..1a2fc3a1dc42a 100644 --- a/docs/reference/esql/functions/kibana/definition/now.json +++ b/docs/reference/esql/functions/kibana/definition/now.json @@ -6,7 +6,7 @@ "signatures" : [ { "params" : [ ], - "returnType" : "datetime" + "returnType" : "date" } ], "examples" : [ diff --git a/docs/reference/esql/functions/kibana/definition/to_datetime.json b/docs/reference/esql/functions/kibana/definition/to_datetime.json index 10fcf8b22e8b0..6891a92f3788b 100644 --- a/docs/reference/esql/functions/kibana/definition/to_datetime.json +++ b/docs/reference/esql/functions/kibana/definition/to_datetime.json @@ -8,13 +8,13 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Input value. The input can be a single- or multi-valued column or an expression." 
} ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -26,7 +26,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -38,7 +38,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -50,7 +50,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -62,7 +62,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -74,7 +74,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ @@ -86,7 +86,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" } ], "examples" : [ diff --git a/docs/reference/esql/functions/kibana/definition/to_double.json b/docs/reference/esql/functions/kibana/definition/to_double.json index f4e414068db61..ae7e4832bfb3c 100644 --- a/docs/reference/esql/functions/kibana/definition/to_double.json +++ b/docs/reference/esql/functions/kibana/definition/to_double.json @@ -56,7 +56,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Input value. The input can be a single- or multi-valued column or an expression." } diff --git a/docs/reference/esql/functions/kibana/definition/to_integer.json b/docs/reference/esql/functions/kibana/definition/to_integer.json index 2776d8b29c412..5150d12936711 100644 --- a/docs/reference/esql/functions/kibana/definition/to_integer.json +++ b/docs/reference/esql/functions/kibana/definition/to_integer.json @@ -32,7 +32,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Input value. The input can be a single- or multi-valued column or an expression." } diff --git a/docs/reference/esql/functions/kibana/definition/to_long.json b/docs/reference/esql/functions/kibana/definition/to_long.json index e3218eba9642a..5fd4bce34e7e0 100644 --- a/docs/reference/esql/functions/kibana/definition/to_long.json +++ b/docs/reference/esql/functions/kibana/definition/to_long.json @@ -44,7 +44,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Input value. The input can be a single- or multi-valued column or an expression." } diff --git a/docs/reference/esql/functions/kibana/definition/to_string.json b/docs/reference/esql/functions/kibana/definition/to_string.json index ef03cc06ea636..ea94171834908 100644 --- a/docs/reference/esql/functions/kibana/definition/to_string.json +++ b/docs/reference/esql/functions/kibana/definition/to_string.json @@ -44,7 +44,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Input value. The input can be a single- or multi-valued column or an expression." } diff --git a/docs/reference/esql/functions/kibana/definition/to_unsigned_long.json b/docs/reference/esql/functions/kibana/definition/to_unsigned_long.json index d9cba641573fb..5521241224d61 100644 --- a/docs/reference/esql/functions/kibana/definition/to_unsigned_long.json +++ b/docs/reference/esql/functions/kibana/definition/to_unsigned_long.json @@ -20,7 +20,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "Input value. The input can be a single- or multi-valued column or an expression." 
} diff --git a/docs/reference/esql/functions/kibana/definition/top.json b/docs/reference/esql/functions/kibana/definition/top.json index 7ad073d6e7564..8ec14e4659850 100644 --- a/docs/reference/esql/functions/kibana/definition/top.json +++ b/docs/reference/esql/functions/kibana/definition/top.json @@ -8,7 +8,7 @@ "params" : [ { "name" : "field", - "type" : "datetime", + "type" : "date", "optional" : false, "description" : "The field to collect the top values for." }, @@ -26,7 +26,7 @@ } ], "variadic" : false, - "returnType" : "datetime" + "returnType" : "date" }, { "params" : [ diff --git a/docs/reference/esql/functions/kibana/docs/locate.md b/docs/reference/esql/functions/kibana/docs/locate.md index 7fffbfd548f20..412832e9b1587 100644 --- a/docs/reference/esql/functions/kibana/docs/locate.md +++ b/docs/reference/esql/functions/kibana/docs/locate.md @@ -3,7 +3,9 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### LOCATE -Returns an integer that indicates the position of a keyword substring within another string +Returns an integer that indicates the position of a keyword substring within another string. +Returns `0` if the substring cannot be found. +Note that string positions start from `1`. ``` row a = "hello" diff --git a/docs/reference/esql/functions/kibana/docs/mv_avg.md b/docs/reference/esql/functions/kibana/docs/mv_avg.md index c3d7e5423f724..c5163f36129bf 100644 --- a/docs/reference/esql/functions/kibana/docs/mv_avg.md +++ b/docs/reference/esql/functions/kibana/docs/mv_avg.md @@ -3,7 +3,7 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### MV_AVG -Converts a multivalued field into a single valued field containing the average of all of the values. +Converts a multivalued field into a single valued field containing the average of all the values. ``` ROW a=[3, 5, 1, 6] diff --git a/docs/reference/esql/functions/kibana/docs/mv_first.md b/docs/reference/esql/functions/kibana/docs/mv_first.md index 4faea6edd9162..c50ed7d764020 100644 --- a/docs/reference/esql/functions/kibana/docs/mv_first.md +++ b/docs/reference/esql/functions/kibana/docs/mv_first.md @@ -7,12 +7,6 @@ Converts a multivalued expression into a single valued column containing the first value. This is most useful when reading from a function that emits multivalued columns in a known order like <>. -The order that <> are read from -underlying storage is not guaranteed. It is *frequently* ascending, but don't -rely on that. If you need the minimum value use <> instead of -`MV_FIRST`. `MV_MIN` has optimizations for sorted values so there isn't a -performance benefit to `MV_FIRST`. - ``` ROW a="foo;bar;baz" | EVAL first_a = MV_FIRST(SPLIT(a, ";")) diff --git a/docs/reference/esql/functions/kibana/docs/mv_last.md b/docs/reference/esql/functions/kibana/docs/mv_last.md index a8c3bf25eb51b..eeefd929c1359 100644 --- a/docs/reference/esql/functions/kibana/docs/mv_last.md +++ b/docs/reference/esql/functions/kibana/docs/mv_last.md @@ -7,12 +7,6 @@ Converts a multivalue expression into a single valued column containing the last value. This is most useful when reading from a function that emits multivalued columns in a known order like <>. -The order that <> are read from -underlying storage is not guaranteed. It is *frequently* ascending, but don't -rely on that. If you need the maximum value use <> instead of -`MV_LAST`. `MV_MAX` has optimizations for sorted values so there isn't a -performance benefit to `MV_LAST`. 
- ``` ROW a="foo;bar;baz" | EVAL last_a = MV_LAST(SPLIT(a, ";")) diff --git a/docs/reference/esql/functions/kibana/docs/mv_slice.md b/docs/reference/esql/functions/kibana/docs/mv_slice.md index 3daf0de930a7f..bba7a219960ef 100644 --- a/docs/reference/esql/functions/kibana/docs/mv_slice.md +++ b/docs/reference/esql/functions/kibana/docs/mv_slice.md @@ -4,6 +4,8 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ ### MV_SLICE Returns a subset of the multivalued field using the start and end index values. +This is most useful when reading from a function that emits multivalued columns +in a known order like <> or <>. ``` row a = [1, 2, 2, 3] diff --git a/docs/reference/esql/functions/kibana/docs/mv_sum.md b/docs/reference/esql/functions/kibana/docs/mv_sum.md index 16285d3c7229b..987017b34b743 100644 --- a/docs/reference/esql/functions/kibana/docs/mv_sum.md +++ b/docs/reference/esql/functions/kibana/docs/mv_sum.md @@ -3,7 +3,7 @@ This is generated by ESQL's AbstractFunctionTestCase. Do no edit it. See ../READ --> ### MV_SUM -Converts a multivalued field into a single valued field containing the sum of all of the values. +Converts a multivalued field into a single valued field containing the sum of all the values. ``` ROW a=[3, 5, 6] diff --git a/docs/reference/esql/functions/types/add.asciidoc b/docs/reference/esql/functions/types/add.asciidoc index a0215a803d4e3..54d1aec463c1a 100644 --- a/docs/reference/esql/functions/types/add.asciidoc +++ b/docs/reference/esql/functions/types/add.asciidoc @@ -5,10 +5,10 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== lhs | rhs | result +date | date_period | date +date | time_duration | date +date_period | date | date date_period | date_period | date_period -date_period | datetime | datetime -datetime | date_period | datetime -datetime | time_duration | datetime double | double | double double | integer | double double | long | double @@ -18,7 +18,7 @@ integer | long | long long | double | double long | integer | long long | long | long -time_duration | datetime | datetime +time_duration | date | date time_duration | time_duration | time_duration unsigned_long | unsigned_long | unsigned_long |=== diff --git a/docs/reference/esql/functions/types/bucket.asciidoc b/docs/reference/esql/functions/types/bucket.asciidoc index d1ce8e499eb07..a78883252a112 100644 --- a/docs/reference/esql/functions/types/bucket.asciidoc +++ b/docs/reference/esql/functions/types/bucket.asciidoc @@ -5,9 +5,9 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== field | buckets | from | to | result -datetime | date_period | | | datetime -datetime | integer | datetime | datetime | datetime -datetime | time_duration | | | datetime +date | date_period | | | date +date | integer | date | date | date +date | time_duration | | | date double | double | | | double double | integer | double | double | double double | integer | double | integer | double diff --git a/docs/reference/esql/functions/types/case.asciidoc b/docs/reference/esql/functions/types/case.asciidoc index 85e4193b5bf2f..f6c8cfe9361d1 100644 --- a/docs/reference/esql/functions/types/case.asciidoc +++ b/docs/reference/esql/functions/types/case.asciidoc @@ -7,7 +7,7 @@ condition | trueValue | result boolean | boolean | boolean boolean | cartesian_point | cartesian_point -boolean | datetime | datetime +boolean | date | date boolean | double | double boolean | geo_point | geo_point boolean | integer | integer diff --git a/docs/reference/esql/functions/types/coalesce.asciidoc 
b/docs/reference/esql/functions/types/coalesce.asciidoc index 841d836f6837e..368a12db0dca4 100644 --- a/docs/reference/esql/functions/types/coalesce.asciidoc +++ b/docs/reference/esql/functions/types/coalesce.asciidoc @@ -9,7 +9,7 @@ boolean | boolean | boolean boolean | | boolean cartesian_point | cartesian_point | cartesian_point cartesian_shape | cartesian_shape | cartesian_shape -datetime | datetime | datetime +date | date | date geo_point | geo_point | geo_point geo_shape | geo_shape | geo_shape integer | integer | integer diff --git a/docs/reference/esql/functions/types/date_diff.asciidoc b/docs/reference/esql/functions/types/date_diff.asciidoc index 98adcef51e75c..b0a4818f412ac 100644 --- a/docs/reference/esql/functions/types/date_diff.asciidoc +++ b/docs/reference/esql/functions/types/date_diff.asciidoc @@ -5,6 +5,6 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== unit | startTimestamp | endTimestamp | result -keyword | datetime | datetime | integer -text | datetime | datetime | integer +keyword | date | date | integer +text | date | date | integer |=== diff --git a/docs/reference/esql/functions/types/date_extract.asciidoc b/docs/reference/esql/functions/types/date_extract.asciidoc index 43702ef0671a7..ec9bf70c221cc 100644 --- a/docs/reference/esql/functions/types/date_extract.asciidoc +++ b/docs/reference/esql/functions/types/date_extract.asciidoc @@ -5,6 +5,6 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== datePart | date | result -keyword | datetime | long -text | datetime | long +keyword | date | long +text | date | long |=== diff --git a/docs/reference/esql/functions/types/date_format.asciidoc b/docs/reference/esql/functions/types/date_format.asciidoc index a76f38653b9b8..b2e97dfa8835a 100644 --- a/docs/reference/esql/functions/types/date_format.asciidoc +++ b/docs/reference/esql/functions/types/date_format.asciidoc @@ -5,6 +5,6 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== dateFormat | date | result -keyword | datetime | keyword -text | datetime | keyword +keyword | date | keyword +text | date | keyword |=== diff --git a/docs/reference/esql/functions/types/date_parse.asciidoc b/docs/reference/esql/functions/types/date_parse.asciidoc index 314d02eb06271..f3eab18309dd8 100644 --- a/docs/reference/esql/functions/types/date_parse.asciidoc +++ b/docs/reference/esql/functions/types/date_parse.asciidoc @@ -5,8 +5,8 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== datePattern | dateString | result -keyword | keyword | datetime -keyword | text | datetime -text | keyword | datetime -text | text | datetime +keyword | keyword | date +keyword | text | date +text | keyword | date +text | text | date |=== diff --git a/docs/reference/esql/functions/types/date_trunc.asciidoc b/docs/reference/esql/functions/types/date_trunc.asciidoc index 8df45cfef54a8..aa7dee99c6c44 100644 --- a/docs/reference/esql/functions/types/date_trunc.asciidoc +++ b/docs/reference/esql/functions/types/date_trunc.asciidoc @@ -5,6 +5,6 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== interval | date | result -date_period | datetime | datetime -time_duration | datetime | datetime +date_period | date | date +time_duration | date | date |=== diff --git a/docs/reference/esql/functions/types/equals.asciidoc b/docs/reference/esql/functions/types/equals.asciidoc index 497c9319fedb3..ad0e46ef4b8da 100644 --- a/docs/reference/esql/functions/types/equals.asciidoc +++ b/docs/reference/esql/functions/types/equals.asciidoc @@ -8,7 +8,7 @@ lhs | rhs | result boolean | boolean | 
boolean cartesian_point | cartesian_point | boolean cartesian_shape | cartesian_shape | boolean -datetime | datetime | boolean +date | date | boolean double | double | boolean double | integer | boolean double | long | boolean diff --git a/docs/reference/esql/functions/types/greater_than.asciidoc b/docs/reference/esql/functions/types/greater_than.asciidoc index 771daf1a953b2..c506328126a94 100644 --- a/docs/reference/esql/functions/types/greater_than.asciidoc +++ b/docs/reference/esql/functions/types/greater_than.asciidoc @@ -5,7 +5,7 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== lhs | rhs | result -datetime | datetime | boolean +date | date | boolean double | double | boolean double | integer | boolean double | long | boolean diff --git a/docs/reference/esql/functions/types/greater_than_or_equal.asciidoc b/docs/reference/esql/functions/types/greater_than_or_equal.asciidoc index 771daf1a953b2..c506328126a94 100644 --- a/docs/reference/esql/functions/types/greater_than_or_equal.asciidoc +++ b/docs/reference/esql/functions/types/greater_than_or_equal.asciidoc @@ -5,7 +5,7 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== lhs | rhs | result -datetime | datetime | boolean +date | date | boolean double | double | boolean double | integer | boolean double | long | boolean diff --git a/docs/reference/esql/functions/types/greatest.asciidoc b/docs/reference/esql/functions/types/greatest.asciidoc index 537be55cd17ef..1454bbb6f81c1 100644 --- a/docs/reference/esql/functions/types/greatest.asciidoc +++ b/docs/reference/esql/functions/types/greatest.asciidoc @@ -7,6 +7,7 @@ first | rest | result boolean | boolean | boolean boolean | | boolean +date | date | date double | double | double integer | integer | integer integer | | integer diff --git a/docs/reference/esql/functions/types/least.asciidoc b/docs/reference/esql/functions/types/least.asciidoc index 537be55cd17ef..1454bbb6f81c1 100644 --- a/docs/reference/esql/functions/types/least.asciidoc +++ b/docs/reference/esql/functions/types/least.asciidoc @@ -7,6 +7,7 @@ first | rest | result boolean | boolean | boolean boolean | | boolean +date | date | date double | double | double integer | integer | integer integer | | integer diff --git a/docs/reference/esql/functions/types/less_than.asciidoc b/docs/reference/esql/functions/types/less_than.asciidoc index 771daf1a953b2..c506328126a94 100644 --- a/docs/reference/esql/functions/types/less_than.asciidoc +++ b/docs/reference/esql/functions/types/less_than.asciidoc @@ -5,7 +5,7 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== lhs | rhs | result -datetime | datetime | boolean +date | date | boolean double | double | boolean double | integer | boolean double | long | boolean diff --git a/docs/reference/esql/functions/types/less_than_or_equal.asciidoc b/docs/reference/esql/functions/types/less_than_or_equal.asciidoc index 771daf1a953b2..c506328126a94 100644 --- a/docs/reference/esql/functions/types/less_than_or_equal.asciidoc +++ b/docs/reference/esql/functions/types/less_than_or_equal.asciidoc @@ -5,7 +5,7 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== lhs | rhs | result -datetime | datetime | boolean +date | date | boolean double | double | boolean double | integer | boolean double | long | boolean diff --git a/docs/reference/esql/functions/types/mv_append.asciidoc b/docs/reference/esql/functions/types/mv_append.asciidoc index 49dcef6dc8860..a1894e429ae82 100644 --- a/docs/reference/esql/functions/types/mv_append.asciidoc +++ 
b/docs/reference/esql/functions/types/mv_append.asciidoc @@ -8,7 +8,7 @@ field1 | field2 | result boolean | boolean | boolean cartesian_point | cartesian_point | cartesian_point cartesian_shape | cartesian_shape | cartesian_shape -datetime | datetime | datetime +date | date | date double | double | double geo_point | geo_point | geo_point geo_shape | geo_shape | geo_shape diff --git a/docs/reference/esql/functions/types/mv_count.asciidoc b/docs/reference/esql/functions/types/mv_count.asciidoc index 8af6b76591acb..260c531731f04 100644 --- a/docs/reference/esql/functions/types/mv_count.asciidoc +++ b/docs/reference/esql/functions/types/mv_count.asciidoc @@ -8,7 +8,7 @@ field | result boolean | integer cartesian_point | integer cartesian_shape | integer -datetime | integer +date | integer double | integer geo_point | integer geo_shape | integer diff --git a/docs/reference/esql/functions/types/mv_dedupe.asciidoc b/docs/reference/esql/functions/types/mv_dedupe.asciidoc index a6b78f781f17a..68e546451c8cb 100644 --- a/docs/reference/esql/functions/types/mv_dedupe.asciidoc +++ b/docs/reference/esql/functions/types/mv_dedupe.asciidoc @@ -8,7 +8,7 @@ field | result boolean | boolean cartesian_point | cartesian_point cartesian_shape | cartesian_shape -datetime | datetime +date | date double | double geo_point | geo_point geo_shape | geo_shape diff --git a/docs/reference/esql/functions/types/mv_first.asciidoc b/docs/reference/esql/functions/types/mv_first.asciidoc index e077c57971a4a..35633544d99a0 100644 --- a/docs/reference/esql/functions/types/mv_first.asciidoc +++ b/docs/reference/esql/functions/types/mv_first.asciidoc @@ -8,7 +8,7 @@ field | result boolean | boolean cartesian_point | cartesian_point cartesian_shape | cartesian_shape -datetime | datetime +date | date double | double geo_point | geo_point geo_shape | geo_shape diff --git a/docs/reference/esql/functions/types/mv_last.asciidoc b/docs/reference/esql/functions/types/mv_last.asciidoc index e077c57971a4a..35633544d99a0 100644 --- a/docs/reference/esql/functions/types/mv_last.asciidoc +++ b/docs/reference/esql/functions/types/mv_last.asciidoc @@ -8,7 +8,7 @@ field | result boolean | boolean cartesian_point | cartesian_point cartesian_shape | cartesian_shape -datetime | datetime +date | date double | double geo_point | geo_point geo_shape | geo_shape diff --git a/docs/reference/esql/functions/types/mv_max.asciidoc b/docs/reference/esql/functions/types/mv_max.asciidoc index 4e5f0a5e0ae89..8ea36aebbad37 100644 --- a/docs/reference/esql/functions/types/mv_max.asciidoc +++ b/docs/reference/esql/functions/types/mv_max.asciidoc @@ -6,7 +6,7 @@ |=== field | result boolean | boolean -datetime | datetime +date | date double | double integer | integer ip | ip diff --git a/docs/reference/esql/functions/types/mv_min.asciidoc b/docs/reference/esql/functions/types/mv_min.asciidoc index 4e5f0a5e0ae89..8ea36aebbad37 100644 --- a/docs/reference/esql/functions/types/mv_min.asciidoc +++ b/docs/reference/esql/functions/types/mv_min.asciidoc @@ -6,7 +6,7 @@ |=== field | result boolean | boolean -datetime | datetime +date | date double | double integer | integer ip | ip diff --git a/docs/reference/esql/functions/types/mv_slice.asciidoc b/docs/reference/esql/functions/types/mv_slice.asciidoc index 568de10f53d32..0a9dc073370c7 100644 --- a/docs/reference/esql/functions/types/mv_slice.asciidoc +++ b/docs/reference/esql/functions/types/mv_slice.asciidoc @@ -8,7 +8,7 @@ field | start | end | result boolean | integer | integer | boolean cartesian_point | integer | 
integer | cartesian_point cartesian_shape | integer | integer | cartesian_shape -datetime | integer | integer | datetime +date | integer | integer | date double | integer | integer | double geo_point | integer | integer | geo_point geo_shape | integer | integer | geo_shape diff --git a/docs/reference/esql/functions/types/mv_sort.asciidoc b/docs/reference/esql/functions/types/mv_sort.asciidoc index 24925ca8a6587..93965187482ac 100644 --- a/docs/reference/esql/functions/types/mv_sort.asciidoc +++ b/docs/reference/esql/functions/types/mv_sort.asciidoc @@ -6,7 +6,7 @@ |=== field | order | result boolean | keyword | boolean -datetime | keyword | datetime +date | keyword | date double | keyword | double integer | keyword | integer ip | keyword | ip diff --git a/docs/reference/esql/functions/types/not_equals.asciidoc b/docs/reference/esql/functions/types/not_equals.asciidoc index 497c9319fedb3..ad0e46ef4b8da 100644 --- a/docs/reference/esql/functions/types/not_equals.asciidoc +++ b/docs/reference/esql/functions/types/not_equals.asciidoc @@ -8,7 +8,7 @@ lhs | rhs | result boolean | boolean | boolean cartesian_point | cartesian_point | boolean cartesian_shape | cartesian_shape | boolean -datetime | datetime | boolean +date | date | boolean double | double | boolean double | integer | boolean double | long | boolean diff --git a/docs/reference/esql/functions/types/now.asciidoc b/docs/reference/esql/functions/types/now.asciidoc index 5737d98f2f7db..b474ab1042050 100644 --- a/docs/reference/esql/functions/types/now.asciidoc +++ b/docs/reference/esql/functions/types/now.asciidoc @@ -5,5 +5,5 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== result -datetime +date |=== diff --git a/docs/reference/esql/functions/types/sub.asciidoc b/docs/reference/esql/functions/types/sub.asciidoc index d309f651705f0..c3ded301ebe68 100644 --- a/docs/reference/esql/functions/types/sub.asciidoc +++ b/docs/reference/esql/functions/types/sub.asciidoc @@ -5,9 +5,9 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== lhs | rhs | result +date | date_period | date +date | time_duration | date date_period | date_period | date_period -datetime | date_period | datetime -datetime | time_duration | datetime double | double | double double | integer | double double | long | double diff --git a/docs/reference/esql/functions/types/to_datetime.asciidoc b/docs/reference/esql/functions/types/to_datetime.asciidoc index 52c4cebb661cf..80c986efca794 100644 --- a/docs/reference/esql/functions/types/to_datetime.asciidoc +++ b/docs/reference/esql/functions/types/to_datetime.asciidoc @@ -5,11 +5,11 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== field | result -datetime | datetime -double | datetime -integer | datetime -keyword | datetime -long | datetime -text | datetime -unsigned_long | datetime +date | date +double | date +integer | date +keyword | date +long | date +text | date +unsigned_long | date |=== diff --git a/docs/reference/esql/functions/types/to_double.asciidoc b/docs/reference/esql/functions/types/to_double.asciidoc index cff686c7bc4ca..d5f5833cd7249 100644 --- a/docs/reference/esql/functions/types/to_double.asciidoc +++ b/docs/reference/esql/functions/types/to_double.asciidoc @@ -9,7 +9,7 @@ boolean | double counter_double | double counter_integer | double counter_long | double -datetime | double +date | double double | double integer | double keyword | double diff --git a/docs/reference/esql/functions/types/to_integer.asciidoc b/docs/reference/esql/functions/types/to_integer.asciidoc index 
974f3c9c82d88..d67f8f07affd9 100644 --- a/docs/reference/esql/functions/types/to_integer.asciidoc +++ b/docs/reference/esql/functions/types/to_integer.asciidoc @@ -7,7 +7,7 @@ field | result boolean | integer counter_integer | integer -datetime | integer +date | integer double | integer integer | integer keyword | integer diff --git a/docs/reference/esql/functions/types/to_long.asciidoc b/docs/reference/esql/functions/types/to_long.asciidoc index b3959c5444e34..a07990cb1cfbf 100644 --- a/docs/reference/esql/functions/types/to_long.asciidoc +++ b/docs/reference/esql/functions/types/to_long.asciidoc @@ -8,7 +8,7 @@ field | result boolean | long counter_integer | long counter_long | long -datetime | long +date | long double | long integer | long keyword | long diff --git a/docs/reference/esql/functions/types/to_string.asciidoc b/docs/reference/esql/functions/types/to_string.asciidoc index f14cfbb39929f..26a5b31a2a589 100644 --- a/docs/reference/esql/functions/types/to_string.asciidoc +++ b/docs/reference/esql/functions/types/to_string.asciidoc @@ -8,7 +8,7 @@ field | result boolean | keyword cartesian_point | keyword cartesian_shape | keyword -datetime | keyword +date | keyword double | keyword geo_point | keyword geo_shape | keyword diff --git a/docs/reference/esql/functions/types/to_unsigned_long.asciidoc b/docs/reference/esql/functions/types/to_unsigned_long.asciidoc index a271e1a19321d..87b21f3948dad 100644 --- a/docs/reference/esql/functions/types/to_unsigned_long.asciidoc +++ b/docs/reference/esql/functions/types/to_unsigned_long.asciidoc @@ -6,7 +6,7 @@ |=== field | result boolean | unsigned_long -datetime | unsigned_long +date | unsigned_long double | unsigned_long integer | unsigned_long keyword | unsigned_long diff --git a/docs/reference/esql/functions/types/top.asciidoc b/docs/reference/esql/functions/types/top.asciidoc index 1874cd8b12bf3..32b9513f5c6f8 100644 --- a/docs/reference/esql/functions/types/top.asciidoc +++ b/docs/reference/esql/functions/types/top.asciidoc @@ -5,7 +5,7 @@ [%header.monospaced.styled,format=dsv,separator=|] |=== field | limit | order | result -datetime | integer | keyword | datetime +date | integer | keyword | date double | integer | keyword | double integer | integer | keyword | integer long | integer | keyword | long diff --git a/docs/reference/esql/multivalued-fields.asciidoc b/docs/reference/esql/multivalued-fields.asciidoc index 8ff645bba863e..00f441fcee0ff 100644 --- a/docs/reference/esql/multivalued-fields.asciidoc +++ b/docs/reference/esql/multivalued-fields.asciidoc @@ -166,6 +166,35 @@ POST /_query } ---- +[discrete] +[[esql-multivalued-nulls]] +==== `null` in a list + +`null` values in a list are not preserved at the storage layer: + +[source,console,id=esql-multivalued-fields-multivalued-nulls] +---- +POST /mv/_doc?refresh +{ "a": [2, null, 1] } + +POST /_query +{ + "query": "FROM mv | LIMIT 1" +} +---- + +[source,console-result] +---- +{ + "columns": [ + { "name": "a", "type": "long"}, + ], + "values": [ + [[1, 2]], + ] +} +---- + [discrete] [[esql-multivalued-fields-functions]] ==== Functions diff --git a/docs/reference/esql/processing-commands/dissect.asciidoc b/docs/reference/esql/processing-commands/dissect.asciidoc index c48b72af0de7e..3dca50c8aee5e 100644 --- a/docs/reference/esql/processing-commands/dissect.asciidoc +++ b/docs/reference/esql/processing-commands/dissect.asciidoc @@ -17,6 +17,8 @@ multiple values, `DISSECT` will process each value. `pattern`:: A <>. 
+If a field name conflicts with an existing column, the existing column is dropped. +If a field name is used more than once, only the rightmost duplicate creates a column. ``:: A string used as the separator between appended values, when using the <>. diff --git a/docs/reference/esql/processing-commands/enrich.asciidoc b/docs/reference/esql/processing-commands/enrich.asciidoc index 5470d81b2f40b..844cc2c62d1ed 100644 --- a/docs/reference/esql/processing-commands/enrich.asciidoc +++ b/docs/reference/esql/processing-commands/enrich.asciidoc @@ -28,11 +28,16 @@ name as the `match_field` defined in the <>. The enrich fields from the enrich index that are added to the result as new columns. If a column with the same name as the enrich field already exists, the existing column will be replaced by the new column. If not specified, each of -the enrich fields defined in the policy is added +the enrich fields defined in the policy is added. +A column with the same name as the enrich field will be dropped unless the +enrich field is renamed. `new_nameX`:: Enables you to change the name of the column that's added for each of the enrich fields. Defaults to the enrich field name. +If a column has the same name as the new name, it will be discarded. +If a name (new or original) occurs more than once, only the rightmost duplicate +creates a new column. *Description* diff --git a/docs/reference/esql/processing-commands/eval.asciidoc b/docs/reference/esql/processing-commands/eval.asciidoc index 9b34fca7ceeff..be69d775b2755 100644 --- a/docs/reference/esql/processing-commands/eval.asciidoc +++ b/docs/reference/esql/processing-commands/eval.asciidoc @@ -13,10 +13,12 @@ EVAL [column1 =] value1[, ..., [columnN =] valueN] `columnX`:: The column name. +If a column with the same name already exists, the existing column is dropped. +If a column name is used more than once, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a -<>. +<>. Can use columns defined left of this one. *Description* diff --git a/docs/reference/esql/processing-commands/grok.asciidoc b/docs/reference/esql/processing-commands/grok.asciidoc index d5d58a9eaee12..58493a13359d2 100644 --- a/docs/reference/esql/processing-commands/grok.asciidoc +++ b/docs/reference/esql/processing-commands/grok.asciidoc @@ -17,6 +17,9 @@ multiple values, `GROK` will process each value. `pattern`:: A grok pattern. +If a field name conflicts with an existing column, the existing column is discarded. +If a field name is used more than once, a multi-valued column will be created with one value +per each occurrence of the field name. 
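+
+For example (an illustrative sketch, not one of the tested examples below), a
+pattern that extracts into a name that already exists replaces the existing
+column:
+
+[source,esql]
+----
+ROW message = "172.21.3.15 Connected", client_ip = "placeholder"
+| GROK message "%{IP:client_ip} %{WORD:event}"
+----
+
+Here the original `client_ip` column is discarded and replaced by the value
+extracted from `message`.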
*Description* @@ -64,4 +67,16 @@ include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime] |=== include::{esql-specs}/docs.csv-spec[tag=grokWithToDatetime-result] |=== + +If a field name is used more than once, `GROK` creates a multi-valued +column: + +[source.merge.styled,esql] +---- +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames] +---- +[%header.monospaced.styled,format=dsv,separator=|] +|=== +include::{esql-specs}/docs.csv-spec[tag=grokWithDuplicateFieldNames-result] +|=== // end::examples[] diff --git a/docs/reference/esql/processing-commands/keep.asciidoc b/docs/reference/esql/processing-commands/keep.asciidoc index 57f32a68aec4c..a07afa64a756c 100644 --- a/docs/reference/esql/processing-commands/keep.asciidoc +++ b/docs/reference/esql/processing-commands/keep.asciidoc @@ -13,6 +13,8 @@ KEEP columns `columns`:: A comma-separated list of columns to keep. Supports wildcards. +See below for the behavior in case an existing column matches multiple +given wildcards or column names. *Description* @@ -26,7 +28,7 @@ Fields are added in the order they appear. If one field matches multiple express 2. Partial wildcard expressions (for example: `fieldNam*`) 3. Wildcard only (`*`) -If a field matches two expressions with the same precedence, the right-most expression wins. +If a field matches two expressions with the same precedence, the rightmost expression wins. Refer to the examples for illustrations of these precedence rules. diff --git a/docs/reference/esql/processing-commands/lookup.asciidoc b/docs/reference/esql/processing-commands/lookup.asciidoc index 1944d243968a8..142bcb93dc445 100644 --- a/docs/reference/esql/processing-commands/lookup.asciidoc +++ b/docs/reference/esql/processing-commands/lookup.asciidoc @@ -15,6 +15,7 @@ LOOKUP table ON match_field1[, match_field2, ...] `table`:: The name of the `table` provided in the request to match. +If the table's column names conflict with existing columns, the existing columns will be dropped. `match_field`:: The fields in the input to match against the table. diff --git a/docs/reference/esql/processing-commands/rename.asciidoc b/docs/reference/esql/processing-commands/rename.asciidoc index 773fe8b640f75..0f338ed6e15e8 100644 --- a/docs/reference/esql/processing-commands/rename.asciidoc +++ b/docs/reference/esql/processing-commands/rename.asciidoc @@ -15,7 +15,9 @@ RENAME old_name1 AS new_name1[, ..., old_nameN AS new_nameN] The name of a column you want to rename. `new_nameX`:: -The new name of the column. +The new name of the column. If it conflicts with an existing column name, +the existing column is dropped. If multiple columns are renamed to the same +name, all but the rightmost column with the same new name are dropped. *Description* diff --git a/docs/reference/esql/processing-commands/stats.asciidoc b/docs/reference/esql/processing-commands/stats.asciidoc index fe84c56bbfc19..21b91eafd3198 100644 --- a/docs/reference/esql/processing-commands/stats.asciidoc +++ b/docs/reference/esql/processing-commands/stats.asciidoc @@ -2,6 +2,9 @@ [[esql-stats-by]] === `STATS ... BY` +The `STATS ... BY` processing command groups rows according to a common value +and calculates one or more aggregated values over the grouped rows. + **Syntax** [source,esql] @@ -15,12 +18,15 @@ STATS [column1 =] expression1[, ..., [columnN =] expressionN] `columnX`:: The name by which the aggregated value is returned. If omitted, the name is equal to the corresponding expression (`expressionX`). 
+If multiple columns have the same name, all but the rightmost column with this +name will be ignored. `expressionX`:: An expression that computes an aggregated value. `grouping_expressionX`:: An expression that outputs the values to group by. +If its name coincides with one of the computed columns, that column will be ignored. NOTE: Individual `null` values are skipped when computing aggregations. @@ -35,6 +41,10 @@ The following <> are supported: include::../functions/aggregation-functions.asciidoc[tag=agg_list] +The following <> are supported: + +include::../functions/grouping-functions.asciidoc[tag=group_list] + NOTE: `STATS` without any groups is much much faster than adding a group. NOTE: Grouping on a single expression is currently much more optimized than grouping diff --git a/docs/reference/esql/source-commands/from.asciidoc b/docs/reference/esql/source-commands/from.asciidoc index 9ab21e8996aa0..1abe7dcb2fa9b 100644 --- a/docs/reference/esql/source-commands/from.asciidoc +++ b/docs/reference/esql/source-commands/from.asciidoc @@ -58,24 +58,22 @@ today's index: FROM ---- -Use comma-separated lists or wildcards to query multiple data streams, indices, -or aliases: +Use comma-separated lists or wildcards to <>: [source,esql] ---- FROM employees-00001,other-employees-* ---- -Use the format `:` to query data streams and indices -on remote clusters: +Use the format `:` to <>: [source,esql] ---- FROM cluster_one:employees-00001,cluster_two:other-employees-* ---- -See <>. - Use the optional `METADATA` directive to enable <>: [source,esql] diff --git a/docs/reference/esql/source-commands/row.asciidoc b/docs/reference/esql/source-commands/row.asciidoc index adce844f365b8..d127080415f37 100644 --- a/docs/reference/esql/source-commands/row.asciidoc +++ b/docs/reference/esql/source-commands/row.asciidoc @@ -13,6 +13,7 @@ ROW column1 = value1[, ..., columnN = valueN] `columnX`:: The column name. +In case of duplicate column names, only the rightmost duplicate creates a column. `valueX`:: The value for the column. Can be a literal, an expression, or a diff --git a/docs/reference/features/apis/features-apis.asciidoc b/docs/reference/features/apis/features-apis.asciidoc index fe06471cff0df..83414b69fabf3 100644 --- a/docs/reference/features/apis/features-apis.asciidoc +++ b/docs/reference/features/apis/features-apis.asciidoc @@ -7,7 +7,7 @@ by Elasticsearch and Elasticsearch plugins. [discrete] === Features APIs * <> -* <> +* <> include::get-features-api.asciidoc[] include::reset-features-api.asciidoc[] diff --git a/docs/reference/geospatial-analysis.asciidoc b/docs/reference/geospatial-analysis.asciidoc index 7577bb222127f..6760040e14bc7 100644 --- a/docs/reference/geospatial-analysis.asciidoc +++ b/docs/reference/geospatial-analysis.asciidoc @@ -2,7 +2,7 @@ [[geospatial-analysis]] = Geospatial analysis -Did you know that {es} has geospatial capabilities? https://www.elastic.co/blog/geo-location-and-search[{es} and geo] go way back, to 2010. A lot has happened since then and today {es} provides robust geospatial capabilities with speed, all with a stack that scales automatically. +Did you know that {es} has geospatial capabilities? https://www.elastic.co/blog/geo-location-and-search[{es} and geo] go way back, to 2010. A lot has happened since then and today {es} provides robust geospatial capabilities with speed, all with a stack that scales automatically. Not sure where to get started with {es} and geo? Then, you have come to the right place. 
@@ -18,8 +18,10 @@ Have an index with lat/lon pairs but no geo_point mapping? Use <> lets you clean, transform, and augment your data before indexing. +Data is often messy and incomplete. <> lets you clean, transform, and augment your data before indexing. +* Use <> together with <> to index CSV files with geo data. + Kibana's {kibana-ref}/import-geospatial-data.html[Import CSV] feature can help with this. * Use <> to add geographical location of an IPv4 or IPv6 address. * Use <> to convert grid tiles or hexagonal cell ids to bounding boxes or polygons which describe their shape. * Use <> for reverse geocoding. For example, use {kibana-ref}/reverse-geocoding-tutorial.html[reverse geocoding] to visualize metropolitan areas by web traffic. @@ -30,6 +32,18 @@ Data is often messy and incomplete. <> lets you clean, <> answer location-driven questions. Find documents that intersect with, are within, are contained by, or do not intersect your query geometry. Combine geospatial queries with full text search queries for unparalleled searching experience. For example, "Show me all subscribers that live within 5 miles of our new gym location, that joined in the last year and have running mentioned in their profile". +[discrete] +[[esql-query]] +=== ES|QL + +<> has support for <> functions, enabling efficient index searching for documents that intersect with, are within, are contained by, or are disjoint from a query geometry. In addition, the `ST_DISTANCE` function calculates the distance between two points. + +* experimental:[] <> +* experimental:[] <> +* experimental:[] <> +* experimental:[] <> +* experimental:[] <> + [discrete] [[geospatial-aggregate]] === Aggregate @@ -42,12 +56,12 @@ Geospatial bucket aggregations: * <> groups geo_point and geo_shape values into buckets that represent a grid. * <> groups geo_point and geo_shape values into buckets that represent an H3 hexagonal cell. * <> groups geo_point and geo_shape values into buckets that represent a grid. Each cell corresponds to a {wikipedia}/Tiled_web_map[map tile] as used by many online map sites. - + Geospatial metric aggregations: * <> computes the geographic bounding box containing all values for a Geopoint or Geoshape field. * <> computes the weighted centroid from all coordinate values for geo fields. -* <> aggregates all geo_point values within a bucket into a LineString ordered by the chosen sort field. Use geo_line aggregation to create {kibana-ref}/asset-tracking-tutorial.html[vehicle tracks]. +* <> aggregates all geo_point values within a bucket into a LineString ordered by the chosen sort field. Use geo_line aggregation to create {kibana-ref}/asset-tracking-tutorial.html[vehicle tracks]. Combine aggregations to perform complex geospatial analysis. For example, to calculate the most recent GPS tracks per flight, use a <> to group documents into buckets per aircraft. Then use geo-line aggregation to compute a track for each aircraft. In another example, use geotile grid aggregation to group documents into a grid. Then use geo-centroid aggregation to find the weighted centroid of each grid cell. @@ -79,4 +93,4 @@ Put machine learning to work for you and find the data that should stand out wit Let your location data drive insights and action with {kibana-ref}/geo-alerting.html[geographic alerts]. Commonly referred to as geo-fencing, track moving objects as they enter or exit a boundary to receive notifications through common business systems (email, Slack, Teams, PagerDuty, and more). -Interested in learning more? 
Follow {kibana-ref}/asset-tracking-tutorial.html[step-by-step instructions] for setting up tracking containment alerts to monitor moving vehicles. \ No newline at end of file +Interested in learning more? Follow {kibana-ref}/asset-tracking-tutorial.html[step-by-step instructions] for setting up tracking containment alerts to monitor moving vehicles. diff --git a/docs/reference/health/health.asciidoc b/docs/reference/health/health.asciidoc index 6ac7bd2001d45..34714e80e1b18 100644 --- a/docs/reference/health/health.asciidoc +++ b/docs/reference/health/health.asciidoc @@ -204,9 +204,8 @@ for health status set `verbose` to `false` to disable the more expensive analysi `help_url` field. `affected_resources`:: - (Optional, array of strings) If the root cause pertains to multiple resources in the - cluster (like indices, shards, nodes, etc...) this will hold all resources that this - diagnosis is applicable for. + (Optional, object) An object where the keys represent resource types (for example, indices, shards), + and the values are lists of the specific resources affected by the issue. `help_url`:: (string) A link to the troubleshooting guide that'll fix the health problem. diff --git a/docs/reference/high-availability/cluster-design.asciidoc b/docs/reference/high-availability/cluster-design.asciidoc index 6c17a494f36ae..105c8b236b0b1 100644 --- a/docs/reference/high-availability/cluster-design.asciidoc +++ b/docs/reference/high-availability/cluster-design.asciidoc @@ -246,7 +246,8 @@ accumulate into a noticeable performance penalty. An unreliable network may have frequent network partitions. {es} will automatically recover from a network partition as quickly as it can but your cluster may be partly unavailable during a partition and will need to spend time and resources to -resynchronize any missing data and rebalance itself once the partition heals. +<> and <> +itself once the partition heals. Recovering from a failure may involve copying a large amount of data between nodes so the recovery time is often determined by the available bandwidth. diff --git a/docs/reference/how-to/knn-search.asciidoc b/docs/reference/how-to/knn-search.asciidoc index 194d122cef159..18882380ce160 100644 --- a/docs/reference/how-to/knn-search.asciidoc +++ b/docs/reference/how-to/knn-search.asciidoc @@ -45,6 +45,12 @@ results contains the full document `_source`. When the documents contain high-dimensional `dense_vector` fields, the `_source` can be quite large and expensive to load. This could significantly slow down the speed of kNN search. +NOTE: <>, <>, +and <> operations generally +require the `_source` field. Disabling `_source` for a field might result in +unexpected behavior for these operations. For example, reindex might not actually +contain the `dense_vector` field in the new index. + You can disable storing `dense_vector` fields in the `_source` through the <> mapping parameter. This prevents loading and returning large vectors during search, and also cuts down on the index size. @@ -102,14 +108,14 @@ merges smaller segments into larger ones through a background explicit steps to reduce the number of index segments. [discrete] -==== Force merge to one segment - -The <> operation forces an index merge. If you -force merge to one segment, the kNN search only need to check a single, -all-inclusive HNSW graph. Force merging `dense_vector` fields is an expensive -operation that can take significant time to complete. 
- -include::{es-ref-dir}/indices/forcemerge.asciidoc[tag=force-merge-read-only-warn] +==== Increase maximum segment size + +{es} provides many tunable settings for controlling the merge process. One +important setting is `index.merge.policy.max_merged_segment`. This controls +the maximum size of the segments that are created during the merge process. +By increasing the value, you can reduce the number of segments in the index. +The default value is `5GB`, but that might be too small for higher-dimensional vectors. +Increasing this value to `10GB` or `20GB` can help reduce the number of segments. [discrete] ==== Create large segments during bulk indexing diff --git a/docs/reference/how-to/size-your-shards.asciidoc b/docs/reference/how-to/size-your-shards.asciidoc index 56e5fbbf15c77..6baac25aa0532 100644 --- a/docs/reference/how-to/size-your-shards.asciidoc +++ b/docs/reference/how-to/size-your-shards.asciidoc @@ -152,9 +152,10 @@ same data. However, very large shards can also cause slower searches and will take longer to recover after a failure. There is no hard limit on the physical size of a shard, and each shard can in -theory contain up to just over two billion documents. However, experience shows -that shards between 10GB and 50GB typically work well for many use cases, as -long as the per-shard document count is kept below 200 million. +theory contain up to <>. However, experience shows that shards between 10GB and 50GB +typically work well for many use cases, as long as the per-shard document count +is kept below 200 million. You may be able to use larger shards depending on your network and use case, and smaller shards may be appropriate for @@ -184,6 +185,29 @@ index prirep shard store // TESTRESPONSE[s/\.ds-my-data-stream-2099\.05\.06-000001/my-index-000001/] // TESTRESPONSE[s/50gb/.*/] +If an index's shard is experiencing degraded performance from surpassing the +recommended 50GB size, you may consider fixing the index's shard sizing. +Because a shard's size cannot be changed in place, the index's data must be +copied into a new index with corrected settings. This requires first ensuring +sufficient disk to copy the data. Afterwards, you can copy the index's data +with corrected settings via one of the following options: + +* running <> to increase the number of primary +shards + +* creating a destination index with corrected settings and then running +<> + +Note that performing a <> and/or +<> is insufficient to resolve the shards' +sizing. + +Once a source index's data is copied into its destination index, the source +index can be <>. You may then consider creating an +<> with the source index's name that points to the destination +index for continuity. + + [discrete] [[shard-count-recommendation]] ==== Master-eligible nodes should have at least 1GB of heap per 3000 indices @@ -544,3 +568,42 @@ PUT _cluster/settings } } ---- + +[discrete] +[[troubleshooting-max-docs-limit]] +==== Number of documents in the shard cannot exceed [2147483519] + +Each {es} shard is a separate Lucene index, so it shares Lucene's +https://github.com/apache/lucene/issues/5176[`MAX_DOC` limit] of having at most +2,147,483,519 (`(2^31)-129`) documents. This per-shard limit applies to the sum +of `docs.count` plus `docs.deleted` as reported by the <>.
Exceeding this limit will result in errors like the following: + +[source,txt] +---- +Elasticsearch exception [type=illegal_argument_exception, reason=Number of documents in the shard cannot exceed [2147483519]] +---- + +TIP: This calculation may differ from the <> +calculation, because the Count API does not include nested documents and does +not count deleted documents. + +This limit is much higher than the <> of approximately 200M documents per shard. + +If you encounter this problem, try to mitigate it by using the +<> to merge away some deleted docs. For +example: + +[source,console] +---- +POST my-index-000001/_forcemerge?only_expunge_deletes=true +---- +// TEST[setup:my_index] + +This will launch an asynchronous task which can be monitored via the +<>. + +It may also be helpful to <>, +or to <> or <> the index into +one with a larger number of shards. diff --git a/docs/reference/ilm/actions/ilm-delete.asciidoc b/docs/reference/ilm/actions/ilm-delete.asciidoc index eac3b9804709a..beed60105ed96 100644 --- a/docs/reference/ilm/actions/ilm-delete.asciidoc +++ b/docs/reference/ilm/actions/ilm-delete.asciidoc @@ -15,6 +15,18 @@ Deletes the searchable snapshot created in a previous phase. Defaults to `true`. This option is applicable when the <> action is used in any previous phase. ++ +If you set this option to `false`, use the <> to remove {search-snaps} from your snapshot repository when +they are no longer needed. ++ +If you manually delete an index before the {ilm-cap} delete phase runs, then +{ilm-init} will not delete the underlying {search-snap}. Use the +<> to remove the {search-snap} from +your snapshot repository when it is no longer needed. ++ +See <> for +further information about deleting {search-snaps}. WARNING: If a policy with a searchable snapshot action is applied on an existing searchable snapshot index, the snapshot backing this index will NOT be deleted because it was not created by this policy. If you want diff --git a/docs/reference/ilm/apis/delete-lifecycle.asciidoc b/docs/reference/ilm/apis/delete-lifecycle.asciidoc index 632cb982b3968..fc9a35e4ef570 100644 --- a/docs/reference/ilm/apis/delete-lifecycle.asciidoc +++ b/docs/reference/ilm/apis/delete-lifecycle.asciidoc @@ -5,7 +5,7 @@ Delete policy ++++ -Deletes an index lifecycle policy. +Deletes an index <> policy. [[ilm-delete-lifecycle-request]] ==== {api-request-title} diff --git a/docs/reference/ilm/apis/explain.asciidoc b/docs/reference/ilm/apis/explain.asciidoc index 348a9e7f99e78..a1ddde8c9f2d9 100644 --- a/docs/reference/ilm/apis/explain.asciidoc +++ b/docs/reference/ilm/apis/explain.asciidoc @@ -5,7 +5,7 @@ Explain lifecycle ++++ -Retrieves the current lifecycle status for one or more indices. For data +Retrieves the current <> status for one or more indices. For data streams, the API retrieves the current lifecycle status for the stream's backing indices. diff --git a/docs/reference/ilm/apis/get-lifecycle.asciidoc b/docs/reference/ilm/apis/get-lifecycle.asciidoc index 7443610065487..b4e07389a9fb7 100644 --- a/docs/reference/ilm/apis/get-lifecycle.asciidoc +++ b/docs/reference/ilm/apis/get-lifecycle.asciidoc @@ -5,7 +5,7 @@ Get policy ++++ -Retrieves a lifecycle policy. +Retrieves a <> policy. 
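As a quick reference, a minimal request sketch for this API (assuming a policy named `my_policy` already exists):

[source,console]
----
GET _ilm/policy/my_policy
----
// TEST[skip:illustrative sketch only]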
[[ilm-get-lifecycle-request]] ==== {api-request-title} diff --git a/docs/reference/ilm/apis/get-status.asciidoc b/docs/reference/ilm/apis/get-status.asciidoc index 7e9e963f6f369..f2ab8d65ec9a1 100644 --- a/docs/reference/ilm/apis/get-status.asciidoc +++ b/docs/reference/ilm/apis/get-status.asciidoc @@ -7,7 +7,7 @@ Get {ilm} status ++++ -Retrieves the current {ilm} ({ilm-init}) status. +Retrieves the current <> ({ilm-init}) status. You can start or stop {ilm-init} with the <> and <> APIs. diff --git a/docs/reference/ilm/apis/move-to-step.asciidoc b/docs/reference/ilm/apis/move-to-step.asciidoc index 19cc9f7088867..f3441fa997cff 100644 --- a/docs/reference/ilm/apis/move-to-step.asciidoc +++ b/docs/reference/ilm/apis/move-to-step.asciidoc @@ -5,7 +5,7 @@ Move to step ++++ -Triggers execution of a specific step in the lifecycle policy. +Triggers execution of a specific step in the <> policy. [[ilm-move-to-step-request]] ==== {api-request-title} diff --git a/docs/reference/ilm/apis/put-lifecycle.asciidoc b/docs/reference/ilm/apis/put-lifecycle.asciidoc index ffd59a14d8c25..390f6b1bb4d15 100644 --- a/docs/reference/ilm/apis/put-lifecycle.asciidoc +++ b/docs/reference/ilm/apis/put-lifecycle.asciidoc @@ -5,7 +5,7 @@ Create or update lifecycle policy ++++ -Creates or updates lifecycle policy. See <> for +Creates or updates <> policy. See <> for definitions of policy components. [[ilm-put-lifecycle-request]] diff --git a/docs/reference/ilm/apis/remove-policy-from-index.asciidoc b/docs/reference/ilm/apis/remove-policy-from-index.asciidoc index 711eccc298df1..107cab4d5aa19 100644 --- a/docs/reference/ilm/apis/remove-policy-from-index.asciidoc +++ b/docs/reference/ilm/apis/remove-policy-from-index.asciidoc @@ -5,7 +5,7 @@ Remove policy ++++ -Removes assigned lifecycle policies from an index or a data stream's backing +Removes assigned <> policies from an index or a data stream's backing indices. [[ilm-remove-policy-request]] diff --git a/docs/reference/ilm/apis/retry-policy.asciidoc b/docs/reference/ilm/apis/retry-policy.asciidoc index cb2587fbb151b..8f01f15e0c3ad 100644 --- a/docs/reference/ilm/apis/retry-policy.asciidoc +++ b/docs/reference/ilm/apis/retry-policy.asciidoc @@ -5,7 +5,7 @@ Retry policy ++++ -Retry executing the policy for an index that is in the ERROR step. +Retry executing the <> policy for an index that is in the ERROR step. [[ilm-retry-policy-request]] ==== {api-request-title} diff --git a/docs/reference/ilm/apis/start.asciidoc b/docs/reference/ilm/apis/start.asciidoc index 32db585c6b14c..c38b3d9ca8831 100644 --- a/docs/reference/ilm/apis/start.asciidoc +++ b/docs/reference/ilm/apis/start.asciidoc @@ -7,7 +7,7 @@ Start {ilm} ++++ -Start the {ilm} ({ilm-init}) plugin. +Start the <> ({ilm-init}) plugin. [[ilm-start-request]] ==== {api-request-title} diff --git a/docs/reference/ilm/apis/stop.asciidoc b/docs/reference/ilm/apis/stop.asciidoc index 1e9cfb94d0b1f..a6100d794c2d3 100644 --- a/docs/reference/ilm/apis/stop.asciidoc +++ b/docs/reference/ilm/apis/stop.asciidoc @@ -7,7 +7,7 @@ Stop {ilm} ++++ -Stop the {ilm} ({ilm-init}) plugin. +Stop the <> ({ilm-init}) plugin. 
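As a quick reference, a minimal request sketch for stopping {ilm-init} (assuming it is currently running):

[source,console]
----
POST _ilm/stop
----
// TEST[skip:illustrative sketch only]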
[[ilm-stop-request]] ==== {api-request-title} diff --git a/docs/reference/ilm/error-handling.asciidoc b/docs/reference/ilm/error-handling.asciidoc index d922fa6687823..f810afc6c2b5f 100644 --- a/docs/reference/ilm/error-handling.asciidoc +++ b/docs/reference/ilm/error-handling.asciidoc @@ -2,7 +2,7 @@ [[index-lifecycle-error-handling]] == Troubleshooting {ilm} errors -When {ilm-init} executes a lifecycle policy, it's possible for errors to occur +When <> executes a lifecycle policy, it's possible for errors to occur while performing the necessary index operations for a step. When this happens, {ilm-init} moves the index to an `ERROR` step. If {ilm-init} cannot resolve the error automatically, execution is halted diff --git a/docs/reference/ilm/ilm-index-lifecycle.asciidoc b/docs/reference/ilm/ilm-index-lifecycle.asciidoc index acf59645dae13..040e02742f5e7 100644 --- a/docs/reference/ilm/ilm-index-lifecycle.asciidoc +++ b/docs/reference/ilm/ilm-index-lifecycle.asciidoc @@ -5,7 +5,7 @@ Index lifecycle ++++ -{ilm-init} defines five index lifecycle _phases_: +<> defines five index lifecycle _phases_: * **Hot**: The index is actively being updated and queried. * **Warm**: The index is no longer being updated but is still being queried. diff --git a/docs/reference/index-modules.asciidoc b/docs/reference/index-modules.asciidoc index 04bebfae2763b..7232de12c8c50 100644 --- a/docs/reference/index-modules.asciidoc +++ b/docs/reference/index-modules.asciidoc @@ -81,8 +81,9 @@ breaking change]. If you are updating the compression type, the new one will be applied after segments are merged. Segment merging can be forced using <>. Experiments with indexing log datasets - have shown that `best_compression` gives up to ~18% lower storage usage - compared to `default` while only minimally affecting indexing throughput (~2%). + have shown that `best_compression` gives up to ~18% lower storage usage in + the most ideal scenario compared to `default` while only minimally affecting + indexing throughput (~2%). [[index-mode-setting]] `index.mode`:: + @@ -112,7 +113,7 @@ Index mode supports the following values: `time_series`::: Index mode optimized for storage of metrics documented in <>. -`logs`::: Index mode optimized for storage of logs. It applies default sort settings on the `hostname` and `timestamp` fields and uses <>. <> on different fields is still allowed. +`logsdb`::: Index mode optimized for storage of logs. It applies default sort settings on the `hostname` and `timestamp` fields and uses <>. <> on different fields is still allowed. preview:[] [[routing-partition-size]] `index.routing_partition_size`:: diff --git a/docs/reference/index-modules/translog.asciidoc b/docs/reference/index-modules/translog.asciidoc index 52631bc0956b8..0032c7b46bfb2 100644 --- a/docs/reference/index-modules/translog.asciidoc +++ b/docs/reference/index-modules/translog.asciidoc @@ -19,7 +19,8 @@ An {es} <> is the process of performing a Lucene commit and starting a new translog generation. Flushes are performed automatically in the background in order to make sure the translog does not grow too large, which would make replaying its operations take a considerable amount of time during -recovery. The ability to perform a flush manually is also exposed through an +recovery. The translog size will never exceed `1%` of the disk size. +The ability to perform a flush manually is also exposed through an API, although this is rarely needed. [discrete] @@ -71,7 +72,8 @@ update, or bulk request. 
This setting accepts the following parameters: The translog stores all operations that are not yet safely persisted in Lucene (i.e., are not part of a Lucene commit point). Although these operations are available for reads, they will need to be replayed if the shard was stopped - and had to be recovered. This setting controls the maximum total size of these - operations, to prevent recoveries from taking too long. Once the maximum size - has been reached a flush will happen, generating a new Lucene commit point. - Defaults to `512mb`. + and had to be recovered. + This setting controls the maximum total size of these operations to prevent + recoveries from taking too long. Once the maximum size has been reached, a flush + will happen, generating a new Lucene commit point. Defaults to `10 GB`. + diff --git a/docs/reference/index.asciidoc b/docs/reference/index.asciidoc index 2057519719177..24dbee8c2983b 100644 --- a/docs/reference/index.asciidoc +++ b/docs/reference/index.asciidoc @@ -6,10 +6,10 @@ include::links.asciidoc[] include::landing-page.asciidoc[] -include::intro.asciidoc[] - include::release-notes/highlights.asciidoc[] +include::intro.asciidoc[] + include::quickstart/index.asciidoc[] include::setup.asciidoc[] @@ -32,6 +32,8 @@ include::alias.asciidoc[] include::search/search-your-data/search-your-data.asciidoc[] +include::reranking/index.asciidoc[] + include::query-dsl.asciidoc[] include::aggregations.asciidoc[] diff --git a/docs/reference/indices/forcemerge.asciidoc b/docs/reference/indices/forcemerge.asciidoc index 1d473acbd5d48..6eacaac5e7b2a 100644 --- a/docs/reference/indices/forcemerge.asciidoc +++ b/docs/reference/indices/forcemerge.asciidoc @@ -89,8 +89,9 @@ one at a time. If you expand the `force_merge` threadpool on a node then it will force merge its shards in parallel. Force merge makes the storage for the shard being merged temporarily -increase, up to double its size in case `max_num_segments` parameter is set to -`1`, as all segments need to be rewritten into a new one. +increase, as it may require free space of up to triple its size if the +`max_num_segments` parameter is set to `1`, because all segments need to be +rewritten into a new one.
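For context, a sketch of the kind of request this disk-usage note applies to (hypothetical index name):

[source,console]
----
POST /my-index-000001/_forcemerge?max_num_segments=1
----
// TEST[skip:illustrative sketch only]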
[[forcemerge-api-path-params]] ==== {api-path-parms-title} diff --git a/docs/reference/indices/index-templates.asciidoc b/docs/reference/indices/index-templates.asciidoc index 538fb5b97860a..69936df7feff8 100644 --- a/docs/reference/indices/index-templates.asciidoc +++ b/docs/reference/indices/index-templates.asciidoc @@ -102,7 +102,7 @@ PUT _component_template/runtime_component_template "day_of_week": { "type": "keyword", "script": { - "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" } } } @@ -134,8 +134,7 @@ PUT _index_template/template_1 "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, diff --git a/docs/reference/indices/put-component-template.asciidoc b/docs/reference/indices/put-component-template.asciidoc index 0a0e36b63e6cd..6fd54f81d1222 100644 --- a/docs/reference/indices/put-component-template.asciidoc +++ b/docs/reference/indices/put-component-template.asciidoc @@ -26,8 +26,7 @@ PUT _component_template/template_1 "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } } diff --git a/docs/reference/indices/put-index-template-v1.asciidoc b/docs/reference/indices/put-index-template-v1.asciidoc index 86a8a54edd97f..75325edf77696 100644 --- a/docs/reference/indices/put-index-template-v1.asciidoc +++ b/docs/reference/indices/put-index-template-v1.asciidoc @@ -4,8 +4,8 @@ Create or update index template (legacy) ++++ -IMPORTANT: This documentation is about legacy index templates, -which are deprecated and will be replaced by the composable templates introduced in {es} 7.8. +IMPORTANT: This documentation is about legacy index templates, +which are deprecated and will be replaced by the composable templates introduced in {es} 7.8. For information about composable templates, see <>. Creates or updates an index template. @@ -27,8 +27,7 @@ PUT _template/template_1 "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } } @@ -67,8 +66,8 @@ that you can automatically apply when creating new indices. {es} applies templates to new indices based on an index pattern that matches the index name. -NOTE: Composable templates always take precedence over legacy templates. -If no composable template matches a new index, +NOTE: Composable templates always take precedence over legacy templates. +If no composable template matches a new index, matching legacy templates are applied according to their order. Index templates are only applied during index creation. @@ -263,4 +262,4 @@ The API returns the following response: "version" : 123 } } --------------------------------------------------- \ No newline at end of file +-------------------------------------------------- diff --git a/docs/reference/indices/recovery.asciidoc b/docs/reference/indices/recovery.asciidoc index b4e4bd33f819a..06b4d9d92e49f 100644 --- a/docs/reference/indices/recovery.asciidoc +++ b/docs/reference/indices/recovery.asciidoc @@ -35,21 +35,7 @@ index, or alias. Use the index recovery API to get information about ongoing and completed shard recoveries. -// tag::shard-recovery-desc[] -Shard recovery is the process of initializing a shard copy, such as restoring a -primary shard from a snapshot or syncing a replica shard from a primary shard. 
-When a shard recovery completes, the recovered shard is available for search -and indexing. - -Recovery automatically occurs during the following processes: - -* Node startup. This type of recovery is called a local store recovery. -* Primary shard replication. -* Relocation of a shard to a different node in the same cluster. -* <> operation. -* <>, <>, or -<> operation. -// end::shard-recovery-desc[] +include::{es-ref-dir}/modules/shard-recovery-desc.asciidoc[] The index recovery API reports information about completed recoveries only for shard copies that currently exist in the cluster. It only reports the last @@ -360,7 +346,7 @@ The API returns the following response: "index1" : { "shards" : [ { "id" : 0, - "type" : "STORE", + "type" : "EXISTING_STORE", "stage" : "DONE", "primary" : true, "start_time" : "2014-02-24T12:38:06.349", diff --git a/docs/reference/indices/simulate-template.asciidoc b/docs/reference/indices/simulate-template.asciidoc index c7397ace97886..fd825883dc20c 100644 --- a/docs/reference/indices/simulate-template.asciidoc +++ b/docs/reference/indices/simulate-template.asciidoc @@ -26,8 +26,7 @@ PUT _index_template/template_1 "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -74,19 +73,19 @@ privilege>> to use this API. ``:: (Optional, string) Name of the index template to simulate. -To test a template configuration before you add it to the cluster, +To test a template configuration before you add it to the cluster, omit this parameter and specify the template configuration in the request body. [[simulate-template-api-query-params]] ==== {api-query-parms-title} //// `cause`:: -(Optional, string) The reason for using the specified template for the simulation. +(Optional, string) The reason for using the specified template for the simulation. //// `create`:: -(Optional, Boolean) If `true`, the template passed in the body is -only used if no existing templates match the same index patterns. +(Optional, Boolean) If `true`, the template passed in the body is +only used if no existing templates match the same index patterns. If `false`, the simulation uses the template with the highest priority. Note that the template is not permanently added or updated in either case; it is only used for the simulation. @@ -114,7 +113,7 @@ include::{es-ref-dir}/indices/put-index-template.asciidoc[tag=index-template-api .Properties of `overlapping` [%collapsible%open] ==== -`index_patterns`:: +`index_patterns`:: (array) Index patterns that the superseded template applies to. `name`:: @@ -123,7 +122,7 @@ include::{es-ref-dir}/indices/put-index-template.asciidoc[tag=index-template-api `template`:: (object) -The settings, mappings, and aliases that would be applied to matching indices. +The settings, mappings, and aliases that would be applied to matching indices. + .Properties of `template` [%collapsible%open] @@ -227,8 +226,8 @@ The response shows the index settings, mappings, and aliases applied by the `fin [[simulate-template-config-ex]] ===== Simulating an arbitrary template configuration -To see what settings will be applied by a template before you add it to the cluster, -you can pass a template configuration in the request body. +To see what settings will be applied by a template before you add it to the cluster, +you can pass a template configuration in the request body. The specified template is used for the simulation if it has a higher priority than existing templates. 
[source,console] @@ -266,8 +265,8 @@ The response shows any overlapping templates with a lower priority. } }, "mappings" : { - "properties" : { - "@timestamp" : { + "properties" : { + "@timestamp" : { "type" : "date" } } diff --git a/docs/reference/inference/delete-inference.asciidoc b/docs/reference/inference/delete-inference.asciidoc index 2f9d9511e6326..4df72ba672092 100644 --- a/docs/reference/inference/delete-inference.asciidoc +++ b/docs/reference/inference/delete-inference.asciidoc @@ -8,7 +8,7 @@ Deletes an {infer} endpoint. IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. diff --git a/docs/reference/inference/get-inference.asciidoc b/docs/reference/inference/get-inference.asciidoc index 7f4dc1c496837..c3fe841603bcc 100644 --- a/docs/reference/inference/get-inference.asciidoc +++ b/docs/reference/inference/get-inference.asciidoc @@ -8,7 +8,7 @@ Retrieves {infer} endpoint information. IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. diff --git a/docs/reference/inference/images/inference-landscape.png b/docs/reference/inference/images/inference-landscape.png new file mode 100644 index 0000000000000..a35d1370fd09b Binary files /dev/null and b/docs/reference/inference/images/inference-landscape.png differ diff --git a/docs/reference/inference/inference-apis.asciidoc b/docs/reference/inference/inference-apis.asciidoc index 896cb02a9e699..33db148755d8e 100644 --- a/docs/reference/inference/inference-apis.asciidoc +++ b/docs/reference/inference/inference-apis.asciidoc @@ -5,14 +5,16 @@ experimental[] IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in -{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} -APIs offer an alternative way to use and manage trained models. However, if you -do not plan to use the {infer} APIs to use these models or if you want to use -non-NLP models, use the <>. +{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, +Google AI Studio or Hugging Face. For built-in models and models uploaded +through Eland, the {infer} APIs offer an alternative way to use and manage +trained models. However, if you do not plan to use the {infer} APIs to use these +models or if you want to use non-NLP models, use the +<>. The {infer} APIs enable you to create {infer} endpoints and use {ml} models of -different providers - such as Cohere, OpenAI, or HuggingFace - as a service. 
Use +different providers - such as Amazon Bedrock, Anthropic, Azure AI Studio, +Cohere, Google AI, Mistral, OpenAI, or HuggingFace - as a service. Use the following APIs to manage {infer} models and perform {infer}: * <> @@ -20,11 +22,25 @@ the following APIs to manage {infer} models and perform {infer}: * <> * <> +[[inference-landscape]] +.A representation of the Elastic inference landscape +image::images/inference-landscape.png[A representation of the Elastic inference landscape,align="center"] + +An {infer} endpoint enables you to use the corresponding {ml} model without +manual deployment and apply it to your data at ingestion time through +<>. + +Choose a model from your provider or use ELSER – a retrieval model trained by +Elastic – and then create an {infer} endpoint with the <>. +Now use <> to perform +<> on your data. include::delete-inference.asciidoc[] include::get-inference.asciidoc[] include::post-inference.asciidoc[] include::put-inference.asciidoc[] +include::service-amazon-bedrock.asciidoc[] +include::service-anthropic.asciidoc[] include::service-azure-ai-studio.asciidoc[] include::service-azure-openai.asciidoc[] include::service-cohere.asciidoc[] diff --git a/docs/reference/inference/post-inference.asciidoc b/docs/reference/inference/post-inference.asciidoc index 3ad23ac3300cc..52131c0b10776 100644 --- a/docs/reference/inference/post-inference.asciidoc +++ b/docs/reference/inference/post-inference.asciidoc @@ -8,7 +8,7 @@ Performs an inference task on an input text by using an {infer} endpoint. IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in {ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Azure, Google AI Studio, Google Vertex AI or -Hugging Face. For built-in models and models uploaded though Eland, the {infer} +Hugging Face. For built-in models and models uploaded through Eland, the {infer} APIs offer an alternative way to use and manage trained models. However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. diff --git a/docs/reference/inference/put-inference.asciidoc b/docs/reference/inference/put-inference.asciidoc index 101c0a24b66b7..ea9fb9de72f06 100644 --- a/docs/reference/inference/put-inference.asciidoc +++ b/docs/reference/inference/put-inference.asciidoc @@ -6,10 +6,16 @@ experimental[] Creates an {infer} endpoint to perform an {infer} task. -IMPORTANT: The {infer} APIs enable you to use certain services, such as built-in -{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, Azure OpenAI, Google AI Studio, Google Vertex AI or Hugging Face. -For built-in models and models uploaded though Eland, the {infer} APIs offer an alternative way to use and manage trained models. -However, if you do not plan to use the {infer} APIs to use these models or if you want to use non-NLP models, use the <>. +[IMPORTANT] +==== +* The {infer} APIs enable you to use certain services, such as built-in +{ml} models (ELSER, E5), models uploaded through Eland, Cohere, OpenAI, Mistral, +Azure OpenAI, Google AI Studio, Google Vertex AI, Anthropic or Hugging Face. +* For built-in models and models uploaded through Eland, the {infer} APIs offer an +alternative way to use and manage trained models. However, if you do not plan to +use the {infer} APIs to use these models or if you want to use non-NLP models, +use the <>.
+==== [discrete] @@ -25,6 +31,22 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo * Requires the `manage_inference` <> (the built-in `inference_admin` role grants this privilege) +[discrete] +[[put-inference-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Refer to the service list in the <> for the available task types. +-- + [discrete] [[put-inference-api-desc]] @@ -32,15 +54,22 @@ However, if you do not plan to use the {infer} APIs to use these models or if yo The create {infer} API enables you to create an {infer} endpoint and configure a {ml} model to perform a specific {infer} task. -The following services are available through the {infer} API, click the links to review the configuration details of the services: - -* <> -* <> -* <> -* <> (for built-in models and models uploaded through Eland) -* <> -* <> -* <> -* <> -* <> -* <> +The following services are available through the {infer} API. +You can find the available task types next to the service name. +Click the links to review the configuration details of the services: + +* <> (`completion`, `text_embedding`) +* <> (`completion`) +* <> (`completion`, `text_embedding`) +* <> (`completion`, `text_embedding`) +* <> (`completion`, `rerank`, `text_embedding`) +* <> (`rerank`, `sparse_embedding`, `text_embedding` - this service is for built-in models and models uploaded through Eland) +* <> (`sparse_embedding`) +* <> (`completion`, `text_embedding`) +* <> (`rerank`, `text_embedding`) +* <> (`text_embedding`) +* <> (`text_embedding`) +* <> (`completion`, `text_embedding`) + +The {es} and ELSER services run on a {ml} node in your {es} cluster. The rest of +the services connect to external providers. \ No newline at end of file diff --git a/docs/reference/inference/service-amazon-bedrock.asciidoc b/docs/reference/inference/service-amazon-bedrock.asciidoc new file mode 100644 index 0000000000000..4ffa368613a0e --- /dev/null +++ b/docs/reference/inference/service-amazon-bedrock.asciidoc @@ -0,0 +1,175 @@ +[[infer-service-amazon-bedrock]] +=== Amazon Bedrock {infer} service + +Creates an {infer} endpoint to perform an {infer} task with the `amazonbedrock` service. + +[discrete] +[[infer-service-amazon-bedrock-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-amazon-bedrock-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `completion`, +* `text_embedding`. +-- + +[discrete] +[[infer-service-amazon-bedrock-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) The type of service supported for the specified task type. +In this case, +`amazonbedrock`. + +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `amazonbedrock` service. +-- + +`access_key`::: +(Required, string) +A valid AWS access key that has permissions to use Amazon Bedrock and access to models for inference requests. + +`secret_key`::: +(Required, string) +A valid AWS secret key that is paired with the `access_key`. 
+To create or manage access and secret keys, see https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html[Managing access keys for IAM users] in the AWS documentation. + +IMPORTANT: You need to provide the access and secret keys only once, during the {infer} model creation. +The <> does not retrieve your access or secret keys. +After creating the {infer} model, you cannot change the associated key pairs. +If you want to use a different access and secret key pair, delete the {infer} model and recreate it with the same name and the updated keys. + +`provider`::: +(Required, string) +The model provider for your deployment. +Note that some providers may support only certain task types. +Supported providers include: + +* `amazontitan` - available for `text_embedding` and `completion` task types +* `anthropic` - available for `completion` task type only +* `ai21labs` - available for `completion` task type only +* `cohere` - available for `text_embedding` and `completion` task types +* `meta` - available for `completion` task type only +* `mistral` - available for `completion` task type only + +`model`::: +(Required, string) +The base model ID or an ARN to a custom model based on a foundational model. +The base model IDs can be found in the https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[Amazon Bedrock model IDs] documentation. +Note that the model ID must be available for the provider chosen, and your IAM user must have access to the model. + +`region`::: +(Required, string) +The region that your model or ARN is deployed in. +The list of available regions per model can be found in the https://docs.aws.amazon.com/bedrock/latest/userguide/models-regions.html[Model support by AWS region] documentation. + +`rate_limit`::: +(Optional, object) +By default, the `amazonbedrock` service sets the number of requests allowed per minute to `240`. +This helps to minimize the number of rate limit errors returned from Amazon Bedrock. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + +`task_settings`:: +(Optional, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `completion` task type +[%collapsible%closed] +===== + +`max_new_tokens`::: +(Optional, integer) +Sets the maximum number for the output tokens to be generated. +Defaults to 64. + +`temperature`::: +(Optional, float) +A number between 0.0 and 1.0 that controls the apparent creativity of the results. At temperature 0.0 the model is most deterministic, at temperature 1.0 most random. +Should not be used if `top_p` or `top_k` is specified. + +`top_p`::: +(Optional, float) +Alternative to `temperature`. A number in the range of 0.0 to 1.0, to eliminate low-probability tokens. Top-p uses nucleus sampling to select top tokens whose sum of likelihoods does not exceed a certain value, ensuring both variety and coherence. +Should not be used if `temperature` is specified. + +`top_k`::: +(Optional, float) +Only available for `anthropic`, `cohere`, and `mistral` providers. +Alternative to `temperature`. Limits samples to the top-K most likely words, balancing coherence and variability. +Should not be used if `temperature` is specified. + +===== ++ +.`task_settings` for the `text_embedding` task type +[%collapsible%closed] +===== + +There are no `task_settings` available for the `text_embedding` task type. 
+ +===== + +[discrete] +[[inference-example-amazonbedrock]] +==== Amazon Bedrock service example + +The following example shows how to create an {infer} endpoint called `amazon_bedrock_embeddings` to perform a `text_embedding` task type. + +Choose chat completion and embeddings models that you have access to from the https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[Amazon Bedrock base models]. + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/amazon_bedrock_embeddings +{ + "service": "amazonbedrock", + "service_settings": { + "access_key": "", + "secret_key": "", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-embed-text-v2:0" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] + +The next example shows how to create an {infer} endpoint called `amazon_bedrock_completion` to perform a `completion` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/completion/amazon_bedrock_completion +{ + "service": "amazonbedrock", + "service_settings": { + "access_key": "", + "secret_key": "", + "region": "us-east-1", + "provider": "amazontitan", + "model": "amazon.titan-text-premier-v1:0" + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/inference/service-anthropic.asciidoc b/docs/reference/inference/service-anthropic.asciidoc new file mode 100644 index 0000000000000..41419db7a6069 --- /dev/null +++ b/docs/reference/inference/service-anthropic.asciidoc @@ -0,0 +1,124 @@ +[[infer-service-anthropic]] +=== Anthropic {infer} service + +Creates an {infer} endpoint to perform an {infer} task with the `anthropic` service. + + +[discrete] +[[infer-service-anthropic-api-request]] +==== {api-request-title} + +`PUT /_inference//` + +[discrete] +[[infer-service-anthropic-api-path-params]] +==== {api-path-parms-title} + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=inference-id] + +``:: +(Required, string) +include::inference-shared.asciidoc[tag=task-type] ++ +-- +Available task types: + +* `completion` +-- + +[discrete] +[[infer-service-anthropic-api-request-body]] +==== {api-request-body-title} + +`service`:: +(Required, string) +The type of service supported for the specified task type. In this case, +`anthropic`. + +`service_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=service-settings] ++ +-- +These settings are specific to the `anthropic` service. +-- + +`api_key`::: +(Required, string) +A valid API key for the Anthropic API. + +`model_id`::: +(Required, string) +The name of the model to use for the {infer} task. +You can find the supported models at https://docs.anthropic.com/en/docs/about-claude/models#model-names[Anthropic models]. + +`rate_limit`::: +(Optional, object) +By default, the `anthropic` service sets the number of requests allowed per minute to `50`. +This helps to minimize the number of rate limit errors returned from Anthropic. +To modify this, set the `requests_per_minute` setting of this object in your service settings: ++ +-- +include::inference-shared.asciidoc[tag=request-per-minute-example] +-- + +`task_settings`:: +(Required, object) +include::inference-shared.asciidoc[tag=task-settings] ++ +.`task_settings` for the `completion` task type +[%collapsible%closed] +===== +`max_tokens`::: +(Required, integer) +The maximum number of tokens to generate before stopping. 
+ +`temperature`::: +(Optional, float) +The amount of randomness injected into the response. ++ +For more details about the supported range, see the https://docs.anthropic.com/en/api/messages[Anthropic messages API]. + +`top_k`::: +(Optional, integer) +Specifies to only sample from the top K options for each subsequent token. ++ +Recommended for advanced use cases only. You usually only need to use `temperature`. ++ +For more details, see the https://docs.anthropic.com/en/api/messages[Anthropic messages API]. + +`top_p`::: +(Optional, float) +Specifies to use Anthropic's nucleus sampling. ++ +In nucleus sampling, Anthropic computes the cumulative distribution over all the options for each subsequent token in decreasing probability order and cut it off once it reaches a particular probability specified by `top_p`. You should either alter `temperature` or `top_p`, but not both. ++ +Recommended for advanced use cases only. You usually only need to use `temperature`. ++ +For more details, see the https://docs.anthropic.com/en/api/messages[Anthropic messages API]. +===== + +[discrete] +[[inference-example-anthropic]] +==== Anthropic service example + +The following example shows how to create an {infer} endpoint called +`anthropic_completion` to perform a `completion` task type. + +[source,console] +------------------------------------------------------------ +PUT _inference/completion/anthropic_completion +{ + "service": "anthropic", + "service_settings": { + "api_key": "", + "model_id": "" + }, + "task_settings": { + "max_tokens": 1024 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] diff --git a/docs/reference/inference/service-cohere.asciidoc b/docs/reference/inference/service-cohere.asciidoc index 52d71e0bc02a5..84eae6e880617 100644 --- a/docs/reference/inference/service-cohere.asciidoc +++ b/docs/reference/inference/service-cohere.asciidoc @@ -131,6 +131,7 @@ Specify whether to return doc text within the results. `top_n`:: (Optional, integer) The number of most relevant documents to return, defaults to the number of the documents. +If this {infer} endpoint is used in a `text_similarity_reranker` retriever query and `top_n` is set, it must be greater than or equal to `rank_window_size` in the query. ===== + .`task_settings` for the `text_embedding` task type diff --git a/docs/reference/inference/service-elasticsearch.asciidoc b/docs/reference/inference/service-elasticsearch.asciidoc index 3b9b5b1928d7b..6fb0b4a38d0ef 100644 --- a/docs/reference/inference/service-elasticsearch.asciidoc +++ b/docs/reference/inference/service-elasticsearch.asciidoc @@ -1,7 +1,12 @@ [[infer-service-elasticsearch]] === Elasticsearch {infer} service -Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` service. +Creates an {infer} endpoint to perform an {infer} task with the `elasticsearch` +service. + +NOTE: If you use the E5 model through the `elasticsearch` service, the API +request will automatically download and deploy the model if it isn't downloaded +yet. [discrete] @@ -35,7 +40,7 @@ Available task types: `service`:: (Required, string) -The type of service supported for the specified task type. In this case, +The type of service supported for the specified task type. In this case, `elasticsearch`. `service_settings`:: @@ -58,7 +63,7 @@ The total number of allocations this model is assigned across machine learning n `num_threads`::: (Required, integer) -Sets the number of threads used by each model allocation during inference. 
This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. +Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. Must be a power of 2. Max allowed value is 32. `task_settings`:: @@ -81,6 +86,9 @@ Returns the document instead of only the index. Defaults to `true`. The following example shows how to create an {infer} endpoint called `my-e5-model` to perform a `text_embedding` task type. +The API request below will automatically download the E5 model if it isn't +already downloaded and then deploy the model. + [source,console] ------------------------------------------------------------ PUT _inference/text_embedding/my-e5-model @@ -98,6 +106,14 @@ PUT _inference/text_embedding/my-e5-model Valid values are `.multilingual-e5-small` and `.multilingual-e5-small_linux-x86_64`. For further details, refer to the {ml-docs}/ml-nlp-e5.html[E5 model documentation]. +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== + [discrete] [[inference-example-eland]] ==== Models uploaded by Eland via the elasticsearch service @@ -107,16 +123,17 @@ The following example shows how to create an {infer} endpoint called [source,console] ------------------------------------------------------------ -PUT _inference/text_embedding/my-msmarco-minilm-model +PUT _inference/text_embedding/my-msmarco-minilm-model <1> { "service": "elasticsearch", "service_settings": { "num_allocations": 1, "num_threads": 1, - "model_id": "msmarco-MiniLM-L12-cos-v5" <1> + "model_id": "msmarco-MiniLM-L12-cos-v5" <2> } } ------------------------------------------------------------ // TEST[skip:TBD] -<1> The `model_id` must be the ID of a text embedding model which has already been -{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland]. \ No newline at end of file +<1> Provide a unique identifier for the inference endpoint. The `inference_id` must be unique and must not match the `model_id`. +<2> The `model_id` must be the ID of a text embedding model which has already been +{ml-docs}/ml-nlp-import-model.html#ml-nlp-import-script[uploaded through Eland]. diff --git a/docs/reference/inference/service-elser.asciidoc b/docs/reference/inference/service-elser.asciidoc index 829ff4968c5be..34c0f7d0a9c53 100644 --- a/docs/reference/inference/service-elser.asciidoc +++ b/docs/reference/inference/service-elser.asciidoc @@ -3,6 +3,9 @@ Creates an {infer} endpoint to perform an {infer} task with the `elser` service. +NOTE: The API request will automatically download and deploy the ELSER model if +it isn't already downloaded. + [discrete] [[infer-service-elser-api-request]] ==== {api-request-title} @@ -34,7 +37,7 @@ Available task types: `service`:: (Required, string) -The type of service supported for the specified task type. In this case, +The type of service supported for the specified task type. In this case, `elser`.
`service_settings`:: @@ -51,7 +54,7 @@ The total number of allocations this model is assigned across machine learning n `num_threads`::: (Required, integer) -Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. +Sets the number of threads used by each model allocation during inference. This generally increases the speed per inference request. The inference process is a compute-bound process; `threads_per_allocations` must not exceed the number of available allocated processors per node. Must be a power of 2. Max allowed value is 32. @@ -63,6 +66,9 @@ The following example shows how to create an {infer} endpoint called `my-elser-model` to perform a `sparse_embedding` task type. Refer to the {ml-docs}/ml-nlp-elser.html[ELSER model documentation] for more info. +The request below will automatically download the ELSER model if it isn't +already downloaded and then deploy the model. + [source,console] ------------------------------------------------------------ PUT _inference/sparse_embedding/my-elser-model @@ -92,4 +98,12 @@ Example response: "task_settings": {} } ------------------------------------------------------------ -// NOTCONSOLE \ No newline at end of file +// NOTCONSOLE + +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== diff --git a/docs/reference/ingest/apis/delete-geoip-database.asciidoc b/docs/reference/ingest/apis/delete-geoip-database.asciidoc new file mode 100644 index 0000000000000..957e59f0f0de4 --- /dev/null +++ b/docs/reference/ingest/apis/delete-geoip-database.asciidoc @@ -0,0 +1,55 @@ +[[delete-geoip-database-api]] +=== Delete geoip database configuration API +++++ +Delete geoip database configuration +++++ + +Deletes a geoip database configuration. + +[source,console] +---- +DELETE /_ingest/geoip/database/my-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + + +[[delete-geoip-database-api-request]] +==== {api-request-title} + +`DELETE /_ingest/geoip/database/` + +[[delete-geoip-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the +`manage` <> to use this API. + +[[delete-geoip-database-api-path-params]] +==== {api-path-parms-title} + +``:: ++ +-- +(Required, string) Database configuration ID used to limit the request. + +-- + + +[[delete-geoip-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] + + +[[delete-geoip-database-api-example]] +==== {api-examples-title} + + +[[delete-geoip-database-api-specific-ex]] +===== Delete a specific geoip database configuration + +[source,console] +---- +DELETE /_ingest/geoip/database/example-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] 
diff --git a/docs/reference/ingest/apis/delete-pipeline.asciidoc b/docs/reference/ingest/apis/delete-pipeline.asciidoc index 6f50251dbf1cd..94ac87c61b56b 100644 --- a/docs/reference/ingest/apis/delete-pipeline.asciidoc +++ b/docs/reference/ingest/apis/delete-pipeline.asciidoc @@ -62,7 +62,7 @@ use a value of `*`. include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] -[[delete-pipeline-api-api-example]] +[[delete-pipeline-api-example]] ==== {api-examples-title} diff --git a/docs/reference/ingest/apis/geoip-stats-api.asciidoc b/docs/reference/ingest/apis/geoip-stats.asciidoc similarity index 100% rename from docs/reference/ingest/apis/geoip-stats-api.asciidoc rename to docs/reference/ingest/apis/geoip-stats.asciidoc diff --git a/docs/reference/ingest/apis/get-geoip-database.asciidoc b/docs/reference/ingest/apis/get-geoip-database.asciidoc new file mode 100644 index 0000000000000..f055e3e759db8 --- /dev/null +++ b/docs/reference/ingest/apis/get-geoip-database.asciidoc @@ -0,0 +1,80 @@ +[[get-geoip-database-api]] +=== Get geoip database configuration API +++++ +Get geoip database configuration +++++ + +Returns information about one or more geoip database configurations. + +[source,console] +---- +GET /_ingest/geoip/database/my-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + + + +[[get-geoip-database-api-request]] +==== {api-request-title} + +`GET /_ingest/geoip/database/` + +`GET /_ingest/geoip/database` + +[[get-geoip-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the + `manage` <> to use this API. + +[[get-geoip-database-api-path-params]] +==== {api-path-parms-title} + +``:: +(Optional, string) +Comma-separated list of database configuration IDs to retrieve. Wildcard (`*`) expressions are +supported. ++ +To get all database configurations, omit this parameter or use `*`. + + +[[get-geoip-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] + + +[[get-geoip-database-api-example]] +==== {api-examples-title} + + +[[get-geoip-database-api-specific-ex]] +===== Get information for a specific geoip database configuration + +[source,console] +---- +GET /_ingest/geoip/database/my-database-id +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + +The API returns the following response: + +[source,console-result] +---- +{ + "databases" : [ + { + "id" : "my-database-id", + "version" : 1, + "modified_date_millis" : 1723040276114, + "database" : { + "name" : "GeoIP2-Domain", + "maxmind" : { + "account_id" : "1234567" + } + } + } + ] +} +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] diff --git a/docs/reference/ingest/apis/get-pipeline.asciidoc b/docs/reference/ingest/apis/get-pipeline.asciidoc index 71a261d97bdeb..f2a1155bca12b 100644 --- a/docs/reference/ingest/apis/get-pipeline.asciidoc +++ b/docs/reference/ingest/apis/get-pipeline.asciidoc @@ -65,7 +65,7 @@ To get all ingest pipelines, omit this parameter or use `*`. 
include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=master-timeout] -[[get-pipeline-api-api-example]] +[[get-pipeline-api-example]] ==== {api-examples-title} diff --git a/docs/reference/ingest/apis/index.asciidoc b/docs/reference/ingest/apis/index.asciidoc index 04fcd500a9721..e068f99ea0ad3 100644 --- a/docs/reference/ingest/apis/index.asciidoc +++ b/docs/reference/ingest/apis/index.asciidoc @@ -13,7 +13,7 @@ Use the following APIs to create, manage, and test ingest pipelines: * <> to create or update a pipeline * <> to retrieve a pipeline configuration * <> to delete a pipeline -* <> to test a pipeline +* <> and <> to test ingest pipelines [discrete] [[ingest-stat-apis]] @@ -21,12 +21,27 @@ Use the following APIs to create, manage, and test ingest pipelines: Use the following APIs to get statistics about ingest processing: -* <> to get download statistics for GeoIP2 databases used with +* <> to get download statistics for IP geolocation databases used with the <>. +[discrete] +[[ingest-geoip-database-apis]] +=== Ingest GeoIP Database APIs + +preview::["The commercial IP geolocation database download management APIs are in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] + +Use the following APIs to configure and manage commercial IP geolocation database downloads: + +* <> to create or update a database configuration +* <> to retrieve a database configuration +* <> to delete a database configuration + include::put-pipeline.asciidoc[] -include::delete-pipeline.asciidoc[] -include::geoip-stats-api.asciidoc[] include::get-pipeline.asciidoc[] +include::delete-pipeline.asciidoc[] include::simulate-pipeline.asciidoc[] include::simulate-ingest.asciidoc[] +include::geoip-stats.asciidoc[] +include::put-geoip-database.asciidoc[] +include::get-geoip-database.asciidoc[] +include::delete-geoip-database.asciidoc[] diff --git a/docs/reference/ingest/apis/put-geoip-database.asciidoc b/docs/reference/ingest/apis/put-geoip-database.asciidoc new file mode 100644 index 0000000000000..311c303002387 --- /dev/null +++ b/docs/reference/ingest/apis/put-geoip-database.asciidoc @@ -0,0 +1,72 @@ +[[put-geoip-database-api]] +=== Create or update geoip database configuration API +++++ +Create or update geoip database configuration +++++ + +Creates or updates an IP geolocation database configuration. + +IMPORTANT: The Maxmind `account_id` shown below requires a license key. Because the license key is sensitive information, +it is stored as a <> in {es} named `ingest.geoip.downloader.maxmind.license_key`. Only +one Maxmind license key is currently allowed per {es} cluster. A valid license key must be in the secure settings in order +to download from Maxmind. The license key setting does not take effect until all nodes are restarted. + +[source,console] +---- +PUT _ingest/geoip/database/my-database-id +{ + "name": "GeoIP2-Domain", + "maxmind": { + "account_id": "1025402" + } +} +---- +// TEST[skip:we don't want to leak the enterprise-geoip-downloader task, which touching these APIs would cause. Therefore, skip this test.] + +[[put-geoip-database-api-request]] +==== {api-request-title} + +`PUT /_ingest/geoip/database/` + +[[put-geoip-database-api-prereqs]] +==== {api-prereq-title} + +* If the {es} {security-features} are enabled, you must have the +`manage` <> to use this API. 
+ + +[[put-geoip-database-api-path-params]] +==== {api-path-parms-title} + +``:: ++ +__ +(Required, string) ID of the database configuration to create or update. + +[[put-geoip-database-api-query-params]] +==== {api-query-parms-title} + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=timeoutparms] + +[[put-geoip-database-api-request-body]] +==== {api-request-body-title} + +// tag::geoip-database-object[] +`name`:: +(Required, string) +The provider-assigned name of the IP geolocation database to download. + +``:: +(Required, a provider object and its associated configuration) +The configuration necessary to identify which IP geolocation provider to use to download +the database, as well as any provider-specific configuration necessary for such downloading. ++ +At present, the only supported provider is `maxmind`, and the maxmind provider +requires that an `account_id` (string) is configured. +// end::geoip-database-object[] + +[[geoip-database-configuration-licensing]] +==== Licensing + +Downloading databases from third party providers is a commercial feature that requires an +appropriate license. For more information, refer to https://www.elastic.co/subscriptions. diff --git a/docs/reference/ingest/common-log-format-example.asciidoc b/docs/reference/ingest/common-log-format-example.asciidoc index 3fd296a167a6a..be20b49cf39f7 100644 --- a/docs/reference/ingest/common-log-format-example.asciidoc +++ b/docs/reference/ingest/common-log-format-example.asciidoc @@ -28,7 +28,7 @@ Pipelines**. image::images/ingest/ingest-pipeline-list.png[Kibana's Ingest Pipelines list view,align="center"] . Click **Create pipeline > New pipeline**. -. Set **Name** to `my-pipeline` and optionally add a description for the +. Set **Name** to `my-pipeline` and optionally add a description for the pipeline. . Add a <> to parse the log message: @@ -128,6 +128,7 @@ PUT _ingest/pipeline/my-pipeline // end::common-log-pipeline[] } ---- +// TEST[skip:This can output a warning, and asciidoc doesn't support allowed_warnings] //// -- diff --git a/docs/reference/ingest/processors/date.asciidoc b/docs/reference/ingest/processors/date.asciidoc index fd045abfa0c95..7c65c8c1468b3 100644 --- a/docs/reference/ingest/processors/date.asciidoc +++ b/docs/reference/ingest/processors/date.asciidoc @@ -67,3 +67,11 @@ the timezone and locale values. } -------------------------------------------------- // NOTCONSOLE + +[WARNING] +==== +// tag::locale-warning[] +The text strings accepted by textual date formats, and calculations for week-dates, depend on the JDK version +that Elasticsearch is running on. For more information see <>. +// end::locale-warning[] +==== diff --git a/docs/reference/ingest/processors/geoip.asciidoc b/docs/reference/ingest/processors/geoip.asciidoc index 738ac234d6162..3a9ba58dedbf0 100644 --- a/docs/reference/ingest/processors/geoip.asciidoc +++ b/docs/reference/ingest/processors/geoip.asciidoc @@ -24,6 +24,9 @@ stats API>>. If your cluster can't connect to the Elastic GeoIP endpoint or you want to manage your own updates, see <>. +If you would like to have {es} download database files directly from Maxmind using your own provided +license key, see <>. + If {es} can't connect to the endpoint for 30 days all updated databases will become invalid. {es} will stop enriching documents with geoip data and will add `tags: ["_geoip_expired_database"]` field instead. @@ -36,9 +39,9 @@ field instead. 
[options="header"] |====== | Name | Required | Default | Description -| `field` | yes | - | The field to get the ip address from for the geographical lookup. +| `field` | yes | - | The field to get the IP address from for the geographical lookup. | `target_field` | no | geoip | The field that will hold the geographical information looked up from the MaxMind database. -| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb) or the name of a supported database file in the `ingest-geoip` config directory. +| `database_file` | no | GeoLite2-City.mmdb | The database filename referring to one of the automatically downloaded GeoLite2 databases (GeoLite2-City.mmdb, GeoLite2-Country.mmdb, or GeoLite2-ASN.mmdb), or the name of a supported database file in the `ingest-geoip` config directory, or the name of a <> (with the `.mmdb` suffix appended). | `properties` | no | [`continent_name`, `country_iso_code`, `country_name`, `region_iso_code`, `region_name`, `city_name`, `location`] * | Controls what properties are added to the `target_field` based on the geoip lookup. | `ignore_missing` | no | `false` | If `true` and `field` does not exist, the processor quietly exits without modifying the document | `first_only` | no | `true` | If `true` only first found geoip data will be returned, even if `field` contains array @@ -64,12 +67,12 @@ depend on what has been found and which properties were configured in `propertie * If the GeoIP2 Domain database is used, then the following fields may be added under the `target_field`: `ip`, and `domain`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 ISP database is used, then the following fields may be added under the `target_field`: `ip`, `asn`, -`organization_name`, `network`, `isp`, `isp_organization`, `mobile_country_code`, and `mobile_network_code`. The fields actually added +`organization_name`, `network`, `isp`, `isp_organization_name`, `mobile_country_code`, and `mobile_network_code`. The fields actually added depend on what has been found and which properties were configured in `properties`. * If the GeoIP2 Enterprise database is used, then the following fields may be added under the `target_field`: `ip`, `country_iso_code`, `country_name`, `continent_code`, `continent_name`, `region_iso_code`, `region_name`, `city_name`, `timezone`, `location`, `asn`, `organization_name`, `network`, `hosting_provider`, `tor_exit_node`, `anonymous_vpn`, `anonymous`, `public_proxy`, -`residential_proxy`, `domain`, `isp`, `isp_organization`, `mobile_country_code`, `mobile_network_code`, `user_type`, and +`residential_proxy`, `domain`, `isp`, `isp_organization_name`, `mobile_country_code`, `mobile_network_code`, `user_type`, and `connection_type`. The fields actually added depend on what has been found and which properties were configured in `properties`. preview::["Do not use the GeoIP2 Anonymous IP, GeoIP2 Connection Type, GeoIP2 Domain, GeoIP2 ISP, and GeoIP2 Enterprise databases in production environments. This functionality is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] diff --git a/docs/reference/ingest/processors/inference.asciidoc b/docs/reference/ingest/processors/inference.asciidoc index 88d97d9422d5e..c942959d34e53 100644 --- a/docs/reference/ingest/processors/inference.asciidoc +++ b/docs/reference/ingest/processors/inference.asciidoc @@ -31,6 +31,7 @@ include::common-options.asciidoc[] `field_map` fields. For NLP models, use the `input_output` option. For {dfanalytics} models, use the `target_field` and `field_map` option. * Each {infer} input field must be single strings, not arrays of strings. +* The `input_field` is processed as is and ignores any <>'s <> at time of {infer} run. ================================================== [discrete] @@ -40,6 +41,11 @@ include::common-options.asciidoc[] Select the `content` field for inference and write the result to `content_embedding`. +IMPORTANT: If the specified `output_field` already exists in the ingest document, it won't be overwritten. +The {infer} results will be appended to the existing fields within `output_field`, which could lead to duplicate fields and potential errors. +To avoid this, use an unique `output_field` field name that does not clash with any existing fields. + + [source,js] -------------------------------------------------- { diff --git a/docs/reference/intro.asciidoc b/docs/reference/intro.asciidoc index 3fc23b44994a7..21b20a7e2f5d0 100644 --- a/docs/reference/intro.asciidoc +++ b/docs/reference/intro.asciidoc @@ -1,267 +1,495 @@ [[elasticsearch-intro]] -== What is {es}? -_**You know, for search (and analysis)**_ - -{es} is the distributed search and analytics engine at the heart of -the {stack}. {ls} and {beats} facilitate collecting, aggregating, and -enriching your data and storing it in {es}. {kib} enables you to -interactively explore, visualize, and share insights into your data and manage -and monitor the stack. {es} is where the indexing, search, and analysis -magic happens. - -{es} provides near real-time search and analytics for all types of data. Whether you -have structured or unstructured text, numerical data, or geospatial data, -{es} can efficiently store and index it in a way that supports fast searches. -You can go far beyond simple data retrieval and aggregate information to discover -trends and patterns in your data. And as your data and query volume grows, the -distributed nature of {es} enables your deployment to grow seamlessly right -along with it. - -While not _every_ problem is a search problem, {es} offers speed and flexibility -to handle data in a wide variety of use cases: - -* Add a search box to an app or website -* Store and analyze logs, metrics, and security event data -* Use machine learning to automatically model the behavior of your data in real - time -* Use {es} as a vector database to create, store, and search vector embeddings -* Automate business workflows using {es} as a storage engine -* Manage, integrate, and analyze spatial information using {es} as a geographic - information system (GIS) -* Store and process genetic data using {es} as a bioinformatics research tool - -We’re continually amazed by the novel ways people use search. But whether -your use case is similar to one of these, or you're using {es} to tackle a new -problem, the way you work with your data, documents, and indices in {es} is -the same. 
+== {es} basics +This guide covers the core concepts you need to understand to get started with {es}. +If you'd prefer to start working with {es} right away, set up a <> and jump to <>. + +This guide covers the following topics: + +* <>: Learn about {es} and some of its main use cases. +* <>: Understand your options for deploying {es} in different environments, including a fast local development setup. +* <>: Understand {es}'s most important primitives and how it stores data. +* <>: Understand your options for ingesting data into {es}. +* <>: Understand your options for searching and analyzing data in {es}. +* <>: Understand the basic concepts required for moving your {es} deployment to production. + +[[elasticsearch-intro-what-is-es]] +=== What is {es}? + +{es-repo}[{es}] is a distributed search and analytics engine, scalable data store, and vector database built on Apache Lucene. +It's optimized for speed and relevance on production-scale workloads. +Use {es} to search, index, store, and analyze data of all shapes and sizes in near real time. + +{es} is the heart of the {estc-welcome-current}/stack-components.html[Elastic Stack]. +Combined with https://www.elastic.co/kibana[{kib}], it powers the following Elastic solutions: + +* https://www.elastic.co/observability[Observability] +* https://www.elastic.co/enterprise-search[Search] +* https://www.elastic.co/security[Security] + +[TIP] +==== +{es} has a lot of features. Explore the full list on the https://www.elastic.co/elasticsearch/features[product webpage^]. +==== + +[discrete] +[[elasticsearch-intro-elastic-stack]] +.What is the Elastic Stack? +******************************* +{es} is the core component of the Elastic Stack, a suite of products for collecting, storing, searching, and visualizing data. +{estc-welcome-current}/stack-components.html[Learn more about the Elastic Stack]. +******************************* + +[discrete] +[[elasticsearch-intro-use-cases]] +==== Use cases + +{es} is used for a wide and growing range of use cases. Here are a few examples: + +**Observability** + +* *Logs, metrics, and traces*: Collect, store, and analyze logs, metrics, and traces from applications, systems, and services. +* *Application performance monitoring (APM)*: Monitor and analyze the performance of business-critical software applications. +* *Real user monitoring (RUM)*: Monitor, quantify, and analyze user interactions with web applications. +* *OpenTelemetry*: Reuse your existing instrumentation to send telemetry data to the Elastic Stack using the OpenTelemetry standard. + +**Search** + +* *Full-text search*: Build a fast, relevant full-text search solution using inverted indexes, tokenization, and text analysis. +* *Vector database*: Store and search vectorized data, and create vector embeddings with built-in and third-party natural language processing (NLP) models. +* *Semantic search*: Understand the intent and contextual meaning behind search queries using tools like synonyms, dense vector embeddings, and learned sparse query-document expansion. +* *Hybrid search*: Combine full-text search with vector search using state-of-the-art ranking algorithms. +* *Build search experiences*: Add hybrid search capabilities to apps or websites, or build enterprise search engines over your organization's internal data sources. +* *Retrieval augmented generation (RAG)*: Use {es} as a retrieval engine to supplement generative AI models with more relevant, up-to-date, or proprietary data for a range of use cases. 
+* *Geospatial search*: Search for locations and calculate spatial relationships using geospatial queries. + +**Security** + +* *Security information and event management (SIEM)*: Collect, store, and analyze security data from applications, systems, and services. +* *Endpoint security*: Monitor and analyze endpoint security data. +* *Threat hunting*: Search and analyze data to detect and respond to security threats. + +This is just a sample of search, observability, and security use cases enabled by {es}. +Refer to Elastic https://www.elastic.co/customers/success-stories[customer success stories] for concrete examples across a range of industries. + +[[elasticsearch-intro-deploy]] +=== Run {es} + +To use {es}, you need a running instance of the {es} service. +You can deploy {es} in various ways. + +**Quick start option** + +* <>: Get started quickly with a minimal local Docker setup for development and testing. + +**Hosted options** + +* {cloud}/ec-getting-started-trial.html[*Elastic Cloud Hosted*]: {es} is available as part of the hosted Elastic Stack offering, deployed in the cloud with your provider of choice. Sign up for a https://cloud.elastic.co/registration[14-day free trial]. +* {serverless-docs}/general/sign-up-trial[*Elastic Cloud Serverless* (technical preview)]: Create serverless projects for autoscaled and fully managed {es} deployments. Sign up for a https://cloud.elastic.co/serverless-registration[14-day free trial]. + +**Advanced options** + +* <>: Install, configure, and run {es} on your own premises. +* {ece-ref}/Elastic-Cloud-Enterprise-overview.html[*Elastic Cloud Enterprise*]: Deploy Elastic Cloud on public or private clouds, virtual machines, or your own premises. +* {eck-ref}/k8s-overview.html[*Elastic Cloud on Kubernetes*]: Deploy Elastic Cloud on Kubernetes. + +// new html page [[documents-indices]] -=== Data in: documents and indices - -{es} is a distributed document store. Instead of storing information as rows of -columnar data, {es} stores complex data structures that have been serialized -as JSON documents. When you have multiple {es} nodes in a cluster, stored -documents are distributed across the cluster and can be accessed immediately -from any node. - -When a document is stored, it is indexed and fully searchable in <>--within 1 second. {es} uses a data structure called an -inverted index that supports very fast full-text searches. An inverted index -lists every unique word that appears in any document and identifies all of the -documents each word occurs in. - -An index can be thought of as an optimized collection of documents and each -document is a collection of fields, which are the key-value pairs that contain -your data. By default, {es} indexes all data in every field and each indexed -field has a dedicated, optimized data structure. For example, text fields are -stored in inverted indices, and numeric and geo fields are stored in BKD trees. -The ability to use the per-field data structures to assemble and return search -results is what makes {es} so fast. - -{es} also has the ability to be schema-less, which means that documents can be -indexed without explicitly specifying how to handle each of the different fields -that might occur in a document. When dynamic mapping is enabled, {es} -automatically detects and adds new fields to the index. 
This default -behavior makes it easy to index and explore your data--just start -indexing documents and {es} will detect and map booleans, floating point and -integer values, dates, and strings to the appropriate {es} data types. - -Ultimately, however, you know more about your data and how you want to use it -than {es} can. You can define rules to control dynamic mapping and explicitly -define mappings to take full control of how fields are stored and indexed. - -Defining your own mappings enables you to: - -* Distinguish between full-text string fields and exact value string fields -* Perform language-specific text analysis -* Optimize fields for partial matching -* Use custom date formats -* Use data types such as `geo_point` and `geo_shape` that cannot be automatically -detected - -It’s often useful to index the same field in different ways for different -purposes. For example, you might want to index a string field as both a text -field for full-text search and as a keyword field for sorting or aggregating -your data. Or, you might choose to use more than one language analyzer to -process the contents of a string field that contains user input. - -The analysis chain that is applied to a full-text field during indexing is also -used at search time. When you query a full-text field, the query text undergoes -the same analysis before the terms are looked up in the index. +=== Indices, documents, and fields +++++ +Indices and documents +++++ + +The index is the fundamental unit of storage in {es}, a logical namespace for storing data that share similar characteristics. +After you have {es} <>, you'll get started by creating an index to store your data. + +An index is a collection of documents uniquely identified by a name or an <>. +This unique name is important because it's used to target the index in search queries and other operations. + +[TIP] +==== +A closely related concept is a <>. +This index abstraction is optimized for append-only timestamped data, and is made up of hidden, auto-generated backing indices. +If you're working with timestamped data, we recommend the {observability-guide}[Elastic Observability] solution for additional tools and optimized content. +==== + +[discrete] +[[elasticsearch-intro-documents-fields]] +==== Documents and fields + +{es} serializes and stores data in the form of JSON documents. +A document is a set of fields, which are key-value pairs that contain your data. +Each document has a unique ID, which you can create or have {es} auto-generate. + +A simple {es} document might look like this: + +[source,js] +---- +{ + "_index": "my-first-elasticsearch-index", + "_id": "DyFpo5EBxE8fzbb95DOa", + "_version": 1, + "_seq_no": 0, + "_primary_term": 1, + "found": true, + "_source": { + "email": "john@smith.com", + "first_name": "John", + "last_name": "Smith", + "info": { + "bio": "Eco-warrior and defender of the weak", + "age": 25, + "interests": [ + "dolphins", + "whales" + ] + }, + "join_date": "2024/05/01" + } +} +---- +// NOTCONSOLE + +[discrete] +[[elasticsearch-intro-documents-fields-data-metadata]] +==== Metadata fields + +An indexed document contains data and metadata. <> are system fields that store information about the documents. +In {es}, metadata fields are prefixed with an underscore. +For example, the following fields are metadata fields: + +* `_index`: The name of the index where the document is stored. +* `_id`: The document's ID. IDs must be unique per index. 
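+
+For illustration only, here is a minimal sketch of where those two metadata fields come from, reusing the hypothetical `my-first-elasticsearch-index` index and document ID from the example above. You can supply the `_id` yourself or let {es} auto-generate it; the second document's field values are made up for this sketch.
+
+[source,console]
+----
+PUT my-first-elasticsearch-index/_doc/DyFpo5EBxE8fzbb95DOa <1>
+{
+  "email": "john@smith.com",
+  "first_name": "John"
+}
+
+POST my-first-elasticsearch-index/_doc <2>
+{
+  "email": "jane@smith.com",
+  "first_name": "Jane"
+}
+----
+// TEST[skip:TBD]
+<1> Indexes a document with an explicit ID supplied in the request path.
+<2> Indexes a document and lets {es} auto-generate the `_id`.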
+ +[discrete] +[[elasticsearch-intro-documents-fields-mappings]] +==== Mappings and data types + +Each index has a <> or schema for how the fields in your documents are indexed. +A mapping defines the <> for each field, how the field should be indexed, +and how it should be stored. +When adding documents to {es}, you have two options for mappings: + +* <>: Let {es} automatically detect the data types and create the mappings for you. Dynamic mapping helps you get started quickly, but might yield suboptimal results for your specific use case due to automatic field type inference. +* <>: Define the mappings up front by specifying data types for each field. Recommended for production use cases, because you have full control over how your data is indexed to suit your specific use case. + +[TIP] +==== +You can use a combination of dynamic and explicit mapping on the same index. +This is useful when you have a mix of known and unknown fields in your data. +==== + +// New html page +[[es-ingestion-overview]] +=== Add data to {es} + +There are multiple ways to ingest data into {es}. +The option that you choose depends on whether you're working with timestamped data or non-timestamped data, where the data is coming from, its complexity, and more. + +[TIP] +==== +You can load {kibana-ref}/connect-to-elasticsearch.html#_add_sample_data[sample data] into your {es} cluster using {kib}, to get started quickly. +==== + +[discrete] +[[es-ingestion-overview-general-content]] +==== General content + +General content is data that does not have a timestamp. +This could be data like vector embeddings, website content, product catalogs, and more. +For general content, you have the following options for adding data to {es} indices: + +* <>: Use the {es} <> to index documents directly, using the Dev Tools {kibana-ref}/console-kibana.html[Console], or cURL. ++ +If you're building a website or app, then you can call Elasticsearch APIs using an https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] in the programming language of your choice. If you use the Python client, then check out the `elasticsearch-labs` repo for various https://github.com/elastic/elasticsearch-labs/tree/main/notebooks/search/python-examples[example notebooks]. +* {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[File upload]: Use the {kib} file uploader to index single files for one-off testing and exploration. The GUI guides you through setting up your index and field mappings. +* https://github.com/elastic/crawler[Web crawler]: Extract and index web page content into {es} documents. +* {enterprise-search-ref}/connectors.html[Connectors]: Sync data from various third-party data sources to create searchable, read-only replicas in {es}. + +[discrete] +[[es-ingestion-overview-timestamped]] +==== Timestamped data + +Timestamped data in {es} refers to datasets that include a timestamp field. If you use the {ecs-ref}/ecs-reference.html[Elastic Common Schema (ECS)], this field is named `@timestamp`. +This could be data like logs, metrics, and traces. + +For timestamped data, you have the following options for adding data to {es} data streams: +* {fleet-guide}/fleet-overview.html[Elastic Agent and Fleet]: The preferred way to index timestamped data. Each Elastic Agent based integration includes default ingestion rules, dashboards, and visualizations to start analyzing your data right away. +You can use the Fleet UI in {kib} to centrally manage Elastic Agents and their policies. 
+* {beats-ref}/beats-reference.html[Beats]: If your data source isn't supported by Elastic Agent, use Beats to collect and ship data to Elasticsearch. You install a separate Beat for each type of data to collect. +* {logstash-ref}/introduction.html[Logstash]: Logstash is an open source data collection engine with real-time pipelining capabilities that supports a wide variety of data sources. You might use this option because neither Elastic Agent nor Beats supports your data source. You can also use Logstash to persist incoming data, or if you need to send the data to multiple destinations. +* {cloud}/ec-ingest-guides.html[Language clients]: The linked tutorials demonstrate how to use {es} programming language clients to ingest data from an application. In these examples, {es} is running on Elastic Cloud, but the same principles apply to any {es} deployment. + +[TIP] +==== +If you're interested in data ingestion pipelines for timestamped data, use the decision tree in the {cloud}/ec-cloud-ingest-data.html#ec-data-ingest-pipeline[Elastic Cloud docs] to understand your options. +==== + +// New html page [[search-analyze]] -=== Information out: search and analyze +=== Search and analyze data + +You can use {es} as a basic document store to retrieve documents and their +metadata. +However, the real power of {es} comes from its advanced search and analytics capabilities. + +You'll use a combination of an API endpoint and a query language to interact with your data. + +[discrete] +[[search-analyze-rest-api]] +==== REST API + +Use REST APIs to manage your {es} cluster, and to index +and search your data. +For testing purposes, you can submit requests +directly from the command line or through the Dev Tools {kibana-ref}/console-kibana.html[Console] in {kib}. +From your applications, you can use a +https://www.elastic.co/guide/en/elasticsearch/client/index.html[client] +in your programming language of choice. -While you can use {es} as a document store and retrieve documents and their -metadata, the real power comes from being able to easily access the full suite -of search capabilities built on the Apache Lucene search engine library. +Refer to <> for a hands-on example of using the `_search` endpoint, adding data to {es}, and running basic searches in Query DSL syntax. -{es} provides a simple, coherent REST API for managing your cluster and indexing -and searching your data. For testing purposes, you can easily submit requests -directly from the command line or through the Developer Console in {kib}. From -your applications, you can use the -https://www.elastic.co/guide/en/elasticsearch/client/index.html[{es} client] -for your language of choice: Java, JavaScript, Go, .NET, PHP, Perl, Python -or Ruby. +[discrete] +[[search-analyze-query-languages]] +==== Query languages + +{es} provides a number of query languages for interacting with your data. + +*Query DSL* is the primary query language for {es} today. + +*{esql}* is a new piped query language and compute engine which was first added in version *8.11*. + +{esql} does not yet support all the features of Query DSL, like full-text search and semantic search. +Look forward to new {esql} features and functionalities in each release. + +Refer to <> for a full overview of the query languages available in {es}. [discrete] -[[search-data]] -==== Searching your data - -The {es} REST APIs support structured queries, full text queries, and complex -queries that combine the two. 
Structured queries are -similar to the types of queries you can construct in SQL. For example, you -could search the `gender` and `age` fields in your `employee` index and sort the -matches by the `hire_date` field. Full-text queries find all documents that -match the query string and return them sorted by _relevance_—how good a -match they are for your search terms. - -In addition to searching for individual terms, you can perform phrase searches, -similarity searches, and prefix searches, and get autocomplete suggestions. - -Have geospatial or other numerical data that you want to search? {es} indexes -non-textual data in optimized data structures that support -high-performance geo and numerical queries. - -You can access all of these search capabilities using {es}'s -comprehensive JSON-style query language (<>). You can also -construct <> to search and aggregate data -natively inside {es}, and JDBC and ODBC drivers enable a broad range of -third-party applications to interact with {es} via SQL. +[discrete] +[[search-analyze-query-dsl]] +===== Query DSL + +<> is a full-featured JSON-style query language that enables complex searching, filtering, and aggregations. +It is the original and most powerful query language for {es} today. + +The <> accepts queries written in Query DSL syntax. [discrete] -[[analyze-data]] -==== Analyzing your data +[[search-analyze-query-dsl-search-filter]] +====== Search and filter with Query DSL + +Query DSL supports a wide range of search techniques, including the following: -{es} aggregations enable you to build complex summaries of your data and gain -insight into key metrics, patterns, and trends. Instead of just finding the -proverbial “needle in a haystack”, aggregations enable you to answer questions -like: +* <>: Search text that has been analyzed and indexed to support phrase or proximity queries, fuzzy matches, and more. +* <>: Search for exact matches using `keyword` fields. +* <>: Search `semantic_text` fields using dense or sparse vector search on embeddings generated in your {es} cluster. +* <>: Search for similar dense vectors using the kNN algorithm for embeddings generated outside of {es}. +* <>: Search for locations and calculate spatial relationships using geospatial queries. -* How many needles are in the haystack? -* What is the average length of the needles? -* What is the median length of the needles, broken down by manufacturer? -* How many needles were added to the haystack in each of the last six months? +Learn about the full range of queries supported by <>. -You can also use aggregations to answer more subtle questions, such as: +You can also filter data using Query DSL. +Filters enable you to include or exclude documents by retrieving documents that match specific field-level criteria. +A query that uses the `filter` parameter indicates <>. + +[discrete] +[[search-analyze-data-query-dsl]] +====== Analyze with Query DSL -* What are your most popular needle manufacturers? -* Are there any unusual or anomalous clumps of needles? +<> are the primary tool for analyzing {es} data using Query DSL. +Aggregations enable you to build complex summaries of your data and gain +insight into key metrics, patterns, and trends. -Because aggregations leverage the same data-structures used for search, they are +Because aggregations leverage the same data structures used for search, they are also very fast. This enables you to analyze and visualize your data in real time.
-Your reports and dashboards update as your data changes so you can take action -based on the latest information. +You can search documents, filter results, and perform analytics at the same time, on the same +data, in a single request. +That means aggregations are calculated in the context of the search query. + +The following aggregation types are available: + +* <>: Calculate metrics, +such as a sum or average, from field values. +* <>: Group documents into buckets based on field values, ranges, +or other criteria. +* <>: Run aggregations on the results of other aggregations. + +Run aggregations by specifying the <>'s `aggs` parameter. +Learn more in <>. -What’s more, aggregations operate alongside search requests. You can search -documents, filter results, and perform analytics at the same time, on the same -data, in a single request. And because aggregations are calculated in the -context of a particular search, you’re not just displaying a count of all -size 70 needles, you’re displaying a count of the size 70 needles -that match your users' search criteria--for example, all size 70 _non-stick -embroidery_ needles. +[discrete] +[[search-analyze-data-esql]] +===== {esql} + +<> is a piped query language for filtering, transforming, and analyzing data. +{esql} is built on top of a new compute engine, where search, aggregation, and transformation functions are +directly executed within {es} itself. +{esql} syntax can also be used within various {kib} tools. + +The <> accepts queries written in {esql} syntax. + +Today, it supports a subset of the features available in Query DSL, like aggregations, filters, and transformations. +It does not yet support full-text search or semantic search. + +It comes with a comprehensive set of <> for working with data and has robust integration with {kib}'s Discover, dashboards and visualizations. + +Learn more in <>, or try https://www.elastic.co/training/introduction-to-esql[our training course]. [discrete] -[[more-features]] -===== But wait, there’s more +[[search-analyze-data-query-languages-table]] +==== List of available query languages + +The following table summarizes all available {es} query languages, to help you choose the right one for your use case. -Want to automate the analysis of your time series data? You can use -{ml-docs}/ml-ad-overview.html[machine learning] features to create accurate -baselines of normal behavior in your data and identify anomalous patterns. With -machine learning, you can detect: +[cols="1,2,2,1", options="header"] +|=== +| Name | Description | Use cases | API endpoint -* Anomalies related to temporal deviations in values, counts, or frequencies -* Statistical rarity -* Unusual behaviors for a member of a population +| <> +| The primary query language for {es}. A powerful and flexible JSON-style language that enables complex queries. +| Full-text search, semantic search, keyword search, filtering, aggregations, and more. +| <> -And the best part? You can do this without having to specify algorithms, models, -or other data science-related configurations. +| <> +| Introduced in *8.11*, the Elasticsearch Query Language ({esql}) is a piped query language for filtering, transforming, and analyzing data. +| Initially tailored towards working with time series data like logs and metrics. +Robust integration with {kib} for querying, visualizing, and analyzing data. +Does not yet support full-text search. +| <> + + +| <> +| Event Query Language (EQL) is a query language for event-based time series data.
Data must contain the `@timestamp` field to use EQL. +| Designed for the threat hunting security use case. +| <> + +| <> +| Allows native, real-time SQL-like querying against {es} data. JDBC and ODBC drivers are available for integration with business intelligence (BI) tools. +| Enables users familiar with SQL to query {es} data using familiar syntax for BI and reporting. +| <> + +| {kibana-ref}/kuery-query.html[Kibana Query Language (KQL)] +| {kib} Query Language (KQL) is a text-based query language for filtering data when you access it through the {kib} UI. +| Use KQL to filter documents where a value for a field exists, matches a given value, or is within a given range. +| N/A + +|=== + +// New html page [[scalability]] -=== Scalability and resilience: clusters, nodes, and shards -++++ -Scalability and resilience -++++ +=== Get ready for production + +Many teams rely on {es} to run their key services. To keep these services running, you can design your {es} deployment +to keep {es} available, even in case of large-scale outages. To keep it running fast, you also can design your +deployment to be responsive to production workloads. + +{es} is built to be always available and to scale with your needs. It does this using a distributed architecture. +By distributing your cluster, you can keep Elastic online and responsive to requests. + +In case of failure, {es} offers tools for cross-cluster replication and cluster snapshots that can +help you fall back or recover quickly. You can also use cross-cluster replication to serve requests based on the +geographic location of your users and your resources. -{es} is built to be always available and to scale with your needs. It does this -by being distributed by nature. You can add servers (nodes) to a cluster to -increase capacity and {es} automatically distributes your data and query load -across all of the available nodes. No need to overhaul your application, {es} -knows how to balance multi-node clusters to provide scale and high availability. -The more nodes, the merrier. - -How does this work? Under the covers, an {es} index is really just a logical -grouping of one or more physical shards, where each shard is actually a -self-contained index. By distributing the documents in an index across multiple -shards, and distributing those shards across multiple nodes, {es} can ensure -redundancy, which both protects against hardware failures and increases -query capacity as nodes are added to a cluster. As the cluster grows (or shrinks), -{es} automatically migrates shards to rebalance the cluster. - -There are two types of shards: primaries and replicas. Each document in an index -belongs to one primary shard. A replica shard is a copy of a primary shard. -Replicas provide redundant copies of your data to protect against hardware -failure and increase capacity to serve read requests -like searching or retrieving a document. - -The number of primary shards in an index is fixed at the time that an index is -created, but the number of replica shards can be changed at any time, without -interrupting indexing or query operations. +{es} also offers security and monitoring tools to help you keep your cluster highly available. [discrete] -[[it-depends]] -==== It depends... +[[use-multiple-nodes-shards]] +==== Use multiple nodes and shards + +[NOTE] +==== +Nodes and shards are what make {es} distributed and scalable. + +These concepts aren’t essential if you’re just getting started. 
How you <> in production determines what you need to know: + +* *Self-managed {es}*: You are responsible for setting up and managing nodes, clusters, shards, and replicas. This includes +managing the underlying infrastructure, scaling, and ensuring high availability through failover and backup strategies. +* *Elastic Cloud*: Elastic can autoscale resources in response to workload changes. Choose from different deployment types +to apply sensible defaults for your use case. A basic understanding of nodes, shards, and replicas is still important. +* *Elastic Cloud Serverless*: You don’t need to worry about nodes, shards, or replicas. These resources are 100% automated +on the serverless platform, which is designed to scale with your workload. +==== -There are a number of performance considerations and trade offs with respect -to shard size and the number of primary shards configured for an index. The more -shards, the more overhead there is simply in maintaining those indices. The -larger the shard size, the longer it takes to move shards around when {es} -needs to rebalance a cluster. +You can add servers (_nodes_) to a cluster to increase capacity, and {es} automatically distributes your data and query load +across all of the available nodes. -Querying lots of small shards makes the processing per shard faster, but more -queries means more overhead, so querying a smaller -number of larger shards might be faster. In short...it depends. +Elastic is able to distribute your data across nodes by subdividing an index into _shards_. Each index in {es} is a grouping +of one or more physical shards, where each shard is a self-contained Lucene index containing a subset of the documents in +the index. By distributing the documents in an index across multiple shards, and distributing those shards across multiple +nodes, {es} increases indexing and query capacity. -As a starting point: +There are two types of shards: _primaries_ and _replicas_. Each document in an index belongs to one primary shard. A replica +shard is a copy of a primary shard. Replicas maintain redundant copies of your data across the nodes in your cluster. +This protects against hardware failure and increases capacity to serve read requests like searching or retrieving a document. -* Aim to keep the average shard size between a few GB and a few tens of GB. For - use cases with time-based data, it is common to see shards in the 20GB to 40GB - range. +[TIP] +==== +The number of primary shards in an index is fixed at the time that an index is created, but the number of replica shards can +be changed at any time, without interrupting indexing or query operations. +==== -* Avoid the gazillion shards problem. The number of shards a node can hold is - proportional to the available heap space. As a general rule, the number of - shards per GB of heap space should be less than 20. +Shard copies in your cluster are automatically balanced across nodes to provide scale and high availability. All nodes are +aware of all the other nodes in the cluster and can forward client requests to the appropriate node. This allows {es} +to distribute indexing and query load across the cluster. -The best way to determine the optimal configuration for your use case is -through https://www.elastic.co/elasticon/conf/2016/sf/quantitative-cluster-sizing[ -testing with your own data and queries]. +If you’re exploring {es} for the first time or working in a development environment, then you can use a cluster with a single node and create indices +with only one shard. 
However, in a production environment, you should build a cluster with multiple nodes and indices +with multiple shards to increase performance and resilience. + +// TODO - diagram + +To learn about optimizing the number and size of shards in your cluster, refer to <>. +To learn about how read and write operations are replicated across shards and shard copies, refer to <>. +To adjust how shards are allocated and balanced across nodes, refer to <>. [discrete] -[[disaster-ccr]] -==== In case of disaster +[[ccr-disaster-recovery-geo-proximity]] +==== CCR for disaster recovery and geo-proximity + +To effectively distribute read and write operations across nodes, the nodes in a cluster need good, reliable connections +to each other. To provide better connections, you typically co-locate the nodes in the same data center or nearby data centers. + +Co-locating nodes in a single location exposes you to the risk of a single outage taking your entire cluster offline. To +maintain high availability, you can prepare a second cluster that can take over in case of disaster by implementing +cross-cluster replication (CCR). -A cluster's nodes need good, reliable connections to each other. To provide -better connections, you typically co-locate the nodes in the same data center or -nearby data centers. However, to maintain high availability, you -also need to avoid any single point of failure. In the event of a major outage -in one location, servers in another location need to be able to take over. The -answer? {ccr-cap} (CCR). +CCR provides a way to automatically synchronize indices from your primary cluster to a secondary remote cluster that +can serve as a hot backup. If the primary cluster fails, the secondary cluster can take over. -CCR provides a way to automatically synchronize indices from your primary cluster -to a secondary remote cluster that can serve as a hot backup. If the primary -cluster fails, the secondary cluster can take over. You can also use CCR to -create secondary clusters to serve read requests in geo-proximity to your users. +You can also use CCR to create secondary clusters to serve read requests in geo-proximity to your users. -{ccr-cap} is active-passive. The index on the primary cluster is -the active leader index and handles all write requests. Indices replicated to -secondary clusters are read-only followers. +Learn more about <> and about <>. + +[TIP] +==== +You can also take <> of your cluster that can be restored in case of failure. +==== [discrete] -[[admin]] -==== Care and feeding - -As with any enterprise system, you need tools to secure, manage, and -monitor your {es} clusters. Security, monitoring, and administrative features -that are integrated into {es} enable you to use {kibana-ref}/introduction.html[{kib}] -as a control center for managing a cluster. Features like <> and <> -help you intelligently manage your data over time. +[[security-and-monitoring]] +==== Security and monitoring + +As with any enterprise system, you need tools to secure, manage, and monitor your {es} clusters. Security, +monitoring, and administrative features that are integrated into {es} enable you to use {kibana-ref}/introduction.html[Kibana] as a +control center for managing a cluster. + +<>. + +<>. + +[discrete] +[[cluster-design]] +==== Cluster design + +{es} offers many options that allow you to configure your cluster to meet your organization’s goals, requirements, +and restrictions. 
You can review the following guides to learn how to tune your cluster to meet your needs: + +* <> +* <> +* <> +* <> +* <> + +Many {es} options come with different performance considerations and trade-offs. The best way to determine the +optimal configuration for your use case is through https://www.elastic.co/elasticon/conf/2016/sf/quantitative-cluster-sizing[testing with your own data and queries]. \ No newline at end of file diff --git a/docs/reference/landing-page.asciidoc b/docs/reference/landing-page.asciidoc index e781dc0aff4e3..f1b5ce8210996 100644 --- a/docs/reference/landing-page.asciidoc +++ b/docs/reference/landing-page.asciidoc @@ -62,7 +62,7 @@ Elasticsearch is the search and analytics engine that powers the Elastic Stack.


diff --git a/docs/reference/mapping.asciidoc b/docs/reference/mapping.asciidoc index 192f581f28d76..5d6245a964104 100644 --- a/docs/reference/mapping.asciidoc +++ b/docs/reference/mapping.asciidoc @@ -33,10 +33,13 @@ mapping values by overriding values in the mapping during the search request. [discrete] [[mapping-dynamic]] == Dynamic mapping -<> allows you to experiment with -and explore data when you’re just getting started. {es} adds new fields -automatically, just by indexing a document. You can add fields to the top-level -mapping, and to inner <> and <> fields. + +When you use <>, {es} automatically +attempts to detect the data type of fields in your documents. This allows +you to get started quickly by just adding data to an index. If you index +additional documents with new fields, {es} will add these fields automatically. +You can add fields to the top-level mapping, and to inner <> +and <> fields. Use <> to define custom mappings that are applied to dynamically added fields based on the matching condition. @@ -44,23 +47,57 @@ applied to dynamically added fields based on the matching condition. [discrete] [[mapping-explicit]] == Explicit mapping -<> allows you to precisely choose how to -define the mapping definition, such as: -* Which string fields should be treated as full text fields. -* Which fields contain numbers, dates, or geolocations. -* The <> of date values. -* Custom rules to control the mapping for - <>. +Use <> to define exactly how data types +are mapped to fields, customized to your specific use case. + +Defining your own mappings enables you to: + +* Define which string fields should be treated as full-text fields. +* Define which fields contain numbers, dates, or geolocations. +* Use data types that cannot be automatically detected (such as `geo_point` and `geo_shape`.) +* Choose date value <>, including custom date formats. +* Create custom rules to control the mapping for <>. +* Optimize fields for partial matching. +* Perform language-specific text analysis. + +[TIP] +==== +It’s often useful to index the same field in different ways for different purposes. +For example, you might want to index a string field as both a text field for full-text +search and as a keyword field for sorting or aggregating your data. Or, you might +choose to use more than one language analyzer to process the contents of a string field +that contains user input. +==== Use <> to make schema changes without reindexing. You can use runtime fields in conjunction with indexed fields to balance resource usage and performance. Your index will be smaller, but with slower search performance. +[discrete] +[[mapping-manage-update]] +== Managing and updating mappings + +Explicit mappings should be defined at index creation for fields you know in advance. +You can still add _new fields_ to mappings at any time, as your data evolves. + +Use the <> to update an existing mapping. + +In most cases, you can't change mappings for fields that are already mapped. +These changes require <>. + +However, you can _update_ mappings under certain conditions: + +* You can add new fields to an existing mapping at any time, explicitly or dynamically. +* You can add new <> for existing fields. +** Documents indexed before the mapping update will not have values for the new multi-fields until they are updated or reindexed. Documents indexed after the mapping change will automatically have values for the new multi-fields. +* Some <> can be updated for existing fields of certain <>. 
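+
+As a minimal sketch of the first two conditions above (the `my-index-000001` index and the field names here are hypothetical), the following request adds a new top-level field and a new multi-field to an existing mapping without reindexing:
+
+[source,console]
+----
+PUT /my-index-000001/_mapping
+{
+  "properties": {
+    "employee-id": { <1>
+      "type": "keyword"
+    },
+    "city": {
+      "type": "text",
+      "fields": {
+        "raw": { <2>
+          "type": "keyword"
+        }
+      }
+    }
+  }
+}
+----
+// TEST[skip:TBD]
+<1> Adds a new `employee-id` field to the existing mapping.
+<2> Adds a new `raw` multi-field under an existing `city` text field. Documents indexed before this change only get values for the new multi-field after they are updated or reindexed.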
+ [discrete] [[mapping-limit-settings]] -== Settings to prevent mapping explosion +== Prevent mapping explosions + Defining too many fields in an index can lead to a mapping explosion, which can cause out of memory errors and difficult situations to recover from. diff --git a/docs/reference/mapping/fields/source-field.asciidoc b/docs/reference/mapping/fields/source-field.asciidoc index ec824e421e015..903b301ab1a96 100644 --- a/docs/reference/mapping/fields/source-field.asciidoc +++ b/docs/reference/mapping/fields/source-field.asciidoc @@ -6,11 +6,11 @@ at index time. The `_source` field itself is not indexed (and thus is not searchable), but it is stored so that it can be returned when executing _fetch_ requests, like <> or <>. -If disk usage is important to you then have a look at -<> which shrinks disk usage at the cost of -only supporting a subset of mappings and slower fetches or (not recommended) -<> which also shrinks disk -usage but disables many features. +If disk usage is important to you, then consider the following options: + +- Using <>, which reconstructs source content at the time of retrieval instead of storing it on disk. This shrinks disk usage, at the cost of slower access to `_source` in <> and <> queries. +- <>. This shrinks disk +usage but disables features that rely on `_source`. include::synthetic-source.asciidoc[] @@ -43,7 +43,7 @@ available then a number of features are not supported: * The <>, <>, and <> APIs. -* In the {kib} link:{kibana-ref}/discover.html[Discover] application, field data will not be displayed. +* In the {kib} link:{kibana-ref}/discover.html[Discover] application, field data will not be displayed. * On the fly <>. diff --git a/docs/reference/mapping/fields/synthetic-source.asciidoc b/docs/reference/mapping/fields/synthetic-source.asciidoc index a0e7aed177a9c..ccea38cf602da 100644 --- a/docs/reference/mapping/fields/synthetic-source.asciidoc +++ b/docs/reference/mapping/fields/synthetic-source.asciidoc @@ -28,45 +28,22 @@ PUT idx While this on the fly reconstruction is *generally* slower than saving the source documents verbatim and loading them at query time, it saves a lot of storage -space. +space. Additional latency can be avoided by not loading `_source` field in queries when it is not needed. + +[[synthetic-source-fields]] +===== Supported fields +Synthetic `_source` is supported by all field types. Depending on implementation details, field types have different properties when used with synthetic `_source`. + +<> construct synthetic `_source` using existing data, most commonly <> and <>. For these field types, no additional space is needed to store the contents of `_source` field. Due to the storage layout of <>, the generated `_source` field undergoes <> compared to original document. + +For all other field types, the original value of the field is stored as is, in the same way as the `_source` field in non-synthetic mode. In this case there are no modifications and field data in `_source` is the same as in the original document. Similarly, malformed values of fields that use <> or <> need to be stored as is. This approach is less storage efficient since data needed for `_source` reconstruction is stored in addition to other data required to index the field (like `doc_values`). [[synthetic-source-restrictions]] ===== Synthetic `_source` restrictions -There are a couple of restrictions to be aware of: +Synthetic `_source` cannot be used together with field mappings that use <>. 
-* When you retrieve synthetic `_source` content it undergoes minor -<> compared to the original JSON. -* Synthetic `_source` can be used with indices that contain only these field -types: - -** <> -** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`] -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> -** <> +Some field types have additional restrictions. These restrictions are documented in the **synthetic `_source`** section of the field type's <>. [[synthetic-source-modifications]] ===== Synthetic `_source` modifications @@ -178,4 +155,40 @@ that ordering. [[synthetic-source-modifications-ranges]] ====== Representation of ranges -Range field vales (e.g. `long_range`) are always represented as inclusive on both sides with bounds adjusted accordingly. See <>. +Range field values (e.g. `long_range`) are always represented as inclusive on both sides with bounds adjusted accordingly. See <>. + +[[synthetic-source-precision-loss-for-point-types]] +====== Reduced precision of `geo_point` values +Values of `geo_point` fields are represented in synthetic `_source` with reduced precision. See <>. + + +[[synthetic-source-fields-native-list]] +===== Field types that support synthetic source with no storage overhead +The following field types support synthetic source using data from <> or <>, and require no additional storage space to construct the `_source` field. + +NOTE: If you enable the <> or <> settings, then additional storage is required to store ignored field values for these types. + +** <> +** {plugins}/mapper-annotated-text-usage.html#annotated-text-synthetic-source[`annotated-text`] +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> +** <> diff --git a/docs/reference/mapping/params/copy-to.asciidoc b/docs/reference/mapping/params/copy-to.asciidoc index 10eebfb027736..b26ceac349a3e 100644 --- a/docs/reference/mapping/params/copy-to.asciidoc +++ b/docs/reference/mapping/params/copy-to.asciidoc @@ -64,16 +64,104 @@ Some important points: * It is the field _value_ which is copied, not the terms (which result from the analysis process). * The original <> field will not be modified to show the copied values. * The same value can be copied to multiple fields, with `"copy_to": [ "field_1", "field_2" ]` -* You cannot copy recursively via intermediary fields such as a `copy_to` on -`field_1` to `field_2` and `copy_to` on `field_2` to `field_3` expecting -indexing into `field_1` will eventuate in `field_3`, instead use copy_to -directly to multiple fields from the originating field. +* You cannot copy recursively using intermediary fields. 
+The following configuration will not copy data from `field_1` to `field_3`: ++ +[source,console] +---- +PUT bad_example_index +{ + "mappings": { + "properties": { + "field_1": { + "type": "text", + "copy_to": "field_2" + }, + "field_2": { + "type": "text", + "copy_to": "field_3" + }, + "field_3": { + "type": "text" + } + } + } +} +---- +Instead, copy to multiple fields from the source field: ++ +[source,console] +---- +PUT good_example_index +{ + "mappings": { + "properties": { + "field_1": { + "type": "text", + "copy_to": ["field_2", "field_3"] + }, + "field_2": { + "type": "text" + }, + "field_3": { + "type": "text" + } + } + } +} +---- + +NOTE: `copy_to` is not supported for field types where values take the form of objects, e.g. `date_range`. + +[float] +[[copy-to-dynamic-mapping]] +==== Dynamic mapping + +Consider the following points when using `copy_to` with dynamic mappings: + * If the target field does not exist in the index mappings, the usual <> behavior applies. By default, with <> set to `true`, a non-existent target field will be -dynamically added to the index mappings. If `dynamic` is set to `false`, the +dynamically added to the index mappings. +* If `dynamic` is set to `false`, the target field will not be added to the index mappings, and the value will not be -copied. If `dynamic` is set to `strict`, copying to a non-existent field will +copied. +* If `dynamic` is set to `strict`, copying to a non-existent field will result in an error. ++ +** If the target field is nested, then `copy_to` fields must specify the full path to the nested field. +Omitting the full path will lead to a `strict_dynamic_mapping_exception`. +Use `"copy_to": ["parent_field.child_field"]` to correctly target a nested field. ++ +For example: ++ +[source,console] +-------------------------------------------------- +PUT /test_index +{ + "mappings": { + "dynamic": "strict", + "properties": { + "description": { + "properties": { + "notes": { + "type": "text", + "copy_to": [ "description.notes_raw"], <1> + "analyzer": "standard", + "search_analyzer": "standard" + }, + "notes_raw": { + "type": "keyword" + } + } + } + } + } +} +-------------------------------------------------- -NOTE: `copy_to` is _not_ supported for field types where values take the form of objects, e.g. `date_range` \ No newline at end of file +<1> The `notes` field is copied to the `notes_raw` field. Targeting `notes_raw` alone instead of `description.notes_raw` +would lead to a `strict_dynamic_mapping_exception`. ++ +In this example, `notes_raw` is not defined at the root of the mapping, but under the `description` field. +Without the fully qualified path, {es} would interpret the `copy_to` target as a root-level field, not as a nested field under `description`. \ No newline at end of file diff --git a/docs/reference/mapping/params/format.asciidoc b/docs/reference/mapping/params/format.asciidoc index 5babb4def2320..b890e62fd0a0b 100644 --- a/docs/reference/mapping/params/format.asciidoc +++ b/docs/reference/mapping/params/format.asciidoc @@ -31,8 +31,38 @@ down to the nearest day. [[custom-date-formats]] ==== Custom date formats -Completely customizable date formats are supported. The syntax for these is explained -https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html[DateTimeFormatter docs]. +Completely customizable date formats are supported. The syntax for these is explained in +https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/time/format/DateTimeFormatter.html[DateTimeFormatter docs]. 
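+
+For example, a minimal sketch of a custom format, assuming a hypothetical `my-index-000001` index: the `date` field below accepts a custom `yyyy-MM-dd HH:mm:ss` pattern, a plain date, or epoch milliseconds.
+
+[source,console]
+----
+PUT my-index-000001
+{
+  "mappings": {
+    "properties": {
+      "date": {
+        "type": "date",
+        "format": "yyyy-MM-dd HH:mm:ss||yyyy-MM-dd||epoch_millis" <1>
+      }
+    }
+  }
+}
+----
+<1> Multiple formats are separated by `||` and are tried in order until one matches.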
+ +[[custom-date-format-locales]] +===== Differences in locale information between JDK versions + +There can be some differences in date formats between JDK versions and different locales. In particular, +there can be differences in text strings used for textual date formats, and there can be differences +in the results of week-date calculations. + +There can be differences in text strings used by the following field specifiers: + +* `B`, `E`, `G`, `O`, `a`, `v`, `z` of any length +* `L`, `M`, `Q`, `q`, `c`, `e` of length 3 or greater +* `Z` of length 4 + +If the text format changes between Elasticsearch or JDK versions, it can cause significant problems +with ingest, output, and re-indexing. It is recommended to always use numerical fields in custom date formats, +which are not affected by locale information. + +There can also be differences in week-date calculations using the `Y`, `W`, and `w` field specifiers. +The underlying data used to calculate week-dates can vary depending on the JDK version and locale; +this can cause differences in the calculated week-date for the same calendar dates. +It is recommended that the built-in week-date formats are used, which will always use ISO rules +for calculating week-dates. + +In particular, there is a significant change in locale information between JDK releases 22 and 23. +Elasticsearch will use the _COMPAT_ locale database when run on JDK 22 and before, +and will use the _CLDR_ locale database when run on JDK 23 and above. This change can cause significant differences +to the textual date formats accepted by Elasticsearch, and to calculated week-dates. If you are using +affected specifiers, you may need to modify your ingest or output integration code to account +for the differences between these two JDK versions. [[built-in-date-formats]] ==== Built In Formats @@ -256,31 +286,37 @@ The following tables lists all the defaults ISO formats supported: `week_date` or `strict_week_date`:: A formatter for a full date as four digit weekyear, two digit week of - weekyear, and one digit day of week: `xxxx-'W'ww-e`. + weekyear, and one digit day of week: `YYYY-'W'ww-e`. + This uses the ISO week-date definition. `week_date_time` or `strict_week_date_time`:: A formatter that combines a full weekyear date and time, separated by a - 'T': `xxxx-'W'ww-e'T'HH:mm:ss.SSSZ`. + 'T': `YYYY-'W'ww-e'T'HH:mm:ss.SSSZ`. + This uses the ISO week-date definition. `week_date_time_no_millis` or `strict_week_date_time_no_millis`:: A formatter that combines a full weekyear date and time without millis, - separated by a 'T': `xxxx-'W'ww-e'T'HH:mm:ssZ`. + separated by a 'T': `YYYY-'W'ww-e'T'HH:mm:ssZ`. + This uses the ISO week-date definition. `weekyear` or `strict_weekyear`:: - A formatter for a four digit weekyear: `xxxx`. + A formatter for a four digit weekyear: `YYYY`. + This uses the ISO week-date definition. `weekyear_week` or `strict_weekyear_week`:: A formatter for a four digit weekyear and two digit week of weekyear: - `xxxx-'W'ww`. + `YYYY-'W'ww`. + This uses the ISO week-date definition. `weekyear_week_day` or `strict_weekyear_week_day`:: A formatter for a four digit weekyear, two digit week of weekyear, and one - digit day of week: `xxxx-'W'ww-e`. + digit day of week: `YYYY-'W'ww-e`. + This uses the ISO week-date definition. 
`year` or `strict_year`:: diff --git a/docs/reference/mapping/runtime.asciidoc b/docs/reference/mapping/runtime.asciidoc index dc21fcfb9261e..190081fa801b4 100644 --- a/docs/reference/mapping/runtime.asciidoc +++ b/docs/reference/mapping/runtime.asciidoc @@ -135,7 +135,7 @@ PUT my-index-000001/ "day_of_week": { "type": "keyword", "script": { - "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" } } }, @@ -291,7 +291,7 @@ GET my-index-000001/_search "day_of_week": { "type": "keyword", "script": { - "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" } } }, @@ -667,7 +667,7 @@ PUT my-index-000001/ "day_of_week": { "type": "keyword", "script": { - "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" } } }, diff --git a/docs/reference/mapping/types/date.asciidoc b/docs/reference/mapping/types/date.asciidoc index a29db79167d2e..47ca4859b3f20 100644 --- a/docs/reference/mapping/types/date.asciidoc +++ b/docs/reference/mapping/types/date.asciidoc @@ -81,6 +81,14 @@ on those dates so they should be avoided. // end::decimal-warning[] ==== +[WARNING] +==== +// tag::locale-warning[] +The text strings accepted by textual date formats, and calculations for week-dates, depend on the JDK version +that Elasticsearch is running on. For more information see <>. +// end::locale-warning[] +==== + [[multiple-date-formats]] ==== Multiple date formats @@ -126,7 +134,7 @@ The following parameters are accepted by `date` fields: The locale to use when parsing dates since months do not have the same names and/or abbreviations in all languages. The default is the - https://docs.oracle.com/javase/8/docs/api/java/util/Locale.html#ROOT[`ROOT` locale], + https://docs.oracle.com/javase/8/docs/api/java/util/Locale.html#ROOT[`ROOT` locale]. <>:: diff --git a/docs/reference/mapping/types/geo-shape.asciidoc b/docs/reference/mapping/types/geo-shape.asciidoc index 20f79df8950af..e50c7d73b1b76 100644 --- a/docs/reference/mapping/types/geo-shape.asciidoc +++ b/docs/reference/mapping/types/geo-shape.asciidoc @@ -18,9 +18,8 @@ Documents using this type can be used: ** a <> (for example, intersecting polygons). * to aggregate documents by geographic grids: ** either <> -** or <>. - -Grid aggregations over `geo_hex` grids are not supported for `geo_shape` fields. +** or <> +** or <> [[geo-shape-mapping-options]] [discrete] diff --git a/docs/reference/mapping/types/rank-features.asciidoc b/docs/reference/mapping/types/rank-features.asciidoc index b54e99ede3fae..25d5278ca220d 100644 --- a/docs/reference/mapping/types/rank-features.asciidoc +++ b/docs/reference/mapping/types/rank-features.asciidoc @@ -70,6 +70,15 @@ GET my-index-000001/_search } } } + +GET my-index-000001/_search +{ + "query": { <6> + "term": { + "topics": "economics" + } + } +} -------------------------------------------------- <1> Rank features fields must use the `rank_features` field type @@ -77,6 +86,7 @@ GET my-index-000001/_search <3> Rank features fields must be a hash with string keys and strictly positive numeric values <4> This query ranks documents by how much they are about the "politics" topic. 
<5> This query ranks documents inversely to the number of "1star" reviews they received. +<6> This query returns documents that store the "economics" feature in the "topics" field. NOTE: `rank_features` fields only support single-valued features and strictly diff --git a/docs/reference/mapping/types/semantic-text.asciidoc b/docs/reference/mapping/types/semantic-text.asciidoc index 6ee30e6b9f831..e0793dbe643f4 100644 --- a/docs/reference/mapping/types/semantic-text.asciidoc +++ b/docs/reference/mapping/types/semantic-text.asciidoc @@ -7,8 +7,8 @@ beta[] -The `semantic_text` field type automatically generates embeddings for text -content using an inference endpoint. +The `semantic_text` field type automatically generates embeddings for text content using an inference endpoint. +Long passages are <> to smaller sections to enable the processing of larger corpuses of text. The `semantic_text` field type specifies an inference endpoint identifier that will be used to generate embeddings. You can create the inference endpoint by using the <>. @@ -52,8 +52,8 @@ Use the <> to create the endpoint. The `inference_id` will not be validated when the mapping is created, but when documents are ingested into the index. When the first document is indexed, the `inference_id` will be used to generate underlying indexing structures for the field. -WARNING: Removing an inference endpoint will cause ingestion of documents and semantic queries to fail on indices that define `semantic_text` fields with that inference endpoint as their `inference_id`. -Please check that inference endpoints are not used in `semantic_text` fields before removal. +WARNING: Removing an {infer} endpoint will cause ingestion of documents and semantic queries to fail on indices that define `semantic_text` fields with that {infer} endpoint as their `inference_id`. +Before removal, check if {infer} endpoints are used in `semantic_text` fields. [discrete] [[auto-text-chunking]] @@ -65,6 +65,9 @@ To allow for large amounts of text to be used in semantic search, `semantic_text Each chunk will include the text subpassage and the corresponding embedding generated from it. When querying, the individual passages will be automatically searched for each document, and the most relevant passage will be used to compute a score. +Documents are split into 250-word sections with a 100-word overlap so that each section shares 100 words with the previous section. +This overlap ensures continuity and prevents vital contextual information in the input text from being lost by a hard break. + [discrete] [[semantic-text-structure]] @@ -118,13 +121,19 @@ In case you want to customize data indexing, use the <> or <> field types and create an ingest pipeline with an <> to generate the embeddings. -<> walks you through the process. +<> walks you through the process. In +these cases - when you use `sparse_vector` or `dense_vector` field types instead +of the `semantic_text` field type to customize indexing - using the +<> is not supported for querying the +field data. + [discrete] [[update-script]] ==== Updates to `semantic_text` fields -Updates that use scripts are not supported when the index contains a `semantic_text` field. +Updates that use scripts are not supported for an index contains a `semantic_text` field. +Even if the script targets non-`semantic_text` fields, the update will fail when the index contains a `semantic_text` field. 
[discrete] diff --git a/docs/reference/mapping/types/sparse-vector.asciidoc b/docs/reference/mapping/types/sparse-vector.asciidoc index d0c2c83b8a8fa..b24f65fcf97ca 100644 --- a/docs/reference/mapping/types/sparse-vector.asciidoc +++ b/docs/reference/mapping/types/sparse-vector.asciidoc @@ -91,7 +91,7 @@ NOTE: `sparse_vector` fields can not be included in indices that were *created* NOTE: `sparse_vector` fields only support strictly positive values. Negative values will be rejected. -NOTE: `sparse_vector` fields do not support querying, sorting or aggregating. +NOTE: `sparse_vector` fields do not support <>, querying, sorting or aggregating. They may only be used within specialized queries. The recommended query to use on these fields are <> queries. They may also be used within legacy <> queries. diff --git a/docs/reference/migration/migrate_8_15.asciidoc b/docs/reference/migration/migrate_8_15.asciidoc index a183e68a50693..1961230da1bbf 100644 --- a/docs/reference/migration/migrate_8_15.asciidoc +++ b/docs/reference/migration/migrate_8_15.asciidoc @@ -16,5 +16,125 @@ coming::[8.15.0] [[breaking-changes-8.15]] === Breaking changes -There are no breaking changes in {es} 8.15. +The following changes in {es} 8.15 might affect your applications +and prevent them from operating normally. +Before upgrading to 8.15, review these changes and take the described steps +to mitigate the impact. + +[discrete] +[[breaking_815_cluster_and_node_setting_changes]] +==== Cluster and node setting changes + +[[change_skip_unavailable_remote_cluster_setting_default_value_to_true]] +.Change `skip_unavailable` remote cluster setting default value to true +[%collapsible] +==== +*Details* + +The default value of the `skip_unavailable` setting is now set to true. All existing and future remote clusters that do not define this setting will use the new default. This setting only affects cross-cluster searches using the _search or _async_search API. + +*Impact* + +Unavailable remote clusters in a cross-cluster search will no longer cause the search to fail unless skip_unavailable is configured to be `false` in elasticsearch.yml or via the `_cluster/settings` API. Unavailable clusters with `skip_unavailable`=`true` (either explicitly or by using the new default) are marked as SKIPPED in the search response metadata section and do not fail the entire search. If users want to ensure that a search returns a failure when a particular remote cluster is not available, `skip_unavailable` must be now be set explicitly. +==== + +[discrete] +[[breaking_815_rollup_changes]] +==== Rollup changes + +[[disallow_new_rollup_jobs_in_clusters_with_no_rollup_usage]] +.Disallow new rollup jobs in clusters with no rollup usage +[%collapsible] +==== +*Details* + +The put rollup API will fail with an error when a rollup job is created in a cluster with no rollup usage + +*Impact* + +Clusters with no rollup usage (either no rollup job or index) can not create new rollup jobs +==== + +[discrete] +[[breaking_815_rest_api_changes]] +==== REST API changes + +[[interpret_timeout_1_as_infinite_ack_timeout]] +.Interpret `?timeout=-1` as infinite ack timeout +[%collapsible] +==== +*Details* + +Today {es} accepts the parameter `?timeout=-1` in many APIs, but interprets +this to mean the same as `?timeout=0`. From 8.15 onwards `?timeout=-1` will +mean to wait indefinitely, aligning the behaviour of this parameter with +other similar parameters such as `?master_timeout`. 
+ +*Impact* + +Use `?timeout=0` to force relevant operations to time out immediately +instead of `?timeout=-1` +==== + +[[replace_model_id_with_inference_id]] +.Replace `model_id` with `inference_id` in GET inference API +[%collapsible] +==== +*Details* + +From 8.15 onwards the <> response will return an +`inference_id` field instead of a `model_id`. + +*Impact* + +If your application uses the `model_id` in a GET inference API response, +switch it to use `inference_id` instead. +==== + + +[discrete] +[[deprecated-8.15]] +=== Deprecations + +The following functionality has been deprecated in {es} 8.15 +and will be removed in a future version. +While this won't have an immediate impact on your applications, +we strongly encourage you to take the described steps to update your code +after upgrading to 8.15. + +To find out if you are using any deprecated functionality, +enable <>. + +[discrete] +[[deprecations_815_cluster_and_node_setting]] +==== Cluster and node setting deprecations + +[[deprecate_absolute_size_values_for_indices_breaker_total_limit_setting]] +.Deprecate absolute size values for `indices.breaker.total.limit` setting +[%collapsible] +==== +*Details* + +Previously, the value of `indices.breaker.total.limit` could be specified as an absolute size in bytes. This setting controls the overal amount of memory the server is allowed to use before taking remedial actions. Setting this to a specific number of bytes led to strange behaviour when the node maximum heap size changed because the circut breaker limit would remain unchanged. This would either leave the value too low, causing part of the heap to remain unused; or it would leave the value too high, causing the circuit breaker to be ineffective at preventing OOM errors. The only reasonable behaviour for this setting is that it scales with the size of the heap, and so absolute byte limits are now deprecated. + +*Impact* + +Users must change their configuration to specify a percentage instead of an absolute number of bytes for `indices.breaker.total.limit`, or else accept the default, which is already specified as a percentage. +==== + +[discrete] +[[deprecations_815_rest_api]] +==== REST API deprecations + +[[deprecate_text_expansion_weighted_tokens_queries]] +.Deprecate `text_expansion` and `weighted_tokens` queries +[%collapsible] +==== +*Details* + +The `text_expansion` and `weighted_tokens` queries have been replaced by `sparse_vector`. + +*Impact* + +Please update your existing `text_expansion` and `weighted_tokens` queries to use `sparse_vector.` +==== + +[[deprecate_using_slm_privileges_to_access_ilm]] +.Deprecate using slm privileges to access ilm +[%collapsible] +==== +*Details* + +The `read_slm` privilege can get the ILM status, and the `manage_slm` privilege can start and stop ILM. Access to these APIs should be granted using the `read_ilm` and `manage_ilm` privileges instead. Access to ILM APIs will be removed from SLM privileges in a future major release, and is now deprecated. + +*Impact* + +Users that need access to the ILM status API should now use the `read_ilm` privilege. Users that need to start and stop ILM, should use the `manage_ilm` privilege. 
+==== diff --git a/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc b/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc index 5c061daa1cd44..63a0f047db647 100644 --- a/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc +++ b/docs/reference/ml/anomaly-detection/functions/ml-geo-functions.asciidoc @@ -52,6 +52,12 @@ detects anomalies where the geographic location of a credit card transaction is unusual for a particular customer’s credit card. An anomaly might indicate fraud. +A "typical" value indicates a centroid of a cluster of previously observed +locations that is closest to the "actual" location at that time. For example, +there may be one centroid near the person's home that is associated with the +cluster of local grocery stores and restaurants, and another centroid near the +person's work associated with the cluster of lunch and coffee places. + IMPORTANT: The `field_name` that you supply must be a single string that contains two comma-separated numbers of the form `latitude,longitude`, a `geo_point` field, a `geo_shape` field that contains point values, or a diff --git a/docs/reference/ml/ml-shared.asciidoc b/docs/reference/ml/ml-shared.asciidoc index a69fd2f1812e9..971cf628ed8b0 100644 --- a/docs/reference/ml/ml-shared.asciidoc +++ b/docs/reference/ml/ml-shared.asciidoc @@ -913,7 +913,7 @@ For example: "day_of_week": { "type": "keyword", "script": { - "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + "source": "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" } } } diff --git a/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc b/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc index eef90630eb35b..e29bc8823ab29 100644 --- a/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc +++ b/docs/reference/ml/trained-models/apis/put-trained-models.asciidoc @@ -588,7 +588,7 @@ Refer to <> to review the properties of the `tokenization` object. ===== -`text_similarity`:::: +`text_similarity`::: (Object, optional) include::{es-ref-dir}/ml/ml-shared.asciidoc[tag=inference-config-text-similarity] + diff --git a/docs/reference/modules/cluster.asciidoc b/docs/reference/modules/cluster.asciidoc index 4b9ede5450683..b3eaa5b47c238 100644 --- a/docs/reference/modules/cluster.asciidoc +++ b/docs/reference/modules/cluster.asciidoc @@ -1,9 +1,7 @@ [[modules-cluster]] === Cluster-level shard allocation and routing settings -_Shard allocation_ is the process of allocating shards to nodes. This can -happen during initial recovery, replica allocation, rebalancing, or -when nodes are added or removed. +include::{es-ref-dir}/modules/shard-allocation-desc.asciidoc[] One of the main roles of the master is to decide which shards to allocate to which nodes, and when to move shards between nodes in order to rebalance the diff --git a/docs/reference/modules/cluster/misc.asciidoc b/docs/reference/modules/cluster/misc.asciidoc index 3da5df4f16414..75eaca88c66b1 100644 --- a/docs/reference/modules/cluster/misc.asciidoc +++ b/docs/reference/modules/cluster/misc.asciidoc @@ -11,12 +11,12 @@ An entire cluster may be set to read-only with the following setting: (<>) Make the whole cluster read only (indices do not accept write operations), metadata is not allowed to be modified (create or delete - indices). + indices). Defaults to `false`. 
`cluster.blocks.read_only_allow_delete`:: (<>) Identical to `cluster.blocks.read_only` but allows to delete indices - to free up resources. + to free up resources. Defaults to `false`. WARNING: Don't rely on this setting to prevent changes to your cluster. Any user with access to the <> diff --git a/docs/reference/modules/cluster/remote-clusters-settings.asciidoc b/docs/reference/modules/cluster/remote-clusters-settings.asciidoc index 2308ec259da48..537783ef6ff01 100644 --- a/docs/reference/modules/cluster/remote-clusters-settings.asciidoc +++ b/docs/reference/modules/cluster/remote-clusters-settings.asciidoc @@ -6,7 +6,10 @@ mode are described separately. `cluster.remote..mode`:: The mode used for a remote cluster connection. The only supported modes are - `sniff` and `proxy`. + `sniff` and `proxy`. The default is `sniff`. See <> for + further information about these modes, and <> + and <> for further information about their + settings. `cluster.remote.initial_connect_timeout`:: @@ -97,6 +100,11 @@ you configure the remotes. [[remote-cluster-sniff-settings]] ==== Sniff mode remote cluster settings +To use <> to connect to a remote cluster, set +`cluster.remote..mode: sniff` and then configure the following +settings. You may also leave `cluster.remote..mode` unset since +`sniff` is the default mode. + `cluster.remote..seeds`:: The list of seed nodes used to sniff the remote cluster state. @@ -117,6 +125,10 @@ you configure the remotes. [[remote-cluster-proxy-settings]] ==== Proxy mode remote cluster settings +To use <> to connect to a remote cluster, set +`cluster.remote..mode: proxy` and then configure the following +settings. + `cluster.remote..proxy_address`:: The address used for all remote connections. diff --git a/docs/reference/modules/discovery/bootstrapping.asciidoc b/docs/reference/modules/discovery/bootstrapping.asciidoc index 81ac3f6cc4cdc..5120c1d17e69b 100644 --- a/docs/reference/modules/discovery/bootstrapping.asciidoc +++ b/docs/reference/modules/discovery/bootstrapping.asciidoc @@ -5,7 +5,7 @@ Starting an Elasticsearch cluster for the very first time requires the initial set of <> to be explicitly defined on one or more of the master-eligible nodes in the cluster. This is known as _cluster bootstrapping_. This is only required the first time a cluster starts up. -Freshly-started nodes that are joining a running cluster obtain this +Freshly-started nodes that are joining a running cluster obtain this information from the cluster's elected master. The initial set of master-eligible nodes is defined in the @@ -27,17 +27,20 @@ node: if it is not possible to use the `node.name` of the node and there are multiple nodes sharing a single IP address. +Do not set `cluster.initial_master_nodes` on master-ineligible nodes. + [IMPORTANT] ==== After the cluster has formed, remove the `cluster.initial_master_nodes` setting -from each node's configuration. It should not be set for master-ineligible -nodes, master-eligible nodes joining an existing cluster, or nodes which are -restarting. - -If you leave `cluster.initial_master_nodes` in place once the cluster has -formed then there is a risk that a future misconfiguration may result in -bootstrapping a new cluster alongside your existing cluster. It may not be -possible to recover from this situation without losing data. +from each node's configuration and never set it again for this cluster. Do not +configure this setting on nodes joining an existing cluster. Do not configure +this setting on nodes which are restarting. 
Do not configure this setting when +performing a full-cluster restart. + +If you leave `cluster.initial_master_nodes` in place once the cluster has formed +then there is a risk that a future misconfiguration may result in bootstrapping +a new cluster alongside your existing cluster. It may not be possible to recover +from this situation without losing data. ==== The simplest way to create a new cluster is for you to select one of your diff --git a/docs/reference/modules/discovery/discovery-settings.asciidoc b/docs/reference/modules/discovery/discovery-settings.asciidoc index 401d1e7206d35..daf84f0292c12 100644 --- a/docs/reference/modules/discovery/discovery-settings.asciidoc +++ b/docs/reference/modules/discovery/discovery-settings.asciidoc @@ -46,8 +46,11 @@ setting, see <>. Sets the initial set of master-eligible nodes in a brand-new cluster. By default this list is empty, meaning that this node expects to join a cluster that has already been bootstrapped. Remove this setting once the cluster has -formed. Do not use this setting when restarting nodes or when adding new nodes -to an existing cluster. See <>. +formed, and never set it again for this cluster. Do not configure this setting +on master-ineligible nodes. Do not configure this setting on nodes joining an +existing cluster. Do not configure this setting on nodes which are restarting. +Do not configure this setting when performing a full-cluster restart. See +<>. [discrete] ==== Expert settings diff --git a/docs/reference/modules/discovery/fault-detection.asciidoc b/docs/reference/modules/discovery/fault-detection.asciidoc index dfa49e5b0d9af..d12985b70597c 100644 --- a/docs/reference/modules/discovery/fault-detection.asciidoc +++ b/docs/reference/modules/discovery/fault-detection.asciidoc @@ -151,26 +151,25 @@ down, but if they rejoin the cluster without restarting then there is some other problem. {es} is designed to run on a fairly reliable network. It opens a number of TCP -connections between nodes and expects these connections to remain open forever. -If a connection is closed then {es} will try and reconnect, so the occasional -blip should have limited impact on the cluster even if the affected node -briefly leaves the cluster. In contrast, repeatedly-dropped connections will -severely affect its operation. +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. The connections from the elected master node to every other node in the cluster are particularly important. The elected master never spontaneously closes its -outbound connections to other nodes. Similarly, once a connection is fully -established, a node never spontaneously close its inbound connections unless -the node is shutting down. +outbound connections to other nodes. Similarly, once an inbound connection is +fully established, a node never spontaneously it unless the node is shutting +down. If you see a node unexpectedly leave the cluster with the `disconnected` reason, something other than {es} likely caused the connection to close. A common cause is a misconfigured firewall with an improper timeout or another policy that's <>. It could also be caused by general connectivity issues, such as packet loss due to faulty -hardware or network congestion. 
If you're an advanced user, you can get more -detailed information about network exceptions by configuring the following -loggers: +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: [source,yaml] ---- @@ -178,9 +177,11 @@ logger.org.elasticsearch.transport.TcpTransport: DEBUG logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG ---- -In extreme cases, you may need to take packet captures using `tcpdump` to -determine whether messages between nodes are being dropped or rejected by some -other device on the network. +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. [discrete] ===== Diagnosing `lagging` nodes @@ -299,4 +300,48 @@ To reconstruct the output, base64-decode the data and decompress it using ---- cat shardlock.log | sed -e 's/.*://' | base64 --decode | gzip --decompress ---- -//end::troubleshooting[] \ No newline at end of file + +[discrete] +===== Diagnosing other network disconnections + +{es} is designed to run on a fairly reliable network. It opens a number of TCP +connections between nodes and expects these connections to remain open +<>. If a connection is closed then {es} will +try and reconnect, so the occasional blip may fail some in-flight operations +but should otherwise have limited impact on the cluster. In contrast, +repeatedly-dropped connections will severely affect its operation. + +{es} nodes will only actively close an outbound connection to another node if +the other node leaves the cluster. See +<> for further information about +identifying and troubleshooting this situation. If an outbound connection +closes for some other reason, nodes will log a message such as the following: + +[source,text] +---- +[INFO ][o.e.t.ClusterConnectionManager] [node-1] transport connection to [{node-2}{g3cCUaMDQJmQ2ZLtjr-3dg}{10.0.0.1:9300}] closed by remote +---- + +Similarly, once an inbound connection is fully established, a node never +spontaneously closes it unless the node is shutting down. + +Therefore if you see a node report that a connection to another node closed +unexpectedly, something other than {es} likely caused the connection to close. +A common cause is a misconfigured firewall with an improper timeout or another +policy that's <>. It could also +be caused by general connectivity issues, such as packet loss due to faulty +hardware or network congestion. If you're an advanced user, configure the +following loggers to get more detailed information about network exceptions: + +[source,yaml] +---- +logger.org.elasticsearch.transport.TcpTransport: DEBUG +logger.org.elasticsearch.xpack.core.security.transport.netty4.SecurityNetty4Transport: DEBUG +---- + +If these logs do not show enough information to diagnose the problem, obtain a +packet capture simultaneously from the nodes at both ends of an unstable +connection and analyse it alongside the {es} logs from those nodes to determine +if traffic between the nodes is being disrupted by another device on the +network. 
+//end::troubleshooting[] diff --git a/docs/reference/modules/gateway.asciidoc b/docs/reference/modules/gateway.asciidoc index d6ee730d5021c..bf7e6de64f093 100644 --- a/docs/reference/modules/gateway.asciidoc +++ b/docs/reference/modules/gateway.asciidoc @@ -4,11 +4,11 @@ The local gateway stores the cluster state and shard data across full cluster restarts. -The following _static_ settings, which must be set on every master node, +The following _static_ settings, which must be set on every <>, control how long a freshly elected master should wait before it tries to -recover the cluster state and the cluster's data. +recover the <> and the cluster's data. -NOTE: These settings only take effect on a full cluster restart. +NOTE: These settings only take effect during a <>. `gateway.expected_data_nodes`:: (<>) diff --git a/docs/reference/modules/indices/circuit_breaker.asciidoc b/docs/reference/modules/indices/circuit_breaker.asciidoc index a5a787e23d170..452d4e99704ce 100644 --- a/docs/reference/modules/indices/circuit_breaker.asciidoc +++ b/docs/reference/modules/indices/circuit_breaker.asciidoc @@ -175,7 +175,8 @@ an `OutOfMemory` exception which would bring down the node. To prevent this from happening, a special <> is used, which limits the memory allocation during the execution of a <> query. When the breaker is triggered, an `org.elasticsearch.common.breaker.CircuitBreakingException` -is thrown and a descriptive error message is returned to the user. +is thrown and a descriptive error message including `circuit_breaking_exception` +is returned to the user. This <> can be configured using the following settings: diff --git a/docs/reference/modules/indices/search-settings.asciidoc b/docs/reference/modules/indices/search-settings.asciidoc index e43ec076578d4..003974815c4bd 100644 --- a/docs/reference/modules/indices/search-settings.asciidoc +++ b/docs/reference/modules/indices/search-settings.asciidoc @@ -33,6 +33,39 @@ a single response. Defaults to 65,536. + Requests that attempt to return more than this limit will return an error. +[[search-settings-only-allowed-scripts]] +`search.aggs.only_allowed_metric_scripts`:: +(<>, boolean) +Configures whether only explicitly allowed scripts can be used in +<>. +Defaults to `false`. ++ +Requests using scripts not contained in either +<> +or +<> +will return an error. + +[[search-settings-allowed-inline-scripts]] +`search.aggs.allowed_inline_metric_scripts`:: +(<>, list of strings) +List of inline scripts that can be used in scripted metrics aggregations when +<> +is set to `true`. +Defaults to an empty list. ++ +Requests using other inline scripts will return an error. + +[[search-settings-allowed-stored-scripts]] +`search.aggs.allowed_stored_metric_scripts`:: +(<>, list of strings) +List of ids of stored scripts that can be used in scripted metrics aggregations when +<> +is set to `true`. +Defaults to an empty list. ++ +Requests using other stored scripts will return an error. + [[indices-query-bool-max-nested-depth]] `indices.query.bool.max_nested_depth`:: (<>, integer) Maximum nested depth of queries. Defaults to `30`. diff --git a/docs/reference/modules/network.asciidoc b/docs/reference/modules/network.asciidoc index 55c236ce43574..db3818bff234d 100644 --- a/docs/reference/modules/network.asciidoc +++ b/docs/reference/modules/network.asciidoc @@ -5,7 +5,9 @@ Each {es} node has two different network interfaces. Clients send requests to {es}'s REST APIs using its <>, but nodes communicate with other nodes using the <>. 
The transport interface is also used for communication with -<>. +<>. The transport interface uses a custom +binary protocol sent over <> TCP channels. +Both interfaces can be configured to use <>. You can configure both of these interfaces at the same time using the `network.*` settings. If you have a more complicated network, you might need to diff --git a/docs/reference/modules/remote-clusters.asciidoc b/docs/reference/modules/remote-clusters.asciidoc index 25217302b7631..ca1c507aa4ed9 100644 --- a/docs/reference/modules/remote-clusters.asciidoc +++ b/docs/reference/modules/remote-clusters.asciidoc @@ -1,7 +1,7 @@ [[remote-clusters]] == Remote clusters You can connect a local cluster to other {es} clusters, known as _remote -clusters_. Remote clusters can be located in different datacenters or +clusters_. Remote clusters can be located in different datacenters or geographic regions, and contain indices or data streams that can be replicated with {ccr} or searched by a local cluster using {ccs}. @@ -30,9 +30,9 @@ capabilities, the local and remote cluster must be on the same [discrete] === Add remote clusters -NOTE: The instructions that follow describe how to create a remote connection from a -self-managed cluster. You can also set up {ccs} and {ccr} from an -link:https://www.elastic.co/guide/en/cloud/current/ec-enable-ccs.html[{ess} deployment] +NOTE: The instructions that follow describe how to create a remote connection from a +self-managed cluster. You can also set up {ccs} and {ccr} from an +link:https://www.elastic.co/guide/en/cloud/current/ec-enable-ccs.html[{ess} deployment] or from an link:https://www.elastic.co/guide/en/cloud-enterprise/current/ece-enable-ccs.html[{ece} deployment]. To add remote clusters, you can choose between @@ -52,7 +52,7 @@ controls. <>. Certificate based security model:: Uses mutual TLS authentication for cross-cluster operations. User authentication -is performed on the local cluster and a user's role names are passed to the +is performed on the local cluster and a user's role names are passed to the remote cluster. In this model, a superuser on the local cluster gains total read access to the remote cluster, so it is only suitable for clusters that are in the same security domain. <>. @@ -63,13 +63,17 @@ the same security domain. <>. [[sniff-mode]] Sniff mode:: -In sniff mode, a cluster is created using a name and a list of seed nodes. When -a remote cluster is registered, its cluster state is retrieved from one of the -seed nodes and up to three _gateway nodes_ are selected as part of remote -cluster requests. This mode requires that the gateway node's publish addresses -are accessible by the local cluster. +In sniff mode, a cluster alias is registered with a name of your choosing and a +list of addresses of _seed_ nodes specified with the +`cluster.remote..seeds` setting. When you register a remote +cluster using sniff mode, {es} retrieves from one of the seed nodes the +addresses of up to three _gateway nodes_. Each `remote_cluster_client` node in +the local {es} cluster then opens several TCP connections to the publish +addresses of the gateway nodes. This mode therefore requires that the gateway +nodes' publish addresses are accessible to nodes in the local cluster. + -Sniff mode is the default connection mode. +Sniff mode is the default connection mode. See <> +for more information about configuring sniff mode. 
+ [[gateway-nodes-selection]] The _gateway nodes_ selection depends on the following criteria: @@ -84,13 +88,21 @@ However, such nodes still have to satisfy the two above requirements. [[proxy-mode]] Proxy mode:: -In proxy mode, a cluster is created using a name and a single proxy address. -When you register a remote cluster, a configurable number of socket connections -are opened to the proxy address. The proxy is required to route those -connections to the remote cluster. Proxy mode does not require remote cluster -nodes to have accessible publish addresses. +In proxy mode, a cluster alias is registered with a name of your choosing and +the address of a TCP (layer 4) reverse proxy specified with the +`cluster.remote..proxy_address` setting. You must configure this +proxy to route connections to one or more nodes of the remote cluster. When you +register a remote cluster using proxy mode, {es} opens several TCP connections +to the proxy address and uses these connections to communicate with the remote +cluster. In proxy mode {es} disregards the publish addresses of the remote +cluster nodes which means that the publish addresses of the remote cluster +nodes need not be accessible to the local cluster. ++ +Proxy mode is not the default connection mode, so you must set +`cluster.remote..mode: proxy` to use it. See +<> for more information about configuring proxy +mode. + -The proxy mode is not the default connection mode and must be configured. Proxy mode has the same <> as sniff mode. diff --git a/docs/reference/modules/shard-allocation-desc.asciidoc b/docs/reference/modules/shard-allocation-desc.asciidoc new file mode 100644 index 0000000000000..426ad0da72e1b --- /dev/null +++ b/docs/reference/modules/shard-allocation-desc.asciidoc @@ -0,0 +1,2 @@ +Shard allocation is the process of assigning shard copies to nodes. This can +happen during initial recovery, replica allocation, rebalancing, when nodes are added to or removed from the cluster, or when cluster or index settings that impact allocation are updated. \ No newline at end of file diff --git a/docs/reference/modules/shard-ops.asciidoc b/docs/reference/modules/shard-ops.asciidoc new file mode 100644 index 0000000000000..c0e5ee6a220f0 --- /dev/null +++ b/docs/reference/modules/shard-ops.asciidoc @@ -0,0 +1,75 @@ +[[shard-allocation-relocation-recovery]] +=== Shard allocation, relocation, and recovery + +Each <> in Elasticsearch is divided into one or more <>. +Each document in an index belongs to a single shard. + +A cluster can contain multiple copies of a shard. Each shard has one distinguished shard copy called the _primary_, and zero or more non-primary copies called _replicas_. The primary shard copy serves as the main entry point for all indexing operations. The operations on the primary shard copy are then forwarded to its replicas. + +Replicas maintain redundant copies of your data across the <> in your cluster, protecting against hardware failure and increasing capacity to serve read requests like searching or retrieving a document. If the primary shard copy fails, then a replica is promoted to primary and takes over the primary's responsibilities. + +Over the course of normal operation, Elasticsearch allocates shard copies to nodes, relocates shard copies across nodes to balance the cluster or satisfy new allocation constraints, and recovers shards to initialize new copies. In this topic, you'll learn how these operations work and how you can control them. 
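+
+As a quick illustration (an optional sketch; any index pattern and column selection can be used), the cat shards API shows the current shard copies, their state, and the nodes they are allocated to:
+
+[source,console]
+----
+GET _cat/shards?v=true&h=index,shard,prirep,state,node,unassigned.reason
+----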
+ +TIP: To learn about optimizing the number and size of shards in your cluster, refer to <>. To learn about how read and write operations are replicated across shards and shard copies, refer to <>. + +[[shard-allocation]] +==== Shard allocation + +include::{es-ref-dir}/modules/shard-allocation-desc.asciidoc[] + +By default, the primary and replica shard copies for an index can be allocated to any node in the cluster, and may be relocated to rebalance the cluster. + +===== Adjust shard allocation settings + +You can control how shard copies are allocated using the following settings: + +- <>: Use these settings to control how shard copies are allocated and balanced across the entire cluster. For example, you might want to allocate nodes availability zones, or prevent certain nodes from being used so you can perform maintenance. + +- <>: Use these settings to control how the shard copies for a specific index are allocated. For example, you might want to allocate an index to a node in a specific data tier, or to an node with specific attributes. + +===== Monitor shard allocation + +If a shard copy is unassigned, it means that the shard copy is not allocated to any node in the cluster. This can happen if there are not enough nodes in the cluster to allocate the shard copy, or if the shard copy can't be allocated to any node that satisfies the shard allocation filtering rules. When a shard copy is unassigned, your cluster is considered unhealthy and returns a yellow or red cluster health status. + +You can use the following APIs to monitor shard allocation: + +- <> +- <> +- <> + +<>. + +[[shard-recovery]] +==== Shard recovery + +include::{es-ref-dir}/modules/shard-recovery-desc.asciidoc[] + +===== Adjust shard recovery settings + +To control how shards are recovered, for example the resources that can be used by recovery operations, and which indices should be prioritized for recovery, you can adjust the following settings: + +- <> +- <> +- <>, including <> and <> + +Shard recovery operations also respect general shard allocation settings. + +===== Monitor shard recovery + +You can use the following APIs to monitor shard allocation: + + - View a list of in-progress and completed recoveries using the <> + - View detailed information about a specific recovery using the <> + +[[shard-relocation]] +==== Shard relocation + +Shard relocation is the process of moving shard copies from one node to another. This can happen when a node joins or leaves the cluster, or when the cluster is rebalancing. + +When a shard copy is relocated, it is created as a new shard copy on the target node. When the shard copy is fully allocated and recovered, the old shard copy is deleted. If the shard copy being relocated is a primary, then the new shard copy is marked as primary before the old shard copy is deleted. + +===== Adjust shard relocation settings + +You can control how and when shard copies are relocated. For example, you can adjust the rebalancing settings that control when shard copies are relocated to balance the cluster, or the high watermark for disk-based shard allocation that can trigger relocation. These settings are part of the <>. + +Shard relocation operations also respect shard allocation and recovery settings. 
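+
+For example, a hedged sketch of adjusting two of these settings dynamically (the values shown are purely illustrative, not recommendations):
+
+[source,console]
+----
+PUT _cluster/settings
+{
+  "persistent": {
+    "cluster.routing.allocation.cluster_concurrent_rebalance": 4, <1>
+    "cluster.routing.allocation.disk.watermark.high": "85%" <2>
+  }
+}
+----
+<1> The number of shard rebalancing relocations allowed to run concurrently across the cluster.
+<2> Nodes whose disk usage exceeds this watermark will have shard copies relocated away from them.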
\ No newline at end of file diff --git a/docs/reference/modules/shard-recovery-desc.asciidoc b/docs/reference/modules/shard-recovery-desc.asciidoc new file mode 100644 index 0000000000000..67eaceb528962 --- /dev/null +++ b/docs/reference/modules/shard-recovery-desc.asciidoc @@ -0,0 +1,16 @@ +Shard recovery is the process of initializing a shard copy, such as restoring a +primary shard from a snapshot or creating a replica shard from a primary shard. +When a shard recovery completes, the recovered shard is available for search +and indexing. + +Recovery automatically occurs during the following processes: + +* When creating an index for the first time. +* When a node rejoins the cluster and starts up any missing primary shard copies using the data that it holds in its data path. +* Creation of new replica shard copies from the primary. +* Relocation of a shard copy to a different node in the same cluster. +* A <> operation. +* A <>, <>, or +<> operation. + +You can determine the cause of a shard recovery using the <> or <> APIs. \ No newline at end of file diff --git a/docs/reference/modules/threadpool.asciidoc b/docs/reference/modules/threadpool.asciidoc index ed4becbfbb6d0..2d4110bdcb431 100644 --- a/docs/reference/modules/threadpool.asciidoc +++ b/docs/reference/modules/threadpool.asciidoc @@ -121,6 +121,11 @@ There are several thread pools, but the important ones include: `min(5 * (`<>`), 50)` and queue_size of `1000`. +[[modules-threadpool-esql]]`esql_worker`:: + Executes <> operations. Thread pool type is `fixed` with a + size of `int((`<> + `pass:[ * ]3) / 2) + 1`, and queue_size of `1000`. + Thread pool settings are <> and can be changed by editing `elasticsearch.yml`. Changing a specific thread pool can be done by setting its type-specific parameters; for example, changing the number of diff --git a/docs/reference/monitoring/production.asciidoc b/docs/reference/monitoring/production.asciidoc index 381f67e254041..86ffa99fa7f59 100644 --- a/docs/reference/monitoring/production.asciidoc +++ b/docs/reference/monitoring/production.asciidoc @@ -73,7 +73,9 @@ credentials must be valid on both the {kib} server and the monitoring cluster. *** If you plan to use {agent}, create a user that has the `remote_monitoring_collector` -<>. +<> and that the +monitoring related {fleet-guide}/install-uninstall-integration-assets.html#install-integration-assets[integration assets have been installed] +on the remote monitoring cluster. 
*** If you plan to use {metricbeat}, create a user that has the `remote_monitoring_collector` built-in role and a diff --git a/docs/reference/query-dsl.asciidoc b/docs/reference/query-dsl.asciidoc index 4d5504e5fe7ae..2f8f07f21f648 100644 --- a/docs/reference/query-dsl.asciidoc +++ b/docs/reference/query-dsl.asciidoc @@ -72,14 +72,12 @@ include::query-dsl/match-all-query.asciidoc[] include::query-dsl/span-queries.asciidoc[] +include::query-dsl/vector-queries.asciidoc[] + include::query-dsl/special-queries.asciidoc[] include::query-dsl/term-level-queries.asciidoc[] -include::query-dsl/text-expansion-query.asciidoc[] - -include::query-dsl/sparse-vector-query.asciidoc[] - include::query-dsl/minimum-should-match.asciidoc[] include::query-dsl/multi-term-rewrite.asciidoc[] diff --git a/docs/reference/query-dsl/intervals-query.asciidoc b/docs/reference/query-dsl/intervals-query.asciidoc index 63ba4046a395d..1e3380389d861 100644 --- a/docs/reference/query-dsl/intervals-query.asciidoc +++ b/docs/reference/query-dsl/intervals-query.asciidoc @@ -397,68 +397,3 @@ This query does *not* match a document containing the phrase `hot porridge is salty porridge`, because the intervals returned by the match query for `hot porridge` only cover the initial two terms in this document, and these do not overlap the intervals covering `salty`. - -Another restriction to be aware of is the case of `any_of` rules that contain -sub-rules which overlap. In particular, if one of the rules is a strict -prefix of the other, then the longer rule can never match, which can -cause surprises when used in combination with `max_gaps`. Consider the -following query, searching for `the` immediately followed by `big` or `big bad`, -immediately followed by `wolf`: - -[source,console] --------------------------------------------------- -POST _search -{ - "query": { - "intervals" : { - "my_text" : { - "all_of" : { - "intervals" : [ - { "match" : { "query" : "the" } }, - { "any_of" : { - "intervals" : [ - { "match" : { "query" : "big" } }, - { "match" : { "query" : "big bad" } } - ] } }, - { "match" : { "query" : "wolf" } } - ], - "max_gaps" : 0, - "ordered" : true - } - } - } - } -} --------------------------------------------------- - -Counter-intuitively, this query does *not* match the document `the big bad -wolf`, because the `any_of` rule in the middle only produces intervals -for `big` - intervals for `big bad` being longer than those for `big`, while -starting at the same position, and so being minimized away. 
In these cases, -it's better to rewrite the query so that all of the options are explicitly -laid out at the top level: - -[source,console] --------------------------------------------------- -POST _search -{ - "query": { - "intervals" : { - "my_text" : { - "any_of" : { - "intervals" : [ - { "match" : { - "query" : "the big bad wolf", - "ordered" : true, - "max_gaps" : 0 } }, - { "match" : { - "query" : "the big wolf", - "ordered" : true, - "max_gaps" : 0 } } - ] - } - } - } - } -} --------------------------------------------------- diff --git a/docs/reference/query-dsl/knn-query.asciidoc b/docs/reference/query-dsl/knn-query.asciidoc index 05a00b9949912..daf9e9499a189 100644 --- a/docs/reference/query-dsl/knn-query.asciidoc +++ b/docs/reference/query-dsl/knn-query.asciidoc @@ -241,7 +241,7 @@ to <>: * kNN search over nested dense_vectors diversifies the top results over the top-level document * `filter` over the top-level document metadata is supported and acts as a -post-filter +pre-filter * `filter` over `nested` field metadata is not supported A sample query can look like below: diff --git a/docs/reference/query-dsl/query-string-query.asciidoc b/docs/reference/query-dsl/query-string-query.asciidoc index 319ede7c4ac05..b45247ace3735 100644 --- a/docs/reference/query-dsl/query-string-query.asciidoc +++ b/docs/reference/query-dsl/query-string-query.asciidoc @@ -30,7 +30,7 @@ If you don't need to support a query syntax, consider using the syntax, use the <> query, which is less strict. ==== - + [[query-string-query-ex-request]] ==== Example request @@ -83,7 +83,7 @@ could be expensive. There is a limit on the number of fields times terms that can be queried at once. It is defined by the `indices.query.bool.max_clause_count` -<>, which defaults to 4096. +<>. ==== -- diff --git a/docs/reference/query-dsl/semantic-query.asciidoc b/docs/reference/query-dsl/semantic-query.asciidoc index d0eb2da95ebc6..22b5e6c5e6aad 100644 --- a/docs/reference/query-dsl/semantic-query.asciidoc +++ b/docs/reference/query-dsl/semantic-query.asciidoc @@ -128,6 +128,10 @@ If you want to fine-tune a search on a `semantic_text` field, you need to know t You can find the task type using the <>, and check the `task_type` associated with the {infer} service. Depending on the `task_type`, use either the <> or the <> query for greater flexibility and customization. +NOTE: While it is possible to use the `sparse_vector` query or the `knn` query +on a `semantic_text` field, it is not supported to use the `semantic_query` on a +`sparse_vector` or `dense_vector` field type. + [discrete] [[search-sparse-inference]] diff --git a/docs/reference/query-dsl/span-multi-term-query.asciidoc b/docs/reference/query-dsl/span-multi-term-query.asciidoc index aefb3e4b75eb5..5a5f0e1f5ff99 100644 --- a/docs/reference/query-dsl/span-multi-term-query.asciidoc +++ b/docs/reference/query-dsl/span-multi-term-query.asciidoc @@ -39,7 +39,8 @@ GET /_search -------------------------------------------------- WARNING: `span_multi` queries will hit too many clauses failure if the number of terms that match the query exceeds the -boolean query limit (defaults to 4096).To avoid an unbounded expansion you can set the <>. +To avoid an unbounded expansion you can set the <> of the multi term query to `top_terms_*` rewrite. Or, if you use `span_multi` on `prefix` query only, you can activate the <> field option of the `text` field instead. This will rewrite any prefix query on the field to a single term query that matches the indexed prefix. 
diff --git a/docs/reference/query-dsl/sparse-vector-query.asciidoc b/docs/reference/query-dsl/sparse-vector-query.asciidoc index 80616ff174e36..399cf29d4dd12 100644 --- a/docs/reference/query-dsl/sparse-vector-query.asciidoc +++ b/docs/reference/query-dsl/sparse-vector-query.asciidoc @@ -1,5 +1,5 @@ [[query-dsl-sparse-vector-query]] -== Sparse vector query +=== Sparse vector query ++++ Sparse vector @@ -19,7 +19,7 @@ For example, a stored vector `{"feature_0": 0.12, "feature_1": 1.2, "feature_2": [discrete] [[sparse-vector-query-ex-request]] -=== Example request using an {nlp} model +==== Example request using an {nlp} model [source,console] ---- @@ -37,7 +37,7 @@ GET _search // TEST[skip: Requires inference] [discrete] -=== Example request using precomputed vectors +==== Example request using precomputed vectors [source,console] ---- @@ -55,7 +55,7 @@ GET _search [discrete] [[sparse-vector-field-params]] -=== Top level parameters for `sparse_vector` +==== Top level parameters for `sparse_vector` `field`:: (Required, string) The name of the field that contains the token-weight pairs to be searched against. @@ -104,7 +104,7 @@ Default: `5`. `tokens_weight_threshold`:: (Optional, float) preview:[] -Tokens whose weight is less than `tokens_weight_threshold` are considered nonsignificant and pruned. +Tokens whose weight is less than `tokens_weight_threshold` are considered insignificant and pruned. This value must be between 0 and 1. Default: `0.4`. @@ -120,7 +120,7 @@ NOTE: The default values for `tokens_freq_ratio_threshold` and `tokens_weight_th [discrete] [[sparse-vector-query-example]] -=== Example ELSER query +==== Example ELSER query The following is an example of the `sparse_vector` query that references the ELSER model to perform semantic search. For a more detailed description of how to perform semantic search by using ELSER and the `sparse_vector` query, refer to <>. @@ -241,7 +241,7 @@ GET my-index/_search [discrete] [[sparse-vector-query-with-pruning-config-and-rescore-example]] -=== Example ELSER query with pruning configuration and rescore +==== Example ELSER query with pruning configuration and rescore The following is an extension to the above example that adds a preview:[] pruning configuration to the `sparse_vector` query. The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance. diff --git a/docs/reference/query-dsl/special-queries.asciidoc b/docs/reference/query-dsl/special-queries.asciidoc index 90cd9a696a6d9..a6d35d4f9b707 100644 --- a/docs/reference/query-dsl/special-queries.asciidoc +++ b/docs/reference/query-dsl/special-queries.asciidoc @@ -17,10 +17,6 @@ or collection of documents. This query finds queries that are stored as documents that match with the specified document. -<>:: -A query that finds the _k_ nearest vectors to a query -vector, as measured by a similarity metric. - <>:: A query that computes scores based on the values of numeric features and is able to efficiently skip non-competitive hits. @@ -32,9 +28,6 @@ This query allows a script to act as a filter. Also see the <>:: A query that allows to modify the score of a sub-query with a script. -<>:: -A query that allows you to perform semantic search. - <>:: A query that accepts other queries as json or yaml string. 
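Returning to the `sparse_vector` query whose reference page is restructured above, the following is a rough sketch of its precomputed-vectors variant using the Python `elasticsearch` client. The index name `my-index`, the `ml.tokens` field, the token-weight pairs, and the local endpoint are assumptions for illustration; in practice the pairs would come from an NLP model.

[source,python]
----
from elasticsearch import Elasticsearch

# Assumes a local, unsecured test cluster; adjust connection details as needed.
client = Elasticsearch("http://localhost:9200")

# sparse_vector query with precomputed token-weight pairs instead of an
# inference endpoint; low-weight tokens may be dropped if pruning is enabled.
response = client.search(
    index="my-index",
    query={
        "sparse_vector": {
            "field": "ml.tokens",
            "query_vector": {"lucene": 2.2, "search": 1.3, "query": 0.5},
        }
    },
)
for hit in response["hits"]["hits"]:
    print(hit["_id"], hit["_score"])
----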
@@ -50,20 +43,14 @@ include::mlt-query.asciidoc[] include::percolate-query.asciidoc[] -include::knn-query.asciidoc[] - include::rank-feature-query.asciidoc[] include::script-query.asciidoc[] include::script-score-query.asciidoc[] -include::semantic-query.asciidoc[] - include::wrapper-query.asciidoc[] include::pinned-query.asciidoc[] include::rule-query.asciidoc[] - -include::weighted-tokens-query.asciidoc[] diff --git a/docs/reference/query-dsl/text-expansion-query.asciidoc b/docs/reference/query-dsl/text-expansion-query.asciidoc index 1c51429b5aa22..235a413df686f 100644 --- a/docs/reference/query-dsl/text-expansion-query.asciidoc +++ b/docs/reference/query-dsl/text-expansion-query.asciidoc @@ -1,5 +1,5 @@ [[query-dsl-text-expansion-query]] -== Text expansion query +=== Text expansion query ++++ Text expansion @@ -12,7 +12,7 @@ The text expansion query uses a {nlp} model to convert the query text into a lis [discrete] [[text-expansion-query-ex-request]] -=== Example request +==== Example request [source,console] ---- @@ -32,14 +32,14 @@ GET _search [discrete] [[text-expansion-query-params]] -=== Top level parameters for `text_expansion` +==== Top level parameters for `text_expansion` ``::: (Required, object) The name of the field that contains the token-weight pairs the NLP model created based on the input text. [discrete] [[text-expansion-rank-feature-field-params]] -=== Top level parameters for `` +==== Top level parameters for `` `model_id`:::: (Required, string) The ID of the model to use to convert the query text into token-weight pairs. @@ -68,7 +68,7 @@ Default: `5`. `tokens_weight_threshold`:: (Optional, float) preview:[] -Tokens whose weight is less than `tokens_weight_threshold` are considered nonsignificant and pruned. +Tokens whose weight is less than `tokens_weight_threshold` are considered insignificant and pruned. This value must be between 0 and 1. Default: `0.4`. @@ -84,7 +84,7 @@ NOTE: The default values for `tokens_freq_ratio_threshold` and `tokens_weight_th [discrete] [[text-expansion-query-example]] -=== Example ELSER query +==== Example ELSER query The following is an example of the `text_expansion` query that references the ELSER model to perform semantic search. For a more detailed description of how to perform semantic search by using ELSER and the `text_expansion` query, refer to <>. @@ -208,7 +208,7 @@ GET my-index/_search [discrete] [[text-expansion-query-with-pruning-config-and-rescore-example]] -=== Example ELSER query with pruning configuration and rescore +==== Example ELSER query with pruning configuration and rescore The following is an extension to the above example that adds a preview:[] pruning configuration to the `text_expansion` query. The pruning configuration identifies non-significant tokens to prune from the query in order to improve query performance. diff --git a/docs/reference/query-dsl/vector-queries.asciidoc b/docs/reference/query-dsl/vector-queries.asciidoc new file mode 100644 index 0000000000000..fe9f380eeb621 --- /dev/null +++ b/docs/reference/query-dsl/vector-queries.asciidoc @@ -0,0 +1,37 @@ +[[vector-queries]] +== Vector queries + +Vector queries are specialized queries that work on vector fields to efficiently perform <>. + +<>:: +A query that finds the _k_ nearest vectors to a query vector for <> fields, as measured by a similarity metric. + +<>:: +A query used to search <> field types. + +<>:: +A query that allows you to perform semantic search on <> fields. 
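As a quick illustration of the `knn` query listed above, here is a hedged sketch using the Python `elasticsearch` client. The index name `image-index`, the three-dimensional `dense_vector` field `image_vector`, and the local endpoint are hypothetical; real vectors usually have many more dimensions.

[source,python]
----
from elasticsearch import Elasticsearch

# Assumes a local, unsecured test cluster; adjust connection details as needed.
client = Elasticsearch("http://localhost:9200")

# knn query used inside the regular query DSL: retrieve the vectors closest
# to the query vector, considering num_candidates candidates per shard.
response = client.search(
    index="image-index",
    size=3,
    query={
        "knn": {
            "field": "image_vector",
            "query_vector": [0.3, 0.1, 1.2],
            "num_candidates": 50,
        }
    },
)
for hit in response["hits"]["hits"]:
    print(hit["_id"], hit["_score"])
----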
+ +[discrete] +=== Deprecated vector queries + +The following queries have been deprecated and will be removed in the near future. +Use the <> query instead. + +<>:: +A query that allows you to perform sparse vector search on <> or <> fields. + +<>:: +Allows to perform text expansion queries optimizing for performance. + +include::knn-query.asciidoc[] + +include::sparse-vector-query.asciidoc[] + +include::semantic-query.asciidoc[] + +include::text-expansion-query.asciidoc[] + +include::weighted-tokens-query.asciidoc[] + + diff --git a/docs/reference/query-dsl/weighted-tokens-query.asciidoc b/docs/reference/query-dsl/weighted-tokens-query.asciidoc index d4318665a9778..fb051f4229df6 100644 --- a/docs/reference/query-dsl/weighted-tokens-query.asciidoc +++ b/docs/reference/query-dsl/weighted-tokens-query.asciidoc @@ -58,7 +58,7 @@ This value must between 1 and 100. Default: `5`. `tokens_weight_threshold`:: -(Optional, float) Tokens whose weight is less than `tokens_weight_threshold` are considered nonsignificant and pruned. +(Optional, float) Tokens whose weight is less than `tokens_weight_threshold` are considered insignificant and pruned. This value must be between 0 and 1. Default: `0.4`. diff --git a/docs/reference/query-rules/apis/index.asciidoc b/docs/reference/query-rules/apis/index.asciidoc index f7303647f8515..53d5fc3dc4eee 100644 --- a/docs/reference/query-rules/apis/index.asciidoc +++ b/docs/reference/query-rules/apis/index.asciidoc @@ -1,8 +1,6 @@ [[query-rules-apis]] == Query rules APIs -preview::[] - ++++ Query rules APIs ++++ diff --git a/docs/reference/query-rules/apis/put-query-rule.asciidoc b/docs/reference/query-rules/apis/put-query-rule.asciidoc index 2b9a6ba892b84..9737673be009c 100644 --- a/docs/reference/query-rules/apis/put-query-rule.asciidoc +++ b/docs/reference/query-rules/apis/put-query-rule.asciidoc @@ -70,10 +70,10 @@ Matches all queries, regardless of input. -- - `metadata` (Optional, string) The metadata field to match against. This metadata will be used to match against `match_criteria` sent in the <>. -Required for all criteria types except `global`. +Required for all criteria types except `always`. - `values` (Optional, array of strings) The values to match against the metadata field. Only one value must match for the criteria to be met. -Required for all criteria types except `global`. +Required for all criteria types except `always`. `actions`:: (Required, object) The actions to take when the rule is matched. diff --git a/docs/reference/query-rules/apis/put-query-ruleset.asciidoc b/docs/reference/query-rules/apis/put-query-ruleset.asciidoc index 012060e1004ae..c164e9e140a4e 100644 --- a/docs/reference/query-rules/apis/put-query-ruleset.asciidoc +++ b/docs/reference/query-rules/apis/put-query-ruleset.asciidoc @@ -78,10 +78,10 @@ Matches all queries, regardless of input. -- - `metadata` (Optional, string) The metadata field to match against. This metadata will be used to match against `match_criteria` sent in the <>. -Required for all criteria types except `global`. +Required for all criteria types except `always`. - `values` (Optional, array of strings) The values to match against the metadata field. Only one value must match for the criteria to be met. -Required for all criteria types except `global`. +Required for all criteria types except `always`. Actions depend on the rule type. For `pinned` rules, actions follow the format specified by the <>. 
diff --git a/docs/reference/quickstart/getting-started.asciidoc b/docs/reference/quickstart/getting-started.asciidoc index 6b3095e07f9d4..1225d4a964659 100644 --- a/docs/reference/quickstart/getting-started.asciidoc +++ b/docs/reference/quickstart/getting-started.asciidoc @@ -1,103 +1,149 @@ [[getting-started]] -== Quick start guide +== Index and search data using {es} APIs +++++ +Basics: Index and search using APIs +++++ -This guide helps you learn how to: +This quick start guide is a hands-on introduction to the fundamental concepts of Elasticsearch: <>. -* Run {es} and {kib} (using {ecloud} or in a local Docker dev environment), -* add simple (non-timestamped) dataset to {es}, -* run basic searches. +You'll learn how to create an index, add data as documents, work with dynamic and explicit mappings, and perform your first basic searches. [TIP] ==== -If you're interested in using {es} with Python, check out Elastic Search Labs. This is the best place to explore AI-powered search use cases, such as working with embeddings, vector search, and retrieval augmented generation (RAG). - -* https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[Tutorial]: this walks you through building a complete search solution with {es}, from the ground up. -* https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs` repository]: it contains a range of Python https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[notebooks] and https://github.com/elastic/elasticsearch-labs/tree/main/example-apps[example apps]. +The code examples in this tutorial are in {kibana-ref}/console-kibana.html[Console] syntax by default. ==== [discrete] -[[run-elasticsearch]] -=== Run {es} +[[getting-started-prerequisites]] +=== Prerequisites + +Before you begin, you need to have a running {es} cluster. +The fastest way to get started is with a <>. +Refer to <> for other deployment options. -The simplest way to set up {es} is to create a managed deployment with {ess} on -{ecloud}. If you prefer to manage your own test environment, install and -run {es} using Docker. +//// +[source,console] +---- +PUT books +PUT my-explicit-mappings-books +---- +// TESTSETUP -include::{es-ref-dir}/tab-widgets/code.asciidoc[] -include::{es-ref-dir}/tab-widgets/quick-start-install-widget.asciidoc[] +[source,console] +-------------------------------------------------- +DELETE books +DELETE my-explicit-mappings-books +-------------------------------------------------- +// TEARDOWN + +//// [discrete] -[[send-requests-to-elasticsearch]] -=== Send requests to {es} +[[getting-started-index-creation]] +=== Step 1: Create an index + +Create a new index named `books`: -You send data and other requests to {es} using REST APIs. This lets you interact -with {es} using any client that sends HTTP requests, such as -https://curl.se[curl]. You can also use {kib}'s Console to send requests to -{es}. +[source,console] +---- +PUT /books +---- +// TEST[skip: index already setup] -include::{es-ref-dir}/tab-widgets/api-call-widget.asciidoc[] +The following response indicates the index was created successfully. + +.Example response +[%collapsible] +=============== +[source,console-result] +---- +{ + "acknowledged": true, + "shards_acknowledged": true, + "index": "books" +} +---- +// TEST[skip: index already setup] +=============== [discrete] -[[add-data]] -=== Add data +[[getting-started-add-documents]] +=== Step 2: Add data to your index + +[TIP] +==== +This tutorial uses {es} APIs, but there are many other ways to +<>. 
+==== -You add data to {es} as JSON objects called documents. {es} stores these +You add data to {es} as JSON objects called documents. +{es} stores these documents in searchable indices. [discrete] -[[add-single-document]] +[[getting-started-add-single-document]] ==== Add a single document Submit the following indexing request to add a single document to the `books` index. -The request automatically creates the index. -//// -[source,console] ----- -PUT books ----- -// TESTSETUP -//// +[TIP] +==== +If the index didn't already exist, this request would automatically create it. +==== [source,console] ---- POST books/_doc -{"name": "Snow Crash", "author": "Neal Stephenson", "release_date": "1992-06-01", "page_count": 470} +{ + "name": "Snow Crash", + "author": "Neal Stephenson", + "release_date": "1992-06-01", + "page_count": 470 +} ---- -// TEST[s/_doc/_doc?refresh=wait_for/] +// TEST[continued] -The response includes metadata that {es} generates for the document including a unique `_id` for the document within the index. +The response includes metadata that {es} generates for the document, including a unique `_id` for the document within the index. -.Expand to see example response +.Example response [%collapsible] =============== [source,console-result] ---- { - "_index": "books", - "_id": "O0lG2IsBaSa7VYx_rEia", - "_version": 1, - "result": "created", - "_shards": { - "total": 2, - "successful": 2, - "failed": 0 + "_index": "books", <1> + "_id": "O0lG2IsBaSa7VYx_rEia", <2> + "_version": 1, <3> + "result": "created", <4> + "_shards": { <5> + "total": 2, <6> + "successful": 2, <7> + "failed": 0 <8> }, - "_seq_no": 0, - "_primary_term": 1 + "_seq_no": 0, <9> + "_primary_term": 1 <10> } ---- -// TEST[skip:TODO] +// TEST[s/O0lG2IsBaSa7VYx_rEia/*/] +<1> The `_index` field indicates the index the document was added to. +<2> The `_id` field is the unique identifier for the document. +<3> The `_version` field indicates the version of the document. +<4> The `result` field indicates the result of the indexing operation. +<5> The `_shards` field contains information about the number of <> that the indexing operation was executed on and the number that succeeded. +<6> The `total` field indicates the total number of shards for the index. +<7> The `successful` field indicates the number of shards that the indexing operation was executed on. +<8> The `failed` field indicates the number of shards that failed during the indexing operation. '0' indicates no failures. +<9> The `_seq_no` field holds a monotonically increasing number incremented for each indexing operation on a shard. +<10> The `_primary_term` field is a monotonically increasing number incremented each time a primary shard is assigned to a different node. =============== [discrete] -[[add-multiple-documents]] +[[getting-started-add-multiple-documents]] ==== Add multiple documents -Use the `_bulk` endpoint to add multiple documents in one request. Bulk data -must be newline-delimited JSON (NDJSON). Each line must end in a newline -character (`\n`), including the last line. +Use the <> to add multiple documents in one request. Bulk data +must be formatted as newline-delimited JSON (NDJSON). [source,console] ---- @@ -117,7 +163,7 @@ POST /_bulk You should receive a response indicating there were no errors. -.Expand to see example response +.Example response [%collapsible] =============== [source,console-result] @@ -213,33 +259,221 @@ You should receive a response indicating there were no errors. 
=============== [discrete] -[[qs-search-data]] -=== Search data +[[getting-started-mappings-and-data-types]] +=== Step 3: Define mappings and data types + +<> define how data is stored and indexed in {es}, like a schema in a relational database. + +[discrete] +[[getting-started-dynamic-mapping]] +==== Use dynamic mapping + +When using dynamic mapping, {es} automatically creates mappings for new fields by default. +The documents we've added so far have used dynamic mapping, because we didn't specify a mapping when creating the index. + +To see how dynamic mapping works, add a new document to the `books` index with a field that doesn't appear in the existing documents. + +[source,console] +---- +POST /books/_doc +{ + "name": "The Great Gatsby", + "author": "F. Scott Fitzgerald", + "release_date": "1925-04-10", + "page_count": 180, + "language": "EN" <1> +} +---- +// TEST[continued] +<1> The new field. + +View the mapping for the `books` index with the <>. The new field `language` has been added to the mapping with a `text` data type. + +[source,console] +---- +GET /books/_mapping +---- +// TEST[continued] + +.Example response +[%collapsible] +=============== +[source,console-result] +---- +{ + "books": { + "mappings": { + "properties": { + "author": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "language": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "name": { + "type": "text", + "fields": { + "keyword": { + "type": "keyword", + "ignore_above": 256 + } + } + }, + "page_count": { + "type": "long" + }, + "release_date": { + "type": "date" + } + } + } + } +} +---- +// TEST[continued] +=============== + +[discrete] +[[getting-started-explicit-mapping]] +==== Define explicit mapping -Indexed documents are available for search in near real-time. +Create an index named `my-explicit-mappings-books` with explicit mappings. +Pass each field's properties as a JSON object. This object should contain the <> and any additional <>. + +[source,console] +---- +PUT /my-explicit-mappings-books +{ + "mappings": { + "dynamic": false, <1> + "properties": { <2> + "name": { "type": "text" }, + "author": { "type": "text" }, + "release_date": { "type": "date", "format": "yyyy-MM-dd" }, + "page_count": { "type": "integer" } + } + } +} +---- +// TEST[continued] +<1> Disables dynamic mapping for the index. Fields not defined in the mapping are still stored in the document's `_source`, but they are not indexed or searchable. +<2> The `properties` object defines the fields and their data types for documents in this index. + +.Example response +[%collapsible] +=============== +[source,console-result] +---- +{ + "acknowledged": true, + "shards_acknowledged": true, + "index": "my-explicit-mappings-books" +} +---- +// TEST[skip:already created in setup] +=============== + +[discrete] +[[getting-started-combined-mapping]] +==== Combine dynamic and explicit mappings + +Explicit mappings are defined at index creation, and documents must conform to these mappings. +You can also use the <>. +When an index has the `dynamic` flag set to `true`, you can add new fields to documents without updating the mapping. + +This allows you to combine explicit and dynamic mappings. +Learn more about <>. + +[discrete] +[[getting-started-search-data]] +=== Step 4: Search your index + +Indexed documents are available for search in near real-time, using the <>.
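The following sketch illustrates that near real-time behaviour using the Python `elasticsearch` client (an assumption; the tutorial itself uses Console syntax). It indexes one more document, waits for a refresh, and immediately searches for it. The document values and the unsecured local endpoint are hypothetical.

[source,python]
----
from elasticsearch import Elasticsearch

# Assumes a local, unsecured test cluster; adjust connection details as needed.
client = Elasticsearch("http://localhost:9200")

# refresh="wait_for" blocks until the next refresh makes the document
# visible to search, so the follow-up query can find it right away.
client.index(
    index="books",
    document={
        "name": "Fahrenheit 451",
        "author": "Ray Bradbury",
        "release_date": "1953-10-19",
        "page_count": 227,
    },
    refresh="wait_for",
)

response = client.search(index="books", query={"match": {"name": "fahrenheit"}})
print(response["hits"]["total"]["value"])
----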
+// TODO: You'll find more detailed quick start guides in TODO [discrete] -[[search-all-documents]] +[[getting-started-search-all-documents]] ==== Search all documents Run the following command to search the `books` index for all documents: + [source,console] ---- GET books/_search ---- // TEST[continued] -The `_source` of each hit contains the original -JSON object submitted during indexing. +.Example response +[%collapsible] +=============== +[source,console-result] +---- +{ + "took": 2, <1> + "timed_out": false, <2> + "_shards": { <3> + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { <4> + "total": { <5> + "value": 7, + "relation": "eq" + }, + "max_score": 1, <6> + "hits": [ + { + "_index": "books", <7> + "_id": "CwICQpIBO6vvGGiC_3Ls", <8> + "_score": 1, <9> + "_source": { <10> + "name": "Brave New World", + "author": "Aldous Huxley", + "release_date": "1932-06-01", + "page_count": 268 + } + }, + ... (truncated) + ] + } +} +---- +// TEST[continued] +<1> The `took` field indicates the time in milliseconds for {es} to execute the search +<2> The `timed_out` field indicates whether the search timed out +<3> The `_shards` field contains information about the number of <> that the search was executed on and the number that succeeded +<4> The `hits` object contains the search results +<5> The `total` object provides information about the total number of matching documents +<6> The `max_score` field indicates the highest relevance score among all matching documents +<7> The `_index` field indicates the index the document belongs to +<8> The `_id` field is the document's unique identifier +<9> The `_score` field indicates the relevance score of the document +<10> The `_source` field contains the original JSON object submitted during indexing +=============== [discrete] -[[qs-match-query]] +[[getting-started-match-query]] ==== `match` query -You can use the `match` query to search for documents that contain a specific value in a specific field. -This is the standard query for performing full-text search, including fuzzy matching and phrase searches. +You can use the <> to search for documents that contain a specific value in a specific field. +This is the standard query for full-text searches. Run the following command to search the `books` index for documents containing `brave` in the `name` field: + [source,console] ---- GET books/_search @@ -253,32 +487,63 @@ GET books/_search ---- // TEST[continued] -[discrete] -[[whats-next]] -=== Next steps - -Now that {es} is up and running and you've learned the basics, you'll probably want to test out larger datasets, or index your own data. +.Example response +[%collapsible] +=============== +[source,console-result] +---- +{ + "took": 9, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 1, + "relation": "eq" + }, + "max_score": 0.6931471, <1> + "hits": [ + { + "_index": "books", + "_id": "CwICQpIBO6vvGGiC_3Ls", + "_score": 0.6931471, + "_source": { + "name": "Brave New World", + "author": "Aldous Huxley", + "release_date": "1932-06-01", + "page_count": 268 + } + } + ] + } +} +---- +// TEST[continued] +<1> The `max_score` is the score of the highest-scoring document in the results. In this case, there is only one matching document, so the `max_score` is the score of that document. 
+=============== [discrete] -[[whats-next-search-learn-more]] -==== Learn more about search queries +[[getting-started-delete-indices]] +=== Step 5: Delete your indices (optional) -* <>. Jump here to learn about exact value search, full-text search, vector search, and more, using the <>. +When following along with examples, you might want to delete an index to start from scratch. +You can delete indices using the <>. -[discrete] -[[whats-next-more-data]] -==== Add more data +For example, run the following command to delete the indices created in this tutorial: -* Learn how to {kibana-ref}/sample-data.html[install sample data] using {kib}. This is a quick way to test out {es} on larger workloads. -* Learn how to use the {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[upload data UI] in {kib} to add your own CSV, TSV, or JSON files. -* Use the https://www.elastic.co/guide/en/elasticsearch/reference/current/docs-bulk.html[bulk API] to ingest your own datasets to {es}. +[source,console] +---- +DELETE /books +DELETE /my-explicit-mappings-books +---- +// TEST[skip:handled by setup/teardown] -[discrete] -[[whats-next-client-libraries]] -==== {es} programming language clients - -* Check out our https://www.elastic.co/guide/en/elasticsearch/client/index.html[client library] to work with your {es} instance in your preferred programming language. -* If you're using Python, check out https://www.elastic.co/search-labs[Elastic Search Labs] for a range of examples that use the {es} Python client. This is the best place to explore AI-powered search use cases, such as working with embeddings, vector search, and retrieval augmented generation (RAG). -** This extensive, hands-on https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[tutorial] -walks you through building a complete search solution with {es}, from the ground up. -** https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs`] contains a range of executable Python https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[notebooks] and https://github.com/elastic/elasticsearch-labs/tree/main/example-apps[example apps]. \ No newline at end of file +[CAUTION] +==== +Deleting an index permanently deletes its documents, shards, and metadata. +==== diff --git a/docs/reference/quickstart/index.asciidoc b/docs/reference/quickstart/index.asciidoc index e517d039e620b..2d9114882254f 100644 --- a/docs/reference/quickstart/index.asciidoc +++ b/docs/reference/quickstart/index.asciidoc @@ -1,10 +1,29 @@ [[quickstart]] -= Quickstart += Quick starts -Get started quickly with {es}. +Use these quick starts to get hands-on experience with the {es} APIs. +Unless otherwise noted, these examples will use queries written in <> syntax. -* Learn how to run {es} (and {kib}) for <>. -* Follow our <> to add data to {es} and query it. +[discrete] +[[quickstart-requirements]] +== Requirements + +You'll need a running {es} cluster, together with {kib} to use the Dev Tools API Console. +Get started <> , or see our <>. + +[discrete] +[[quickstart-list]] +== Hands-on quick starts + +* <>. Learn about indices, documents, and mappings, and perform a basic search. 
+ +[discrete] +[[quickstart-python-links]] +== Working in Python + +If you're interested in using {es} with Python, check out Elastic Search Labs: + +* https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs` repository]: Contains a range of Python https://github.com/elastic/elasticsearch-labs/tree/main/notebooks[notebooks] and https://github.com/elastic/elasticsearch-labs/tree/main/example-apps[example apps]. +* https://www.elastic.co/search-labs/tutorials/search-tutorial/welcome[Tutorial]: This walks you through building a complete search solution with {es} from the ground up using Flask. -include::run-elasticsearch-locally.asciidoc[] include::getting-started.asciidoc[] diff --git a/docs/reference/quickstart/run-elasticsearch-locally.asciidoc b/docs/reference/quickstart/run-elasticsearch-locally.asciidoc deleted file mode 100644 index 0db395ba34b0a..0000000000000 --- a/docs/reference/quickstart/run-elasticsearch-locally.asciidoc +++ /dev/null @@ -1,175 +0,0 @@ -[[run-elasticsearch-locally]] -== Run {es} locally in Docker (without security) -++++ -Local dev setup (Docker) -++++ - -[WARNING] -==== -*DO NOT USE THESE INSTRUCTIONS FOR PRODUCTION DEPLOYMENTS* - -The instructions on this page are for *local development only*. Do not use these instructions for production deployments, because they are not secure. -While this approach is convenient for experimenting and learning, you should never run the service in this way in a production environment. -==== - -The following commands help you very quickly spin up a single-node {es} cluster, together with {kib} in Docker. -Note that if you don't need the {kib} UI, you can skip those instructions. - -[discrete] -[[local-dev-why]] -=== When would I use this setup? - -Use this setup if you want to quickly spin up {es} (and {kib}) for local development or testing. - -For example you might: - -* Want to run a quick test to see how a feature works. -* Follow a tutorial or guide that requires an {es} cluster, like our <>. -* Experiment with the {es} APIs using different tools, like the Dev Tools Console, cURL, or an Elastic programming language client. -* Quickly spin up an {es} cluster to test an executable https://github.com/elastic/elasticsearch-labs/tree/main/notebooks#readme[Python notebook] locally. - -[discrete] -[[local-dev-prerequisites]] -=== Prerequisites - -If you don't have Docker installed, https://www.docker.com/products/docker-desktop[download and install Docker Desktop] for your operating system. - -[discrete] -[[local-dev-env-vars]] -=== Set environment variables - -Configure the following environment variables. - -[source,sh] ----- -export ELASTIC_PASSWORD="" # password for "elastic" username -export KIBANA_PASSWORD="" # Used _internally_ by Kibana, must be at least 6 characters long ----- - -[discrete] -[[local-dev-create-docker-network]] -=== Create a Docker network - -To run both {es} and {kib}, you'll need to create a Docker network: - -[source,sh] ----- -docker network create elastic-net ----- - -[discrete] -[[local-dev-run-es]] -=== Run {es} - -Start the {es} container with the following command: - -ifeval::["{release-state}"=="unreleased"] -WARNING: Version {version} has not yet been released. -No Docker image is currently available for {es} {version}. 
-endif::[] - -[source,sh,subs="attributes"] ----- -docker run -p 127.0.0.1:9200:9200 -d --name elasticsearch --network elastic-net \ - -e ELASTIC_PASSWORD=$ELASTIC_PASSWORD \ - -e "discovery.type=single-node" \ - -e "xpack.security.http.ssl.enabled=false" \ - -e "xpack.license.self_generated.type=trial" \ - {docker-image} ----- - -[discrete] -[[local-dev-run-kib]] -=== Run {kib} (optional) - -To run {kib}, you must first set the `kibana_system` password in the {es} container. - -[source,sh,subs="attributes"] ----- -# configure the Kibana password in the ES container -curl -u elastic:$ELASTIC_PASSWORD \ - -X POST \ - http://localhost:9200/_security/user/kibana_system/_password \ - -d '{"password":"'"$KIBANA_PASSWORD"'"}' \ - -H 'Content-Type: application/json' ----- -// NOTCONSOLE - -Start the {kib} container with the following command: - -ifeval::["{release-state}"=="unreleased"] -WARNING: Version {version} has not yet been released. -No Docker image is currently available for {es} {version}. -endif::[] - -[source,sh,subs="attributes"] ----- -docker run -p 127.0.0.1:5601:5601 -d --name kibana --network elastic-net \ - -e ELASTICSEARCH_URL=http://elasticsearch:9200 \ - -e ELASTICSEARCH_HOSTS=http://elasticsearch:9200 \ - -e ELASTICSEARCH_USERNAME=kibana_system \ - -e ELASTICSEARCH_PASSWORD=$KIBANA_PASSWORD \ - -e "xpack.security.enabled=false" \ - -e "xpack.license.self_generated.type=trial" \ - {kib-docker-image} ----- - -[NOTE] -==== -The service is started with a trial license. The trial license enables all features of Elasticsearch for a trial period of 30 days. After the trial period expires, the license is downgraded to a basic license, which is free forever. If you prefer to skip the trial and use the basic license, set the value of the `xpack.license.self_generated.type` variable to basic instead. For a detailed feature comparison between the different licenses, refer to our https://www.elastic.co/subscriptions[subscriptions page]. -==== - -[discrete] -[[local-dev-connecting-clients]] -== Connecting to {es} with language clients - -To connect to the {es} cluster from a language client, you can use basic authentication with the `elastic` username and the password you set in the environment variable. - -You'll use the following connection details: - -* **{es} endpoint**: `http://localhost:9200` -* **Username**: `elastic` -* **Password**: `$ELASTIC_PASSWORD` (Value you set in the environment variable) - -For example, to connect with the Python `elasticsearch` client: - -[source,python] ----- -import os -from elasticsearch import Elasticsearch - -username = 'elastic' -password = os.getenv('ELASTIC_PASSWORD') # Value you set in the environment variable - -client = Elasticsearch( - "http://localhost:9200", - basic_auth=(username, password) -) - -print(client.info()) ----- - -Here's an example curl command using basic authentication: - -[source,sh,subs="attributes"] ----- -curl -u elastic:$ELASTIC_PASSWORD \ - -X PUT \ - http://localhost:9200/my-new-index \ - -H 'Content-Type: application/json' ----- -// NOTCONSOLE - -[discrete] -[[local-dev-next-steps]] -=== Next steps - -Use our <> to learn the basics of {es}: how to add data and query it. - -[discrete] -[[local-dev-production]] -=== Moving to production - -This setup is not suitable for production use. For production deployments, we recommend using our managed service on Elastic Cloud. https://cloud.elastic.co/registration[Sign up for a free trial] (no credit card required). 
- -Otherwise, refer to https://www.elastic.co/guide/en/elasticsearch/reference/current/install-elasticsearch.html[Install {es}] to learn about the various options for installing {es} in a self-managed production environment, including using Docker. diff --git a/docs/reference/release-notes.asciidoc b/docs/reference/release-notes.asciidoc index 2e043834c9969..6abaf0c13ac9f 100644 --- a/docs/reference/release-notes.asciidoc +++ b/docs/reference/release-notes.asciidoc @@ -6,7 +6,11 @@ This section summarizes the changes in each release. +* <> +* <> * <> +* <> +* <> * <> * <> * <> @@ -68,7 +72,11 @@ This section summarizes the changes in each release. -- +include::release-notes/8.15.2.asciidoc[] +include::release-notes/8.15.1.asciidoc[] include::release-notes/8.15.0.asciidoc[] +include::release-notes/8.14.3.asciidoc[] +include::release-notes/8.14.2.asciidoc[] include::release-notes/8.14.1.asciidoc[] include::release-notes/8.14.0.asciidoc[] include::release-notes/8.13.4.asciidoc[] diff --git a/docs/reference/release-notes/8.12.0.asciidoc b/docs/reference/release-notes/8.12.0.asciidoc index 4c0fc50584b9f..bfa99401f41a2 100644 --- a/docs/reference/release-notes/8.12.0.asciidoc +++ b/docs/reference/release-notes/8.12.0.asciidoc @@ -14,6 +14,13 @@ there are deleted documents in the segments, quantiles may fail to build and pre This issue is fixed in 8.12.1. +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[breaking-8.12.0]] [float] === Breaking changes diff --git a/docs/reference/release-notes/8.12.1.asciidoc b/docs/reference/release-notes/8.12.1.asciidoc index 9aa9a11b3bf02..8ebe5cbac3852 100644 --- a/docs/reference/release-notes/8.12.1.asciidoc +++ b/docs/reference/release-notes/8.12.1.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.12.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.12.1]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.12.2.asciidoc b/docs/reference/release-notes/8.12.2.asciidoc index 2be8449b6c1df..44202ee8226eb 100644 --- a/docs/reference/release-notes/8.12.2.asciidoc +++ b/docs/reference/release-notes/8.12.2.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.12.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. 
+This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.12.2]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.13.0.asciidoc b/docs/reference/release-notes/8.13.0.asciidoc index dba4fdbe5f67e..65c77ff602e34 100644 --- a/docs/reference/release-notes/8.13.0.asciidoc +++ b/docs/reference/release-notes/8.13.0.asciidoc @@ -21,6 +21,13 @@ This affects clusters running version 8.10 or later, with an active downsampling https://www.elastic.co/guide/en/elasticsearch/reference/current/downsampling-ilm.html[configuration] or a configuration that was activated at some point since upgrading to version 8.10 or later. +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[breaking-8.13.0]] [float] === Breaking changes diff --git a/docs/reference/release-notes/8.13.1.asciidoc b/docs/reference/release-notes/8.13.1.asciidoc index 7b3dbff74cc6e..c95fb1e720651 100644 --- a/docs/reference/release-notes/8.13.1.asciidoc +++ b/docs/reference/release-notes/8.13.1.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.13.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.13.1]] [float] diff --git a/docs/reference/release-notes/8.13.2.asciidoc b/docs/reference/release-notes/8.13.2.asciidoc index 514118f5ea575..d4e2cc794b7e8 100644 --- a/docs/reference/release-notes/8.13.2.asciidoc +++ b/docs/reference/release-notes/8.13.2.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.13.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. 
+ [[bug-8.13.2]] [float] diff --git a/docs/reference/release-notes/8.13.3.asciidoc b/docs/reference/release-notes/8.13.3.asciidoc index 9aee0dd815f6d..bbad07f36a31e 100644 --- a/docs/reference/release-notes/8.13.3.asciidoc +++ b/docs/reference/release-notes/8.13.3.asciidoc @@ -10,6 +10,16 @@ Also see <>. SQL:: * Limit how much space some string functions can use {es-pull}107333[#107333] +[[known-issues-8.13.3]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.13.3]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.13.4.asciidoc b/docs/reference/release-notes/8.13.4.asciidoc index bf3f2f497d8fc..bb2fe5789d56f 100644 --- a/docs/reference/release-notes/8.13.4.asciidoc +++ b/docs/reference/release-notes/8.13.4.asciidoc @@ -3,6 +3,16 @@ Also see <>. +[[known-issues-8.13.4]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.13.4]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.14.0.asciidoc b/docs/reference/release-notes/8.14.0.asciidoc index 42f2f86a123ed..034b1ce39be1b 100644 --- a/docs/reference/release-notes/8.14.0.asciidoc +++ b/docs/reference/release-notes/8.14.0.asciidoc @@ -12,6 +12,16 @@ Security:: * Apply stricter Document Level Security (DLS) rules for the validate query API with the rewrite parameter {es-pull}105709[#105709] * Apply stricter Document Level Security (DLS) rules for terms aggregations when min_doc_count is set to 0 {es-pull}105714[#105714] +[[known-issues-8.14.0]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. 
+ [[bug-8.14.0]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.14.1.asciidoc b/docs/reference/release-notes/8.14.1.asciidoc index f161c7d08099c..0b5f5b0a4e804 100644 --- a/docs/reference/release-notes/8.14.1.asciidoc +++ b/docs/reference/release-notes/8.14.1.asciidoc @@ -4,6 +4,16 @@ Also see <>. +[[known-issues-8.14.1]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + [[bug-8.14.1]] [float] === Bug fixes diff --git a/docs/reference/release-notes/8.14.2.asciidoc b/docs/reference/release-notes/8.14.2.asciidoc new file mode 100644 index 0000000000000..f52acf0b6a7e8 --- /dev/null +++ b/docs/reference/release-notes/8.14.2.asciidoc @@ -0,0 +1,48 @@ +[[release-notes-8.14.2]] +== {es} version 8.14.2 + +coming[8.14.2] + +Also see <>. + +[[known-issues-8.14.2]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + +[[bug-8.14.2]] +[float] +=== Bug fixes + +Data streams:: +* Ensure a lazy rollover request will rollover the target data stream once. {es-pull}109636[#109636] +* [Data streams] Fix the description of the lazy rollover task {es-pull}109629[#109629] + +ES|QL:: +* Fix ESQL cancellation for exchange requests {es-pull}109695[#109695] +* Fix equals and hashcode for `SingleValueQuery.LuceneQuery` {es-pull}110035[#110035] +* Force execute inactive sink reaper {es-pull}109632[#109632] + +Infra/Scripting:: +* Check array size before returning array item in script doc values {es-pull}109824[#109824] (issue: {es-issue}104998[#104998]) + +Infra/Settings:: +* Guard file settings readiness on file settings support {es-pull}109500[#109500] + +Machine Learning:: +* Fix IndexOutOfBoundsException during inference {es-pull}109533[#109533] + +Mapping:: +* Re-define `index.mapper.dynamic` setting in 8.x for a better 7.x to 8.x upgrade if this setting is used. 
{es-pull}109341[#109341] + +Ranking:: +* Fix for from parameter when using `sub_searches` and rank {es-pull}106253[#106253] (issue: {es-issue}99011[#99011]) + +Search:: +* Add hexstring support byte painless scorers {es-pull}109492[#109492] +* Fix automatic tracking of collapse with `docvalue_fields` {es-pull}110103[#110103] diff --git a/docs/reference/release-notes/8.14.3.asciidoc b/docs/reference/release-notes/8.14.3.asciidoc new file mode 100644 index 0000000000000..6b8895e39de3e --- /dev/null +++ b/docs/reference/release-notes/8.14.3.asciidoc @@ -0,0 +1,33 @@ +[[release-notes-8.14.3]] +== {es} version 8.14.3 + + +Also see <>. + +[[known-issues-8.14.3]] +[float] +=== Known issues +* When upgrading clusters from version 8.11.4 or earlier, if your cluster contains non-master-eligible nodes, +information about the new functionality of these upgraded nodes may not be registered properly with the master node. +This can lead to some new functionality added since 8.12.0 not being accessible on the upgraded cluster. +If your cluster is running on ECK 2.12.1 and above, this may cause problems with finalizing the upgrade. +To resolve this issue, perform a rolling restart on the non-master-eligible nodes once all Elasticsearch nodes +are upgraded. This issue is fixed in 8.15.0. + +[[bug-8.14.3]] +[float] +=== Bug fixes + +Cluster Coordination:: +* Ensure tasks preserve versions in `MasterService` {es-pull}109850[#109850] + +ES|QL:: +* Introduce compute listener {es-pull}110400[#110400] + +Mapping:: +* Automatically adjust `ignore_malformed` only for the @timestamp {es-pull}109948[#109948] + +TSDB:: +* Disallow index.time_series.end_time setting from being set or updated in normal indices {es-pull}110268[#110268] (issue: {es-issue}110265[#110265]) + + diff --git a/docs/reference/release-notes/8.15.0.asciidoc b/docs/reference/release-notes/8.15.0.asciidoc index 97f4a51a1142f..a6ddff5399346 100644 --- a/docs/reference/release-notes/8.15.0.asciidoc +++ b/docs/reference/release-notes/8.15.0.asciidoc @@ -1,8 +1,557 @@ [[release-notes-8.15.0]] == {es} version 8.15.0 -coming[8.15.0] - Also see <>. +[[known-issues-8.15.0]] +[float] +=== Known issues +* The `pytorch_inference` process used to run Machine Learning models can consume large amounts of memory. +In environments where the available memory is limited, the OS Out of Memory Killer will kill the `pytorch_inference` +process to reclaim memory. This can cause inference requests to fail. +Elasticsearch will automatically restart the `pytorch_inference` process +after it is killed up to four times in 24 hours. (issue: {es-issue}110530[#110530]) + +* Pipeline aggregations under `time_series` and `categorize_text` aggregations are never +returned (issue: {es-issue}111679[#111679]) + +* Elasticsearch will not start on Windows machines if +[`bootstrap.memory_lock` is set to `true`](https://www.elastic.co/guide/en/elasticsearch/reference/current/setup-configuration-memory.html#bootstrap-memory_lock). +Either downgrade to an earlier version, upgrade to 8.15.1, or else follow the +recommendation in the manual to entirely disable swap instead of using the +memory lock feature (issue: {es-issue}111847[#111847]) + +* The `took` field of the response to the <> API is incorrect and may be rather large. Clients which +<> assume that this value will be within a particular range (e.g. 
that it fits into a 32-bit +signed integer) may encounter errors (issue: {es-issue}111854[#111854]) + +* Elasticsearch will not start if custom role mappings are configured using the +`xpack.security.authc.realms.*.files.role_mapping` configuration option. As a workaround, custom role mappings +can be configured using the https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-put-role-mapping.html[REST API] (issue: {es-issue}112503[#112503]) + +* ES|QL queries can lead to node crashes due to Out Of Memory errors when: +** Multiple indices match the query pattern +** These indices have many conflicting field mappings +** Many of those fields are included in the request +These issues deplete heap memory, increasing the likelihood of OOM errors. (issue: {es-issue}111964[#111964], {es-issue}111358[#111358]). +In Kibana, you might indirectly execute these queries when using Discover, or adding a Field Statistics panel to a dashboard. ++ +To work around this issue, you have a number of options: +** Downgrade to an earlier version +** Upgrade to 8.15.2 upon release +** Follow the instructions to +<> +** Change the default data view in Discover to a smaller set of indices and/or one with fewer mapping conflicts. + +* Synthetic source bug. Synthetic source may fail to generate the _source at runtime, causing failures in get APIs or +partial failures in the search APIs. The result is that for the affected documents the _source can't be retrieved. +There is no workaround and the only option is to upgrade to 8.15.2 when released. ++ +If you use synthetic source then you may be affected by this bug if the following is true: +** If you have more fields than the `index.mapping.total_fields.limit` setting allows. +** If you use dynamic mappings and the `index.mapping.total_fields.ignore_dynamic_beyond_limit` setting is enabled.
+ +[[breaking-8.15.0]] +[float] +=== Breaking changes + +Cluster Coordination:: +* Interpret `?timeout=-1` as infinite ack timeout {es-pull}107675[#107675] + +Inference API:: +* Replace `model_id` with `inference_id` in GET inference API {es-pull}111366[#111366] + +Rollup:: +* Disallow new rollup jobs in clusters with no rollup usage {es-pull}108624[#108624] (issue: {es-issue}108381[#108381]) + +Search:: +* Change `skip_unavailable` remote cluster setting default value to true {es-pull}105792[#105792] + +[[bug-8.15.0]] +[float] +=== Bug fixes + +Aggregations:: +* Don't sample calls to `ReduceContext#consumeBucketsAndMaybeBreak` ins `InternalDateHistogram` and `InternalHistogram` during reduction {es-pull}110186[#110186] +* Fix `ClassCastException` in Significant Terms {es-pull}108429[#108429] (issue: {es-issue}108427[#108427]) +* Run terms concurrently when cardinality is only lower than shard size {es-pull}110369[#110369] (issue: {es-issue}105505[#105505]) + +Allocation:: +* Fix misc trappy allocation API timeouts {es-pull}109241[#109241] +* Fix trappy timeout in allocation explain API {es-pull}109240[#109240] + +Analysis:: +* Correct positioning for unique token filter {es-pull}109395[#109395] + +Authentication:: +* Add comma before charset parameter in WWW-Authenticate response header {es-pull}110906[#110906] +* Avoid NPE if `users_roles` file does not exist {es-pull}109606[#109606] +* Improve security-crypto threadpool overflow handling {es-pull}111369[#111369] + +Authorization:: +* Fix trailing slash in `security.put_privileges` specification {es-pull}110177[#110177] +* Fixes cluster state-based role mappings not recovered from disk {es-pull}109167[#109167] +* Handle unmatching remote cluster wildcards properly for `IndicesRequest.SingleIndexNoWildcards` requests {es-pull}109185[#109185] + +Autoscaling:: +* Expose `?master_timeout` in autoscaling APIs {es-pull}108759[#108759] + +CRUD:: +* Update checkpoints after post-replication actions, even on failure {es-pull}109908[#109908] + +Cluster Coordination:: +* Deserialize publish requests on generic thread-pool {es-pull}108814[#108814] (issue: {es-issue}106352[#106352]) +* Fail cluster state API if blocked {es-pull}109618[#109618] (issue: {es-issue}107503[#107503]) +* Use `scheduleUnlessShuttingDown` in `LeaderChecker` {es-pull}108643[#108643] (issue: {es-issue}108642[#108642]) + +Data streams:: +* Apm-data: set concrete values for `metricset.interval` {es-pull}109043[#109043] +* Ecs@mappings: reduce scope for `ecs_geo_point` {es-pull}108349[#108349] (issue: {es-issue}108338[#108338]) +* Include component templates in retention validaiton {es-pull}109779[#109779] + +Distributed:: +* Associate restore snapshot task to parent mount task {es-pull}108705[#108705] (issue: {es-issue}105830[#105830]) +* Don't detect `PlainActionFuture` deadlock on concurrent complete {es-pull}110361[#110361] (issues: {es-issue}110181[#110181], {es-issue}110360[#110360]) +* Handle nullable `DocsStats` and `StoresStats` {es-pull}109196[#109196] + +Downsampling:: +* Support flattened fields and multi-fields as dimensions in downsampling {es-pull}110066[#110066] (issue: {es-issue}99297[#99297]) + +ES|QL:: +* ESQL: Change "substring" function to not return null on empty string {es-pull}109174[#109174] +* ESQL: Fix Join references {es-pull}109989[#109989] +* ESQL: Fix LOOKUP attribute shadowing {es-pull}109807[#109807] (issue: {es-issue}109392[#109392]) +* ESQL: Fix Max doubles bug with negatives and add tests for Max and Min {es-pull}110586[#110586] +* ESQL: Fix 
`IpPrefix` function not handling correctly `ByteRefs` {es-pull}109205[#109205] (issue: {es-issue}109198[#109198]) +* ESQL: Fix equals `hashCode` for functions {es-pull}107947[#107947] (issue: {es-issue}104393[#104393]) +* ESQL: Fix variable shadowing when pushing down past Project {es-pull}108360[#108360] (issue: {es-issue}108008[#108008]) +* ESQL: Validate unique plan attribute names {es-pull}110488[#110488] (issue: {es-issue}110541[#110541]) +* ESQL: change from quoting from backtick to quote {es-pull}108395[#108395] +* ESQL: make named params objects truly per request {es-pull}110046[#110046] (issue: {es-issue}110028[#110028]) +* ES|QL: Fix DISSECT that overwrites input {es-pull}110201[#110201] (issue: {es-issue}110184[#110184]) +* ES|QL: limit query depth to 500 levels {es-pull}108089[#108089] (issue: {es-issue}107752[#107752]) +* ES|QL: reduce max expression depth to 400 {es-pull}111186[#111186] (issue: {es-issue}109846[#109846]) +* Fix ST_DISTANCE Lucene push-down for complex predicates {es-pull}110391[#110391] (issue: {es-issue}110349[#110349]) +* Fix `ClassCastException` with MV_EXPAND on missing field {es-pull}110096[#110096] (issue: {es-issue}109974[#109974]) +* Fix bug in union-types with type-casting in grouping key of STATS {es-pull}110476[#110476] (issues: {es-issue}109922[#109922], {es-issue}110477[#110477]) +* Fix for union-types for multiple columns with the same name {es-pull}110793[#110793] (issues: {es-issue}110490[#110490], {es-issue}109916[#109916]) +* [ESQL] Count_distinct(_source) should return a 400 {es-pull}110824[#110824] +* [ESQL] Fix parsing of large magnitude negative numbers {es-pull}110665[#110665] (issue: {es-issue}104323[#104323]) +* [ESQL] Migrate `SimplifyComparisonArithmetics` optimization {es-pull}109256[#109256] (issues: {es-issue}108388[#108388], {es-issue}108743[#108743]) + +Engine:: +* Async close of `IndexShard` {es-pull}108145[#108145] + +Highlighting:: +* Fix issue with returning incomplete fragment for plain highlighter {es-pull}110707[#110707] + +ILM+SLM:: +* Allow `read_slm` to call GET /_slm/status {es-pull}108333[#108333] + +Indices APIs:: +* Create a new `NodeRequest` for every `NodesDataTiersUsageTransport` use {es-pull}108379[#108379] + +Infra/Core:: +* Add a cluster listener to fix missing node features after upgrading from a version prior to 8.13 {es-pull}110710[#110710] (issue: {es-issue}109254[#109254]) +* Add bounds checking to parsing ISO8601 timezone offset values {es-pull}108672[#108672] +* Fix native preallocate to actually run {es-pull}110851[#110851] +* Ignore additional cpu.stat fields {es-pull}108019[#108019] (issue: {es-issue}107983[#107983]) +* Specify parse index when error occurs on multiple datetime parses {es-pull}108607[#108607] + +Infra/Metrics:: +* Provide document size reporter with `MapperService` {es-pull}109794[#109794] + +Infra/Node Lifecycle:: +* Expose `?master_timeout` on get-shutdown API {es-pull}108886[#108886] +* Fix serialization of put-shutdown request {es-pull}107862[#107862] (issue: {es-issue}107857[#107857]) +* Support wait indefinitely for search tasks to complete on node shutdown {es-pull}107426[#107426] + +Infra/REST API:: +* Add some missing timeout params to REST API specs {es-pull}108761[#108761] +* Consider `error_trace` supported by all endpoints {es-pull}109613[#109613] (issue: {es-issue}109612[#109612]) + +Ingest Node:: +* Fix Dissect with leading non-ascii characters {es-pull}111184[#111184] +* Fix enrich policy runner exception handling on empty segments response {es-pull}111290[#111290] 
+* GeoIP tasks should wait longer for master {es-pull}108410[#108410] +* Removing the use of Stream::peek from `GeoIpDownloader::cleanDatabases` {es-pull}110666[#110666] +* Simulate should succeed if `ignore_missing_pipeline` {es-pull}108106[#108106] (issue: {es-issue}107314[#107314]) + +Machine Learning:: +* Allow deletion of the ELSER inference service when reference in ingest {es-pull}108146[#108146] +* Avoid `InferenceRunner` deadlock {es-pull}109551[#109551] +* Correctly handle duplicate model ids for the `_cat` trained models api and usage statistics {es-pull}109126[#109126] +* Do not use global ordinals strategy if the leaf reader context cannot be obtained {es-pull}108459[#108459] +* Fix NPE in trained model assignment updater {es-pull}108942[#108942] +* Fix serialising inference delete response {es-pull}109384[#109384] +* Fix "stack use after scope" memory error {ml-pull}2673[#2673] +* Fix trailing slash in `ml.get_categories` specification {es-pull}110146[#110146] +* Handle any exception thrown by inference {ml-pull}2680[#2680] +* Increase response size limit for batched requests {es-pull}110112[#110112] +* Offload request to generic threadpool {es-pull}109104[#109104] (issue: {es-issue}109100[#109100]) +* Propagate accurate deployment timeout {es-pull}109534[#109534] (issue: {es-issue}109407[#109407]) +* Refactor TextEmbeddingResults to use primitives rather than objects {es-pull}108161[#108161] +* Require question to be non-null in `QuestionAnsweringConfig` {es-pull}107972[#107972] +* Start Trained Model Deployment API request query params now override body params {es-pull}109487[#109487] +* Suppress deprecation warnings from ingest pipelines when deleting trained model {es-pull}108679[#108679] (issue: {es-issue}105004[#105004]) +* Use default translog durability on AD results index {es-pull}108999[#108999] +* Use the multi node routing action for internal inference services {es-pull}109358[#109358] +* [Inference API] Extract optional long instead of integer in `RateLimitSettings#of` {es-pull}108602[#108602] +* [Inference API] Fix serialization for inference delete endpoint response {es-pull}110431[#110431] +* [Inference API] Replace `model_id` with `inference_id` in inference API except when stored {es-pull}111366[#111366] + +Mapping:: +* Fix off by one error when handling null values in range fields {es-pull}107977[#107977] (issue: {es-issue}107282[#107282]) +* Limit number of synonym rules that can be created {es-pull}109981[#109981] (issue: {es-issue}108785[#108785]) +* Propagate mapper builder context flags across nested mapper builder context creation {es-pull}109963[#109963] +* `DenseVectorFieldMapper` fixed typo {es-pull}108065[#108065] + +Network:: +* Use proper executor for failing requests when connection closes {es-pull}109236[#109236] (issue: {es-issue}109225[#109225]) +* `NoSuchRemoteClusterException` should not be thrown when a remote is configured {es-pull}107435[#107435] (issue: {es-issue}107381[#107381]) + +Packaging:: +* Adding override for lintian false positive on `libvec.so` {es-pull}108521[#108521] (issue: {es-issue}108514[#108514]) + +Ranking:: +* Fix score count validation in reranker response {es-pull}111424[#111424] (issue: {es-issue}111202[#111202]) + +Rollup:: +* Fix trailing slash in two rollup specifications {es-pull}110176[#110176] + +Search:: +* Adding score from `RankDoc` to `SearchHit` {es-pull}108870[#108870] +* Better handling of multiple rescorers clauses with LTR {es-pull}109071[#109071] +* Correct query profiling for conjunctions 
{es-pull}108122[#108122] (issue: {es-issue}108116[#108116]) +* Fix `DecayFunctions'` `toString` {es-pull}107415[#107415] (issue: {es-issue}100870[#100870]) +* Fix leak in collapsing search results {es-pull}110927[#110927] +* Fork freeing search/scroll contexts to GENERIC pool {es-pull}109481[#109481] + +Security:: +* Add permission to secure access to certain config files {es-pull}107827[#107827] +* Add permission to secure access to certain config files specified by settings {es-pull}108895[#108895] +* Fix trappy timeouts in security settings APIs {es-pull}109233[#109233] + +Snapshot/Restore:: +* Stricter failure handling in multi-repo get-snapshots request handling {es-pull}107191[#107191] + +TSDB:: +* Sort time series indices by time range in `GetDataStreams` API {es-pull}107967[#107967] (issue: {es-issue}102088[#102088]) + +Transform:: +* Always pick the user `maxPageSize` value {es-pull}109876[#109876] (issue: {es-issue}109844[#109844]) +* Exit gracefully when deleted {es-pull}107917[#107917] (issue: {es-issue}107266[#107266]) +* Fix NPE during destination index creation {es-pull}108891[#108891] (issue: {es-issue}108890[#108890]) +* Forward `indexServiceSafe` exception to listener {es-pull}108517[#108517] (issue: {es-issue}108418[#108418]) +* Halt Indexer on Stop/Abort API {es-pull}107792[#107792] +* Handle `IndexNotFoundException` {es-pull}108394[#108394] (issue: {es-issue}107263[#107263]) +* Prevent concurrent jobs during cleanup {es-pull}109047[#109047] +* Redirect `VersionConflict` to reset code {es-pull}108070[#108070] +* Reset max page size to settings value {es-pull}109449[#109449] (issue: {es-issue}109308[#109308]) + +Vector Search:: +* Ensure vector similarity correctly limits `inner_hits` returned for nested kNN {es-pull}111363[#111363] (issue: {es-issue}111093[#111093]) +* Ensure we return non-negative scores when scoring scalar dot-products {es-pull}108522[#108522] + +Watcher:: +* Avoiding running watch jobs in TickerScheduleTriggerEngine if it is paused {es-pull}110061[#110061] (issue: {es-issue}105933[#105933]) + +[[deprecation-8.15.0]] +[float] +=== Deprecations + +ILM+SLM:: +* Deprecate using slm privileges to access ilm {es-pull}110540[#110540] + +Infra/Settings:: +* `ParseHeapRatioOrDeprecatedByteSizeValue` for `indices.breaker.total.limit` {es-pull}110236[#110236] + +Machine Learning:: +* Deprecate `text_expansion` and `weighted_tokens` queries {es-pull}109880[#109880] + +[[enhancement-8.15.0]] +[float] +=== Enhancements + +Aggregations:: +* Aggs: Scripted metric allow list {es-pull}109444[#109444] +* Enable inter-segment concurrency for low cardinality numeric terms aggs {es-pull}108306[#108306] +* Increase size of big arrays only when there is an actual value in the aggregators {es-pull}107764[#107764] +* Increase size of big arrays only when there is an actual value in the aggregators (Analytics module) {es-pull}107813[#107813] +* Optimise `BinaryRangeAggregator` for single value fields {es-pull}108016[#108016] +* Optimise cardinality aggregations for single value fields {es-pull}107892[#107892] +* Optimise composite aggregations for single value fields {es-pull}107897[#107897] +* Optimise few metric aggregations for single value fields {es-pull}107832[#107832] +* Optimise histogram aggregations for single value fields {es-pull}107893[#107893] +* Optimise multiterms aggregation for single value fields {es-pull}107937[#107937] +* Optimise terms aggregations for single value fields {es-pull}107930[#107930] +* Speed up collecting zero document string terms 
{es-pull}110922[#110922] + +Allocation:: +* Log shard movements {es-pull}105829[#105829] +* Support effective watermark thresholds in node stats API {es-pull}107244[#107244] (issue: {es-issue}106676[#106676]) + +Application:: +* Add Create or update query rule API call {es-pull}109042[#109042] +* Rename rule query and add support for multiple rulesets {es-pull}108831[#108831] +* Support multiple associated groups for TopN {es-pull}108409[#108409] (issue: {es-issue}108018[#108018]) +* [Connector API] Change `UpdateConnectorFiltering` API to have better defaults {es-pull}108612[#108612] + +Authentication:: +* Expose API Key cache metrics {es-pull}109078[#109078] + +Authorization:: +* Cluster state role mapper file settings service {es-pull}107886[#107886] +* Cluster-state based Security role mapper {es-pull}107410[#107410] +* Introduce role description field {es-pull}107088[#107088] +* [Osquery] Extend `kibana_system` role with an access to new `osquery_manager` index {es-pull}108849[#108849] + +Data streams:: +* Add metrics@custom component template to metrics-*-* index template {es-pull}109540[#109540] (issue: {es-issue}109475[#109475]) +* Apm-data: enable plugin by default {es-pull}108860[#108860] +* Apm-data: ignore malformed fields, and too many dynamic fields {es-pull}108444[#108444] +* Apm-data: improve default pipeline performance {es-pull}108396[#108396] (issue: {es-issue}108290[#108290]) +* Apm-data: improve indexing resilience {es-pull}108227[#108227] +* Apm-data: increase priority above Fleet templates {es-pull}108885[#108885] +* Apm-data: increase version for templates {es-pull}108340[#108340] +* Apm-data: set codec: best_compression for logs-apm.* data streams {es-pull}108862[#108862] +* Remove `default_field: message` from metrics index templates {es-pull}110651[#110651] + +Distributed:: +* Add `wait_for_completion` parameter to delete snapshot request {es-pull}109462[#109462] (issue: {es-issue}101300[#101300]) +* Improve mechanism for extracting the result of a `PlainActionFuture` {es-pull}110019[#110019] (issue: {es-issue}108125[#108125]) + +ES|QL:: +* Add `BlockHash` for 3 `BytesRefs` {es-pull}108165[#108165] +* Allow `LuceneSourceOperator` to early terminate {es-pull}108820[#108820] +* Check if `CsvTests` required capabilities exist {es-pull}108684[#108684] +* ESQL: Add aggregates node level reduction {es-pull}107876[#107876] +* ESQL: Add more time span units {es-pull}108300[#108300] +* ESQL: Implement LOOKUP, an "inline" enrich {es-pull}107987[#107987] (issue: {es-issue}107306[#107306]) +* ESQL: Renamed `TopList` to Top {es-pull}110347[#110347] +* ESQL: Union Types Support {es-pull}107545[#107545] (issue: {es-issue}100603[#100603]) +* ESQL: add REPEAT string function {es-pull}109220[#109220] +* ES|QL Add primitive float support to the Compute Engine {es-pull}109746[#109746] (issue: {es-issue}109178[#109178]) +* ES|QL Add primitive float variants of all aggregators to the compute engine {es-pull}109781[#109781] +* ES|QL: vectorize eval {es-pull}109332[#109332] +* Optimize ST_DISTANCE filtering with Lucene circle intersection query {es-pull}110102[#110102] (issue: {es-issue}109972[#109972]) +* Optimize for single value in ordinals grouping {es-pull}108118[#108118] +* Rewrite away type converting functions that do not convert types {es-pull}108713[#108713] (issue: {es-issue}107716[#107716]) +* ST_DISTANCE Function {es-pull}108764[#108764] (issue: {es-issue}108212[#108212]) +* Support metrics counter types in ESQL {es-pull}107877[#107877] +* [ESQL] CBRT function 
{es-pull}108574[#108574] +* [ES|QL] Convert string to datetime when the other size of an arithmetic operator is `date_period` or `time_duration` {es-pull}108455[#108455] +* [ES|QL] Support Named and Positional Parameters in `EsqlQueryRequest` {es-pull}108421[#108421] (issue: {es-issue}107029[#107029]) +* [ES|QL] `weighted_avg` {es-pull}109993[#109993] + +Engine:: +* Drop shards close timeout when stopping node. {es-pull}107978[#107978] (issue: {es-issue}107938[#107938]) +* Update translog `writeLocation` for `flushListener` after commit {es-pull}109603[#109603] + +Geo:: +* Optimize `GeoBounds` and `GeoCentroid` aggregations for single value fields {es-pull}107663[#107663] + +Health:: +* Log details of non-green indicators in `HealthPeriodicLogger` {es-pull}108266[#108266] + +Highlighting:: +* Unified Highlighter to support matched_fields {es-pull}107640[#107640] (issue: {es-issue}5172[#5172]) + +Infra/Core:: +* Add allocation explain output for THROTTLING shards {es-pull}109563[#109563] +* Create custom parser for ISO-8601 datetimes {es-pull}106486[#106486] (issue: {es-issue}102063[#102063]) +* Extend ISO8601 datetime parser to specify forbidden fields, allowing it to be used on more formats {es-pull}108606[#108606] +* add Elastic-internal stable bridge api for use by Logstash {es-pull}108171[#108171] + +Infra/Metrics:: +* Add auto-sharding APM metrics {es-pull}107593[#107593] +* Add request metric to `RestController` to track success/failure (by status code) {es-pull}109957[#109957] +* Allow RA metrics to be reported upon parsing completed or accumulated {es-pull}108726[#108726] +* Provide the `DocumentSizeReporter` with index mode {es-pull}108947[#108947] +* Return noop instance `DocSizeObserver` for updates with scripts {es-pull}108856[#108856] + +Ingest Node:: +* Add `continent_code` support to the geoip processor {es-pull}108780[#108780] (issue: {es-issue}85820[#85820]) +* Add support for the 'Connection Type' database to the geoip processor {es-pull}108683[#108683] +* Add support for the 'Domain' database to the geoip processor {es-pull}108639[#108639] +* Add support for the 'ISP' database to the geoip processor {es-pull}108651[#108651] +* Adding `hits_time_in_millis` and `misses_time_in_millis` to enrich cache stats {es-pull}107579[#107579] +* Adding `user_type` support for the enterprise database for the geoip processor {es-pull}108687[#108687] +* Adding human readable times to geoip stats {es-pull}107647[#107647] +* Include doc size info in ingest stats {es-pull}107240[#107240] (issue: {es-issue}106386[#106386]) +* Make ingest byte stat names more descriptive {es-pull}108786[#108786] +* Return ingest byte stats even when 0-valued {es-pull}108796[#108796] +* Test pipeline run after reroute {es-pull}108693[#108693] + +Logs:: +* Introduce a node setting controlling the activation of the `logs` index mode in logs@settings component template {es-pull}109025[#109025] (issue: {es-issue}108762[#108762]) +* Support index sorting with nested fields {es-pull}110251[#110251] (issue: {es-issue}107349[#107349]) + +Machine Learning:: +* Add Anthropic messages integration to Inference API {es-pull}109893[#109893] +* Add `sparse_vector` query {es-pull}108254[#108254] +* Add model download progress to the download task status {es-pull}107676[#107676] +* Add rate limiting support for the Inference API {es-pull}107706[#107706] +* Add the rerank task to the Elasticsearch internal inference service {es-pull}108452[#108452] +* Default the HF service to cosine similarity {es-pull}109967[#109967] +* GA 
the update trained model action {es-pull}108868[#108868] +* Handle the "JSON memory allocator bytes" field {es-pull}109653[#109653] +* Inference Processor: skip inference when all fields are missing {es-pull}108131[#108131] +* Log 'No statistics at.. ' message as a warning {ml-pull}2684[#2684] +* Optimise frequent item sets aggregation for single value fields {es-pull}108130[#108130] +* Sentence Chunker {es-pull}110334[#110334] +* [Inference API] Add Amazon Bedrock Support to Inference API {es-pull}110248[#110248] +* [Inference API] Add Mistral Embeddings Support to Inference API {es-pull}109194[#109194] +* [Inference API] Check for related pipelines on delete inference endpoint {es-pull}109123[#109123] + +Mapping:: +* Add ignored field values to synthetic source {es-pull}107567[#107567] +* Apply FLS to the contents of `IgnoredSourceFieldMapper` {es-pull}109931[#109931] +* Binary field enables doc values by default for index mode with synthe… {es-pull}107739[#107739] (issue: {es-issue}107554[#107554]) +* Feature/annotated text store defaults {es-pull}107922[#107922] (issue: {es-issue}107734[#107734]) +* Handle `ignore_above` in synthetic source for flattened fields {es-pull}110214[#110214] +* Opt in keyword field into fallback synthetic source if needed {es-pull}110016[#110016] +* Opt in number fields into fallback synthetic source when doc values a… {es-pull}110160[#110160] +* Reflect latest changes in synthetic source documentation {es-pull}109501[#109501] +* Store source for fields in objects with `dynamic` override {es-pull}108911[#108911] +* Store source for nested objects {es-pull}108818[#108818] +* Support synthetic source for `geo_point` when `ignore_malformed` is used {es-pull}109651[#109651] +* Support synthetic source for `scaled_float` and `unsigned_long` when `ignore_malformed` is used {es-pull}109506[#109506] +* Support synthetic source for date fields when `ignore_malformed` is used {es-pull}109410[#109410] +* Support synthetic source together with `ignore_malformed` in histogram fields {es-pull}109882[#109882] +* Track source for arrays of objects {es-pull}108417[#108417] (issue: {es-issue}90708[#90708]) +* Track synthetic source for disabled objects {es-pull}108051[#108051] + +Network:: +* Detect long-running tasks on network threads {es-pull}109204[#109204] + +Ranking:: +* Enabling profiling for `RankBuilders` and adding tests for RRF {es-pull}109470[#109470] + +Relevance:: +* [Query Rules] Add API calls to get or delete individual query rules within a ruleset {es-pull}109554[#109554] +* [Query Rules] Require Enterprise License for Query Rules {es-pull}109634[#109634] + +Search:: +* Add AVX-512 optimised vector distance functions for int7 on x64 {es-pull}109084[#109084] +* Add `SparseVectorStats` {es-pull}108793[#108793] +* Add `_name` support for top level `knn` clauses {es-pull}107645[#107645] (issues: {es-issue}106254[#106254], {es-issue}107448[#107448]) +* Add a SIMD (AVX2) optimised vector distance function for int7 on x64 {es-pull}108088[#108088] +* Add min/max range of the `event.ingested` field to cluster state for searchable snapshots {es-pull}106252[#106252] +* Add per-field KNN vector format to Index Segments API {es-pull}107216[#107216] +* Add support for hiragana_uppercase & katakana_uppercase token filters in kuromoji analysis plugin {es-pull}106553[#106553] +* Adding support for explain in rrf {es-pull}108682[#108682] +* Allow rescorer with field collapsing {es-pull}107779[#107779] (issue: {es-issue}27243[#27243]) +* Limit the value in prefix query 
{es-pull}108537[#108537] (issue: {es-issue}108486[#108486]) +* Make dense vector field type updatable {es-pull}106591[#106591] +* Multivalue Sparse Vector Support {es-pull}109007[#109007] + +Security:: +* Add bulk delete roles API {es-pull}110383[#110383] +* Remote cluster - API key security model - cluster privileges {es-pull}107493[#107493] + +Snapshot/Restore:: +* Denser in-memory representation of `ShardBlobsToDelete` {es-pull}109848[#109848] +* Log repo UUID at generation/registration time {es-pull}109672[#109672] +* Make repository analysis API available to non-operators {es-pull}110179[#110179] (issue: {es-issue}100318[#100318]) +* Track `RequestedRangeNotSatisfiedException` separately in S3 Metrics {es-pull}109657[#109657] + +Stats:: +* DocsStats: Add human readable bytesize {es-pull}109720[#109720] + +TSDB:: +* Optimise `time_series` aggregation for single value fields {es-pull}107990[#107990] +* Support `ignore_above` on keyword dimensions {es-pull}110337[#110337] + +Vector Search:: +* Adding hamming distance function to painless for `dense_vector` fields {es-pull}109359[#109359] +* Support k parameter for knn query {es-pull}110233[#110233] (issue: {es-issue}108473[#108473]) + +[[feature-8.15.0]] +[float] +=== New features + +Aggregations:: +* Opt `scripted_metric` out of parallelization {es-pull}109597[#109597] + +Application:: +* [Connector API] Add claim sync job endpoint {es-pull}109480[#109480] + +ES|QL:: +* ESQL: Add `ip_prefix` function {es-pull}109070[#109070] (issue: {es-issue}99064[#99064]) +* ESQL: Introduce a casting operator, `::` {es-pull}107409[#107409] +* ESQL: `top_list` aggregation {es-pull}109386[#109386] (issue: {es-issue}109213[#109213]) +* ESQL: add Arrow dataframes output format {es-pull}109873[#109873] +* Reapply "ESQL: Expose "_ignored" metadata field" {es-pull}108871[#108871] + +Infra/REST API:: +* Add a capabilities API to check node and cluster capabilities {es-pull}106820[#106820] + +Ingest Node:: +* Directly download commercial ip geolocation databases from providers {es-pull}110844[#110844] +* Mark the Redact processor as Generally Available {es-pull}110395[#110395] + +Logs:: +* Introduce logs index mode as Tech Preview {es-pull}108896[#108896] (issue: {es-issue}108896[#108896]) + +Machine Learning:: +* Add support for Azure AI Studio embeddings and completions to the inference service. 
{es-pull}108472[#108472] + +Mapping:: +* Add `semantic_text` field type and `semantic` query {es-pull}110338[#110338] +* Add generic fallback implementation for synthetic source {es-pull}108222[#108222] +* Add synthetic source support for `geo_shape` via fallback implementation {es-pull}108881[#108881] +* Add synthetic source support for binary fields {es-pull}107549[#107549] +* Enable fallback synthetic source by default {es-pull}109370[#109370] (issue: {es-issue}106460[#106460]) +* Enable fallback synthetic source for `point` and `shape` {es-pull}109312[#109312] +* Enable fallback synthetic source for `token_count` {es-pull}109044[#109044] +* Implement synthetic source support for annotated text field {es-pull}107735[#107735] +* Implement synthetic source support for range fields {es-pull}107081[#107081] +* Support arrays in fallback synthetic source implementation {es-pull}108878[#108878] +* Support synthetic source for `aggregate_metric_double` when ignore_malf… {es-pull}108746[#108746] + +Ranking:: +* Add text similarity reranker retriever {es-pull}109813[#109813] + +Relevance:: +* Mark Query Rules as GA {es-pull}110004[#110004] + +Search:: +* Add new int4 quantization to dense_vector {es-pull}109317[#109317] +* Adding RankFeature search phase implementation {es-pull}108538[#108538] +* Adding aggregations support for the `_ignored` field {es-pull}101373[#101373] (issue: {es-issue}59946[#59946]) +* Update Lucene version to 9.11 {es-pull}109219[#109219] + +Security:: +* Query Roles API {es-pull}108733[#108733] + +Transform:: +* Introduce _transform/_node_stats API {es-pull}107279[#107279] + +Vector Search:: +* Adds new `bit` `element_type` for `dense_vectors` {es-pull}110059[#110059] + +[[upgrade-8.15.0]] +[float] +=== Upgrades + +Infra/Plugins:: +* Update ASM to 9.7 for plugin scanner {es-pull}108822[#108822] (issue: {es-issue}108776[#108776]) + +Ingest Node:: +* Bump Tika dependency to 2.9.2 {es-pull}108144[#108144] + +Network:: +* Upgrade to Netty 4.1.109 {es-pull}108155[#108155] + +Search:: +* Upgrade to Lucene-9.11.1 {es-pull}110234[#110234] + +Security:: +* Upgrade bouncy castle (non-fips) to 1.78.1 {es-pull}108223[#108223] + +Snapshot/Restore:: +* Bump jackson version in modules:repository-azure {es-pull}109717[#109717] + + diff --git a/docs/reference/release-notes/8.15.1.asciidoc b/docs/reference/release-notes/8.15.1.asciidoc new file mode 100644 index 0000000000000..5abca33c3dcb0 --- /dev/null +++ b/docs/reference/release-notes/8.15.1.asciidoc @@ -0,0 +1,107 @@ +[[release-notes-8.15.1]] +== {es} version 8.15.1 + +Also see <>. + +[[known-issues-8.15.1]] +[float] +=== Known issues +* Elasticsearch will not start if custom role mappings are configured using the +`xpack.security.authc.realms.*.files.role_mapping` configuration option. As a workaround, custom role mappings +can be configured using the https://www.elastic.co/guide/en/elasticsearch/reference/current/security-api-put-role-mapping.html[REST API] (issue: {es-issue}112503[#112503]) + +* ES|QL queries can lead to node crashes due to Out Of Memory errors when: +** Multiple indices match the query pattern +** These indices have many conflicting field mappings +** Many of those fields are included in the request +These issues deplete heap memory, increasing the likelihood of OOM errors. (issue: {es-issue}111964[#111964], {es-issue}111358[#111358]). +In Kibana, you might indirectly execute these queries when using Discover, or adding a Field Statistics panel to a dashboard. 
++ +To work around this issue, you have a number of options: +** Downgrade to an earlier version +** Upgrade to 8.15.2 upon release +** Follow the instructions to +<> +** Change the default data view in Discover to a smaller set of indices and/or one with fewer mapping conflicts. + +* Index Stats, Node Stats and Cluster Stats APIs can return a null pointer exception if an index contains a `dense_vector` field +but there is an index segment that does not contain any documents with a dense vector field ({es-pull}112720[#112720]). Workarounds: +** If the affected index already contains documents with a dense vector field, force merge the index to a single segment. +** If the affected index does not already contain documents with a dense vector field, index a document with a dense vector field +and then force merge to a single segment. +** If the affected index's `dense_vector` fields are unused, reindex without the `dense_vector` fields. + +* Synthetic source bug. Synthetic source may fail to generate the _source at runtime, causing failures in get APIs or +partial failures in the search APIs. The result is that for the affected documents the _source can't be retrieved. +There is no workaround and the only option is to upgrade to 8.15.2 when released. ++ +If you use synthetic source then you may be affected by this bug if the following is true: +** If you have more fields than the `index.mapping.total_fields.limit` setting allows. +** If you use dynamic mappings and the `index.mapping.total_fields.ignore_dynamic_beyond_limit` setting is enabled. + +[[bug-8.15.1]] +[float] +=== Bug fixes + +Aggregations:: +* Revert "Avoid bucket copies in Aggs" {es-pull}111758[#111758] (issue: {es-issue}111679[#111679]) + +Authorization:: +* Fix DLS over Runtime Fields {es-pull}112260[#112260] (issue: {es-issue}111637[#111637]) + +ES|QL:: +* Avoid losing error message in failure collector {es-pull}111983[#111983] (issue: {es-issue}111894[#111894]) +* Avoid wrapping rejection exception in exchange {es-pull}112178[#112178] (issue: {es-issue}112106[#112106]) +* ESQL: Fix for overzealous validation in case of invalid mapped fields {es-pull}111475[#111475] (issue: {es-issue}111452[#111452]) + +Geo:: +* Add maximum nested depth check to WKT parser {es-pull}111843[#111843] +* Always check `crsType` when folding spatial functions {es-pull}112090[#112090] (issue: {es-issue}112089[#112089]) +* Fix NPE when executing doc value queries over shape geometries with empty segments {es-pull}112139[#112139] + +Indices APIs:: +* Fix template alias parsing livelock {es-pull}112217[#112217] + +Inference:: +* Fix the bug where the run() function of ExecutableInferenceRequest throws an exception when get inferenceEntityId.
{es-pull}112135[#112135] + +Infra/Core:: +* Fix toReleaseVersion() when called on the current version id {es-pull}112242[#112242] (issue: {es-issue}111900[#111900]) +* Fix windows memory locking {es-pull}111866[#111866] (issue: {es-issue}111847[#111847]) + +Ingest Node:: +* Fixing incorrect bulk request took time {es-pull}111863[#111863] (issue: {es-issue}111854[#111854]) +* Improve performance of grok pattern cycle detection {es-pull}111947[#111947] + +Logs:: +* Merge multiple ignored source entires for the same field {es-pull}111994[#111994] (issue: {es-issue}111694[#111694]) + +Machine Learning:: +* [Inference API] Move Delete inference checks to threadpool worker {es-pull}111646[#111646] + +Mapping:: +* Check for valid `parentDoc` before retrieving its previous {es-pull}112005[#112005] (issue: {es-issue}111990[#111990]) +* Fix calculation of parent offset for ignored source in some cases {es-pull}112046[#112046] +* Fix synthetic source for empty nested objects {es-pull}111943[#111943] (issue: {es-issue}111811[#111811]) +* No error when `store_array_source` is used without synthetic source {es-pull}111966[#111966] +* Prevent synthetic field loaders accessing stored fields from using stale data {es-pull}112173[#112173] (issue: {es-issue}112156[#112156]) + +Ranking:: +* Properly handle filters on `TextSimilarityRank` retriever {es-pull}111673[#111673] + +Relevance:: +* Semantic reranking should fail whenever inference ID does not exist {es-pull}112038[#112038] (issue: {es-issue}111934[#111934]) +* [Bugfix] Add `accessDeclaredMembers` permission to allow search application templates to parse floats {es-pull}111285[#111285] + +Search:: +* Explain Function Score Query {es-pull}111807[#111807] + +Security:: +* Fix "unexpected field [remote_cluster]" for CCS (RCS 1.0) when using API key that references `remote_cluster` {es-pull}112226[#112226] +* Fix connection timeout for `OpenIdConnectAuthenticator` get Userinfo {es-pull}112230[#112230] + +Vector Search:: +* Fix `NullPointerException` when doing knn search on empty index without dims {es-pull}111756[#111756] (issue: {es-issue}111733[#111733]) +* Speed up dense/sparse vector stats {es-pull}111729[#111729] (issue: {es-issue}111715[#111715]) + + diff --git a/docs/reference/release-notes/8.15.2.asciidoc b/docs/reference/release-notes/8.15.2.asciidoc new file mode 100644 index 0000000000000..d2853215098cd --- /dev/null +++ b/docs/reference/release-notes/8.15.2.asciidoc @@ -0,0 +1,44 @@ +[[release-notes-8.15.2]] +== {es} version 8.15.2 + +Also see <>. 
+ +[[bug-8.15.2]] +[float] +=== Bug fixes + +Authorization:: +* Fix remote cluster credential secure settings reload {es-pull}111535[#111535] + +ES|QL:: +* ESQL: Don't mutate the `BoolQueryBuilder` in plan {es-pull}111519[#111519] +* ES|QL: Fix `ResolvedEnrichPolicy` serialization (bwc) in v 8.15 {es-pull}112985[#112985] (issue: {es-issue}112968[#112968]) +* Fix union-types where one index is missing the field {es-pull}111932[#111932] (issue: {es-issue}111912[#111912]) +* Support widening of numeric types in union-types {es-pull}112610[#112610] (issue: {es-issue}111277[#111277]) +* Shorten error messages for UnsupportedAttributes {es-pull}112819[#112819] +* Reduce memory footprint of serialized query execution plans {es-pull}112865[#112865] + +Infra/Core:: +* JSON parse failures should be 4xx codes {es-pull}112703[#112703] +* Json parsing exceptions should not cause 500 errors {es-pull}111548[#111548] (issue: {es-issue}111542[#111542]) +* Make sure file accesses in `DnRoleMapper` are done in stack frames with permissions {es-pull}112400[#112400] + +Ingest Node:: +* Fix missing header in `put_geoip_database` JSON spec {es-pull}112581[#112581] + +Logs:: +* Fix encoding of dynamic arrays in ignored source {es-pull}112713[#112713] + +Mapping:: +* Full coverage of ECS by ecs@mappings when `date_detection` is disabled {es-pull}112444[#112444] (issue: {es-issue}112398[#112398]) + +Search:: +* Fix parsing error in `_terms_enum` API {es-pull}112872[#112872] (issue: {es-issue}94378[#94378]) + +Security:: +* Allowlist `tracestate` header on remote server port {es-pull}112649[#112649] + +Vector Search:: +* Fix NPE in `dense_vector` stats {es-pull}112720[#112720] + + diff --git a/docs/reference/release-notes/highlights.asciidoc b/docs/reference/release-notes/highlights.asciidoc index ead1596c64fdd..c33c882428a7c 100644 --- a/docs/reference/release-notes/highlights.asciidoc +++ b/docs/reference/release-notes/highlights.asciidoc @@ -32,17 +32,88 @@ endif::[] // tag::notable-highlights[] [discrete] -[[stored_fields_are_compressed_with_zstandard_instead_of_lz4_deflate]] -=== Stored fields are now compressed with ZStandard instead of LZ4/DEFLATE -Stored fields are now compressed by splitting documents into blocks, which -are then compressed independently with ZStandard. `index.codec: default` -(default) uses blocks of at most 14kB or 128 documents compressed with level -0, while `index.codec: best_compression` uses blocks of at most 240kB or -2048 documents compressed at level 3. On most datasets that we tested -against, this yielded storage improvements in the order of 10%, slightly -faster indexing and similar retrieval latencies. - -{es-pull}103374[#103374] +[[stricter_failure_handling_in_multi_repo_get_snapshots_request_handling]] +=== Stricter failure handling in multi-repo get-snapshots request handling +If a multi-repo get-snapshots request encounters a failure in one of the +targeted repositories then earlier versions of Elasticsearch would proceed +as if the faulty repository did not exist, except for a per-repository +failure report in a separate section of the response body. This makes it +impossible to paginate the results properly in the presence of failures. In +versions 8.15.0 and later this API's failure handling behaviour has been +made stricter, reporting an overall failure if any targeted repository's +contents cannot be listed. 
+ +{es-pull}107191[#107191] + +[discrete] +[[introduce_logs_index_mode_as_tech_preview]] +=== Introduce `logsdb` index mode as Tech Preview +This change introduces a new index mode named `logsdb`. +When the new index mode is enabled, the following storage savings features are enabled automatically: + +* Synthetic source, which omits storing the `_source` field. When `_source` or part of it is requested it is synthesized on the fly at runtime. +* Index sorting. By default indices are sorted by `host.name` and `@timestamp` fields at index time. This can be overridden if other sorting fields yield a better compression rate. +* More space-efficient compression for fields with doc values enabled. These are the same codecs used + when `time_series` index mode is enabled. + +The `index.mode` index setting should be set to `logsdb` in index templates or when creating a plain index. +Benchmarks and other tests have shown that logs data sets use around 2.5 times less storage with the new index mode enabled compared to not configuring it. +The new `logsdb` index mode is a tech preview feature. + +{es-pull}108896[#108896] + +[discrete] +[[add_new_int4_quantization_to_dense_vector]] +=== Add new int4 quantization to dense_vector +New int4 (half-byte) scalar quantization support via two new index types: `int4_hnsw` and `int4_flat`. +This gives an 8x reduction from `float32` with some accuracy loss. In addition to requiring less memory, this +improves query and merge speed significantly when compared to raw vectors. + +{es-pull}109317[#109317] + +[discrete] +[[mark_query_rules_as_ga]] +=== Mark Query Rules as GA +This change marks query rules as Generally Available. The query rules APIs are no longer +in tech preview. + +{es-pull}110004[#110004] + +[discrete] +[[adds_new_bit_element_type_for_dense_vectors]] +=== Adds new `bit` `element_type` for `dense_vectors` +This adds `bit` vector support by adding `element_type: bit` for +vectors. This new element type works for indexed and non-indexed +vectors. Additionally, it works with `hnsw` and `flat` index types. No +quantization-based codec works with this element type; this is +consistent with `byte` vectors. + +`bit` vectors accept up to `32768` dimensions in size and expect vectors +that are being indexed to be encoded either as a hexadecimal string or a +`byte[]` array where each element of the `byte` array represents `8` +bits of the vector. + +`bit` vectors support script usage and regular query usage. When +indexed, all comparisons done are `xor` and `popcount` summations (that is, +hamming distance), and the scores are transformed and normalized given +the vector dimensions. + +For scripts, `l1norm` is the same as `hamming` distance and `l2norm` is +`sqrt(l1norm)`. `dotProduct` and `cosineSimilarity` are not supported. + +Note that the dimensions expected by this element_type must always be +divisible by `8`, and the `byte[]` vectors provided for indexing must +have size `dim/8`, where each byte element represents `8` bits of +the vector. + +{es-pull}110059[#110059] + +[discrete] +[[redact_processor_generally_available]] +=== The Redact processor is Generally Available +The Redact processor uses the Grok rules engine to obscure text in the input document matching the given Grok patterns. The Redact processor was initially released as Technical Preview in `8.7.0`, and is now released as Generally Available.
+ +{es-pull}110395[#110395] // end::notable-highlights[] @@ -61,6 +132,17 @@ set the JVM property `es.datetime.java_time_parsers=true` on all ES nodes. {es-pull}106486[#106486] +[discrete] +[[new_custom_parser_for_more_iso_8601_date_formats]] +=== New custom parser for more ISO-8601 date formats +Following on from #106486, this extends the custom ISO-8601 datetime parser to cover the `strict_year`, +`strict_year_month`, `strict_date_time`, `strict_date_time_no_millis`, `strict_date_hour_minute_second`, +`strict_date_hour_minute_second_millis`, and `strict_date_hour_minute_second_fraction` date formats. +As before, the parser will use the existing java.time parser if there are parsing issues, and the +`es.datetime.java_time_parsers=true` JVM property will force the use of the old parsers regardless. + +{es-pull}108606[#108606] + [discrete] [[preview_support_for_connection_type_domain_isp_databases_in_geoip_processor]] === Preview: Support for the 'Connection Type, 'Domain', and 'ISP' databases in the geoip processor @@ -86,3 +168,23 @@ But, here are some particular highlights: {es-pull}109219[#109219] +[discrete] +[[synthetic_source_improvements]] +=== Synthetic `_source` improvements +There are multiple improvements to synthetic `_source` functionality: + +* Synthetic `_source` is now supported for all field types including `nested` and `object`. `object` fields are supported with `enabled` set to `false`. + +* Synthetic `_source` can be enabled together with `ignore_malformed` and `ignore_above` parameters for all field types that support them. + +{es-pull}109501[#109501] + +[discrete] +[[index_sorting_on_indexes_with_nested_fields]] +=== Index sorting on indexes with nested fields +Index sorting is now supported for indexes with mappings containing nested objects. +The index sort spec (as specified by `index.sort.field`) can't contain any nested +fields, still. + +{es-pull}110251[#110251] + diff --git a/docs/reference/reranking/index.asciidoc b/docs/reference/reranking/index.asciidoc new file mode 100644 index 0000000000000..3171be7c872d4 --- /dev/null +++ b/docs/reference/reranking/index.asciidoc @@ -0,0 +1,70 @@ +[[re-ranking-overview]] += Re-ranking + +Many search systems are built on multi-stage retrieval pipelines. + +Earlier stages use cheap, fast algorithms to find a broad set of possible matches. + +Later stages use more powerful models, often machine learning-based, to reorder the documents. +This step is called re-ranking. +Because the resource-intensive model is only applied to the smaller set of pre-filtered results, this approach returns more relevant results while still optimizing for search performance and computational costs. + +{es} supports various ranking and re-ranking techniques to optimize search relevance and performance. + +[float] +[[re-ranking-two-stage-pipeline]] +== Two-stage retrieval pipelines + + +[float] +[[re-ranking-first-stage-pipeline]] +=== Initial retrieval + +[float] +[[re-ranking-ranking-overview-bm25]] +==== Full-text search: BM25 scoring + +{es} ranks documents based on term frequency and inverse document frequency, adjusted for document length. +https://en.wikipedia.org/wiki/Okapi_BM25[BM25] is the default statistical scoring algorithm in {es}. + +[float] +[[re-ranking-ranking-overview-vector]] +==== Vector search: similarity scoring + +Vector search involves transforming data into dense or sparse vector embeddings to capture semantic meanings, and computing similarity scores for query vectors. 
+Store vectors using `semantic_text` fields for automatic inference and vectorization or `dense_vector` and `sparse_vector` fields when you need more control over the underlying embedding model. +Query vector fields with `semantic`, `knn` or `sparse_vector` queries to compute similarity scores. +Refer to <> for more information. + +[float] +[[re-ranking-ranking-overview-hybrid]] +==== Hybrid techniques + +Hybrid search techniques combine results from full-text and vector search pipelines. +{es} enables combining lexical matching (BM25) and vector search scores using the <> algorithm. + +[float] +[[re-ranking-overview-second-stage]] +=== Re-ranking + +When using the following advanced re-ranking pipelines, first-stage retrieval mechanisms effectively generate a set of candidates. +These candidates are funneled into the re-ranker to perform more computationally expensive re-ranking tasks. + +[float] +[[re-ranking-overview-semantic]] +==== Semantic re-ranking + +<> uses machine learning models to reorder search results based on their semantic similarity to a query. +Models can be hosted directly in your {es} cluster, or you can use <> to call models provided by third-party services. +Semantic re-ranking enables out-of-the-box semantic search capabilities on existing full-text search indices. + +[float] +[[re-ranking-overview-ltr]] +==== Learning to Rank (LTR) + +<> is for advanced users. +Learning To Rank involves training a machine learning model to build a ranking function for your search experience that updates over time. +LTR is best suited for when you have ample training data and need highly customized relevance tuning. + +include::semantic-reranking.asciidoc[] +include::learning-to-rank.asciidoc[] diff --git a/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc b/docs/reference/reranking/learning-to-rank-model-training.asciidoc similarity index 96% rename from docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc rename to docs/reference/reranking/learning-to-rank-model-training.asciidoc index 6525147839412..0f4640ebdf347 100644 --- a/docs/reference/search/search-your-data/learning-to-rank-model-training.asciidoc +++ b/docs/reference/reranking/learning-to-rank-model-training.asciidoc @@ -4,8 +4,6 @@ Deploy and manage LTR models ++++ -preview::["The Learning To Rank feature is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] - NOTE: This feature was introduced in version 8.12.0 and is only available to certain subscription levels. For more information, see {subscriptions}. diff --git a/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc b/docs/reference/reranking/learning-to-rank-search-usage.asciidoc similarity index 91% rename from docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc rename to docs/reference/reranking/learning-to-rank-search-usage.asciidoc index 2e9693eff0451..f14219e24bc11 100644 --- a/docs/reference/search/search-your-data/learning-to-rank-search-usage.asciidoc +++ b/docs/reference/reranking/learning-to-rank-search-usage.asciidoc @@ -4,8 +4,6 @@ Search using LTR ++++ -preview::["The Learning To Rank feature is in technical preview and may be changed or removed in a future release. 
Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] - NOTE: This feature was introduced in version 8.12.0 and is only available to certain subscription levels. For more information, see {subscriptions}. diff --git a/docs/reference/search/search-your-data/learning-to-rank.asciidoc b/docs/reference/reranking/learning-to-rank.asciidoc similarity index 96% rename from docs/reference/search/search-your-data/learning-to-rank.asciidoc rename to docs/reference/reranking/learning-to-rank.asciidoc index 08fad9db9c0f6..f49f7074d2dbc 100644 --- a/docs/reference/search/search-your-data/learning-to-rank.asciidoc +++ b/docs/reference/reranking/learning-to-rank.asciidoc @@ -1,8 +1,6 @@ [[learning-to-rank]] == Learning To Rank -preview::["The Learning To Rank feature is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but this feature is not subject to the support SLA of official GA features."] - NOTE: This feature was introduced in version 8.12.0 and is only available to certain subscription levels. For more information, see {subscriptions}. @@ -133,4 +131,4 @@ In the next pages of this guide you will learn to: * <> include::learning-to-rank-model-training.asciidoc[] -include::learning-to-rank-search-usage.asciidoc[] +include::learning-to-rank-search-usage.asciidoc[] \ No newline at end of file diff --git a/docs/reference/reranking/semantic-reranking.asciidoc b/docs/reference/reranking/semantic-reranking.asciidoc new file mode 100644 index 0000000000000..32b906c0c8449 --- /dev/null +++ b/docs/reference/reranking/semantic-reranking.asciidoc @@ -0,0 +1,137 @@ +[[semantic-reranking]] +== Semantic re-ranking + +preview::[] + +[TIP] +==== +This overview focuses more on the high-level concepts and use cases for semantic re-ranking. For full implementation details on how to set up and use semantic re-ranking in {es}, see the <> in the Search API docs. +==== + +Re-rankers improve the relevance of results from earlier-stage retrieval mechanisms. +_Semantic_ re-rankers use machine learning models to reorder search results based on their semantic similarity to a query. + +Semantic re-ranking requires relatively large and complex machine learning models and operates in real-time in response to queries. +This technique makes sense on a small _top-k_ result set, as one of the final steps in a pipeline. +This is a powerful technique for improving search relevance that works equally well with keyword, semantic, or hybrid retrieval algorithms. + +The next sections provide more details on the benefits, use cases, and model types used for semantic re-ranking. +The final sections include a practical, high-level overview of how to implement <> and links to the full reference documentation. + +[discrete] +[[semantic-reranking-use-cases]] +=== Use cases + +Semantic re-ranking enables a variety of use cases: + +* *Lexical (BM25) retrieval results re-ranking* +** Out-of-the-box semantic search by adding a simple API call to any lexical/BM25 retrieval pipeline. +** Adds semantic search capabilities on top of existing indices without reindexing, perfect for quick improvements. +** Ideal for environments with complex existing indices. + +* *Semantic retrieval results re-ranking* +** Improves results from semantic retrievers using ELSER sparse vector embeddings or dense vector embeddings by using more powerful models.
+ +* *General applications* +** Supports automatic and transparent chunking, eliminating the need for pre-chunking at index time. +** Provides explicit control over document relevance in retrieval-augmented generation (RAG) use cases or other scenarios involving large language model (LLM) inputs. + +Now that we've outlined the value of semantic re-ranking, we'll explore the specific models that power this process and how they differ. + +[discrete] +[[semantic-reranking-models]] +=== Cross-encoder and bi-encoder models + +At a high level, two model types are used for semantic re-ranking: cross-encoders and bi-encoders. + +NOTE: In this version, {es} *only supports cross-encoders* for semantic re-ranking. + +* A *cross-encoder model* can be thought of as a more powerful, all-in-one solution, because it generates query-aware document representations. +It takes the query and document texts as a single, concatenated input. +* A *bi-encoder model* takes as input either document or query text. +Document and query embeddings are computed separately, so they aren't aware of each other. +** To compute a ranking score, an external operation is required. This typically involves computing dot-product or cosine similarity between the query and document embeddings. + +In brief, cross-encoders provide high accuracy but are more resource-intensive. +Bi-encoders are faster and more cost-effective but less precise. + +In future versions, {es} will also support bi-encoders. +If you're interested in a more detailed analysis of the practical differences between cross-encoders and bi-encoders, expand the next section. + +.Comparisons between cross-encoder and bi-encoder +[%collapsible] +============== +The following is a non-exhaustive list of considerations when choosing between cross-encoders and bi-encoders for semantic re-ranking: + +* Because a cross-encoder model simultaneously processes both query and document texts, it can better infer their relevance, making it more effective as a reranker than a bi-encoder. +* Cross-encoder models are generally larger and more computationally intensive, resulting in higher latencies and increased computational costs. +* There are significantly fewer open-source cross-encoders, while bi-encoders offer a wide variety of sizes, languages, and other trade-offs. +* The effectiveness of cross-encoders can also improve the relevance of semantic retrievers. +For example, their ability to take word order into account can improve on dense or sparse embedding retrieval. +* When trained in tandem with specific retrievers (like lexical/BM25), cross-encoders can “correct” typical errors made by those retrievers. +* Cross-encoders output scores that are consistent across queries. +This enables you to maintain high relevance in result sets by setting a minimum score threshold for all queries. +For example, this is important when using results in a RAG workflow or if you're otherwise feeding results to LLMs. +Note that similarity scores from bi-encoders/embedding similarities are _query-dependent_, meaning you cannot set universal cut-offs. +* Bi-encoders rerank using embeddings. You can improve your re-ranking latency by creating embeddings at ingest-time. These embeddings can be stored for re-ranking without being indexed for retrieval, reducing your memory footprint. +============== + +[discrete] +[[semantic-reranking-in-es]] +=== Semantic re-ranking in {es} + +In {es}, semantic re-rankers are implemented using the {es} <> and a <>.
+ +To use semantic re-ranking in {es}, you need to: + +. *Choose a re-ranking model*. +Currently you can: + +** Integrate directly with the <> using the `rerank` task type +** Integrate directly with the <> using the `rerank` task type +** Upload a model to {es} from Hugging Face with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland]. You'll need to use the `text_similarity` NLP task type when loading the model using Eland. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es} for semantic re-ranking. +*** Then set up an <> with the `rerank` task type +. *Create a `rerank` task using the <>*. +The Inference API creates an inference endpoint and configures your chosen machine learning model to perform the re-ranking task. +. *Define a `text_similarity_reranker` retriever in your search request*. +The retriever syntax makes it simple to configure both the retrieval and re-ranking of search results in a single API call. + +.*Example search request* with semantic reranker +[%collapsible] +============== +The following example shows a search request that uses a semantic reranker to reorder the top-k documents based on their semantic similarity to the query. +[source,console] +---- +POST _search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "text": "How often does the moon hide the sun?" + } + } + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "How often does the moon hide the sun?", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:TBD] +============== + +[discrete] +[[semantic-reranking-learn-more]] +=== Learn more + +* Read the <> for syntax and implementation details +* Learn more about the <> abstraction +* Learn more about the Elastic <> +* Check out our https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/cohere-elasticsearch.ipynb[Python notebook] for using Cohere with {es} diff --git a/docs/reference/rest-api/common-parms.asciidoc b/docs/reference/rest-api/common-parms.asciidoc index 7c2e42a26b923..1437fb109cc21 100644 --- a/docs/reference/rest-api/common-parms.asciidoc +++ b/docs/reference/rest-api/common-parms.asciidoc @@ -1297,10 +1297,11 @@ tag::wait_for_active_shards[] `wait_for_active_shards`:: + -- -(Optional, string) The number of shard copies that must be active before -proceeding with the operation. Set to `all` or any positive integer up -to the total number of shards in the index (`number_of_replicas+1`). -Default: 1, the primary shard. +(Optional, string) The number of copies of each shard that must be active +before proceeding with the operation. Set to `all` or any non-negative integer +up to the total number of copies of each shard in the index +(`number_of_replicas+1`). Defaults to `1`, meaning to wait just for each +primary shard to be active. See <>. -- @@ -1326,13 +1327,21 @@ that lower ranked documents have more influence. This value must be greater than equal to `1`. Defaults to `60`. end::rrf-rank-constant[] -tag::rrf-window-size[] -`window_size`:: +tag::rrf-rank-window-size[] +`rank_window_size`:: (Optional, integer) + This value determines the size of the individual result sets per query. A higher value will improve result relevance at the cost of performance. The final ranked result set is pruned down to the search request's <>. 
-`window_size` must be greater than or equal to `size` and greater than or equal to `1`. +`rank_window_size` must be greater than or equal to `size` and greater than or equal to `1`. Defaults to the `size` parameter. -end::rrf-window-size[] +end::rrf-rank-window-size[] + +tag::rrf-filter[] +`filter`:: +(Optional, <>) ++ +Applies the specified <> to all of the specified sub-retrievers, +according to each retriever's specifications. +end::rrf-filter[] diff --git a/docs/reference/rest-api/security.asciidoc b/docs/reference/rest-api/security.asciidoc index 04cd838c45600..82cf38e52bd80 100644 --- a/docs/reference/rest-api/security.asciidoc +++ b/docs/reference/rest-api/security.asciidoc @@ -50,6 +50,7 @@ Use the following APIs to add, remove, update, and retrieve roles in the native * <> * <> * <> +* <> [discrete] [[security-token-apis]] @@ -192,6 +193,7 @@ include::security/get-app-privileges.asciidoc[] include::security/get-builtin-privileges.asciidoc[] include::security/get-role-mappings.asciidoc[] include::security/get-roles.asciidoc[] +include::security/query-role.asciidoc[] include::security/get-service-accounts.asciidoc[] include::security/get-service-credentials.asciidoc[] include::security/get-settings.asciidoc[] diff --git a/docs/reference/rest-api/security/get-roles.asciidoc b/docs/reference/rest-api/security/get-roles.asciidoc index 3eb5a735194c6..3cc2f95c6ea7e 100644 --- a/docs/reference/rest-api/security/get-roles.asciidoc +++ b/docs/reference/rest-api/security/get-roles.asciidoc @@ -38,7 +38,10 @@ API cannot retrieve roles that are defined in roles files. ==== {api-response-body-title} A successful call returns an array of roles with the JSON representation of the -role. +role. The returned role format is a simple extension of the <> format, +only adding an extra field `transient_metadata.enabled`. +This field is `false` in case the role is automatically disabled, for example when the license +level does not allow some permissions that the role grants. [[security-api-get-role-response-codes]] ==== {api-response-codes-title} diff --git a/docs/reference/rest-api/security/query-role.asciidoc b/docs/reference/rest-api/security/query-role.asciidoc new file mode 100644 index 0000000000000..937bd263140fc --- /dev/null +++ b/docs/reference/rest-api/security/query-role.asciidoc @@ -0,0 +1,283 @@ +[role="xpack"] +[[security-api-query-role]] +=== Query Role API + +++++ +Query Role +++++ + +Retrieves roles with <> in a <> fashion. + +[[security-api-query-role-request]] +==== {api-request-title} + +`GET /_security/_query/role` + +`POST /_security/_query/role` + +[[security-api-query-role-prereqs]] +==== {api-prereq-title} + +* To use this API, you must have at least the `read_security` cluster privilege. + +[[security-api-query-role-desc]] +==== {api-description-title} + +The role management APIs are generally the preferred way to manage roles, rather than using +<>. +The query roles API does not retrieve roles that are defined in roles files, nor <> ones. +You can optionally filter the results with a query. Also, the results can be paginated and sorted. + +[[security-api-query-role-request-body]] +==== {api-request-body-title} + +You can specify the following parameters in the request body: + +`query`:: +(Optional, string) A <> to filter which roles to return. +The query supports a subset of query types, including +<>, <>, +<>, <>, +<>, <>, +<>, <>, +<>, <>, +and <>. ++ +You can query the following values associated with a role. 
++ +.Valid values for `query` +[%collapsible%open] +==== +`name`:: +(keyword) The <> of the role. + +`description`:: +(text) The <> of the role. + +`metadata`:: +(flattened) Metadata field associated with the <>, such as `metadata.app_tag`. +Note that metadata is internally indexed as a <> field type. +This means that all sub-fields act like `keyword` fields when querying and sorting. +It also implies that it is not possible to refer to a subset of metadata fields using wildcard patterns, +e.g. `metadata.field*`, even for query types that support field name patterns. +Lastly, all the metadata fields can be searched together when simply mentioning the +`metadata` field (i.e. not followed by any dot and sub-field name). + +`applications`:: +The list of <> that the role grants. + +`application`::: +(keyword) The name of the application associated to the privileges and resources. + +`privileges`::: +(keyword) The names of the privileges that the role grants. + +`resources`::: +(keyword) The resources to which the privileges apply. + +==== + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=from] ++ +By default, you cannot page through more than 10,000 hits using the `from` and +`size` parameters. To page through more hits, use the +<> parameter. + +`size`:: +(Optional, integer) The number of hits to return. Must not be negative and defaults to `10`. ++ +By default, you cannot page through more than 10,000 hits using the `from` and +`size` parameters. To page through more hits, use the +<> parameter. + +`sort`:: +(Optional, object) <>. You can sort on `username`, `roles` or `enabled`. +In addition, sort can also be applied to the `_doc` field to sort by index order. + +`search_after`:: +(Optional, array) <> definition. + + +[[security-api-query-role-response-body]] +==== {api-response-body-title} + +This API returns the following top level fields: + +`total`:: +The total number of roles found. + +`count`:: +The number of roles returned in the response. + +`roles`:: +A list of roles that match the query. +The returned role format is an extension of the <> format. +It adds the `transient_metadata.enabled` and the `_sort` fields. +`transient_metadata.enabled` is set to `false` in case the role is automatically disabled, +for example when the role grants privileges that are not allowed by the installed license. +`_sort` is present when the search query sorts on some field. +It contains the array of values that have been used for sorting. 
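For instance, a request that combines several of the supported query types with paging and sorting shows how these response fields fit together: each returned role carries `transient_metadata.enabled` and, because the request sorts on `name`, a `_sort` array. The following is a minimal sketch only; the `wildcard` pattern and the `metadata.version` value are illustrative and not part of any built-in role.

[source,console]
----
POST /_security/_query/role
{
  "query": {
    "bool": {
      "must": [
        { "wildcard": { "name": "*admin*" } }
      ],
      "filter": [
        { "term": { "metadata.version": 1 } }
      ]
    }
  },
  "from": 0,
  "size": 5,
  "sort": [ "name" ]
}
----
// TEST[skip:TBD]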
+ +[[security-api-query-role-example]] +==== {api-examples-title} + +The following request lists all roles, sorted by the role name: + +[source,console] +---- +POST /_security/_query/role +{ + "sort": ["name"] +} +---- +// TEST[setup:admin_role,user_role] + +A successful call returns a JSON structure that contains the information +retrieved for one or more roles: + +[source,console-result] +---- +{ + "total": 2, + "count": 2, + "roles": [ <1> + { + "name" : "my_admin_role", + "cluster" : [ + "all" + ], + "indices" : [ + { + "names" : [ + "index1", + "index2" + ], + "privileges" : [ + "all" + ], + "field_security" : { + "grant" : [ + "title", + "body" + ] + }, + "allow_restricted_indices" : false + } + ], + "applications" : [ ], + "run_as" : [ + "other_user" + ], + "metadata" : { + "version" : 1 + }, + "transient_metadata" : { + "enabled" : true + }, + "description" : "Grants full access to all management features within the cluster.", + "_sort" : [ + "my_admin_role" + ] + }, + { + "name" : "my_user_role", + "cluster" : [ ], + "indices" : [ + { + "names" : [ + "index1", + "index2" + ], + "privileges" : [ + "all" + ], + "field_security" : { + "grant" : [ + "title", + "body" + ] + }, + "allow_restricted_indices" : false + } + ], + "applications" : [ ], + "run_as" : [ ], + "metadata" : { + "version" : 1 + }, + "transient_metadata" : { + "enabled" : true + }, + "description" : "Grants user access to some indicies.", + "_sort" : [ + "my_user_role" + ] + } + ] +} +---- +// TEST[continued] + +<1> The list of roles that were retrieved for this request + +Similarly, the following request can be used to query only the user access role, +given its description: + +[source,console] +---- +POST /_security/_query/role +{ + "query": { + "match": { + "description": { + "query": "user access" + } + } + }, + "size": 1 <1> +} +---- +// TEST[continued] + +<1> Return only the best matching role + +[source,console-result] +---- +{ + "total": 2, + "count": 1, + "roles": [ + { + "name" : "my_user_role", + "cluster" : [ ], + "indices" : [ + { + "names" : [ + "index1", + "index2" + ], + "privileges" : [ + "all" + ], + "field_security" : { + "grant" : [ + "title", + "body" + ] + }, + "allow_restricted_indices" : false + } + ], + "applications" : [ ], + "run_as" : [ ], + "metadata" : { + "version" : 1 + }, + "transient_metadata" : { + "enabled" : true + }, + "description" : "Grants user access to some indicies." + } + ] +} +---- diff --git a/docs/reference/rest-api/security/query-user.asciidoc b/docs/reference/rest-api/security/query-user.asciidoc index 952e0f40f2a3a..23852f0f2eed7 100644 --- a/docs/reference/rest-api/security/query-user.asciidoc +++ b/docs/reference/rest-api/security/query-user.asciidoc @@ -66,13 +66,6 @@ The email of the user. Specifies whether the user is enabled. ==== -[[security-api-query-user-query-params]] -==== {api-query-parms-title} - -`with_profile_uid`:: -(Optional, boolean) Determines whether to retrieve the <> `uid`, -if exists, for the users. Defaults to `false`. - include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=from] + By default, you cannot page through more than 10,000 hits using the `from` and @@ -93,6 +86,12 @@ In addition, sort can also be applied to the `_doc` field to sort by index order `search_after`:: (Optional, array) <> definition. +[[security-api-query-user-query-params]] +==== {api-query-parms-title} + +`with_profile_uid`:: +(Optional, boolean) Determines whether to retrieve the <> `uid`, +if exists, for the users. Defaults to `false`. 
[[security-api-query-user-response-body]] ==== {api-response-body-title} @@ -191,7 +190,7 @@ Use the user information retrieve the user with a query: [source,console] ---- -GET /_security/_query/user +POST /_security/_query/user { "query": { "prefix": { @@ -231,7 +230,7 @@ To retrieve the user `profile_uid` as part of the response: [source,console] -------------------------------------------------- -GET /_security/_query/user?with_profile_uid=true +POST /_security/_query/user?with_profile_uid=true { "query": { "prefix": { @@ -272,7 +271,7 @@ Use a `bool` query to issue complex logical conditions and use [source,js] ---- -GET /_security/_query/user +POST /_security/_query/user { "query": { "bool": { diff --git a/docs/reference/run-elasticsearch-locally.asciidoc b/docs/reference/run-elasticsearch-locally.asciidoc new file mode 100644 index 0000000000000..1a115ae926ea2 --- /dev/null +++ b/docs/reference/run-elasticsearch-locally.asciidoc @@ -0,0 +1,87 @@ +//// +IMPORTANT: This content is replicated in the Elasticsearch repo root readme. Ensure both files are in sync. + +https://github.com/elastic/start-local is the source of truth. +//// + +[[run-elasticsearch-locally]] +== Run {es} locally +++++ +Run {es} locally +++++ + +[WARNING] +==== +*DO NOT USE THESE INSTRUCTIONS FOR PRODUCTION DEPLOYMENTS* + +The instructions on this page are for *local development only*. Do not use this configuration for production deployments, because it is not secure. +Refer to <> for a list of production deployment options. +==== + +Quickly set up {es} and {kib} in Docker for local development or testing, using the https://github.com/elastic/start-local?tab=readme-ov-file#-try-elasticsearch-and-kibana-locally[`start-local` script]. + +This setup comes with a one-month trial of the Elastic *Platinum* license. +After the trial period, the license reverts to *Free and open - Basic*. +Refer to https://www.elastic.co/subscriptions[Elastic subscriptions] for more information. + +[discrete] +[[local-dev-prerequisites]] +=== Prerequisites + +- If you don't have Docker installed, https://www.docker.com/products/docker-desktop[download and install Docker Desktop] for your operating system. +- If you're using Microsoft Windows, then install https://learn.microsoft.com/en-us/windows/wsl/install[Windows Subsystem for Linux (WSL)]. + +[discrete] +[[local-dev-quick-start]] +=== Run `start-local` + +To set up {es} and {kib} locally, run the `start-local` script: + +[source,sh] +---- +curl -fsSL https://elastic.co/start-local | sh +---- +// NOTCONSOLE + +This script creates an `elastic-start-local` folder containing configuration files and starts both {es} and {kib} using Docker. + +After running the script, you can access Elastic services at the following endpoints: + +* *{es}*: http://localhost:9200 +* *{kib}*: http://localhost:5601 + +The script generates a random password for the `elastic` user, which is displayed at the end of the installation and stored in the `.env` file. + +[CAUTION] +==== +This setup is for local testing only. HTTPS is disabled, and Basic authentication is used for {es}. For security, {es} and {kib} are accessible only through `localhost`. +==== + +[discrete] +[[api-access]] +=== API access + +An API key for {es} is generated and stored in the `.env` file as `ES_LOCAL_API_KEY`. +Use this key to connect to {es} with a https://www.elastic.co/guide/en/elasticsearch/client/index.html[programming language client] or the <>. 
+ +From the `elastic-start-local` folder, check the connection to Elasticsearch using `curl`: + +[source,sh] +---- +source .env +curl $ES_LOCAL_URL -H "Authorization: ApiKey ${ES_LOCAL_API_KEY}" +---- +// NOTCONSOLE + +[discrete] +[[local-dev-additional-info]] +=== Learn more + +For more detailed information about the `start-local` setup, refer to the https://github.com/elastic/start-local[README on GitHub]. +Learn about customizing the setup, logging, and more. + +[discrete] +[[local-dev-next-steps]] +=== Next steps + +Use our <> to learn the basics of {es}. \ No newline at end of file diff --git a/docs/reference/scripting/security.asciidoc b/docs/reference/scripting/security.asciidoc index 0f322d08726b9..249a705e92817 100644 --- a/docs/reference/scripting/security.asciidoc +++ b/docs/reference/scripting/security.asciidoc @@ -9,8 +9,8 @@ security in a defense in depth strategy for scripting. The second layer of security is the https://www.oracle.com/java/technologies/javase/seccodeguide.html[Java Security Manager]. As part of its startup sequence, {es} enables the Java Security Manager to limit the actions that -portions of the code can take. <> uses -the Java Security Manager as an additional layer of defense to prevent scripts +portions of the code can take. <> uses +the Java Security Manager as an additional layer of defense to prevent scripts from doing things like writing files and listening to sockets. {es} uses @@ -18,22 +18,28 @@ from doing things like writing files and listening to sockets. https://www.chromium.org/developers/design-documents/sandbox/osx-sandboxing-design[Seatbelt] in macOS, and https://msdn.microsoft.com/en-us/library/windows/desktop/ms684147[ActiveProcessLimit] -on Windows as additional security layers to prevent {es} from forking or +on Windows as additional security layers to prevent {es} from forking or running other processes. +Finally, scripts used in +<> +can be restricted to a defined list of scripts, or forbidden altogether. +This can prevent users from running particularly slow or resource intensive aggregation +queries. + You can modify the following script settings to restrict the type of scripts -that are allowed to run, and control the available +that are allowed to run, and control the available {painless}/painless-contexts.html[contexts] that scripts can run in. To -implement additional layers in your defense in depth strategy, follow the +implement additional layers in your defense in depth strategy, follow the <>. [[allowed-script-types-setting]] [discrete] === Allowed script types setting -{es} supports two script types: `inline` and `stored`. By default, {es} is -configured to run both types of scripts. To limit what type of scripts are run, -set `script.allowed_types` to `inline` or `stored`. To prevent any scripts from +{es} supports two script types: `inline` and `stored`. By default, {es} is +configured to run both types of scripts. To limit what type of scripts are run, +set `script.allowed_types` to `inline` or `stored`. To prevent any scripts from running, set `script.allowed_types` to `none`. IMPORTANT: If you use {kib}, set `script.allowed_types` to both or just `inline`. @@ -61,3 +67,48 @@ For example, to allow scripts to run only in `scoring` and `update` contexts: ---- script.allowed_contexts: score, update ---- + +[[allowed-script-in-aggs-settings]] +[discrete] +=== Allowed scripts in scripted metrics aggregations + +By default, all scripts are permitted in +<>. 
+To restrict the set of allowed scripts, set +<> +to `true` and provide the allowed scripts using +<> +and/or +<>. + +To disallow certain script types, omit the corresponding script list +(`search.aggs.allowed_inline_metric_scripts` or +`search.aggs.allowed_stored_metric_scripts`) or set it to an empty array. +When both script lists are not empty, the given stored scripts and the given inline scripts +will be allowed. + +The following example permits only 4 specific stored scripts to be used, and no inline scripts: + +[source,yaml] +---- +search.aggs.only_allowed_metric_scripts: true +search.aggs.allowed_inline_metric_scripts: [] +search.aggs.allowed_stored_metric_scripts: + - script_id_1 + - script_id_2 + - script_id_3 + - script_id_4 +---- + +Conversely, the next example allows specific inline scripts but no stored scripts: + +[source,yaml] +---- +search.aggs.only_allowed_metric_scripts: true +search.aggs.allowed_inline_metric_scripts: + - 'state.transactions = []' + - 'state.transactions.add(doc.some_field.value)' + - 'long sum = 0; for (t in state.transactions) { sum += t } return sum' + - 'long sum = 0; for (a in states) { sum += a } return sum' +search.aggs.allowed_stored_metric_scripts: [] +---- diff --git a/docs/reference/search/multi-search-template-api.asciidoc b/docs/reference/search/multi-search-template-api.asciidoc index c8eea52a6fd9b..b1c9518b1f2bc 100644 --- a/docs/reference/search/multi-search-template-api.asciidoc +++ b/docs/reference/search/multi-search-template-api.asciidoc @@ -22,9 +22,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/render-search-template-api.asciidoc b/docs/reference/search/render-search-template-api.asciidoc index 1f259dddf6879..0c782f26068e6 100644 --- a/docs/reference/search/render-search-template-api.asciidoc +++ b/docs/reference/search/render-search-template-api.asciidoc @@ -22,9 +22,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/retriever.asciidoc b/docs/reference/search/retriever.asciidoc index 590df272cc89e..54836ac33762d 100644 --- a/docs/reference/search/retriever.asciidoc +++ b/docs/reference/search/retriever.asciidoc @@ -28,6 +28,9 @@ A <> that replaces the functionality of a <> that produces top documents from <>. +`text_similarity_reranker`:: +A <> that enhances search results by re-ranking documents based on semantic similarity to a specified inference text, using a machine learning model. + [[standard-retriever]] ==== Standard Retriever @@ -74,23 +77,83 @@ Collapses the top documents by a specified key into a single top document per ke When a retriever tree contains a compound retriever (a retriever with two or more child retrievers) *only* the query element is allowed. -===== Example +[discrete] +[[standard-retriever-example]] +==== Example -[source,js] +//// +[source,console] ---- -GET /index/_search +PUT /restaurants { - "retriever": { - "standard": { - "query" { ... }, - "filter" { ... }, - "min_score": ... 
+ "mappings": { + "properties": { + "region": { "type": "keyword" }, + "year": { "type": "keyword" }, + "vector": { + "type": "dense_vector", + "dims": 3 + } + } + } +} + +POST /restaurants/_bulk?refresh +{"index":{}} +{"region": "Austria", "year": "2019", "vector": [10, 22, 77]} +{"index":{}} +{"region": "France", "year": "2019", "vector": [10, 22, 78]} +{"index":{}} +{"region": "Austria", "year": "2020", "vector": [10, 22, 79]} +{"index":{}} +{"region": "France", "year": "2020", "vector": [10, 22, 80]} +---- +// TESTSETUP + +[source,console] +-------------------------------------------------- +DELETE /restaurants +-------------------------------------------------- +// TEARDOWN +//// + +[source,console] +---- +GET /restaurants/_search +{ + "retriever": { <1> + "standard": { <2> + "query": { <3> + "bool": { <4> + "should": [ <5> + { + "match": { <6> + "region": "Austria" + } + } + ], + "filter": [ <7> + { + "term": { <8> + "year": "2019" <9> + } + } + ] } - }, - "size": ... + } + } + } } ---- -// NOTCONSOLE +<1> Opens the `retriever` object. +<2> The `standard` retriever is used for defining traditional {es} queries. +<3> The entry point for defining the search query. +<4> The `bool` object allows for combining multiple query clauses logically. +<5> The `should` array indicates conditions under which a document will match. Documents matching these conditions will increase their relevancy score. +<6> The `match` object finds documents where the `region` field contains the word "Austria." +<7> The `filter` array provides filtering conditions that must be met but do not contribute to the relevancy score. +<8> The `term` object is used for exact matches, in this case, filtering documents by the `year` field. +<9> The exact value to match in the `year` field. [[knn-retriever]] ==== kNN Retriever @@ -139,29 +202,38 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=knn-similarity] The parameters `query_vector` and `query_vector_builder` cannot be used together. -===== Example: +[discrete] +[[knn-retriever-example]] +==== Example -[source,js] +[source,console] ---- -GET /index/_search +GET /restaurants/_search { - "retriever": { - "knn": { - "field": ..., - "query_vector": ..., - "k": ..., - "num_candidates": ... - } + "retriever": { + "knn": { <1> + "field": "vector", <2> + "query_vector": [10, 22, 77], <3> + "k": 10, <4> + "num_candidates": 10 <5> } + } } ---- -// NOTCONSOLE +// TEST[continued] +<1> Configuration for k-nearest neighbor (knn) search, which is based on vector similarity. +<2> Specifies the field name that contains the vectors. +<3> The query vector against which document vectors are compared in the `knn` search. +<4> The number of nearest neighbors to return as top hits. This value must be fewer than or equal to `num_candidates`. +<5> The size of the initial candidate set from which the final `k` nearest neighbors are selected. [[rrf-retriever]] ==== RRF Retriever -An <> retriever returns top documents based on the RRF formula +An <> retriever returns top documents based on the RRF formula, equally weighting two or more child retrievers. +Reciprocal rank fusion (RRF) is a method for combining multiple result +sets with different relevance indicators into a single result set. 
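As a small worked illustration of the formula (using a hypothetical `rank_constant` of 1), a document ranked 2nd by one child retriever and 5th by another receives a combined score of `1/(1+2) + 1/(1+5) = 0.5`, the same score as a document returned at rank 1 by only a single child retriever (`1/(1+1) = 0.5`). Tuning `rank_constant` controls how quickly the contribution of lower-ranked documents falls off.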
===== Parameters @@ -169,7 +241,9 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-retrievers] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-constant] -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-window-size] +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-window-size] + +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-filter] ===== Restrictions @@ -177,29 +251,283 @@ An RRF retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. -===== Example +[discrete] +[[rrf-retriever-example-hybrid]] +==== Example: Hybrid search + +A simple hybrid search example (lexical search + dense vector search) combining a `standard` retriever with a `knn` retriever using RRF: + +[source,console] +---- +GET /restaurants/_search +{ + "retriever": { + "rrf": { <1> + "retrievers": [ <2> + { + "standard": { <3> + "query": { + "multi_match": { + "query": "Austria", + "fields": [ + "city", + "region" + ] + } + } + } + }, + { + "knn": { <4> + "field": "vector", + "query_vector": [10, 22, 77], + "k": 10, + "num_candidates": 10 + } + } + ], + "rank_constant": 1, <5> + "rank_window_size": 50 <6> + } + } +} +---- +// TEST[continued] +<1> Defines a retriever tree with an RRF retriever. +<2> The sub-retriever array. +<3> The first sub-retriever is a `standard` retriever. +<4> The second sub-retriever is a `knn` retriever. +<5> The rank constant for the RRF retriever. +<6> The rank window size for the RRF retriever. + +[discrete] +[[rrf-retriever-example-hybrid-sparse]] +==== Example: Hybrid search with sparse vectors + +A more complex hybrid search example (lexical search + ELSER sparse vector search + dense vector search) using RRF: + +[source,console] +---- +GET movies/_search +{ + "retriever": { + "rrf": { + "retrievers": [ + { + "standard": { + "query": { + "sparse_vector": { + "field": "plot_embedding", + "inference_id": "my-elser-model", + "query": "films that explore psychological depths" + } + } + } + }, + { + "standard": { + "query": { + "multi_match": { + "query": "crime", + "fields": [ + "plot", + "title" + ] + } + } + } + }, + { + "knn": { + "field": "vector", + "query_vector": [10, 22, 77], + "k": 10, + "num_candidates": 10 + } + } + ] + } + } +} +---- +// TEST[skip:uses ELSER] + +[[text-similarity-reranker-retriever]] +==== Text Similarity Re-ranker Retriever + +The `text_similarity_reranker` retriever uses an NLP model to improve search results by reordering the top-k documents based on their semantic similarity to the query. + +[TIP] +==== +Refer to <> for a high level overview of semantic re-ranking. +==== + +===== Prerequisites + +To use `text_similarity_reranker` you must first set up a `rerank` task using the <>. +The `rerank` task should be set up with a machine learning model that can compute text similarity. Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third-party text similarity models supported by {es}. + +Currently you can: + +* Integrate directly with the <> using the `rerank` task type +* Integrate directly with the <> using the `rerank` task type +* Upload a model to {es} with {eland-docs}/machine-learning.html#ml-nlp-pytorch[Eland] using the `text_similarity` NLP task type. +** Then set up an <> with the `rerank` task type +** Refer to the <> on this page for a step-by-step guide. 
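For the Cohere route, a minimal sketch of creating such an endpoint is shown below. The endpoint name `my-cohere-rerank-model` matches the identifier used in the example further down; the `model_id` and the API key placeholder are illustrative, so substitute the model and credentials for your own account.

[source,console]
----
PUT _inference/rerank/my-cohere-rerank-model
{
  "service": "cohere",
  "service_settings": {
    "api_key": "<COHERE-API-KEY>",
    "model_id": "rerank-english-v3.0"
  }
}
----
// TEST[skip:uses ML]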
+ +===== Parameters +`retriever`:: +(Required, <>) ++ +The child retriever that generates the initial set of top documents to be re-ranked. + +`field`:: +(Required, `string`) ++ +The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the `inferenceText`. + +`inference_id`:: +(Required, `string`) ++ +Unique identifier of the inference endpoint created using the {infer} API. + +`inference_text`:: +(Required, `string`) ++ +The text snippet used as the basis for similarity comparison. + +`rank_window_size`:: +(Optional, `int`) ++ +The number of top documents to consider in the re-ranking process. Defaults to `10`. + +`min_score`:: +(Optional, `float`) ++ +Sets a minimum threshold score for including documents in the re-ranked results. Documents with similarity scores below this threshold will be excluded. Note that score calculations vary depending on the model used. + +`filter`:: +(Optional, <>) ++ +Applies the specified <> to the child <>. +If the child retriever already specifies any filters, then this top-level filter is applied in conjuction +with the filter defined in the child retriever. + +===== Restrictions + +A text similarity re-ranker retriever is a compound retriever. Child retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. + +[discrete] +[[text-similarity-reranker-retriever-example-cohere]] +==== Example: Cohere Rerank -[source,js] +This example enables out-of-the-box semantic search by re-ranking top documents using the Cohere Rerank API. This approach eliminate the need to generate and store embeddings for all indexed documents. +This requires a <> using the `rerank` task type. + +[source,console] ---- GET /index/_search { - "retriever": { - "rrf": { - "retrievers": [ - { - "standard" { ... } - }, - { - "knn": { ... } - } - ], - "rank_constant": ... - "rank_window_size": ... + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match_phrase": { + "text": "landmark in Paris" + } + } + } + }, + "field": "text", + "inference_id": "my-cohere-rerank-model", + "inference_text": "Most famous landmark in Paris", + "rank_window_size": 100, + "min_score": 0.5 + } + } +} +---- +// TEST[skip:uses ML] + +[discrete] +[[text-similarity-reranker-retriever-example-eland]] +==== Example: Semantic re-ranking with a Hugging Face model + +The following example uses the `cross-encoder/ms-marco-MiniLM-L-6-v2` model from Hugging Face to rerank search results based on semantic similarity. +The model must be uploaded to {es} using https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch[Eland]. + +[TIP] +==== +Refer to {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-similarity[the Elastic NLP model reference] for a list of third party text similarity models supported by {es}. +==== + +Follow these steps to load the model and create a semantic re-ranker. + +. Install Eland using `pip` ++ +[source,sh] +---- +python -m pip install eland[pytorch] +---- ++ +. Upload the model to {es} using Eland. This example assumes you have an Elastic Cloud deployment and an API key. Refer to the https://www.elastic.co/guide/en/elasticsearch/client/eland/current/machine-learning.html#ml-nlp-pytorch-auth[Eland documentation] for more authentication options. 
++ +[source,sh] +---- +eland_import_hub_model \ + --cloud-id $CLOUD_ID \ + --es-api-key $ES_API_KEY \ + --hub-model-id cross-encoder/ms-marco-MiniLM-L-6-v2 \ + --task-type text_similarity \ + --clear-previous \ + --start +---- ++ +. Create an inference endpoint for the `rerank` task ++ +[source,console] +---- +PUT _inference/rerank/my-msmarco-minilm-model +{ + "service": "elasticsearch", + "service_settings": { + "num_allocations": 1, + "num_threads": 1, + "model_id": "cross-encoder__ms-marco-minilm-l-6-v2" + } +} +---- +// TEST[skip:uses ML] ++ +. Define a `text_similarity_rerank` retriever. ++ +[source,console] +---- +POST movies/_search +{ + "retriever": { + "text_similarity_reranker": { + "retriever": { + "standard": { + "query": { + "match": { + "genre": "drama" + } + } } + }, + "field": "plot", + "inference_id": "my-msmarco-minilm-model", + "inference_text": "films that explore psychological depths" } + } } ---- -// NOTCONSOLE +// TEST[skip:uses ML] ++ +This retriever uses a standard `match` query to search the `movie` index for films tagged with the genre "drama". +It then re-ranks the results based on semantic similarity to the text in the `inference_text` parameter, using the model we uploaded to {es}. ==== Using `from` and `size` with a retriever tree diff --git a/docs/reference/search/rrf.asciidoc b/docs/reference/search/rrf.asciidoc index fb474fe6bf4e6..2525dfff23b94 100644 --- a/docs/reference/search/rrf.asciidoc +++ b/docs/reference/search/rrf.asciidoc @@ -1,9 +1,7 @@ [[rrf]] === Reciprocal rank fusion -preview::["This functionality is in technical preview and may be changed or removed in a future release. -The syntax will likely change before GA. -Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] +preview::["This functionality is in technical preview and may be changed or removed in a future release. The syntax will likely change before GA. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features."] https://plg.uwaterloo.ca/~gvcormac/cormacksigir09-rrf.pdf[Reciprocal rank fusion (RRF)] is a method for combining multiple result sets with different relevance indicators into a single result set. 
@@ -43,7 +41,7 @@ include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-retrievers] include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-constant] -include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-window-size] +include::{es-ref-dir}/rest-api/common-parms.asciidoc[tag=rrf-rank-window-size] An example request using RRF: diff --git a/docs/reference/search/search-template-api.asciidoc b/docs/reference/search/search-template-api.asciidoc index 038396e558607..c60b5281c05e5 100644 --- a/docs/reference/search/search-template-api.asciidoc +++ b/docs/reference/search/search-template-api.asciidoc @@ -21,9 +21,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc index 4a5efe09ea5a0..6b9b13b124e9f 100644 --- a/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc +++ b/docs/reference/search/search-your-data/ccs-version-compat-matrix.asciidoc @@ -1,24 +1,24 @@ -[cols="^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^,^"] |==== -| 18+^h| Remote cluster version +| 19+^h| Remote cluster version h| Local cluster version - | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 | 8.6 | 8.7 | 8.8 | 8.9 | 8.10 | 8.11 | 8.12 | 8.13 | 8.14 -| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} -| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.6 | {no-icon} | 
{no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.12 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.13 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} -| 8.14 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} + | 6.8 | 7.1–7.16 | 7.17 | 8.0 | 8.1 | 8.2 | 8.3 | 8.4 | 8.5 | 8.6 | 8.7 | 8.8 | 8.9 | 8.10 | 8.11 | 8.12 | 8.13 | 8.14 | 8.15 +| 6.8 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.1–7.16 | {yes-icon} | {yes-icon} | {yes-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} +| 7.17 | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.0 | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.1 | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.2 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | 
{yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.3 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.4 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.5 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.6 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon}| {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.7 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.8 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.9 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.10 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.11 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.12 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.13 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.14 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} | {yes-icon} +| 8.15 | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {no-icon} | {yes-icon} | {yes-icon} |==== diff --git a/docs/reference/search/search-your-data/near-real-time.asciidoc b/docs/reference/search/search-your-data/near-real-time.asciidoc index 46a996c237c38..47618ecd9fd7a 100644 --- a/docs/reference/search/search-your-data/near-real-time.asciidoc +++ 
b/docs/reference/search/search-your-data/near-real-time.asciidoc @@ -2,7 +2,7 @@ [[near-real-time]] === Near real-time search -The overview of <> indicates that when a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? +When a document is stored in {es}, it is indexed and fully searchable in _near real-time_--within 1 second. What defines near real-time search? Lucene, the Java libraries on which {es} is based, introduced the concept of per-segment search. A _segment_ is similar to an inverted index, but the word _index_ in Lucene means "a collection of segments plus a commit point". After a commit, a new segment is added to the commit point and the buffer is cleared. diff --git a/docs/reference/search/search-your-data/paginate-search-results.asciidoc b/docs/reference/search/search-your-data/paginate-search-results.asciidoc index a81598273dfd3..edd1546dd0854 100644 --- a/docs/reference/search/search-your-data/paginate-search-results.asciidoc +++ b/docs/reference/search/search-your-data/paginate-search-results.asciidoc @@ -362,7 +362,7 @@ Perl:: Python:: - See https://elasticsearch-py.readthedocs.org/en/master/helpers.html[elasticsearch.helpers.*] + See https://elasticsearch-py.readthedocs.io/en/stable/helpers.html[elasticsearch.helpers.*] JavaScript:: diff --git a/docs/reference/search/search-your-data/retrievers-overview.asciidoc b/docs/reference/search/search-your-data/retrievers-overview.asciidoc index 92cd085583916..c0fe7471946f3 100644 --- a/docs/reference/search/search-your-data/retrievers-overview.asciidoc +++ b/docs/reference/search/search-your-data/retrievers-overview.asciidoc @@ -1,7 +1,5 @@ [[retrievers-overview]] -== Retrievers - -// Will move to a top level "Retrievers and reranking" section once reranking is live +=== Retrievers preview::[] @@ -15,33 +13,32 @@ For implementation details, including notable restrictions, check out the [discrete] [[retrievers-overview-types]] -=== Retriever types +==== Retriever types Retrievers come in various types, each tailored for different search operations. The following retrievers are currently available: -* <>. -Returns top documents from a traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. -Mimics a traditional query but in the context of a retriever framework. -This ensures backward compatibility as existing `_search` requests remain supported. -That way you can transition to the new abstraction at your own pace without mixing syntaxes. -* <>. -Returns top documents from a <>, in the context of a retriever framework. -* <>. -Combines and ranks multiple first-stage retrievers using the reciprocal rank fusion (RRF) algorithm. -Allows you to combine multiple result sets with different relevance indicators into a single result set. -An RRF retriever is a *compound retriever*, where its `filter` element is propagated to its sub retrievers. +* <>. Returns top documents from a +traditional https://www.elastic.co/guide/en/elasticsearch/reference/master/query-dsl.html[query]. +Mimics a traditional query but in the context of a retriever framework. This +ensures backward compatibility as existing `_search` requests remain supported. +That way you can transition to the new abstraction at your own pace without +mixing syntaxes. +* <>. Returns top documents from a <>, +in the context of a retriever framework. +* <>. Combines and ranks multiple first-stage retrievers using +the reciprocal rank fusion (RRF) algorithm. 
Allows you to combine multiple result sets +with different relevance indicators into a single result set. +An RRF retriever is a *compound retriever*, where its `filter` element is +propagated to its sub retrievers. + Sub retrievers may not use elements that are restricted by having a compound retriever as part of the retriever tree. See the <> for detailed examples and information on how to use the RRF retriever. - -[NOTE] -==== -Stay tuned for more retriever types in future releases! -==== +* <>. Used for <>. +Requires first creating a `rerank` task using the <>. [discrete] -=== What makes retrievers useful? +==== What makes retrievers useful? Here's an overview of what makes retrievers useful and how they differ from regular queries. @@ -73,7 +70,7 @@ When using compound retrievers, only the query element is allowed, which enforce [discrete] [[retrievers-overview-example]] -=== Example +==== Example The following example demonstrates how using retrievers simplify the composability of queries for RRF ranking. @@ -143,7 +140,7 @@ GET example-index/_search ], "rank":{ "rrf":{ - "window_size":50, + "rank_window_size":50, "rank_constant":20 } } @@ -154,25 +151,23 @@ GET example-index/_search [discrete] [[retrievers-overview-glossary]] -=== Glossary +==== Glossary Here are some important terms: -* *Retrieval Pipeline*. -Defines the entire retrieval and ranking logic to produce top hits. -* *Retriever Tree*. -A hierarchical structure that defines how retrievers interact. -* *First-stage Retriever*. -Returns an initial set of candidate documents. -* *Compound Retriever*. -Builds on one or more retrievers, enhancing document retrieval and ranking logic. -* *Combiners*. -Compound retrievers that merge top hits from multiple sub-retrievers. -//* NOT YET *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. +* *Retrieval Pipeline*. Defines the entire retrieval and ranking logic to +produce top hits. +* *Retriever Tree*. A hierarchical structure that defines how retrievers interact. +* *First-stage Retriever*. Returns an initial set of candidate documents. +* *Compound Retriever*. Builds on one or more retrievers, +enhancing document retrieval and ranking logic. +* *Combiners*. Compound retrievers that merge top hits +from multiple sub-retrievers. +* *Rerankers*. Special compound retrievers that reorder hits and may adjust the number of hits, with distinctions between first-stage and second-stage rerankers. [discrete] [[retrievers-overview-play-in-search]] -=== Retrievers in action +==== Retrievers in action The Search Playground builds Elasticsearch queries using the retriever abstraction. It automatically detects the fields and types in your index and builds a retriever tree based on your selections. @@ -180,7 +175,9 @@ It automatically detects the fields and types in your index and builds a retriev You can use the Playground to experiment with different retriever configurations and see how they affect search results. Refer to the {kibana-ref}/playground.html[Playground documentation] for more information. -// Content coming in https://github.com/elastic/kibana/pull/182692 - +[discrete] +[[retrievers-overview-api-reference]] +==== API reference +For implementation details, including notable restrictions, check out the <> in the Search API docs. 
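As a hedged sketch of how the example above translates to the retriever abstraction (the index name, fields, and placeholder queries are illustrative rather than the exact ones used earlier), the same `rank_window_size` and `rank_constant` values map directly onto an `rrf` retriever:

[source,console]
----
GET example-index/_search
{
  "retriever": {
    "rrf": {
      "retrievers": [
        {
          "standard": {
            "query": {
              "term": { "topic": "ai" }
            }
          }
        },
        {
          "knn": {
            "field": "vector",
            "query_vector": [ 4, 5, 6 ],
            "k": 3,
            "num_candidates": 5
          }
        }
      ],
      "rank_window_size": 50,
      "rank_constant": 20
    }
  }
}
----
// TEST[skip:TBD]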
diff --git a/docs/reference/search/search-your-data/search-api.asciidoc b/docs/reference/search/search-your-data/search-api.asciidoc index 496812a0cedb4..13cea537ea4fb 100644 --- a/docs/reference/search/search-your-data/search-api.asciidoc +++ b/docs/reference/search/search-your-data/search-api.asciidoc @@ -173,7 +173,7 @@ GET /my-index-000001/_search "script": { "source": """emit(doc['@timestamp'].value.dayOfWeekEnum - .getDisplayName(TextStyle.FULL, Locale.ROOT))""" + .getDisplayName(TextStyle.FULL, Locale.ENGLISH))""" } } }, @@ -530,3 +530,5 @@ include::retrieve-inner-hits.asciidoc[] include::search-shard-routing.asciidoc[] include::search-using-query-rules.asciidoc[] include::search-template.asciidoc[] +include::retrievers-overview.asciidoc[] + diff --git a/docs/reference/search/search-your-data/search-template.asciidoc b/docs/reference/search/search-your-data/search-template.asciidoc index 7a7f09f4a37a7..489a03c0a6a2a 100644 --- a/docs/reference/search/search-your-data/search-template.asciidoc +++ b/docs/reference/search/search-your-data/search-template.asciidoc @@ -42,9 +42,6 @@ PUT _scripts/my-search-template }, "from": "{{from}}", "size": "{{size}}" - }, - "params": { - "query_string": "My query string" } } } diff --git a/docs/reference/search/search-your-data/search-with-synonyms.asciidoc b/docs/reference/search/search-your-data/search-with-synonyms.asciidoc index 596af695b7910..61d3a1d8f925b 100644 --- a/docs/reference/search/search-your-data/search-with-synonyms.asciidoc +++ b/docs/reference/search/search-your-data/search-with-synonyms.asciidoc @@ -82,6 +82,19 @@ If an index is created referencing a nonexistent synonyms set, the index will re The only way to recover from this scenario is to ensure the synonyms set exists then either delete and re-create the index, or close and re-open the index. ====== +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer. + +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + {es} uses synonyms as part of the <>. You can use two types of <> to include synonyms: diff --git a/docs/reference/search/search-your-data/search-your-data.asciidoc b/docs/reference/search/search-your-data/search-your-data.asciidoc index e1c1618410f2f..cd2b418a7e79b 100644 --- a/docs/reference/search/search-your-data/search-your-data.asciidoc +++ b/docs/reference/search/search-your-data/search-your-data.asciidoc @@ -45,8 +45,6 @@ results directly in the Kibana Search UI. 
include::search-api.asciidoc[] include::knn-search.asciidoc[] include::semantic-search.asciidoc[] -include::retrievers-overview.asciidoc[] -include::learning-to-rank.asciidoc[] include::search-across-clusters.asciidoc[] include::search-with-synonyms.asciidoc[] include::search-application-overview.asciidoc[] diff --git a/docs/reference/search/search-your-data/semantic-search-deploy-model.asciidoc b/docs/reference/search/search-your-data/semantic-search-deploy-model.asciidoc new file mode 100644 index 0000000000000..6c610159ae0b9 --- /dev/null +++ b/docs/reference/search/search-your-data/semantic-search-deploy-model.asciidoc @@ -0,0 +1,97 @@ +[[semantic-search-deployed-nlp-model]] +=== Tutorial: semantic search with a deployed model + +++++ +Semantic search with deployed model +++++ + +[IMPORTANT] +==== +* For the easiest way to perform semantic search in the {stack}, refer to the <> end-to-end tutorial. +* This tutorial was written before the <> and <> was introduced. +Today we have simpler options for performing semantic search. +==== + +This guide shows you how to implement semantic search with models deployed in {es}: from selecting an NLP model, to writing queries. + + +[discrete] +[[deployed-select-nlp-model]] +==== Select an NLP model + +{es} offers the usage of a {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-embedding[wide range of NLP models], including both dense and sparse vector models. +Your choice of the language model is critical for implementing semantic search successfully. + +While it is possible to bring your own text embedding model, achieving good search results through model tuning is challenging. +Selecting an appropriate model from our third-party model list is the first step. +Training the model on your own data is essential to ensure better search results than using only BM25. +However, the model training process requires a team of data scientists and ML experts, making it expensive and time-consuming. + +To address this issue, Elastic provides a pre-trained representational model called {ml-docs}/ml-nlp-elser.html[Elastic Learned Sparse EncodeR (ELSER)]. +ELSER, currently available only for English, is an out-of-domain sparse vector model that does not require fine-tuning. +This adaptability makes it suitable for various NLP use cases out of the box. +Unless you have a team of ML specialists, it is highly recommended to use the ELSER model. + +In the case of sparse vector representation, the vectors mostly consist of zero values, with only a small subset containing non-zero values. +This representation is commonly used for textual data. +In the case of ELSER, each document in an index and the query text itself are represented by high-dimensional sparse vectors. +Each non-zero element of the vector corresponds to a term in the model vocabulary. +The ELSER vocabulary contains around 30000 terms, so the sparse vectors created by ELSER contain about 30000 values, the majority of which are zero. +Effectively the ELSER model is replacing the terms in the original query with other terms that have been learnt to exist in the documents that best match the original search terms in a training dataset, and weights to control how important each is. + + +[discrete] +[[deployed-deploy-nlp-model]] +==== Deploy the model + +After you decide which model you want to use for implementing semantic search, you need to deploy the model in {es}. 
+
+include::{es-ref-dir}/tab-widgets/semantic-search/deploy-nlp-model-widget.asciidoc[]
+
+
+[discrete]
+[[deployed-field-mappings]]
+==== Map a field for the text embeddings
+
+Before you start using the deployed model to generate embeddings based on your input text, you need to prepare your index mapping first.
+The mapping of the index depends on the type of model.
+
+include::{es-ref-dir}/tab-widgets/semantic-search/field-mappings-widget.asciidoc[]
+
+
+[discrete]
+[[deployed-generate-embeddings]]
+==== Generate text embeddings
+
+Once you have created the mappings for the index, you can generate text embeddings from your input text.
+This can be done by using an
+<> with an <>.
+The ingest pipeline processes the input data and indexes it into the destination index.
+At index time, the inference ingest processor uses the trained model to infer against the data ingested through the pipeline.
+After you have created the ingest pipeline with the inference processor, you can ingest your data through it to generate the model output.
+
+include::{es-ref-dir}/tab-widgets/semantic-search/generate-embeddings-widget.asciidoc[]
+
+Now it is time to perform semantic search!
+
+
+[discrete]
+[[deployed-search]]
+==== Search the data
+
+Depending on the type of model you have deployed, you can query rank features with a <> query, or dense vectors with a kNN search.
+
+include::{es-ref-dir}/tab-widgets/semantic-search/search-widget.asciidoc[]
+
+
+[discrete]
+[[deployed-hybrid-search]]
+==== Beyond semantic search with hybrid search
+
+In some situations, lexical search may perform better than semantic search.
+For example, when searching for single words or IDs, like product numbers.
+
+Combining semantic and lexical search into one hybrid search request using <> provides the best of both worlds.
+Not only that, but hybrid search using reciprocal rank fusion {blog-ref}improving-information-retrieval-elastic-stack-hybrid[has been shown to perform better in general].
+
+include::{es-ref-dir}/tab-widgets/semantic-search/hybrid-search-widget.asciidoc[]
\ No newline at end of file
diff --git a/docs/reference/search/search-your-data/semantic-search-elser.asciidoc b/docs/reference/search/search-your-data/semantic-search-elser.asciidoc
index 11aec59a00b30..5309b24fa37c9 100644
--- a/docs/reference/search/search-your-data/semantic-search-elser.asciidoc
+++ b/docs/reference/search/search-your-data/semantic-search-elser.asciidoc
@@ -117,15 +117,15 @@ All unique passages, along with their IDs, have been extracted from that data se
https://github.com/elastic/stack-docs/blob/main/docs/en/stack/ml/nlp/data/msmarco-passagetest2019-unique.tsv[tsv file].
IMPORTANT: The `msmarco-passagetest2019-top1000` dataset was not utilized to train the model.
-It is only used in this tutorial as a sample dataset that is easily accessible for demonstration purposes.
+We use this sample dataset in the tutorial because it is easily accessible for demonstration purposes.
You can use a different data set to test the workflow and become familiar with it.
-Download the file and upload it to your cluster using the
-{kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer]
-in the {ml-app} UI.
-Assign the name `id` to the first column and `content` to the second column.
-The index name is `test-data`.
-Once the upload is complete, you can see an index named `test-data` with 182469 documents.
+Download the file and upload it to your cluster using the {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[File Uploader] in the UI. +After your data is analyzed, click **Override settings**. +Under **Edit field names**, assign `id` to the first column and `content` to the second. +Click **Apply**, then **Import**. +Name the index `test-data`, and click **Import**. +After the upload is complete, you will see an index named `test-data` with 182,469 documents. [discrete] [[reindexing-data-elser]] @@ -161,6 +161,18 @@ GET _tasks/ You can also open the Trained Models UI, select the Pipelines tab under ELSER to follow the progress. +Reindexing large datasets can take a long time. +You can test this workflow using only a subset of the dataset. +Do this by cancelling the reindexing process, and only generating embeddings for the subset that was reindexed. +The following API request will cancel the reindexing task: + +[source,console] +---- +POST _tasks//_cancel +---- +// TEST[skip:TBD] + + [discrete] [[text-expansion-query]] ==== Semantic search by using the `sparse_vector` query diff --git a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc index 6ecfea0a02dbc..360d835560b50 100644 --- a/docs/reference/search/search-your-data/semantic-search-inference.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-inference.asciidoc @@ -9,14 +9,18 @@ The instructions in this tutorial shows you how to use the {infer} API workflow IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer to the <> end-to-end tutorial. -The following examples use Cohere's `embed-english-v3.0` model, the `all-mpnet-base-v2` model from HuggingFace, and OpenAI's `text-embedding-ada-002` second generation embedding model. -You can use any Cohere and OpenAI models, they are all supported by the {infer} API. -For a list of supported models available on HuggingFace, refer to -<>. +The following examples use the: + +* `embed-english-v3.0` model for https://docs.cohere.com/docs/cohere-embed[Cohere] +* `all-mpnet-base-v2` model from https://huggingface.co/sentence-transformers/all-mpnet-base-v2[HuggingFace] +* `text-embedding-ada-002` second generation embedding model for OpenAI +* models available through https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio] or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI] +* `text-embedding-004` model for https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api[Google Vertex AI] +* `mistral-embed` model for https://docs.mistral.ai/getting-started/models/[Mistral] +* `amazon.titan-embed-text-v1` model for https://docs.aws.amazon.com/bedrock/latest/userguide/model-ids.html[Amazon Bedrock] -Azure based examples use models available through https://ai.azure.com/explore/models?selectedTask=embeddings[Azure AI Studio] -or https://learn.microsoft.com/en-us/azure/ai-services/openai/concepts/models[Azure OpenAI]. -Mistral examples use the `mistral-embed` model from https://docs.mistral.ai/getting-started/models/[the Mistral API]. +You can use any Cohere and OpenAI models, they are all supported by the {infer} API. +For a list of recommended models available on HuggingFace, refer to <>. Click the name of the service you want to use on any of the widgets below to review the corresponding instructions. 
@@ -34,13 +38,13 @@ Create an {infer} endpoint by using the <>: include::{es-ref-dir}/tab-widgets/inference-api/infer-api-task-widget.asciidoc[] + [discrete] [[infer-service-mappings]] ==== Create the index mapping The mapping of the destination index - the index that contains the embeddings that the model will create based on your input text - must be created. -The destination index must have a field with the <> -field type to index the output of the used model. +The destination index must have a field with the <> field type for most models, or with the <> field type for sparse vector models such as those created by the `elser` service, to index the output of the used model. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc[] @@ -48,8 +52,7 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-mapping-widget.asciido [[infer-service-inference-ingest-pipeline]] ==== Create an ingest pipeline with an inference processor -Create an <> with an -<> and use the model you created above to infer against the data that is being ingested in the pipeline. +Create an <> with an <> and use the model you created above to infer against the data that is being ingested in the pipeline. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc[] @@ -64,19 +67,19 @@ It consists of 200 queries, each accompanied by a list of relevant text passages All unique passages, along with their IDs, have been extracted from that data set and compiled into a https://github.com/elastic/stack-docs/blob/main/docs/en/stack/ml/nlp/data/msmarco-passagetest2019-unique.tsv[tsv file]. -Download the file and upload it to your cluster using the -{kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer] -in the {ml-app} UI. -Assign the name `id` to the first column and `content` to the second column. -The index name is `test-data`. -Once the upload is complete, you can see an index named `test-data` with 182469 documents. +Download the file and upload it to your cluster using the {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer] in the {ml-app} UI. +After your data is analyzed, click **Override settings**. +Under **Edit field names**, assign `id` to the first column and `content` to the second. +Click **Apply**, then **Import**. +Name the index `test-data`, and click **Import**. +After the upload is complete, you will see an index named `test-data` with 182,469 documents. [discrete] [[reindexing-data-infer]] ==== Ingest the data through the {infer} ingest pipeline -Create the embeddings from the text by reindexing the data through the {infer} -pipeline that uses the chosen model as the inference model. +Create embeddings from the text by reindexing the data through the {infer} pipeline that uses your chosen model. +This step uses the {ref}/docs-reindex.html[reindex API] to simulate data ingestion through a pipeline. include::{es-ref-dir}/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc[] @@ -88,7 +91,10 @@ GET _tasks/ ---- // TEST[skip:TBD] -You can also cancel the reindexing process if you don't want to wait until the reindexing process is fully complete which might take hours for large data sets: +Reindexing large datasets can take a long time. +You can test this workflow using only a subset of the dataset. +Do this by cancelling the reindexing process, and only generating embeddings for the subset that was reindexed.
+The following API request will cancel the reindexing task: [source,console] ---- @@ -102,8 +108,8 @@ POST _tasks//_cancel ==== Semantic search After the data set has been enriched with the embeddings, you can query the data using {ref}/knn-search.html#knn-semantic-search[semantic search]. -Pass a -`query_vector_builder` to the k-nearest neighbor (kNN) vector search API, and provide the query text and the model you have used to create the embeddings. +For dense vector models, pass a `query_vector_builder` to the k-nearest neighbor (kNN) vector search API, and provide the query text and the model you have used to create the embeddings. +For a sparse vector model like ELSER, use a `sparse_vector` query, and provide the query text together with the model you have used to create the embeddings. NOTE: If you cancelled the reindexing process, you run the query only a part of the data which affects the quality of your results. @@ -115,5 +121,6 @@ include::{es-ref-dir}/tab-widgets/inference-api/infer-api-search-widget.asciidoc You can also find tutorials in an interactive Colab notebook format using the {es} Python client: + * https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/integrations/cohere/inference-cohere.ipynb[Cohere {infer} tutorial notebook] * https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/search/07-inference.ipynb[OpenAI {infer} tutorial notebook] diff --git a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc index c2dabedb0336c..709d17091164c 100644 --- a/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search-semantic-text.asciidoc @@ -24,7 +24,6 @@ This tutorial uses the <> for demonstra To use the `semantic_text` field type, you must have an {infer} endpoint deployed in your cluster using the <>. - [discrete] [[semantic-text-infer-endpoint]] ==== Create the {infer} endpoint @@ -48,15 +47,20 @@ be used and ELSER creates sparse vectors. The `inference_id` is `my-elser-endpoint`. <2> The `elser` service is used in this example. +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== [discrete] [[semantic-text-index-mapping]] ==== Create the index mapping -The mapping of the destination index - the index that contains the embeddings -that the inference endpoint will generate based on your input text - must be created. The -destination index must have a field with the <> -field type to index the output of the used inference endpoint. +The mapping of the destination index - the index that contains the embeddings that the inference endpoint will generate based on your input text - must be created. +The destination index must have a field with the <> field type to index the output of the used inference endpoint.
[source,console] ------------------------------------------------------------ @@ -64,13 +68,9 @@ PUT semantic-embeddings { "mappings": { "properties": { - "semantic_text": { <1> + "content": { <1> "type": "semantic_text", <2> "inference_id": "my-elser-endpoint" <3> - }, - "content": { <4> - "type": "text", - "copy_to": "semantic_text" <5> } } } @@ -82,9 +82,6 @@ PUT semantic-embeddings <3> The `inference_id` is the inference endpoint you created in the previous step. It will be used to generate the embeddings based on the input text. Every time you ingest data into the related `semantic_text` field, this endpoint will be used for creating the vector representation of the text. -<4> The field to store the text reindexed from a source index in the <> step. -<5> The textual data stored in the `content` field will be copied to `semantic_text` and processed by the {infer} endpoint. -The `semantic_text` field will store the embeddings generated based on the input data. [discrete] @@ -99,24 +96,21 @@ a list of relevant text passages. All unique passages, along with their IDs, have been extracted from that data set and compiled into a https://github.com/elastic/stack-docs/blob/main/docs/en/stack/ml/nlp/data/msmarco-passagetest2019-unique.tsv[tsv file]. -Download the file and upload it to your cluster using the -{kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer] -in the {ml-app} UI. Assign the name `id` to the first column and `content` to -the second column. The index name is `test-data`. Once the upload is complete, -you can see an index named `test-data` with 182469 documents. +Download the file and upload it to your cluster using the {kibana-ref}/connect-to-elasticsearch.html#upload-data-kibana[Data Visualizer] in the {ml-app} UI. +After your data is analyzed, click **Override settings**. +Under **Edit field names**, assign `id` to the first column and `content` to the second. +Click **Apply**, then **Import**. +Name the index `test-data`, and click **Import**. +After the upload is complete, you will see an index named `test-data` with 182,469 documents. [discrete] [[semantic-text-reindex-data]] ==== Reindex the data -Create the embeddings from the text by reindexing the data from the `test-data` -index to the `semantic-embeddings` index. The data in the `content` field will -be reindexed into the `content` field of the destination index. -The `content` field data will be copied to the `semantic_text` field as a result of the `copy_to` -parameter set in the index mapping creation step. The copied data will be -processed by the {infer} endpoint associated with the `semantic_text` semantic text -field. +Create the embeddings from the text by reindexing the data from the `test-data` index to the `semantic-embeddings` index. +The data in the `content` field will be reindexed into the `content` semantic text field of the destination index. +The reindexed data will be processed by the {infer} endpoint associated with the `content` semantic text field. [source,console] ------------------------------------------------------------ @@ -144,8 +138,10 @@ GET _tasks/ ------------------------------------------------------------ // TEST[skip:TBD] -It is recommended to cancel the reindexing process if you don't want to wait -until it is fully complete which might take a long time for an inference endpoint with few assigned resources: +Reindexing large datasets can take a long time. +You can test this workflow using only a subset of the dataset. 
+Do this by cancelling the reindexing process, and only generating embeddings for the subset that was reindexed. +The following API request will cancel the reindexing task: [source,console] ------------------------------------------------------------ @@ -158,10 +154,9 @@ POST _tasks//_cancel [[semantic-text-semantic-search]] ==== Semantic search -After the data set has been enriched with the embeddings, you can query the data -using semantic search. Provide the `semantic_text` field name and the query text -in a `semantic` query type. The {infer} endpoint used to generate the embeddings -for the `semantic_text` field will be used to process the query text. +After the data set has been enriched with the embeddings, you can query the data using semantic search. +Provide the `semantic_text` field name and the query text in a `semantic` query type. +The {infer} endpoint used to generate the embeddings for the `semantic_text` field will be used to process the query text. [source,console] ------------------------------------------------------------ @@ -169,7 +164,7 @@ GET semantic-embeddings/_search { "query": { "semantic": { - "field": "semantic_text", <1> + "field": "content", <1> "query": "How to avoid muscle soreness while running?" <2> } } @@ -187,10 +182,12 @@ query from the `semantic-embedding` index: "hits": [ { "_index": "semantic-embeddings", - "_id": "6DdEuo8B0vYIvzmhoEtt", - "_score": 24.972616, + "_id": "Jy5065EBBFPLbFsdh_f9", + "_score": 21.487484, "_source": { - "semantic_text": { + "id": 8836652, + "content": { + "text": "There are a few foods and food groups that will help to fight inflammation and delayed onset muscle soreness (both things that are inevitable after a long, hard workout) when you incorporate them into your postworkout eats, whether immediately after your run or at a meal later in the day. Advertisement. Advertisement.", "inference": { "inference_id": "my-elser-endpoint", "model_settings": { @@ -205,17 +202,17 @@ query from the `semantic-embedding` index: } ] } - }, - "id": 1713868, - "content": "There are a few foods and food groups that will help to fight inflammation and delayed onset muscle soreness (both things that are inevitable after a long, hard workout) when you incorporate them into your postworkout eats, whether immediately after your run or at a meal later in the day. Advertisement. Advertisement." + } } }, { "_index": "semantic-embeddings", - "_id": "-zdEuo8B0vYIvzmhplLX", - "_score": 22.143118, + "_id": "Ji5065EBBFPLbFsdh_f9", + "_score": 18.211695, "_source": { - "semantic_text": { + "id": 8836651, + "content": { + "text": "During Your Workout. There are a few things you can do during your workout to help prevent muscle injury and soreness. According to personal trainer and writer for Iron Magazine, Marc David, doing warm-ups and cool-downs between sets can help keep muscle soreness to a minimum.", "inference": { "inference_id": "my-elser-endpoint", "model_settings": { @@ -230,17 +227,17 @@ query from the `semantic-embedding` index: } ] } - }, - "id": 3389244, - "content": "During Your Workout. There are a few things you can do during your workout to help prevent muscle injury and soreness. According to personal trainer and writer for Iron Magazine, Marc David, doing warm-ups and cool-downs between sets can help keep muscle soreness to a minimum." 
+ } } }, { "_index": "semantic-embeddings", - "_id": "77JEuo8BdmhTuQdXtQWt", - "_score": 21.506052, + "_id": "Wi5065EBBFPLbFsdh_b9", + "_score": 13.089405, "_source": { - "semantic_text": { + "id": 8800197, + "content": { + "text": "This is especially important if the soreness is due to a weightlifting routine. For this time period, do not exert more than around 50% of the level of effort (weight, distance and speed) that caused the muscle groups to be sore.", "inference": { "inference_id": "my-elser-endpoint", "model_settings": { @@ -255,13 +252,15 @@ query from the `semantic-embedding` index: } ] } - }, - "id": 363742, - "content": "This is especially important if the soreness is due to a weightlifting routine. For this time period, do not exert more than around 50% of the level of effort (weight, distance and speed) that caused the muscle groups to be sore." + } } - }, - (...) + } ] ------------------------------------------------------------ // NOTCONSOLE +[discrete] +[[semantic-text-further-examples]] +==== Further examples + +If you want to use `semantic_text` in hybrid search, refer to https://colab.research.google.com/github/elastic/elasticsearch-labs/blob/main/notebooks/search/09-semantic-text.ipynb[this notebook] for a step-by-step guide. \ No newline at end of file diff --git a/docs/reference/search/search-your-data/semantic-search.asciidoc b/docs/reference/search/search-your-data/semantic-search.asciidoc index fa84c3848b78c..62e41b3eef3de 100644 --- a/docs/reference/search/search-your-data/semantic-search.asciidoc +++ b/docs/reference/search/search-your-data/semantic-search.asciidoc @@ -7,109 +7,93 @@ Semantic search is a search method that helps you find data based on the intent Using an NLP model enables you to extract text embeddings out of text. Embeddings are vectors that provide a numeric representation of a text. Pieces of content with similar meaning have similar representations. -NLP models can be used in the {stack} various ways, you can: -* deploy models in {es} -* use the <> (recommended) -* use the <> +You have several options for using NLP models in the {stack}: +* use the `semantic_text` workflow (recommended) +* use the {infer} API workflow +* deploy models directly in {es} -[[semantic-search-diagram]] -.A simplified representation of encoding textual concepts as vectors -image::images/search/vector-search-oversimplification.png[A simplified representation of encoding textual concepts as vectors,align="center"] +Refer to <> to choose your workflow. -At query time, {es} can use the same NLP model to convert a query into embeddings, enabling you to find documents with similar text embeddings. +You can also store your own embeddings in {es} as vectors. +Refer to <> for guidance on which query type to use for semantic search. -This guide shows you how to implement semantic search with {es}: From selecting an NLP model, to writing queries. +At query time, {es} can use the same NLP model to convert a query into embeddings, enabling you to find documents with similar text embeddings. -IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer to the <> end-to-end tutorial. [discrete] -[[semantic-search-select-nlp-model]] -=== Select an NLP model - -{es} offers the usage of a -{ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-embedding[wide range of NLP models], including both dense and sparse vector models. -Your choice of the language model is critical for implementing semantic search successfully. 
- -While it is possible to bring your own text embedding model, achieving good search results through model tuning is challenging. -Selecting an appropriate model from our third-party model list is the first step. -Training the model on your own data is essential to ensure better search results than using only BM25. -However, the model training process requires a team of data scientists and ML experts, making it expensive and time-consuming. - -To address this issue, Elastic provides a pre-trained representational model called {ml-docs}/ml-nlp-elser.html[Elastic Learned Sparse EncodeR (ELSER)]. -ELSER, currently available only for English, is an out-of-domain sparse vector model that does not require fine-tuning. -This adaptability makes it suitable for various NLP use cases out of the box. -Unless you have a team of ML specialists, it is highly recommended to use the ELSER model. - -In the case of sparse vector representation, the vectors mostly consist of zero values, with only a small subset containing non-zero values. -This representation is commonly used for textual data. -In the case of ELSER, each document in an index and the query text itself are represented by high-dimensional sparse vectors. -Each non-zero element of the vector corresponds to a term in the model vocabulary. -The ELSER vocabulary contains around 30000 terms, so the sparse vectors created by ELSER contain about 30000 values, the majority of which are zero. -Effectively the ELSER model is replacing the terms in the original query with other terms that have been learnt to exist in the documents that best match the original search terms in a training dataset, and weights to control how important each is. +[[using-nlp-models]] +=== Choose a semantic search workflow [discrete] -[[semantic-search-deploy-nlp-model]] -=== Deploy the model +==== `semantic_text` workflow -After you decide which model you want to use for implementing semantic search, you need to deploy the model in {es}. +The simplest way to use NLP models in the {stack} is through the <>. +We recommend using this approach because it abstracts away a lot of manual work. +All you need to do is create an {infer} endpoint and an index mapping to start ingesting, embedding, and querying data. +There is no need to define model-related settings and parameters, or to create {infer} ingest pipelines. +Refer to the <> documentation for a list of supported services. -include::{es-ref-dir}/tab-widgets/semantic-search/deploy-nlp-model-widget.asciidoc[] +The <> tutorial shows you the process end-to-end. [discrete] -[[semantic-search-field-mappings]] -=== Map a field for the text embeddings +==== {infer} API workflow -Before you start using the deployed model to generate embeddings based on your input text, you need to prepare your index mapping first. -The mapping of the index depends on the type of model. +The <> is more complex but offers greater control over the {infer} endpoint configuration. +You need to create an {infer} endpoint, provide various model-related settings and parameters, define an index mapping, and set up an {infer} ingest pipeline with the appropriate settings. -include::{es-ref-dir}/tab-widgets/semantic-search/field-mappings-widget.asciidoc[] +The <> tutorial shows you the process end-to-end. [discrete] -[[semantic-search-generate-embeddings]] -=== Generate text embeddings +==== Model deployment workflow -Once you have created the mappings for the index, you can generate text embeddings from your input text. 
-This can be done by using an -<> with an <>. -The ingest pipeline processes the input data and indexes it into the destination index. -At index time, the inference ingest processor uses the trained model to infer against the data ingested through the pipeline. -After you created the ingest pipeline with the inference processor, you can ingest your data through it to generate the model output. +You can also deploy NLP in {es} manually, without using an {infer} endpoint. +This is the most complex and labor intensive workflow for performing semantic search in the {stack}. +You need to select an NLP model from the {ml-docs}/ml-nlp-model-ref.html#ml-nlp-model-ref-text-embedding[list of supported dense and sparse vector models], deploy it using the Eland client, create an index mapping, and set up a suitable ingest pipeline to start ingesting and querying data. -include::{es-ref-dir}/tab-widgets/semantic-search/generate-embeddings-widget.asciidoc[] +The <> tutorial shows you the process end-to-end. -Now it is time to perform semantic search! [discrete] -[[semantic-search-search]] -=== Search the data +[[using-query]] +=== Using the right query -Depending on the type of model you have deployed, you can query rank features with a <> query, or dense vectors with a kNN search. +Crafting the right query is crucial for semantic search. +Which query you use and which field you target in your queries depends on your chosen workflow. +If you're using the `semantic_text` workflow it's quite simple. +If not, it depends on which type of embeddings you're working with. -include::{es-ref-dir}/tab-widgets/semantic-search/search-widget.asciidoc[] +[cols="30%, 30%, 40%", options="header"] +|======================================================================================================================================================================================================= +| Field type to query | Query to use | Notes +| <> | <> | The `semantic_text` field handles generating embeddings for you at index time and query time. +| <> | <> | The `sparse_vector` query can generate query embeddings for you, but you can also provide your own. You must provide embeddings at index time. +| <> | <> | The `knn` query can generate query embeddings for you, but you can also provide your own. You must provide embeddings at index time. +|======================================================================================================================================================================================================= -[discrete] -[[semantic-search-hybrid-search]] -=== Beyond semantic search with hybrid search +If you want {es} to generate embeddings at both index and query time, use the `semantic_text` field and the `semantic` query. +If you want to bring your own embeddings, use the `sparse_vector` or `dense_vector` field type and the associated query depending on the NLP model you used to generate the embeddings. -In some situations, lexical search may perform better than semantic search. -For example, when searching for single words or IDs, like product numbers. - -Combining semantic and lexical search into one hybrid search request using -<> provides the best of both worlds. -Not only that, but hybrid search using reciprocal rank fusion {blog-ref}improving-information-retrieval-elastic-stack-hybrid[has been shown to perform better in general]. +IMPORTANT: For the easiest way to perform semantic search in the {stack}, refer to the <> end-to-end tutorial. 
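+For example, a minimal sketch of the recommended path is a `semantic` query against a hypothetical index `my-index` whose `semantic_text` field is named `content` (both names are placeholders):
+
+[source,console]
+----
+GET my-index/_search
+{
+  "query": {
+    "semantic": {
+      "field": "content",
+      "query": "How to avoid muscle soreness while running?"
+    }
+  }
+}
+----
+// TEST[skip:TBD]
+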
-include::{es-ref-dir}/tab-widgets/semantic-search/hybrid-search-widget.asciidoc[] [discrete] [[semantic-search-read-more]] === Read more * Tutorials: -** <> +** <> +** <> +** <> using the model deployment workflow +** <> ** {ml-docs}/ml-nlp-text-emb-vector-search-example.html[Semantic search with the msmarco-MiniLM-L-12-v3 sentence-transformer model] +* Interactive examples: +** The https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs`] repo contains a number of interactive semantic search examples in the form of executable Python notebooks, using the {es} Python client +** https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/03-ELSER.ipynb[Semantic search with ELSER using the model deployment workflow] +** https://github.com/elastic/elasticsearch-labs/blob/main/notebooks/search/09-semantic-text.ipynb[Semantic search with `semantic_text`] * Blogs: +** https://www.elastic.co/search-labs/blog/semantic-search-simplified-semantic-text[{es} new semantic_text mapping: Simplifying semantic search] ** {blog-ref}may-2023-launch-sparse-encoder-ai-model[Introducing Elastic Learned Sparse Encoder: Elastic's AI model for semantic search] ** {blog-ref}lexical-ai-powered-search-elastic-vector-database[How to get the best of lexical and AI-powered search with Elastic's vector database] ** Information retrieval blog series: @@ -117,10 +101,10 @@ include::{es-ref-dir}/tab-widgets/semantic-search/hybrid-search-widget.asciidoc[ *** {blog-ref}improving-information-retrieval-elastic-stack-benchmarking-passage-retrieval[Part 2: Benchmarking passage retrieval] *** {blog-ref}may-2023-launch-information-retrieval-elasticsearch-ai-model[Part 3: Introducing Elastic Learned Sparse Encoder, our new retrieval model] *** {blog-ref}improving-information-retrieval-elastic-stack-hybrid[Part 4: Hybrid retrieval] -* Interactive examples: -** The https://github.com/elastic/elasticsearch-labs[`elasticsearch-labs`] repo contains a number of interactive semantic search examples in the form of executable Python notebooks, using the {es} Python client -include::semantic-search-elser.asciidoc[] + include::semantic-search-semantic-text.asciidoc[] include::semantic-search-inference.asciidoc[] +include::semantic-search-elser.asciidoc[] include::cohere-es.asciidoc[] +include::semantic-search-deploy-model.asciidoc[] diff --git a/docs/reference/search/search.asciidoc b/docs/reference/search/search.asciidoc index 15985088a6ff7..2ad407b4ae1e4 100644 --- a/docs/reference/search/search.asciidoc +++ b/docs/reference/search/search.asciidoc @@ -38,7 +38,7 @@ must have the `read` index privilege for the alias's data streams or indices. Allows you to execute a search query and get back search hits that match the query. You can provide search queries using the <> or <>. +query string parameter>> or <>. [[search-search-api-path-params]] ==== {api-path-parms-title} @@ -141,7 +141,7 @@ When unspecified, the pre-filter phase is executed if any of these conditions is - The primary sort of the query targets an indexed field. [[search-preference]] -tag::search-preference[] +// tag::search-preference[] `preference`:: (Optional, string) Nodes and shards used for the search. By default, {es} selects from eligible @@ -178,7 +178,7 @@ Any string that does not start with `_`. If the cluster state and selected shards do not change, searches using the same `` value are routed to the same shards in the same order. 
==== -end::search-preference[] +// end::search-preference[] [[search-api-query-params-q]] diff --git a/docs/reference/security/authentication/saml-guide.asciidoc b/docs/reference/security/authentication/saml-guide.asciidoc index cf91e11b7f18f..45d153a071ffd 100644 --- a/docs/reference/security/authentication/saml-guide.asciidoc +++ b/docs/reference/security/authentication/saml-guide.asciidoc @@ -32,7 +32,7 @@ that supports at least the SAML 2.0 _Web Browser SSO Profile_. It has been tested with a number of popular IdP implementations, such as https://www.elastic.co/blog/how-to-configure-elasticsearch-saml-authentication-with-adfs[Microsoft Active Directory Federation Services (ADFS)], https://www.elastic.co/blog/saml-based-single-sign-on-with-elasticsearch-and-azure-active-directory[Azure Active Directory (AAD)], -and https://www.elastic.co/blog/setting-up-saml-for-elastic-enterprise-search-okta-edition[Okta]. +and https://www.elastic.co/blog/how-to-set-up-okta-saml-login-kibana-elastic-cloud[Okta]. This guide assumes that you have an existing IdP and wish to add {kib} as a Service Provider. diff --git a/docs/reference/security/authorization/field-and-document-access-control.asciidoc b/docs/reference/security/authorization/field-and-document-access-control.asciidoc index f4d4fcd49a35f..7c7ea75ece161 100644 --- a/docs/reference/security/authorization/field-and-document-access-control.asciidoc +++ b/docs/reference/security/authorization/field-and-document-access-control.asciidoc @@ -54,8 +54,11 @@ specify any field restrictions. If you assign a user both roles, `role_a` gives the user access to all documents and `role_b` gives the user access to all fields. +[IMPORTANT] +=========== If you need to restrict access to both documents and fields, consider splitting documents by index instead. +=========== include::role-templates.asciidoc[] include::set-security-user.asciidoc[] diff --git a/docs/reference/security/authorization/managing-roles.asciidoc b/docs/reference/security/authorization/managing-roles.asciidoc index 253aa33822234..535d70cbc5e9c 100644 --- a/docs/reference/security/authorization/managing-roles.asciidoc +++ b/docs/reference/security/authorization/managing-roles.asciidoc @@ -13,7 +13,9 @@ A role is defined by the following JSON structure: "indices": [ ... ], <4> "applications": [ ... ], <5> "remote_indices": [ ... ], <6> - "remote_cluster": [ ... ] <7> + "remote_cluster": [ ... ], <7> + "metadata": { ... }, <8> + "description": "..." <9> } ----- // NOTCONSOLE @@ -40,6 +42,16 @@ A role is defined by the following JSON structure: <>. This field is optional (missing `remote_cluster` privileges effectively means no additional cluster permissions for any API key based remote clusters). +<8> Metadata field associated with the role, such as `metadata.app_tag`. + Metadata is internally indexed as a <> field type. + This means that all sub-fields act like `keyword` fields when querying and sorting. + Metadata values can be simple values, but also lists and maps. + This field is optional. +<9> A string value with the description text of the role. + The maximum length of it is `1000` chars. + The field is internally indexed as a <> field type + (with default values for all parameters). + This field is optional. [[valid-role-name]] NOTE: Role names must be at least 1 and no more than 507 characters. 
They can diff --git a/docs/reference/security/authorization/privileges.asciidoc b/docs/reference/security/authorization/privileges.asciidoc index cc44c97a08129..145bd8ebc06bb 100644 --- a/docs/reference/security/authorization/privileges.asciidoc +++ b/docs/reference/security/authorization/privileges.asciidoc @@ -2,7 +2,7 @@ === Security privileges :frontmatter-description: A list of privileges that can be assigned to user roles. :frontmatter-tags-products: [elasticsearch] -:frontmatter-tags-content-type: [reference] +:frontmatter-tags-content-type: [reference] :frontmatter-tags-user-goals: [secure] This section lists the privileges that you can assign to a role. @@ -198,6 +198,10 @@ All {slm} ({slm-init}) actions, including creating and updating policies and starting and stopping {slm-init}. + This privilege is not available in {serverless-full}. ++ +deprecated:[8.15] Also grants the permission to start and stop {Ilm}, using +the {ref}/ilm-start.html[ILM start] and {ref}/ilm-stop.html[ILM stop] APIs. +In a future major release, this privilege will not grant any {Ilm} permissions. `manage_token`:: All security-related operations on tokens that are generated by the {es} Token @@ -278,13 +282,17 @@ status of {Ilm} This privilege is not available in {serverless-full}. `read_pipeline`:: -Read-only access to ingest pipline (get, simulate). +Read-only access to ingest pipeline (get, simulate). `read_slm`:: All read-only {slm-init} actions, such as getting policies and checking the {slm-init} status. + This privilege is not available in {serverless-full}. ++ +deprecated:[8.15] Also grants the permission to get the {Ilm} status, using +the {ref}/ilm-get-status.html[ILM get status API]. In a future major release, +this privilege will not grant any {Ilm} permissions. `read_security`:: All read-only security-related operations, such as getting users, user profiles, diff --git a/docs/reference/security/fips-140-compliance.asciidoc b/docs/reference/security/fips-140-compliance.asciidoc index bf880213c2073..5bf73d43541d6 100644 --- a/docs/reference/security/fips-140-compliance.asciidoc +++ b/docs/reference/security/fips-140-compliance.asciidoc @@ -55,7 +55,8 @@ so that the JVM uses FIPS validated implementations of NIST recommended cryptogr Elasticsearch has been tested with Bouncy Castle's https://repo1.maven.org/maven2/org/bouncycastle/bc-fips/1.0.2.4/bc-fips-1.0.2.4.jar[bc-fips 1.0.2.4] and https://repo1.maven.org/maven2/org/bouncycastle/bctls-fips/1.0.17/bctls-fips-1.0.17.jar[bctls-fips 1.0.17]. -Please refer to the [Support Matrix] for details on which combinations of JVM and security provider are supported in FIPS mode. Elasticsearch does not ship with a FIPS certified provider. It is the responsibility of the user +Please refer to the {es} +https://www.elastic.co/support/matrix#matrix_jvm[JVM support matrix] for details on which combinations of JVM and security provider are supported in FIPS mode. Elasticsearch does not ship with a FIPS certified provider. It is the responsibility of the user to install and configure the security provider to ensure compliance with FIPS 140-2. Using a FIPS certified provider will ensure that only approved cryptographic algorithms are used. 
diff --git a/docs/reference/settings/security-settings.asciidoc b/docs/reference/settings/security-settings.asciidoc index 7dd9d0574638c..0fc4d59e72350 100644 --- a/docs/reference/settings/security-settings.asciidoc +++ b/docs/reference/settings/security-settings.asciidoc @@ -1990,7 +1990,7 @@ idle for more than the specified timeout. The server can also set the `Keep-Alive` HTTP response header. The effective time-to-live value is the smaller value between this setting and the `Keep-Alive` -reponse header. Configure this setting to `-1` to let the server dictate the value. +response header. Configure this setting to `-1` to let the server dictate the value. If the header is not set by the server and the setting has value of `-1`, the time-to-live is infinite and connections never expire. // end::oidc-http-connection-pool-ttl-tag[] diff --git a/docs/reference/setup.asciidoc b/docs/reference/setup.asciidoc index 64626aafb2441..a284e563917c3 100644 --- a/docs/reference/setup.asciidoc +++ b/docs/reference/setup.asciidoc @@ -27,13 +27,14 @@ the only resource-intensive application on the host or container. For example, you might run {metricbeat} alongside {es} for cluster statistics, but a resource-heavy {ls} deployment should be on its own host. +include::run-elasticsearch-locally.asciidoc[] + include::setup/install.asciidoc[] include::setup/configuration.asciidoc[] include::setup/important-settings.asciidoc[] - include::setup/secure-settings.asciidoc[] include::settings/audit-settings.asciidoc[] @@ -82,6 +83,8 @@ include::modules/indices/search-settings.asciidoc[] include::settings/security-settings.asciidoc[] +include::modules/shard-ops.asciidoc[] + include::modules/indices/request_cache.asciidoc[] include::settings/snapshot-settings.asciidoc[] @@ -93,7 +96,9 @@ include::modules/threadpool.asciidoc[] include::settings/notification-settings.asciidoc[] include::setup/advanced-configuration.asciidoc[] + include::setup/sysconfig.asciidoc[] + include::setup/bootstrap-checks.asciidoc[] include::setup/bootstrap-checks-xes.asciidoc[] diff --git a/docs/reference/setup/bootstrap-checks.asciidoc b/docs/reference/setup/bootstrap-checks.asciidoc index 20f93496934f7..64977ae4e4611 100644 --- a/docs/reference/setup/bootstrap-checks.asciidoc +++ b/docs/reference/setup/bootstrap-checks.asciidoc @@ -245,10 +245,9 @@ properties: - `discovery.seed_providers` - `cluster.initial_master_nodes` -Note that you should remove `cluster.initial_master_nodes` from the -configuration after the cluster has started for the first time. Do not use this -setting when restarting nodes or when adding new nodes to an existing cluster. -Instead, configure `discovery.seed_hosts` or `discovery.seed_providers`. If you -do not need any discovery configuration, for instance if running a single-node -cluster, set `discovery.seed_hosts: []` to disable discovery and satisfy this -bootstrap check. +Note that you must <> after the cluster has started for the +first time. Instead, configure `discovery.seed_hosts` or +`discovery.seed_providers`. If you do not need any discovery configuration, for +instance if running a single-node cluster, set `discovery.seed_hosts: []` to +disable discovery and satisfy this bootstrap check. 
diff --git a/docs/reference/setup/important-settings/discovery-settings.asciidoc b/docs/reference/setup/important-settings/discovery-settings.asciidoc index 180121b93206a..acd10a045c6c0 100644 --- a/docs/reference/setup/important-settings/discovery-settings.asciidoc +++ b/docs/reference/setup/important-settings/discovery-settings.asciidoc @@ -53,16 +53,18 @@ first election. In <>, with no discovery settings configured, this step is performed automatically by the nodes themselves. -Because auto-bootstrapping is <>, when starting a new cluster in production -mode, you must explicitly list the master-eligible nodes whose votes should be -counted in the very first election. You set this list using the -`cluster.initial_master_nodes` setting. +Because auto-bootstrapping is <>, +when starting a new cluster in production mode, you must explicitly list the +master-eligible nodes whose votes should be counted in the very first election. +You set this list using the `cluster.initial_master_nodes` setting on every +master-eligible node. Do not configure this setting on master-ineligible nodes. IMPORTANT: After the cluster forms successfully for the first time, remove the -`cluster.initial_master_nodes` setting from each node's configuration. Do not -use this setting when restarting a cluster or adding a new node to an existing -cluster. +`cluster.initial_master_nodes` setting from each node's configuration and never +set it again for this cluster. Do not configure this setting on nodes joining +an existing cluster. Do not configure this setting on nodes which are +restarting. Do not configure this setting when performing a full-cluster +restart. See <>. [source,yaml] -------------------------------------------------- diff --git a/docs/reference/setup/install.asciidoc b/docs/reference/setup/install.asciidoc index 89373d0ce8d44..277846cb050bd 100644 --- a/docs/reference/setup/install.asciidoc +++ b/docs/reference/setup/install.asciidoc @@ -5,8 +5,8 @@ [[hosted-elasticsearch-service]] === Hosted Elasticsearch Service -{ecloud} offers all of the features of {es}, {kib}, and Elastic’s {observability}, {ents}, and {elastic-sec} solutions as a hosted service -available on AWS, GCP, and Azure. +{ecloud} offers all of the features of {es}, {kib}, and Elastic’s {observability}, {ents}, and {elastic-sec} solutions as a hosted service +available on AWS, GCP, and Azure. To set up Elasticsearch in {ecloud}, sign up for a {ess-trial}[free {ecloud} trial]. @@ -17,7 +17,7 @@ To set up Elasticsearch in {ecloud}, sign up for a {ess-trial}[free {ecloud} tri If you want to install and manage {es} yourself, you can: * Run {es} using a <>. -* Run {es} in a <>. +* Run {es} in a <>. * Set up and manage {es}, {kib}, {agent}, and the rest of the Elastic Stack on Kubernetes with {eck-ref}[{eck}]. TIP: To try out Elasticsearch on your own machine, we recommend using Docker and running both Elasticsearch and Kibana. For more information, see <>. Please note that this setup is *not suitable for production use*. @@ -76,27 +76,42 @@ Docker container images may be downloaded from the Elastic Docker Registry. [[jvm-version]] === Java (JVM) Version -{es} is built using Java, and includes a bundled version of -https://openjdk.java.net[OpenJDK] from the JDK maintainers (GPLv2+CE) within -each distribution. The bundled JVM is the recommended JVM. - -To use your own version of Java, set the `ES_JAVA_HOME` environment variable. 
-If you must use a version of Java that is different from the bundled JVM, it is -best to use the latest release of a link:/support/matrix[supported] -https://www.oracle.com/technetwork/java/eol-135779.html[LTS version of Java]. -{es} is closely coupled to certain OpenJDK-specific features, so it may not -work correctly with other JVMs. {es} will refuse to start if a known-bad -version of Java is used. - -If you use a JVM other than the bundled one, you are responsible for reacting -to announcements related to its security issues and bug fixes, and must -yourself determine whether each update is necessary or not. In contrast, the -bundled JVM is treated as an integral part of {es}, which means that Elastic -takes responsibility for keeping it up to date. Security issues and bugs within -the bundled JVM are treated as if they were within {es} itself. - -The bundled JVM is located within the `jdk` subdirectory of the {es} home -directory. You may remove this directory if using your own JVM. +{es} is built using Java, and includes a bundled version of https://openjdk.java.net[OpenJDK] within each distribution. We strongly +recommend using the bundled JVM in all installations of {es}. + +The bundled JVM is treated the same as any other dependency of {es} in terms of support and maintenance. This means that Elastic takes +responsibility for keeping it up to date, and reacts to security issues and bug reports as needed to address vulnerabilities and other bugs +in {es}. Elastic's support of the bundled JVM is subject to Elastic's https://www.elastic.co/support_policy[support policy] and +https://www.elastic.co/support/eol[end-of-life schedule] and is independent of the support policy and end-of-life schedule offered by the +original supplier of the JVM. Elastic does not support using the bundled JVM for purposes other than running {es}. + +TIP: {es} uses only a subset of the features offered by the JVM. Bugs and security issues in the bundled JVM often relate to features that +{es} does not use. Such issues do not apply to {es}. Elastic analyzes reports of security vulnerabilities in all its dependencies, including +in the bundled JVM, and will issue an https://www.elastic.co/community/security[Elastic Security Advisory] if such an advisory is needed. + +If you decide to run {es} using a version of Java that is different from the bundled one, prefer to use the latest release of a +https://www.oracle.com/technetwork/java/eol-135779.html[LTS version of Java] which is link:/support/matrix[listed in the support matrix]. +Although such a configuration is supported, if you encounter a security issue or other bug in your chosen JVM then Elastic may not be able +to help unless the issue is also present in the bundled JVM. Instead, you must seek assistance directly from the supplier of your chosen +JVM. You must also take responsibility for reacting to security and bug announcements from the supplier of your chosen JVM. {es} may not +perform optimally if using a JVM other than the bundled one. {es} is closely coupled to certain OpenJDK-specific features, so it may not +work correctly with JVMs that are not OpenJDK. {es} will refuse to start if you attempt to use a known-bad JVM version. + +To use your own version of Java, set the `ES_JAVA_HOME` environment variable to the path to your own JVM installation. The bundled JVM is +located within the `jdk` subdirectory of the {es} home directory. You may remove this directory if using your own JVM. 
+ +[discrete] +[[jdk-locale]] +=== JDK locale database + +The locale database used by {es}, used to map from various date formats to +the underlying date storage format, depends on the version of the JDK +that {es} is running on. On JDK version 23 and above, {es} will use the +_CLDR_ database. On JDK version 22 and below, {es} will use the _COMPAT_ +database. This may mean that the strings used for textual date formats, +and the output of custom week-date formats, may change when moving from +a previous JDK version to JDK 23 or above. For more information, see +<>. [discrete] [[jvm-agents]] diff --git a/docs/reference/setup/logging-config.asciidoc b/docs/reference/setup/logging-config.asciidoc index 7b36b6382c9bf..e382bbdacb464 100644 --- a/docs/reference/setup/logging-config.asciidoc +++ b/docs/reference/setup/logging-config.asciidoc @@ -140,19 +140,41 @@ documentation]. [[configuring-logging-levels]] === Configuring logging levels -Each Java package in the {es-repo}[{es} source code] has a related logger. For -example, the `org.elasticsearch.discovery` package has -`logger.org.elasticsearch.discovery` for logs related to the -<> process. - -To get more or less verbose logs, use the <> to change the related logger's log level. Each logger -accepts Log4j 2's built-in log levels, from least to most verbose: `OFF`, -`FATAL`, `ERROR`, `WARN`, `INFO`, `DEBUG`, and `TRACE`. The default log level is -`INFO`. Messages logged at higher verbosity levels (`DEBUG` and `TRACE`) are -only intended for expert use. To prevent leaking sensitive information in logs, -{es} will reject setting certain loggers to higher verbosity levels unless -<> is enabled. +Log4J 2 log messages include a _level_ field, which is one of the following (in +order of increasing verbosity): + +* `FATAL` +* `ERROR` +* `WARN` +* `INFO` +* `DEBUG` +* `TRACE` + +By default {es} includes all messages at levels `INFO`, `WARN`, `ERROR` and +`FATAL` in its logs, but filters out messages at levels `DEBUG` and `TRACE`. +This is the recommended configuration. Do not filter out messages at `INFO` or +higher log levels or else you may not be able to understand your cluster's +behaviour or troubleshoot common problems. Do not enable logging at levels +`DEBUG` or `TRACE` unless you are following instructions elsewhere in this +manual which call for more detailed logging, or you are an expert user who will +be reading the {es} source code to determine the meaning of the logs. + +Messages are logged by a hierarchy of loggers which matches the hierarchy of +Java packages and classes in the {es-repo}[{es} source code]. Every logger has +a corresponding <> which can be used +to control the verbosity of its logs. The setting's name is the fully-qualified +name of the package or class, prefixed with `logger.`. + +You may set each logger's verbosity to the name of a log level, for instance +`DEBUG`, which means that messages from this logger at levels up to the +specified one will be included in the logs. You may also use the value `OFF` to +suppress all messages from the logger. + +For example, the `org.elasticsearch.discovery` package contains functionality +related to the <> process, and you can +control the verbosity of its logs with the `logger.org.elasticsearch.discovery` +setting. 
To enable `DEBUG` logging for this package, use the +<> as follows: [source,console] ---- @@ -164,8 +186,8 @@ PUT /_cluster/settings } ---- -To reset a logger's verbosity to its default level, set the logger setting to -`null`: +To reset this package's log verbosity to its default level, set the logger +setting to `null`: [source,console] ---- @@ -211,6 +233,14 @@ formatting the same information in different ways, renaming the logger or adjusting the log level for specific messages. Do not rely on the contents of the application logs remaining precisely the same between versions. +NOTE: To prevent leaking sensitive information in logs, {es} suppresses certain +log messages by default even at the highest verbosity levels. To disable this +protection on a node, set the Java system property +`es.insecure_network_trace_enabled` to `true`. This feature is primarily +intended for test systems which do not contain any sensitive information. If you +set this property on a system which contains sensitive information, you must +protect your logs from unauthorized access. + [discrete] [[deprecation-logging]] === Deprecation logging diff --git a/docs/reference/setup/restart-cluster.asciidoc b/docs/reference/setup/restart-cluster.asciidoc index 9488c6632836b..a3bf7723cb5a9 100644 --- a/docs/reference/setup/restart-cluster.asciidoc +++ b/docs/reference/setup/restart-cluster.asciidoc @@ -11,7 +11,7 @@ time, so the service remains uninterrupted. [WARNING] ==== Nodes exceeding the low watermark threshold will be slow to restart. Reduce the disk -usage below the <> before to restarting nodes. +usage below the <> before restarting nodes. ==== [discrete] diff --git a/docs/reference/setup/stopping.asciidoc b/docs/reference/setup/stopping.asciidoc index 8c3a8d40fa1d2..f80812f026936 100644 --- a/docs/reference/setup/stopping.asciidoc +++ b/docs/reference/setup/stopping.asciidoc @@ -50,9 +50,14 @@ such a shutdown, it does not go through an orderly shutdown as described above. process will also return with a special status code indicating the nature of the error. [horizontal] +Killed by jvmkiller agent:: 158 +User or kernel SIGTERM:: 143 +Slain by kernel oom-killer:: 137 +Segmentation fault:: 134 JVM internal error:: 128 Out of memory error:: 127 Stack overflow error:: 126 Unknown virtual machine error:: 125 Serious I/O error:: 124 +Bootstrap check failure:: 78 Unknown fatal error:: 1 diff --git a/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc b/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc index d8b03cbc0e880..e677408da3f25 100644 --- a/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc +++ b/docs/reference/snapshot-restore/apis/get-snapshot-status-api.asciidoc @@ -4,7 +4,7 @@ Get snapshot status ++++ -Retrieves a detailed description of the current state for each shard participating in the snapshot. +Retrieves a detailed description of the current state for each shard participating in the snapshot. Note that this API should only be used to obtain detailed shard-level information for ongoing snapshots. If this detail is not needed, or you want to obtain information about one or more existing snapshots, use the <>. //// [source,console] @@ -172,13 +172,8 @@ Indicates the current snapshot state. `STARTED`:: The snapshot is currently running. -`PARTIAL`:: - The global cluster state was stored, but data of at least one shard was not stored successfully. 
- The <> section of the response contains more detailed information about shards - that were not processed correctly. - `SUCCESS`:: - The snapshot finished and all shards were stored successfully. + The snapshot completed. ==== -- diff --git a/docs/reference/snapshot-restore/repository-s3.asciidoc b/docs/reference/snapshot-restore/repository-s3.asciidoc index d757a74110ca9..91bc40689e246 100644 --- a/docs/reference/snapshot-restore/repository-s3.asciidoc +++ b/docs/reference/snapshot-restore/repository-s3.asciidoc @@ -311,11 +311,8 @@ include::repository-shared-settings.asciidoc[] `delete_objects_max_size`:: - (<>) Sets the maxmimum batch size, betewen 1 and 1000, used - for `DeleteObjects` requests. Defaults to 1000 which is the maximum number - supported by the - https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html[AWS - DeleteObjects API]. + (integer) Sets the maximum batch size, between 1 and 1000, used for `DeleteObjects` requests. Defaults to 1000, which is the maximum + number supported by the https://docs.aws.amazon.com/AmazonS3/latest/API/API_DeleteObjects.html[AWS DeleteObjects API]. NOTE: The option of defining client settings in the repository settings as documented below is considered deprecated, and will be removed in a future @@ -369,7 +366,7 @@ If you use a Glacier storage class, or another unsupported storage class, or object expiry, then you may permanently lose access to your repository contents. -You may use the `intellligent_tiering` storage class to automatically manage +You may use the `intelligent_tiering` storage class to automatically manage the class of objects, but you must not enable the optional Archive Access or Deep Archive Access tiers. If you use these tiers then you may permanently lose access to your repository contents. diff --git a/docs/reference/synonyms/apis/synonyms-apis.asciidoc b/docs/reference/synonyms/apis/synonyms-apis.asciidoc index c9de52939b2fe..dbbc26c36d3df 100644 --- a/docs/reference/synonyms/apis/synonyms-apis.asciidoc +++ b/docs/reference/synonyms/apis/synonyms-apis.asciidoc @@ -21,6 +21,23 @@ These filters are applied as part of the <> process by the << NOTE: Synonyms sets are limited to a maximum of 10,000 synonym rules per set. If you need to manage more synonym rules, you can create multiple synonyms sets. +WARNING: Synonyms sets must exist before they can be added to indices. +If an index is created referencing a nonexistent synonyms set, the index will remain in a partially created and inoperable state. +The only way to recover from this scenario is to ensure the synonyms set exists, then either delete and re-create the index, or close and re-open the index. + +[WARNING] +==== +Invalid synonym rules can cause errors when applying analyzer changes. +For reloadable analyzers, this prevents reloading and applying changes. +You must correct errors in the synonym rules and reload the analyzer.
+ +An index with invalid synonym rules cannot be reopened, making it inoperable when: + +* A node containing the index starts +* The index is opened from a closed state +* A node restart occurs (which reopens the node assigned shards) +==== + [discrete] [[synonyms-sets-apis]] === Synonyms sets APIs diff --git a/docs/reference/tab-widgets/api-call.asciidoc b/docs/reference/tab-widgets/api-call.asciidoc index bb6b89374075d..5e70d73684436 100644 --- a/docs/reference/tab-widgets/api-call.asciidoc +++ b/docs/reference/tab-widgets/api-call.asciidoc @@ -1,5 +1,5 @@ // tag::cloud[] -**Use {kib}** +**Option 1: Use {kib}** //tag::kibana-api-ex[] . Open {kib}'s main menu ("*☰*" near Elastic logo) and go to **Dev Tools > Console**. @@ -16,9 +16,9 @@ GET / //end::kibana-api-ex[] -**Use curl** +**Option 2: Use `curl`** -To communicate with {es} using curl or another client, you need your cluster's +To communicate with {es} using `curl` or another client, you need your cluster's endpoint. . Open {kib}'s main menu and click **Manage this deployment**. @@ -26,7 +26,7 @@ endpoint. . From your deployment menu, go to the **Elasticsearch** page. Click **Copy endpoint**. -. To submit an example API request, run the following curl command in a new +. To submit an example API request, run the following `curl` command in a new terminal session. Replace `` with the password for the `elastic` user. Replace `` with your endpoint. + diff --git a/docs/reference/tab-widgets/cpu-usage.asciidoc b/docs/reference/tab-widgets/cpu-usage.asciidoc index 575cf459ee5be..c6272228965eb 100644 --- a/docs/reference/tab-widgets/cpu-usage.asciidoc +++ b/docs/reference/tab-widgets/cpu-usage.asciidoc @@ -1,30 +1,20 @@ // tag::cloud[] -From your deployment menu, click **Performance**. The page's **CPU Usage** chart -shows your deployment's CPU usage as a percentage. +* (Recommended) Enable {cloud}/ec-monitoring-setup.html[logs and metrics]. When logs and metrics are enabled, monitoring information is visible on {kib}'s {kibana-ref}/xpack-monitoring.html[Stack Monitoring] page. ++ +You can also enable the {kibana-ref}/kibana-alerts.html[CPU usage threshold alert] to be notified about potential issues through email. -High CPU usage can also deplete your CPU credits. CPU credits let {ess} provide -smaller clusters with a performance boost when needed. The **CPU credits** -chart shows your remaining CPU credits, measured in seconds of CPU time. +* From your deployment menu, view the {cloud}/ec-saas-metrics-accessing.html[**Performance**] page. On this page, you can view two key metrics: +** **CPU usage**: Your deployment's CPU usage, represented as a percentage. +** **CPU credits**: Your remaining CPU credits, measured in seconds of CPU time. -You can also use the <> to get the current CPU usage -for each node. - -// tag::cpu-usage-cat-nodes[] -[source,console] ----- -GET _cat/nodes?v=true&s=cpu:desc ----- - -The response's `cpu` column contains the current CPU usage as a percentage. The -`name` column contains the node's name. -// end::cpu-usage-cat-nodes[] +{ess} grants {cloud}/ec-vcpu-boost-instance.html[CPU credits] per deployment +to provide smaller clusters with performance boosts when needed. High CPU +usage can deplete these credits, which might lead to {cloud}/ec-scenario_why_is_performance_degrading_over_time.html[performance degradation] and {cloud}/ec-scenario_why_are_my_cluster_response_times_suddenly_so_much_worse.html[increased cluster response times]. 
// end::cloud[] // tag::self-managed[] - -Use the <> to get the current CPU usage for each node. - -include::cpu-usage.asciidoc[tag=cpu-usage-cat-nodes] - +* Enable <>. When logs and metrics are enabled, monitoring information is visible on {kib}'s {kibana-ref}/xpack-monitoring.html[Stack Monitoring] page. ++ +You can also enable the {kibana-ref}/kibana-alerts.html[CPU usage threshold alert] to be notified about potential issues through email. // end::self-managed[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc index c8a42c4d0585a..00adc08b77dfc 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-ingest-cohere"> Cohere + + +
+
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc index a239c79e5a6d1..fb3b4b6042778 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-ingest-pipeline.asciidoc @@ -2,7 +2,7 @@ [source,console] ---- -DELETE _ingest/pipeline/*_embeddings +DELETE _ingest/pipeline/*_embeddings_pipeline ---- // TEST // TEARDOWN @@ -13,7 +13,7 @@ DELETE _ingest/pipeline/*_embeddings [source,console] -------------------------------------------------- -PUT _ingest/pipeline/cohere_embeddings +PUT _ingest/pipeline/cohere_embeddings_pipeline { "processors": [ { @@ -35,11 +35,37 @@ and the `output_field` that will contain the {infer} results. // end::cohere[] +// tag::elser[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/elser_embeddings_pipeline +{ + "processors": [ + { + "inference": { + "model_id": "elser_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::elser[] + // tag::hugging-face[] [source,console] -------------------------------------------------- -PUT _ingest/pipeline/hugging_face_embeddings +PUT _ingest/pipeline/hugging_face_embeddings_pipeline { "processors": [ { @@ -65,7 +91,7 @@ and the `output_field` that will contain the {infer} results. [source,console] -------------------------------------------------- -PUT _ingest/pipeline/openai_embeddings +PUT _ingest/pipeline/openai_embeddings_pipeline { "processors": [ { @@ -91,7 +117,7 @@ and the `output_field` that will contain the {infer} results. [source,console] -------------------------------------------------- -PUT _ingest/pipeline/azure_openai_embeddings +PUT _ingest/pipeline/azure_openai_embeddings_pipeline { "processors": [ { @@ -117,7 +143,7 @@ and the `output_field` that will contain the {infer} results. [source,console] -------------------------------------------------- -PUT _ingest/pipeline/azure_ai_studio_embeddings +PUT _ingest/pipeline/azure_ai_studio_embeddings_pipeline { "processors": [ { @@ -139,11 +165,37 @@ and the `output_field` that will contain the {infer} results. // end::azure-ai-studio[] +// tag::google-vertex-ai[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/google_vertex_ai_embeddings_pipeline +{ + "processors": [ + { + "inference": { + "model_id": "google_vertex_ai_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. 
+ +// end::google-vertex-ai[] + // tag::mistral[] [source,console] -------------------------------------------------- -PUT _ingest/pipeline/mistral_embeddings +PUT _ingest/pipeline/mistral_embeddings_pipeline { "processors": [ { @@ -164,3 +216,29 @@ PUT _ingest/pipeline/mistral_embeddings and the `output_field` that will contain the {infer} results. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +-------------------------------------------------- +PUT _ingest/pipeline/amazon_bedrock_embeddings_pipeline +{ + "processors": [ + { + "inference": { + "model_id": "amazon_bedrock_embeddings", <1> + "input_output": { <2> + "input_field": "content", + "output_field": "content_embedding" + } + } + } + ] +} +-------------------------------------------------- +<1> The name of the inference endpoint you created by using the +<>, it's referred to as `inference_id` in that step. +<2> Configuration object that defines the `input_field` for the {infer} process +and the `output_field` that will contain the {infer} results. + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc index 80c7c7ef23ee3..376b8e7e309fa 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-mapping-cohere"> Cohere + + +
+
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc index a1bce38a02ad2..2f3dbfc812b06 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-mapping.asciidoc @@ -31,6 +31,34 @@ the {infer} pipeline configuration in the next step. // end::cohere[] +// tag::elser[] + +[source,console] +-------------------------------------------------- +PUT elser-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "sparse_vector" <2> + }, + "content": { <3> + "type": "text" <4> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be refrenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `sparse_vector` field for ELSER. +<3> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<4> The field type which is text in this example. + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -174,6 +202,39 @@ the {infer} pipeline configuration in the next step. // end::azure-ai-studio[] +// tag::google-vertex-ai[] + +[source,console] +-------------------------------------------------- +PUT google-vertex-ai-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 768, <3> + "element_type": "float", + "similarity": "dot_product" <4> + }, + "content": { <5> + "type": "text" <6> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated embeddings. It must be referenced in the {infer} pipeline configuration in the next step. +<2> The field to contain the embeddings is a `dense_vector` field. +<3> The output dimensions of the model. This value may be found on the https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api[Google Vertex AI model reference]. +The {infer} API attempts to calculate the output dimensions automatically if `dims` are not specified. +<4> For Google Vertex AI embeddings, the `dot_product` function should be used to calculate similarity. +<5> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<6> The field type which is `text` in this example. + +// end::google-vertex-ai[] + // tag::mistral[] [source,console] @@ -207,3 +268,38 @@ the {infer} pipeline configuration in the next step. <6> The field type which is text in this example. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +-------------------------------------------------- +PUT amazon-bedrock-embeddings +{ + "mappings": { + "properties": { + "content_embedding": { <1> + "type": "dense_vector", <2> + "dims": 1024, <3> + "element_type": "float", + "similarity": "dot_product" <4> + }, + "content": { <5> + "type": "text" <6> + } + } + } +} +-------------------------------------------------- +<1> The name of the field to contain the generated tokens. It must be referenced +in the {infer} pipeline configuration in the next step. +<2> The field to contain the tokens is a `dense_vector` field. 
+<3> The output dimensions of the model. This value may be different depending on the underlying model used. +See the https://docs.aws.amazon.com/bedrock/latest/userguide/titan-multiemb-models.html[Amazon Titan model] or the https://docs.cohere.com/reference/embed[Cohere Embeddings model] documentation. +<4> For Amazon Bedrock embeddings, the `dot_product` function should be used to +calculate similarity for Amazon titan models, or `cosine` for Cohere models. +<5> The name of the field from which to create the dense vector representation. +In this example, the name of the field is `content`. It must be referenced in +the {infer} pipeline configuration in the next step. +<6> The field type which is text in this example. + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc index 4face6a105819..57f774184eb76 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-reindex-cohere"> Cohere + + +
+
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc index 927e47ea4d67c..bb994166282cb 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-reindex.asciidoc @@ -10,7 +10,7 @@ POST _reindex?wait_for_completion=false }, "dest": { "index": "cohere-embeddings", - "pipeline": "cohere_embeddings" + "pipeline": "cohere_embeddings_pipeline" } } ---- @@ -25,6 +25,29 @@ may affect the throughput of the reindexing process. // end::cohere[] +// tag::elser[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "elser-embeddings", + "pipeline": "elser_embeddings_pipeline" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -37,7 +60,7 @@ POST _reindex?wait_for_completion=false }, "dest": { "index": "hugging-face-embeddings", - "pipeline": "hugging_face_embeddings" + "pipeline": "hugging_face_embeddings_pipeline" } } ---- @@ -61,7 +84,7 @@ POST _reindex?wait_for_completion=false }, "dest": { "index": "openai-embeddings", - "pipeline": "openai_embeddings" + "pipeline": "openai_embeddings_pipeline" } } ---- @@ -89,7 +112,7 @@ POST _reindex?wait_for_completion=false }, "dest": { "index": "azure-openai-embeddings", - "pipeline": "azure_openai_embeddings" + "pipeline": "azure_openai_embeddings_pipeline" } } ---- @@ -117,7 +140,7 @@ POST _reindex?wait_for_completion=false }, "dest": { "index": "azure-ai-studio-embeddings", - "pipeline": "azure_ai_studio_embeddings" + "pipeline": "azure_ai_studio_embeddings_pipeline" } } ---- @@ -132,6 +155,28 @@ might affect the throughput of the reindexing process. If this happens, change // end::azure-ai-studio[] +// tag::google-vertex-ai[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "google-vertex-ai-embeddings", + "pipeline": "google_vertex_ai_embeddings_pipeline" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` will make updates to the reindexing process faster. This enables you to +follow the progress closely and detect errors early. + +// end::google-vertex-ai[] + // tag::mistral[] [source,console] @@ -144,7 +189,7 @@ POST _reindex?wait_for_completion=false }, "dest": { "index": "mistral-embeddings", - "pipeline": "mistral_embeddings" + "pipeline": "mistral_embeddings_pipeline" } } ---- @@ -154,3 +199,26 @@ number makes the update of the reindexing process quicker which enables you to follow the progress closely and detect errors early. // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +---- +POST _reindex?wait_for_completion=false +{ + "source": { + "index": "test-data", + "size": 50 <1> + }, + "dest": { + "index": "amazon-bedrock-embeddings", + "pipeline": "amazon_bedrock_embeddings_pipeline" + } +} +---- +// TEST[skip:TBD] +<1> The default batch size for reindexing is 1000. Reducing `size` to a smaller +number makes the update of the reindexing process quicker which enables you to +follow the progress closely and detect errors early. 
+ +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc index 9981eb90d4929..15f4db6021ffc 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-requirements-cohere"> Cohere + + +
+
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc index 435e53bbc0bc0..a60ebaa633504 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-requirements.asciidoc @@ -5,6 +5,13 @@ the Cohere service. // end::cohere[] +// tag::elser[] + +ELSER is a model trained by Elastic. If you have an {es} deployment, there is no +further requirement for using the {infer} API with the `elser` service. + +// end::elser[] + // tag::hugging-face[] A https://huggingface.co/[HuggingFace account] is required to use the {infer} @@ -34,8 +41,23 @@ You can apply for access to Azure OpenAI by completing the form at https://aka.m // end::azure-ai-studio[] +// tag::google-vertex-ai[] +* A https://console.cloud.google.com/[Google Cloud account] +* A project in Google Cloud +* The Vertex AI API enabled in your project +* A valid service account for the Google Vertex AI API +* The service account must have the Vertex AI User role and the `aiplatform.endpoints.predict` permission. + +// end::google-vertex-ai[] + // tag::mistral[] * A Mistral Account on https://console.mistral.ai/[La Plateforme] * An API key generated for your account // end::mistral[] + +// tag::amazon-bedrock[] +* An AWS Account with https://aws.amazon.com/bedrock/[Amazon Bedrock] access +* A pair of access and secret keys used to access Amazon Bedrock + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc index 6a67b28f91601..e6f9455dc2a2d 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search-widget.asciidoc @@ -7,6 +7,12 @@ id="infer-api-search-cohere"> Cohere + + +
+
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc index 523c2301e75ff..5364ca2052fef 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-search.asciidoc @@ -72,6 +72,67 @@ query from the `cohere-embeddings` index sorted by their proximity to the query: // end::cohere[] +// tag::elser[] + +[source,console] +-------------------------------------------------- +GET elser-embeddings/_search +{ + "query":{ + "sparse_vector":{ + "field": "content_embedding", + "inference_id": "elser_embeddings", + "query": "How to avoid muscle soreness after running?" + } + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `cohere-embeddings` index sorted by their proximity to the query: + +[source,consol-result] +-------------------------------------------------- +"hits": [ + { + "_index": "elser-embeddings", + "_id": "ZLGc_pABZbBmsu5_eCoH", + "_score": 21.472063, + "_source": { + "id": 2258240, + "content": "You may notice some muscle aches while you are exercising. This is called acute soreness. More often, you may begin to feel sore about 12 hours after exercising, and the discomfort usually peaks at 48 to 72 hours after exercise. This is called delayed-onset muscle soreness.It is thought that, during this time, your body is repairing the muscle, making it stronger and bigger.You may also notice the muscles feel better if you exercise lightly. This is normal.his is called delayed-onset muscle soreness. It is thought that, during this time, your body is repairing the muscle, making it stronger and bigger. You may also notice the muscles feel better if you exercise lightly. This is normal." + } + }, + { + "_index": "elser-embeddings", + "_id": "ZbGc_pABZbBmsu5_eCoH", + "_score": 21.421381, + "_source": { + "id": 2258242, + "content": "Photo Credit Jupiterimages/Stockbyte/Getty Images. That stiff, achy feeling you get in the days after exercise is a normal physiological response known as delayed onset muscle soreness. You can take it as a positive sign that your muscles have felt the workout, but the pain may also turn you off to further exercise.ou are more likely to develop delayed onset muscle soreness if you are new to working out, if you’ve gone a long time without exercising and start up again, if you have picked up a new type of physical activity or if you have recently boosted the intensity, length or frequency of your exercise sessions." + } + }, + { + "_index": "elser-embeddings", + "_id": "ZrGc_pABZbBmsu5_eCoH", + "_score": 20.542095, + "_source": { + "id": 2258248, + "content": "They found that stretching before and after exercise has no effect on muscle soreness. Exercise might cause inflammation, which leads to an increase in the production of immune cells (comprised mostly of macrophages and neutrophils). Levels of these immune cells reach a peak 24-48 hours after exercise.These cells, in turn, produce bradykinins and prostaglandins, which make the pain receptors in your body more sensitive. Whenever you move, these pain receptors are stimulated.hey found that stretching before and after exercise has no effect on muscle soreness. 
Exercise might cause inflammation, which leads to an increase in the production of immune cells (comprised mostly of macrophages and neutrophils). Levels of these immune cells reach a peak 24-48 hours after exercise." + } + }, + (...) + ] +-------------------------------------------------- +// NOTCONSOLE + +// end::elser[] + // tag::hugging-face[] [source,console] @@ -341,6 +402,71 @@ query from the `azure-ai-studio-embeddings` index sorted by their proximity to t // end::azure-ai-studio[] +// tag::google-vertex-ai[] + +[source,console] +-------------------------------------------------- +GET google-vertex-ai-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "google_vertex_ai_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `mistral-embeddings` index sorted by their proximity to the query: + +[source,console-result] +-------------------------------------------------- +"hits": [ + { + "_index": "google-vertex-ai-embeddings", + "_id": "Ryv0nZEBBFPLbFsdCbGn", + "_score": 0.86815524, + "_source": { + "id": 3041038, + "content": "For example, the cost of the fuel could be 96.9, the amount could be 10 pounds, and the distance covered could be 80 miles. To convert between Litres per 100KM and Miles Per Gallon, please provide a value and click on the required button.o calculate how much fuel you'll need for a given journey, please provide the distance in miles you will be covering on your journey, and the estimated MPG of your vehicle. To work out what MPG you are really getting, please provide the cost of the fuel, how much you spent on the fuel, and how far it took you." + } + }, + { + "_index": "google-vertex-ai-embeddings", + "_id": "w4j0nZEBZ1nFq1oiHQvK", + "_score": 0.8676357, + "_source": { + "id": 1541469, + "content": "This driving cost calculator takes into consideration the fuel economy of the vehicle that you are travelling in as well as the fuel cost. This road trip gas calculator will give you an idea of how much would it cost to drive before you actually travel.his driving cost calculator takes into consideration the fuel economy of the vehicle that you are travelling in as well as the fuel cost. This road trip gas calculator will give you an idea of how much would it cost to drive before you actually travel." + } + }, + { + "_index": "google-vertex-ai-embeddings", + "_id": "Hoj0nZEBZ1nFq1oiHQjJ", + "_score": 0.80510974, + "_source": { + "id": 7982559, + "content": "What's that light cost you? 1 Select your electric rate (or click to enter your own). 2 You can calculate results for up to four types of lights. 3 Select the type of lamp (i.e. 4 Select the lamp wattage (lamp lumens). 5 Enter the number of lights in use. 6 Select how long the lamps are in use (or click to enter your own; enter hours on per year). 7 Finally, ..." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::google-vertex-ai[] + // tag::mistral[] [source,console] @@ -405,3 +531,68 @@ query from the `mistral-embeddings` index sorted by their proximity to the query // NOTCONSOLE // end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +-------------------------------------------------- +GET amazon-bedrock-embeddings/_search +{ + "knn": { + "field": "content_embedding", + "query_vector_builder": { + "text_embedding": { + "model_id": "amazon_bedrock_embeddings", + "model_text": "Calculate fuel cost" + } + }, + "k": 10, + "num_candidates": 100 + }, + "_source": [ + "id", + "content" + ] +} +-------------------------------------------------- +// TEST[skip:TBD] + +As a result, you receive the top 10 documents that are closest in meaning to the +query from the `amazon-bedrock-embeddings` index sorted by their proximity to the query: + +[source,consol-result] +-------------------------------------------------- +"hits": [ + { + "_index": "amazon-bedrock-embeddings", + "_id": "DDd5OowBHxQKHyc3TDSC", + "_score": 0.83704096, + "_source": { + "id": 862114, + "body": "How to calculate fuel cost for a road trip. By Tara Baukus Mello • Bankrate.com. Dear Driving for Dollars, My family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost.It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes.y family is considering taking a long road trip to finish off the end of the summer, but I'm a little worried about gas prices and our overall fuel cost. It doesn't seem easy to calculate since we'll be traveling through many states and we are considering several routes." + } + }, + { + "_index": "amazon-bedrock-embeddings", + "_id": "ajd5OowBHxQKHyc3TDSC", + "_score": 0.8345704, + "_source": { + "id": 820622, + "body": "Home Heating Calculator. Typically, approximately 50% of the energy consumed in a home annually is for space heating. When deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important.This calculator can help you estimate the cost of fuel for different heating appliances.hen deciding on a heating system, many factors will come into play: cost of fuel, installation cost, convenience and life style are all important. This calculator can help you estimate the cost of fuel for different heating appliances." + } + }, + { + "_index": "amazon-bedrock-embeddings", + "_id": "Djd5OowBHxQKHyc3TDSC", + "_score": 0.8327426, + "_source": { + "id": 8202683, + "body": "Fuel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel.If you are paying $4 per gallon, the trip would cost you $200.Most boats have much larger gas tanks than cars.uel is another important cost. This cost will depend on your boat, how far you travel, and how fast you travel. A 33-foot sailboat traveling at 7 knots should be able to travel 300 miles on 50 gallons of diesel fuel." + } + }, + (...) 
+ ] +-------------------------------------------------- +// NOTCONSOLE + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc index 1f3ad645d7c29..63ee0739058ff 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task-widget.asciidoc @@ -7,7 +7,13 @@ id="infer-api-task-cohere"> Cohere - + + +
+
+
+
diff --git a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc index 18fa3ba541bff..757662efd875d 100644 --- a/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc +++ b/docs/reference/tab-widgets/inference-api/infer-api-task.asciidoc @@ -28,9 +28,37 @@ NOTE: When using this model the recommended similarity measure to use in the embeddings are normalized to unit length in which case the `dot_product` and the `cosine` measures are equivalent. +// end::cohere[] +// tag::elser[] -// end::cohere[] +[source,console] +------------------------------------------------------------ +PUT _inference/sparse_embedding/elser_embeddings <1> +{ + "service": "elser", + "service_settings": { + "num_allocations": 1, + "num_threads": 1 + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `sparse_embedding` in the path and the `inference_id` which +is the unique identifier of the {infer} endpoint is `elser_embeddings`. + +You don't need to download and deploy the ELSER model upfront, the API request +above will download the model if it's not downloaded yet and then deploy it. + +[NOTE] +==== +You might see a 502 bad gateway error in the response when using the {kib} Console. +This error usually just reflects a timeout, while the model downloads in the background. +You can check the download progress in the {ml-app} UI. +If using the Python client, you can set the `timeout` parameter to a higher value. +==== + +// end::elser[] // tag::hugging-face[] @@ -158,6 +186,30 @@ Also, when using this model the recommended similarity measure to use in the // end::azure-ai-studio[] +// tag::google-vertex-ai[] + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/google_vertex_ai_embeddings <1> +{ + "service": "googlevertexai", + "service_settings": { + "service_account_json": "", <2> + "model_id": "text-embedding-004", <3> + "location": "", <4> + "project_id": "" <5> + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `text_embedding` per the path. `google_vertex_ai_embeddings` is the unique identifier of the {infer} endpoint (its `inference_id`). +<2> A valid service account in JSON format for the Google Vertex AI API. +<3> For the list of the available models, refer to the https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/text-embeddings-api[Text embeddings API] page. +<4> The name of the location to use for the {infer} task. Refer to https://cloud.google.com/vertex-ai/generative-ai/docs/learn/locations[Generative AI on Vertex AI locations] for available locations. +<5> The name of the project to use for the {infer} task. + +// end::google-vertex-ai[] + // tag::mistral[] [source,console] @@ -177,3 +229,29 @@ PUT _inference/text_embedding/mistral_embeddings <1> <3> The Mistral embeddings model name, for example `mistral-embed`. 
// end::mistral[] + +// tag::amazon-bedrock[] + +[source,console] +------------------------------------------------------------ +PUT _inference/text_embedding/amazon_bedrock_embeddings <1> +{ + "service": "amazonbedrock", + "service_settings": { + "access_key": "", <2> + "secret_key": "", <3> + "region": "", <4> + "provider": "", <5> + "model": "" <6> + } +} +------------------------------------------------------------ +// TEST[skip:TBD] +<1> The task type is `text_embedding` in the path and the `inference_id` which is the unique identifier of the {infer} endpoint is `amazon_bedrock_embeddings`. +<2> The access key can be found on your AWS IAM management page for the user account to access Amazon Bedrock. +<3> The secret key should be the paired key for the specified access key. +<4> Specify the region that your model is hosted in. +<5> Specify the model provider. +<6> The model ID or ARN of the model to use. + +// end::amazon-bedrock[] diff --git a/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc b/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc index b2e864aab6db9..942b0f6ba21a6 100644 --- a/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc +++ b/docs/reference/tab-widgets/troubleshooting/snapshot/corrupt-repository.asciidoc @@ -71,7 +71,7 @@ GET _snapshot/my-repo ---- // TEST[skip:we're not setting up repos in these tests] + -The reponse will look like this: +The response will look like this: + [source,console-result] ---- diff --git a/docs/reference/transform/painless-examples.asciidoc b/docs/reference/transform/painless-examples.asciidoc index 8eb50964f4d5b..4b0802c79a340 100644 --- a/docs/reference/transform/painless-examples.asciidoc +++ b/docs/reference/transform/painless-examples.asciidoc @@ -5,6 +5,9 @@ Painless examples ++++ + +IMPORTANT: The examples that use the `scripted_metric` aggregation are not supported on {es} Serverless. + These examples demonstrate how to use Painless in {transforms}. You can learn more about the Painless scripting language in the {painless}/painless-guide.html[Painless guide]. @@ -37,6 +40,8 @@ with the latest timestamp. From a technical perspective, it helps to achieve the function of a <> by using scripted metric aggregation in a {transform}, which provides a metric output. +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. + [source,js] -------------------------------------------------- "aggregations": { @@ -453,6 +458,8 @@ example for details. The example below assumes that the HTTP response codes are stored as keywords in the `response` field of the documents. +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. + [source,js] -------------------------------------------------- "aggregations": { <1> @@ -507,7 +514,9 @@ Finally, returns the `counts` array with the response counts. == Comparing indices by using scripted metric aggregations This example shows how to compare the content of two indices by a {transform} -that uses a scripted metric aggregation. +that uses a scripted metric aggregation. + +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. 
[source,console] -------------------------------------------------- @@ -623,6 +632,8 @@ By using the `sessionid` as a group-by field, you are able to enumerate events through the session and get more details of the session by using scripted metric aggregation. +IMPORTANT: This example uses a `scripted_metric` aggregation which is not supported on {es} Serverless. + [source,js] -------------------------------------------------- POST _transform/_preview diff --git a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc index 2b3fcc1b6df9f..1a18f5f9a433e 100644 --- a/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc +++ b/docs/reference/troubleshooting/common-issues/disk-usage-exceeded.asciidoc @@ -36,13 +36,11 @@ GET _cluster/allocation/explain { "index": "my-index", "shard": 0, - "primary": false, - "current_node": "my-node" + "primary": false } ---- // TEST[s/^/PUT my-index\n/] // TEST[s/"primary": false,/"primary": false/] -// TEST[s/"current_node": "my-node"//] To immediately restore write operations, you can temporarily increase the disk watermarks and remove the write block. diff --git a/docs/reference/troubleshooting/common-issues/high-cpu-usage.asciidoc b/docs/reference/troubleshooting/common-issues/high-cpu-usage.asciidoc index 858683ef97a6d..96a9a8f1e32b7 100644 --- a/docs/reference/troubleshooting/common-issues/high-cpu-usage.asciidoc +++ b/docs/reference/troubleshooting/common-issues/high-cpu-usage.asciidoc @@ -9,12 +9,29 @@ If a thread pool is depleted, {es} will <> related to the thread pool. For example, if the `search` thread pool is depleted, {es} will reject search requests until more threads are available. +You might experience high CPU usage if a <>, and therefore the nodes assigned to that tier, is experiencing more traffic than other tiers. This imbalance in resource utilization is also known as <>. + [discrete] [[diagnose-high-cpu-usage]] ==== Diagnose high CPU usage **Check CPU usage** +You can check the CPU usage per node using the <>: + +// tag::cpu-usage-cat-nodes[] +[source,console] +---- +GET _cat/nodes?v=true&s=cpu:desc +---- + +The response's `cpu` column contains the current CPU usage as a percentage. +The `name` column contains the node's name. Elevated but transient CPU usage is +normal. However, if CPU usage is elevated for an extended duration, it should be +investigated. + +To track CPU usage over time, we recommend enabling monitoring: + include::{es-ref-dir}/tab-widgets/cpu-usage-widget.asciidoc[] **Check hot threads** @@ -24,11 +41,13 @@ threads API>> to check for resource-intensive threads running on the node. [source,console] ---- -GET _nodes/my-node,my-other-node/hot_threads +GET _nodes/hot_threads ---- // TEST[s/\/my-node,my-other-node//] -This API returns a breakdown of any hot threads in plain text. +This API returns a breakdown of any hot threads in plain text. High CPU usage +frequently correlates to <>. [discrete] [[reduce-cpu-usage]] diff --git a/docs/reference/troubleshooting/common-issues/high-jvm-memory-pressure.asciidoc b/docs/reference/troubleshooting/common-issues/high-jvm-memory-pressure.asciidoc index 267d6594b8025..3469a0ca5bf42 100644 --- a/docs/reference/troubleshooting/common-issues/high-jvm-memory-pressure.asciidoc +++ b/docs/reference/troubleshooting/common-issues/high-jvm-memory-pressure.asciidoc @@ -66,6 +66,8 @@ searches, consider the following setting changes: <> cluster setting. 
+* Set a default search timeout using the <> cluster setting. + [source,console] ---- PUT _settings diff --git a/docs/reference/troubleshooting/common-issues/rejected-requests.asciidoc b/docs/reference/troubleshooting/common-issues/rejected-requests.asciidoc index 497bddc562c69..c863709775fcd 100644 --- a/docs/reference/troubleshooting/common-issues/rejected-requests.asciidoc +++ b/docs/reference/troubleshooting/common-issues/rejected-requests.asciidoc @@ -23,9 +23,52 @@ To check the number of rejected tasks for each thread pool, use the [source,console] ---- -GET /_cat/thread_pool?v=true&h=id,name,active,rejected,completed +GET /_cat/thread_pool?v=true&h=id,name,queue,active,rejected,completed ---- +`write` thread pool rejections frequently appear in the erring API and +correlating log as `EsRejectedExecutionException` with either +`QueueResizingEsThreadPoolExecutor` or `queue capacity`. + +These errors are often related to <>. + +[discrete] +[[check-circuit-breakers]] +==== Check circuit breakers + +To check the number of tripped <>, use the +<>. + +[source,console] +---- +GET /_nodes/stats/breaker +---- + +These statistics are cumulative from node startup. For more information, see +<>. + +[discrete] +[[check-indexing-pressure]] +==== Check indexing pressure + +To check the number of <> +rejections, use the <>. + +[source,console] +---- +GET _nodes/stats?human&filter_path=nodes.*.indexing_pressure +---- + +These stats are cumulative from node startup. + +Indexing pressure rejections appear as an +`EsRejectedExecutionException`, and indicate that they were rejected due +to `coordinating_and_primary_bytes`, `coordinating`, `primary`, or `replica`. + +These errors are often related to <>, +<> sizing, or the ingest target's +<>. + [discrete] [[prevent-rejected-requests]] ==== Prevent rejected requests @@ -34,9 +77,4 @@ GET /_cat/thread_pool?v=true&h=id,name,active,rejected,completed If {es} regularly rejects requests and other tasks, your cluster likely has high CPU usage or high JVM memory pressure. For tips, see <> and -<>. - -**Prevent circuit breaker errors** - -If you regularly trigger circuit breaker errors, see <> -for tips on diagnosing and preventing them. \ No newline at end of file +<>. \ No newline at end of file diff --git a/docs/reference/troubleshooting/common-issues/task-queue-backlog.asciidoc b/docs/reference/troubleshooting/common-issues/task-queue-backlog.asciidoc index 1ff5bf2e5c311..5aa6a0129c2d4 100644 --- a/docs/reference/troubleshooting/common-issues/task-queue-backlog.asciidoc +++ b/docs/reference/troubleshooting/common-issues/task-queue-backlog.asciidoc @@ -1,10 +1,10 @@ [[task-queue-backlog]] === Task queue backlog -A backlogged task queue can prevent tasks from completing and -put the cluster into an unhealthy state. -Resource constraints, a large number of tasks being triggered at once, -and long running tasks can all contribute to a backlogged task queue. +A backlogged task queue can prevent tasks from completing and put the cluster +into an unhealthy state. Resource constraints, a large number of tasks being +triggered at once, and long running tasks can all contribute to a backlogged +task queue. [discrete] [[diagnose-task-queue-backlog]] @@ -12,39 +12,77 @@ and long running tasks can all contribute to a backlogged task queue. **Check the thread pool status** -A <> can result in <>. +A <> can result in +<>. 
-You can use the <> to -see the number of active threads in each thread pool and -how many tasks are queued, how many have been rejected, and how many have completed. +Thread pool depletion might be restricted to a specific <>. If <> is occuring, one node might experience depletion faster than other nodes, leading to performance issues and a growing task backlog. + +You can use the <> to see the number of +active threads in each thread pool and how many tasks are queued, how many +have been rejected, and how many have completed. [source,console] ---- GET /_cat/thread_pool?v&s=t,n&h=type,name,node_name,active,queue,rejected,completed ---- +The `active` and `queue` statistics are instantaneous while the `rejected` and +`completed` statistics are cumulative from node startup. + **Inspect the hot threads on each node** -If a particular thread pool queue is backed up, -you can periodically poll the <> API -to determine if the thread has sufficient -resources to progress and gauge how quickly it is progressing. +If a particular thread pool queue is backed up, you can periodically poll the +<> API to determine if the thread +has sufficient resources to progress and gauge how quickly it is progressing. [source,console] ---- GET /_nodes/hot_threads ---- -**Look for long running tasks** +**Look for long running node tasks** + +Long-running tasks can also cause a backlog. You can use the <> API to get information about the node tasks that are running. +Check the `running_time_in_nanos` to identify tasks that are taking an +excessive amount of time to complete. + +[source,console] +---- +GET /_tasks?pretty=true&human=true&detailed=true +---- -Long-running tasks can also cause a backlog. -You can use the <> API to get information about the tasks that are running. -Check the `running_time_in_nanos` to identify tasks that are taking an excessive amount of time to complete. +If a particular `action` is suspected, you can filter the tasks further. The most common long-running tasks are <>- or search-related. +* Filter for <> actions: ++ [source,console] ---- -GET /_tasks?filter_path=nodes.*.tasks +GET /_tasks?human&detailed&actions=indices:data/write/bulk +---- + +* Filter for search actions: ++ +[source,console] ---- +GET /_tasks?human&detailed&actions=indices:data/write/search +---- + +The API response may contain additional tasks columns, including `description` and `header`, which provides the task parameters, target, and requestor. You can use this information to perform further diagnosis. + +**Look for long running cluster tasks** + +A task backlog might also appear as a delay in synchronizing the cluster state. You +can use the <> to get information +about the pending cluster state sync tasks that are running. + +[source,console] +---- +GET /_cluster/pending_tasks +---- + +Check the `timeInQueue` to identify tasks that are taking an excessive amount +of time to complete. [discrete] [[resolve-task-queue-backlog]] diff --git a/docs/reference/troubleshooting/network-timeouts.asciidoc b/docs/reference/troubleshooting/network-timeouts.asciidoc index ef942ac1d268d..ef666c09f87db 100644 --- a/docs/reference/troubleshooting/network-timeouts.asciidoc +++ b/docs/reference/troubleshooting/network-timeouts.asciidoc @@ -16,20 +16,22 @@ end::troubleshooting-network-timeouts-gc-vm[] tag::troubleshooting-network-timeouts-packet-capture-elections[] * Packet captures will reveal system-level and network-level faults, especially -if you capture the network traffic simultaneously at all relevant nodes. 
You -should be able to observe any retransmissions, packet loss, or other delays on -the connections between the nodes. +if you capture the network traffic simultaneously at all relevant nodes and +analyse it alongside the {es} logs from those nodes. You should be able to +observe any retransmissions, packet loss, or other delays on the connections +between the nodes. end::troubleshooting-network-timeouts-packet-capture-elections[] tag::troubleshooting-network-timeouts-packet-capture-fault-detection[] * Packet captures will reveal system-level and network-level faults, especially if you capture the network traffic simultaneously at the elected master and the -faulty node. The connection used for follower checks is not used for any other -traffic so it can be easily identified from the flow pattern alone, even if TLS -is in use: almost exactly every second there will be a few hundred bytes sent -each way, first the request by the master and then the response by the -follower. You should be able to observe any retransmissions, packet loss, or -other delays on such a connection. +faulty node and analyse it alongside the {es} logs from those nodes. The +connection used for follower checks is not used for any other traffic so it can +be easily identified from the flow pattern alone, even if TLS is in use: almost +exactly every second there will be a few hundred bytes sent each way, first the +request by the master and then the response by the follower. You should be able +to observe any retransmissions, packet loss, or other delays on such a +connection. end::troubleshooting-network-timeouts-packet-capture-fault-detection[] tag::troubleshooting-network-timeouts-threads[] diff --git a/docs/reference/upgrade/disable-shard-alloc.asciidoc b/docs/reference/upgrade/disable-shard-alloc.asciidoc index a93b6dfc6c60b..f69a673095257 100644 --- a/docs/reference/upgrade/disable-shard-alloc.asciidoc +++ b/docs/reference/upgrade/disable-shard-alloc.asciidoc @@ -17,3 +17,7 @@ PUT _cluster/settings } -------------------------------------------------- // TEST[skip:indexes don't assign] + +You can also consider <> when restarting +large clusters to reduce initial strain while nodes are processing +<>. 
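Once the upgraded or restarted nodes have rejoined the cluster, remember to re-enable allocation. This is a sketch of the companion step, assuming allocation was disabled through the `cluster.routing.allocation.enable` setting shown earlier in this page:

[source,console]
----
PUT _cluster/settings
{
  "persistent": {
    "cluster.routing.allocation.enable": null <1>
  }
}
----
// TEST[skip:indexes don't assign]
<1> Setting the value to `null` removes the override and restores the default
allocation behaviour.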
\ No newline at end of file diff --git a/gradle/verification-metadata.xml b/gradle/verification-metadata.xml index cd408ba75aa10..96d60306e4acd 100644 --- a/gradle/verification-metadata.xml +++ b/gradle/verification-metadata.xml @@ -84,6 +84,11 @@ + + + + + @@ -731,14 +736,14 @@ - - - + + + - - - + + + diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar index e6441136f3d4b..a4b76b9530d66 100644 Binary files a/gradle/wrapper/gradle-wrapper.jar and b/gradle/wrapper/gradle-wrapper.jar differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties index 515ab9d5f1822..e955ee28dd349 100644 --- a/gradle/wrapper/gradle-wrapper.properties +++ b/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip +distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/gradlew b/gradlew index b740cf13397ab..f5feea6d6b116 100755 --- a/gradlew +++ b/gradlew @@ -15,6 +15,8 @@ # See the License for the specific language governing permissions and # limitations under the License. # +# SPDX-License-Identifier: Apache-2.0 +# ############################################################################## # @@ -84,7 +86,8 @@ done # shellcheck disable=SC2034 APP_BASE_NAME=${0##*/} # Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s +' "$PWD" ) || exit # Use the maximum available, or set MAX_FD != -1 to use that value. MAX_FD=maximum diff --git a/gradlew.bat b/gradlew.bat index 7101f8e4676fc..9b42019c7915b 100644 --- a/gradlew.bat +++ b/gradlew.bat @@ -13,6 +13,8 @@ @rem See the License for the specific language governing permissions and @rem limitations under the License. 
@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem @if "%DEBUG%"=="" @echo off @rem ########################################################################## diff --git a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java index f3f53f1b3c5ea..3c01e490369de 100644 --- a/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java +++ b/libs/dissect/src/main/java/org/elasticsearch/dissect/DissectParser.java @@ -203,7 +203,7 @@ public Map parse(String inputString) { DissectKey key = dissectPair.key(); byte[] delimiter = dissectPair.delimiter().getBytes(StandardCharsets.UTF_8); // start dissection after the first delimiter - int i = leadingDelimiter.length(); + int i = leadingDelimiter.getBytes(StandardCharsets.UTF_8).length; int valueStart = i; int lookAheadMatches; // start walking the input string byte by byte, look ahead for matches where needed diff --git a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java index 431b26fc1155d..2893e419a84a3 100644 --- a/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java +++ b/libs/dissect/src/test/java/org/elasticsearch/dissect/DissectParserTests.java @@ -211,6 +211,18 @@ public void testMatchUnicode() { assertMatch("%{a->}࿏%{b}", "⟳༒࿏࿏࿏࿏࿏༒⟲", Arrays.asList("a", "b"), Arrays.asList("⟳༒", "༒⟲")); assertMatch("%{*a}࿏%{&a}", "⟳༒࿏༒⟲", Arrays.asList("⟳༒"), Arrays.asList("༒⟲")); assertMatch("%{}࿏%{a}", "⟳༒࿏༒⟲", Arrays.asList("a"), Arrays.asList("༒⟲")); + assertMatch( + "Zürich, the %{adjective} city in Switzerland", + "Zürich, the largest city in Switzerland", + Arrays.asList("adjective"), + Arrays.asList("largest") + ); + assertMatch( + "Zürich, the %{one} city in Switzerland; Zürich, the %{two} city in Switzerland", + "Zürich, the largest city in Switzerland; Zürich, the LARGEST city in Switzerland", + Arrays.asList("one", "two"), + Arrays.asList("largest", "LARGEST") + ); } public void testMatchRemainder() { diff --git a/libs/geo/src/main/java/org/elasticsearch/geometry/utils/WellKnownText.java b/libs/geo/src/main/java/org/elasticsearch/geometry/utils/WellKnownText.java index d233dcc81a3fc..1e7ac3f8097e9 100644 --- a/libs/geo/src/main/java/org/elasticsearch/geometry/utils/WellKnownText.java +++ b/libs/geo/src/main/java/org/elasticsearch/geometry/utils/WellKnownText.java @@ -43,6 +43,7 @@ public class WellKnownText { public static final String RPAREN = ")"; public static final String COMMA = ","; public static final String NAN = "NaN"; + public static final int MAX_NESTED_DEPTH = 1000; private static final String NUMBER = ""; private static final String EOF = "END-OF-STREAM"; @@ -425,7 +426,7 @@ public static Geometry fromWKT(GeometryValidator validator, boolean coerce, Stri tokenizer.whitespaceChars('\r', '\r'); tokenizer.whitespaceChars('\n', '\n'); tokenizer.commentChar('#'); - Geometry geometry = parseGeometry(tokenizer, coerce); + Geometry geometry = parseGeometry(tokenizer, coerce, 0); validator.validate(geometry); return geometry; } finally { @@ -436,40 +437,35 @@ public static Geometry fromWKT(GeometryValidator validator, boolean coerce, Stri /** * parse geometry from the stream tokenizer */ - private static Geometry parseGeometry(StreamTokenizer stream, boolean coerce) throws IOException, ParseException { + private static Geometry parseGeometry(StreamTokenizer stream, boolean coerce, int depth) throws IOException, 
ParseException { final String type = nextWord(stream).toLowerCase(Locale.ROOT); - switch (type) { - case "point": - return parsePoint(stream); - case "multipoint": - return parseMultiPoint(stream); - case "linestring": - return parseLine(stream); - case "multilinestring": - return parseMultiLine(stream); - case "polygon": - return parsePolygon(stream, coerce); - case "multipolygon": - return parseMultiPolygon(stream, coerce); - case "bbox": - return parseBBox(stream); - case "geometrycollection": - return parseGeometryCollection(stream, coerce); - case "circle": // Not part of the standard, but we need it for internal serialization - return parseCircle(stream); - } - throw new IllegalArgumentException("Unknown geometry type: " + type); - } - - private static GeometryCollection parseGeometryCollection(StreamTokenizer stream, boolean coerce) throws IOException, - ParseException { + return switch (type) { + case "point" -> parsePoint(stream); + case "multipoint" -> parseMultiPoint(stream); + case "linestring" -> parseLine(stream); + case "multilinestring" -> parseMultiLine(stream); + case "polygon" -> parsePolygon(stream, coerce); + case "multipolygon" -> parseMultiPolygon(stream, coerce); + case "bbox" -> parseBBox(stream); + case "geometrycollection" -> parseGeometryCollection(stream, coerce, depth + 1); + case "circle" -> // Not part of the standard, but we need it for internal serialization + parseCircle(stream); + default -> throw new IllegalArgumentException("Unknown geometry type: " + type); + }; + } + + private static GeometryCollection parseGeometryCollection(StreamTokenizer stream, boolean coerce, int depth) + throws IOException, ParseException { if (nextEmptyOrOpen(stream).equals(EMPTY)) { return GeometryCollection.EMPTY; } + if (depth > MAX_NESTED_DEPTH) { + throw new ParseException("maximum nested depth of " + MAX_NESTED_DEPTH + " exceeded", stream.lineno()); + } List shapes = new ArrayList<>(); - shapes.add(parseGeometry(stream, coerce)); + shapes.add(parseGeometry(stream, coerce, depth)); while (nextCloserOrComma(stream).equals(COMMA)) { - shapes.add(parseGeometry(stream, coerce)); + shapes.add(parseGeometry(stream, coerce, depth)); } return new GeometryCollection<>(shapes); } diff --git a/libs/geo/src/test/java/org/elasticsearch/geometry/GeometryCollectionTests.java b/libs/geo/src/test/java/org/elasticsearch/geometry/GeometryCollectionTests.java index 6a7bda7f9e0bb..b3f7aa610153b 100644 --- a/libs/geo/src/test/java/org/elasticsearch/geometry/GeometryCollectionTests.java +++ b/libs/geo/src/test/java/org/elasticsearch/geometry/GeometryCollectionTests.java @@ -19,6 +19,8 @@ import java.util.Arrays; import java.util.Collections; +import static org.hamcrest.Matchers.containsString; + public class GeometryCollectionTests extends BaseGeometryTestCase> { @Override protected GeometryCollection createTestInstance(boolean hasAlt) { @@ -65,6 +67,31 @@ public void testInitValidation() { StandardValidator.instance(true).validate(new GeometryCollection(Collections.singletonList(new Point(20, 10, 30)))); } + public void testDeeplyNestedCollection() throws IOException, ParseException { + String wkt = makeDeeplyNestedGeometryCollectionWKT(WellKnownText.MAX_NESTED_DEPTH); + Geometry parsed = WellKnownText.fromWKT(GeographyValidator.instance(true), true, wkt); + assertEquals(WellKnownText.MAX_NESTED_DEPTH, countNestedGeometryCollections((GeometryCollection) parsed)); + } + + public void testTooDeeplyNestedCollection() { + String wkt = 
makeDeeplyNestedGeometryCollectionWKT(WellKnownText.MAX_NESTED_DEPTH + 1); + ParseException ex = expectThrows(ParseException.class, () -> WellKnownText.fromWKT(GeographyValidator.instance(true), true, wkt)); + assertThat(ex.getMessage(), containsString("maximum nested depth of " + WellKnownText.MAX_NESTED_DEPTH)); + } + + private String makeDeeplyNestedGeometryCollectionWKT(int depth) { + return "GEOMETRYCOLLECTION (".repeat(depth) + "POINT (20.0 10.0)" + ")".repeat(depth); + } + + private int countNestedGeometryCollections(GeometryCollection geometry) { + int count = 1; + while (geometry.get(0) instanceof GeometryCollection g) { + count += 1; + geometry = g; + } + return count; + } + @Override protected GeometryCollection mutateInstance(GeometryCollection instance) { return null;// TODO implement https://github.com/elastic/elasticsearch/issues/25929 diff --git a/libs/grok/src/main/java/org/elasticsearch/grok/PatternBank.java b/libs/grok/src/main/java/org/elasticsearch/grok/PatternBank.java index bcf9253866931..3b10d58815169 100644 --- a/libs/grok/src/main/java/org/elasticsearch/grok/PatternBank.java +++ b/libs/grok/src/main/java/org/elasticsearch/grok/PatternBank.java @@ -8,12 +8,17 @@ package org.elasticsearch.grok; +import java.util.ArrayDeque; import java.util.ArrayList; import java.util.Collections; +import java.util.Deque; +import java.util.HashSet; import java.util.LinkedHashMap; +import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Objects; +import java.util.Set; public class PatternBank { @@ -57,52 +62,102 @@ public PatternBank extendWith(Map extraPatterns) { } /** - * Checks whether patterns reference each other in a circular manner and if so fail with an exception. + * Checks whether patterns reference each other in a circular manner and if so fail with an IllegalArgumentException. It will also + * fail if any pattern value contains a pattern name that does not exist in the bank. *
* In a pattern, anything between %{ and } or : is considered * a reference to another named pattern. This method will navigate to all these named patterns and * check for a circular reference. */ static void forbidCircularReferences(Map bank) { - // first ensure that the pattern bank contains no simple circular references (i.e., any pattern - // containing an immediate reference to itself) as those can cause the remainder of this algorithm - // to recurse infinitely - for (Map.Entry entry : bank.entrySet()) { - if (patternReferencesItself(entry.getValue(), entry.getKey())) { - throw new IllegalArgumentException("circular reference in pattern [" + entry.getKey() + "][" + entry.getValue() + "]"); + Set allVisitedNodes = new HashSet<>(); + Set nodesVisitedMoreThanOnceInAPath = new HashSet<>(); + // Walk the full path starting at each node in the graph: + for (String traversalStartNode : bank.keySet()) { + if (nodesVisitedMoreThanOnceInAPath.contains(traversalStartNode) == false && allVisitedNodes.contains(traversalStartNode)) { + // If we have seen this node before in a path, and it only appeared once in that path, there is no need to check it again + continue; } - } - - // next, recursively check any other pattern names referenced in each pattern - for (Map.Entry entry : bank.entrySet()) { - String name = entry.getKey(); - String pattern = entry.getValue(); - innerForbidCircularReferences(bank, name, new ArrayList<>(), pattern); + Set visitedFromThisStartNode = new LinkedHashSet<>(); + /* + * This stack records where we are in the graph. Each String[] in the stack represents a collection of neighbors to the first + * non-null node in the layer below it. Null means that the path from that location has been fully traversed. Once all nodes + * at a layer have been set to null, the layer is popped. So for example say we have the graph + * ( 1 -> (2 -> (4, 5, 8), 3 -> (6, 7))) then when we are at 6 via 1 -> 3 -> 6, the stack looks like this: + * [6, 7] + * [null, 3] + * [1] + */ + Deque stack = new ArrayDeque<>(); + stack.push(new String[] { traversalStartNode }); + // This is used so that we know that we're unwinding the stack and know not to get the current node's neighbors again. + boolean unwinding = false; + while (stack.isEmpty() == false) { + String[] currentLevel = stack.peek(); + int firstNonNullIndex = findFirstNonNull(currentLevel); + String node = currentLevel[firstNonNullIndex]; + boolean endOfThisPath = false; + if (unwinding) { + // We have completed all of this node's neighbors and have popped back to the node + endOfThisPath = true; + } else if (traversalStartNode.equals(node) && stack.size() > 1) { + Deque reversedPath = new ArrayDeque<>(); + for (String[] level : stack) { + reversedPath.push(level[findFirstNonNull(level)]); + } + throw new IllegalArgumentException("circular reference detected: " + String.join("->", reversedPath)); + } else if (visitedFromThisStartNode.contains(node)) { + /* + * We are only looking for a cycle starting and ending at traversalStartNode right now. But this node has been + * visited more than once in the path rooted at traversalStartNode. This could be because it is a cycle, or could be + * because two nodes in the path both point to it. We add it to nodesVisitedMoreThanOnceInAPath so that we make sure + * to check the path rooted at this node later. 
+ */ + nodesVisitedMoreThanOnceInAPath.add(node); + endOfThisPath = true; + } else { + visitedFromThisStartNode.add(node); + String[] neighbors = getPatternNamesForPattern(bank, node); + if (neighbors.length == 0) { + endOfThisPath = true; + } else { + stack.push(neighbors); + } + } + if (endOfThisPath) { + if (firstNonNullIndex == currentLevel.length - 1) { + // We have handled all the neighbors at this level -- there are no more non-null ones + stack.pop(); + unwinding = true; + } else { + currentLevel[firstNonNullIndex] = null; + unwinding = false; + } + } else { + unwinding = false; + } + } + allVisitedNodes.addAll(visitedFromThisStartNode); } } - private static void innerForbidCircularReferences(Map bank, String patternName, List path, String pattern) { - if (patternReferencesItself(pattern, patternName)) { - String message; - if (path.isEmpty()) { - message = "circular reference in pattern [" + patternName + "][" + pattern + "]"; - } else { - message = "circular reference in pattern [" - + path.remove(path.size() - 1) - + "][" - + pattern - + "] back to pattern [" - + patternName - + "]"; - // add rest of the path: - if (path.isEmpty() == false) { - message += " via patterns [" + String.join("=>", path) + "]"; - } + private static int findFirstNonNull(String[] level) { + for (int i = 0; i < level.length; i++) { + if (level[i] != null) { + return i; } - throw new IllegalArgumentException(message); } + return -1; + } - // next check any other pattern names found in the pattern + /** + * This method returns the array of pattern names (if any) found in the bank for the pattern named patternName. If no pattern names + * are found, an empty array is returned. If any of the list of pattern names to be returned does not exist in the bank, an exception + * is thrown. 
+ */ + private static String[] getPatternNamesForPattern(Map bank, String patternName) { + String pattern = bank.get(patternName); + List patternReferences = new ArrayList<>(); for (int i = pattern.indexOf("%{"); i != -1; i = pattern.indexOf("%{", i + 1)) { int begin = i + 2; int bracketIndex = pattern.indexOf('}', begin); @@ -112,25 +167,22 @@ private static void innerForbidCircularReferences(Map bank, Stri end = bracketIndex; } else if (columnIndex != -1 && bracketIndex == -1) { end = columnIndex; - } else if (bracketIndex != -1 && columnIndex != -1) { + } else if (bracketIndex != -1) { end = Math.min(bracketIndex, columnIndex); } else { throw new IllegalArgumentException("pattern [" + pattern + "] has an invalid syntax"); } String otherPatternName = pattern.substring(begin, end); - path.add(otherPatternName); - String otherPattern = bank.get(otherPatternName); - if (otherPattern == null) { - throw new IllegalArgumentException( - "pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]" - ); + if (patternReferences.contains(otherPatternName) == false) { + patternReferences.add(otherPatternName); + String otherPattern = bank.get(otherPatternName); + if (otherPattern == null) { + throw new IllegalArgumentException( + "pattern [" + patternName + "] is referencing a non-existent pattern [" + otherPatternName + "]" + ); + } } - - innerForbidCircularReferences(bank, patternName, path, otherPattern); } - } - - private static boolean patternReferencesItself(String pattern, String patternName) { - return pattern.contains("%{" + patternName + "}") || pattern.contains("%{" + patternName + ":"); + return patternReferences.toArray(new String[0]); } } diff --git a/libs/grok/src/test/java/org/elasticsearch/grok/PatternBankTests.java b/libs/grok/src/test/java/org/elasticsearch/grok/PatternBankTests.java index dcc7ab431611a..08a4965cdb371 100644 --- a/libs/grok/src/test/java/org/elasticsearch/grok/PatternBankTests.java +++ b/libs/grok/src/test/java/org/elasticsearch/grok/PatternBankTests.java @@ -11,8 +11,13 @@ import org.elasticsearch.test.ESTestCase; import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; import java.util.Map; -import java.util.TreeMap; +import java.util.Set; + +import static org.elasticsearch.test.ESTestCase.randomBoolean; +import static org.hamcrest.Matchers.containsString; public class PatternBankTests extends ESTestCase { @@ -32,7 +37,7 @@ public void testBankCannotBeNull() { public void testConstructorValidatesCircularReferences() { var e = expectThrows(IllegalArgumentException.class, () -> new PatternBank(Map.of("NAME", "!!!%{NAME}!!!"))); - assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage()); + assertEquals("circular reference detected: NAME->NAME", e.getMessage()); } public void testExtendWith() { @@ -48,36 +53,36 @@ public void testExtendWith() { public void testCircularReference() { var e = expectThrows(IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("NAME", "!!!%{NAME}!!!"))); - assertEquals("circular reference in pattern [NAME][!!!%{NAME}!!!]", e.getMessage()); + assertEquals("circular reference detected: NAME->NAME", e.getMessage()); e = expectThrows(IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("NAME", "!!!%{NAME:name}!!!"))); - assertEquals("circular reference in pattern [NAME][!!!%{NAME:name}!!!]", e.getMessage()); + assertEquals("circular reference detected: NAME->NAME", e.getMessage()); e = 
expectThrows( IllegalArgumentException.class, () -> { PatternBank.forbidCircularReferences(Map.of("NAME", "!!!%{NAME:name:int}!!!")); } ); - assertEquals("circular reference in pattern [NAME][!!!%{NAME:name:int}!!!]", e.getMessage()); + assertEquals("circular reference detected: NAME->NAME", e.getMessage()); e = expectThrows(IllegalArgumentException.class, () -> { - Map bank = new TreeMap<>(); + Map bank = new LinkedHashMap<>(); bank.put("NAME1", "!!!%{NAME2}!!!"); bank.put("NAME2", "!!!%{NAME1}!!!"); PatternBank.forbidCircularReferences(bank); }); - assertEquals("circular reference in pattern [NAME2][!!!%{NAME1}!!!] back to pattern [NAME1]", e.getMessage()); + assertEquals("circular reference detected: NAME1->NAME2->NAME1", e.getMessage()); e = expectThrows(IllegalArgumentException.class, () -> { - Map bank = new TreeMap<>(); + Map bank = new LinkedHashMap<>(); bank.put("NAME1", "!!!%{NAME2}!!!"); bank.put("NAME2", "!!!%{NAME3}!!!"); bank.put("NAME3", "!!!%{NAME1}!!!"); PatternBank.forbidCircularReferences(bank); }); - assertEquals("circular reference in pattern [NAME3][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2]", e.getMessage()); + assertEquals("circular reference detected: NAME1->NAME2->NAME3->NAME1", e.getMessage()); e = expectThrows(IllegalArgumentException.class, () -> { - Map bank = new TreeMap<>(); + Map bank = new LinkedHashMap<>(); bank.put("NAME1", "!!!%{NAME2}!!!"); bank.put("NAME2", "!!!%{NAME3}!!!"); bank.put("NAME3", "!!!%{NAME4}!!!"); @@ -85,10 +90,78 @@ public void testCircularReference() { bank.put("NAME5", "!!!%{NAME1}!!!"); PatternBank.forbidCircularReferences(bank); }); - assertEquals( - "circular reference in pattern [NAME5][!!!%{NAME1}!!!] back to pattern [NAME1] via patterns [NAME2=>NAME3=>NAME4]", - e.getMessage() - ); + assertEquals("circular reference detected: NAME1->NAME2->NAME3->NAME4->NAME5->NAME1", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new LinkedHashMap<>(); + bank.put("NAME1", "!!!%{NAME2}!!!"); + bank.put("NAME2", "!!!%{NAME3}!!!"); + bank.put("NAME3", "!!!%{NAME2}!!!"); + PatternBank.forbidCircularReferences(bank); + }); + assertEquals("circular reference detected: NAME2->NAME3->NAME2", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new LinkedHashMap<>(); + bank.put("NAME1", "!!!%{NAME2}!!!"); + bank.put("NAME2", "!!!%{NAME2}!!%{NAME3}!"); + bank.put("NAME3", "!!!%{NAME1}!!!"); + PatternBank.forbidCircularReferences(bank); + }); + assertEquals("circular reference detected: NAME1->NAME2->NAME3->NAME1", e.getMessage()); + + { + Map bank = new HashMap<>(); + bank.put("NAME1", "!!!%{NAME2}!!!%{NAME3}%{NAME4}"); + bank.put("NAME2", "!!!%{NAME3}!!!"); + bank.put("NAME3", "!!!!!!"); + bank.put("NAME4", "!!!%{NAME5}!!!"); + bank.put("NAME5", "!!!!!!"); + PatternBank.forbidCircularReferences(bank); + } + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new LinkedHashMap<>(); + bank.put("NAME1", "!!!%{NAME2}!!!%{NAME3}%{NAME4}"); + bank.put("NAME2", "!!!%{NAME3}!!!"); + bank.put("NAME3", "!!!!!!"); + bank.put("NAME4", "!!!%{NAME5}!!!"); + bank.put("NAME5", "!!!%{NAME1}!!!"); + PatternBank.forbidCircularReferences(bank); + }); + assertEquals("circular reference detected: NAME1->NAME4->NAME5->NAME1", e.getMessage()); + + { + Map bank = new HashMap<>(); + bank.put("NAME1", "!!!%{NAME2}!!!"); + bank.put("NAME2", "!!!%{NAME3}!!!"); + bank.put("NAME3", "!!!!!!"); + bank.put("NAME4", "!!!%{NAME5}!!!"); + bank.put("NAME5", 
"!!!%{NAME1}!!!"); + PatternBank.forbidCircularReferences(bank); + } + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new LinkedHashMap<>(); + bank.put("NAME1", "!!!%{NAME2} %{NAME3}!!!"); + bank.put("NAME2", "!!!%{NAME4} %{NAME5}!!!"); + bank.put("NAME3", "!!!!!!"); + bank.put("NAME4", "!!!!!!"); + bank.put("NAME5", "!!!%{NAME1}!!!"); + PatternBank.forbidCircularReferences(bank); + }); + assertEquals("circular reference detected: NAME1->NAME2->NAME5->NAME1", e.getMessage()); + + e = expectThrows(IllegalArgumentException.class, () -> { + Map bank = new LinkedHashMap<>(); + bank.put("NAME1", "!!!%{NAME2} %{NAME3}!!!"); + bank.put("NAME2", "!!!%{NAME4} %{NAME5}!!!"); + bank.put("NAME3", "!!!%{NAME1}!!!"); + bank.put("NAME4", "!!!!!!"); + bank.put("NAME5", "!!!!!!"); + PatternBank.forbidCircularReferences(bank); + }); + assertEquals("circular reference detected: NAME1->NAME3->NAME1", e.getMessage()); } public void testCircularSelfReference() { @@ -96,7 +169,7 @@ public void testCircularSelfReference() { IllegalArgumentException.class, () -> PatternBank.forbidCircularReferences(Map.of("ANOTHER", "%{INT}", "INT", "%{INT}")) ); - assertEquals("circular reference in pattern [INT][%{INT}]", e.getMessage()); + assertEquals("circular reference detected: INT->INT", e.getMessage()); } public void testInvalidPatternReferences() { @@ -112,4 +185,80 @@ public void testInvalidPatternReferences() { ); assertEquals("pattern [%{VALID] has an invalid syntax", e.getMessage()); } + + public void testDeepGraphOfPatterns() { + Map patternBankMap = randomBoolean() ? new HashMap<>() : new LinkedHashMap<>(); + final int nodeCount = 20_000; + for (int i = 0; i < nodeCount - 1; i++) { + patternBankMap.put("FOO" + i, "%{FOO" + (i + 1) + "}"); + } + patternBankMap.put("FOO" + (nodeCount - 1), "foo"); + new PatternBank(patternBankMap); + } + + public void testRandomBanksWithoutCycles() { + /* + * This creates a large number of pattens, each of which refers to a large number of patterns. But there are no cycles in any of + * these since each pattern only references patterns with a higher ID. We don't expect any exceptions here. + */ + Map patternBankMap = randomBoolean() ? new HashMap<>() : new LinkedHashMap<>(); + final int nodeCount = 500; + for (int i = 0; i < nodeCount - 1; i++) { + StringBuilder patternBuilder = new StringBuilder(); + for (int j = 0; j < randomIntBetween(0, 20); j++) { + patternBuilder.append("%{FOO-" + randomIntBetween(i + 1, nodeCount - 1) + "}"); + } + patternBankMap.put("FOO-" + i, patternBuilder.toString()); + } + patternBankMap.put("FOO-" + (nodeCount - 1), "foo"); + new PatternBank(patternBankMap); + } + + public void testRandomBanksWithCycles() { + /* + * This creates a large number of pattens, each of which refers to a large number of patterns. We have at least one cycle because + * we pick a node at random, and make sure that a node that it links (or one of its descendants) to links back. If no descendant + * links back to it, we create an artificial cycle at the end. 
+ */ + Map patternBankMap = new LinkedHashMap<>(); + final int nodeCount = 500; + int nodeToHaveCycle = randomIntBetween(0, nodeCount); + int nodeToPotentiallyCreateCycle = -1; + boolean haveCreatedCycle = false; + for (int i = 0; i < nodeCount - 1; i++) { + StringBuilder patternBuilder = new StringBuilder(); + int numberOfLinkedPatterns = randomIntBetween(1, 20); + int nodeToLinkBackIndex = randomIntBetween(0, numberOfLinkedPatterns); + Set childNodes = new HashSet<>(); + for (int j = 0; j < numberOfLinkedPatterns; j++) { + int childNode = randomIntBetween(i + 1, nodeCount - 1); + childNodes.add(childNode); + patternBuilder.append("%{FOO-" + childNode + "}"); + if (i == nodeToHaveCycle) { + if (nodeToLinkBackIndex == j) { + nodeToPotentiallyCreateCycle = childNode; + } + } + } + if (i == nodeToPotentiallyCreateCycle) { + // We either create the cycle here, or randomly pick a child node to maybe create the cycle + if (randomBoolean()) { + patternBuilder.append("%{FOO-" + nodeToHaveCycle + "}"); + haveCreatedCycle = true; + } else { + nodeToPotentiallyCreateCycle = randomFrom(childNodes); + } + } + patternBankMap.put("FOO-" + i, patternBuilder.toString()); + } + if (haveCreatedCycle) { + patternBankMap.put("FOO-" + (nodeCount - 1), "foo"); + } else { + // We didn't randomly create a cycle, so just force one in this last pattern + nodeToHaveCycle = nodeCount - 1; + patternBankMap.put("FOO-" + nodeToHaveCycle, "%{FOO-" + nodeToHaveCycle + "}"); + } + IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new PatternBank(patternBankMap)); + assertThat(e.getMessage(), containsString("FOO-" + nodeToHaveCycle)); + } } diff --git a/libs/native/src/main21/java/org/elasticsearch/nativeaccess/jdk/JdkKernel32Library.java b/libs/native/src/main21/java/org/elasticsearch/nativeaccess/jdk/JdkKernel32Library.java index bbfd26bd061d0..06dc41364b4d4 100644 --- a/libs/native/src/main21/java/org/elasticsearch/nativeaccess/jdk/JdkKernel32Library.java +++ b/libs/native/src/main21/java/org/elasticsearch/nativeaccess/jdk/JdkKernel32Library.java @@ -55,7 +55,7 @@ class JdkKernel32Library implements Kernel32Library { ); private static final MethodHandle SetProcessWorkingSetSize$mh = downcallHandleWithError( "SetProcessWorkingSetSize", - FunctionDescriptor.of(ADDRESS, JAVA_LONG, JAVA_LONG) + FunctionDescriptor.of(JAVA_BOOLEAN, ADDRESS, JAVA_LONG, JAVA_LONG) ); private static final MethodHandle GetShortPathNameW$mh = downcallHandleWithError( "GetShortPathNameW", @@ -94,7 +94,7 @@ static class JdkAddress implements Address { @Override public Address add(long offset) { - return new JdkAddress(MemorySegment.ofAddress(address.address())); + return new JdkAddress(MemorySegment.ofAddress(address.address() + offset)); } } diff --git a/libs/preallocate/build.gradle b/libs/preallocate/build.gradle index a490c7168516e..2bc802daee1d2 100644 --- a/libs/preallocate/build.gradle +++ b/libs/preallocate/build.gradle @@ -11,6 +11,11 @@ dependencies { implementation project(':libs:elasticsearch-core') implementation project(':libs:elasticsearch-logging') implementation "net.java.dev.jna:jna:${versions.jna}" + + testImplementation "junit:junit:${versions.junit}" + testImplementation(project(":test:framework")) { + exclude group: 'org.elasticsearch', module: 'elasticsearch-preallocate' + } } tasks.named('forbiddenApisMain').configure { diff --git a/libs/preallocate/src/main/java/module-info.java b/libs/preallocate/src/main/java/module-info.java index 89c85d95ab2f0..4e980b083701a 100644 --- 
a/libs/preallocate/src/main/java/module-info.java +++ b/libs/preallocate/src/main/java/module-info.java @@ -11,7 +11,7 @@ requires org.elasticsearch.logging; requires com.sun.jna; - exports org.elasticsearch.preallocate to org.elasticsearch.blobcache, com.sun.jna; + exports org.elasticsearch.preallocate to org.elasticsearch.blobcache, com.sun.jna, org.elasticsearch.server; provides org.elasticsearch.jdk.ModuleQualifiedExportsService with org.elasticsearch.preallocate.PreallocateModuleExportsService; } diff --git a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java index e841b38c0059e..cfc5855c71f8d 100644 --- a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java +++ b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/AbstractPosixPreallocator.java @@ -8,21 +8,27 @@ package org.elasticsearch.preallocate; -import com.sun.jna.FunctionMapper; import com.sun.jna.Library; import com.sun.jna.Native; +import com.sun.jna.NativeLibrary; import com.sun.jna.NativeLong; import com.sun.jna.Platform; import com.sun.jna.Structure; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; + import java.io.IOException; import java.security.AccessController; import java.security.PrivilegedAction; +import java.util.Arrays; +import java.util.List; import java.util.Locale; -import java.util.Map; abstract class AbstractPosixPreallocator implements Preallocator { + static final Logger logger = LogManager.getLogger(AbstractPosixPreallocator.class); + /** * Constants relating to posix libc. * @@ -35,7 +41,7 @@ protected record PosixConstants(int SIZEOF_STAT, int STAT_ST_SIZE_OFFSET, int O_ private static final int O_WRONLY = 1; - static final class Stat64 extends Structure implements Structure.ByReference { + public static final class Stat64 extends Structure implements Structure.ByReference { public byte[] _ignore1; public NativeLong st_size = new NativeLong(0); public byte[] _ignore2; @@ -44,6 +50,11 @@ static final class Stat64 extends Structure implements Structure.ByReference { this._ignore1 = new byte[stSizeOffset]; this._ignore2 = new byte[sizeof - stSizeOffset - 8]; } + + @Override + protected List getFieldOrder() { + return Arrays.asList("_ignore1", "st_size", "_ignore2"); + } } private interface NativeFunctions extends Library { @@ -58,6 +69,10 @@ private interface FStat64Function extends Library { int fstat64(int fd, Stat64 stat); } + private interface FXStatFunction extends Library { + int __fxstat(int version, int fd, Stat64 stat); + } + public static final boolean NATIVES_AVAILABLE; private static final NativeFunctions functions; private static final FStat64Function fstat64; @@ -67,18 +82,29 @@ private interface FStat64Function extends Library { try { return Native.load(Platform.C_LIBRARY_NAME, NativeFunctions.class); } catch (final UnsatisfiedLinkError e) { + logger.warn("Failed to load posix functions for preallocate"); return null; } }); fstat64 = AccessController.doPrivileged((PrivilegedAction) () -> { try { + // JNA lazily finds symbols, so even though we try to bind two different functions below, if fstat64 + // isn't found, we won't know until runtime when calling the function. To force resolution of the + // symbol we get a function object directly from the native library. 
We don't use it, we just want to + // see if it will throw UnsatisfiedLinkError + NativeLibrary.getInstance(Platform.C_LIBRARY_NAME).getFunction("fstat64"); return Native.load(Platform.C_LIBRARY_NAME, FStat64Function.class); } catch (final UnsatisfiedLinkError e) { + // fstat has a long history in linux from the 32-bit architecture days. On some modern linux systems, + // fstat64 doesn't exist as a symbol in glibc. Instead, the compiler replaces fstat64 calls with + // the internal __fxstat method. Here we fall back to __fxstat, and statically bind the special + // "version" argument so that the call site looks the same as that of fstat64 try { - // on Linux fstat64 isn't available as a symbol, but instead uses a special __ name - var options = Map.of(Library.OPTION_FUNCTION_MAPPER, (FunctionMapper) (lib, method) -> "__fxstat64"); - return Native.load(Platform.C_LIBRARY_NAME, FStat64Function.class, options); + var fxstat = Native.load(Platform.C_LIBRARY_NAME, FXStatFunction.class); + int version = System.getProperty("os.arch").equals("aarch64") ? 0 : 1; + return (fd, stat) -> fxstat.__fxstat(version, fd, stat); } catch (UnsatisfiedLinkError e2) { + logger.warn("Failed to load __fxstat for preallocate"); return null; } } @@ -124,12 +150,20 @@ public void close() throws IOException { @Override public boolean useNative() { - return false; + return NATIVES_AVAILABLE; } @Override public NativeFileHandle open(String path) throws IOException { - int fd = functions.open(path, O_WRONLY, constants.O_CREAT); + // We pass down O_CREAT, so open will create the file if it does not exist. + // From the open man page (https://www.man7.org/linux/man-pages/man2/open.2.html): + // - The mode parameter is needed when specifying O_CREAT + // - The effective mode is modified by the process's umask: in the absence of a default ACL, the mode of the created file is + // (mode & ~umask). 
+ We choose to pass down 0666 (r/w permission for user/group/others) to mimic what the JDK does for its open operations; + // see for example the fileOpen implementation in libjava: + // https://github.com/openjdk/jdk/blob/98562166e4a4c8921709014423c6cbc993aa0d97/src/java.base/unix/native/libjava/io_util_md.c#L105 + int fd = functions.open(path, O_WRONLY | constants.O_CREAT, 0666); if (fd < 0) { throw newIOException(String.format(Locale.ROOT, "Could not open file [%s] for preallocation", path)); } diff --git a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java index 149cf80527bd0..f80d6cbafd5cd 100644 --- a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java +++ b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/MacOsPreallocator.java @@ -7,18 +7,27 @@ */ package org.elasticsearch.preallocate; +import com.sun.jna.Library; +import com.sun.jna.Memory; import com.sun.jna.Native; import com.sun.jna.NativeLong; import com.sun.jna.Platform; -import com.sun.jna.Structure; +import java.lang.invoke.MethodHandles; import java.security.AccessController; import java.security.PrivilegedAction; -import java.util.Arrays; -import java.util.List; final class MacOsPreallocator extends AbstractPosixPreallocator { + static { + try { + MethodHandles.lookup().ensureInitialized(Natives.class); + logger.info("Initialized macos natives: " + Natives.NATIVES_AVAILABLE); + } catch (IllegalAccessException unexpected) { + throw new AssertionError(unexpected); + } + } + MacOsPreallocator() { super(new PosixConstants(144, 96, 512)); } @@ -31,21 +40,25 @@ public boolean useNative() { @Override public int preallocate(final int fd, final long currentSize /* unused */ , final long fileSize) { // the Structure.ByReference constructor requires access to declared members - final Natives.Fcntl.FStore fst = AccessController.doPrivileged((PrivilegedAction) Natives.Fcntl.FStore::new); - fst.fst_flags = Natives.Fcntl.F_ALLOCATECONTIG; - fst.fst_posmode = Natives.Fcntl.F_PEOFPOSMODE; - fst.fst_offset = new NativeLong(0); - fst.fst_length = new NativeLong(fileSize); + final Natives.Fcntl.FStore fst = new Natives.Fcntl.FStore(); + fst.setFlags(Natives.Fcntl.F_ALLOCATECONTIG); + fst.setPosmode(Natives.Fcntl.F_PEOFPOSMODE); + fst.setOffset(0); + fst.setLength(fileSize); // first, try allocating contiguously - if (Natives.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst) != 0) { + logger.info("Calling fcntl for preallocate"); + if (Natives.functions.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst.memory) != 0) { + logger.warn("Failed to get contiguous preallocate, trying non-contiguous"); // that failed, so let us try allocating non-contiguously - fst.fst_flags = Natives.Fcntl.F_ALLOCATEALL; - if (Natives.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst) != 0) { + fst.setFlags(Natives.Fcntl.F_ALLOCATEALL); + if (Natives.functions.fcntl(fd, Natives.Fcntl.F_PREALLOCATE, fst.memory) != 0) { + logger.warn("Failed to get non-contiguous preallocate"); // i'm afraid captain dale had to bail return Native.getLastError(); } } - if (Natives.ftruncate(fd, new NativeLong(fileSize)) != 0) { + if (Natives.functions.ftruncate(fd, new NativeLong(fileSize)) != 0) { + logger.warn("Failed to ftruncate"); return Native.getLastError(); } return 0; @@ -53,17 +66,20 @@ public int preallocate(final int fd, final long currentSize /* unused */ , final private static class Natives { - static boolean NATIVES_AVAILABLE; 
+ static final boolean NATIVES_AVAILABLE; + static final NativeFunctions functions; static { - NATIVES_AVAILABLE = AccessController.doPrivileged((PrivilegedAction) () -> { + NativeFunctions nativeFunctions = AccessController.doPrivileged((PrivilegedAction) () -> { try { - Native.register(Natives.class, Platform.C_LIBRARY_NAME); + return Native.load(Platform.C_LIBRARY_NAME, NativeFunctions.class); } catch (final UnsatisfiedLinkError e) { - return false; + logger.warn("Failed to load macos native preallocate functions"); + return null; } - return true; }); + functions = nativeFunctions; + NATIVES_AVAILABLE = nativeFunctions != null; } static class Fcntl { @@ -79,25 +95,37 @@ static class Fcntl { @SuppressWarnings("unused") private static final int F_VOLPOSMODE = 4; // allocate from the volume offset - public static final class FStore extends Structure implements Structure.ByReference { - public int fst_flags = 0; - public int fst_posmode = 0; - public NativeLong fst_offset = new NativeLong(0); - public NativeLong fst_length = new NativeLong(0); - @SuppressWarnings("unused") - public NativeLong fst_bytesalloc = new NativeLong(0); - - @Override - protected List getFieldOrder() { - return Arrays.asList("fst_flags", "fst_posmode", "fst_offset", "fst_length", "fst_bytesalloc"); + public static final class FStore { + final Memory memory = new Memory(32); + + public void setFlags(int flags) { + memory.setInt(0, flags); + } + + public void setPosmode(int posmode) { + memory.setInt(4, posmode); + } + + public void setOffset(long offset) { + memory.setLong(8, offset); + } + + public void setLength(long length) { + memory.setLong(16, length); + } + + public void getBytesalloc() { + memory.getLong(24); } } } - static native int fcntl(int fd, int cmd, Fcntl.FStore fst); + private interface NativeFunctions extends Library { + int fcntl(int fd, int cmd, Object... 
args); - static native int ftruncate(int fd, NativeLong length); + int ftruncate(int fd, NativeLong length); + } } } diff --git a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java index 8f7214e0877ba..bd6dc50f48af3 100644 --- a/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java +++ b/libs/preallocate/src/main/java/org/elasticsearch/preallocate/Preallocate.java @@ -15,6 +15,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.RandomAccessFile; +import java.lang.invoke.MethodHandles; import java.lang.reflect.Field; import java.nio.file.Files; import java.nio.file.Path; @@ -24,12 +25,27 @@ public class Preallocate { private static final Logger logger = LogManager.getLogger(Preallocate.class); - private static final boolean IS_LINUX; - private static final boolean IS_MACOS; + static final boolean IS_LINUX; + static final boolean IS_MACOS; static { String osName = System.getProperty("os.name"); IS_LINUX = osName.startsWith("Linux"); IS_MACOS = osName.startsWith("Mac OS X"); + + // make sure the allocator native methods are initialized + Class clazz = null; + if (IS_LINUX) { + clazz = LinuxPreallocator.class; + } else if (IS_MACOS) { + clazz = MacOsPreallocator.class; + } + if (clazz != null) { + try { + MethodHandles.lookup().ensureInitialized(clazz); + } catch (IllegalAccessException unexpected) { + throw new AssertionError(unexpected); + } + } } public static void preallocate(final Path cacheFile, final long fileSize) throws IOException { @@ -47,7 +63,9 @@ private static void preallocate(final Path cacheFile, final long fileSize, final boolean success = false; try { if (prealloactor.useNative()) { - try (NativeFileHandle openFile = prealloactor.open(cacheFile.toAbsolutePath().toString())) { + var absolutePath = cacheFile.toAbsolutePath(); + Files.createDirectories(absolutePath.getParent()); + try (NativeFileHandle openFile = prealloactor.open(absolutePath.toString())) { long currentSize = openFile.getSize(); if (currentSize < fileSize) { logger.info("pre-allocating cache file [{}] ({} bytes) using native methods", cacheFile, fileSize); diff --git a/libs/preallocate/src/test/java/org/elasticsearch/preallocate/PreallocateTests.java b/libs/preallocate/src/test/java/org/elasticsearch/preallocate/PreallocateTests.java new file mode 100644 index 0000000000000..04d7e6adee556 --- /dev/null +++ b/libs/preallocate/src/test/java/org/elasticsearch/preallocate/PreallocateTests.java @@ -0,0 +1,55 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.preallocate; + +import org.elasticsearch.common.filesystem.FileSystemNatives; +import org.elasticsearch.test.ESTestCase; +import org.junit.Before; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.OptionalLong; + +import static org.hamcrest.Matchers.equalTo; + +public class PreallocateTests extends ESTestCase { + + @Before + public void setup() { + assumeFalse("no preallocate on windows", System.getProperty("os.name").startsWith("Windows")); + assumeFalse("preallocate not supported on encrypted block devices", "encryption-at-rest".equals(System.getenv("BUILDKITE_LABEL"))); + } + + public void testPreallocate() throws Exception { + Path cacheFile = createTempFile(); + long size = 1024 * 1024; // 1 MB + Preallocate.preallocate(cacheFile, size); + OptionalLong foundSize = FileSystemNatives.allocatedSizeInBytes(cacheFile); + assertTrue(foundSize.isPresent()); + assertThat(foundSize.getAsLong(), equalTo(size)); + } + + public void testPreallocateNonExistingFile() throws IOException { + Path file = createTempDir().resolve("test-preallocate"); + long size = 1024 * 1024; // 1 MB + Preallocate.preallocate(file, size); + OptionalLong foundSize = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(foundSize.isPresent()); + assertThat(foundSize.getAsLong(), equalTo(size)); + } + + public void testPreallocateNonExistingDirectory() throws IOException { + Path file = createTempDir().resolve("intermediate-dir").resolve("test-preallocate"); + long size = 1024 * 1024; // 1 MB + Preallocate.preallocate(file, size); + OptionalLong foundSize = FileSystemNatives.allocatedSizeInBytes(file); + assertTrue(foundSize.isPresent()); + assertThat(foundSize.getAsLong(), equalTo(size)); + } +} diff --git a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java index 0be2b68d76a21..fc22bda52e104 100644 --- a/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java +++ b/libs/tdigest/src/main/java/org/elasticsearch/tdigest/MergingDigest.java @@ -302,9 +302,13 @@ private void merge( addThis = projectedW <= wLimit; } if (i == 1 || i == incomingCount - 1) { - // force last centroid to never merge + // force first and last centroid to never merge addThis = false; } + if (lastUsedCell == mean.length - 1) { + // use the last centroid, there's no more + addThis = true; + } if (addThis) { // next point will fit diff --git a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java index 16a81bad50756..9fadf2218f203 100644 --- a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java +++ b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/MergingDigestTests.java @@ -151,4 +151,14 @@ public void testFill() { i++; } } + + public void testLargeInputSmallCompression() { + MergingDigest td = new MergingDigest(10); + for (int i = 0; i < 10_000_000; i++) { + td.add(between(0, 3_600_000)); + } + assertTrue(td.centroidCount() < 100); + assertTrue(td.quantile(0.00001) < 100_000); + assertTrue(td.quantile(0.99999) > 3_000_000); + } } diff --git a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java index 72b460da19da2..815346100532c 100644 --- a/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java +++ 
b/libs/tdigest/src/test/java/org/elasticsearch/tdigest/TDigestTests.java @@ -152,7 +152,7 @@ public void testQuantile() { hist2.compress(); double x1 = hist1.quantile(0.5); double x2 = hist2.quantile(0.5); - assertEquals(Dist.quantile(0.5, data), x1, 0.2); + assertEquals(Dist.quantile(0.5, data), x1, 0.25); assertEquals(x1, x2, 0.01); } diff --git a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java index c8e429d4c1490..63191084ca837 100644 --- a/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java +++ b/libs/x-content/impl/src/main/java/org/elasticsearch/xcontent/provider/json/JsonXContentParser.java @@ -57,7 +57,8 @@ public Token nextToken() throws IOException { try { return convertToken(parser.nextToken()); } catch (JsonEOFException e) { - throw new XContentEOFException(e); + JsonLocation location = e.getLocation(); + throw new XContentEOFException(new XContentLocation(location.getLineNr(), location.getColumnNr()), "Unexpected end of file", e); } catch (JsonParseException e) { throw newXContentParseException(e); } @@ -110,7 +111,7 @@ public String text() throws IOException { } private void throwOnNoText() { - throw new IllegalStateException("Can't get text on a " + currentToken() + " at " + getTokenLocation()); + throw new IllegalArgumentException("Expected text at " + getTokenLocation() + " but found " + currentToken()); } @Override diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentEOFException.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentEOFException.java index de9ea6fb04f26..01a2407598159 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentEOFException.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/XContentEOFException.java @@ -8,11 +8,9 @@ package org.elasticsearch.xcontent; -import java.io.IOException; +public class XContentEOFException extends XContentParseException { -public class XContentEOFException extends IOException { - - public XContentEOFException(IOException cause) { - super(cause); + public XContentEOFException(XContentLocation location, String message, Exception cause) { + super(location, message, cause); } } diff --git a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java index be100e1a6d120..9672c73ef56df 100644 --- a/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java +++ b/libs/x-content/src/main/java/org/elasticsearch/xcontent/support/AbstractXContentParser.java @@ -151,11 +151,8 @@ public int intValue(boolean coerce) throws IOException { protected abstract int doIntValue() throws IOException; - private static BigInteger LONG_MAX_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MAX_VALUE); - private static BigInteger LONG_MIN_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MIN_VALUE); - // weak bounds on the BigDecimal representation to allow for coercion - private static BigDecimal BIGDECIMAL_GREATER_THAN_LONG_MAX_VALUE = BigDecimal.valueOf(Long.MAX_VALUE).add(BigDecimal.ONE); - private static BigDecimal BIGDECIMAL_LESS_THAN_LONG_MIN_VALUE = BigDecimal.valueOf(Long.MIN_VALUE).subtract(BigDecimal.ONE); + private static final BigInteger LONG_MAX_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MAX_VALUE); + private 
static final BigInteger LONG_MIN_VALUE_AS_BIGINTEGER = BigInteger.valueOf(Long.MIN_VALUE); /** Return the long that {@code stringValue} stores or throws an exception if the * stored value cannot be converted to a long that stores the exact same @@ -170,11 +167,21 @@ private static long toLong(String stringValue, boolean coerce) { final BigInteger bigIntegerValue; try { final BigDecimal bigDecimalValue = new BigDecimal(stringValue); - if (bigDecimalValue.compareTo(BIGDECIMAL_GREATER_THAN_LONG_MAX_VALUE) >= 0 - || bigDecimalValue.compareTo(BIGDECIMAL_LESS_THAN_LONG_MIN_VALUE) <= 0) { + // long can have a maximum of 19 digits - any more than that cannot be a long + // the scale is stored as the negation, so negative scale -> big number + if (bigDecimalValue.scale() < -19) { throw new IllegalArgumentException("Value [" + stringValue + "] is out of range for a long"); } - bigIntegerValue = coerce ? bigDecimalValue.toBigInteger() : bigDecimalValue.toBigIntegerExact(); + // large scale -> very small number + if (bigDecimalValue.scale() > 19) { + if (coerce) { + bigIntegerValue = BigInteger.ZERO; + } else { + throw new ArithmeticException("Number has a decimal part"); + } + } else { + bigIntegerValue = coerce ? bigDecimalValue.toBigInteger() : bigDecimalValue.toBigIntegerExact(); + } } catch (ArithmeticException e) { throw new IllegalArgumentException("Value [" + stringValue + "] has a decimal part"); } catch (NumberFormatException e) { diff --git a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java index c8df9929d007b..b9cb7df84a8e4 100644 --- a/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java +++ b/libs/x-content/src/test/java/org/elasticsearch/xcontent/XContentParserTests.java @@ -31,6 +31,7 @@ import static org.hamcrest.Matchers.hasSize; import static org.hamcrest.Matchers.in; import static org.hamcrest.Matchers.instanceOf; +import static org.hamcrest.Matchers.is; import static org.hamcrest.Matchers.nullValue; import static org.junit.internal.matchers.ThrowableMessageMatcher.hasMessage; @@ -74,6 +75,44 @@ public void testFloat() throws IOException { } } + public void testLongCoercion() throws IOException { + XContentType xContentType = randomFrom(XContentType.values()); + + try (XContentBuilder builder = XContentBuilder.builder(xContentType.xContent())) { + builder.startObject(); + builder.field("decimal", "5.5"); + builder.field("expInRange", "5e18"); + builder.field("expTooBig", "2e100"); + builder.field("expTooSmall", "2e-100"); + builder.endObject(); + + try (XContentParser parser = createParser(xContentType.xContent(), BytesReference.bytes(builder))) { + assertThat(parser.nextToken(), is(XContentParser.Token.START_OBJECT)); + + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("decimal")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + assertThat(parser.longValue(), equalTo(5L)); + + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("expInRange")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + assertThat(parser.longValue(), equalTo((long) 5e18)); + + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("expTooBig")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + 
expectThrows(IllegalArgumentException.class, parser::longValue); + + // too small goes to zero + assertThat(parser.nextToken(), is(XContentParser.Token.FIELD_NAME)); + assertThat(parser.currentName(), is("expTooSmall")); + assertThat(parser.nextToken(), is(XContentParser.Token.VALUE_STRING)); + assertThat(parser.longValue(), equalTo(0L)); + } + } + } + public void testReadList() throws IOException { assertThat(readList("{\"foo\": [\"bar\"]}"), contains("bar")); assertThat(readList("{\"foo\": [\"bar\",\"baz\"]}"), contains("bar", "baz")); diff --git a/modules/aggregations/build.gradle b/modules/aggregations/build.gradle index a773c751eeaf5..91f3303d9d4a8 100644 --- a/modules/aggregations/build.gradle +++ b/modules/aggregations/build.gradle @@ -54,6 +54,9 @@ tasks.named("yamlRestTestV7CompatTransform").configure { task -> task.skipTest("search.aggregation/180_percentiles_tdigest_metric/Filtered test", "Hybrid t-digest produces different results.") task.skipTest("search.aggregation/420_percentile_ranks_tdigest_metric/filtered", "Hybrid t-digest produces different results.") + // Something has changed with response codes + task.skipTest("search.aggregation/20_terms/IP test", "Hybrid t-digest produces different results.") + task.addAllowedWarningRegex("\\[types removal\\].*") } diff --git a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml index d9298a832e650..0a6d171c35ac5 100644 --- a/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml +++ b/modules/aggregations/src/yamlRestTest/resources/rest-api-spec/test/aggregations/stats_metric_fail_formatting.yml @@ -3,6 +3,8 @@ setup: indices.create: index: test_date body: + settings: + number_of_shards: 1 mappings: properties: date_field: diff --git a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml index 802e599b89f12..71c26372dac59 100644 --- a/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml +++ b/modules/analysis-common/src/yamlRestTest/resources/rest-api-spec/test/analysis-common/30_tokenizers.yml @@ -317,22 +317,24 @@ body: text: "a/b/c" explain: true - tokenizer: - type: PathHierarchy + tokenizer: path_hierarchy - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: __anonymous__PathHierarchy } + - match: { detail.tokenizer.name: path_hierarchy } - match: { detail.tokenizer.tokens.0.token: a } - match: { detail.tokenizer.tokens.1.token: a/b } - match: { detail.tokenizer.tokens.2.token: a/b/c } +--- +"PathHierarchy": - do: indices.analyze: body: text: "a/b/c" explain: true - tokenizer: path_hierarchy + tokenizer: + type: PathHierarchy - length: { detail.tokenizer.tokens: 3 } - - match: { detail.tokenizer.name: path_hierarchy } + - match: { detail.tokenizer.name: __anonymous__PathHierarchy } - match: { detail.tokenizer.tokens.0.token: a } - match: { detail.tokenizer.tokens.1.token: a/b } - match: { detail.tokenizer.tokens.2.token: a/b/c } diff --git a/modules/data-streams/build.gradle b/modules/data-streams/build.gradle index a0375c61d7c29..daf0c188cc83e 100644 --- a/modules/data-streams/build.gradle +++ b/modules/data-streams/build.gradle @@ -1,4 +1,5 @@ import 
org.elasticsearch.gradle.internal.info.BuildParams +import org.elasticsearch.gradle.testclusters.StandaloneRestIntegTestTask apply plugin: 'elasticsearch.test-with-dependencies' apply plugin: 'elasticsearch.internal-cluster-test' @@ -23,11 +24,7 @@ dependencies { internalClusterTestImplementation project(":modules:mapper-extras") } -tasks.named('yamlRestTest') { - usesDefaultDistribution() -} - -tasks.named('javaRestTest') { +tasks.withType(StandaloneRestIntegTestTask).configureEach { usesDefaultDistribution() } diff --git a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java index f95d9a0b0431f..52ce2a7a33ea6 100644 --- a/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java +++ b/modules/data-streams/src/internalClusterTest/java/org/elasticsearch/datastreams/LogsDataStreamIT.java @@ -165,7 +165,7 @@ public void testLogsIndexModeDataStreamIndexing() throws IOException, ExecutionE client(), "logs-composable-template", LOGS_OR_STANDARD_MAPPING, - Map.of("index.mode", "logs"), + Map.of("index.mode", "logsdb"), List.of("logs-*-*") ); final String dataStreamName = generateDataStreamName("logs"); @@ -188,7 +188,7 @@ public void testIndexModeLogsAndStandardSwitching() throws IOException, Executio ); createDataStream(client(), dataStreamName); for (int i = 0; i < randomIntBetween(5, 10); i++) { - final IndexMode indexMode = i % 2 == 0 ? IndexMode.LOGS : IndexMode.STANDARD; + final IndexMode indexMode = i % 2 == 0 ? IndexMode.LOGSDB : IndexMode.STANDARD; indexModes.add(indexMode); updateComposableIndexTemplate( client(), @@ -206,7 +206,7 @@ public void testIndexModeLogsAndStandardSwitching() throws IOException, Executio public void testIndexModeLogsAndTimeSeriesSwitching() throws IOException, ExecutionException, InterruptedException { final String dataStreamName = generateDataStreamName("custom"); final List indexPatterns = List.of("custom-*-*"); - final Map logsSettings = Map.of("index.mode", "logs"); + final Map logsSettings = Map.of("index.mode", "logsdb"); final Map timeSeriesSettings = Map.of("index.mode", "time_series", "index.routing_path", "host.name"); putComposableIndexTemplate(client(), "custom-composable-template", LOGS_OR_STANDARD_MAPPING, logsSettings, indexPatterns); @@ -221,13 +221,13 @@ public void testIndexModeLogsAndTimeSeriesSwitching() throws IOException, Execut rolloverDataStream(dataStreamName); indexLogOrStandardDocuments(client(), randomIntBetween(10, 20), randomIntBetween(32, 64), dataStreamName); - assertDataStreamBackingIndicesModes(dataStreamName, List.of(IndexMode.LOGS, IndexMode.TIME_SERIES, IndexMode.LOGS)); + assertDataStreamBackingIndicesModes(dataStreamName, List.of(IndexMode.LOGSDB, IndexMode.TIME_SERIES, IndexMode.LOGSDB)); } public void testInvalidIndexModeTimeSeriesSwitchWithoutRoutingPath() throws IOException, ExecutionException, InterruptedException { final String dataStreamName = generateDataStreamName("custom"); final List indexPatterns = List.of("custom-*-*"); - final Map logsSettings = Map.of("index.mode", "logs"); + final Map logsSettings = Map.of("index.mode", "logsdb"); final Map timeSeriesSettings = Map.of("index.mode", "time_series"); putComposableIndexTemplate(client(), "custom-composable-template", LOGS_OR_STANDARD_MAPPING, logsSettings, indexPatterns); @@ -249,7 +249,7 @@ public void testInvalidIndexModeTimeSeriesSwitchWithoutRoutingPath() throws 
IOEx public void testInvalidIndexModeTimeSeriesSwitchWithoutDimensions() throws IOException, ExecutionException, InterruptedException { final String dataStreamName = generateDataStreamName("custom"); final List indexPatterns = List.of("custom-*-*"); - final Map logsSettings = Map.of("index.mode", "logs"); + final Map logsSettings = Map.of("index.mode", "logsdb"); final Map timeSeriesSettings = Map.of("index.mode", "time_series", "index.routing_path", "host.name"); putComposableIndexTemplate(client(), "custom-composable-template", LOGS_OR_STANDARD_MAPPING, logsSettings, indexPatterns); diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java index d3ec5b29ff5b9..780864db8b629 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/LogsDataStreamRestIT.java @@ -72,7 +72,7 @@ private static void waitForLogs(RestClient client) throws Exception { "template": { "settings": { "index": { - "mode": "logs" + "mode": "logsdb" } }, "mappings": { @@ -161,7 +161,7 @@ public void testLogsIndexing() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 0); + assertDataStreamBackingIndexMode("logsdb", 0); rolloverDataStream(client, DATA_STREAM_NAME); indexDocument( client, @@ -175,7 +175,7 @@ public void testLogsIndexing() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 1); + assertDataStreamBackingIndexMode("logsdb", 1); } public void testLogsStandardIndexModeSwitch() throws IOException { @@ -193,7 +193,7 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 0); + assertDataStreamBackingIndexMode("logsdb", 0); putTemplate(client, "custom-template", STANDARD_TEMPLATE); rolloverDataStream(client, DATA_STREAM_NAME); @@ -225,7 +225,7 @@ public void testLogsStandardIndexModeSwitch() throws IOException { randomIp(randomBoolean()) ) ); - assertDataStreamBackingIndexMode("logs", 2); + assertDataStreamBackingIndexMode("logsdb", 2); } private void assertDataStreamBackingIndexMode(final String indexMode, int backingIndex) throws IOException { diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java index dcd2457b88f18..fada21224e3b2 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeDisabledRestTestIT.java @@ -50,7 +50,7 @@ public void setup() throws Exception { public void testLogsSettingsIndexModeDisabled() throws IOException { assertOK(createDataStream(client, "logs-custom-dev")); final String indexMode = (String) getSetting(client, getDataStreamBackingIndex(client, "logs-custom-dev", 0), "index.mode"); - assertThat(indexMode, Matchers.not(equalTo(IndexMode.LOGS.getName()))); + assertThat(indexMode, Matchers.not(equalTo(IndexMode.LOGSDB.getName()))); } } diff --git a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java 
b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java index 832267cebf97c..a4277748ea9bd 100644 --- a/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java +++ b/modules/data-streams/src/javaRestTest/java/org/elasticsearch/datastreams/logsdb/LogsIndexModeEnabledRestTestIT.java @@ -179,7 +179,7 @@ public void testCreateDataStream() throws IOException { assertOK(putComponentTemplate(client, "logs@custom", MAPPINGS)); assertOK(createDataStream(client, "logs-custom-dev")); final String indexMode = (String) getSetting(client, getDataStreamBackingIndex(client, "logs-custom-dev", 0), "index.mode"); - assertThat(indexMode, equalTo(IndexMode.LOGS.getName())); + assertThat(indexMode, equalTo(IndexMode.LOGSDB.getName())); } public void testBulkIndexing() throws IOException { diff --git a/modules/ingest-common/build.gradle b/modules/ingest-common/build.gradle index 90d52de6f0fff..ebd788383bff5 100644 --- a/modules/ingest-common/build.gradle +++ b/modules/ingest-common/build.gradle @@ -5,6 +5,8 @@ * in compliance with, at your election, the Elastic License 2.0 or the Server * Side Public License, v 1. */ +import org.elasticsearch.gradle.testclusters.StandaloneRestIntegTestTask + apply plugin: 'elasticsearch.internal-yaml-rest-test' apply plugin: 'elasticsearch.yaml-rest-compat-test' apply plugin: 'elasticsearch.internal-cluster-test' @@ -29,7 +31,7 @@ restResources { } } -tasks.named('yamlRestTest') { +tasks.withType(StandaloneRestIntegTestTask).configureEach { usesDefaultDistribution() } @@ -48,6 +50,22 @@ tasks.named("thirdPartyAudit").configure { ) } +tasks.named("yamlRestTest").configure { + systemProperty 'tests.rest.blacklist', [ + // for some reason, allowed_warnings on the test isn't working here + 'ingest/30_date_processor/Test date processor with no timezone configured', + 'ingest/30_date_processor/Test week based date parsing', + ].join(',') +} + +tasks.named("yamlRestTestV7CompatTest").configure { + systemProperty 'tests.rest.blacklist', [ + // for some reason, allowed_warnings on the test isn't working here + 'ingest/30_date_processor/Test date processor with no timezone configured', + 'ingest/30_date_processor/Test week based date parsing', + ].join(',') +} + tasks.named("yamlRestTestV7CompatTransform").configure { task -> task.addAllowedWarningRegex("\\[types removal\\].*") } diff --git a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml index 78fb895ea791c..a5caf7493340c 100644 --- a/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml +++ b/modules/ingest-common/src/yamlRestTest/resources/rest-api-spec/test/ingest/30_date_processor.yml @@ -1,3 +1,6 @@ +setup: + - requires: + test_runner_features: allowed_warnings_regex --- teardown: - do: @@ -97,6 +100,8 @@ teardown: "Test date processor with no timezone configured": - do: + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:H:m:s Z] contains textual field specifiers that could change in JDK 23.*' ingest.put_pipeline: id: "my_pipeline" # sample formats from beats, featuring mongodb, icinga, apache @@ -165,6 +170,8 @@ teardown: - match: { acknowledged: true } - do: + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:H:m:s Z] contains textual field specifiers that could change in JDK 23.*' index: index: test id: "1" @@ 
-204,6 +211,8 @@ teardown: --- "Test week based date parsing": - do: + allowed_warnings_regex: + - 'Date format \[YYYY-ww] contains week-date field specifiers that are changing in JDK 23.*' indices.create: index: test body: @@ -214,6 +223,8 @@ teardown: format: YYYY-ww - do: + allowed_warnings_regex: + - 'Date format \[YYYY-ww] contains week-date field specifiers that are changing in JDK 23.*' ingest.put_pipeline: id: "my_pipeline" body: > @@ -263,69 +274,3 @@ teardown: id: "1" - match: { _source.date_source_field: "2020-33" } - match: { _source.date_target_field: "2020-08-10T00:00:00.000Z" } - ---- -"Test week based date parsing with locale": - #locale is used when parsing as well on a pipeline. As per US locale, start of the 33rd week 2020 is on 09August2020 (sunday) - - do: - indices.create: - index: test - body: - mappings: - properties: - date_source_field: - type: date - format: YYYY-ww - locale: en-US - - - do: - ingest.put_pipeline: - id: "my_pipeline" - body: > - { - "description": "_description", - "processors": [ - { - "date" : { - "field" : "date_source_field", - "target_field" : "date_target_field", - "formats" : ["YYYY-ww"], - "locale" : "en-US" - } - } - ] - } - - match: { acknowledged: true } - - - do: - ingest.simulate: - id: "my_pipeline" - body: > - { - "docs": [ - { - "_source": { - "date_source_field": "2020-33" - } - } - ] - } - - length: { docs: 1 } - - match: { docs.0.doc._source.date_source_field: "2020-33" } - - match: { docs.0.doc._source.date_target_field: "2020-08-09T00:00:00.000Z" } - - length: { docs.0.doc._ingest: 1 } - - is_true: docs.0.doc._ingest.timestamp - - - do: - index: - index: test - id: "1" - pipeline: "my_pipeline" - body: {date_source_field: "2020-33"} - - - do: - get: - index: test - id: "1" - - match: { _source.date_source_field: "2020-33" } - - match: { _source.date_target_field: "2020-08-09T00:00:00.000Z" } diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java new file mode 100644 index 0000000000000..cc757c413713d --- /dev/null +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderIT.java @@ -0,0 +1,196 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import fixture.geoip.EnterpriseGeoIpHttpFixture; + +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.ResourceAlreadyExistsException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.bulk.BulkItemResponse; +import org.elasticsearch.action.bulk.BulkRequest; +import org.elasticsearch.action.bulk.BulkResponse; +import org.elasticsearch.action.get.GetRequest; +import org.elasticsearch.action.get.GetResponse; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.ingest.PutPipelineRequest; +import org.elasticsearch.action.search.SearchRequest; +import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.settings.MockSecureSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.util.CollectionUtils; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.reindex.ReindexPlugin; +import org.elasticsearch.rest.RestStatus; +import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.transport.RemoteTransportException; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentType; +import org.elasticsearch.xcontent.json.JsonXContent; +import org.junit.ClassRule; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_LICENSE_KEY_SETTING; +import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; +import static org.hamcrest.Matchers.equalTo; + +public class EnterpriseGeoIpDownloaderIT extends ESIntegTestCase { + + private static final String DATABASE_TYPE = "GeoIP2-City"; + + @ClassRule + public static final EnterpriseGeoIpHttpFixture fixture = new EnterpriseGeoIpHttpFixture(DATABASE_TYPE); + + protected String getEndpoint() { + return fixture.getAddress(); + } + + @Override + protected Settings nodeSettings(int nodeOrdinal, Settings otherSettings) { + MockSecureSettings secureSettings = new MockSecureSettings(); + secureSettings.setString(MAXMIND_LICENSE_KEY_SETTING.getKey(), "license_key"); + Settings.Builder builder = Settings.builder(); + builder.setSecureSettings(secureSettings) + .put(super.nodeSettings(nodeOrdinal, otherSettings)) + .put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true); + // note: this is using the enterprise fixture for the regular downloader, too, as + // a slightly hacky way of making the regular downloader not actually download any files + builder.put(GeoIpDownloader.ENDPOINT_SETTING.getKey(), getEndpoint()); + return builder.build(); + } + + @SuppressWarnings("unchecked") + protected Collection> nodePlugins() { + // the reindex plugin is (somewhat surprisingly) necessary in order to be able to delete-by-query, + // which modules/ingest-geoip does to delete old chunks + return CollectionUtils.appendToCopyNoNullElements(super.nodePlugins(), IngestGeoIpPlugin.class, ReindexPlugin.class); + } + + 
@SuppressWarnings("unchecked") + public void testEnterpriseDownloaderTask() throws Exception { + /* + * This test starts the enterprise geoip downloader task, and creates a database configuration. Then it creates an ingest + * pipeline that references that database, and ingests a single document using that pipeline. It then asserts that the document + * was updated with information from the database. + * Note that the "enterprise database" is actually just a geolite database being loaded by the GeoIpHttpFixture. + */ + EnterpriseGeoIpDownloader.DEFAULT_MAXMIND_ENDPOINT = getEndpoint(); + final String pipelineName = "enterprise_geoip_pipeline"; + final String indexName = "enterprise_geoip_test_index"; + final String sourceField = "ip"; + final String targetField = "ip-city"; + + startEnterpriseGeoIpDownloaderTask(); + configureDatabase(DATABASE_TYPE); + createGeoIpPipeline(pipelineName, DATABASE_TYPE, sourceField, targetField); + + assertBusy(() -> { + /* + * We know that the .geoip_databases index has been populated, but we don't know for sure that the database has been pulled + * down and made available on all nodes. So we run this ingest-and-check step in an assertBusy. + */ + logger.info("Ingesting a test document"); + String documentId = ingestDocument(indexName, pipelineName, sourceField); + GetResponse getResponse = client().get(new GetRequest(indexName, documentId)).actionGet(); + Map returnedSource = getResponse.getSource(); + assertNotNull(returnedSource); + Object targetFieldValue = returnedSource.get(targetField); + assertNotNull(targetFieldValue); + assertThat(((Map) targetFieldValue).get("organization_name"), equalTo("Bredband2 AB")); + }); + } + + private void startEnterpriseGeoIpDownloaderTask() { + PersistentTasksService persistentTasksService = internalCluster().getInstance(PersistentTasksService.class); + persistentTasksService.sendStartRequest( + ENTERPRISE_GEOIP_DOWNLOADER, + ENTERPRISE_GEOIP_DOWNLOADER, + new EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams(), + TimeValue.MAX_VALUE, + ActionListener.wrap(r -> logger.debug("Started enterprise geoip downloader task"), e -> { + Throwable t = e instanceof RemoteTransportException ? 
ExceptionsHelper.unwrapCause(e) : e; + if (t instanceof ResourceAlreadyExistsException == false) { + logger.error("failed to create enterprise geoip downloader task", e); + } + }) + ); + } + + private void configureDatabase(String databaseType) throws Exception { + admin().cluster() + .execute( + PutDatabaseConfigurationAction.INSTANCE, + new PutDatabaseConfigurationAction.Request( + TimeValue.MAX_VALUE, + TimeValue.MAX_VALUE, + new DatabaseConfiguration("test", databaseType, new DatabaseConfiguration.Maxmind("test_account")) + ) + ) + .actionGet(); + ensureGreen(GeoIpDownloader.DATABASES_INDEX); + assertBusy(() -> { + SearchResponse searchResponse = client().search(new SearchRequest(GeoIpDownloader.DATABASES_INDEX)).actionGet(); + try { + assertThat(searchResponse.getHits().getHits().length, equalTo(1)); + } finally { + searchResponse.decRef(); + } + }); + } + + private void createGeoIpPipeline(String pipelineName, String databaseType, String sourceField, String targetField) throws IOException { + final BytesReference bytes; + try (XContentBuilder builder = JsonXContent.contentBuilder()) { + builder.startObject(); + { + builder.field("description", "test"); + builder.startArray("processors"); + { + builder.startObject(); + { + builder.startObject("geoip"); + { + builder.field("field", sourceField); + builder.field("target_field", targetField); + builder.field("database_file", databaseType + ".mmdb"); + } + builder.endObject(); + } + builder.endObject(); + } + builder.endArray(); + } + builder.endObject(); + bytes = BytesReference.bytes(builder); + } + assertAcked(clusterAdmin().putPipeline(new PutPipelineRequest(pipelineName, bytes, XContentType.JSON)).actionGet()); + } + + private String ingestDocument(String indexName, String pipelineName, String sourceField) { + BulkRequest bulkRequest = new BulkRequest(); + bulkRequest.add( + new IndexRequest(indexName).source("{\"" + sourceField + "\": \"89.160.20.128\"}", XContentType.JSON).setPipeline(pipelineName) + ); + BulkResponse response = client().bulk(bulkRequest).actionGet(); + BulkItemResponse[] bulkItemResponses = response.getItems(); + assertThat(bulkItemResponses.length, equalTo(1)); + assertThat(bulkItemResponses[0].status(), equalTo(RestStatus.CREATED)); + return bulkItemResponses[0].getId(); + } +} diff --git a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java index 9dcd8abc7bc57..f7ab384c69bf1 100644 --- a/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java +++ b/modules/ingest-geoip/src/internalClusterTest/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderIT.java @@ -152,9 +152,9 @@ public void testInvalidTimestamp() throws Exception { updateClusterSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), true)); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }, 2, TimeUnit.MINUTES); @@ -227,9 +227,9 @@ public void testGeoIpDatabasesDownload() throws Exception { updateClusterSettings(Settings.builder().put(GeoIpDownloaderTaskExecutor.ENABLED_SETTING.getKey(), 
true)); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); putGeoIpPipeline(); // This is to work around the race condition described in #92888 }, 2, TimeUnit.MINUTES); @@ -238,11 +238,11 @@ public void testGeoIpDatabasesDownload() throws Exception { assertBusy(() -> { try { GeoIpTaskState state = (GeoIpTaskState) getTask().getState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); - GeoIpTaskState.Metadata metadata = state.get(id); + GeoIpTaskState.Metadata metadata = state.getDatabases().get(id); int size = metadata.lastChunk() - metadata.firstChunk() + 1; assertResponse( prepareSearch(GeoIpDownloader.DATABASES_INDEX).setSize(size) @@ -301,9 +301,9 @@ public void testGeoIpDatabasesDownloadNoGeoipProcessors() throws Exception { assertNotNull(getTask().getState()); // removing all geoip processors should not result in the task being stopped assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }); } @@ -337,9 +337,9 @@ public void testDoNotDownloadDatabaseOnPipelineCreation() throws Exception { assertAcked(indicesAdmin().prepareUpdateSettings(indexIdentifier).setSettings(indexSettings).get()); assertBusy(() -> { GeoIpTaskState state = getGeoIpTaskState(); - assertEquals( - Set.of("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb"), - state.getDatabases().keySet() + assertThat( + state.getDatabases().keySet(), + containsInAnyOrder("GeoLite2-ASN.mmdb", "GeoLite2-City.mmdb", "GeoLite2-Country.mmdb", "MyCustomGeoLite2-City.mmdb") ); }, 2, TimeUnit.MINUTES); diff --git a/modules/ingest-geoip/src/main/java/module-info.java b/modules/ingest-geoip/src/main/java/module-info.java index fa0b0266414f0..4d0acefcb6c9f 100644 --- a/modules/ingest-geoip/src/main/java/module-info.java +++ b/modules/ingest-geoip/src/main/java/module-info.java @@ -15,5 +15,6 @@ requires com.maxmind.geoip2; requires com.maxmind.db; + exports org.elasticsearch.ingest.geoip.direct to org.elasticsearch.server; exports org.elasticsearch.ingest.geoip.stats to org.elasticsearch.server; } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java index efae8fa0c50ca..dcb882ede230c 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/DatabaseNodeService.java @@ -24,6 +24,7 @@ import org.elasticsearch.core.CheckedConsumer; import org.elasticsearch.core.CheckedRunnable; import 
org.elasticsearch.core.IOUtils; +import org.elasticsearch.core.Tuple; import org.elasticsearch.env.Environment; import org.elasticsearch.gateway.GatewayService; import org.elasticsearch.index.Index; @@ -52,7 +53,6 @@ import java.util.Collection; import java.util.List; import java.util.Locale; -import java.util.Map; import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; @@ -64,6 +64,7 @@ import java.util.zip.GZIPInputStream; import static org.elasticsearch.core.Strings.format; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpTaskState.getEnterpriseGeoIpTaskState; import static org.elasticsearch.ingest.geoip.GeoIpTaskState.getGeoIpTaskState; /** @@ -183,13 +184,14 @@ public Boolean isValid(String databaseFile) { if (state == null) { return true; } + GeoIpTaskState.Metadata metadata = state.getDatabases().get(databaseFile); // we never remove metadata from cluster state, if metadata is null we deal with built-in database, which is always valid if (metadata == null) { return true; } - boolean valid = metadata.isValid(currentState.metadata().settings()); + boolean valid = metadata.isNewEnough(currentState.metadata().settings()); if (valid && metadata.isCloseToExpiration()) { HeaderWarning.addWarning( "database [{}] was not updated for over 25 days, geoip processor will stop working if there is no update for 30 days", @@ -269,20 +271,52 @@ void checkDatabases(ClusterState state) { } } - GeoIpTaskState taskState = getGeoIpTaskState(state); - if (taskState == null) { - // Note: an empty state will purge stale entries in databases map - taskState = GeoIpTaskState.EMPTY; + // we'll consult each of the geoip downloaders to build up a list of database metadatas to work with + List> validMetadatas = new ArrayList<>(); + + // process the geoip task state for the (ordinary) geoip downloader + { + GeoIpTaskState taskState = getGeoIpTaskState(state); + if (taskState == null) { + // Note: an empty state will purge stale entries in databases map + taskState = GeoIpTaskState.EMPTY; + } + validMetadatas.addAll( + taskState.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(state.getMetadata().settings())) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList() + ); + } + + // process the geoip task state for the enterprise geoip downloader + { + EnterpriseGeoIpTaskState taskState = getEnterpriseGeoIpTaskState(state); + if (taskState == null) { + // Note: an empty state will purge stale entries in databases map + taskState = EnterpriseGeoIpTaskState.EMPTY; + } + validMetadatas.addAll( + taskState.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(state.getMetadata().settings())) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList() + ); } - taskState.getDatabases().entrySet().stream().filter(e -> e.getValue().isValid(state.getMetadata().settings())).forEach(e -> { - String name = e.getKey(); - GeoIpTaskState.Metadata metadata = e.getValue(); + // run through all the valid metadatas, regardless of source, and retrieve them + validMetadatas.forEach(e -> { + String name = e.v1(); + GeoIpTaskState.Metadata metadata = e.v2(); DatabaseReaderLazyLoader reference = databases.get(name); String remoteMd5 = metadata.md5(); String localMd5 = reference != null ? 
reference.getMd5() : null; if (Objects.equals(localMd5, remoteMd5)) { - logger.debug("Current reference of [{}] is up to date [{}] with was recorded in CS [{}]", name, localMd5, remoteMd5); + logger.debug("[{}] is up to date [{}] with cluster state [{}]", name, localMd5, remoteMd5); return; } @@ -293,15 +327,14 @@ void checkDatabases(ClusterState state) { } }); + // TODO perhaps we need to handle the license flap persistent task state better than we do + // i think the ideal end state is that we *do not* drop the files that the enterprise downloader + // handled if they fall out -- which means we need to track that in the databases map itself + + // start with the list of all databases we currently know about in this service, + // then drop the ones that didn't check out as valid from the task states List staleEntries = new ArrayList<>(databases.keySet()); - staleEntries.removeAll( - taskState.getDatabases() - .entrySet() - .stream() - .filter(e -> e.getValue().isValid(state.getMetadata().settings())) - .map(Map.Entry::getKey) - .collect(Collectors.toSet()) - ); + staleEntries.removeAll(validMetadatas.stream().map(Tuple::v1).collect(Collectors.toSet())); removeStaleEntries(staleEntries); } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java new file mode 100644 index 0000000000000..9645e34751642 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloader.java @@ -0,0 +1,474 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ExceptionsHelper; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.support.PlainActionFuture; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.index.query.BoolQueryBuilder; +import org.elasticsearch.index.query.MatchQueryBuilder; +import org.elasticsearch.index.query.RangeQueryBuilder; +import org.elasticsearch.index.reindex.DeleteByQueryAction; +import org.elasticsearch.index.reindex.DeleteByQueryRequest; +import org.elasticsearch.ingest.geoip.GeoIpTaskState.Metadata; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.persistent.AllocatedPersistentTask; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.Scheduler; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentType; + +import java.io.IOException; +import java.io.InputStream; +import java.net.PasswordAuthentication; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.function.Function; +import java.util.function.Supplier; +import java.util.regex.Pattern; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_SETTINGS_PREFIX; + +/** + * Main component responsible for downloading new GeoIP databases. + * New databases are downloaded in chunks and stored in .geoip_databases index + * Downloads are verified against MD5 checksum provided by the server + * Current state of all stored databases is stored in cluster state in persistent task state + */ +public class EnterpriseGeoIpDownloader extends AllocatedPersistentTask { + + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); + private static final Pattern CHECKSUM_PATTERN = Pattern.compile("(\\w{64})\\s\\s(.*)"); + + // for overriding in tests + static String DEFAULT_MAXMIND_ENDPOINT = System.getProperty( + MAXMIND_SETTINGS_PREFIX + "endpoint.default", + "https://download.maxmind.com/geoip/databases" + ); + // n.b. 
a future enhancement might be to allow for a MAXMIND_ENDPOINT_SETTING, but + // at the moment this is an unsupported system property for use in tests (only) + + static String downloadUrl(final String name, final String suffix) { + String endpointPattern = DEFAULT_MAXMIND_ENDPOINT; + if (endpointPattern.contains("%")) { + throw new IllegalArgumentException("Invalid endpoint [" + endpointPattern + "]"); + } + if (endpointPattern.endsWith("/") == false) { + endpointPattern += "/"; + } + endpointPattern += "%s/download?suffix=%s"; + + // at this point the pattern looks like this (in the default case): + // https://download.maxmind.com/geoip/databases/%s/download?suffix=%s + + return Strings.format(endpointPattern, name, suffix); + } + + static final String DATABASES_INDEX = ".geoip_databases"; + static final int MAX_CHUNK_SIZE = 1024 * 1024; + + private final Client client; + private final HttpClient httpClient; + private final ClusterService clusterService; + private final ThreadPool threadPool; + + // visible for testing + protected volatile EnterpriseGeoIpTaskState state; + private volatile Scheduler.ScheduledCancellable scheduled; + private final Supplier pollIntervalSupplier; + private final Function credentialsBuilder; + + EnterpriseGeoIpDownloader( + Client client, + HttpClient httpClient, + ClusterService clusterService, + ThreadPool threadPool, + long id, + String type, + String action, + String description, + TaskId parentTask, + Map headers, + Supplier pollIntervalSupplier, + Function credentialsBuilder + ) { + super(id, type, action, description, parentTask, headers); + this.client = client; + this.httpClient = httpClient; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.pollIntervalSupplier = pollIntervalSupplier; + this.credentialsBuilder = credentialsBuilder; + } + + void setState(EnterpriseGeoIpTaskState state) { + // this is for injecting the state in GeoIpDownloaderTaskExecutor#nodeOperation just after the task instance has been created + // by the PersistentTasksNodeService -- since the GeoIpDownloader is newly created, the state will be null, and the passed-in + // state cannot be null + assert this.state == null + : "setState() cannot be called when state is already non-null. This most likely happened because setState() was called twice"; + assert state != null : "Should never call setState with a null state. 
Pass an EnterpriseGeoIpTaskState.EMPTY instead."; + this.state = state; + } + + // visible for testing + void updateDatabases() throws IOException { + var clusterState = clusterService.state(); + var geoipIndex = clusterState.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX); + if (geoipIndex != null) { + logger.trace("the geoip index [{}] exists", EnterpriseGeoIpDownloader.DATABASES_INDEX); + if (clusterState.getRoutingTable().index(geoipIndex.getWriteIndex()).allPrimaryShardsActive() == false) { + logger.debug("not updating databases because not all primary shards of [{}] index are active yet", DATABASES_INDEX); + return; + } + var blockException = clusterState.blocks().indexBlockedException(ClusterBlockLevel.WRITE, geoipIndex.getWriteIndex().getName()); + if (blockException != null) { + throw blockException; + } + } + + logger.trace("Updating geoip databases"); + IngestGeoIpMetadata geoIpMeta = clusterState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + // if there are entries in the cs that aren't in the persistent task state, + // then download those (only) + // --- + // if there are in the persistent task state, that aren't in the cluster state + // then nuke those (only) + // --- + // else, just download everything + boolean addedSomething = false; + { + Set existingDatabaseNames = state.getDatabases().keySet(); + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + final String id = entry.getKey(); + DatabaseConfiguration database = entry.getValue().database(); + if (existingDatabaseNames.contains(database.name() + ".mmdb") == false) { + logger.debug("A new database appeared [{}]", database.name()); + + final String accountId = database.maxmind().accountId(); + try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { + if (holder == null) { + logger.warn("No credentials found to download database [{}], skipping download...", id); + } else { + processDatabase(holder.get(), database); + addedSomething = true; + } + } + } + } + } + + boolean droppedSomething = false; + { + // rip anything out of the task state that doesn't match what's in the cluster state, + // that is, if there's no longer an entry for a database in the repository, + // then drop it from the task state, too + Set databases = geoIpMeta.getDatabases() + .values() + .stream() + .map(c -> c.database().name() + ".mmdb") + .collect(Collectors.toSet()); + EnterpriseGeoIpTaskState _state = state; + Collection> metas = _state.getDatabases() + .entrySet() + .stream() + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + for (Tuple metaTuple : metas) { + String name = metaTuple.v1(); + Metadata meta = metaTuple.v2(); + if (databases.contains(name) == false) { + logger.debug("Dropping [{}], databases was {}", name, databases); + _state = _state.remove(name); + deleteOldChunks(name, meta.lastChunk() + 1); + droppedSomething = true; + } + } + if (droppedSomething) { + state = _state; + updateTaskState(); + } + } + + if (addedSomething == false && droppedSomething == false) { + RuntimeException accumulator = null; + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + final String id = entry.getKey(); + DatabaseConfiguration database = entry.getValue().database(); + + final String accountId = database.maxmind().accountId(); + try (HttpClient.PasswordAuthenticationHolder holder = credentialsBuilder.apply(accountId)) { + if (holder == null) { + logger.warn("No credentials found to download 
database [{}], skipping download...", id); + } else { + processDatabase(holder.get(), database); + } + } catch (Exception e) { + accumulator = ExceptionsHelper.useOrSuppress(accumulator, ExceptionsHelper.convertToRuntime(e)); + } + } + if (accumulator != null) { + throw accumulator; + } + } + } + + /** + * This method fetches the sha256 file and tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. + * If the computed sha256 does not match the expected sha256, an error will be logged and the database will not be put into the + * Elasticsearch index. + *
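+ * For example, with the default endpoint a configured database named GeoIP2-City is fetched from + * https://download.maxmind.com/geoip/databases/GeoIP2-City/download?suffix=tar.gz.sha256 (the checksum) and from the same URL + * with suffix=tar.gz (the archive itself); see {@link #downloadUrl(String, String)}. + *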

+ * As an implementation detail, this method retrieves the sha256 checksum of the database to download and then invokes + * {@link EnterpriseGeoIpDownloader#processDatabase(PasswordAuthentication, String, String, String)} with that checksum, deferring to + * that method to actually download and process the tar.gz itself. + * + * @param auth The credentials to use to download from the Maxmind endpoint + * @param database The database to be downloaded from Maxmind and indexed into an Elasticsearch index + * @throws IOException If there is an error fetching the sha256 file + */ + void processDatabase(PasswordAuthentication auth, DatabaseConfiguration database) throws IOException { + final String name = database.name(); + logger.debug("Processing database [{}] for configuration [{}]", name, database.id()); + + final String sha256Url = downloadUrl(name, "tar.gz.sha256"); + final String tgzUrl = downloadUrl(name, "tar.gz"); + + String result = new String(httpClient.getBytes(auth, sha256Url), StandardCharsets.UTF_8).trim(); // this throws if the auth is bad + var matcher = CHECKSUM_PATTERN.matcher(result); + boolean match = matcher.matches(); + if (match == false) { + throw new RuntimeException("Unexpected sha256 response from [" + sha256Url + "]"); + } + final String sha256 = matcher.group(1); + // the name that comes from the enterprise downloader cluster state doesn't include the .mmdb extension, + // but the downloading and indexing of database code expects it to be there, so we add it on here before further processing + processDatabase(auth, name + ".mmdb", sha256, tgzUrl); + } + + /** + * This method fetches the tar.gz file for the given database from the Maxmind endpoint, then indexes that tar.gz + * file into the .geoip_databases Elasticsearch index, deleting any old versions of the database tar.gz from the index if they exist. 
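+ * If the downloaded bytes do not match the given sha256 checksum, the failure is logged and the task state is left unchanged, + * so the download will be retried on a later run.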
+ * + * @param auth The credentials to use to download from the Maxmind endpoint + * The name of the database to be downloaded from Maxmind and indexed into an Elasticsearch index + * @param sha256 The sha256 to compare to the computed sha256 of the downloaded tar.gz file + * @param url The URL for the Maxmind endpoint from which the database's tar.gz will be downloaded + */ + private void processDatabase(PasswordAuthentication auth, String name, String sha256, String url) { + Metadata metadata = state.getDatabases().getOrDefault(name, Metadata.EMPTY); + if (Objects.equals(metadata.sha256(), sha256)) { + updateTimestamp(name, metadata); + return; + } + logger.debug("downloading geoip database [{}]", name); + long start = System.currentTimeMillis(); + try (InputStream is = httpClient.get(auth, url)) { + int firstChunk = metadata.lastChunk() + 1; // if there is no metadata, then Metadata.EMPTY + 1 = 0 + Tuple tuple = indexChunks(name, is, firstChunk, MessageDigests.sha256(), sha256, start); + int lastChunk = tuple.v1(); + String md5 = tuple.v2(); + if (lastChunk > firstChunk) { + state = state.put(name, new Metadata(start, firstChunk, lastChunk - 1, md5, start, sha256)); + updateTaskState(); + logger.info("successfully downloaded geoip database [{}]", name); + deleteOldChunks(name, firstChunk); + } + } catch (Exception e) { + logger.error(() -> "error downloading geoip database [" + name + "]", e); + } + } + + // visible for testing + void deleteOldChunks(String name, int firstChunk) { + BoolQueryBuilder queryBuilder = new BoolQueryBuilder().filter(new MatchQueryBuilder("name", name)) + .filter(new RangeQueryBuilder("chunk").to(firstChunk, false)); + DeleteByQueryRequest request = new DeleteByQueryRequest(); + request.indices(DATABASES_INDEX); + request.setQuery(queryBuilder); + client.execute( + DeleteByQueryAction.INSTANCE, + request, + ActionListener.wrap(r -> {}, e -> logger.warn("could not delete old chunks for geoip database [" + name + "]", e)) + ); + } + + // visible for testing + protected void updateTimestamp(String name, Metadata old) { + logger.debug("geoip database [{}] is up to date, updated timestamp", name); + state = state.put( + name, + new Metadata(old.lastUpdate(), old.firstChunk(), old.lastChunk(), old.md5(), System.currentTimeMillis(), old.sha256()) + ); + updateTaskState(); + } + + void updateTaskState() { + PlainActionFuture> future = new PlainActionFuture<>(); + updatePersistentTaskState(state, future); + state = ((EnterpriseGeoIpTaskState) future.actionGet().getState()); + } + + // visible for testing + Tuple indexChunks( + String name, + InputStream is, + int chunk, + @Nullable MessageDigest digest, + String expectedChecksum, + long timestamp + ) throws IOException { + MessageDigest md5 = MessageDigests.md5(); + for (byte[] buf = getChunk(is); buf.length != 0; buf = getChunk(is)) { + md5.update(buf); + if (digest != null) { + digest.update(buf); + } + IndexRequest indexRequest = new IndexRequest(DATABASES_INDEX).id(name + "_" + chunk + "_" + timestamp) + .create(true) + .source(XContentType.SMILE, "name", name, "chunk", chunk, "data", buf); + client.index(indexRequest).actionGet(); + chunk++; + } + + // May take some time before automatic flush kicks in: + // (otherwise the translog will contain large documents for some time without good reason) + FlushRequest flushRequest = new FlushRequest(DATABASES_INDEX); + client.admin().indices().flush(flushRequest).actionGet(); + // Ensure that the chunk documents are visible: + RefreshRequest refreshRequest = new 
RefreshRequest(DATABASES_INDEX); + client.admin().indices().refresh(refreshRequest).actionGet(); + + String actualMd5 = MessageDigests.toHexString(md5.digest()); + String actualChecksum = digest == null ? actualMd5 : MessageDigests.toHexString(digest.digest()); + if (Objects.equals(expectedChecksum, actualChecksum) == false) { + throw new IOException("checksum mismatch, expected [" + expectedChecksum + "], actual [" + actualChecksum + "]"); + } + return Tuple.tuple(chunk, actualMd5); + } + + // visible for testing + static byte[] getChunk(InputStream is) throws IOException { + byte[] buf = new byte[MAX_CHUNK_SIZE]; + int chunkSize = 0; + while (chunkSize < MAX_CHUNK_SIZE) { + int read = is.read(buf, chunkSize, MAX_CHUNK_SIZE - chunkSize); + if (read == -1) { + break; + } + chunkSize += read; + } + if (chunkSize < MAX_CHUNK_SIZE) { + buf = Arrays.copyOf(buf, chunkSize); + } + return buf; + } + + /** + * Downloads the geoip databases now, and schedules them to be downloaded again after pollInterval. + */ + synchronized void runDownloader() { + // by the time we reach here, the state will never be null + assert this.state != null : "this.setState() is null. You need to call setState() before calling runDownloader()"; + + // there's a race condition between here and requestReschedule. originally this scheduleNextRun call was at the end of this + // block, but remember that updateDatabases can take seconds to run (it's downloading bytes from the internet), and so during the + // very first run there would be no future run scheduled to reschedule in requestReschedule. which meant that if you went from zero + // to N(>=2) databases in quick succession, then all but the first database wouldn't necessarily get downloaded, because the + // requestReschedule call in the EnterpriseGeoIpDownloaderTaskExecutor's clusterChanged wouldn't have a scheduled future run to + // reschedule. scheduling the next run at the beginning of this run means that there's a much smaller window (milliseconds?, rather + // than seconds) in which such a race could occur. technically there's a window here, still, but i think it's _greatly_ reduced. + scheduleNextRun(pollIntervalSupplier.get()); + // TODO regardless of the above comment, i like the idea of checking the lowest last-checked time and then running the math to get + // to the next interval from then -- maybe that's a neat future enhancement to add + + if (isCancelled() || isCompleted()) { + return; + } + try { + updateDatabases(); // n.b. this downloads bytes from the internet, it can take a while + } catch (Exception e) { + logger.error("exception during geoip databases update", e); + } + try { + cleanDatabases(); + } catch (Exception e) { + logger.error("exception during geoip databases cleanup", e); + } + } + + /** + * This method requests that the downloader be rescheduled to run immediately (presumably because a dynamic property supplied by + * pollIntervalSupplier or eagerDownloadSupplier has changed, or a pipeline with a geoip processor has been added). This method does + * nothing if this task is cancelled, completed, or has not yet been scheduled to run for the first time. It cancels any existing + * scheduled run. 
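+ * Rescheduling is done by cancelling the pending run and scheduling a new one with a zero delay; if the pending run cannot be + * cancelled (for example, because it is already executing), nothing is scheduled here and the running invocation schedules its + * own follow-up.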
+ */ + public void requestReschedule() { + if (isCancelled() || isCompleted()) { + return; + } + if (scheduled != null && scheduled.cancel()) { + scheduleNextRun(TimeValue.ZERO); + } + } + + private void cleanDatabases() { + List> expiredDatabases = state.getDatabases() + .entrySet() + .stream() + .filter(e -> e.getValue().isNewEnough(clusterService.state().metadata().settings()) == false) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + expiredDatabases.forEach(e -> { + String name = e.v1(); + Metadata meta = e.v2(); + deleteOldChunks(name, meta.lastChunk() + 1); + state = state.put(name, new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1)); + updateTaskState(); + }); + } + + @Override + protected void onCancelled() { + if (scheduled != null) { + scheduled.cancel(); + } + markAsCompleted(); + } + + private void scheduleNextRun(TimeValue time) { + if (threadPool.scheduler().isShutdown() == false) { + scheduled = threadPool.schedule(this::runDownloader, time, threadPool.generic()); + } + } + +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java new file mode 100644 index 0000000000000..8fc46fe157548 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTaskExecutor.java @@ -0,0 +1,257 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ElasticsearchException; +import org.elasticsearch.client.internal.Client; +import org.elasticsearch.client.internal.OriginSettingClient; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.settings.SecureSetting; +import org.elasticsearch.common.settings.SecureSettings; +import org.elasticsearch.common.settings.SecureString; +import org.elasticsearch.common.settings.Setting; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; +import org.elasticsearch.ingest.IngestService; +import org.elasticsearch.persistent.AllocatedPersistentTask; +import org.elasticsearch.persistent.PersistentTaskState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksExecutor; +import org.elasticsearch.tasks.TaskId; +import org.elasticsearch.threadpool.ThreadPool; + +import java.io.IOException; +import java.io.InputStream; +import java.security.GeneralSecurityException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.concurrent.atomic.AtomicReference; + +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; +import static org.elasticsearch.ingest.geoip.GeoIpDownloaderTaskExecutor.ENABLED_SETTING; +import static org.elasticsearch.ingest.geoip.GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING; + +public class EnterpriseGeoIpDownloaderTaskExecutor extends PersistentTasksExecutor + implements + ClusterStateListener { + private static final Logger logger = LogManager.getLogger(EnterpriseGeoIpDownloader.class); + + static final String MAXMIND_SETTINGS_PREFIX = "ingest.geoip.downloader.maxmind."; + + public static final Setting MAXMIND_LICENSE_KEY_SETTING = SecureSetting.secureString( + MAXMIND_SETTINGS_PREFIX + "license_key", + null + ); + + private final Client client; + private final HttpClient httpClient; + private final ClusterService clusterService; + private final ThreadPool threadPool; + private final Settings settings; + private volatile TimeValue pollInterval; + private final AtomicReference currentTask = new AtomicReference<>(); + + private volatile SecureSettings cachedSecureSettings; + + EnterpriseGeoIpDownloaderTaskExecutor(Client client, HttpClient httpClient, ClusterService clusterService, ThreadPool threadPool) { + super(ENTERPRISE_GEOIP_DOWNLOADER, threadPool.generic()); + this.client = new OriginSettingClient(client, IngestService.INGEST_ORIGIN); + this.httpClient = httpClient; + this.clusterService = clusterService; + this.threadPool = threadPool; + this.settings = clusterService.getSettings(); + this.pollInterval = POLL_INTERVAL_SETTING.get(settings); + + // do an initial load using the node settings + reload(clusterService.getSettings()); + } + + /** + * This method completes the initialization of the EnterpriseGeoIpDownloaderTaskExecutor by registering several listeners. 
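+ * Specifically, it registers this executor as a cluster state listener and subscribes to updates of {@code POLL_INTERVAL_SETTING} + * so that a running downloader can be rescheduled when the poll interval changes.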
+ */ + public void init() { + clusterService.addListener(this); + clusterService.getClusterSettings().addSettingsUpdateConsumer(POLL_INTERVAL_SETTING, this::setPollInterval); + } + + private void setPollInterval(TimeValue pollInterval) { + if (Objects.equals(this.pollInterval, pollInterval) == false) { + this.pollInterval = pollInterval; + EnterpriseGeoIpDownloader currentDownloader = getCurrentTask(); + if (currentDownloader != null) { + currentDownloader.requestReschedule(); + } + } + } + + private HttpClient.PasswordAuthenticationHolder buildCredentials(final String username) { + final char[] passwordChars; + if (cachedSecureSettings.getSettingNames().contains(MAXMIND_LICENSE_KEY_SETTING.getKey())) { + passwordChars = cachedSecureSettings.getString(MAXMIND_LICENSE_KEY_SETTING.getKey()).getChars(); + } else { + passwordChars = null; + } + + // if the username is missing, empty, or blank, return null as 'no auth' + if (username == null || username.isEmpty() || username.isBlank()) { + return null; + } + + // likewise if the password chars array is missing or empty, return null as 'no auth' + if (passwordChars == null || passwordChars.length == 0) { + return null; + } + + return new HttpClient.PasswordAuthenticationHolder(username, passwordChars); + } + + @Override + protected EnterpriseGeoIpDownloader createTask( + long id, + String type, + String action, + TaskId parentTaskId, + PersistentTasksCustomMetadata.PersistentTask taskInProgress, + Map headers + ) { + return new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + id, + type, + action, + getDescription(taskInProgress), + parentTaskId, + headers, + () -> pollInterval, + this::buildCredentials + ); + } + + @Override + protected void nodeOperation(AllocatedPersistentTask task, EnterpriseGeoIpTaskParams params, PersistentTaskState state) { + EnterpriseGeoIpDownloader downloader = (EnterpriseGeoIpDownloader) task; + EnterpriseGeoIpTaskState geoIpTaskState = (state == null) ? EnterpriseGeoIpTaskState.EMPTY : (EnterpriseGeoIpTaskState) state; + downloader.setState(geoIpTaskState); + currentTask.set(downloader); + if (ENABLED_SETTING.get(clusterService.state().metadata().settings(), settings)) { + downloader.runDownloader(); + } + } + + public EnterpriseGeoIpDownloader getCurrentTask() { + return currentTask.get(); + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + EnterpriseGeoIpDownloader currentDownloader = getCurrentTask(); + if (currentDownloader != null) { + boolean hasGeoIpMetadataChanges = event.metadataChanged() + && event.changedCustomMetadataSet().contains(IngestGeoIpMetadata.TYPE); + if (hasGeoIpMetadataChanges) { + currentDownloader.requestReschedule(); // watching the cluster changed events to kick the thing off if it's not running + } + } + } + + public synchronized void reload(Settings settings) { + // `SecureSettings` are available here! cache them as they will be needed + // whenever dynamic cluster settings change and we have to rebuild the accounts + try { + this.cachedSecureSettings = extractSecureSettings(settings, List.of(MAXMIND_LICENSE_KEY_SETTING)); + } catch (GeneralSecurityException e) { + // rethrow as a runtime exception, there's logging higher up the call chain around ReloadablePlugin + throw new ElasticsearchException("Exception while reloading enterprise geoip download task executor", e); + } + } + + /** + * Extracts the {@link SecureSettings}` out of the passed in {@link Settings} object. 
The {@code Setting} argument has to have the + * {@code SecureSettings} open/available. Normally {@code SecureSettings} are available only under specific callstacks (eg. during node + * initialization or during a `reload` call). The returned copy can be reused freely as it will never be closed (this is a bit of + * cheating, but it is necessary in this specific circumstance). Only works for secure settings of type string (not file). + * + * @param source A {@code Settings} object with its {@code SecureSettings} open/available. + * @param securePluginSettings The list of settings to copy. + * @return A copy of the {@code SecureSettings} of the passed in {@code Settings} argument. + */ + private static SecureSettings extractSecureSettings(Settings source, List> securePluginSettings) + throws GeneralSecurityException { + // get the secure settings out + final SecureSettings sourceSecureSettings = Settings.builder().put(source, true).getSecureSettings(); + // filter and cache them... + final Map innerMap = new HashMap<>(); + if (sourceSecureSettings != null && securePluginSettings != null) { + for (final String settingKey : sourceSecureSettings.getSettingNames()) { + for (final Setting secureSetting : securePluginSettings) { + if (secureSetting.match(settingKey)) { + innerMap.put( + settingKey, + new SecureSettingValue( + sourceSecureSettings.getString(settingKey), + sourceSecureSettings.getSHA256Digest(settingKey) + ) + ); + } + } + } + } + return new SecureSettings() { + @Override + public boolean isLoaded() { + return true; + } + + @Override + public SecureString getString(String setting) { + return innerMap.get(setting).value(); + } + + @Override + public Set getSettingNames() { + return innerMap.keySet(); + } + + @Override + public InputStream getFile(String setting) { + throw new UnsupportedOperationException("A cached SecureSetting cannot be a file"); + } + + @Override + public byte[] getSHA256Digest(String setting) { + return innerMap.get(setting).sha256Digest(); + } + + @Override + public void close() throws IOException {} + + @Override + public void writeTo(StreamOutput out) throws IOException { + throw new UnsupportedOperationException("A cached SecureSetting cannot be serialized"); + } + }; + } + + /** + * A single-purpose record for the internal implementation of extractSecureSettings + */ + private record SecureSettingValue(SecureString value, byte[] sha256Digest) {} +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java new file mode 100644 index 0000000000000..1dd6422fd388a --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskState.java @@ -0,0 +1,153 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.VersionedNamedWriteable; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.GeoIpTaskState.Metadata; +import org.elasticsearch.persistent.PersistentTaskState; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.stream.Collectors; + +import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; +import static org.elasticsearch.persistent.PersistentTasksCustomMetadata.getTaskWithId; +import static org.elasticsearch.xcontent.ConstructingObjectParser.constructorArg; + +class EnterpriseGeoIpTaskState implements PersistentTaskState, VersionedNamedWriteable { + + private static final ParseField DATABASES = new ParseField("databases"); + + static final EnterpriseGeoIpTaskState EMPTY = new EnterpriseGeoIpTaskState(Map.of()); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + GEOIP_DOWNLOADER, + true, + args -> { + List> databases = (List>) args[0]; + return new EnterpriseGeoIpTaskState(databases.stream().collect(Collectors.toMap(Tuple::v1, Tuple::v2))); + } + ); + + static { + PARSER.declareNamedObjects(constructorArg(), (p, c, name) -> Tuple.tuple(name, Metadata.fromXContent(p)), DATABASES); + } + + public static EnterpriseGeoIpTaskState fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + private final Map databases; + + EnterpriseGeoIpTaskState(Map databases) { + this.databases = Map.copyOf(databases); + } + + EnterpriseGeoIpTaskState(StreamInput input) throws IOException { + databases = input.readImmutableMap( + in -> new Metadata(in.readLong(), in.readVInt(), in.readVInt(), in.readString(), in.readLong(), in.readOptionalString()) + ); + } + + public EnterpriseGeoIpTaskState put(String name, Metadata metadata) { + HashMap newDatabases = new HashMap<>(databases); + newDatabases.put(name, metadata); + return new EnterpriseGeoIpTaskState(newDatabases); + } + + public EnterpriseGeoIpTaskState remove(String name) { + HashMap newDatabases = new HashMap<>(databases); + newDatabases.remove(name); + return new EnterpriseGeoIpTaskState(newDatabases); + } + + public Map getDatabases() { + return databases; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + EnterpriseGeoIpTaskState that = (EnterpriseGeoIpTaskState) o; + return databases.equals(that.databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + { + builder.startObject("databases"); + for (Map.Entry e : 
databases.entrySet()) { + builder.field(e.getKey(), e.getValue()); + } + builder.endObject(); + } + builder.endObject(); + return builder; + } + + @Override + public String getWriteableName() { + return "enterprise-geoip-downloader"; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(databases, (o, v) -> { + o.writeLong(v.lastUpdate()); + o.writeVInt(v.firstChunk()); + o.writeVInt(v.lastChunk()); + o.writeString(v.md5()); + o.writeLong(v.lastCheck()); + o.writeOptionalString(v.sha256()); + }); + } + + /** + * Retrieves the geoip downloader's task state from the cluster state. This may return null in some circumstances, + * for example if the geoip downloader task hasn't been created yet (which it wouldn't be if it's disabled). + * + * @param state the cluster state to read the task state from + * @return the geoip downloader's task state or null if there is not a state to read + */ + @Nullable + static EnterpriseGeoIpTaskState getEnterpriseGeoIpTaskState(ClusterState state) { + PersistentTasksCustomMetadata.PersistentTask task = getTaskWithId(state, EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER); + return (task == null) ? null : (EnterpriseGeoIpTaskState) task.getState(); + } + +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java index 895c9315d2325..ee6f2f16f051b 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloader.java @@ -24,6 +24,7 @@ import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.Tuple; import org.elasticsearch.index.query.BoolQueryBuilder; import org.elasticsearch.index.query.MatchQueryBuilder; import org.elasticsearch.index.query.RangeQueryBuilder; @@ -170,23 +171,28 @@ private List fetchDatabasesOverview() throws IOException { } // visible for testing - void processDatabase(Map databaseInfo) { + void processDatabase(final Map databaseInfo) { String name = databaseInfo.get("name").toString().replace(".tgz", "") + ".mmdb"; String md5 = (String) databaseInfo.get("md5_hash"); - if (state.contains(name) && Objects.equals(md5, state.get(name).md5())) { - updateTimestamp(name, state.get(name)); - return; - } - logger.debug("downloading geoip database [{}]", name); String url = databaseInfo.get("url").toString(); if (url.startsWith("http") == false) { // relative url, add it after last slash (i.e. resolve sibling) or at the end if there's no slash after http[s]:// int lastSlash = endpoint.substring(8).lastIndexOf('/'); url = (lastSlash != -1 ? endpoint.substring(0, lastSlash + 8) : endpoint) + "/" + url; } + processDatabase(name, md5, url); + } + + private void processDatabase(final String name, final String md5, final String url) { + Metadata metadata = state.getDatabases().getOrDefault(name, Metadata.EMPTY); + if (Objects.equals(metadata.md5(), md5)) { + updateTimestamp(name, metadata); + return; + } + logger.debug("downloading geoip database [{}]", name); long start = System.currentTimeMillis(); try (InputStream is = httpClient.get(url)) { - int firstChunk = state.contains(name) ? 
state.get(name).lastChunk() + 1 : 0; + int firstChunk = metadata.lastChunk() + 1; // if there is no metadata, then Metadata.EMPTY.lastChunk() + 1 = 0 int lastChunk = indexChunks(name, is, firstChunk, md5, start); if (lastChunk > firstChunk) { state = state.put(name, new Metadata(start, firstChunk, lastChunk - 1, md5, start)); @@ -313,22 +319,20 @@ public void requestReschedule() { } private void cleanDatabases() { - long expiredDatabases = state.getDatabases() + List> expiredDatabases = state.getDatabases() .entrySet() .stream() - .filter(e -> e.getValue().isValid(clusterService.state().metadata().settings()) == false) - .peek(e -> { - String name = e.getKey(); - Metadata meta = e.getValue(); - deleteOldChunks(name, meta.lastChunk() + 1); - state = state.put( - name, - new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1) - ); - updateTaskState(); - }) - .count(); - stats = stats.expiredDatabases((int) expiredDatabases); + .filter(e -> e.getValue().isNewEnough(clusterService.state().metadata().settings()) == false) + .map(entry -> Tuple.tuple(entry.getKey(), entry.getValue())) + .toList(); + expiredDatabases.forEach(e -> { + String name = e.v1(); + Metadata meta = e.v2(); + deleteOldChunks(name, meta.lastChunk() + 1); + state = state.put(name, new Metadata(meta.lastUpdate(), meta.firstChunk(), meta.lastChunk(), meta.md5(), meta.lastCheck() - 1)); + updateTaskState(); + }); + stats = stats.expiredDatabases(expiredDatabases.size()); } @Override diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java index 09ac488f96e2d..3f89bb1dd5c50 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTaskExecutor.java @@ -217,7 +217,7 @@ public void clusterChanged(ClusterChangedEvent event) { } boolean hasIndicesChanges = event.previousState().metadata().indices().equals(event.state().metadata().indices()) == false; - boolean hasIngestPipelineChanges = event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); + boolean hasIngestPipelineChanges = event.metadataChanged() && event.changedCustomMetadataSet().contains(IngestMetadata.TYPE); if (hasIngestPipelineChanges || hasIndicesChanges) { boolean newAtLeastOneGeoipProcessor = hasAtLeastOneGeoipProcessor(event.state()); diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java index e39705a71f56c..82b9e930280b7 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpProcessor.java @@ -592,7 +592,7 @@ private Map retrieveEnterpriseGeoData(GeoIpDatabase geoIpDatabas } case ISP_ORGANIZATION_NAME -> { if (ispOrganization != null) { - geoData.put("isp_organization", ispOrganization); + geoData.put("isp_organization_name", ispOrganization); } } case MOBILE_COUNTRY_CODE -> { @@ -660,7 +660,7 @@ private Map retrieveIspGeoData(GeoIpDatabase geoIpDatabase, Inet } case ISP_ORGANIZATION_NAME -> { if (ispOrganization != null) { - geoData.put("isp_organization", ispOrganization); + geoData.put("isp_organization_name", ispOrganization); } } case MOBILE_COUNTRY_CODE -> { diff 
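
To make the refactored download logic above concrete: when there is no prior state for a database, Metadata.EMPTY acts as the sentinel, so the md5 comparison fails and the first chunk index naturally starts at 0. A minimal sketch, assuming a GeoIpTaskState named state and an md5 string remoteMd5 taken from the databases overview (the database file name is illustrative):

    GeoIpTaskState.Metadata metadata = state.getDatabases().getOrDefault("GeoLite2-City.mmdb", GeoIpTaskState.Metadata.EMPTY);
    boolean unchanged = Objects.equals(metadata.md5(), remoteMd5); // EMPTY carries an empty md5, so a new database never looks up to date
    int firstChunk = metadata.lastChunk() + 1;                     // EMPTY.lastChunk() is -1, so a fresh download starts at chunk 0
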
--git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java index d55f517b46e24..93dc345a80a2f 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/GeoIpTaskState.java @@ -42,6 +42,10 @@ class GeoIpTaskState implements PersistentTaskState, VersionedNamedWriteable { + private static boolean includeSha256(TransportVersion version) { + return version.onOrAfter(TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15); + } + private static final ParseField DATABASES = new ParseField("databases"); static final GeoIpTaskState EMPTY = new GeoIpTaskState(Map.of()); @@ -71,7 +75,16 @@ public static GeoIpTaskState fromXContent(XContentParser parser) throws IOExcept } GeoIpTaskState(StreamInput input) throws IOException { - databases = input.readImmutableMap(in -> new Metadata(in.readLong(), in.readVInt(), in.readVInt(), in.readString(), in.readLong())); + databases = input.readImmutableMap( + in -> new Metadata( + in.readLong(), + in.readVInt(), + in.readVInt(), + in.readString(), + in.readLong(), + includeSha256(in.getTransportVersion()) ? input.readOptionalString() : null + ) + ); } public GeoIpTaskState put(String name, Metadata metadata) { @@ -84,14 +97,6 @@ public Map getDatabases() { return databases; } - public boolean contains(String name) { - return databases.containsKey(name); - } - - public Metadata get(String name) { - return databases.get(name); - } - @Override public boolean equals(Object o) { if (this == o) return true; @@ -137,17 +142,29 @@ public void writeTo(StreamOutput out) throws IOException { o.writeVInt(v.lastChunk); o.writeString(v.md5); o.writeLong(v.lastCheck); + if (includeSha256(o.getTransportVersion())) { + o.writeOptionalString(v.sha256); + } }); } - record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck) implements ToXContentObject { + record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck, @Nullable String sha256) + implements + ToXContentObject { - static final String NAME = GEOIP_DOWNLOADER + "-metadata"; + /** + * An empty Metadata object useful for getOrDefault -type calls. Crucially, the 'lastChunk' is -1, so it's safe to use + * with logic that says the new firstChunk is the old lastChunk + 1. + */ + static Metadata EMPTY = new Metadata(-1, -1, -1, "", -1, null); + + private static final String NAME = GEOIP_DOWNLOADER + "-metadata"; private static final ParseField LAST_CHECK = new ParseField("last_check"); private static final ParseField LAST_UPDATE = new ParseField("last_update"); private static final ParseField FIRST_CHUNK = new ParseField("first_chunk"); private static final ParseField LAST_CHUNK = new ParseField("last_chunk"); private static final ParseField MD5 = new ParseField("md5"); + private static final ParseField SHA256 = new ParseField("sha256"); private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( NAME, @@ -157,7 +174,8 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long (int) args[1], (int) args[2], (String) args[3], - (long) (args[4] == null ? args[0] : args[4]) + (long) (args[4] == null ? 
args[0] : args[4]), + (String) args[5] ) ); @@ -167,6 +185,7 @@ record Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long PARSER.declareInt(constructorArg(), LAST_CHUNK); PARSER.declareString(constructorArg(), MD5); PARSER.declareLong(optionalConstructorArg(), LAST_CHECK); + PARSER.declareString(optionalConstructorArg(), SHA256); } public static Metadata fromXContent(XContentParser parser) { @@ -181,11 +200,15 @@ public static Metadata fromXContent(XContentParser parser) { Objects.requireNonNull(md5); } + Metadata(long lastUpdate, int firstChunk, int lastChunk, String md5, long lastCheck) { + this(lastUpdate, firstChunk, lastChunk, md5, lastCheck, null); + } + public boolean isCloseToExpiration() { return Instant.ofEpochMilli(lastCheck).isBefore(Instant.now().minus(25, ChronoUnit.DAYS)); } - public boolean isValid(Settings settings) { + public boolean isNewEnough(Settings settings) { TimeValue valid = settings.getAsTime("ingest.geoip.database_validity", TimeValue.timeValueDays(30)); return Instant.ofEpochMilli(lastCheck).isAfter(Instant.now().minus(valid.getMillis(), ChronoUnit.MILLIS)); } @@ -199,6 +222,9 @@ public XContentBuilder toXContent(XContentBuilder builder, Params params) throws builder.field(FIRST_CHUNK.getPreferredName(), firstChunk); builder.field(LAST_CHUNK.getPreferredName(), lastChunk); builder.field(MD5.getPreferredName(), md5); + if (sha256 != null) { // only serialize if not null, for prettiness reasons + builder.field(SHA256.getPreferredName(), sha256); + } } builder.endObject(); return builder; diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java index 8efc4dc2e74bd..2f6bd6ef20fd0 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/HttpClient.java @@ -24,6 +24,7 @@ import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; +import java.util.Arrays; import java.util.Objects; import static java.net.HttpURLConnection.HTTP_MOVED_PERM; @@ -34,6 +35,31 @@ class HttpClient { + /** + * A PasswordAuthenticationHolder is just a wrapper around a PasswordAuthentication to implement AutoCloseable. + * This construction makes it possible to use a PasswordAuthentication in a try-with-resources statement, which + * makes it easier to ensure cleanup of the PasswordAuthentication is performed after it's finished being used. + */ + static final class PasswordAuthenticationHolder implements AutoCloseable { + private PasswordAuthentication auth; + + PasswordAuthenticationHolder(String username, char[] passwordChars) { + this.auth = new PasswordAuthentication(username, passwordChars); // clones the passed-in chars + } + + public PasswordAuthentication get() { + Objects.requireNonNull(auth); + return auth; + } + + @Override + public void close() { + final PasswordAuthentication clear = this.auth; + this.auth = null; // set to null and then clear it + Arrays.fill(clear.getPassword(), '\0'); // zero out the password chars + } + } + // a private sentinel value for representing the idea that there's no auth for some request. // this allows us to have a not-null requirement on the methods that do accept an auth. // if you don't want auth, then don't use those methods. 
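
As a usage note for the PasswordAuthenticationHolder above, a minimal try-with-resources sketch; the account id and license key literals are placeholders and the actual download call is elided (uses java.net.PasswordAuthentication):

    try (HttpClient.PasswordAuthenticationHolder holder =
             new HttpClient.PasswordAuthenticationHolder("123456", "my-license-key".toCharArray())) {
        PasswordAuthentication auth = holder.get();
        // ... hand 'auth' to whatever request needs the credentials ...
    } // close() drops the holder's reference and zeroes the password chars
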
;) diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java new file mode 100644 index 0000000000000..b6bfbf94fa8f7 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadata.java @@ -0,0 +1,157 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.TransportVersion; +import org.elasticsearch.TransportVersions; +import org.elasticsearch.cluster.Diff; +import org.elasticsearch.cluster.DiffableUtils; +import org.elasticsearch.cluster.NamedDiff; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.common.collect.Iterators; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.xcontent.ChunkedToXContentHelper; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContent; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.EnumSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.function.Function; +import java.util.stream.Collectors; + +/** + * Holds the ingest-geoip databases that are available in the cluster state. 
+ */ +public final class IngestGeoIpMetadata implements Metadata.Custom { + + public static final String TYPE = "ingest_geoip"; + private static final ParseField DATABASES_FIELD = new ParseField("databases"); + + public static final IngestGeoIpMetadata EMPTY = new IngestGeoIpMetadata(Map.of()); + + @SuppressWarnings("unchecked") + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "ingest_geoip_metadata", + a -> new IngestGeoIpMetadata( + ((List) a[0]).stream().collect(Collectors.toMap((m) -> m.database().id(), Function.identity())) + ) + ); + static { + PARSER.declareNamedObjects(ConstructingObjectParser.constructorArg(), (p, c, n) -> DatabaseConfigurationMetadata.parse(p, n), v -> { + throw new IllegalArgumentException("ordered " + DATABASES_FIELD.getPreferredName() + " are not supported"); + }, DATABASES_FIELD); + } + + private final Map databases; + + public IngestGeoIpMetadata(Map databases) { + this.databases = Map.copyOf(databases); + } + + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + + public Map getDatabases() { + return databases; + } + + public IngestGeoIpMetadata(StreamInput in) throws IOException { + this.databases = in.readMap(StreamInput::readString, DatabaseConfigurationMetadata::new); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeMap(databases, StreamOutput::writeWriteable); + } + + public static IngestGeoIpMetadata fromXContent(XContentParser parser) throws IOException { + return PARSER.parse(parser, null); + } + + @Override + public Iterator toXContentChunked(ToXContent.Params ignored) { + return Iterators.concat(ChunkedToXContentHelper.xContentValuesMap(DATABASES_FIELD.getPreferredName(), databases)); + } + + @Override + public EnumSet context() { + return Metadata.ALL_CONTEXTS; + } + + @Override + public Diff diff(Metadata.Custom before) { + return new GeoIpMetadataDiff((IngestGeoIpMetadata) before, this); + } + + static class GeoIpMetadataDiff implements NamedDiff { + + final Diff> databases; + + GeoIpMetadataDiff(IngestGeoIpMetadata before, IngestGeoIpMetadata after) { + this.databases = DiffableUtils.diff(before.databases, after.databases, DiffableUtils.getStringKeySerializer()); + } + + GeoIpMetadataDiff(StreamInput in) throws IOException { + databases = DiffableUtils.readJdkMapDiff( + in, + DiffableUtils.getStringKeySerializer(), + DatabaseConfigurationMetadata::new, + DatabaseConfigurationMetadata::readDiffFrom + ); + } + + @Override + public Metadata.Custom apply(Metadata.Custom part) { + return new IngestGeoIpMetadata(databases.apply(((IngestGeoIpMetadata) part).databases)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + databases.writeTo(out); + } + + @Override + public String getWriteableName() { + return TYPE; + } + + @Override + public TransportVersion getMinimalSupportedVersion() { + return TransportVersions.ENTERPRISE_GEOIP_DOWNLOADER_BACKPORT_8_15; + } + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + IngestGeoIpMetadata that = (IngestGeoIpMetadata) o; + return Objects.equals(databases, that.databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } +} diff --git 
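
For orientation, a minimal sketch of reading this custom metadata back out of a ClusterState, mirroring how the transport actions later in this change consume it; the clusterState and logger variables are assumed to be in scope:

    IngestGeoIpMetadata geoIpMeta = clusterState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY);
    for (Map.Entry<String, DatabaseConfigurationMetadata> entry : geoIpMeta.getDatabases().entrySet()) {
        DatabaseConfigurationMetadata meta = entry.getValue();
        // the map key is the configuration id; version is a monotonically incrementing change counter
        logger.info("database [{}] -> file [{}], version [{}]", entry.getKey(), meta.database().name(), meta.version());
    }
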
a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java index 9d0f9848d97b6..e606688ad60a0 100644 --- a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/IngestGeoIpPlugin.java @@ -12,8 +12,10 @@ import org.elasticsearch.action.ActionRequest; import org.elasticsearch.action.ActionResponse; import org.elasticsearch.client.internal.Client; +import org.elasticsearch.cluster.NamedDiff; import org.elasticsearch.cluster.metadata.IndexMetadata; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.io.stream.NamedWriteableRegistry; @@ -25,8 +27,18 @@ import org.elasticsearch.common.settings.SettingsModule; import org.elasticsearch.features.NodeFeature; import org.elasticsearch.indices.SystemIndexDescriptor; +import org.elasticsearch.ingest.EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams; import org.elasticsearch.ingest.IngestService; import org.elasticsearch.ingest.Processor; +import org.elasticsearch.ingest.geoip.direct.DeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.GetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestDeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestGetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.RestPutDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportDeleteDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportGetDatabaseConfigurationAction; +import org.elasticsearch.ingest.geoip.direct.TransportPutDatabaseConfigurationAction; import org.elasticsearch.ingest.geoip.stats.GeoIpDownloaderStats; import org.elasticsearch.ingest.geoip.stats.GeoIpStatsAction; import org.elasticsearch.ingest.geoip.stats.GeoIpStatsTransportAction; @@ -38,6 +50,7 @@ import org.elasticsearch.plugins.IngestPlugin; import org.elasticsearch.plugins.PersistentTaskPlugin; import org.elasticsearch.plugins.Plugin; +import org.elasticsearch.plugins.ReloadablePlugin; import org.elasticsearch.plugins.SystemIndexPlugin; import org.elasticsearch.rest.RestController; import org.elasticsearch.rest.RestHandler; @@ -57,13 +70,21 @@ import java.util.function.Supplier; import static org.elasticsearch.index.mapper.MapperService.SINGLE_MAPPING_NAME; +import static org.elasticsearch.ingest.EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER; import static org.elasticsearch.ingest.IngestService.INGEST_ORIGIN; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.DATABASES_INDEX_PATTERN; import static org.elasticsearch.ingest.geoip.GeoIpDownloader.GEOIP_DOWNLOADER; import static org.elasticsearch.xcontent.XContentFactory.jsonBuilder; -public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, SystemIndexPlugin, Closeable, PersistentTaskPlugin, ActionPlugin { +public class IngestGeoIpPlugin extends Plugin + implements + IngestPlugin, + SystemIndexPlugin, + Closeable, + PersistentTaskPlugin, + ActionPlugin, + ReloadablePlugin { public 
static final Setting CACHE_SIZE = Setting.longSetting("ingest.geoip.cache_size", 1000, 0, Setting.Property.NodeScope); private static final int GEOIP_INDEX_MAPPINGS_VERSION = 1; /** @@ -78,6 +99,7 @@ public class IngestGeoIpPlugin extends Plugin implements IngestPlugin, SystemInd private final SetOnce ingestService = new SetOnce<>(); private final SetOnce databaseRegistry = new SetOnce<>(); private GeoIpDownloaderTaskExecutor geoIpDownloaderTaskExecutor; + private EnterpriseGeoIpDownloaderTaskExecutor enterpriseGeoIpDownloaderTaskExecutor; @Override public List> getSettings() { @@ -86,7 +108,8 @@ public List> getSettings() { GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING, GeoIpDownloaderTaskExecutor.ENABLED_SETTING, GeoIpDownloader.ENDPOINT_SETTING, - GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING + GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING, + EnterpriseGeoIpDownloaderTaskExecutor.MAXMIND_LICENSE_KEY_SETTING ); } @@ -123,7 +146,16 @@ public Collection createComponents(PluginServices services) { services.threadPool() ); geoIpDownloaderTaskExecutor.init(); - return List.of(databaseRegistry.get(), geoIpDownloaderTaskExecutor); + + enterpriseGeoIpDownloaderTaskExecutor = new EnterpriseGeoIpDownloaderTaskExecutor( + services.client(), + new HttpClient(), + services.clusterService(), + services.threadPool() + ); + enterpriseGeoIpDownloaderTaskExecutor.init(); + + return List.of(databaseRegistry.get(), geoIpDownloaderTaskExecutor, enterpriseGeoIpDownloaderTaskExecutor); } @Override @@ -139,12 +171,17 @@ public List> getPersistentTasksExecutor( SettingsModule settingsModule, IndexNameExpressionResolver expressionResolver ) { - return List.of(geoIpDownloaderTaskExecutor); + return List.of(geoIpDownloaderTaskExecutor, enterpriseGeoIpDownloaderTaskExecutor); } @Override public List> getActions() { - return List.of(new ActionHandler<>(GeoIpStatsAction.INSTANCE, GeoIpStatsTransportAction.class)); + return List.of( + new ActionHandler<>(GeoIpStatsAction.INSTANCE, GeoIpStatsTransportAction.class), + new ActionHandler<>(GetDatabaseConfigurationAction.INSTANCE, TransportGetDatabaseConfigurationAction.class), + new ActionHandler<>(DeleteDatabaseConfigurationAction.INSTANCE, TransportDeleteDatabaseConfigurationAction.class), + new ActionHandler<>(PutDatabaseConfigurationAction.INSTANCE, TransportPutDatabaseConfigurationAction.class) + ); } @Override @@ -159,22 +196,41 @@ public List getRestHandlers( Supplier nodesInCluster, Predicate clusterSupportsFeature ) { - return List.of(new RestGeoIpStatsAction()); + return List.of( + new RestGeoIpStatsAction(), + new RestGetDatabaseConfigurationAction(), + new RestDeleteDatabaseConfigurationAction(), + new RestPutDatabaseConfigurationAction() + ); } @Override public List getNamedXContent() { return List.of( new NamedXContentRegistry.Entry(PersistentTaskParams.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskParams::fromXContent), - new NamedXContentRegistry.Entry(PersistentTaskState.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskState::fromXContent) + new NamedXContentRegistry.Entry(PersistentTaskState.class, new ParseField(GEOIP_DOWNLOADER), GeoIpTaskState::fromXContent), + new NamedXContentRegistry.Entry( + PersistentTaskParams.class, + new ParseField(ENTERPRISE_GEOIP_DOWNLOADER), + EnterpriseGeoIpTaskParams::fromXContent + ), + new NamedXContentRegistry.Entry( + PersistentTaskState.class, + new ParseField(ENTERPRISE_GEOIP_DOWNLOADER), + EnterpriseGeoIpTaskState::fromXContent + ) ); } @Override public List getNamedWriteables() { return List.of( + 
new NamedWriteableRegistry.Entry(Metadata.Custom.class, IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata::new), + new NamedWriteableRegistry.Entry(NamedDiff.class, IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.GeoIpMetadataDiff::new), new NamedWriteableRegistry.Entry(PersistentTaskState.class, GEOIP_DOWNLOADER, GeoIpTaskState::new), new NamedWriteableRegistry.Entry(PersistentTaskParams.class, GEOIP_DOWNLOADER, GeoIpTaskParams::new), + new NamedWriteableRegistry.Entry(PersistentTaskState.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskState::new), + new NamedWriteableRegistry.Entry(PersistentTaskParams.class, ENTERPRISE_GEOIP_DOWNLOADER, EnterpriseGeoIpTaskParams::new), new NamedWriteableRegistry.Entry(Task.Status.class, GEOIP_DOWNLOADER, GeoIpDownloaderStats::new) ); } @@ -235,4 +291,9 @@ private static XContentBuilder mappings() { throw new UncheckedIOException("Failed to build mappings for " + DATABASES_INDEX, e); } } + + @Override + public void reload(Settings settings) { + enterpriseGeoIpDownloaderTaskExecutor.reload(settings); + } } diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java new file mode 100644 index 0000000000000..0a43d7a2d830b --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfiguration.java @@ -0,0 +1,209 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.cluster.metadata.MetadataCreateIndexService; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Pattern; + +/** + * A database configuration is an identified (has an id) configuration of a named geoip location database to download, + * and the identifying information/configuration to download the named database from some database provider. + *
<p>
+ * That is, it has an id e.g. "my_db_config_1" and it says "download the file named XXXX from SomeCompany, and here's the + * magic token to use to do that." + */ +public record DatabaseConfiguration(String id, String name, Maxmind maxmind) implements Writeable, ToXContentObject { + + // id is a user selected signifier like 'my_domain_db' + // name is the name of a file that can be downloaded (like 'GeoIP2-Domain') + + // a configuration will have a 'type' like "maxmind", and that might have some more details, + // for now, though the important thing is that the json has to have it even though we don't model it meaningfully in this class + + public DatabaseConfiguration { + // these are invariants, not actual validation + Objects.requireNonNull(id); + Objects.requireNonNull(name); + Objects.requireNonNull(maxmind); + } + + /** + * An alphanumeric, followed by 0-126 alphanumerics, dashes, or underscores. That is, 1-127 alphanumerics, dashes, or underscores, + * but a leading dash or underscore isn't allowed (we're reserving leading dashes and underscores [and other odd characters] for + * Elastic and the future). + */ + private static final Pattern ID_PATTERN = Pattern.compile("\\p{Alnum}[_\\-\\p{Alnum}]{0,126}"); + + public static final Set MAXMIND_NAMES = Set.of( + "GeoIP2-Anonymous-IP", + "GeoIP2-City", + "GeoIP2-Connection-Type", + "GeoIP2-Country", + "GeoIP2-Domain", + "GeoIP2-Enterprise", + "GeoIP2-ISP" + + // in order to prevent a conflict between the (ordinary) geoip downloader and the enterprise geoip downloader, + // the enterprise geoip downloader is limited only to downloading the commercial files that the (ordinary) geoip downloader + // doesn't support out of the box -- in the future if we would like to relax this constraint, then we'll need to resolve that + // conflict at the same time. + + // "GeoLite2-ASN", + // "GeoLite2-City", + // "GeoLite2-Country" + ); + + private static final ParseField NAME = new ParseField("name"); + private static final ParseField MAXMIND = new ParseField("maxmind"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "database", + false, + (a, id) -> { + String name = (String) a[0]; + Maxmind maxmind = (Maxmind) a[1]; + return new DatabaseConfiguration(id, name, maxmind); + } + ); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), NAME); + PARSER.declareObject(ConstructingObjectParser.constructorArg(), (parser, id) -> Maxmind.PARSER.apply(parser, null), MAXMIND); + } + + public DatabaseConfiguration(StreamInput in) throws IOException { + this(in.readString(), in.readString(), new Maxmind(in)); + } + + public static DatabaseConfiguration parse(XContentParser parser, String id) { + return PARSER.apply(parser, id); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(id); + out.writeString(name); + maxmind.writeTo(out); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("name", name); + builder.field("maxmind", maxmind); + builder.endObject(); + return builder; + } + + /** + * An id is intended to be alphanumerics, dashes, and underscores (only), but we're reserving leading dashes and underscores for + * ourselves in the future, that is, they're not for the ones that users can PUT. 
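
To make the id rules above concrete, a small standalone sketch that re-declares the same pattern (uses java.util.regex.Pattern); the example ids are made up:

    Pattern idPattern = Pattern.compile("\\p{Alnum}[_\\-\\p{Alnum}]{0,126}"); // same expression as ID_PATTERN above
    idPattern.matcher("my_db_config_1").matches(); // true: starts with an alphanumeric, rest is allowed characters
    idPattern.matcher("_internal-db").matches();   // false: leading underscores (and dashes) are reserved
    idPattern.matcher("geo/ip").matches();         // false: '/' is not an allowed character
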
+ */ + static void validateId(String id) throws IllegalArgumentException { + if (Strings.isNullOrEmpty(id)) { + throw new IllegalArgumentException("invalid database configuration id [" + id + "]: must not be null or empty"); + } + MetadataCreateIndexService.validateIndexOrAliasName( + id, + (id1, description) -> new IllegalArgumentException("invalid database configuration id [" + id1 + "]: " + description) + ); + int byteCount = id.getBytes(StandardCharsets.UTF_8).length; + if (byteCount > 127) { + throw new IllegalArgumentException( + "invalid database configuration id [" + id + "]: id is too long, (" + byteCount + " > " + 127 + ")" + ); + } + if (ID_PATTERN.matcher(id).matches() == false) { + throw new IllegalArgumentException( + "invalid database configuration id [" + + id + + "]: id doesn't match required rules (alphanumerics, dashes, and underscores, only)" + ); + } + } + + public ActionRequestValidationException validate() { + ActionRequestValidationException err = new ActionRequestValidationException(); + + // how do we cross the id validation divide here? or do we? it seems unfortunate to not invoke it at all. + + // name validation + if (Strings.hasText(name) == false) { + err.addValidationError("invalid name [" + name + "]: cannot be empty"); + } + + if (MAXMIND_NAMES.contains(name) == false) { + err.addValidationError("invalid name [" + name + "]: must be a supported name ([" + MAXMIND_NAMES + "])"); + } + + // important: the name must be unique across all configurations of this same type, + // but we validate that in the cluster state update, not here. + try { + validateId(id); + } catch (IllegalArgumentException e) { + err.addValidationError(e.getMessage()); + } + return err.validationErrors().isEmpty() ? null : err; + } + + public record Maxmind(String accountId) implements Writeable, ToXContentObject { + + public Maxmind { + // this is an invariant, not actual validation + Objects.requireNonNull(accountId); + } + + private static final ParseField ACCOUNT_ID = new ParseField("account_id"); + + private static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>("database", false, (a, id) -> { + String accountId = (String) a[0]; + return new Maxmind(accountId); + }); + + static { + PARSER.declareString(ConstructingObjectParser.constructorArg(), ACCOUNT_ID); + } + + public Maxmind(StreamInput in) throws IOException { + this(in.readString()); + } + + public static Maxmind parse(XContentParser parser) { + return PARSER.apply(parser, null); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + out.writeString(accountId); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.field("account_id", accountId); + builder.endObject(); + return builder; + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java new file mode 100644 index 0000000000000..574f97e4c5e64 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadata.java @@ -0,0 +1,84 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.Diff; +import org.elasticsearch.cluster.SimpleDiffable; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.xcontent.ConstructingObjectParser; +import org.elasticsearch.xcontent.ParseField; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +/** + * {@code DatabaseConfigurationMetadata} encapsulates a {@link DatabaseConfiguration} as well as + * the additional meta information like version (a monotonically incrementing number), and last modified date. + */ +public record DatabaseConfigurationMetadata(DatabaseConfiguration database, long version, long modifiedDate) + implements + SimpleDiffable, + ToXContentObject { + + public static final ParseField DATABASE = new ParseField("database"); + public static final ParseField VERSION = new ParseField("version"); + public static final ParseField MODIFIED_DATE_MILLIS = new ParseField("modified_date_millis"); + public static final ParseField MODIFIED_DATE = new ParseField("modified_date"); + // later, things like this: + // static final ParseField LAST_SUCCESS = new ParseField("last_success"); + // static final ParseField LAST_FAILURE = new ParseField("last_failure"); + + public static final ConstructingObjectParser PARSER = new ConstructingObjectParser<>( + "database_metadata", + true, + a -> { + DatabaseConfiguration database = (DatabaseConfiguration) a[0]; + return new DatabaseConfigurationMetadata(database, (long) a[1], (long) a[2]); + } + ); + static { + PARSER.declareObject(ConstructingObjectParser.constructorArg(), DatabaseConfiguration::parse, DATABASE); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), VERSION); + PARSER.declareLong(ConstructingObjectParser.constructorArg(), MODIFIED_DATE_MILLIS); + } + + public static DatabaseConfigurationMetadata parse(XContentParser parser, String name) { + return PARSER.apply(parser, name); + } + + public DatabaseConfigurationMetadata(StreamInput in) throws IOException { + this(new DatabaseConfiguration(in), in.readVLong(), in.readVLong()); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + // this is cluster state serialization, the id is implicit and doesn't need to included here + // (we'll be a in a json map where the id is the key) + builder.startObject(); + builder.field(VERSION.getPreferredName(), version); + builder.timeField(MODIFIED_DATE_MILLIS.getPreferredName(), MODIFIED_DATE.getPreferredName(), modifiedDate); + builder.field(DATABASE.getPreferredName(), database); + builder.endObject(); + return builder; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + database.writeTo(out); + out.writeVLong(version); + out.writeVLong(modifiedDate); + } + + public static Diff readDiffFrom(StreamInput in) throws IOException { + return SimpleDiffable.readDiffFrom(DatabaseConfigurationMetadata::new, in); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..843cc986c47e7 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/DeleteDatabaseConfigurationAction.java @@ -0,0 +1,70 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; + +import java.io.IOException; +import java.util.Objects; + +public class DeleteDatabaseConfigurationAction extends ActionType { + public static final DeleteDatabaseConfigurationAction INSTANCE = new DeleteDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/delete"; + + protected DeleteDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final String databaseId; + + public Request(StreamInput in) throws IOException { + super(in); + databaseId = in.readString(); + } + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String databaseId) { + super(masterNodeTimeout, ackTimeout); + this.databaseId = Objects.requireNonNull(databaseId, "id may not be null"); + } + + public String getDatabaseId() { + return this.databaseId; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(databaseId); + } + + @Override + public int hashCode() { + return databaseId.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return Objects.equals(databaseId, other.databaseId); + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..546c0c2df821d --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/GetDatabaseConfigurationAction.java @@ -0,0 +1,142 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
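
A hypothetical caller-side sketch of the delete action; the timeouts, id, client, and listener are illustrative and not part of this change:

    DeleteDatabaseConfigurationAction.Request request = new DeleteDatabaseConfigurationAction.Request(
        TimeValue.timeValueSeconds(30), // master node timeout
        TimeValue.timeValueSeconds(30), // ack timeout
        "my_db_config_1"
    );
    client.execute(DeleteDatabaseConfigurationAction.INSTANCE, request, listener); // listener receives an AcknowledgedResponse
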
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.ToXContentObject; +import org.elasticsearch.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.DATABASE; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.MODIFIED_DATE_MILLIS; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata.VERSION; + +public class GetDatabaseConfigurationAction extends ActionType { + public static final GetDatabaseConfigurationAction INSTANCE = new GetDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/get"; + + protected GetDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final String[] databaseIds; + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, String... databaseIds) { + super(masterNodeTimeout, ackTimeout); + this.databaseIds = Objects.requireNonNull(databaseIds, "ids may not be null"); + } + + public Request(StreamInput in) throws IOException { + super(in); + databaseIds = in.readStringArray(); + } + + public String[] getDatabaseIds() { + return this.databaseIds; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeStringArray(databaseIds); + } + + @Override + public int hashCode() { + return Arrays.hashCode(databaseIds); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return Arrays.equals(databaseIds, other.databaseIds); + } + } + + public static class Response extends ActionResponse implements ToXContentObject { + + private final List databases; + + public Response(List databases) { + this.databases = List.copyOf(databases); // defensive copy + } + + public Response(StreamInput in) throws IOException { + this(in.readCollectionAsList(DatabaseConfigurationMetadata::new)); + } + + public List getDatabases() { + return this.databases; + } + + @Override + public String toString() { + return Strings.toString(this); + } + + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startArray("databases"); + for (DatabaseConfigurationMetadata item : databases) { + DatabaseConfiguration database = item.database(); + builder.startObject(); + builder.field("id", database.id()); // serialize including the id -- this is get response serialization + builder.field(VERSION.getPreferredName(), item.version()); + builder.timeField(MODIFIED_DATE_MILLIS.getPreferredName(), MODIFIED_DATE.getPreferredName(), item.modifiedDate()); + builder.field(DATABASE.getPreferredName(), database); + builder.endObject(); + } + builder.endArray(); + builder.endObject(); + return builder; + } + + 
@Override + public void writeTo(StreamOutput out) throws IOException { + out.writeCollection(databases); + } + + @Override + public int hashCode() { + return Objects.hash(databases); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Response other = (Response) obj; + return databases.equals(other.databases); + } + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..7bd5e1fa5cc68 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/PutDatabaseConfigurationAction.java @@ -0,0 +1,87 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.action.ActionRequestValidationException; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.support.master.AcknowledgedRequest; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.common.Strings; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Objects; + +public class PutDatabaseConfigurationAction extends ActionType { + public static final PutDatabaseConfigurationAction INSTANCE = new PutDatabaseConfigurationAction(); + public static final String NAME = "cluster:admin/ingest/geoip/database/put"; + + protected PutDatabaseConfigurationAction() { + super(NAME); + } + + public static class Request extends AcknowledgedRequest { + + private final DatabaseConfiguration database; + + public Request(TimeValue masterNodeTimeout, TimeValue ackTimeout, DatabaseConfiguration database) { + super(masterNodeTimeout, ackTimeout); + this.database = database; + } + + public Request(StreamInput in) throws IOException { + super(in); + database = new DatabaseConfiguration(in); + } + + public DatabaseConfiguration getDatabase() { + return this.database; + } + + public static Request parseRequest(TimeValue masterNodeTimeout, TimeValue ackTimeout, String id, XContentParser parser) { + return new Request(masterNodeTimeout, ackTimeout, DatabaseConfiguration.parse(parser, id)); + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + database.writeTo(out); + } + + @Override + public ActionRequestValidationException validate() { + return database.validate(); + } + + @Override + public int hashCode() { + return Objects.hash(database); + } + + @Override + public boolean equals(Object obj) { + if (obj == null) { + return false; + } + if (obj.getClass() != getClass()) { + return false; + } + Request other = (Request) obj; + return database.equals(other.database); + } + + @Override + public String toString() { + return Strings.toString((b, p) -> b.field(database.id(), database)); + } + } +} diff --git 
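
Similarly, a hypothetical sketch of building a put request programmatically; the id and account id are placeholders, and GeoIP2-Domain is one of the MAXMIND_NAMES permitted above:

    DatabaseConfiguration config = new DatabaseConfiguration(
        "my_db_config_1",
        "GeoIP2-Domain",
        new DatabaseConfiguration.Maxmind("123456")
    );
    PutDatabaseConfigurationAction.Request request = new PutDatabaseConfigurationAction.Request(
        TimeValue.timeValueSeconds(30),
        TimeValue.timeValueSeconds(30),
        config
    );
    assert request.validate() == null; // null means the configuration passed DatabaseConfiguration#validate
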
a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..4dc263224ad0a --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestDeleteDatabaseConfigurationAction.java @@ -0,0 +1,46 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.DELETE; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestDeleteDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(DELETE, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_delete_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { + final var req = new DeleteDatabaseConfigurationAction.Request( + getMasterNodeTimeout(request), + getAckTimeout(request), + request.param("id") + ); + return channel -> client.execute(DeleteDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..b237ceb638918 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestGetDatabaseConfigurationAction.java @@ -0,0 +1,47 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.common.Strings; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.GET; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestGetDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(GET, "/_ingest/geoip/database"), new Route(GET, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_get_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(final RestRequest request, final NodeClient client) { + final var req = new GetDatabaseConfigurationAction.Request( + getMasterNodeTimeout(request), + getAckTimeout(request), + Strings.splitStringByCommaToArray(request.param("id")) + ); + return channel -> client.execute(GetDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..62b01b930d5cd --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/RestPutDatabaseConfigurationAction.java @@ -0,0 +1,52 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.client.internal.node.NodeClient; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction.Request; +import org.elasticsearch.rest.BaseRestHandler; +import org.elasticsearch.rest.RestRequest; +import org.elasticsearch.rest.Scope; +import org.elasticsearch.rest.ServerlessScope; +import org.elasticsearch.rest.action.RestToXContentListener; + +import java.io.IOException; +import java.util.List; + +import static org.elasticsearch.rest.RestRequest.Method.PUT; +import static org.elasticsearch.rest.RestUtils.getAckTimeout; +import static org.elasticsearch.rest.RestUtils.getMasterNodeTimeout; + +@ServerlessScope(Scope.INTERNAL) +public class RestPutDatabaseConfigurationAction extends BaseRestHandler { + + @Override + public List routes() { + return List.of(new Route(PUT, "/_ingest/geoip/database/{id}")); + } + + @Override + public String getName() { + return "geoip_put_database_configuration"; + } + + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) throws IOException { + final Request req; + try (var parser = request.contentParser()) { + req = PutDatabaseConfigurationAction.Request.parseRequest( + getMasterNodeTimeout(request), + getAckTimeout(request), + request.param("id"), + parser + ); + } + return channel -> client.execute(PutDatabaseConfigurationAction.INSTANCE, req, new RestToXContentListener<>(channel)); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..43aacee956279 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportDeleteDatabaseConfigurationAction.java @@ -0,0 +1,128 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
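
Taken together, the three handlers above expose PUT, GET, and DELETE under /_ingest/geoip/database. The body accepted by the PUT route follows DatabaseConfiguration's parser; a sketch of the expected shape, with illustrative values:

    // e.g. PUT /_ingest/geoip/database/my_db_config_1
    String putBody = """
        {
          "name": "GeoIP2-Domain",
          "maxmind": {
            "account_id": "123456"
          }
        }
        """;
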
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.ingest.geoip.direct.DeleteDatabaseConfigurationAction.Request; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.HashMap; +import java.util.Map; + +public class TransportDeleteDatabaseConfigurationAction extends TransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportDeleteDatabaseConfigurationAction.class); + + private static final SimpleBatchedExecutor DELETE_TASK_EXECUTOR = new SimpleBatchedExecutor<>() { + @Override + public Tuple executeTask(DeleteDatabaseConfigurationTask task, ClusterState clusterState) throws Exception { + return Tuple.tuple(task.execute(clusterState), null); + } + + @Override + public void taskSucceeded(DeleteDatabaseConfigurationTask task, Void unused) { + logger.trace("Updated cluster state for deletion of database configuration [{}]", task.databaseId); + task.listener.onResponse(AcknowledgedResponse.TRUE); + } + }; + + private final MasterServiceTaskQueue deleteDatabaseConfigurationTaskQueue; + + @Inject + public TransportDeleteDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + DeleteDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + Request::new, + indexNameExpressionResolver, + AcknowledgedResponse::readFrom, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.deleteDatabaseConfigurationTaskQueue = clusterService.createTaskQueue( + "delete-geoip-database-configuration-state-update", + Priority.NORMAL, + DELETE_TASK_EXECUTOR + ); + } + + @Override + protected void masterOperation(Task task, Request request, ClusterState state, ActionListener listener) + throws Exception { + final String id = request.getDatabaseId(); + final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + if (geoIpMeta.getDatabases().containsKey(id) == false) { + throw new ResourceNotFoundException("Database configuration not found: {}", id); + } + 
deleteDatabaseConfigurationTaskQueue.submitTask( + Strings.format("delete-geoip-database-configuration-[%s]", id), + new DeleteDatabaseConfigurationTask(listener, id), + null + ); + } + + private record DeleteDatabaseConfigurationTask(ActionListener listener, String databaseId) + implements + ClusterStateTaskListener { + + ClusterState execute(ClusterState currentState) throws Exception { + final IngestGeoIpMetadata geoIpMeta = currentState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + logger.debug("deleting database configuration [{}]", databaseId); + Map databases = new HashMap<>(geoIpMeta.getDatabases()); + databases.remove(databaseId); + + Metadata currentMeta = currentState.metadata(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentMeta).putCustom(IngestGeoIpMetadata.TYPE, new IngestGeoIpMetadata(databases))) + .build(); + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + @Override + protected ClusterBlockException checkBlock(Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..a14a143e3f404 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportGetDatabaseConfigurationAction.java @@ -0,0 +1,109 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.ResourceNotFoundException; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.regex.Regex; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.LinkedHashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +public class TransportGetDatabaseConfigurationAction extends TransportMasterNodeAction< + GetDatabaseConfigurationAction.Request, + GetDatabaseConfigurationAction.Response> { + + @Inject + public TransportGetDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + GetDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + GetDatabaseConfigurationAction.Request::new, + indexNameExpressionResolver, + GetDatabaseConfigurationAction.Response::new, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + } + + @Override + protected void masterOperation( + final Task task, + final GetDatabaseConfigurationAction.Request request, + final ClusterState state, + final ActionListener listener + ) { + final Set ids; + if (request.getDatabaseIds().length == 0) { + // if we did not ask for a specific name, then return all databases + ids = Set.of("*"); + } else { + ids = new LinkedHashSet<>(Arrays.asList(request.getDatabaseIds())); + } + + if (ids.size() > 1 && ids.stream().anyMatch(Regex::isSimpleMatchPattern)) { + throw new IllegalArgumentException( + "wildcard only supports a single value, please use comma-separated values or a single wildcard value" + ); + } + + final IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + List results = new ArrayList<>(); + + for (String id : ids) { + if (Regex.isSimpleMatchPattern(id)) { + for (Map.Entry entry : geoIpMeta.getDatabases().entrySet()) { + if (Regex.simpleMatch(id, entry.getKey())) { + results.add(entry.getValue()); + } + } + } else { + DatabaseConfigurationMetadata meta = geoIpMeta.getDatabases().get(id); + if (meta == null) { + listener.onFailure(new ResourceNotFoundException("database configuration not found: {}", id)); + return; + } else { + results.add(meta); + } + } + } + + listener.onResponse(new GetDatabaseConfigurationAction.Response(results)); + } + + @Override + protected ClusterBlockException checkBlock(GetDatabaseConfigurationAction.Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_READ); + } +} diff --git a/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java 
b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java new file mode 100644 index 0000000000000..540be68671d38 --- /dev/null +++ b/modules/ingest-geoip/src/main/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationAction.java @@ -0,0 +1,178 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.master.AcknowledgedResponse; +import org.elasticsearch.action.support.master.TransportMasterNodeAction; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.SimpleBatchedExecutor; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlockLevel; +import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.util.concurrent.EsExecutors; +import org.elasticsearch.core.Nullable; +import org.elasticsearch.core.Strings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.ingest.geoip.direct.PutDatabaseConfigurationAction.Request; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportService; + +import java.time.Instant; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public class TransportPutDatabaseConfigurationAction extends TransportMasterNodeAction { + + private static final Logger logger = LogManager.getLogger(TransportPutDatabaseConfigurationAction.class); + + private static final SimpleBatchedExecutor UPDATE_TASK_EXECUTOR = new SimpleBatchedExecutor<>() { + @Override + public Tuple executeTask(UpdateDatabaseConfigurationTask task, ClusterState clusterState) throws Exception { + return Tuple.tuple(task.execute(clusterState), null); + } + + @Override + public void taskSucceeded(UpdateDatabaseConfigurationTask task, Void unused) { + logger.trace("Updated cluster state for creation-or-update of database configuration [{}]", task.database.id()); + task.listener.onResponse(AcknowledgedResponse.TRUE); + } + }; + + private final MasterServiceTaskQueue updateDatabaseConfigurationTaskQueue; + + @Inject + public TransportPutDatabaseConfigurationAction( + TransportService transportService, + ClusterService clusterService, + ThreadPool threadPool, + ActionFilters actionFilters, + IndexNameExpressionResolver indexNameExpressionResolver + ) { + super( + PutDatabaseConfigurationAction.NAME, + transportService, + clusterService, + threadPool, + actionFilters, + Request::new, + indexNameExpressionResolver, + 
AcknowledgedResponse::readFrom, + EsExecutors.DIRECT_EXECUTOR_SERVICE + ); + this.updateDatabaseConfigurationTaskQueue = clusterService.createTaskQueue( + "update-geoip-database-configuration-state-update", + Priority.NORMAL, + UPDATE_TASK_EXECUTOR + ); + } + + @Override + protected void masterOperation(Task task, Request request, ClusterState state, ActionListener listener) { + final String id = request.getDatabase().id(); + updateDatabaseConfigurationTaskQueue.submitTask( + Strings.format("update-geoip-database-configuration-[%s]", id), + new UpdateDatabaseConfigurationTask(listener, request.getDatabase()), + null + ); + } + + /** + * Returns 'true' if the database configuration is effectually the same, and thus can be a no-op update. + */ + static boolean isNoopUpdate(@Nullable DatabaseConfigurationMetadata existingDatabase, DatabaseConfiguration newDatabase) { + if (existingDatabase == null) { + return false; + } else { + return newDatabase.equals(existingDatabase.database()); + } + } + + static void validatePrerequisites(DatabaseConfiguration database, ClusterState state) { + // we need to verify that the database represents a unique file (name) among the various databases for this same provider + IngestGeoIpMetadata geoIpMeta = state.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + Optional sameName = geoIpMeta.getDatabases() + .values() + .stream() + .map(DatabaseConfigurationMetadata::database) + // .filter(d -> d.type().equals(database.type())) // of the same type (right now the type is always just 'maxmind') + .filter(d -> d.id().equals(database.id()) == false) // and a different id + .filter(d -> d.name().equals(database.name())) // but has the same name! + .findFirst(); + + sameName.ifPresent(d -> { + throw new IllegalArgumentException( + Strings.format("database [%s] is already being downloaded via configuration [%s]", database.name(), d.id()) + ); + }); + } + + private record UpdateDatabaseConfigurationTask(ActionListener listener, DatabaseConfiguration database) + implements + ClusterStateTaskListener { + + ClusterState execute(ClusterState currentState) throws Exception { + IngestGeoIpMetadata geoIpMeta = currentState.metadata().custom(IngestGeoIpMetadata.TYPE, IngestGeoIpMetadata.EMPTY); + + String id = database.id(); + final DatabaseConfigurationMetadata existingDatabase = geoIpMeta.getDatabases().get(id); + // double-check for no-op in the state update task, in case it was changed/reset in the meantime + if (isNoopUpdate(existingDatabase, database)) { + return currentState; + } + + validatePrerequisites(database, currentState); + + Map databases = new HashMap<>(geoIpMeta.getDatabases()); + databases.put( + id, + new DatabaseConfigurationMetadata( + database, + existingDatabase == null ? 
1 : existingDatabase.version() + 1, + Instant.now().toEpochMilli() + ) + ); + geoIpMeta = new IngestGeoIpMetadata(databases); + + if (existingDatabase == null) { + logger.debug("adding new database configuration [{}]", id); + } else { + logger.debug("updating existing database configuration [{}]", id); + } + + Metadata currentMeta = currentState.metadata(); + return ClusterState.builder(currentState) + .metadata(Metadata.builder(currentMeta).putCustom(IngestGeoIpMetadata.TYPE, geoIpMeta)) + .build(); + } + + @Override + public void onFailure(Exception e) { + listener.onFailure(e); + } + } + + @Override + protected ClusterBlockException checkBlock(Request request, ClusterState state) { + return state.blocks().globalBlockedException(ClusterBlockLevel.METADATA_WRITE); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java new file mode 100644 index 0000000000000..58cb566165db2 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpDownloaderTests.java @@ -0,0 +1,538 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.ActionRequest; +import org.elasticsearch.action.ActionResponse; +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.DocWriteRequest.OpType; +import org.elasticsearch.action.DocWriteResponse; +import org.elasticsearch.action.admin.indices.flush.FlushAction; +import org.elasticsearch.action.admin.indices.flush.FlushRequest; +import org.elasticsearch.action.admin.indices.refresh.RefreshAction; +import org.elasticsearch.action.admin.indices.refresh.RefreshRequest; +import org.elasticsearch.action.index.IndexRequest; +import org.elasticsearch.action.index.IndexResponse; +import org.elasticsearch.action.index.TransportIndexAction; +import org.elasticsearch.action.support.broadcast.BroadcastResponse; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.block.ClusterBlockException; +import org.elasticsearch.cluster.block.ClusterBlocks; +import org.elasticsearch.cluster.metadata.IndexMetadata; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.hash.MessageDigests; +import org.elasticsearch.common.settings.ClusterSettings; +import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.core.Tuple; +import org.elasticsearch.ingest.EnterpriseGeoIpTask; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTasksCustomMetadata; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.telemetry.metric.MeterRegistry; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.test.client.NoOpClient; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.xcontent.XContentType; +import org.hamcrest.Matchers; +import org.junit.After; +import org.junit.Before; + +import 
java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.net.PasswordAuthentication; +import java.nio.charset.StandardCharsets; +import java.security.MessageDigest; +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.BiConsumer; + +import static org.elasticsearch.ingest.geoip.DatabaseNodeServiceTests.createClusterState; +import static org.elasticsearch.ingest.geoip.EnterpriseGeoIpDownloader.MAX_CHUNK_SIZE; +import static org.elasticsearch.tasks.TaskId.EMPTY_TASK_ID; +import static org.hamcrest.Matchers.equalTo; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.ArgumentMatchers.anyString; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.verifyNoInteractions; +import static org.mockito.Mockito.when; + +public class EnterpriseGeoIpDownloaderTests extends ESTestCase { + + private HttpClient httpClient; + private ClusterService clusterService; + private ThreadPool threadPool; + private MockClient client; + private EnterpriseGeoIpDownloader geoIpDownloader; + + @Before + public void setup() throws IOException { + httpClient = mock(HttpClient.class); + when(httpClient.getBytes(any(), anyString())).thenReturn( + "e4a3411cdd7b21eaf18675da5a7f9f360d33c6882363b2c19c38715834c9e836 GeoIP2-City_20240709.tar.gz".getBytes(StandardCharsets.UTF_8) + ); + clusterService = mock(ClusterService.class); + threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); + when(clusterService.getClusterSettings()).thenReturn( + new ClusterSettings(Settings.EMPTY, Set.of(GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING)) + ); + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of())); + when(clusterService.state()).thenReturn(state); + client = new MockClient(threadPool); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + { + EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams geoIpTaskParams = mock(EnterpriseGeoIpTask.EnterpriseGeoIpTaskParams.class); + when(geoIpTaskParams.getWriteableName()).thenReturn(EnterpriseGeoIpTask.ENTERPRISE_GEOIP_DOWNLOADER); + init(new PersistentTasksService(clusterService, threadPool, client), null, null, 0); + } + }; + } + + @After + public void tearDown() throws Exception { + super.tearDown(); + threadPool.shutdownNow(); + } + + public void testGetChunkEndOfStream() throws IOException { + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(new InputStream() { + @Override + public int read() { + return -1; + } + }); + assertArrayEquals(new byte[0], chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(new ByteArrayInputStream(new byte[0])); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkLessThanChunkSize() throws IOException { + ByteArrayInputStream is = new ByteArrayInputStream(new byte[] { 1, 2, 3, 4 }); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[] { 1, 2, 3, 4 }, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + 
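+        // a second read on the exhausted stream returns an empty chunk, signalling end of input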
assertArrayEquals(new byte[0], chunk); + + } + + public void testGetChunkExactlyChunkSize() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE]; + for (int i = 0; i < MAX_CHUNK_SIZE; i++) { + bigArray[i] = (byte) i; + } + ByteArrayInputStream is = new ByteArrayInputStream(bigArray); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(bigArray, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkMoreThanChunkSize() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE * 2]; + for (int i = 0; i < MAX_CHUNK_SIZE * 2; i++) { + bigArray[i] = (byte) i; + } + byte[] smallArray = new byte[MAX_CHUNK_SIZE]; + System.arraycopy(bigArray, 0, smallArray, 0, MAX_CHUNK_SIZE); + ByteArrayInputStream is = new ByteArrayInputStream(bigArray); + byte[] chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(smallArray, chunk); + System.arraycopy(bigArray, MAX_CHUNK_SIZE, smallArray, 0, MAX_CHUNK_SIZE); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(smallArray, chunk); + chunk = EnterpriseGeoIpDownloader.getChunk(is); + assertArrayEquals(new byte[0], chunk); + } + + public void testGetChunkRethrowsIOException() { + expectThrows(IOException.class, () -> EnterpriseGeoIpDownloader.getChunk(new InputStream() { + @Override + public int read() throws IOException { + throw new IOException(); + } + })); + } + + public void testIndexChunksNoData() throws IOException { + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + InputStream empty = new ByteArrayInputStream(new byte[0]); + assertEquals( + Tuple.tuple(0, "d41d8cd98f00b204e9800998ecf8427e"), + geoIpDownloader.indexChunks( + "test", + empty, + 0, + MessageDigests.sha256(), + "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855", + 0 + ) + ); + } + + public void testIndexChunksMd5Mismatch() { + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + IOException exception = expectThrows( + IOException.class, + () -> geoIpDownloader.indexChunks("test", new ByteArrayInputStream(new byte[0]), 0, MessageDigests.sha256(), "123123", 0) + ); + assertEquals( + "checksum mismatch, expected [123123], actual [e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855]", + exception.getMessage() + ); + } + + public void testIndexChunks() throws IOException { + byte[] bigArray = new byte[MAX_CHUNK_SIZE + 20]; + for (int i = 0; i 
< MAX_CHUNK_SIZE + 20; i++) { + bigArray[i] = (byte) i; + } + byte[][] chunksData = new byte[2][]; + chunksData[0] = new byte[MAX_CHUNK_SIZE]; + System.arraycopy(bigArray, 0, chunksData[0], 0, MAX_CHUNK_SIZE); + chunksData[1] = new byte[20]; + System.arraycopy(bigArray, MAX_CHUNK_SIZE, chunksData[1], 0, 20); + + AtomicInteger chunkIndex = new AtomicInteger(); + + client.addHandler(TransportIndexAction.TYPE, (IndexRequest request, ActionListener listener) -> { + int chunk = chunkIndex.getAndIncrement(); + assertEquals(OpType.CREATE, request.opType()); + assertThat(request.id(), Matchers.startsWith("test_" + (chunk + 15) + "_")); + assertEquals(XContentType.SMILE, request.getContentType()); + Map source = request.sourceAsMap(); + assertEquals("test", source.get("name")); + assertArrayEquals(chunksData[chunk], (byte[]) source.get("data")); + assertEquals(chunk + 15, source.get("chunk")); + listener.onResponse(mock(IndexResponse.class)); + }); + client.addHandler(FlushAction.INSTANCE, (FlushRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + }); + client.addHandler( + RefreshAction.INSTANCE, + (RefreshRequest request, ActionListener flushResponseActionListener) -> { + assertArrayEquals(new String[] { EnterpriseGeoIpDownloader.DATABASES_INDEX }, request.indices()); + flushResponseActionListener.onResponse(mock(BroadcastResponse.class)); + } + ); + + InputStream big = new ByteArrayInputStream(bigArray); + assertEquals( + Tuple.tuple(17, "a67563dfa8f3cba8b8cff61eb989a749"), + geoIpDownloader.indexChunks( + "test", + big, + 15, + MessageDigests.sha256(), + "f2304545f224ff9ffcc585cb0a993723f911e03beb552cc03937dd443e931eab", + 0 + ) + ); + + assertEquals(2, chunkIndex.get()); + } + + public void testProcessDatabaseNew() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + AtomicBoolean indexedChunks = new AtomicBoolean(false); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { + fail(); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedMd5, + long start + ) { + assertSame(bais, is); + assertEquals(0, chunk); + indexedChunks.set(true); + return Tuple.tuple(11, expectedMd5); + } + + @Override + void updateTaskState() { + assertEquals(0, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + assertEquals("test.mmdb", name); + assertEquals(0, firstChunk); + } + }; + + geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, 
databaseConfiguration); + assertThat(indexedChunks.get(), equalTo(true)); + } + + public void testProcessDatabaseUpdate() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + AtomicBoolean indexedChunks = new AtomicBoolean(false); + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata metadata) { + fail(); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedMd5, + long start + ) { + assertSame(bais, is); + assertEquals(9, chunk); + indexedChunks.set(true); + return Tuple.tuple(1, expectedMd5); + } + + @Override + void updateTaskState() { + assertEquals(9, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + assertEquals("test.mmdb", name); + assertEquals(9, firstChunk); + } + }; + + geoIpDownloader.setState(EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(0, 5, 8, "0", 0))); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, databaseConfiguration); + assertThat(indexedChunks.get(), equalTo(true)); + } + + public void testProcessDatabaseSame() throws IOException { + GeoIpTaskState.Metadata metadata = new GeoIpTaskState.Metadata( + 0, + 4, + 10, + "1", + 0, + "e4a3411cdd7b21eaf18675da5a7f9f360d33c6882363b2c19c38715834c9e836" + ); + EnterpriseGeoIpTaskState taskState = EnterpriseGeoIpTaskState.EMPTY.put("test.mmdb", metadata); + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get(any(), any())).thenReturn(bais); + + geoIpDownloader = new EnterpriseGeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + (input) -> new HttpClient.PasswordAuthenticationHolder("name", "password".toCharArray()) + ) { + @Override + protected void updateTimestamp(String name, GeoIpTaskState.Metadata newMetadata) { + assertEquals(metadata, newMetadata); + assertEquals("test.mmdb", name); + } + + @Override + Tuple indexChunks( + String name, + InputStream is, + int chunk, + MessageDigest digest, + String expectedChecksum, + long start + ) { + fail(); + return Tuple.tuple(0, expectedChecksum); + } + + @Override + void updateTaskState() { + fail(); + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + fail(); + } + }; + geoIpDownloader.setState(taskState); + PasswordAuthentication auth = new PasswordAuthentication("name", "password".toCharArray()); + String id = randomIdentifier(); + DatabaseConfiguration databaseConfiguration = new DatabaseConfiguration(id, "test", new DatabaseConfiguration.Maxmind("name")); + geoIpDownloader.processDatabase(auth, databaseConfiguration); 
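+        // reaching this point without tripping the fail() overrides above shows that a matching checksum makes
+        // processDatabase skip indexing and task-state updates, only refreshing the timestamp via updateTimestamp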
+ } + + public void testUpdateDatabasesWriteBlock() { + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of())); + var geoIpIndex = state.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX).getWriteIndex().getName(); + state = ClusterState.builder(state) + .blocks(new ClusterBlocks.Builder().addIndexBlock(geoIpIndex, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .build(); + when(clusterService.state()).thenReturn(state); + var e = expectThrows(ClusterBlockException.class, () -> geoIpDownloader.updateDatabases()); + assertThat( + e.getMessage(), + equalTo( + "index [" + + geoIpIndex + + "] blocked by: [TOO_MANY_REQUESTS/12/disk usage exceeded flood-stage watermark, " + + "index has read-only-allow-delete block];" + ) + ); + verifyNoInteractions(httpClient); + } + + public void testUpdateDatabasesIndexNotReady() throws IOException { + ClusterState state = createClusterState(new PersistentTasksCustomMetadata(1L, Map.of()), true); + var geoIpIndex = state.getMetadata().getIndicesLookup().get(EnterpriseGeoIpDownloader.DATABASES_INDEX).getWriteIndex().getName(); + state = ClusterState.builder(state) + .blocks(new ClusterBlocks.Builder().addIndexBlock(geoIpIndex, IndexMetadata.INDEX_READ_ONLY_ALLOW_DELETE_BLOCK)) + .build(); + when(clusterService.state()).thenReturn(state); + geoIpDownloader.updateDatabases(); + verifyNoInteractions(httpClient); + } + + private GeoIpTaskState.Metadata newGeoIpTaskStateMetadata(boolean expired) { + Instant lastChecked; + if (expired) { + lastChecked = Instant.now().minus(randomIntBetween(31, 100), ChronoUnit.DAYS); + } else { + lastChecked = Instant.now().minus(randomIntBetween(0, 29), ChronoUnit.DAYS); + } + return new GeoIpTaskState.Metadata(0, 0, 0, randomAlphaOfLength(20), lastChecked.toEpochMilli()); + } + + private static class MockClient extends NoOpClient { + + private final Map, BiConsumer>> handlers = new HashMap<>(); + + private MockClient(ThreadPool threadPool) { + super(threadPool); + } + + public void addHandler( + ActionType action, + BiConsumer> listener + ) { + handlers.put(action, listener); + } + + @SuppressWarnings("unchecked") + @Override + protected void doExecute( + ActionType action, + Request request, + ActionListener listener + ) { + if (handlers.containsKey(action)) { + BiConsumer> biConsumer = (BiConsumer>) handlers.get( + action + ); + biConsumer.accept(request, listener); + } else { + throw new IllegalStateException("unexpected action called [" + action.name() + "]"); + } + } + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java new file mode 100644 index 0000000000000..a136f90780989 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/EnterpriseGeoIpTaskStateSerializationTests.java @@ -0,0 +1,72 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class EnterpriseGeoIpTaskStateSerializationTests extends AbstractXContentSerializingTestCase { + @Override + protected GeoIpTaskState doParseInstance(XContentParser parser) throws IOException { + return GeoIpTaskState.fromXContent(parser); + } + + @Override + protected Writeable.Reader instanceReader() { + return GeoIpTaskState::new; + } + + @Override + protected GeoIpTaskState createTestInstance() { + GeoIpTaskState state = GeoIpTaskState.EMPTY; + int databaseCount = randomInt(20); + for (int i = 0; i < databaseCount; i++) { + state = state.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return state; + } + + @Override + protected GeoIpTaskState mutateInstance(GeoIpTaskState instance) { + Map databases = new HashMap<>(instance.getDatabases()); + switch (between(0, 2)) { + case 0: + String databaseName = randomValueOtherThanMany(databases::containsKey, () -> randomAlphaOfLengthBetween(5, 10)); + databases.put(databaseName, createRandomMetadata()); + return new GeoIpTaskState(databases); + case 1: + if (databases.size() > 0) { + String randomDatabaseName = databases.keySet().iterator().next(); + databases.put(randomDatabaseName, createRandomMetadata()); + } else { + databases.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return new GeoIpTaskState(databases); + case 2: + if (databases.size() > 0) { + String randomDatabaseName = databases.keySet().iterator().next(); + databases.remove(randomDatabaseName); + } else { + databases.put(randomAlphaOfLengthBetween(5, 10), createRandomMetadata()); + } + return new GeoIpTaskState(databases); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + private GeoIpTaskState.Metadata createRandomMetadata() { + return new GeoIpTaskState.Metadata(randomLong(), randomInt(), randomInt(), randomAlphaOfLength(32), randomLong()); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java index 9cc5405c1b617..06b2605bd6d41 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpDownloaderTests.java @@ -30,11 +30,17 @@ import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.settings.ClusterSettings; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.index.reindex.BulkByScrollResponse; +import org.elasticsearch.index.reindex.DeleteByQueryAction; +import org.elasticsearch.index.reindex.DeleteByQueryRequest; import org.elasticsearch.ingest.geoip.stats.GeoIpDownloaderStats; import org.elasticsearch.node.Node; +import org.elasticsearch.persistent.PersistentTaskResponse; import org.elasticsearch.persistent.PersistentTaskState; import org.elasticsearch.persistent.PersistentTasksCustomMetadata; import org.elasticsearch.persistent.PersistentTasksCustomMetadata.PersistentTask; +import org.elasticsearch.persistent.PersistentTasksService; +import org.elasticsearch.persistent.UpdatePersistentTaskStatusAction; import org.elasticsearch.telemetry.metric.MeterRegistry; 
import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.client.NoOpClient; @@ -49,6 +55,9 @@ import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.charset.StandardCharsets; +import java.time.Instant; +import java.time.temporal.ChronoUnit; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -63,6 +72,8 @@ import static org.elasticsearch.ingest.geoip.GeoIpDownloader.MAX_CHUNK_SIZE; import static org.elasticsearch.tasks.TaskId.EMPTY_TASK_ID; import static org.hamcrest.Matchers.equalTo; +import static org.hamcrest.Matchers.greaterThanOrEqualTo; +import static org.mockito.ArgumentMatchers.anyString; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.verifyNoInteractions; import static org.mockito.Mockito.when; @@ -76,8 +87,9 @@ public class GeoIpDownloaderTests extends ESTestCase { private GeoIpDownloader geoIpDownloader; @Before - public void setup() { + public void setup() throws IOException { httpClient = mock(HttpClient.class); + when(httpClient.getBytes(anyString())).thenReturn("[]".getBytes(StandardCharsets.UTF_8)); clusterService = mock(ClusterService.class); threadPool = new ThreadPool(Settings.builder().put(Node.NODE_NAME_SETTING.getKey(), "test").build(), MeterRegistry.NOOP); when(clusterService.getClusterSettings()).thenReturn( @@ -109,7 +121,13 @@ public void setup() { () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), () -> GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING.getDefault(Settings.EMPTY), () -> true - ); + ) { + { + GeoIpTaskParams geoIpTaskParams = mock(GeoIpTaskParams.class); + when(geoIpTaskParams.getWriteableName()).thenReturn(GeoIpDownloader.GEOIP_DOWNLOADER); + init(new PersistentTasksService(clusterService, threadPool, client), null, null, 0); + } + }; } @After @@ -290,8 +308,8 @@ int indexChunks(String name, InputStream is, int chunk, String expectedMd5, long @Override void updateTaskState() { - assertEquals(0, state.get("test.mmdb").firstChunk()); - assertEquals(10, state.get("test.mmdb").lastChunk()); + assertEquals(0, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); } @Override @@ -341,8 +359,8 @@ int indexChunks(String name, InputStream is, int chunk, String expectedMd5, long @Override void updateTaskState() { - assertEquals(9, state.get("test.mmdb").firstChunk()); - assertEquals(10, state.get("test.mmdb").lastChunk()); + assertEquals(9, state.getDatabases().get("test.mmdb").firstChunk()); + assertEquals(10, state.getDatabases().get("test.mmdb").lastChunk()); } @Override @@ -408,6 +426,55 @@ void deleteOldChunks(String name, int firstChunk) { assertEquals(0, stats.getFailedDownloads()); } + public void testCleanDatabases() throws IOException { + ByteArrayInputStream bais = new ByteArrayInputStream(new byte[0]); + when(httpClient.get("http://a.b/t1")).thenReturn(bais); + + final AtomicInteger count = new AtomicInteger(0); + + geoIpDownloader = new GeoIpDownloader( + client, + httpClient, + clusterService, + threadPool, + Settings.EMPTY, + 1, + "", + "", + "", + EMPTY_TASK_ID, + Map.of(), + () -> GeoIpDownloaderTaskExecutor.POLL_INTERVAL_SETTING.getDefault(Settings.EMPTY), + () -> GeoIpDownloaderTaskExecutor.EAGER_DOWNLOAD_SETTING.getDefault(Settings.EMPTY), + () -> true + ) { + @Override + void updateDatabases() throws IOException { + // noop + } + + @Override + void deleteOldChunks(String name, int firstChunk) { + 
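+                // record each cleanup call and verify which database and chunk range it was asked to clean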
count.incrementAndGet(); + assertEquals("test.mmdb", name); + assertEquals(21, firstChunk); + } + + @Override + void updateTaskState() { + // noop + } + }; + + geoIpDownloader.setState(GeoIpTaskState.EMPTY.put("test.mmdb", new GeoIpTaskState.Metadata(10, 10, 20, "md5", 20))); + geoIpDownloader.runDownloader(); + geoIpDownloader.runDownloader(); + GeoIpDownloaderStats stats = geoIpDownloader.getStatus(); + assertEquals(1, stats.getExpiredDatabases()); + assertEquals(2, count.get()); // somewhat surprising, not necessarily wrong + assertEquals(18, geoIpDownloader.state.getDatabases().get("test.mmdb").lastCheck()); // highly surprising, seems wrong + } + @SuppressWarnings("unchecked") public void testUpdateTaskState() { geoIpDownloader = new GeoIpDownloader( @@ -541,6 +608,78 @@ public void testUpdateDatabasesIndexNotReady() { verifyNoInteractions(httpClient); } + public void testThatRunDownloaderDeletesExpiredDatabases() { + /* + * This test puts some expired databases and some non-expired ones into the GeoIpTaskState, and then calls runDownloader(), making + * sure that the expired databases have been deleted. + */ + AtomicInteger updatePersistentTaskStateCount = new AtomicInteger(0); + AtomicInteger deleteCount = new AtomicInteger(0); + int expiredDatabasesCount = randomIntBetween(1, 100); + int unexpiredDatabasesCount = randomIntBetween(0, 100); + Map databases = new HashMap<>(); + for (int i = 0; i < expiredDatabasesCount; i++) { + databases.put("expiredDatabase" + i, newGeoIpTaskStateMetadata(true)); + } + for (int i = 0; i < unexpiredDatabasesCount; i++) { + databases.put("unexpiredDatabase" + i, newGeoIpTaskStateMetadata(false)); + } + GeoIpTaskState geoIpTaskState = new GeoIpTaskState(databases); + geoIpDownloader.setState(geoIpTaskState); + client.addHandler( + UpdatePersistentTaskStatusAction.INSTANCE, + (UpdatePersistentTaskStatusAction.Request request, ActionListener taskResponseListener) -> { + PersistentTasksCustomMetadata.Assignment assignment = mock(PersistentTasksCustomMetadata.Assignment.class); + PersistentTasksCustomMetadata.PersistentTask persistentTask = new PersistentTasksCustomMetadata.PersistentTask<>( + GeoIpDownloader.GEOIP_DOWNLOADER, + GeoIpDownloader.GEOIP_DOWNLOADER, + new GeoIpTaskParams(), + request.getAllocationId(), + assignment + ); + updatePersistentTaskStateCount.incrementAndGet(); + taskResponseListener.onResponse(new PersistentTaskResponse(new PersistentTask<>(persistentTask, request.getState()))); + } + ); + client.addHandler( + DeleteByQueryAction.INSTANCE, + (DeleteByQueryRequest request, ActionListener flushResponseActionListener) -> { + deleteCount.incrementAndGet(); + } + ); + geoIpDownloader.runDownloader(); + assertThat(geoIpDownloader.getStatus().getExpiredDatabases(), equalTo(expiredDatabasesCount)); + for (int i = 0; i < expiredDatabasesCount; i++) { + // This currently fails because we subtract one millisecond from the lastChecked time + // assertThat(geoIpDownloader.state.getDatabases().get("expiredDatabase" + i).lastCheck(), equalTo(-1L)); + } + for (int i = 0; i < unexpiredDatabasesCount; i++) { + assertThat( + geoIpDownloader.state.getDatabases().get("unexpiredDatabase" + i).lastCheck(), + greaterThanOrEqualTo(Instant.now().minus(30, ChronoUnit.DAYS).toEpochMilli()) + ); + } + assertThat(deleteCount.get(), equalTo(expiredDatabasesCount)); + assertThat(updatePersistentTaskStateCount.get(), equalTo(expiredDatabasesCount)); + geoIpDownloader.runDownloader(); + /* + * The following two lines assert current behavior that might not be 
desirable -- we continue to delete expired databases every + * time that runDownloader runs. This seems unnecessary. + */ + assertThat(deleteCount.get(), equalTo(expiredDatabasesCount * 2)); + assertThat(updatePersistentTaskStateCount.get(), equalTo(expiredDatabasesCount * 2)); + } + + private GeoIpTaskState.Metadata newGeoIpTaskStateMetadata(boolean expired) { + Instant lastChecked; + if (expired) { + lastChecked = Instant.now().minus(randomIntBetween(31, 100), ChronoUnit.DAYS); + } else { + lastChecked = Instant.now().minus(randomIntBetween(0, 29), ChronoUnit.DAYS); + } + return new GeoIpTaskState.Metadata(0, 0, 0, randomAlphaOfLength(20), lastChecked.toEpochMilli()); + } + private static class MockClient extends NoOpClient { private final Map, BiConsumer>> handlers = new HashMap<>(); diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java index 6276155d9f083..87d1881a9e743 100644 --- a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/GeoIpProcessorTests.java @@ -463,7 +463,7 @@ public void testEnterprise() throws Exception { assertThat(geoData.get("residential_proxy"), equalTo(false)); assertThat(geoData.get("domain"), equalTo("frpt.net")); assertThat(geoData.get("isp"), equalTo("Fairpoint Communications")); - assertThat(geoData.get("isp_organization"), equalTo("Fairpoint Communications")); + assertThat(geoData.get("isp_organization_name"), equalTo("Fairpoint Communications")); assertThat(geoData.get("user_type"), equalTo("residential")); assertThat(geoData.get("connection_type"), equalTo("Cable/DSL")); } @@ -497,7 +497,7 @@ public void testIsp() throws Exception { assertThat(geoData.get("organization_name"), equalTo("CELLCO-PART")); assertThat(geoData.get("network"), equalTo("149.101.100.0/28")); assertThat(geoData.get("isp"), equalTo("Verizon Wireless")); - assertThat(geoData.get("isp_organization"), equalTo("Verizon Wireless")); + assertThat(geoData.get("isp_organization_name"), equalTo("Verizon Wireless")); assertThat(geoData.get("mobile_network_code"), equalTo("004")); assertThat(geoData.get("mobile_country_code"), equalTo("310")); } diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java new file mode 100644 index 0000000000000..eca23cb13cd3d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/IngestGeoIpMetadataTests.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationMetadata; +import org.elasticsearch.test.AbstractChunkedSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +public class IngestGeoIpMetadataTests extends AbstractChunkedSerializingTestCase { + @Override + protected IngestGeoIpMetadata doParseInstance(XContentParser parser) throws IOException { + return IngestGeoIpMetadata.fromXContent(parser); + } + + @Override + protected Writeable.Reader instanceReader() { + return IngestGeoIpMetadata::new; + } + + @Override + protected IngestGeoIpMetadata createTestInstance() { + return randomIngestGeoIpMetadata(); + } + + @Override + protected IngestGeoIpMetadata mutateInstance(IngestGeoIpMetadata instance) throws IOException { + Map databases = new HashMap<>(instance.getDatabases()); + switch (between(0, 2)) { + case 0 -> { + String databaseId = randomValueOtherThanMany(databases::containsKey, ESTestCase::randomIdentifier); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + return new IngestGeoIpMetadata(databases); + } + case 1 -> { + if (databases.size() > 0) { + String randomDatabaseId = databases.keySet().iterator().next(); + databases.put(randomDatabaseId, randomDatabaseConfigurationMetadata(randomDatabaseId)); + } else { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + case 2 -> { + if (databases.size() > 0) { + String randomDatabaseId = databases.keySet().iterator().next(); + databases.remove(randomDatabaseId); + } else { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + default -> throw new AssertionError("failure, got illegal switch case"); + } + } + + private IngestGeoIpMetadata randomIngestGeoIpMetadata() { + Map databases = new HashMap<>(); + for (int i = 0; i < randomIntBetween(0, 20); i++) { + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId)); + } + return new IngestGeoIpMetadata(databases); + } + + private DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id) { + return new DatabaseConfigurationMetadata( + randomDatabaseConfiguration(id), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + private DatabaseConfiguration randomDatabaseConfiguration(String id) { + return new DatabaseConfiguration(id, randomAlphaOfLength(10), new DatabaseConfiguration.Maxmind(randomAlphaOfLength(10))); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java new file mode 100644 index 0000000000000..f035416d48068 --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationMetadataTests.java @@ -0,0 +1,74 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.test.ESTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.MAXMIND_NAMES; +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfigurationTests.randomDatabaseConfiguration; + +public class DatabaseConfigurationMetadataTests extends AbstractXContentSerializingTestCase { + + private String id; + + @Override + protected DatabaseConfigurationMetadata doParseInstance(XContentParser parser) throws IOException { + return DatabaseConfigurationMetadata.parse(parser, id); + } + + @Override + protected DatabaseConfigurationMetadata createTestInstance() { + id = randomAlphaOfLength(5); + return randomDatabaseConfigurationMetadata(id); + } + + public static DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id) { + return new DatabaseConfigurationMetadata( + new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new DatabaseConfiguration.Maxmind(randomAlphaOfLength(5))), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + @Override + protected DatabaseConfigurationMetadata mutateInstance(DatabaseConfigurationMetadata instance) { + switch (between(0, 2)) { + case 0: + return new DatabaseConfigurationMetadata( + randomValueOtherThan(instance.database(), () -> randomDatabaseConfiguration(randomAlphaOfLength(5))), + instance.version(), + instance.modifiedDate() + ); + case 1: + return new DatabaseConfigurationMetadata( + instance.database(), + randomValueOtherThan(instance.version(), ESTestCase::randomNonNegativeLong), + instance.modifiedDate() + ); + case 2: + return new DatabaseConfigurationMetadata( + instance.database(), + instance.version(), + randomValueOtherThan(instance.modifiedDate(), () -> ESTestCase.randomPositiveTimeValue().millis()) + ); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + @Override + protected Writeable.Reader instanceReader() { + return DatabaseConfigurationMetadata::new; + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java new file mode 100644 index 0000000000000..02c067561b49c --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/DatabaseConfigurationTests.java @@ -0,0 +1,86 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
+ */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.common.io.stream.Writeable; +import org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.Maxmind; +import org.elasticsearch.test.AbstractXContentSerializingTestCase; +import org.elasticsearch.xcontent.XContentParser; + +import java.io.IOException; +import java.util.Set; + +import static org.elasticsearch.ingest.geoip.direct.DatabaseConfiguration.MAXMIND_NAMES; + +public class DatabaseConfigurationTests extends AbstractXContentSerializingTestCase { + + private String id; + + @Override + protected DatabaseConfiguration doParseInstance(XContentParser parser) throws IOException { + return DatabaseConfiguration.parse(parser, id); + } + + @Override + protected DatabaseConfiguration createTestInstance() { + id = randomAlphaOfLength(5); + return randomDatabaseConfiguration(id); + } + + public static DatabaseConfiguration randomDatabaseConfiguration(String id) { + return new DatabaseConfiguration(id, randomFrom(MAXMIND_NAMES), new Maxmind(randomAlphaOfLength(5))); + } + + @Override + protected DatabaseConfiguration mutateInstance(DatabaseConfiguration instance) { + switch (between(0, 2)) { + case 0: + return new DatabaseConfiguration(instance.id() + randomAlphaOfLength(2), instance.name(), instance.maxmind()); + case 1: + return new DatabaseConfiguration( + instance.id(), + randomValueOtherThan(instance.name(), () -> randomFrom(MAXMIND_NAMES)), + instance.maxmind() + ); + case 2: + return new DatabaseConfiguration( + instance.id(), + instance.name(), + new Maxmind(instance.maxmind().accountId() + randomAlphaOfLength(2)) + ); + default: + throw new AssertionError("failure, got illegal switch case"); + } + } + + @Override + protected Writeable.Reader instanceReader() { + return DatabaseConfiguration::new; + } + + public void testValidateId() { + Set invalidIds = Set.of("-foo", "_foo", "foo,bar", "foo bar", "foo*bar", "foo.bar"); + for (String id : invalidIds) { + expectThrows(IllegalArgumentException.class, "expected exception for " + id, () -> DatabaseConfiguration.validateId(id)); + } + Set validIds = Set.of("f-oo", "f_oo", "foobar"); + for (String id : validIds) { + DatabaseConfiguration.validateId(id); + } + // Note: the code checks for byte length, but randomAlphoOfLength is only using characters in the ascii subset + String longId = randomAlphaOfLength(128); + expectThrows(IllegalArgumentException.class, "expected exception for " + longId, () -> DatabaseConfiguration.validateId(longId)); + String longestAllowedId = randomAlphaOfLength(127); + DatabaseConfiguration.validateId(longestAllowedId); + String shortId = randomAlphaOfLengthBetween(1, 127); + DatabaseConfiguration.validateId(shortId); + expectThrows(IllegalArgumentException.class, "expected exception for empty string", () -> DatabaseConfiguration.validateId("")); + expectThrows(IllegalArgumentException.class, "expected exception for null string", () -> DatabaseConfiguration.validateId(null)); + } +} diff --git a/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java new file mode 100644 index 0000000000000..710c3ee23916d --- /dev/null +++ b/modules/ingest-geoip/src/test/java/org/elasticsearch/ingest/geoip/direct/TransportPutDatabaseConfigurationActionTests.java @@ -0,0 +1,69 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. 
under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.ingest.geoip.direct; + +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.metadata.Metadata; +import org.elasticsearch.ingest.geoip.IngestGeoIpMetadata; +import org.elasticsearch.test.ESTestCase; + +import java.util.HashMap; +import java.util.Map; + +public class TransportPutDatabaseConfigurationActionTests extends ESTestCase { + + public void testValidatePrerequisites() { + // Test that we reject two configurations with the same database name but different ids: + String name = randomAlphaOfLengthBetween(1, 50); + IngestGeoIpMetadata ingestGeoIpMetadata = randomIngestGeoIpMetadata(name); + ClusterState state = ClusterState.builder(ClusterState.EMPTY_STATE) + .metadata(Metadata.builder(Metadata.EMPTY_METADATA).putCustom(IngestGeoIpMetadata.TYPE, ingestGeoIpMetadata)) + .build(); + DatabaseConfiguration databaseConfiguration = randomDatabaseConfiguration(randomIdentifier(), name); + expectThrows( + IllegalArgumentException.class, + () -> TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfiguration, state) + ); + + // Test that we do not reject two configurations with different database names: + String differentName = randomValueOtherThan(name, () -> randomAlphaOfLengthBetween(1, 50)); + DatabaseConfiguration databaseConfigurationForDifferentName = randomDatabaseConfiguration(randomIdentifier(), differentName); + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfigurationForDifferentName, state); + + // Test that we do not reject a configuration if none already exists: + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfiguration, ClusterState.EMPTY_STATE); + + // Test that we do not reject a configuration if one with the same database name AND id already exists: + DatabaseConfiguration databaseConfigurationSameNameSameId = ingestGeoIpMetadata.getDatabases() + .values() + .iterator() + .next() + .database(); + TransportPutDatabaseConfigurationAction.validatePrerequisites(databaseConfigurationSameNameSameId, state); + } + + private IngestGeoIpMetadata randomIngestGeoIpMetadata(String name) { + Map databases = new HashMap<>(); + String databaseId = randomIdentifier(); + databases.put(databaseId, randomDatabaseConfigurationMetadata(databaseId, name)); + return new IngestGeoIpMetadata(databases); + } + + private DatabaseConfigurationMetadata randomDatabaseConfigurationMetadata(String id, String name) { + return new DatabaseConfigurationMetadata( + randomDatabaseConfiguration(id, name), + randomNonNegativeLong(), + randomPositiveTimeValue().millis() + ); + } + + private DatabaseConfiguration randomDatabaseConfiguration(String id, String name) { + return new DatabaseConfiguration(id, name, new DatabaseConfiguration.Maxmind(randomAlphaOfLength(10))); + } +} diff --git a/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java b/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java index 58a6e3771b30d..0f0a0c998bd75 100644 --- a/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java +++ 
b/modules/ingest-geoip/src/yamlRestTest/java/org/elasticsearch/ingest/geoip/IngestGeoIpClientYamlTestSuiteIT.java @@ -46,7 +46,12 @@ public class IngestGeoIpClientYamlTestSuiteIT extends ESClientYamlSuiteTestCase .module("reindex") .module("ingest-geoip") .systemProperty("ingest.geoip.downloader.enabled.default", "true") + // sets the plain (geoip.elastic.co) downloader endpoint, which is used in these tests .setting("ingest.geoip.downloader.endpoint", () -> fixture.getAddress(), s -> useFixture) + // also sets the enterprise downloader maxmind endpoint, to make sure we do not accidentally hit the real endpoint from tests + // note: it's not important that the downloading actually work at this point -- the rest tests (so far) don't exercise + // the downloading code because of license reasons -- but if they did, then it would be important that we're hitting a fixture + .systemProperty("ingest.geoip.downloader.maxmind.endpoint.default", () -> fixture.getAddress(), s -> useFixture) .build(); @ClassRule diff --git a/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml new file mode 100644 index 0000000000000..6809443fdfbc3 --- /dev/null +++ b/modules/ingest-geoip/src/yamlRestTest/resources/rest-api-spec/test/ingest_geoip/40_geoip_databases.yml @@ -0,0 +1,72 @@ +setup: + - requires: + cluster_features: ["geoip.downloader.database.configuration"] + reason: "geoip downloader database configuration APIs added in 8.15" + +--- +"Test adding, getting, and removing geoip databases": + - do: + ingest.put_geoip_database: + id: "my_database_1" + body: > + { + "name": "GeoIP2-City", + "maxmind": { + "account_id": "1234" + } + } + - match: { acknowledged: true } + + - do: + ingest.put_geoip_database: + id: "my_database_1" + body: > + { + "name": "GeoIP2-Country", + "maxmind": { + "account_id": "4321" + } + } + - match: { acknowledged: true } + + - do: + ingest.put_geoip_database: + id: "my_database_2" + body: > + { + "name": "GeoIP2-City", + "maxmind": { + "account_id": "1234" + } + } + - match: { acknowledged: true } + + - do: + ingest.get_geoip_database: + id: "my_database_1" + - length: { databases: 1 } + - match: { databases.0.id: "my_database_1" } + - gte: { databases.0.modified_date_millis: 0 } + - match: { databases.0.database.name: "GeoIP2-Country" } + - match: { databases.0.database.maxmind.account_id: "4321" } + + - do: + ingest.get_geoip_database: {} + - length: { databases: 2 } + + - do: + ingest.get_geoip_database: + id: "my_database_1,my_database_2" + - length: { databases: 2 } + + - do: + ingest.delete_geoip_database: + id: "my_database_1" + + - do: + ingest.get_geoip_database: {} + - length: { databases: 1 } + - match: { databases.0.id: "my_database_2" } + - gte: { databases.0.modified_date_millis: 0 } + - match: { databases.0.database.name: "GeoIP2-City" } + - match: { databases.0.database.maxmind.account_id: "1234" } diff --git a/modules/lang-mustache/build.gradle b/modules/lang-mustache/build.gradle index c36275699e21f..7059165af2d9f 100644 --- a/modules/lang-mustache/build.gradle +++ b/modules/lang-mustache/build.gradle @@ -29,4 +29,5 @@ restResources { tasks.named("yamlRestTestV7CompatTransform").configure {task -> task.addAllowedWarningRegex("\\[types removal\\].*") task.replaceValueInMatch("responses.1.error.root_cause.0.type", "x_content_e_o_f_exception", "Multi-search template with errors") + 
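To make the flow in 40_geoip_databases.yml above concrete outside the YAML test runner, here is a hedged plain-Java sketch that drives the same endpoints (PUT, GET and DELETE on /_ingest/geoip/database) with java.net.http. The host, port and the absence of authentication are assumptions for illustration only.

import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;

public class GeoipDatabaseConfigExample {
    public static void main(String[] args) throws Exception {
        HttpClient client = HttpClient.newHttpClient();
        String base = "http://localhost:9200/_ingest/geoip/database"; // assumed local, unsecured cluster

        // Create (or overwrite) a database configuration, as in the first step of the YAML test.
        String body = "{ \"name\": \"GeoIP2-City\", \"maxmind\": { \"account_id\": \"1234\" } }";
        HttpRequest put = HttpRequest.newBuilder(URI.create(base + "/my_database_1"))
            .header("Content-Type", "application/json")
            .PUT(HttpRequest.BodyPublishers.ofString(body))
            .build();
        System.out.println(client.send(put, HttpResponse.BodyHandlers.ofString()).body());

        // List all configurations, mirroring the ingest.get_geoip_database: {} step.
        HttpRequest get = HttpRequest.newBuilder(URI.create(base)).GET().build();
        System.out.println(client.send(get, HttpResponse.BodyHandlers.ofString()).body());

        // Remove the configuration again, mirroring ingest.delete_geoip_database.
        HttpRequest delete = HttpRequest.newBuilder(URI.create(base + "/my_database_1")).DELETE().build();
        System.out.println(client.send(delete, HttpResponse.BodyHandlers.ofString()).statusCode());
    }
}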
task.replaceValueInMatch("responses.1.error.root_cause.0.reason", "/\\[1:22\\].Unexpected.end.of.file/", "Multi-search template with errors") } diff --git a/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/50_multi_search_template.yml b/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/50_multi_search_template.yml index 109bc8888889f..de9b3a0ec9bc2 100644 --- a/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/50_multi_search_template.yml +++ b/modules/lang-mustache/src/yamlRestTest/resources/rest-api-spec/test/lang_mustache/50_multi_search_template.yml @@ -114,14 +114,14 @@ setup: - match: { responses.0.hits.total: 2 } - match: { responses.1.error.root_cause.0.type: x_content_e_o_f_exception } - - match: { responses.1.error.root_cause.0.reason: "/Unexpected.end.of.input/" } + - match: { responses.1.error.root_cause.0.reason: "/\\[1:22\\].Unexpected.end.of.file/" } - match: { responses.2.hits.total: 1 } - match: { responses.3.error.root_cause.0.type: parsing_exception } - match: { responses.3.error.root_cause.0.reason: "/unknown.query.\\[unknown\\]/" } - match: { responses.4.error.root_cause.0.type: illegal_argument_exception } - match: { responses.4.error.root_cause.0.reason: "[rest_total_hits_as_int] cannot be used if the tracking of total hits is not accurate, got 1" } - match: { responses.0.status: 200 } - - match: { responses.1.status: 500 } + - match: { responses.1.status: 400 } - match: { responses.2.status: 200 } - match: { responses.3.status: 400 } diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml index 3eb686bda2174..4c195a0e32623 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/146_dense_vector_bit_basic.yml @@ -8,6 +8,8 @@ setup: indices.create: index: test-index body: + settings: + number_of_shards: 1 mappings: properties: vector: @@ -107,7 +109,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: @@ -138,7 +139,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: @@ -152,7 +152,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: @@ -167,7 +166,6 @@ setup: headers: Content-Type: application/json search: - rest_total_hits_as_int: true body: query: script_score: diff --git a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/65_runtime_doc_values.yml b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/65_runtime_doc_values.yml index 148b8e55e1a4a..b5190a579f62d 100644 --- a/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/65_runtime_doc_values.yml +++ b/modules/lang-painless/src/yamlRestTest/resources/rest-api-spec/test/painless/65_runtime_doc_values.yml @@ -12,7 +12,7 @@ setup: script: source: | for (date in field('date')) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } total_value_double: type: double @@ -55,7 +55,7 @@ setup: source: | if (doc.containsKey('date')) { for (date in 
doc['date']) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } } doc_total_value_double: @@ -737,7 +737,7 @@ setup: script: source: | for (date in field('date')) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } sort: [ { rank: asc } ] script_fields: @@ -758,7 +758,7 @@ setup: script: source: | for (date in field('date')) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } sort: [ { rank: asc } ] script_fields: @@ -924,7 +924,7 @@ setup: source: | if (doc.containsKey('date')) { for (date in doc['date']) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } } sort: [ { rank: asc } ] @@ -947,7 +947,7 @@ setup: source: | if (doc.containsKey('date')) { for (date in doc['date']) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } } sort: [ { rank: asc } ] @@ -1133,7 +1133,7 @@ setup: script: source: | for (date in field('date')) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } sort: [ { rank: asc } ] script_fields: @@ -1156,7 +1156,7 @@ setup: script: source: | for (date in field('date')) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } sort: [ { rank: asc } ] script_fields: @@ -1337,7 +1337,7 @@ setup: source: | if (doc.containsKey('date')) { for (date in doc['date']) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } } sort: [ { rank: asc } ] @@ -1362,7 +1362,7 @@ setup: source: | if (doc.containsKey('date')) { for (date in doc['date']) { - emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(date.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); } } sort: [ { rank: asc } ] diff --git a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java index 899cc42fea1e0..b3cd3586fca54 100644 --- a/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/elasticsearch/index/mapper/extras/MatchOnlyTextFieldMapper.java @@ -447,7 +447,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { "field [" + fullPath() + "] of type [" + typeName() + "] doesn't support synthetic source because it declares copy_to" ); } - return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), leafName(), null) { + return new StringStoredFieldFieldLoader(fieldType().storedFieldNameForSyntheticSource(), leafName()) { @Override protected void write(XContentBuilder b, Object value) throws IOException { b.value((String) value); diff --git a/modules/repository-url/build.gradle b/modules/repository-url/build.gradle index 3537d430e212b..3fe2f9d9bae42 100644 --- a/modules/repository-url/build.gradle +++ 
b/modules/repository-url/build.gradle @@ -33,6 +33,11 @@ dependencies { internalClusterTestImplementation project(':test:fixtures:url-fixture') } +tasks.named("yamlRestTestV7CompatTransform").configure { task -> + task.skipTest("repository_url/10_basic/Restore with repository-url using file://", "Error message has changed") + task.skipTest("repository_url/10_basic/Restore with repository-url using http://", "Error message has changed") +} + tasks.named("thirdPartyAudit").configure { ignoreMissingClasses( 'javax.servlet.ServletContextEvent', diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/10_keyword.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/10_keyword.yml index 7bd7b6c7779e2..8728d4ac413b7 100644 --- a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/10_keyword.yml +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/10_keyword.yml @@ -12,14 +12,14 @@ setup: day_of_week: type: keyword script: | - emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); # Test fetching from _source day_of_week_from_source: type: keyword script: | Instant instant = Instant.ofEpochMilli(params._source.timestamp); ZonedDateTime dt = ZonedDateTime.ofInstant(instant, ZoneId.of("UTC")); - emit(dt.dayOfWeek.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(dt.dayOfWeek.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); # Test fetching many values day_of_week_letters: type: keyword @@ -75,7 +75,7 @@ setup: - match: {sensor.mappings.runtime.day_of_week.type: keyword } - match: sensor.mappings.runtime.day_of_week.script.source: | - emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); - match: {sensor.mappings.runtime.day_of_week.script.lang: painless } # --- TODO get field mappings needs to be adapted @@ -90,7 +90,7 @@ setup: # type: keyword # script: # source: | -# emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); +# emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); # lang: painless # meta: {} # @@ -218,7 +218,7 @@ setup: day_of_week: type: keyword script: | - emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.SHORT, Locale.ROOT)); + emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.SHORT, Locale.ENGLISH)); - do: search: index: sensor diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/13_keyword_calculated_at_index.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/13_keyword_calculated_at_index.yml index 1c10a017a5c33..c27ddab72bff4 100644 --- a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/13_keyword_calculated_at_index.yml +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/13_keyword_calculated_at_index.yml @@ -21,14 +21,14 @@ setup: day_of_week: type: keyword script: | - emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); # Test fetching from 
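The repeated Locale.ROOT to Locale.ENGLISH swaps in these runtime-field scripts matter because day-of-week display names for the root locale come from the CLDR data bundled with the JDK, which is not guaranteed to stay identical across JDK releases (the JDK 23 date-format warnings allowed elsewhere in this change point at the same concern), whereas the English names are stable. A small stand-alone illustration, independent of Painless; the exact motivation for the swap is inferred here, not stated in the diff:

import java.time.DayOfWeek;
import java.time.format.TextStyle;
import java.util.Locale;

public class DayOfWeekLocaleExample {
    public static void main(String[] args) {
        DayOfWeek monday = DayOfWeek.MONDAY;
        // Stable across JDKs: always "Monday".
        System.out.println(monday.getDisplayName(TextStyle.FULL, Locale.ENGLISH));
        // Falls back to whatever the JDK's root-locale CLDR data provides,
        // which may change when the bundled CLDR version changes.
        System.out.println(monday.getDisplayName(TextStyle.FULL, Locale.ROOT));
    }
}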
_source day_of_week_from_source: type: keyword script: | Instant instant = Instant.ofEpochMilli(params._source.timestamp); ZonedDateTime dt = ZonedDateTime.ofInstant(instant, ZoneId.of("UTC")); - emit(dt.dayOfWeek.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(dt.dayOfWeek.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); # Test fetching many values day_of_week_letters: type: keyword @@ -74,7 +74,7 @@ setup: - match: {sensor.mappings.properties.day_of_week.type: keyword } - match: sensor.mappings.properties.day_of_week.script.source: | - emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); - match: {sensor.mappings.properties.day_of_week.script.lang: painless } --- diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/40_runtime_mappings.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/40_runtime_mappings.yml index 0e7d0b78bba47..b6acc7a18345a 100644 --- a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/40_runtime_mappings.yml +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/40_runtime_mappings.yml @@ -34,7 +34,7 @@ setup: day_of_week: type: keyword script: - source: "emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + source: "emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" - match: {indices: ["test-1"]} - length: {fields.timestamp: 1} @@ -78,7 +78,7 @@ setup: day_of_week: type: keyword script: - source: "emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + source: "emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" - match: {indices: ["test-1", "test-2"]} - length: {fields.day_of_week: 1} diff --git a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/80_multiple_indices.yml b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/80_multiple_indices.yml index 0c571975098b2..dc52350a25a75 100644 --- a/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/80_multiple_indices.yml +++ b/modules/runtime-fields-common/src/yamlRestTest/resources/rest-api-spec/test/runtime_fields/80_multiple_indices.yml @@ -12,7 +12,7 @@ setup: day_of_week: type: keyword script: | - emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT)); + emit(doc['timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH)); tomorrow: type: date script: diff --git a/muted-tests.yml b/muted-tests.yml index d8eba8ad2dba6..040e25add3ebf 100644 --- a/muted-tests.yml +++ b/muted-tests.yml @@ -1,31 +1,13 @@ tests: -- class: "org.elasticsearch.xpack.textstructure.structurefinder.TimestampFormatFinderTests" - issue: "https://github.com/elastic/elasticsearch/issues/108855" - method: "testGuessIsDayFirstFromLocale" -- class: "org.elasticsearch.test.rest.ClientYamlTestSuiteIT" - issue: "https://github.com/elastic/elasticsearch/issues/108857" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" - class: "org.elasticsearch.upgrades.SearchStatesIT" issue: "https://github.com/elastic/elasticsearch/issues/108991" method: "testCanMatch" - class: 
"org.elasticsearch.upgrades.MlTrainedModelsUpgradeIT" issue: "https://github.com/elastic/elasticsearch/issues/108993" method: "testTrainedModelInference" -- class: "org.elasticsearch.xpack.security.CoreWithSecurityClientYamlTestSuiteIT" - issue: "https://github.com/elastic/elasticsearch/issues/109188" - method: "test {yaml=search/180_locale_dependent_mapping/Test Index and Search locale dependent mappings / dates}" -- class: "org.elasticsearch.xpack.esql.qa.mixed.EsqlClientYamlIT" - issue: "https://github.com/elastic/elasticsearch/issues/109189" - method: "test {p0=esql/70_locale/Date format with Italian locale}" -- class: "org.elasticsearch.xpack.test.rest.XPackRestIT" - issue: "https://github.com/elastic/elasticsearch/issues/109200" - method: "test {p0=esql/70_locale/Date format with Italian locale}" - class: org.elasticsearch.smoketest.DocsClientYamlTestSuiteIT method: test {yaml=reference/esql/esql-async-query-api/line_17} issue: https://github.com/elastic/elasticsearch/issues/109260 -- class: "org.elasticsearch.index.engine.frozen.FrozenIndexIT" - issue: "https://github.com/elastic/elasticsearch/issues/109315" - method: "testTimestampFieldTypeExposedByAllIndicesServices" - class: "org.elasticsearch.analysis.common.CommonAnalysisClientYamlTestSuiteIT" issue: "https://github.com/elastic/elasticsearch/issues/109318" method: "test {yaml=analysis-common/50_char_filters/pattern_replace error handling (too complex pattern)}" @@ -44,12 +26,6 @@ tests: - class: "org.elasticsearch.xpack.test.rest.XPackRestIT" issue: "https://github.com/elastic/elasticsearch/issues/109687" method: "test {p0=sql/translate/Translate SQL}" -- class: org.elasticsearch.action.search.SearchProgressActionListenerIT - method: testSearchProgressWithHits - issue: https://github.com/elastic/elasticsearch/issues/109830 -- class: "org.elasticsearch.xpack.security.ScrollHelperIntegTests" - issue: "https://github.com/elastic/elasticsearch/issues/109905" - method: "testFetchAllEntities" - class: "org.elasticsearch.xpack.esql.action.AsyncEsqlQueryActionIT" issue: "https://github.com/elastic/elasticsearch/issues/109944" method: "testBasicAsyncExecution" @@ -67,57 +43,63 @@ tests: issue: https://github.com/elastic/elasticsearch/issues/110211 - class: "org.elasticsearch.rest.RestControllerIT" issue: "https://github.com/elastic/elasticsearch/issues/110225" -- class: "org.elasticsearch.xpack.security.authz.store.NativePrivilegeStoreCacheTests" - issue: "https://github.com/elastic/elasticsearch/issues/110227" - method: "testGetPrivilegesUsesCache" - class: org.elasticsearch.upgrades.SecurityIndexRolesMetadataMigrationIT method: testMetadataMigratedAfterUpgrade issue: https://github.com/elastic/elasticsearch/issues/110232 - class: org.elasticsearch.compute.lucene.ValueSourceReaderTypeConversionTests method: testLoadAll issue: https://github.com/elastic/elasticsearch/issues/110244 -- class: org.elasticsearch.painless.LangPainlessClientYamlTestSuiteIT - method: test {yaml=painless/146_dense_vector_bit_basic/Cosine Similarity is not supported} - issue: https://github.com/elastic/elasticsearch/issues/110290 -- class: org.elasticsearch.painless.LangPainlessClientYamlTestSuiteIT - method: test {yaml=painless/146_dense_vector_bit_basic/Dot Product is not supported} - issue: https://github.com/elastic/elasticsearch/issues/110291 -- class: org.elasticsearch.action.search.SearchProgressActionListenerIT - method: testSearchProgressWithQuery - issue: https://github.com/elastic/elasticsearch/issues/109867 - class: 
org.elasticsearch.backwards.SearchWithMinCompatibleSearchNodeIT method: testMinVersionAsNewVersion issue: https://github.com/elastic/elasticsearch/issues/95384 - class: org.elasticsearch.backwards.SearchWithMinCompatibleSearchNodeIT method: testCcsMinimizeRoundtripsIsFalse issue: https://github.com/elastic/elasticsearch/issues/101974 -- class: org.elasticsearch.backwards.SearchWithMinCompatibleSearchNodeIT - method: testMinVersionAsOldVersion - issue: https://github.com/elastic/elasticsearch/issues/109454 -- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests - method: testReplaceChildren {class org.elasticsearch.xpack.esql.expression.function.aggregate.ToPartial} - issue: https://github.com/elastic/elasticsearch/issues/110310 -- class: org.elasticsearch.xpack.esql.tree.EsqlNodeSubclassTests - method: testInfoParameters {class org.elasticsearch.xpack.esql.expression.function.aggregate.ToPartial} - issue: https://github.com/elastic/elasticsearch/issues/110310 -- class: org.elasticsearch.search.vectors.ExactKnnQueryBuilderTests - method: testToQuery - issue: https://github.com/elastic/elasticsearch/issues/110357 -- class: org.elasticsearch.search.aggregations.bucket.terms.RareTermsIT - method: testSingleValuedString - issue: https://github.com/elastic/elasticsearch/issues/110388 - class: "org.elasticsearch.xpack.searchablesnapshots.FrozenSearchableSnapshotsIntegTests" issue: "https://github.com/elastic/elasticsearch/issues/110408" method: "testCreateAndRestorePartialSearchableSnapshot" -- class: "org.elasticsearch.xpack.security.role.RoleWithDescriptionRestIT" - issue: "https://github.com/elastic/elasticsearch/issues/110416" - method: "testCreateOrUpdateRoleWithDescription" -- class: "org.elasticsearch.xpack.security.role.RoleWithDescriptionRestIT" - issue: "https://github.com/elastic/elasticsearch/issues/110417" - method: "testCreateOrUpdateRoleWithDescription" -- class: org.elasticsearch.test.rest.yaml.CcsCommonYamlTestSuiteIT - method: test {p0=search.vectors/41_knn_search_half_byte_quantized/Test create, merge, and search cosine} - issue: https://github.com/elastic/elasticsearch/issues/109978 +- class: "org.elasticsearch.xpack.esql.qa.mixed.MixedClusterEsqlSpecIT" + issue: "https://github.com/elastic/elasticsearch/issues/110591" +- class: org.elasticsearch.packaging.test.DockerTests + method: test021InstallPlugin + issue: https://github.com/elastic/elasticsearch/issues/110343 +- class: org.elasticsearch.multi_node.GlobalCheckpointSyncActionIT + issue: https://github.com/elastic/elasticsearch/issues/111124 +- class: org.elasticsearch.packaging.test.DockerTests + method: test600Interrupt + issue: https://github.com/elastic/elasticsearch/issues/111324 +- class: org.elasticsearch.xpack.transform.integration.TransformIT + method: testStopWaitForCheckpoint + issue: https://github.com/elastic/elasticsearch/issues/106113 +- class: org.elasticsearch.index.mapper.IgnoredSourceFieldMapperTests + method: testStoredNestedSubObjectWithNameOverlappingParentName + issue: https://github.com/elastic/elasticsearch/issues/112083 +- class: org.elasticsearch.xpack.security.authc.kerberos.SimpleKdcLdapServerTests + method: testClientServiceMutualAuthentication + issue: https://github.com/elastic/elasticsearch/issues/112529 +- class: org.elasticsearch.upgrades.FullClusterRestartIT + method: testSnapshotRestore {cluster=OLD} + issue: https://github.com/elastic/elasticsearch/issues/111777 +- class: org.elasticsearch.xpack.restart.CoreFullClusterRestartIT + method: testSnapshotRestore {cluster=OLD} + 
issue: https://github.com/elastic/elasticsearch/issues/111774 +- class: org.elasticsearch.upgrades.FullClusterRestartIT + method: testSnapshotRestore {cluster=UPGRADED} + issue: https://github.com/elastic/elasticsearch/issues/111798 +- class: org.elasticsearch.xpack.restart.CoreFullClusterRestartIT + method: testSnapshotRestore {cluster=UPGRADED} + issue: https://github.com/elastic/elasticsearch/issues/111799 +- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosTicketValidatorTests + method: testValidKebrerosTicket + issue: https://github.com/elastic/elasticsearch/issues/112632 +- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosTicketValidatorTests + method: testKerbTicketGeneratedForDifferentServerFailsValidation + issue: https://github.com/elastic/elasticsearch/issues/112639 +- class: org.elasticsearch.xpack.security.authc.kerberos.KerberosTicketValidatorTests + method: testWhenKeyTabWithInvalidContentFailsValidation + issue: https://github.com/elastic/elasticsearch/issues/112631 +- class: org.elasticsearch.xpack.inference.TextEmbeddingCrudIT + issue: https://github.com/elastic/elasticsearch/issues/113915 # Examples: # diff --git a/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java b/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java index 9fcf7e51ba30a..a3fa71054d335 100644 --- a/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java +++ b/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/EC2RetriesTests.java @@ -100,6 +100,7 @@ public void testEC2DiscoveryRetriesOnRateLimiting() throws IOException { exchange.getResponseHeaders().set("Content-Type", "text/xml; charset=UTF-8"); exchange.sendResponseHeaders(HttpStatus.SC_OK, responseBody.length); exchange.getResponseBody().write(responseBody); + exchange.getResponseBody().flush(); return; } } diff --git a/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/Ec2DiscoveryTests.java b/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/Ec2DiscoveryTests.java index a9ade11d6a7e3..da0da87e6e2bd 100644 --- a/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/Ec2DiscoveryTests.java +++ b/plugins/discovery-ec2/src/test/java/org/elasticsearch/discovery/ec2/Ec2DiscoveryTests.java @@ -58,7 +58,7 @@ public class Ec2DiscoveryTests extends AbstractEC2MockAPITestCase { private static final String PREFIX_PUBLIC_IP = "8.8.8."; private static final String PREFIX_PRIVATE_IP = "10.0.0."; - private Map poorMansDNS = new ConcurrentHashMap<>(); + private final Map poorMansDNS = new ConcurrentHashMap<>(); protected MockTransportService createTransportService() { final Transport transport = new Netty4Transport( @@ -132,7 +132,7 @@ protected List buildDynamicHosts(Settings nodeSettings, int no .stream() .filter(t -> t.getKey().equals(entry.getKey())) .map(Tag::getValue) - .collect(Collectors.toList()) + .toList() .containsAll(entry.getValue()) ) ) @@ -143,6 +143,7 @@ protected List buildDynamicHosts(Settings nodeSettings, int no exchange.getResponseHeaders().set("Content-Type", "text/xml; charset=UTF-8"); exchange.sendResponseHeaders(HttpStatus.SC_OK, responseBody.length); exchange.getResponseBody().write(responseBody); + exchange.getResponseBody().flush(); return; } } @@ -159,14 +160,14 @@ protected List buildDynamicHosts(Settings nodeSettings, int no } } - public void testDefaultSettings() throws InterruptedException { + public void testDefaultSettings() { int 
nodes = randomInt(10); Settings nodeSettings = Settings.builder().build(); List discoveryNodes = buildDynamicHosts(nodeSettings, nodes); assertThat(discoveryNodes, hasSize(nodes)); } - public void testPrivateIp() throws InterruptedException { + public void testPrivateIp() { int nodes = randomInt(10); for (int i = 0; i < nodes; i++) { poorMansDNS.put(PREFIX_PRIVATE_IP + (i + 1), buildNewFakeTransportAddress()); @@ -182,7 +183,7 @@ public void testPrivateIp() throws InterruptedException { } } - public void testPublicIp() throws InterruptedException { + public void testPublicIp() { int nodes = randomInt(10); for (int i = 0; i < nodes; i++) { poorMansDNS.put(PREFIX_PUBLIC_IP + (i + 1), buildNewFakeTransportAddress()); @@ -198,7 +199,7 @@ public void testPublicIp() throws InterruptedException { } } - public void testPrivateDns() throws InterruptedException { + public void testPrivateDns() { int nodes = randomInt(10); for (int i = 0; i < nodes; i++) { String instanceId = "node" + (i + 1); @@ -216,7 +217,7 @@ public void testPrivateDns() throws InterruptedException { } } - public void testPublicDns() throws InterruptedException { + public void testPublicDns() { int nodes = randomInt(10); for (int i = 0; i < nodes; i++) { String instanceId = "node" + (i + 1); @@ -234,14 +235,14 @@ public void testPublicDns() throws InterruptedException { } } - public void testInvalidHostType() throws InterruptedException { + public void testInvalidHostType() { Settings nodeSettings = Settings.builder().put(AwsEc2Service.HOST_TYPE_SETTING.getKey(), "does_not_exist").build(); IllegalArgumentException exception = expectThrows(IllegalArgumentException.class, () -> { buildDynamicHosts(nodeSettings, 1); }); assertThat(exception.getMessage(), containsString("does_not_exist is unknown for discovery.ec2.host_type")); } - public void testFilterByTags() throws InterruptedException { + public void testFilterByTags() { int nodes = randomIntBetween(5, 10); Settings nodeSettings = Settings.builder().put(AwsEc2Service.TAG_SETTING.getKey() + "stage", "prod").build(); @@ -264,7 +265,7 @@ public void testFilterByTags() throws InterruptedException { assertThat(dynamicHosts, hasSize(prodInstances)); } - public void testFilterByMultipleTags() throws InterruptedException { + public void testFilterByMultipleTags() { int nodes = randomIntBetween(5, 10); Settings nodeSettings = Settings.builder().putList(AwsEc2Service.TAG_SETTING.getKey() + "stage", "prod", "preprod").build(); diff --git a/plugins/examples/gradle/wrapper/gradle-wrapper.properties b/plugins/examples/gradle/wrapper/gradle-wrapper.properties index 515ab9d5f1822..e955ee28dd349 100644 --- a/plugins/examples/gradle/wrapper/gradle-wrapper.properties +++ b/plugins/examples/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=f8b4f4772d302c8ff580bc40d0f56e715de69b163546944f787c87abf209c961 -distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-all.zip +distributionSha256Sum=fdfca5dbc2834f0ece5020465737538e5ba679deeff5ab6c09621d67f8bb1a15 +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.1-all.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/plugins/examples/script-expert-scoring/src/main/java/org/elasticsearch/example/expertscript/ExpertScriptPlugin.java b/plugins/examples/script-expert-scoring/src/main/java/org/elasticsearch/example/expertscript/ExpertScriptPlugin.java index 894f4ebe4bc54..dc429538fec3b 
100644 --- a/plugins/examples/script-expert-scoring/src/main/java/org/elasticsearch/example/expertscript/ExpertScriptPlugin.java +++ b/plugins/examples/script-expert-scoring/src/main/java/org/elasticsearch/example/expertscript/ExpertScriptPlugin.java @@ -35,10 +35,7 @@ public class ExpertScriptPlugin extends Plugin implements ScriptPlugin { @Override - public ScriptEngine getScriptEngine( - Settings settings, - Collection> contexts - ) { + public ScriptEngine getScriptEngine(Settings settings, Collection> contexts) { return new MyExpertScriptEngine(); } @@ -143,6 +140,9 @@ public ScoreScript newInstance(DocReader docReader) public double execute( ExplanationHolder explanation ) { + if(explanation != null) { + explanation.set("An example optional custom description to explain details for this script's execution; we'll provide a default one if you leave this out."); + } return 0.0d; } }; @@ -166,6 +166,9 @@ public void setDocument(int docid) { } @Override public double execute(ExplanationHolder explanation) { + if(explanation != null) { + explanation.set("An example optional custom description to explain details for this script's execution; we'll provide a default one if you leave this out."); + } if (postings.docID() != currentDocid) { /* * advance moved past the current doc, so this diff --git a/plugins/examples/script-expert-scoring/src/yamlRestTest/resources/rest-api-spec/test/script_expert_scoring/20_score.yml b/plugins/examples/script-expert-scoring/src/yamlRestTest/resources/rest-api-spec/test/script_expert_scoring/20_score.yml index 89194d162872d..7436768416e00 100644 --- a/plugins/examples/script-expert-scoring/src/yamlRestTest/resources/rest-api-spec/test/script_expert_scoring/20_score.yml +++ b/plugins/examples/script-expert-scoring/src/yamlRestTest/resources/rest-api-spec/test/script_expert_scoring/20_score.yml @@ -4,26 +4,27 @@ setup: - do: indices.create: - index: test + index: test - do: index: - index: test - id: "1" - body: { "important_field": "foo" } + index: test + id: "1" + body: { "important_field": "foo" } - do: - index: - index: test - id: "2" - body: { "important_field": "foo foo foo" } + index: + index: test + id: "2" + body: { "important_field": "foo foo foo" } - do: - index: - index: test - id: "3" - body: { "important_field": "foo foo" } + index: + index: test + id: "3" + body: { "important_field": "foo foo" } - do: - indices.refresh: {} + indices.refresh: { } + --- "document scoring": - do: @@ -46,6 +47,39 @@ setup: term: "foo" - length: { hits.hits: 3 } - - match: {hits.hits.0._id: "2" } - - match: {hits.hits.1._id: "3" } - - match: {hits.hits.2._id: "1" } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + +--- +"document scoring with custom explanation": + + - requires: + cluster_features: [ "gte_v8.15.1" ] + reason: "bug fixed where explanations were throwing npe prior to 8.16" + + - do: + search: + rest_total_hits_as_int: true + index: test + body: + explain: true + query: + function_score: + query: + match: + important_field: "foo" + functions: + - script_score: + script: + source: "pure_df" + lang: "expert_scripts" + params: + field: "important_field" + term: "foo" + + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.2._id: "1" } + - match: { hits.hits.0._explanation.details.1.details.0.description: "An example optional custom description to explain details for this script's execution; we'll provide a default one if you leave 
this out." } diff --git a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java index dac8e051f25f8..8d50a9f7e29a9 100644 --- a/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java +++ b/plugins/mapper-annotated-text/src/main/java/org/elasticsearch/index/mapper/annotatedtext/AnnotatedTextFieldMapper.java @@ -584,7 +584,7 @@ public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { ); } if (fieldType.stored()) { - return new StringStoredFieldFieldLoader(fullPath(), leafName(), null) { + return new StringStoredFieldFieldLoader(fullPath(), leafName()) { @Override protected void write(XContentBuilder b, Object value) throws IOException { b.value((String) value); diff --git a/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/10_basic.yml b/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/10_basic.yml index e35282bb6bfde..05d90d582e7f2 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/10_basic.yml +++ b/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/10_basic.yml @@ -13,7 +13,7 @@ setup: day_of_week: type: keyword script: - source: "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + source: "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" - do: bulk: refresh: true diff --git a/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/20_runtime_mappings.yml b/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/20_runtime_mappings.yml index 58462786f9a2f..1c1a39a7bc1ac 100644 --- a/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/20_runtime_mappings.yml +++ b/qa/ccs-common-rest/src/yamlRestTest/resources/rest-api-spec/test/eql/20_runtime_mappings.yml @@ -9,7 +9,7 @@ setup: day_of_week: type: keyword script: - source: "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ROOT))" + source: "emit(doc['@timestamp'].value.dayOfWeekEnum.getDisplayName(TextStyle.FULL, Locale.ENGLISH))" - do: bulk: refresh: true diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java index 1925b1e8f36ab..2ce9eef29d903 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/ConfigurationTests.java @@ -20,7 +20,6 @@ import static java.nio.file.attribute.PosixFilePermissions.fromString; import static org.elasticsearch.packaging.util.FileUtils.append; -import static org.hamcrest.Matchers.equalTo; import static org.junit.Assume.assumeFalse; public class ConfigurationTests extends PackagingTestCase { @@ -50,13 +49,15 @@ public void test20HostnameSubstitution() throws Exception { // security auto-config requires that the archive owner and the node process user be the same Platforms.onWindows(() -> sh.chown(confPath, installation.getOwner())); assertWhileRunning(() -> { - final String nameResponse = ServerUtils.makeRequest( - Request.Get("https://localhost:9200/_cat/nodes?h=name"), - "test_superuser", - "test_superuser_password", - ServerUtils.getCaCert(confPath) - ).strip(); - 
assertThat(nameResponse, equalTo("mytesthost")); + assertBusy(() -> { + final String nameResponse = ServerUtils.makeRequest( + Request.Get("https://localhost:9200/_cat/nodes?h=name"), + "test_superuser", + "test_superuser_password", + ServerUtils.getCaCert(confPath) + ).strip(); + assertEquals("mytesthost", nameResponse); + }); }); Platforms.onWindows(() -> sh.chown(confPath)); }); diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java index f9723f30cc371..18668b842b2d3 100644 --- a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/DockerTests.java @@ -1231,8 +1231,7 @@ public void test500Readiness() throws Exception { assertBusy(() -> assertTrue(readinessProbe(9399))); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/99508") - public void test600Interrupt() { + public void test600Interrupt() throws Exception { waitForElasticsearch(installation, "elastic", PASSWORD); final Result containerLogs = getContainerLogs(); @@ -1242,10 +1241,12 @@ public void test600Interrupt() { final int maxPid = infos.stream().map(i -> i.pid()).max(Integer::compareTo).get(); sh.run("bash -c 'kill -int " + maxPid + "'"); // send ctrl+c to all java processes - final Result containerLogsAfter = getContainerLogs(); - assertThat("Container logs should contain stopping ...", containerLogsAfter.stdout(), containsString("stopping ...")); - assertThat("No errors stdout", containerLogsAfter.stdout(), not(containsString("java.security.AccessControlException:"))); - assertThat("No errors stderr", containerLogsAfter.stderr(), not(containsString("java.security.AccessControlException:"))); + assertBusy(() -> { + final Result containerLogsAfter = getContainerLogs(); + assertThat("Container logs should contain stopping ...", containerLogsAfter.stdout(), containsString("stopping ...")); + assertThat("No errors stdout", containerLogsAfter.stdout(), not(containsString("java.security.AccessControlException:"))); + assertThat("No errors stderr", containerLogsAfter.stderr(), not(containsString("java.security.AccessControlException:"))); + }); } } diff --git a/qa/packaging/src/test/java/org/elasticsearch/packaging/test/MemoryLockingTests.java b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/MemoryLockingTests.java new file mode 100644 index 0000000000000..82a17c54b6d69 --- /dev/null +++ b/qa/packaging/src/test/java/org/elasticsearch/packaging/test/MemoryLockingTests.java @@ -0,0 +1,59 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. 
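The ConfigurationTests and DockerTests hunks above wrap their assertions in assertBusy(...), the ESTestCase helper that retries a block until it stops throwing or a timeout elapses. For readers outside the test framework, here is a dependency-free sketch of that retry-until-timeout idea; the names, the fixed poll interval and the timeout are illustrative, and the real helper's polling strategy may differ.

public class RetryUntilTimeoutSketch {
    @FunctionalInterface
    interface CheckedRunnable {
        void run() throws Exception;
    }

    // Re-run the block until it succeeds, rethrowing the last failure once the deadline passes.
    static void awaitSuccess(CheckedRunnable block, long timeoutMillis) throws Exception {
        long deadline = System.currentTimeMillis() + timeoutMillis;
        while (true) {
            try {
                block.run();
                return;
            } catch (Exception | AssertionError failure) {
                if (System.currentTimeMillis() > deadline) {
                    throw failure;
                }
                Thread.sleep(100); // fixed poll interval for this sketch only
            }
        }
    }

    public static void main(String[] args) throws Exception {
        long start = System.currentTimeMillis();
        awaitSuccess(() -> {
            if (System.currentTimeMillis() - start < 500) {
                throw new AssertionError("not ready yet");
            }
        }, 5_000);
        System.out.println("condition met after ~" + (System.currentTimeMillis() - start) + "ms");
    }
}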
+ */ + +package org.elasticsearch.packaging.test; + +import org.elasticsearch.packaging.util.ServerUtils; +import org.elasticsearch.packaging.util.docker.DockerRun; + +import java.util.Map; + +import static org.elasticsearch.packaging.util.docker.Docker.runContainer; +import static org.elasticsearch.packaging.util.docker.DockerRun.builder; + +public class MemoryLockingTests extends PackagingTestCase { + + public void test10Install() throws Exception { + install(); + } + + public void test20MemoryLockingEnabled() throws Exception { + configureAndRun( + Map.of( + "bootstrap.memory_lock", + "true", + "xpack.security.enabled", + "false", + "xpack.security.http.ssl.enabled", + "false", + "xpack.security.enrollment.enabled", + "false", + "discovery.type", + "single-node" + ) + ); + // TODO: very locking worked. logs? check memory of process? at least we know the process started successfully + stopElasticsearch(); + } + + public void configureAndRun(Map settings) throws Exception { + if (distribution().isDocker()) { + DockerRun builder = builder(); + settings.forEach(builder::envVar); + runContainer(distribution(), builder); + } else { + + for (var setting : settings.entrySet()) { + ServerUtils.addSettingToExistingConfiguration(installation.config, setting.getKey(), setting.getValue()); + } + ServerUtils.removeSettingFromExistingConfiguration(installation.config, "cluster.initial_master_nodes"); + } + + startElasticsearch(); + } +} diff --git a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml index 9a7444c4ffc6c..5b14efc7cce6a 100644 --- a/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml +++ b/qa/smoke-test-ingest-with-all-dependencies/src/yamlRestTest/resources/rest-api-spec/test/ingest/20_combine_processors.yml @@ -1,6 +1,11 @@ +setup: + - requires: + test_runner_features: allowed_warnings_regex --- "Test with date processor": - do: + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' ingest.put_pipeline: id: "_id" body: > @@ -41,6 +46,8 @@ - match: { acknowledged: true } - do: + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' index: index: test id: "1" @@ -70,6 +77,8 @@ --- "Test with date processor and ECS-v1": - do: + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' ingest.put_pipeline: id: "_id" body: > @@ -99,6 +108,8 @@ - match: { acknowledged: true } - do: + allowed_warnings_regex: + - 'Date format \[dd/MMM/yyyy:HH:mm:ss xx] contains textual field specifiers that could change in JDK 23.*' index: index: test id: "1" diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.resolve_index.json b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.resolve_index.json index 4ea78bfd45460..e27e3a0450bff 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/api/indices.resolve_index.json +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/indices.resolve_index.json @@ -37,6 +37,16 @@ ], "default":"open", "description":"Whether wildcard expressions should get expanded to open or closed indices (default: open)" + }, + "ignore_unavailable":{ + 
"type":"boolean", + "description":"Whether specified concrete indices should be ignored when unavailable (missing or closed)", + "default":false + }, + "allow_no_indices":{ + "type":"boolean", + "description":"Whether to ignore if a wildcard indices expression resolves into no concrete indices. (This includes `_all` string or when no indices have been specified)", + "default":true } } } diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json new file mode 100644 index 0000000000000..fe50da720a4da --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.delete_geoip_database.json @@ -0,0 +1,31 @@ +{ + "ingest.delete_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/delete-geoip-database-api.html", + "description":"Deletes a geoip database configuration" + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "DELETE" + ], + "parts":{ + "id":{ + "type":"list", + "description":"A comma-separated list of geoip database configurations to delete" + } + } + } + ] + }, + "params":{ + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json new file mode 100644 index 0000000000000..5c59994d4b22e --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.get_geoip_database.json @@ -0,0 +1,37 @@ +{ + "ingest.get_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/get-geoip-database-api.html", + "description":"Returns geoip database configuration." 
+ }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database", + "methods":[ + "GET" + ] + }, + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "GET" + ], + "parts":{ + "id":{ + "type":"list", + "description":"A comma-separated list of geoip database configurations to get; use `*` to get all geoip database configurations" + } + } + } + ] + }, + "params":{ + } + } +} diff --git a/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json new file mode 100644 index 0000000000000..6d088e3f164f4 --- /dev/null +++ b/rest-api-spec/src/main/resources/rest-api-spec/api/ingest.put_geoip_database.json @@ -0,0 +1,36 @@ +{ + "ingest.put_geoip_database":{ + "documentation":{ + "url":"https://www.elastic.co/guide/en/elasticsearch/reference/master/put-geoip-database-api.html", + "description":"Puts the configuration for a geoip database to be downloaded" + }, + "stability":"stable", + "visibility":"public", + "headers":{ + "accept": [ "application/json"], + "content_type": ["application/json"] + }, + "url":{ + "paths":[ + { + "path":"/_ingest/geoip/database/{id}", + "methods":[ + "PUT" + ], + "parts":{ + "id":{ + "type":"string", + "description":"The id of the database configuration" + } + } + } + ] + }, + "params":{ + }, + "body":{ + "description":"The database configuration definition", + "required":true + } + } +} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/bulk/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/bulk/10_basic.yml index f4f6245603aab..a2dfe3784d5ae 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/bulk/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/bulk/10_basic.yml @@ -229,3 +229,23 @@ - match: { items.0.index.error.type: illegal_argument_exception } - match: { items.0.index.error.reason: "no write index is defined for alias [test_index]. 
The write index may be explicitly disabled using is_write_index=false or the alias points to multiple indices without one being designated as a write index" } +--- +"Took is not orders of magnitude off": + - requires: + cluster_features: ["gte_v8.15.1"] + reason: "Bug reporting wrong took time introduced in 8.15.0, fixed in 8.15.1" + - do: + bulk: + body: + - index: + _index: took_test + - f: 1 + - index: + _index: took_test + - f: 2 + - index: + _index: took_test + - f: 3 + - match: { errors: false } + - gte: { took: 0 } + - lte: { took: 60000 } # Making sure we have a reasonable upper bound and that we're not for example returning nanoseconds diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/10_basic.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/10_basic.yml index cf43797a451e7..06139542c5e55 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/10_basic.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/cluster.stats/10_basic.yml @@ -258,8 +258,8 @@ --- "Dense vector stats": - requires: - cluster_features: [ "gte_v8.15.0" ] - reason: "dense vector stats reports from primary indices in 8.15" + cluster_features: [ "gte_v8.16.0" ] + reason: "dense vector stats reports from primary indices in 8.15 and fixed in 8.16" - do: indices.create: index: test1 @@ -329,9 +329,17 @@ - do: indices.refresh: { } + - do: + index: + index: test2 + id: "3" + refresh: true + body: + not_vector_field: "not vector" + - do: { cluster.stats: { } } - - match: { indices.docs.count: 4 } + - match: { indices.docs.count: 5 } - match: { indices.docs.deleted: 0 } - match: { indices.dense_vector.value_count: 8 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml index 22deb7012c4ed..08a50d2c69302 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/indices.create/20_synthetic_source.yml @@ -1204,3 +1204,54 @@ nested object with stored array: - match: { hits.hits.1._source.nested_array_stored.0.b.1.c: 100 } - match: { hits.hits.1._source.nested_array_stored.1.b.0.c: 20 } - match: { hits.hits.1._source.nested_array_stored.1.b.1.c: 200 } + +--- +# 112156 +stored field under object with store_array_source: + - requires: + cluster_features: ["mapper.source.synthetic_source_stored_fields_advance_fix"] + reason: requires bug fix to be implemented + + - do: + indices.create: + index: test + body: + settings: + index: + sort.field: "name" + sort.order: "asc" + mappings: + _source: + mode: synthetic + properties: + name: + type: keyword + obj: + store_array_source: true + properties: + foo: + type: keyword + store: true + + - do: + bulk: + index: test + refresh: true + body: + - '{ "create": { } }' + - '{ "name": "B", "obj": null }' + - '{ "create": { } }' + - '{ "name": "A", "obj": [ { "foo": "hello_from_the_other_side" } ] }' + + - match: { errors: false } + + - do: + search: + index: test + sort: name + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._source.name: A } + - match: { hits.hits.0._source.obj: [ { "foo": "hello_from_the_other_side" } ] } + - match: { hits.hits.1._source.name: B } + - match: { hits.hits.1._source.obj: null } diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml index 4976e5e15adbe..07cb154449a70 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/logsdb/10_settings.yml @@ -5,8 +5,8 @@ setup: capabilities: - method: PUT path: /{index} - capabilities: [logs_index_mode] - reason: "Support for 'logs' index mode capability required" + capabilities: [logsdb_index_mode] + reason: "Support for 'logsdb' index mode capability required" --- create logs index: @@ -15,8 +15,8 @@ create logs index: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -24,7 +24,7 @@ create logs index: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -75,7 +75,7 @@ create logs index: index: test - is_true: test - - match: { test.settings.index.mode: "logs" } + - match: { test.settings.index.mode: "logsdb" } - do: indices.get_mapping: @@ -89,8 +89,8 @@ using default timestamp field mapping: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -98,7 +98,7 @@ using default timestamp field mapping: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -121,17 +121,16 @@ missing hostname field: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: - catch: bad_request indices.create: index: test-hostname-missing body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -147,9 +146,12 @@ missing hostname field: message: type: text - - match: { error.root_cause.0.type: "illegal_argument_exception" } - - match: { error.type: "illegal_argument_exception" } - - match: { error.reason: "unknown index sort field:[host.name]" } + - do: + indices.get_settings: + index: test-hostname-missing + + - is_true: test-hostname-missing + - match: { test-hostname-missing.settings.index.mode: "logsdb" } --- missing sort field: @@ -158,8 +160,8 @@ missing sort field: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -168,7 +170,7 @@ missing sort field: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 sort: @@ -199,8 +201,8 @@ non-default sort settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -209,7 +211,7 @@ non-default sort settings: settings: index: - mode: logs + mode: logsdb 
number_of_shards: 2 number_of_replicas: 0 sort: @@ -235,7 +237,7 @@ non-default sort settings: index: test-sort - is_true: test-sort - - match: { test-sort.settings.index.mode: "logs" } + - match: { test-sort.settings.index.mode: "logsdb" } - match: { test-sort.settings.index.sort.field.0: "agent_id" } - match: { test-sort.settings.index.sort.field.1: "@timestamp" } - match: { test-sort.settings.index.sort.order.0: "asc" } @@ -252,8 +254,8 @@ override sort order settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -262,7 +264,7 @@ override sort order settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -287,7 +289,7 @@ override sort order settings: index: test-sort-order - is_true: test-sort-order - - match: { test-sort-order.settings.index.mode: "logs" } + - match: { test-sort-order.settings.index.mode: "logsdb" } - match: { test-sort-order.settings.index.sort.field.0: null } - match: { test-sort-order.settings.index.sort.field.1: null } - match: { test-sort-order.settings.index.sort.order.0: "asc" } @@ -300,8 +302,8 @@ override sort missing settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -310,7 +312,7 @@ override sort missing settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -335,7 +337,7 @@ override sort missing settings: index: test-sort-missing - is_true: test-sort-missing - - match: { test-sort-missing.settings.index.mode: "logs" } + - match: { test-sort-missing.settings.index.mode: "logsdb" } - match: { test-sort-missing.settings.index.sort.field.0: null } - match: { test-sort-missing.settings.index.sort.field.1: null } - match: { test-sort-missing.settings.index.sort.missing.0: "_last" } @@ -348,8 +350,8 @@ override sort mode settings: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -358,7 +360,7 @@ override sort mode settings: settings: index: - mode: logs + mode: logsdb number_of_shards: 2 number_of_replicas: 0 sort: @@ -383,7 +385,7 @@ override sort mode settings: index: test-sort-mode - is_true: test-sort-mode - - match: { test-sort-mode.settings.index.mode: "logs" } + - match: { test-sort-mode.settings.index.mode: "logsdb" } - match: { test-sort-mode.settings.index.sort.field.0: null } - match: { test-sort-mode.settings.index.sort.field.1: null } - match: { test-sort-mode.settings.index.sort.mode.0: "max" } @@ -397,8 +399,8 @@ override sort field using nested field type in sorting: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -407,7 +409,7 @@ override sort field using nested field type in sorting: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 
2 sort: @@ -444,8 +446,8 @@ override sort field using nested field type: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: indices.create: @@ -453,7 +455,7 @@ override sort field using nested field type: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 mappings: @@ -484,8 +486,8 @@ routing path not allowed in logs mode: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -494,7 +496,7 @@ routing path not allowed in logs mode: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 routing_path: [ "host.name", "agent_id" ] @@ -524,8 +526,8 @@ start time not allowed in logs mode: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -534,7 +536,7 @@ start time not allowed in logs mode: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 time_series: @@ -565,8 +567,8 @@ end time not allowed in logs mode: capabilities: - method: PUT path: /{index} - capabilities: [ logs_index_mode ] - reason: "Support for 'logs' index mode capability required" + capabilities: [ logsdb_index_mode ] + reason: "Support for 'logsdb' index mode capability required" - do: catch: bad_request @@ -575,7 +577,7 @@ end time not allowed in logs mode: body: settings: index: - mode: logs + mode: logsdb number_of_replicas: 0 number_of_shards: 2 time_series: diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml index 72c6abab22600..d255a644183dc 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/100_knn_nested_search.yml @@ -411,3 +411,53 @@ setup: - match: {hits.total.value: 1} - match: {hits.hits.0._id: "2"} +--- +"nested Knn search with required similarity appropriately filters inner_hits": + - requires: + cluster_features: "gte_v8.15.0" + reason: 'bugfix for 8.15' + + - do: + search: + index: test + body: + query: + nested: + path: nested + inner_hits: + size: 3 + _source: false + fields: + - nested.paragraph_id + query: + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + num_candidates: 3 + similarity: 10.5 + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "2"} + - length: {hits.hits.0.inner_hits.nested.hits.hits: 1} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} + + - do: + search: + index: test + body: + knn: + field: nested.vector + query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + num_candidates: 3 + k: 3 + similarity: 10.5 + inner_hits: + size: 3 + _source: false + fields: + - nested.paragraph_id + + - match: {hits.total.value: 1} + - match: {hits.hits.0._id: "2"} + - length: 
{hits.hits.0.inner_hits.nested.hits.hits: 1} + - match: {hits.hits.0.inner_hits.nested.hits.hits.0.fields.nested.0.paragraph_id.0: "0"} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml index 7f0c24e217d14..50f3986fda7bd 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/40_knn_search.yml @@ -28,15 +28,14 @@ setup: type: hnsw m: 16 ef_construction: 200 - - do: index: index: test id: "1" body: name: cow.jpg - vector: [230.0, 300.33, -34.8988, 15.555, -200.0] - another_vector: [130.0, 115.0, -1.02, 15.555, -100.0] + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] + another_vector: [ 130.0, 115.0, -1.02, 15.555, -100.0 ] - do: index: @@ -44,8 +43,8 @@ setup: id: "2" body: name: moose.jpg - vector: [-0.5, 100.0, -13, 14.8, -156.0] - another_vector: [-0.5, 50.0, -1, 1, 120] + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] + another_vector: [ -0.5, 50.0, -1, 1, 120 ] - do: index: @@ -53,11 +52,11 @@ setup: id: "3" body: name: rabbit.jpg - vector: [0.5, 111.3, -13.0, 14.8, -156.0] - another_vector: [-0.5, 11.0, 0, 12, 111.0] + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] + another_vector: [ -0.5, 11.0, 0, 12, 111.0 ] - do: - indices.refresh: {} + indices.refresh: { } --- "kNN search only": @@ -71,15 +70,15 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - - match: {hits.hits.1._id: "3"} - - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } --- "kNN multi-field search only": - requires: @@ -91,14 +90,14 @@ setup: body: fields: [ "name" ] knn: - - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} - - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + - { field: vector, query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ], k: 2, num_candidates: 3 } + - { field: another_vector, query_vector: [ -0.5, 11.0, 0, 12, 111.0 ], k: 2, num_candidates: 3 } - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } --- "kNN search plus query": - requires: @@ -111,21 +110,21 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 query: term: name: cow.jpg - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.0.fields.name.0: "cow.jpg"} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0.fields.name.0: "cow.jpg" } - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } - - match: {hits.hits.2._id: "3"} - - match: {hits.hits.2.fields.name.0: 
"rabbit.jpg"} + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2.fields.name.0: "rabbit.jpg" } --- "kNN multi-field search with query": - requires: @@ -137,20 +136,20 @@ setup: body: fields: [ "name" ] knn: - - {field: vector, query_vector: [-0.5, 90.0, -10, 14.8, -156.0], k: 2, num_candidates: 3} - - {field: another_vector, query_vector: [-0.5, 11.0, 0, 12, 111.0], k: 2, num_candidates: 3} + - { field: vector, query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ], k: 2, num_candidates: 3 } + - { field: another_vector, query_vector: [ -0.5, 11.0, 0, 12, 111.0 ], k: 2, num_candidates: 3 } query: term: name: cow.jpg - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - - match: {hits.hits.1._id: "1"} - - match: {hits.hits.1.fields.name.0: "cow.jpg"} + - match: { hits.hits.1._id: "1" } + - match: { hits.hits.1.fields.name.0: "cow.jpg" } - - match: {hits.hits.2._id: "2"} - - match: {hits.hits.2.fields.name.0: "moose.jpg"} + - match: { hits.hits.2._id: "2" } + - match: { hits.hits.2.fields.name.0: "moose.jpg" } --- "kNN search with filter": - requires: @@ -163,16 +162,16 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: term: name: "rabbit.jpg" - - match: {hits.total.value: 1} - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - do: search: @@ -181,7 +180,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: @@ -190,7 +189,7 @@ setup: - term: _id: 2 - - match: {hits.total.value: 0} + - match: { hits.total.value: 0 } --- "kNN search with explicit search_type": @@ -206,7 +205,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 @@ -216,7 +215,7 @@ setup: --- "kNN search in _knn_search endpoint": - skip: - features: ["allowed_warnings"] + features: [ "allowed_warnings" ] - do: allowed_warnings: - "The kNN search API has been replaced by the `knn` option in the search API." @@ -226,22 +225,22 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - - match: {hits.hits.1._id: "3"} - - match: {hits.hits.1.fields.name.0: "rabbit.jpg"} + - match: { hits.hits.1._id: "3" } + - match: { hits.hits.1.fields.name.0: "rabbit.jpg" } --- "kNN search with filter in _knn_search endpoint": - requires: cluster_features: "gte_v8.2.0" reason: 'kNN with filtering added in 8.2' - test_runner_features: ["allowed_warnings"] + test_runner_features: [ "allowed_warnings" ] - do: allowed_warnings: - "The kNN search API has been replaced by the `knn` option in the search API." 
@@ -251,16 +250,16 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: term: name: "rabbit.jpg" - - match: {hits.total.value: 1} - - match: {hits.hits.0._id: "3"} - - match: {hits.hits.0.fields.name.0: "rabbit.jpg"} + - match: { hits.total.value: 1 } + - match: { hits.hits.0._id: "3" } + - match: { hits.hits.0.fields.name.0: "rabbit.jpg" } - do: allowed_warnings: @@ -271,7 +270,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 2 num_candidates: 3 filter: @@ -280,7 +279,7 @@ setup: - term: _id: 2 - - match: {hits.total.value: 0} + - match: { hits.total.value: 0 } --- "Test nonexistent field": @@ -316,12 +315,12 @@ setup: k: 3 field: vector similarity: 11 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - - length: {hits.hits: 1} + - length: { hits.hits: 1 } - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } --- "Vector similarity with filter only": - requires: @@ -337,13 +336,13 @@ setup: k: 3 field: vector similarity: 11 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] - filter: {"term": {"name": "moose.jpg"}} + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + filter: { "term": { "name": "moose.jpg" } } - - length: {hits.hits: 1} + - length: { hits.hits: 1 } - - match: {hits.hits.0._id: "2"} - - match: {hits.hits.0.fields.name.0: "moose.jpg"} + - match: { hits.hits.0._id: "2" } + - match: { hits.hits.0.fields.name.0: "moose.jpg" } - do: search: @@ -355,10 +354,10 @@ setup: k: 3 field: vector similarity: 110 - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] - filter: {"term": {"name": "cow.jpg"}} + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + filter: { "term": { "name": "cow.jpg" } } - - length: {hits.hits: 0} + - length: { hits.hits: 0 } --- "Knn search with mip": - requires: @@ -390,7 +389,7 @@ setup: id: "1" body: name: cow.jpg - vector: [230.0, 300.33, -34.8988, 15.555, -200.0] + vector: [ 230.0, 300.33, -34.8988, 15.555, -200.0 ] - do: index: @@ -398,7 +397,7 @@ setup: id: "2" body: name: moose.jpg - vector: [-0.5, 100.0, -13, 14.8, -156.0] + vector: [ -0.5, 100.0, -13, 14.8, -156.0 ] - do: index: @@ -406,10 +405,10 @@ setup: id: "3" body: name: rabbit.jpg - vector: [0.5, 111.3, -13.0, 14.8, -156.0] + vector: [ 0.5, 111.3, -13.0, 14.8, -156.0 ] - do: - indices.refresh: {} + indices.refresh: { } - do: search: @@ -420,16 +419,16 @@ setup: num_candidates: 3 k: 3 field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] - - length: {hits.hits: 3} - - match: {hits.hits.0._id: "1"} - - close_to: {hits.hits.0._score: {value: 58694.902, error: 0.01}} - - match: {hits.hits.1._id: "3"} - - close_to: {hits.hits.1._score: {value: 34702.79, error: 0.01}} - - match: {hits.hits.2._id: "2"} - - close_to: {hits.hits.2._score: {value: 33686.29, error: 0.01}} + - length: { hits.hits: 3 } + - match: { hits.hits.0._id: "1" } + - close_to: { hits.hits.0._score: { value: 58694.902, error: 0.01 } } + - match: { hits.hits.1._id: "3" } + - close_to: { hits.hits.1._score: { value: 34702.79, error: 0.01 } } + - match: { hits.hits.2._id: "2" } + - close_to: { hits.hits.2._score: { value: 33686.29, error: 0.01 } } - do: search: @@ -440,14 +439,14 @@ setup: 
num_candidates: 3 k: 3 field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] filter: { "term": { "name": "moose.jpg" } } - - length: {hits.hits: 1} - - match: {hits.hits.0._id: "2"} - - close_to: {hits.hits.0._score: {value: 33686.29, error: 0.01}} + - length: { hits.hits: 1 } + - match: { hits.hits.0._id: "2" } + - close_to: { hits.hits.0._score: { value: 33686.29, error: 0.01 } } --- "Knn search with _name": - requires: @@ -462,7 +461,7 @@ setup: fields: [ "name" ] knn: field: vector - query_vector: [-0.5, 90.0, -10, 14.8, -156.0] + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] k: 3 num_candidates: 3 _name: "my_knn_query" @@ -473,15 +472,44 @@ setup: _name: "my_query" - - match: {hits.hits.0._id: "1"} - - match: {hits.hits.0.fields.name.0: "cow.jpg"} - - match: {hits.hits.0.matched_queries.0: "my_knn_query"} - - match: {hits.hits.0.matched_queries.1: "my_query"} + - match: { hits.hits.0._id: "1" } + - match: { hits.hits.0.fields.name.0: "cow.jpg" } + - match: { hits.hits.0.matched_queries.0: "my_knn_query" } + - match: { hits.hits.0.matched_queries.1: "my_query" } + + - match: { hits.hits.1._id: "2" } + - match: { hits.hits.1.fields.name.0: "moose.jpg" } + - match: { hits.hits.1.matched_queries.0: "my_knn_query" } + + - match: { hits.hits.2._id: "3" } + - match: { hits.hits.2.fields.name.0: "rabbit.jpg" } + - match: { hits.hits.2.matched_queries.0: "my_knn_query" } + +--- +"kNN search on empty index should return 0 results and not an error": + - requires: + cluster_features: "gte_v8.15.1" + reason: 'Error fixed in 8.15.1' + + - do: + indices.create: + index: test_empty + body: + mappings: + properties: + vector: + type: dense_vector + + - do: + search: + index: test_empty + body: + fields: [ "name" ] + knn: + field: vector + query_vector: [ -0.5, 90.0, -10, 14.8, -156.0 ] + k: 2 + num_candidates: 3 - - match: {hits.hits.1._id: "2"} - - match: {hits.hits.1.fields.name.0: "moose.jpg"} - - match: {hits.hits.1.matched_queries.0: "my_knn_query"} + - match: { hits.total.value: 0 } - - match: {hits.hits.2._id: "3"} - - match: {hits.hits.2.fields.name.0: "rabbit.jpg"} - - match: {hits.hits.2.matched_queries.0: "my_knn_query"} diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml index cb5aae482507a..5f1af2ca5c52f 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search.vectors/41_knn_search_half_byte_quantized.yml @@ -428,7 +428,7 @@ setup: index: hnsw_byte_quantized_merge_cosine id: "1" body: - embedding: [1.0, 1.0, 1.0, 1.0] + embedding: [0.5, 0.5, 0.5, 0.5, 0.5, 1.0] # Flush in order to provoke a merge later - do: @@ -439,7 +439,7 @@ setup: index: hnsw_byte_quantized_merge_cosine id: "2" body: - embedding: [1.0, 1.0, 1.0, 2.0] + embedding: [0.0, 0.0, 0.0, 1.0, 1.0, 0.5] # Flush in order to provoke a merge later - do: @@ -450,7 +450,7 @@ setup: index: hnsw_byte_quantized_merge_cosine id: "3" body: - embedding: [1.0, 1.0, 1.0, 3.0] + embedding: [0.0, 0.0, 0.0, 0.0, 0.0, 10.5] - do: indices.forcemerge: @@ -468,7 +468,7 @@ setup: query: knn: field: embedding - query_vector: [1.0, 1.0, 1.0, 1.0] + query_vector: [1.0, 1.0, 1.0, 1.0, 1.0, 1.0] num_candidates: 10 - length: { hits.hits: 3 } diff --git 
a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml index c4815304e0799..47876d2820aac 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/180_locale_dependent_mapping.yml @@ -1,6 +1,11 @@ +setup: + - requires: + test_runner_features: allowed_warnings_regex --- "Test Index and Search locale dependent mappings / dates": - do: + allowed_warnings_regex: + - 'Date format \[E, d MMM yyyy HH:mm:ss Z] contains textual field specifiers that could change in JDK 23.*' indices.create: index: test_index body: @@ -11,26 +16,26 @@ date_field: type: date format: "E, d MMM yyyy HH:mm:ss Z" - locale: "de" + locale: "fr" - do: bulk: refresh: true body: - '{"index": {"_index": "test_index", "_id": "1"}}' - - '{"date_field": "Mi, 06 Dez 2000 02:55:00 -0800"}' + - '{"date_field": "mer., 6 déc. 2000 02:55:00 -0800"}' - '{"index": {"_index": "test_index", "_id": "2"}}' - - '{"date_field": "Do, 07 Dez 2000 02:55:00 -0800"}' + - '{"date_field": "jeu., 7 déc. 2000 02:55:00 -0800"}' - do: search: rest_total_hits_as_int: true index: test_index - body: {"query" : {"range" : {"date_field" : {"gte": "Di, 05 Dez 2000 02:55:00 -0800", "lte": "Do, 07 Dez 2000 00:00:00 -0800"}}}} + body: {"query" : {"range" : {"date_field" : {"gte": "mar., 5 déc. 2000 02:55:00 -0800", "lte": "jeu., 7 déc. 2000 00:00:00 -0800"}}}} - match: { hits.total: 1 } - do: search: rest_total_hits_as_int: true index: test_index - body: {"query" : {"range" : {"date_field" : {"gte": "Di, 05 Dez 2000 02:55:00 -0800", "lte": "Fr, 08 Dez 2000 00:00:00 -0800"}}}} + body: {"query" : {"range" : {"date_field" : {"gte": "mar., 5 déc. 2000 02:55:00 -0800", "lte": "ven., 8 déc. 
2000 00:00:00 -0800"}}}} - match: { hits.total: 2 } diff --git a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml index 82fb18a879346..99bd001bd95e2 100644 --- a/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml +++ b/rest-api-spec/src/yamlRestTest/resources/rest-api-spec/test/search/230_interval_query.yml @@ -21,6 +21,10 @@ setup: - '{"text" : "Baby its cold there outside"}' - '{"index": {"_index": "test", "_id": "4"}}' - '{"text" : "Outside it is cold and wet"}' + - '{"index": {"_index": "test", "_id": "5"}}' + - '{"text" : "the big bad wolf"}' + - '{"index": {"_index": "test", "_id": "6"}}' + - '{"text" : "the big wolf"}' --- "Test ordered matching": @@ -444,4 +448,31 @@ setup: prefix: out - match: { hits.total.value: 3 } +--- +"Test rewrite disjunctions": + - do: + search: + index: test + body: + query: + intervals: + text: + all_of: + intervals: + - "match": + "query": "the" + - "any_of": + "intervals": + - "match": + "query": "big" + - "match": + "query": "big bad" + - "match": + "query": "wolf" + max_gaps: 0 + ordered: true + + - match: { hits.total.value: 2 } + - match: { hits.hits.0._id: "6" } + - match: { hits.hits.1._id: "5" } diff --git a/server/build.gradle b/server/build.gradle index e62abed2bc75a..2628158c7d14a 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -40,8 +40,7 @@ dependencies { implementation project(":libs:elasticsearch-simdvec") implementation project(':libs:elasticsearch-plugin-classloader') - // no compile dependency by server, but server defines security policy for this codebase so it i> - runtimeOnly project(":libs:elasticsearch-preallocate") + implementation project(":libs:elasticsearch-preallocate") // lucene api "org.apache.lucene:lucene-core:${versions.lucene}" diff --git a/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java b/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java index 5adc0b090ed37..d531686bb5207 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/action/bulk/BulkAfterWriteFsyncFailureIT.java @@ -29,6 +29,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import static org.elasticsearch.index.IndexSettings.INDEX_REFRESH_INTERVAL_SETTING; +import static org.elasticsearch.indices.IndicesService.WRITE_DANGLING_INDICES_INFO_SETTING; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; import static org.hamcrest.Matchers.is; @@ -48,6 +49,11 @@ public static void removeDisruptFSyncFS() { PathUtilsForTesting.teardown(); } + @Override + protected Settings nodeSettings() { + return Settings.builder().put(WRITE_DANGLING_INDICES_INFO_SETTING.getKey(), false).build(); + } + public void testFsyncFailureDoesNotAdvanceLocalCheckpoints() { String indexName = randomIdentifier(); client().admin() diff --git a/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java index 24bf198b7b42f..448e6b44f8fb7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/features/ClusterFeaturesIT.java @@ -30,6 +30,7 
@@ public void testClusterHasFeatures() { FeatureService service = internalCluster().getCurrentMasterNodeInstance(FeatureService.class); assertThat(service.getNodeFeatures(), hasKey(FeatureService.FEATURES_SUPPORTED.id())); + assertThat(service.getNodeFeatures(), hasKey(FeatureService.TEST_FEATURES_ENABLED.id())); // check the nodes all have a feature in their cluster state (there should always be features_supported) var response = clusterAdmin().state(new ClusterStateRequest().clear().nodes(true)).actionGet(); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java index 4ce92610eff17..f2e662fb2d589 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/reservedstate/service/ComponentTemplatesFileSettingsIT.java @@ -128,8 +128,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -159,8 +158,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -190,8 +188,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -259,8 +256,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -290,8 +286,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -335,8 +330,7 @@ public class ComponentTemplatesFileSettingsIT extends ESIntegTestCase { "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -662,8 +656,7 @@ private TransportPutComposableIndexTemplateAction.Request sampleIndexTemplateRes "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, @@ -704,8 +697,7 @@ private TransportPutComposableIndexTemplateAction.Request sampleIndexTemplateRes "type": "keyword" }, "created_at": { - "type": "date", - "format": "EEE MMM dd HH:mm:ss Z yyyy" + "type": "date" } } }, diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java index f5fdd752a6f57..6e52ed96cb6f7 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/CollapseSearchResultsIT.java @@ -13,6 +13,7 @@ import org.elasticsearch.index.query.MatchAllQueryBuilder; import org.elasticsearch.search.collapse.CollapseBuilder; import org.elasticsearch.test.ESIntegTestCase; +import org.elasticsearch.xcontent.XContentType; import java.util.Map; import java.util.Set; @@ -61,4 +62,56 @@ public void testCollapseWithDocValueFields() { } ); } + + public void 
testCollapseWithFields() { + final String indexName = "test_collapse"; + createIndex(indexName); + final String collapseField = "collapse_field"; + final String otherField = "other_field"; + assertAcked(indicesAdmin().preparePutMapping(indexName).setSource(collapseField, "type=keyword", otherField, "type=keyword")); + index(indexName, "id_1_0", Map.of(collapseField, "value1", otherField, "other_value1")); + index(indexName, "id_1_1", Map.of(collapseField, "value1", otherField, "other_value2")); + index(indexName, "id_2_0", Map.of(collapseField, "value2", otherField, "other_value3")); + refresh(indexName); + + assertNoFailuresAndResponse( + prepareSearch(indexName).setQuery(new MatchAllQueryBuilder()) + .setFetchSource(false) + .addFetchField(otherField) + .setCollapse(new CollapseBuilder(collapseField).setInnerHits(new InnerHitBuilder("ih").setSize(2))), + searchResponse -> { + assertEquals(collapseField, searchResponse.getHits().getCollapseField()); + assertEquals(Set.of(new BytesRef("value1"), new BytesRef("value2")), Set.of(searchResponse.getHits().getCollapseValues())); + } + ); + } + + public void testCollapseWithStoredFields() { + final String indexName = "test_collapse"; + createIndex(indexName); + final String collapseField = "collapse_field"; + assertAcked(indicesAdmin().preparePutMapping(indexName).setSource(""" + { + "dynamic": "strict", + "properties": { + "collapse_field": { "type": "keyword", "store": true }, + "ts": { "type": "date", "store": true } + } + } + """, XContentType.JSON)); + index(indexName, "id_1_0", Map.of(collapseField, "value1", "ts", 0)); + index(indexName, "id_1_1", Map.of(collapseField, "value1", "ts", 1)); + index(indexName, "id_2_0", Map.of(collapseField, "value2", "ts", 2)); + refresh(indexName); + + assertNoFailuresAndResponse( + prepareSearch(indexName).setQuery(new MatchAllQueryBuilder()) + .setFetchSource(false) + .storedFields("*") + .setCollapse(new CollapseBuilder(collapseField)), + searchResponse -> { + assertEquals(collapseField, searchResponse.getHits().getCollapseField()); + } + ); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/NestedIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/NestedIT.java index 1cce1ab3b1c4c..7e908572c1fa6 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/NestedIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/bucket/NestedIT.java @@ -8,6 +8,7 @@ package org.elasticsearch.search.aggregations.bucket; import org.apache.lucene.search.join.ScoreMode; +import org.elasticsearch.action.admin.indices.delete.DeleteIndexRequest; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.index.query.InnerHitBuilder; @@ -17,11 +18,15 @@ import org.elasticsearch.search.aggregations.InternalAggregation; import org.elasticsearch.search.aggregations.bucket.filter.Filter; import org.elasticsearch.search.aggregations.bucket.histogram.Histogram; +import org.elasticsearch.search.aggregations.bucket.nested.InternalNested; import org.elasticsearch.search.aggregations.bucket.nested.Nested; +import org.elasticsearch.search.aggregations.bucket.nested.NestedAggregationBuilder; import org.elasticsearch.search.aggregations.bucket.terms.LongTerms; import org.elasticsearch.search.aggregations.bucket.terms.StringTerms; import org.elasticsearch.search.aggregations.bucket.terms.Terms; import 
org.elasticsearch.search.aggregations.bucket.terms.Terms.Bucket; +import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregationBuilder; +import org.elasticsearch.search.aggregations.metrics.CardinalityAggregationBuilder; import org.elasticsearch.search.aggregations.metrics.Max; import org.elasticsearch.search.aggregations.metrics.Stats; import org.elasticsearch.search.aggregations.metrics.Sum; @@ -889,4 +894,87 @@ public void testSyntheticSource() throws Exception { assertEquals("a", nested.get("number")); }); } + + public void testScoring() throws Exception { + assertAcked( + prepareCreate("scoring").setMapping( + jsonBuilder().startObject() + .startObject("properties") + .startObject("tags") + .field("type", "nested") + .startObject("properties") + .startObject("key") + .field("type", "keyword") + .endObject() + .startObject("value") + .field("type", "keyword") + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + ) + ); + ensureGreen("scoring"); + + prepareIndex("scoring").setId("1") + .setSource( + jsonBuilder().startObject() + .startArray("tags") + .startObject() + .field("key", "state") + .field("value", "texas") + .endObject() + .endArray() + .endObject() + ) + .get(); + refresh("scoring"); + prepareIndex("scoring").setId("2") + .setSource( + jsonBuilder().startObject() + .startArray("tags") + .startObject() + .field("key", "state") + .field("value", "utah") + .endObject() + .endArray() + .endObject() + ) + .get(); + refresh("scoring"); + prepareIndex("scoring").setId("3") + .setSource( + jsonBuilder().startObject() + .startArray("tags") + .startObject() + .field("key", "state") + .field("value", "texas") + .endObject() + .endArray() + .endObject() + ) + .get(); + refresh("scoring"); + + assertResponse( + client().prepareSearch("scoring") + .setSize(0) + .addAggregation( + new NestedAggregationBuilder("tags", "tags").subAggregation( + new TermsAggregationBuilder("keys").field("tags.key") + .executionHint("map") + .subAggregation(new TermsAggregationBuilder("values").field("tags.value")) + .subAggregation(new CardinalityAggregationBuilder("values_count").field("tags.value")) + ) + ), + searchResponse -> { + InternalNested nested = searchResponse.getAggregations().get("tags"); + assertThat(nested.getDocCount(), equalTo(3L)); + assertThat(nested.getAggregations().asList().size(), equalTo(1)); + } + ); + + assertAcked(indicesAdmin().delete(new DeleteIndexRequest("scoring")).get()); + } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java index 6c80931914ac6..3c0617153a074 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/MedianAbsoluteDeviationIT.java @@ -129,7 +129,7 @@ protected Collection> nodePlugins() { private static MedianAbsoluteDeviationAggregationBuilder randomBuilder() { final MedianAbsoluteDeviationAggregationBuilder builder = new MedianAbsoluteDeviationAggregationBuilder("mad"); if (randomBoolean()) { - builder.compression(randomDoubleBetween(25, 1000, false)); + builder.compression(randomDoubleBetween(30, 1000, false)); } return builder; } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java 
b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java index 0a6fceea9a3f1..d9d6979ffd710 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fetch/subphase/highlight/HighlighterSearchIT.java @@ -2177,6 +2177,15 @@ public void testHighlightNoMatchSize() throws IOException { field.highlighterType("unified"); assertNotHighlighted(prepareSearch("test").highlighter(new HighlightBuilder().field(field)), 0, "text"); + + // Check when the requested fragment size equals the size of the string + var anotherText = "I am unusual and don't end with your regular )token)"; + indexDoc("test", "1", "text", anotherText); + refresh(); + for (String type : new String[] { "plain", "unified", "fvh" }) { + field.highlighterType(type).noMatchSize(anotherText.length()).numOfFragments(0); + assertHighlight(prepareSearch("test").highlighter(new HighlightBuilder().field(field)), 0, "text", 0, 1, equalTo(anotherText)); + } } public void testHighlightNoMatchSizeWithMultivaluedFields() throws IOException { diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java index 076158ee22037..0bce9ecb178d0 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/fieldcaps/FieldCapabilitiesIT.java @@ -859,7 +859,7 @@ protected String contentType() { @Override public SourceLoader.SyntheticFieldLoader syntheticFieldLoader() { - return new StringStoredFieldFieldLoader(fullPath(), leafName(), null) { + return new StringStoredFieldFieldLoader(fullPath(), leafName()) { @Override protected void write(XContentBuilder b, Object value) throws IOException { BytesRef ref = (BytesRef) value; diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java index 384395bcb78e7..0a30de1bb3741 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SearchQueryIT.java @@ -1629,14 +1629,8 @@ public void testRangeQueryWithTimeZone() throws Exception { * Test range with a custom locale, e.g. "de" in this case. Documents here mention the day of week * as "Mi" for "Mittwoch (Wednesday" and "Do" for "Donnerstag (Thursday)" and the month in the query * as "Dez" for "Dezember (December)". - * Note: this test currently needs the JVM arg `-Djava.locale.providers=SPI,COMPAT` to be set. - * When running with gradle this is done implicitly through the BuildPlugin, but when running from - * an IDE this might need to be set manually in the run configuration. See also CONTRIBUTING.md section - * on "Configuring IDEs And Running Tests". 
*/ public void testRangeQueryWithLocaleMapping() throws Exception { - assert ("SPI,COMPAT".equals(System.getProperty("java.locale.providers"))) : "`-Djava.locale.providers=SPI,COMPAT` needs to be set"; - assertAcked( prepareCreate("test").setMapping( jsonBuilder().startObject() @@ -1644,7 +1638,7 @@ public void testRangeQueryWithLocaleMapping() throws Exception { .startObject("date_field") .field("type", "date") .field("format", "E, d MMM yyyy HH:mm:ss Z") - .field("locale", "de") + .field("locale", "fr") .endObject() .endObject() .endObject() @@ -1653,19 +1647,19 @@ public void testRangeQueryWithLocaleMapping() throws Exception { indexRandom( true, - prepareIndex("test").setId("1").setSource("date_field", "Mi, 06 Dez 2000 02:55:00 -0800"), - prepareIndex("test").setId("2").setSource("date_field", "Do, 07 Dez 2000 02:55:00 -0800") + prepareIndex("test").setId("1").setSource("date_field", "mer., 6 déc. 2000 02:55:00 -0800"), + prepareIndex("test").setId("2").setSource("date_field", "jeu., 7 déc. 2000 02:55:00 -0800") ); assertHitCount( prepareSearch("test").setQuery( - QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Do, 07 Dez 2000 00:00:00 -0800") + QueryBuilders.rangeQuery("date_field").gte("mar., 5 déc. 2000 02:55:00 -0800").lte("jeu., 7 déc. 2000 00:00:00 -0800") ), 1L ); assertHitCount( prepareSearch("test").setQuery( - QueryBuilders.rangeQuery("date_field").gte("Di, 05 Dez 2000 02:55:00 -0800").lte("Fr, 08 Dez 2000 00:00:00 -0800") + QueryBuilders.rangeQuery("date_field").gte("mar., 5 déc. 2000 02:55:00 -0800").lte("ven., 8 déc. 2000 00:00:00 -0800") ), 2L ); diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java index 7630ddb000140..7ff9304038cd9 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/query/SimpleQueryStringIT.java @@ -581,6 +581,32 @@ public void testFieldAliasOnDisallowedFieldType() throws Exception { }); } + public void testSimpleQueryStringWithAnalysisStopWords() throws Exception { + String mapping = Strings.toString( + XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("body") + .field("type", "text") + .field("analyzer", "stop") + .endObject() + .endObject() + .endObject() + ); + + CreateIndexRequestBuilder mappingRequest = indicesAdmin().prepareCreate("test1").setMapping(mapping); + mappingRequest.get(); + indexRandom(true, prepareIndex("test1").setId("1").setSource("body", "Some Text")); + refresh(); + + assertHitCount( + prepareSearch().setQuery( + simpleQueryStringQuery("the* text*").analyzeWildcard(true).defaultOperator(Operator.AND).field("body") + ), + 1 + ); + } + private void assertHits(SearchHits hits, String... 
ids) { assertThat(hits.getTotalHits().value, equalTo((long) ids.length)); Set hitIds = new HashSet<>(); diff --git a/server/src/main/java/module-info.java b/server/src/main/java/module-info.java index aaf8b3d0c8d84..bf5f88d264612 100644 --- a/server/src/main/java/module-info.java +++ b/server/src/main/java/module-info.java @@ -33,6 +33,7 @@ requires org.elasticsearch.grok; requires org.elasticsearch.tdigest; requires org.elasticsearch.simdvec; + requires org.elasticsearch.preallocate; requires com.sun.jna; requires hppc; @@ -431,6 +432,7 @@ org.elasticsearch.indices.IndicesFeatures, org.elasticsearch.action.admin.cluster.allocation.AllocationStatsFeatures, org.elasticsearch.index.mapper.MapperFeatures, + org.elasticsearch.ingest.IngestGeoIpFeatures, org.elasticsearch.search.SearchFeatures, org.elasticsearch.script.ScriptFeatures, org.elasticsearch.search.retriever.RetrieversFeatures, @@ -464,4 +466,5 @@ org.elasticsearch.serverless.shardhealth, org.elasticsearch.serverless.apifiltering; exports org.elasticsearch.lucene.spatial; + } diff --git a/server/src/main/java/org/elasticsearch/ReleaseVersions.java b/server/src/main/java/org/elasticsearch/ReleaseVersions.java index 7b5c8d1d42382..bb90bc79a528a 100644 --- a/server/src/main/java/org/elasticsearch/ReleaseVersions.java +++ b/server/src/main/java/org/elasticsearch/ReleaseVersions.java @@ -41,7 +41,7 @@ public class ReleaseVersions { private static final Pattern VERSION_LINE = Pattern.compile("(\\d+\\.\\d+\\.\\d+),(\\d+)"); - public static IntFunction generateVersionsLookup(Class versionContainer) { + public static IntFunction generateVersionsLookup(Class versionContainer, int current) { if (USES_VERSIONS == false) return Integer::toString; try { @@ -52,6 +52,9 @@ public static IntFunction generateVersionsLookup(Class versionContain } NavigableMap> versions = new TreeMap<>(); + // add the current version id, which won't be in the csv + versions.computeIfAbsent(current, k -> new ArrayList<>()).add(Version.CURRENT); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(versionsFile, StandardCharsets.UTF_8))) { String line; while ((line = reader.readLine()) != null) { @@ -121,8 +124,8 @@ private static IntFunction lookupFunction(NavigableMap + *

<ul> + *   <li>https://github.com/elastic/elasticsearch/pull/112008</li> + *   <li>https://github.com/elastic/elasticsearch/pull/111447</li> + * </ul> + * so it incorporates the changes of two transport versions from 8.16: + * <ul> + *   <li>ESQL_ES_FIELD_CACHED_SERIALIZATION</li> + *   <li>ESQL_ATTRIBUTE_CACHED_SERIALIZATION</li> + * </ul>
    + */ + public static final TransportVersion ESQL_ATTRIBUTE_CACHED_SERIALIZATION_8_15 = def(8_702_00_3); /* * STOP! READ THIS FIRST! No, really, @@ -329,7 +345,7 @@ static Collection getAllVersions() { return VERSION_IDS.values(); } - static final IntFunction VERSION_LOOKUP = ReleaseVersions.generateVersionsLookup(TransportVersions.class); + static final IntFunction VERSION_LOOKUP = ReleaseVersions.generateVersionsLookup(TransportVersions.class, LATEST_DEFINED.id()); // no instance private TransportVersions() {} diff --git a/server/src/main/java/org/elasticsearch/Version.java b/server/src/main/java/org/elasticsearch/Version.java index b2c78453d9c75..c44fe8ed5580e 100644 --- a/server/src/main/java/org/elasticsearch/Version.java +++ b/server/src/main/java/org/elasticsearch/Version.java @@ -123,6 +123,8 @@ public class Version implements VersionId, ToXContentFragment { public static final Version V_7_17_21 = new Version(7_17_21_99); public static final Version V_7_17_22 = new Version(7_17_22_99); public static final Version V_7_17_23 = new Version(7_17_23_99); + public static final Version V_7_17_24 = new Version(7_17_24_99); + public static final Version V_7_17_25 = new Version(7_17_25_99); public static final Version V_8_0_0 = new Version(8_00_00_99); public static final Version V_8_0_1 = new Version(8_00_01_99); @@ -178,8 +180,12 @@ public class Version implements VersionId, ToXContentFragment { public static final Version V_8_14_0 = new Version(8_14_00_99); public static final Version V_8_14_1 = new Version(8_14_01_99); public static final Version V_8_14_2 = new Version(8_14_02_99); + public static final Version V_8_14_3 = new Version(8_14_03_99); public static final Version V_8_15_0 = new Version(8_15_00_99); - public static final Version CURRENT = V_8_15_0; + public static final Version V_8_15_1 = new Version(8_15_01_99); + public static final Version V_8_15_2 = new Version(8_15_02_99); + public static final Version V_8_15_3 = new Version(8_15_03_99); + public static final Version CURRENT = V_8_15_3; private static final NavigableMap VERSION_IDS; private static final Map VERSION_STRINGS; diff --git a/server/src/main/java/org/elasticsearch/action/ActionModule.java b/server/src/main/java/org/elasticsearch/action/ActionModule.java index b550755ce7bdd..a9c6894355cb6 100644 --- a/server/src/main/java/org/elasticsearch/action/ActionModule.java +++ b/server/src/main/java/org/elasticsearch/action/ActionModule.java @@ -30,6 +30,7 @@ import org.elasticsearch.action.admin.cluster.migration.TransportGetFeatureUpgradeStatusAction; import org.elasticsearch.action.admin.cluster.migration.TransportPostFeatureUpgradeAction; import org.elasticsearch.action.admin.cluster.node.capabilities.TransportNodesCapabilitiesAction; +import org.elasticsearch.action.admin.cluster.node.features.TransportNodesFeaturesAction; import org.elasticsearch.action.admin.cluster.node.hotthreads.TransportNodesHotThreadsAction; import org.elasticsearch.action.admin.cluster.node.info.TransportNodesInfoAction; import org.elasticsearch.action.admin.cluster.node.reload.TransportNodesReloadSecureSettingsAction; @@ -621,6 +622,7 @@ public void reg actions.register(TransportNodesInfoAction.TYPE, TransportNodesInfoAction.class); actions.register(TransportRemoteInfoAction.TYPE, TransportRemoteInfoAction.class); actions.register(TransportNodesCapabilitiesAction.TYPE, TransportNodesCapabilitiesAction.class); + actions.register(TransportNodesFeaturesAction.TYPE, TransportNodesFeaturesAction.class); 
actions.register(RemoteClusterNodesAction.TYPE, RemoteClusterNodesAction.TransportAction.class); actions.register(TransportNodesStatsAction.TYPE, TransportNodesStatsAction.class); actions.register(TransportNodesUsageAction.TYPE, TransportNodesUsageAction.class); diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodeFeatures.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodeFeatures.java new file mode 100644 index 0000000000000..b33520624d114 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodeFeatures.java @@ -0,0 +1,42 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.support.nodes.BaseNodeResponse; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.Set; + +public class NodeFeatures extends BaseNodeResponse { + + private final Set features; + + public NodeFeatures(StreamInput in) throws IOException { + super(in); + features = in.readCollectionAsImmutableSet(StreamInput::readString); + } + + public NodeFeatures(Set features, DiscoveryNode node) { + super(node); + this.features = Set.copyOf(features); + } + + public Set nodeFeatures() { + return features; + } + + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeCollection(features, StreamOutput::writeString); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesRequest.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesRequest.java new file mode 100644 index 0000000000000..83b6fff7cf2b2 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesRequest.java @@ -0,0 +1,17 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.support.nodes.BaseNodesRequest; + +public class NodesFeaturesRequest extends BaseNodesRequest { + public NodesFeaturesRequest(String... nodes) { + super(nodes); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesResponse.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesResponse.java new file mode 100644 index 0000000000000..0fca588216b15 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/NodesFeaturesResponse.java @@ -0,0 +1,35 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. 
Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.support.TransportAction; +import org.elasticsearch.action.support.nodes.BaseNodesResponse; +import org.elasticsearch.cluster.ClusterName; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.common.io.stream.StreamOutput; + +import java.io.IOException; +import java.util.List; + +public class NodesFeaturesResponse extends BaseNodesResponse { + public NodesFeaturesResponse(ClusterName clusterName, List nodes, List failures) { + super(clusterName, nodes, failures); + } + + @Override + protected List readNodesFrom(StreamInput in) throws IOException { + return TransportAction.localOnly(); + } + + @Override + protected void writeNodesTo(StreamOutput out, List nodes) throws IOException { + TransportAction.localOnly(); + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java new file mode 100644 index 0000000000000..d1b7a4f1b7e95 --- /dev/null +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/features/TransportNodesFeaturesAction.java @@ -0,0 +1,91 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.action.admin.cluster.node.features; + +import org.elasticsearch.action.ActionType; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.support.ActionFilters; +import org.elasticsearch.action.support.nodes.TransportNodesAction; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.common.inject.Inject; +import org.elasticsearch.common.io.stream.StreamInput; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.features.FeatureService; +import org.elasticsearch.tasks.Task; +import org.elasticsearch.threadpool.ThreadPool; +import org.elasticsearch.transport.TransportRequest; +import org.elasticsearch.transport.TransportService; + +import java.io.IOException; +import java.util.List; + +@UpdateForV9 +// @UpdateForV10 // this can be removed in v10. It may be called by v8 nodes to v9 nodes. 
+public class TransportNodesFeaturesAction extends TransportNodesAction< + NodesFeaturesRequest, + NodesFeaturesResponse, + TransportNodesFeaturesAction.NodeFeaturesRequest, + NodeFeatures> { + + public static final ActionType TYPE = new ActionType<>("cluster:monitor/nodes/features"); + + private final FeatureService featureService; + + @Inject + public TransportNodesFeaturesAction( + ThreadPool threadPool, + ClusterService clusterService, + TransportService transportService, + ActionFilters actionFilters, + FeatureService featureService + ) { + super( + TYPE.name(), + clusterService, + transportService, + actionFilters, + NodeFeaturesRequest::new, + threadPool.executor(ThreadPool.Names.MANAGEMENT) + ); + this.featureService = featureService; + } + + @Override + protected NodesFeaturesResponse newResponse( + NodesFeaturesRequest request, + List responses, + List failures + ) { + return new NodesFeaturesResponse(clusterService.getClusterName(), responses, failures); + } + + @Override + protected NodeFeaturesRequest newNodeRequest(NodesFeaturesRequest request) { + return new NodeFeaturesRequest(); + } + + @Override + protected NodeFeatures newNodeResponse(StreamInput in, DiscoveryNode node) throws IOException { + return new NodeFeatures(in); + } + + @Override + protected NodeFeatures nodeOperation(NodeFeaturesRequest request, Task task) { + return new NodeFeatures(featureService.getNodeFeatures().keySet(), transportService.getLocalNode()); + } + + public static class NodeFeaturesRequest extends TransportRequest { + public NodeFeaturesRequest(StreamInput in) throws IOException { + super(in); + } + + public NodeFeaturesRequest() {} + } +} diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java index f906b7d659b7b..82df12d9cfef7 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/node/reload/TransportNodesReloadSecureSettingsAction.java @@ -123,6 +123,7 @@ protected NodesReloadSecureSettingsResponse.NodeResponse nodeOperation( final List exceptions = new ArrayList<>(); // broadcast the new settings object (with the open embedded keystore) to all reloadable plugins pluginsService.filterPlugins(ReloadablePlugin.class).forEach(p -> { + logger.debug("Reloading plugin [" + p.getClass().getSimpleName() + "]"); try { p.reload(settingsWithKeystore); } catch (final Exception e) { diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java index 28f970eb8c9fe..7cf1d55622b7c 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/snapshots/status/TransportSnapshotsStatusAction.java @@ -331,8 +331,7 @@ private void loadRepositoryData( final SnapshotsInProgress.State state = switch (snapshotInfo.state()) { case FAILED -> SnapshotsInProgress.State.FAILED; case SUCCESS, PARTIAL -> - // Translating both PARTIAL and SUCCESS to SUCCESS for now - // TODO: add the differentiation on the metadata level in the next major release + // Both of these means the snapshot has 
completed. SnapshotsInProgress.State.SUCCESS; default -> throw new IllegalArgumentException("Unexpected snapshot state " + snapshotInfo.state()); }; diff --git a/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java b/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java index bd12cfdbc7962..429ebe365bbe1 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java +++ b/server/src/main/java/org/elasticsearch/action/admin/indices/segments/IndicesSegmentResponse.java @@ -36,7 +36,7 @@ public class IndicesSegmentResponse extends ChunkedBroadcastResponse { private volatile Map indicesSegments; - IndicesSegmentResponse( + public IndicesSegmentResponse( ShardSegments[] shards, int totalShards, int successfulShards, diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportAbstractBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportAbstractBulkAction.java index ff306cfb08745..c44ad505aea84 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportAbstractBulkAction.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/TransportAbstractBulkAction.java @@ -56,7 +56,7 @@ public abstract class TransportAbstractBulkAction extends HandledTransportAction protected final SystemIndices systemIndices; private final IngestService ingestService; private final IngestActionForwarder ingestForwarder; - protected final LongSupplier relativeTimeProvider; + protected final LongSupplier relativeTimeNanosProvider; protected final Executor writeExecutor; protected final Executor systemWriteExecutor; private final ActionType bulkAction; @@ -71,7 +71,7 @@ public TransportAbstractBulkAction( IngestService ingestService, IndexingPressure indexingPressure, SystemIndices systemIndices, - LongSupplier relativeTimeProvider + LongSupplier relativeTimeNanosProvider ) { super(action.name(), transportService, actionFilters, requestReader, EsExecutors.DIRECT_EXECUTOR_SERVICE); this.threadPool = threadPool; @@ -83,7 +83,7 @@ public TransportAbstractBulkAction( this.systemWriteExecutor = threadPool.executor(ThreadPool.Names.SYSTEM_WRITE); this.ingestForwarder = new IngestActionForwarder(transportService); clusterService.addStateApplier(this.ingestForwarder); - this.relativeTimeProvider = relativeTimeProvider; + this.relativeTimeNanosProvider = relativeTimeNanosProvider; this.bulkAction = action; } @@ -216,7 +216,7 @@ private void processBulkIndexIngestRequest( Metadata metadata, ActionListener listener ) { - final long ingestStartTimeInNanos = System.nanoTime(); + final long ingestStartTimeInNanos = relativeTimeNanos(); final BulkRequestModifier bulkRequestModifier = new BulkRequestModifier(original); getIngestService(original).executeBulkRequest( original.numberOfActions(), @@ -230,7 +230,7 @@ private void processBulkIndexIngestRequest( logger.debug("failed to execute pipeline for a bulk request", exception); listener.onFailure(exception); } else { - long ingestTookInMillis = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - ingestStartTimeInNanos); + long ingestTookInMillis = TimeUnit.NANOSECONDS.toMillis(relativeTimeNanos() - ingestStartTimeInNanos); BulkRequest bulkRequest = bulkRequestModifier.getBulkRequest(); ActionListener actionListener = bulkRequestModifier.wrapActionListenerIfNeeded( ingestTookInMillis, @@ -307,12 +307,12 @@ protected IngestService getIngestService(BulkRequest request) { return ingestService; } - protected 
long relativeTime() { - return relativeTimeProvider.getAsLong(); + protected long relativeTimeNanos() { + return relativeTimeNanosProvider.getAsLong(); } protected long buildTookInMillis(long startTimeNanos) { - return TimeUnit.NANOSECONDS.toMillis(relativeTime() - startTimeNanos); + return TimeUnit.NANOSECONDS.toMillis(relativeTimeNanos() - startTimeNanos); } private void applyPipelinesAndDoInternalExecute( @@ -321,9 +321,9 @@ private void applyPipelinesAndDoInternalExecute( Executor executor, ActionListener listener ) { - final long relativeStartTime = threadPool.relativeTimeInMillis(); + final long relativeStartTimeNanos = relativeTimeNanos(); if (applyPipelines(task, bulkRequest, executor, listener) == false) { - doInternalExecute(task, bulkRequest, executor, listener, relativeStartTime); + doInternalExecute(task, bulkRequest, executor, listener, relativeStartTimeNanos); } } diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportBulkAction.java index d9d5bc92a24d1..246afd64e6805 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportBulkAction.java +++ b/server/src/main/java/org/elasticsearch/action/bulk/TransportBulkAction.java @@ -107,7 +107,7 @@ public TransportBulkAction( indexNameExpressionResolver, indexingPressure, systemIndices, - System::nanoTime + threadPool::relativeTimeInNanos ); } @@ -197,7 +197,7 @@ protected void doInternalExecute( BulkRequest bulkRequest, Executor executor, ActionListener listener, - long relativeStartTime + long relativeStartTimeNanos ) { Map indicesToAutoCreate = new HashMap<>(); Set dataStreamsToBeRolledOver = new HashSet<>(); @@ -212,7 +212,7 @@ protected void doInternalExecute( indicesToAutoCreate, dataStreamsToBeRolledOver, failureStoresToBeRolledOver, - relativeStartTime + relativeStartTimeNanos ); } @@ -309,19 +309,19 @@ protected void createMissingIndicesAndIndexData( Map indicesToAutoCreate, Set dataStreamsToBeRolledOver, Set failureStoresToBeRolledOver, - long startTime + long startTimeNanos ) { final AtomicArray responses = new AtomicArray<>(bulkRequest.requests.size()); // Optimizing when there are no prerequisite actions if (indicesToAutoCreate.isEmpty() && dataStreamsToBeRolledOver.isEmpty() && failureStoresToBeRolledOver.isEmpty()) { - executeBulk(task, bulkRequest, startTime, listener, executor, responses, Map.of()); + executeBulk(task, bulkRequest, startTimeNanos, listener, executor, responses, Map.of()); return; } final Map indicesThatCannotBeCreated = new HashMap<>(); Runnable executeBulkRunnable = () -> executor.execute(new ActionRunnable<>(listener) { @Override protected void doRun() { - executeBulk(task, bulkRequest, startTime, listener, executor, responses, indicesThatCannotBeCreated); + executeBulk(task, bulkRequest, startTimeNanos, listener, executor, responses, indicesThatCannotBeCreated); } }); try (RefCountingRunnable refs = new RefCountingRunnable(executeBulkRunnable)) { @@ -533,7 +533,7 @@ void executeBulk( responses, indicesThatCannotBeCreated, indexNameExpressionResolver, - relativeTimeProvider, + relativeTimeNanosProvider, startTimeNanos, listener ).run(); diff --git a/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java b/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java index 95c1c0ce05d89..5c8603c09d865 100644 --- a/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java +++ 
b/server/src/main/java/org/elasticsearch/action/bulk/TransportSimulateBulkAction.java @@ -54,7 +54,7 @@ public TransportSimulateBulkAction( ingestService, indexingPressure, systemIndices, - System::nanoTime + threadPool::relativeTimeInNanos ); } @@ -64,7 +64,7 @@ protected void doInternalExecute( BulkRequest bulkRequest, Executor executor, ActionListener listener, - long relativeStartTime + long relativeStartTimeNanos ) { final AtomicArray responses = new AtomicArray<>(bulkRequest.requests.size()); for (int i = 0; i < bulkRequest.requests.size(); i++) { @@ -90,7 +90,7 @@ protected void doInternalExecute( ); } listener.onResponse( - new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(relativeStartTime)) + new BulkResponse(responses.toArray(new BulkItemResponse[responses.length()]), buildTookInMillis(relativeStartTimeNanos)) ); } @@ -105,7 +105,7 @@ protected IngestService getIngestService(BulkRequest request) { } @Override - protected boolean shouldStoreFailure(String indexName, Metadata metadata, long time) { + protected boolean shouldStoreFailure(String indexName, Metadata metadata, long epochMillis) { // A simulate bulk request should not change any persistent state in the system, so we never write to the failure store return false; } diff --git a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java index 9ddac7f13eb51..7a33eaa59eb03 100644 --- a/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/DfsQueryPhase.java @@ -155,7 +155,8 @@ ShardSearchRequest rewriteShardSearchRequest(ShardSearchRequest request) { QueryBuilder query = new KnnScoreDocQueryBuilder( scoreDocs.toArray(Lucene.EMPTY_SCORE_DOCS), source.knnSearch().get(i).getField(), - source.knnSearch().get(i).getQueryVector() + source.knnSearch().get(i).getQueryVector(), + source.knnSearch().get(i).getSimilarity() ).boost(source.knnSearch().get(i).boost()).queryName(source.knnSearch().get(i).queryName()); if (nestedPath != null) { query = new NestedQueryBuilder(nestedPath, query, ScoreMode.Max).innerHit(source.knnSearch().get(i).innerHit()); diff --git a/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java b/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java index e8470ba77632f..e2385745149c1 100644 --- a/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/ExpandSearchPhase.java @@ -100,6 +100,10 @@ private void doRun() { if (hit.getInnerHits() == null) { hit.setInnerHits(Maps.newMapWithExpectedSize(innerHitBuilders.size())); } + if (hit.isPooled() == false) { + // TODO: make this work pooled by forcing the hit itself to become pooled as needed here + innerHits = innerHits.asUnpooled(); + } hit.getInnerHits().put(innerHitBuilder.getName(), innerHits); assert innerHits.isPooled() == false || hit.isPooled() : "pooled inner hits can only be added to a pooled hit"; innerHits.mustIncRef(); diff --git a/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java b/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java index 5b42afcb86928..0f7cbd65a63c2 100644 --- a/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java +++ b/server/src/main/java/org/elasticsearch/action/search/RankFeaturePhase.java @@ -28,8 +28,8 @@ /** * This search phase is 
responsible for executing any re-ranking needed for the given search request, iff that is applicable. - * It starts by retrieving {@code num_shards * window_size} results from the query phase and reduces them to a global list of - * the top {@code window_size} results. It then reaches out to the shards to extract the needed feature data, + * It starts by retrieving {@code num_shards * rank_window_size} results from the query phase and reduces them to a global list of + * the top {@code rank_window_size} results. It then reaches out to the shards to extract the needed feature data, * and finally passes all this information to the appropriate {@code RankFeatureRankCoordinatorContext} which is responsible for reranking * the results. If no rank query is specified, it proceeds directly to the next phase (FetchSearchPhase) by first reducing the results. */ @@ -88,7 +88,7 @@ public void onFailure(Exception e) { void innerRun() throws Exception { // if the RankBuilder specifies a QueryPhaseCoordinatorContext, it will be called as part of the reduce call - // to operate on the first `window_size * num_shards` results and merge them appropriately. + // to operate on the first `rank_window_size * num_shards` results and merge them appropriately. SearchPhaseController.ReducedQueryPhase reducedQueryPhase = queryPhaseResults.reduce(); RankFeaturePhaseRankCoordinatorContext rankFeaturePhaseRankCoordinatorContext = coordinatorContext(context.getRequest().source()); if (rankFeaturePhaseRankCoordinatorContext != null) { diff --git a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java index 082e1dd9257e0..072e2eed42c20 100644 --- a/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java +++ b/server/src/main/java/org/elasticsearch/bootstrap/Elasticsearch.java @@ -40,6 +40,7 @@ import org.elasticsearch.nativeaccess.NativeAccess; import org.elasticsearch.node.Node; import org.elasticsearch.node.NodeValidationException; +import org.elasticsearch.preallocate.Preallocate; import java.io.IOException; import java.io.InputStream; @@ -195,7 +196,8 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { SubscribableListener.class, RunOnce.class, // We eagerly initialize to work around log4j permissions & JDK-8309727 - VectorUtil.class + VectorUtil.class, + Preallocate.class ); // install SM after natives, shutdown hooks, etc. @@ -209,7 +211,7 @@ private static void initPhase2(Bootstrap bootstrap) throws IOException { private static void ensureInitialized(Class... 
classes) { for (final var clazz : classes) { try { - MethodHandles.publicLookup().ensureInitialized(clazz); + MethodHandles.lookup().ensureInitialized(clazz); } catch (IllegalAccessException unexpected) { throw new AssertionError(unexpected); } diff --git a/server/src/main/java/org/elasticsearch/cluster/ClusterState.java b/server/src/main/java/org/elasticsearch/cluster/ClusterState.java index f9294210e0a6a..c54269da68507 100644 --- a/server/src/main/java/org/elasticsearch/cluster/ClusterState.java +++ b/server/src/main/java/org/elasticsearch/cluster/ClusterState.java @@ -884,6 +884,11 @@ public Map> nodeFeatures() { return Collections.unmodifiableMap(this.nodeFeatures); } + public Builder putNodeFeatures(String node, Set features) { + this.nodeFeatures.put(node, features); + return this; + } + public Builder routingTable(RoutingTable.Builder routingTableBuilder) { return routingTable(routingTableBuilder.build()); } diff --git a/server/src/main/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListener.java b/server/src/main/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListener.java new file mode 100644 index 0000000000000..c8b2555c0f15d --- /dev/null +++ b/server/src/main/java/org/elasticsearch/cluster/features/NodeFeaturesFixupListener.java @@ -0,0 +1,217 @@ +/* + * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one + * or more contributor license agreements. Licensed under the Elastic License + * 2.0 and the Server Side Public License, v 1; you may not use this file except + * in compliance with, at your election, the Elastic License 2.0 or the Server + * Side Public License, v 1. + */ + +package org.elasticsearch.cluster.features; + +import org.elasticsearch.Version; +import org.elasticsearch.action.ActionListener; +import org.elasticsearch.action.FailedNodeException; +import org.elasticsearch.action.admin.cluster.node.features.NodeFeatures; +import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesRequest; +import org.elasticsearch.action.admin.cluster.node.features.NodesFeaturesResponse; +import org.elasticsearch.action.admin.cluster.node.features.TransportNodesFeaturesAction; +import org.elasticsearch.client.internal.ClusterAdminClient; +import org.elasticsearch.cluster.ClusterChangedEvent; +import org.elasticsearch.cluster.ClusterFeatures; +import org.elasticsearch.cluster.ClusterState; +import org.elasticsearch.cluster.ClusterStateListener; +import org.elasticsearch.cluster.ClusterStateTaskExecutor; +import org.elasticsearch.cluster.ClusterStateTaskListener; +import org.elasticsearch.cluster.node.DiscoveryNode; +import org.elasticsearch.cluster.service.ClusterService; +import org.elasticsearch.cluster.service.MasterServiceTaskQueue; +import org.elasticsearch.common.Priority; +import org.elasticsearch.common.util.set.Sets; +import org.elasticsearch.core.SuppressForbidden; +import org.elasticsearch.core.TimeValue; +import org.elasticsearch.core.UpdateForV9; +import org.elasticsearch.logging.LogManager; +import org.elasticsearch.logging.Logger; +import org.elasticsearch.threadpool.Scheduler; +import org.elasticsearch.threadpool.ThreadPool; + +import java.util.Collections; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.Executor; +import java.util.stream.Collectors; + +@UpdateForV9 // this can be removed in v9 +public class NodeFeaturesFixupListener implements ClusterStateListener { + + private static final Logger logger = 
LogManager.getLogger(NodeFeaturesFixupListener.class); + + private static final TimeValue RETRY_TIME = TimeValue.timeValueSeconds(30); + + private final MasterServiceTaskQueue taskQueue; + private final ClusterAdminClient client; + private final Scheduler scheduler; + private final Executor executor; + private final Set pendingNodes = Collections.synchronizedSet(new HashSet<>()); + + public NodeFeaturesFixupListener(ClusterService service, ClusterAdminClient client, ThreadPool threadPool) { + // there tends to be a lot of state operations on an upgrade - this one is not time-critical, + // so use LOW priority. It just needs to be run at some point after upgrade. + this( + service.createTaskQueue("fix-node-features", Priority.LOW, new NodesFeaturesUpdater()), + client, + threadPool, + threadPool.executor(ThreadPool.Names.CLUSTER_COORDINATION) + ); + } + + NodeFeaturesFixupListener( + MasterServiceTaskQueue taskQueue, + ClusterAdminClient client, + Scheduler scheduler, + Executor executor + ) { + this.taskQueue = taskQueue; + this.client = client; + this.scheduler = scheduler; + this.executor = executor; + } + + class NodesFeaturesTask implements ClusterStateTaskListener { + private final Map> results; + private final int retryNum; + + NodesFeaturesTask(Map> results, int retryNum) { + this.results = results; + this.retryNum = retryNum; + } + + @Override + public void onFailure(Exception e) { + logger.error("Could not apply features for nodes {} to cluster state", results.keySet(), e); + scheduleRetry(results.keySet(), retryNum); + } + + public Map> results() { + return results; + } + } + + static class NodesFeaturesUpdater implements ClusterStateTaskExecutor { + @Override + public ClusterState execute(BatchExecutionContext context) { + ClusterState.Builder builder = ClusterState.builder(context.initialState()); + var existingFeatures = builder.nodeFeatures(); + + boolean modified = false; + for (var c : context.taskContexts()) { + for (var e : c.getTask().results().entrySet()) { + // double check there are still no features for the node + if (existingFeatures.getOrDefault(e.getKey(), Set.of()).isEmpty()) { + builder.putNodeFeatures(e.getKey(), e.getValue()); + modified = true; + } + } + c.success(() -> {}); + } + return modified ? builder.build() : context.initialState(); + } + } + + @Override + public void clusterChanged(ClusterChangedEvent event) { + if (event.nodesDelta().masterNodeChanged() && event.localNodeMaster()) { + /* + * Execute this if we have just become master. + * Check if there are any nodes that should have features in cluster state, but don't. + * This can happen if the master was upgraded from before 8.13, and one or more non-master nodes + * were already upgraded. They don't re-join the cluster with the new master, so never get their features + * (which the master now understands) added to cluster state. + * So we need to do a separate transport call to get the node features and add them to cluster state. + * We can't use features to determine when this should happen, as the features are incorrect. + * We also can't use transport version, as that is unreliable for upgrades + * from versions before 8.8 (see TransportVersionFixupListener). + * So the only thing we can use is release version. + * This is ok here, as Serverless will never hit this case, so the node feature fetch action will never be called on Serverless. + * This whole class will be removed in ES v9. 
+ */ + ClusterFeatures nodeFeatures = event.state().clusterFeatures(); + Set queryNodes = event.state() + .nodes() + .stream() + .filter(n -> n.getVersion().onOrAfter(Version.V_8_15_0)) + .map(DiscoveryNode::getId) + .filter(n -> getNodeFeatures(nodeFeatures, n).isEmpty()) + .collect(Collectors.toSet()); + + if (queryNodes.isEmpty() == false) { + logger.debug("Fetching actual node features for nodes {}", queryNodes); + queryNodesFeatures(queryNodes, 0); + } + } + } + + @SuppressForbidden(reason = "Need to access a specific node's features") + private static Set getNodeFeatures(ClusterFeatures features, String nodeId) { + return features.nodeFeatures().getOrDefault(nodeId, Set.of()); + } + + private void scheduleRetry(Set nodes, int thisRetryNum) { + // just keep retrying until this succeeds + logger.debug("Scheduling retry {} for nodes {}", thisRetryNum + 1, nodes); + scheduler.schedule(() -> queryNodesFeatures(nodes, thisRetryNum + 1), RETRY_TIME, executor); + } + + private void queryNodesFeatures(Set nodes, int retryNum) { + // some might already be in-progress + Set outstandingNodes = Sets.newHashSetWithExpectedSize(nodes.size()); + synchronized (pendingNodes) { + for (String n : nodes) { + if (pendingNodes.add(n)) { + outstandingNodes.add(n); + } + } + } + if (outstandingNodes.isEmpty()) { + // all nodes already have in-progress requests + return; + } + + NodesFeaturesRequest request = new NodesFeaturesRequest(outstandingNodes.toArray(String[]::new)); + client.execute(TransportNodesFeaturesAction.TYPE, request, new ActionListener<>() { + @Override + public void onResponse(NodesFeaturesResponse response) { + pendingNodes.removeAll(outstandingNodes); + handleResponse(response, retryNum); + } + + @Override + public void onFailure(Exception e) { + pendingNodes.removeAll(outstandingNodes); + logger.warn("Could not read features for nodes {}", outstandingNodes, e); + scheduleRetry(outstandingNodes, retryNum); + } + }); + } + + private void handleResponse(NodesFeaturesResponse response, int retryNum) { + if (response.hasFailures()) { + Set failedNodes = new HashSet<>(); + for (FailedNodeException fne : response.failures()) { + logger.warn("Failed to read features from node {}", fne.nodeId(), fne); + failedNodes.add(fne.nodeId()); + } + scheduleRetry(failedNodes, retryNum); + } + // carry on and read what we can + + Map> results = response.getNodes() + .stream() + .collect(Collectors.toUnmodifiableMap(n -> n.getNode().getId(), NodeFeatures::nodeFeatures)); + + if (results.isEmpty() == false) { + taskQueue.submitTask("fix-node-features", new NodesFeaturesTask(results, retryNum), null); + } + } +} diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/AliasMetadata.java b/server/src/main/java/org/elasticsearch/cluster/metadata/AliasMetadata.java index a0f4a929dafdb..ff412d629b3b1 100644 --- a/server/src/main/java/org/elasticsearch/cluster/metadata/AliasMetadata.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/AliasMetadata.java @@ -396,6 +396,8 @@ public static AliasMetadata fromXContent(XContentParser parser) throws IOExcepti } else if ("is_hidden".equals(currentFieldName)) { builder.isHidden(parser.booleanValue()); } + } else if (token == null) { + throw new IllegalArgumentException("unexpected null token while parsing alias"); } } return builder.build(); diff --git a/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java b/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java index 70440adc4ebbe..b044ef6042428 100644 --- 
a/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java +++ b/server/src/main/java/org/elasticsearch/cluster/metadata/Template.java @@ -70,7 +70,11 @@ public class Template implements SimpleDiffable