Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
860b20c
Add Apache Arrow as a bulk ingestion format
swallez Nov 22, 2024
be7cffc
Update docs/changelog/125040.yaml
swallez Mar 17, 2025
d23ba11
[CI] Auto commit changes from spotless
Mar 17, 2025
5dfdbd8
useDefaultDistribution is not needed anymore
swallez Mar 17, 2025
e1af882
Fix Arrow lib license files
swallez Mar 19, 2025
604da53
Remove licence files now provided by the server module via libs:arrow
swallez Mar 19, 2025
af76ead
Fix server entitlements
swallez Mar 19, 2025
5510ae8
Fix forbiddanApis task
swallez Mar 19, 2025
57186c8
[CI] Auto commit changes from spotless
Mar 19, 2025
5952435
Fix server entitlements (hopefully for good)
swallez Mar 19, 2025
259ed8a
[CI] Auto commit changes from spotless
Mar 19, 2025
40c3176
Fix thirdPartyAudit
swallez Mar 19, 2025
21cb8ff
Refactor: move Arrow bulk out of the server module. This causes too m…
swallez Mar 19, 2025
8cca7e2
[CI] Auto commit changes from spotless
Mar 21, 2025
86ab390
Move Arrow-related security policy to the Arrow module
swallez Mar 21, 2025
44a1b64
Fix API name, remove test warning
swallez Mar 22, 2025
f2036d3
Merge branch 'main' into bulk-arrow
swallez Mar 22, 2025
181e78a
Re-add tests that were lost in the refactoring
swallez Mar 23, 2025
d686e88
Add support for timestamps, more tests
swallez Apr 12, 2025
98e2e7e
Return an Arrow response
swallez Apr 12, 2025
726df62
merge main
swallez Apr 12, 2025
923c7e8
Merge remote-tracking branch 'upstream/main' into bulk-arrow
swallez Aug 20, 2025
c20bccf
Bump Arrow version
swallez Aug 20, 2025
68779ce
wip
swallez Aug 22, 2025
cb72323
Refactor, add tests
swallez Oct 13, 2025
f920cfb
Merge remote-tracking branch 'upstream/main' into bulk-arrow
swallez Oct 13, 2025
a39e113
Add support for non-streamed requests, plus minor fixes
swallez Oct 14, 2025
1b14d72
Fix verification-metadata file
swallez Oct 14, 2025
1e93bf4
Fix style
swallez Oct 14, 2025
a9f9ee3
Do not publish libs/arrow
swallez Oct 14, 2025
93cbede
Merge branch 'main' into bulk-arrow
swallez Oct 20, 2025
ee0d9eb
Merge branch 'main' into bulk-arrow
swallez Oct 21, 2025
c490de0
Ensure histogram is created only once (causes an exception otherwise)
swallez Oct 21, 2025
5d7dd92
Fix XContentBuffer
swallez Oct 23, 2025
bf48259
Fix test
swallez Oct 23, 2025
de6c895
Merge branch 'main' into bulk-arrow
swallez Oct 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,10 @@ public void execute(Task t) {
"--add-opens=java.base/java.time=ALL-UNNAMED",
"--add-opens=java.management/java.lang.management=ALL-UNNAMED",
"--enable-native-access=ALL-UNNAMED",
// Arrow: open java.nio to ALL-UNNAMED (the target may need to become org.apache.arrow.memory.core once Arrow is modularized)
"--add-opens=java.base/java.nio=ALL-UNNAMED",
// Define the allocation manager type to avoid classpath scanning to locate one.
"-Darrow.allocation.manager.type=Unsafe",
"-XX:+HeapDumpOnOutOfMemoryError"
);

Expand Down
9 changes: 9 additions & 0 deletions distribution/src/config/jvm.options
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,12 @@

## GC logging
-Xlog:gc*,gc+age=trace,safepoint:file=gc.log:utctime,level,pid,tags:filecount=32,filesize=64m

## Arrow
# Allow accessing a private field of java.nio.Buffer for direct memory access.
# See org.apache.arrow.memory.util.MemoryUtil and https://arrow.apache.org/docs/java/install.html
# See also libs/arrow/src/main/java/module-info.java-disabled for why we open to ALL-UNNAMED
# instead of limiting to org.apache.arrow.memory.core
--add-opens=java.base/java.nio=ALL-UNNAMED
# Define the allocation manager type to avoid classpath scanning to locate one.
-Darrow.allocation.manager.type=Unsafe
5 changes: 5 additions & 0 deletions docs/changelog/125040.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 125040
summary: Add Apache Arrow as a bulk ingestion format
area: CRUD
type: enhancement
issues: []
20 changes: 20 additions & 0 deletions gradle/verification-metadata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,11 @@
<sha256 value="baf7d6ea97ce606c53e11b6854ba5f2ce7ef5c24dddf0afa18d1260bd25b002c" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.google.errorprone" name="error_prone_annotations" version="2.31.0">
<artifact name="error_prone_annotations-2.31.0.jar">
<sha256 value="ba8d20fb1fc181672552b323f3c7549b30be1d57c49dd5835e2729e7647d9cfa" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.google.errorprone" name="error_prone_annotations" version="2.36.0">
<artifact name="error_prone_annotations-2.36.0.jar">
<sha256 value="77440e270b0bc9a249903c5a076c36a722c4886ca4f42675f2903a1c53ed61a5" origin="Generated by Gradle"/>
Expand All @@ -697,6 +702,11 @@
<sha256 value="8d10cac2ea9878896077ba437d76fdb1b9a07f55a863c560bb8a024b04103f8b" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.google.flatbuffers" name="flatbuffers-java" version="25.2.10">
<artifact name="flatbuffers-java-25.2.10.jar">
<sha256 value="587bd6c31cda747587493a113bec8602d3a0b0ca579b2b1b838ef71b19e6525d" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="com.google.googlejavaformat" name="google-java-format" version="1.19.2">
<artifact name="google-java-format-1.19.2.jar">
<sha256 value="bac84458eb12499585f2fabb1ac13bbe5b455c120bf3d19db21597814a27c863" origin="Generated by Gradle"/>
Expand Down Expand Up @@ -3728,6 +3738,11 @@
<sha256 value="ccaedd33af0b7894d9f2f3b644f4d19e43928e32902e61ac4d10777830f5aac7" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.checkerframework" name="checker-qual" version="3.48.1">
<artifact name="checker-qual-3.48.1.jar">
<sha256 value="21e8dfe8103e125d96a329653ca81e87ac430326dbdbf299cea3dc1ae3f039a2" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.checkerframework" name="checker-qual" version="3.49.0">
<artifact name="checker-qual-3.49.0.jar">
<sha256 value="8b9d9a36eaaf7c0fc26503c83cd97d8c9c0f9e2913cc2a6e92ac26c735d4dcbe" origin="Generated by Gradle"/>
Expand Down Expand Up @@ -4263,6 +4278,11 @@
<sha256 value="95d40913be28dfd439cefea9170c40898ea84f11f25e6ff8de50339b8a7b5e3e" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.immutables" name="value-annotations" version="2.10.1">
<artifact name="value-annotations-2.10.1.jar">
<sha256 value="9ef9629d2b710d9d705aa154457e1ba33b8c12118129b7c400bf65d923b46f26" origin="Generated by Gradle"/>
</artifact>
</component>
<component group="org.ini4j" name="ini4j" version="0.5.2">
<artifact name="ini4j-0.5.2.jar">
<sha256 value="631656eb38639b0ae41161f706ff7fbe04313b5b8f42892da5ec656390031fc6" origin="Generated by Gradle"/>
Expand Down
81 changes: 81 additions & 0 deletions libs/arrow/build.gradle
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the "Elastic License
* 2.0", the "GNU Affero General Public License v3.0 only", and the "Server Side
* Public License v 1"; you may not use this file except in compliance with, at
* your election, the "Elastic License 2.0", the "GNU Affero General Public
* License v3.0 only", or the "Server Side Public License, v 1".
*/

// Notes:
// - additional JVM arguments are added to distribution/src/config/jvm.options and ElasticsearchTestBasePlugin
// - additional permissions are added to server/src/main/resources/org/elasticsearch/bootstrap/security.policy

import org.elasticsearch.gradle.internal.precommit.CheckForbiddenApisTask

apply plugin: 'elasticsearch.build'

// Single version string interpolated into every org.apache.arrow artifact coordinate below.
var arrowVersion = "18.3.0"

// All dependencies are declared with `api`, so they are exposed to consumers of this library.
// Entries are grouped under the Arrow artifact named in the heading comment; the artifacts listed
// after each Arrow artifact appear to be that artifact's own dependencies, declared explicitly.
dependencies {
// NOTE(review): commented-out dependency with no explanation of its own — confirm whether it can be deleted.
//implementation(project(":libs:x-content"))

// jackson-core is provided by :libs:x-content:impl. If declared here, there's a module issue that prevents ES from starting:
//
// fatal exception while booting Elasticsearch java.lang.IllegalAccessError: class org.elasticsearch.xcontent.provider.json.JsonXContentImpl (in module org.elasticsearch.xcontent.impl) cannot access class com.fasterxml.jackson.core.JsonFactoryBuilder (in unnamed module @0x4727e5fc) because module org.elasticsearch.xcontent.impl does not read unnamed module @0x4727e5fc
// at org.elasticsearch.xcontent.impl@<version>/org.elasticsearch.xcontent.provider.json.JsonXContentImpl.<clinit>(JsonXContentImpl.java:50)
// at org.elasticsearch.xcontent.impl@<version>/org.elasticsearch.xcontent.provider.XContentProviderImpl$2.XContent(XContentProviderImpl.java:54)
// at org.elasticsearch.xcontent@<version>/org.elasticsearch.xcontent.json.JsonXContent.<clinit>(JsonXContent.java:37)
// at org.elasticsearch.xcontent@<version>/org.elasticsearch.xcontent.XContentType.<clinit>(XContentType.java:28)
// at org.elasticsearch.server@<version>/org.elasticsearch.common.settings.Setting.arrayToParsableString(Setting.java:1883)
//implementation(project(":libs:x-content:impl"))

// arrow-vector
api("org.apache.arrow:arrow-vector:${arrowVersion}")
// Jackson artifacts all use the project-wide versions.jackson value.
api("com.fasterxml.jackson.core:jackson-core:${versions.jackson}")
api("com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}")
api("com.fasterxml.jackson.core:jackson-databind:${versions.jackson}")
api("com.fasterxml.jackson.datatype:jackson-datatype-jsr310:${versions.jackson}")

// flatbuffers and value-annotations are pinned to literal versions here rather than a shared versions.* entry.
api("com.google.flatbuffers:flatbuffers-java:25.2.10")
api("commons-codec:commons-codec:${versions.commonscodec}") // Arrow 18 -> commons-codec 1.17.1
api("org.slf4j:slf4j-api:${versions.slf4j}")
api("org.immutables:value-annotations:2.10.1") // provided dependency

// arrow-format
api("org.apache.arrow:arrow-format:${arrowVersion}")
// also depends on flatbuffers

// arrow-memory-core
api("org.apache.arrow:arrow-memory-core:${arrowVersion}")
api("com.google.errorprone:error_prone_annotations:2.31.0") // provided dependency
api('org.checkerframework:checker-qual:3.48.1') // provided dependency
// also depends on value-annotations (provided dependency)

// arrow-memory-unsafe
api("org.apache.arrow:arrow-memory-unsafe:${arrowVersion}")
// also depends on value-annotations (provided dependency)

// Test framework, minus the org.elasticsearch:arrow module it would otherwise bring in
// (presumably this library itself — TODO confirm).
testImplementation(project(":test:framework")) {
exclude group: 'org.elasticsearch', module: 'arrow'
}
}

// Configure the dependencyLicenses task: dependency names matching each regex are mapped to a
// single shared name ('jackson', 'arrow', 'org-immutables'), presumably so one set of license
// files under libs/arrow/licenses covers every artifact of that family — verify against the task.
tasks.named("dependencyLicenses").configure {
mapping from: /jackson-.*/, to: 'jackson'
mapping from: /arrow-.*/, to: 'arrow'
mapping from: /value-annotations.*/, to: 'org-immutables'
}

// Configure the thirdPartyAudit task to ignore violations reported for Arrow's MemoryUtil and its
// anonymous inner class MemoryUtil$1.
// NOTE(review): presumably needed because MemoryUtil reaches into JDK internals for direct memory
// access (see the Arrow section of distribution/src/config/jvm.options) — confirm.
tasks.named("thirdPartyAudit").configure {
ignoreViolations(
'org.apache.arrow.memory.util.MemoryUtil',
'org.apache.arrow.memory.util.MemoryUtil$1',
)
}

// Applies to every CheckForbiddenApisTask in this project: replace the signature files with
// 'jdk-signatures' only.
tasks.withType(CheckForbiddenApisTask).configureEach {
// Remove server signatures as they will fail on classes missing in this lib's classpath,
// like org.apache.lucene.util.IOUtils
replaceSignatureFiles('jdk-signatures')
}
17 changes: 17 additions & 0 deletions libs/arrow/licenses/commons-codec-NOTICE.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
Apache Commons Codec
Copyright 2002-2015 The Apache Software Foundation

This product includes software developed at
The Apache Software Foundation (http://www.apache.org/).

src/test/org/apache/commons/codec/language/DoubleMetaphoneTest.java
contains test data from http://aspell.net/test/orig/batch0.tab.
Copyright (C) 2002 Kevin Atkinson (kevina@gnu.org)

===============================================================================

The content of package org.apache.commons.codec.language.bm has been translated
from the original php source code available at http://stevemorse.org/phoneticinfo.htm
with permission from the original authors.
Original source copyright:
Copyright (c) 2008 Alexander Beider & Stephen P. Morse.
Loading