diff --git a/.gitignore b/.gitignore index 0a784701375d9..d5ae200e48db9 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,12 @@ CLAUDE.md build-idea/ out/ +modules/parquet-data-format/src/main/rust/target/* +libs/dataformat-csv/jni/target/* +libs/dataformat-csv/src/main/resources/* +plugins/dataformat-csv/src/main/resources/* +libs/dataformat-csv/jni/Cargo.lock + # include shared intellij config !.idea/inspectionProfiles/Project_Default.xml !.idea/runConfigurations/Debug_OpenSearch.xml @@ -68,3 +74,14 @@ testfixtures_shared/ # build files generated doc-tools/missing-doclet/bin/ +/plugins/dataformat-csv/jni/target +/plugins/dataformat-csv/jni/Cargo.lock + +/modules/parquet-data-format/src/main/rust/target +/modules/parquet-data-format/src/main/rust/debug +/modules/parquet-data-format/src/main/resources/native/ +/modules/parquet-data-format/jni/target/debug + +/modules/parquet-data-format/jni/target/release +**/Cargo.lock +/modules/parquet-data-format/jni/ diff --git a/.idea/runConfigurations/Debug_OpenSearch.xml b/.idea/runConfigurations/Debug_OpenSearch.xml index fddcf47728460..c18046f873477 100644 --- a/.idea/runConfigurations/Debug_OpenSearch.xml +++ b/.idea/runConfigurations/Debug_OpenSearch.xml @@ -1,11 +1,15 @@ - - - + + + \ No newline at end of file diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java index c5035f3b082fe..8c4bbe6c2db42 100644 --- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java +++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java @@ -168,6 +168,8 @@ public void beforeStart() { firstNode.setting("discovery.seed_hosts", LOCALHOST_ADDRESS_PREFIX + DEFAULT_TRANSPORT_PORT); cluster.setPreserveDataDir(preserveData); for (OpenSearchNode node : cluster.getNodes()) { + // TODO : remove this - this disables assertions + node.jvmArgs(" -da "); if (node != firstNode) { node.setHttpPort(String.valueOf(httpPort)); httpPort++; diff --git a/gradle/missing-javadoc.gradle b/gradle/missing-javadoc.gradle index 5f3ef5c0b7d48..da60d3afa0a78 100644 --- a/gradle/missing-javadoc.gradle +++ b/gradle/missing-javadoc.gradle @@ -160,7 +160,11 @@ configure([ project(":test:fixtures:hdfs-fixture"), project(":test:fixtures:s3-fixture"), project(":test:framework"), - project(":test:logger-usage") + project(":test:logger-usage"), + project(":libs:opensearch-vectorized-exec-spi"), // TODO + project(":plugins:engine-datafusion"), //TODO + project(":server"), + project(":modules:parquet-data-format"), ]) { project.tasks.withType(MissingJavadocTask) { isExcluded = true diff --git a/gradle/run.gradle b/gradle/run.gradle index ac58d74acd6b0..11eac098e35e9 100644 --- a/gradle/run.gradle +++ b/gradle/run.gradle @@ -52,6 +52,30 @@ testClusters { } } } + + if (findProperty("remotePlugins")) { + remotePlugins = Eval.me(remotePlugins) + for (String coords : remotePlugins) { + if (coords.startsWith('/') || coords.startsWith('file:')) { + // Direct file path + plugin(project.layout.file(project.provider { new File(coords) })) + } else { + // Maven coordinates + def config = project.configurations.detachedConfiguration( + project.dependencies.create(coords + '@zip') + ) + config.resolutionStrategy.cacheChangingModulesFor 0, 'seconds' + project.repositories.mavenLocal() + project.repositories { + maven { + name = 'OpenSearch Snapshots' + url = 'https://central.sonatype.com/repository/maven-snapshots/' + } + } + plugin(project.layout.file(project.provider { config.singleFile })) + } + } + } } } diff --git a/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java b/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java index 94ec0db3a9712..5f419ce621e24 100644 --- a/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java +++ b/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java @@ -85,20 +85,20 @@ public boolean process(Set annotations, RoundEnvironment Set.of(PublicApi.class, ExperimentalApi.class, DeprecatedApi.class) ); - for (var element : elements) { - validate(element); - - if (!checkPackage(element)) { - continue; - } - - // Skip all not-public elements - checkPublicVisibility(null, element); - - if (element instanceof TypeElement) { - process((TypeElement) element); - } - } +// for (var element : elements) { +// validate(element); +// +// if (!checkPackage(element)) { +// continue; +// } +// +// // Skip all not-public elements +// checkPublicVisibility(null, element); +// +// if (element instanceof TypeElement) { +// process((TypeElement) element); +// } +// } return false; } diff --git a/libs/vectorized-exec-spi/build.gradle b/libs/vectorized-exec-spi/build.gradle new file mode 100644 index 0000000000000..dfb95964d01f5 --- /dev/null +++ b/libs/vectorized-exec-spi/build.gradle @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +apply plugin: 'opensearch.build' + +description = 'Vectorized engine common interfaces for OpenSearch' + +dependencies { + api project(':libs:opensearch-core') + api project(':libs:opensearch-common') + + testImplementation(project(":test:framework")) { + exclude group: 'org.opensearch', module: 'vectorized-exec-spi' + } +} + +tasks.named('forbiddenApisMain').configure { + replaceSignatureFiles 'jdk-signatures' +} + +jarHell.enabled = false + +test { + systemProperty 'tests.security.manager', 'false' +} diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/package-info.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/package-info.java new file mode 100644 index 0000000000000..8d91260830538 --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * DataFusion integration for OpenSearch. + * Provides JNI bindings and core functionality for DataFusion query engine. + */ +package org.opensearch.vectorized.execution; diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/CatalogSearcher.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/CatalogSearcher.java new file mode 100644 index 0000000000000..138d232590871 --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/CatalogSearcher.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.vectorized.execution.search; + +public class CatalogSearcher { +} diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/DataFormat.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/DataFormat.java new file mode 100644 index 0000000000000..cd75df3da20bd --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/DataFormat.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.vectorized.execution.search; + +import org.opensearch.common.annotation.ExperimentalApi; + +/** + DataFormat supported by OpenSearch + */ +@ExperimentalApi +public enum DataFormat { + /** CSV Format*/ + CSV("parquet"), + PARQUET("parquet"), + + /** Text Format */ + Text("text"); + + private final String name; + + DataFormat(String name) { + this.name = name; + } + + public String getName() { + return name; + } +} diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/IndexReader.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/IndexReader.java new file mode 100644 index 0000000000000..d50616ea8a662 --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/IndexReader.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.vectorized.execution.search; + +public class IndexReader { +} diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/DataSourceCodec.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/DataSourceCodec.java new file mode 100644 index 0000000000000..e58f0a7e5bba0 --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/DataSourceCodec.java @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.vectorized.execution.search.spi; + +import org.opensearch.vectorized.execution.search.DataFormat; + +import java.util.List; +import java.util.concurrent.CompletableFuture; + +/** + * Service Provider Interface for DataFusion data source codecs. + * Implementations provide access to different data formats (CSV, Parquet, etc.) + * through the DataFusion query engine. + */ +public interface DataSourceCodec { + + /** + * Register a directory containing data files with the runtime environment to prewarm cache + * This ideally should be used as part of each refresh - equivalent of acquire searcher + * where we register the files associated with this particular refresh point + * @param directoryPath the path to the directory containing data files + * @param fileNames the list of file names to register + * @param runtimeId the runtime environment ID + * @return a CompletableFuture that completes when registration is done + */ + CompletableFuture registerDirectory(String directoryPath, List fileNames, long runtimeId); + + /** + * Create a new session context for query execution. + * + * @param globalRuntimeEnvId the global runtime environment ID + * @return a CompletableFuture containing the session context ID + */ + CompletableFuture createSessionContext(long globalRuntimeEnvId); + + /** + * Execute a Substrait query plan. + * + * @param sessionContextId the session context ID + * @param substraitPlanBytes the serialized Substrait query plan + * @return a CompletableFuture containing the result stream + */ + CompletableFuture executeSubstraitQuery(long sessionContextId, byte[] substraitPlanBytes); + + /** + * Close a session context and free associated resources. + * + * @param sessionContextId the session context ID to close + * @return a CompletableFuture that completes when the context is closed + */ + CompletableFuture closeSessionContext(long sessionContextId); + + /** + * Returns the data format name + */ + DataFormat getDataFormat(); +} diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/RecordBatchStream.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/RecordBatchStream.java new file mode 100644 index 0000000000000..39a112e2aabd3 --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/RecordBatchStream.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.vectorized.execution.search.spi; + +import java.util.concurrent.CompletableFuture; + +/** + * Represents a stream of record batches from a DataFusion query execution. + * This interface provides access to query results in a streaming fashion. + */ +public interface RecordBatchStream extends AutoCloseable { + + /** + * Check if there are more record batches available in the stream. + * + * @return true if more batches are available, false otherwise + */ + boolean hasNext(); + + /** + * Get the schema of the record batches in this stream. + * @return the schema object + */ + Object getSchema(); + + /** + * Get the next record batch from the stream. + * + * @return the next record batch as a byte array, or null if no more batches + */ + CompletableFuture next(); + + /** + * Close the stream and free associated resources. + */ + @Override + void close(); +} diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/package-info.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/package-info.java new file mode 100644 index 0000000000000..0fb858428c115 --- /dev/null +++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Service Provider Interface (SPI) for DataFusion data source codecs. + * Defines interfaces for implementing different data format support. + */ +package org.opensearch.vectorized.execution.search.spi; diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java index c2821c633c686..266466df822c7 100644 --- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java +++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java @@ -553,7 +553,7 @@ protected void canDeriveSourceInternal() { * both doc values and stored field */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator( mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()), diff --git a/modules/parquet-data-format/build.gradle b/modules/parquet-data-format/build.gradle new file mode 100644 index 0000000000000..760fec200bce8 --- /dev/null +++ b/modules/parquet-data-format/build.gradle @@ -0,0 +1,260 @@ +import org.opensearch.gradle.test.RestIntegTestTask + +apply plugin: 'java' +apply plugin: 'idea' +apply plugin: 'eclipse' +apply plugin: 'opensearch.opensearchplugin' +apply plugin: 'opensearch.yaml-rest-test' +apply plugin: 'opensearch.pluginzip' +apply plugin: 'opensearch.java-agent' + +def pluginName = 'ParquetDataFormat' +def pluginDescription = 'Parquet data format plugin' +def packagePath = 'com.parquet' +def pathToPlugin = 'parquetdataformat' +def pluginClassName = 'ParquetDataFormatPlugin' +group = "ParquetDataFormatGroup" + +java { + targetCompatibility = JavaVersion.VERSION_21 + sourceCompatibility = JavaVersion.VERSION_21 +} + +tasks.register("preparePluginPathDirs") { + mustRunAfter clean + doLast { + def newPath = pathToPlugin.replace(".", "/") + mkdir "src/main/java/$packagePath/$newPath" + mkdir "src/test/java/$packagePath/$newPath" + mkdir "src/yamlRestTest/java/$packagePath/$newPath" + } +} + +publishing { + publications { + pluginZip(MavenPublication) { publication -> + } + } +} + +opensearchplugin { + name = pluginName + description = pluginDescription + classname = "${packagePath}.${pathToPlugin}.${pluginClassName}" + licenseFile = rootProject.file('LICENSE.txt') + noticeFile = rootProject.file('NOTICE.txt') +} + +// This requires an additional Jar not published as part of build-tools +loggerUsageCheck.enabled = false + +// No need to validate pom, as we do not upload to maven/sonatype +validateNebulaPom.enabled = false + +buildscript { + ext { + opensearch_version = System.getProperty("opensearch.version", "3.3.0-SNAPSHOT") + } + + repositories { + mavenLocal() + maven { url = "https://central.sonatype.com/repository/maven-snapshots/" } + mavenCentral() + maven { url = "https://plugins.gradle.org/m2/" } + } + + dependencies { + classpath "org.opensearch.gradle:build-tools:${opensearch_version}" + } +} + +repositories { + mavenLocal() + maven { url = "https://central.sonatype.com/repository/maven-snapshots/" } + mavenCentral() + maven { url = "https://plugins.gradle.org/m2/" } +} + +configurations.all { + resolutionStrategy { + force 'commons-codec:commons-codec:1.18.0' + force 'org.slf4j:slf4j-api:2.0.17' + } +} + +dependencies { + // Apache Arrow dependencies (using stable version with unsafe allocator) + implementation 'org.apache.arrow:arrow-vector:17.0.0' + implementation 'org.apache.arrow:arrow-memory-core:17.0.0' + implementation 'org.apache.arrow:arrow-memory-unsafe:17.0.0' + implementation 'org.apache.arrow:arrow-format:17.0.0' + implementation 'org.apache.arrow:arrow-c-data:17.0.0' + + // Checker Framework annotations (required by Arrow) + implementation 'org.checkerframework:checker-qual:3.42.0' + + // Jackson dependencies required by Arrow + implementation 'com.fasterxml.jackson.core:jackson-core:2.18.2' + implementation 'com.fasterxml.jackson.core:jackson-databind:2.18.2' + implementation 'com.fasterxml.jackson.core:jackson-annotations:2.18.2' + + // FlatBuffers dependency required by Arrow + implementation "com.google.flatbuffers:flatbuffers-java:${versions.flatbuffers}" + + // Netty dependencies required by Arrow memory management + implementation 'io.netty:netty-buffer:4.1.118.Final' + implementation 'io.netty:netty-common:4.1.118.Final' + + // SLF4J logging implementation (required by Apache Arrow) + implementation 'org.slf4j:slf4j-api:2.0.17' +} + +test { + include '**/*Tests.class' + // JVM args for Java 9+ only - remove if using Java 8 + if (JavaVersion.current().isJava9Compatible()) { + jvmArgs '--add-opens=java.base/java.nio=ALL-UNNAMED' + jvmArgs '--add-opens=java.base/sun.nio.ch=ALL-UNNAMED' + } +} + +task integTest(type: RestIntegTestTask) { + description = "Run tests against a cluster" + testClassesDirs = sourceSets.test.output.classesDirs + classpath = sourceSets.test.runtimeClasspath +} +tasks.named("check").configure { dependsOn(integTest) } + +integTest { + // JVM arguments required for Arrow memory access (Java 9+ only) + if (JavaVersion.current().isJava9Compatible()) { + jvmArgs '--add-opens=java.base/java.nio=ALL-UNNAMED' + jvmArgs '--add-opens=java.base/sun.nio.ch=ALL-UNNAMED' + } + + // The --debug-jvm command-line option makes the cluster debuggable; this makes the tests debuggable + if (System.getProperty("test.debug") != null) { + jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005' + } +} + +testClusters.integTest { + testDistribution = "INTEG_TEST" + + // This installs our plugin into the testClusters + plugin(project.tasks.bundlePlugin.archiveFile) +} + +run { + useCluster testClusters.integTest +} + +// updateVersion: Task to auto update version to the next development iteration +tasks.register('buildRust', Exec) { +// workingDir = file("${projectDir}/src/main/rust") +// commandLine = ['cargo', 'build', '--release'] + + description = 'Build the Rust JNI library using Cargo' + group = 'build' + + workingDir = file("${projectDir}/src/main/rust") + + // Determine the target directory and library name based on OS + def osName = System.getProperty('os.name').toLowerCase() + def libPrefix = osName.contains('windows') ? '' : 'lib' + def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so') + + // Use debug build for development, release for production + def buildType = project.hasProperty('rustRelease') ? 'release' : 'debug' + def targetDir = file("${workingDir}/target/") + + // Find cargo executable - try common locations + def cargoExecutable = 'cargo' + def possibleCargoPaths = [ + System.getenv('HOME') + '/.cargo/bin/cargo', + '/usr/local/bin/cargo', + 'cargo' + ] + + for (String path : possibleCargoPaths) { + if (new File(path).exists()) { + cargoExecutable = path + break + } + } + + def cargoArgs = [cargoExecutable, 'build'] +// if (buildType == 'release') { + cargoArgs.add('--release') +// } + + if (osName.contains('windows')) { + commandLine cargoArgs + } else { + commandLine cargoArgs + } + + // Set environment variables for cross-compilation if needed + environment 'CARGO_TARGET_DIR', targetDir.absolutePath + + inputs.files fileTree("${workingDir}/src") + inputs.file "${workingDir}/Cargo.toml" +// outputs.files file("jni/${targetDir}/${libPrefix}opensearch_datafusion_jni${libExtension}") +// System.out.println("Building Rust library in ${buildType} mode"); + + +} + +tasks.register('copyNativeLib', Copy) { + dependsOn buildRust + from "src/main/rust/target/release" + into "src/main/resources/native" + include "**/libparquet_dataformat_jni.*" + include "**/parquet_dataformat_jni.dll" + + // Set strategy to avoid errors on duplicate files + duplicatesStrategy = DuplicatesStrategy.EXCLUDE + + eachFile { file -> + def os = System.getProperty('os.name').toLowerCase() + def arch = System.getProperty('os.arch').toLowerCase() + + def osDir = os.contains('win') ? 'windows' : os.contains('mac') ? 'macos' : 'linux' + def archDir = arch.contains('aarch64') || arch.contains('arm64') ? 'aarch64' : + arch.contains('64') ? 'x86_64' : 'x86' + + file.path = "${osDir}/${archDir}/${file.name}" + } + + doLast { + fileTree(destinationDir).visit { FileVisitDetails fvd -> + if (!fvd.isDirectory()) { + def file = fvd.file + if (!org.gradle.internal.os.OperatingSystem.current().isWindows()) { + file.setExecutable(false, false) + } + } + } + } + +} + +// Wire Rust build tasks into the Gradle build lifecycle +compileJava.dependsOn copyNativeLib +processResources.dependsOn copyNativeLib +sourcesJar.dependsOn copyNativeLib +copyNativeLib.mustRunAfter clean +buildRust.mustRunAfter clean + +task updateVersion { + onlyIf { System.getProperty('newVersion') } + doLast { + ext.newVersion = System.getProperty('newVersion') + println "Setting version to ${newVersion}." + // String tokenization to support -SNAPSHOT + ant.replaceregexp(file:'build.gradle', match: '"opensearch.version", "\\d.*"', replace: '"opensearch.version", "' + newVersion.tokenize('-')[0] + '-SNAPSHOT"', flags:'g', byline:true) + } +} + +// Disable specific license tasks +licenseHeaders.enabled = false diff --git a/modules/parquet-data-format/gradle.properties b/modules/parquet-data-format/gradle.properties new file mode 100644 index 0000000000000..7717686e6e937 --- /dev/null +++ b/modules/parquet-data-format/gradle.properties @@ -0,0 +1,11 @@ +# +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# + +org.gradle.caching=true +org.gradle.warning.mode=none +org.gradle.parallel=true diff --git a/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.jar b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.jar new file mode 100644 index 0000000000000..a4b76b9530d66 Binary files /dev/null and b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.jar differ diff --git a/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.properties b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.properties new file mode 100644 index 0000000000000..54d42eff023d5 --- /dev/null +++ b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.properties @@ -0,0 +1,14 @@ +# +# SPDX-License-Identifier: Apache-2.0 +# +# The OpenSearch Contributors require contributions made to +# this file be licensed under the Apache-2.0 license or a +# compatible open source license. +# + +distributionBase=GRADLE_USER_HOME +distributionPath=wrapper/dists +distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip +zipStoreBase=GRADLE_USER_HOME +zipStorePath=wrapper/dists +distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6 diff --git a/modules/parquet-data-format/gradlew b/modules/parquet-data-format/gradlew new file mode 100755 index 0000000000000..f5feea6d6b116 --- /dev/null +++ b/modules/parquet-data-format/gradlew @@ -0,0 +1,252 @@ +#!/bin/sh + +# +# Copyright © 2015-2021 the original authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# SPDX-License-Identifier: Apache-2.0 +# + +############################################################################## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. +# +# You can find Gradle at https://github.com/gradle/gradle/. +# +############################################################################## + +# Attempt to set APP_HOME + +# Resolve links: $0 may be a link +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac +done + +# This is normally unused +# shellcheck disable=SC2034 +APP_BASE_NAME=${0##*/} +# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) +APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s +' "$PWD" ) || exit + +# Use the maximum available, or set MAX_FD != -1 to use that value. +MAX_FD=maximum + +warn () { + echo "$*" +} >&2 + +die () { + echo + echo "$*" + echo + exit 1 +} >&2 + +# OS specific support (must be 'true' or 'false'). +cygwin=false +msys=false +darwin=false +nonstop=false +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; +esac + +CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar + + +# Determine the Java command to use to start the JVM. +if [ -n "$JAVA_HOME" ] ; then + if [ -x "$JAVA_HOME/jre/sh/java" ] ; then + # IBM's JDK on AIX uses strange locations for the executables + JAVACMD=$JAVA_HOME/jre/sh/java + else + JAVACMD=$JAVA_HOME/bin/java + fi + if [ ! -x "$JAVACMD" ] ; then + die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +else + JAVACMD=java + if ! command -v java >/dev/null 2>&1 + then + die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. + +Please set the JAVA_HOME variable in your environment to match the +location of your Java installation." + fi +fi + +# Increase the maximum file descriptors if we can. +if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then + case $MAX_FD in #( + max*) + # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. + # shellcheck disable=SC2039,SC3045 + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac +fi + +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. + +# For Cygwin or MSYS, switch paths to Windows format before running java +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + + # Now convert the arguments - kludge to limit ourselves to /bin/sh + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) + fi + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. + shift # remove old arg + set -- "$@" "$arg" # push replacement arg + done +fi + + +# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' + +# Collect all arguments for the java command: +# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, +# and any embedded shellness will be escaped. +# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be +# treated as '${Hostname}' itself on the command line. + +set -- \ + "-Dorg.gradle.appname=$APP_BASE_NAME" \ + -classpath "$CLASSPATH" \ + org.gradle.wrapper.GradleWrapperMain \ + "$@" + +# Stop when "xargs" is not available. +if ! command -v xargs >/dev/null 2>&1 +then + die "xargs is not available" +fi + +# Use "xargs" to parse quoted args. +# +# With -n1 it outputs one arg per line, with the quotes and backslashes removed. +# +# In Bash we could simply go: +# +# readarray ARGS < <( xargs -n1 <<<"$var" ) && +# set -- "${ARGS[@]}" "$@" +# +# but POSIX shell has neither arrays nor command substitution, so instead we +# post-process each arg (as a line of input to sed) to backslash-escape any +# character that might be a shell metacharacter, then use eval to reverse +# that process (while maintaining the separation between arguments), and wrap +# the whole thing up as a single "set" statement. +# +# This will of course break if any of these variables contains a newline or +# an unmatched quote. +# + +eval "set -- $( + printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | + xargs -n1 | + sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | + tr '\n' ' ' + )" '"$@"' + +exec "$JAVACMD" "$@" diff --git a/modules/parquet-data-format/gradlew.bat b/modules/parquet-data-format/gradlew.bat new file mode 100644 index 0000000000000..9b42019c7915b --- /dev/null +++ b/modules/parquet-data-format/gradlew.bat @@ -0,0 +1,94 @@ +@rem +@rem Copyright 2015 the original author or authors. +@rem +@rem Licensed under the Apache License, Version 2.0 (the "License"); +@rem you may not use this file except in compliance with the License. +@rem You may obtain a copy of the License at +@rem +@rem https://www.apache.org/licenses/LICENSE-2.0 +@rem +@rem Unless required by applicable law or agreed to in writing, software +@rem distributed under the License is distributed on an "AS IS" BASIS, +@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@rem See the License for the specific language governing permissions and +@rem limitations under the License. +@rem +@rem SPDX-License-Identifier: Apache-2.0 +@rem + +@if "%DEBUG%"=="" @echo off +@rem ########################################################################## +@rem +@rem Gradle startup script for Windows +@rem +@rem ########################################################################## + +@rem Set local scope for the variables with windows NT shell +if "%OS%"=="Windows_NT" setlocal + +set DIRNAME=%~dp0 +if "%DIRNAME%"=="" set DIRNAME=. +@rem This is normally unused +set APP_BASE_NAME=%~n0 +set APP_HOME=%DIRNAME% + +@rem Resolve any "." and ".." in APP_HOME to make it shorter. +for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi + +@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. +set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" + +@rem Find java.exe +if defined JAVA_HOME goto findJavaFromJavaHome + +set JAVA_EXE=java.exe +%JAVA_EXE% -version >NUL 2>&1 +if %ERRORLEVEL% equ 0 goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:findJavaFromJavaHome +set JAVA_HOME=%JAVA_HOME:"=% +set JAVA_EXE=%JAVA_HOME%/bin/java.exe + +if exist "%JAVA_EXE%" goto execute + +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 + +goto fail + +:execute +@rem Setup the command line + +set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar + + +@rem Execute Gradle +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* + +:end +@rem End local scope for the variables with windows NT shell +if %ERRORLEVEL% equ 0 goto mainEnd + +:fail +rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of +rem the _cmd.exe /c_ return code! +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% + +:mainEnd +if "%OS%"=="Windows_NT" endlocal + +:omega diff --git a/modules/parquet-data-format/settings.gradle b/modules/parquet-data-format/settings.gradle new file mode 100644 index 0000000000000..6f5da74a14d2d --- /dev/null +++ b/modules/parquet-data-format/settings.gradle @@ -0,0 +1,10 @@ +/* + * This file was generated by the Gradle 'init' task. + * + * The settings file is used to specify which projects to include in your build. + * + * Detailed information about configuring a multi-project build in Gradle can be found + * in the user manual at https://docs.gradle.org/6.5.1/userguide/multi_project_builds.html + */ + +rootProject.name = 'plugin-template' diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java new file mode 100644 index 0000000000000..d6553a14ab23d --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java @@ -0,0 +1,107 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package com.parquet.parquetdataformat; + +import com.parquet.parquetdataformat.engine.ParquetDataFormat; +import com.parquet.parquetdataformat.fields.ParquetFieldUtil; +import com.parquet.parquetdataformat.engine.read.ParquetDataSourceCodec; +import com.parquet.parquetdataformat.writer.ParquetWriter; +import org.opensearch.index.engine.DataFormatPlugin; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import com.parquet.parquetdataformat.bridge.RustBridge; +import com.parquet.parquetdataformat.engine.ParquetExecutionEngine; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.plugins.DataSourcePlugin; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.plugins.Plugin; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +/** + * OpenSearch plugin that provides Parquet data format support for indexing operations. + * + *

This plugin implements the Project Mustang design for writing OpenSearch documents + * to Parquet format using Apache Arrow as the intermediate representation and a native + * Rust backend for high-performance Parquet file generation. + * + *

Key features provided by this plugin: + *

    + *
  • Integration with OpenSearch's DataFormatPlugin interface
  • + *
  • Parquet-based execution engine with Arrow memory management
  • + *
  • High-performance native Rust backend via JNI bridge
  • + *
  • Memory pressure monitoring and backpressure mechanisms
  • + *
  • Columnar storage optimization for analytical workloads
  • + *
+ * + *

The plugin orchestrates the complete pipeline from OpenSearch document indexing + * through Arrow-based batching to final Parquet file generation. It provides both + * the execution engine interface for OpenSearch integration and testing utilities + * for development purposes. + * + *

Architecture components: + *

    + *
  • {@link ParquetExecutionEngine} - Main execution engine implementation
  • + *
  • {@link ParquetWriter} - Document writer with Arrow integration
  • + *
  • {@link RustBridge} - JNI interface to native Parquet operations
  • + *
  • Memory management via {@link com.parquet.parquetdataformat.memory} package
  • + *
+ */ +public class ParquetDataFormatPlugin extends Plugin implements DataFormatPlugin, DataSourcePlugin { + + @Override + @SuppressWarnings("unchecked") + public IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath) { + return (IndexingExecutionEngine) new ParquetExecutionEngine(() -> ParquetFieldUtil.getSchema(mapperService), shardPath); + } + + private Class getDataFormatType() { + return ParquetDataFormat.class; + } + + @Override + public DataFormat getDataFormat() { + return new ParquetDataFormat(); + } + + @Override + public Optional> getDataSourceCodecs() { + Map codecs = new HashMap<>(); + ParquetDataSourceCodec parquetDataSourceCodec = new ParquetDataSourceCodec(); + // TODO : version it correctly - similar to lucene codecs? + codecs.put(parquetDataSourceCodec.getDataFormat(), new ParquetDataSourceCodec()); + return Optional.of(codecs); + // return Optional.empty(); + } + + // for testing locally only + public void indexDataToParquetEngine() throws IOException { + //Create Engine (take Schema as Input) +// IndexingExecutionEngine indexingExecutionEngine = indexingEngine(); +// //Create Writer +// ParquetWriter writer = (ParquetWriter) indexingExecutionEngine.createWriter(); +// for (int i=0;i<10;i++) { +// //Get DocumentInput +// DocumentInput documentInput = writer.newDocumentInput(); +// ParquetDocumentInput parquetDocumentInput = (ParquetDocumentInput) documentInput; +// //Populate data +// DummyDataUtils.populateDocumentInput(parquetDocumentInput); +// //Write document +// writer.addDoc(parquetDocumentInput); +// } +// writer.flush(null); +// writer.close(); +// //refresh engine +// indexingExecutionEngine.refresh(null); + } + +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/ArrowExport.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/ArrowExport.java new file mode 100644 index 0000000000000..694df0c4a9f47 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/ArrowExport.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.bridge; + +import org.apache.arrow.c.ArrowArray; +import org.apache.arrow.c.ArrowSchema; + +/** + * Container for Arrow C Data Interface exports. + * Provides a safe wrapper around ArrowArray and ArrowSchema with proper resource management. + */ +public record ArrowExport(ArrowArray arrowArray, ArrowSchema arrowSchema) implements AutoCloseable { + + public long getArrayAddress() { + return arrowArray.memoryAddress(); + } + + public long getSchemaAddress() { + return arrowSchema.memoryAddress(); + } + + @Override + public void close() { + if (arrowArray != null) { + arrowArray.close(); + } + if (arrowSchema != null) { + arrowSchema.close(); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/RustBridge.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/RustBridge.java new file mode 100644 index 0000000000000..8ef4596395e97 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/RustBridge.java @@ -0,0 +1,119 @@ +package com.parquet.parquetdataformat.bridge; + +import org.opensearch.common.SuppressForbidden; + +import java.io.IOException; +import java.io.InputStream; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardCopyOption; +import java.util.Locale; + +/** + * JNI bridge to the native Rust Parquet writer implementation. + * + *

This class provides the interface between Java and the native Rust library + * that handles low-level Parquet file operations. It automatically loads the + * appropriate native library for the current platform and architecture. + * + *

Supported platforms: + *

    + *
  • Windows (x86, x86_64, aarch64)
  • + *
  • macOS (x86_64, aarch64/arm64)
  • + *
  • Linux (x86, x86_64, aarch64)
  • + *
+ * + *

The native library is extracted from resources and loaded as a temporary file, + * which is automatically cleaned up on JVM shutdown. + * + *

All native methods operate on Arrow C Data Interface pointers and return + * integer status codes for error handling. + */ +public class RustBridge { + + static { + try { + loadNativeLibrary(); + } catch (Exception e) { + throw new RuntimeException("Failed to load native Rust library", e); + } + } + + @SuppressForbidden(reason = "Need to create temp files") + private static void loadNativeLibrary() { + + String LIB_NAME = "parquet_dataformat_jni"; + String os = System.getProperty("os.name").toLowerCase(Locale.ROOT); + String arch = System.getProperty("os.arch").toLowerCase(Locale.ROOT); + + String osDir = os.contains("win") ? "windows" : + os.contains("mac") ? "macos" : "linux"; + String archDir = arch.contains("aarch64") || arch.contains("arm64") ? "aarch64" : + arch.contains("64") ? "x86_64" : "x86"; + + String extension = os.contains("win") ? ".dll" : + os.contains("mac") ? ".dylib" : ".so"; + + String resourcePath = String.format(Locale.ROOT, "/native/%s/%s/lib%s%s", osDir, archDir, LIB_NAME, extension); + + try (InputStream is = RustBridge.class.getResourceAsStream(resourcePath)) { + if (is == null) { + throw new UnsatisfiedLinkError("Native library not found in resources: " + resourcePath); + } + + Path tempFile = Files.createTempFile("lib" + LIB_NAME, extension); + + // Register deletion hook on JVM shutdown + Runtime.getRuntime().addShutdownHook(new Thread(() -> { + try { + Files.deleteIfExists(tempFile); + } catch (IOException ignored) {} + })); + + Files.copy(is, tempFile, StandardCopyOption.REPLACE_EXISTING); + + System.load(tempFile.toAbsolutePath().toString()); + } catch (IOException e) { + throw new RuntimeException("Failed to load native library from resources", e); + } + } + + // Enhanced native methods that handle validation and provide better error reporting + public static native void createWriter(String file, long schemaAddress) throws IOException; + public static native void write(String file, long arrayAddress, long schemaAddress) throws IOException; + public static native void closeWriter(String file) throws IOException; + public static native void flushToDisk(String file) throws IOException; + + // State and metrics methods handled on Rust side + public static native boolean writerExists(String file); + public static native long getWriteCount(String file); + public static native long getTotalRows(String file); + public static native String[] getActiveWriters(); + + // Validation helpers that could be implemented natively for better performance + public static boolean isValidFileName(String fileName) { + return fileName != null && !fileName.trim().isEmpty(); + } + + public static boolean isValidMemoryAddress(long address) { + return address != 0; + } + + + // DATAFUSION specific native methods starts here + + // Record batch and streaming related methods + public static native String nativeNextBatch(long streamPtr); + + public static native void nativeCloseStream(long streamPtr); + + + // Native method declarations - these will be implemented in the JNI library + public static native void nativeRegisterDirectory(String tableName, String directoryPath, String[] files, long runtimeId); + + public static native long nativeCreateSessionContext(String[] configKeys, String[] configValues); + + public static native long nativeExecuteSubstraitQuery(long sessionContextPtr, byte[] substraitPlan); + + public static native void nativeCloseSessionContext(long sessionContextPtr); +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/FieldTypeConverter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/FieldTypeConverter.java new file mode 100644 index 0000000000000..b4ace7c4b1953 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/FieldTypeConverter.java @@ -0,0 +1,135 @@ +package com.parquet.parquetdataformat.converter; + +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.apache.lucene.search.Query; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.TextSearchInfo; +import org.opensearch.index.mapper.ValueFetcher; + +import java.util.HashMap; +import java.util.Map; + +/** + * Utility class for converting between OpenSearch field types and Arrow/Parquet types. + * + *

This converter provides bidirectional mapping between OpenSearch's field type system + * and Apache Arrow's type system, which serves as the bridge to Parquet data representation. + * It handles the complete conversion pipeline from OpenSearch indexed data to columnar + * Parquet storage format. + * + *

Supported type conversions: + *

    + *
  • OpenSearch numeric types (long, integer, short, byte, double, float) → Arrow Int/FloatingPoint
  • + *
  • OpenSearch boolean → Arrow Bool
  • + *
  • OpenSearch date → Arrow Timestamp
  • + *
  • OpenSearch text/keyword → Arrow Utf8
  • + *
+ * + *

The converter also provides reverse mapping capabilities to reconstruct OpenSearch + * field types from Arrow types, enabling proper schema reconstruction during read operations. + * + *

All conversion methods are static and thread-safe, making them suitable for concurrent + * use across multiple writer instances. + */ +public class FieldTypeConverter { + + public static Map convertToArrowFieldMap(MappedFieldType mappedFieldType, Object value) { + Map fieldMap = new HashMap<>(); + FieldType arrowFieldType = convertToArrowFieldType(mappedFieldType); + fieldMap.put(arrowFieldType, value); + return fieldMap; + } + + public static FieldType convertToArrowFieldType(MappedFieldType mappedFieldType) { + ArrowType arrowType = getArrowType(mappedFieldType.typeName()); + return new FieldType(true, arrowType, null); + } + + public static ParquetFieldType convertToParquetFieldType(MappedFieldType mappedFieldType) { + ArrowType arrowType = getArrowType(mappedFieldType.typeName()); + return new ParquetFieldType(mappedFieldType.name(), arrowType); + } + + public static MappedFieldType convertToMappedFieldType(String name, ArrowType arrowType) { + String opensearchType = getOpenSearchType(arrowType); + return new MockMappedFieldType(name, opensearchType); + } + + private static ArrowType getArrowType(String opensearchType) { + switch (opensearchType) { + case "long": + return new ArrowType.Int(64, true); + case "integer": + return new ArrowType.Int(32, true); + case "short": + return new ArrowType.Int(16, true); + case "byte": + return new ArrowType.Int(8, true); + case "double": + return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE); + case "float": + return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE); + case "boolean": + return new ArrowType.Bool(); + case "date": + return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null); + default: + return new ArrowType.Utf8(); + } + } + + private static String getOpenSearchType(ArrowType arrowType) { + switch (arrowType) { + case ArrowType.Int intType -> { + return switch (intType.getBitWidth()) { + case 8 -> "byte"; + case 16 -> "short"; + case 32 -> "integer"; + case 64 -> "long"; + default -> "integer"; + }; + } + case ArrowType.FloatingPoint fpType -> { + return fpType.getPrecision() == FloatingPointPrecision.DOUBLE ? "double" : "float"; + } + case ArrowType.Bool bool -> { + return "boolean"; + } + case ArrowType.Timestamp timestamp -> { + return "date"; + } + case null, default -> { + return "text"; + } + } + } + + private static class MockMappedFieldType extends MappedFieldType { + private final String type; + + public MockMappedFieldType(String name, String type) { + super(name, true, false, false, TextSearchInfo.NONE, null); + this.type = type; + } + + @Override + public String typeName() { + return type; + } + + @Override + public ValueFetcher valueFetcher(org.opensearch.index.query.QueryShardContext context, + org.opensearch.search.lookup.SearchLookup searchLookup, + String format) { + return null; + } + + @Override + public Query termQuery(Object value, org.opensearch.index.query.QueryShardContext context) { + return null; + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/ParquetFieldType.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/ParquetFieldType.java new file mode 100644 index 0000000000000..84f1b9a4bedd2 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/ParquetFieldType.java @@ -0,0 +1,48 @@ +package com.parquet.parquetdataformat.converter; + +import org.apache.arrow.vector.types.pojo.ArrowType; + +/** + * Represents a field type for Parquet-based document fields. + * + *

This class encapsulates the field name and Arrow type information + * required for proper type mapping between OpenSearch fields and Parquet + * column definitions. It serves as the intermediate representation used + * throughout the Parquet processing pipeline. + * + *

The Arrow type system provides a rich set of data types that can + * accurately represent various field types from OpenSearch, ensuring + * proper data serialization and deserialization. + * + *

Key features: + *

    + *
  • Field name preservation for schema mapping
  • + *
  • Arrow type integration for precise data representation
  • + *
  • Simple mutable structure for field definition building
  • + *
+ */ +public class ParquetFieldType { + private String name; + private ArrowType type; + + public ParquetFieldType(String name, ArrowType type) { + this.name = name; + this.type = type; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public ArrowType getType() { + return type; + } + + public void setType(ArrowType type) { + this.type = type; + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java new file mode 100644 index 0000000000000..0d6c2519d463a --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java @@ -0,0 +1,60 @@ +package com.parquet.parquetdataformat.engine; + +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.opensearch.common.SuppressForbidden; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.mapper.MappedFieldType; +import com.parquet.parquetdataformat.converter.FieldTypeConverter; + +import java.util.Arrays; +import java.util.Random; + +@SuppressForbidden(reason = "Need random for creating temp files") +public class DummyDataUtils { + public static Schema getSchema() { + // Create the most minimal schema possible - just one string field + return new Schema(Arrays.asList( + Field.notNullable(ID, new ArrowType.Int(32, true)), + Field.nullable(NAME, new ArrowType.Utf8()), + Field.nullable(DESIGNATION, new ArrowType.Utf8()), + Field.nullable(SALARY, new ArrowType.Int(32, true)) + )); + } + + public static void populateDocumentInput(DocumentInput documentInput) { + MappedFieldType idField = FieldTypeConverter.convertToMappedFieldType(ID, new ArrowType.Int(32, true)); + documentInput.addField(idField, generateRandomId()); + MappedFieldType nameField = FieldTypeConverter.convertToMappedFieldType(NAME, new ArrowType.Utf8()); + documentInput.addField(nameField, generateRandomName()); + MappedFieldType designationField = FieldTypeConverter.convertToMappedFieldType(DESIGNATION, new ArrowType.Utf8()); + documentInput.addField(designationField, generateRandomDesignation()); + MappedFieldType salaryField = FieldTypeConverter.convertToMappedFieldType(SALARY, new ArrowType.Int(32, true)); + documentInput.addField(salaryField, random.nextInt(100000)); + } + + private static final String ID = "id"; + private static final String NAME = "name"; + private static final String DESIGNATION = "designation"; + private static final String SALARY = "salary"; + private static final String INCREMENT = "increment"; + private static final Random random = new Random(); + private static final String[] NAMES = {"John Doe", "Jane Smith", "Alice Johnson", "Bob Wilson", "Carol Brown"}; + private static final String[] DESIGNATIONS = {"Software Engineer", "Senior Developer", "Team Lead", "Manager", "Architect"}; + + private static int generateRandomId() { + return random.nextInt(1000000); + } + + private static String generateRandomName() { + return NAMES[random.nextInt(NAMES.length)]; + } + + private static String generateRandomDesignation() { + return DESIGNATIONS[random.nextInt(DESIGNATIONS.length)]; + } + + +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java new file mode 100644 index 0000000000000..240a33c10531e --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java @@ -0,0 +1,58 @@ +package com.parquet.parquetdataformat.engine; + +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.exec.DataFormat; + +/** + * Data format implementation for Parquet-based document storage. + * + *

This class integrates with OpenSearch's DataFormat interface to provide + * Parquet file format support within the OpenSearch indexing pipeline. It + * defines the configuration and behavior for the "parquet" data format. + * + *

The implementation provides hooks for: + *

    + *
  • Data format specific settings configuration
  • + *
  • Cluster-level settings management
  • + *
  • Store configuration for Parquet-specific optimizations
  • + *
  • Format identification through the "parquet" name
  • + *
+ * + *

This class serves as the entry point for registering Parquet format + * capabilities with OpenSearch's execution engine framework, allowing + * the system to recognize and utilize Parquet-based storage operations. + */ +public class ParquetDataFormat implements DataFormat { + @Override + public Setting dataFormatSettings() { + return null; + } + + @Override + public Setting clusterLeveldataFormatSettings() { + return null; + } + + @Override + public String name() { + return "parquet"; + } + + @Override + public void configureStore() { + + } + + public static ParquetDataFormat PARQUET_DATA_FORMAT = new ParquetDataFormat(); + + @Override + public boolean equals(Object obj) { + return true; + } + + @Override + public int hashCode() { + return 0; + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java new file mode 100644 index 0000000000000..4778d21f51452 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java @@ -0,0 +1,86 @@ +package com.parquet.parquetdataformat.engine; + +import com.parquet.parquetdataformat.writer.ParquetDocumentInput; +import com.parquet.parquetdataformat.writer.ParquetWriter; +import org.apache.arrow.vector.types.pojo.Schema; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.function.Supplier; + +import static com.parquet.parquetdataformat.engine.ParquetDataFormat.PARQUET_DATA_FORMAT; + +/** + * Main execution engine for Parquet-based indexing operations in OpenSearch. + * + *

This engine implements OpenSearch's IndexingExecutionEngine interface to provide + * Parquet file generation capabilities within the indexing pipeline. It manages the + * lifecycle of Parquet writers and coordinates the overall document processing workflow. + * + *

Key responsibilities: + *

    + *
  • Writer creation with unique file naming and Arrow schema integration
  • + *
  • Schema-based field type support and validation
  • + *
  • Refresh operations for completing indexing cycles
  • + *
  • Integration with the broader Parquet data format ecosystem
  • + *
+ * + *

The engine uses an atomic counter to ensure unique Parquet file names across + * concurrent operations, following the naming pattern "parquet_file_generation_N.parquet" + * where N is an incrementing sequence number. + * + *

Each writer instance created by this engine is configured with: + *

    + *
  • A unique file name for output isolation
  • + *
  • The Arrow schema provided during engine construction
  • + *
  • Full access to the Parquet processing pipeline via {@link ParquetWriter}
  • + *
+ * + *

The engine is designed to work with {@link ParquetDocumentInput} for document + * processing and integrates seamlessly with OpenSearch's execution framework. + */ +public class ParquetExecutionEngine implements IndexingExecutionEngine { + + public static final String FILE_NAME_PREFIX = "parquet_file_generation"; + private final Supplier schema; + private final List filesWrittenAlready = new ArrayList<>(); + private final ShardPath shardPath; + + public ParquetExecutionEngine(Supplier schema, ShardPath shardPath) { + this.schema = schema; + this.shardPath = shardPath; + } + + @Override + public List supportedFieldTypes() { + return List.of(); + } + + @Override + public Writer createWriter(long writerGeneration) throws IOException { + String fileName = Path.of(shardPath.getDataPath().toString(), FILE_NAME_PREFIX + "_" + writerGeneration + ".parquet").toString(); + return new ParquetWriter(fileName, schema.get(), writerGeneration); + } + + @Override + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { + RefreshResult refreshResult = new RefreshResult(); + filesWrittenAlready.addAll(refreshInput.getWriterFiles()); + refreshResult.add(PARQUET_DATA_FORMAT, filesWrittenAlready); + return refreshResult; + } + + @Override + public DataFormat getDataFormat() { + return new ParquetDataFormat(); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java new file mode 100644 index 0000000000000..f20a9bae06ea2 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java @@ -0,0 +1,143 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.engine.read; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +import java.util.List; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.CompletionException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +import static com.parquet.parquetdataformat.bridge.RustBridge.nativeCloseSessionContext; +import static com.parquet.parquetdataformat.bridge.RustBridge.nativeCreateSessionContext; +import static com.parquet.parquetdataformat.bridge.RustBridge.nativeExecuteSubstraitQuery; +import static com.parquet.parquetdataformat.bridge.RustBridge.nativeRegisterDirectory; + +/** + * Datasource codec implementation for parquet files + */ +public class ParquetDataSourceCodec implements DataSourceCodec { + + private static final Logger logger = LogManager.getLogger(ParquetDataSourceCodec.class); + private static final AtomicLong runtimeIdGenerator = new AtomicLong(0); + private static final AtomicLong sessionIdGenerator = new AtomicLong(0); + private final ConcurrentHashMap sessionContexts = new ConcurrentHashMap<>(); + + // JNI library loading + static { + try { + //JniLibraryLoader.loadLibrary(); + logger.info("DataFusion JNI library loaded successfully"); + } catch (Exception e) { + logger.error("Failed to load DataFusion JNI library", e); + throw new RuntimeException("Failed to initialize DataFusion JNI library", e); + } + } + + @Override + public CompletableFuture registerDirectory(String directoryPath, List fileNames, long runtimeId) { + return CompletableFuture.supplyAsync(() -> { + try { + logger.debug("Registering directory: {} with {} files", directoryPath, fileNames.size()); + + // Convert file names to arrays for JNI + String[] fileArray = fileNames.toArray(new String[0]); + + // Call native method to register directory + nativeRegisterDirectory("csv_table", directoryPath, fileArray, runtimeId); + return null; + } catch (Exception e) { + logger.error("Failed to register directory: " + directoryPath, e); + throw new CompletionException("Failed to register directory", e); + } + }); + } + + @Override + public CompletableFuture createSessionContext(long globalRuntimeEnvId) { + return CompletableFuture.supplyAsync(() -> { + try { + long sessionId = sessionIdGenerator.incrementAndGet(); + logger.debug("Creating session context with ID: {} for runtime: {}", sessionId, globalRuntimeEnvId); + + // Default configuration + String[] configKeys = { "batch_size", "target_partitions" }; + String[] configValues = { "1024", "4" }; + + // Create native session context + long nativeContextPtr = nativeCreateSessionContext(configKeys, configValues); + sessionContexts.put(sessionId, nativeContextPtr); + + logger.info("Created session context with ID: {}", sessionId); + return sessionId; + } catch (Exception e) { + logger.error("Failed to create session context for runtime: " + globalRuntimeEnvId, e); + throw new CompletionException("Failed to create session context", e); + } + }); + } + + @Override + public CompletableFuture executeSubstraitQuery(long sessionContextId, byte[] substraitPlanBytes) { + return CompletableFuture.supplyAsync(() -> { + try { + logger.debug("Executing Substrait query for session: {}", sessionContextId); + + Long nativeContextPtr = sessionContexts.get(sessionContextId); + if (nativeContextPtr == null) { + throw new IllegalArgumentException("Invalid session context ID: " + sessionContextId); + } + + // Execute query and get native stream pointer + long nativeStreamPtr = nativeExecuteSubstraitQuery(nativeContextPtr, substraitPlanBytes); + + // Create Java wrapper for the native stream + RecordBatchStream stream = new ParquetRecordBatchStream(nativeStreamPtr); + + logger.info("Successfully executed Substrait query for session: {}", sessionContextId); + return stream; + } catch (Exception e) { + logger.error("Failed to execute Substrait query for session: " + sessionContextId, e); + throw new CompletionException("Failed to execute Substrait query", e); + } + }); + } + + @Override + public CompletableFuture closeSessionContext(long sessionContextId) { + return CompletableFuture.supplyAsync(() -> { + try { + logger.debug("Closing session context: {}", sessionContextId); + + Long nativeContextPtr = sessionContexts.remove(sessionContextId); + if (nativeContextPtr != null) { + nativeCloseSessionContext(nativeContextPtr); + logger.info("Successfully closed session context: {}", sessionContextId); + } else { + logger.warn("Session context not found: {}", sessionContextId); + } + + return null; + } catch (Exception e) { + logger.error("Failed to close session context: " + sessionContextId, e); + throw new CompletionException("Failed to close session context", e); + } + }); + } + + public DataFormat getDataFormat() { + return DataFormat.CSV; + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetRecordBatchStream.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetRecordBatchStream.java new file mode 100644 index 0000000000000..3c23e4fd9d1b5 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetRecordBatchStream.java @@ -0,0 +1,117 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.engine.read; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +import java.util.concurrent.CompletableFuture; + +import static com.parquet.parquetdataformat.bridge.RustBridge.nativeCloseStream; +import static com.parquet.parquetdataformat.bridge.RustBridge.nativeNextBatch; + +/** + * TODO : this need not be here - nothing specific to parquet - move to LIB ? + * Native implementation of RecordBatchStream that wraps a JNI stream pointer. + * This class provides a Java interface over native DataFusion record batches. + */ +public class ParquetRecordBatchStream implements RecordBatchStream { + + private static final Logger logger = LogManager.getLogger(ParquetRecordBatchStream.class); + + private final long nativeStreamPtr; + private volatile boolean closed = false; + private volatile boolean hasNextCached = false; + private volatile boolean hasNextValue = false; + + /** + * Creates a new ParquetRecordBatchStream wrapping the given native stream pointer. + * + * @param nativeStreamPtr Pointer to the native DataFusion RecordBatch stream + */ + public ParquetRecordBatchStream(long nativeStreamPtr) { + if (nativeStreamPtr == 0) { + throw new IllegalArgumentException("Invalid native stream pointer"); + } + this.nativeStreamPtr = nativeStreamPtr; + logger.debug("Created ParquetRecordBatchStream with pointer: {}", nativeStreamPtr); + } + + @Override + public Object getSchema() { + return "ParquetSchema"; // Placeholder + } + + @Override + public CompletableFuture next() { + // PlaceholderImpl + return CompletableFuture.supplyAsync(() -> { + if (closed) { + return null; + } + + try { + // Get the next batch from native code + String batch = nativeNextBatch(nativeStreamPtr); + + // Reset cached hasNext value since we consumed a batch + hasNextCached = false; + + logger.trace("Retrieved next batch from stream pointer: {}", nativeStreamPtr); + return batch; + } catch (Exception e) { + logger.error("Error getting next batch from stream", e); + return null; + } + }); + } + + @Override + public boolean hasNext() { + // Placeholder impl + if (closed) { + return false; + } + + if (hasNextCached) { + return hasNextValue; + } + + try { + // Check if there's a next batch available + // This is a simplified implementation - in practice, you might want to + // peek at the stream without consuming the batch + String nextBatch = nativeNextBatch(nativeStreamPtr); + hasNextValue = (nextBatch != null); + hasNextCached = true; + + logger.trace("hasNext() = {} for stream pointer: {}", hasNextValue, nativeStreamPtr); + return hasNextValue; + } catch (Exception e) { + logger.error("Error checking for next batch in stream", e); + return false; + } + } + + @Override + public void close() { + if (!closed) { + logger.debug("Closing ParquetRecordBatchStream with pointer: {}", nativeStreamPtr); + try { + nativeCloseStream(nativeStreamPtr); + closed = true; + logger.debug("Successfully closed ParquetRecordBatchStream"); + } catch (Exception e) { + logger.error("Error closing ParquetRecordBatchStream", e); + throw e; + } + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/package-info.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/package-info.java new file mode 100644 index 0000000000000..bd486fa1e26f4 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * CSV data format implementation for DataFusion integration. + * Provides CSV file reading capabilities through DataFusion query engine. + */ +package com.parquet.parquetdataformat.engine.read; diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java new file mode 100644 index 0000000000000..143b9837c6970 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java @@ -0,0 +1,103 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import com.parquet.parquetdataformat.fields.number.ByteParquetField; +import com.parquet.parquetdataformat.fields.number.DoubleParquetField; +import com.parquet.parquetdataformat.fields.number.FloatParquetField; +import com.parquet.parquetdataformat.fields.number.HalfFloatParquetField; +import com.parquet.parquetdataformat.fields.number.IntegerParquetField; +import com.parquet.parquetdataformat.fields.number.LongParquetField; +import com.parquet.parquetdataformat.fields.number.ShortParquetField; +import com.parquet.parquetdataformat.fields.number.UnsignedLongParquetField; +import org.apache.arrow.vector.types.FloatingPointPrecision; +import org.apache.arrow.vector.types.TimeUnit; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.FieldType; +import org.opensearch.index.mapper.BooleanFieldMapper; +import org.opensearch.index.mapper.DateFieldMapper; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.NumberFieldMapper; +import org.opensearch.index.mapper.TextFieldMapper; + +import java.util.HashMap; +import java.util.Map; + +public class ArrowFieldRegistry { + + private static final Map FIELD_TYPE_MAP = new HashMap<>(); + private static final Map PARQUET_FIELD_MAP = new HashMap<>(); + + static { + //TODO: darsaga check which fields can be nullable and which can not be + + // Number types + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), + FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.FLOAT.typeName(), + FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), + FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.BYTE.typeName(), + FieldType.nullable(new ArrowType.Int(8, true))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.SHORT.typeName(), + FieldType.nullable(new ArrowType.Int(16, true))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.INTEGER.typeName(), + FieldType.nullable(new ArrowType.Int(32, true))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.LONG.typeName(), + FieldType.nullable(new ArrowType.Int(64, true))); + FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.UNSIGNED_LONG.typeName(), + FieldType.nullable(new ArrowType.Int(64, false))); + + // Other types + FIELD_TYPE_MAP.put(DateFieldMapper.CONTENT_TYPE, + FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, null))); + FIELD_TYPE_MAP.put(BooleanFieldMapper.CONTENT_TYPE, + FieldType.nullable(new ArrowType.Bool())); + FIELD_TYPE_MAP.put(KeywordFieldMapper.CONTENT_TYPE, + FieldType.nullable(new ArrowType.Utf8())); + FIELD_TYPE_MAP.put(TextFieldMapper.CONTENT_TYPE, + FieldType.nullable(new ArrowType.Utf8())); + + setUpParquetFieldMap(); + } + + private static void setUpParquetFieldMap() { + + //Number fields + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), new HalfFloatParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.FLOAT.typeName(), new FloatParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), new DoubleParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.BYTE.typeName(), new ByteParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.SHORT.typeName(), new ShortParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.INTEGER.typeName(), new IntegerParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.LONG.typeName(), new LongParquetField()); + PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.UNSIGNED_LONG.typeName(), new UnsignedLongParquetField()); + + //Date field + PARQUET_FIELD_MAP.put(DateFieldMapper.CONTENT_TYPE, new DateParquetField()); + + //Boolean field + PARQUET_FIELD_MAP.put(BooleanFieldMapper.CONTENT_TYPE, new BooleanParquetField()); + + //Text field + PARQUET_FIELD_MAP.put(TextFieldMapper.CONTENT_TYPE, new TextParquetField()); + + //Keyword field + PARQUET_FIELD_MAP.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordParquetField()); + } + + public static FieldType getFieldType(String typeName) { + return FIELD_TYPE_MAP.get(typeName); + } + + public static ParquetField getParquetField(String typeName) { + return PARQUET_FIELD_MAP.get(typeName); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/BooleanParquetField.java new file mode 100644 index 0000000000000..225323e6a7ffe --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/BooleanParquetField.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.BitVector; +import org.opensearch.index.mapper.MappedFieldType; + +public class BooleanParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + BitVector bitVector = (BitVector) managedVSR.getVector(mappedFieldType.name()); + int rowIndex = managedVSR.getRowCount(); + bitVector.setSafe(rowIndex, (Boolean) parseValue ? 1 : 0); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/DateParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/DateParquetField.java new file mode 100644 index 0000000000000..5f2170fa95987 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/DateParquetField.java @@ -0,0 +1,19 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.opensearch.index.mapper.MappedFieldType; + +public class DateParquetField extends ParquetField { + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/KeywordParquetField.java new file mode 100644 index 0000000000000..a5837ee851364 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/KeywordParquetField.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.VarCharVector; +import org.opensearch.index.mapper.MappedFieldType; + +public class KeywordParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + int rowIndex = managedVSR.getRowCount(); + textVector.setSafe(rowIndex, parseValue.toString().getBytes()); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java new file mode 100644 index 0000000000000..3d52106f7acad --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.opensearch.index.mapper.MappedFieldType; + +public abstract class ParquetField { + public abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue); + + public void createField(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + if (mappedFieldType.isColumnar()) { + addToGroup(mappedFieldType, managedVSR, parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetFieldUtil.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetFieldUtil.java new file mode 100644 index 0000000000000..a47f35fcd9bb1 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetFieldUtil.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.opensearch.index.mapper.Mapper; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.MetadataFieldMapper; + +import java.util.ArrayList; +import java.util.List; + +public class ParquetFieldUtil { + + public static Schema getSchema(MapperService mapperService) { + List fields = new ArrayList<>(); + + for (Mapper mapper : mapperService.documentMapper().mappers()) { + if (mapper instanceof MetadataFieldMapper) continue; + fields.add(new Field(mapper.name(), ArrowFieldRegistry.getFieldType(mapper.typeName()), null)); + } + + // Create the most minimal schema possible - just one string field + return new Schema(fields); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/TextParquetField.java new file mode 100644 index 0000000000000..6bcf6d091fd62 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/TextParquetField.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields; + +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.VarCharVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.opensearch.index.mapper.MappedFieldType; + +public class TextParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name()); + int rowIndex = managedVSR.getRowCount(); + textVector.setSafe(rowIndex, parseValue.toString().getBytes()); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ByteParquetField.java new file mode 100644 index 0000000000000..75d0607a18eb7 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ByteParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.TinyIntVector; +import org.opensearch.index.mapper.MappedFieldType; + +public class ByteParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + tinyIntVector.setNull(rowCount); + } else { + tinyIntVector.setSafe(rowCount, (Byte) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/DoubleParquetField.java new file mode 100644 index 0000000000000..a552efa146bce --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/DoubleParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.Float8Vector; +import org.opensearch.index.mapper.MappedFieldType; + +public class DoubleParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + float8Vector.setNull(rowCount); + } else { + float8Vector.setSafe(rowCount, (Double) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/FloatParquetField.java new file mode 100644 index 0000000000000..de10a122f40e7 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/FloatParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.Float4Vector; +import org.opensearch.index.mapper.MappedFieldType; + +public class FloatParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + float4Vector.setNull(rowCount); + } else { + float4Vector.setSafe(rowCount, (Float) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/HalfFloatParquetField.java new file mode 100644 index 0000000000000..4d393d3a804ce --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/HalfFloatParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.Float2Vector; +import org.opensearch.index.mapper.MappedFieldType; + +public class HalfFloatParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + float2Vector.setNull(rowCount); + } else { + float2Vector.setSafe(rowCount, (Short) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/IntegerParquetField.java new file mode 100644 index 0000000000000..0a14344b6eaac --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/IntegerParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.IntVector; +import org.opensearch.index.mapper.MappedFieldType; + +public class IntegerParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + intVector.setNull(rowCount); + } else { + intVector.setSafe(rowCount, (Integer) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/LongParquetField.java new file mode 100644 index 0000000000000..7221d64c6590d --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/LongParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.BigIntVector; +import org.opensearch.index.mapper.MappedFieldType; + +public class LongParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + bigIntVector.setNull(rowCount); + } else { + bigIntVector.setSafe(rowCount, (Long) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ShortParquetField.java new file mode 100644 index 0000000000000..8e28bdda9ba54 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ShortParquetField.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.SmallIntVector; +import org.opensearch.index.mapper.MappedFieldType; + +public class ShortParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + smallIntVector.setNull(rowCount); + } else { + smallIntVector.setSafe(rowCount, (Short) parseValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/UnsignedLongParquetField.java new file mode 100644 index 0000000000000..ed5d4f5509a3d --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/UnsignedLongParquetField.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.fields.number; + +import com.parquet.parquetdataformat.fields.ParquetField; +import com.parquet.parquetdataformat.vsr.ManagedVSR; +import org.apache.arrow.vector.UInt8Vector; +import org.opensearch.index.mapper.MappedFieldType; + +public class UnsignedLongParquetField extends ParquetField { + + @Override + public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) { + UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(mappedFieldType.name()); + int rowCount = managedVSR.getRowCount(); + if (parseValue == null) { + uInt8Vector.setNull(rowCount); + } else { + long longValue = ((Number) parseValue).longValue(); + uInt8Vector.setSafe(rowCount, longValue); + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java new file mode 100644 index 0000000000000..83e60d863aeb5 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java @@ -0,0 +1,215 @@ +package com.parquet.parquetdataformat.memory; + +import org.apache.arrow.memory.AllocationListener; +import org.apache.arrow.memory.AllocationOutcome; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.opensearch.common.settings.Settings; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Manages BufferAllocator lifecycle with configurable allocation strategies. + * Provides factory methods for creating allocators with different policies + * based on OpenSearch settings and memory pressure conditions. + */ +public class ArrowBufferPool { + + private final Settings settings; + private final long maxAllocation; + private final long initReservation; + private final AllocationListener allocationListener; + private final MemoryPressureMonitor memoryMonitor; + + // Track active allocators for monitoring and cleanup + private final ConcurrentHashMap activeAllocators; + private final AtomicLong totalAllocated; + + public ArrowBufferPool(Settings settings, MemoryPressureMonitor memoryMonitor) { + this.settings = settings; + this.memoryMonitor = memoryMonitor; + this.activeAllocators = new ConcurrentHashMap<>(); + this.totalAllocated = new AtomicLong(0); + + // Configure memory limits - parse size strings manually + this.maxAllocation = parseByteSize(settings.get("parquet.memory.max_allocation", "1gb")); + this.initReservation = parseByteSize(settings.get("parquet.memory.init_reservation", "100mb")); + + // Set up allocation listener for monitoring + this.allocationListener = new PoolAllocationListener(); + } + + /** + * Creates a new child allocator with the configured strategy and limits. + * + * @param name Unique name for the allocator + * @return BufferAllocator configured with pool settings + */ + public BufferAllocator createAllocator(String name) { + return createAllocator(name, initReservation, maxAllocation); + } + + /** + * Creates a new child allocator with custom limits. + * + * @param name Unique name for the allocator + * @param reservation Initial reservation amount + * @param maxBytes Maximum allocation limit + * @return BufferAllocator configured with specified limits + */ + public BufferAllocator createAllocator(String name, long reservation, long maxBytes) { + // Check memory pressure before creating new allocator + if (memoryMonitor.shouldRejectAllocation(reservation)) { + throw new OutOfMemoryError( + "Cannot create allocator '" + name + "': memory pressure too high"); + } + + BufferAllocator rootAllocator = createRootAllocator(); + BufferAllocator childAllocator = rootAllocator.newChildAllocator( + name, allocationListener, reservation, maxBytes); + + activeAllocators.put(name, childAllocator); + totalAllocated.addAndGet(reservation); + + return childAllocator; + } + + /** + * Releases an allocator and cleans up resources. + * + * @param name Name of the allocator to release + */ + public void releaseAllocator(String name) { + BufferAllocator allocator = activeAllocators.remove(name); + if (allocator != null) { + long allocated = allocator.getAllocatedMemory(); + totalAllocated.addAndGet(-allocated); + allocator.close(); + } + } + + /** + * Gets current memory allocation statistics. + * + * @return AllocationStats with current usage information + */ + public AllocationStats getStats() { + return new AllocationStats( + totalAllocated.get(), + maxAllocation, + activeAllocators.size(), + memoryMonitor.getCurrentPressure() + ); + } + + /** + * Closes all active allocators and cleans up the pool. + */ + public void close() { + activeAllocators.values().forEach(BufferAllocator::close); + activeAllocators.clear(); + totalAllocated.set(0); + } + + private BufferAllocator createRootAllocator() { + // Create a simple RootAllocator with basic settings + return new RootAllocator(maxAllocation); + } + + /** + * Simple byte size parser for configuration strings. + */ + private long parseByteSize(String sizeStr) { + if (sizeStr == null || sizeStr.trim().isEmpty()) { + return 0; + } + + String trimmed = sizeStr.trim().toLowerCase(); + long multiplier = 1; + + if (trimmed.endsWith("kb")) { + multiplier = 1024; + trimmed = trimmed.substring(0, trimmed.length() - 2); + } else if (trimmed.endsWith("mb")) { + multiplier = 1024 * 1024; + trimmed = trimmed.substring(0, trimmed.length() - 2); + } else if (trimmed.endsWith("gb")) { + multiplier = 1024 * 1024 * 1024; + trimmed = trimmed.substring(0, trimmed.length() - 2); + } else if (trimmed.endsWith("b")) { + trimmed = trimmed.substring(0, trimmed.length() - 1); + } + + try { + return Long.parseLong(trimmed.trim()) * multiplier; + } catch (NumberFormatException e) { + throw new IllegalArgumentException("Invalid byte size format: " + sizeStr, e); + } + } + + /** + * Allocation listener that integrates with memory monitoring. + */ + private class PoolAllocationListener implements AllocationListener { + + @Override + public void onPreAllocation(long size) { + if (memoryMonitor.shouldRejectAllocation(size)) { + throw new OutOfMemoryError("Memory pressure too high for allocation of " + size + " bytes"); + } + } + + @Override + public void onAllocation(long size) { + memoryMonitor.recordAllocation(size); + } + + @Override + public void onRelease(long size) { + memoryMonitor.recordDeallocation(size); + } + + @Override + public boolean onFailedAllocation(long size, AllocationOutcome outcome) { + memoryMonitor.recordFailedAllocation(size, "FAILED"); + return false; // Don't retry + } + + @Override + public void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) { + // Track child allocator creation + } + + @Override + public void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) { + // Track child allocator removal + } + } + + /** + * Allocation statistics for monitoring. + */ + public static class AllocationStats { + private final long totalAllocated; + private final long maxAllocation; + private final int activeAllocators; + private final double memoryPressure; + + public AllocationStats(long totalAllocated, long maxAllocation, + int activeAllocators, double memoryPressure) { + this.totalAllocated = totalAllocated; + this.maxAllocation = maxAllocation; + this.activeAllocators = activeAllocators; + this.memoryPressure = memoryPressure; + } + + public long getTotalAllocated() { return totalAllocated; } + public long getMaxAllocation() { return maxAllocation; } + public int getActiveAllocators() { return activeAllocators; } + public double getMemoryPressure() { return memoryPressure; } + public double getUtilizationRatio() { + return maxAllocation > 0 ? (double) totalAllocated / maxAllocation : 0.0; + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/MemoryPressureMonitor.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/MemoryPressureMonitor.java new file mode 100644 index 0000000000000..382c8c8b647fb --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/MemoryPressureMonitor.java @@ -0,0 +1,274 @@ +package com.parquet.parquetdataformat.memory; + +import org.opensearch.common.settings.Settings; + +import java.lang.management.ManagementFactory; +import java.lang.management.MemoryMXBean; +import java.lang.management.MemoryUsage; +import java.util.concurrent.Executors; +import java.util.concurrent.ScheduledExecutorService; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Monitors off-heap memory usage and triggers backpressure mechanisms. + * Tracks Arrow buffer allocations and provides pressure metrics for + * controlling writer creation and flush intervals. + */ +public class MemoryPressureMonitor { + + public enum PressureLevel { + LOW(0.0, 0.7), // < 70% utilization + MODERATE(0.7, 0.85), // 70-85% utilization + HIGH(0.85, 0.95), // 85-95% utilization + CRITICAL(0.95, 1.0); // > 95% utilization + + private final double min; + private final double max; + + PressureLevel(double min, double max) { + this.min = min; + this.max = max; + } + + public static PressureLevel fromRatio(double ratio) { + for (PressureLevel level : values()) { + if (ratio >= level.min && ratio < level.max) { + return level; + } + } + return CRITICAL; + } + } + + private final MemoryMXBean memoryBean; + private final ScheduledExecutorService scheduler; + private final AtomicLong directMemoryUsed; + private final AtomicLong directMemoryMax; + private final AtomicReference currentPressure; + private final AtomicLong allocationCount; + private final AtomicLong deallocationCount; + private final AtomicLong failedAllocationCount; + + // Configuration + private final double criticalThreshold; + private final double highThreshold; + private final long maxDirectMemory; + + public MemoryPressureMonitor(Settings settings) { + this.memoryBean = ManagementFactory.getMemoryMXBean(); + this.scheduler = Executors.newSingleThreadScheduledExecutor(r -> { + Thread t = new Thread(r, "parquet-memory-monitor"); + t.setDaemon(true); + return t; + }); + + this.directMemoryUsed = new AtomicLong(0); + this.currentPressure = new AtomicReference<>(PressureLevel.LOW); + this.allocationCount = new AtomicLong(0); + this.deallocationCount = new AtomicLong(0); + this.failedAllocationCount = new AtomicLong(0); + + // Parse configuration + this.criticalThreshold = settings.getAsDouble("parquet.memory.critical_threshold", 0.95); + this.highThreshold = settings.getAsDouble("parquet.memory.high_threshold", 0.85); + this.maxDirectMemory = getMaxDirectMemory(); + this.directMemoryMax = new AtomicLong(maxDirectMemory); + + // Start monitoring + startMonitoring(); + } + + /** + * Checks if an allocation should be rejected based on current memory pressure. + * + * @param requestedBytes Number of bytes requested for allocation + * @return true if allocation should be rejected + */ + public boolean shouldRejectAllocation(long requestedBytes) { + PressureLevel pressure = currentPressure.get(); + + // Always reject if critical + if (pressure == PressureLevel.CRITICAL) { + return true; + } + + // Check if allocation would push us over threshold + long currentUsage = directMemoryUsed.get(); + long afterAllocation = currentUsage + requestedBytes; + double futureRatio = (double) afterAllocation / maxDirectMemory; + + return switch (pressure) { + case HIGH -> futureRatio > criticalThreshold; + case MODERATE -> futureRatio > highThreshold; + case LOW -> false; + case CRITICAL -> true; // Already handled above + }; + } + + /** + * Records an allocation event. + * + * @param size Size of the allocation + */ + public void recordAllocation(long size) { + directMemoryUsed.addAndGet(size); + allocationCount.incrementAndGet(); + updatePressureLevel(); + } + + /** + * Records a deallocation event. + * + * @param size Size of the deallocation + */ + public void recordDeallocation(long size) { + directMemoryUsed.addAndGet(-size); + deallocationCount.incrementAndGet(); + updatePressureLevel(); + } + + /** + * Records a failed allocation event. + * + * @param size Size of the failed allocation + * @param reason Reason for failure + */ + public void recordFailedAllocation(long size, String reason) { + failedAllocationCount.incrementAndGet(); + // Could log detailed failure information here + } + + /** + * Gets the current memory pressure as a ratio (0.0 to 1.0). + * + * @return Current memory pressure ratio + */ + public double getCurrentPressure() { + return (double) directMemoryUsed.get() / maxDirectMemory; + } + + /** + * Gets the current pressure level enum. + * + * @return Current PressureLevel + */ + public PressureLevel getCurrentPressureLevel() { + return currentPressure.get(); + } + + /** + * Gets current memory statistics. + * + * @return MemoryStats with current usage information + */ + public MemoryStats getStats() { + return new MemoryStats( + directMemoryUsed.get(), + maxDirectMemory, + getCurrentPressure(), + currentPressure.get(), + allocationCount.get(), + deallocationCount.get(), + failedAllocationCount.get() + ); + } + + /** + * Triggers early refresh if memory pressure is high. + * + * @return true if early refresh should be triggered + */ + public boolean shouldTriggerEarlyRefresh() { + PressureLevel pressure = currentPressure.get(); + return pressure == PressureLevel.HIGH || pressure == PressureLevel.CRITICAL; + } + + /** + * Gets recommended writer limit based on current memory pressure. + * + * @param baseLimit Base number of writers without pressure + * @return Adjusted writer limit + */ + public int getRecommendedWriterLimit(int baseLimit) { + return switch (currentPressure.get()) { + case LOW -> baseLimit; + case MODERATE -> (int) (baseLimit * 0.8); + case HIGH -> (int) (baseLimit * 0.5); + case CRITICAL -> 1; // Minimal writers only + }; + } + + private void startMonitoring() { + scheduler.scheduleAtFixedRate(this::updatePressureLevel, 1, 1, TimeUnit.SECONDS); + } + + private void updatePressureLevel() { + double ratio = getCurrentPressure(); + PressureLevel newLevel = PressureLevel.fromRatio(ratio); + PressureLevel oldLevel = currentPressure.getAndSet(newLevel); + + // Log pressure level changes + if (newLevel != oldLevel) { + System.out.println(String.format( + "[MEMORY] Pressure level changed: %s -> %s (%.2f%%)", + oldLevel, newLevel, ratio * 100)); + } + } + + private long getMaxDirectMemory() { + // Use heap max / 4 as a reasonable default for direct memory + long heapMax = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax(); + return heapMax > 0 ? heapMax / 4 : 1024 * 1024 * 1024; // 1GB fallback + } + + /** + * Closes the monitor and stops background tasks. + */ + public void close() { + scheduler.shutdown(); + try { + if (!scheduler.awaitTermination(5, TimeUnit.SECONDS)) { + scheduler.shutdownNow(); + } + } catch (InterruptedException e) { + scheduler.shutdownNow(); + Thread.currentThread().interrupt(); + } + } + + /** + * Memory statistics for monitoring. + */ + public static class MemoryStats { + private final long usedBytes; + private final long maxBytes; + private final double pressureRatio; + private final PressureLevel pressureLevel; + private final long allocationCount; + private final long deallocationCount; + private final long failedAllocationCount; + + public MemoryStats(long usedBytes, long maxBytes, double pressureRatio, + PressureLevel pressureLevel, long allocationCount, + long deallocationCount, long failedAllocationCount) { + this.usedBytes = usedBytes; + this.maxBytes = maxBytes; + this.pressureRatio = pressureRatio; + this.pressureLevel = pressureLevel; + this.allocationCount = allocationCount; + this.deallocationCount = deallocationCount; + this.failedAllocationCount = failedAllocationCount; + } + + public long getUsedBytes() { return usedBytes; } + public long getMaxBytes() { return maxBytes; } + public double getPressureRatio() { return pressureRatio; } + public PressureLevel getPressureLevel() { return pressureLevel; } + public long getAllocationCount() { return allocationCount; } + public long getDeallocationCount() { return deallocationCount; } + public long getFailedAllocationCount() { return failedAllocationCount; } + public long getAvailableBytes() { return maxBytes - usedBytes; } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdGenerator.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdGenerator.java new file mode 100644 index 0000000000000..8735efc2b21dc --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdGenerator.java @@ -0,0 +1,81 @@ +package com.parquet.parquetdataformat.rowid; + +import java.util.concurrent.atomic.AtomicLong; + +/** + * Atomic, monotonic row ID generator as specified in the Project Mustang design. + * Ensures that each parquet file has sequential row IDs starting from 0, + * maintaining a 1:1 mapping between docs indexed in Lucene and parquet rows. + */ +public class RowIdGenerator { + + private final AtomicLong globalCounter; + private final String generatorId; + + public RowIdGenerator(String generatorId) { + this.generatorId = generatorId; + this.globalCounter = new AtomicLong(0); + } + + /** + * Generates the next monotonic row ID. + * Thread-safe and atomic operation. + * + * @return Next sequential row ID + */ + public long nextRowId() { + return globalCounter.getAndIncrement(); + } + + /** + * Gets the current counter value without incrementing. + * Useful for determining the number of rows generated so far. + * + * @return Current counter value + */ + public long getCurrentCount() { + return globalCounter.get(); + } + + /** + * Resets the counter to zero. + * Should only be used during testing or system reinitialization. + */ + public void reset() { + globalCounter.set(0); + } + + /** + * Gets the generator ID for tracking purposes. + * + * @return Generator identifier + */ + public String getGeneratorId() { + return generatorId; + } + + /** + * Gets generation statistics. + * + * @return GenerationStats with current state + */ + public GenerationStats getStats() { + return new GenerationStats(generatorId, globalCounter.get()); + } + + /** + * Statistics for row ID generation. + */ + public static class GenerationStats { + private final String generatorId; + private final long totalGenerated; + + public GenerationStats(String generatorId, long totalGenerated) { + this.generatorId = generatorId; + this.totalGenerated = totalGenerated; + } + + public String getGeneratorId() { return generatorId; } + public long getTotalGenerated() { return totalGenerated; } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdTracker.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdTracker.java new file mode 100644 index 0000000000000..418c96efa07ce --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdTracker.java @@ -0,0 +1,204 @@ +package com.parquet.parquetdataformat.rowid; + +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ConcurrentMap; +import java.util.concurrent.atomic.AtomicLong; + +/** + * Tracks row ID ranges per parquet file for Lucene segment mapping. + * Maintains the 1:1 mapping between docs indexed in Lucene and parquet rows + * as specified in the Project Mustang design. + */ +public class RowIdTracker { + + private final ConcurrentMap fileRanges; + private final AtomicLong totalRowsTracked; + + public RowIdTracker() { + this.fileRanges = new ConcurrentHashMap<>(); + this.totalRowsTracked = new AtomicLong(0); + } + + /** + * Starts tracking a new row ID range for a parquet file. + * + * @param fileName Name of the parquet file + * @param startRowId Starting row ID for this file + * @return RowIdRange tracker for this file + */ + public RowIdRange startTracking(String fileName, long startRowId) { + RowIdRange range = new RowIdRange(fileName, startRowId); + fileRanges.put(fileName, range); + return range; + } + + /** + * Completes tracking for a parquet file by setting the end row ID. + * + * @param fileName Name of the parquet file + * @param endRowId Final row ID for this file (exclusive) + * @return true if tracking was successfully completed + */ + public boolean completeTracking(String fileName, long endRowId) { + RowIdRange range = fileRanges.get(fileName); + if (range != null) { + range.setEndRowId(endRowId); + long rowCount = endRowId - range.getStartRowId(); + totalRowsTracked.addAndGet(rowCount); + return true; + } + return false; + } + + /** + * Gets the row ID range for a specific parquet file. + * + * @param fileName Name of the parquet file + * @return RowIdRange for the file, or null if not found + */ + public RowIdRange getRangeForFile(String fileName) { + return fileRanges.get(fileName); + } + + /** + * Finds which parquet file contains the given row ID. + * + * @param rowId Row ID to search for + * @return File name containing the row ID, or null if not found + */ + public String findFileForRowId(long rowId) { + for (RowIdRange range : fileRanges.values()) { + if (range.containsRowId(rowId)) { + return range.getFileName(); + } + } + return null; + } + + /** + * Gets all tracked file ranges. + * + * @return ConcurrentMap of fileName -> RowIdRange + */ + public ConcurrentMap getAllRanges() { + return new ConcurrentHashMap<>(fileRanges); + } + + /** + * Gets tracking statistics. + * + * @return TrackingStats with current state + */ + public TrackingStats getStats() { + return new TrackingStats( + fileRanges.size(), + totalRowsTracked.get(), + fileRanges.values().stream().mapToLong(RowIdRange::getRowCount).sum() + ); + } + + /** + * Removes tracking for a parquet file. + * Used during cleanup or file deletion. + * + * @param fileName Name of the parquet file + * @return true if tracking was removed + */ + public boolean removeTracking(String fileName) { + RowIdRange removed = fileRanges.remove(fileName); + if (removed != null) { + totalRowsTracked.addAndGet(-removed.getRowCount()); + return true; + } + return false; + } + + /** + * Clears all tracking data. + * Should only be used during testing or system reset. + */ + public void clear() { + fileRanges.clear(); + totalRowsTracked.set(0); + } + + /** + * Represents a row ID range for a specific parquet file. + */ + public static class RowIdRange { + private final String fileName; + private final long startRowId; + private volatile long endRowId; + private volatile boolean completed; + + public RowIdRange(String fileName, long startRowId) { + this.fileName = fileName; + this.startRowId = startRowId; + this.endRowId = startRowId; + this.completed = false; + } + + /** + * Sets the end row ID and marks the range as completed. + * + * @param endRowId Final row ID (exclusive) + */ + public void setEndRowId(long endRowId) { + this.endRowId = endRowId; + this.completed = true; + } + + /** + * Checks if the given row ID falls within this range. + * + * @param rowId Row ID to check + * @return true if row ID is within range + */ + public boolean containsRowId(long rowId) { + return completed && rowId >= startRowId && rowId < endRowId; + } + + /** + * Gets the number of rows in this range. + * + * @return Row count, or 0 if not completed + */ + public long getRowCount() { + return completed ? endRowId - startRowId : 0; + } + + // Getters + public String getFileName() { return fileName; } + public long getStartRowId() { return startRowId; } + public long getEndRowId() { return endRowId; } + public boolean isCompleted() { return completed; } + + @Override + public String toString() { + return String.format("RowIdRange{file='%s', start=%d, end=%d, completed=%s}", + fileName, startRowId, endRowId, completed); + } + } + + /** + * Statistics for row ID tracking. + */ + public static class TrackingStats { + private final int trackedFiles; + private final long totalRowsTracked; + private final long activeRows; + + public TrackingStats(int trackedFiles, long totalRowsTracked, long activeRows) { + this.trackedFiles = trackedFiles; + this.totalRowsTracked = totalRowsTracked; + this.activeRows = activeRows; + } + + public int getTrackedFiles() { return trackedFiles; } + public long getTotalRowsTracked() { return totalRowsTracked; } + public long getActiveRows() { return activeRows; } + public double getAverageRowsPerFile() { + return trackedFiles > 0 ? (double) activeRows / trackedFiles : 0.0; + } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java new file mode 100644 index 0000000000000..7d196c2fdfea7 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java @@ -0,0 +1,259 @@ +package com.parquet.parquetdataformat.vsr; + +import com.parquet.parquetdataformat.bridge.ArrowExport; +import org.apache.arrow.memory.ArrowBuf; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.BigIntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.c.ArrowArray; +import org.apache.arrow.c.ArrowSchema; +import org.apache.arrow.c.Data; + +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import static org.apache.arrow.vector.BitVectorHelper.byteIndex; + +/** + * Managed wrapper around VectorSchemaRoot that handles state transitions + * and provides thread-safe access for the ACTIVE/FROZEN lifecycle. + */ +public class ManagedVSR implements AutoCloseable { + + private final String id; + private final VectorSchemaRoot vsr; + private final BufferAllocator allocator; + private final AtomicReference state; + private final ReadWriteLock lock; + private final long createdTime; + + + public ManagedVSR(String id, VectorSchemaRoot vsr, BufferAllocator allocator) { + this.id = id; + this.vsr = vsr; + this.allocator = allocator; + this.state = new AtomicReference<>(VSRState.ACTIVE); + this.lock = new ReentrantReadWriteLock(); + this.createdTime = System.currentTimeMillis(); + } + + /** + * Gets the underlying VectorSchemaRoot. + * Should only be used when holding appropriate locks. + * + * @return VectorSchemaRoot instance + */ + public VectorSchemaRoot getVSR() { + return vsr; + } + + /** + * Gets the current row count in this VSR. + * Thread-safe read operation. + * + * @return Number of rows currently in the VSR + */ + public int getRowCount() { + lock.readLock().lock(); + try { + return vsr.getRowCount(); + } finally { + lock.readLock().unlock(); + } + } + + /** + * Sets the row count for this VSR. + * Only allowed when VSR is in ACTIVE state. + * + * @param rowCount New row count + * @throws IllegalStateException if VSR is not active or is immutable + */ + public void setRowCount(int rowCount) { + lock.writeLock().lock(); + try { + if (state.get() != VSRState.ACTIVE) { + throw new IllegalStateException("Cannot modify VSR in state: " + state.get()); + } + vsr.setRowCount(rowCount); + } finally { + lock.writeLock().unlock(); + } + } + + /** + * Gets a field vector by name. + * Thread-safe read operation. + * + * @param fieldName Name of the field + * @return FieldVector for the field, or null if not found + */ + public FieldVector getVector(String fieldName) { + lock.readLock().lock(); + try { + return vsr.getVector(fieldName); + } finally { + lock.readLock().unlock(); + } + } + + /** + * Changes the state of this VSR. + * Handles state transition logic and immutability. + * + * @param newState New state to transition to + */ + public void setState(VSRState newState) { + VSRState oldState = state.getAndSet(newState); + + System.out.println(String.format( + "[VSR] State transition: %s -> %s for VSR %s", + oldState, newState, id)); + } + + /** + * Gets the current state of this VSR. + * + * @return Current VSRState + */ + public VSRState getState() { + return state.get(); + } + + /** + * Exports this VSR to Arrow C Data Interface for Rust handoff. + * Only allowed when VSR is FROZEN or FLUSHING. + * + * @return ArrowExport containing ArrowArray and ArrowSchema + * @throws IllegalStateException if VSR is not in correct state + */ + public ArrowExport exportToArrow() { + VSRState currentState = state.get(); + if (currentState != VSRState.FROZEN && + currentState != VSRState.FLUSHING) { + throw new IllegalStateException("Cannot export VSR in state: " + currentState); + } + + lock.readLock().lock(); + try { + ArrowArray arrowArray = ArrowArray.allocateNew(allocator); + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); + + // Export the VectorSchemaRoot to C Data Interface + Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema); + + return new ArrowExport(arrowArray, arrowSchema); + } finally { + lock.readLock().unlock(); + } + } + + public ArrowExport exportSchema() { + lock.readLock().lock(); + try { + ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator); + + // Export the VectorSchemaRoot to C Data Interface + Data.exportSchema(allocator, vsr.getSchema(), null, arrowSchema); + + return new ArrowExport(null, arrowSchema); + } finally { + lock.readLock().unlock(); + } + } + + /** + * Checks if this VSR is immutable (frozen). + * + * @return true if VSR cannot be modified + */ + public boolean isImmutable() { + VSRState currentState = state.get(); + return currentState != VSRState.ACTIVE; + } + + + /** + * Gets the VSR ID. + * + * @return Unique identifier for this VSR + */ + public String getId() { + return id; + } + + /** + * Gets the creation timestamp. + * + * @return Creation time in milliseconds + */ + public long getCreatedTime() { + return createdTime; + } + + /** + * Gets the associated BufferAllocator. + * + * @return BufferAllocator used by this VSR + */ + public BufferAllocator getAllocator() { + return allocator; + } + + /** + * Closes this VSR and releases all resources. + */ + @Override + public void close() { + lock.writeLock().lock(); + try { + if (state.get() != VSRState.CLOSED) { + state.set(VSRState.CLOSED); + vsr.close(); + allocator.close(); + } + } finally { + lock.writeLock().unlock(); + } + } + + + @Override + public String toString() { + return String.format("ManagedVSR{id='%s', state=%s, rows=%d, immutable=%s}", + id, state.get(), getRowCount(), isImmutable()); + } + + public static void main(String[] args) { + RootAllocator allocator = new RootAllocator(); + BigIntVector vector = new BigIntVector("vector", allocator); + vector.allocateNew(10); + vector.set(0, 100); // Set position 0 +// vector.setNull(1); + vector.set(2, 300); // Set position 2 +// Position 1 is not set! + vector.setValueCount(3); // Claims vector has 3 elements + +// Position 1 now contains undefined data +// long value = vector.get(1); // Could be any value! + System.out.println(readBit(vector.getValidityBuffer(), 0)); + System.out.println(readBit(vector.getValidityBuffer(), 1)); + System.out.println(readBit(vector.getValidityBuffer(), 2)); + System.out.println(readBit(vector.getValidityBuffer(), 3)); + } + + public static byte readBit(ArrowBuf validityBuffer, long index) { + // it can be observed that some logic is duplicate of the logic in setValidityBit. + // this is because JIT cannot always remove the if branch in setValidityBit, + // so we give a dedicated implementation for setting bits. + final long byteIndex = byteIndex(index); + + // the byte is promoted to an int, because according to Java specification, + // bytes will be promoted to ints automatically, upon expression evaluation. + // by promoting it manually, we avoid the unnecessary conversions. + return validityBuffer.getByte(byteIndex); + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java new file mode 100644 index 0000000000000..d7dfbde2948ba --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java @@ -0,0 +1,268 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package com.parquet.parquetdataformat.vsr; + +import com.parquet.parquetdataformat.bridge.ArrowExport; +import com.parquet.parquetdataformat.bridge.RustBridge; +import com.parquet.parquetdataformat.memory.MemoryPressureMonitor; +import com.parquet.parquetdataformat.writer.ParquetDocumentInput; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.WriteResult; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Manages VectorSchemaRoot lifecycle with integrated memory management and native call wrappers. + * Provides a high-level interface for Parquet document operations using managed VSR abstractions. + * + *

This class orchestrates the following components: + *

    + *
  • {@link ManagedVSR} - Thread-safe VSR with state management
  • + *
  • {@link VSRPool} - Resource pooling for VSRs
  • + *
  • {@link RustBridge} - Direct JNI calls to Rust backend
  • + *
+ */ +public class VSRManager { + private ManagedVSR managedVSR; + private Map fieldVectorMap; + private final Schema schema; + private final String fileName; + private final VSRPool vsrPool; + + public VSRManager(String fileName, Schema schema) { + this.fileName = fileName; + this.schema = schema; + + // Create memory monitor and buffer pool + MemoryPressureMonitor memoryMonitor = new MemoryPressureMonitor(org.opensearch.common.settings.Settings.EMPTY); + + // Create VSR pool + this.vsrPool = new VSRPool("pool-" + fileName, schema, memoryMonitor); + + + // Get active VSR from pool + this.managedVSR = vsrPool.getActiveVSR(); + initializeFieldVectorMap(); + // Initialize writer lazily to avoid crashes + initializeWriter(); + } + + private void initializeWriter() { + try { + // Export schema through managed VSR + try (ArrowExport export = managedVSR.exportSchema()) { + long schemaAddress = export.getSchemaAddress(); + + // Direct native call - RustBridge handles all validation + RustBridge.createWriter(fileName, schemaAddress); + } + } catch (Exception e) { + throw new RuntimeException("Failed to initialize Parquet writer: " + e.getMessage(), e); + } + } + + public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOException { + // Ensure we have an active VSR (handle case where getActiveVSR() returns null) + if (managedVSR == null) { + managedVSR = vsrPool.getActiveVSR(); + if (managedVSR == null) { + throw new IOException("No active VSR available"); + } + reinitializeFieldVectorMap(); + } + + // Ensure VSR is in ACTIVE state for modifications + if (managedVSR.getState() != VSRState.ACTIVE) { + throw new IOException("Cannot add document - VSR is not active: " + managedVSR.getState()); + } + + System.out.println("[JAVA] addToManagedVSR called, current row count: " + managedVSR.getRowCount()); + + try { + // Since ParquetDocumentInput now works directly with ManagedVSR, + // fields should already be populated in vectors via addField() calls. + // We just need to finalize the document by calling addToWriter() + // which will increment the row count. + WriteResult result = document.addToWriter(); + + System.out.println("[JAVA] After adding document, row count: " + managedVSR.getRowCount()); + + // Check for VSR rotation AFTER successful document processing + handleVSRRotationAfterAddToManagedVSR(); + + return result; + } catch (Exception e) { + System.out.println("[JAVA] ERROR in addToManagedVSR: " + e.getMessage()); + throw new IOException("Failed to add document: " + e.getMessage(), e); + } + } + + public String flush(FlushIn flushIn) throws IOException { + System.out.println("[JAVA] flush called, row count: " + managedVSR.getRowCount()); + try { + // Only flush if we have data + if (managedVSR.getRowCount() == 0) { + System.out.println("[JAVA] No data to flush, returning null"); + return null; + } + + // Transition VSR to FROZEN state before flushing + managedVSR.setState(VSRState.FROZEN); + System.out.println("[JAVA] Flushing " + managedVSR.getRowCount() + " rows"); + + // Transition to FLUSHING state + managedVSR.setState(VSRState.FLUSHING); + + // Direct native call - write the managed VSR data + try (ArrowExport export = managedVSR.exportToArrow()) { + RustBridge.write(fileName, export.getArrayAddress(), export.getSchemaAddress()); + RustBridge.closeWriter(fileName); + } + System.out.println("[JAVA] Successfully flushed data"); + + return fileName; + } catch (Exception e) { + System.out.println("[JAVA] ERROR in flush: " + e.getMessage()); + throw new IOException("Failed to flush data: " + e.getMessage(), e); + } + } + + public void close() { + try { + // Direct native calls + try { + RustBridge.closeWriter(fileName); + RustBridge.flushToDisk(fileName); + } catch (IOException e) { + System.err.println("Warning: Failed to close/flush writer: " + e.getMessage()); + } + + // Complete VSR processing and cleanup + vsrPool.completeVSR(managedVSR); + managedVSR = null; + + } catch (Exception e) { + System.err.println("Error during close: " + e.getMessage()); + } + } + + private boolean checkFlushConditions() { + // TODO: Implement memory pressure-based flush conditions + return false; + } + + /** + * Handles VSR rotation after successful document addition. + * Checks if rotation is needed and immediately processes any frozen VSR. + */ + private void handleVSRRotationAfterAddToManagedVSR() throws IOException { + try { + // Check if rotation is needed and perform it if safe + boolean rotated = vsrPool.maybeRotateActiveVSR(); + + if (rotated) { + System.out.println("[JAVA] VSR rotation occurred after document addition"); + + // Get the frozen VSR that was just created by rotation + ManagedVSR frozenVSR = vsrPool.getFrozenVSR(); + if (frozenVSR != null) { + System.out.println("[JAVA] Processing frozen VSR: " + frozenVSR.getId() + + " with " + frozenVSR.getRowCount() + " rows"); + + // Write the frozen VSR data immediately + frozenVSR.setState(VSRState.FLUSHING); + try (ArrowExport export = frozenVSR.exportToArrow()) { + RustBridge.write(fileName, export.getArrayAddress(), export.getSchemaAddress()); + } + + System.out.println("[JAVA] Successfully wrote frozen VSR data"); + + // Complete the VSR processing + vsrPool.completeVSR(frozenVSR); + } else { + System.err.println("[JAVA] WARNING: Rotation occurred but no frozen VSR found"); + } + + // Update to new active VSR + managedVSR = vsrPool.getActiveVSR(); + if (managedVSR == null) { + throw new IOException("No active VSR available after rotation"); + } + + // Reinitialize field vector map with new VSR + reinitializeFieldVectorMap(); + + System.out.println("[JAVA] VSR rotation completed, new active VSR: " + managedVSR.getId() + + ", row count: " + managedVSR.getRowCount()); + } + } catch (IOException e) { + System.err.println("[JAVA] Error during VSR rotation: " + e.getMessage()); + throw e; + } + } + + /** + * Checks if VSR rotation is needed based on row count and memory pressure. + * If rotation occurs, updates the managed VSR reference and reinitializes field vectors. + * + * @deprecated Use handleVSRRotationAfterAddToManagedVSR() instead for safer rotation after document processing + */ + @Deprecated + private void checkAndHandleVSRRotation() throws IOException { + // Get active VSR from pool - this will trigger rotation if needed + ManagedVSR currentActive = vsrPool.getActiveVSR(); + + // Check if we got a different VSR (rotation occurred) + if (currentActive != managedVSR) { + System.out.println("[JAVA] VSR rotation detected, updating references"); + + // Update the managed VSR reference + managedVSR = currentActive; + + // Reinitialize field vector map with new VSR + reinitializeFieldVectorMap(); + + // Note: Writer initialization is not needed per VSR as it's per file + System.out.println("[JAVA] VSR rotation completed, new row count: " + managedVSR.getRowCount()); + } + } + + /** + * Reinitializes the field vector map with the current managed VSR. + * Called after VSR rotation to update vector references. + */ + private void reinitializeFieldVectorMap() { + fieldVectorMap.clear(); + initializeFieldVectorMap(); + } + + private void initializeFieldVectorMap() { + fieldVectorMap = new HashMap<>(); + for (Field field : schema.getFields()) { + String fieldName = field.getName(); + FieldVector fieldVector = managedVSR.getVector(fieldName); + // Vector is already properly typed from ManagedVSR.getVector() + fieldVectorMap.put(fieldName, fieldVector); + } + } + + /** + * Gets the current active ManagedVSR for document input creation. + * + * @return The current managed VSR instance + */ + public ManagedVSR getActiveManagedVSR() { + return managedVSR; + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRPool.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRPool.java new file mode 100644 index 0000000000000..088a990353157 --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRPool.java @@ -0,0 +1,331 @@ +package com.parquet.parquetdataformat.vsr; + +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Schema; +import com.parquet.parquetdataformat.memory.ArrowBufferPool; +import com.parquet.parquetdataformat.memory.MemoryPressureMonitor; + +import java.io.IOException; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicReference; + +/** + * Manages VectorSchemaRoot lifecycle with ACTIVE and FROZEN states as specified + * in the Project Mustang design. Each ParquetWriter maintains a single ACTIVE VSR + * for writing and a single FROZEN VSR for Rust handoff. + */ +public class VSRPool { + + private final Schema schema; + private final ArrowBufferPool bufferPool; + private final MemoryPressureMonitor memoryMonitor; + private final String poolId; + + // VSR lifecycle management + private final AtomicReference activeVSR; + private final AtomicReference frozenVSR; + private final ConcurrentHashMap allVSRs; + private final AtomicInteger vsrCounter; + + // Configuration + private final int maxRowsPerVSR; + + public VSRPool(String poolId, Schema schema, MemoryPressureMonitor memoryMonitor) { + this.poolId = poolId; + this.schema = schema; + this.bufferPool = new ArrowBufferPool(org.opensearch.common.settings.Settings.EMPTY, memoryMonitor); + this.memoryMonitor = memoryMonitor; + + this.activeVSR = new AtomicReference<>(); + this.frozenVSR = new AtomicReference<>(); + this.allVSRs = new ConcurrentHashMap<>(); + this.vsrCounter = new AtomicInteger(0); + + // Configuration - could be made configurable + this.maxRowsPerVSR = 50000; // Max rows before forcing freeze + + // Initialize with first active VSR + initializeActiveVSR(); + } + + /** + * Gets the current active VSR for writing. + * Simply returns the current active VSR without any rotation logic. + * + * @return Active ManagedVSR for writing, or null if none exists + */ + public ManagedVSR getActiveVSR() { + return activeVSR.get(); + } + + /** + * Checks if VSR rotation is needed and performs it if safe to do so. + * Throws IOException if rotation is needed but frozen slot is occupied. + * + * @return true if rotation occurred, false if no rotation was needed + * @throws IOException if rotation is needed but cannot be performed due to occupied frozen slot + */ + public boolean maybeRotateActiveVSR() throws IOException { + ManagedVSR current = activeVSR.get(); + + // Check if rotation is needed + if (current == null || !shouldRotateVSR(current)) { + return false; // No rotation needed + } + + // CRITICAL: Check if frozen slot is occupied before rotation + if (frozenVSR.get() != null) { + throw new IOException("Cannot rotate VSR: frozen slot is occupied. " + + "Previous frozen VSR has not been processed. This indicates a " + + "system bottleneck or processing failure."); + } + + // Safe to rotate - perform the rotation + synchronized (this) { + // Double-check conditions under lock + current = activeVSR.get(); + if (current == null || !shouldRotateVSR(current)) { + return false; // Conditions changed while acquiring lock + } + + // Check frozen slot again under lock + if (frozenVSR.get() != null) { + throw new IOException("Cannot rotate VSR: frozen slot became occupied during rotation"); + } + + // Freeze current VSR if it exists and has data + if (current != null && current.getRowCount() > 0) { + freezeVSR(current); + } + + // Create new active VSR + ManagedVSR newActive = createNewVSR(); + activeVSR.set(newActive); + + return true; // Rotation occurred + } + } + + /** + * Freezes the current active VSR and creates a new active one. + * The frozen VSR replaces any existing frozen VSR. + * + * @deprecated Use maybeRotateActiveVSR() instead for safer rotation with checks + * @return Newly created active VSR + */ + @Deprecated + public ManagedVSR rotateActiveVSR() { + synchronized (this) { + ManagedVSR current = activeVSR.get(); + + // Freeze current VSR if it exists and has data + if (current != null && current.getRowCount() > 0) { + freezeVSR(current); + } + + // Create new active VSR + ManagedVSR newActive = createNewVSR(); + activeVSR.set(newActive); + + return newActive; + } + } + + /** + * Gets the frozen VSR for Rust processing. + * + * @return Frozen VSR, or null if none available + */ + public ManagedVSR getFrozenVSR() { + return frozenVSR.get(); + } + + /** + * Takes the frozen VSR for processing and clears the frozen slot. + * + * @return Frozen VSR that was taken, or null if none available + */ + public ManagedVSR takeFrozenVSR() { + return frozenVSR.getAndSet(null); + } + + /** + * Marks a VSR as flushing (being processed by Rust). + * + * @param vsr VSR being processed + */ + public void markFlushing(ManagedVSR vsr) { + vsr.setState(VSRState.FLUSHING); + } + + /** + * Completes VSR processing and cleans up resources. + * + * @param vsr VSR that has been processed + */ + public void completeVSR(ManagedVSR vsr) { + vsr.setState(VSRState.CLOSED); + vsr.close(); + allVSRs.remove(vsr.getId()); + } + + /** + * Forces all VSRs to be frozen for immediate processing. + * Used during refresh or shutdown. + */ + public void freezeAll() { + ManagedVSR current = activeVSR.getAndSet(null); + if (current != null && current.getRowCount() > 0) { + freezeVSR(current); + } + } + + /** + * Gets statistics about the VSR pool. + * + * @return PoolStats with current state + */ + public PoolStats getStats() { + ManagedVSR active = activeVSR.get(); + ManagedVSR frozen = frozenVSR.get(); + int frozenCount = frozen != null ? 1 : 0; + + return new PoolStats( + poolId, + active != null ? active.getRowCount() : 0, + frozenCount, + allVSRs.size(), + allVSRs.values().stream().mapToLong(ManagedVSR::getRowCount).sum() + ); + } + + /** + * Checks if backpressure should be applied. + * + * @return true if frozen VSR slot is occupied or memory pressure is critical + */ + public boolean shouldApplyBackpressure() { + return frozenVSR.get() != null || + memoryMonitor.getCurrentPressureLevel() == MemoryPressureMonitor.PressureLevel.CRITICAL; + } + + /** + * Closes the pool and cleans up all resources. + */ + public void close() { + // Close active VSR + ManagedVSR active = activeVSR.getAndSet(null); + if (active != null) { + active.close(); + } + + // Close frozen VSR + ManagedVSR frozen = frozenVSR.getAndSet(null); + if (frozen != null) { + frozen.close(); + } + + // Close any remaining VSRs + allVSRs.values().forEach(ManagedVSR::close); + allVSRs.clear(); + } + + private void initializeActiveVSR() { + ManagedVSR initial = createNewVSR(); + activeVSR.set(initial); + } + + private ManagedVSR createNewVSR() { + String vsrId = poolId + "-vsr-" + vsrCounter.incrementAndGet(); + BufferAllocator allocator = null; + VectorSchemaRoot vsr = null; + + try { + allocator = bufferPool.createAllocator(vsrId); + vsr = VectorSchemaRoot.create(schema, allocator); + + ManagedVSR managedVSR = new ManagedVSR(vsrId, vsr, allocator); + allVSRs.put(vsrId, managedVSR); + + // Success: ManagedVSR now owns the resources + return managedVSR; + } catch (Exception e) { + // Clean up resources on failure since ManagedVSR couldn't take ownership + if (vsr != null) { + try { + vsr.close(); + } catch (Exception closeEx) { + e.addSuppressed(closeEx); + } + } + if (allocator != null) { + try { + allocator.close(); + } catch (Exception closeEx) { + e.addSuppressed(closeEx); + } + } + throw new RuntimeException("Failed to create new VSR", e); + } + } + + private void freezeVSR(ManagedVSR vsr) { + vsr.setState(VSRState.FROZEN); + + // CRITICAL FIX: Check if frozen slot is already occupied + ManagedVSR previousFrozen = frozenVSR.get(); + if (previousFrozen != null) { + // NEVER blindly overwrite a frozen VSR - this would cause data loss + System.err.println("[VSRPool] ERROR: Attempting to freeze VSR when frozen slot is occupied! " + + "Previous VSR: " + previousFrozen.getId() + " (" + previousFrozen.getRowCount() + " rows), " + + "New VSR: " + vsr.getId() + " (" + vsr.getRowCount() + " rows). " + + "This indicates a logic error - frozen VSR should be consumed before replacement."); + + // Return VSR to ACTIVE state to prevent state corruption + vsr.setState(VSRState.ACTIVE); + throw new IllegalStateException("Cannot freeze VSR: frozen slot is occupied by unprocessed VSR " + + previousFrozen.getId() + ". This would cause data loss."); + } + + // Safe to set frozen VSR since slot is empty + boolean success = frozenVSR.compareAndSet(null, vsr); + if (!success) { + // Race condition: another thread set frozen VSR between our check and set + vsr.setState(VSRState.ACTIVE); + throw new IllegalStateException("Race condition detected: frozen slot was occupied during freeze operation"); + } + } + + private boolean shouldRotateVSR(ManagedVSR vsr) { + return vsr.getRowCount() >= maxRowsPerVSR || + memoryMonitor.shouldTriggerEarlyRefresh(); + } + + /** + * Statistics for the VSR pool. + */ + public static class PoolStats { + private final String poolId; + private final long activeRowCount; + private final int frozenVSRCount; + private final int totalVSRCount; + private final long totalRowCount; + + public PoolStats(String poolId, long activeRowCount, int frozenVSRCount, + int totalVSRCount, long totalRowCount) { + this.poolId = poolId; + this.activeRowCount = activeRowCount; + this.frozenVSRCount = frozenVSRCount; + this.totalVSRCount = totalVSRCount; + this.totalRowCount = totalRowCount; + } + + public String getPoolId() { return poolId; } + public long getActiveRowCount() { return activeRowCount; } + public int getFrozenVSRCount() { return frozenVSRCount; } + public int getTotalVSRCount() { return totalVSRCount; } + public long getTotalRowCount() { return totalRowCount; } + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRState.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRState.java new file mode 100644 index 0000000000000..cd55f30ca24cc --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRState.java @@ -0,0 +1,28 @@ +package com.parquet.parquetdataformat.vsr; + +/** + * Represents the lifecycle states of a VectorSchemaRoot in the Project Mustang + * Parquet Writer Plugin architecture. + */ +public enum VSRState { + /** + * Currently accepting writes - the VSR is active and can be modified. + */ + ACTIVE, + + /** + * Read-only state - VSR is frozen and queued for flush to Rust. + * No further modifications are allowed in this state. + */ + FROZEN, + + /** + * Currently being processed by Rust - VSR is in the handoff process. + */ + FLUSHING, + + /** + * Completed and cleaned up - VSR processing is complete and resources freed. + */ + CLOSED +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java new file mode 100644 index 0000000000000..8db471ee9a77a --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java @@ -0,0 +1,68 @@ +package com.parquet.parquetdataformat.writer; + +import com.parquet.parquetdataformat.fields.ArrowFieldRegistry; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.mapper.MappedFieldType; +import com.parquet.parquetdataformat.vsr.ManagedVSR; + +import java.io.IOException; + +/** + * Document input wrapper for Parquet-based document processing. + * + *

This class serves as an adapter between OpenSearch's DocumentInput interface + * and the Arrow-based vector representation. It works directly with a {@link ManagedVSR} + * to populate field vectors and manage document lifecycle. + * + *

The implementation follows the builder pattern, allowing incremental construction + * of documents through field addition before finalizing the document for writing. + * + *

Key responsibilities: + *

    + *
  • Direct field vector population using OpenSearch's {@link MappedFieldType}
  • + *
  • Document lifecycle management via ManagedVSR
  • + *
  • Integration with the Arrow-based Parquet writer pipeline
  • + *
+ * + *

This implementation works directly with Arrow field vectors, eliminating the + * intermediate ParquetDocument representation for improved performance and memory efficiency. + */ +public class ParquetDocumentInput implements DocumentInput { + private final ManagedVSR managedVSR; + + public ParquetDocumentInput(ManagedVSR managedVSR) { + this.managedVSR = managedVSR; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + ArrowFieldRegistry.getParquetField(fieldType.typeName()).createField(fieldType, managedVSR, value); + } + + @Override + public ManagedVSR getFinalInput() { + return managedVSR; + } + + @Override + public WriteResult addToWriter() throws IOException { + // Complete the current document by incrementing row count + // This will internally call setValueCount on all field vectors + int currentRowCount = managedVSR.getRowCount(); + managedVSR.setRowCount(currentRowCount + 1); + + // TODO: Return appropriate WriteResult based on operation success + return new WriteResult(true, null, 1, 1, 1); + } + + @Override + public void close() throws Exception { + // NOTE: ParquetDocumentInput does NOT own the ManagedVSR lifecycle + // The ManagedVSR is owned and managed by VSRManager/VSRPool + // VSRManager.close() -> vsrPool.completeVSR(managedVSR) handles cleanup + // ParquetDocumentInput only holds a reference for field population + + // No cleanup needed here - VSRManager handles the ManagedVSR lifecycle + } +} diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java new file mode 100644 index 0000000000000..eec04ef35650a --- /dev/null +++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java @@ -0,0 +1,78 @@ +package com.parquet.parquetdataformat.writer; + +import com.parquet.parquetdataformat.vsr.VSRManager; +import org.apache.arrow.vector.types.pojo.Schema; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.io.IOException; +import java.nio.file.Path; + +import static com.parquet.parquetdataformat.engine.ParquetDataFormat.PARQUET_DATA_FORMAT; + +/** + * Parquet file writer implementation that integrates with OpenSearch's Writer interface. + * + *

This writer provides a high-level interface for writing Parquet documents to disk + * using the underlying VSRManager for Arrow-based data management and native Rust + * backend for efficient Parquet file generation. + * + *

Key features: + *

    + *
  • Arrow schema-based document structure
  • + *
  • Batch-oriented writing with memory management
  • + *
  • Integration with OpenSearch indexing pipeline
  • + *
  • Native Rust backend for high-performance Parquet operations
  • + *
+ * + *

The writer manages the complete lifecycle from document addition through + * flushing and cleanup, delegating the actual Arrow and Parquet operations + * to the {@link VSRManager}. + */ +public class ParquetWriter implements Writer { + private final String file; + private final Schema schema; + private final VSRManager vsrManager; + private final long writerGeneration; + + public ParquetWriter(String file, Schema schema, long writerGeneration) { + this.file = file; + this.schema = schema; + this.vsrManager = new VSRManager(file, schema); + this.writerGeneration = writerGeneration; + } + + @Override + public WriteResult addDoc(ParquetDocumentInput d) throws IOException { + return vsrManager.addToManagedVSR(d); + } + + @Override + public FileInfos flush(FlushIn flushIn) throws IOException { + String fileName = vsrManager.flush(flushIn); + FileInfos fileInfos = new FileInfos(); + WriterFileSet writerFileSet = new WriterFileSet(Path.of(fileName).getParent(), writerGeneration); + writerFileSet.add(fileName); + fileInfos.putWriterFileSet(PARQUET_DATA_FORMAT, writerFileSet); + return fileInfos; + } + + @Override + public void sync() throws IOException { + + } + + @Override + public void close() { + vsrManager.close(); + } + + @Override + public ParquetDocumentInput newDocumentInput() { + // Get a new ManagedVSR from VSRManager for this document input + return new ParquetDocumentInput(vsrManager.getActiveManagedVSR()); + } +} diff --git a/modules/parquet-data-format/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec b/modules/parquet-data-format/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec new file mode 100644 index 0000000000000..7d1e56cc25536 --- /dev/null +++ b/modules/parquet-data-format/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec @@ -0,0 +1 @@ +com.parquet.parquetdataformat.engine.read.ParquetDataSourceCodec diff --git a/modules/parquet-data-format/src/main/rust/Cargo.toml b/modules/parquet-data-format/src/main/rust/Cargo.toml new file mode 100644 index 0000000000000..21ba3950aa9ac --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/Cargo.toml @@ -0,0 +1,63 @@ +[package] +name = "rust" +version = "0.1.0" +edition = "2024" + +[lib] +name = "parquet_dataformat_jni" +crate-type = ["cdylib"] + +[dependencies] + +# DataFusion dependencies +datafusion = "49.0.0" +datafusion-substrait = "49.0.0" +arrow = { version = "54.0.0", features = ["ffi"] } + +arrow-array = "54.0.0" +arrow-schema = "54.0.0" +arrow-buffer = "54.0.0" + +# JNI dependencies +jni = "0.21" + +# Async runtime +tokio = { version = "1.0", features = ["full"] } +futures = "0.3" +futures-util = "0.3" + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +# Error handling +anyhow = "1.0" +thiserror = "1.0" + +# Logging +log = "0.4" + +# Parquet support +parquet = "54.0.0" + +# Object store for file access +object_store = "0.11" +url = "2.0" + +# Substrait support +substrait = "0.47" +prost = "0.13" + +# Temporary directory support +tempfile = "3.0" + +#jni = "0.21.1" +#arrow = { version = "53.0.0", features = ["ffi"] } +#parquet = "53.0.0" +lazy_static = "1.4.0" +dashmap = "7.0.0-rc2" +chrono = "0.4" + + +[build-dependencies] +cbindgen = "0.27" diff --git a/modules/parquet-data-format/src/main/rust/src/context.rs b/modules/parquet-data-format/src/main/rust/src/context.rs new file mode 100644 index 0000000000000..022912ed84c48 --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/context.rs @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use datafusion::prelude::*; +use datafusion::execution::context::SessionContext; +use std::collections::HashMap; +use std::sync::Arc; +use anyhow::Result; + +/// Manages DataFusion session contexts +pub struct SessionContextManager { + contexts: HashMap<*mut SessionContext, Arc>, + next_runtime_id: u64, +} + +impl SessionContextManager { + pub fn new() -> Self { + Self { + contexts: HashMap::new(), + next_runtime_id: 1, + } + } + + pub async fn register_directory( + &mut self, + table_name: &str, + directory_path: &str, + options: HashMap, + ) -> Result { + // Placeholder implementation - would register parquet directory as table + log::info!("Registering directory: {} at path: {} with options: {:?}", + table_name, directory_path, options); + + let runtime_id = self.next_runtime_id; + self.next_runtime_id += 1; + Ok(runtime_id) + } + + pub async fn create_session_context( + &mut self, + config: HashMap, + ) -> Result<*mut SessionContext> { + // Create actual DataFusion session context + let mut session_config = SessionConfig::new(); + + // Apply configuration options + if let Some(batch_size) = config.get("batch_size") { + if let Ok(size) = batch_size.parse::() { + session_config = session_config.with_batch_size(size); + } + } + + let ctx = Arc::new(SessionContext::new_with_config(session_config)); + let ctx_ptr = Arc::as_ptr(&ctx) as *mut SessionContext; + + self.contexts.insert(ctx_ptr, ctx); + + Ok(ctx_ptr) + } + + pub async fn close_session_context(&mut self, ctx_ptr: *mut SessionContext) -> Result<()> { + self.contexts.remove(&ctx_ptr); + Ok(()) + } + + pub fn get_context(&self, ctx_ptr: *mut SessionContext) -> Option<&Arc> { + self.contexts.get(&ctx_ptr) + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/lib.rs b/modules/parquet-data-format/src/main/rust/src/lib.rs new file mode 100644 index 0000000000000..6ef32c8f5050a --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/lib.rs @@ -0,0 +1,249 @@ +use jni::objects::{JClass, JString}; +use jni::sys::{jint, jlong}; +use jni::JNIEnv; +use dashmap::DashMap; +use arrow::record_batch::RecordBatch; +use parquet::arrow::ArrowWriter; +use std::fs::File; +use std::sync::{Arc, Mutex}; +use lazy_static::lazy_static; +use arrow::ffi::{FFI_ArrowSchema, FFI_ArrowArray}; +use std::fs::OpenOptions; +use std::io::Write; +use chrono::Utc; + +lazy_static! { + static ref WRITER_MANAGER: DashMap>>> = DashMap::new(); + static ref FILE_MANAGER: DashMap = DashMap::new(); +} + +struct NativeParquetWriter; + +impl NativeParquetWriter { + + fn create_writer(filename: String, schema_address: i64) -> Result<(), Box> { + let log_msg = format!("[RUST] create_writer called for file: {}, schema_address: {}\n", filename, schema_address); + println!("{}", log_msg.trim()); + Self::log_to_file(&log_msg); + + let arrow_schema = unsafe { FFI_ArrowSchema::from_raw(schema_address as *mut _) }; + let schema = Arc::new(arrow::datatypes::Schema::try_from(&arrow_schema)?); + + let schema_msg = format!("[RUST] Schema created with {} fields\n", schema.fields().len()); + println!("{}", schema_msg.trim()); + Self::log_to_file(&schema_msg); + + for (i, field) in schema.fields().iter().enumerate() { + let field_msg = format!("[RUST] Field {}: {} ({})\n", i, field.name(), field.data_type()); + println!("{}", field_msg.trim()); + Self::log_to_file(&field_msg); + } + + let file = File::create(&filename)?; + let file_clone = file.try_clone()?; + FILE_MANAGER.insert(filename.clone(), file_clone); + let writer = ArrowWriter::try_new(file, schema, None)?; + WRITER_MANAGER.insert(filename, Arc::new(Mutex::new(writer))); + Ok(()) + } + + fn write_data(filename: String, array_address: i64, schema_address: i64) -> Result<(), Box> { + let log_msg = format!("[RUST] write_data called for file: {}, array_address: {}, schema_address: {}\n", filename, array_address, schema_address); + println!("{}", log_msg.trim()); + Self::log_to_file(&log_msg); + + unsafe { + let arrow_schema = FFI_ArrowSchema::from_raw(schema_address as *mut _); + let arrow_array = FFI_ArrowArray::from_raw(array_address as *mut _); + + match arrow::ffi::from_ffi(arrow_array, &arrow_schema) { + Ok(array_data) => { + let data_msg = format!("[RUST] Successfully imported array_data, length: {}\n", array_data.len()); + println!("{}", data_msg.trim()); + Self::log_to_file(&data_msg); + + let array: Arc = arrow::array::make_array(array_data); + let array_msg = format!("[RUST] Array type: {:?}, length: {}\n", array.data_type(), array.len()); + println!("{}", array_msg.trim()); + Self::log_to_file(&array_msg); + + if let Some(struct_array) = array.as_any().downcast_ref::() { + let struct_msg = format!("[RUST] Successfully cast to StructArray with {} columns\n", struct_array.num_columns()); + println!("{}", struct_msg.trim()); + Self::log_to_file(&struct_msg); + + let schema = Arc::new(arrow::datatypes::Schema::new( + struct_array.fields().clone() + )); + + let record_batch = RecordBatch::try_new( + schema.clone(), + struct_array.columns().to_vec(), + )?; + + let batch_msg = format!("[RUST] Created RecordBatch with {} rows and {} columns\n", record_batch.num_rows(), record_batch.num_columns()); + println!("{}", batch_msg.trim()); + Self::log_to_file(&batch_msg); + + if let Some(writer_arc) = WRITER_MANAGER.get(&filename) { + let write_msg = "[RUST] Writing RecordBatch to file\n"; + println!("{}", write_msg.trim()); + Self::log_to_file(write_msg); + let mut writer = writer_arc.lock().unwrap(); + writer.write(&record_batch)?; + let success_msg = "[RUST] Successfully wrote RecordBatch\n"; + println!("{}", success_msg.trim()); + Self::log_to_file(success_msg); + } else { + let error_msg = format!("[RUST] ERROR: No writer found for file: {}\n", filename); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + } + Ok(()) + } else { + let error_msg = format!("[RUST] ERROR: Array is not a StructArray, type: {:?}\n", array.data_type()); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + Err("Expected struct array from VectorSchemaRoot".into()) + } + } + Err(e) => { + let error_msg = format!("[RUST] ERROR: Failed to import from FFI: {:?}\n", e); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + Err(e.into()) + } + } + } + } + + fn close_writer(filename: String) -> Result<(), Box> { + let log_msg = format!("[RUST] close_writer called for file: {}\n", filename); + println!("{}", log_msg.trim()); + Self::log_to_file(&log_msg); + + if let Some((_, writer_arc)) = WRITER_MANAGER.remove(&filename) { + match Arc::try_unwrap(writer_arc) { + Ok(mutex) => { + let mut writer = mutex.into_inner().unwrap(); + match writer.close() { + Ok(_) => { + let success_msg = format!("[RUST] Successfully closed writer for file: {}\n", filename); + println!("{}", success_msg.trim()); + Self::log_to_file(&success_msg); + Ok(()) + } + Err(e) => { + let error_msg = format!("[RUST] ERROR: Failed to close writer for file: {}\n", filename); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + Err(e.into()) + } + } + } + Err(_) => { + let error_msg = format!("[RUST] ERROR: Writer still in use for file: {}\n", filename); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + Err("Writer still in use".into()) + } + } + } else { + Ok(()) + } + } + + fn flush_to_disk(filename: String) -> Result<(), Box> { + let log_msg = format!("[RUST] fsync_file called for file: {}\n", filename); + println!("{}", log_msg.trim()); + Self::log_to_file(&log_msg); + + if let Some(mut file) = FILE_MANAGER.get_mut(&filename) { + match file.sync_all() { + Ok(_) => { + let success_msg = format!("[RUST] Successfully fsynced file: {}\n", filename); + println!("{}", success_msg.trim()); + Self::log_to_file(&success_msg); + Ok(()) + } + Err(e) => { + let error_msg = format!("[RUST] ERROR: Failed to fsync file: {}\n", filename); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + Err(e.into()) + } + } + } else { + let error_msg = format!("[RUST] ERROR: File not found for fsync: {}\n", filename); + println!("{}", error_msg.trim()); + Self::log_to_file(&error_msg); + Err("File not found".into()) + } + } + + fn log_to_file(message: &str) { + if let Ok(mut file) = OpenOptions::new() + .create(true) + .append(true) + .open("/tmp/rust_parquet_debug.log") { + let timestamp = Utc::now().format("%Y-%m-%d %H:%M:%S%.3f UTC"); + let timestamped_message = format!("[{}] {}", timestamp, message); + let _ = file.write_all(timestamped_message.as_bytes()); + } + } +} + +#[unsafe(no_mangle)] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_createWriter( + mut env: JNIEnv, + _class: JClass, + file: JString, + schema_address: jlong +) -> jint { + let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); + match NativeParquetWriter::create_writer(filename, schema_address as i64) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_write( + mut env: JNIEnv, + _class: JClass, + file: JString, + array_address: jlong, + schema_address: jlong +) -> jint { + let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); + match NativeParquetWriter::write_data(filename, array_address as i64, schema_address as i64) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_closeWriter( + mut env: JNIEnv, + _class: JClass, + file: JString +) -> jint { + let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); + match NativeParquetWriter::close_writer(filename) { + Ok(_) => 0, + Err(_) => -1, + } +} + +#[unsafe(no_mangle)] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_flushToDisk( + mut env: JNIEnv, + _class: JClass, + file: JString +) -> jint { + let filename: String = env.get_string(&file).expect("Couldn't get java string!").into(); + match NativeParquetWriter::flush_to_disk(filename) { + Ok(_) => 0, + Err(_) => -1, + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/parquet_exec.rs b/modules/parquet-data-format/src/main/rust/src/parquet_exec.rs new file mode 100644 index 0000000000000..9fee54317d09a --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/parquet_exec.rs @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use anyhow::Result; + +/// Parquet-specific execution utilities - placeholder implementation +pub struct ParquetExecutor; + +impl ParquetExecutor { + pub fn new() -> Self { + Self + } + + /// Create a listing table for Parquet files - placeholder + pub async fn create_parquet_table( + &self, + table_path: &str, + ) -> Result { + // Placeholder implementation + log::info!("Creating parquet table for path: {}", table_path); + Ok(1) // Return dummy table ID + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/read_lib.rs b/modules/parquet-data-format/src/main/rust/src/read_lib.rs new file mode 100644 index 0000000000000..516e9acca9d06 --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/read_lib.rs @@ -0,0 +1,198 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +//! OpenSearch DataFusion parquet JNI Library +//! +//! This library provides JNI bindings for DataFusion query execution, + +use jni::JNIEnv; +use jni::objects::{JClass, JString, JObjectArray, JByteArray}; +use jni::sys::{jlong, jstring}; +use std::ptr; +use std::collections::HashMap; + +mod context; +mod runtime; +mod stream; +mod substrait; +mod util; +mod parquet_exec; + +use context::SessionContextManager; +use runtime::RuntimeManager; +use stream::RecordBatchStreamWrapper; +use substrait::SubstraitExecutor; +use datafusion::execution::context::SessionContext; +use datafusion::execution::runtime_env::RuntimeEnv; + +/** +TODO : Put more thought into this +**/ +static mut RUNTIME_MANAGER: Option = None; + +static mut SESSION_MANAGER: Option = None; + +/// Initialize the managers (call once) +fn init_managers() { + unsafe { + if RUNTIME_MANAGER.is_none() { + RUNTIME_MANAGER = Some(RuntimeManager::new()); + } + if SESSION_MANAGER.is_none() { + SESSION_MANAGER = Some(SessionContextManager::new()); + } + } +} +static mut RUNTIME_ENVIRONMENTS: Option> = None; + + +/// Register a directory as a table in the global context and return runtime environment ID +#[no_mangle] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeRegisterDirectory( + mut env: JNIEnv, + _class: JClass, + table_name: JString, + directory_path: JString, + files: JObjectArray, + runtime_id: jlong +) { + let runtimeEnv = unsafe { &mut *(runtime_id as *mut RuntimeEnv) }; + // placeholder +} + +/// Create a new session context +#[no_mangle] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeCreateSessionContext( + mut env: JNIEnv, + _class: JClass, + config_keys: JObjectArray, + config_values: JObjectArray, +) -> jlong { + // Initialize managers if not already done + init_managers(); + + // PLACEHOLDER + // Parse configuration from JNI arrays + let config = match util::parse_string_map(&mut env, config_keys, config_values) { + Ok(cfg) => cfg, + Err(e) => { + util::throw_exception(&mut env, &format!("Failed to parse config: {}", e)); + return 0; + } + }; + + // Create session context + match unsafe { + RUNTIME_MANAGER.as_ref().unwrap().block_on(async { + SESSION_MANAGER.as_mut().unwrap().create_session_context(config).await + }) + } { + Ok(context_ptr) => context_ptr as jlong, + Err(e) => { + util::throw_exception(&mut env, &format!("Failed to create session context: {}", e)); + 0 + } + } +} + +/// Execute a Substrait query plan +#[no_mangle] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeExecuteSubstraitQuery( + mut env: JNIEnv, + _class: JClass, + session_context_ptr: jlong, + substrait_plan: JByteArray, +) -> jlong { + + // Convert JByteArray to Vec + let substrait_plan_bytes = match env.convert_byte_array(substrait_plan) { + Ok(bytes) => bytes, + Err(e) => { + util::throw_exception(&mut env, &format!("Failed to convert substrait plan: {}", e)); + return 0; + } + }; + + // Execute the query + match unsafe { + RUNTIME_MANAGER.as_ref().unwrap().block_on(async { + let executor = SubstraitExecutor::new(); + executor.execute_plan(session_context_ptr as *mut SessionContext, &substrait_plan_bytes).await + }) + } { + Ok(stream_ptr) => stream_ptr as jlong, + Err(e) => { + util::throw_exception(&mut env, &format!("Failed to execute query: {}", e)); + 0 + } + } +} + +/// Close a session context +#[no_mangle] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeCloseSessionContext( + mut env: JNIEnv, + _class: JClass, + session_context_ptr: jlong, +) { + + if let Err(e) = unsafe { + RUNTIME_MANAGER.as_ref().unwrap().block_on(async { + SESSION_MANAGER.as_mut().unwrap() + .close_session_context(session_context_ptr as *mut SessionContext) + .await + }) + } { + util::throw_exception(&mut env, &format!("Failed to close session context: {}", e)); + } +} + +/// Get the next record batch from a stream +#[no_mangle] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeNextBatch( + mut env: JNIEnv, + _class: JClass, + stream_ptr: jlong, +) -> jstring { + + let stream = unsafe { &mut *(stream_ptr as *mut RecordBatchStreamWrapper) }; + + match unsafe { + RUNTIME_MANAGER.as_ref().unwrap().block_on(async { + stream.next_batch().await + }) + } { + Ok(Some(batch_json)) => { + match env.new_string(&batch_json) { + Ok(jstr) => jstr.into_raw(), + Err(e) => { + util::throw_exception(&mut env, &format!("Failed to create Java string: {}", e)); + ptr::null_mut() + } + } + } + Ok(None) => ptr::null_mut(), // End of stream + Err(e) => { + util::throw_exception(&mut env, &format!("Failed to get next batch: {}", e)); + ptr::null_mut() + } + } +} + +/// Close a record batch stream +#[no_mangle] +pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeCloseStream( + _env: JNIEnv, + _class: JClass, + stream_ptr: jlong, +) { + if stream_ptr != 0 { + let stream = unsafe { Box::from_raw(stream_ptr as *mut RecordBatchStreamWrapper) }; + drop(stream); + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/runtime.rs b/modules/parquet-data-format/src/main/rust/src/runtime.rs new file mode 100644 index 0000000000000..bcd48a7dee58b --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/runtime.rs @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use tokio::runtime::Runtime; +use std::future::Future; + +/// Manages the Tokio runtime for async operations +pub struct RuntimeManager { + runtime: Runtime, +} + +impl RuntimeManager { + pub fn new() -> Self { + // Placeholder + + let runtime = Runtime::new().expect("Failed to create Tokio runtime"); + Self { runtime } + } + + pub fn block_on(&self, future: F) -> F::Output + where + F: Future, + { + self.runtime.block_on(future) + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/stream.rs b/modules/parquet-data-format/src/main/rust/src/stream.rs new file mode 100644 index 0000000000000..2fe30f941223b --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/stream.rs @@ -0,0 +1,43 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use anyhow::Result; +use serde_json; + +/// Wrapper for DataFusion record batch streams - placeholder implementation +pub struct RecordBatchStreamWrapper { + batch_count: u32, + is_placeholder: bool, +} + +impl RecordBatchStreamWrapper { + pub fn new_placeholder() -> Self { + Self { + batch_count: 0, + is_placeholder: true, + } + } + + pub async fn next_batch(&mut self) -> Result> { + // Return placeholder data for first few calls, then None + if self.is_placeholder { + if self.batch_count < 2 { + self.batch_count += 1; + let placeholder_data = serde_json::json!({ + "rows": [ + {"id": self.batch_count, "name": format!("placeholder_row_{}", self.batch_count)} + ], + "num_rows": 1, + "num_columns": 2 + }); + Ok(Some(serde_json::to_string(&placeholder_data)?)) + } else { + Ok(None) // End of stream + } + } else { + // Real implementation would go here + Ok(None) + } + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/substrait.rs b/modules/parquet-data-format/src/main/rust/src/substrait.rs new file mode 100644 index 0000000000000..d8ca0f2846fd7 --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/substrait.rs @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use datafusion::execution::context::SessionContext; +use crate::stream::RecordBatchStreamWrapper; +use anyhow::Result; + +/// Executes Substrait query plans +pub struct SubstraitExecutor; + +impl SubstraitExecutor { + pub fn new() -> Self { + Self + } + + pub async fn execute_plan( + &self, + session_context_ptr: *mut SessionContext, + substrait_plan_bytes: &[u8], + ) -> Result<*mut RecordBatchStreamWrapper> { + // Placeholder implementation - would normally: + // 1. Parse Substrait plan from substrait_plan_bytes + // 2. Convert to DataFusion logical plan using datafusion-substrait + // 3. Execute using the session context + // 4. Return actual record batch stream + + log::info!("Executing Substrait plan with {} bytes for session: {:?}", + substrait_plan_bytes.len(), session_context_ptr); + + // For now, return a placeholder stream + let wrapper = RecordBatchStreamWrapper::new_placeholder(); + let wrapper_ptr = Box::into_raw(Box::new(wrapper)); + + Ok(wrapper_ptr) + } +} diff --git a/modules/parquet-data-format/src/main/rust/src/util.rs b/modules/parquet-data-format/src/main/rust/src/util.rs new file mode 100644 index 0000000000000..5055c1312791a --- /dev/null +++ b/modules/parquet-data-format/src/main/rust/src/util.rs @@ -0,0 +1,63 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use jni::JNIEnv; +use jni::objects::{JObjectArray, JString}; +use std::collections::HashMap; +use anyhow::Result; + +/// Parse a string map from JNI arrays +pub fn parse_string_map( + env: &mut JNIEnv, + keys: JObjectArray, + values: JObjectArray, +) -> Result> { + let mut map = HashMap::new(); + + let keys_len = env.get_array_length(&keys)?; + let values_len = env.get_array_length(&values)?; + + if keys_len != values_len { + return Err(anyhow::anyhow!("Keys and values arrays must have the same length")); + } + + for i in 0..keys_len { + let key_obj = env.get_object_array_element(&keys, i)?; + let value_obj = env.get_object_array_element(&values, i)?; + + let key_jstring = JString::from(key_obj); + let value_jstring = JString::from(value_obj); + + let key_str = env.get_string(&key_jstring)?; + let value_str = env.get_string(&value_jstring)?; + + map.insert(key_str.to_string_lossy().to_string(), value_str.to_string_lossy().to_string()); + } + + Ok(map) +} + +// Parse a string map from JNI arrays +pub fn parse_string_arr( + env: &mut JNIEnv, + files: JObjectArray, +) -> Result> { + let length = env.get_array_length(&files).unwrap(); + let mut rust_strings: Vec = Vec::with_capacity(length as usize); + for i in 0..length { + let file_obj = env.get_object_array_element(&files, i).unwrap(); + let jstring = JString::from(file_obj); + let rust_str: String = env + .get_string(&jstring) + .expect("Couldn't get java string!") + .into(); + rust_strings.push(rust_str); + } + Ok(rust_strings) +} + +/// Throw a Java exception +pub fn throw_exception(env: &mut JNIEnv, message: &str) { + let _ = env.throw_new("java/lang/RuntimeException", message); +} diff --git a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatPluginIT.java b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatPluginIT.java new file mode 100644 index 0000000000000..f4c123b8a96f4 --- /dev/null +++ b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatPluginIT.java @@ -0,0 +1,41 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package com.parquet.parquetdataformat; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope; +import org.apache.hc.core5.http.ParseException; +import org.apache.hc.core5.http.io.entity.EntityUtils; +import org.opensearch.client.Request; +import org.opensearch.client.Response; +import org.opensearch.plugins.Plugin; +import org.opensearch.test.OpenSearchIntegTestCase; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collection; +import java.util.Collections; + +import static org.hamcrest.Matchers.containsString; + +@ThreadLeakScope(ThreadLeakScope.Scope.NONE) +@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE) +public class ParquetDataFormatPluginIT extends OpenSearchIntegTestCase { + + @Override + protected Collection> nodePlugins() { + return Collections.singletonList(ParquetDataFormatPlugin.class); + } + + public void testPluginInstalled() throws IOException, ParseException { + Response response = getRestClient().performRequest(new Request("GET", "/_cat/plugins")); + String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8); + + logger.info("response body: {}", body); + assertThat(body, containsString("parquet")); + } +} diff --git a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatTests.java b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatTests.java new file mode 100644 index 0000000000000..b52466249d727 --- /dev/null +++ b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatTests.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package com.parquet.parquetdataformat; + +import com.parquet.parquetdataformat.bridge.RustBridge; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; + +public class ParquetDataFormatTests extends OpenSearchTestCase { + + public void testIngestion() throws IOException { + // Test only basic functionality without Arrow operations + try { + // Create plugin but don't call complex operations + ParquetDataFormatPlugin plugin = new ParquetDataFormatPlugin(); + plugin.indexDataToParquetEngine(); + + } catch (UnsatisfiedLinkError e) { + fail("Native library not loaded properly: " + e.getMessage()); + } catch (Exception e) { + fail("Test failed: " + e.getMessage()); + } + } +} diff --git a/modules/parquet-data-format/src/yamlRestTest/java/org.opensearch/parquetdataformat/ParquetDataFormatClientYamlTestSuiteIT.java b/modules/parquet-data-format/src/yamlRestTest/java/org.opensearch/parquetdataformat/ParquetDataFormatClientYamlTestSuiteIT.java new file mode 100644 index 0000000000000..324c6ce3debd1 --- /dev/null +++ b/modules/parquet-data-format/src/yamlRestTest/java/org.opensearch/parquetdataformat/ParquetDataFormatClientYamlTestSuiteIT.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +package org.opensearch.parquetdataformat; + +import com.carrotsearch.randomizedtesting.annotations.Name; +import com.carrotsearch.randomizedtesting.annotations.ParametersFactory; +import org.opensearch.test.rest.yaml.ClientYamlTestCandidate; +import org.opensearch.test.rest.yaml.OpenSearchClientYamlSuiteTestCase; + + +public class ParquetDataFormatClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestCase { + + public ParquetDataFormatClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) { + super(testCandidate); + } + + @ParametersFactory + public static Iterable parameters() throws Exception { + return OpenSearchClientYamlSuiteTestCase.createParameters(); + } +} diff --git a/modules/parquet-data-format/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml b/modules/parquet-data-format/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml new file mode 100644 index 0000000000000..0399b16c51642 --- /dev/null +++ b/modules/parquet-data-format/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml @@ -0,0 +1,8 @@ +"Test that the plugin is loaded in OpenSearch": + - do: + cat.plugins: + local: true + h: component + + - match: + $body: /^rename\n$/ diff --git a/plugins/engine-datafusion/.gitignore b/plugins/engine-datafusion/.gitignore new file mode 100644 index 0000000000000..8e535981ee076 --- /dev/null +++ b/plugins/engine-datafusion/.gitignore @@ -0,0 +1,38 @@ +# Gradle +.gradle/ +build/ + +# Java +*.class +*.jar +*.war +*.ear +hs_err_pid* + +# IDE +.idea/ +*.iml +*.ipr +*.iws +.vscode/ +.settings/ +.project +.classpath + +# OS +.DS_Store +Thumbs.db + +# Rust +jni/target/ +jni/Cargo.lock + +# Native libraries +src/main/resources/native/ + +# Logs +*.log + +# Temporary files +*.tmp +*.temp diff --git a/plugins/engine-datafusion/README.md b/plugins/engine-datafusion/README.md new file mode 100644 index 0000000000000..bc4ad580df874 --- /dev/null +++ b/plugins/engine-datafusion/README.md @@ -0,0 +1,73 @@ + +## Prerequisites + +1. Publish OpenSearch to maven local +``` +./gradlew publishToMavenLocal +``` +2. Publish SQL plugin to maven local +``` +./gradlew publishToMavenLocal +``` +3. Run opensearch with following parameters +``` + ./gradlew run --preserve-data -PremotePlugins="['org.opensearch.plugin:opensearch-job-scheduler:3.3.0.0-SNAPSHOT', 'org.opensearch.plugin:opensearch-sql-plugin:3.3.0.0-SNAPSHOT']" -PinstalledPlugins="['engine-datafusion']" --debug-jvm +``` + + +## Steps to test indexing + search e2e + +TODO : need to remove hardcoded index name `index-7` + +1. Delete previous index if any +``` +curl --location --request DELETE 'localhost:9200/index-7' +``` + +2. Create index with name : `index-7` +``` +curl --location --request PUT 'http://localhost:9200/index-7' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "settings": { + "number_of_shards": 1, + "number_of_replicas": 0, + "refresh_interval": -1 + }, + "mappings": { + "properties": { + "message": { + "type": "long" + }, + "message2": { + "type": "long" + }, + "message3": { + "type": "long" + } + } + } +}' +``` +3. Index docs +``` +curl --location --request POST 'http://localhost:9200/_bulk' \ +--header 'Content-Type: application/json' \ +--data-raw '{"index":{"_index":"index-7"}} +{"message": 2,"message2": 3,"message3": 4} +{"index":{"_index":"index-7"}} +{"message": 3,"message2": 4,"message3": 5} +' +``` +4. Refresh the index +``` +curl localhost:9200/index-7/_refresh +``` +5. Query +``` +curl --location --request POST 'http://localhost:9200/_plugins/_ppl' \ +--header 'Content-Type: application/json' \ +--data-raw '{ + "query": "source=index-7 | stats count(), min(message) as min, max(message2) as max" +}' +``` diff --git a/plugins/engine-datafusion/build.gradle b/plugins/engine-datafusion/build.gradle new file mode 100644 index 0000000000000..82b5cd41fc757 --- /dev/null +++ b/plugins/engine-datafusion/build.gradle @@ -0,0 +1,225 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +apply plugin: 'java' +apply plugin: 'idea' +apply plugin: 'opensearch.internal-cluster-test' +apply plugin: 'opensearch.yaml-rest-test' +apply plugin: 'opensearch.pluginzip' + +def pluginName = 'engine-datafusion' +def pluginDescription = 'OpenSearch plugin providing access to DataFusion via JNI' +def projectPath = 'org.opensearch' +def pathToPlugin = 'datafusion.DataFusionPlugin' +def pluginClassName = 'DataFusionPlugin' + +opensearchplugin { + name = pluginName + description = pluginDescription + classname = "${projectPath}.${pathToPlugin}" + licenseFile = rootProject.file('LICENSE.txt') + noticeFile = rootProject.file('NOTICE.txt') +} + +dependencies { + api project(':libs:opensearch-vectorized-exec-spi') + implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}" + implementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" + + // Bundle Jackson in the plugin JAR using 'api' like other OpenSearch plugins + api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}" + api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}" + api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}" + + // Apache Arrow dependencies for memory management + implementation "org.apache.arrow:arrow-memory-core:17.0.0" + implementation "org.apache.arrow:arrow-memory-unsafe:17.0.0" + implementation "org.apache.arrow:arrow-vector:17.0.0" + implementation "org.apache.arrow:arrow-c-data:17.0.0" + implementation "org.apache.arrow:arrow-format:17.0.0" + // SLF4J API for Arrow logging compatibility + implementation "org.slf4j:slf4j-api:${versions.slf4j}" + // CheckerFramework annotations required by Arrow 17.0.0 + implementation "org.checkerframework:checker-qual:3.42.0" + // FlatBuffers dependency required by Arrow 17.0.0 + implementation "com.google.flatbuffers:flatbuffers-java:${versions.flatbuffers}" + + testImplementation "junit:junit:${versions.junit}" + testImplementation "org.hamcrest:hamcrest:${versions.hamcrest}" + testImplementation "org.mockito:mockito-core:${versions.mockito}" + testImplementation project(":modules:parquet-data-format") + // Add CSV plugin for testing + // testImplementation project(':plugins:dataformat-csv') +} + +// Task to build the Rust JNI library +task buildRustLibrary(type: Exec) { + description = 'Build the Rust JNI library using Cargo' + group = 'build' + + workingDir file('jni') + + // Determine the target directory and library name based on OS + def osName = System.getProperty('os.name').toLowerCase() + def libPrefix = osName.contains('windows') ? '' : 'lib' + def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so') + + // Use debug build for development, release for production + def buildType = project.hasProperty('rustRelease') ? 'release' : 'debug' + def targetDir = "target/${buildType}" + + // Find cargo executable - try common locations + def cargoExecutable = 'cargo' + def possibleCargoPaths = [ + System.getenv('HOME') + '/.cargo/bin/cargo', + '/usr/local/bin/cargo', + 'cargo' + ] + + for (String path : possibleCargoPaths) { + if (new File(path).exists()) { + cargoExecutable = path + break + } + } + + def cargoArgs = [cargoExecutable, 'build'] + if (buildType == 'release') { + cargoArgs.add('--release') + } + + if (osName.contains('windows')) { + commandLine cargoArgs + } else { + commandLine cargoArgs + } + + // Set environment variables for cross-compilation if needed + environment 'CARGO_TARGET_DIR', file('jni/target').absolutePath + + inputs.files fileTree('jni/src') + inputs.file 'jni/Cargo.toml' + outputs.files file("jni/${targetDir}/${libPrefix}opensearch_datafusion_jni${libExtension}") + System.out.println("Building Rust library in ${buildType} mode"); +} + +// Task to copy the native library to resources +task copyNativeLibrary(type: Copy, dependsOn: buildRustLibrary) { + description = 'Copy the native library to Java resources' + group = 'build' + + def osName = System.getProperty('os.name').toLowerCase() + def libPrefix = osName.contains('windows') ? '' : 'lib' + def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so') + def buildType = project.hasProperty('rustRelease') ? 'release' : 'debug' + + from file("jni/target/${buildType}/${libPrefix}opensearch_datafusion_jni${libExtension}") + into file('src/main/resources/native') + + // Rename to a standard name for Java to load + rename { filename -> + "libopensearch_datafusion_jni${libExtension}" + } + + // Remove executable permissions to comply with OpenSearch file permission checks + filePermissions { + unix(0644) + } +} + +// Ensure native library is built before Java compilation +compileJava.dependsOn copyNativeLibrary + +// Ensure processResources depends on copyNativeLibrary +processResources.dependsOn copyNativeLibrary +sourcesJar.dependsOn copyNativeLibrary + +// Ensure filepermissions task depends on copyNativeLibrary +tasks.named('filepermissions').configure { + dependsOn copyNativeLibrary +} + +// Ensure sourcesJar depends on copyNativeLibrary since it includes resources +sourcesJar.dependsOn copyNativeLibrary + +// Ensure filepermissions task depends on copyNativeLibrary +tasks.named("filepermissions").configure { + dependsOn copyNativeLibrary +} + +// Ensure forbiddenPatterns task depends on copyNativeLibrary +tasks.named("forbiddenPatterns").configure { + dependsOn copyNativeLibrary + // Exclude native library files from pattern checking since they are binary + exclude '**/native/**' +} + +// Ensure spotlessJava task has proper dependency ordering +tasks.named("spotlessJava").configure { + mustRunAfter copyNativeLibrary +} + +// Clean task should also clean Rust artifacts +clean { + delete file('jni/target') + delete file('src/main/resources/native') +} + +test { + // Set system property to help tests find the native library + jvmArgs += ["--add-opens", "java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED"] + + systemProperty 'java.library.path', file('src/main/resources/native').absolutePath +} + +yamlRestTest { + systemProperty 'tests.security.manager', 'false' + // Disable yamlRestTest since this plugin doesn't have REST API endpoints + enabled = false +} + +tasks.named("dependencyLicenses").configure { + mapping from: /jackson-.*/, to: 'jackson' + mapping from: /arrow-.*/, to: 'arrow' + mapping from: /slf4j-.*/, to: 'slf4j-api' + mapping from: /checker-qual.*/, to: 'checker-qual' + mapping from: /flatbuffers-.*/, to: 'flatbuffers-java' +} + +// Configure third party audit to handle Apache Arrow dependencies +tasks.named('thirdPartyAudit').configure { + ignoreMissingClasses( + // Apache Commons Codec (missing dependency) + 'org.apache.commons.codec.binary.Hex' + ) + ignoreViolations( + // Apache Arrow internal classes that use Unsafe operations + 'org.apache.arrow.memory.ArrowBuf', + 'org.apache.arrow.memory.unsafe.UnsafeAllocationManager', + 'org.apache.arrow.memory.util.ByteFunctionHelpers', + 'org.apache.arrow.memory.util.MemoryUtil', + 'org.apache.arrow.memory.util.MemoryUtil$1', + 'org.apache.arrow.memory.util.hash.MurmurHasher', + 'org.apache.arrow.memory.util.hash.SimpleHasher', + 'org.apache.arrow.vector.BaseFixedWidthVector', + 'org.apache.arrow.vector.BitVectorHelper', + 'org.apache.arrow.vector.Decimal256Vector', + 'org.apache.arrow.vector.DecimalVector', + 'org.apache.arrow.vector.util.DecimalUtility', + 'org.apache.arrow.vector.util.VectorAppender' + ) +} + +// Configure Javadoc to skip package documentation requirements ie package-info.java +missingJavadoc { + javadocMissingIgnore = [ + 'org.opensearch.datafusion', + 'org.opensearch.datafusion.action', + 'org.opensearch.datafusion.core' + ] +} diff --git a/plugins/engine-datafusion/jni/Cargo.toml b/plugins/engine-datafusion/jni/Cargo.toml new file mode 100644 index 0000000000000..4dbb5374f7443 --- /dev/null +++ b/plugins/engine-datafusion/jni/Cargo.toml @@ -0,0 +1,79 @@ +[package] +name = "opensearch-datafusion-jni" +version = "0.1.0" +edition = "2021" +description = "JNI bindings for DataFusion integration with OpenSearch" +license = "Apache-2.0" + +[lib] +name = "opensearch_datafusion_jni" +crate-type = ["cdylib"] + +[dependencies] +# DataFusion dependencies +datafusion = "49.0.0" +datafusion-expr = "49.0.0" +datafusion-datasource = "49.0.0" +arrow-json = "55.2" +arrow = { version = "55.2", features = ["ffi", "ipc_compression"] } +#arrow = "55.2.0" +arrow-array = "55.2.0" +arrow-schema = "55.2.0" +arrow-buffer = "55.2.0" + +# JNI dependencies +jni = "0.21" + +# Substrait support +datafusion-substrait = "49.0.0" +prost = "0.13" + + +# Async runtime +tokio = { version = "1.0", features = ["full"] } +futures = "0.3" +#tokio = { version = "1.0", features = ["rt", "rt-multi-thread", "macros"] } + +# Serialization +serde = { version = "1.0", features = ["derive"] } +serde_json = "1.0" + +# Error handling +anyhow = "1.0" +thiserror = "1.0" + +# Logging +log = "0.4" +# Parquet support +parquet = "53.0.0" + +# Object store for file access +object_store = "=0.12.3" +url = "2.0" + +# Substrait support +substrait = "0.47" + +# Temporary directory support +tempfile = "3.0" +chrono = "0.4.41" + +async-trait = "0.1.89" +itertools = "0.14.0" +rstest = "0.26.1" +regex = "1.11.2" + +[build-dependencies] +cbindgen = "0.27" + + +[profile.release] +lto = true +codegen-units = 1 +panic = "abort" + +[profile.dev] +opt-level = 1 # Some optimization for reasonable performance +lto = false # Disable LTO for faster builds +codegen-units = 16 # More parallel compilation +incremental = true # Enable incremental compilation diff --git a/plugins/engine-datafusion/jni/src/lib.rs b/plugins/engine-datafusion/jni/src/lib.rs new file mode 100644 index 0000000000000..59bbc1fc912d6 --- /dev/null +++ b/plugins/engine-datafusion/jni/src/lib.rs @@ -0,0 +1,772 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +use std::collections::{BTreeSet, HashMap}; +use std::ptr::addr_of_mut; +use jni::objects::{JByteArray, JClass, JLongArray, JObject}; +use jni::sys::{jbyteArray, jlong, jstring}; +use jni::JNIEnv; +use std::sync::Arc; +use arrow_array::{Array, StructArray}; +use arrow_array::ffi::FFI_ArrowArray; +use arrow_schema::DataType; +use arrow_schema::ffi::FFI_ArrowSchema; +use datafusion::common::DataFusionError; + +mod util; +mod row_id_optimizer; +mod listing_table; + +use datafusion::execution::context::SessionContext; + +use crate::util::{create_file_metadata_from_filenames, parse_string_arr, set_object_result_error, set_object_result_ok}; +use datafusion::datasource::file_format::csv::CsvFormat; +use datafusion::datasource::listing::{ListingTableUrl}; +use datafusion::execution::cache::cache_manager::CacheManagerConfig; +use datafusion::execution::cache::cache_unit::DefaultListFilesCache; +use datafusion::execution::cache::CacheAccessor; +use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder}; +use datafusion::prelude::SessionConfig; +use datafusion::DATAFUSION_VERSION; +use datafusion::datasource::file_format::parquet::ParquetFormat; +use datafusion::datasource::object_store::ObjectStoreUrl; +use datafusion::datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess}; +use datafusion::datasource::physical_plan::ParquetSource; +use datafusion::execution::TaskContext; +use datafusion::parquet::arrow::arrow_reader::RowSelector; +use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream}; +use datafusion_datasource::file_groups::FileGroup; +use datafusion_datasource::file_scan_config::FileScanConfigBuilder; +use datafusion_datasource::PartitionedFile; +use datafusion_datasource::source::DataSourceExec; +use datafusion_substrait::logical_plan::consumer::from_substrait_plan; +use datafusion_substrait::substrait::proto::Plan; +use futures::TryStreamExt; +use jni::objects::{JObjectArray, JString}; +use object_store::ObjectMeta; +use prost::Message; +use tokio::runtime::Runtime; +use crate::listing_table::{ListingOptions, ListingTable, ListingTableConfig}; +use crate::row_id_optimizer::FilterRowIdOptimizer; + +/// Create a new DataFusion session context +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createContext( + _env: JNIEnv, + _class: JClass, +) -> jlong { + let config = SessionConfig::new().with_repartition_aggregations(true); + let context = SessionContext::new_with_config(config); + let ctx = Box::into_raw(Box::new(context)) as jlong; + ctx +} + +/// Close and cleanup a DataFusion context +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_closeContext( + _env: JNIEnv, + _class: JClass, + context_id: jlong, +) { + let _ = unsafe { Box::from_raw(context_id as *mut SessionContext) }; +} + +/// Get version information +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_getVersionInfo( + env: JNIEnv, + _class: JClass, +) -> jstring { + let version_info = format!(r#"{{"version": "{}", "codecs": ["CsvDataSourceCodec"]}}"#, DATAFUSION_VERSION); + env.new_string(version_info).expect("Couldn't create Java string").as_raw() +} + +/// Get version information (legacy method name) +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_getVersion( + env: JNIEnv, + _class: JClass, +) -> jstring { + env.new_string(DATAFUSION_VERSION).expect("Couldn't create Java string").as_raw() +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createTokioRuntime( + _env: JNIEnv, + _class: JClass, +) -> jlong { + let rt = Runtime::new().unwrap(); + let ctx = Box::into_raw(Box::new(rt)) as jlong; + ctx +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createGlobalRuntime( + _env: JNIEnv, + _class: JClass, +) -> jlong { + let runtime_env = RuntimeEnvBuilder::default().build().unwrap(); + /** + // We can copy global runtime to local runtime - file statistics cache, and most of the things + // will be shared across session contexts. But list files cache will be specific to session + // context + + let fsCache = runtimeEnv.clone().cache_manager.get_file_statistic_cache().unwrap(); + let localCacheManagerConfig = CacheManagerConfig::default().with_files_statistics_cache(Option::from(fsCache)); + let localCacheManager = CacheManager::try_new(&localCacheManagerConfig); + let localRuntimeEnv = RuntimeEnvBuilder::new() + .with_cache_manager(localCacheManagerConfig) + .with_disk_manager(DiskManagerConfig::new_existing(runtimeEnv.disk_manager)) + .with_memory_pool(runtimeEnv.memory_pool) + .with_object_store_registry(runtimeEnv.object_store_registry) + .build(); + let config = SessionConfig::new().with_repartition_aggregations(true); + let context = SessionContext::new_with_config(config); + **/ + + let ctx = Box::into_raw(Box::new(runtime_env)) as jlong; + ctx +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createSessionContext( + _env: JNIEnv, + _class: JClass, + runtime_id: jlong, +) -> jlong { + let runtimeEnv = unsafe { &mut *(runtime_id as *mut RuntimeEnv) }; + let config = SessionConfig::new().with_repartition_aggregations(true); + let context = SessionContext::new_with_config_rt(config, Arc::new(runtimeEnv.clone())); + let ctx = Box::into_raw(Box::new(context)) as jlong; + ctx +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_closeSessionContext( + _env: JNIEnv, + _class: JClass, + context_id: jlong, +) { + let _ = unsafe { Box::from_raw(context_id as *mut SessionContext) }; +} + + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createDatafusionReader( + mut env: JNIEnv, + _class: JClass, + table_path: JString, + files: JObjectArray +) -> jlong { + let table_path: String = match env.get_string(&table_path) { + Ok(path) => path.into(), + Err(e) => { + let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid table path: {:?}", e)); + return 0; + } + }; + + let files: Vec = match parse_string_arr(&mut env, files) { + Ok(files) => files, + Err(e) => { + let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid file list: {}", e)); + return 0; + } + }; + + let files_metadata = match create_file_metadata_from_filenames(&table_path, files.clone()) { + Ok(metadata) => metadata, + Err(err) => { + let _ = env.throw_new("java/lang/RuntimeException", format!("Failed to create metadata: {}", err)); + return 0; + } + }; + + let table_url = match ListingTableUrl::parse(&table_path) { + Ok(url) => url, + Err(err) => { + let _ = env.throw_new("java/lang/RuntimeException", format!("Invalid table path: {}", err)); + return 0; + } + }; + + let shard_view = ShardView::new(table_url, files_metadata); + + Box::into_raw(Box::new(shard_view)) as jlong +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_destroyReader( + mut env: JNIEnv, + _class: JClass, + ptr: jlong +) { + let _ = unsafe { Box::from_raw(ptr as *mut ShardView) }; +} + +pub struct ShardView { + table_path: ListingTableUrl, + files_metadata: Arc> +} + +impl ShardView { + pub fn new(table_path: ListingTableUrl, files_metadata: Vec) -> Self { + let files_metadata = Arc::new(files_metadata); + ShardView { + table_path, + files_metadata + } + } + + pub fn table_path(&self) -> ListingTableUrl { + self.table_path.clone() + } + + pub fn files_metadata(&self) -> Arc> { + self.files_metadata.clone() + } +} + +#[derive(Debug, Clone)] +struct FileMetadata { + row_group_row_counts: Arc>, + row_base: Arc, + object_meta: Arc, +} + +impl FileMetadata { + pub fn new(row_group_row_counts: Vec, row_base: i64, object_meta: ObjectMeta) -> Self { + let row_group_row_counts = Arc::new(row_group_row_counts); + let row_base = Arc::new(row_base); + let object_meta = Arc::new(object_meta); + FileMetadata { + row_group_row_counts, + row_base, + object_meta + } + } + + pub fn row_group_row_counts(&self) -> Arc> { + self.row_group_row_counts.clone() + } + + pub fn row_base(&self) -> Arc { + self.row_base.clone() + } + + pub fn object_meta(&self) -> Arc { + self.object_meta.clone() + } +} + + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_executeQueryPhase( + mut env: JNIEnv, + _class: JClass, + shard_view_ptr: jlong, + substrait_bytes: jbyteArray, + tokio_runtime_env_ptr: jlong, + // callback: JObject, +) -> jlong { + let shard_view = unsafe { &*(shard_view_ptr as *const ShardView) }; + let runtime_ptr = unsafe { &*(tokio_runtime_env_ptr as *const Runtime)}; + + let table_path = shard_view.table_path(); + let files_metadata = shard_view.files_metadata(); + let object_meta: Arc> = Arc::new(files_metadata + .iter() + .map(|metadata| (*metadata.object_meta).clone()) + .collect()); + + println!("Table path: {}", table_path); + println!("Files: {:?}", object_meta); + + let list_file_cache = Arc::new(DefaultListFilesCache::default()); + list_file_cache.put(table_path.prefix(), object_meta); + + let runtime_env = RuntimeEnvBuilder::new() + .with_cache_manager(CacheManagerConfig::default() + .with_list_files_cache(Some(list_file_cache.clone())) + ).build().unwrap(); + + // TODO: get config from CSV DataFormat + let config = SessionConfig::new(); + // config.options_mut().execution.parquet.pushdown_filters = true; + + let state = datafusion::execution::SessionStateBuilder::new() + .with_config(config) + .with_runtime_env(Arc::from(runtime_env)) + .with_default_features() + .with_physical_optimizer_rule(Arc::new(FilterRowIdOptimizer)) // TODO: enable only for query phase + .build(); + + let ctx = SessionContext::new_with_state(state); + + // Create default parquet options + let file_format = ParquetFormat::new(); + let listing_options = ListingOptions::new(Arc::new(file_format)) + .with_file_extension(".parquet") // TODO: take this as parameter + .with_files_metadata(files_metadata) + .with_table_partition_cols(vec![("row_base".to_string(), DataType::Int32)]); // TODO: enable only for query phase + + // Ideally the executor will give this + runtime_ptr.block_on(async { + let resolved_schema = listing_options + .infer_schema(&ctx.state(), &table_path.clone()) + .await.unwrap(); + + + let config = ListingTableConfig::new(table_path.clone()) + .with_listing_options(listing_options) + .with_schema(resolved_schema); + + // Create a new TableProvider + let provider = Arc::new(ListingTable::try_new(config).unwrap()); + let shard_id = table_path.prefix().filename().expect("error in fetching Path"); + ctx.register_table("index-7", provider) + .expect("Failed to attach the Table"); + + }); + + // TODO : how to close ctx ? + // Convert Java byte array to Rust Vec + let plan_bytes_obj = unsafe { JByteArray::from_raw(substrait_bytes) }; + let plan_bytes_vec = match env.convert_byte_array(plan_bytes_obj) { + Ok(bytes) => bytes, + Err(e) => { + let error_msg = format!("Failed to convert plan bytes: {}", e); + env.throw_new("java/lang/Exception", error_msg); + return 0; + } + }; + + let substrait_plan = match Plan::decode(plan_bytes_vec.as_slice()) { + Ok(plan) => { + println!("SUBSTRAIT rust: Decoding is successful, Plan has {} relations", plan.relations.len()); + plan + }, + Err(e) => { + return 0; + } + }; + + //let runtime = unsafe { &mut *(runtime_ptr as *mut Runtime) }; + runtime_ptr.block_on(async { + + let logical_plan = match from_substrait_plan(&ctx.state(), &substrait_plan).await { + Ok(plan) => { + println!("SUBSTRAIT Rust: LogicalPlan: {:?}", plan); + plan + }, + Err(e) => { + println!("SUBSTRAIT Rust: Failed to convert Substrait plan: {}", e); + return 0; + } + }; + + let dataframe = ctx.execute_logical_plan(logical_plan).await.unwrap(); + let stream = dataframe.execute_stream().await.unwrap(); + let stream_ptr = Box::into_raw(Box::new(stream)) as jlong; + + stream_ptr + }) +} + +// If we need to create session context separately +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_nativeCreateSessionContext( + mut env: JNIEnv, + _class: JClass, + runtime_ptr: jlong, + shard_view_ptr: jlong, + global_runtime_env_ptr: jlong, +) -> jlong { + let shard_view = unsafe { &*(shard_view_ptr as *const ShardView) }; + let table_path = shard_view.table_path(); + let files_metadata = shard_view.files_metadata(); + let object_meta: Arc> = Arc::new(files_metadata + .iter() + .map(|metadata| (*metadata.object_meta).clone()) + .collect()); + // Will use it once the global RunTime is defined + // let runtime_arc = unsafe { + // let boxed = &*(runtime_env_ptr as *const Pin>); + // (**boxed).clone() + // }; + + let list_file_cache = Arc::new(DefaultListFilesCache::default()); + list_file_cache.put(table_path.prefix(), object_meta); + + let runtime_env = RuntimeEnvBuilder::new() + .with_cache_manager(CacheManagerConfig::default() + .with_list_files_cache(Some(list_file_cache))).build().unwrap(); + + + + let ctx = SessionContext::new_with_config_rt(SessionConfig::new(), Arc::new(runtime_env)); + + + // Create default parquet options + let file_format = CsvFormat::default(); + let listing_options = ListingOptions::new(Arc::new(file_format)) + .with_file_extension(".csv"); + + + // let runtime = unsafe { &mut *(runtime_ptr as *mut Runtime) }; + let mut session_context_ptr = 0; + + // Ideally the executor will give this + Runtime::new().expect("Failed to create Tokio Runtime").block_on(async { + let resolved_schema = listing_options + .infer_schema(&ctx.state(), &table_path.clone()) + .await.unwrap(); + + + let config = ListingTableConfig::new(table_path.clone()) + .with_listing_options(listing_options) + .with_schema(resolved_schema); + + // Create a new TableProvider + let provider = Arc::new(ListingTable::try_new(config).unwrap()); + let shard_id = table_path.prefix().filename().expect("error in fetching Path"); + ctx.register_table(shard_id, provider) + .expect("Failed to attach the Table"); + + // Return back after wrapping in Box + session_context_ptr = Box::into_raw(Box::new(ctx)) as jlong + }); + + session_context_ptr +} + + + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_RecordBatchStream_next( + mut env: JNIEnv, + _class: JClass, + runtime_ptr: jlong, + stream: jlong, + callback: JObject, +) { + let runtime = unsafe { &mut *(runtime_ptr as *mut Runtime) }; + + let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) }; + runtime.block_on(async { + //let fetch_start = std::time::Instant::now(); + let next = stream.try_next().await; + //let fetch_time = fetch_start.elapsed(); + match next { + Ok(Some(batch)) => { + //let convert_start = std::time::Instant::now(); + // Convert to struct array for compatibility with FFI + //println!("Num rows : {}", batch.num_rows()); + let struct_array: StructArray = batch.into(); + let array_data = struct_array.into_data(); + let mut ffi_array = FFI_ArrowArray::new(&array_data); + //let convert_time = convert_start.elapsed(); + // ffi_array must remain alive until after the callback is called + // let callback_start = std::time::Instant::now(); + set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_array)); + // let callback_time = callback_start.elapsed(); + // println!("Fetch: {:?}, Convert: {:?}, Callback: {:?}", + // fetch_time, convert_time, callback_time); + } + Ok(None) => { + set_object_result_ok(&mut env, callback, 0 as *mut FFI_ArrowSchema); + } + Err(err) => { + set_object_result_error(&mut env, callback, &err); + } + } + //println!("Total time: {:?}", start.elapsed()); + }); +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_RecordBatchStream_getSchema( + mut env: JNIEnv, + _class: JClass, + stream: jlong, + callback: JObject, +) { + let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) }; + let schema = stream.schema(); + let ffi_schema = FFI_ArrowSchema::try_from(&*schema); + match ffi_schema { + Ok(mut ffi_schema) => { + // ffi_schema must remain alive until after the callback is called + set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_schema)); + } + Err(err) => { + set_object_result_error(&mut env, callback, &err); + } + } +} + +#[no_mangle] +pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_executeFetchPhase( + mut env: JNIEnv, + _class: JClass, + shard_view_ptr: jlong, + values: JLongArray, + projections: JObjectArray, + tokio_runtime_env_ptr: jlong, + callback: JObject, +) -> jlong{ + let shard_view = unsafe { &*(shard_view_ptr as *const ShardView) }; + let runtime_ptr = unsafe { &*(tokio_runtime_env_ptr as *const Runtime)}; + + let table_path = shard_view.table_path(); + let files_metadata = shard_view.files_metadata(); + + let projections: Vec = parse_string_arr(&mut env, projections).expect("Expected list of files"); + + // Safety checks first + if values.is_null() { + let _ = env.throw_new("java/lang/NullPointerException", "values array is null"); + return 0; + } + + // 2. Get array length + let array_length = match env.get_array_length(&values) { + Ok(len) => len, + Err(e) => { + let _ = env.throw_new("java/lang/RuntimeException", + format!("Failed to get array length: {:?}", e)); + return 0; + } + }; + + // 3. Allocate Rust buffer + let mut row_ids: Vec = vec![0; array_length as usize]; + + // 4. Copy Java array into Rust buffer + match env.get_long_array_region(values, 0, &mut row_ids[..]) { + Ok(_) => { + println!("✅ Received array: {:?}", row_ids); + } + Err(e) => { + let _ = env.throw_new("java/lang/RuntimeException", + format!("Failed to get array data: {:?}", e)); + return 0; + } + } + + + // Safety checks + if tokio_runtime_env_ptr == 0 { + let error = DataFusionError::Execution("Null runtime pointer".to_string()); + set_object_result_error(&mut env, callback, &error); + return 0; + } + + let access_plans = create_access_plans(row_ids, files_metadata.clone()); + + let runtime_env = RuntimeEnvBuilder::new() + .with_cache_manager(CacheManagerConfig::default() + //.with_list_files_cache(Some(list_file_cache)) TODO: //Fix this + ).build().unwrap(); + let ctx = SessionContext::new_with_config_rt(SessionConfig::new(), Arc::new(runtime_env)); + + // Create default parquet options + let file_format = ParquetFormat::new(); + let listing_options = ListingOptions::new(Arc::new(file_format)) + .with_file_extension(".parquet"); // TODO: take this as parameter + // .with_table_partition_cols(vec![("row_base".to_string(), DataType::Int32)]); // TODO: enable only for query phase + + // Ideally the executor will give this + + + + runtime_ptr.block_on(async { + + let parquet_schema = listing_options + .infer_schema(&ctx.state(), &table_path.clone()) + .await.unwrap(); + + // let total_groups = files_metadata[0].row_group_row_counts.len(); + // let mut access_plan = ParquetAccessPlan::new_all(total_groups); + // for i in 0..total_groups { + // access_plan.skip(i); + // } + + // let partitioned_files: Vec = files_metadata + // .iter() + // .zip(access_plans.await.iter()) + // .map(|(meta, access_plan)| { + // PartitionedFile::new( + // format!("{}/{}", + // table_path.prefix().to_string().trim_end_matches('/'), + // meta.object_meta().location.to_string().trim_start_matches('/') + // ), + // meta.object_meta.size + // ).with_extensions(Arc::new(access_plan.clone())) + // }) + // .collect(); + + + let access_plans = access_plans.await.unwrap(); + + let partitioned_files: Vec = files_metadata + .iter() + .zip(access_plans.iter()) + .map(|(meta, access_plan)| { + PartitionedFile::new(meta.object_meta().location.to_string(), + meta.object_meta.size + ).with_extensions(Arc::new(access_plan.clone())) + }) + .collect(); + + let file_group = FileGroup::new(partitioned_files); + + let file_source = Arc::new( + ParquetSource::default() + // provide the factory to create parquet reader without re-reading metadata + //.with_parquet_file_reader_factory(Arc::new(reader_factory)), + ); + + let mut projection_index = vec![]; + + for field_name in projections.iter() { + projection_index.push(parquet_schema.index_of(field_name).ok().unwrap()); + } + + let file_scan_config = + FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(), parquet_schema.clone(), file_source) + //.with_limit(limit) + .with_projection(Option::from(projection_index.clone())) + .with_file_group(file_group) + .build(); + + let parquet_exec = DataSourceExec::from_data_source(file_scan_config); + + // IMPORTANT: Only get one reference to each pointer + // let liquid_ctx = unsafe { &mut *(context_ptr as *mut SessionContext) }; + // let session_ctx = unsafe { Box::from_raw(context_ptr as *mut SessionContext) }; + let mut optimized_plan: Arc = parquet_exec.clone(); + + + let task_ctx = Arc::new(TaskContext::default()); + + let stream = optimized_plan.execute(0, task_ctx).unwrap(); + + let stream_ptr = Box::into_raw(Box::new(stream)) as jlong; + + stream_ptr + }) +} + +async fn create_access_plans( + row_ids: Vec, + files_metadata: Arc>, +) -> Result, DataFusionError> { + let mut access_plans = Vec::new(); + + // Sort row_ids for better processing + let mut sorted_row_ids: Vec = row_ids.iter().map(|&id| id as i64).collect(); + sorted_row_ids.sort_unstable(); + + // Process each file + for file_meta in files_metadata.iter() { + let row_base = *file_meta.row_base; + let total_row_groups = file_meta.row_group_row_counts.len(); + let mut access_plan = ParquetAccessPlan::new_all(total_row_groups); + + // Calculate file's row range + let file_total_rows: i64 = file_meta.row_group_row_counts.iter().map(|&x| x).sum(); + let file_end_row: i64 = row_base + file_total_rows; + // Filter row IDs that belong to this file + let file_row_ids: Vec = sorted_row_ids + .iter() + .copied() // or .cloned() if it's not Copy + .filter(|&id| id >= row_base && id < file_end_row) + .map(|id| { + id - row_base }) + .collect(); + + if file_row_ids.is_empty() { + // If no rows belong to this file, skip all row groups + for group_id in 0..total_row_groups { + access_plan.skip(group_id); + } + } else { + // Create cumulative row counts for row groups + let mut cumulative_group_rows: Vec = Vec::with_capacity(total_row_groups + 1); + cumulative_group_rows.push(0); + let mut current_sum = 0; + for &count in file_meta.row_group_row_counts.iter() { + current_sum += count; + cumulative_group_rows.push(current_sum); + } + // Group local row IDs by row group + let mut group_map: HashMap> = HashMap::new(); + for &row_id in &file_row_ids { + // Find the appropriate row group using binary search + let group_id = cumulative_group_rows.windows(2) + .position(|window| row_id >= window[0] as i64 && row_id < window[1] as i64) + .unwrap(); + + // Calculate relative position within the row group + let relative_pos = row_id - cumulative_group_rows[group_id]; + group_map.entry(group_id) + .or_default() + .insert(relative_pos as i32); + } + + // Process each row group + for group_id in 0..total_row_groups { + let row_group_size = file_meta.row_group_row_counts[group_id] as usize; + + if let Some(group_row_ids) = group_map.get(&group_id) { + let mut relative_row_ids: Vec = group_row_ids.iter() + .map(|&x| x as usize) + .collect(); + relative_row_ids.sort_unstable(); + + if relative_row_ids.is_empty() { + access_plan.skip(group_id); + } else if relative_row_ids.len() == row_group_size { + access_plan.scan(group_id); + } else { + // Create selectors + let mut selectors = Vec::new(); + let mut current_pos = 0; + let mut i = 0; + while i < relative_row_ids.len() { + let mut target_pos = relative_row_ids[i]; + if target_pos > current_pos { + selectors.push(RowSelector::skip(target_pos - current_pos)); + } + let mut select_count = 1; + while i + 1 < relative_row_ids.len() && + relative_row_ids[i + 1] == relative_row_ids[i] + 1 { + select_count += 1; + i += 1; + target_pos = relative_row_ids[i]; + } + selectors.push(RowSelector::select(select_count)); + current_pos = relative_row_ids[i] + 1; + i += 1; + } + if current_pos < row_group_size { + selectors.push(RowSelector::skip(row_group_size - current_pos)); + } + access_plan.set(group_id, RowGroupAccess::Selection(selectors.into())); + } + } else { + access_plan.skip(group_id); + } + } + } + + access_plans.push(access_plan); + } + + Ok(access_plans) +} diff --git a/plugins/engine-datafusion/jni/src/listing_table.rs b/plugins/engine-datafusion/jni/src/listing_table.rs new file mode 100644 index 0000000000000..27612175cf173 --- /dev/null +++ b/plugins/engine-datafusion/jni/src/listing_table.rs @@ -0,0 +1,1591 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +//! The table implementation. + +use datafusion::datasource::listing::{ + helpers::{expr_applicable_for_cols, pruned_partition_list}, + ListingTableUrl, PartitionedFile, +}; +use datafusion::{ + datasource::file_format::{file_compression_type::FileCompressionType, FileFormat}, + datasource::{create_ordering, physical_plan::FileSinkConfig}, + execution::context::SessionState, +}; +use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef}; +use arrow_schema::Schema; +use async_trait::async_trait; +use datafusion::catalog::{Session, TableProvider}; +use datafusion::common::{config_datafusion_err, config_err, internal_err, plan_err, project_schema, stats::Precision, Constraints, DataFusionError, Result, ScalarValue, SchemaExt}; +use datafusion_datasource::{ + compute_all_files_statistics, + file::FileSource, + file_groups::FileGroup, + file_scan_config::{FileScanConfig, FileScanConfigBuilder}, + schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory}, +}; +use datafusion::execution::{ + cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache}, + config::SessionConfig, +}; +use datafusion_expr::{ + dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType, +}; +use datafusion::physical_expr::schema_rewriter::PhysicalExprAdapterFactory; +use datafusion::physical_expr_common::sort_expr::LexOrdering; +use datafusion::physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics}; +use futures::{future, stream, Stream, StreamExt, TryStreamExt}; +use itertools::Itertools; +use object_store::ObjectStore; +use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc}; +use std::fs::File; +use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use regex::Regex; +use crate::FileMetadata; + +/// Indicates the source of the schema for a [`ListingTable`] +// PartialEq required for assert_eq! in tests +#[derive(Debug, Clone, Copy, PartialEq, Default)] +pub enum SchemaSource { + /// Schema is not yet set (initial state) + #[default] + Unset, + /// Schema was inferred from first table_path + Inferred, + /// Schema was specified explicitly via with_schema + Specified, +} + +/// Configuration for creating a [`ListingTable`] +/// +/// # Schema Evolution Support +/// +/// This configuration supports schema evolution through the optional +/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need: +/// +/// - **Type coercion requirements**: When you need custom logic for converting between +/// different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8) +/// - **Column mapping**: You need to map columns with a legacy name to a new name +/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`. +/// +/// If not specified, a [`DefaultSchemaAdapterFactory`] will be used, which handles +/// basic schema compatibility cases. +/// +#[derive(Debug, Clone, Default)] +pub struct ListingTableConfig { + /// Paths on the `ObjectStore` for creating `ListingTable`. + /// They should share the same schema and object store. + pub table_paths: Vec, + /// Optional `SchemaRef` for the to be created `ListingTable`. + /// + /// See details on [`ListingTableConfig::with_schema`] + pub file_schema: Option, + /// Optional [`ListingOptions`] for the to be created [`ListingTable`]. + /// + /// See details on [`ListingTableConfig::with_listing_options`] + pub options: Option, + /// Tracks the source of the schema information + schema_source: SchemaSource, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + expr_adapter_factory: Option>, +} + +impl ListingTableConfig { + /// Creates new [`ListingTableConfig`] for reading the specified URL + pub fn new(table_path: ListingTableUrl) -> Self { + Self { + table_paths: vec![table_path], + ..Default::default() + } + } + + /// Creates new [`ListingTableConfig`] with multiple table paths. + /// + /// See [`Self::infer_options`] for details on what happens with multiple paths + pub fn new_with_multi_paths(table_paths: Vec) -> Self { + Self { + table_paths, + ..Default::default() + } + } + + /// Returns the source of the schema for this configuration + pub fn schema_source(&self) -> SchemaSource { + self.schema_source + } + /// Set the `schema` for the overall [`ListingTable`] + /// + /// [`ListingTable`] will automatically coerce, when possible, the schema + /// for individual files to match this schema. + /// + /// If a schema is not provided, it is inferred using + /// [`Self::infer_schema`]. + /// + /// If the schema is provided, it must contain only the fields in the file + /// without the table partitioning columns. + /// + /// # Example: Specifying Table Schema + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{Schema, Field, DataType}; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// let schema = Arc::new(Schema::new(vec![ + /// Field::new("id", DataType::Int64, false), + /// Field::new("name", DataType::Utf8, true), + /// ])); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) // Set options first + /// .with_schema(schema); // Then set schema + /// ``` + pub fn with_schema(self, schema: SchemaRef) -> Self { + // Note: We preserve existing options state, but downstream code may expect + // options to be set. Consider calling with_listing_options() or infer_options() + // before operations that require options to be present. + debug_assert!( + self.options.is_some() || cfg!(test), + "ListingTableConfig::with_schema called without options set. \ + Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code." + ); + + Self { + file_schema: Some(schema), + schema_source: SchemaSource::Specified, + ..self + } + } + + /// Add `listing_options` to [`ListingTableConfig`] + /// + /// If not provided, format and other options are inferred via + /// [`Self::infer_options`]. + /// + /// # Example: Configuring Parquet Files with Custom Options + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// let options = ListingOptions::new(Arc::new(ParquetFormat::default())) + /// .with_file_extension(".parquet") + /// .with_collect_stat(true); + /// + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(options); // Configure file format and options + /// ``` + pub fn with_listing_options(self, listing_options: ListingOptions) -> Self { + // Note: This method properly sets options, but be aware that downstream + // methods like infer_schema() and try_new() require both schema and options + // to be set to function correctly. + debug_assert!( + !self.table_paths.is_empty() || cfg!(test), + "ListingTableConfig::with_listing_options called without table_paths set. \ + Consider calling new() or new_with_multi_paths() first to establish table paths." + ); + + Self { + options: Some(listing_options), + ..self + } + } + + /// Returns a tuple of `(file_extension, optional compression_extension)` + /// + /// For example a path ending with blah.test.csv.gz returns `("csv", Some("gz"))` + /// For example a path ending with blah.test.csv returns `("csv", None)` + fn infer_file_extension_and_compression_type( + path: &str, + ) -> Result<(String, Option)> { + let mut exts = path.rsplit('.'); + + let splitted = exts.next().unwrap_or(""); + + let file_compression_type = FileCompressionType::from_str(splitted) + .unwrap_or(FileCompressionType::UNCOMPRESSED); + + if file_compression_type.is_compressed() { + let splitted2 = exts.next().unwrap_or(""); + Ok((splitted2.to_string(), Some(splitted.to_string()))) + } else { + Ok((splitted.to_string(), None)) + } + } + + /// Infer `ListingOptions` based on `table_path` and file suffix. + /// + /// The format is inferred based on the first `table_path`. + pub async fn infer_options(self, state: &dyn Session) -> Result { + let store = if let Some(url) = self.table_paths.first() { + state.runtime_env().object_store(url)? + } else { + return Ok(self); + }; + + let file = self + .table_paths + .first() + .unwrap() + .list_all_files(state, store.as_ref(), "") + .await? + .next() + .await + .ok_or_else(|| DataFusionError::Internal("No files for table".into()))??; + + let (file_extension, maybe_compression_type) = + ListingTableConfig::infer_file_extension_and_compression_type( + file.location.as_ref(), + )?; + + let mut format_options = HashMap::new(); + if let Some(ref compression_type) = maybe_compression_type { + format_options + .insert("format.compression".to_string(), compression_type.clone()); + } + let state = state.as_any().downcast_ref::().unwrap(); + let file_format = state + .get_file_format_factory(&file_extension) + .ok_or(config_datafusion_err!( + "No file_format found with extension {file_extension}" + ))? + .create(state, &format_options)?; + + let listing_file_extension = + if let Some(compression_type) = maybe_compression_type { + format!("{}.{}", &file_extension, &compression_type) + } else { + file_extension + }; + + let listing_options = ListingOptions::new(file_format) + .with_file_extension(listing_file_extension) + .with_target_partitions(state.config().target_partitions()) + .with_collect_stat(state.config().collect_statistics()); + + Ok(Self { + table_paths: self.table_paths, + file_schema: self.file_schema, + options: Some(listing_options), + schema_source: self.schema_source, + schema_adapter_factory: self.schema_adapter_factory, + expr_adapter_factory: self.expr_adapter_factory, + }) + } + + /// Infer the [`SchemaRef`] based on `table_path`s. + /// + /// This method infers the table schema using the first `table_path`. + /// See [`ListingOptions::infer_schema`] for more details + /// + /// # Errors + /// * if `self.options` is not set. See [`Self::with_listing_options`] + pub async fn infer_schema(self, state: &dyn Session) -> Result { + match self.options { + Some(options) => { + let ListingTableConfig { + table_paths, + file_schema, + options: _, + schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, + } = self; + + let (schema, new_schema_source) = match file_schema { + Some(schema) => (schema, schema_source), // Keep existing source if schema exists + None => { + if let Some(url) = table_paths.first() { + ( + options.infer_schema(state, url).await?, + SchemaSource::Inferred, + ) + } else { + (Arc::new(Schema::empty()), SchemaSource::Inferred) + } + } + }; + + Ok(Self { + table_paths, + file_schema: Some(schema), + options: Some(options), + schema_source: new_schema_source, + schema_adapter_factory, + expr_adapter_factory: physical_expr_adapter_factory, + }) + } + None => internal_err!("No `ListingOptions` set for inferring schema"), + } + } + + /// Convenience method to call both [`Self::infer_options`] and [`Self::infer_schema`] + pub async fn infer(self, state: &dyn Session) -> Result { + self.infer_options(state).await?.infer_schema(state).await + } + + /// Infer the partition columns from `table_paths`. + /// + /// # Errors + /// * if `self.options` is not set. See [`Self::with_listing_options`] + pub async fn infer_partitions_from_path(self, state: &dyn Session) -> Result { + match self.options { + Some(options) => { + let Some(url) = self.table_paths.first() else { + return config_err!("No table path found"); + }; + let partitions = options + .infer_partitions(state, url) + .await? + .into_iter() + .map(|col_name| { + ( + col_name, + DataType::Dictionary( + Box::new(DataType::UInt16), + Box::new(DataType::Utf8), + ), + ) + }) + .collect::>(); + let options = options.with_table_partition_cols(partitions); + Ok(Self { + table_paths: self.table_paths, + file_schema: self.file_schema, + options: Some(options), + schema_source: self.schema_source, + schema_adapter_factory: self.schema_adapter_factory, + expr_adapter_factory: self.expr_adapter_factory, + }) + } + None => config_err!("No `ListingOptions` set for inferring schema"), + } + } + + /// Set the [`SchemaAdapterFactory`] for the [`ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// If not provided, a default schema adapter factory will be used. + /// + /// # Example: Custom Schema Adapter for Type Coercion + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # + /// # #[derive(Debug)] + /// # struct MySchemaAdapterFactory; + /// # impl SchemaAdapterFactory for MySchemaAdapterFactory { + /// # fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box { + /// # unimplemented!() + /// # } + /// # } + /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// let config = ListingTableConfig::new(table_paths) + /// .with_listing_options(listing_options) + /// .with_schema(table_schema) + /// .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory)); + /// ``` + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this configuration + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Set the [`PhysicalExprAdapterFactory`] for the [`ListingTable`] + /// + /// The expression adapter factory is used to create physical expression adapters that can + /// handle schema evolution and type conversions when evaluating expressions + /// with different schemas than the table schema. + /// + /// If not provided, a default physical expression adapter factory will be used unless a custom + /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used. + /// + /// See for details on this transition. + pub fn with_expr_adapter_factory( + self, + expr_adapter_factory: Arc, + ) -> Self { + Self { + expr_adapter_factory: Some(expr_adapter_factory), + ..self + } + } +} + +/// Options for creating a [`ListingTable`] +#[derive(Clone, Debug)] +pub struct ListingOptions { + /// A suffix on which files should be filtered (leave empty to + /// keep all files on the path) + pub file_extension: String, + /// The file format + pub format: Arc, + /// The expected partition column names in the folder structure. + /// See [Self::with_table_partition_cols] for details + pub table_partition_cols: Vec<(String, DataType)>, + /// Set true to try to guess statistics from the files. + /// This can add a lot of overhead as it will usually require files + /// to be opened and at least partially parsed. + pub collect_stat: bool, + /// Group files to avoid that the number of partitions exceeds + /// this limit + pub target_partitions: usize, + /// Optional pre-known sort order(s). Must be `SortExpr`s. + /// + /// DataFusion may take advantage of this ordering to omit sorts + /// or use more efficient algorithms. Currently sortedness must be + /// provided if it is known by some external mechanism, but may in + /// the future be automatically determined, for example using + /// parquet metadata. + /// + /// See + /// + /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`) + /// where each ordering consists of an individual lexicographic + /// ordering (encapsulated by a `Vec`). If there aren't + /// multiple equivalent orderings, the outer `Vec` will have a + /// single element. + pub file_sort_order: Vec>, + + pub files_metadata: Arc> +} + +impl ListingOptions { + /// Creates an options instance with the given format + /// Default values: + /// - use default file extension filter + /// - no input partition to discover + /// - one target partition + /// - do not collect statistics + pub fn new(format: Arc) -> Self { + Self { + file_extension: format.get_ext(), + format, + table_partition_cols: vec![], + collect_stat: false, + target_partitions: 1, + file_sort_order: vec![], + files_metadata: Arc::new(vec![]) + } + } + + /// Set options from [`SessionConfig`] and returns self. + /// + /// Currently this sets `target_partitions` and `collect_stat` + /// but if more options are added in the future that need to be coordinated + /// they will be synchronized thorugh this method. + pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self { + self = self.with_target_partitions(config.target_partitions()); + self = self.with_collect_stat(config.collect_statistics()); + self + } + + /// Set file extension on [`ListingOptions`] and returns self. + /// + /// # Example + /// ``` + /// # use std::sync::Arc; + /// # use datafusion::prelude::SessionContext; + /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_extension(".parquet"); + /// + /// assert_eq!(listing_options.file_extension, ".parquet"); + /// ``` + pub fn with_file_extension(mut self, file_extension: impl Into) -> Self { + self.file_extension = file_extension.into(); + self + } + + pub fn with_files_metadata(mut self, files_metadata: Arc>) -> Self { + self.files_metadata = files_metadata.clone(); + self + } + + /// Optionally set file extension on [`ListingOptions`] and returns self. + /// + /// If `file_extension` is `None`, the file extension will not be changed + /// + /// # Example + /// ``` + /// # use std::sync::Arc; + /// # use datafusion::prelude::SessionContext; + /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; + /// let extension = Some(".parquet"); + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_extension_opt(extension); + /// + /// assert_eq!(listing_options.file_extension, ".parquet"); + /// ``` + pub fn with_file_extension_opt(mut self, file_extension: Option) -> Self + where + S: Into, + { + if let Some(file_extension) = file_extension { + self.file_extension = file_extension.into(); + } + self + } + + /// Set `table partition columns` on [`ListingOptions`] and returns self. + /// + /// "partition columns," used to support [Hive Partitioning], are + /// columns added to the data that is read, based on the folder + /// structure where the data resides. + /// + /// For example, give the following files in your filesystem: + /// + /// ```text + /// /mnt/nyctaxi/year=2022/month=01/tripdata.parquet + /// /mnt/nyctaxi/year=2021/month=12/tripdata.parquet + /// /mnt/nyctaxi/year=2021/month=11/tripdata.parquet + /// ``` + /// + /// A [`ListingTable`] created at `/mnt/nyctaxi/` with partition + /// columns "year" and "month" will include new `year` and `month` + /// columns while reading the files. The `year` column would have + /// value `2022` and the `month` column would have value `01` for + /// the rows read from + /// `/mnt/nyctaxi/year=2022/month=01/tripdata.parquet` + /// + ///# Notes + /// + /// - If only one level (e.g. `year` in the example above) is + /// specified, the other levels are ignored but the files are + /// still read. + /// + /// - Files that don't follow this partitioning scheme will be + /// ignored. + /// + /// - Since the columns have the same value for all rows read from + /// each individual file (such as dates), they are typically + /// dictionary encoded for efficiency. You may use + /// [`wrap_partition_type_in_dict`] to request a + /// dictionary-encoded type. + /// + /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself. + /// + /// # Example + /// + /// ``` + /// # use std::sync::Arc; + /// # use arrow::datatypes::DataType; + /// # use datafusion::prelude::col; + /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; + /// + /// // listing options for files with paths such as `/mnt/data/col_a=x/col_b=y/data.parquet` + /// // `col_a` and `col_b` will be included in the data read from those files + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8), + /// ("col_b".to_string(), DataType::Utf8)]); + /// + /// assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8), + /// ("col_b".to_string(), DataType::Utf8)]); + /// ``` + /// + /// [Hive Partitioning]: https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.1.3/bk_system-admin-guide/content/hive_partitioned_tables.html + /// [`wrap_partition_type_in_dict`]: crate::datasource::physical_plan::wrap_partition_type_in_dict + pub fn with_table_partition_cols( + mut self, + table_partition_cols: Vec<(String, DataType)>, + ) -> Self { + self.table_partition_cols = table_partition_cols; + self + } + + /// Set stat collection on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_collect_stat(true); + /// + /// assert_eq!(listing_options.collect_stat, true); + /// ``` + pub fn with_collect_stat(mut self, collect_stat: bool) -> Self { + self.collect_stat = collect_stat; + self + } + + /// Set number of target partitions on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_target_partitions(8); + /// + /// assert_eq!(listing_options.target_partitions, 8); + /// ``` + pub fn with_target_partitions(mut self, target_partitions: usize) -> Self { + self.target_partitions = target_partitions; + self + } + + /// Set file sort order on [`ListingOptions`] and returns self. + /// + /// ``` + /// # use std::sync::Arc; + /// # use datafusion::prelude::col; + /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat}; + /// + /// // Tell datafusion that the files are sorted by column "a" + /// let file_sort_order = vec![vec![ + /// col("a").sort(true, true) + /// ]]; + /// + /// let listing_options = ListingOptions::new(Arc::new( + /// ParquetFormat::default() + /// )) + /// .with_file_sort_order(file_sort_order.clone()); + /// + /// assert_eq!(listing_options.file_sort_order, file_sort_order); + /// ``` + pub fn with_file_sort_order(mut self, file_sort_order: Vec>) -> Self { + self.file_sort_order = file_sort_order; + self + } + + /// Infer the schema of the files at the given path on the provided object store. + /// + /// If the table_path contains one or more files (i.e. it is a directory / + /// prefix of files) their schema is merged by calling [`FileFormat::infer_schema`] + /// + /// Note: The inferred schema does not include any partitioning columns. + /// + /// This method is called as part of creating a [`ListingTable`]. + pub async fn infer_schema<'a>( + &'a self, + state: &dyn Session, + table_path: &'a ListingTableUrl, + ) -> Result { + let store = state.runtime_env().object_store(table_path)?; + + let files: Vec<_> = table_path + .list_all_files(state, store.as_ref(), &self.file_extension) + .await? + // Empty files cannot affect schema but may throw when trying to read for it + .try_filter(|object_meta| future::ready(object_meta.size > 0)) + .try_collect() + .await?; + + let schema = self.format.infer_schema(state, &store, &files).await?; + + Ok(schema) + } + + /// Infers the partition columns stored in `LOCATION` and compares + /// them with the columns provided in `PARTITIONED BY` to help prevent + /// accidental corrupts of partitioned tables. + /// + /// Allows specifying partial partitions. + pub async fn validate_partitions( + &self, + state: &dyn Session, + table_path: &ListingTableUrl, + ) -> Result<()> { + if self.table_partition_cols.is_empty() { + return Ok(()); + } + + if !table_path.is_collection() { + return plan_err!( + "Can't create a partitioned table backed by a single file, \ + perhaps the URL is missing a trailing slash?" + ); + } + + let inferred = self.infer_partitions(state, table_path).await?; + + // no partitioned files found on disk + if inferred.is_empty() { + return Ok(()); + } + + let table_partition_names = self + .table_partition_cols + .iter() + .map(|(col_name, _)| col_name.clone()) + .collect_vec(); + + if inferred.len() < table_partition_names.len() { + return plan_err!( + "Inferred partitions to be {:?}, but got {:?}", + inferred, + table_partition_names + ); + } + + // match prefix to allow creating tables with partial partitions + for (idx, col) in table_partition_names.iter().enumerate() { + if &inferred[idx] != col { + return plan_err!( + "Inferred partitions to be {:?}, but got {:?}", + inferred, + table_partition_names + ); + } + } + + Ok(()) + } + + /// Infer the partitioning at the given path on the provided object store. + /// For performance reasons, it doesn't read all the files on disk + /// and therefore may fail to detect invalid partitioning. + pub(crate) async fn infer_partitions( + &self, + state: &dyn Session, + table_path: &ListingTableUrl, + ) -> Result> { + let store = state.runtime_env().object_store(table_path)?; + + // only use 10 files for inference + // This can fail to detect inconsistent partition keys + // A DFS traversal approach of the store can help here + let files: Vec<_> = table_path + .list_all_files(state, store.as_ref(), &self.file_extension) + .await? + .take(10) + .try_collect() + .await?; + + let stripped_path_parts = files.iter().map(|file| { + table_path + .strip_prefix(&file.location) + .unwrap() + .collect_vec() + }); + + let partition_keys = stripped_path_parts + .map(|path_parts| { + path_parts + .into_iter() + .rev() + .skip(1) // get parents only; skip the file itself + .rev() + .map(|s| s.split('=').take(1).collect()) + .collect_vec() + }) + .collect_vec(); + + match partition_keys.into_iter().all_equal_value() { + Ok(v) => Ok(v), + Err(None) => Ok(vec![]), + Err(Some(diff)) => { + let mut sorted_diff = [diff.0, diff.1]; + sorted_diff.sort(); + plan_err!("Found mixed partition values on disk {:?}", sorted_diff) + } + } + } +} + +/// Reads data from one or more files as a single table. +/// +/// Implements [`TableProvider`], a DataFusion data source. The files are read +/// using an [`ObjectStore`] instance, for example from local files or objects +/// from AWS S3. +/// +/// # Reading Directories +/// For example, given the `table1` directory (or object store prefix) +/// +/// ```text +/// table1 +/// ├── file1.parquet +/// └── file2.parquet +/// ``` +/// +/// A `ListingTable` would read the files `file1.parquet` and `file2.parquet` as +/// a single table, merging the schemas if the files have compatible but not +/// identical schemas. +/// +/// Given the `table2` directory (or object store prefix) +/// +/// ```text +/// table2 +/// ├── date=2024-06-01 +/// │ ├── file3.parquet +/// │ └── file4.parquet +/// └── date=2024-06-02 +/// └── file5.parquet +/// ``` +/// +/// A `ListingTable` would read the files `file3.parquet`, `file4.parquet`, and +/// `file5.parquet` as a single table, again merging schemas if necessary. +/// +/// Given the hive style partitioning structure (e.g,. directories named +/// `date=2024-06-01` and `date=2026-06-02`), `ListingTable` also adds a `date` +/// column when reading the table: +/// * The files in `table2/date=2024-06-01` will have the value `2024-06-01` +/// * The files in `table2/date=2024-06-02` will have the value `2024-06-02`. +/// +/// If the query has a predicate like `WHERE date = '2024-06-01'` +/// only the corresponding directory will be read. +/// +/// `ListingTable` also supports limit, filter and projection pushdown for formats that +/// support it as such as Parquet. +/// +/// # See Also +/// +/// 1. [`ListingTableConfig`]: Configuration options +/// 1. [`DataSourceExec`]: `ExecutionPlan` used by `ListingTable` +/// +/// [`DataSourceExec`]: crate::datasource::source::DataSourceExec +/// +/// # Example: Read a directory of parquet files using a [`ListingTable`] +/// +/// ```no_run +/// # use datafusion::prelude::SessionContext; +/// # use datafusion::error::Result; +/// # use std::sync::Arc; +/// # use datafusion::datasource::{ +/// # listing::{ +/// # ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl, +/// # }, +/// # file_format::parquet::ParquetFormat, +/// # }; +/// # #[tokio::main] +/// # async fn main() -> Result<()> { +/// let ctx = SessionContext::new(); +/// let session_state = ctx.state(); +/// let table_path = "/path/to/parquet"; +/// +/// // Parse the path +/// let table_path = ListingTableUrl::parse(table_path)?; +/// +/// // Create default parquet options +/// let file_format = ParquetFormat::new(); +/// let listing_options = ListingOptions::new(Arc::new(file_format)) +/// .with_file_extension(".parquet"); +/// +/// // Resolve the schema +/// let resolved_schema = listing_options +/// .infer_schema(&session_state, &table_path) +/// .await?; +/// +/// let config = ListingTableConfig::new(table_path) +/// .with_listing_options(listing_options) +/// .with_schema(resolved_schema); +/// +/// // Create a new TableProvider +/// let provider = Arc::new(ListingTable::try_new(config)?); +/// +/// // This provider can now be read as a dataframe: +/// let df = ctx.read_table(provider.clone()); +/// +/// // or registered as a named table: +/// ctx.register_table("my_table", provider); +/// +/// # Ok(()) +/// # } +/// ``` +#[derive(Debug, Clone)] +pub struct ListingTable { + table_paths: Vec, + /// `file_schema` contains only the columns physically stored in the data files themselves. + /// - Represents the actual fields found in files like Parquet, CSV, etc. + /// - Used when reading the raw data from files + file_schema: SchemaRef, + /// `table_schema` combines `file_schema` + partition columns + /// - Partition columns are derived from directory paths (not stored in files) + /// - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet` + table_schema: SchemaRef, + /// Indicates how the schema was derived (inferred or explicitly specified) + schema_source: SchemaSource, + options: ListingOptions, + definition: Option, + collected_statistics: FileStatisticsCache, + constraints: Constraints, + column_defaults: HashMap, + /// Optional [`SchemaAdapterFactory`] for creating schema adapters + schema_adapter_factory: Option>, + /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters + expr_adapter_factory: Option>, +} + +impl ListingTable { + /// Create new [`ListingTable`] + /// + /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`] + pub fn try_new(config: ListingTableConfig) -> Result { + // Extract schema_source before moving other parts of the config + let schema_source = config.schema_source(); + + let file_schema = config + .file_schema + .ok_or_else(|| DataFusionError::Internal("No schema provided.".into()))?; + + let options = config.options.ok_or_else(|| { + DataFusionError::Internal("No ListingOptions provided".into()) + })?; + + // Add the partition columns to the file schema + let mut builder = SchemaBuilder::from(file_schema.as_ref().to_owned()); + for (part_col_name, part_col_type) in &options.table_partition_cols { + builder.push(Field::new(part_col_name, part_col_type.clone(), false)); + } + + let table_schema = Arc::new( + builder + .finish() + .with_metadata(file_schema.metadata().clone()), + ); + + let table = Self { + table_paths: config.table_paths, + file_schema, + table_schema, + schema_source, + options, + definition: None, + collected_statistics: Arc::new(DefaultFileStatisticsCache::default()), + constraints: Constraints::default(), + column_defaults: HashMap::new(), + schema_adapter_factory: config.schema_adapter_factory, + expr_adapter_factory: config.expr_adapter_factory, + }; + + Ok(table) + } + + /// Assign constraints + pub fn with_constraints(mut self, constraints: Constraints) -> Self { + self.constraints = constraints; + self + } + + /// Assign column defaults + pub fn with_column_defaults( + mut self, + column_defaults: HashMap, + ) -> Self { + self.column_defaults = column_defaults; + self + } + + /// Set the [`FileStatisticsCache`] used to cache parquet file statistics. + /// + /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics + /// multiple times in the same session. + /// + /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query. + pub fn with_cache(mut self, cache: Option) -> Self { + self.collected_statistics = + cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default())); + self + } + + /// Specify the SQL definition for this table, if any + pub fn with_definition(mut self, definition: Option) -> Self { + self.definition = definition; + self + } + + /// Get paths ref + pub fn table_paths(&self) -> &Vec { + &self.table_paths + } + + /// Get options ref + pub fn options(&self) -> &ListingOptions { + &self.options + } + + /// Get the schema source + pub fn schema_source(&self) -> SchemaSource { + self.schema_source + } + + /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`] + /// + /// The schema adapter factory is used to create schema adapters that can + /// handle schema evolution and type conversions when reading files with + /// different schemas than the table schema. + /// + /// # Example: Adding Schema Evolution Support + /// ```rust + /// # use std::sync::Arc; + /// # use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingOptions, ListingTableUrl}; + /// # use datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter}; + /// # use datafusion::datasource::file_format::parquet::ParquetFormat; + /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType}; + /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap(); + /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default())); + /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)])); + /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema); + /// # let table = ListingTable::try_new(config).unwrap(); + /// let table_with_evolution = table + /// .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory)); + /// ``` + /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory. + pub fn with_schema_adapter_factory( + self, + schema_adapter_factory: Arc, + ) -> Self { + Self { + schema_adapter_factory: Some(schema_adapter_factory), + ..self + } + } + + /// Get the [`SchemaAdapterFactory`] for this table + pub fn schema_adapter_factory(&self) -> Option<&Arc> { + self.schema_adapter_factory.as_ref() + } + + /// Creates a schema adapter for mapping between file and table schemas + /// + /// Uses the configured schema adapter factory if available, otherwise falls back + /// to the default implementation. + fn create_schema_adapter(&self) -> Box { + let table_schema = self.schema(); + match &self.schema_adapter_factory { + Some(factory) => { + factory.create_with_projected_schema(Arc::clone(&table_schema)) + } + None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)), + } + } + + /// Creates a file source and applies schema adapter factory if available + fn create_file_source_with_schema_adapter(&self) -> Result> { + let mut source = self.options.format.file_source(); + // Apply schema adapter to source if available + // + // The source will use this SchemaAdapter to adapt data batches as they flow up the plan. + // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics. + if let Some(factory) = &self.schema_adapter_factory { + source = source.with_schema_adapter_factory(Arc::clone(factory))?; + } + Ok(source) + } + + /// If file_sort_order is specified, creates the appropriate physical expressions + fn try_create_output_ordering(&self) -> Result> { + create_ordering(&self.table_schema, &self.options.file_sort_order) + } + + fn add_path_preserving_metadata(&self, file_groups: Vec) -> Result, DataFusionError> { + // First pass: calculate cumulative row bases + let mut cumulative_row_base = 0; + let mut file_row_bases: HashMap = HashMap::new(); + + // Process files in order to calculate cumulative row bases + for group in &file_groups { + for file in group.files() { + let location = file.object_meta.location.to_string(); + let row_count = self.options.files_metadata.iter() + .find(|meta| { location.contains(meta.object_meta.location.as_ref()) }) + .map(|meta| meta.row_group_row_counts().iter().sum::() as i32) + // .unwrap_or_default(); + .expect(format!("Fail to get row count for file {}", location).as_str()); + + // Store current cumulative value as this file's row_base + file_row_bases.insert(location.to_string(), cumulative_row_base); + // Update cumulative count for next file + cumulative_row_base += row_count; + } + } + + // Second pass: create new file groups with calculated row_bases + Ok(file_groups + .into_iter() + .map(|mut group| { + let new_files: Vec = group + .files() + .iter() + .map(|file| { + let location = file.object_meta.location.as_ref(); + let row_base = *file_row_bases.get(location).unwrap_or(&0); + + PartitionedFile { + object_meta: file.object_meta.clone(), + partition_values: { + let mut values = file.partition_values.clone(); + values.push(ScalarValue::Int32(Some(row_base))); + values + }, + range: file.range.clone(), + statistics: file.statistics.clone(), + extensions: file.extensions.clone(), + metadata_size_hint: file.metadata_size_hint, + } + }) + .collect(); + + FileGroup::new(new_files) + .with_statistics(Arc::new(group.statistics_mut().cloned().unwrap_or_default())) + }) + .collect()) + } +} + +// Expressions can be used for parttion pruning if they can be evaluated using +// only the partiton columns and there are partition columns. +fn can_be_evaluted_for_partition_pruning( + partition_column_names: &[&str], + expr: &Expr, +) -> bool { + !partition_column_names.is_empty() + && expr_applicable_for_cols(partition_column_names, expr) +} + +#[async_trait] +impl TableProvider for ListingTable { + fn as_any(&self) -> &dyn Any { + self + } + + fn schema(&self) -> SchemaRef { + Arc::clone(&self.table_schema) + } + + fn constraints(&self) -> Option<&Constraints> { + Some(&self.constraints) + } + + fn table_type(&self) -> TableType { + TableType::Base + } + + + + async fn scan( + &self, + state: &dyn Session, + projection: Option<&Vec>, + filters: &[Expr], + limit: Option, + ) -> Result> { + // extract types of partition columns + let table_partition_cols = self + .options + .table_partition_cols + .iter() + .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone())) + .collect::>>()?; + + // let table_partition_col_names = table_partition_cols + // .iter() + // .map(|field| field.name().as_str()) + // .collect::>(); + // // If the filters can be resolved using only partition cols, there is no need to + // // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated + // let (partition_filters, filters): (Vec<_>, Vec<_>) = + // filters.iter().cloned().partition(|filter| { + // can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter) + // }); + + // We should not limit the number of partitioned files to scan if there are filters and limit + // at the same time. This is because the limit should be applied after the filters are applied. + let statistic_file_limit = if filters.is_empty() { limit } else { None }; + + let (mut partitioned_file_lists, statistics) = self + .list_files_for_scan(state, &vec![], statistic_file_limit) + .await?; + // + // let (mut partitioned_file_lists, statistics) = self + // .list_files_for_scan(state, &partition_filters, statistic_file_limit) + // .await?; + + // if no files need to be read, return an `EmptyExec` + if partitioned_file_lists.is_empty() { + let projected_schema = project_schema(&self.schema(), projection)?; + return Ok(Arc::new(EmptyExec::new(projected_schema))); + } + + partitioned_file_lists = self.add_path_preserving_metadata(partitioned_file_lists).expect("Unable to update Metadata for partitioned files"); + + let output_ordering = self.try_create_output_ordering()?; + match state + .config_options() + .execution + .split_file_groups_by_statistics + .then(|| { + output_ordering.first().map(|output_ordering| { + FileScanConfig::split_groups_by_statistics_with_target_partitions( + &self.table_schema, + &partitioned_file_lists, + output_ordering, + self.options.target_partitions, + ) + }) + }) + .flatten() + { + Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"), + Some(Ok(new_groups)) => { + if new_groups.len() <= self.options.target_partitions { + partitioned_file_lists = new_groups; + } else { + log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered") + } + } + None => {} // no ordering required + }; + + let Some(object_store_url) = + self.table_paths.first().map(ListingTableUrl::object_store) + else { + return Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty())))); + }; + + let file_source = self.create_file_source_with_schema_adapter()?; + + // create the execution plan + self.options + .format + .create_physical_plan( + state, + FileScanConfigBuilder::new( + object_store_url, + Arc::clone(&self.file_schema), + file_source, + ) + .with_file_groups(partitioned_file_lists) + .with_constraints(self.constraints.clone()) + .with_statistics(statistics) + .with_projection(projection.cloned()) + .with_limit(limit) + .with_output_ordering(output_ordering) + .with_table_partition_cols(table_partition_cols) + .with_expr_adapter(self.expr_adapter_factory.clone()) + .build(), + ) + .await + } + + fn supports_filters_pushdown( + &self, + filters: &[&Expr], + ) -> Result> { + let partition_column_names = self + .options + .table_partition_cols + .iter() + .map(|col| col.0.as_str()) + .collect::>(); + filters + .iter() + .map(|filter| { + if can_be_evaluted_for_partition_pruning(&partition_column_names, filter) + { + // if filter can be handled by partition pruning, it is exact + return Ok(TableProviderFilterPushDown::Exact); + } + + Ok(TableProviderFilterPushDown::Inexact) + }) + .collect() + } + + fn get_table_definition(&self) -> Option<&str> { + self.definition.as_deref() + } + + async fn insert_into( + &self, + state: &dyn Session, + input: Arc, + insert_op: InsertOp, + ) -> Result> { + // Check that the schema of the plan matches the schema of this table. + self.schema() + .logically_equivalent_names_and_types(&input.schema())?; + + let table_path = &self.table_paths()[0]; + if !table_path.is_collection() { + return plan_err!( + "Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`. \ + To append to an existing file use StreamTable, e.g. by using CREATE UNBOUNDED EXTERNAL TABLE" + ); + } + + // Get the object store for the table path. + let store = state.runtime_env().object_store(table_path)?; + + let file_list_stream = pruned_partition_list( + state, + store.as_ref(), + table_path, + &[], + &self.options.file_extension, + &self.options.table_partition_cols, + ) + .await?; + + let file_group = file_list_stream.try_collect::>().await?.into(); + let keep_partition_by_columns = + state.config_options().execution.keep_partition_by_columns; + + // Sink related option, apart from format + let config = FileSinkConfig { + original_url: String::default(), + object_store_url: self.table_paths()[0].object_store(), + table_paths: self.table_paths().clone(), + file_group, + output_schema: self.schema(), + table_partition_cols: self.options.table_partition_cols.clone(), + insert_op, + keep_partition_by_columns, + file_extension: self.options().format.get_ext(), + }; + + let orderings = self.try_create_output_ordering()?; + // It is sufficient to pass only one of the equivalent orderings: + let order_requirements = orderings.into_iter().next().map(Into::into); + + self.options() + .format + .create_writer_physical_plan(input, state, config, order_requirements) + .await + } + + fn get_column_default(&self, column: &str) -> Option<&Expr> { + self.column_defaults.get(column) + } +} + +impl ListingTable { + /// Get the list of files for a scan as well as the file level statistics. + /// The list is grouped to let the execution plan know how the files should + /// be distributed to different threads / executors. + async fn list_files_for_scan<'a>( + &'a self, + ctx: &'a dyn Session, + filters: &'a [Expr], + limit: Option, + ) -> Result<(Vec, Statistics)> { + let store = if let Some(url) = self.table_paths.first() { + ctx.runtime_env().object_store(url)? + } else { + return Ok((vec![], Statistics::new_unknown(&self.file_schema))); + }; + // list files (with partitions) + let table_partition_cols: Vec<(String, DataType)> = vec![]; // Passing empty partition cols as current partition cols are not mapped to directory path + let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| { + pruned_partition_list( + ctx, + store.as_ref(), + table_path, + filters, + &self.options.file_extension, + &table_partition_cols, + ) + })) + .await?; + let meta_fetch_concurrency = + ctx.config_options().execution.meta_fetch_concurrency; + let file_list = stream::iter(file_list).flatten_unordered(meta_fetch_concurrency); + // collect the statistics if required by the config + let files = file_list + .map(|part_file| async { + let part_file = part_file?; + let statistics = if self.options.collect_stat { + self.do_collect_statistics(ctx, &store, &part_file).await? + } else { + Arc::new(Statistics::new_unknown(&self.file_schema)) + }; + Ok(part_file.with_statistics(statistics)) + }) + .boxed() + .buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency); + + let (file_group, inexact_stats) = + get_files_with_limit(files, limit, self.options.collect_stat).await?; + + let file_groups = file_group.split_files(self.options.target_partitions); + let (mut file_groups, mut stats) = compute_all_files_statistics( + file_groups, + self.schema(), + self.options.collect_stat, + inexact_stats, + )?; + + let schema_adapter = self.create_schema_adapter(); + let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?; + + stats.column_statistics = + schema_mapper.map_column_statistics(&stats.column_statistics)?; + file_groups.iter_mut().try_for_each(|file_group| { + if let Some(stat) = file_group.statistics_mut() { + stat.column_statistics = + schema_mapper.map_column_statistics(&stat.column_statistics)?; + } + Ok::<_, DataFusionError>(()) + })?; + Ok((file_groups, stats)) + } + + /// Collects statistics for a given partitioned file. + /// + /// This method first checks if the statistics for the given file are already cached. + /// If they are, it returns the cached statistics. + /// If they are not, it infers the statistics from the file and stores them in the cache. + async fn do_collect_statistics( + &self, + ctx: &dyn Session, + store: &Arc, + part_file: &PartitionedFile, + ) -> Result> { + match self + .collected_statistics + .get_with_extra(&part_file.object_meta.location, &part_file.object_meta) + { + Some(statistics) => Ok(statistics), + None => { + let statistics = self + .options + .format + .infer_stats( + ctx, + store, + Arc::clone(&self.file_schema), + &part_file.object_meta, + ) + .await?; + let statistics = Arc::new(statistics); + self.collected_statistics.put_with_extra( + &part_file.object_meta.location, + Arc::clone(&statistics), + &part_file.object_meta, + ); + Ok(statistics) + } + } + } +} + +/// Processes a stream of partitioned files and returns a `FileGroup` containing the files. +/// +/// This function collects files from the provided stream until either: +/// 1. The stream is exhausted +/// 2. The accumulated number of rows exceeds the provided `limit` (if specified) +/// +/// # Arguments +/// * `files` - A stream of `Result` items to process +/// * `limit` - An optional row count limit. If provided, the function will stop collecting files +/// once the accumulated number of rows exceeds this limit +/// * `collect_stats` - Whether to collect and accumulate statistics from the files +/// +/// # Returns +/// A `Result` containing a `FileGroup` with the collected files +/// and a boolean indicating whether the statistics are inexact. +/// +/// # Note +/// The function will continue processing files if statistics are not available or if the +/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated +/// but files will still be collected. +async fn get_files_with_limit( + files: impl Stream>, + limit: Option, + collect_stats: bool, +) -> Result<(FileGroup, bool)> { + let mut file_group = FileGroup::default(); + // Fusing the stream allows us to call next safely even once it is finished. + let mut all_files = Box::pin(files.fuse()); + enum ProcessingState { + ReadingFiles, + ReachedLimit, + } + + let mut state = ProcessingState::ReadingFiles; + let mut num_rows = Precision::Absent; + + while let Some(file_result) = all_files.next().await { + // Early exit if we've already reached our limit + if matches!(state, ProcessingState::ReachedLimit) { + break; + } + + let file = file_result?; + + // Update file statistics regardless of state + if collect_stats { + if let Some(file_stats) = &file.statistics { + num_rows = if file_group.is_empty() { + // For the first file, just take its row count + file_stats.num_rows + } else { + // For subsequent files, accumulate the counts + num_rows.add(&file_stats.num_rows) + }; + } + } + + // Always add the file to our group + file_group.push(file); + + // Check if we've hit the limit (if one was specified) + if let Some(limit) = limit { + if let Precision::Exact(row_count) = num_rows { + if row_count > limit { + state = ProcessingState::ReachedLimit; + } + } + } + } + // If we still have files in the stream, it means that the limit kicked + // in, and the statistic could have been different had we processed the + // files in a different order. + let inexact_stats = all_files.next().await.is_some(); + Ok((file_group, inexact_stats)) +} + diff --git a/plugins/engine-datafusion/jni/src/row_id_optimizer.rs b/plugins/engine-datafusion/jni/src/row_id_optimizer.rs new file mode 100644 index 0000000000000..2e57f8d1b064a --- /dev/null +++ b/plugins/engine-datafusion/jni/src/row_id_optimizer.rs @@ -0,0 +1,219 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ +use std::fs; +use std::sync::Arc; +use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRecursion}; +use datafusion::config::ConfigOptions; +use datafusion::datasource::physical_plan::{FileScanConfig, FileScanConfigBuilder}; +use datafusion::datasource::source::DataSourceExec; +use datafusion::error::DataFusionError; +use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use datafusion::physical_optimizer::PhysicalOptimizerRule; +use datafusion::physical_plan::ExecutionPlan; +use datafusion::physical_plan::filter::FilterExec; +use arrow::datatypes::{DataType, Field, Fields, Schema}; +use arrow_schema::SchemaRef; +use datafusion::logical_expr::Operator; +use datafusion::physical_expr::PhysicalExpr; +use datafusion::physical_expr::expressions::{BinaryExpr, Column}; +use datafusion::physical_plan::projection::ProjectionExec; + +#[derive(Debug)] +pub struct FilterRowIdOptimizer; + +impl FilterRowIdOptimizer { + + fn get_projection_exec_for_data_source_exec(&self, datasource_exec: &DataSourceExec, schema: SchemaRef) -> ProjectionExec { + let mut datasource = datasource_exec.data_source().as_ref().as_any().downcast_ref::().expect("DataSource not found"); + // let _ = datasource.projection.insert(vec![0]); + let mut new_projections = datasource.clone().projection.clone().unwrap(); + + let file_schema = ParquetRecordBatchReaderBuilder::try_new(fs::File::open("/".to_owned() + &datasource.file_groups[0].files()[0].path().to_string()).unwrap()).expect("FileSchema not found for file group"); + new_projections.push(file_schema.schema().fields().len()); + + let mut fields = schema.fields().clone().to_vec(); + fields.insert(fields.len(), Arc::new(Field::new("row_base", DataType::Int32, true))); + let new_schema = Arc::new(Schema { metadata: schema.metadata().clone(), fields: Fields::from(fields) }); + + let file_scan_config = + FileScanConfigBuilder::from(datasource.clone()) + .with_source(datasource.clone().file_source.with_schema(new_schema.clone())) + .with_projection(Some(new_projections.clone())) + .build(); + + let new_datasource = DataSourceExec::from_data_source(file_scan_config); + + // 3. Create ProjectionExec for sum operation + let mut projection_exprs: Vec<(Arc, String)> = vec![]; + + // Get indices from filter's schema + let row_id_idx = new_schema.index_of("___row_id").expect("Field ___row_id not found in FileSchema"); + let row_base_idx = new_schema.index_of("row_base").expect("Field row_base not found in FileSchema"); + + // Create sum expression + let row_id_col = Arc::new(Column::new("___row_id", row_id_idx)); + let row_base_col = Arc::new(Column::new("row_base", row_base_idx)); + let sum_expr = Arc::new(BinaryExpr::new( + row_id_col, + Operator::Plus, + row_base_col, + )); + + // IMP: order of projections matters, should be same as schema column order + // Add other columns at the end of list + for field in schema.fields() { + if field.name() != "___row_id" && field.name() != "row_base" { + let idx = new_schema.index_of(field.name()).unwrap(); + projection_exprs.push(( + Arc::new(Column::new(field.name(), idx)), + field.name().to_string(), + )); + } else { + // Add sum expression as ___row_id + projection_exprs.push((sum_expr.clone(), "___row_id".to_string())); + } + } + + // Create final ProjectionExec + let projection = ProjectionExec::try_new( + projection_exprs, + new_datasource, + ).expect("Unable to create ProjectionExec"); + + projection + } + + fn get_projection_exec_for_filter_exec(&self, datasource_exec: &DataSourceExec, filter: &FilterExec, schema: SchemaRef) -> ProjectionExec { + let datasource = datasource_exec.data_source().as_ref().as_any().downcast_ref::().expect("DataSource not found"); + // let _ = datasource.projection.insert(vec![0]); + let mut new_projections = datasource.clone().projection.clone().unwrap(); + let file_schema = ParquetRecordBatchReaderBuilder::try_new(fs::File::open("/".to_owned() + &datasource.file_groups[0].files()[0].path().to_string()).unwrap()).expect("FileSchema not found for file group"); + + new_projections.push(file_schema.schema().fields().len()); + + let mut fields = schema.fields().clone().to_vec(); + fields.insert(fields.len(), Arc::new(Field::new("row_base", DataType::Int32, true))); + let new_schema = Arc::new(Schema { metadata: schema.metadata().clone(), fields: Fields::from(fields) }); + + let file_scan_config = FileScanConfigBuilder::from(datasource.clone()) + .with_source(datasource.clone().file_source.with_schema(new_schema.clone())) + .with_projection(Some(new_projections.clone())) + .build(); + + let new_datasource = DataSourceExec::from_data_source(file_scan_config); + + // 2. Create new FilterExec with updated input schema + let new_filter = FilterExec::try_new( + filter.predicate().clone(), + new_datasource.clone(), + ).expect("Unable to create FilterExec"); + // 3. Create ProjectionExec for sum operation + let mut projection_exprs: Vec<(Arc, String)> = vec![]; + + // Get indices from filter's schema + let row_id_idx = new_schema.index_of("___row_id").expect("Field ___row_id not found in FileSchema"); + let row_base_idx = new_schema.index_of("row_base").expect("Field row_base not found in FileSchema"); + + // Create sum expression + let row_id_col = Arc::new(Column::new("___row_id", row_id_idx)); + let row_base_col = Arc::new(Column::new("row_base", row_base_idx)); + let sum_expr = Arc::new(BinaryExpr::new( + row_id_col, + Operator::Plus, + row_base_col, + )); + + // IMP: order of projections matters, should be same as schema column order + // Add other columns at the end of list + for field in schema.fields() { + if field.name() != "___row_id" && field.name() != "row_base" { + let idx = new_schema.index_of(field.name()).unwrap(); + projection_exprs.push(( + Arc::new(Column::new(field.name(), idx)), + field.name().to_string(), + )); + } else { + // Add sum expression as ___row_id + projection_exprs.push((sum_expr.clone(), "___row_id".to_string())); + } + } + // println!("projection_exprs :{:?}", projection_exprs); + + // Create final ProjectionExec + let projection = ProjectionExec::try_new( + projection_exprs, + Arc::new(new_filter), + ).expect("Unable to create ProjectionExec"); + + projection + } +} + +impl PhysicalOptimizerRule for FilterRowIdOptimizer { + fn optimize( + &self, + plan: Arc, + _config: &ConfigOptions, + ) -> Result, DataFusionError> { + let mut is_optimized = false; + let rewritten = plan.transform_up(|node| { + if let Some(filter_exec) = node.as_any().downcast_ref::() { + // Check if input is DataSourceExec + if let Some(datasource_exec) = filter_exec.input().as_any().downcast_ref::() { + if !filter_exec.predicate().to_string().contains("___row_id") { + // Check if ___row_id is present + let schema = datasource_exec.schema(); + let has_row_id = schema.field_with_name("___row_id").is_ok(); + + if has_row_id { + let projection = self.get_projection_exec_for_filter_exec(datasource_exec, filter_exec, schema); + // println!("projection :{:?}", projection); + is_optimized = true; + return Ok(Transformed::new(Arc::new(projection), true, TreeNodeRecursion::Continue)); + } + } else { + if(!is_optimized) { + + let schema = datasource_exec.schema(); + let has_row_id = schema.field_with_name("___row_id").is_ok(); + + if has_row_id { + let projection = self.get_projection_exec_for_data_source_exec(datasource_exec, schema); + is_optimized = true; + return Ok(Transformed::new(Arc::new(projection), true, TreeNodeRecursion::Continue)); + } + } + } + } + } else if let Some(datasource_exec) = node.as_any().downcast_ref::() { + if(!is_optimized) { + + let schema = datasource_exec.schema(); + let has_row_id = schema.field_with_name("___row_id").is_ok(); + + if has_row_id { + let projection = self.get_projection_exec_for_data_source_exec(datasource_exec, schema); + is_optimized = true; + return Ok(Transformed::new(Arc::new(projection), true, TreeNodeRecursion::Continue)); + } + } + } + Ok(Transformed::no(node)) + })?; + + Ok(rewritten.data) + } + + fn name(&self) -> &str { + "filter_row_id_optimizer" + } + + fn schema_check(&self) -> bool { + true + } +} diff --git a/plugins/engine-datafusion/jni/src/util.rs b/plugins/engine-datafusion/jni/src/util.rs new file mode 100644 index 0000000000000..575e654680e8a --- /dev/null +++ b/plugins/engine-datafusion/jni/src/util.rs @@ -0,0 +1,210 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + */ + +use anyhow::Result; +use chrono::{DateTime, Utc}; +use datafusion::arrow::array::RecordBatch; +use jni::objects::{JObject, JObjectArray, JString}; +use jni::sys::jlong; +use jni::JNIEnv; +use object_store::{path::Path as ObjectPath, ObjectMeta}; +use std::collections::HashMap; +use std::error::Error; +use std::fs; +use datafusion::error::DataFusionError; +use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder; +use crate::FileMetadata; + +/// Set error message from a result using a Consumer Java callback +pub fn set_error_message_batch(env: &mut JNIEnv, callback: JObject, result: Result, Err>) { + if result.is_err() { + set_error_message(env, callback, Result::Err(result.unwrap_err())); + } else { + let res : Result<(), Err> = Result::Ok(()); + set_error_message(env, callback, res); + } + +} + +pub fn set_error_message(env: &mut JNIEnv, callback: JObject, result: Result<(), Err>) { + match result { + Ok(_) => { + let err_message = JObject::null(); + env.call_method( + callback, + "accept", + "(Ljava/lang/Object;)V", + &[(&err_message).into()], + ) + .expect("Failed to call error handler with null message"); + } + Err(err) => { + let err_message = env + .new_string(err.to_string()) + .expect("Couldn't create java string for error message"); + env.call_method( + callback, + "accept", + "(Ljava/lang/Object;)V", + &[(&err_message).into()], + ) + .expect("Failed to call error handler with error message"); + } + }; +} + +/// Call an ObjectResultCallback to return either a pointer to a newly created object or an error message +pub fn set_object_result( + env: &mut JNIEnv, + callback: JObject, + address: Result<*mut T, Err>, +) { + match address { + Ok(address) => set_object_result_ok(env, callback, address), + Err(err) => set_object_result_error(env, callback, &err), + }; +} + +/// Set success result by calling an ObjectResultCallback +pub fn set_object_result_ok(env: &mut JNIEnv, callback: JObject, address: *mut T) { + let err_message = JObject::null(); + env.call_method( + callback, + "callback", + "(Ljava/lang/String;J)V", + &[(&err_message).into(), (address as jlong).into()], + ) + .expect("Failed to call object result callback with address"); +} + +/// Set error result by calling an ObjectResultCallback +pub fn set_object_result_error(env: &mut JNIEnv, callback: JObject, error: &T) { + let err_message = env + .new_string(error.to_string()) + .expect("Couldn't create java string for error message"); + let address = -1 as jlong; + env.call_method( + callback, + "callback", + "(Ljava/lang/String;J)V", + &[(&err_message).into(), address.into()], + ) + .expect("Failed to call object result callback with error"); +} + + +/// Parse a string map from JNI arrays +pub fn parse_string_map( + env: &mut JNIEnv, + keys: JObjectArray, + values: JObjectArray, +) -> Result> { + let mut map = HashMap::new(); + + let keys_len = env.get_array_length(&keys)?; + let values_len = env.get_array_length(&values)?; + + if keys_len != values_len { + return Err(anyhow::anyhow!("Keys and values arrays must have the same length")); + } + + for i in 0..keys_len { + let key_obj = env.get_object_array_element(&keys, i)?; + let value_obj = env.get_object_array_element(&values, i)?; + + let key_jstring = JString::from(key_obj); + let value_jstring = JString::from(value_obj); + + let key_str = env.get_string(&key_jstring)?; + let value_str = env.get_string(&value_jstring)?; + + map.insert(key_str.to_string_lossy().to_string(), value_str.to_string_lossy().to_string()); + } + + Ok(map) +} + +// Parse a string map from JNI arrays +pub fn parse_string_arr( + env: &mut JNIEnv, + files: JObjectArray, +) -> Result> { + let length = env.get_array_length(&files).unwrap(); + let mut rust_strings: Vec = Vec::with_capacity(length as usize); + for i in 0..length { + let file_obj = env.get_object_array_element(&files, i).unwrap(); + let jstring = JString::from(file_obj); + let rust_str: String = env + .get_string(&jstring) + .expect("Couldn't get java string!") + .into(); + rust_strings.push(rust_str); + } + Ok(rust_strings) +} + +pub fn parse_string( + env: &mut JNIEnv, + file: JString +) -> Result { + let rust_str: String = env.get_string(&file) + .expect("Couldn't get java string") + .into(); + + Ok(rust_str) +} + +/// Throw a Java exception +pub fn throw_exception(env: &mut JNIEnv, message: &str) { + let _ = env.throw_new("java/lang/RuntimeException", message); +} + +pub fn create_file_metadata_from_filenames(base_path: &str, filenames: Vec) -> Result, DataFusionError> { + let mut row_base: i64 =0; + filenames.into_iter().map(|filename| { + let filename = filename.as_str(); + + // Handle both full paths and relative filenames + let full_path = if filename.starts_with('/') || filename.contains(base_path) { + // Already a full path + filename.to_string() + } else { + // Just a filename, needs base_path + format!("{}/{}", base_path.trim_end_matches('/'), filename) + }; + + let file_size = fs::metadata(&full_path).map(|m| m.len()).unwrap_or(0); + let file_result = fs::File::open(&full_path.clone()); + if(file_result.is_err()) { + return Err(DataFusionError::Execution(format!("{} {}", file_result.unwrap_err().to_string(), full_path))) + } + let file = file_result.unwrap(); + let parquet_metadata = ParquetRecordBatchReaderBuilder::try_new(file).unwrap(); + let row_group_row_counts: Vec = parquet_metadata.metadata().row_groups() + .iter() + .map(|row_group| row_group.num_rows()) + .collect(); + + + let modified = fs::metadata(&full_path) + .and_then(|m| m.modified()) + .map(|t| DateTime::::from(t)) + .unwrap_or_else(|_| Utc::now()); + + let file_meta = FileMetadata::new( + row_group_row_counts.clone(), + row_base, + ObjectMeta { + location: ObjectPath::from(full_path), + last_modified: modified, + size: file_size, + e_tag: None, + version: None, + } + ); + //TODO: ensure ordering of files + row_base += row_group_row_counts.iter().sum::(); + Ok(file_meta) + }).collect() +} diff --git a/plugins/engine-datafusion/licenses/arrow-LICENSE.txt b/plugins/engine-datafusion/licenses/arrow-LICENSE.txt new file mode 100644 index 0000000000000..7bb1330a1002b --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-LICENSE.txt @@ -0,0 +1,2261 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + +-------------------------------------------------------------------------------- + +src/arrow/util (some portions): Apache 2.0, and 3-clause BSD + +Some portions of this module are derived from code in the Chromium project, +copyright (c) Google inc and (c) The Chromium Authors and licensed under the +Apache 2.0 License or the under the 3-clause BSD license: + + Copyright (c) 2013 The Chromium Authors. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from Daniel Lemire's FrameOfReference project. + +https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp +https://github.com/lemire/FrameOfReference/blob/146948b6058a976bc7767262ad3a2ce201486b93/scripts/turbopacking64.py + +Copyright: 2013 Daniel Lemire +Home page: http://lemire.me/en/ +Project page: https://github.com/lemire/FrameOfReference +License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the TensorFlow project + +Copyright 2015 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the NumPy project. + +https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910 + +https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c + +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the Boost project + +Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from the FlatBuffers project + +Copyright 2014 Google Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the tslib project + +Copyright 2015 Microsoft Corporation. All rights reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +This project includes code from the jemalloc project + +https://github.com/jemalloc/jemalloc + +Copyright (C) 2002-2017 Jason Evans . +All rights reserved. +Copyright (C) 2007-2012 Mozilla Foundation. All rights reserved. +Copyright (C) 2009-2017 Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: +1. Redistributions of source code must retain the above copyright notice(s), + this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright notice(s), + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE +OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +-------------------------------------------------------------------------------- + +This project includes code from the Go project, BSD 3-clause license + PATENTS +weak patent termination clause +(https://github.com/golang/go/blob/master/PATENTS). + +Copyright (c) 2009 The Go Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project includes code from the hs2client + +https://github.com/cloudera/hs2client + +Copyright 2016 Cloudera Inc. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +The script ci/scripts/util_wait_for_it.sh has the following license + +Copyright (c) 2016 Giles Hall + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The script r/configure has the following license (MIT) + +Copyright (c) 2017, Jeroen Ooms and Jim Hester + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies +of the Software, and to permit persons to whom the Software is furnished to do +so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and +cpp/src/arrow/util/logging-test.cc are adapted from +Ray Project (https://github.com/ray-project/ray) (Apache 2.0). + +Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- +The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h, +cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h, +cpp/src/arrow/vendored/datetime/ios.mm, +cpp/src/arrow/vendored/datetime/tz.cpp are adapted from +Howard Hinnant's date library (https://github.com/HowardHinnant/date) +It is licensed under MIT license. + +The MIT License (MIT) +Copyright (c) 2015, 2016, 2017 Howard Hinnant +Copyright (c) 2016 Adrian Colomitchi +Copyright (c) 2017 Florian Dang +Copyright (c) 2017 Paul Thompson +Copyright (c) 2018 Tomasz Kamiński + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/util/utf8.h includes code adapted from the page + https://bjoern.hoehrmann.de/utf-8/decoder/dfa/ +with the following license (MIT) + +Copyright (c) 2008-2009 Bjoern Hoehrmann + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/xxhash/ have the following license +(BSD 2-Clause License) + +xxHash Library +Copyright (c) 2012-2014, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +You can contact the author at : +- xxHash homepage: http://www.xxhash.com +- xxHash source repository : https://github.com/Cyan4973/xxHash + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/double-conversion/ have the following license +(BSD 3-Clause License) + +Copyright 2006-2011, the V8 project authors. All rights reserved. +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/uriparser/ have the following license +(BSD 3-Clause License) + +uriparser - RFC 3986 URI parsing library + +Copyright (C) 2007, Weijia Song +Copyright (C) 2007, Sebastian Pipping +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + + * Redistributions of source code must retain the above + copyright notice, this list of conditions and the following + disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials + provided with the distribution. + + * Neither the name of the nor the names of its + contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files under dev/tasks/conda-recipes have the following license + +BSD 3-clause license +Copyright (c) 2015-2018, conda-forge +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR +TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/utfcpp/ have the following license + +Copyright 2006-2018 Nemanja Trifunovic + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +This project includes code from Apache Kudu. + + * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake + +Copyright: 2016 The Apache Software Foundation. +Home page: https://kudu.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Impala (incubating), formerly +Impala. The Impala code and rights were donated to the ASF as part of the +Incubator process after the initial code imports into Apache Parquet. + +Copyright: 2012 Cloudera, Inc. +Copyright: 2016 The Apache Software Foundation. +Home page: http://impala.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Apache Aurora. + +* dev/release/{release,changelog,release-candidate} are based on the scripts from + Apache Aurora + +Copyright: 2016 The Apache Software Foundation. +Home page: https://aurora.apache.org/ +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +This project includes code from the Google styleguide. + +* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/styleguide +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from Snappy. + +* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code + from Google's Snappy project. + +Copyright: 2009 Google Inc. All rights reserved. +Homepage: https://github.com/google/snappy +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +This project includes code from the manylinux project. + +* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py, + requirements.txt} are based on code from the manylinux project. + +Copyright: 2016 manylinux +Homepage: https://github.com/pypa/manylinux +License: The MIT License (MIT) + +-------------------------------------------------------------------------------- + +This project includes code from the cymove project: + +* python/pyarrow/includes/common.pxd includes code from the cymove project + +The MIT License (MIT) +Copyright (c) 2019 Omer Ozarslan + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE +OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The projects includes code from the Ursabot project under the dev/archery +directory. + +License: BSD 2-Clause + +Copyright 2019 RStudio, Inc. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +This project include code from mingw-w64. + +* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5 + +Copyright (c) 2009 - 2013 by the mingw-w64 project +Homepage: https://mingw-w64.org +License: Zope Public License (ZPL) Version 2.1. + +--------------------------------------------------------------------------------- + +This project include code from Google's Asylo project. + +* cpp/src/arrow/result.h is based on status_or.h + +Copyright (c) Copyright 2017 Asylo authors +Homepage: https://asylo.dev/ +License: Apache 2.0 + +-------------------------------------------------------------------------------- + +This project includes code from Google's protobuf project + +* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN +* cpp/src/arrow/util/bit_stream_utils.h contains code from wire_format_lite.h + +Copyright 2008 Google Inc. All rights reserved. +Homepage: https://developers.google.com/protocol-buffers/ +License: + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Code generated by the Protocol Buffer compiler is owned by the owner +of the input file used when generating it. This code is not +standalone and requires a support library to be linked with it. This +support library is itself covered by the above license. + +-------------------------------------------------------------------------------- + +3rdparty dependency LLVM is statically linked in certain binary distributions. +Additionally some sections of source code have been derived from sources in LLVM +and have been clearly labeled as such. LLVM has the following license: + +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. + +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. + +============================================================================== +Software from third parties included in the LLVM Project: +============================================================================== +The LLVM Project contains third party software which is under different license +terms. All such code will be identified clearly using at least one of two +mechanisms: +1) It will be in a separate directory tree with its own `LICENSE.txt` or + `LICENSE` file at the top containing the specific license and restrictions + which apply to that software, or +2) It will contain specific license and restriction terms at the top of every + file. + +-------------------------------------------------------------------------------- + +3rdparty dependency gRPC is statically linked in certain binary +distributions, like the python wheels. gRPC has the following license: + +Copyright 2014 gRPC authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache Thrift is statically linked in certain binary +distributions, like the python wheels. Apache Thrift has the following license: + +Apache Thrift +Copyright (C) 2006 - 2019, The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency Apache ORC is statically linked in certain binary +distributions, like the python wheels. Apache ORC has the following license: + +Apache ORC +Copyright 2013-2019 The Apache Software Foundation + +This product includes software developed by The Apache Software +Foundation (http://www.apache.org/). + +This product includes software developed by Hewlett-Packard: +(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +-------------------------------------------------------------------------------- + +3rdparty dependency zstd is statically linked in certain binary +distributions, like the python wheels. ZSTD has the following license: + +BSD License + +For Zstandard software + +Copyright (c) 2016-present, Facebook, Inc. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + * Neither the name Facebook nor the names of its contributors may be used to + endorse or promote products derived from this software without specific + prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency lz4 is statically linked in certain binary +distributions, like the python wheels. lz4 has the following license: + +LZ4 Library +Copyright (c) 2011-2016, Yann Collet +All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, +are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, this + list of conditions and the following disclaimer in the documentation and/or + other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR +ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency Brotli is statically linked in certain binary +distributions, like the python wheels. Brotli has the following license: + +Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency rapidjson is statically linked in certain binary +distributions, like the python wheels. rapidjson and its dependencies have the +following licenses: + +Tencent is pleased to support the open source community by making RapidJSON +available. + +Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. +All rights reserved. + +If you have downloaded a copy of the RapidJSON binary from Tencent, please note +that the RapidJSON binary is licensed under the MIT License. +If you have downloaded a copy of the RapidJSON source code from Tencent, please +note that RapidJSON source code is licensed under the MIT License, except for +the third-party components listed below which are subject to different license +terms. Your integration of RapidJSON into your own projects may require +compliance with the MIT License, as well as the other licenses applicable to +the third-party components included within RapidJSON. To avoid the problematic +JSON license in your own projects, it's sufficient to exclude the +bin/jsonchecker/ directory, as it's the only code under the JSON license. +A copy of the MIT License is included in this file. + +Other dependencies and licenses: + + Open Source Software Licensed Under the BSD License: + -------------------------------------------------------------------- + + The msinttypes r29 + Copyright (c) 2006-2013 Alexander Chemeris + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of copyright holder nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR + ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH + DAMAGE. + + Terms of the MIT License: + -------------------------------------------------------------------- + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the "Software"), + to deal in the Software without restriction, including without limitation + the rights to use, copy, modify, merge, publish, distribute, sublicense, + and/or sell copies of the Software, and to permit persons to whom the + Software is furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency snappy is statically linked in certain binary +distributions, like the python wheels. snappy has the following license: + +Copyright 2011, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + * Neither the name of Google Inc. nor the names of its contributors may be + used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=== + +Some of the benchmark data in testdata/ is licensed differently: + + - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and + is licensed under the Creative Commons Attribution 3.0 license + (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/ + for more information. + + - kppkn.gtb is taken from the Gaviota chess tablebase set, and + is licensed under the MIT License. See + https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1 + for more information. + + - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper + “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA + Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro, + which is licensed under the CC-BY license. See + http://www.ploscompbiol.org/static/license for more ifnormation. + + - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project + Gutenberg. The first three have expired copyrights and are in the public + domain; the latter does not have expired copyright, but is still in the + public domain according to the license information + (http://www.gutenberg.org/ebooks/53). + +-------------------------------------------------------------------------------- + +3rdparty dependency gflags is statically linked in certain binary +distributions, like the python wheels. gflags has the following license: + +Copyright (c) 2006, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency glog is statically linked in certain binary +distributions, like the python wheels. glog has the following license: + +Copyright (c) 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +A function gettimeofday in utilities.cc is based on + +http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd + +The license of this code is: + +Copyright (c) 2003-2008, Jouni Malinen and contributors +All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name(s) of the above-listed copyright holder(s) nor the + names of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency re2 is statically linked in certain binary +distributions, like the python wheels. re2 has the following license: + +Copyright (c) 2009 The RE2 Authors. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of Google Inc. nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +3rdparty dependency c-ares is statically linked in certain binary +distributions, like the python wheels. c-ares has the following license: + +# c-ares license + +Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS +file. + +Copyright 1998 by the Massachusetts Institute of Technology. + +Permission to use, copy, modify, and distribute this software and its +documentation for any purpose and without fee is hereby granted, provided that +the above copyright notice appear in all copies and that both that copyright +notice and this permission notice appear in supporting documentation, and that +the name of M.I.T. not be used in advertising or publicity pertaining to +distribution of the software without specific, written prior permission. +M.I.T. makes no representations about the suitability of this software for any +purpose. It is provided "as is" without express or implied warranty. + +-------------------------------------------------------------------------------- + +3rdparty dependency zlib is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. In the future +this will likely change to static linkage. zlib has the following license: + +zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.11, January 15th, 2017 + + Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +-------------------------------------------------------------------------------- + +3rdparty dependency openssl is redistributed as a dynamically linked shared +library in certain binary distributions, like the python wheels. openssl +preceding version 3 has the following license: + + LICENSE ISSUES + ============== + + The OpenSSL toolkit stays under a double license, i.e. both the conditions of + the OpenSSL License and the original SSLeay license apply to the toolkit. + See below for the actual license texts. + + OpenSSL License + --------------- + +/* ==================================================================== + * Copyright (c) 1998-2019 The OpenSSL Project. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * 3. All advertising materials mentioning features or use of this + * software must display the following acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" + * + * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to + * endorse or promote products derived from this software without + * prior written permission. For written permission, please contact + * openssl-core@openssl.org. + * + * 5. Products derived from this software may not be called "OpenSSL" + * nor may "OpenSSL" appear in their names without prior written + * permission of the OpenSSL Project. + * + * 6. Redistributions of any form whatsoever must retain the following + * acknowledgment: + * "This product includes software developed by the OpenSSL Project + * for use in the OpenSSL Toolkit (http://www.openssl.org/)" + * + * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY + * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR + * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * ==================================================================== + * + * This product includes cryptographic software written by Eric Young + * (eay@cryptsoft.com). This product includes software written by Tim + * Hudson (tjh@cryptsoft.com). + * + */ + + Original SSLeay License + ----------------------- + +/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com) + * All rights reserved. + * + * This package is an SSL implementation written + * by Eric Young (eay@cryptsoft.com). + * The implementation was written so as to conform with Netscapes SSL. + * + * This library is free for commercial and non-commercial use as long as + * the following conditions are aheared to. The following conditions + * apply to all code found in this distribution, be it the RC4, RSA, + * lhash, DES, etc., code; not just the SSL code. The SSL documentation + * included with this distribution is covered by the same copyright terms + * except that the holder is Tim Hudson (tjh@cryptsoft.com). + * + * Copyright remains Eric Young's, and as such any Copyright notices in + * the code are not to be removed. + * If this package is used in a product, Eric Young should be given attribution + * as the author of the parts of the library used. + * This can be in the form of a textual message at program startup or + * in documentation (online or textual) provided with the package. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * "This product includes cryptographic software written by + * Eric Young (eay@cryptsoft.com)" + * The word 'cryptographic' can be left out if the rouines from the library + * being used are not cryptographic related :-). + * 4. If you include any Windows specific code (or a derivative thereof) from + * the apps directory (application code) you must include an acknowledgement: + * "This product includes software written by Tim Hudson (tjh@cryptsoft.com)" + * + * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * The licence and distribution terms for any publically available version or + * derivative of this code cannot be changed. i.e. this code cannot simply be + * copied and put under another distribution licence + * [including the GNU Public Licence.] + */ + +-------------------------------------------------------------------------------- + +This project includes code from the rtools-backports project. + +* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code + from the rtools-backports project. + +Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms. +All rights reserved. +Homepage: https://github.com/r-windows/rtools-backports +License: 3-clause BSD + +-------------------------------------------------------------------------------- + +Some code from pandas has been adapted for the pyarrow codebase. pandas is +available under the 3-clause BSD license, which follows: + +pandas license +============== + +Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team +All rights reserved. + +Copyright (c) 2008-2011 AQR Capital Management, LLC +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + * Neither the name of the copyright holder nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +Some bits from DyND, in particular aspects of the build system, have been +adapted from libdynd and dynd-python under the terms of the BSD 2-clause +license + +The BSD 2-Clause License + + Copyright (C) 2011-12, Dynamic NDArray Developers + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Dynamic NDArray Developers list: + + * Mark Wiebe + * Continuum Analytics + +-------------------------------------------------------------------------------- + +Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted +for PyArrow. Ibis is released under the Apache License, Version 2.0. + +-------------------------------------------------------------------------------- + +dev/tasks/homebrew-formulae/apache-arrow.rb has the following license: + +BSD 2-Clause License + +Copyright (c) 2009-present, Homebrew contributors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +---------------------------------------------------------------------- + +cpp/src/arrow/vendored/base64.cpp has the following license + +ZLIB License + +Copyright (C) 2004-2017 René Nyffenegger + +This source code is provided 'as-is', without any express or implied +warranty. In no event will the author be held liable for any damages arising +from the use of this software. + +Permission is granted to anyone to use this software for any purpose, including +commercial applications, and to alter it and redistribute it freely, subject to +the following restrictions: + +1. The origin of this source code must not be misrepresented; you must not + claim that you wrote the original source code. If you use this source code + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + +2. Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original source code. + +3. This notice may not be removed or altered from any source distribution. + +René Nyffenegger rene.nyffenegger@adp-gmbh.ch + +-------------------------------------------------------------------------------- + +This project includes code from Folly. + + * cpp/src/arrow/vendored/ProducerConsumerQueue.h + +is based on Folly's + + * folly/Portability.h + * folly/lang/Align.h + * folly/ProducerConsumerQueue.h + +Copyright: Copyright (c) Facebook, Inc. and its affiliates. +Home page: https://github.com/facebook/folly +License: http://www.apache.org/licenses/LICENSE-2.0 + +-------------------------------------------------------------------------------- + +The file cpp/src/arrow/vendored/musl/strptime.c has the following license + +Copyright © 2005-2020 Rich Felker, et al. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +-------------------------------------------------------------------------------- + +The file cpp/cmake_modules/BuildUtils.cmake contains code from + +https://gist.github.com/cristianadam/ef920342939a89fae3e8a85ca9459b49 + +which is made available under the MIT license + +Copyright (c) 2019 Cristian Adam + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/portable-snippets/ contain code from + +https://github.com/nemequ/portable-snippets + +and have the following copyright notice: + +Each source file contains a preamble explaining the license situation +for that file, which takes priority over this file. With the +exception of some code pulled in from other repositories (such as +µnit, an MIT-licensed project which is used for testing), the code is +public domain, released using the CC0 1.0 Universal dedication (*). + +(*) https://creativecommons.org/publicdomain/zero/1.0/legalcode + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/fast_float/ contain code from + +https://github.com/lemire/fast_float + +which is made available under the Apache License 2.0. + +-------------------------------------------------------------------------------- + +The file python/pyarrow/vendored/docscrape.py contains code from + +https://github.com/numpy/numpydoc/ + +which is made available under the BSD 2-clause license. + +-------------------------------------------------------------------------------- + +The file python/pyarrow/vendored/version.py contains code from + +https://github.com/pypa/packaging/ + +which is made available under both the Apache license v2.0 and the +BSD 2-clause license. + +-------------------------------------------------------------------------------- + +The files in cpp/src/arrow/vendored/pcg contain code from + +https://github.com/imneme/pcg-cpp + +and have the following copyright notice: + +Copyright 2014-2019 Melissa O'Neill , + and the PCG Project contributors. + +SPDX-License-Identifier: (Apache-2.0 OR MIT) + +Licensed under the Apache License, Version 2.0 (provided in +LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0) +or under the MIT license (provided in LICENSE-MIT.txt and at +http://opensource.org/licenses/MIT), at your option. This file may not +be copied, modified, or distributed except according to those terms. + +Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either +express or implied. See your chosen license for details. + +-------------------------------------------------------------------------------- +r/R/dplyr-count-tally.R (some portions) + +Some portions of this file are derived from code from + +https://github.com/tidyverse/dplyr/ + +which is made available under the MIT license + +Copyright (c) 2013-2019 RStudio and others. + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the “Software”), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +The file src/arrow/util/io_util.cc contains code from the CPython project +which is made available under the Python Software Foundation License Version 2. + +-------------------------------------------------------------------------------- + +3rdparty dependency opentelemetry-cpp is statically linked in certain binary +distributions. opentelemetry-cpp is made available under the Apache License 2.0. + +Copyright The OpenTelemetry Authors +SPDX-License-Identifier: Apache-2.0 + +-------------------------------------------------------------------------------- + +ci/conan/ is based on code from Conan Package and Dependency Manager. + +Copyright (c) 2019 Conan.io + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- + +3rdparty dependency UCX is redistributed as a dynamically linked shared +library in certain binary distributions. UCX has the following license: + +Copyright (c) 2014-2015 UT-Battelle, LLC. All rights reserved. +Copyright (C) 2014-2020 Mellanox Technologies Ltd. All rights reserved. +Copyright (C) 2014-2015 The University of Houston System. All rights reserved. +Copyright (C) 2015 The University of Tennessee and The University + of Tennessee Research Foundation. All rights reserved. +Copyright (C) 2016-2020 ARM Ltd. All rights reserved. +Copyright (c) 2016 Los Alamos National Security, LLC. All rights reserved. +Copyright (C) 2016-2020 Advanced Micro Devices, Inc. All rights reserved. +Copyright (C) 2019 UChicago Argonne, LLC. All rights reserved. +Copyright (c) 2018-2020 NVIDIA CORPORATION. All rights reserved. +Copyright (C) 2020 Huawei Technologies Co., Ltd. All rights reserved. +Copyright (C) 2016-2020 Stony Brook University. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +-------------------------------------------------------------------------------- + +The file dev/tasks/r/github.packages.yml contains code from + +https://github.com/ursa-labs/arrow-r-nightly + +which is made available under the Apache License 2.0. + +-------------------------------------------------------------------------------- +.github/actions/sync-nightlies/action.yml (some portions) + +Some portions of this file are derived from code from + +https://github.com/JoshPiper/rsync-docker + +which is made available under the MIT license + +Copyright (c) 2020 Joshua Piper + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- +.github/actions/sync-nightlies/action.yml (some portions) + +Some portions of this file are derived from code from + +https://github.com/burnett01/rsync-deployments + +which is made available under the MIT license + +Copyright (c) 2019-2022 Contention +Copyright (c) 2019-2022 Burnett01 + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +-------------------------------------------------------------------------------- +java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectHashMap.java +java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectMap.java + +These file are derived from code from Netty, which is made available under the +Apache License 2.0. diff --git a/plugins/engine-datafusion/licenses/arrow-NOTICE.txt b/plugins/engine-datafusion/licenses/arrow-NOTICE.txt new file mode 100644 index 0000000000000..2089c6fb20358 --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-NOTICE.txt @@ -0,0 +1,84 @@ +Apache Arrow +Copyright 2016-2024 The Apache Software Foundation + +This product includes software developed at +The Apache Software Foundation (http://www.apache.org/). + +This product includes software from the SFrame project (BSD, 3-clause). +* Copyright (C) 2015 Dato, Inc. +* Copyright (c) 2009 Carnegie Mellon University. + +This product includes software from the Feather project (Apache 2.0) +https://github.com/wesm/feather + +This product includes software from the DyND project (BSD 2-clause) +https://github.com/libdynd + +This product includes software from the LLVM project + * distributed under the University of Illinois Open Source + +This product includes software from the google-lint project + * Copyright (c) 2009 Google Inc. All rights reserved. + +This product includes software from the mman-win32 project + * Copyright https://code.google.com/p/mman-win32/ + * Licensed under the MIT License; + +This product includes software from the LevelDB project + * Copyright (c) 2011 The LevelDB Authors. All rights reserved. + * Use of this source code is governed by a BSD-style license that can be + * Moved from Kudu http://github.com/cloudera/kudu + +This product includes software from the CMake project + * Copyright 2001-2009 Kitware, Inc. + * Copyright 2012-2014 Continuum Analytics, Inc. + * All rights reserved. + +This product includes software from https://github.com/matthew-brett/multibuild (BSD 2-clause) + * Copyright (c) 2013-2016, Matt Terry and Matthew Brett; all rights reserved. + +This product includes software from the Ibis project (Apache 2.0) + * Copyright (c) 2015 Cloudera, Inc. + * https://github.com/cloudera/ibis + +This product includes software from Dremio (Apache 2.0) + * Copyright (C) 2017-2018 Dremio Corporation + * https://github.com/dremio/dremio-oss + +This product includes software from Google Guava (Apache 2.0) + * Copyright (C) 2007 The Guava Authors + * https://github.com/google/guava + +This product include software from CMake (BSD 3-Clause) + * CMake - Cross Platform Makefile Generator + * Copyright 2000-2019 Kitware, Inc. and Contributors + +The web site includes files generated by Jekyll. + +-------------------------------------------------------------------------------- + +This product includes code from Apache Kudu, which includes the following in +its NOTICE file: + + Apache Kudu + Copyright 2016 The Apache Software Foundation + + This product includes software developed at + The Apache Software Foundation (http://www.apache.org/). + + Portions of this software were developed at + Cloudera, Inc (http://www.cloudera.com/). + +-------------------------------------------------------------------------------- + +This product includes code from Apache ORC, which includes the following in +its NOTICE file: + + Apache ORC + Copyright 2013-2019 The Apache Software Foundation + + This product includes software developed by The Apache Software + Foundation (http://www.apache.org/). + + This product includes software developed by Hewlett-Packard: + (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P diff --git a/plugins/engine-datafusion/licenses/arrow-c-data-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-c-data-17.0.0.jar.sha1 new file mode 100644 index 0000000000000..8586384ac28c3 --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-c-data-17.0.0.jar.sha1 @@ -0,0 +1 @@ +ccef140b279af80c6dda78a19c75872799c00dfb \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/arrow-format-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-format-17.0.0.jar.sha1 new file mode 100644 index 0000000000000..34fd4704eac91 --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-format-17.0.0.jar.sha1 @@ -0,0 +1 @@ +5d052f20fd1193840eb59818515e710156c364b2 \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/arrow-memory-core-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-memory-core-17.0.0.jar.sha1 new file mode 100644 index 0000000000000..ea312f4f5e51a --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-memory-core-17.0.0.jar.sha1 @@ -0,0 +1 @@ +51c5287ef5a624656bb38da7684078905b1a88c9 \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/arrow-memory-unsafe-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-memory-unsafe-17.0.0.jar.sha1 new file mode 100644 index 0000000000000..14abbb6b6b3f4 --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-memory-unsafe-17.0.0.jar.sha1 @@ -0,0 +1 @@ +c2e4966dcf68f0978d3cc935844191d2d68c61e8 \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/arrow-vector-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-vector-17.0.0.jar.sha1 new file mode 100644 index 0000000000000..8f9fddc882396 --- /dev/null +++ b/plugins/engine-datafusion/licenses/arrow-vector-17.0.0.jar.sha1 @@ -0,0 +1 @@ +16685545e4734382c1fcdaf12ac9b0a7d1fc06c0 \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/checker-qual-3.42.0.jar.sha1 b/plugins/engine-datafusion/licenses/checker-qual-3.42.0.jar.sha1 new file mode 100644 index 0000000000000..5a5268f9d126f --- /dev/null +++ b/plugins/engine-datafusion/licenses/checker-qual-3.42.0.jar.sha1 @@ -0,0 +1 @@ +638ec33f363a94d41a4f03c3e7d3dcfba64e402d \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/checker-qual-LICENSE.txt b/plugins/engine-datafusion/licenses/checker-qual-LICENSE.txt new file mode 100644 index 0000000000000..9837c6b69fdab --- /dev/null +++ b/plugins/engine-datafusion/licenses/checker-qual-LICENSE.txt @@ -0,0 +1,22 @@ +Checker Framework qualifiers +Copyright 2004-present by the Checker Framework developers + +MIT License: + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. diff --git a/plugins/engine-datafusion/licenses/checker-qual-NOTICE.txt b/plugins/engine-datafusion/licenses/checker-qual-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/plugins/engine-datafusion/licenses/flatbuffers-java-23.5.26.jar.sha1 b/plugins/engine-datafusion/licenses/flatbuffers-java-23.5.26.jar.sha1 new file mode 100644 index 0000000000000..939c91b488691 --- /dev/null +++ b/plugins/engine-datafusion/licenses/flatbuffers-java-23.5.26.jar.sha1 @@ -0,0 +1 @@ +e6320185c75767ba32c52ace087425a5a4275a50 \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/flatbuffers-java-LICENSE.txt b/plugins/engine-datafusion/licenses/flatbuffers-java-LICENSE.txt new file mode 100644 index 0000000000000..d645695673349 --- /dev/null +++ b/plugins/engine-datafusion/licenses/flatbuffers-java-LICENSE.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/plugins/engine-datafusion/licenses/flatbuffers-java-NOTICE.txt b/plugins/engine-datafusion/licenses/flatbuffers-java-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/plugins/engine-datafusion/licenses/jackson-LICENSE.txt b/plugins/engine-datafusion/licenses/jackson-LICENSE.txt new file mode 100644 index 0000000000000..f5f45d26a49d6 --- /dev/null +++ b/plugins/engine-datafusion/licenses/jackson-LICENSE.txt @@ -0,0 +1,8 @@ +This copy of Jackson JSON processor streaming parser/generator is licensed under the +Apache (Software) License, version 2.0 ("the License"). +See the License for details about distribution rights, and the +specific rights regarding derivate works. + +You may obtain a copy of the License at: + +http://www.apache.org/licenses/LICENSE-2.0 diff --git a/plugins/engine-datafusion/licenses/jackson-NOTICE.txt b/plugins/engine-datafusion/licenses/jackson-NOTICE.txt new file mode 100644 index 0000000000000..4c976b7b4cc58 --- /dev/null +++ b/plugins/engine-datafusion/licenses/jackson-NOTICE.txt @@ -0,0 +1,20 @@ +# Jackson JSON processor + +Jackson is a high-performance, Free/Open Source JSON processing library. +It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has +been in development since 2007. +It is currently developed by a community of developers, as well as supported +commercially by FasterXML.com. + +## Licensing + +Jackson core and extension components may licensed under different licenses. +To find the details that apply to this artifact see the accompanying LICENSE file. +For more information, including possible other licensing options, contact +FasterXML.com (http://fasterxml.com). + +## Credits + +A list of contributors may be found from CREDITS file, which is included +in some artifacts (usually source distributions); but is always available +from the source code management (SCM) system project uses. diff --git a/plugins/engine-datafusion/licenses/jackson-annotations-2.18.2.jar.sha1 b/plugins/engine-datafusion/licenses/jackson-annotations-2.18.2.jar.sha1 new file mode 100644 index 0000000000000..a06e1d5f28425 --- /dev/null +++ b/plugins/engine-datafusion/licenses/jackson-annotations-2.18.2.jar.sha1 @@ -0,0 +1 @@ +985d77751ebc7fce5db115a986bc9aa82f973f4a \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/jackson-databind-2.18.2.jar.sha1 b/plugins/engine-datafusion/licenses/jackson-databind-2.18.2.jar.sha1 new file mode 100644 index 0000000000000..eedbfff66c705 --- /dev/null +++ b/plugins/engine-datafusion/licenses/jackson-databind-2.18.2.jar.sha1 @@ -0,0 +1 @@ +deef8697b92141fb6caf7aa86966cff4eec9b04f \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/slf4j-api-2.0.17.jar.sha1 b/plugins/engine-datafusion/licenses/slf4j-api-2.0.17.jar.sha1 new file mode 100644 index 0000000000000..435f6c13a28b6 --- /dev/null +++ b/plugins/engine-datafusion/licenses/slf4j-api-2.0.17.jar.sha1 @@ -0,0 +1 @@ +d9e58ac9c7779ba3bf8142aff6c830617a7fe60f \ No newline at end of file diff --git a/plugins/engine-datafusion/licenses/slf4j-api-LICENSE.txt b/plugins/engine-datafusion/licenses/slf4j-api-LICENSE.txt new file mode 100644 index 0000000000000..1a3d053237bec --- /dev/null +++ b/plugins/engine-datafusion/licenses/slf4j-api-LICENSE.txt @@ -0,0 +1,24 @@ +Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland) +All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + + diff --git a/plugins/engine-datafusion/licenses/slf4j-api-NOTICE.txt b/plugins/engine-datafusion/licenses/slf4j-api-NOTICE.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionPlugin.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionPlugin.java new file mode 100644 index 0000000000000..45a2da3e6afa3 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionPlugin.java @@ -0,0 +1,170 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.node.DiscoveryNodes; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.ClusterSettings; +import org.opensearch.common.settings.IndexScopedSettings; +import org.opensearch.common.settings.Settings; +import org.opensearch.common.settings.SettingsFilter; +import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.datafusion.action.DataFusionAction; +import org.opensearch.datafusion.action.NodesDataFusionInfoAction; +import org.opensearch.datafusion.action.TransportNodesDataFusionInfoAction; +import org.opensearch.datafusion.search.DatafusionContext; +import org.opensearch.datafusion.search.DatafusionQuery; +import org.opensearch.datafusion.search.DatafusionReaderManager; +import org.opensearch.datafusion.search.DatafusionSearcher; +import org.opensearch.env.Environment; +import org.opensearch.env.NodeEnvironment; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.search.ContextEngineSearcher; +import org.opensearch.index.engine.SearchExecEngine; +import org.opensearch.index.engine.exec.FileMetadata; +import org.opensearch.plugins.ActionPlugin; +import org.opensearch.plugins.SearchEnginePlugin; +import org.opensearch.plugins.Plugin; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.rest.RestController; +import org.opensearch.rest.RestHandler; +import org.opensearch.script.ScriptService; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.client.Client; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; +import org.opensearch.watcher.ResourceWatcherService; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +/** + * Main plugin class for OpenSearch DataFusion integration. + * + */ +public class DataFusionPlugin extends Plugin implements ActionPlugin, SearchEnginePlugin { + + private DataFusionService dataFusionService; + private final boolean isDataFusionEnabled; + + /** + * Constructor for DataFusionPlugin. + * @param settings The settings for the DataFusionPlugin. + */ + public DataFusionPlugin(Settings settings) { + // For now, DataFusion is always enabled if the plugin is loaded + // In the future, this could be controlled by a feature flag + this.isDataFusionEnabled = true; + } + + /** + * Creates components for the DataFusion plugin. + * @param client The client instance. + * @param clusterService The cluster service instance. + * @param threadPool The thread pool instance. + * @param resourceWatcherService The resource watcher service instance. + * @param scriptService The script service instance. + * @param xContentRegistry The named XContent registry. + * @param environment The environment instance. + * @param nodeEnvironment The node environment instance. + * @param namedWriteableRegistry The named writeable registry. + * @param indexNameExpressionResolver The index name expression resolver instance. + * @param repositoriesServiceSupplier The supplier for the repositories service. + * @return Collection of created components + */ + @Override + public Collection createComponents( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + ResourceWatcherService resourceWatcherService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + Environment environment, + NodeEnvironment nodeEnvironment, + NamedWriteableRegistry namedWriteableRegistry, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier repositoriesServiceSupplier, + Map dataSourceCodecs + ) { + if (!isDataFusionEnabled) { + return Collections.emptyList(); + } + dataFusionService = new DataFusionService(dataSourceCodecs); + + for(DataFormat format : this.getSupportedFormats()) { + dataSourceCodecs.get(format); + } + // return Collections.emptyList(); + return Collections.singletonList(dataFusionService); + } + + @Override + public List getSupportedFormats() { + return List.of(DataFormat.CSV); + } + + /** + * Create engine per shard per format with initial view of catalog + */ + // TODO : one engine per format, does that make sense ? + // TODO : Engine shouldn't just be SearcherOperations, it can be more ? + @Override + public SearchExecEngine + createEngine(DataFormat dataFormat,Collection formatCatalogSnapshot, ShardPath shardPath) throws IOException { + return new DatafusionEngine(dataFormat, formatCatalogSnapshot, dataFusionService, shardPath); + } + + /** + * Gets the REST handlers for the DataFusion plugin. + * @param settings The settings for the plugin. + * @param restController The REST controller instance. + * @param clusterSettings The cluster settings instance. + * @param indexScopedSettings The index scoped settings instance. + * @param settingsFilter The settings filter instance. + * @param indexNameExpressionResolver The index name expression resolver instance. + * @param nodesInCluster The supplier for the discovery nodes. + * @return A list of REST handlers. + */ + @Override + public List getRestHandlers( + Settings settings, + RestController restController, + ClusterSettings clusterSettings, + IndexScopedSettings indexScopedSettings, + SettingsFilter settingsFilter, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier nodesInCluster + ) { + if (!isDataFusionEnabled) { + return Collections.emptyList(); + } + return List.of(new DataFusionAction()); + } + + /** + * Gets the list of action handlers for the DataFusion plugin. + * @return A list of action handlers. + */ + @Override + public List> getActions() { + if (!isDataFusionEnabled) { + return Collections.emptyList(); + } + return List.of(new ActionHandler<>(NodesDataFusionInfoAction.INSTANCE, TransportNodesDataFusionInfoAction.class)); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionQueryJNI.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionQueryJNI.java new file mode 100644 index 0000000000000..ddb06b6b9e8dc --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionQueryJNI.java @@ -0,0 +1,158 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +/** + * JNI wrapper for DataFusion operations + */ +public class DataFusionQueryJNI { + + private static boolean libraryLoaded = false; + + static { + loadNativeLibrary(); + } + + /** + * Private constructor to prevent instantiation of utility class. + */ + private DataFusionQueryJNI() { + // Utility class + } + + /** + * Load the native library from resources + */ + private static synchronized void loadNativeLibrary() { + if (libraryLoaded) { + return; + } + + try { + // Try to load the library directly + System.loadLibrary("opensearch_datafusion_jni"); + libraryLoaded = true; + } catch (UnsatisfiedLinkError e) { + // Try loading from resources + try { + String osName = System.getProperty("os.name").toLowerCase(); + String libExtension = osName.contains("windows") ? ".dll" : (osName.contains("mac") ? ".dylib" : ".so"); + String libName = "libopensearch_datafusion_jni" + libExtension; + + java.io.InputStream is = DataFusionQueryJNI.class.getResourceAsStream("/native/" + libName); + if (is != null) { + java.io.File tempFile = java.io.File.createTempFile("libopensearch_datafusion_jni", libExtension); + tempFile.deleteOnExit(); + + try (java.io.FileOutputStream fos = new java.io.FileOutputStream(tempFile)) { + byte[] buffer = new byte[8192]; + int bytesRead; + while ((bytesRead = is.read(buffer)) != -1) { + fos.write(buffer, 0, bytesRead); + } + } + + System.load(tempFile.getAbsolutePath()); + libraryLoaded = true; + } else { + throw new RuntimeException("Native library not found: " + libName, e); + } + } catch (Exception ex) { + throw new RuntimeException("Failed to load native library", ex); + } + } + } + + /** + * Create a new global runtime environment + * @return runtime env pointer for subsequent operations + */ + public static native long createGlobalRuntime(); + + public static native long createTokioRuntime(); + + /** + * Closes global runtime environment + * @param pointer the runtime environment pointer to close + * @return status code + */ + public static native long closeGlobalRuntime(long pointer); + + /** + * Get version information + * @return JSON string with version information + */ + public static native String getVersionInfo(); + + /** + * Create a new DataFusion session context + * @param runtimeId the global runtime environment ID + * @return context ID for subsequent operations + */ + public static native long createSessionContext(long runtimeId); + + /** + * Close and cleanup a DataFusion context + * @param contextId the context ID to close + */ + public static native void closeSessionContext(long contextId); + + /** + * Execute a Substrait query plan + * @param cachePtr the session context ID + * @param substraitPlan the serialized Substrait query plan + * @return stream pointer for result iteration + */ + public static native long executeQueryPhase(long cachePtr, byte[] substraitPlan, long runtimePtr); + + /** + * Execute a Substrait query plan + * @param cachePtr the session context ID + * @param rowIds row ids for which record needs to fetch + * @param runtimePtr runtime pointer + * @return stream pointer for result iteration + */ + + // TODO: tie this to actual FetchPhase + public static native long executeFetchPhase(long cachePtr, long[] rowIds, String[] projections, long runtimePtr); + + public static native long createDatafusionReader(String path, String[] files); + + public static native void closeDatafusionReader(long ptr); + + /** + * Register a directory with CSV files + * @param contextId the session context ID + * @param tableName the table name to register + * @param directoryPath the directory path containing CSV files + * @param fileNames array of file names to register + * @return status code + */ + public static native int registerCsvDirectory(long contextId, String tableName, String directoryPath, String[] fileNames); + + /** + * Check if stream has more data + * @param streamPtr the stream pointer + * @return true if more data available + */ + public static native boolean streamHasNext(long streamPtr); + + /** + * Get next batch from stream + * @param streamPtr the stream pointer + * @return byte array containing the next batch, or null if no more data + */ + public static native byte[] streamNext(long streamPtr); + + /** + * Close and cleanup a result stream + * @param streamPtr the stream pointer to close + */ + public static native void closeStream(long streamPtr); +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionService.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionService.java new file mode 100644 index 0000000000000..9548ced599723 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionService.java @@ -0,0 +1,210 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.lifecycle.AbstractLifecycleComponent; +import org.opensearch.common.util.concurrent.ConcurrentCollections; +import org.opensearch.common.util.concurrent.ConcurrentMapLong; +import org.opensearch.datafusion.core.GlobalRuntimeEnv; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +import java.util.List; +import java.util.Map; +import java.util.concurrent.CompletableFuture; + +/** + * Service for managing DataFusion contexts and operations - essentially like SearchService + */ +public class DataFusionService extends AbstractLifecycleComponent { + + private static final Logger logger = LogManager.getLogger(DataFusionService.class); + private final ConcurrentMapLong sessionEngines = ConcurrentCollections.newConcurrentMapLongWithAggressiveConcurrency(); + + private final DataSourceRegistry dataSourceRegistry; + private final GlobalRuntimeEnv globalRuntimeEnv; + + /** + * Creates a new DataFusion service instance. + */ + public DataFusionService(Map dataSourceCodecs) { + this.dataSourceRegistry = new DataSourceRegistry(dataSourceCodecs); + + // to verify jni + String version = DataFusionQueryJNI.getVersionInfo(); + this.globalRuntimeEnv = new GlobalRuntimeEnv(); + } + + @Override + protected void doStart() { + logger.info("Starting DataFusion service"); + try { + // Initialize the data source registry + // Test that at least one data source is available + if (!dataSourceRegistry.hasCodecs()) { + logger.warn("No data sources available"); + } else { + logger.info( + "DataFusion service started successfully with {} data sources: {}", + dataSourceRegistry.getCodecNames().size(), + dataSourceRegistry.getCodecNames() + ); + + } + } catch (Exception e) { + logger.error("Failed to start DataFusion service", e); + throw new RuntimeException("Failed to initialize DataFusion service", e); + } + } + + @Override + protected void doStop() { + logger.info("Stopping DataFusion service"); + + // Close all session contexts + for (Long sessionId : sessionEngines.keySet()) { + try { + closeSessionContext(sessionId).get(); + } catch (Exception e) { + logger.warn("Error closing session context {}", sessionId, e); + } + } + sessionEngines.clear(); + globalRuntimeEnv.close(); + logger.info("DataFusion service stopped"); + } + + @Override + protected void doClose() { + doStop(); + } + + /** + * Register a directory with list of files to create a runtime environment + * with listing files cache of DataFusion + * + * @param directoryPath path to the directory containing files + * @param fileNames list of file names in the directory + * @return runtime environment ID + */ + public CompletableFuture registerDirectory(String directoryPath, List fileNames) { + DataSourceCodec engine = dataSourceRegistry.getDefaultEngine(); + if (engine == null) { + return CompletableFuture.failedFuture(new IllegalStateException("No DataFusion engine available")); + } + + logger.debug( + "Registering directory {} with {} files using engine {}", + directoryPath, + fileNames.size(), + engine.getClass().getSimpleName() + ); + + return engine.registerDirectory(directoryPath, fileNames, globalRuntimeEnv.getPointer()); + } + + /** + * Create a session context + * + * @return session context ID + */ + public CompletableFuture createSessionContext() { + long runtimeEnvironmentId = globalRuntimeEnv.getPointer(); + DataSourceCodec codec = dataSourceRegistry.getDefaultEngine(); + if (codec == null) { + return CompletableFuture.failedFuture(new IllegalArgumentException("Runtime environment not found: " + runtimeEnvironmentId)); + } + + logger.debug( + "Creating session context for runtime environment {} using engine {}", + runtimeEnvironmentId, + codec.getClass().getSimpleName() + ); + + return codec.createSessionContext(runtimeEnvironmentId).thenApply(sessionId -> { + // Track which engine created this session context + sessionEngines.put(sessionId, codec); + logger.debug("Created session context {} with engine {}", sessionId, codec.getClass().getSimpleName()); + return sessionId; + }); + } + + /** + * Execute a query accepting substrait plan bytes and run via session context + * + * @param sessionContextId the session context ID + * @param substraitPlanBytes the substrait plan as byte array + * @return record batch stream containing query results + */ + public CompletableFuture executeSubstraitQuery(long sessionContextId, byte[] substraitPlanBytes) { + DataSourceCodec engine = sessionEngines.get(sessionContextId); + if (engine == null) { + return CompletableFuture.failedFuture(new IllegalArgumentException("Session context not found: " + sessionContextId)); + } + + logger.debug( + "Executing substrait query for session {} with plan size {} bytes using engine {}", + sessionContextId, + substraitPlanBytes.length, + engine.getClass().getSimpleName() + ); + + return engine.executeSubstraitQuery(sessionContextId, substraitPlanBytes); + } + + public long getRuntimePointer() { + return globalRuntimeEnv.getPointer(); + } + + public long getTokioRuntimePointer() { + return globalRuntimeEnv.getTokioRuntimePtr(); + } + + /** + * Close the session context and clean up resources + * + * @param sessionContextId the session context ID to close + * @return future that completes when cleanup is done + */ + public CompletableFuture closeSessionContext(long sessionContextId) { + DataSourceCodec engine = sessionEngines.remove(sessionContextId); + if (engine == null) { + logger.debug("Session context {} not found or already closed", sessionContextId); + return CompletableFuture.completedFuture(null); + } + + logger.debug("Closing session context {} using engine {}", sessionContextId, engine.getClass().getSimpleName()); + + return engine.closeSessionContext(sessionContextId); + } + + /** + * Get version information from available codecs + * @return JSON version string + */ + public String getVersion() { + StringBuilder version = new StringBuilder(); + version.append("{\"codecs\":["); + + boolean first = true; + for (DataFormat engineName : this.dataSourceRegistry.getCodecNames()) { + if (!first) { + version.append(","); + } + version.append("{\"name\":\"").append(engineName).append("\"}"); + first = false; + } + + version.append("]}"); + return version.toString(); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataSourceRegistry.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataSourceRegistry.java new file mode 100644 index 0000000000000..1d274116aac94 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataSourceRegistry.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; + +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * Registry for DataFusion data source codecs. + */ +public class DataSourceRegistry { + + private static final Logger logger = LogManager.getLogger(DataSourceRegistry.class); + + private final ConcurrentHashMap codecs = new ConcurrentHashMap<>(); + + public DataSourceRegistry(Map dataSourceCodecMap) { + codecs.putAll(dataSourceCodecMap); + } + + /** + * Check if any codecs are available. + * + * @return true if codecs are available, false otherwise + */ + public boolean hasCodecs() { + return !codecs.isEmpty(); + } + + /** + * Get the names of all registered codecs. + * + * @return list of codec names + */ + public List getCodecNames() { + return new ArrayList<>(codecs.keySet()); + } + + /** + * Get the default codec (first available codec). + * + * @return the default codec, or null if none available + */ + public DataSourceCodec getDefaultEngine() { + if (codecs.isEmpty()) { + return null; + } + return codecs.values().iterator().next(); + } + + /** + * Get a codec by name. + * + * @param name the codec name + * @return the codec, or null if not found + */ + public DataSourceCodec getCodec(String name) { + return codecs.get(name); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionEngine.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionEngine.java new file mode 100644 index 0000000000000..892cb70f22b4b --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionEngine.java @@ -0,0 +1,267 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.FieldVector; +import org.apache.arrow.vector.IntVector; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.OpenSearchException; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.lease.Releasables; +import org.opensearch.common.util.BigArrays; +import org.opensearch.common.xcontent.XContentFactory; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.core.xcontent.XContentBuilder; +import org.opensearch.datafusion.search.DatafusionContext; +import org.opensearch.datafusion.search.DatafusionQuery; +import org.opensearch.datafusion.search.DatafusionQueryPhaseExecutor; +import org.opensearch.datafusion.search.DatafusionReader; +import org.opensearch.datafusion.search.DatafusionReaderManager; +import org.opensearch.datafusion.search.DatafusionSearcher; +import org.opensearch.datafusion.search.DatafusionSearcherSupplier; +import org.opensearch.index.engine.CatalogSnapshotAwareRefreshListener; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.EngineSearcherSupplier; +import org.opensearch.index.engine.SearchExecEngine; +import org.opensearch.index.engine.exec.FileMetadata; +import org.opensearch.index.mapper.*; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.aggregations.SearchResultsCollector; +import org.opensearch.search.internal.ReaderContext; +import org.opensearch.search.internal.ShardSearchRequest; +import org.opensearch.search.query.QueryPhaseExecutor; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.search.query.GenericQueryPhaseSearcher; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.*; +import java.util.function.Function; + +public class DatafusionEngine extends SearchExecEngine { + + private static final Logger logger = LogManager.getLogger(DatafusionEngine.class); + + private DataFormat dataFormat; + private DatafusionReaderManager datafusionReaderManager; + private DataFusionService datafusionService; + + public DatafusionEngine(DataFormat dataFormat, Collection formatCatalogSnapshot, DataFusionService dataFusionService, ShardPath shardPath) throws IOException { + this.dataFormat = dataFormat; + + this.datafusionReaderManager = new DatafusionReaderManager(shardPath.getDataPath().toString(), formatCatalogSnapshot, dataFormat.getName()); + this.datafusionService = dataFusionService; + } + + @Override + public GenericQueryPhaseSearcher getQueryPhaseSearcher() { + return new DatafusionQueryPhaseSearcher(); + } + + @Override + public QueryPhaseExecutor getQueryPhaseExecutor() { + return new DatafusionQueryPhaseExecutor(); + } + + @Override + public DatafusionContext createContext(ReaderContext readerContext, ShardSearchRequest request, SearchShardTarget searchShardTarget, SearchShardTask task, BigArrays bigArrays) throws IOException { + DatafusionContext datafusionContext = new DatafusionContext(readerContext, request, searchShardTarget, task, this, bigArrays); + // Parse source + datafusionContext.datafusionQuery(new DatafusionQuery(request.source().queryPlanIR(), new ArrayList<>())); + return datafusionContext; + } + + @Override + public EngineSearcherSupplier acquireSearcherSupplier(Function wrapper) throws EngineException { + return acquireSearcherSupplier(wrapper, Engine.SearcherScope.EXTERNAL); + } + + @Override + public EngineSearcherSupplier acquireSearcherSupplier(Function wrapper, Engine.SearcherScope scope) throws EngineException { + // TODO : wrapper is ignored + EngineSearcherSupplier searcher = null; + // TODO : refcount needs to be revisited - add proper tests for exception etc + try { + DatafusionReader reader = datafusionReaderManager.acquire(); + searcher = new DatafusionSearcherSupplier(null) { + @Override + protected DatafusionSearcher acquireSearcherInternal(String source) { + return new DatafusionSearcher(source, reader, () -> {}); + } + + @Override + protected void doClose() { + try { + reader.decRef(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + }; + } catch (Exception ex) { + // TODO + } + return searcher; + } + + @Override + public DatafusionSearcher acquireSearcher(String source) throws EngineException { + return acquireSearcher(source, Engine.SearcherScope.EXTERNAL); + } + + @Override + public DatafusionSearcher acquireSearcher(String source, Engine.SearcherScope scope) throws EngineException { + return acquireSearcher(source, scope, Function.identity()); + } + + @Override + public DatafusionSearcher acquireSearcher(String source, Engine.SearcherScope scope, Function wrapper) throws EngineException { + DatafusionSearcherSupplier releasable = null; + try { + DatafusionSearcherSupplier searcherSupplier = releasable = (DatafusionSearcherSupplier) acquireSearcherSupplier(wrapper, scope); + DatafusionSearcher searcher = searcherSupplier.acquireSearcher(source); + releasable = null; + return new DatafusionSearcher( + source, + searcher.getReader(), + () -> Releasables.close(searcher, searcherSupplier) + ); + } finally { + Releasables.close(releasable); + } + } + + @Override + public DatafusionReaderManager getReferenceManager(Engine.SearcherScope scope) { + return datafusionReaderManager; + } + + @Override + public CatalogSnapshotAwareRefreshListener getRefreshListener(Engine.SearcherScope scope) { + return datafusionReaderManager; + } + + @Override + public boolean assertSearcherIsWarmedUp(String source, Engine.SearcherScope scope) { + return false; + } + + @Override + public void executeQueryPhase(DatafusionContext context) { + Map finalRes = new HashMap<>(); + ArrayList rowIdResult = new ArrayList<>(); + + try { + DatafusionSearcher datafusionSearcher = context.getEngineSearcher(); + long streamPointer = datafusionSearcher.search(context.getDatafusionQuery(), datafusionService.getTokioRuntimePointer()); + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + RecordBatchStream stream = new RecordBatchStream(streamPointer, datafusionService.getTokioRuntimePointer() , allocator); + + // We can have some collectors passed like this which can collect the results and convert to InternalAggregation + // Is the possible? need to check + + SearchResultsCollector collector = new SearchResultsCollector() { + @Override + public void collect(RecordBatchStream value) { + VectorSchemaRoot root = value.getVectorSchemaRoot(); + for (Field field : root.getSchema().getFields()) { + String fieldName = field.getName(); + FieldVector fieldVector = root.getVector(fieldName); + Object[] fieldValues = new Object[fieldVector.getValueCount()]; + if (fieldName.equals("___row_id")) { + IntVector rowIdVector = (IntVector) root.getVector(fieldName); + for(int i=0; i entry : finalRes.entrySet()) { + logger.info("{}: {}", entry.getKey(), java.util.Arrays.toString(entry.getValue())); + } + + } catch (Exception exception) { + logger.error("Failed to execute Substrait query plan", exception); + } + context.setDfQueryPhaseResult(rowIdResult); + context.setDFResults(finalRes); + } + + + /** + * Executes fetch phase, DataFusion query should contain projections for fields + * @param context DataFusion context + * @throws IOException + */ + @Override + public void executeFetchPhase(DatafusionContext context) throws IOException { + List rowIds = context.getDfQueryResult(); + + // preprocess + context.getDatafusionQuery().setFetchPhaseContext(rowIds); + DatafusionSearcher datafusionSearcher = context.getEngineSearcher(); + long streamPointer = datafusionSearcher.search(context.getDatafusionQuery(), datafusionService.getTokioRuntimePointer()); // update to handle fetchPhase query + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + RecordBatchStream stream = new RecordBatchStream(streamPointer, datafusionService.getTokioRuntimePointer() , allocator); + + // postprocess + context.setDfFetchPhaseResult(generateByteRefs(context, stream)); + } + + private List generateByteRefs(DatafusionContext context, RecordBatchStream recordBatchStream) throws IOException { + MapperService mapperService = context.mapperService(); + List byteRefs = new ArrayList<>(); + while(recordBatchStream.loadNextBatch().join()) { + VectorSchemaRoot vectorSchemaRoot = recordBatchStream.getVectorSchemaRoot(); + List fieldVectorList = vectorSchemaRoot.getFieldVectors(); + for(int i=0; i { + + // How to pass table providers that search other engines such as Lucene ? + @Override + public boolean searchWith( + DatafusionContext context, + DatafusionSearcher searcher, + DatafusionQuery datafusionQuery, + LinkedList collectors, + boolean hasFilterCollector, + boolean hasTimeout + ) throws IOException { + + List> searchCollectors = new ArrayList<>(); // TODO : derive from collectors ? + + // Execute DataFusion query with Substrait plan + searcher.search(datafusionQuery, searchCollectors); + + // Process results into QuerySearchResult + context.queryResult().searchTimedOut(false); + + return false; // No rescoring for DataFusion + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ErrorUtil.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ErrorUtil.java new file mode 100644 index 0000000000000..6d0486d213a55 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ErrorUtil.java @@ -0,0 +1,20 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +/** + * Utility class for error handling in DataFusion operations. + */ +public class ErrorUtil { + private ErrorUtil() {} + + static boolean containsError(String errString) { + return errString != null && !errString.isEmpty(); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ObjectResultCallback.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ObjectResultCallback.java new file mode 100644 index 0000000000000..d6de1fdace339 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ObjectResultCallback.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +interface ObjectResultCallback { + void callback(String errMessage, long value); +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/RecordBatchStream.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/RecordBatchStream.java new file mode 100644 index 0000000000000..ea90468215012 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/RecordBatchStream.java @@ -0,0 +1,139 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import org.apache.arrow.c.ArrowArray; +import org.apache.arrow.c.ArrowSchema; +import org.apache.arrow.c.CDataDictionaryProvider; +import org.apache.arrow.c.Data; +import org.apache.arrow.memory.BufferAllocator; +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.ArrowType; +import org.apache.arrow.vector.types.pojo.Field; +import org.apache.arrow.vector.types.pojo.Schema; +import org.opensearch.datafusion.core.SessionContext; + +import java.util.concurrent.CompletableFuture; + +import static org.apache.arrow.c.Data.importField; + +/** + * Represents a stream of Apache Arrow record batches from DataFusion query execution. + * Provides a Java interface to iterate through query results in a memory-efficient way. + */ +public class RecordBatchStream { + + private final long streamPointer; + private final BufferAllocator allocator; + private final CDataDictionaryProvider dictionaryProvider; + private boolean initialized = false; + private VectorSchemaRoot vectorSchemaRoot = null; + private long runtimePtr; + + /** + * Creates a new RecordBatchStream for the given stream pointer + * @param streamId the stream pointer + * @param allocator memory allocator for Arrow vectors + */ + public RecordBatchStream(long streamId, long runtimePtr, BufferAllocator allocator) { + this.streamPointer = streamId; + this.allocator = allocator; + this.runtimePtr = runtimePtr; + this.dictionaryProvider = new CDataDictionaryProvider(); + } + + /** + * Gets the Arrow VectorSchemaRoot for accessing the current batch data + * @return the VectorSchemaRoot containing the current batch + */ + public VectorSchemaRoot getVectorSchemaRoot() { + ensureInitialized(); + return vectorSchemaRoot; + } + + private Schema getSchema() { + // Native method is not async, but use a future to store the result for convenience + CompletableFuture result = new CompletableFuture<>(); + getSchema(streamPointer, (errString, arrowSchemaAddress) -> { + if (ErrorUtil.containsError(errString)) { + result.completeExceptionally(new RuntimeException(errString)); + } else { + try { + ArrowSchema arrowSchema = ArrowSchema.wrap(arrowSchemaAddress); + Schema schema = importSchema(allocator, arrowSchema, dictionaryProvider); + result.complete(schema); + } catch (Exception e) { + result.completeExceptionally(e); + } + } + }); + return result.join(); + } + + private Schema importSchema(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) { + Field structField = importField(allocator, schema, provider); + if (structField.getType().getTypeID() != ArrowType.ArrowTypeID.Struct) { + throw new IllegalArgumentException("Cannot import schema: ArrowSchema describes non-struct type"); + } + return new Schema(structField.getChildren(), structField.getMetadata()); + } + + private void ensureInitialized() { + if (!initialized) { + Schema schema = getSchema(); + this.vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator); + } + initialized = true; + } + + /** + * Loads the next batch of data from the stream + * @return a CompletableFuture that completes with true if more data is available, false if end of stream + */ + public CompletableFuture loadNextBatch() { + ensureInitialized(); + long runtimePointer = this.runtimePtr; + CompletableFuture result = new CompletableFuture<>(); + next(runtimePointer, streamPointer, (errString, arrowArrayAddress) -> { + if (ErrorUtil.containsError(errString)) { + result.completeExceptionally(new RuntimeException(errString)); + } else if (arrowArrayAddress == 0) { + // Reached end of stream + result.complete(false); + } else { + try { + ArrowArray arrowArray = ArrowArray.wrap(arrowArrayAddress); + Data.importIntoVectorSchemaRoot(allocator, arrowArray, vectorSchemaRoot, dictionaryProvider); + result.complete(true); + } catch (Exception e) { + result.completeExceptionally(e); + } + } + }); + return result; + } + + /** + * Closes the stream and releases all associated resources + * @throws Exception if an error occurs during cleanup + */ + public void close() throws Exception { + closeStream(streamPointer); + dictionaryProvider.close(); + if (initialized) { + vectorSchemaRoot.close(); + } + } + + private static native void next(long runtime, long pointer, ObjectResultCallback callback); + + private static native void getSchema(long pointer, ObjectResultCallback callback); + + private static native void closeStream(long pointer); +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/DataFusionAction.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/DataFusionAction.java new file mode 100644 index 0000000000000..99695d2c96266 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/DataFusionAction.java @@ -0,0 +1,67 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.action; + +import org.opensearch.rest.BaseRestHandler; +import org.opensearch.rest.RestRequest; +import org.opensearch.rest.action.RestToXContentListener; +import org.opensearch.transport.client.node.NodeClient; + +import java.util.List; + +import static org.opensearch.rest.RestRequest.Method.GET; + +/** + * REST handler for DataFusion information operations. + * It handles GET requests for retrieving DataFusion server information. + */ +public class DataFusionAction extends BaseRestHandler { + + /** + * Constructor for DataFusionRestHandler. + */ + public DataFusionAction() {} + + /** + * Returns the name of the action. + * @return The name of the action. + */ + @Override + public String getName() { + return "datafusion_info_action"; + } + + /** + * Returns the list of routes for the action. + * @return The list of routes for the action. + */ + @Override + public List routes() { + return List.of(new Route(GET, "/_plugins/datafusion/info"), new Route(GET, "/_plugins/datafusion/info/{nodeId}")); + } + + /** + * Prepares the request for the action. + * @param request The REST request. + * @param client The node client. + * @return The rest channel consumer. + */ + @Override + protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) { + String nodeId = request.param("nodeId"); + if (nodeId != null) { + // Query specific node + NodesDataFusionInfoRequest nodesRequest = new NodesDataFusionInfoRequest(nodeId); + return channel -> client.execute(NodesDataFusionInfoAction.INSTANCE, nodesRequest, new RestToXContentListener<>(channel)); + } else { + NodesDataFusionInfoRequest nodesRequest = new NodesDataFusionInfoRequest(); + return channel -> client.execute(NodesDataFusionInfoAction.INSTANCE, nodesRequest, new RestToXContentListener<>(channel)); + } + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodeDataFusionInfo.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodeDataFusionInfo.java new file mode 100644 index 0000000000000..5512110c576da --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodeDataFusionInfo.java @@ -0,0 +1,82 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.action; + +import org.opensearch.action.support.nodes.BaseNodeResponse; +import org.opensearch.cluster.node.DiscoveryNode; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.ToXContentFragment; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; + +/** + * Information about DataFusion on a specific node + */ +public class NodeDataFusionInfo extends BaseNodeResponse implements ToXContentFragment { + + private final String dataFusionVersion; + + /** + * Constructor for NodeDataFusionInfo. + * @param node The discovery node. + * @param dataFusionVersion The DataFusion version. + */ + public NodeDataFusionInfo(DiscoveryNode node, String dataFusionVersion) { + super(node); + this.dataFusionVersion = dataFusionVersion; + } + + /** + * Constructor for NodeDataFusionInfo from stream input. + * @param in The stream input. + * @throws IOException If an I/O error occurs. + */ + public NodeDataFusionInfo(StreamInput in) throws IOException { + super(in); + this.dataFusionVersion = in.readString(); + } + + /** + * Writes the node info to the stream output. + * @param out The stream output. + * @throws IOException If an I/O error occurs. + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + out.writeString(dataFusionVersion); + } + + /** + * Converts the node info to XContent. + * @param builder The XContent builder. + * @param params The parameters. + * @return The XContent builder. + * @throws IOException If an I/O error occurs. + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject("data_fusion_info"); + builder.field("datafusion_version", dataFusionVersion); + builder.endObject(); + builder.endObject(); + return builder; + } + + /** + * Gets the DataFusion version. + * @return The DataFusion version. + */ + public String getDataFusionVersion() { + return dataFusionVersion; + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoAction.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoAction.java new file mode 100644 index 0000000000000..198c7973e6a9c --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoAction.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.action; + +import org.opensearch.action.ActionType; + +/** + * Action to retrieve DataFusion info from nodes + */ +public class NodesDataFusionInfoAction extends ActionType { + /** + * Singleton instance of NodesDataFusionInfoAction. + */ + public static final NodesDataFusionInfoAction INSTANCE = new NodesDataFusionInfoAction(); + /** + * Name of this action. + */ + public static final String NAME = "cluster:admin/datafusion/info"; + + NodesDataFusionInfoAction() { + super(NAME, NodesDataFusionInfoResponse::new); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoRequest.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoRequest.java new file mode 100644 index 0000000000000..4e32bb3b0f18c --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoRequest.java @@ -0,0 +1,75 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.action; + +import org.opensearch.action.support.nodes.BaseNodesRequest; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; + +import java.io.IOException; + +/** + * Request for retrieving DataFusion information from nodes + */ +public class NodesDataFusionInfoRequest extends BaseNodesRequest { + + /** + * Default constructor for NodesDataFusionInfoRequest. + */ + public NodesDataFusionInfoRequest() { + super((String[]) null); + } + + /** + * Constructor for NodesDataFusionInfoRequest with specific node IDs. + * @param nodeIds The node IDs to query. + */ + public NodesDataFusionInfoRequest(String... nodeIds) { + super(nodeIds); + } + + /** + * Constructor for NodesDataFusionInfoRequest from stream input. + * @param in The stream input. + * @throws IOException If an I/O error occurs. + */ + public NodesDataFusionInfoRequest(StreamInput in) throws IOException { + super(in); + } + + /** + * Writes the request to the stream output. + * @param out The stream output. + * @throws IOException If an I/O error occurs. + */ + @Override + public void writeTo(StreamOutput out) throws IOException { + super.writeTo(out); + } + + /** + * Node-level request for DataFusion information + */ + public static class NodeDataFusionInfoRequest extends org.opensearch.transport.TransportRequest { + + /** + * Default constructor for NodeDataFusionInfoRequest. + */ + public NodeDataFusionInfoRequest() {} + + /** + * Constructor for NodeDataFusionInfoRequest from stream input. + * @param in The stream input. + * @throws IOException If an I/O error occurs. + */ + public NodeDataFusionInfoRequest(StreamInput in) throws IOException { + super(in); + } + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoResponse.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoResponse.java new file mode 100644 index 0000000000000..61a13fd263ee9 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoResponse.java @@ -0,0 +1,94 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.action; + +import org.opensearch.action.FailedNodeException; +import org.opensearch.action.support.nodes.BaseNodesResponse; +import org.opensearch.cluster.ClusterName; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.core.common.io.stream.StreamOutput; +import org.opensearch.core.xcontent.ToXContentObject; +import org.opensearch.core.xcontent.XContentBuilder; + +import java.io.IOException; +import java.util.List; + +/** + * Response containing DataFusion information from multiple nodes + */ +public class NodesDataFusionInfoResponse extends BaseNodesResponse implements ToXContentObject { + + /** + * Constructor for NodesDataFusionInfoResponse. + * @param clusterName The cluster name. + * @param nodes The list of node DataFusion info. + * @param failures The list of failed node exceptions. + */ + public NodesDataFusionInfoResponse(ClusterName clusterName, List nodes, List failures) { + super(clusterName, nodes, failures); + } + + @Override + protected List readNodesFrom(StreamInput in) throws IOException { + return in.readList(NodeDataFusionInfo::new); + } + + /** + * Constructor for NodesDataFusionInfoResponse from stream input. + * @param in The stream input. + * @throws IOException If an I/O error occurs. + */ + public NodesDataFusionInfoResponse(StreamInput in) throws IOException { + super(in); + } + + /** + * Writes the node response to stream output. + * @param out The stream output. + * @param nodes The list of nodes to write. + * @throws IOException If an I/O error occurs. + */ + @Override + protected void writeNodesTo(StreamOutput out, List nodes) throws IOException { + out.writeList(nodes); + } + + /** + * Converts the response to XContent. + * @param builder The XContent builder. + * @param params The parameters. + * @return The XContent builder. + * @throws IOException If an I/O error occurs. + */ + @Override + public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { + builder.startObject(); + builder.startObject("nodes"); + for (NodeDataFusionInfo nodeInfo : getNodes()) { + builder.field(nodeInfo.getNode().getId()); + // builder.field("name", nodeInfo.getNode().getName()); + // builder.field("transport_address", nodeInfo.getNode().getAddress().toString()); + nodeInfo.toXContent(builder, params); + } + builder.endObject(); + + if (!failures().isEmpty()) { + builder.startArray("failures"); + for (FailedNodeException failure : failures()) { + builder.startObject(); + builder.field("node_id", failure.nodeId()); + builder.field("reason", failure.getMessage()); + builder.endObject(); + } + builder.endArray(); + } + builder.endObject(); + return builder; + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/TransportNodesDataFusionInfoAction.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/TransportNodesDataFusionInfoAction.java new file mode 100644 index 0000000000000..8a659f29230d6 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/TransportNodesDataFusionInfoAction.java @@ -0,0 +1,110 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.action; + +import org.opensearch.action.FailedNodeException; +import org.opensearch.action.support.ActionFilters; +import org.opensearch.action.support.nodes.TransportNodesAction; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.inject.Inject; +import org.opensearch.core.common.io.stream.StreamInput; +import org.opensearch.datafusion.DataFusionService; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.TransportService; + +import java.io.IOException; +import java.util.List; + +/** + * Transport action for retrieving DataFusion information from nodes + */ +public class TransportNodesDataFusionInfoAction extends TransportNodesAction< + NodesDataFusionInfoRequest, + NodesDataFusionInfoResponse, + NodesDataFusionInfoRequest.NodeDataFusionInfoRequest, + NodeDataFusionInfo> { + + private final DataFusionService dataFusionService; + + /** + * Constructor for TransportNodesDataFusionInfoAction. + * @param threadPool The thread pool. + * @param clusterService The cluster service. + * @param transportService The transport service. + * @param actionFilters The action filters. + * @param dataFusionService The DataFusion service. + */ + @Inject + public TransportNodesDataFusionInfoAction( + ThreadPool threadPool, + ClusterService clusterService, + TransportService transportService, + ActionFilters actionFilters, + DataFusionService dataFusionService + ) { + super( + NodesDataFusionInfoAction.NAME, + threadPool, + clusterService, + transportService, + actionFilters, + NodesDataFusionInfoRequest::new, + NodesDataFusionInfoRequest.NodeDataFusionInfoRequest::new, + ThreadPool.Names.MANAGEMENT, + NodeDataFusionInfo.class + ); + this.dataFusionService = dataFusionService; + } + + /** + * Creates a new nodes response. + * @param request The nodes request. + * @param responses The list of node responses. + * @param failures The list of failed node exceptions. + * @return The nodes response. + */ + @Override + protected NodesDataFusionInfoResponse newResponse( + NodesDataFusionInfoRequest request, + List responses, + List failures + ) { + return new NodesDataFusionInfoResponse(clusterService.getClusterName(), responses, failures); + } + + /** + * Creates a new node request. + * @param request The nodes request. + * @return The node request. + */ + @Override + protected NodesDataFusionInfoRequest.NodeDataFusionInfoRequest newNodeRequest(NodesDataFusionInfoRequest request) { + return new NodesDataFusionInfoRequest.NodeDataFusionInfoRequest(); + } + + @Override + protected NodeDataFusionInfo newNodeResponse(StreamInput in) throws IOException { + return new NodeDataFusionInfo(in); + } + + /** + * Handles the node request and returns the node response. + * @param request The node request. + * @return The node response. + */ + @Override + protected NodeDataFusionInfo nodeOperation(NodesDataFusionInfoRequest.NodeDataFusionInfoRequest request) { + try { + System.out.println(this.dataFusionService.getVersion()); + return new NodeDataFusionInfo(clusterService.localNode(), dataFusionService.getVersion()); + } catch (Exception e) { + return new NodeDataFusionInfo(clusterService.localNode(), "unknown"); + } + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/package-info.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/package-info.java new file mode 100644 index 0000000000000..d3542f4dfe9dc --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * REST actions and transport handlers for DataFusion plugin. + * Provides API endpoints for DataFusion functionality. + */ +package org.opensearch.datafusion.action; diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/DefaultRecordBatchStream.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/DefaultRecordBatchStream.java new file mode 100644 index 0000000000000..5603660ed760a --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/DefaultRecordBatchStream.java @@ -0,0 +1,114 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.core; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +import java.util.concurrent.CompletableFuture; + +public class DefaultRecordBatchStream implements RecordBatchStream { + + private static final Logger logger = LogManager.getLogger(DefaultRecordBatchStream.class); + + private final long nativeStreamPtr; + private volatile boolean closed = false; + private volatile boolean hasNextCached = false; + private volatile boolean hasNextValue = false; + + /** + * Creates a new wrapping the given native stream pointer. + * + * @param nativeStreamPtr Pointer to the native DataFusion RecordBatch stream + */ + public DefaultRecordBatchStream(long nativeStreamPtr) { + if (nativeStreamPtr == 0) { + throw new IllegalArgumentException("Invalid native stream pointer"); + } + this.nativeStreamPtr = nativeStreamPtr; + logger.debug("Created default record batch stream with pointer: {}", nativeStreamPtr); + } + + @Override + public Object getSchema() { + return "schema"; // Placeholder + } + + @Override + public CompletableFuture next() { + // PlaceholderImpl + return CompletableFuture.supplyAsync(() -> { + if (closed) { + return null; + } + + try { + // Get the next batch from native code + String batch = nativeNextBatch(nativeStreamPtr); + + // Reset cached hasNext value since we consumed a batch + hasNextCached = false; + + logger.trace("Retrieved next batch from stream pointer: {}", nativeStreamPtr); + return batch; + } catch (Exception e) { + logger.error("Error getting next batch from stream", e); + return null; + } + }); + } + + @Override + public boolean hasNext() { + // Placeholder impl + if (closed) { + return false; + } + + if (hasNextCached) { + return hasNextValue; + } + + try { + // Check if there's a next batch available + // This is a simplified implementation - in practice, you might want to + // peek at the stream without consuming the batch + String nextBatch = nativeNextBatch(nativeStreamPtr); + hasNextValue = (nextBatch != null); + hasNextCached = true; + + logger.trace("hasNext() = {} for stream pointer: {}", hasNextValue, nativeStreamPtr); + return hasNextValue; + } catch (Exception e) { + logger.error("Error checking for next batch in stream", e); + return false; + } + } + + @Override + public void close() { + if (!closed) { + logger.debug("Closing RecordBatchStream with pointer: {}", nativeStreamPtr); + try { + nativeCloseStream(nativeStreamPtr); + closed = true; + logger.debug("Successfully closed RecordBatchStream"); + } catch (Exception e) { + logger.error("Error closing RecordBatchStream", e); + throw e; + } + } + } + + // Native method declarations + private static native String nativeNextBatch(long streamPtr); + + private static native void nativeCloseStream(long streamPtr); +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/GlobalRuntimeEnv.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/GlobalRuntimeEnv.java new file mode 100644 index 0000000000000..547539d5ff4d1 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/GlobalRuntimeEnv.java @@ -0,0 +1,48 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.core; + +import static org.opensearch.datafusion.DataFusionQueryJNI.closeGlobalRuntime; +import static org.opensearch.datafusion.DataFusionQueryJNI.createGlobalRuntime; +import static org.opensearch.datafusion.DataFusionQueryJNI.createTokioRuntime; + +/** + * Global runtime environment for DataFusion operations. + * Manages the lifecycle of the native DataFusion runtime. + */ +public class GlobalRuntimeEnv implements AutoCloseable { + // ptr to runtime environment in df + private final long ptr; + private final long tokio_runtime_ptr; + + /** + * Creates a new global runtime environment. + */ + public GlobalRuntimeEnv() { + this.ptr = createGlobalRuntime(); + this.tokio_runtime_ptr = createTokioRuntime(); + } + + /** + * Gets the native pointer to the runtime environment. + * @return the native pointer + */ + public long getPointer() { + return ptr; + } + + public long getTokioRuntimePtr() { + return tokio_runtime_ptr; + } + + @Override + public void close() { + closeGlobalRuntime(this.ptr); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/SessionContext.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/SessionContext.java new file mode 100644 index 0000000000000..956aa78fdaa30 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/SessionContext.java @@ -0,0 +1,42 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.core; + +/** + * Session context for datafusion + */ +public class SessionContext implements AutoCloseable { + + // ptr to context in df + private final long ptr; + + /** + * Create a new DataFusion session context + * @return context ID for subsequent operations + */ + static native long createContext(); + + /** + * Close and cleanup a DataFusion context + * @param contextId the context ID to close + */ + public static native void closeContext(long contextId); + + /** + * Creates a new session context. + */ + public SessionContext() { + this.ptr = createContext(); + } + + @Override + public void close() throws Exception { + closeContext(this.ptr); + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/package-info.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/package-info.java new file mode 100644 index 0000000000000..2c6e72ef3a582 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * Core DataFusion runtime and session management classes. + * Provides runtime environment and session context management. + */ +package org.opensearch.datafusion.core; diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/package-info.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/package-info.java new file mode 100644 index 0000000000000..81017da49c16c --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/package-info.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * DataFusion query engine integration for OpenSearch. + * Provides the main plugin and service classes for DataFusion functionality. + */ +package org.opensearch.datafusion; diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionContext.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionContext.java new file mode 100644 index 0000000000000..8f7d4f914d64e --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionContext.java @@ -0,0 +1,843 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.CollectorManager; +import org.apache.lucene.search.FieldDoc; +import org.apache.lucene.search.Query; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.action.search.SearchType; +import org.opensearch.common.unit.TimeValue; +import org.opensearch.common.util.BigArrays; +import org.opensearch.core.common.bytes.BytesReference; +import org.opensearch.index.IndexService; +import org.opensearch.index.cache.bitset.BitsetFilterCache; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.mapper.ObjectMapper; +import org.opensearch.index.query.ParsedQuery; +import org.opensearch.index.query.QueryShardContext; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.similarity.SimilarityService; +import org.opensearch.search.SearchExtBuilder; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.aggregations.BucketCollectorProcessor; +import org.opensearch.search.aggregations.InternalAggregation; +import org.opensearch.search.aggregations.SearchContextAggregations; +import org.opensearch.search.collapse.CollapseContext; +import org.opensearch.search.dfs.DfsSearchResult; +import org.opensearch.search.fetch.FetchPhase; +import org.opensearch.search.fetch.FetchSearchResult; +import org.opensearch.search.fetch.StoredFieldsContext; +import org.opensearch.search.fetch.subphase.FetchDocValuesContext; +import org.opensearch.search.fetch.subphase.FetchFieldsContext; +import org.opensearch.search.fetch.subphase.FetchSourceContext; +import org.opensearch.search.fetch.subphase.ScriptFieldsContext; +import org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext; +import org.opensearch.search.internal.ContextIndexSearcher; +import org.opensearch.search.internal.ReaderContext; +import org.opensearch.search.internal.ScrollContext; +import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.internal.ShardSearchContextId; +import org.opensearch.search.internal.ShardSearchRequest; +import org.opensearch.datafusion.DatafusionEngine; +import org.opensearch.search.ContextEngineSearcher; +import org.opensearch.search.profile.Profilers; +import org.opensearch.search.query.QuerySearchResult; +import org.opensearch.search.query.ReduceableSearchResult; +import org.opensearch.search.rescore.RescoreContext; +import org.opensearch.search.sort.SortAndFormats; +import org.opensearch.search.suggest.SuggestionSearchContext; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Search context for Datafusion engine + */ +public class DatafusionContext extends SearchContext { + private final ReaderContext readerContext; + private final ShardSearchRequest request; + private final SearchShardTask task; + private final DatafusionEngine readEngine; + private final DatafusionSearcher engineSearcher; + private final IndexShard indexShard; + private final QuerySearchResult queryResult; + private final FetchSearchResult fetchResult; + private final IndexService indexService; + private final QueryShardContext queryShardContext; + private DatafusionQuery datafusionQuery; + private Map dfResults; + + private List dfQueryResult; + private List dfFetchResult; // TODO: make this Map? + private SearchContextAggregations aggregations; + private final BigArrays bigArrays; + private final Map, CollectorManager> queryCollectorManagers = new HashMap<>(); + + /** + * Constructor + * @param readerContext The reader context + * @param request The shard search request + * @param task The search shard task + * @param engine The datafusion engine + */ + public DatafusionContext( + ReaderContext readerContext, + ShardSearchRequest request, + SearchShardTarget searchShardTarget, + SearchShardTask task, + DatafusionEngine engine, + BigArrays bigArrays) { + this.readerContext = readerContext; + this.indexShard = readerContext.indexShard(); + this.request = request; + this.task = task; + this.readEngine = engine; + this.engineSearcher = engine.acquireSearcher("search");//null;//TODO readerContext.contextEngineSearcher(); + this.queryResult = new QuerySearchResult(readerContext.id(), searchShardTarget, request); + this.fetchResult = new FetchSearchResult(readerContext.id(), searchShardTarget); + this.dfQueryResult = null; + this.dfFetchResult = null; + this.indexService = readerContext.indexService(); + this.queryShardContext = indexService.newQueryShardContext( + request.shardId().id(), + null, // TOOD : index searcher is null + request::nowInMillis, + searchShardTarget.getClusterAlias(), + false, // reevaluate the usage + false // specific to lucene + ); + this.bigArrays = bigArrays; + } + + /** + * Gets the read engine + * @return The datafusion engine + */ + public DatafusionEngine readEngine() { + return readEngine; + } + + /** + * Sets datafusion query + * @param datafusionQuery The datafusion query + */ + public DatafusionContext datafusionQuery(DatafusionQuery datafusionQuery) { + this.datafusionQuery = datafusionQuery; + return this; + } + + /** + * Sets datafusion query phase row ids + * @param dfQueryResult The datafusion query phase result + */ + public void setDfQueryPhaseResult(List dfQueryResult) { + this.dfQueryResult = dfQueryResult; + } + + /** + * Sets datafusion fetch phase row ids + * @param dfFetchResult The datafusion fetch phase result + */ + public void setDfFetchPhaseResult(List dfFetchResult) { + this.dfFetchResult = dfFetchResult; + } + + /** + * Gets the datafusion query + * @return The datafusion query + */ + public DatafusionQuery getDatafusionQuery() { + return datafusionQuery; + } + + /** + * Gets the engine searcher + * @return The datafusion searcher + */ + public DatafusionSearcher getEngineSearcher() { + return engineSearcher; + } + + /** + * {@inheritDoc} + * @param task The search shard task + */ + @Override + public void setTask(SearchShardTask task) { + + } + + @Override + public SearchShardTask getTask() { + return null; + } + + + /** + * Gets df query result. + * + * @return the df query result + */ + public List getDfQueryResult() { + return dfQueryResult; + } + + /** + * Gets df fetch result. + * + * @return the df fetch result + */ + public List getDfFetchResult() { + return dfFetchResult; + } + + @Override + public boolean isCancelled() { + return false; + } + + @Override + protected void doClose() { + + } + + /** + * {@inheritDoc} + * @param rewrite Whether to rewrite + */ + @Override + public void preProcess(boolean rewrite) { + + } + + /** + * {@inheritDoc} + * @param query The query + */ + @Override + public Query buildFilteredQuery(Query query) { + return null; + } + + @Override + public ShardSearchContextId id() { + return null; + } + + @Override + public String source() { + return ""; + } + + @Override + public ShardSearchRequest request() { + return request; + } + + @Override + public SearchType searchType() { + return null; + } + + @Override + public SearchShardTarget shardTarget() { + return null; + } + + @Override + public int numberOfShards() { + return 0; + } + + @Override + public float queryBoost() { + return 0; + } + + @Override + public ScrollContext scrollContext() { + return null; + } + + @Override + public SearchContextAggregations aggregations() { + return aggregations; + } + + /** + * {@inheritDoc} + * @param aggregations The search context aggregations + */ + @Override + public SearchContext aggregations(SearchContextAggregations aggregations) { + this.aggregations = aggregations; + return this; + } + + /** + * {@inheritDoc} + * @param searchExtBuilder The search extension builder + */ + @Override + public void addSearchExt(SearchExtBuilder searchExtBuilder) { + + } + + /** + * {@inheritDoc} + * @param name The name + */ + @Override + public SearchExtBuilder getSearchExt(String name) { + return null; + } + + @Override + public SearchHighlightContext highlight() { + return null; + } + + /** + * {@inheritDoc} + * @param highlight The search highlight context + */ + @Override + public void highlight(SearchHighlightContext highlight) { + + } + + @Override + public SuggestionSearchContext suggest() { + return null; + } + + /** + * {@inheritDoc} + * @param suggest The suggestion search context + */ + @Override + public void suggest(SuggestionSearchContext suggest) { + + } + + @Override + public List rescore() { + return List.of(); + } + + /** + * {@inheritDoc} + * @param rescore The rescore context + */ + @Override + public void addRescore(RescoreContext rescore) { + + } + + @Override + public boolean hasScriptFields() { + return false; + } + + @Override + public ScriptFieldsContext scriptFields() { + return null; + } + + @Override + public boolean sourceRequested() { + return false; + } + + @Override + public boolean hasFetchSourceContext() { + return false; + } + + @Override + public FetchSourceContext fetchSourceContext() { + return null; + } + + /** + * {@inheritDoc} + * @param fetchSourceContext The fetch source context + */ + @Override + public SearchContext fetchSourceContext(FetchSourceContext fetchSourceContext) { + return null; + } + + @Override + public FetchDocValuesContext docValuesContext() { + return null; + } + + /** + * {@inheritDoc} + * @param docValuesContext The fetch doc values context + */ + @Override + public SearchContext docValuesContext(FetchDocValuesContext docValuesContext) { + return null; + } + + @Override + public FetchFieldsContext fetchFieldsContext() { + return null; + } + + /** + * {@inheritDoc} + * @param fetchFieldsContext The fetch fields context + */ + @Override + public SearchContext fetchFieldsContext(FetchFieldsContext fetchFieldsContext) { + return null; + } + + @Override + public ContextIndexSearcher searcher() { + return null; + } + + @Override + public IndexShard indexShard() { + return this.indexShard; + } + + @Override + public MapperService mapperService() { + return indexService.mapperService(); + } + + @Override + public SimilarityService similarityService() { + return null; + } + + @Override + public BigArrays bigArrays() { + return bigArrays; + } + + @Override + public BitsetFilterCache bitsetFilterCache() { + return null; + } + + @Override + public TimeValue timeout() { + return null; + } + + /** + * {@inheritDoc} + * @param timeout The timeout value + */ + @Override + public void timeout(TimeValue timeout) { + + } + + @Override + public int terminateAfter() { + return 0; + } + + /** + * {@inheritDoc} + * @param terminateAfter The terminate after value + */ + @Override + public void terminateAfter(int terminateAfter) { + + } + + @Override + public boolean lowLevelCancellation() { + return false; + } + + /** + * {@inheritDoc} + * @param minimumScore The minimum score + */ + @Override + public SearchContext minimumScore(float minimumScore) { + return null; + } + + @Override + public Float minimumScore() { + return 0f; + } + + /** + * {@inheritDoc} + * @param sort The sort and formats + */ + @Override + public SearchContext sort(SortAndFormats sort) { + return null; + } + + @Override + public SortAndFormats sort() { + return null; + } + + /** + * {@inheritDoc} + * @param trackScores Whether to track scores + */ + @Override + public SearchContext trackScores(boolean trackScores) { + return null; + } + + @Override + public boolean trackScores() { + return false; + } + + /** + * {@inheritDoc} + * @param trackTotalHits The track total hits value + */ + @Override + public SearchContext trackTotalHitsUpTo(int trackTotalHits) { + return null; + } + + @Override + public int trackTotalHitsUpTo() { + return 0; + } + + @Override + /** + * {@inheritDoc} + * @param searchAfter The field doc for search after + */ + public SearchContext searchAfter(FieldDoc searchAfter) { + return null; + } + + @Override + public FieldDoc searchAfter() { + return null; + } + + @Override + /** + * {@inheritDoc} + * @param collapse The collapse context + */ + public SearchContext collapse(CollapseContext collapse) { + return null; + } + + @Override + public CollapseContext collapse() { + return null; + } + + @Override + /** + * {@inheritDoc} + * @param postFilter The parsed post filter query + */ + public SearchContext parsedPostFilter(ParsedQuery postFilter) { + return null; + } + + @Override + public ParsedQuery parsedPostFilter() { + return null; + } + + @Override + public Query aliasFilter() { + return null; + } + + @Override + /** + * {@inheritDoc} + * @param query The parsed query + */ + public SearchContext parsedQuery(ParsedQuery query) { + return null; + } + + @Override + public ParsedQuery parsedQuery() { + return null; + } + + // TODO : fix this + public Query query() { + // Extract query from request + return null; + } + + @Override + public int from() { + return 0; + } + + /** + * {@inheritDoc} + * @param from The from value + */ + @Override + public SearchContext from(int from) { + return null; + } + + @Override + public int size() { + return 0; + } + + /** + * {@inheritDoc} + * @param size The size value + */ + @Override + public SearchContext size(int size) { + return null; + } + + @Override + public boolean hasStoredFields() { + return false; + } + + @Override + public boolean hasStoredFieldsContext() { + return false; + } + + @Override + public boolean storedFieldsRequested() { + return false; + } + + @Override + public StoredFieldsContext storedFieldsContext() { + return null; + } + + /** + * {@inheritDoc} + * @param storedFieldsContext The stored fields context + */ + @Override + public SearchContext storedFieldsContext(StoredFieldsContext storedFieldsContext) { + return null; + } + + @Override + public boolean explain() { + return false; + } + + /** + * {@inheritDoc} + * @param explain Whether to explain + */ + @Override + public void explain(boolean explain) { + + } + + @Override + public List groupStats() { + return List.of(); + } + + /** + * {@inheritDoc} + * @param groupStats The group stats + */ + @Override + public void groupStats(List groupStats) { + + } + + @Override + public boolean version() { + return false; + } + + /** + * {@inheritDoc} + * @param version Whether to include version + */ + @Override + public void version(boolean version) { + + } + + @Override + public boolean seqNoAndPrimaryTerm() { + return false; + } + + /** + * {@inheritDoc} + * @param seqNoAndPrimaryTerm Whether to include sequence number and primary term + */ + @Override + public void seqNoAndPrimaryTerm(boolean seqNoAndPrimaryTerm) { + + } + + @Override + public int[] docIdsToLoad() { + return new int[0]; + } + + @Override + public int docIdsToLoadFrom() { + return 0; + } + + @Override + public int docIdsToLoadSize() { + return 0; + } + + /** + * {@inheritDoc} + * @param docIdsToLoad The document IDs to load + * @param docsIdsToLoadFrom The starting index for document IDs to load + * @param docsIdsToLoadSize The size of document IDs to load + */ + @Override + public SearchContext docIdsToLoad(int[] docIdsToLoad, int docsIdsToLoadFrom, int docsIdsToLoadSize) { + return null; + } + + @Override + public DfsSearchResult dfsResult() { + return null; + } + + @Override + public QuerySearchResult queryResult() { + return this.queryResult; + } + + @Override + public FetchPhase fetchPhase() { + return null; + } + + @Override + public FetchSearchResult fetchResult() { + return this.fetchResult; + } + + @Override + public Profilers getProfilers() { + return null; + } + + /** + * {@inheritDoc} + * @param name The field name + */ + @Override + public MappedFieldType fieldType(String name) { + return null; + } + + /** + * {@inheritDoc} + * @param name The object mapper name + */ + @Override + public ObjectMapper getObjectMapper(String name) { + return null; + } + + @Override + public long getRelativeTimeInMillis() { + return 0; + } + + @Override + public Map, CollectorManager> queryCollectorManagers() { + return queryCollectorManagers; + } + + @Override + public QueryShardContext getQueryShardContext() { + return queryShardContext; + } + + @Override + public ReaderContext readerContext() { + return null; + } + + @Override + public InternalAggregation.ReduceContext partialOnShard() { + return null; + } + + /** + * {@inheritDoc} + * @param bucketCollectorProcessor The bucket collector processor + */ + @Override + public void setBucketCollectorProcessor(BucketCollectorProcessor bucketCollectorProcessor) { + + } + + @Override + public BucketCollectorProcessor bucketCollectorProcessor() { + return null; + } + + @Override + public int getTargetMaxSliceCount() { + return 0; + } + + @Override + public boolean shouldUseTimeSeriesDescSortOptimization() { + return false; + } + + /** + * Gets the context engine searcher + * @return The context engine searcher + */ + public ContextEngineSearcher contextEngineSearcher() { + return new ContextEngineSearcher<>(this.engineSearcher, this); + } + + public void setDFResults(Map dfResults) { + this.dfResults = dfResults; + } + + public Map getDFResults() { + return dfResults; + } + +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQuery.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQuery.java new file mode 100644 index 0000000000000..f34935aa1a205 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQuery.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import java.util.Iterator; +import java.util.List; + +public class DatafusionQuery { + private final byte[] substraitBytes; + + // List of Search executors which returns a result iterator which contains row id which can be joined in datafusion + private final List searchExecutors; + private Boolean isFetchPhase; + private List queryPhaseRowIds; + private List projections; + + public DatafusionQuery(byte[] substraitBytes, List searchExecutors) { + this.substraitBytes = substraitBytes; + this.searchExecutors = searchExecutors; + this.isFetchPhase = false; + } + + public void setProjections(List projections) { + this.projections = projections; + } + + public void setFetchPhaseContext(List queryPhaseRowIds) { + this.queryPhaseRowIds = queryPhaseRowIds; + this.isFetchPhase = true; + } + + public boolean isFetchPhase() { + return this.isFetchPhase; + } + + public List getQueryPhaseRowIds() { + return this.queryPhaseRowIds; + } + + public List getProjections() { + return this.projections; + } + + public byte[] getSubstraitBytes() { + return substraitBytes; + } + + public List getSearchExecutors() { + return searchExecutors; + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQueryPhaseExecutor.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQueryPhaseExecutor.java new file mode 100644 index 0000000000000..8de7c7e397715 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQueryPhaseExecutor.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import org.opensearch.index.engine.EngineSearcher; +import org.opensearch.search.query.QueryPhaseExecutor; +import org.opensearch.search.query.QueryPhaseExecutionException; +import org.opensearch.datafusion.search.DatafusionContext; +import org.opensearch.datafusion.search.DatafusionQuery; +import org.opensearch.search.ContextEngineSearcher; +import org.opensearch.search.query.GenericQueryPhase; +import org.opensearch.search.query.GenericQueryPhaseSearcher; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +/** + * Query phase executor for Datafusion engine + */ +public class DatafusionQueryPhaseExecutor implements QueryPhaseExecutor { + + @Override + public boolean execute(DatafusionContext context) throws QueryPhaseExecutionException { + if (!canHandle(context)) { + // TODO : throw new QueryPhaseExecutionException("Cannot handle datafusion context"); + } + + GenericQueryPhaseSearcher searcher = + context.readEngine().getQueryPhaseSearcher(); + + GenericQueryPhase queryPhase = + new GenericQueryPhase<>(searcher); + + DatafusionQuery query = context.getDatafusionQuery(); + // TODO : rework interfaces as context itself has many objects + return queryPhase.executeInternal(context, context.getEngineSearcher(), query); + } + + @Override + public boolean canHandle(DatafusionContext context) { + return context != null && + context.readEngine() != null && + context.query() != null; + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReader.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReader.java new file mode 100644 index 0000000000000..ec01a01b57720 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReader.java @@ -0,0 +1,102 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import org.opensearch.datafusion.DataFusionQueryJNI; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.datafusion.DataFusionQueryJNI.closeDatafusionReader; + +/** + * DataFusion reader for JNI operations. + */ +public class DatafusionReader implements Closeable { + /** + * The directory path. + */ + public String directoryPath; + /** + * The file metadata collection. + */ + public Collection files; + /** + * The cache pointer. + */ + public long cachePtr; + private AtomicInteger refCount = new AtomicInteger(0); + + /** + * Constructor + * @param directoryPath The directory path + * @param files The file metadata collection + */ + public DatafusionReader(String directoryPath, Collection files) { + this.directoryPath = directoryPath; + this.files = files; + String[] fileNames = new String[0]; + if(files != null) { + System.out.println("Got the files!!!!!"); + fileNames = files.stream() + .flatMap(writerFileSet -> writerFileSet.getFiles().stream()) + .toArray(String[]::new); + } + System.out.println("File names: " + Arrays.toString(fileNames)); + System.out.println("Directory path: " + directoryPath); + + this.cachePtr = DataFusionQueryJNI.createDatafusionReader(directoryPath, fileNames); + incRef(); + } + + /** + * Gets the cache pointer. + * @return the cache pointer + */ + public long getCachePtr() { + return cachePtr; + } + + /** + * Increments the reference count. + */ + public void incRef() { + refCount.getAndIncrement(); + } + + /** + * Decrements the reference count. + * @throws IOException if an I/O error occurs + */ + public void decRef() throws IOException { + if(refCount.get() == 0) { + throw new IllegalStateException("Listing table has been already closed"); + } + + int currRefCount = refCount.decrementAndGet(); + if(currRefCount == 0) { + this.close(); + } + + } + + @Override + public void close() throws IOException { + if(cachePtr == -1L) { + throw new IllegalStateException("Listing table has been already closed"); + } + +// closeDatafusionReader(this.cachePtr); + this.cachePtr = -1; + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReaderManager.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReaderManager.java new file mode 100644 index 0000000000000..ba14055170dad --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReaderManager.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import org.opensearch.index.engine.CatalogSnapshotAwareRefreshListener; +import org.opensearch.index.engine.EngineReaderManager; +import org.opensearch.index.engine.exec.FileMetadata; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; + +import java.io.IOException; +import java.net.URI; +import java.nio.file.Path; +import java.util.Collection; +import java.util.List; + +public class DatafusionReaderManager implements EngineReaderManager, CatalogSnapshotAwareRefreshListener { + private DatafusionReader current; + private String path; + private String dataFormat; +// private final Lock refreshLock = new ReentrantLock(); +// private final List refreshListeners = new CopyOnWriteArrayList(); + + public DatafusionReaderManager(String path, Collection files, String dataFormat) throws IOException { + WriterFileSet writerFileSet = new WriterFileSet(Path.of(URI.create("file:///" + path)), 1); + files.forEach(fileMetadata -> writerFileSet.add(fileMetadata.file())); + this.current = new DatafusionReader(path, List.of(writerFileSet));; + this.path = path; + this.dataFormat = dataFormat; + } + + @Override + public DatafusionReader acquire() throws IOException { + if (current == null) { + throw new RuntimeException("Invalid state for datafusion reader"); + } + current.incRef(); + return current; + } + + @Override + public void release(DatafusionReader reference) throws IOException { + assert reference != null : "Shard view can't be null"; + reference.decRef(); + } + + + @Override + public void beforeRefresh() throws IOException { + // no op + } + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException { + if (didRefresh && catalogSnapshot != null) { + DatafusionReader old = this.current; + if(old !=null) { + release(old); + } + this.current = new DatafusionReader(this.path, catalogSnapshot.getSearchableFiles(dataFormat)); + this.current.incRef(); + } + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcher.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcher.java new file mode 100644 index 0000000000000..49f05321620a8 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcher.java @@ -0,0 +1,86 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.datafusion.DataFusionQueryJNI; +import org.opensearch.datafusion.core.DefaultRecordBatchStream; +import org.opensearch.index.engine.EngineSearcher; +import org.opensearch.search.aggregations.SearchResultsCollector; +import org.opensearch.vectorized.execution.search.spi.RecordBatchStream; + +import java.io.Closeable; +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Arrays; +import java.util.List; +import java.util.Objects; + +public class DatafusionSearcher implements EngineSearcher { + private final String source; + private DatafusionReader reader; + private Closeable closeable; + public DatafusionSearcher(String source, DatafusionReader reader, Closeable close) { + this.source = source; + this.reader = reader; + } + + @Override + public String source() { + return source; + } + + @Override + public void search(DatafusionQuery datafusionQuery, List> collectors) throws IOException { + // TODO : call search here to native + // TODO : change RunTimePtr + long nativeStreamPtr = DataFusionQueryJNI.executeQueryPhase(reader.getCachePtr(), datafusionQuery.getSubstraitBytes(), 0); + RecordBatchStream stream = new DefaultRecordBatchStream(nativeStreamPtr); + while(stream.hasNext()) { + for(SearchResultsCollector collector : collectors) { + collector.collect(stream); + } + } + } + + @Override + public long search(DatafusionQuery datafusionQuery, Long contextPtr) { + if (datafusionQuery.isFetchPhase()) { + long[] row_ids = datafusionQuery.getQueryPhaseRowIds() + .stream() + .mapToLong(Long::longValue) + .toArray(); + String[] projections = Objects.isNull(datafusionQuery.getProjections()) ? new String[]{} : datafusionQuery.getProjections().toArray(String[]::new); + + System.out.println("row_ids"); + System.out.println(Arrays.toString(row_ids)); + return DataFusionQueryJNI.executeFetchPhase(reader.getCachePtr(), row_ids, projections, contextPtr); + } + return DataFusionQueryJNI.executeQueryPhase(reader.getCachePtr(), datafusionQuery.getSubstraitBytes(), contextPtr); + } + + public DatafusionReader getReader() { + return reader; + } + + @Override + public void close() { + try { + if (closeable != null) { + closeable.close(); + } + } catch (IOException e) { + throw new UncheckedIOException("failed to close", e); + } catch (AlreadyClosedException e) { + // This means there's a bug somewhere: don't suppress it + throw new AssertionError(e); + } + + } +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcherSupplier.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcherSupplier.java new file mode 100644 index 0000000000000..6ff7526b0fdea --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcherSupplier.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineSearcherSupplier; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.function.Function; + +public abstract class DatafusionSearcherSupplier extends EngineSearcherSupplier { + + private final Function wrapper; + private final AtomicBoolean released = new AtomicBoolean(false); + + public DatafusionSearcherSupplier(Function wrapper) { + this.wrapper = wrapper; + } + + public final DatafusionSearcher acquireSearcher(String source) { + if (released.get()) { + throw new AlreadyClosedException("SearcherSupplier was closed"); + } + final DatafusionSearcher searcher = acquireSearcherInternal(source); + return searcher; + // TODO apply wrapper + } + + @Override + public final void close() { + if (released.compareAndSet(false, true)) { + doClose(); + } else { + assert false : "SearchSupplier was released twice"; + } + } + + protected abstract void doClose(); + + protected abstract DatafusionSearcher acquireSearcherInternal(String source); + +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchExecutor.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchExecutor.java new file mode 100644 index 0000000000000..ff3b5953c119e --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchExecutor.java @@ -0,0 +1,15 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +// Functional interface to execute search and get iterator +@FunctionalInterface +public interface SearchExecutor { + SearchResultIterator execute(); +} diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchResultIterator.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchResultIterator.java new file mode 100644 index 0000000000000..27fe2d54f76d9 --- /dev/null +++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchResultIterator.java @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion.search; + +import java.util.Iterator; + +// Interface for the iterator that Datafusion expects +public interface SearchResultIterator extends Iterator { + // Basic Iterator methods + boolean hasNext(); + Record next(); +} diff --git a/plugins/engine-datafusion/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec b/plugins/engine-datafusion/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec new file mode 100644 index 0000000000000..9b1ec055f7ea2 --- /dev/null +++ b/plugins/engine-datafusion/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec @@ -0,0 +1,5 @@ +# DataFusion Engine implementations +# Add your custom implementations here, e.g.: +# com.example.CustomCsvDataFusionEngine + +# Note: Built-in csv engine is now in separate library diff --git a/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/DataFusionServiceTests.java b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/DataFusionServiceTests.java new file mode 100644 index 0000000000000..e2a285f2a36af --- /dev/null +++ b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/DataFusionServiceTests.java @@ -0,0 +1,372 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import com.parquet.parquetdataformat.ParquetDataFormatPlugin; +import org.apache.arrow.memory.RootAllocator; +import org.apache.arrow.vector.*; +import org.opensearch.action.OriginalIndices; +import org.opensearch.action.search.SearchRequest; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.common.UUIDs; +import org.opensearch.common.lease.Releasable; +import org.opensearch.common.lease.Releasables; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.Strings; +import org.opensearch.core.index.Index; +import org.opensearch.core.index.shard.ShardId; +import org.opensearch.datafusion.search.DatafusionContext; +import org.opensearch.datafusion.search.DatafusionQuery; +import org.opensearch.datafusion.search.DatafusionSearcher; +import org.opensearch.env.Environment; +import org.opensearch.index.IndexService; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.EngineSearcherSupplier; +import org.opensearch.index.engine.exec.FileMetadata; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.index.shard.SearchOperationListener; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.store.Store; +import org.opensearch.indices.replication.common.ReplicationType; +import org.opensearch.plugins.Plugin; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.aggregations.SearchResultsCollector; +import org.opensearch.search.internal.*; +import org.opensearch.tasks.Task; +import org.opensearch.test.IndexSettingsModule; +import org.opensearch.test.OpenSearchSingleNodeTestCase; +import org.junit.Before; + +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; +import org.opensearch.threadpool.TestThreadPool; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.vectorized.execution.search.DataFormat; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URISyntaxException; +import java.net.URL; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.atomic.AtomicLong; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; +import static org.opensearch.common.unit.TimeValue.timeValueMinutes; +import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder; + +import org.apache.arrow.vector.VectorSchemaRoot; +import org.apache.arrow.vector.types.pojo.Field; +/** + * Unit tests for DataFusionService + * + * Note: These tests require the native library to be available. + * They are disabled by default and can be enabled by setting the system property: + * -Dtest.native.enabled=true + */ +public class DataFusionServiceTests extends OpenSearchSingleNodeTestCase { + + private DataFusionService service; + + @Mock + private Environment mockEnvironment; + + @Before + public void setup() { + MockitoAnnotations.openMocks(this); + Settings mockSettings = Settings.builder().put("path.data", "/tmp/test-data").build(); + + when(mockEnvironment.settings()).thenReturn(mockSettings); + service = new DataFusionService(Map.of()); + service.doStart(); + } + + public void testGetVersion() { + String version = service.getVersion(); + assertNotNull(version); + assertTrue(version.contains("datafusion_version")); + assertTrue(version.contains("substrait_version")); + } + +// public void testCreateAndCloseContext() { +// // Create context +// SessionContext defaultContext = service.getDefaultContext(); +// assertNotNull(defaultContext); +// assertTrue(defaultContext.getContext() > 0); +// +// // Verify context exists +// SessionContext context = service.getContext(defaultContext.getContext()); +// assertNotNull(context); +// assertEquals(defaultContext.getContext(), context.getContext()); +// +// // Close context +// boolean closed = service.closeContext(defaultContext.getContext()); +// assertTrue(closed); +// +// // Verify context is gone +// assertNull(service.getContext(defaultContext.getContext())); +// } + + public void testQueryPhaseExecutor() throws IOException { + Map finalRes = new HashMap<>(); + DatafusionSearcher datafusionSearcher = null; + try { + URL resourceUrl = getClass().getClassLoader().getResource("data/"); + Index index = new Index("index-7", "index-7"); + final Path path = Path.of(resourceUrl.toURI()).resolve("index-7").resolve("0"); + ShardPath shardPath = new ShardPath(false, path, path, new ShardId(index, 0)); + DatafusionEngine engine = new DatafusionEngine(DataFormat.CSV, List.of(new FileMetadata(DataFormat.CSV.toString(), "generation-1.parquet")), service, shardPath); + datafusionSearcher = engine.acquireSearcher("search"); + + byte[] protoContent; + try (InputStream is = getClass().getResourceAsStream("/substrait_plan.pb")) { + protoContent = is.readAllBytes(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + long streamPointer = datafusionSearcher.search(new DatafusionQuery(protoContent, new ArrayList<>()), service.getTokioRuntimePointer()); + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + RecordBatchStream stream = new RecordBatchStream(streamPointer, service.getTokioRuntimePointer() , allocator); + + // We can have some collectors passed like this which can collect the results and convert to InternalAggregation + // Is the possible? need to check + + SearchResultsCollector collector = new SearchResultsCollector() { + @Override + public void collect(RecordBatchStream value) { + VectorSchemaRoot root = value.getVectorSchemaRoot(); + for (Field field : root.getSchema().getFields()) { + String filedName = field.getName(); + FieldVector fieldVector = root.getVector(filedName); + Object[] fieldValues = new Object[fieldVector.getValueCount()]; + for (int i = 0; i < fieldVector.getValueCount(); i++) { + fieldValues[i] = fieldVector.getObject(i); + } + finalRes.put(filedName, fieldValues); + } + } + }; + + while (stream.loadNextBatch().join()) { + collector.collect(stream); + } + + logger.info("Final Results:"); + for (Map.Entry entry : finalRes.entrySet()) { + logger.info("{}: {}", entry.getKey(), java.util.Arrays.toString(entry.getValue())); + } + + } catch (Exception exception) { + logger.error("Failed to execute Substrait query plan", exception); + } + finally { + if(datafusionSearcher != null) { + datafusionSearcher.close(); + } + } + } + + public void testQueryThenFetchExecutor() throws IOException, URISyntaxException { + DatafusionSearcher datafusionSearcher = null; + try { + URL resourceUrl = getClass().getClassLoader().getResource("data/"); + Index index = new Index("index-7", "index-7"); + final Path path = Path.of(resourceUrl.toURI()).resolve("index-7").resolve("0"); + ShardPath shardPath = new ShardPath(false, path, path, new ShardId(index, 0)); + DatafusionEngine engine = new DatafusionEngine(DataFormat.CSV, List.of(new FileMetadata(DataFormat.CSV.toString(), "generation-1.parquet"), new FileMetadata(DataFormat.CSV.toString(), "generation-2.parquet")), service, shardPath); + datafusionSearcher = engine.acquireSearcher("Search"); + + byte[] protoContent; + try (InputStream is = getClass().getResourceAsStream("/substrait_plan.pb")) { + protoContent = is.readAllBytes(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + DatafusionQuery query = new DatafusionQuery(protoContent, new ArrayList<>()); + long streamPointer = datafusionSearcher.search(query, service.getTokioRuntimePointer()); + RootAllocator allocator = new RootAllocator(Long.MAX_VALUE); + RecordBatchStream stream = new RecordBatchStream(streamPointer, service.getTokioRuntimePointer() , allocator); + + ArrayList row_ids_res = new ArrayList<>(); + + while (stream.loadNextBatch().join()) { + VectorSchemaRoot root = stream.getVectorSchemaRoot(); + for (Field field : root.getSchema().getFields()) { + String fieldName = field.getName(); + if (fieldName.equals("___row_id")) { + IntVector fieldVector = (IntVector) root.getVector(fieldName); + for(int i=0; i projections = List.of("target_ip"); + query.setProjections(projections); + query.setFetchPhaseContext(row_ids_res); + long fetchPhaseStreamPointer = datafusionSearcher.search(query, service.getTokioRuntimePointer()); + + RecordBatchStream fetchPhaseStream = new RecordBatchStream(fetchPhaseStreamPointer, service.getTokioRuntimePointer() , allocator); + int total_fetch_results = 0; + ArrayList fetch_row_ids_res = new ArrayList<>(); + + while(fetchPhaseStream.loadNextBatch().join()) { + VectorSchemaRoot root = fetchPhaseStream.getVectorSchemaRoot(); + assertEquals(projections.size(), root.getSchema().getFields().size()); + for (Field field : root.getSchema().getFields()) { + assertTrue("Field was not passed in projections list", projections.contains(field.getName())); + if(field.getName().equals("___row_id")) { + IntVector fieldVector = (IntVector) root.getVector(field.getName()); + for(int i=0; i> getPlugins() { + return pluginList(ParquetDataFormatPlugin.class); + } + + public void testQueryThenFetchE2ETest() throws IOException, URISyntaxException, InterruptedException, ExecutionException { + URL resourceUrl = getClass().getClassLoader().getResource("data/"); + Index index = new Index("index-7", "index-7"); + final Path path = Path.of(resourceUrl.toURI()).resolve("index-7").resolve("0"); + ShardPath shardPath = new ShardPath(false, path, path, new ShardId(index, 0)); + DatafusionEngine engine = new DatafusionEngine(DataFormat.CSV, List.of(new FileMetadata(DataFormat.CSV.toString(), "generation-1.parquet"), new FileMetadata(DataFormat.CSV.toString(), "generation-2.parquet")), service, shardPath); + + SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true); + ShardSearchRequest shardSearchRequest = new ShardSearchRequest( + OriginalIndices.NONE, + searchRequest, + new ShardId(index, 0), + 1, + new AliasFilter(null, Strings.EMPTY_ARRAY), + 1.0f, + -1, + null, + null + ); + + IndexService indexService = createIndex("index-7", Settings.EMPTY, jsonBuilder().startObject() + .startObject("properties") + .startObject("target_status_code") + .field("type", "integer") + .endObject() + .endObject() + .endObject() + ); + ThreadPool threadPool = new TestThreadPool(this.getClass().getName()); + IndexShard indexShard = createIndexShard(shardPath.getShardId(), true); + when(indexShard.getThreadPool()).thenReturn(threadPool); + SearchOperationListener searchOperationListener = new SearchOperationListener() { + }; + when(indexShard.getSearchOperationListener()).thenReturn(searchOperationListener); + + EngineSearcherSupplier reader = indexShard.acquireSearcherSupplier(); + ReaderContext readerContext = createAndPutReaderContext(shardSearchRequest, indexService, indexShard, reader); + SearchShardTarget searchShardTarget = new SearchShardTarget("node_1", new ShardId("index-7", "index-7", 0), null, OriginalIndices.NONE); + SearchShardTask searchShardTask = new SearchShardTask(0, "n/a", "n/a", "test", null, Collections.singletonMap(Task.X_OPAQUE_ID, "my_id")); + DatafusionContext datafusionContext = new DatafusionContext(readerContext, shardSearchRequest, searchShardTarget, searchShardTask, engine, null); + + byte[] protoContent; + try (InputStream is = getClass().getResourceAsStream("/substrait_plan.pb")) { + protoContent = is.readAllBytes(); + } catch (IOException e) { + throw new RuntimeException(e); + } + + DatafusionQuery query = new DatafusionQuery(protoContent, new ArrayList<>()); + List projections = List.of("target_status_code"); + query.setProjections(projections); + + datafusionContext.datafusionQuery(query); + + engine.executeQueryPhase(datafusionContext); + engine.executeFetchPhase(datafusionContext); + + assertEquals(datafusionContext.getDfQueryResult().size(), datafusionContext.getDfFetchResult().size()); + } + + final AtomicLong idGenerator = new AtomicLong(); + + + final ReaderContext createAndPutReaderContext( + ShardSearchRequest request, + IndexService indexService, + IndexShard shard, + EngineSearcherSupplier reader + ) { + assert request.readerId() == null; + assert request.keepAlive() == null; + ReaderContext readerContext = null; + Releasable decreaseScrollContexts = null; + try { + + final long keepAlive = request.keepAlive() != null ? request.keepAlive().getMillis() : request.readerId() == null ? timeValueMinutes(5).getMillis() : -1; + + final ShardSearchContextId id = new ShardSearchContextId(UUIDs.randomBase64UUID(), idGenerator.incrementAndGet()); + + readerContext = new ReaderContext(id, indexService, shard, reader, keepAlive, request.keepAlive() == null); + reader = null; + final ReaderContext finalReaderContext = readerContext; + final SearchOperationListener searchOperationListener = shard.getSearchOperationListener(); + searchOperationListener.onNewReaderContext(finalReaderContext); + readerContext.addOnClose(() -> { + try { + if (finalReaderContext.scrollContext() != null) { + searchOperationListener.onFreeScrollContext(finalReaderContext); + } + } finally { + searchOperationListener.onFreeReaderContext(finalReaderContext); + } + }); + readerContext = null; + return finalReaderContext; + } finally { + Releasables.close(reader, readerContext, decreaseScrollContexts); + } + } + + static IndexShard createIndexShard(ShardId shardId, boolean remoteStoreEnabled) { + Settings settings = Settings.builder() + .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT) + .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, String.valueOf(remoteStoreEnabled)) + .build(); + IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test_index", settings); + Store store = mock(Store.class); + IndexShard indexShard = mock(IndexShard.class); + when(indexShard.indexSettings()).thenReturn(indexSettings); + when(indexShard.shardId()).thenReturn(shardId); + when(indexShard.store()).thenReturn(store); + return indexShard; + } +} diff --git a/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/TestDataFusionServiceTests.java b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/TestDataFusionServiceTests.java new file mode 100644 index 0000000000000..395e2fae52e2f --- /dev/null +++ b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/TestDataFusionServiceTests.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.datafusion; + +import org.opensearch.test.OpenSearchTestCase; + +import java.util.Collections; +import java.util.List; + +/** + * Unit tests for DataFusionService + * + * Note: These tests require the native library to be available. + * They are disabled by default and can be enabled by setting the system property: + * -Dtest.native.enabled=true + */ +public class TestDataFusionServiceTests extends OpenSearchTestCase { + + private DataFusionService service; + + @Override + public void setUp() throws Exception { + super.setUp(); + service = new DataFusionService(Collections.emptyMap()); + service.doStart(); + } + + public void testGetVersion() { + String version = service.getVersion(); + assertNotNull(version); + // The service returns codec information in JSON format + assertTrue("Version should contain codecs", version.contains("codecs")); + assertTrue("Version should contain CsvDataSourceCodec", version.contains("CsvDataSourceCodec")); + } + + public void testCreateAndCloseContext() { + service.registerDirectory("/Users/gbh/Documents", List.of("parquet-nested.csv")); + long contextId = service.createSessionContext().join(); + // Create context + assertTrue(contextId > 0); + + service.getVersion(); + } + + public void testCodecDiscovery() { + // Test that the CSV codec can be discovered via SPI + // TODO : test with dummy plugin and dummy codec + } +} diff --git a/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-1.parquet b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-1.parquet new file mode 100644 index 0000000000000..695b6429ad7a4 Binary files /dev/null and b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-1.parquet differ diff --git a/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-2.parquet b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-2.parquet new file mode 100644 index 0000000000000..695b6429ad7a4 Binary files /dev/null and b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-2.parquet differ diff --git a/plugins/engine-datafusion/src/test/resources/substrait_plan.pb b/plugins/engine-datafusion/src/test/resources/substrait_plan.pb new file mode 100644 index 0000000000000..80776758aa1eb Binary files /dev/null and b/plugins/engine-datafusion/src/test/resources/substrait_plan.pb differ diff --git a/server/build.gradle b/server/build.gradle index 69f3c59556f5b..917d44aec4664 100644 --- a/server/build.gradle +++ b/server/build.gradle @@ -72,11 +72,14 @@ dependencies { api project(":libs:opensearch-geo") api project(":libs:opensearch-telemetry") api project(":libs:opensearch-task-commons") + api project(':libs:opensearch-vectorized-exec-spi') compileOnly project(":libs:agent-sm:bootstrap") compileOnly project(':libs:opensearch-plugin-classloader') testRuntimeOnly project(':libs:opensearch-plugin-classloader') + implementation 'org.apache.commons:commons-lang3:3.17.0' + api libs.bundles.lucene // utilities @@ -115,6 +118,7 @@ dependencies { api libs.protobuf api libs.jakartaannotation + // https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap api libs.roaringbitmap testImplementation 'org.awaitility:awaitility:4.3.0' @@ -135,8 +139,7 @@ tasks.withType(JavaCompile).configureEach { } compileJava { - options.compilerArgs += ['-processor', ['org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor', - 'org.opensearch.common.annotation.processor.ApiAnnotationProcessor'].join(',')] + options.compilerArgs += ['-processor', ['org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor'].join(',')] } tasks.named("internalClusterTest").configure { diff --git a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java index 8cd6fb7ed5aa6..70e0002608fe2 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java @@ -732,7 +732,9 @@ public static final IndexShard newIndexShard( indexService.getRefreshMutex(), clusterService.getClusterApplierService(), MergedSegmentPublisher.EMPTY, - ReferencedSegmentsPublisher.EMPTY + ReferencedSegmentsPublisher.EMPTY, + null, + null ); } diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java index 13f7211d48e9a..5412aa00fe49a 100644 --- a/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java +++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java @@ -32,6 +32,7 @@ package org.opensearch.action.admin.cluster.node.info; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.service.ReportingService; @@ -49,6 +50,7 @@ * * @opensearch.internal */ +@ExperimentalApi // TODO : this cannot be experimental, just marking it to bypass for now public class PluginsAndModules implements ReportingService.Info { private final List plugins; private final List modules; diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java index 4a4a309b45a2e..e2cc921ba9a1c 100644 --- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java +++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java @@ -713,6 +713,18 @@ public String pipeline() { return pipeline; } + public SearchRequest queryPlanIR(byte[] queryPlanIR) { + if (this.source == null) { + this.source = new SearchSourceBuilder(); + } + this.source.queryPlanIR(queryPlanIR); + return this; + } + + public byte[] queryPlanIR() { + return this.source != null ? this.source.queryPlanIR() : null; + } + @Override public SearchTask createTask(long id, String type, String action, TaskId parentTaskId, Map headers) { return new SearchTask(id, type, action, this::buildDescription, parentTaskId, headers, cancelAfterTimeInterval); diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 7a8eee076fa37..f715dd13cd25f 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -90,6 +90,8 @@ import org.opensearch.indices.recovery.RecoverySettings; import org.opensearch.indices.recovery.RecoveryState; import org.opensearch.plugins.IndexStorePlugin; +import org.opensearch.plugins.PluginsService; +import org.opensearch.plugins.SearchEnginePlugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; @@ -493,6 +495,23 @@ public void addSimilarity(String name, TriFunction */ + /** + * indexModule.setReaderWrapper( + * indexService -> new SecurityFlsDlsIndexSearcherWrapper( + * indexService, + * settings, + * adminDns, + * cs, + * auditLog, + * ciol, + * evaluator, + * dlsFlsValve::getCurrentConfig, + * dlsFlsBaseContext + * ) + * ); + * Example reader wrapper used in security plugin + * @param indexReaderWrapperFactory + */ public void setReaderWrapper( Function> indexReaderWrapperFactory ) { @@ -668,7 +687,9 @@ public IndexService newIndexService( Supplier shardLevelRefreshEnabled, RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings, - Supplier clusterDefaultMaxMergeAtOnceSupplier + Supplier clusterDefaultMaxMergeAtOnceSupplier, + PluginsService pluginsService, + SearchEnginePlugin searchEnginePlugin ) throws IOException { return newIndexService( indexCreationContext, @@ -696,7 +717,9 @@ public IndexService newIndexService( remoteStoreSettings, (s) -> {}, shardId -> ReplicationStats.empty(), - clusterDefaultMaxMergeAtOnceSupplier + clusterDefaultMaxMergeAtOnceSupplier, + searchEnginePlugin, + pluginsService ); } @@ -726,7 +749,9 @@ public IndexService newIndexService( RemoteStoreSettings remoteStoreSettings, Consumer replicator, Function segmentReplicationStatsProvider, - Supplier clusterDefaultMaxMergeAtOnceSupplier + Supplier clusterDefaultMaxMergeAtOnceSupplier, + SearchEnginePlugin searchEnginePlugin, + PluginsService pluginsService ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -798,7 +823,9 @@ public IndexService newIndexService( compositeIndexSettings, replicator, segmentReplicationStatsProvider, - clusterDefaultMaxMergeAtOnceSupplier + clusterDefaultMaxMergeAtOnceSupplier, + searchEnginePlugin, + pluginsService ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 22441df923bf8..277daf2696b17 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -110,6 +110,8 @@ import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.plugins.IndexStorePlugin; +import org.opensearch.plugins.PluginsService; +import org.opensearch.plugins.SearchEnginePlugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; @@ -206,7 +208,9 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private final Object refreshMutex = new Object(); private volatile TimeValue refreshInterval; private volatile boolean shardLevelRefreshEnabled; + private final SearchEnginePlugin searchEnginePlugin; private final IndexStorePlugin.StoreFactory storeFactory; + private final PluginsService pluginsService; @InternalApi public IndexService( @@ -252,7 +256,9 @@ public IndexService( CompositeIndexSettings compositeIndexSettings, Consumer replicator, Function segmentReplicationStatsProvider, - Supplier clusterDefaultMaxMergeAtOnceSupplier + Supplier clusterDefaultMaxMergeAtOnceSupplier, + SearchEnginePlugin searchEnginePlugin, + PluginsService pluginsService ) { super(indexSettings); this.storeFactory = storeFactory; @@ -359,6 +365,8 @@ public IndexService( startIndexLevelRefreshTask(); } } + this.searchEnginePlugin = searchEnginePlugin; + this.pluginsService = pluginsService; } @InternalApi @@ -400,7 +408,9 @@ public IndexService( boolean shardLevelRefreshEnabled, RecoverySettings recoverySettings, RemoteStoreSettings remoteStoreSettings, - Supplier clusterDefaultMaxMergeAtOnce + Supplier clusterDefaultMaxMergeAtOnce, + SearchEnginePlugin searchEnginePlugin, + PluginsService pluginsService ) { this( indexSettings, @@ -445,7 +455,9 @@ public IndexService( null, s -> {}, (shardId) -> ReplicationStats.empty(), - clusterDefaultMaxMergeAtOnce + clusterDefaultMaxMergeAtOnce, + searchEnginePlugin, + pluginsService ); } @@ -794,7 +806,8 @@ protected void closeInternal() { refreshMutex, clusterService.getClusterApplierService(), this.indexSettings.isSegRepEnabledOrRemoteNode() ? mergedSegmentPublisher : null, - this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null + this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null, + pluginsService ); eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); diff --git a/server/src/main/java/org/opensearch/index/engine/CatalogSnapshotAwareRefreshListener.java b/server/src/main/java/org/opensearch/index/engine/CatalogSnapshotAwareRefreshListener.java new file mode 100644 index 0000000000000..11c0ce293eae9 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/CatalogSnapshotAwareRefreshListener.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; + +import java.io.IOException; + +public interface CatalogSnapshotAwareRefreshListener { + /** + * Called before refresh operation. + */ + void beforeRefresh() throws IOException; + + /** + * Called after refresh operation with catalog snapshot. + * @param didRefresh whether refresh actually occurred + * @param catalogSnapshot the current catalog snapshot with file information + */ + void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatPlugin.java b/server/src/main/java/org/opensearch/index/engine/DataFormatPlugin.java new file mode 100644 index 0000000000000..2bb09a50dee52 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatPlugin.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; + +public interface DataFormatPlugin { + + IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath); + + DataFormat getDataFormat(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java index 82d8871b73fba..c17927990df7f 100644 --- a/server/src/main/java/org/opensearch/index/engine/Engine.java +++ b/server/src/main/java/org/opensearch/index/engine/Engine.java @@ -79,6 +79,11 @@ import org.opensearch.core.common.unit.ByteSizeValue; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.VersionType; +import org.opensearch.index.engine.exec.bridge.CheckpointState; +import org.opensearch.index.engine.exec.bridge.Indexer; +import org.opensearch.index.engine.exec.bridge.IndexingThrottler; +import org.opensearch.index.engine.exec.bridge.StatsHolder; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import org.opensearch.index.mapper.IdFieldMapper; import org.opensearch.index.mapper.Mapping; import org.opensearch.index.mapper.ParseContext.Document; @@ -130,7 +135,7 @@ * @opensearch.api */ @PublicApi(since = "1.0.0") -public abstract class Engine implements LifecycleAware, Closeable { +public abstract class Engine implements LifecycleAware, Closeable, Indexer, CheckpointState, StatsHolder, IndexingThrottler, SearcherOperations> { public static final String SYNC_COMMIT_ID = "sync_id"; // TODO: remove sync_id in 3.0 public static final String HISTORY_UUID_KEY = "history_uuid"; @@ -762,6 +767,7 @@ public SearcherSupplier acquireSearcherSupplier(Function wra SearcherSupplier reader = new SearcherSupplier(wrapper) { @Override public Searcher acquireSearcherInternal(String source) { + // TODO : this should return assert assertSearcherIsWarmedUp(source, scope); return new Searcher( source, @@ -828,9 +834,9 @@ public Searcher acquireSearcher(String source, SearcherScope scope, Function getReferenceManager(SearcherScope scope); + public abstract ReferenceManager getReferenceManager(SearcherScope scope); - boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) { + public boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) { return true; } @@ -1404,7 +1410,7 @@ default void onFailedEngine(String reason, @Nullable Exception e) {} * @opensearch.api */ @PublicApi(since = "1.0.0") - public abstract static class SearcherSupplier implements Releasable { + public abstract static class SearcherSupplier extends EngineSearcherSupplier { private final Function wrapper; private final AtomicBoolean released = new AtomicBoolean(false); @@ -1439,8 +1445,10 @@ public final void close() { * * @opensearch.api */ + @PublicApi(since = "1.0.0") - public static final class Searcher extends IndexSearcher implements Releasable { + public static final class Searcher extends IndexSearcher implements Releasable, EngineSearcher { + // TODO : this extends index searcher private final String source; private final Closeable onClose; @@ -1607,6 +1615,7 @@ public static class Index extends Operation { private final boolean isRetry; private final long ifSeqNo; private final long ifPrimaryTerm; + public CompositeDataFormatWriter.CompositeDocumentInput documentInput; public Index( Term uid, @@ -1633,6 +1642,7 @@ public Index( this.autoGeneratedIdTimestamp = autoGeneratedIdTimestamp; this.ifSeqNo = ifSeqNo; this.ifPrimaryTerm = ifPrimaryTerm; + this.documentInput = doc.getDocumentInput(); } public Index(Term uid, long primaryTerm, ParsedDocument doc) { diff --git a/server/src/main/java/org/opensearch/index/engine/EngineLucene.java b/server/src/main/java/org/opensearch/index/engine/EngineLucene.java new file mode 100644 index 0000000000000..f12f8cda0555e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/EngineLucene.java @@ -0,0 +1,57 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; + +import java.util.function.Function; + +// Dummy impl +public class EngineLucene implements SearcherOperations>{ + @Override + public EngineSearcherSupplier acquireSearcherSupplier(Function wrapper) throws EngineException { + return null; + } + + @Override + public EngineSearcherSupplier acquireSearcherSupplier(Function wrapper, Engine.SearcherScope scope) throws EngineException { + return null; + } + + @Override + public Engine.Searcher acquireSearcher(String source) throws EngineException { + return null; + } + + @Override + public Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope) throws EngineException { + return null; + } + + @Override + public Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope, Function wrapper) throws EngineException { + return null; + } + + @Override + public ReferenceManager getReferenceManager(Engine.SearcherScope scope) { + return null; + } + + @Override + public boolean assertSearcherIsWarmedUp(String source, Engine.SearcherScope scope) { + return false; + } + + @Override + public CatalogSnapshotAwareRefreshListener getRefreshListener(Engine.SearcherScope searcherScope) { + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/EngineReaderManager.java b/server/src/main/java/org/opensearch/index/engine/EngineReaderManager.java new file mode 100644 index 0000000000000..992e835a5204d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/EngineReaderManager.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.search.ReferenceManager; + +import java.io.IOException; + +public interface EngineReaderManager { + T acquire() throws IOException; + + void release(T reader) throws IOException; + + default void addListener(ReferenceManager.RefreshListener listener) { + // no-op + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/EngineSearcher.java b/server/src/main/java/org/opensearch/index/engine/EngineSearcher.java new file mode 100644 index 0000000000000..7471fd3fbeb5f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/EngineSearcher.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.lease.Releasable; +import org.opensearch.search.aggregations.SearchResultsCollector; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.util.List; + +@ExperimentalApi +// TODO make this generic type +public interface EngineSearcher extends Releasable { + + /** + * The source that caused this searcher to be acquired. + */ + String source(); + + /** + * Search using substrait query plan bytes and call the result collectors + */ + default void search(Q query, List> collectors) throws IOException { + throw new UnsupportedOperationException(); + } + + default long search(Q query, Long runtimePtr) throws IOException { + throw new UnsupportedOperationException(); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/EngineSearcherSupplier.java b/server/src/main/java/org/opensearch/index/engine/EngineSearcherSupplier.java new file mode 100644 index 0000000000000..df66b5265ce9e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/EngineSearcherSupplier.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.lease.Releasable; + +import java.util.concurrent.atomic.AtomicBoolean; + +@ExperimentalApi +public abstract class EngineSearcherSupplier implements Releasable { + private final AtomicBoolean released = new AtomicBoolean(false); + + /** + * Acquire a searcher for the given source. + */ + public T acquireSearcher(String source) { + if (released.get()) { + throw new AlreadyClosedException("SearcherSupplier was closed"); + } + return acquireSearcherInternal(source); + } + + protected abstract T acquireSearcherInternal(String source); + + protected abstract void doClose(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java index fcc81335d4363..b291c32b8c985 100644 --- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java @@ -163,7 +163,7 @@ public class InternalEngine extends Engine { protected volatile long lastDeleteVersionPruneTimeMSec; protected final TranslogManager translogManager; - protected final IndexWriter indexWriter; + public final IndexWriter indexWriter; protected final LocalCheckpointTracker localCheckpointTracker; protected final AtomicLong maxUnsafeAutoIdTimestamp = new AtomicLong(-1); protected final SoftDeletesPolicy softDeletesPolicy; @@ -429,7 +429,8 @@ public CompletionStats completionStats(String... fieldNamePatterns) { * @opensearch.internal */ @SuppressForbidden(reason = "reference counting is required here") - private static final class ExternalReaderManager extends ReferenceManager { + private static final class + ExternalReaderManager extends ReferenceManager { private final BiConsumer refreshListener; private final OpenSearchReaderManager internalReaderManager; private boolean isWarmedUp; // guarded by refreshLock @@ -443,6 +444,13 @@ private static final class ExternalReaderManager extends ReferenceManager getReferenceManager(SearcherScope scope) { + public final ReferenceManager getReferenceManager(SearcherScope scope) { switch (scope) { case INTERNAL: return internalReaderManager; diff --git a/server/src/main/java/org/opensearch/index/engine/LuceneReaderManager.java b/server/src/main/java/org/opensearch/index/engine/LuceneReaderManager.java new file mode 100644 index 0000000000000..b3d2fe19b1b9d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/LuceneReaderManager.java @@ -0,0 +1,38 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; + +import java.io.IOException; + +public class LuceneReaderManager implements EngineReaderManager { + private final ReferenceManager referenceManager; + + public LuceneReaderManager(ReferenceManager referenceManager) { + this.referenceManager = referenceManager; + } + + + @Override + public OpenSearchDirectoryReader acquire() throws IOException { + return referenceManager.acquire(); + } + + @Override + public void release(OpenSearchDirectoryReader reader) throws IOException { + referenceManager.release(reader); + } + + @Override + public void addListener(ReferenceManager.RefreshListener listener) { + + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java index 1fab651078cc4..b97d9931d1139 100644 --- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java @@ -276,7 +276,7 @@ public GetResult get(Get get, BiFunction search } @Override - protected ReferenceManager getReferenceManager(SearcherScope scope) { + public ReferenceManager getReferenceManager(SearcherScope scope) { return readerManager; } diff --git a/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java b/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java index eba074e27f764..ad3cea6291eeb 100644 --- a/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java @@ -277,7 +277,7 @@ public GetResult get(Get get, BiFunction } @Override - protected ReferenceManager getReferenceManager(SearcherScope scope) { + public ReferenceManager getReferenceManager(SearcherScope scope) { return readerManager; } diff --git a/server/src/main/java/org/opensearch/index/engine/SearchExecEngine.java b/server/src/main/java/org/opensearch/index/engine/SearchExecEngine.java new file mode 100644 index 0000000000000..0edf34eed7663 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/SearchExecEngine.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.BigArrays; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ReaderContext; +import org.opensearch.search.internal.SearchContext; +import org.opensearch.search.internal.ShardSearchRequest; +import org.opensearch.search.query.GenericQueryPhaseSearcher; +import org.opensearch.search.query.QueryPhaseExecutor; + +import java.io.IOException; + +/** + * Generic read engine interface that provides searcher operations and query phase execution + * @param Context type for query execution + * @param Searcher type that extends EngineSearcher + * @param Reference manager type + * @param Query type + */ +@ExperimentalApi +// TODO too many templatized types +public abstract class SearchExecEngine, R, Q> implements SearcherOperations { + + /** + * Get the query phase searcher for this engine + */ + public abstract GenericQueryPhaseSearcher getQueryPhaseSearcher(); + + /** + * Get the query phase executor for this engine + */ + public abstract QueryPhaseExecutor getQueryPhaseExecutor(); + + /** + * Create a search context for this engine + */ + public abstract C createContext(ReaderContext readerContext, ShardSearchRequest request, SearchShardTarget searchShardTarget, SearchShardTask task, BigArrays bigArrays) throws IOException; + + /** + * execute Query Phase + */ + public abstract void executeQueryPhase(C context) throws IOException; + + /** + * execute Fetch Phase + */ + public abstract void executeFetchPhase(C context) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/SearchExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/SearchExecutionEngine.java new file mode 100644 index 0000000000000..1834e8cd1e82f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/SearchExecutionEngine.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Map; + +/** + * SearchExecutionEngine + * @opensearch.internal + */ +@ExperimentalApi +public interface SearchExecutionEngine { + /** + * execute + * @param queryPlanIR + * @return + */ + Map execute(byte[] queryPlanIR); +} diff --git a/server/src/main/java/org/opensearch/index/engine/SearcherOperations.java b/server/src/main/java/org/opensearch/index/engine/SearcherOperations.java new file mode 100644 index 0000000000000..32b2d882401fb --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/SearcherOperations.java @@ -0,0 +1,40 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.lucene.index.OpenSearchDirectoryReader; + +import java.util.function.Function; + +public interface SearcherOperations { + /** + * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand. + */ + EngineSearcherSupplier acquireSearcherSupplier(Function wrapper) throws EngineException; + /** + * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand. + */ + EngineSearcherSupplier acquireSearcherSupplier(Function wrapper, Engine.SearcherScope scope) throws EngineException; + + S acquireSearcher(String source) throws EngineException; + + S acquireSearcher(String source, Engine.SearcherScope scope) throws EngineException; + + S acquireSearcher(String source, Engine.SearcherScope scope, Function wrapper) throws EngineException; + + R getReferenceManager(Engine.SearcherScope scope); + + boolean assertSearcherIsWarmedUp(String source, Engine.SearcherScope scope); + + default CatalogSnapshotAwareRefreshListener getRefreshListener(Engine.SearcherScope searcherScope) { + // default is no-op, TODO : revisit this + return null; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java new file mode 100644 index 0000000000000..ef1ad24992256 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java @@ -0,0 +1,51 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.exec.text.TextDF; + +@ExperimentalApi +public interface DataFormat { + Setting dataFormatSettings(); + + Setting clusterLeveldataFormatSettings(); + + String name(); + + void configureStore(); + + static class LuceneDataFormat implements DataFormat { + @Override + public Setting dataFormatSettings() { + return null; + } + + @Override + public Setting clusterLeveldataFormatSettings() { + return null; + } + + @Override + public String name() { + return ""; + } + + @Override + public void configureStore() { + + } + } + + DataFormat LUCENE = new LuceneDataFormat(); + + DataFormat TEXT = new TextDF(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java new file mode 100644 index 0000000000000..0f24ca036741d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java @@ -0,0 +1,23 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.mapper.MappedFieldType; + +import java.io.IOException; +@ExperimentalApi +public interface DocumentInput extends AutoCloseable { + + void addField(MappedFieldType fieldType, Object value); + + T getFinalInput(); + + WriteResult addToWriter() throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileInfos.java b/server/src/main/java/org/opensearch/index/engine/exec/FileInfos.java new file mode 100644 index 0000000000000..436df520fd67b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FileInfos.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; +import java.util.Optional; + +public final class FileInfos { + + private final Map writerFilesMap; + + public FileInfos() { + this.writerFilesMap = new HashMap<>(); + } + + public Map getWriterFilesMap() { + return Collections.unmodifiableMap(writerFilesMap); + } + + public void putWriterFileSet(DataFormat format, WriterFileSet writerFileSet) { + writerFilesMap.put(format, writerFileSet); + } + + public Optional getWriterFileSet(DataFormat format) { + return Optional.ofNullable(writerFilesMap.get(format)); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java new file mode 100644 index 0000000000000..41efd124fa437 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +public record FileMetadata(String directory, String file) { +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FlushIn.java b/server/src/main/java/org/opensearch/index/engine/exec/FlushIn.java new file mode 100644 index 0000000000000..5d119a575d1aa --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FlushIn.java @@ -0,0 +1,13 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +public interface FlushIn { + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java new file mode 100644 index 0000000000000..2c3f63fcf0da2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +public interface IndexingExecutionEngine { + + List supportedFieldTypes(); + + Writer> createWriter(long writerGeneration) + throws IOException; // A writer responsible for data format vended by this engine. + + RefreshResult refresh(RefreshInput refreshInput) throws IOException; + + DataFormat getDataFormat(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/RefreshInput.java b/server/src/main/java/org/opensearch/index/engine/exec/RefreshInput.java new file mode 100644 index 0000000000000..135df6f0855fa --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/RefreshInput.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import java.util.ArrayList; +import java.util.List; + +public class RefreshInput { + + private final List writerFiles; + + public RefreshInput() { + this.writerFiles = new ArrayList<>(); + } + + public void add(WriterFileSet writerFileSetGroup) { + this.writerFiles.add(writerFileSetGroup); + } + + public List getWriterFiles() { + return writerFiles; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/RefreshResult.java b/server/src/main/java/org/opensearch/index/engine/exec/RefreshResult.java new file mode 100644 index 0000000000000..8357529d7acc7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/RefreshResult.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class RefreshResult { + + private final Map> refreshedFiles; + + public RefreshResult() { + this.refreshedFiles = new HashMap<>(); + } + + public void add(DataFormat df, List writerFiles) { + writerFiles.forEach(writerFileSet -> refreshedFiles.computeIfAbsent(df, dataFormat -> new ArrayList<>()).add(writerFileSet)); + } + + public List getRefreshedFiles(DataFormat dataFormat) { + return Collections.unmodifiableList(refreshedFiles.get(dataFormat)); + } + + public Map> getRefreshedFiles() { + return Map.copyOf(refreshedFiles); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Reportable.java b/server/src/main/java/org/opensearch/index/engine/exec/Reportable.java new file mode 100644 index 0000000000000..620539c877c76 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/Reportable.java @@ -0,0 +1,14 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +public interface Reportable { + + long ramBytesUsed(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriteResult.java b/server/src/main/java/org/opensearch/index/engine/exec/WriteResult.java new file mode 100644 index 0000000000000..666576e85cd0f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/WriteResult.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +public record WriteResult(boolean success, Exception e, long version, long term, long seqNo) { +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Writer.java b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java new file mode 100644 index 0000000000000..d0ad4d35b3fc2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import java.io.IOException; + +public interface Writer

> { + + WriteResult addDoc(P d) throws IOException; + + FileInfos flush(FlushIn flushIn) throws IOException; + + void sync() throws IOException; + + void close(); + + P newDocumentInput(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java new file mode 100644 index 0000000000000..9ab00b4753d74 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import java.io.Serializable; +import java.nio.file.Path; +import java.util.HashSet; +import java.util.Set; + +public class WriterFileSet implements Serializable { + + private final String directory; + private final long writerGeneration; + private final Set files; + + public WriterFileSet(Path directory, long writerGeneration) { + this.files = new HashSet<>(); + this.writerGeneration = writerGeneration; + this.directory = directory.toString(); + } + + public void add(String file) { + this.files.add(file); + } + + public Set getFiles() { + return files; + } + + public String getDirectory() { + return directory; + } + + public long getWriterGeneration() { + return writerGeneration; + } + + @Override + public String toString() { + return "WriterFileSet{" + + "directory=" + directory + + ", writerGeneration=" + writerGeneration + + ", files=" + files + + '}'; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/CheckpointState.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/CheckpointState.java new file mode 100644 index 0000000000000..52784d834d837 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/CheckpointState.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.bridge; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.index.seqno.SeqNoStats; + +@PublicApi(since = "1.0.0") +public interface CheckpointState { + + /** + * @return the persisted local checkpoint for this Engine + */ + long getPersistedLocalCheckpoint(); + + /** + * @return the latest checkpoint that has been processed but not necessarily persisted. + * Also see {@link #getPersistedLocalCheckpoint()} + */ + long getProcessedLocalCheckpoint(); + + /** + * @return a {@link SeqNoStats} object, using local state and the supplied global checkpoint + */ + SeqNoStats getSeqNoStats(long globalCheckpoint); + + /** + * Returns the latest global checkpoint value that has been persisted in the underlying storage (i.e. translog's checkpoint) + */ + long getLastSyncedGlobalCheckpoint(); + + long getMinRetainedSeqNo(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/Indexer.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/Indexer.java new file mode 100644 index 0000000000000..39f8929fe703c --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/Indexer.java @@ -0,0 +1,94 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.bridge; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.SafeCommitInfo; +import org.opensearch.index.engine.Segment; +import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogManager; + +import java.io.Closeable; +import java.io.IOException; +import java.util.List; + +@PublicApi(since = "1.0.0") +public interface Indexer { + + Engine.IndexResult index(Engine.Index index) throws IOException; + + Engine.DeleteResult delete(Engine.Delete delete) throws IOException; + + Engine.NoOpResult noOp(Engine.NoOp noOp) throws IOException; + + /** + * Counts the number of history operations in the given sequence number range + * @param source source of the request + * @param fromSeqNo from sequence number; included + * @param toSeqNumber to sequence number; included + * @return number of history operations + */ + int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException; + + boolean hasCompleteOperationHistory(String reason, long startingSeqNo); + + long getIndexBufferRAMBytesUsed(); + + List segments(boolean verbose); + + /** + * Returns the maximum auto_id_timestamp of all append-only index requests have been processed by this engine + * or the auto_id_timestamp received from its primary shard via {@link #updateMaxUnsafeAutoIdTimestamp(long)}. + * Notes this method returns the auto_id_timestamp of all append-only requests, not max_unsafe_auto_id_timestamp. + */ + long getMaxSeenAutoIdTimestamp(); + + /** + * Forces this engine to advance its max_unsafe_auto_id_timestamp marker to at least the given timestamp. + * The engine will disable optimization for all append-only whose timestamp at most {@code newTimestamp}. + */ + void updateMaxUnsafeAutoIdTimestamp(long newTimestamp); + + int fillSeqNoGaps(long primaryTerm) throws IOException; + + // File format methods follow below + void forceMerge( + boolean flush, + int maxNumSegments, + boolean onlyExpungeDeletes, + boolean upgrade, + boolean upgradeOnlyAncientSegments, + String forceMergeUUID + ) throws EngineException, IOException; + + void writeIndexingBuffer() throws EngineException; + + void refresh(String source) throws EngineException; + + void flush(boolean force, boolean waitIfOngoing) throws EngineException; + + SafeCommitInfo getSafeCommitInfo(); + + // Translog methods follow below + TranslogManager translogManager(); + + Closeable acquireHistoryRetentionLock(); + + Translog.Snapshot newChangesSnapshot( + String source, + long fromSeqNo, + long toSeqNo, + boolean requiredFullRange, + boolean accurateCount + ) throws IOException; + + String getHistoryUUID(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/IndexingThrottler.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/IndexingThrottler.java new file mode 100644 index 0000000000000..050dc07d1011b --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/IndexingThrottler.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.bridge; + +import org.opensearch.common.annotation.PublicApi; + +@PublicApi(since = "1.0.0") +public interface IndexingThrottler { + + /** + * Returns the number of milliseconds this engine was under index throttling. + */ + long getIndexThrottleTimeInMillis(); + + /** + * Returns the true iff this engine is currently under index throttling. + * @see #getIndexThrottleTimeInMillis() + */ + boolean isThrottled(); + + /** + * Request that this engine throttle incoming indexing requests to one thread. + * Must be matched by a later call to {@link #deactivateThrottling()}. + */ + void activateThrottling(); + + /** + * Reverses a previous {@link #activateThrottling} call. + */ + void deactivateThrottling(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/StatsHolder.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/StatsHolder.java new file mode 100644 index 0000000000000..27d0c099aaa53 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/StatsHolder.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.bridge; + +import org.opensearch.common.annotation.PublicApi; +import org.opensearch.index.engine.CommitStats; +import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.merge.MergeStats; +import org.opensearch.index.shard.DocsStats; +import org.opensearch.indices.pollingingest.PollingIngestStats; +import org.opensearch.search.suggest.completion.CompletionStats; + +@PublicApi(since = "1.0.0") +public interface StatsHolder { + + CommitStats commitStats(); + + DocsStats docStats(); + + SegmentsStats segmentsStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments); + + CompletionStats completionStats(String... fieldNamePatterns); + + PollingIngestStats pollingIngestStats(); + + MergeStats getMergeStats(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/CommitPoint.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/CommitPoint.java new file mode 100644 index 0000000000000..b3791660206d2 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/CommitPoint.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.commit; + +import java.nio.file.Path; +import java.util.Collection; +import java.util.Map; + +public final class CommitPoint { + + private final String commitFileName; + private final long generation; + private final Collection fileNames; + private final Path directory; + private final Map commitData; + + private CommitPoint(Builder builder) { + this.commitFileName = builder.commitFileName; + this.generation = builder.generation; + this.fileNames = builder.fileNames; + this.directory = builder.directory; + this.commitData = builder.commitData; + } + + public String getCommitFileName() { + return commitFileName; + } + + public long getGeneration() { + return generation; + } + + public Collection getFileNames() { + return fileNames; + } + + public Path getDirectory() { + return directory; + } + + public Map getCommitData() { + return commitData; + } + + public static Builder builder() { + return new Builder(); + } + + public static final class Builder { + + private String commitFileName; + private long generation; + private Collection fileNames; + private Path directory; + private Map commitData; + + private Builder() { + } + + public Builder commitFileName(String commitFileName) { + this.commitFileName = commitFileName; + return this; + } + + public Builder generation(long generation) { + this.generation = generation; + return this; + } + + public Builder fileNames(Collection fileNames) { + this.fileNames = fileNames; + return this; + } + + public Builder directory(Path directory) { + this.directory = directory; + return this; + } + + public Builder commitData(Map commitData) { + this.commitData = commitData; + return this; + } + + public CommitPoint build() { + return new CommitPoint(this); + } + } + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java new file mode 100644 index 0000000000000..8c56bd6c8c983 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.commit; + +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; + +public interface Committer { + + void addLuceneIndexes(CatalogSnapshot catalogSnapshot); + + CommitPoint commit(CatalogSnapshot catalogSnapshot); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java new file mode 100644 index 0000000000000..6a09850fdbfbb --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java @@ -0,0 +1,70 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.commit; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +import org.apache.commons.lang3.SerializationUtils; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NIOFSDirectory; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.engine.exec.coord.CatalogSnapshot; + +public class LuceneCommitEngine implements Committer { + + private final IndexWriter indexWriter; + private final LuceneIndexDeletionPolicy indexDeletionPolicy; + + public LuceneCommitEngine(Path commitPath) throws IOException { + Directory directory = new NIOFSDirectory(commitPath); + indexDeletionPolicy = new LuceneIndexDeletionPolicy(); + IndexWriterConfig indexWriterConfig = new IndexWriterConfig(); + indexWriterConfig.setIndexDeletionPolicy(indexDeletionPolicy); + this.indexWriter = new IndexWriter(directory, indexWriterConfig); + } + + @Override + public void addLuceneIndexes(CatalogSnapshot catalogSnapshot) { + Collection luceneFileCollection = catalogSnapshot.getSearchableFiles(DataFormat.LUCENE.name()); + luceneFileCollection.forEach(writerFileSet -> { + try { + indexWriter.addIndexes(new NIOFSDirectory(Path.of(writerFileSet.getDirectory()))); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + Map userData = new HashMap<>(); + catalogSnapshot.getSegments().forEach(segment -> userData.put(String.valueOf(segment.getGeneration()), + new String(SerializationUtils.serialize(segment)))); + indexWriter.setLiveCommitData(userData.entrySet()); + } + + @Override + public CommitPoint commit(CatalogSnapshot catalogSnapshot) { + addLuceneIndexes(catalogSnapshot); + try { + indexWriter.commit(); + IndexCommit indexCommit = indexDeletionPolicy.getLatestIndexCommit(); + return CommitPoint.builder().commitFileName(indexCommit.getSegmentsFileName()) + .fileNames(indexCommit.getFileNames()).commitData(indexCommit.getUserData()) + .generation(indexCommit.getGeneration()) + .directory(Path.of(indexCommit.getSegmentsFileName()).getParent()).build(); + } catch (IOException e) { + throw new RuntimeException("lucene commit engine failed", e); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneIndexDeletionPolicy.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneIndexDeletionPolicy.java new file mode 100644 index 0000000000000..5a6d14d74a191 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneIndexDeletionPolicy.java @@ -0,0 +1,33 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.commit; + +import java.io.IOException; +import java.util.List; +import org.apache.lucene.index.IndexCommit; +import org.apache.lucene.index.IndexDeletionPolicy; + +public final class LuceneIndexDeletionPolicy extends IndexDeletionPolicy { + + private IndexCommit latestIndexCommit; + + @Override + public void onInit(List commits) throws IOException { + + } + + @Override + public void onCommit(List commits) throws IOException { + latestIndexCommit = commits.getLast(); + } + + public IndexCommit getLatestIndexCommit() { + return latestIndexCommit; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java new file mode 100644 index 0000000000000..58a224d0fe9ae --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java @@ -0,0 +1,188 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.composite; + +import org.apache.commons.lang3.tuple.ImmutablePair; +import org.apache.lucene.util.SetOnce; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.mapper.MappedFieldType; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Optional; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; + +public class CompositeDataFormatWriter implements Writer, Lock { + + private final List>>> writers; + private final Runnable postWrite; + private final ReentrantLock lock; + private final SetOnce flushPending = new SetOnce<>(); + private final SetOnce hasFlushed = new SetOnce<>(); + private final long writerGeneration; + private boolean aborted; + + public CompositeDataFormatWriter(CompositeIndexingExecutionEngine engine, + long writerGeneration) { + this.writers = new ArrayList<>(); + this.lock = new ReentrantLock(); + this.aborted = false; + this.writerGeneration = writerGeneration; + engine.getDelegates().forEach(delegate -> { + try { + writers.add(ImmutablePair.of(delegate.getDataFormat(), delegate.createWriter(writerGeneration))); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + this.postWrite = () -> { + engine.getDataFormatWriterPool().releaseAndUnlock(this); + }; + } + + @Override + public WriteResult addDoc(CompositeDocumentInput d) throws IOException { + return d.addToWriter(); + } + + @Override + public FileInfos flush(FlushIn flushIn) throws IOException { + FileInfos fileInfos = new FileInfos(); + for (ImmutablePair>> writerPair : writers) { + Optional fileMetadataOptional = writerPair.getRight().flush(flushIn) + .getWriterFileSet(writerPair.getLeft()); + fileMetadataOptional.ifPresent( + fileMetadata -> fileInfos.putWriterFileSet(writerPair.getLeft(), fileMetadata)); + } + hasFlushed.set(true); + return fileInfos; + } + + @Override + public void sync() throws IOException { + + } + + @Override + public void close() { + + } + + @Override + public CompositeDocumentInput newDocumentInput() { + return new CompositeDocumentInput( + writers.stream().map(ImmutablePair::getRight).map(Writer::newDocumentInput).collect(Collectors.toList()), + this, postWrite); + } + + void abort() throws IOException { + aborted = true; + } + + public void setFlushPending() { + flushPending.set(Boolean.TRUE); + } + + public boolean hasFlushed() { + return hasFlushed.get() == Boolean.TRUE; + } + + public boolean isFlushPending() { + return flushPending.get() == Boolean.TRUE; + } + + public boolean isAborted() { + return aborted; + } + + @Override + public void lock() { + lock.lock(); + } + + @Override + public void lockInterruptibly() throws InterruptedException { + lock.lockInterruptibly(); + } + + @Override + public boolean tryLock() { + return lock.tryLock(); + } + + @Override + public boolean tryLock(long time, TimeUnit unit) throws InterruptedException { + return lock.tryLock(time, unit); + } + + @Override + public void unlock() { + lock.unlock(); + } + + boolean isHeldByCurrentThread() { + return lock.isHeldByCurrentThread(); + } + + @Override + public Condition newCondition() { + throw new UnsupportedOperationException(); + } + + public static class CompositeDocumentInput implements DocumentInput>> { + + List> inputs; + CompositeDataFormatWriter writer; + Runnable onClose; + + public CompositeDocumentInput(List> inputs, CompositeDataFormatWriter writer, + Runnable onClose) { + this.inputs = inputs; + this.writer = writer; + this.onClose = onClose; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + for (DocumentInput input : inputs) { + input.addField(fieldType, value); + } + } + + @Override + public List> getFinalInput() { + return null; + } + + @Override + public WriteResult addToWriter() throws IOException { + WriteResult writeResult = null; + for (DocumentInput input : inputs) { + writeResult = input.addToWriter(); + } + return writeResult; + } + + @Override + public void close() throws Exception { + onClose.run(); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java new file mode 100644 index 0000000000000..cd45d24432553 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java @@ -0,0 +1,127 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.composite; + +import java.util.Collections; +import java.util.concurrent.atomic.AtomicLong; + +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.engine.exec.coord.Any; +import org.opensearch.index.engine.exec.coord.CompositeDataFormatWriterPool; +import org.opensearch.index.engine.exec.text.TextEngine; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.plugins.DataSourcePlugin; +import org.opensearch.plugins.PluginsService; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentLinkedQueue; + +public class CompositeIndexingExecutionEngine implements IndexingExecutionEngine { + + private final CompositeDataFormatWriterPool dataFormatWriterPool; + private DataFormat dataFormat; + private final AtomicLong writerGeneration; + private final List> delegates = new ArrayList<>(); + + public CompositeIndexingExecutionEngine(MapperService mapperService, PluginsService pluginsService, Any dataformat, ShardPath shardPath, long initialWriterGeneration) { + this.dataFormat = dataformat; + this.writerGeneration = new AtomicLong(initialWriterGeneration); + try { + for (DataFormat dataFormat : dataformat.getDataFormats()) { + DataSourcePlugin plugin = pluginsService.filterPlugins(DataSourcePlugin.class).stream().filter(curr -> curr.getDataFormat().equals(dataFormat)).findFirst().orElseThrow(() -> new IllegalArgumentException("dataformat [" + dataFormat + "] is not registered.")); + delegates.add(plugin.indexingEngine(mapperService, shardPath)); + } + } catch (NullPointerException e) { + // my own testing + delegates.add(new TextEngine()); + } + this.dataFormatWriterPool = new CompositeDataFormatWriterPool(() -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), ConcurrentLinkedQueue::new, Runtime.getRuntime().availableProcessors()); + } + + public CompositeIndexingExecutionEngine(MapperService mapperService, PluginsService pluginsService, ShardPath shardPath, long initialWriterGeneration) { + this.writerGeneration = new AtomicLong(initialWriterGeneration); + try { + DataSourcePlugin plugin = pluginsService.filterPlugins(DataSourcePlugin.class).stream().findAny().orElseThrow(() -> new IllegalArgumentException("dataformat [" + DataFormat.TEXT + "] is not registered.")); + delegates.add(plugin.indexingEngine(mapperService, shardPath)); + } catch (NullPointerException e) { + delegates.add(new TextEngine()); + } + this.dataFormatWriterPool = new CompositeDataFormatWriterPool(() -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), ConcurrentLinkedQueue::new, Runtime.getRuntime().availableProcessors()); + } + + @Override + public DataFormat getDataFormat() { + return dataFormat; + } + + @Override + public List supportedFieldTypes() { + throw new UnsupportedOperationException(); + } + + @Override + public Writer createWriter(long generation) throws IOException { + throw new UnsupportedOperationException(); + } + + public Writer createCompositeWriter() { + return dataFormatWriterPool.getAndLock(); + } + + @Override + public RefreshResult refresh(RefreshInput ignore) throws IOException { + RefreshResult finalResult = new RefreshResult(); + Map refreshInputs = new HashMap<>(); + try { + List dataFormatWriters = dataFormatWriterPool.checkoutAll(); + + // flush to disk + for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) { + FileInfos fileInfos = dataFormatWriter.flush(null); + fileInfos.getWriterFilesMap().forEach((key, value) -> refreshInputs.computeIfAbsent(key, dataFormat -> new RefreshInput()).add(value)); + } + + if (refreshInputs.isEmpty()) { + return null; + } + + // make indexing engines aware of everything + for (IndexingExecutionEngine delegate : delegates) { + RefreshInput refreshInput = refreshInputs.get(delegate.getDataFormat()); + if (refreshInput != null) { + RefreshResult result = delegate.refresh(refreshInput); + finalResult.add(delegate.getDataFormat(), result.getRefreshedFiles(delegate.getDataFormat())); + } + } + + // provide a view to the upper layer + return finalResult; + } catch (IOException ex) { + throw new RuntimeException(ex); + } + } + + public List> getDelegates() { + return Collections.unmodifiableList(delegates); + } + + public CompositeDataFormatWriterPool getDataFormatWriterPool() { + return dataFormatWriterPool; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java new file mode 100644 index 0000000000000..c55834ec337d1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.coord; + +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.exec.DataFormat; + +import java.util.List; + +public class Any implements DataFormat { + + private List dataFormats; + + public Any(List dataFormats) { + this.dataFormats = dataFormats; + } + + @Override + public Setting dataFormatSettings() { + return null; + } + + @Override + public Setting clusterLeveldataFormatSettings() { + return null; + } + + @Override + public String name() { + return "all"; + } + + public List getDataFormats() { + return dataFormats; + } + + @Override + public void configureStore() { + for (DataFormat dataFormat : dataFormats) { + dataFormat.configureStore(); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshot.java new file mode 100644 index 0000000000000..680f325d84a69 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshot.java @@ -0,0 +1,91 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.coord; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.util.concurrent.AbstractRefCounted; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.io.Serializable; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +@ExperimentalApi +public class CatalogSnapshot extends AbstractRefCounted { + + private final long id; + private final Map> dfGroupedSearchableFiles; + + public CatalogSnapshot(RefreshResult refreshResult, long id) { + super("catalog_snapshot"); + this.id = id; + this.dfGroupedSearchableFiles = new HashMap<>(); + refreshResult.getRefreshedFiles().forEach((dataFormat, writerFiles) -> dfGroupedSearchableFiles.put(dataFormat.name(), writerFiles)); + } + + public Collection getSearchableFiles(String dataFormat) { + if (dfGroupedSearchableFiles.containsKey(dataFormat)) { + return dfGroupedSearchableFiles.get(dataFormat); + } + return Collections.emptyList(); + } + + public Collection getSegments() { + Map segmentMap = new HashMap<>(); + dfGroupedSearchableFiles.forEach((dataFormat, writerFileSets) -> writerFileSets.forEach(writerFileSet -> { + Segment segment = segmentMap.computeIfAbsent(writerFileSet.getWriterGeneration(), Segment::new); + segment.addSearchableFiles(dataFormat, writerFileSet); + })); + return Collections.unmodifiableCollection(segmentMap.values()); + } + + @Override + protected void closeInternal() { + // notify to file deleter, search, etc + } + + public long getId() { + return id; + } + + @Override + public String toString() { + return "CatalogSnapshot{" + + "id=" + id + + ", dfGroupedSearchableFiles=" + dfGroupedSearchableFiles + + '}'; + } + + public static class Segment implements Serializable { + + private final long generation; + private final Map dfGroupedSearchableFiles; + + public Segment(long generation) { + this.dfGroupedSearchableFiles = new HashMap<>(); + this.generation = generation; + } + + public void addSearchableFiles(String dataFormat, WriterFileSet writerFileSetGroup) { + dfGroupedSearchableFiles.put(dataFormat, writerFileSetGroup); + } + + public long getGeneration() { + return generation; + } + + @Override + public String toString() { + return "Segment{" + "generation=" + generation + ", dfGroupedSearchableFiles=" + dfGroupedSearchableFiles + '}'; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeDataFormatWriterPool.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeDataFormatWriterPool.java new file mode 100644 index 0000000000000..2934b4b4b50fc --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeDataFormatWriterPool.java @@ -0,0 +1,127 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.coord; + +import org.apache.lucene.store.AlreadyClosedException; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; +import org.opensearch.index.engine.exec.queue.LockableConcurrentQueue; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.IdentityHashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Objects; +import java.util.Queue; +import java.util.Set; +import java.util.function.Supplier; + +public class CompositeDataFormatWriterPool implements Iterable, Closeable { + + private final Set writers; + private final LockableConcurrentQueue availableWriters; + private final Supplier writerSupplier; + private volatile boolean closed; + + public CompositeDataFormatWriterPool( + Supplier writerSupplier, + Supplier> queueSupplier, + int concurrency + ) { + this.writers = Collections.newSetFromMap(new IdentityHashMap<>()); + this.writerSupplier = writerSupplier; + this.availableWriters = new LockableConcurrentQueue<>(queueSupplier, concurrency); + } + + /** + * This method is used by CompositeIndexingExecutionEngine to grab a writer from the pool to perform an indexing + * operation. + * + * @return a pooled CompositeDataFormatWriter if available, or a newly created instance if none are available + */ + public CompositeDataFormatWriter getAndLock() { + ensureOpen(); + CompositeDataFormatWriter compositeDataFormatWriter = availableWriters.lockAndPoll(); + return Objects.requireNonNullElseGet(compositeDataFormatWriter, this::fetchWriter); + } + + /** + * Create a new {@link CompositeDataFormatWriter} to be added to this pool. + * + * @return a new instance of {@link CompositeDataFormatWriter} + */ + private synchronized CompositeDataFormatWriter fetchWriter() { + ensureOpen(); + CompositeDataFormatWriter compositeDataFormatWriter = writerSupplier.get(); + compositeDataFormatWriter.lock(); + writers.add(compositeDataFormatWriter); + return compositeDataFormatWriter; + } + + /** + * Release the given {@link CompositeDataFormatWriter} to this pool for reuse if it is currently managed by this + * pool. + * + * @param state {@link CompositeDataFormatWriter} to release to the pool. + */ + public void releaseAndUnlock(CompositeDataFormatWriter state) { + assert + !state.isFlushPending() && !state.isAborted() : + "CompositeDataFormatWriter has pending flush: " + state.isFlushPending() + " aborted=" + state.isAborted(); + assert isRegistered(state) : "CompositeDocumentWriterPool doesn't know about this CompositeDataFormatWriter"; + availableWriters.addAndUnlock(state); + } + + /** + * Lock and checkout all CompositeDataFormatWriters from the pool for flush. + * + * @return Unmodifiable list of all CompositeDataFormatWriters locked by current thread. + */ + public synchronized List checkoutAll() { + List checkedOutWriters = new ArrayList<>(); + for (CompositeDataFormatWriter compositeDataFormatWriter : this) { + compositeDataFormatWriter.lock(); + if (isRegistered(compositeDataFormatWriter) && writers.remove(compositeDataFormatWriter)) { + availableWriters.remove(compositeDataFormatWriter); + checkedOutWriters.add(compositeDataFormatWriter); + } else { + compositeDataFormatWriter.unlock(); + } + } + return Collections.unmodifiableList(checkedOutWriters); + } + + /** + * Check if {@link CompositeDataFormatWriter} is part of this pool. + * + * @param perThread {@link CompositeDataFormatWriter} to validate. + * @return true if {@link CompositeDataFormatWriter} is part of this pool, false otherwise. + */ + synchronized boolean isRegistered(CompositeDataFormatWriter perThread) { + return writers.contains(perThread); + } + + private void ensureOpen() { + if (closed) { + throw new AlreadyClosedException("CompositeDocumentWriterPool is already closed"); + } + } + + @Override + public synchronized Iterator iterator() { + return List.copyOf(writers).iterator(); + } + + @Override + public void close() throws IOException { + this.closed = true; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java new file mode 100644 index 0000000000000..1329d7879d1d0 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java @@ -0,0 +1,304 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.coord; + +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.CatalogSnapshotAwareRefreshListener; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.SafeCommitInfo; +import org.opensearch.index.engine.SearchExecEngine; +import org.opensearch.index.engine.Segment; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.bridge.Indexer; +import org.opensearch.index.engine.exec.commit.Committer; +import org.opensearch.index.engine.exec.commit.LuceneCommitEngine; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; +import org.opensearch.index.engine.exec.composite.CompositeIndexingExecutionEngine; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.translog.Translog; +import org.opensearch.index.translog.TranslogManager; +import org.opensearch.plugins.PluginsService; +import org.opensearch.plugins.SearchEnginePlugin; + +import java.io.Closeable; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +@ExperimentalApi +public class CompositeEngine implements Indexer { + + private final CompositeIndexingExecutionEngine engine; + private final Committer compositeEngineCommitter; + private final List refreshListeners = new ArrayList<>(); + private CatalogSnapshot catalogSnapshot; + private final List catalogSnapshotAwareRefreshListeners = new ArrayList<>(); + private final Map>> readEngines = new HashMap<>(); + + public CompositeEngine(MapperService mapperService, PluginsService pluginsService, ShardPath shardPath) throws IOException { + List searchEnginePlugins = pluginsService.filterPlugins(SearchEnginePlugin.class); + // How to bring the Dataformat here? Currently this means only Text and LuceneFormat can be used + this.engine = new CompositeIndexingExecutionEngine(mapperService, pluginsService, shardPath, 0); + Path committerPath = Files.createTempDirectory("lucene-committer-index"); + this.compositeEngineCommitter = new LuceneCommitEngine(committerPath); + + // Refresh here so that catalog snapshot gets initialized + // TODO : any better way to do this ? + refresh("start"); + // TODO : how to extend this for Lucene ? where engine is a r/w engine + // Create read specific engines for each format which is associated with shard + for (SearchEnginePlugin searchEnginePlugin : searchEnginePlugins) { + for (org.opensearch.vectorized.execution.search.DataFormat dataFormat : searchEnginePlugin.getSupportedFormats()) { + List> currentSearchEngines = readEngines.getOrDefault(dataFormat, new ArrayList<>()); + SearchExecEngine newSearchEngine = searchEnginePlugin.createEngine(dataFormat, + Collections.emptyList(), + shardPath); + + currentSearchEngines.add(newSearchEngine); + readEngines.put(dataFormat, currentSearchEngines); + + // TODO : figure out how to do internal and external refresh listeners + // Maybe external refresh should be managed in opensearch core and plugins should always give + // internal refresh managers + // 60s as refresh interval -> ExternalReaderManager acquires a view every 60 seconds + // InternalReaderManager -> IndexingMemoryController , it keeps on refreshing internal maanger + // + if (newSearchEngine.getRefreshListener(Engine.SearcherScope.INTERNAL) != null) { + catalogSnapshotAwareRefreshListeners.add(newSearchEngine.getRefreshListener(Engine.SearcherScope.INTERNAL)); + } + } + } + } + + public SearchExecEngine getReadEngine(org.opensearch.vectorized.execution.search.DataFormat dataFormat) { + return readEngines.getOrDefault(dataFormat, new ArrayList<>()).getFirst(); + } + + public SearchExecEngine getPrimaryReadEngine() { + // Return the first available ReadEngine as primary + return readEngines.values().stream() + .filter(list -> !list.isEmpty()) + .findFirst() + .map(List::getFirst) + .orElse(null); + } + + public CompositeDataFormatWriter.CompositeDocumentInput documentInput() throws IOException { + return engine.createCompositeWriter().newDocumentInput(); + } + + public Engine.IndexResult index(Engine.Index index) throws IOException { + WriteResult writeResult = index.documentInput.addToWriter(); + // translog, checkpoint, other checks + return new Engine.IndexResult(writeResult.version(), writeResult.seqNo(), writeResult.term(), writeResult.success()); + } + + public synchronized void refresh(String source) throws EngineException { + refreshListeners.forEach(ref -> { + try { + ref.beforeRefresh(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + long id = 0L; + if (catalogSnapshot != null) { + id = catalogSnapshot.getId(); + } + CatalogSnapshot newCatSnap; + try { + RefreshResult refreshResult = engine.refresh(new RefreshInput()); + if (refreshResult == null) { + return; + } + newCatSnap = new CatalogSnapshot(refreshResult, id + 1L); + System.out.println("CATALOG SNAPSHOT: " + newCatSnap); + } catch (IOException ex) { + throw new RuntimeException(ex); + } + + newCatSnap.incRef(); + if (catalogSnapshot != null) { + catalogSnapshot.decRef(); + } + catalogSnapshot = newCatSnap; + compositeEngineCommitter.addLuceneIndexes(catalogSnapshot); + + catalogSnapshotAwareRefreshListeners.forEach(ref -> { + try { + ref.afterRefresh(true, catalogSnapshot); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + refreshListeners.forEach(ref -> { + try { + ref.afterRefresh(true); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + public CatalogSnapshot catalogSnapshot() { + return catalogSnapshot; + } + + // This should get wired into searcher acquireSnapshot for initializing reader context later + // this now becomes equivalent of the reader + // Each search side specific impl can decide on how to init specific reader instances using this pit snapshot provided by writers + public ReleasableRef acquireSnapshot() { + catalogSnapshot.incRef(); // this should be package-private + return new ReleasableRef(catalogSnapshot) { + @Override + public void close() throws Exception { + catalogSnapshot.decRef(); // this should be package-private + } + }; + } + + @ExperimentalApi + public static abstract class ReleasableRef implements AutoCloseable { + + private T t; + + public ReleasableRef(T t) { + this.t = t; + } + + public T getRef() { + return t; + } + } + + public static void main(String[] args) throws Exception { + CompositeEngine coordinator = new CompositeEngine(null, null, null); + + for (int i = 0; i < 5; i++) { + + // Ingestion into one generation + for (int k = 0; k < 10; k++) { + try (CompositeDataFormatWriter.CompositeDocumentInput doc = coordinator.documentInput()) { + + // Mapper part + doc.addField(new KeywordFieldMapper.KeywordFieldType("f1"), k + "_v1"); + doc.addField(new KeywordFieldMapper.KeywordFieldType("f2"), k + "_v2"); + doc.addField(new KeywordFieldMapper.KeywordFieldType("f3"), k + "_v3"); + doc.addField(new KeywordFieldMapper.KeywordFieldType("f4"), k + "_v4"); + Engine.Index index = new Engine.Index(null, 1L, null); + index.documentInput = doc; + + // applyIndexOperation part + coordinator.index(index); + } + } + + // Refresh until generation + coordinator.refresh("_manual_test"); + System.out.println(coordinator.catalogSnapshot); + } + } + + @Override + public Engine.DeleteResult delete(Engine.Delete delete) throws IOException { + return null; + } + + @Override + public Engine.NoOpResult noOp(Engine.NoOp noOp) throws IOException { + return null; + } + + @Override + public int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException { + return 0; + } + + @Override + public boolean hasCompleteOperationHistory(String reason, long startingSeqNo) { + return false; + } + + @Override + public long getIndexBufferRAMBytesUsed() { + return 0; + } + + @Override + public List segments(boolean verbose) { + return List.of(); + } + + @Override + public long getMaxSeenAutoIdTimestamp() { + return 0; + } + + @Override + public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) { + + } + + @Override + public int fillSeqNoGaps(long primaryTerm) throws IOException { + return 0; + } + + @Override + public void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID) throws EngineException, IOException { + + } + + @Override + public void writeIndexingBuffer() throws EngineException { + + } + + @Override + public void flush(boolean force, boolean waitIfOngoing) throws EngineException { + compositeEngineCommitter.commit(catalogSnapshot); + } + + @Override + public SafeCommitInfo getSafeCommitInfo() { + return null; + } + + @Override + public TranslogManager translogManager() { + return null; + } + + @Override + public Closeable acquireHistoryRetentionLock() { + return null; + } + + @Override + public Translog.Snapshot newChangesSnapshot(String source, long fromSeqNo, long toSeqNo, boolean requiredFullRange, boolean accurateCount) throws IOException { + return null; + } + + @Override + public String getHistoryUUID() { + return ""; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexingManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexingManager.java new file mode 100644 index 0000000000000..3e6a751caef2a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexingManager.java @@ -0,0 +1,137 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.coord; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineException; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.commit.Committer; +import org.opensearch.index.engine.exec.commit.LuceneCommitEngine; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; +import org.opensearch.index.engine.exec.composite.CompositeIndexingExecutionEngine; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MapperService; + +public class IndexingManager { + + private final CompositeIndexingExecutionEngine engine; + private final List refreshListeners = new ArrayList<>(); + private final Committer committer; + private CatalogSnapshot catalogSnapshot; + + public IndexingManager(Path indexPath, MapperService mapperService/*, EngineConfig engineConfig*/) + throws IOException { + this.engine = new CompositeIndexingExecutionEngine(mapperService, null, new Any(List.of(DataFormat.TEXT)), null, + 0); + this.committer = new LuceneCommitEngine(indexPath); + } + + public CompositeDataFormatWriter.CompositeDocumentInput documentInput() throws IOException { + return engine.createCompositeWriter().newDocumentInput(); + } + + public Engine.IndexResult index(Engine.Index index) throws Exception { + WriteResult writeResult = index.documentInput.addToWriter(); + // translog, checkpoint, other checks + return new Engine.IndexResult(writeResult.version(), writeResult.seqNo(), writeResult.term(), + writeResult.success()); + } + + public synchronized void refresh(String source) throws EngineException, IOException { + refreshListeners.forEach(ref -> { + try { + ref.beforeRefresh(); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + + long id = 0L; + if (catalogSnapshot != null) { + id = catalogSnapshot.getId(); + } + CatalogSnapshot newCatSnap = new CatalogSnapshot(engine.refresh(new RefreshInput()), id + 1L); + newCatSnap.incRef(); + if (catalogSnapshot != null) { + catalogSnapshot.decRef(); + } + catalogSnapshot = newCatSnap; + + refreshListeners.forEach(ref -> { + try { + ref.afterRefresh(true); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + // This should get wired into searcher acquireSnapshot for initializing reader context later + // this now becomes equivalent of the reader + // Each search side specific impl can decide on how to init specific reader instances using this pit snapshot provided by writers + public ReleasableRef acquireSnapshot() { + catalogSnapshot.incRef(); // this should be package-private + return new ReleasableRef<>(catalogSnapshot) { + @Override + public void close() throws Exception { + catalogSnapshot.decRef(); // this should be package-private + } + }; + } + + public static abstract class ReleasableRef implements AutoCloseable { + + private final T t; + + public ReleasableRef(T t) { + this.t = t; + } + + public T getRef() { + return t; + } + } + + public static void main(String[] args) throws Exception { + IndexingManager coordinator = new IndexingManager( + Path.of("/Users/shnkgo/Downloads/mustang/lucene-committer-index/"), null); + + for (int i = 0; i < 5; i++) { + + // Ingestion into one generation + for (int k = 0; k < 10; k++) { + try (CompositeDataFormatWriter.CompositeDocumentInput doc = coordinator.documentInput()) { + + // Mapper part + doc.addField(new KeywordFieldMapper.KeywordFieldType("f1"), k + "_v1"); + doc.addField(new KeywordFieldMapper.KeywordFieldType("f2"), k + "_v2"); + doc.addField(new KeywordFieldMapper.KeywordFieldType("f3"), k + "_v3"); + doc.addField(new KeywordFieldMapper.KeywordFieldType("f4"), k + "_v4"); + Engine.Index index = new Engine.Index(null, 1L, null); + index.documentInput = doc; + + // applyIndexOperation part + coordinator.index(index); + } + } + + // Refresh until generation + coordinator.refresh("_manual_test"); + System.out.println(coordinator.catalogSnapshot); + } + } + +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java new file mode 100644 index 0000000000000..8afdc4f9901d4 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java @@ -0,0 +1,130 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.lucene; + +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.util.BytesRef; +import org.opensearch.index.engine.InternalEngine; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.mapper.KeywordFieldMapper; +import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.index.mapper.ParseContext; + +import java.io.IOException; +import java.util.List; + +public class LuceneIEEngine implements IndexingExecutionEngine { + + private final InternalEngine internalEngine; + + public LuceneIEEngine(InternalEngine internalEngine) { + this.internalEngine = internalEngine; + } + + @Override + public List supportedFieldTypes() { + return List.of(); + } + + + @Override + public Writer> createWriter(long writerGeneration) throws IOException { + return new LuceneWriter(internalEngine.indexWriter, writerGeneration); + } + + @Override + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { + internalEngine.refresh(refreshInput.getClass().getName()); + return null; + } + + @Override + public DataFormat getDataFormat() { + return DataFormat.LUCENE; + } + + + public static class LuceneDocumentInput implements DocumentInput { + + private final ParseContext.Document doc; + private final IndexWriter writer; + + public LuceneDocumentInput(ParseContext.Document doc, IndexWriter w) { + this.doc = doc; + this.writer = w; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + doc.add(new KeywordFieldMapper.KeywordField("f1", new BytesRef("good_field"), null)); + } + + @Override + public ParseContext.Document getFinalInput() { + return doc; + } + + @Override + public WriteResult addToWriter() throws IOException { + writer.addDocument(doc); + return null; + } + + @Override + public void close() throws Exception { + // no-op, reuse writer + } + } + + public static class LuceneWriter implements Writer { + + private final IndexWriter writer; + private final long writerGeneration; + + public LuceneWriter(IndexWriter writer, long writerGeneration) { + this.writer = writer; + this.writerGeneration = writerGeneration; + } + + @Override + public WriteResult addDoc(LuceneDocumentInput d) throws IOException { + writer.addDocument(d.doc); + return null; + } + + @Override + public FileInfos flush(FlushIn flushIn) throws IOException { + writer.flush(); + return null; + } + + @Override + public void sync() throws IOException { + writer.flush(); + } + + @Override + public void close() { + // no-op + } + + @Override + public LuceneDocumentInput newDocumentInput() { + return new LuceneDocumentInput(new ParseContext.Document(), writer); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/queue/ConcurrentQueue.java b/server/src/main/java/org/opensearch/index/engine/exec/queue/ConcurrentQueue.java new file mode 100644 index 0000000000000..9b8b774063a87 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/queue/ConcurrentQueue.java @@ -0,0 +1,123 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.queue; + +import java.util.Queue; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.function.Predicate; +import java.util.function.Supplier; + +public final class ConcurrentQueue { + + static final int MIN_CONCURRENCY = 1; + static final int MAX_CONCURRENCY = 256; + + private final int concurrency; + private final Lock[] locks; + private final Queue[] queues; + private final Supplier> queueSupplier; + + ConcurrentQueue(Supplier> queueSupplier, int concurrency) { + if (concurrency < MIN_CONCURRENCY || concurrency > MAX_CONCURRENCY) { + throw new IllegalArgumentException( + "concurrency must be in [" + MIN_CONCURRENCY + ", " + MAX_CONCURRENCY + "], got " + concurrency); + } + this.concurrency = concurrency; + this.queueSupplier = queueSupplier; + locks = new Lock[concurrency]; + @SuppressWarnings({"rawtypes", "unchecked"}) Queue[] queues = new Queue[concurrency]; + this.queues = queues; + for (int i = 0; i < concurrency; ++i) { + locks[i] = new ReentrantLock(); + queues[i] = queueSupplier.get(); + } + } + + void add(T entry) { + // Seed the order in which to look at entries based on the current thread. This helps distribute + // entries across queues and gives a bit of thread affinity between entries and threads, which + // can't hurt. + final int threadHash = Thread.currentThread().hashCode() & 0xFFFF; + for (int i = 0; i < concurrency; ++i) { + final int index = (threadHash + i) % concurrency; + final Lock lock = locks[index]; + final Queue queue = queues[index]; + if (lock.tryLock()) { + try { + queue.add(entry); + return; + } finally { + lock.unlock(); + } + } + } + final int index = threadHash % concurrency; + final Lock lock = locks[index]; + final Queue queue = queues[index]; + lock.lock(); + try { + queue.add(entry); + } finally { + lock.unlock(); + } + } + + T poll(Predicate predicate) { + final int threadHash = Thread.currentThread().hashCode() & 0xFFFF; + for (int i = 0; i < concurrency; ++i) { + final int index = (threadHash + i) % concurrency; + final Lock lock = locks[index]; + final Queue queue = queues[index]; + if (lock.tryLock()) { + try { + for (T entry : queue) { + if (predicate.test(entry)) { + return entry; + } + } + } finally { + lock.unlock(); + } + } + } + for (int i = 0; i < concurrency; ++i) { + final int index = (threadHash + i) % concurrency; + final Lock lock = locks[index]; + final Queue queue = queues[index]; + lock.lock(); + try { + for (T entry : queue) { + if (predicate.test(entry)) { + return entry; + } + } + } finally { + lock.unlock(); + } + } + return null; + } + + boolean remove(T entry) { + for (int i = 0; i < concurrency; ++i) { + final Lock lock = locks[i]; + final Queue queue = queues[i]; + lock.lock(); + try { + if (queue.remove(entry)) { + return true; + } + } finally { + lock.unlock(); + } + } + return false; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/queue/LockableConcurrentQueue.java b/server/src/main/java/org/opensearch/index/engine/exec/queue/LockableConcurrentQueue.java new file mode 100644 index 0000000000000..e46ec5137308a --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/queue/LockableConcurrentQueue.java @@ -0,0 +1,54 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.queue; + +import java.util.Queue; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.locks.Lock; +import java.util.function.Supplier; + +public final class LockableConcurrentQueue { + + private final ConcurrentQueue queue; + private final AtomicInteger addAndUnlockCounter = new AtomicInteger(); + + public LockableConcurrentQueue(Supplier> queueSupplier, int concurrency) { + this.queue = new ConcurrentQueue<>(queueSupplier, concurrency); + } + + /** + * Lock an entry, and poll it from the queue, in that order. If no entry can be found and locked, + * {@code null} is returned. + */ + public T lockAndPoll() { + int addAndUnlockCount; + do { + addAndUnlockCount = addAndUnlockCounter.get(); + T entry = queue.poll(Lock::tryLock); + if (entry != null) { + return entry; + } + // If an entry has been added to the queue in the meantime, try again. + } while (addAndUnlockCount != addAndUnlockCounter.get()); + + return null; + } + + /** Remove an entry from the queue. */ + public boolean remove(T entry) { + return queue.remove(entry); + } + + /** Add an entry to the queue and unlock it, in that order. */ + public void addAndUnlock(T entry) { + queue.add(entry); + entry.unlock(); + addAndUnlockCounter.incrementAndGet(); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java b/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java new file mode 100644 index 0000000000000..b19a6c893cc11 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java @@ -0,0 +1,36 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.text; + +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.index.engine.exec.DataFormat; + + +public class TextDF implements DataFormat { + @Override + public Setting dataFormatSettings() { + return null; + } + + @Override + public Setting clusterLeveldataFormatSettings() { + return null; + } + + @Override + public String name() { + return "text"; + } + + @Override + public void configureStore() { + + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java new file mode 100644 index 0000000000000..7e7743c17f6e7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java @@ -0,0 +1,147 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.text; + +import java.nio.file.Path; +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.FileInfos; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.engine.exec.FlushIn; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.engine.exec.RefreshInput; +import org.opensearch.index.engine.exec.RefreshResult; +import org.opensearch.index.engine.exec.WriteResult; +import org.opensearch.index.engine.exec.Writer; +import org.opensearch.index.mapper.MappedFieldType; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; + +public class TextEngine implements IndexingExecutionEngine { + + private final AtomicLong counter = new AtomicLong(); + private final Set openWriters = new HashSet<>(); + private final List openFiles = new ArrayList<>(); + + @Override + public List supportedFieldTypes() { + return List.of(); + } + + @Override + public Writer> createWriter(long writerGeneration) throws IOException { + return new TextWriter("text_file" + counter.getAndIncrement(), this, writerGeneration); + } + + @Override + public DataFormat getDataFormat() { + return DataFormat.TEXT; + } + + @Override + public RefreshResult refresh(RefreshInput refreshInput) throws IOException { + openFiles.addAll(refreshInput.getWriterFiles()); + RefreshResult refreshResult = new RefreshResult(); + refreshResult.add(DataFormat.TEXT, openFiles); + return refreshResult; + } + + public static class TextInput implements DocumentInput { + + private final StringBuilder sb = new StringBuilder(); + private final TextWriter writer; + + public TextInput(TextWriter writer) { + this.writer = writer; + } + + @Override + public void addField(MappedFieldType fieldType, Object value) { + sb.append(fieldType.name()).append("=").append(value).append(";"); + } + + @Override + public String getFinalInput() { + return sb.append("\n").toString(); + } + + @Override + public WriteResult addToWriter() throws IOException { + return writer.addDoc(this); + } + + @Override + public void close() throws Exception { + //no op + } + } + + public static class TextWriter implements Writer { + + private final StringBuilder sb = new StringBuilder(); + private final File currentFile; + private final AtomicBoolean flushed = new AtomicBoolean(false); + private final Runnable onClose; + private final long writerGeneration; + + public TextWriter(String currentFile, TextEngine engine, long writerGeneration) throws IOException { + this.currentFile = new File("/Users/shnkgo/mustang" + currentFile); + this.currentFile.createNewFile(); + this.writerGeneration = writerGeneration; + boolean canWrite = this.currentFile.setWritable(true); + if (!canWrite) { + throw new IllegalStateException("Cannot write to file [" + currentFile + "]"); + } + engine.openWriters.add(this); + onClose = () -> engine.openWriters.remove(this); + } + + @Override + public WriteResult addDoc(TextInput d) throws IOException { + sb.append(d.getFinalInput()); + return new WriteResult(true, null, 1, 1, 1); + } + + @Override + public FileInfos flush(FlushIn flushIn) throws IOException { + try (FileWriter fw = new FileWriter(currentFile)) { + fw.write(sb.toString()); + } + flushed.set(true); + FileInfos fileInfos = new FileInfos(); + WriterFileSet writerFileSet = new WriterFileSet(currentFile.toPath().getParent(), writerGeneration); + writerFileSet.add(currentFile.getName()); + fileInfos.putWriterFileSet(DataFormat.TEXT, writerFileSet); + return fileInfos; + } + + @Override + public void sync() throws IOException { + } + + @Override + public void close() { + onClose.run(); + } + + @Override + public TextInput newDocumentInput() { + return new TextInput(this); + } + + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/util/SetOnce.java b/server/src/main/java/org/opensearch/index/engine/exec/util/SetOnce.java new file mode 100644 index 0000000000000..189e49cef8458 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/util/SetOnce.java @@ -0,0 +1,73 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec.util; + +import java.util.concurrent.atomic.AtomicReference; + +public final class SetOnce implements Cloneable { + + /** Thrown when {@link SetOnce#set(Object)} is called more than once. */ + public static final class AlreadySetException extends IllegalStateException { + public AlreadySetException() { + super("The object cannot be set twice!"); + } + } + + /** Holding object and marking that it was already set */ + private static final class Wrapper { + private T object; + + private Wrapper(T object) { + this.object = object; + } + } + + private final AtomicReference> set; + + /** + * A default constructor which does not set the internal object, and allows setting it by calling + * {@link #set(Object)}. + */ + public SetOnce() { + set = new AtomicReference<>(); + } + + /** + * Creates a new instance with the internal object set to the given object. Note that any calls to + * {@link #set(Object)} afterwards will result in {@link AlreadySetException} + * + * @throws AlreadySetException if called more than once + * @see #set(Object) + */ + public SetOnce(T obj) { + set = new AtomicReference<>(new Wrapper<>(obj)); + } + + /** Sets the given object. If the object has already been set, an exception is thrown. */ + public final void set(T obj) { + if (!trySet(obj)) { + throw new AlreadySetException(); + } + } + + /** + * Sets the given object if none was set before. + * + * @return true if object was set successfully, false otherwise + */ + public final boolean trySet(T obj) { + return set.compareAndSet(null, new Wrapper<>(obj)); + } + + /** Returns the object set by {@link #set(Object)}. */ + public final T get() { + Wrapper wrapper = set.get(); + return wrapper == null ? null : wrapper.object; + } +} diff --git a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java index ea4cff42ca905..8a9e8e3d9f517 100644 --- a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java @@ -389,17 +389,19 @@ protected void parseCreateField(ParseContext context) throws IOException { if (value == null) { return; } - if (indexed) { - context.doc().add(new Field(fieldType().name(), value ? "T" : "F", Defaults.FIELD_TYPE)); - } - if (stored) { - context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F")); - } - if (hasDocValues) { - context.doc().add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0)); - } else { - createFieldNamesField(context); - } + + context.compositeDocumentInput().addField(fieldType(), value); +// if (indexed) { +// context.doc().add(new Field(fieldType().name(), value ? "T" : "F", Defaults.FIELD_TYPE)); +// } +// if (stored) { +// context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F")); +// } +// if (hasDocValues) { +// context.doc().add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0)); +// } else { +// createFieldNamesField(context); +// } } @Override @@ -430,7 +432,7 @@ protected void canDeriveSourceInternal() { * 2. When using stored field, for multi value field order would be preserved */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) { @Override public Object convert(Object value) { diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java index 270a4606b11c6..d3d3784f1295c 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java @@ -247,7 +247,7 @@ protected void canDeriveSourceInternal() { * "format" */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) { @Override public Object convert(Object value) { @@ -842,21 +842,23 @@ protected void parseCreateField(ParseContext context) throws IOException { } } - if (indexed) { - context.doc().add(new LongPoint(fieldType().name(), timestamp)); - } - if (hasDocValues) { - if (skiplist) { - context.doc().add(SortedNumericDocValuesField.indexedField(fieldType().name(), timestamp)); - } else { - context.doc().add(new SortedNumericDocValuesField(fieldType().name(), timestamp)); - } - } else if (store || indexed) { - createFieldNamesField(context); - } - if (store) { - context.doc().add(new StoredField(fieldType().name(), timestamp)); - } + context.compositeDocumentInput().addField(fieldType(), timestamp); + +// if (indexed) { +// context.doc().add(new LongPoint(fieldType().name(), timestamp)); +// } +// if (hasDocValues) { +// if (skiplist) { +// context.doc().add(SortedNumericDocValuesField.indexedField(fieldType().name(), timestamp)); +// } else { +// context.doc().add(new SortedNumericDocValuesField(fieldType().name(), timestamp)); +// } +// } else if (store || indexed) { +// createFieldNamesField(context); +// } +// if (store) { +// context.doc().add(new StoredField(fieldType().name(), timestamp)); +// } } public Long getNullValue() { diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java index 383bd25dc7d0c..9f6de67843932 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java +++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java @@ -12,6 +12,7 @@ import org.opensearch.core.xcontent.XContentBuilder; import java.io.IOException; +import java.util.List; import java.util.Objects; /** @@ -58,4 +59,13 @@ public FieldValueType getDerivedFieldPreference() { public void generate(XContentBuilder builder, LeafReader reader, int docId) throws IOException { fieldValueFetcher.write(builder, fieldValueFetcher.fetch(reader, docId)); } + + /** + * Generate the derived field value based on the preference of derived field and field value type + * @param builder - builder to store the derived source filed + * @param values - values for which we want to generate the source + */ + public void generate(XContentBuilder builder, List values) throws IOException { + fieldValueFetcher.write(builder, values); + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java index cb7e08f062d6d..cd520eb5eb1e2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java @@ -51,6 +51,7 @@ import org.opensearch.index.IndexSettings; import org.opensearch.index.IndexSortConfig; import org.opensearch.index.analysis.IndexAnalyzers; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.MetadataFieldMapper.TypeParser; import org.opensearch.index.query.NestedQueryBuilder; @@ -253,6 +254,10 @@ public ParsedDocument parse(SourceToParse source) throws MapperParsingException return documentParser.parseDocument(source, mapping.metadataMappers); } + public ParsedDocument parse(SourceToParse source, CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput) throws MapperParsingException { + return documentParser.parseDocument(source, mapping.metadataMappers, compositeDocumentInput); + } + public ParsedDocument createDeleteTombstoneDoc(String index, String id) throws MapperParsingException { final SourceToParse emptySource = new SourceToParse(index, id, new BytesArray("{}"), MediaTypeRegistry.JSON); return documentParser.parseDocument(emptySource, deleteTombstoneMetadataFieldMappers).toTombstone(); diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java index 213fb48595b8b..b81b3dfde7951 100644 --- a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java +++ b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java @@ -46,6 +46,7 @@ import org.opensearch.core.xcontent.MediaType; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import org.opensearch.index.mapper.DynamicTemplate.XContentFieldType; import java.io.IOException; @@ -76,6 +77,10 @@ final class DocumentParser { } ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadataFieldsMappers) throws MapperParsingException { + return parseDocument(source, metadataFieldsMappers, null); + } + + ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadataFieldsMappers, CompositeDataFormatWriter.CompositeDocumentInput documentInput) throws MapperParsingException { final Mapping mapping = docMapper.mapping(); final ParseContext.InternalParseContext context; final MediaType mediaType = source.getMediaType(); @@ -88,7 +93,7 @@ ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadat mediaType ) ) { - context = new ParseContext.InternalParseContext(indexSettings, docMapperParser, docMapper, source, parser); + context = new ParseContext.InternalParseContext(indexSettings, docMapperParser, docMapper, source, parser, documentInput); validateStart(parser); internalParseDocument(mapping, metadataFieldsMappers, context, parser); validateEnd(parser); @@ -102,7 +107,7 @@ ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadat context.postParse(); - return parsedDocument(source, context, createDynamicUpdate(mapping, docMapper, context.getDynamicMappers())); + return parsedDocument(source, context, createDynamicUpdate(mapping, docMapper, context.getDynamicMappers()), documentInput); } private static boolean containsDisabledObjectMapper(ObjectMapper objectMapper, String[] subfields) { @@ -176,7 +181,7 @@ private static boolean isEmptyDoc(Mapping mapping, XContentParser parser) throws return false; } - private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.InternalParseContext context, Mapping update) { + private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.InternalParseContext context, Mapping update, CompositeDataFormatWriter.CompositeDocumentInput documentInput) { return new ParsedDocument( context.version(), context.seqID(), @@ -185,7 +190,8 @@ private static ParsedDocument parsedDocument(SourceToParse source, ParseContext. context.docs(), context.sourceToParse().source(), context.sourceToParse().getMediaType(), - update + update, + documentInput ); } diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java index aaa2c9c029974..fee2194e14976 100644 --- a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java @@ -600,7 +600,7 @@ protected Explicit ignoreMalformed() { * Method to create derived source generator for this field mapper, it is illegal to enable the * derived source feature and not implement this method for a field mapper */ - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return null; } diff --git a/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java index 2910bd2856d2f..89844e14a351d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java @@ -219,7 +219,7 @@ protected void canDeriveSourceInternal() { * 4. When using stored field, order and duplicate values would be preserved */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) { @Override public Object convert(Object value) { diff --git a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java index b2e8f75a4f444..041da68ea0cdf 100644 --- a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java @@ -199,7 +199,7 @@ protected void canDeriveSourceInternal() { * 2. When using stored field, order and duplicate values would be preserved */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator( mappedFieldType, new SortedSetDocValuesFetcher(mappedFieldType, simpleName()), diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java index 7ace516459763..7897d232519a3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java @@ -294,7 +294,7 @@ protected void canDeriveSourceInternal() { * 2. When using stored field, order and duplicate values would be preserved */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator( mappedFieldType, new SortedSetDocValuesFetcher(mappedFieldType, simpleName()), @@ -862,20 +862,22 @@ protected void parseCreateField(ParseContext context) throws IOException { value = normalizeValue(normalizer, name(), value); } - // convert to utf8 only once before feeding postings/dv/stored fields - final BytesRef binaryValue = new BytesRef(value); - if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { - Field field = new KeywordField(fieldType().name(), binaryValue, fieldType); - context.doc().add(field); - - if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { - createFieldNamesField(context); - } - } + context.compositeDocumentInput().addField(fieldType(), value); - if (fieldType().hasDocValues()) { - context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); - } + // convert to utf8 only once before feeding postings/dv/stored fields +// final BytesRef binaryValue = new BytesRef(value); +// if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) { +// Field field = new KeywordField(fieldType().name(), binaryValue, fieldType); +// context.doc().add(field); +// +// if (fieldType().hasDocValues() == false && fieldType.omitNorms()) { +// createFieldNamesField(context); +// } +// } +// +// if (fieldType().hasDocValues()) { +// context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue)); +// } } static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException { diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java index a3ea6b5764913..751b56cec6248 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java +++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java @@ -87,6 +87,7 @@ public abstract class MappedFieldType { private final boolean docValues; private final boolean isIndexed; private final boolean isStored; + private final boolean isColumnar; private final TextSearchInfo textSearchInfo; private final Map meta; private float boost; @@ -101,6 +102,8 @@ public MappedFieldType( TextSearchInfo textSearchInfo, Map meta ) { + // TODO: take the value from user input + this.isColumnar = true; this.boost = 1.0f; this.name = Objects.requireNonNull(name); this.isIndexed = isIndexed; @@ -185,6 +188,13 @@ public boolean isStored() { return isStored; } + /** + * Returns true if the field is columnar. + */ + public boolean isColumnar() { + return isColumnar; + } + /** * If the field supports using the indexed data to speed up operations related to ordering of data, such as sorting or aggs, return * a function for doing that. If it is unsupported for this field type, there is no need to override this method. diff --git a/server/src/main/java/org/opensearch/index/mapper/Mapper.java b/server/src/main/java/org/opensearch/index/mapper/Mapper.java index 3b9024162656f..d6f5bdcbd9af2 100644 --- a/server/src/main/java/org/opensearch/index/mapper/Mapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/Mapper.java @@ -319,4 +319,8 @@ public void canDeriveSource() { public void deriveSource(XContentBuilder builder, LeafReader leafReader, int docId) throws IOException { throw new UnsupportedOperationException("Derived source field is not supported for [" + name() + "] field"); } + + public DerivedFieldGenerator derivedFieldGenerator() throws IOException { + throw new UnsupportedOperationException("Converting [" + name() + "] is not supported for [" + name() + "] field"); + } } diff --git a/server/src/main/java/org/opensearch/index/mapper/MapperService.java b/server/src/main/java/org/opensearch/index/mapper/MapperService.java index b0acdceeff9ce..3c7d9374fa257 100644 --- a/server/src/main/java/org/opensearch/index/mapper/MapperService.java +++ b/server/src/main/java/org/opensearch/index/mapper/MapperService.java @@ -141,7 +141,7 @@ public enum MergeReason { ); public static final Setting INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING = Setting.longSetting( "index.mapping.total_fields.limit", - 1000L, + 10000L, 0, Property.Dynamic, Property.IndexScope diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java index a04f8888a2347..ba2a215a8ad41 100644 --- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java @@ -209,10 +209,21 @@ public boolean isDataCubeMetricSupported() { * compared to stored field(stored as float) */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) { @Override public Object convert(Object value) { + if(value instanceof Integer) { + Integer val = (Integer) value; + + return switch (type) { + case HALF_FLOAT -> HalfFloatPoint.sortableShortToHalfFloat(val.shortValue()); + case FLOAT -> NumericUtils.sortableIntToFloat(val); + case DOUBLE -> NumericUtils.sortableLongToDouble(val); + case BYTE, SHORT, INTEGER, LONG -> val; + case UNSIGNED_LONG -> Numbers.toUnsignedBigInteger(val); + }; + } Long val = (Long) value; if (val == null) { return null; @@ -2171,7 +2182,9 @@ protected void parseCreateField(ParseContext context) throws IOException { numericValue = fieldType().type.parse(value, coerce.value()); } - context.doc().addAll(fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, skiplist, stored)); + context.compositeDocumentInput().addField(fieldType(), numericValue); + +// context.doc().addAll(fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, skiplist, stored)); if (hasDocValues == false && (stored || indexed)) { createFieldNamesField(context); diff --git a/server/src/main/java/org/opensearch/index/mapper/ParseContext.java b/server/src/main/java/org/opensearch/index/mapper/ParseContext.java index 5d382ff28bcf9..5ef7e892a7ce5 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ParseContext.java +++ b/server/src/main/java/org/opensearch/index/mapper/ParseContext.java @@ -39,6 +39,7 @@ import org.opensearch.common.annotation.PublicApi; import org.opensearch.core.xcontent.XContentParser; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import java.util.ArrayList; import java.util.Collection; @@ -242,6 +243,11 @@ public Document doc() { return in.doc(); } + @Override + public CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput() { + return in.compositeDocumentInput(); + } + @Override protected void addDoc(Document doc) { in.addDoc(doc); @@ -393,12 +399,25 @@ public static class InternalParseContext extends ParseContext { private final Set ignoredFields = new HashSet<>(); + private CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput; + public InternalParseContext( IndexSettings indexSettings, DocumentMapperParser docMapperParser, DocumentMapper docMapper, SourceToParse source, XContentParser parser + ) { + this(indexSettings, docMapperParser, docMapper, source, parser, null); + } + + public InternalParseContext( + IndexSettings indexSettings, + DocumentMapperParser docMapperParser, + DocumentMapper docMapper, + SourceToParse source, + XContentParser parser, + CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput ) { this.indexSettings = indexSettings; this.docMapper = docMapper; @@ -417,6 +436,7 @@ public InternalParseContext( this.currentArrayDepth = 0L; this.maxAllowedFieldDepth = indexSettings.getMappingDepthLimit(); this.maxAllowedArrayDepth = indexSettings.getMappingDepthLimit(); + this.compositeDocumentInput = compositeDocumentInput; } @Override @@ -458,6 +478,11 @@ public Document doc() { return this.document; } + @Override + public CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput() { + return compositeDocumentInput; + } + @Override protected void addDoc(Document doc) { numNestedDocs++; @@ -718,6 +743,7 @@ public boolean isWithinMultiFields() { public abstract Document rootDoc(); public abstract Document doc(); + public abstract CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput(); protected abstract void addDoc(Document doc); diff --git a/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java b/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java index 16e38980f8600..bcbf6a5fb38f3 100644 --- a/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java +++ b/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java @@ -37,6 +37,8 @@ import org.opensearch.common.xcontent.XContentType; import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.xcontent.MediaType; +import org.opensearch.index.engine.exec.DocumentInput; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; import org.opensearch.index.mapper.MapperService.MergeReason; import org.opensearch.index.mapper.ParseContext.Document; @@ -64,6 +66,12 @@ public class ParsedDocument { private Mapping dynamicMappingsUpdate; + private CompositeDataFormatWriter.CompositeDocumentInput documentInput; + + public CompositeDataFormatWriter.CompositeDocumentInput getDocumentInput() { + return documentInput; + } + public ParsedDocument( Field version, SeqNoFieldMapper.SequenceIDFields seqID, @@ -73,6 +81,22 @@ public ParsedDocument( BytesReference source, MediaType mediaType, Mapping dynamicMappingsUpdate + ) { + this( + version, seqID, id, routing, documents, source, mediaType, dynamicMappingsUpdate, null + ); + } + + public ParsedDocument( + Field version, + SeqNoFieldMapper.SequenceIDFields seqID, + String id, + String routing, + List documents, + BytesReference source, + MediaType mediaType, + Mapping dynamicMappingsUpdate, + CompositeDataFormatWriter.CompositeDocumentInput documentInput ) { this.version = version; this.seqID = seqID; @@ -82,6 +106,7 @@ public ParsedDocument( this.source = source; this.dynamicMappingsUpdate = dynamicMappingsUpdate; this.mediaType = mediaType; + this.documentInput = documentInput; } public String id() { diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index bb726893b3d17..66107621f1049 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -1238,7 +1238,7 @@ protected void canDeriveSourceInternal() {} * Derive source using stored field, which would always be present for derived source enabled index field */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator(mappedFieldType, null, new StoredFieldFetcher(mappedFieldType, simpleName())) { @Override public FieldValueType getDerivedFieldPreference() { diff --git a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java index b10371f301a59..2c1e532542c63 100644 --- a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java @@ -928,7 +928,7 @@ protected void canDeriveSourceInternal() { * 1. When using doc values, for multi value field, result would be deduplicated and in sorted order */ @Override - protected DerivedFieldGenerator derivedFieldGenerator() { + public DerivedFieldGenerator derivedFieldGenerator() { return new DerivedFieldGenerator(mappedFieldType, new SortedSetDocValuesFetcher(mappedFieldType, simpleName()) { @Override public Object convert(Object value) { diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java index f2c278f04b021..e444bd8f858b0 100644 --- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java +++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java @@ -570,6 +570,7 @@ public boolean indexSortedOnField(String field) { return indexSortConfig.hasPrimarySortOnField(field); } + // This converts the QB to query public ParsedQuery toQuery(QueryBuilder queryBuilder) { return toQuery(queryBuilder, q -> { Query query = q.toQuery(this); @@ -580,6 +581,7 @@ public ParsedQuery toQuery(QueryBuilder queryBuilder) { }); } + // This converts the QB to query private ParsedQuery toQuery(QueryBuilder queryBuilder, CheckedFunction filterOrQuery) { reset(); try { diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 609a6290d36ce..1212ed617c93a 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -81,6 +81,7 @@ import org.opensearch.common.CheckedConsumer; import org.opensearch.common.CheckedFunction; import org.opensearch.common.CheckedRunnable; +import org.opensearch.common.CheckedSupplier; import org.opensearch.common.Nullable; import org.opensearch.common.SetOnce; import org.opensearch.common.annotation.ExperimentalApi; @@ -134,6 +135,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineException; import org.opensearch.index.engine.EngineFactory; +import org.opensearch.index.engine.EngineSearcherSupplier; import org.opensearch.index.engine.IngestionEngine; import org.opensearch.index.engine.MergedSegmentWarmerFactory; import org.opensearch.index.engine.NRTReplicationEngine; @@ -142,6 +144,12 @@ import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.Segment; import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.engine.exec.bridge.CheckpointState; +import org.opensearch.index.engine.exec.bridge.Indexer; +import org.opensearch.index.engine.exec.bridge.IndexingThrottler; +import org.opensearch.index.engine.exec.bridge.StatsHolder; +import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter; +import org.opensearch.index.engine.exec.coord.CompositeEngine; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.ShardFieldData; import org.opensearch.index.flush.FlushStats; @@ -210,6 +218,7 @@ import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint; import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher; import org.opensearch.indices.replication.common.ReplicationTimer; +import org.opensearch.plugins.PluginsService; import org.opensearch.repositories.RepositoriesService; import org.opensearch.repositories.Repository; import org.opensearch.search.suggest.completion.CompletionStats; @@ -389,7 +398,7 @@ Runnable getGlobalCheckpointSyncer() { private final MergedSegmentPublisher mergedSegmentPublisher; private final ReferencedSegmentsPublisher referencedSegmentsPublisher; private final Set pendingMergedSegmentCheckpoints = Sets.newConcurrentHashSet(); - + private final CompositeEngine compositeEngine; @InternalApi public IndexShard( final ShardRouting shardRouting, @@ -429,7 +438,8 @@ public IndexShard( final Object refreshMutex, final ClusterApplierService clusterApplierService, @Nullable final MergedSegmentPublisher mergedSegmentPublisher, - @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher + @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher, + PluginsService pluginsService ) throws IOException { super(shardRouting.shardId(), indexSettings); assert shardRouting.initializing(); @@ -448,7 +458,7 @@ public IndexShard( this.translogSyncProcessor = createTranslogSyncProcessor( logger, threadPool, - this::getEngine, + this::getIndexer, indexSettings.isAssignedOnRemoteNode(), () -> getRemoteTranslogUploadBufferInterval(remoteStoreSettings::getClusterRemoteTranslogBufferInterval) ); @@ -554,8 +564,12 @@ public boolean shouldCache(Query query) { startRefreshTask(); } } + this.compositeEngine = new CompositeEngine(mapperService, pluginsService, path); } + public CompositeEngine getIndexingExecutionCoordinator() { + return compositeEngine; + } /** * By default, UNASSIGNED_SEQ_NO is used as the initial global checkpoint for new shard initialization. Ingestion * source does not track sequence numbers explicitly and hence defaults to NO_OPS_PERFORMED for compatibility. @@ -837,21 +851,21 @@ public void updateShardState( assert getOperationPrimaryTerm() == newPrimaryTerm; try { if (indexSettings.isSegRepEnabledOrRemoteNode()) { - // this Shard's engine was read only, we need to update its engine before restoring local history from xlog. + // this Shard's indexer was read only, we need to update its indexer before restoring local history from xlog. assert newRouting.primary() && currentRouting.primary() == false; ReplicationTimer timer = new ReplicationTimer(); timer.start(); logger.debug( - "Resetting engine on promotion of shard [{}] to primary, startTime {}\n", + "Resetting indexer on promotion of shard [{}] to primary, startTime {}\n", shardId, timer.startTime() ); resetEngineToGlobalCheckpoint(); timer.stop(); - logger.info("Completed engine failover for shard [{}] in: {} ms", shardId, timer.time()); - // It is possible an engine can open with a SegmentInfos on a higher gen but the reader does not refresh to + logger.info("Completed indexer failover for shard [{}] in: {} ms", shardId, timer.time()); + // It is possible an indexer can open with a SegmentInfos on a higher gen but the reader does not refresh to // trigger our refresh listener. - // Force update the checkpoint post engine reset. + // Force update the checkpoint post indexer reset. updateReplicationCheckpoint(); } @@ -870,19 +884,20 @@ public void updateShardState( * primary/replica re-sync completes successfully and we are now being promoted, we have to restore * the reverted operations on this shard by replaying the translog to avoid losing acknowledged writes. */ - final Engine engine = getEngine(); - engine.translogManager() + final Indexer indexer = getIndexer(); + final CheckpointState checkpointState = getCheckpointState(); + indexer.translogManager() .restoreLocalHistoryFromTranslog( - engine.getProcessedLocalCheckpoint(), - (snapshot) -> runTranslogRecovery(engine, snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {}) + checkpointState.getProcessedLocalCheckpoint(), + (snapshot) -> runTranslogRecovery(indexer, snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {}) ); /* Rolling the translog generation is not strictly needed here (as we will never have collisions between * sequence numbers in a translog generation in a new primary as it takes the last known sequence number * as a starting point), but it simplifies reasoning about the relationship between primary terms and * translog generations. */ - engine.translogManager().rollTranslogGeneration(); - engine.fillSeqNoGaps(newPrimaryTerm); + indexer.translogManager().rollTranslogGeneration(); + indexer.fillSeqNoGaps(newPrimaryTerm); replicationTracker.updateLocalCheckpoint(currentRouting.allocationId().getId(), getLocalCheckpoint()); primaryReplicaSyncer.accept(this, new ActionListener() { @Override @@ -1006,7 +1021,7 @@ public void relocated( } // Ensure all in-flight remote store translog upload drains, before we perform the performSegRep. - releasablesOnHandoffFailures.add(getEngine().translogManager().drainSync()); + releasablesOnHandoffFailures.add(getIndexer().translogManager().drainSync()); // no shard operation permits are being held here, move state from started to relocated assert indexShardOperationPermits.getActiveOperationsCount() == OPERATIONS_BLOCKED @@ -1117,7 +1132,7 @@ public Engine.IndexResult applyIndexOperationOnPrimary( ) throws IOException { assert versionType.validateVersionForWrites(version); return applyIndexOperation( - getEngine(), + getIndexingExecutionCoordinator(), UNASSIGNED_SEQ_NO, getOperationPrimaryTerm(), version, @@ -1128,7 +1143,8 @@ public Engine.IndexResult applyIndexOperationOnPrimary( isRetry, Engine.Operation.Origin.PRIMARY, sourceToParse, - null + null, + compositeEngine::documentInput ); } @@ -1142,7 +1158,7 @@ public Engine.IndexResult applyIndexOperationOnReplica( SourceToParse sourceToParse ) throws IOException { return applyIndexOperation( - getEngine(), + getIndexer(), seqNo, opPrimaryTerm, version, @@ -1153,12 +1169,13 @@ public Engine.IndexResult applyIndexOperationOnReplica( isRetry, Engine.Operation.Origin.REPLICA, sourceToParse, - id + id, + null ); } private Engine.IndexResult applyIndexOperation( - Engine engine, + Indexer engine, long seqNo, long opPrimaryTerm, long version, @@ -1169,7 +1186,8 @@ private Engine.IndexResult applyIndexOperation( boolean isRetry, Engine.Operation.Origin origin, SourceToParse sourceToParse, - String id + String id, + CheckedSupplier documentInputSupplier ) throws IOException { // For Segment Replication enabled replica shards we can be skip parsing the documents as we directly copy segments from primary @@ -1189,7 +1207,7 @@ private Engine.IndexResult applyIndexOperation( UNASSIGNED_SEQ_NO, 0 ); - return getEngine().index(index); + return getIndexer().index(index); } assert opPrimaryTerm <= getOperationPrimaryTerm() : "op term [ " + opPrimaryTerm @@ -1198,7 +1216,7 @@ private Engine.IndexResult applyIndexOperation( + "]"; ensureWriteAllowed(origin); Engine.Index operation; - try { + try (CompositeDataFormatWriter.CompositeDocumentInput documentInput = documentInputSupplier.get()) { operation = prepareIndex( docMapper(), sourceToParse, @@ -1210,12 +1228,14 @@ private Engine.IndexResult applyIndexOperation( autoGeneratedTimeStamp, isRetry, ifSeqNo, - ifPrimaryTerm + ifPrimaryTerm, + documentInput ); Mapping update = operation.parsedDoc().dynamicMappingsUpdate(); if (update != null) { return new Engine.IndexResult(update); } + return index(engine, operation); } catch (Exception e) { // We treat any exception during parsing and or mapping update as a document level failure // with the exception side effects of closing the shard. Since we don't have the shard, we @@ -1224,8 +1244,6 @@ private Engine.IndexResult applyIndexOperation( verifyNotClosed(e); return new Engine.IndexResult(e, version, opPrimaryTerm, seqNo); } - - return index(engine, operation); } public static Engine.Index prepareIndex( @@ -1239,10 +1257,11 @@ public static Engine.Index prepareIndex( long autoGeneratedIdTimestamp, boolean isRetry, long ifSeqNo, - long ifPrimaryTerm + long ifPrimaryTerm, + CompositeDataFormatWriter.CompositeDocumentInput documentInput ) { long startTime = System.nanoTime(); - ParsedDocument doc = docMapper.getDocumentMapper().parse(source); + ParsedDocument doc = docMapper.getDocumentMapper().parse(source, documentInput);; if (docMapper.getMapping() != null) { doc.addDynamicMappingsUpdate(docMapper.getMapping()); } @@ -1263,7 +1282,7 @@ public static Engine.Index prepareIndex( ); } - private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOException { + private Engine.IndexResult index(Indexer engine, Engine.Index index) throws IOException { active.set(true); final Engine.IndexResult result; index = indexingOperationListeners.preIndex(shardId, index); @@ -1319,10 +1338,10 @@ private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOExc } public Engine.NoOpResult markSeqNoAsNoop(long seqNo, long opPrimaryTerm, String reason) throws IOException { - return markSeqNoAsNoop(getEngine(), seqNo, opPrimaryTerm, reason, Engine.Operation.Origin.REPLICA); + return markSeqNoAsNoop(getIndexer(), seqNo, opPrimaryTerm, reason, Engine.Operation.Origin.REPLICA); } - private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin) + private Engine.NoOpResult markSeqNoAsNoop(Indexer engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin) throws IOException { assert opPrimaryTerm <= getOperationPrimaryTerm() : "op term [ " + opPrimaryTerm @@ -1335,7 +1354,7 @@ private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrim return noOp(engine, noOp); } - private Engine.NoOpResult noOp(Engine engine, Engine.NoOp noOp) throws IOException { + private Engine.NoOpResult noOp(Indexer engine, Engine.NoOp noOp) throws IOException { active.set(true); if (logger.isTraceEnabled()) { logger.trace("noop (seq# [{}])", noOp.seqNo()); @@ -1360,7 +1379,7 @@ public Engine.DeleteResult applyDeleteOperationOnPrimary( ) throws IOException { assert versionType.validateVersionForWrites(version); return applyDeleteOperation( - getEngine(), + getIndexer(), UNASSIGNED_SEQ_NO, getOperationPrimaryTerm(), version, @@ -1386,10 +1405,10 @@ public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long opPrim UNASSIGNED_SEQ_NO, 0 ); - return getEngine().delete(delete); + return getIndexer().delete(delete); } return applyDeleteOperation( - getEngine(), + getIndexer(), seqNo, opPrimaryTerm, version, @@ -1402,7 +1421,7 @@ public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long opPrim } private Engine.DeleteResult applyDeleteOperation( - Engine engine, + Indexer engine, long seqNo, long opPrimaryTerm, long version, @@ -1437,7 +1456,7 @@ public static Engine.Delete prepareDelete( return new Engine.Delete(id, uid, seqNo, primaryTerm, version, versionType, origin, startTime, ifSeqNo, ifPrimaryTerm); } - private Engine.DeleteResult delete(Engine engine, Engine.Delete delete) throws IOException { + private Engine.DeleteResult delete(Indexer engine, Engine.Delete delete) throws IOException { active.set(true); final Engine.DeleteResult result; delete = indexingOperationListeners.preDelete(shardId, delete); @@ -1460,7 +1479,7 @@ public Engine.GetResult get(Engine.Get get) { if (mapper == null) { return GetResult.NOT_EXISTS; } - return getEngine().get(get, this::acquireSearcher); + return getEngine().get(get, this::acquireSearcher); // TODO: READER INTERFACE } /** @@ -1471,7 +1490,8 @@ public void refresh(String source) { if (logger.isTraceEnabled()) { logger.trace("refresh with source [{}]", source); } - getEngine().refresh(source); + getIndexingExecutionCoordinator().refresh(source); +// getIndexer().refresh(source); } /** @@ -1502,7 +1522,7 @@ public FlushStats flushStats() { public DocsStats docStats() { readAllowed(); - return getEngine().docStats(); + return getStatsHolder().docStats(); } /** @@ -1510,7 +1530,7 @@ public DocsStats docStats() { * @throws AlreadyClosedException if shard is closed */ public CommitStats commitStats() { - return getEngine().commitStats(); + return getStatsHolder().commitStats(); } /** @@ -1518,11 +1538,11 @@ public CommitStats commitStats() { * @throws AlreadyClosedException if shard is closed */ public SeqNoStats seqNoStats() { - return getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); + return getCheckpointState().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); } public IndexingStats indexingStats() { - Engine engine = getEngineOrNull(); + IndexingThrottler engine = getIndexingThrottler(); final boolean throttled; final long throttleTimeInMillis; if (engine == null) { @@ -1555,17 +1575,17 @@ public StoreStats storeStats() { } public MergeStats mergeStats() { - final Engine engine = getEngineOrNull(); + final StatsHolder engine = getStatsHolderOrNull(); if (engine == null) { return new MergeStats(); } final MergeStats mergeStats = engine.getMergeStats(); - mergeStats.addUnreferencedFileCleanUpStats(engine.unreferencedFileCleanUpsPerformed()); +// mergeStats.addUnreferencedFileCleanUpStats(engine.unreferencedFileCleanUpsPerformed()); return mergeStats; } public SegmentsStats segmentStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments) { - SegmentsStats segmentsStats = getEngine().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); + SegmentsStats segmentsStats = getStatsHolder().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments); segmentsStats.addBitsetMemoryInBytes(shardBitsetFilterCache.getMemorySizeInBytes()); // Populate remote_store stats only if the index is remote store backed if (indexSettings().isAssignedOnRemoteNode()) { @@ -1588,7 +1608,7 @@ public FieldDataStats fieldDataStats(String... fields) { } public TranslogStats translogStats() { - TranslogStats translogStats = getEngine().translogManager().getTranslogStats(); + TranslogStats translogStats = getIndexer().translogManager().getTranslogStats(); // Populate remote_store stats only if the index is remote store backed if (indexSettings.isAssignedOnRemoteNode()) { translogStats.addRemoteTranslogStats( @@ -1601,11 +1621,11 @@ public TranslogStats translogStats() { public CompletionStats completionStats(String... fields) { readAllowed(); - return getEngine().completionStats(fields); + return getStatsHolder().completionStats(fields); } public PollingIngestStats pollingIngestStats() { - return getEngine().pollingIngestStats(); + return getStatsHolder().pollingIngestStats(); } /** @@ -1624,7 +1644,7 @@ public void flush(FlushRequest request) { */ verifyNotClosed(); final long time = System.nanoTime(); - getEngine().flush(force, waitIfOngoing); + getIndexingExecutionCoordinator().flush(force, waitIfOngoing); flushMetric.inc(System.nanoTime() - time); } @@ -1637,15 +1657,14 @@ public void trimTranslog() { return; } verifyNotClosed(); - final Engine engine = getEngine(); - engine.translogManager().trimUnreferencedTranslogFiles(); + getIndexer().translogManager().trimUnreferencedTranslogFiles(); } /** * Rolls the tranlog generation and cleans unneeded. */ public void rollTranslogGeneration() throws IOException { - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.translogManager().rollTranslogGeneration(); } @@ -1654,7 +1673,7 @@ public void forceMerge(ForceMergeRequest forceMerge) throws IOException { if (logger.isTraceEnabled()) { logger.trace("force merge with {}", forceMerge); } - Engine engine = getEngine(); + Indexer engine = getIndexer(); engine.forceMerge( forceMerge.flush(), forceMerge.maxNumSegments(), @@ -1675,7 +1694,7 @@ public org.apache.lucene.util.Version upgrade(UpgradeRequest upgrade) throws IOE } org.apache.lucene.util.Version previousVersion = minimumCompatibleVersion(); // we just want to upgrade the segments, not actually forge merge to a single segment - final Engine engine = getEngine(); + final Indexer engine = getIndexer(); engine.forceMerge( true, // we need to flush at the end to make sure the upgrade is durable Integer.MAX_VALUE, // we just want to upgrade the segments, not actually optimize to a single segment @@ -1694,7 +1713,7 @@ public org.apache.lucene.util.Version upgrade(UpgradeRequest upgrade) throws IOE public org.apache.lucene.util.Version minimumCompatibleVersion() { org.apache.lucene.util.Version luceneVersion = null; - for (Segment segment : getEngine().segments(false)) { + for (Segment segment : getIndexer().segments(false)) { if (luceneVersion == null || luceneVersion.onOrAfter(segment.getVersion())) { luceneVersion = segment.getVersion(); } @@ -1724,19 +1743,21 @@ public RemoteSegmentMetadata fetchLastRemoteUploadedSegmentMetadata() throws IOE * * @param flushFirst true if the index should first be flushed to disk / a low level lucene commit should be executed */ + // TODO: This full method changes public GatedCloseable acquireLastIndexCommit(boolean flushFirst) throws EngineException { final IndexShardState state = this.state; // one time volatile read // we allow snapshot on closed index shard, since we want to do one after we close the shard and before we close the engine if (state == IndexShardState.STARTED || state == IndexShardState.CLOSED) { - return getEngine().acquireLastIndexCommit(flushFirst); + return getEngine().acquireLastIndexCommit(flushFirst); // TODO: READER, SNAPSHOTTER? } else { throw new IllegalIndexShardStateException(shardId, state, "snapshot is not allowed"); } } + // TODO: This full method changes public GatedCloseable acquireLastIndexCommitAndRefresh(boolean flushFirst) throws EngineException { GatedCloseable indexCommit = acquireLastIndexCommit(flushFirst); - getEngine().refresh("Snapshot for Remote Store based Shard"); + getIndexer().refresh("Snapshot for Remote Store based Shard"); return indexCommit; } @@ -1865,6 +1886,7 @@ public Set getPendingMergedSegmentCheckpoints() { /** * Snapshots the most recent safe index commit from the currently running engine. * All index files referenced by this index commit won't be freed until the commit/snapshot is closed. + * TODO: This method changes */ public GatedCloseable acquireSafeIndexCommit() throws EngineException { final IndexShardState state = this.state; // one time volatile read @@ -1927,6 +1949,7 @@ public Tuple, ReplicationCheckpoint> getLatestSegme * @param segmentInfos {@link SegmentInfos} infos to use to compute. * @return {@link ReplicationCheckpoint} Checkpoint computed from the infos. * @throws IOException When there is an error computing segment metadata from the store. + * TODO: SegRep changes for decoupling. looks to depend on codec. */ ReplicationCheckpoint computeReplicationCheckpoint(SegmentInfos segmentInfos) throws IOException { if (segmentInfos == null) { @@ -2154,7 +2177,7 @@ public void failShard(String reason, @Nullable Exception e) { /** * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand. */ - public Engine.SearcherSupplier acquireSearcherSupplier() { + public EngineSearcherSupplier acquireSearcherSupplier() { return acquireSearcherSupplier(Engine.SearcherScope.EXTERNAL); } @@ -2165,6 +2188,7 @@ public Engine.SearcherSupplier acquireSearcherSupplier(Engine.SearcherScope scop readAllowed(); markSearcherAccessed(); final Engine engine = getEngine(); + compositeEngine.getPrimaryReadEngine().acquireSearcherSupplier(null, scope); return engine.acquireSearcherSupplier(this::wrapSearcher, scope); } @@ -2196,6 +2220,7 @@ private Engine.Searcher wrapSearcher(Engine.Searcher searcher) { throw new OpenSearchException("failed to wrap searcher", ex); } finally { if (success == false) { + // TODO important Releasables.close(success, searcher); } } @@ -2434,7 +2459,7 @@ public void postRecovery(String reason) throws IndexShardStartedException, Index // we may not expose operations that were indexed with a refresh listener that was immediately // responded to in addRefreshListener. The refresh must happen under the same mutex used in addRefreshListener // and before moving this shard to POST_RECOVERY state (i.e., allow to read from this shard). - getEngine().refresh("post_recovery"); + getIndexer().refresh("post_recovery"); synchronized (mutex) { if (state == IndexShardState.CLOSED) { throw new IndexShardClosedException(shardId); @@ -2511,7 +2536,7 @@ private long recoverLocallyUpToGlobalCheckpoint() { final TranslogRecoveryRunner translogRecoveryRunner = (snapshot) -> { recoveryState.getTranslog().totalLocal(snapshot.totalOperations()); final int recoveredOps = runTranslogRecovery( - getEngine(), + getIndexer(), snapshot, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY, recoveryState.getTranslog()::incrementRecoveredOperations @@ -2520,9 +2545,9 @@ private long recoverLocallyUpToGlobalCheckpoint() { return recoveredOps; }; innerOpenEngineAndTranslog(() -> globalCheckpoint); - getEngine().translogManager() - .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), globalCheckpoint); - logger.trace("shard locally recovered up to {}", getEngine().getSeqNoStats(globalCheckpoint)); + getIndexer().translogManager() + .recoverFromTranslog(translogRecoveryRunner, getCheckpointState().getProcessedLocalCheckpoint(), globalCheckpoint); + logger.trace("shard locally recovered up to {}", getCheckpointState().getSeqNoStats(globalCheckpoint)); } finally { synchronized (engineMutex) { IOUtils.close(currentEngineReference.getAndSet(null)); @@ -2598,7 +2623,7 @@ private void validateLocalRecoveryState() { } public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { - getEngine().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo); + getIndexer().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo); } /** @@ -2608,7 +2633,7 @@ public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) { * @see #updateMaxUnsafeAutoIdTimestamp(long) */ public long getMaxSeenAutoIdTimestamp() { - return getEngine().getMaxSeenAutoIdTimestamp(); + return getIndexer().getMaxSeenAutoIdTimestamp(); } /** @@ -2621,14 +2646,14 @@ public long getMaxSeenAutoIdTimestamp() { * a retry append-only (without timestamp) via recovery, then an original append-only (with timestamp) via replication. */ public void updateMaxUnsafeAutoIdTimestamp(long maxSeenAutoIdTimestampFromPrimary) { - getEngine().updateMaxUnsafeAutoIdTimestamp(maxSeenAutoIdTimestampFromPrimary); + getIndexer().updateMaxUnsafeAutoIdTimestamp(maxSeenAutoIdTimestampFromPrimary); } public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { - return applyTranslogOperation(getEngine(), operation, origin); + return applyTranslogOperation(getIndexer(), operation, origin); } - private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation operation, Engine.Operation.Origin origin) + private Engine.Result applyTranslogOperation(Indexer engine, Translog.Operation operation, Engine.Operation.Origin origin) throws IOException { // If a translog op is replayed on the primary (eg. ccr), we need to use external instead of null for its version type. final VersionType versionType = (origin == Engine.Operation.Origin.PRIMARY) ? VersionType.EXTERNAL : null; @@ -2656,7 +2681,8 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o MediaTypeRegistry.xContentType(index.source()), index.routing() ), - index.id() + index.id(), + null ); break; case DELETE: @@ -2687,7 +2713,7 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o * Replays translog operations from the provided translog {@code snapshot} to the current engine using the given {@code origin}. * The callback {@code onOperationRecovered} is notified after each translog operation is replayed successfully. */ - int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot, Engine.Operation.Origin origin, Runnable onOperationRecovered) + int runTranslogRecovery(Indexer engine, Translog.Snapshot snapshot, Engine.Operation.Origin origin, Runnable onOperationRecovered) throws IOException { int opsRecovered = 0; Translog.Operation operation; @@ -2747,7 +2773,7 @@ public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOEx translogRecoveryStats.totalOperations(snapshot.totalOperations()); translogRecoveryStats.totalOperationsOnStart(snapshot.totalOperations()); return runTranslogRecovery( - getEngine(), + getIndexer(), snapshot, Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY, translogRecoveryStats::incrementRecoveredOperations @@ -2771,8 +2797,8 @@ public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOEx translogConfig.setDownloadRemoteTranslogOnInit(true); } - getEngine().translogManager() - .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), Long.MAX_VALUE); + getIndexer().translogManager() + .recoverFromTranslog(translogRecoveryRunner, getCheckpointState().getProcessedLocalCheckpoint(), Long.MAX_VALUE); } /** @@ -2799,7 +2825,7 @@ void openEngineAndSkipTranslogRecovery(boolean syncFromRemote) throws IOExceptio innerOpenEngineAndTranslog(replicationTracker, syncFromRemote); assert routingEntry().isSearchOnly() == false || translogStats().estimatedNumberOfOperations() == 0 : "Translog is expected to be empty but holds " + translogStats().estimatedNumberOfOperations() + "Operations."; - getEngine().translogManager().skipTranslogRecovery(); + getIndexer().translogManager().skipTranslogRecovery(); } private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) throws IOException { @@ -2961,9 +2987,9 @@ public RecoveryState recoveryState() { */ public void finalizeRecovery() { recoveryState().setStage(RecoveryState.Stage.FINALIZE); - Engine engine = getEngine(); + Indexer engine = getIndexer(); engine.refresh("recovery_finalization"); - engine.config().setEnableGcDeletes(true); + //engine.config().setEnableGcDeletes(true); } /** @@ -3284,7 +3310,7 @@ protected void doRun() { * Acquires a lock on the translog files and Lucene soft-deleted documents to prevent them from being trimmed */ public Closeable acquireHistoryRetentionLock() { - return getEngine().acquireHistoryRetentionLock(); + return getIndexer().acquireHistoryRetentionLock(); } /** @@ -3294,7 +3320,7 @@ public Closeable acquireHistoryRetentionLock() { */ public Translog.Snapshot getHistoryOperations(String reason, long startingSeqNo, long endSeqNo, boolean accurateCount) throws IOException { - return getEngine().newChangesSnapshot(reason, startingSeqNo, endSeqNo, true, accurateCount); + return getIndexer().newChangesSnapshot(reason, startingSeqNo, endSeqNo, true, accurateCount); } /** @@ -3305,7 +3331,7 @@ public Translog.Snapshot getHistoryOperations(String reason, long startingSeqNo, public Translog.Snapshot getHistoryOperationsFromTranslog(long startingSeqNo, long endSeqNo) throws IOException { assert indexSettings.isSegRepEnabledOrRemoteNode() == false : "unsupported operation for segment replication enabled indices or remote store backed indices"; - return getEngine().translogManager().newChangesSnapshot(startingSeqNo, endSeqNo, true); + return getIndexer().translogManager().newChangesSnapshot(startingSeqNo, endSeqNo, true); } /** @@ -3313,7 +3339,7 @@ public Translog.Snapshot getHistoryOperationsFromTranslog(long startingSeqNo, lo * This method should be called after acquiring the retention lock; See {@link #acquireHistoryRetentionLock()} */ public boolean hasCompleteHistoryOperations(String reason, long startingSeqNo) { - return getEngine().hasCompleteOperationHistory(reason, startingSeqNo); + return getIndexer().hasCompleteOperationHistory(reason, startingSeqNo); } /** @@ -3322,7 +3348,7 @@ public boolean hasCompleteHistoryOperations(String reason, long startingSeqNo) { * @return the minimum retained sequence number */ public long getMinRetainedSeqNo() { - return getEngine().getMinRetainedSeqNo(); + return getCheckpointState().getMinRetainedSeqNo(); } /** @@ -3333,7 +3359,7 @@ public long getMinRetainedSeqNo() { * @return number of history operations in the sequence number range */ public int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNo) throws IOException { - return getEngine().countNumberOfHistoryOperations(source, fromSeqNo, toSeqNo); + return getIndexer().countNumberOfHistoryOperations(source, fromSeqNo, toSeqNo); } /** @@ -3354,15 +3380,15 @@ public Translog.Snapshot newChangesSnapshot( boolean requiredFullRange, boolean accurateCount ) throws IOException { - return getEngine().newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount); + return getIndexer().newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount); } public List segments(boolean verbose) { - return getEngine().segments(verbose); + return getIndexer().segments(verbose); } public String getHistoryUUID() { - return getEngine().getHistoryUUID(); + return getIndexer().getHistoryUUID(); } public IndexEventListener getIndexEventListener() { @@ -3371,7 +3397,7 @@ public IndexEventListener getIndexEventListener() { public void activateThrottling() { try { - getEngine().activateThrottling(); + getIndexingThrottler().activateThrottling(); } catch (AlreadyClosedException ex) { // ignore } @@ -3379,7 +3405,7 @@ public void activateThrottling() { public void deactivateThrottling() { try { - getEngine().deactivateThrottling(); + getIndexingThrottler().deactivateThrottling(); } catch (AlreadyClosedException ex) { // ignore } @@ -3413,8 +3439,7 @@ private void handleRefreshException(Exception e) { */ public void writeIndexingBuffer() { try { - Engine engine = getEngine(); - engine.writeIndexingBuffer(); + getIndexer().writeIndexingBuffer(); } catch (Exception e) { handleRefreshException(e); } @@ -3697,7 +3722,7 @@ public void markAllocationIdAsInSync(final String allocationId, final long local * @return the local checkpoint */ public long getLocalCheckpoint() { - return getEngine().getPersistedLocalCheckpoint(); + return getCheckpointState().getPersistedLocalCheckpoint(); } /** @@ -3705,7 +3730,7 @@ public long getLocalCheckpoint() { * Also see {@link #getLocalCheckpoint()}. */ public long getProcessedLocalCheckpoint() { - return getEngine().getProcessedLocalCheckpoint(); + return getCheckpointState().getProcessedLocalCheckpoint(); } /** @@ -3721,7 +3746,7 @@ public long getLastKnownGlobalCheckpoint() { * Returns the latest global checkpoint value that has been persisted in the underlying storage (i.e. translog's checkpoint) */ public long getLastSyncedGlobalCheckpoint() { - return getEngine().getLastSyncedGlobalCheckpoint(); + return getCheckpointState().getLastSyncedGlobalCheckpoint(); } /** @@ -3747,7 +3772,7 @@ public void maybeSyncGlobalCheckpoint(final String reason) { } assert assertPrimaryMode(); // only sync if there are no operations in flight, or when using async durability - final SeqNoStats stats = getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); + final SeqNoStats stats = getCheckpointState().getSeqNoStats(replicationTracker.getGlobalCheckpoint()); final boolean asyncDurability = indexSettings().getTranslogDurability() == Durability.ASYNC; if (stats.getMaxSeqNo() == stats.getGlobalCheckpoint() || asyncDurability) { final Map globalCheckpoints = getInSyncGlobalCheckpoints(); @@ -3867,7 +3892,7 @@ private void postActivatePrimaryMode() { // This helps to get a consistent state in remote store where both remote segment store and remote // translog contains data. try { - getEngine().translogManager().syncTranslog(); + getIndexer().translogManager().syncTranslog(); } catch (IOException e) { logger.error("Failed to sync translog to remote from new primary", e); } @@ -3976,7 +4001,24 @@ private void doCheckIndex() throws IOException { recoveryState.getVerifyIndex().checkIndexTime(Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - timeNS))); } - Engine getEngine() { + + public Indexer getIndexer() { + return getEngine(); + } + + public CheckpointState getCheckpointState() { + return getEngine(); + } + + public StatsHolder getStatsHolder() { + return getEngine(); + } + + public IndexingThrottler getIndexingThrottler() { + return getEngine(); + } + + public Engine getEngine() { Engine engine = getEngineOrNull(); if (engine == null) { throw new AlreadyClosedException("engine is closed"); @@ -3984,6 +4026,23 @@ Engine getEngine() { return engine; } + + protected Indexer getIndexerOrNull() { + return getEngineOrNull(); + } + + public CheckpointState getCheckpointStateOrNull() { + return getEngineOrNull(); + } + + public StatsHolder getStatsHolderOrNull() { + return getEngineOrNull(); + } + + public IndexingThrottler getIndexingThrottlerOrNull() { + return getEngineOrNull(); + } + /** * NOTE: returns null if engine is not yet started (e.g. recovery phase 1, copying over index files, is still running), or if engine is * closed. @@ -4174,7 +4233,7 @@ public boolean useRetentionLeasesInPeerRecovery() { private SafeCommitInfo getSafeCommitInfo() { final Engine engine = getEngineOrNull(); - return engine == null ? SafeCommitInfo.EMPTY : engine.getSafeCommitInfo(); + return engine == null ? SafeCommitInfo.EMPTY : getIndexer().getSafeCommitInfo(); } class ShardEventListener implements Engine.EventListener { @@ -4252,10 +4311,12 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) thro if (indexSettings.isSegRepEnabledOrRemoteNode()) { internalRefreshListener.add(new ReplicationCheckpointUpdater()); } + // HERE if (this.checkpointPublisher != null && shardRouting.primary() && indexSettings.isSegRepLocalEnabled()) { internalRefreshListener.add(new CheckpointRefreshListener(this, this.checkpointPublisher)); } + // HERE if (isRemoteStoreEnabled() || isMigratingToRemote()) { internalRefreshListener.add( new RemoteStoreRefreshListener( @@ -4709,7 +4770,7 @@ public List getActiveOperations() { private static AsyncIOProcessor createTranslogSyncProcessor( Logger logger, ThreadPool threadPool, - Supplier engineSupplier, + Supplier engineSupplier, boolean bufferAsyncIoProcessor, Supplier bufferIntervalSupplier ) { @@ -4908,7 +4969,7 @@ ReplicationTracker getReplicationTracker() { public boolean scheduledRefresh() { verifyNotClosed(); boolean listenerNeedsRefresh = refreshListeners.refreshNeeded(); - if (isReadAllowed() && (listenerNeedsRefresh || getEngine().refreshNeeded())) { + if (isReadAllowed() && (listenerNeedsRefresh || true)) { if (listenerNeedsRefresh == false // if we have a listener that is waiting for a refresh we need to force it && isSearchIdleSupported() && isSearchIdle() @@ -4917,15 +4978,19 @@ && isSearchIdle() // lets skip this refresh since we are search idle and // don't necessarily need to refresh. the next searcher access will register a refreshListener and that will // cause the next schedule to refresh. - final Engine engine = getEngine(); - engine.maybePruneDeletes(); // try to prune the deletes in the engine if we accumulated some - setRefreshPending(engine); - return false; +// final Engine engine = getEngine(); +// engine.maybePruneDeletes(); // try to prune the deletes in the engine if we accumulated some +// setRefreshPending(engine); +// return false; + getIndexingExecutionCoordinator().refresh("schedule"); + return true; } else { if (logger.isTraceEnabled()) { logger.trace("refresh with source [schedule]"); } - return getEngine().maybeRefresh("schedule"); + getIndexingExecutionCoordinator().refresh("schedule"); + return true; +// return getEngine().maybeRefresh("schedule"); } } final Engine engine = getEngine(); diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 59f967744cc77..5b3be542eb160 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -164,6 +164,7 @@ import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.plugins.IndexStorePlugin; import org.opensearch.plugins.PluginsService; +import org.opensearch.plugins.SearchEnginePlugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; @@ -1101,7 +1102,9 @@ private synchronized IndexService createIndexService( this.remoteStoreSettings, replicator, segmentReplicationStatsProvider, - this::getClusterDefaultMaxMergeAtOnce + this::getClusterDefaultMaxMergeAtOnce, + getSearchEnginePlugin(), + this.pluginsService ); } @@ -1109,6 +1112,13 @@ private EngineConfigFactory getEngineConfigFactory(final IndexSettings idxSettin return new EngineConfigFactory(this.pluginsService, idxSettings); } + private SearchEnginePlugin getSearchEnginePlugin() throws IOException { + List searchEnginePlugins = pluginsService.filterPlugins(SearchEnginePlugin.class); + return !searchEnginePlugins.isEmpty() + ? searchEnginePlugins.getFirst() + : null; + } + private IngestionConsumerFactory getIngestionConsumerFactory(final IndexSettings idxSettings) { final IndexMetadata indexMetadata = idxSettings.getIndexMetadata(); if (indexMetadata == null) { diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java index ae8299ee7ccb5..416237111ff7b 100644 --- a/server/src/main/java/org/opensearch/node/Node.java +++ b/server/src/main/java/org/opensearch/node/Node.java @@ -218,6 +218,8 @@ import org.opensearch.plugins.ClusterPlugin; import org.opensearch.plugins.CryptoKeyProviderPlugin; import org.opensearch.plugins.CryptoPlugin; +import org.opensearch.plugins.SearchEnginePlugin; +import org.opensearch.plugins.DataSourcePlugin; import org.opensearch.plugins.DiscoveryPlugin; import org.opensearch.plugins.EnginePlugin; import org.opensearch.plugins.ExtensionAwarePlugin; @@ -294,6 +296,8 @@ import org.opensearch.transport.client.Client; import org.opensearch.transport.client.node.NodeClient; import org.opensearch.usage.UsageService; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; import org.opensearch.watcher.ResourceWatcherService; import org.opensearch.wlm.WorkloadGroupService; import org.opensearch.wlm.WorkloadGroupsStateAccessor; @@ -1111,10 +1115,40 @@ protected Node(final Environment initialEnvironment, Collection clas ).stream() ) .collect(Collectors.toList()); - // Add the telemetryAwarePlugin components to the existing pluginComponents collection. pluginComponents.addAll(telemetryAwarePluginComponents); + Map dataSourceCodecMap = new HashMap<>(); + for (DataSourcePlugin dataSourcePlugin : pluginsService.filterPlugins(DataSourcePlugin.class)) { + if (dataSourcePlugin.getDataSourceCodecs().isPresent()) { + dataSourceCodecMap.putAll(dataSourcePlugin.getDataSourceCodecs().get()); + } + } + + // TODO : compilation issue + + Collection dataSourceAwareComponents = pluginsService.filterPlugins(SearchEnginePlugin.class) + .stream() + .flatMap( + p -> p.createComponents( + client, + clusterService, + threadPool, + resourceWatcherService, + scriptService, + xContentRegistry, + environment, + nodeEnvironment, + namedWriteableRegistry, + clusterModule.getIndexNameExpressionResolver(), + repositoriesServiceReference::get, + dataSourceCodecMap + ).stream() + ) + .collect(Collectors.toList()); + + // Add all dataSourceAwarePlugin components to the existing pluginComponents + pluginComponents.addAll(dataSourceAwareComponents); List identityAwarePlugins = pluginsService.filterPlugins(IdentityAwarePlugin.class); identityService.initializeIdentityAwarePlugins(identityAwarePlugins); @@ -1525,7 +1559,8 @@ protected Node(final Environment initialEnvironment, Collection clas searchModule.getIndexSearcherExecutor(threadPool), taskResourceTrackingService, searchModule.getConcurrentSearchRequestDeciderFactories(), - searchModule.getPluginProfileMetricsProviders() + searchModule.getPluginProfileMetricsProviders(), + pluginsService.filterPlugins(DataSourcePlugin.class) ); final List> tasksExecutors = pluginsService.filterPlugins(PersistentTaskPlugin.class) @@ -2256,7 +2291,8 @@ protected SearchService newSearchService( Executor indexSearcherExecutor, TaskResourceTrackingService taskResourceTrackingService, Collection concurrentSearchDeciderFactories, - List pluginProfilers + List pluginProfilers, + List dataSourcePluginList ) { return new SearchService( clusterService, @@ -2271,7 +2307,8 @@ protected SearchService newSearchService( indexSearcherExecutor, taskResourceTrackingService, concurrentSearchDeciderFactories, - pluginProfilers + pluginProfilers, + dataSourcePluginList ); } diff --git a/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java new file mode 100644 index 0000000000000..cf008d3098fcd --- /dev/null +++ b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugins; + +import org.opensearch.index.engine.exec.DataFormat; +import org.opensearch.index.engine.exec.IndexingExecutionEngine; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; + +import java.util.Map; +import java.util.Optional; + +public interface DataSourcePlugin { + default Optional> getDataSourceCodecs() { + return Optional.empty(); + } + + IndexingExecutionEngine indexingEngine(MapperService mapperService, ShardPath shardPath); + + DataFormat getDataFormat(); +} diff --git a/server/src/main/java/org/opensearch/plugins/PluginsService.java b/server/src/main/java/org/opensearch/plugins/PluginsService.java index 5e382584dbe0e..ccbc10f77cb14 100644 --- a/server/src/main/java/org/opensearch/plugins/PluginsService.java +++ b/server/src/main/java/org/opensearch/plugins/PluginsService.java @@ -42,6 +42,7 @@ import org.opensearch.OpenSearchException; import org.opensearch.Version; import org.opensearch.action.admin.cluster.node.info.PluginsAndModules; +import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.bootstrap.JarHell; import org.opensearch.common.collect.Tuple; import org.opensearch.common.inject.Module; @@ -88,6 +89,7 @@ * * @opensearch.internal */ +@ExperimentalApi // TODO : this cannot be experimental, just marking it to bypass for now public class PluginsService implements ReportingService { private static final Logger logger = LogManager.getLogger(PluginsService.class); diff --git a/server/src/main/java/org/opensearch/plugins/SearchEnginePlugin.java b/server/src/main/java/org/opensearch/plugins/SearchEnginePlugin.java new file mode 100644 index 0000000000000..e1c68761dd0a7 --- /dev/null +++ b/server/src/main/java/org/opensearch/plugins/SearchEnginePlugin.java @@ -0,0 +1,60 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugins; + +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.env.NodeEnvironment; +import org.opensearch.index.engine.SearchExecEngine; +import org.opensearch.index.engine.exec.FileMetadata; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.script.ScriptService; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.client.Client; +import org.opensearch.vectorized.execution.search.DataFormat; +import org.opensearch.vectorized.execution.search.spi.DataSourceCodec; +import org.opensearch.watcher.ResourceWatcherService; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.function.Supplier; + +public interface SearchEnginePlugin extends SearchPlugin{ + + /** + * Make dataSourceCodecs available for the DataSourceAwarePlugin(s) + */ + default Collection createComponents( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + ResourceWatcherService resourceWatcherService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + Environment environment, + NodeEnvironment nodeEnvironment, + NamedWriteableRegistry namedWriteableRegistry, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier repositoriesServiceSupplier, + Map dataSourceCodecs + ) { + return Collections.emptyList(); + } + + List getSupportedFormats(); + + SearchExecEngine createEngine(DataFormat dataFormat, Collection formatCatalogSnapshot, ShardPath shardPath) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/search/ContextEngineSearcher.java b/server/src/main/java/org/opensearch/search/ContextEngineSearcher.java new file mode 100644 index 0000000000000..85809b993b165 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/ContextEngineSearcher.java @@ -0,0 +1,31 @@ +package org.opensearch.search; + +import org.opensearch.index.engine.EngineSearcher; +import org.opensearch.search.aggregations.SearchResultsCollector; +import org.opensearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.List; + +/** + * Engine-agnostic equivalent of ContextIndexSearcher that wraps EngineSearcher + * and provides search context awareness + */ +public record ContextEngineSearcher(EngineSearcher engineSearcher, + SearchContext searchContext) implements EngineSearcher { + + @Override + public String source() { + return engineSearcher.source(); + } + + @Override + public void search(Q query, List> collectors) throws IOException { + engineSearcher.search(query, collectors); + } + + @Override + public void close() { + engineSearcher.close(); + } +} diff --git a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java index dda3e203c0667..99371456499b4 100644 --- a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java +++ b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java @@ -221,6 +221,7 @@ final class DefaultSearchContext extends SearchContext { private final boolean isStreamSearch; private StreamSearchChannelListener listener; + private Map dfResults; DefaultSearchContext( ReaderContext readerContext, @@ -252,7 +253,7 @@ final class DefaultSearchContext extends SearchContext { this.indexService = readerContext.indexService(); this.indexShard = readerContext.indexShard(); this.clusterService = clusterService; - this.engineSearcher = readerContext.acquireSearcher("search"); + this.engineSearcher = (Engine.Searcher) readerContext.acquireSearcher("search"); this.concurrentSearchMode = evaluateConcurrentSearchMode(executor); this.searcher = new ContextIndexSearcher( engineSearcher.getIndexReader(), @@ -1277,4 +1278,12 @@ public StreamSearchChannelListener getStreamChannelListener() { public boolean isStreamSearch() { return isStreamSearch; } + + public void setDFResults(Map dfResults) { + this.dfResults = dfResults; + } + + public Map getDFResults() { + return dfResults; + } } diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java index eeb4978d4c1f8..ae2d9682bc4fc 100644 --- a/server/src/main/java/org/opensearch/search/SearchService.java +++ b/server/src/main/java/org/opensearch/search/SearchService.java @@ -83,6 +83,8 @@ import org.opensearch.index.IndexService; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineSearcherSupplier; +import org.opensearch.index.engine.SearchExecEngine; import org.opensearch.index.mapper.DerivedFieldResolver; import org.opensearch.index.mapper.DerivedFieldResolverFactory; import org.opensearch.index.query.InnerHitContextBuilder; @@ -99,6 +101,7 @@ import org.opensearch.indices.IndicesService; import org.opensearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason; import org.opensearch.node.ResponseCollectorService; +import org.opensearch.plugins.DataSourcePlugin; import org.opensearch.plugins.SearchPlugin; import org.opensearch.script.FieldScript; import org.opensearch.script.ScriptService; @@ -423,6 +426,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv private final FetchPhase fetchPhase; private final Collection concurrentSearchDeciderFactories; + private final List dataSourcePluginList; private volatile long defaultKeepAlive; @@ -471,7 +475,8 @@ public SearchService( Executor indexSearcherExecutor, TaskResourceTrackingService taskResourceTrackingService, Collection concurrentSearchDeciderFactories, - List pluginProfilers + List pluginProfilers, + List dataSourcePluginList ) { Settings settings = clusterService.getSettings(); this.threadPool = threadPool; @@ -499,7 +504,7 @@ public SearchService( this::setPitKeepAlives, this::validatePitKeepAlives ); - + this.dataSourcePluginList = dataSourcePluginList; clusterService.getClusterSettings() .addSettingsUpdateConsumer(DEFAULT_KEEPALIVE_SETTING, MAX_KEEPALIVE_SETTING, this::setKeepAlives, this::validateKeepAlives); @@ -803,18 +808,54 @@ private SearchPhaseResult executeQueryPhase( boolean isStreamSearch, ActionListener listener ) throws Exception { + // Till here things are generic but for datafusion , we need to abstract out and get the read engine specific implementation + // it could be reusing existing final ReaderContext readerContext = createOrGetReaderContext(request, keepStatesInContext); + @SuppressWarnings("unchecked") + SearchExecEngine searchExecEngine = readerContext.indexShard() + .getIndexingExecutionCoordinator() + .getPrimaryReadEngine(); + SearchShardTarget shardTarget = new SearchShardTarget( + clusterService.localNode().getId(), + readerContext.indexShard().shardId(), + request.getClusterAlias(), + OriginalIndices.NONE + ); try ( Releasable ignored = readerContext.markAsUsed(getKeepAlive(request)); - SearchContext context = createContext(readerContext, request, task, true, isStreamSearch) + // Get engine-specific executor and context + // TODO : move this logic to work with Lucene + + SearchContext context = createContext(readerContext, request, task, true, isStreamSearch, searchExecEngine); + + //SearchContext context = createContext(readerContext, request, task, true) ) { + // TODO : this is not correct - need to tie source to plugin context above + //context.aggregations(context1.aggregations()); + // TODO Execute plan here + // TODO : figure out how to tie this + byte[] substraitQuery = request.source().queryPlanIR(); + if (substraitQuery != null) { + // setDFResults in context + searchExecEngine.executeQueryPhase(context); + } + if (isStreamSearch) { assert listener instanceof StreamSearchChannelListener : "Stream search expects StreamSearchChannelListener"; context.setStreamChannelListener((StreamSearchChannelListener) listener); } final long afterQueryTime; try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context)) { + // TODO check for this +// @SuppressWarnings("unchecked") +// QueryPhaseExecutor queryPhaseExecutor = +// (QueryPhaseExecutor) searchExecEngine.getQueryPhaseExecutor(); + + //QueryPhaseExecutor queryPhaseExecutor = readEngine.getQueryPhaseExecutor(); +// boolean success = queryPhaseExecutor.execute(context); loadOrExecuteQueryPhase(request, context); + //queryPhase.execute(context); + // loadOrExecuteQueryPhase(request, context); if (context.queryResult().hasSearchContext() == false && readerContext.singleSession()) { freeReaderContext(readerContext.id()); } @@ -1057,7 +1098,8 @@ final ReaderContext createOrGetReaderContext(ShardSearchRequest request, boolean } IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex()); IndexShard shard = indexService.getShard(request.shardId().id()); - Engine.SearcherSupplier reader = shard.acquireSearcherSupplier(); + // TODO acquire search supplier + EngineSearcherSupplier reader = shard.acquireSearcherSupplier(); return createAndPutReaderContext(request, indexService, shard, reader, keepStatesInContext); } @@ -1065,7 +1107,7 @@ final ReaderContext createAndPutReaderContext( ShardSearchRequest request, IndexService indexService, IndexShard shard, - Engine.SearcherSupplier reader, + EngineSearcherSupplier reader, boolean keepStatesInContext ) { assert request.readerId() == null; @@ -1131,7 +1173,7 @@ public void createPitReaderContext(ShardId shardId, TimeValue keepAlive, ActionL final IndexShard shard = indexService.getShard(shardId.id()); final SearchOperationListener searchOperationListener = shard.getSearchOperationListener(); shard.awaitShardSearchActive(ignored -> { - Engine.SearcherSupplier searcherSupplier = null; + EngineSearcherSupplier searcherSupplier = null; ReaderContext readerContext = null; Releasable decreasePitContexts = openPitContexts::decrementAndGet; try { @@ -1226,7 +1268,7 @@ final SearchContext createContext( SearchShardTask task, boolean includeAggregations ) throws IOException { - return createContext(readerContext, request, task, includeAggregations, false); + return createContext(readerContext, request, task, includeAggregations, false, null); } private SearchContext createContext( @@ -1234,9 +1276,18 @@ private SearchContext createContext( ShardSearchRequest request, SearchShardTask task, boolean includeAggregations, - boolean isStreamSearch + boolean isStreamSearch, + SearchExecEngine searchExecEngine ) throws IOException { - final DefaultSearchContext context = createSearchContext(readerContext, request, defaultSearchTimeout, false, isStreamSearch); + //final DefaultSearchContext originalContext = createSearchContext(readerContext, request, defaultSearchTimeout, false, isStreamSearch); + + SearchShardTarget shardTarget = new SearchShardTarget( + clusterService.localNode().getId(), + readerContext.indexShard().shardId(), + request.getClusterAlias(), + OriginalIndices.NONE + ); + SearchContext context = searchExecEngine.createContext(readerContext, request, shardTarget, task, bigArrays); try { if (request.scroll() != null) { context.scrollContext().scroll = request.scroll(); @@ -1265,7 +1316,7 @@ private SearchContext createContext( public DefaultSearchContext createSearchContext(ShardSearchRequest request, TimeValue timeout, boolean validate) throws IOException { final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex()); final IndexShard indexShard = indexService.getShard(request.shardId().getId()); - final Engine.SearcherSupplier reader = indexShard.acquireSearcherSupplier(); + final EngineSearcherSupplier reader = indexShard.acquireSearcherSupplier(); final ShardSearchContextId id = new ShardSearchContextId(sessionId, idGenerator.incrementAndGet()); try (ReaderContext readerContext = new ReaderContext(id, indexService, indexShard, reader, -1L, true)) { DefaultSearchContext searchContext = createSearchContext(readerContext, request, timeout, validate); @@ -1502,10 +1553,10 @@ private void processFailure(ReaderContext context, Exception exc) { } } - private void parseSource(DefaultSearchContext context, SearchSourceBuilder source, boolean includeAggregations) { + private void parseSource(SearchContext context, SearchSourceBuilder source, boolean includeAggregations) { // nothing to parse... if (source == null) { - context.evaluateRequestShouldUseConcurrentSearch(); + // context.evaluateRequestShouldUseConcurrentSearch(); // TODO : specific to default search context return; } @@ -1662,7 +1713,7 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc if (context.scrollContext() == null && !(context.readerContext() instanceof PitReaderContext)) { throw new SearchException(shardTarget, "`slice` cannot be used outside of a scroll context or PIT context"); } - context.sliceBuilder(source.slice()); + // context.sliceBuilder(source.slice()); // TODO : specific to default search context } if (source.storedFields() != null) { @@ -1696,13 +1747,13 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc final CollapseContext collapseContext = source.collapse().build(queryShardContext); context.collapse(collapseContext); } - context.evaluateRequestShouldUseConcurrentSearch(); + // context.evaluateRequestShouldUseConcurrentSearch(); // TODO : specific to default search context if (source.profile()) { final Function>> pluginProfileMetricsSupplier = (query) -> pluginProfilers.stream() .flatMap(p -> p.getQueryProfileMetrics(context, query).stream()) .toList(); Profilers profilers = new Profilers(context.searcher(), context.shouldUseConcurrentSearch(), pluginProfileMetricsSupplier); - context.setProfilers(profilers); + // context.setProfilers(profilers); // TODO : specific to default search context } if (context.getStarTreeIndexEnabled() && StarTreeQueryHelper.isStarTreeSupported(context)) { @@ -1820,7 +1871,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre final boolean hasRefreshPending; if (readerContext != null) { indexService = readerContext.indexService(); - canMatchSearcher = readerContext.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE); + canMatchSearcher = (Engine.Searcher) readerContext.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE); hasRefreshPending = false; } else { indexService = indicesService.indexServiceSafe(request.shardId().getIndex()); diff --git a/server/src/main/java/org/opensearch/search/aggregations/SearchResultsCollector.java b/server/src/main/java/org/opensearch/search/aggregations/SearchResultsCollector.java new file mode 100644 index 0000000000000..836fa4509531f --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/SearchResultsCollector.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations; +import org.opensearch.common.annotation.ExperimentalApi; + +/** + * Experimental + * @opensearch.internal + */ +// TODO : account for sub collectors +@ExperimentalApi +public interface SearchResultsCollector { + + /** + * collect + */ + void collect(T value); +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/ShardResultConvertor.java b/server/src/main/java/org/opensearch/search/aggregations/ShardResultConvertor.java new file mode 100644 index 0000000000000..5568b7051246b --- /dev/null +++ b/server/src/main/java/org/opensearch/search/aggregations/ShardResultConvertor.java @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.aggregations; + +import java.util.List; +import java.util.Map; + +public interface ShardResultConvertor { + + List convert(Map shardResult); + +} diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java index 5f99a9cc05558..2ad44cd33aa74 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java @@ -51,6 +51,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.ShardResultConvertor; import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; @@ -59,6 +60,8 @@ import org.opensearch.search.startree.StarTreeQueryHelper; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import static org.opensearch.search.startree.StarTreeQueryHelper.getStarTreeFilteredValues; @@ -69,7 +72,7 @@ * * @opensearch.internal */ -class AvgAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { +class AvgAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor { final ValuesSource.Numeric valuesSource; @@ -275,4 +278,15 @@ public void collectStarTreeEntry(int starTreeEntryBit, long bucket) throws IOExc } }; } + + @Override + public List convert(Map shardResult) { + Object[] counts = shardResult.get(name + "_count"); + Object[] sums = shardResult.get(name + "_sum"); + List results = new ArrayList<>(counts.length); + for (int i = 0; i < counts.length; i++) { + results.add(new InternalAvg(name, (Long) counts[i], (Long) sums[i], format, metadata())); + } + return results; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java index 93192411ea0f8..341f905e78ef0 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java @@ -51,6 +51,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.ShardResultConvertor; import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; @@ -59,7 +60,9 @@ import org.opensearch.search.startree.StarTreeQueryHelper; import java.io.IOException; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; @@ -71,7 +74,7 @@ * * @opensearch.internal */ -class MaxAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { +class MaxAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor { final ValuesSource.Numeric valuesSource; final DocValueFormat formatter; @@ -280,4 +283,14 @@ public StarTreeBucketCollector getStarTreeBucketCollector( public void doReset() { maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY); } + + @Override + public List convert(Map shardResult) { + Object[] values = shardResult.get(name); + List results = new ArrayList<>(values.length); + for (Object value : values) { + results.add(new InternalMax(name, (Long) value, formatter, metadata())); + } + return results; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java index 22749382216dd..3652e36453263 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java @@ -51,6 +51,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.ShardResultConvertor; import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; @@ -59,6 +60,8 @@ import org.opensearch.search.startree.StarTreeQueryHelper; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicReference; import java.util.function.Function; @@ -70,7 +73,7 @@ * * @opensearch.internal */ -class MinAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { +class MinAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor { private static final int MAX_BKD_LOOKUPS = 1024; final ValuesSource.Numeric valuesSource; @@ -271,4 +274,14 @@ public StarTreeBucketCollector getStarTreeBucketCollector( (bucket, metricValue) -> mins.set(bucket, Math.min(mins.get(bucket), NumericUtils.sortableLongToDouble(metricValue))) ); } + + @Override + public List convert(Map shardResult) { + Object[] values = shardResult.get(name); + List results = new ArrayList<>(values.length); + for (Object value : values) { + results.add(new InternalMin(name, (Long) value, format, metadata())); + } + return results; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java index ba32592f75ea1..0a611329a2fa8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java @@ -45,6 +45,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.ShardResultConvertor; import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; @@ -53,6 +54,8 @@ import org.opensearch.search.startree.StarTreeQueryHelper; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import static org.opensearch.search.startree.StarTreeQueryHelper.getSupportedStarTree; @@ -62,7 +65,7 @@ * * @opensearch.internal */ -public class SumAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { +public class SumAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor { private final ValuesSource.Numeric valuesSource; private final DocValueFormat format; @@ -215,4 +218,14 @@ public InternalAggregation buildEmptyAggregation() { public void doClose() { Releasables.close(sums, compensations); } + + @Override + public List convert(Map shardResult) { + Object[] values = shardResult.get(name); + List results = new ArrayList<>(values.length); + for (Object value : values) { + results.add(new InternalSum(name, (Long) value, format, metadata())); + } + return results; + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java index 3541753d94e6f..76c5bb31fd166 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java @@ -45,6 +45,7 @@ import org.opensearch.search.aggregations.InternalAggregation; import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.aggregations.LeafBucketCollectorBase; +import org.opensearch.search.aggregations.ShardResultConvertor; import org.opensearch.search.aggregations.StarTreeBucketCollector; import org.opensearch.search.aggregations.StarTreePreComputeCollector; import org.opensearch.search.aggregations.support.ValuesSource; @@ -53,6 +54,8 @@ import org.opensearch.search.startree.StarTreeQueryHelper; import java.io.IOException; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import static org.opensearch.search.startree.StarTreeQueryHelper.getSupportedStarTree; @@ -65,7 +68,7 @@ * * @opensearch.internal */ -public class ValueCountAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector { +public class ValueCountAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor { final ValuesSource valuesSource; @@ -209,4 +212,14 @@ public StarTreeBucketCollector getStarTreeBucketCollector( (bucket, metricValue) -> counts.increment(bucket, metricValue) ); } + + @Override + public List convert(Map shardResult) { + Object[] values = shardResult.get(name); + List results = new ArrayList<>(values.length); + for (Object value : values) { + results.add(new InternalValueCount(name, (Long) value, metadata())); + } + return results; + } } diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java index 90dfc1e086602..442d81f585015 100644 --- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java +++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java @@ -42,6 +42,8 @@ import org.opensearch.core.ParseField; import org.opensearch.core.common.ParsingException; import org.opensearch.core.common.Strings; +import org.opensearch.core.common.bytes.BytesArray; +import org.opensearch.core.common.bytes.BytesReference; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -78,6 +80,7 @@ import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.Map; @@ -137,6 +140,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R public static final ParseField POINT_IN_TIME = new ParseField("pit"); public static final ParseField SEARCH_PIPELINE = new ParseField("search_pipeline"); public static final ParseField VERBOSE_SEARCH_PIPELINE = new ParseField("verbose_pipeline"); + public static final ParseField QUERY_PLAN_IR = new ParseField("query_plan_ir"); public static SearchSourceBuilder fromXContent(XContentParser parser) throws IOException { return fromXContent(parser, true); @@ -229,6 +233,8 @@ public static HighlightBuilder highlight() { private boolean verbosePipeline = false; + private byte[] queryPlanIR; + /** * Constructs a new search source builder. */ @@ -308,6 +314,10 @@ public SearchSourceBuilder(StreamInput in) throws IOException { if (in.getVersion().onOrAfter(Version.V_2_19_0)) { verbosePipeline = in.readBoolean(); } + if (in.getVersion().onOrAfter(Version.V_3_0_0)) { + BytesReference bytesRef = in.readOptionalBytesReference(); + queryPlanIR = bytesRef != null ? BytesReference.toBytes(bytesRef) : null; + } } @Override @@ -394,6 +404,9 @@ public void writeTo(StreamOutput out) throws IOException { if (out.getVersion().onOrAfter(Version.V_2_19_0)) { out.writeBoolean(verbosePipeline); } + if (out.getVersion().onOrAfter(Version.V_3_0_0)) { + out.writeOptionalBytesReference(queryPlanIR != null ? new BytesArray(queryPlanIR) : null); + } } /** @@ -1171,6 +1184,21 @@ public Boolean verbosePipeline() { return verbosePipeline; } + /** + * Sets the query plan intermediate representation for this search request. + */ + public SearchSourceBuilder queryPlanIR(byte[] queryPlanIR) { + this.queryPlanIR = queryPlanIR; + return this; + } + + /** + * Gets the query plan intermediate representation for this search request. + */ + public byte[] queryPlanIR() { + return queryPlanIR; + } + /** * Rewrites this search source builder into its primitive form. e.g. by * rewriting the QueryBuilder. If the builder did not change the identity @@ -1270,6 +1298,7 @@ private SearchSourceBuilder shallowCopy( rewrittenBuilder.derivedFields = derivedFields; rewrittenBuilder.searchPipeline = searchPipeline; rewrittenBuilder.verbosePipeline = verbosePipeline; + rewrittenBuilder.queryPlanIR = queryPlanIR; return rewrittenBuilder; } @@ -1341,6 +1370,8 @@ public void parseXContent(XContentParser parser, boolean checkTrailingTokens) th searchPipeline = parser.text(); } else if (VERBOSE_SEARCH_PIPELINE.match(currentFieldName, parser.getDeprecationHandler())) { verbosePipeline = parser.booleanValue(); + } else if (QUERY_PLAN_IR.match(currentFieldName, parser.getDeprecationHandler())) { + queryPlanIR = parser.binaryValue(); } else { throw new ParsingException( parser.getTokenLocation(), @@ -1678,6 +1709,10 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) t builder.field(VERBOSE_SEARCH_PIPELINE.getPreferredName(), verbosePipeline); } + if (queryPlanIR != null) { + builder.field(QUERY_PLAN_IR.getPreferredName(), queryPlanIR); + } + return builder; } @@ -1957,7 +1992,8 @@ public int hashCode() { derivedFieldsObject, derivedFields, searchPipeline, - verbosePipeline + verbosePipeline, + Arrays.hashCode(queryPlanIR) ); } @@ -2004,7 +2040,8 @@ public boolean equals(Object obj) { && Objects.equals(derivedFieldsObject, other.derivedFieldsObject) && Objects.equals(derivedFields, other.derivedFields) && Objects.equals(searchPipeline, other.searchPipeline) - && Objects.equals(verbosePipeline, other.verbosePipeline); + && Objects.equals(verbosePipeline, other.verbosePipeline) + && Arrays.equals(queryPlanIR, other.queryPlanIR); } @Override diff --git a/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java b/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java index 05ab12d5ae809..4a4b96113930c 100644 --- a/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java +++ b/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java @@ -34,6 +34,8 @@ import org.opensearch.index.IndexService; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineSearcher; +import org.opensearch.index.engine.EngineSearcherSupplier; import org.opensearch.index.shard.IndexShard; import org.opensearch.search.RescoreDocIds; import org.opensearch.search.dfs.AggregatedDfs; @@ -57,7 +59,7 @@ public LegacyReaderContext( ShardSearchContextId id, IndexService indexService, IndexShard indexShard, - Engine.SearcherSupplier reader, + EngineSearcherSupplier reader, ShardSearchRequest shardSearchRequest, long keepAliveInMillis ) { @@ -70,7 +72,7 @@ public LegacyReaderContext( // to reuse the searcher created on the request that initialized the scroll. // This ensures that we wrap the searcher's reader with the user's permissions // when they are available. - final Engine.Searcher delegate = searcherSupplier.acquireSearcher("search"); + final Engine.Searcher delegate = (Engine.Searcher) searcherSupplier.acquireSearcher("search"); addOnClose(delegate); // wrap the searcher so that closing is a noop, the actual closing happens when this context is closed this.searcher = new Engine.Searcher( @@ -89,7 +91,7 @@ public LegacyReaderContext( } @Override - public Engine.Searcher acquireSearcher(String source) { + public EngineSearcher acquireSearcher(String source) { if (scrollContext != null) { assert Engine.SEARCH_SOURCE.equals(source) : "scroll context should not acquire searcher for " + source; return searcher; diff --git a/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java b/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java index 5c2a9f82f98e4..b09f40f35172f 100644 --- a/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java +++ b/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java @@ -14,6 +14,7 @@ import org.opensearch.common.lease.Releasables; import org.opensearch.index.IndexService; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineSearcherSupplier; import org.opensearch.index.engine.Segment; import org.opensearch.index.shard.IndexShard; @@ -43,7 +44,7 @@ public PitReaderContext( ShardSearchContextId id, IndexService indexService, IndexShard indexShard, - Engine.SearcherSupplier searcherSupplier, + EngineSearcherSupplier searcherSupplier, long keepAliveInMillis, boolean singleSession ) { diff --git a/server/src/main/java/org/opensearch/search/internal/ReaderContext.java b/server/src/main/java/org/opensearch/search/internal/ReaderContext.java index 776e92d325ae4..1293032f7932e 100644 --- a/server/src/main/java/org/opensearch/search/internal/ReaderContext.java +++ b/server/src/main/java/org/opensearch/search/internal/ReaderContext.java @@ -38,6 +38,8 @@ import org.opensearch.common.util.concurrent.AbstractRefCounted; import org.opensearch.index.IndexService; import org.opensearch.index.engine.Engine; +import org.opensearch.index.engine.EngineSearcher; +import org.opensearch.index.engine.EngineSearcherSupplier; import org.opensearch.index.shard.IndexShard; import org.opensearch.search.RescoreDocIds; import org.opensearch.search.dfs.AggregatedDfs; @@ -65,7 +67,7 @@ public class ReaderContext implements Releasable { private final ShardSearchContextId id; private final IndexService indexService; private final IndexShard indexShard; - protected final Engine.SearcherSupplier searcherSupplier; + protected final EngineSearcherSupplier searcherSupplier; private final AtomicBoolean closed = new AtomicBoolean(false); private final boolean singleSession; @@ -84,7 +86,7 @@ public ReaderContext( ShardSearchContextId id, IndexService indexService, IndexShard indexShard, - Engine.SearcherSupplier searcherSupplier, + EngineSearcherSupplier searcherSupplier, long keepAliveInMillis, boolean singleSession ) { @@ -150,7 +152,7 @@ public IndexShard indexShard() { return indexShard; } - public Engine.Searcher acquireSearcher(String source) { + public EngineSearcher acquireSearcher(String source) { return searcherSupplier.acquireSearcher(source); } diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java index 4eadd8817a5c3..ec392b4e0cf9b 100644 --- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java +++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java @@ -83,6 +83,7 @@ import org.opensearch.search.suggest.SuggestionSearchContext; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -561,4 +562,12 @@ public StreamSearchChannelListener getStr public boolean isStreamSearch() { return false; } + + public void setDFResults(Map dfResults) { + + } + + public Map getDFResults() { + return Collections.emptyMap(); + } } diff --git a/server/src/main/java/org/opensearch/search/query/GenericQueryPhase.java b/server/src/main/java/org/opensearch/search/query/GenericQueryPhase.java new file mode 100644 index 0000000000000..533ef9b328c99 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/GenericQueryPhase.java @@ -0,0 +1,25 @@ +package org.opensearch.search.query; + +import java.util.LinkedList; + +/** + * Generic query phase that can work with different context and searcher types + * @param Context type + * @param Searcher type + * @param Query type + */ +public class GenericQueryPhase { + private final GenericQueryPhaseSearcher queryPhaseSearcher; + + public GenericQueryPhase(GenericQueryPhaseSearcher queryPhaseSearcher) { + this.queryPhaseSearcher = queryPhaseSearcher; + } + + public boolean executeInternal(C context, S searcher, Q query) throws QueryPhaseExecutionException { + try { + return queryPhaseSearcher.searchWith(context, searcher, query, new LinkedList<>() /* Figure out how to pass collectors */, false, false); + } catch (Exception e) { + throw new QueryPhaseExecutionException(null, "Failed to execute query", e); + } + } +} diff --git a/server/src/main/java/org/opensearch/search/query/GenericQueryPhaseSearcher.java b/server/src/main/java/org/opensearch/search/query/GenericQueryPhaseSearcher.java new file mode 100644 index 0000000000000..65a8c9a6b6ff5 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/GenericQueryPhaseSearcher.java @@ -0,0 +1,31 @@ +package org.opensearch.search.query; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.search.aggregations.AggregationProcessor; + +import java.io.IOException; +import java.util.LinkedList; + +/** + * Generic query phase searcher that can work with different context and searcher types + * @param Context type (SearchContext for Lucene, EngineReaderContext for DataFusion) + * @param Searcher type (ContextIndexSearcher for Lucene, ContextEngineSearcher for DataFusion) + * @param Query type (Query for Lucene, byte[] for DataFusion Substrait) + */ +// TODO make this part of QueryPhaseSearcher + @ExperimentalApi +public interface GenericQueryPhaseSearcher { + + boolean searchWith( + C context, + S searcher, + Q query, + LinkedList collectors, + boolean hasFilterCollector, + boolean hasTimeout + ) throws IOException; + + default AggregationProcessor aggregationProcessor(C context) { + return new org.opensearch.search.aggregations.DefaultAggregationProcessor(); + } +} diff --git a/server/src/main/java/org/opensearch/search/query/LuceneQueryPhaseExecutor.java b/server/src/main/java/org/opensearch/search/query/LuceneQueryPhaseExecutor.java new file mode 100644 index 0000000000000..59493a8991733 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/LuceneQueryPhaseExecutor.java @@ -0,0 +1,19 @@ +package org.opensearch.search.query; + +import org.opensearch.search.internal.SearchContext; + +/** + * Lucene-specific query phase executor + */ +public class LuceneQueryPhaseExecutor implements QueryPhaseExecutor { + + @Override + public boolean execute(SearchContext context) throws QueryPhaseExecutionException { + return QueryPhase.executeInternal(context); + } + + @Override + public boolean canHandle(SearchContext context) { + return context != null; + } +} diff --git a/server/src/main/java/org/opensearch/search/query/QueryExecutionContext.java b/server/src/main/java/org/opensearch/search/query/QueryExecutionContext.java new file mode 100644 index 0000000000000..f1501458f5211 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/QueryExecutionContext.java @@ -0,0 +1,13 @@ +package org.opensearch.search.query; + +/** + * Common interface for query execution contexts + */ +public interface QueryExecutionContext { + + /** + * Execute query phase for this context + * @return whether rescoring phase should be executed + */ + boolean executeQueryPhase() throws QueryPhaseExecutionException; +} diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhase.java b/server/src/main/java/org/opensearch/search/query/QueryPhase.java index f8427440a6c13..25cceae77bfd5 100644 --- a/server/src/main/java/org/opensearch/search/query/QueryPhase.java +++ b/server/src/main/java/org/opensearch/search/query/QueryPhase.java @@ -60,6 +60,7 @@ import org.opensearch.search.aggregations.AggregationProcessor; import org.opensearch.search.aggregations.DefaultAggregationProcessor; import org.opensearch.search.aggregations.GlobalAggCollectorManager; +import org.opensearch.search.aggregations.InternalAggregations; import org.opensearch.search.internal.ContextIndexSearcher; import org.opensearch.search.internal.ScrollContext; import org.opensearch.search.internal.SearchContext; @@ -98,6 +99,7 @@ public class QueryPhase { // TODO: remove this property public static final boolean SYS_PROP_REWRITE_SORT = Booleans.parseBoolean(System.getProperty("opensearch.search.rewrite_sort", "true")); public static final QueryPhaseSearcher DEFAULT_QUERY_PHASE_SEARCHER = new DefaultQueryPhaseSearcher(); + private final QueryPhaseSearcher queryPhaseSearcher; private final SuggestProcessor suggestProcessor; private final RescoreProcessor rescoreProcessor; @@ -148,18 +150,29 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep LOGGER.trace("{}", new SearchContextSourcePrinter(searchContext)); } + // Keeping AggregationProcessor and preProcess uncommented since it builds aggregation nesting final AggregationProcessor aggregationProcessor = queryPhaseSearcher.aggregationProcessor(searchContext); // Pre-process aggregations as late as possible. In the case of a DFS_Q_T_F // request, preProcess is called on the DFS phase phase, this is why we pre-process them // here to make sure it happens during the QUERY phase aggregationProcessor.preProcess(searchContext); - boolean rescore = executeInternal(searchContext, queryPhaseSearcher); - if (rescore) { // only if we do a regular search - rescoreProcessor.process(searchContext); - } - suggestProcessor.process(searchContext); - aggregationProcessor.postProcess(searchContext); + searchContext.queryResult() + .topDocs( + new TopDocsAndMaxScore(new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), Lucene.EMPTY_SCORE_DOCS), Float.NaN), + new DocValueFormat[0] + ); + + // boolean rescore = executeInternal(searchContext, queryPhaseSearcher); + + // Post process + SearchEngineResultConversionUtils.convertDFResultGeneric(searchContext); + + // if (rescore) { // only if we do a regular search + // rescoreProcessor.process(searchContext); + // } + // suggestProcessor.process(searchContext); + aggregationProcessor.postProcess(searchContext); if (searchContext.getProfilers() != null) { ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults( diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhaseExecutor.java b/server/src/main/java/org/opensearch/search/query/QueryPhaseExecutor.java new file mode 100644 index 0000000000000..f9ae60a5c2bfa --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/QueryPhaseExecutor.java @@ -0,0 +1,15 @@ +package org.opensearch.search.query; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.search.internal.SearchContext; + +/** + * Strategy interface for executing query phases across different engines + */ +@ExperimentalApi +public interface QueryPhaseExecutor { + + boolean execute(C context) throws QueryPhaseExecutionException; + + boolean canHandle(C context); +} diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java index 38e45a5212c81..790558db5228d 100644 --- a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java +++ b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java @@ -23,6 +23,8 @@ * The extension point which allows to plug in custom search implementation to be * used at {@link QueryPhase}. * + * TODO : Change this ? query phase searcher shouldn't rely on Lucene + * * @opensearch.api */ @PublicApi(since = "2.0.0") diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java index 19a59e9f7bebe..80ed92500fc49 100644 --- a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java +++ b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java @@ -54,11 +54,13 @@ public boolean searchWith( boolean hasFilterCollector, boolean hasTimeout ) throws IOException { - if (searchContext.shouldUseConcurrentSearch()) { - return concurrentQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); - } else { - return defaultQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); - } + // if (searchContext.shouldUseConcurrentSearch()) { + // return concurrentQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); + // } else { + // return defaultQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); + // } + // + return defaultQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout); } /** @@ -68,10 +70,11 @@ public boolean searchWith( */ @Override public AggregationProcessor aggregationProcessor(SearchContext searchContext) { - if (searchContext.shouldUseConcurrentSearch()) { - return concurrentQueryPhaseSearcher.aggregationProcessor(searchContext); - } else { - return defaultQueryPhaseSearcher.aggregationProcessor(searchContext); - } + // if (searchContext.shouldUseConcurrentSearch()) { + // return concurrentQueryPhaseSearcher.aggregationProcessor(searchContext); + // } else { + // return defaultQueryPhaseSearcher.aggregationProcessor(searchContext); + // } + return defaultQueryPhaseSearcher.aggregationProcessor(searchContext); } } diff --git a/server/src/main/java/org/opensearch/search/query/SearchEngineResultConversionUtils.java b/server/src/main/java/org/opensearch/search/query/SearchEngineResultConversionUtils.java new file mode 100644 index 0000000000000..9e9ac280453e3 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/query/SearchEngineResultConversionUtils.java @@ -0,0 +1,64 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search.query; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.search.aggregations.Aggregator; +import org.opensearch.search.aggregations.InternalAggregations; +import org.opensearch.search.aggregations.ShardResultConvertor; +import org.opensearch.search.internal.SearchContext; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class SearchEngineResultConversionUtils { + + private static final Logger LOGGER = LogManager.getLogger(SearchEngineResultConversionUtils.class); + + public static void convertDFResultGeneric(SearchContext searchContext) { + if (searchContext.aggregations() != null) { + Map dfResult = searchContext.getDFResults(); + + // Create aggregators which will process the result from DataFusion + try { + + List aggregators = new ArrayList<>(); + + if (searchContext.aggregations().factories().hasGlobalAggregator()) { + aggregators.addAll(searchContext.aggregations().factories().createTopLevelGlobalAggregators(searchContext)); + } + + if (searchContext.aggregations().factories().hasNonGlobalAggregator()) { + aggregators.addAll(searchContext.aggregations().factories().createTopLevelNonGlobalAggregators(searchContext)); + } + + List shardResultConvertors = aggregators.stream().map(x -> { + if (x instanceof ShardResultConvertor) { + return ((ShardResultConvertor) x); + } else { + throw new UnsupportedOperationException("Aggregator doesn't support converting results from shard: " + x); + } + }).toList(); + + InternalAggregations internalAggregations = InternalAggregations.from( + shardResultConvertors.stream().flatMap(x -> x.convert(dfResult).stream()).collect(Collectors.toList()) + ); + LOGGER.info("Internal Aggregations converted {}", internalAggregations.asMap()); + searchContext.queryResult().aggregations(internalAggregations); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + } + +} diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index 29dd60c3e638f..6bb68a263b46a 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -270,7 +270,9 @@ private IndexService newIndexService(IndexModule module) throws IOException { DefaultRemoteStoreSettings.INSTANCE, s -> {}, null, - () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE + () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE, + null, + null ); } diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java index a936d4ce79ec2..cdaf3293cfb64 100644 --- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java +++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java @@ -2367,6 +2367,7 @@ public void onFailure(final Exception e) { null, new TaskResourceTrackingService(settings, clusterSettings, threadPool), Collections.emptyList(), + Collections.emptyList(), Collections.emptyList() ); SearchPhaseController searchPhaseController = new SearchPhaseController( diff --git a/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java b/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java index 9e4e59d9a4d15..064bc6281d997 100644 --- a/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java +++ b/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java @@ -153,6 +153,7 @@ public Engine.Operation convertToEngineOp(Translog.Operation operation, Engine.O true, SequenceNumbers.UNASSIGNED_SEQ_NO, SequenceNumbers.UNASSIGNED_PRIMARY_TERM + ,null // TODO ); return engineIndex; case DELETE: diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index a300e2c9cc717..7513db2d13ab7 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -738,7 +738,8 @@ protected IndexShard newShard( new Object(), clusterService.getClusterApplierService(), MergedSegmentPublisher.EMPTY, - ReferencedSegmentsPublisher.EMPTY + ReferencedSegmentsPublisher.EMPTY, + null ); indexShard.addShardFailureCallback(DEFAULT_SHARD_FAILURE_HANDLER); if (remoteStoreStatsTrackerFactory != null) { diff --git a/test/framework/src/main/java/org/opensearch/node/MockNode.java b/test/framework/src/main/java/org/opensearch/node/MockNode.java index 8297e6b066cde..8dcf2cb66e4ab 100644 --- a/test/framework/src/main/java/org/opensearch/node/MockNode.java +++ b/test/framework/src/main/java/org/opensearch/node/MockNode.java @@ -51,6 +51,7 @@ import org.opensearch.env.Environment; import org.opensearch.http.HttpServerTransport; import org.opensearch.indices.IndicesService; +import org.opensearch.plugins.DataSourcePlugin; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.PluginInfo; import org.opensearch.plugins.SearchPlugin; @@ -175,7 +176,8 @@ protected SearchService newSearchService( Executor indexSearcherExecutor, TaskResourceTrackingService taskResourceTrackingService, Collection concurrentSearchDeciderFactories, - List pluginProfilers + List pluginProfilers, + List dataSourcePluginList ) { if (getPluginsService().filterPlugins(MockSearchService.TestPlugin.class).isEmpty()) { return super.newSearchService( @@ -191,7 +193,8 @@ protected SearchService newSearchService( indexSearcherExecutor, taskResourceTrackingService, concurrentSearchDeciderFactories, - pluginProfilers + pluginProfilers, + null // TODO ); } return new MockSearchService( diff --git a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java index e3bc166e56d6b..0bf59b30ff011 100644 --- a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java +++ b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java @@ -114,7 +114,8 @@ public MockSearchService( indexSearcherExecutor, taskResourceTrackingService, Collections.emptyList(), - Collections.emptyList() + Collections.emptyList(), + null // TODO ); }