diff --git a/.gitignore b/.gitignore
index 0a784701375d9..d5ae200e48db9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -10,6 +10,12 @@ CLAUDE.md
 build-idea/
 out/
 
+modules/parquet-data-format/src/main/rust/target/*
+libs/dataformat-csv/jni/target/*
+libs/dataformat-csv/src/main/resources/*
+plugins/dataformat-csv/src/main/resources/*
+libs/dataformat-csv/jni/Cargo.lock
+
 # include shared intellij config
 !.idea/inspectionProfiles/Project_Default.xml
 !.idea/runConfigurations/Debug_OpenSearch.xml
@@ -68,3 +74,14 @@ testfixtures_shared/
 
 # build files generated
 doc-tools/missing-doclet/bin/
+/plugins/dataformat-csv/jni/target
+/plugins/dataformat-csv/jni/Cargo.lock
+
+/modules/parquet-data-format/src/main/rust/target
+/modules/parquet-data-format/src/main/rust/debug
+/modules/parquet-data-format/src/main/resources/native/
+/modules/parquet-data-format/jni/target/debug
+
+/modules/parquet-data-format/jni/target/release
+**/Cargo.lock
+/modules/parquet-data-format/jni/
diff --git a/.idea/runConfigurations/Debug_OpenSearch.xml b/.idea/runConfigurations/Debug_OpenSearch.xml
index fddcf47728460..c18046f873477 100644
--- a/.idea/runConfigurations/Debug_OpenSearch.xml
+++ b/.idea/runConfigurations/Debug_OpenSearch.xml
@@ -1,11 +1,15 @@
 <component name="ProjectRunConfigurationManager">
-    <configuration default="false" name="Debug OpenSearch" type="Remote">
-        <option name="USE_SOCKET_TRANSPORT" value="true" />
-        <option name="SERVER_MODE" value="true" />
-        <option name="SHMEM_ADDRESS" />
-        <option name="HOST" value="localhost" />
-        <option name="PORT" value="5005" />
-        <option name="AUTO_RESTART" value="true" />
-        <method v="2" />
-    </configuration>
-</component>
+  <configuration default="false" name="Debug OpenSearch" type="Remote">
+    <option name="USE_SOCKET_TRANSPORT" value="true" />
+    <option name="SERVER_MODE" value="true" />
+    <option name="SHMEM_ADDRESS" />
+    <option name="HOST" value="localhost" />
+    <option name="PORT" value="5005" />
+    <option name="AUTO_RESTART" value="true" />
+    <RunnerSettings RunnerId="Debug">
+      <option name="DEBUG_PORT" value="5005" />
+      <option name="LOCAL" value="false" />
+    </RunnerSettings>
+    <method v="2" />
+  </configuration>
+</component>
\ No newline at end of file
diff --git a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java
index c5035f3b082fe..8c4bbe6c2db42 100644
--- a/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java
+++ b/buildSrc/src/main/java/org/opensearch/gradle/testclusters/RunTask.java
@@ -168,6 +168,8 @@ public void beforeStart() {
             firstNode.setting("discovery.seed_hosts", LOCALHOST_ADDRESS_PREFIX + DEFAULT_TRANSPORT_PORT);
             cluster.setPreserveDataDir(preserveData);
             for (OpenSearchNode node : cluster.getNodes()) {
+                // TODO : remove this - this disables assertions
+                node.jvmArgs(" -da ");
                 if (node != firstNode) {
                     node.setHttpPort(String.valueOf(httpPort));
                     httpPort++;
diff --git a/gradle/missing-javadoc.gradle b/gradle/missing-javadoc.gradle
index 5f3ef5c0b7d48..da60d3afa0a78 100644
--- a/gradle/missing-javadoc.gradle
+++ b/gradle/missing-javadoc.gradle
@@ -160,7 +160,11 @@ configure([
   project(":test:fixtures:hdfs-fixture"),
   project(":test:fixtures:s3-fixture"),
   project(":test:framework"),
-  project(":test:logger-usage")
+  project(":test:logger-usage"),
+  project(":libs:opensearch-vectorized-exec-spi"), // TODO
+  project(":plugins:engine-datafusion"), //TODO
+  project(":server"),
+  project(":modules:parquet-data-format"),
 ]) {
   project.tasks.withType(MissingJavadocTask) {
     isExcluded = true
diff --git a/gradle/run.gradle b/gradle/run.gradle
index ac58d74acd6b0..11eac098e35e9 100644
--- a/gradle/run.gradle
+++ b/gradle/run.gradle
@@ -52,6 +52,30 @@ testClusters {
         }
       }
     }
+
+    if (findProperty("remotePlugins")) {
+      remotePlugins = Eval.me(remotePlugins)
+      for (String coords : remotePlugins) {
+        if (coords.startsWith('/') || coords.startsWith('file:')) {
+          // Direct file path
+          plugin(project.layout.file(project.provider { new File(coords) }))
+        } else {
+          // Maven coordinates
+          def config = project.configurations.detachedConfiguration(
+            project.dependencies.create(coords + '@zip')
+          )
+          config.resolutionStrategy.cacheChangingModulesFor 0, 'seconds'
+          project.repositories.mavenLocal()
+          project.repositories {
+            maven {
+              name = 'OpenSearch Snapshots'
+              url = 'https://central.sonatype.com/repository/maven-snapshots/'
+            }
+          }
+          plugin(project.layout.file(project.provider { config.singleFile }))
+        }
+      }
+    }
   }
 
 }
diff --git a/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java b/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java
index 94ec0db3a9712..5f419ce621e24 100644
--- a/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java
+++ b/libs/common/src/main/java/org/opensearch/common/annotation/processor/ApiAnnotationProcessor.java
@@ -85,20 +85,20 @@ public boolean process(Set<? extends TypeElement> annotations, RoundEnvironment
             Set.of(PublicApi.class, ExperimentalApi.class, DeprecatedApi.class)
         );
 
-        for (var element : elements) {
-            validate(element);
-
-            if (!checkPackage(element)) {
-                continue;
-            }
-
-            // Skip all not-public elements
-            checkPublicVisibility(null, element);
-
-            if (element instanceof TypeElement) {
-                process((TypeElement) element);
-            }
-        }
+//        for (var element : elements) {
+//            validate(element);
+//
+//            if (!checkPackage(element)) {
+//                continue;
+//            }
+//
+//            // Skip all not-public elements
+//            checkPublicVisibility(null, element);
+//
+//            if (element instanceof TypeElement) {
+//                process((TypeElement) element);
+//            }
+//        }
 
         return false;
     }
diff --git a/libs/vectorized-exec-spi/build.gradle b/libs/vectorized-exec-spi/build.gradle
new file mode 100644
index 0000000000000..dfb95964d01f5
--- /dev/null
+++ b/libs/vectorized-exec-spi/build.gradle
@@ -0,0 +1,30 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+apply plugin: 'opensearch.build'
+
+description = 'Vectorized engine common interfaces for OpenSearch'
+
+dependencies {
+  api project(':libs:opensearch-core')
+  api project(':libs:opensearch-common')
+
+  testImplementation(project(":test:framework")) {
+    exclude group: 'org.opensearch', module: 'vectorized-exec-spi'
+  }
+}
+
+tasks.named('forbiddenApisMain').configure {
+  replaceSignatureFiles 'jdk-signatures'
+}
+
+jarHell.enabled = false
+
+test {
+  systemProperty 'tests.security.manager', 'false'
+}
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/package-info.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/package-info.java
new file mode 100644
index 0000000000000..8d91260830538
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/package-info.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * DataFusion integration for OpenSearch.
+ * Provides JNI bindings and core functionality for DataFusion query engine.
+ */
+package org.opensearch.vectorized.execution;
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/CatalogSearcher.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/CatalogSearcher.java
new file mode 100644
index 0000000000000..138d232590871
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/CatalogSearcher.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.vectorized.execution.search;
+
+public class CatalogSearcher {
+}
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/DataFormat.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/DataFormat.java
new file mode 100644
index 0000000000000..cd75df3da20bd
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/DataFormat.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.vectorized.execution.search;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+
+/**
+ DataFormat supported by OpenSearch
+ */
+@ExperimentalApi
+public enum DataFormat {
+    /** CSV Format*/
+    CSV("parquet"),
+    PARQUET("parquet"),
+
+    /** Text Format */
+    Text("text");
+
+    private final String name;
+
+    DataFormat(String name) {
+        this.name = name;
+    }
+
+    public String getName() {
+        return name;
+    }
+}
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/IndexReader.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/IndexReader.java
new file mode 100644
index 0000000000000..d50616ea8a662
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/IndexReader.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.vectorized.execution.search;
+
+public class IndexReader {
+}
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/DataSourceCodec.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/DataSourceCodec.java
new file mode 100644
index 0000000000000..e58f0a7e5bba0
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/DataSourceCodec.java
@@ -0,0 +1,63 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.vectorized.execution.search.spi;
+
+import org.opensearch.vectorized.execution.search.DataFormat;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Service Provider Interface for DataFusion data source codecs.
+ * Implementations provide access to different data formats (CSV, Parquet, etc.)
+ * through the DataFusion query engine.
+ */
+public interface DataSourceCodec {
+
+    /**
+     * Register a directory containing data files with the runtime environment to prewarm cache
+     * This ideally should be used as part of each refresh - equivalent of acquire searcher
+     * where we register the files associated with this particular refresh point
+     * @param directoryPath the path to the directory containing data files
+     * @param fileNames the list of file names to register
+     * @param runtimeId the runtime environment ID
+     * @return a CompletableFuture that completes when registration is done
+     */
+    CompletableFuture<Void> registerDirectory(String directoryPath, List<String> fileNames, long runtimeId);
+
+    /**
+     * Create a new session context for query execution.
+     *
+     * @param globalRuntimeEnvId the global runtime environment ID
+     * @return a CompletableFuture containing the session context ID
+     */
+    CompletableFuture<Long> createSessionContext(long globalRuntimeEnvId);
+
+    /**
+     * Execute a Substrait query plan.
+     *
+     * @param sessionContextId the session context ID
+     * @param substraitPlanBytes the serialized Substrait query plan
+     * @return a CompletableFuture containing the result stream
+     */
+    CompletableFuture<RecordBatchStream> executeSubstraitQuery(long sessionContextId, byte[] substraitPlanBytes);
+
+    /**
+     * Close a session context and free associated resources.
+     *
+     * @param sessionContextId the session context ID to close
+     * @return a CompletableFuture that completes when the context is closed
+     */
+    CompletableFuture<Void> closeSessionContext(long sessionContextId);
+
+    /**
+     * Returns the data format name
+     */
+    DataFormat getDataFormat();
+}
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/RecordBatchStream.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/RecordBatchStream.java
new file mode 100644
index 0000000000000..39a112e2aabd3
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/RecordBatchStream.java
@@ -0,0 +1,44 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.vectorized.execution.search.spi;
+
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Represents a stream of record batches from a DataFusion query execution.
+ * This interface provides access to query results in a streaming fashion.
+ */
+public interface RecordBatchStream extends AutoCloseable {
+
+    /**
+     * Check if there are more record batches available in the stream.
+     *
+     * @return true if more batches are available, false otherwise
+     */
+    boolean hasNext();
+
+    /**
+     * Get the schema of the record batches in this stream.
+     * @return the schema object
+     */
+    Object getSchema();
+
+    /**
+     * Get the next record batch from the stream.
+     *
+     * @return the next record batch as a byte array, or null if no more batches
+     */
+    CompletableFuture<Object> next();
+
+    /**
+     * Close the stream and free associated resources.
+     */
+    @Override
+    void close();
+}
diff --git a/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/package-info.java b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/package-info.java
new file mode 100644
index 0000000000000..0fb858428c115
--- /dev/null
+++ b/libs/vectorized-exec-spi/src/main/java/org/opensearch/vectorized/execution/search/spi/package-info.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Service Provider Interface (SPI) for DataFusion data source codecs.
+ * Defines interfaces for implementing different data format support.
+ */
+package org.opensearch.vectorized.execution.search.spi;
diff --git a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java
index c2821c633c686..266466df822c7 100644
--- a/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java
+++ b/modules/mapper-extras/src/main/java/org/opensearch/index/mapper/ScaledFloatFieldMapper.java
@@ -553,7 +553,7 @@ protected void canDeriveSourceInternal() {
      *       both doc values and stored field
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(
             mappedFieldType,
             new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()),
diff --git a/modules/parquet-data-format/build.gradle b/modules/parquet-data-format/build.gradle
new file mode 100644
index 0000000000000..760fec200bce8
--- /dev/null
+++ b/modules/parquet-data-format/build.gradle
@@ -0,0 +1,260 @@
+import org.opensearch.gradle.test.RestIntegTestTask
+
+apply plugin: 'java'
+apply plugin: 'idea'
+apply plugin: 'eclipse'
+apply plugin: 'opensearch.opensearchplugin'
+apply plugin: 'opensearch.yaml-rest-test'
+apply plugin: 'opensearch.pluginzip'
+apply plugin: 'opensearch.java-agent'
+
+def pluginName = 'ParquetDataFormat'
+def pluginDescription = 'Parquet data format plugin'
+def packagePath = 'com.parquet'
+def pathToPlugin = 'parquetdataformat'
+def pluginClassName = 'ParquetDataFormatPlugin'
+group = "ParquetDataFormatGroup"
+
+java {
+  targetCompatibility = JavaVersion.VERSION_21
+  sourceCompatibility = JavaVersion.VERSION_21
+}
+
+tasks.register("preparePluginPathDirs") {
+  mustRunAfter clean
+  doLast {
+    def newPath = pathToPlugin.replace(".", "/")
+    mkdir "src/main/java/$packagePath/$newPath"
+    mkdir "src/test/java/$packagePath/$newPath"
+    mkdir "src/yamlRestTest/java/$packagePath/$newPath"
+  }
+}
+
+publishing {
+  publications {
+    pluginZip(MavenPublication) { publication ->
+    }
+  }
+}
+
+opensearchplugin {
+  name = pluginName
+  description = pluginDescription
+  classname = "${packagePath}.${pathToPlugin}.${pluginClassName}"
+  licenseFile = rootProject.file('LICENSE.txt')
+  noticeFile = rootProject.file('NOTICE.txt')
+}
+
+// This requires an additional Jar not published as part of build-tools
+loggerUsageCheck.enabled = false
+
+// No need to validate pom, as we do not upload to maven/sonatype
+validateNebulaPom.enabled = false
+
+buildscript {
+  ext {
+    opensearch_version = System.getProperty("opensearch.version", "3.3.0-SNAPSHOT")
+  }
+
+  repositories {
+    mavenLocal()
+    maven { url = "https://central.sonatype.com/repository/maven-snapshots/" }
+    mavenCentral()
+    maven { url = "https://plugins.gradle.org/m2/" }
+  }
+
+  dependencies {
+    classpath "org.opensearch.gradle:build-tools:${opensearch_version}"
+  }
+}
+
+repositories {
+  mavenLocal()
+  maven { url = "https://central.sonatype.com/repository/maven-snapshots/" }
+  mavenCentral()
+  maven { url = "https://plugins.gradle.org/m2/" }
+}
+
+configurations.all {
+  resolutionStrategy {
+    force 'commons-codec:commons-codec:1.18.0'
+    force 'org.slf4j:slf4j-api:2.0.17'
+  }
+}
+
+dependencies {
+  // Apache Arrow dependencies (using stable version with unsafe allocator)
+  implementation 'org.apache.arrow:arrow-vector:17.0.0'
+  implementation 'org.apache.arrow:arrow-memory-core:17.0.0'
+  implementation 'org.apache.arrow:arrow-memory-unsafe:17.0.0'
+  implementation 'org.apache.arrow:arrow-format:17.0.0'
+  implementation 'org.apache.arrow:arrow-c-data:17.0.0'
+
+  // Checker Framework annotations (required by Arrow)
+  implementation 'org.checkerframework:checker-qual:3.42.0'
+
+  // Jackson dependencies required by Arrow
+  implementation 'com.fasterxml.jackson.core:jackson-core:2.18.2'
+  implementation 'com.fasterxml.jackson.core:jackson-databind:2.18.2'
+  implementation 'com.fasterxml.jackson.core:jackson-annotations:2.18.2'
+
+  // FlatBuffers dependency required by Arrow
+  implementation "com.google.flatbuffers:flatbuffers-java:${versions.flatbuffers}"
+
+  // Netty dependencies required by Arrow memory management
+  implementation 'io.netty:netty-buffer:4.1.118.Final'
+  implementation 'io.netty:netty-common:4.1.118.Final'
+
+  // SLF4J logging implementation (required by Apache Arrow)
+  implementation 'org.slf4j:slf4j-api:2.0.17'
+}
+
+test {
+  include '**/*Tests.class'
+  // JVM args for Java 9+ only - remove if using Java 8
+  if (JavaVersion.current().isJava9Compatible()) {
+    jvmArgs '--add-opens=java.base/java.nio=ALL-UNNAMED'
+    jvmArgs '--add-opens=java.base/sun.nio.ch=ALL-UNNAMED'
+  }
+}
+
+task integTest(type: RestIntegTestTask) {
+  description = "Run tests against a cluster"
+  testClassesDirs = sourceSets.test.output.classesDirs
+  classpath = sourceSets.test.runtimeClasspath
+}
+tasks.named("check").configure { dependsOn(integTest) }
+
+integTest {
+  // JVM arguments required for Arrow memory access (Java 9+ only)
+  if (JavaVersion.current().isJava9Compatible()) {
+    jvmArgs '--add-opens=java.base/java.nio=ALL-UNNAMED'
+    jvmArgs '--add-opens=java.base/sun.nio.ch=ALL-UNNAMED'
+  }
+
+  // The --debug-jvm command-line option makes the cluster debuggable; this makes the tests debuggable
+  if (System.getProperty("test.debug") != null) {
+    jvmArgs '-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=*:5005'
+  }
+}
+
+testClusters.integTest {
+  testDistribution = "INTEG_TEST"
+
+  // This installs our plugin into the testClusters
+  plugin(project.tasks.bundlePlugin.archiveFile)
+}
+
+run {
+  useCluster testClusters.integTest
+}
+
+// updateVersion: Task to auto update version to the next development iteration
+tasks.register('buildRust', Exec) {
+//  workingDir = file("${projectDir}/src/main/rust")
+//  commandLine = ['cargo', 'build', '--release']
+
+  description = 'Build the Rust JNI library using Cargo'
+  group = 'build'
+
+  workingDir = file("${projectDir}/src/main/rust")
+
+  // Determine the target directory and library name based on OS
+  def osName = System.getProperty('os.name').toLowerCase()
+  def libPrefix = osName.contains('windows') ? '' : 'lib'
+  def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so')
+
+  // Use debug build for development, release for production
+  def buildType = project.hasProperty('rustRelease') ? 'release' : 'debug'
+  def targetDir = file("${workingDir}/target/")
+
+  // Find cargo executable - try common locations
+  def cargoExecutable = 'cargo'
+  def possibleCargoPaths = [
+          System.getenv('HOME') + '/.cargo/bin/cargo',
+          '/usr/local/bin/cargo',
+          'cargo'
+  ]
+
+  for (String path : possibleCargoPaths) {
+    if (new File(path).exists()) {
+      cargoExecutable = path
+      break
+    }
+  }
+
+  def cargoArgs = [cargoExecutable, 'build']
+//  if (buildType == 'release') {
+  cargoArgs.add('--release')
+//  }
+
+  if (osName.contains('windows')) {
+    commandLine cargoArgs
+  } else {
+    commandLine cargoArgs
+  }
+
+  // Set environment variables for cross-compilation if needed
+  environment 'CARGO_TARGET_DIR', targetDir.absolutePath
+
+  inputs.files fileTree("${workingDir}/src")
+  inputs.file "${workingDir}/Cargo.toml"
+//  outputs.files file("jni/${targetDir}/${libPrefix}opensearch_datafusion_jni${libExtension}")
+//  System.out.println("Building Rust library in ${buildType} mode");
+
+
+}
+
+tasks.register('copyNativeLib', Copy) {
+  dependsOn buildRust
+  from "src/main/rust/target/release"
+  into "src/main/resources/native"
+  include "**/libparquet_dataformat_jni.*"
+  include "**/parquet_dataformat_jni.dll"
+
+  // Set strategy to avoid errors on duplicate files
+  duplicatesStrategy = DuplicatesStrategy.EXCLUDE
+
+  eachFile { file ->
+    def os = System.getProperty('os.name').toLowerCase()
+    def arch = System.getProperty('os.arch').toLowerCase()
+
+    def osDir = os.contains('win') ? 'windows' : os.contains('mac') ? 'macos' : 'linux'
+    def archDir = arch.contains('aarch64') || arch.contains('arm64') ? 'aarch64' :
+      arch.contains('64') ? 'x86_64' : 'x86'
+
+    file.path = "${osDir}/${archDir}/${file.name}"
+  }
+
+  doLast {
+    fileTree(destinationDir).visit { FileVisitDetails fvd ->
+      if (!fvd.isDirectory()) {
+        def file = fvd.file
+        if (!org.gradle.internal.os.OperatingSystem.current().isWindows()) {
+          file.setExecutable(false, false)
+        }
+      }
+    }
+  }
+
+}
+
+// Wire Rust build tasks into the Gradle build lifecycle
+compileJava.dependsOn copyNativeLib
+processResources.dependsOn copyNativeLib
+sourcesJar.dependsOn copyNativeLib
+copyNativeLib.mustRunAfter clean
+buildRust.mustRunAfter clean
+
+task updateVersion {
+  onlyIf { System.getProperty('newVersion') }
+  doLast {
+    ext.newVersion = System.getProperty('newVersion')
+    println "Setting version to ${newVersion}."
+    // String tokenization to support -SNAPSHOT
+    ant.replaceregexp(file:'build.gradle', match: '"opensearch.version", "\\d.*"', replace: '"opensearch.version", "' + newVersion.tokenize('-')[0] + '-SNAPSHOT"', flags:'g', byline:true)
+  }
+}
+
+// Disable specific license tasks
+licenseHeaders.enabled = false
diff --git a/modules/parquet-data-format/gradle.properties b/modules/parquet-data-format/gradle.properties
new file mode 100644
index 0000000000000..7717686e6e937
--- /dev/null
+++ b/modules/parquet-data-format/gradle.properties
@@ -0,0 +1,11 @@
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+
+org.gradle.caching=true
+org.gradle.warning.mode=none
+org.gradle.parallel=true
diff --git a/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.jar b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.jar
new file mode 100644
index 0000000000000..a4b76b9530d66
Binary files /dev/null and b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.jar differ
diff --git a/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.properties b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.properties
new file mode 100644
index 0000000000000..54d42eff023d5
--- /dev/null
+++ b/modules/parquet-data-format/gradle/wrapper/gradle-wrapper.properties
@@ -0,0 +1,14 @@
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+# The OpenSearch Contributors require contributions made to
+# this file be licensed under the Apache-2.0 license or a
+# compatible open source license.
+#
+
+distributionBase=GRADLE_USER_HOME
+distributionPath=wrapper/dists
+distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-all.zip
+zipStoreBase=GRADLE_USER_HOME
+zipStorePath=wrapper/dists
+distributionSha256Sum=2ab88d6de2c23e6adae7363ae6e29cbdd2a709e992929b48b6530fd0c7133bd6
diff --git a/modules/parquet-data-format/gradlew b/modules/parquet-data-format/gradlew
new file mode 100755
index 0000000000000..f5feea6d6b116
--- /dev/null
+++ b/modules/parquet-data-format/gradlew
@@ -0,0 +1,252 @@
+#!/bin/sh
+
+#
+# Copyright © 2015-2021 the original authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#      https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# SPDX-License-Identifier: Apache-2.0
+#
+
+##############################################################################
+#
+#   Gradle start up script for POSIX generated by Gradle.
+#
+#   Important for running:
+#
+#   (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
+#       noncompliant, but you have some other compliant shell such as ksh or
+#       bash, then to run this script, type that shell name before the whole
+#       command line, like:
+#
+#           ksh Gradle
+#
+#       Busybox and similar reduced shells will NOT work, because this script
+#       requires all of these POSIX shell features:
+#         * functions;
+#         * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
+#           «${var#prefix}», «${var%suffix}», and «$( cmd )»;
+#         * compound commands having a testable exit status, especially «case»;
+#         * various built-in commands including «command», «set», and «ulimit».
+#
+#   Important for patching:
+#
+#   (2) This script targets any POSIX shell, so it avoids extensions provided
+#       by Bash, Ksh, etc; in particular arrays are avoided.
+#
+#       The "traditional" practice of packing multiple parameters into a
+#       space-separated string is a well documented source of bugs and security
+#       problems, so this is (mostly) avoided, by progressively accumulating
+#       options in "$@", and eventually passing that to Java.
+#
+#       Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
+#       and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
+#       see the in-line comments for details.
+#
+#       There are tweaks for specific operating systems such as AIX, CygWin,
+#       Darwin, MinGW, and NonStop.
+#
+#   (3) This script is generated from the Groovy template
+#       https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
+#       within the Gradle project.
+#
+#       You can find Gradle at https://github.com/gradle/gradle/.
+#
+##############################################################################
+
+# Attempt to set APP_HOME
+
+# Resolve links: $0 may be a link
+app_path=$0
+
+# Need this for daisy-chained symlinks.
+while
+    APP_HOME=${app_path%"${app_path##*/}"}  # leaves a trailing /; empty if no leading path
+    [ -h "$app_path" ]
+do
+    ls=$( ls -ld "$app_path" )
+    link=${ls#*' -> '}
+    case $link in             #(
+      /*)   app_path=$link ;; #(
+      *)    app_path=$APP_HOME$link ;;
+    esac
+done
+
+# This is normally unused
+# shellcheck disable=SC2034
+APP_BASE_NAME=${0##*/}
+# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
+APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
+' "$PWD" ) || exit
+
+# Use the maximum available, or set MAX_FD != -1 to use that value.
+MAX_FD=maximum
+
+warn () {
+    echo "$*"
+} >&2
+
+die () {
+    echo
+    echo "$*"
+    echo
+    exit 1
+} >&2
+
+# OS specific support (must be 'true' or 'false').
+cygwin=false
+msys=false
+darwin=false
+nonstop=false
+case "$( uname )" in                #(
+  CYGWIN* )         cygwin=true  ;; #(
+  Darwin* )         darwin=true  ;; #(
+  MSYS* | MINGW* )  msys=true    ;; #(
+  NONSTOP* )        nonstop=true ;;
+esac
+
+CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
+
+
+# Determine the Java command to use to start the JVM.
+if [ -n "$JAVA_HOME" ] ; then
+    if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
+        # IBM's JDK on AIX uses strange locations for the executables
+        JAVACMD=$JAVA_HOME/jre/sh/java
+    else
+        JAVACMD=$JAVA_HOME/bin/java
+    fi
+    if [ ! -x "$JAVACMD" ] ; then
+        die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+else
+    JAVACMD=java
+    if ! command -v java >/dev/null 2>&1
+    then
+        die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
+
+Please set the JAVA_HOME variable in your environment to match the
+location of your Java installation."
+    fi
+fi
+
+# Increase the maximum file descriptors if we can.
+if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
+    case $MAX_FD in #(
+      max*)
+        # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
+        # shellcheck disable=SC2039,SC3045
+        MAX_FD=$( ulimit -H -n ) ||
+            warn "Could not query maximum file descriptor limit"
+    esac
+    case $MAX_FD in  #(
+      '' | soft) :;; #(
+      *)
+        # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
+        # shellcheck disable=SC2039,SC3045
+        ulimit -n "$MAX_FD" ||
+            warn "Could not set maximum file descriptor limit to $MAX_FD"
+    esac
+fi
+
+# Collect all arguments for the java command, stacking in reverse order:
+#   * args from the command line
+#   * the main class name
+#   * -classpath
+#   * -D...appname settings
+#   * --module-path (only if needed)
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
+
+# For Cygwin or MSYS, switch paths to Windows format before running java
+if "$cygwin" || "$msys" ; then
+    APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
+    CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
+
+    JAVACMD=$( cygpath --unix "$JAVACMD" )
+
+    # Now convert the arguments - kludge to limit ourselves to /bin/sh
+    for arg do
+        if
+            case $arg in                                #(
+              -*)   false ;;                            # don't mess with options #(
+              /?*)  t=${arg#/} t=/${t%%/*}              # looks like a POSIX filepath
+                    [ -e "$t" ] ;;                      #(
+              *)    false ;;
+            esac
+        then
+            arg=$( cygpath --path --ignore --mixed "$arg" )
+        fi
+        # Roll the args list around exactly as many times as the number of
+        # args, so each arg winds up back in the position where it started, but
+        # possibly modified.
+        #
+        # NB: a `for` loop captures its iteration list before it begins, so
+        # changing the positional parameters here affects neither the number of
+        # iterations, nor the values presented in `arg`.
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
+    done
+fi
+
+
+# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
+
+# Collect all arguments for the java command:
+#   * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
+#     and any embedded shellness will be escaped.
+#   * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
+#     treated as '${Hostname}' itself on the command line.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+    die "xargs is not available"
+fi
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor command substitution, so instead we
+# post-process each arg (as a line of input to sed) to backslash-escape any
+# character that might be a shell metacharacter, then use eval to reverse
+# that process (while maintaining the separation between arguments), and wrap
+# the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
+
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
+
+exec "$JAVACMD" "$@"
diff --git a/modules/parquet-data-format/gradlew.bat b/modules/parquet-data-format/gradlew.bat
new file mode 100644
index 0000000000000..9b42019c7915b
--- /dev/null
+++ b/modules/parquet-data-format/gradlew.bat
@@ -0,0 +1,94 @@
+@rem
+@rem Copyright 2015 the original author or authors.
+@rem
+@rem Licensed under the Apache License, Version 2.0 (the "License");
+@rem you may not use this file except in compliance with the License.
+@rem You may obtain a copy of the License at
+@rem
+@rem      https://www.apache.org/licenses/LICENSE-2.0
+@rem
+@rem Unless required by applicable law or agreed to in writing, software
+@rem distributed under the License is distributed on an "AS IS" BASIS,
+@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@rem See the License for the specific language governing permissions and
+@rem limitations under the License.
+@rem
+@rem SPDX-License-Identifier: Apache-2.0
+@rem
+
+@if "%DEBUG%"=="" @echo off
+@rem ##########################################################################
+@rem
+@rem  Gradle startup script for Windows
+@rem
+@rem ##########################################################################
+
+@rem Set local scope for the variables with windows NT shell
+if "%OS%"=="Windows_NT" setlocal
+
+set DIRNAME=%~dp0
+if "%DIRNAME%"=="" set DIRNAME=.
+@rem This is normally unused
+set APP_BASE_NAME=%~n0
+set APP_HOME=%DIRNAME%
+
+@rem Resolve any "." and ".." in APP_HOME to make it shorter.
+for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
+
+@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
+set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
+
+@rem Find java.exe
+if defined JAVA_HOME goto findJavaFromJavaHome
+
+set JAVA_EXE=java.exe
+%JAVA_EXE% -version >NUL 2>&1
+if %ERRORLEVEL% equ 0 goto execute
+
+echo. 1>&2
+echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
+echo. 1>&2
+echo Please set the JAVA_HOME variable in your environment to match the 1>&2
+echo location of your Java installation. 1>&2
+
+goto fail
+
+:findJavaFromJavaHome
+set JAVA_HOME=%JAVA_HOME:"=%
+set JAVA_EXE=%JAVA_HOME%/bin/java.exe
+
+if exist "%JAVA_EXE%" goto execute
+
+echo. 1>&2
+echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
+echo. 1>&2
+echo Please set the JAVA_HOME variable in your environment to match the 1>&2
+echo location of your Java installation. 1>&2
+
+goto fail
+
+:execute
+@rem Setup the command line
+
+set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
+
+
+@rem Execute Gradle
+"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
+
+:end
+@rem End local scope for the variables with windows NT shell
+if %ERRORLEVEL% equ 0 goto mainEnd
+
+:fail
+rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
+rem the _cmd.exe /c_ return code!
+set EXIT_CODE=%ERRORLEVEL%
+if %EXIT_CODE% equ 0 set EXIT_CODE=1
+if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
+exit /b %EXIT_CODE%
+
+:mainEnd
+if "%OS%"=="Windows_NT" endlocal
+
+:omega
diff --git a/modules/parquet-data-format/settings.gradle b/modules/parquet-data-format/settings.gradle
new file mode 100644
index 0000000000000..6f5da74a14d2d
--- /dev/null
+++ b/modules/parquet-data-format/settings.gradle
@@ -0,0 +1,10 @@
+/*
+ * This file was generated by the Gradle 'init' task.
+ *
+ * The settings file is used to specify which projects to include in your build.
+ *
+ * Detailed information about configuring a multi-project build in Gradle can be found
+ * in the user manual at https://docs.gradle.org/6.5.1/userguide/multi_project_builds.html
+ */
+
+rootProject.name = 'plugin-template'
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java
new file mode 100644
index 0000000000000..d6553a14ab23d
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/ParquetDataFormatPlugin.java
@@ -0,0 +1,107 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+package com.parquet.parquetdataformat;
+
+import com.parquet.parquetdataformat.engine.ParquetDataFormat;
+import com.parquet.parquetdataformat.fields.ParquetFieldUtil;
+import com.parquet.parquetdataformat.engine.read.ParquetDataSourceCodec;
+import com.parquet.parquetdataformat.writer.ParquetWriter;
+import org.opensearch.index.engine.DataFormatPlugin;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import com.parquet.parquetdataformat.bridge.RustBridge;
+import com.parquet.parquetdataformat.engine.ParquetExecutionEngine;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.plugins.DataSourcePlugin;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+/**
+ * OpenSearch plugin that provides Parquet data format support for indexing operations.
+ *
+ * <p>This plugin implements the Project Mustang design for writing OpenSearch documents
+ * to Parquet format using Apache Arrow as the intermediate representation and a native
+ * Rust backend for high-performance Parquet file generation.
+ *
+ * <p>Key features provided by this plugin:
+ * <ul>
+ *   <li>Integration with OpenSearch's DataFormatPlugin interface</li>
+ *   <li>Parquet-based execution engine with Arrow memory management</li>
+ *   <li>High-performance native Rust backend via JNI bridge</li>
+ *   <li>Memory pressure monitoring and backpressure mechanisms</li>
+ *   <li>Columnar storage optimization for analytical workloads</li>
+ * </ul>
+ *
+ * <p>The plugin orchestrates the complete pipeline from OpenSearch document indexing
+ * through Arrow-based batching to final Parquet file generation. It provides both
+ * the execution engine interface for OpenSearch integration and testing utilities
+ * for development purposes.
+ *
+ * <p>Architecture components:
+ * <ul>
+ *   <li>{@link ParquetExecutionEngine} - Main execution engine implementation</li>
+ *   <li>{@link ParquetWriter} - Document writer with Arrow integration</li>
+ *   <li>{@link RustBridge} - JNI interface to native Parquet operations</li>
+ *   <li>Memory management via {@link com.parquet.parquetdataformat.memory} package</li>
+ * </ul>
+ */
+public class ParquetDataFormatPlugin extends Plugin implements DataFormatPlugin, DataSourcePlugin {
+
+    @Override
+    @SuppressWarnings("unchecked")
+    public <T extends DataFormat> IndexingExecutionEngine<T> indexingEngine(MapperService mapperService, ShardPath shardPath) {
+        return (IndexingExecutionEngine<T>) new ParquetExecutionEngine(() -> ParquetFieldUtil.getSchema(mapperService), shardPath);
+    }
+
+    private Class<? extends DataFormat> getDataFormatType() {
+        return ParquetDataFormat.class;
+    }
+
+    @Override
+    public DataFormat getDataFormat() {
+        return new ParquetDataFormat();
+    }
+
+    @Override
+    public Optional<Map<org.opensearch.vectorized.execution.search.DataFormat, DataSourceCodec>> getDataSourceCodecs() {
+        Map<org.opensearch.vectorized.execution.search.DataFormat, DataSourceCodec> codecs = new HashMap<>();
+        ParquetDataSourceCodec parquetDataSourceCodec = new ParquetDataSourceCodec();
+        // TODO : version it correctly - similar to lucene codecs?
+        codecs.put(parquetDataSourceCodec.getDataFormat(), new ParquetDataSourceCodec());
+        return Optional.of(codecs);
+        // return Optional.empty();
+    }
+
+    // for testing locally only
+    public void indexDataToParquetEngine() throws IOException {
+        //Create Engine (take Schema as Input)
+//        IndexingExecutionEngine<ParquetDataFormat> indexingExecutionEngine = indexingEngine();
+//        //Create Writer
+//        ParquetWriter writer = (ParquetWriter) indexingExecutionEngine.createWriter();
+//        for (int i=0;i<10;i++) {
+//            //Get DocumentInput
+//            DocumentInput documentInput = writer.newDocumentInput();
+//            ParquetDocumentInput parquetDocumentInput = (ParquetDocumentInput) documentInput;
+//            //Populate data
+//            DummyDataUtils.populateDocumentInput(parquetDocumentInput);
+//            //Write document
+//            writer.addDoc(parquetDocumentInput);
+//        }
+//        writer.flush(null);
+//        writer.close();
+//        //refresh engine
+//        indexingExecutionEngine.refresh(null);
+    }
+
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/ArrowExport.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/ArrowExport.java
new file mode 100644
index 0000000000000..694df0c4a9f47
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/ArrowExport.java
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.bridge;
+
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+
+/**
+ * Container for Arrow C Data Interface exports.
+ * Provides a safe wrapper around ArrowArray and ArrowSchema with proper resource management.
+ */
+public record ArrowExport(ArrowArray arrowArray, ArrowSchema arrowSchema) implements AutoCloseable {
+
+    public long getArrayAddress() {
+        return arrowArray.memoryAddress();
+    }
+
+    public long getSchemaAddress() {
+        return arrowSchema.memoryAddress();
+    }
+
+    @Override
+    public void close() {
+        if (arrowArray != null) {
+            arrowArray.close();
+        }
+        if (arrowSchema != null) {
+            arrowSchema.close();
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/RustBridge.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/RustBridge.java
new file mode 100644
index 0000000000000..8ef4596395e97
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/bridge/RustBridge.java
@@ -0,0 +1,119 @@
+package com.parquet.parquetdataformat.bridge;
+
+import org.opensearch.common.SuppressForbidden;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.StandardCopyOption;
+import java.util.Locale;
+
+/**
+ * JNI bridge to the native Rust Parquet writer implementation.
+ *
+ * <p>This class provides the interface between Java and the native Rust library
+ * that handles low-level Parquet file operations. It automatically loads the
+ * appropriate native library for the current platform and architecture.
+ *
+ * <p>Supported platforms:
+ * <ul>
+ *   <li>Windows (x86, x86_64, aarch64)</li>
+ *   <li>macOS (x86_64, aarch64/arm64)</li>
+ *   <li>Linux (x86, x86_64, aarch64)</li>
+ * </ul>
+ *
+ * <p>The native library is extracted from resources and loaded as a temporary file,
+ * which is automatically cleaned up on JVM shutdown.
+ *
+ * <p>All native methods operate on Arrow C Data Interface pointers and return
+ * integer status codes for error handling.
+ */
+public class RustBridge {
+
+    static {
+        try {
+            loadNativeLibrary();
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to load native Rust library", e);
+        }
+    }
+
+    @SuppressForbidden(reason = "Need to create temp files")
+    private static void loadNativeLibrary() {
+
+        String LIB_NAME = "parquet_dataformat_jni";
+        String os = System.getProperty("os.name").toLowerCase(Locale.ROOT);
+        String arch = System.getProperty("os.arch").toLowerCase(Locale.ROOT);
+
+        String osDir = os.contains("win") ? "windows" :
+                os.contains("mac") ? "macos" : "linux";
+        String archDir = arch.contains("aarch64") || arch.contains("arm64") ? "aarch64" :
+                arch.contains("64") ? "x86_64" : "x86";
+
+        String extension = os.contains("win") ? ".dll" :
+                os.contains("mac") ? ".dylib" : ".so";
+
+        String resourcePath = String.format(Locale.ROOT, "/native/%s/%s/lib%s%s", osDir, archDir, LIB_NAME, extension);
+
+        try (InputStream is = RustBridge.class.getResourceAsStream(resourcePath)) {
+            if (is == null) {
+                throw new UnsatisfiedLinkError("Native library not found in resources: " + resourcePath);
+            }
+
+            Path tempFile = Files.createTempFile("lib" + LIB_NAME, extension);
+
+            // Register deletion hook on JVM shutdown
+            Runtime.getRuntime().addShutdownHook(new Thread(() -> {
+                try {
+                    Files.deleteIfExists(tempFile);
+                } catch (IOException ignored) {}
+            }));
+
+            Files.copy(is, tempFile, StandardCopyOption.REPLACE_EXISTING);
+
+            System.load(tempFile.toAbsolutePath().toString());
+        } catch (IOException e) {
+            throw new RuntimeException("Failed to load native library from resources", e);
+        }
+    }
+
+    // Enhanced native methods that handle validation and provide better error reporting
+    public static native void createWriter(String file, long schemaAddress) throws IOException;
+    public static native void write(String file, long arrayAddress, long schemaAddress) throws IOException;
+    public static native void closeWriter(String file) throws IOException;
+    public static native void flushToDisk(String file) throws IOException;
+
+    // State and metrics methods handled on Rust side
+    public static native boolean writerExists(String file);
+    public static native long getWriteCount(String file);
+    public static native long getTotalRows(String file);
+    public static native String[] getActiveWriters();
+
+    // Validation helpers that could be implemented natively for better performance
+    public static boolean isValidFileName(String fileName) {
+        return fileName != null && !fileName.trim().isEmpty();
+    }
+
+    public static boolean isValidMemoryAddress(long address) {
+        return address != 0;
+    }
+
+
+    // DATAFUSION specific native methods starts here
+
+    // Record batch and streaming related methods
+    public static native String nativeNextBatch(long streamPtr);
+
+    public static native void nativeCloseStream(long streamPtr);
+
+
+    // Native method declarations - these will be implemented in the JNI library
+    public static native void nativeRegisterDirectory(String tableName, String directoryPath, String[] files, long runtimeId);
+
+    public static native long nativeCreateSessionContext(String[] configKeys, String[] configValues);
+
+    public static native long nativeExecuteSubstraitQuery(long sessionContextPtr, byte[] substraitPlan);
+
+    public static native void nativeCloseSessionContext(long sessionContextPtr);
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/FieldTypeConverter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/FieldTypeConverter.java
new file mode 100644
index 0000000000000..b4ace7c4b1953
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/FieldTypeConverter.java
@@ -0,0 +1,135 @@
+package com.parquet.parquetdataformat.converter;
+
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.apache.lucene.search.Query;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.index.mapper.TextSearchInfo;
+import org.opensearch.index.mapper.ValueFetcher;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Utility class for converting between OpenSearch field types and Arrow/Parquet types.
+ * 
+ * <p>This converter provides bidirectional mapping between OpenSearch's field type system
+ * and Apache Arrow's type system, which serves as the bridge to Parquet data representation.
+ * It handles the complete conversion pipeline from OpenSearch indexed data to columnar
+ * Parquet storage format.
+ * 
+ * <p>Supported type conversions:
+ * <ul>
+ *   <li>OpenSearch numeric types (long, integer, short, byte, double, float) → Arrow Int/FloatingPoint</li>
+ *   <li>OpenSearch boolean → Arrow Bool</li>
+ *   <li>OpenSearch date → Arrow Timestamp</li>
+ *   <li>OpenSearch text/keyword → Arrow Utf8</li>
+ * </ul>
+ * 
+ * <p>The converter also provides reverse mapping capabilities to reconstruct OpenSearch
+ * field types from Arrow types, enabling proper schema reconstruction during read operations.
+ * 
+ * <p>All conversion methods are static and thread-safe, making them suitable for concurrent
+ * use across multiple writer instances.
+ */
+public class FieldTypeConverter {
+    
+    public static Map<FieldType, Object> convertToArrowFieldMap(MappedFieldType mappedFieldType, Object value) {
+        Map<FieldType, Object> fieldMap = new HashMap<>();
+        FieldType arrowFieldType = convertToArrowFieldType(mappedFieldType);
+        fieldMap.put(arrowFieldType, value);
+        return fieldMap;
+    }
+    
+    public static FieldType convertToArrowFieldType(MappedFieldType mappedFieldType) {
+        ArrowType arrowType = getArrowType(mappedFieldType.typeName());
+        return new FieldType(true, arrowType, null);
+    }
+    
+    public static ParquetFieldType convertToParquetFieldType(MappedFieldType mappedFieldType) {
+        ArrowType arrowType = getArrowType(mappedFieldType.typeName());
+        return new ParquetFieldType(mappedFieldType.name(), arrowType);
+    }
+    
+    public static MappedFieldType convertToMappedFieldType(String name, ArrowType arrowType) {
+        String opensearchType = getOpenSearchType(arrowType);
+        return new MockMappedFieldType(name, opensearchType);
+    }
+    
+    private static ArrowType getArrowType(String opensearchType) {
+        switch (opensearchType) {
+            case "long":
+                return new ArrowType.Int(64, true);
+            case "integer":
+                return new ArrowType.Int(32, true);
+            case "short":
+                return new ArrowType.Int(16, true);
+            case "byte":
+                return new ArrowType.Int(8, true);
+            case "double":
+                return new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE);
+            case "float":
+                return new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE);
+            case "boolean":
+                return new ArrowType.Bool();
+            case "date":
+                return new ArrowType.Timestamp(TimeUnit.MILLISECOND, null);
+            default:
+                return new ArrowType.Utf8();
+        }
+    }
+    
+    private static String getOpenSearchType(ArrowType arrowType) {
+        switch (arrowType) {
+            case ArrowType.Int intType -> {
+                return switch (intType.getBitWidth()) {
+                    case 8 -> "byte";
+                    case 16 -> "short";
+                    case 32 -> "integer";
+                    case 64 -> "long";
+                    default -> "integer";
+                };
+            }
+            case ArrowType.FloatingPoint fpType -> {
+                return fpType.getPrecision() == FloatingPointPrecision.DOUBLE ? "double" : "float";
+            }
+            case ArrowType.Bool bool -> {
+                return "boolean";
+            }
+            case ArrowType.Timestamp timestamp -> {
+                return "date";
+            }
+            case null, default -> {
+                return "text";
+            }
+        }
+    }
+    
+    private static class MockMappedFieldType extends MappedFieldType {
+        private final String type;
+        
+        public MockMappedFieldType(String name, String type) {
+            super(name, true, false, false, TextSearchInfo.NONE, null);
+            this.type = type;
+        }
+        
+        @Override
+        public String typeName() {
+            return type;
+        }
+        
+        @Override
+        public ValueFetcher valueFetcher(org.opensearch.index.query.QueryShardContext context,
+                                         org.opensearch.search.lookup.SearchLookup searchLookup,
+                                         String format) {
+            return null;
+        }
+        
+        @Override
+        public Query termQuery(Object value, org.opensearch.index.query.QueryShardContext context) {
+            return null;
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/ParquetFieldType.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/ParquetFieldType.java
new file mode 100644
index 0000000000000..84f1b9a4bedd2
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/converter/ParquetFieldType.java
@@ -0,0 +1,48 @@
+package com.parquet.parquetdataformat.converter;
+
+import org.apache.arrow.vector.types.pojo.ArrowType;
+
+/**
+ * Represents a field type for Parquet-based document fields.
+ * 
+ * <p>This class encapsulates the field name and Arrow type information
+ * required for proper type mapping between OpenSearch fields and Parquet
+ * column definitions. It serves as the intermediate representation used
+ * throughout the Parquet processing pipeline.
+ * 
+ * <p>The Arrow type system provides a rich set of data types that can
+ * accurately represent various field types from OpenSearch, ensuring
+ * proper data serialization and deserialization.
+ * 
+ * <p>Key features:
+ * <ul>
+ *   <li>Field name preservation for schema mapping</li>
+ *   <li>Arrow type integration for precise data representation</li>
+ *   <li>Simple mutable structure for field definition building</li>
+ * </ul>
+ */
+public class ParquetFieldType {
+    private String name;
+    private ArrowType type;
+
+    public ParquetFieldType(String name, ArrowType type) {
+        this.name = name;
+        this.type = type;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public ArrowType getType() {
+        return type;
+    }
+
+    public void setType(ArrowType type) {
+        this.type = type;
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java
new file mode 100644
index 0000000000000..0d6c2519d463a
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/DummyDataUtils.java
@@ -0,0 +1,60 @@
+package com.parquet.parquetdataformat.engine;
+
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.opensearch.common.SuppressForbidden;
+import org.opensearch.index.engine.exec.DocumentInput;
+import org.opensearch.index.mapper.MappedFieldType;
+import com.parquet.parquetdataformat.converter.FieldTypeConverter;
+
+import java.util.Arrays;
+import java.util.Random;
+
+@SuppressForbidden(reason = "Need random for creating temp files")
+public class DummyDataUtils {
+    public static Schema getSchema() {
+        // Create the most minimal schema possible - just one string field
+        return new Schema(Arrays.asList(
+                Field.notNullable(ID, new ArrowType.Int(32, true)),
+                Field.nullable(NAME, new ArrowType.Utf8()),
+                Field.nullable(DESIGNATION, new ArrowType.Utf8()),
+                Field.nullable(SALARY, new ArrowType.Int(32, true))
+        ));
+    }
+
+    public static void populateDocumentInput(DocumentInput<?> documentInput) {
+        MappedFieldType idField = FieldTypeConverter.convertToMappedFieldType(ID, new ArrowType.Int(32, true));
+        documentInput.addField(idField, generateRandomId());
+        MappedFieldType nameField = FieldTypeConverter.convertToMappedFieldType(NAME, new ArrowType.Utf8());
+        documentInput.addField(nameField, generateRandomName());
+        MappedFieldType designationField = FieldTypeConverter.convertToMappedFieldType(DESIGNATION, new ArrowType.Utf8());
+        documentInput.addField(designationField, generateRandomDesignation());
+        MappedFieldType salaryField = FieldTypeConverter.convertToMappedFieldType(SALARY, new ArrowType.Int(32, true));
+        documentInput.addField(salaryField, random.nextInt(100000));
+    }
+
+    private static final String ID = "id";
+    private static final String NAME = "name";
+    private static final String DESIGNATION = "designation";
+    private static final String SALARY = "salary";
+    private static final String INCREMENT = "increment";
+    private static final Random random = new Random();
+    private static final String[] NAMES = {"John Doe", "Jane Smith", "Alice Johnson", "Bob Wilson", "Carol Brown"};
+    private static final String[] DESIGNATIONS = {"Software Engineer", "Senior Developer", "Team Lead", "Manager", "Architect"};
+
+    private static int generateRandomId() {
+        return random.nextInt(1000000);
+    }
+
+    private static String generateRandomName() {
+        return NAMES[random.nextInt(NAMES.length)];
+    }
+
+    private static String generateRandomDesignation() {
+        return DESIGNATIONS[random.nextInt(DESIGNATIONS.length)];
+    }
+
+
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java
new file mode 100644
index 0000000000000..240a33c10531e
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetDataFormat.java
@@ -0,0 +1,58 @@
+package com.parquet.parquetdataformat.engine;
+
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.index.engine.exec.DataFormat;
+
+/**
+ * Data format implementation for Parquet-based document storage.
+ *
+ * <p>This class integrates with OpenSearch's DataFormat interface to provide
+ * Parquet file format support within the OpenSearch indexing pipeline. It
+ * defines the configuration and behavior for the "parquet" data format.
+ *
+ * <p>The implementation provides hooks for:
+ * <ul>
+ *   <li>Data format specific settings configuration</li>
+ *   <li>Cluster-level settings management</li>
+ *   <li>Store configuration for Parquet-specific optimizations</li>
+ *   <li>Format identification through the "parquet" name</li>
+ * </ul>
+ *
+ * <p>This class serves as the entry point for registering Parquet format
+ * capabilities with OpenSearch's execution engine framework, allowing
+ * the system to recognize and utilize Parquet-based storage operations.
+ */
+public class ParquetDataFormat implements DataFormat {
+    @Override
+    public Setting<Settings> dataFormatSettings() {
+        return null;
+    }
+
+    @Override
+    public Setting<Settings> clusterLeveldataFormatSettings() {
+        return null;
+    }
+
+    @Override
+    public String name() {
+        return "parquet";
+    }
+
+    @Override
+    public void configureStore() {
+
+    }
+
+    public static ParquetDataFormat PARQUET_DATA_FORMAT = new ParquetDataFormat();
+
+    @Override
+    public boolean equals(Object obj) {
+        return true;
+    }
+
+    @Override
+    public int hashCode() {
+        return 0;
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java
new file mode 100644
index 0000000000000..4778d21f51452
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/ParquetExecutionEngine.java
@@ -0,0 +1,86 @@
+package com.parquet.parquetdataformat.engine;
+
+import com.parquet.parquetdataformat.writer.ParquetDocumentInput;
+import com.parquet.parquetdataformat.writer.ParquetWriter;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import org.opensearch.index.engine.exec.RefreshInput;
+import org.opensearch.index.engine.exec.RefreshResult;
+import org.opensearch.index.engine.exec.Writer;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.shard.ShardPath;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.function.Supplier;
+
+import static com.parquet.parquetdataformat.engine.ParquetDataFormat.PARQUET_DATA_FORMAT;
+
+/**
+ * Main execution engine for Parquet-based indexing operations in OpenSearch.
+ *
+ * <p>This engine implements OpenSearch's IndexingExecutionEngine interface to provide
+ * Parquet file generation capabilities within the indexing pipeline. It manages the
+ * lifecycle of Parquet writers and coordinates the overall document processing workflow.
+ *
+ * <p>Key responsibilities:
+ * <ul>
+ *   <li>Writer creation with unique file naming and Arrow schema integration</li>
+ *   <li>Schema-based field type support and validation</li>
+ *   <li>Refresh operations for completing indexing cycles</li>
+ *   <li>Integration with the broader Parquet data format ecosystem</li>
+ * </ul>
+ *
+ * <p>The engine uses an atomic counter to ensure unique Parquet file names across
+ * concurrent operations, following the naming pattern "parquet_file_generation_N.parquet"
+ * where N is an incrementing sequence number.
+ *
+ * <p>Each writer instance created by this engine is configured with:
+ * <ul>
+ *   <li>A unique file name for output isolation</li>
+ *   <li>The Arrow schema provided during engine construction</li>
+ *   <li>Full access to the Parquet processing pipeline via {@link ParquetWriter}</li>
+ * </ul>
+ *
+ * <p>The engine is designed to work with {@link ParquetDocumentInput} for document
+ * processing and integrates seamlessly with OpenSearch's execution framework.
+ */
+public class ParquetExecutionEngine implements IndexingExecutionEngine<ParquetDataFormat> {
+
+    public static final String FILE_NAME_PREFIX = "parquet_file_generation";
+    private final Supplier<Schema> schema;
+    private final List<WriterFileSet> filesWrittenAlready = new ArrayList<>();
+    private final ShardPath shardPath;
+
+    public ParquetExecutionEngine(Supplier<Schema> schema, ShardPath shardPath) {
+        this.schema = schema;
+        this.shardPath = shardPath;
+    }
+
+    @Override
+    public List<String> supportedFieldTypes() {
+        return List.of();
+    }
+
+    @Override
+    public Writer<ParquetDocumentInput> createWriter(long writerGeneration) throws IOException {
+        String fileName = Path.of(shardPath.getDataPath().toString(), FILE_NAME_PREFIX + "_" + writerGeneration + ".parquet").toString();
+        return new ParquetWriter(fileName, schema.get(), writerGeneration);
+    }
+
+    @Override
+    public RefreshResult refresh(RefreshInput refreshInput) throws IOException {
+        RefreshResult refreshResult = new RefreshResult();
+        filesWrittenAlready.addAll(refreshInput.getWriterFiles());
+        refreshResult.add(PARQUET_DATA_FORMAT, filesWrittenAlready);
+        return refreshResult;
+    }
+
+    @Override
+    public DataFormat getDataFormat() {
+        return new ParquetDataFormat();
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java
new file mode 100644
index 0000000000000..f20a9bae06ea2
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetDataSourceCodec.java
@@ -0,0 +1,143 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.engine.read;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+import static com.parquet.parquetdataformat.bridge.RustBridge.nativeCloseSessionContext;
+import static com.parquet.parquetdataformat.bridge.RustBridge.nativeCreateSessionContext;
+import static com.parquet.parquetdataformat.bridge.RustBridge.nativeExecuteSubstraitQuery;
+import static com.parquet.parquetdataformat.bridge.RustBridge.nativeRegisterDirectory;
+
+/**
+ * Datasource codec implementation for parquet files
+ */
+public class ParquetDataSourceCodec implements DataSourceCodec {
+
+    private static final Logger logger = LogManager.getLogger(ParquetDataSourceCodec.class);
+    private static final AtomicLong runtimeIdGenerator = new AtomicLong(0);
+    private static final AtomicLong sessionIdGenerator = new AtomicLong(0);
+    private final ConcurrentHashMap<Long, Long> sessionContexts = new ConcurrentHashMap<>();
+
+    // JNI library loading
+    static {
+        try {
+            //JniLibraryLoader.loadLibrary();
+            logger.info("DataFusion JNI library loaded successfully");
+        } catch (Exception e) {
+            logger.error("Failed to load DataFusion JNI library", e);
+            throw new RuntimeException("Failed to initialize DataFusion JNI library", e);
+        }
+    }
+
+    @Override
+    public CompletableFuture<Void> registerDirectory(String directoryPath, List<String> fileNames, long runtimeId) {
+        return CompletableFuture.supplyAsync(() -> {
+            try {
+                logger.debug("Registering directory: {} with {} files", directoryPath, fileNames.size());
+
+                // Convert file names to arrays for JNI
+                String[] fileArray = fileNames.toArray(new String[0]);
+
+                // Call native method to register directory
+                nativeRegisterDirectory("csv_table", directoryPath, fileArray, runtimeId);
+                return null;
+            } catch (Exception e) {
+                logger.error("Failed to register directory: " + directoryPath, e);
+                throw new CompletionException("Failed to register directory", e);
+            }
+        });
+    }
+
+    @Override
+    public CompletableFuture<Long> createSessionContext(long globalRuntimeEnvId) {
+        return CompletableFuture.supplyAsync(() -> {
+            try {
+                long sessionId = sessionIdGenerator.incrementAndGet();
+                logger.debug("Creating session context with ID: {} for runtime: {}", sessionId, globalRuntimeEnvId);
+
+                // Default configuration
+                String[] configKeys = { "batch_size", "target_partitions" };
+                String[] configValues = { "1024", "4" };
+
+                // Create native session context
+                long nativeContextPtr = nativeCreateSessionContext(configKeys, configValues);
+                sessionContexts.put(sessionId, nativeContextPtr);
+
+                logger.info("Created session context with ID: {}", sessionId);
+                return sessionId;
+            } catch (Exception e) {
+                logger.error("Failed to create session context for runtime: " + globalRuntimeEnvId, e);
+                throw new CompletionException("Failed to create session context", e);
+            }
+        });
+    }
+
+    @Override
+    public CompletableFuture<RecordBatchStream> executeSubstraitQuery(long sessionContextId, byte[] substraitPlanBytes) {
+        return CompletableFuture.supplyAsync(() -> {
+            try {
+                logger.debug("Executing Substrait query for session: {}", sessionContextId);
+
+                Long nativeContextPtr = sessionContexts.get(sessionContextId);
+                if (nativeContextPtr == null) {
+                    throw new IllegalArgumentException("Invalid session context ID: " + sessionContextId);
+                }
+
+                // Execute query and get native stream pointer
+                long nativeStreamPtr = nativeExecuteSubstraitQuery(nativeContextPtr, substraitPlanBytes);
+
+                // Create Java wrapper for the native stream
+                RecordBatchStream stream = new ParquetRecordBatchStream(nativeStreamPtr);
+
+                logger.info("Successfully executed Substrait query for session: {}", sessionContextId);
+                return stream;
+            } catch (Exception e) {
+                logger.error("Failed to execute Substrait query for session: " + sessionContextId, e);
+                throw new CompletionException("Failed to execute Substrait query", e);
+            }
+        });
+    }
+
+    @Override
+    public CompletableFuture<Void> closeSessionContext(long sessionContextId) {
+        return CompletableFuture.supplyAsync(() -> {
+            try {
+                logger.debug("Closing session context: {}", sessionContextId);
+
+                Long nativeContextPtr = sessionContexts.remove(sessionContextId);
+                if (nativeContextPtr != null) {
+                    nativeCloseSessionContext(nativeContextPtr);
+                    logger.info("Successfully closed session context: {}", sessionContextId);
+                } else {
+                    logger.warn("Session context not found: {}", sessionContextId);
+                }
+
+                return null;
+            } catch (Exception e) {
+                logger.error("Failed to close session context: " + sessionContextId, e);
+                throw new CompletionException("Failed to close session context", e);
+            }
+        });
+    }
+
+    public DataFormat getDataFormat() {
+        return DataFormat.CSV;
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetRecordBatchStream.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetRecordBatchStream.java
new file mode 100644
index 0000000000000..3c23e4fd9d1b5
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/ParquetRecordBatchStream.java
@@ -0,0 +1,117 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.engine.read;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.util.concurrent.CompletableFuture;
+
+import static com.parquet.parquetdataformat.bridge.RustBridge.nativeCloseStream;
+import static com.parquet.parquetdataformat.bridge.RustBridge.nativeNextBatch;
+
+/**
+ * TODO : this need not be here - nothing specific to parquet - move to LIB ?
+ * Native implementation of RecordBatchStream that wraps a JNI stream pointer.
+ * This class provides a Java interface over native DataFusion record batches.
+ */
+public class ParquetRecordBatchStream implements RecordBatchStream {
+
+    private static final Logger logger = LogManager.getLogger(ParquetRecordBatchStream.class);
+
+    private final long nativeStreamPtr;
+    private volatile boolean closed = false;
+    private volatile boolean hasNextCached = false;
+    private volatile boolean hasNextValue = false;
+
+    /**
+     * Creates a new ParquetRecordBatchStream wrapping the given native stream pointer.
+     *
+     * @param nativeStreamPtr Pointer to the native DataFusion RecordBatch stream
+     */
+    public ParquetRecordBatchStream(long nativeStreamPtr) {
+        if (nativeStreamPtr == 0) {
+            throw new IllegalArgumentException("Invalid native stream pointer");
+        }
+        this.nativeStreamPtr = nativeStreamPtr;
+        logger.debug("Created ParquetRecordBatchStream with pointer: {}", nativeStreamPtr);
+    }
+
+    @Override
+    public Object getSchema() {
+        return "ParquetSchema"; // Placeholder
+    }
+
+    @Override
+    public CompletableFuture<Object> next() {
+        // PlaceholderImpl
+        return CompletableFuture.supplyAsync(() -> {
+            if (closed) {
+                return null;
+            }
+
+            try {
+                // Get the next batch from native code
+                String batch = nativeNextBatch(nativeStreamPtr);
+
+                // Reset cached hasNext value since we consumed a batch
+                hasNextCached = false;
+
+                logger.trace("Retrieved next batch from stream pointer: {}", nativeStreamPtr);
+                return batch;
+            } catch (Exception e) {
+                logger.error("Error getting next batch from stream", e);
+                return null;
+            }
+        });
+    }
+
+    @Override
+    public boolean hasNext() {
+        // Placeholder impl
+        if (closed) {
+            return false;
+        }
+
+        if (hasNextCached) {
+            return hasNextValue;
+        }
+
+        try {
+            // Check if there's a next batch available
+            // This is a simplified implementation - in practice, you might want to
+            // peek at the stream without consuming the batch
+            String nextBatch = nativeNextBatch(nativeStreamPtr);
+            hasNextValue = (nextBatch != null);
+            hasNextCached = true;
+
+            logger.trace("hasNext() = {} for stream pointer: {}", hasNextValue, nativeStreamPtr);
+            return hasNextValue;
+        } catch (Exception e) {
+            logger.error("Error checking for next batch in stream", e);
+            return false;
+        }
+    }
+
+    @Override
+    public void close() {
+        if (!closed) {
+            logger.debug("Closing ParquetRecordBatchStream with pointer: {}", nativeStreamPtr);
+            try {
+                nativeCloseStream(nativeStreamPtr);
+                closed = true;
+                logger.debug("Successfully closed ParquetRecordBatchStream");
+            } catch (Exception e) {
+                logger.error("Error closing ParquetRecordBatchStream", e);
+                throw e;
+            }
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/package-info.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/package-info.java
new file mode 100644
index 0000000000000..bd486fa1e26f4
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/engine/read/package-info.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * CSV data format implementation for DataFusion integration.
+ * Provides CSV file reading capabilities through DataFusion query engine.
+ */
+package com.parquet.parquetdataformat.engine.read;
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java
new file mode 100644
index 0000000000000..143b9837c6970
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ArrowFieldRegistry.java
@@ -0,0 +1,103 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import com.parquet.parquetdataformat.fields.number.ByteParquetField;
+import com.parquet.parquetdataformat.fields.number.DoubleParquetField;
+import com.parquet.parquetdataformat.fields.number.FloatParquetField;
+import com.parquet.parquetdataformat.fields.number.HalfFloatParquetField;
+import com.parquet.parquetdataformat.fields.number.IntegerParquetField;
+import com.parquet.parquetdataformat.fields.number.LongParquetField;
+import com.parquet.parquetdataformat.fields.number.ShortParquetField;
+import com.parquet.parquetdataformat.fields.number.UnsignedLongParquetField;
+import org.apache.arrow.vector.types.FloatingPointPrecision;
+import org.apache.arrow.vector.types.TimeUnit;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.FieldType;
+import org.opensearch.index.mapper.BooleanFieldMapper;
+import org.opensearch.index.mapper.DateFieldMapper;
+import org.opensearch.index.mapper.KeywordFieldMapper;
+import org.opensearch.index.mapper.NumberFieldMapper;
+import org.opensearch.index.mapper.TextFieldMapper;
+
+import java.util.HashMap;
+import java.util.Map;
+
+public class ArrowFieldRegistry {
+
+    private static final Map<String, FieldType> FIELD_TYPE_MAP = new HashMap<>();
+    private static final Map<String, ParquetField> PARQUET_FIELD_MAP = new HashMap<>();
+
+    static {
+        //TODO: darsaga check which fields can be nullable and which can not be
+
+        // Number types
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.HALF_FLOAT.typeName(),
+            FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.HALF)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.FLOAT.typeName(),
+            FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.SINGLE)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.DOUBLE.typeName(),
+            FieldType.nullable(new ArrowType.FloatingPoint(FloatingPointPrecision.DOUBLE)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.BYTE.typeName(),
+            FieldType.nullable(new ArrowType.Int(8, true)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.SHORT.typeName(),
+            FieldType.nullable(new ArrowType.Int(16, true)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.INTEGER.typeName(),
+            FieldType.nullable(new ArrowType.Int(32, true)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.LONG.typeName(),
+            FieldType.nullable(new ArrowType.Int(64, true)));
+        FIELD_TYPE_MAP.put(NumberFieldMapper.NumberType.UNSIGNED_LONG.typeName(),
+            FieldType.nullable(new ArrowType.Int(64, false)));
+
+        // Other types
+        FIELD_TYPE_MAP.put(DateFieldMapper.CONTENT_TYPE,
+            FieldType.nullable(new ArrowType.Timestamp(TimeUnit.MILLISECOND, null)));
+        FIELD_TYPE_MAP.put(BooleanFieldMapper.CONTENT_TYPE,
+            FieldType.nullable(new ArrowType.Bool()));
+        FIELD_TYPE_MAP.put(KeywordFieldMapper.CONTENT_TYPE,
+            FieldType.nullable(new ArrowType.Utf8()));
+        FIELD_TYPE_MAP.put(TextFieldMapper.CONTENT_TYPE,
+            FieldType.nullable(new ArrowType.Utf8()));
+
+        setUpParquetFieldMap();
+    }
+
+    private static void setUpParquetFieldMap() {
+
+        //Number fields
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.HALF_FLOAT.typeName(), new HalfFloatParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.FLOAT.typeName(), new FloatParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.DOUBLE.typeName(), new DoubleParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.BYTE.typeName(), new ByteParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.SHORT.typeName(), new ShortParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.INTEGER.typeName(), new IntegerParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.LONG.typeName(), new LongParquetField());
+        PARQUET_FIELD_MAP.put(NumberFieldMapper.NumberType.UNSIGNED_LONG.typeName(), new UnsignedLongParquetField());
+
+        //Date field
+        PARQUET_FIELD_MAP.put(DateFieldMapper.CONTENT_TYPE, new DateParquetField());
+
+        //Boolean field
+        PARQUET_FIELD_MAP.put(BooleanFieldMapper.CONTENT_TYPE, new BooleanParquetField());
+
+        //Text field
+        PARQUET_FIELD_MAP.put(TextFieldMapper.CONTENT_TYPE, new TextParquetField());
+
+        //Keyword field
+        PARQUET_FIELD_MAP.put(KeywordFieldMapper.CONTENT_TYPE, new KeywordParquetField());
+    }
+
+    public static FieldType getFieldType(String typeName) {
+        return FIELD_TYPE_MAP.get(typeName);
+    }
+
+    public static ParquetField getParquetField(String typeName) {
+        return PARQUET_FIELD_MAP.get(typeName);
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/BooleanParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/BooleanParquetField.java
new file mode 100644
index 0000000000000..225323e6a7ffe
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/BooleanParquetField.java
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.BitVector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class BooleanParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        BitVector bitVector = (BitVector) managedVSR.getVector(mappedFieldType.name());
+        int rowIndex = managedVSR.getRowCount();
+        bitVector.setSafe(rowIndex, (Boolean) parseValue ? 1 : 0);
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/DateParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/DateParquetField.java
new file mode 100644
index 0000000000000..5f2170fa95987
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/DateParquetField.java
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class DateParquetField extends ParquetField {
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/KeywordParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/KeywordParquetField.java
new file mode 100644
index 0000000000000..a5837ee851364
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/KeywordParquetField.java
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.VarCharVector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class KeywordParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name());
+        int rowIndex = managedVSR.getRowCount();
+        textVector.setSafe(rowIndex, parseValue.toString().getBytes());
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java
new file mode 100644
index 0000000000000..3d52106f7acad
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetField.java
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public abstract class ParquetField {
+    public abstract void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue);
+
+    public void createField(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        if (mappedFieldType.isColumnar()) {
+            addToGroup(mappedFieldType, managedVSR, parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetFieldUtil.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetFieldUtil.java
new file mode 100644
index 0000000000000..a47f35fcd9bb1
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/ParquetFieldUtil.java
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.opensearch.index.mapper.Mapper;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.mapper.MetadataFieldMapper;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParquetFieldUtil {
+
+    public static Schema getSchema(MapperService mapperService) {
+        List<Field> fields = new ArrayList<>();
+
+        for (Mapper mapper : mapperService.documentMapper().mappers()) {
+            if (mapper instanceof MetadataFieldMapper) continue;
+            fields.add(new Field(mapper.name(), ArrowFieldRegistry.getFieldType(mapper.typeName()), null));
+        }
+
+        // Create the most minimal schema possible - just one string field
+        return new Schema(fields);
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/TextParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/TextParquetField.java
new file mode 100644
index 0000000000000..6bcf6d091fd62
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/TextParquetField.java
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields;
+
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.VarCharVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class TextParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        VarCharVector textVector = (VarCharVector) managedVSR.getVector(mappedFieldType.name());
+        int rowIndex = managedVSR.getRowCount();
+        textVector.setSafe(rowIndex, parseValue.toString().getBytes());
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ByteParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ByteParquetField.java
new file mode 100644
index 0000000000000..75d0607a18eb7
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ByteParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.TinyIntVector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class ByteParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        TinyIntVector tinyIntVector = (TinyIntVector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            tinyIntVector.setNull(rowCount);
+        } else {
+            tinyIntVector.setSafe(rowCount, (Byte) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/DoubleParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/DoubleParquetField.java
new file mode 100644
index 0000000000000..a552efa146bce
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/DoubleParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.Float8Vector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class DoubleParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        Float8Vector float8Vector = (Float8Vector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            float8Vector.setNull(rowCount);
+        } else {
+            float8Vector.setSafe(rowCount, (Double) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/FloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/FloatParquetField.java
new file mode 100644
index 0000000000000..de10a122f40e7
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/FloatParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.Float4Vector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class FloatParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        Float4Vector float4Vector = (Float4Vector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            float4Vector.setNull(rowCount);
+        } else {
+            float4Vector.setSafe(rowCount, (Float) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/HalfFloatParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/HalfFloatParquetField.java
new file mode 100644
index 0000000000000..4d393d3a804ce
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/HalfFloatParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.Float2Vector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class HalfFloatParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        Float2Vector float2Vector = (Float2Vector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            float2Vector.setNull(rowCount);
+        } else {
+            float2Vector.setSafe(rowCount, (Short) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/IntegerParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/IntegerParquetField.java
new file mode 100644
index 0000000000000..0a14344b6eaac
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/IntegerParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.IntVector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class IntegerParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        IntVector intVector = (IntVector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            intVector.setNull(rowCount);
+        } else {
+            intVector.setSafe(rowCount, (Integer) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/LongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/LongParquetField.java
new file mode 100644
index 0000000000000..7221d64c6590d
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/LongParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.BigIntVector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class LongParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        BigIntVector bigIntVector = (BigIntVector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            bigIntVector.setNull(rowCount);
+        } else {
+            bigIntVector.setSafe(rowCount, (Long) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ShortParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ShortParquetField.java
new file mode 100644
index 0000000000000..8e28bdda9ba54
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/ShortParquetField.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.SmallIntVector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class ShortParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        SmallIntVector smallIntVector = (SmallIntVector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            smallIntVector.setNull(rowCount);
+        } else {
+            smallIntVector.setSafe(rowCount, (Short) parseValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/UnsignedLongParquetField.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/UnsignedLongParquetField.java
new file mode 100644
index 0000000000000..ed5d4f5509a3d
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/fields/number/UnsignedLongParquetField.java
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.fields.number;
+
+import com.parquet.parquetdataformat.fields.ParquetField;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+import org.apache.arrow.vector.UInt8Vector;
+import org.opensearch.index.mapper.MappedFieldType;
+
+public class UnsignedLongParquetField extends ParquetField {
+
+    @Override
+    public void addToGroup(MappedFieldType mappedFieldType, ManagedVSR managedVSR, Object parseValue) {
+        UInt8Vector uInt8Vector = (UInt8Vector) managedVSR.getVector(mappedFieldType.name());
+        int rowCount = managedVSR.getRowCount();
+        if (parseValue == null) {
+            uInt8Vector.setNull(rowCount);
+        } else {
+            long longValue = ((Number) parseValue).longValue();
+            uInt8Vector.setSafe(rowCount, longValue);
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java
new file mode 100644
index 0000000000000..83e60d863aeb5
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/ArrowBufferPool.java
@@ -0,0 +1,215 @@
+package com.parquet.parquetdataformat.memory;
+
+import org.apache.arrow.memory.AllocationListener;
+import org.apache.arrow.memory.AllocationOutcome;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.opensearch.common.settings.Settings;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Manages BufferAllocator lifecycle with configurable allocation strategies.
+ * Provides factory methods for creating allocators with different policies
+ * based on OpenSearch settings and memory pressure conditions.
+ */
+public class ArrowBufferPool {
+    
+    private final Settings settings;
+    private final long maxAllocation;
+    private final long initReservation;
+    private final AllocationListener allocationListener;
+    private final MemoryPressureMonitor memoryMonitor;
+    
+    // Track active allocators for monitoring and cleanup
+    private final ConcurrentHashMap<String, BufferAllocator> activeAllocators;
+    private final AtomicLong totalAllocated;
+    
+    public ArrowBufferPool(Settings settings, MemoryPressureMonitor memoryMonitor) {
+        this.settings = settings;
+        this.memoryMonitor = memoryMonitor;
+        this.activeAllocators = new ConcurrentHashMap<>();
+        this.totalAllocated = new AtomicLong(0);
+        
+        // Configure memory limits - parse size strings manually
+        this.maxAllocation = parseByteSize(settings.get("parquet.memory.max_allocation", "1gb"));
+        this.initReservation = parseByteSize(settings.get("parquet.memory.init_reservation", "100mb"));
+        
+        // Set up allocation listener for monitoring
+        this.allocationListener = new PoolAllocationListener();
+    }
+    
+    /**
+     * Creates a new child allocator with the configured strategy and limits.
+     * 
+     * @param name Unique name for the allocator
+     * @return BufferAllocator configured with pool settings
+     */
+    public BufferAllocator createAllocator(String name) {
+        return createAllocator(name, initReservation, maxAllocation);
+    }
+    
+    /**
+     * Creates a new child allocator with custom limits.
+     * 
+     * @param name Unique name for the allocator
+     * @param reservation Initial reservation amount
+     * @param maxBytes Maximum allocation limit
+     * @return BufferAllocator configured with specified limits
+     */
+    public BufferAllocator createAllocator(String name, long reservation, long maxBytes) {
+        // Check memory pressure before creating new allocator
+        if (memoryMonitor.shouldRejectAllocation(reservation)) {
+            throw new OutOfMemoryError(
+                "Cannot create allocator '" + name + "': memory pressure too high");
+        }
+        
+        BufferAllocator rootAllocator = createRootAllocator();
+        BufferAllocator childAllocator = rootAllocator.newChildAllocator(
+            name, allocationListener, reservation, maxBytes);
+        
+        activeAllocators.put(name, childAllocator);
+        totalAllocated.addAndGet(reservation);
+        
+        return childAllocator;
+    }
+    
+    /**
+     * Releases an allocator and cleans up resources.
+     * 
+     * @param name Name of the allocator to release
+     */
+    public void releaseAllocator(String name) {
+        BufferAllocator allocator = activeAllocators.remove(name);
+        if (allocator != null) {
+            long allocated = allocator.getAllocatedMemory();
+            totalAllocated.addAndGet(-allocated);
+            allocator.close();
+        }
+    }
+    
+    /**
+     * Gets current memory allocation statistics.
+     * 
+     * @return AllocationStats with current usage information
+     */
+    public AllocationStats getStats() {
+        return new AllocationStats(
+            totalAllocated.get(),
+            maxAllocation,
+            activeAllocators.size(),
+            memoryMonitor.getCurrentPressure()
+        );
+    }
+    
+    /**
+     * Closes all active allocators and cleans up the pool.
+     */
+    public void close() {
+        activeAllocators.values().forEach(BufferAllocator::close);
+        activeAllocators.clear();
+        totalAllocated.set(0);
+    }
+    
+    private BufferAllocator createRootAllocator() {
+        // Create a simple RootAllocator with basic settings
+        return new RootAllocator(maxAllocation);
+    }
+    
+    /**
+     * Simple byte size parser for configuration strings.
+     */
+    private long parseByteSize(String sizeStr) {
+        if (sizeStr == null || sizeStr.trim().isEmpty()) {
+            return 0;
+        }
+        
+        String trimmed = sizeStr.trim().toLowerCase();
+        long multiplier = 1;
+        
+        if (trimmed.endsWith("kb")) {
+            multiplier = 1024;
+            trimmed = trimmed.substring(0, trimmed.length() - 2);
+        } else if (trimmed.endsWith("mb")) {
+            multiplier = 1024 * 1024;
+            trimmed = trimmed.substring(0, trimmed.length() - 2);
+        } else if (trimmed.endsWith("gb")) {
+            multiplier = 1024 * 1024 * 1024;
+            trimmed = trimmed.substring(0, trimmed.length() - 2);
+        } else if (trimmed.endsWith("b")) {
+            trimmed = trimmed.substring(0, trimmed.length() - 1);
+        }
+        
+        try {
+            return Long.parseLong(trimmed.trim()) * multiplier;
+        } catch (NumberFormatException e) {
+            throw new IllegalArgumentException("Invalid byte size format: " + sizeStr, e);
+        }
+    }
+    
+    /**
+     * Allocation listener that integrates with memory monitoring.
+     */
+    private class PoolAllocationListener implements AllocationListener {
+        
+        @Override
+        public void onPreAllocation(long size) {
+            if (memoryMonitor.shouldRejectAllocation(size)) {
+                throw new OutOfMemoryError("Memory pressure too high for allocation of " + size + " bytes");
+            }
+        }
+        
+        @Override
+        public void onAllocation(long size) {
+            memoryMonitor.recordAllocation(size);
+        }
+        
+        @Override
+        public void onRelease(long size) {
+            memoryMonitor.recordDeallocation(size);
+        }
+        
+        @Override
+        public boolean onFailedAllocation(long size, AllocationOutcome outcome) {
+            memoryMonitor.recordFailedAllocation(size, "FAILED");
+            return false; // Don't retry
+        }
+        
+        @Override
+        public void onChildAdded(BufferAllocator parentAllocator, BufferAllocator childAllocator) {
+            // Track child allocator creation
+        }
+        
+        @Override
+        public void onChildRemoved(BufferAllocator parentAllocator, BufferAllocator childAllocator) {
+            // Track child allocator removal
+        }
+    }
+    
+    /**
+     * Allocation statistics for monitoring.
+     */
+    public static class AllocationStats {
+        private final long totalAllocated;
+        private final long maxAllocation;
+        private final int activeAllocators;
+        private final double memoryPressure;
+        
+        public AllocationStats(long totalAllocated, long maxAllocation, 
+                             int activeAllocators, double memoryPressure) {
+            this.totalAllocated = totalAllocated;
+            this.maxAllocation = maxAllocation;
+            this.activeAllocators = activeAllocators;
+            this.memoryPressure = memoryPressure;
+        }
+        
+        public long getTotalAllocated() { return totalAllocated; }
+        public long getMaxAllocation() { return maxAllocation; }
+        public int getActiveAllocators() { return activeAllocators; }
+        public double getMemoryPressure() { return memoryPressure; }
+        public double getUtilizationRatio() { 
+            return maxAllocation > 0 ? (double) totalAllocated / maxAllocation : 0.0;
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/MemoryPressureMonitor.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/MemoryPressureMonitor.java
new file mode 100644
index 0000000000000..382c8c8b647fb
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/memory/MemoryPressureMonitor.java
@@ -0,0 +1,274 @@
+package com.parquet.parquetdataformat.memory;
+
+import org.opensearch.common.settings.Settings;
+
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicLong;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Monitors off-heap memory usage and triggers backpressure mechanisms.
+ * Tracks Arrow buffer allocations and provides pressure metrics for
+ * controlling writer creation and flush intervals.
+ */
+public class MemoryPressureMonitor {
+    
+    public enum PressureLevel {
+        LOW(0.0, 0.7),        // < 70% utilization
+        MODERATE(0.7, 0.85),  // 70-85% utilization  
+        HIGH(0.85, 0.95),     // 85-95% utilization
+        CRITICAL(0.95, 1.0);  // > 95% utilization
+        
+        private final double min;
+        private final double max;
+        
+        PressureLevel(double min, double max) {
+            this.min = min;
+            this.max = max;
+        }
+        
+        public static PressureLevel fromRatio(double ratio) {
+            for (PressureLevel level : values()) {
+                if (ratio >= level.min && ratio < level.max) {
+                    return level;
+                }
+            }
+            return CRITICAL;
+        }
+    }
+    
+    private final MemoryMXBean memoryBean;
+    private final ScheduledExecutorService scheduler;
+    private final AtomicLong directMemoryUsed;
+    private final AtomicLong directMemoryMax;
+    private final AtomicReference<PressureLevel> currentPressure;
+    private final AtomicLong allocationCount;
+    private final AtomicLong deallocationCount;
+    private final AtomicLong failedAllocationCount;
+    
+    // Configuration
+    private final double criticalThreshold;
+    private final double highThreshold;
+    private final long maxDirectMemory;
+    
+    public MemoryPressureMonitor(Settings settings) {
+        this.memoryBean = ManagementFactory.getMemoryMXBean();
+        this.scheduler = Executors.newSingleThreadScheduledExecutor(r -> {
+            Thread t = new Thread(r, "parquet-memory-monitor");
+            t.setDaemon(true);
+            return t;
+        });
+        
+        this.directMemoryUsed = new AtomicLong(0);
+        this.currentPressure = new AtomicReference<>(PressureLevel.LOW);
+        this.allocationCount = new AtomicLong(0);
+        this.deallocationCount = new AtomicLong(0);
+        this.failedAllocationCount = new AtomicLong(0);
+        
+        // Parse configuration
+        this.criticalThreshold = settings.getAsDouble("parquet.memory.critical_threshold", 0.95);
+        this.highThreshold = settings.getAsDouble("parquet.memory.high_threshold", 0.85);
+        this.maxDirectMemory = getMaxDirectMemory();
+        this.directMemoryMax = new AtomicLong(maxDirectMemory);
+        
+        // Start monitoring
+        startMonitoring();
+    }
+    
+    /**
+     * Checks if an allocation should be rejected based on current memory pressure.
+     * 
+     * @param requestedBytes Number of bytes requested for allocation
+     * @return true if allocation should be rejected
+     */
+    public boolean shouldRejectAllocation(long requestedBytes) {
+        PressureLevel pressure = currentPressure.get();
+        
+        // Always reject if critical
+        if (pressure == PressureLevel.CRITICAL) {
+            return true;
+        }
+        
+        // Check if allocation would push us over threshold
+        long currentUsage = directMemoryUsed.get();
+        long afterAllocation = currentUsage + requestedBytes;
+        double futureRatio = (double) afterAllocation / maxDirectMemory;
+        
+        return switch (pressure) {
+            case HIGH -> futureRatio > criticalThreshold;
+            case MODERATE -> futureRatio > highThreshold;
+            case LOW -> false;
+            case CRITICAL -> true; // Already handled above
+        };
+    }
+    
+    /**
+     * Records an allocation event.
+     * 
+     * @param size Size of the allocation
+     */
+    public void recordAllocation(long size) {
+        directMemoryUsed.addAndGet(size);
+        allocationCount.incrementAndGet();
+        updatePressureLevel();
+    }
+    
+    /**
+     * Records a deallocation event.
+     * 
+     * @param size Size of the deallocation
+     */
+    public void recordDeallocation(long size) {
+        directMemoryUsed.addAndGet(-size);
+        deallocationCount.incrementAndGet();
+        updatePressureLevel();
+    }
+    
+    /**
+     * Records a failed allocation event.
+     * 
+     * @param size Size of the failed allocation
+     * @param reason Reason for failure
+     */
+    public void recordFailedAllocation(long size, String reason) {
+        failedAllocationCount.incrementAndGet();
+        // Could log detailed failure information here
+    }
+    
+    /**
+     * Gets the current memory pressure as a ratio (0.0 to 1.0).
+     * 
+     * @return Current memory pressure ratio
+     */
+    public double getCurrentPressure() {
+        return (double) directMemoryUsed.get() / maxDirectMemory;
+    }
+    
+    /**
+     * Gets the current pressure level enum.
+     * 
+     * @return Current PressureLevel
+     */
+    public PressureLevel getCurrentPressureLevel() {
+        return currentPressure.get();
+    }
+    
+    /**
+     * Gets current memory statistics.
+     * 
+     * @return MemoryStats with current usage information
+     */
+    public MemoryStats getStats() {
+        return new MemoryStats(
+            directMemoryUsed.get(),
+            maxDirectMemory,
+            getCurrentPressure(),
+            currentPressure.get(),
+            allocationCount.get(),
+            deallocationCount.get(),
+            failedAllocationCount.get()
+        );
+    }
+    
+    /**
+     * Triggers early refresh if memory pressure is high.
+     * 
+     * @return true if early refresh should be triggered
+     */
+    public boolean shouldTriggerEarlyRefresh() {
+        PressureLevel pressure = currentPressure.get();
+        return pressure == PressureLevel.HIGH || pressure == PressureLevel.CRITICAL;
+    }
+    
+    /**
+     * Gets recommended writer limit based on current memory pressure.
+     * 
+     * @param baseLimit Base number of writers without pressure
+     * @return Adjusted writer limit
+     */
+    public int getRecommendedWriterLimit(int baseLimit) {
+        return switch (currentPressure.get()) {
+            case LOW -> baseLimit;
+            case MODERATE -> (int) (baseLimit * 0.8);
+            case HIGH -> (int) (baseLimit * 0.5);
+            case CRITICAL -> 1; // Minimal writers only
+        };
+    }
+    
+    private void startMonitoring() {
+        scheduler.scheduleAtFixedRate(this::updatePressureLevel, 1, 1, TimeUnit.SECONDS);
+    }
+    
+    private void updatePressureLevel() {
+        double ratio = getCurrentPressure();
+        PressureLevel newLevel = PressureLevel.fromRatio(ratio);
+        PressureLevel oldLevel = currentPressure.getAndSet(newLevel);
+        
+        // Log pressure level changes
+        if (newLevel != oldLevel) {
+            System.out.println(String.format(
+                "[MEMORY] Pressure level changed: %s -> %s (%.2f%%)", 
+                oldLevel, newLevel, ratio * 100));
+        }
+    }
+    
+    private long getMaxDirectMemory() {
+        // Use heap max / 4 as a reasonable default for direct memory
+        long heapMax = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getMax();
+        return heapMax > 0 ? heapMax / 4 : 1024 * 1024 * 1024; // 1GB fallback
+    }
+    
+    /**
+     * Closes the monitor and stops background tasks.
+     */
+    public void close() {
+        scheduler.shutdown();
+        try {
+            if (!scheduler.awaitTermination(5, TimeUnit.SECONDS)) {
+                scheduler.shutdownNow();
+            }
+        } catch (InterruptedException e) {
+            scheduler.shutdownNow();
+            Thread.currentThread().interrupt();
+        }
+    }
+    
+    /**
+     * Memory statistics for monitoring.
+     */
+    public static class MemoryStats {
+        private final long usedBytes;
+        private final long maxBytes;
+        private final double pressureRatio;
+        private final PressureLevel pressureLevel;
+        private final long allocationCount;
+        private final long deallocationCount;
+        private final long failedAllocationCount;
+        
+        public MemoryStats(long usedBytes, long maxBytes, double pressureRatio,
+                          PressureLevel pressureLevel, long allocationCount,
+                          long deallocationCount, long failedAllocationCount) {
+            this.usedBytes = usedBytes;
+            this.maxBytes = maxBytes;
+            this.pressureRatio = pressureRatio;
+            this.pressureLevel = pressureLevel;
+            this.allocationCount = allocationCount;
+            this.deallocationCount = deallocationCount;
+            this.failedAllocationCount = failedAllocationCount;
+        }
+        
+        public long getUsedBytes() { return usedBytes; }
+        public long getMaxBytes() { return maxBytes; }
+        public double getPressureRatio() { return pressureRatio; }
+        public PressureLevel getPressureLevel() { return pressureLevel; }
+        public long getAllocationCount() { return allocationCount; }
+        public long getDeallocationCount() { return deallocationCount; }
+        public long getFailedAllocationCount() { return failedAllocationCount; }
+        public long getAvailableBytes() { return maxBytes - usedBytes; }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdGenerator.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdGenerator.java
new file mode 100644
index 0000000000000..8735efc2b21dc
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdGenerator.java
@@ -0,0 +1,81 @@
+package com.parquet.parquetdataformat.rowid;
+
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Atomic, monotonic row ID generator as specified in the Project Mustang design.
+ * Ensures that each parquet file has sequential row IDs starting from 0,
+ * maintaining a 1:1 mapping between docs indexed in Lucene and parquet rows.
+ */
+public class RowIdGenerator {
+    
+    private final AtomicLong globalCounter;
+    private final String generatorId;
+    
+    public RowIdGenerator(String generatorId) {
+        this.generatorId = generatorId;
+        this.globalCounter = new AtomicLong(0);
+    }
+    
+    /**
+     * Generates the next monotonic row ID.
+     * Thread-safe and atomic operation.
+     * 
+     * @return Next sequential row ID
+     */
+    public long nextRowId() {
+        return globalCounter.getAndIncrement();
+    }
+    
+    /**
+     * Gets the current counter value without incrementing.
+     * Useful for determining the number of rows generated so far.
+     * 
+     * @return Current counter value
+     */
+    public long getCurrentCount() {
+        return globalCounter.get();
+    }
+    
+    /**
+     * Resets the counter to zero.
+     * Should only be used during testing or system reinitialization.
+     */
+    public void reset() {
+        globalCounter.set(0);
+    }
+    
+    /**
+     * Gets the generator ID for tracking purposes.
+     * 
+     * @return Generator identifier
+     */
+    public String getGeneratorId() {
+        return generatorId;
+    }
+    
+    /**
+     * Gets generation statistics.
+     * 
+     * @return GenerationStats with current state
+     */
+    public GenerationStats getStats() {
+        return new GenerationStats(generatorId, globalCounter.get());
+    }
+    
+    /**
+     * Statistics for row ID generation.
+     */
+    public static class GenerationStats {
+        private final String generatorId;
+        private final long totalGenerated;
+        
+        public GenerationStats(String generatorId, long totalGenerated) {
+            this.generatorId = generatorId;
+            this.totalGenerated = totalGenerated;
+        }
+        
+        public String getGeneratorId() { return generatorId; }
+        public long getTotalGenerated() { return totalGenerated; }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdTracker.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdTracker.java
new file mode 100644
index 0000000000000..418c96efa07ce
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/rowid/RowIdTracker.java
@@ -0,0 +1,204 @@
+package com.parquet.parquetdataformat.rowid;
+
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.ConcurrentMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Tracks row ID ranges per parquet file for Lucene segment mapping.
+ * Maintains the 1:1 mapping between docs indexed in Lucene and parquet rows
+ * as specified in the Project Mustang design.
+ */
+public class RowIdTracker {
+    
+    private final ConcurrentMap<String, RowIdRange> fileRanges;
+    private final AtomicLong totalRowsTracked;
+    
+    public RowIdTracker() {
+        this.fileRanges = new ConcurrentHashMap<>();
+        this.totalRowsTracked = new AtomicLong(0);
+    }
+    
+    /**
+     * Starts tracking a new row ID range for a parquet file.
+     * 
+     * @param fileName Name of the parquet file
+     * @param startRowId Starting row ID for this file
+     * @return RowIdRange tracker for this file
+     */
+    public RowIdRange startTracking(String fileName, long startRowId) {
+        RowIdRange range = new RowIdRange(fileName, startRowId);
+        fileRanges.put(fileName, range);
+        return range;
+    }
+    
+    /**
+     * Completes tracking for a parquet file by setting the end row ID.
+     * 
+     * @param fileName Name of the parquet file
+     * @param endRowId Final row ID for this file (exclusive)
+     * @return true if tracking was successfully completed
+     */
+    public boolean completeTracking(String fileName, long endRowId) {
+        RowIdRange range = fileRanges.get(fileName);
+        if (range != null) {
+            range.setEndRowId(endRowId);
+            long rowCount = endRowId - range.getStartRowId();
+            totalRowsTracked.addAndGet(rowCount);
+            return true;
+        }
+        return false;
+    }
+    
+    /**
+     * Gets the row ID range for a specific parquet file.
+     * 
+     * @param fileName Name of the parquet file
+     * @return RowIdRange for the file, or null if not found
+     */
+    public RowIdRange getRangeForFile(String fileName) {
+        return fileRanges.get(fileName);
+    }
+    
+    /**
+     * Finds which parquet file contains the given row ID.
+     * 
+     * @param rowId Row ID to search for
+     * @return File name containing the row ID, or null if not found
+     */
+    public String findFileForRowId(long rowId) {
+        for (RowIdRange range : fileRanges.values()) {
+            if (range.containsRowId(rowId)) {
+                return range.getFileName();
+            }
+        }
+        return null;
+    }
+    
+    /**
+     * Gets all tracked file ranges.
+     * 
+     * @return ConcurrentMap of fileName -> RowIdRange
+     */
+    public ConcurrentMap<String, RowIdRange> getAllRanges() {
+        return new ConcurrentHashMap<>(fileRanges);
+    }
+    
+    /**
+     * Gets tracking statistics.
+     * 
+     * @return TrackingStats with current state
+     */
+    public TrackingStats getStats() {
+        return new TrackingStats(
+            fileRanges.size(),
+            totalRowsTracked.get(),
+            fileRanges.values().stream().mapToLong(RowIdRange::getRowCount).sum()
+        );
+    }
+    
+    /**
+     * Removes tracking for a parquet file.
+     * Used during cleanup or file deletion.
+     * 
+     * @param fileName Name of the parquet file
+     * @return true if tracking was removed
+     */
+    public boolean removeTracking(String fileName) {
+        RowIdRange removed = fileRanges.remove(fileName);
+        if (removed != null) {
+            totalRowsTracked.addAndGet(-removed.getRowCount());
+            return true;
+        }
+        return false;
+    }
+    
+    /**
+     * Clears all tracking data.
+     * Should only be used during testing or system reset.
+     */
+    public void clear() {
+        fileRanges.clear();
+        totalRowsTracked.set(0);
+    }
+    
+    /**
+     * Represents a row ID range for a specific parquet file.
+     */
+    public static class RowIdRange {
+        private final String fileName;
+        private final long startRowId;
+        private volatile long endRowId;
+        private volatile boolean completed;
+        
+        public RowIdRange(String fileName, long startRowId) {
+            this.fileName = fileName;
+            this.startRowId = startRowId;
+            this.endRowId = startRowId;
+            this.completed = false;
+        }
+        
+        /**
+         * Sets the end row ID and marks the range as completed.
+         * 
+         * @param endRowId Final row ID (exclusive)
+         */
+        public void setEndRowId(long endRowId) {
+            this.endRowId = endRowId;
+            this.completed = true;
+        }
+        
+        /**
+         * Checks if the given row ID falls within this range.
+         * 
+         * @param rowId Row ID to check
+         * @return true if row ID is within range
+         */
+        public boolean containsRowId(long rowId) {
+            return completed && rowId >= startRowId && rowId < endRowId;
+        }
+        
+        /**
+         * Gets the number of rows in this range.
+         * 
+         * @return Row count, or 0 if not completed
+         */
+        public long getRowCount() {
+            return completed ? endRowId - startRowId : 0;
+        }
+        
+        // Getters
+        public String getFileName() { return fileName; }
+        public long getStartRowId() { return startRowId; }
+        public long getEndRowId() { return endRowId; }
+        public boolean isCompleted() { return completed; }
+        
+        @Override
+        public String toString() {
+            return String.format("RowIdRange{file='%s', start=%d, end=%d, completed=%s}", 
+                fileName, startRowId, endRowId, completed);
+        }
+    }
+    
+    /**
+     * Statistics for row ID tracking.
+     */
+    public static class TrackingStats {
+        private final int trackedFiles;
+        private final long totalRowsTracked;
+        private final long activeRows;
+        
+        public TrackingStats(int trackedFiles, long totalRowsTracked, long activeRows) {
+            this.trackedFiles = trackedFiles;
+            this.totalRowsTracked = totalRowsTracked;
+            this.activeRows = activeRows;
+        }
+        
+        public int getTrackedFiles() { return trackedFiles; }
+        public long getTotalRowsTracked() { return totalRowsTracked; }
+        public long getActiveRows() { return activeRows; }
+        public double getAverageRowsPerFile() { 
+            return trackedFiles > 0 ? (double) activeRows / trackedFiles : 0.0;
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java
new file mode 100644
index 0000000000000..7d196c2fdfea7
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/ManagedVSR.java
@@ -0,0 +1,259 @@
+package com.parquet.parquetdataformat.vsr;
+
+import com.parquet.parquetdataformat.bridge.ArrowExport;
+import org.apache.arrow.memory.ArrowBuf;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.BigIntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.Data;
+
+import java.util.concurrent.atomic.AtomicReference;
+import java.util.concurrent.locks.ReadWriteLock;
+import java.util.concurrent.locks.ReentrantReadWriteLock;
+
+import static org.apache.arrow.vector.BitVectorHelper.byteIndex;
+
+/**
+ * Managed wrapper around VectorSchemaRoot that handles state transitions
+ * and provides thread-safe access for the ACTIVE/FROZEN lifecycle.
+ */
+public class ManagedVSR implements AutoCloseable {
+
+    private final String id;
+    private final VectorSchemaRoot vsr;
+    private final BufferAllocator allocator;
+    private final AtomicReference<VSRState> state;
+    private final ReadWriteLock lock;
+    private final long createdTime;
+
+
+    public ManagedVSR(String id, VectorSchemaRoot vsr, BufferAllocator allocator) {
+        this.id = id;
+        this.vsr = vsr;
+        this.allocator = allocator;
+        this.state = new AtomicReference<>(VSRState.ACTIVE);
+        this.lock = new ReentrantReadWriteLock();
+        this.createdTime = System.currentTimeMillis();
+    }
+
+    /**
+     * Gets the underlying VectorSchemaRoot.
+     * Should only be used when holding appropriate locks.
+     *
+     * @return VectorSchemaRoot instance
+     */
+    public VectorSchemaRoot getVSR() {
+        return vsr;
+    }
+
+    /**
+     * Gets the current row count in this VSR.
+     * Thread-safe read operation.
+     *
+     * @return Number of rows currently in the VSR
+     */
+    public int getRowCount() {
+        lock.readLock().lock();
+        try {
+            return vsr.getRowCount();
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    /**
+     * Sets the row count for this VSR.
+     * Only allowed when VSR is in ACTIVE state.
+     *
+     * @param rowCount New row count
+     * @throws IllegalStateException if VSR is not active or is immutable
+     */
+    public void setRowCount(int rowCount) {
+        lock.writeLock().lock();
+        try {
+            if (state.get() != VSRState.ACTIVE) {
+                throw new IllegalStateException("Cannot modify VSR in state: " + state.get());
+            }
+            vsr.setRowCount(rowCount);
+        } finally {
+            lock.writeLock().unlock();
+        }
+    }
+
+    /**
+     * Gets a field vector by name.
+     * Thread-safe read operation.
+     *
+     * @param fieldName Name of the field
+     * @return FieldVector for the field, or null if not found
+     */
+    public FieldVector getVector(String fieldName) {
+        lock.readLock().lock();
+        try {
+            return vsr.getVector(fieldName);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    /**
+     * Changes the state of this VSR.
+     * Handles state transition logic and immutability.
+     *
+     * @param newState New state to transition to
+     */
+    public void setState(VSRState newState) {
+        VSRState oldState = state.getAndSet(newState);
+
+        System.out.println(String.format(
+            "[VSR] State transition: %s -> %s for VSR %s",
+            oldState, newState, id));
+    }
+
+    /**
+     * Gets the current state of this VSR.
+     *
+     * @return Current VSRState
+     */
+    public VSRState getState() {
+        return state.get();
+    }
+
+    /**
+     * Exports this VSR to Arrow C Data Interface for Rust handoff.
+     * Only allowed when VSR is FROZEN or FLUSHING.
+     *
+     * @return ArrowExport containing ArrowArray and ArrowSchema
+     * @throws IllegalStateException if VSR is not in correct state
+     */
+    public ArrowExport exportToArrow() {
+        VSRState currentState = state.get();
+        if (currentState != VSRState.FROZEN &&
+            currentState != VSRState.FLUSHING) {
+            throw new IllegalStateException("Cannot export VSR in state: " + currentState);
+        }
+
+        lock.readLock().lock();
+        try {
+            ArrowArray arrowArray = ArrowArray.allocateNew(allocator);
+            ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator);
+
+            // Export the VectorSchemaRoot to C Data Interface
+            Data.exportVectorSchemaRoot(allocator, vsr, null, arrowArray, arrowSchema);
+
+            return new ArrowExport(arrowArray, arrowSchema);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    public ArrowExport exportSchema() {
+        lock.readLock().lock();
+        try {
+            ArrowSchema arrowSchema = ArrowSchema.allocateNew(allocator);
+
+            // Export the VectorSchemaRoot to C Data Interface
+            Data.exportSchema(allocator, vsr.getSchema(), null, arrowSchema);
+
+            return new ArrowExport(null, arrowSchema);
+        } finally {
+            lock.readLock().unlock();
+        }
+    }
+
+    /**
+     * Checks if this VSR is immutable (frozen).
+     *
+     * @return true if VSR cannot be modified
+     */
+    public boolean isImmutable() {
+        VSRState currentState = state.get();
+        return currentState != VSRState.ACTIVE;
+    }
+
+
+    /**
+     * Gets the VSR ID.
+     *
+     * @return Unique identifier for this VSR
+     */
+    public String getId() {
+        return id;
+    }
+
+    /**
+     * Gets the creation timestamp.
+     *
+     * @return Creation time in milliseconds
+     */
+    public long getCreatedTime() {
+        return createdTime;
+    }
+
+    /**
+     * Gets the associated BufferAllocator.
+     *
+     * @return BufferAllocator used by this VSR
+     */
+    public BufferAllocator getAllocator() {
+        return allocator;
+    }
+
+    /**
+     * Closes this VSR and releases all resources.
+     */
+    @Override
+    public void close() {
+        lock.writeLock().lock();
+        try {
+            if (state.get() != VSRState.CLOSED) {
+                state.set(VSRState.CLOSED);
+                vsr.close();
+                allocator.close();
+            }
+        } finally {
+            lock.writeLock().unlock();
+        }
+    }
+
+
+    @Override
+    public String toString() {
+        return String.format("ManagedVSR{id='%s', state=%s, rows=%d, immutable=%s}",
+            id, state.get(), getRowCount(), isImmutable());
+    }
+
+    public static void main(String[] args) {
+        RootAllocator allocator = new RootAllocator();
+        BigIntVector vector = new BigIntVector("vector", allocator);
+        vector.allocateNew(10);
+        vector.set(0, 100);  // Set position 0
+//        vector.setNull(1);
+        vector.set(2, 300);  // Set position 2
+// Position 1 is not set!
+        vector.setValueCount(3);  // Claims vector has 3 elements
+
+// Position 1 now contains undefined data
+//        long value = vector.get(1);  // Could be any value!
+        System.out.println(readBit(vector.getValidityBuffer(), 0));
+        System.out.println(readBit(vector.getValidityBuffer(), 1));
+        System.out.println(readBit(vector.getValidityBuffer(), 2));
+        System.out.println(readBit(vector.getValidityBuffer(), 3));
+    }
+
+    public static byte readBit(ArrowBuf validityBuffer, long index) {
+        // it can be observed that some logic is duplicate of the logic in setValidityBit.
+        // this is because JIT cannot always remove the if branch in setValidityBit,
+        // so we give a dedicated implementation for setting bits.
+        final long byteIndex = byteIndex(index);
+
+        // the byte is promoted to an int, because according to Java specification,
+        // bytes will be promoted to ints automatically, upon expression evaluation.
+        // by promoting it manually, we avoid the unnecessary conversions.
+        return validityBuffer.getByte(byteIndex);
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java
new file mode 100644
index 0000000000000..d7dfbde2948ba
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRManager.java
@@ -0,0 +1,268 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package com.parquet.parquetdataformat.vsr;
+
+import com.parquet.parquetdataformat.bridge.ArrowExport;
+import com.parquet.parquetdataformat.bridge.RustBridge;
+import com.parquet.parquetdataformat.memory.MemoryPressureMonitor;
+import com.parquet.parquetdataformat.writer.ParquetDocumentInput;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.opensearch.index.engine.exec.FlushIn;
+import org.opensearch.index.engine.exec.WriteResult;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Manages VectorSchemaRoot lifecycle with integrated memory management and native call wrappers.
+ * Provides a high-level interface for Parquet document operations using managed VSR abstractions.
+ *
+ * <p>This class orchestrates the following components:
+ * <ul>
+ *   <li>{@link ManagedVSR} - Thread-safe VSR with state management</li>
+ *   <li>{@link VSRPool} - Resource pooling for VSRs</li>
+ *   <li>{@link RustBridge} - Direct JNI calls to Rust backend</li>
+ * </ul>
+ */
+public class VSRManager {
+    private ManagedVSR managedVSR;
+    private Map<String, FieldVector> fieldVectorMap;
+    private final Schema schema;
+    private final String fileName;
+    private final VSRPool vsrPool;
+
+    public VSRManager(String fileName, Schema schema) {
+        this.fileName = fileName;
+        this.schema = schema;
+
+        // Create memory monitor and buffer pool
+        MemoryPressureMonitor memoryMonitor = new MemoryPressureMonitor(org.opensearch.common.settings.Settings.EMPTY);
+
+        // Create VSR pool
+        this.vsrPool = new VSRPool("pool-" + fileName, schema, memoryMonitor);
+
+
+        // Get active VSR from pool
+        this.managedVSR = vsrPool.getActiveVSR();
+        initializeFieldVectorMap();
+        // Initialize writer lazily to avoid crashes
+        initializeWriter();
+    }
+
+    private void initializeWriter() {
+        try {
+            // Export schema through managed VSR
+            try (ArrowExport export = managedVSR.exportSchema()) {
+                long schemaAddress = export.getSchemaAddress();
+
+                // Direct native call - RustBridge handles all validation
+                RustBridge.createWriter(fileName, schemaAddress);
+            }
+        } catch (Exception e) {
+            throw new RuntimeException("Failed to initialize Parquet writer: " + e.getMessage(), e);
+        }
+    }
+
+    public WriteResult addToManagedVSR(ParquetDocumentInput document) throws IOException {
+        // Ensure we have an active VSR (handle case where getActiveVSR() returns null)
+        if (managedVSR == null) {
+            managedVSR = vsrPool.getActiveVSR();
+            if (managedVSR == null) {
+                throw new IOException("No active VSR available");
+            }
+            reinitializeFieldVectorMap();
+        }
+
+        // Ensure VSR is in ACTIVE state for modifications
+        if (managedVSR.getState() != VSRState.ACTIVE) {
+            throw new IOException("Cannot add document - VSR is not active: " + managedVSR.getState());
+        }
+
+        System.out.println("[JAVA] addToManagedVSR called, current row count: " + managedVSR.getRowCount());
+
+        try {
+            // Since ParquetDocumentInput now works directly with ManagedVSR,
+            // fields should already be populated in vectors via addField() calls.
+            // We just need to finalize the document by calling addToWriter()
+            // which will increment the row count.
+            WriteResult result = document.addToWriter();
+
+            System.out.println("[JAVA] After adding document, row count: " + managedVSR.getRowCount());
+
+            // Check for VSR rotation AFTER successful document processing
+            handleVSRRotationAfterAddToManagedVSR();
+
+            return result;
+        } catch (Exception e) {
+            System.out.println("[JAVA] ERROR in addToManagedVSR: " + e.getMessage());
+            throw new IOException("Failed to add document: " + e.getMessage(), e);
+        }
+    }
+
+    public String flush(FlushIn flushIn) throws IOException {
+        System.out.println("[JAVA] flush called, row count: " + managedVSR.getRowCount());
+        try {
+            // Only flush if we have data
+            if (managedVSR.getRowCount() == 0) {
+                System.out.println("[JAVA] No data to flush, returning null");
+                return null;
+            }
+
+            // Transition VSR to FROZEN state before flushing
+            managedVSR.setState(VSRState.FROZEN);
+            System.out.println("[JAVA] Flushing " + managedVSR.getRowCount() + " rows");
+
+            // Transition to FLUSHING state
+            managedVSR.setState(VSRState.FLUSHING);
+
+            // Direct native call - write the managed VSR data
+            try (ArrowExport export = managedVSR.exportToArrow()) {
+                RustBridge.write(fileName, export.getArrayAddress(), export.getSchemaAddress());
+                RustBridge.closeWriter(fileName);
+            }
+            System.out.println("[JAVA] Successfully flushed data");
+
+            return fileName;
+        } catch (Exception e) {
+            System.out.println("[JAVA] ERROR in flush: " + e.getMessage());
+            throw new IOException("Failed to flush data: " + e.getMessage(), e);
+        }
+    }
+
+    public void close() {
+        try {
+            // Direct native calls
+            try {
+                RustBridge.closeWriter(fileName);
+                RustBridge.flushToDisk(fileName);
+            } catch (IOException e) {
+                System.err.println("Warning: Failed to close/flush writer: " + e.getMessage());
+            }
+
+            // Complete VSR processing and cleanup
+            vsrPool.completeVSR(managedVSR);
+            managedVSR = null;
+
+        } catch (Exception e) {
+            System.err.println("Error during close: " + e.getMessage());
+        }
+    }
+
+    private boolean checkFlushConditions() {
+        // TODO: Implement memory pressure-based flush conditions
+        return false;
+    }
+
+    /**
+     * Handles VSR rotation after successful document addition.
+     * Checks if rotation is needed and immediately processes any frozen VSR.
+     */
+    private void handleVSRRotationAfterAddToManagedVSR() throws IOException {
+        try {
+            // Check if rotation is needed and perform it if safe
+            boolean rotated = vsrPool.maybeRotateActiveVSR();
+
+            if (rotated) {
+                System.out.println("[JAVA] VSR rotation occurred after document addition");
+
+                // Get the frozen VSR that was just created by rotation
+                ManagedVSR frozenVSR = vsrPool.getFrozenVSR();
+                if (frozenVSR != null) {
+                    System.out.println("[JAVA] Processing frozen VSR: " + frozenVSR.getId() +
+                        " with " + frozenVSR.getRowCount() + " rows");
+
+                    // Write the frozen VSR data immediately
+                    frozenVSR.setState(VSRState.FLUSHING);
+                    try (ArrowExport export = frozenVSR.exportToArrow()) {
+                        RustBridge.write(fileName, export.getArrayAddress(), export.getSchemaAddress());
+                    }
+
+                    System.out.println("[JAVA] Successfully wrote frozen VSR data");
+
+                    // Complete the VSR processing
+                    vsrPool.completeVSR(frozenVSR);
+                } else {
+                    System.err.println("[JAVA] WARNING: Rotation occurred but no frozen VSR found");
+                }
+
+                // Update to new active VSR
+                managedVSR = vsrPool.getActiveVSR();
+                if (managedVSR == null) {
+                    throw new IOException("No active VSR available after rotation");
+                }
+
+                // Reinitialize field vector map with new VSR
+                reinitializeFieldVectorMap();
+
+                System.out.println("[JAVA] VSR rotation completed, new active VSR: " + managedVSR.getId() +
+                    ", row count: " + managedVSR.getRowCount());
+            }
+        } catch (IOException e) {
+            System.err.println("[JAVA] Error during VSR rotation: " + e.getMessage());
+            throw e;
+        }
+    }
+
+    /**
+     * Checks if VSR rotation is needed based on row count and memory pressure.
+     * If rotation occurs, updates the managed VSR reference and reinitializes field vectors.
+     *
+     * @deprecated Use handleVSRRotationAfterAddToManagedVSR() instead for safer rotation after document processing
+     */
+    @Deprecated
+    private void checkAndHandleVSRRotation() throws IOException {
+        // Get active VSR from pool - this will trigger rotation if needed
+        ManagedVSR currentActive = vsrPool.getActiveVSR();
+
+        // Check if we got a different VSR (rotation occurred)
+        if (currentActive != managedVSR) {
+            System.out.println("[JAVA] VSR rotation detected, updating references");
+
+            // Update the managed VSR reference
+            managedVSR = currentActive;
+
+            // Reinitialize field vector map with new VSR
+            reinitializeFieldVectorMap();
+
+            // Note: Writer initialization is not needed per VSR as it's per file
+            System.out.println("[JAVA] VSR rotation completed, new row count: " + managedVSR.getRowCount());
+        }
+    }
+
+    /**
+     * Reinitializes the field vector map with the current managed VSR.
+     * Called after VSR rotation to update vector references.
+     */
+    private void reinitializeFieldVectorMap() {
+        fieldVectorMap.clear();
+        initializeFieldVectorMap();
+    }
+
+    private void initializeFieldVectorMap() {
+        fieldVectorMap = new HashMap<>();
+        for (Field field : schema.getFields()) {
+            String fieldName = field.getName();
+            FieldVector fieldVector = managedVSR.getVector(fieldName);
+            // Vector is already properly typed from ManagedVSR.getVector()
+            fieldVectorMap.put(fieldName, fieldVector);
+        }
+    }
+
+    /**
+     * Gets the current active ManagedVSR for document input creation.
+     *
+     * @return The current managed VSR instance
+     */
+    public ManagedVSR getActiveManagedVSR() {
+        return managedVSR;
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRPool.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRPool.java
new file mode 100644
index 0000000000000..088a990353157
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRPool.java
@@ -0,0 +1,331 @@
+package com.parquet.parquetdataformat.vsr;
+
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Schema;
+import com.parquet.parquetdataformat.memory.ArrowBufferPool;
+import com.parquet.parquetdataformat.memory.MemoryPressureMonitor;
+
+import java.io.IOException;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
+
+/**
+ * Manages VectorSchemaRoot lifecycle with ACTIVE and FROZEN states as specified
+ * in the Project Mustang design. Each ParquetWriter maintains a single ACTIVE VSR
+ * for writing and a single FROZEN VSR for Rust handoff.
+ */
+public class VSRPool {
+
+    private final Schema schema;
+    private final ArrowBufferPool bufferPool;
+    private final MemoryPressureMonitor memoryMonitor;
+    private final String poolId;
+
+    // VSR lifecycle management
+    private final AtomicReference<ManagedVSR> activeVSR;
+    private final AtomicReference<ManagedVSR> frozenVSR;
+    private final ConcurrentHashMap<String, ManagedVSR> allVSRs;
+    private final AtomicInteger vsrCounter;
+
+    // Configuration
+    private final int maxRowsPerVSR;
+
+    public VSRPool(String poolId, Schema schema, MemoryPressureMonitor memoryMonitor) {
+        this.poolId = poolId;
+        this.schema = schema;
+        this.bufferPool = new ArrowBufferPool(org.opensearch.common.settings.Settings.EMPTY, memoryMonitor);
+        this.memoryMonitor = memoryMonitor;
+
+        this.activeVSR = new AtomicReference<>();
+        this.frozenVSR = new AtomicReference<>();
+        this.allVSRs = new ConcurrentHashMap<>();
+        this.vsrCounter = new AtomicInteger(0);
+
+        // Configuration - could be made configurable
+        this.maxRowsPerVSR = 50000; // Max rows before forcing freeze
+
+        // Initialize with first active VSR
+        initializeActiveVSR();
+    }
+
+    /**
+     * Gets the current active VSR for writing.
+     * Simply returns the current active VSR without any rotation logic.
+     *
+     * @return Active ManagedVSR for writing, or null if none exists
+     */
+    public ManagedVSR getActiveVSR() {
+        return activeVSR.get();
+    }
+
+    /**
+     * Checks if VSR rotation is needed and performs it if safe to do so.
+     * Throws IOException if rotation is needed but frozen slot is occupied.
+     *
+     * @return true if rotation occurred, false if no rotation was needed
+     * @throws IOException if rotation is needed but cannot be performed due to occupied frozen slot
+     */
+    public boolean maybeRotateActiveVSR() throws IOException {
+        ManagedVSR current = activeVSR.get();
+
+        // Check if rotation is needed
+        if (current == null || !shouldRotateVSR(current)) {
+            return false; // No rotation needed
+        }
+
+        // CRITICAL: Check if frozen slot is occupied before rotation
+        if (frozenVSR.get() != null) {
+            throw new IOException("Cannot rotate VSR: frozen slot is occupied. " +
+                                "Previous frozen VSR has not been processed. This indicates a " +
+                                "system bottleneck or processing failure.");
+        }
+
+        // Safe to rotate - perform the rotation
+        synchronized (this) {
+            // Double-check conditions under lock
+            current = activeVSR.get();
+            if (current == null || !shouldRotateVSR(current)) {
+                return false; // Conditions changed while acquiring lock
+            }
+
+            // Check frozen slot again under lock
+            if (frozenVSR.get() != null) {
+                throw new IOException("Cannot rotate VSR: frozen slot became occupied during rotation");
+            }
+
+            // Freeze current VSR if it exists and has data
+            if (current != null && current.getRowCount() > 0) {
+                freezeVSR(current);
+            }
+
+            // Create new active VSR
+            ManagedVSR newActive = createNewVSR();
+            activeVSR.set(newActive);
+
+            return true; // Rotation occurred
+        }
+    }
+
+    /**
+     * Freezes the current active VSR and creates a new active one.
+     * The frozen VSR replaces any existing frozen VSR.
+     *
+     * @deprecated Use maybeRotateActiveVSR() instead for safer rotation with checks
+     * @return Newly created active VSR
+     */
+    @Deprecated
+    public ManagedVSR rotateActiveVSR() {
+        synchronized (this) {
+            ManagedVSR current = activeVSR.get();
+
+            // Freeze current VSR if it exists and has data
+            if (current != null && current.getRowCount() > 0) {
+                freezeVSR(current);
+            }
+
+            // Create new active VSR
+            ManagedVSR newActive = createNewVSR();
+            activeVSR.set(newActive);
+
+            return newActive;
+        }
+    }
+
+    /**
+     * Gets the frozen VSR for Rust processing.
+     *
+     * @return Frozen VSR, or null if none available
+     */
+    public ManagedVSR getFrozenVSR() {
+        return frozenVSR.get();
+    }
+
+    /**
+     * Takes the frozen VSR for processing and clears the frozen slot.
+     *
+     * @return Frozen VSR that was taken, or null if none available
+     */
+    public ManagedVSR takeFrozenVSR() {
+        return frozenVSR.getAndSet(null);
+    }
+
+    /**
+     * Marks a VSR as flushing (being processed by Rust).
+     *
+     * @param vsr VSR being processed
+     */
+    public void markFlushing(ManagedVSR vsr) {
+        vsr.setState(VSRState.FLUSHING);
+    }
+
+    /**
+     * Completes VSR processing and cleans up resources.
+     *
+     * @param vsr VSR that has been processed
+     */
+    public void completeVSR(ManagedVSR vsr) {
+        vsr.setState(VSRState.CLOSED);
+        vsr.close();
+        allVSRs.remove(vsr.getId());
+    }
+
+    /**
+     * Forces all VSRs to be frozen for immediate processing.
+     * Used during refresh or shutdown.
+     */
+    public void freezeAll() {
+        ManagedVSR current = activeVSR.getAndSet(null);
+        if (current != null && current.getRowCount() > 0) {
+            freezeVSR(current);
+        }
+    }
+
+    /**
+     * Gets statistics about the VSR pool.
+     *
+     * @return PoolStats with current state
+     */
+    public PoolStats getStats() {
+        ManagedVSR active = activeVSR.get();
+        ManagedVSR frozen = frozenVSR.get();
+        int frozenCount = frozen != null ? 1 : 0;
+
+        return new PoolStats(
+            poolId,
+            active != null ? active.getRowCount() : 0,
+            frozenCount,
+            allVSRs.size(),
+            allVSRs.values().stream().mapToLong(ManagedVSR::getRowCount).sum()
+        );
+    }
+
+    /**
+     * Checks if backpressure should be applied.
+     *
+     * @return true if frozen VSR slot is occupied or memory pressure is critical
+     */
+    public boolean shouldApplyBackpressure() {
+        return frozenVSR.get() != null ||
+               memoryMonitor.getCurrentPressureLevel() == MemoryPressureMonitor.PressureLevel.CRITICAL;
+    }
+
+    /**
+     * Closes the pool and cleans up all resources.
+     */
+    public void close() {
+        // Close active VSR
+        ManagedVSR active = activeVSR.getAndSet(null);
+        if (active != null) {
+            active.close();
+        }
+
+        // Close frozen VSR
+        ManagedVSR frozen = frozenVSR.getAndSet(null);
+        if (frozen != null) {
+            frozen.close();
+        }
+
+        // Close any remaining VSRs
+        allVSRs.values().forEach(ManagedVSR::close);
+        allVSRs.clear();
+    }
+
+    private void initializeActiveVSR() {
+        ManagedVSR initial = createNewVSR();
+        activeVSR.set(initial);
+    }
+
+    private ManagedVSR createNewVSR() {
+        String vsrId = poolId + "-vsr-" + vsrCounter.incrementAndGet();
+        BufferAllocator allocator = null;
+        VectorSchemaRoot vsr = null;
+
+        try {
+            allocator = bufferPool.createAllocator(vsrId);
+            vsr = VectorSchemaRoot.create(schema, allocator);
+
+            ManagedVSR managedVSR = new ManagedVSR(vsrId, vsr, allocator);
+            allVSRs.put(vsrId, managedVSR);
+
+            // Success: ManagedVSR now owns the resources
+            return managedVSR;
+        } catch (Exception e) {
+            // Clean up resources on failure since ManagedVSR couldn't take ownership
+            if (vsr != null) {
+                try {
+                    vsr.close();
+                } catch (Exception closeEx) {
+                    e.addSuppressed(closeEx);
+                }
+            }
+            if (allocator != null) {
+                try {
+                    allocator.close();
+                } catch (Exception closeEx) {
+                    e.addSuppressed(closeEx);
+                }
+            }
+            throw new RuntimeException("Failed to create new VSR", e);
+        }
+    }
+
+    private void freezeVSR(ManagedVSR vsr) {
+        vsr.setState(VSRState.FROZEN);
+
+        // CRITICAL FIX: Check if frozen slot is already occupied
+        ManagedVSR previousFrozen = frozenVSR.get();
+        if (previousFrozen != null) {
+            // NEVER blindly overwrite a frozen VSR - this would cause data loss
+            System.err.println("[VSRPool] ERROR: Attempting to freeze VSR when frozen slot is occupied! " +
+                             "Previous VSR: " + previousFrozen.getId() + " (" + previousFrozen.getRowCount() + " rows), " +
+                             "New VSR: " + vsr.getId() + " (" + vsr.getRowCount() + " rows). " +
+                             "This indicates a logic error - frozen VSR should be consumed before replacement.");
+
+            // Return VSR to ACTIVE state to prevent state corruption
+            vsr.setState(VSRState.ACTIVE);
+            throw new IllegalStateException("Cannot freeze VSR: frozen slot is occupied by unprocessed VSR " +
+                                          previousFrozen.getId() + ". This would cause data loss.");
+        }
+
+        // Safe to set frozen VSR since slot is empty
+        boolean success = frozenVSR.compareAndSet(null, vsr);
+        if (!success) {
+            // Race condition: another thread set frozen VSR between our check and set
+            vsr.setState(VSRState.ACTIVE);
+            throw new IllegalStateException("Race condition detected: frozen slot was occupied during freeze operation");
+        }
+    }
+
+    private boolean shouldRotateVSR(ManagedVSR vsr) {
+        return vsr.getRowCount() >= maxRowsPerVSR ||
+               memoryMonitor.shouldTriggerEarlyRefresh();
+    }
+
+    /**
+     * Statistics for the VSR pool.
+     */
+    public static class PoolStats {
+        private final String poolId;
+        private final long activeRowCount;
+        private final int frozenVSRCount;
+        private final int totalVSRCount;
+        private final long totalRowCount;
+
+        public PoolStats(String poolId, long activeRowCount, int frozenVSRCount,
+                        int totalVSRCount, long totalRowCount) {
+            this.poolId = poolId;
+            this.activeRowCount = activeRowCount;
+            this.frozenVSRCount = frozenVSRCount;
+            this.totalVSRCount = totalVSRCount;
+            this.totalRowCount = totalRowCount;
+        }
+
+        public String getPoolId() { return poolId; }
+        public long getActiveRowCount() { return activeRowCount; }
+        public int getFrozenVSRCount() { return frozenVSRCount; }
+        public int getTotalVSRCount() { return totalVSRCount; }
+        public long getTotalRowCount() { return totalRowCount; }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRState.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRState.java
new file mode 100644
index 0000000000000..cd55f30ca24cc
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/vsr/VSRState.java
@@ -0,0 +1,28 @@
+package com.parquet.parquetdataformat.vsr;
+
+/**
+ * Represents the lifecycle states of a VectorSchemaRoot in the Project Mustang
+ * Parquet Writer Plugin architecture.
+ */
+public enum VSRState {
+    /**
+     * Currently accepting writes - the VSR is active and can be modified.
+     */
+    ACTIVE,
+    
+    /**
+     * Read-only state - VSR is frozen and queued for flush to Rust.
+     * No further modifications are allowed in this state.
+     */
+    FROZEN,
+    
+    /**
+     * Currently being processed by Rust - VSR is in the handoff process.
+     */
+    FLUSHING,
+    
+    /**
+     * Completed and cleaned up - VSR processing is complete and resources freed.
+     */
+    CLOSED
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java
new file mode 100644
index 0000000000000..8db471ee9a77a
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetDocumentInput.java
@@ -0,0 +1,68 @@
+package com.parquet.parquetdataformat.writer;
+
+import com.parquet.parquetdataformat.fields.ArrowFieldRegistry;
+import org.opensearch.index.engine.exec.DocumentInput;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.mapper.MappedFieldType;
+import com.parquet.parquetdataformat.vsr.ManagedVSR;
+
+import java.io.IOException;
+
+/**
+ * Document input wrapper for Parquet-based document processing.
+ *
+ * <p>This class serves as an adapter between OpenSearch's DocumentInput interface
+ * and the Arrow-based vector representation. It works directly with a {@link ManagedVSR}
+ * to populate field vectors and manage document lifecycle.
+ *
+ * <p>The implementation follows the builder pattern, allowing incremental construction
+ * of documents through field addition before finalizing the document for writing.
+ *
+ * <p>Key responsibilities:
+ * <ul>
+ *   <li>Direct field vector population using OpenSearch's {@link MappedFieldType}</li>
+ *   <li>Document lifecycle management via ManagedVSR</li>
+ *   <li>Integration with the Arrow-based Parquet writer pipeline</li>
+ * </ul>
+ *
+ * <p>This implementation works directly with Arrow field vectors, eliminating the
+ * intermediate ParquetDocument representation for improved performance and memory efficiency.
+ */
+public class ParquetDocumentInput implements DocumentInput<ManagedVSR> {
+    private final ManagedVSR managedVSR;
+
+    public ParquetDocumentInput(ManagedVSR managedVSR) {
+        this.managedVSR = managedVSR;
+    }
+
+    @Override
+    public void addField(MappedFieldType fieldType, Object value) {
+        ArrowFieldRegistry.getParquetField(fieldType.typeName()).createField(fieldType, managedVSR, value);
+    }
+
+    @Override
+    public ManagedVSR getFinalInput() {
+        return managedVSR;
+    }
+
+    @Override
+    public WriteResult addToWriter() throws IOException {
+        // Complete the current document by incrementing row count
+        // This will internally call setValueCount on all field vectors
+        int currentRowCount = managedVSR.getRowCount();
+        managedVSR.setRowCount(currentRowCount + 1);
+
+        // TODO: Return appropriate WriteResult based on operation success
+        return new WriteResult(true, null, 1, 1, 1);
+    }
+
+    @Override
+    public void close() throws Exception {
+        // NOTE: ParquetDocumentInput does NOT own the ManagedVSR lifecycle
+        // The ManagedVSR is owned and managed by VSRManager/VSRPool
+        // VSRManager.close() -> vsrPool.completeVSR(managedVSR) handles cleanup
+        // ParquetDocumentInput only holds a reference for field population
+
+        // No cleanup needed here - VSRManager handles the ManagedVSR lifecycle
+    }
+}
diff --git a/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java
new file mode 100644
index 0000000000000..eec04ef35650a
--- /dev/null
+++ b/modules/parquet-data-format/src/main/java/com/parquet/parquetdataformat/writer/ParquetWriter.java
@@ -0,0 +1,78 @@
+package com.parquet.parquetdataformat.writer;
+
+import com.parquet.parquetdataformat.vsr.VSRManager;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.opensearch.index.engine.exec.FileInfos;
+import org.opensearch.index.engine.exec.FlushIn;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.engine.exec.Writer;
+import org.opensearch.index.engine.exec.WriterFileSet;
+
+import java.io.IOException;
+import java.nio.file.Path;
+
+import static com.parquet.parquetdataformat.engine.ParquetDataFormat.PARQUET_DATA_FORMAT;
+
+/**
+ * Parquet file writer implementation that integrates with OpenSearch's Writer interface.
+ *
+ * <p>This writer provides a high-level interface for writing Parquet documents to disk
+ * using the underlying VSRManager for Arrow-based data management and native Rust
+ * backend for efficient Parquet file generation.
+ *
+ * <p>Key features:
+ * <ul>
+ *   <li>Arrow schema-based document structure</li>
+ *   <li>Batch-oriented writing with memory management</li>
+ *   <li>Integration with OpenSearch indexing pipeline</li>
+ *   <li>Native Rust backend for high-performance Parquet operations</li>
+ * </ul>
+ *
+ * <p>The writer manages the complete lifecycle from document addition through
+ * flushing and cleanup, delegating the actual Arrow and Parquet operations
+ * to the {@link VSRManager}.
+ */
+public class ParquetWriter implements Writer<ParquetDocumentInput> {
+    private final String file;
+    private final Schema schema;
+    private final VSRManager vsrManager;
+    private final long writerGeneration;
+
+    public ParquetWriter(String file, Schema schema, long writerGeneration) {
+        this.file = file;
+        this.schema = schema;
+        this.vsrManager = new VSRManager(file, schema);
+        this.writerGeneration = writerGeneration;
+    }
+
+    @Override
+    public WriteResult addDoc(ParquetDocumentInput d) throws IOException {
+        return vsrManager.addToManagedVSR(d);
+    }
+
+    @Override
+    public FileInfos flush(FlushIn flushIn) throws IOException {
+        String fileName = vsrManager.flush(flushIn);
+        FileInfos fileInfos = new FileInfos();
+        WriterFileSet writerFileSet = new WriterFileSet(Path.of(fileName).getParent(), writerGeneration);
+        writerFileSet.add(fileName);
+        fileInfos.putWriterFileSet(PARQUET_DATA_FORMAT, writerFileSet);
+        return fileInfos;
+    }
+
+    @Override
+    public void sync() throws IOException {
+
+    }
+
+    @Override
+    public void close() {
+        vsrManager.close();
+    }
+
+    @Override
+    public ParquetDocumentInput newDocumentInput() {
+        // Get a new ManagedVSR from VSRManager for this document input
+        return new ParquetDocumentInput(vsrManager.getActiveManagedVSR());
+    }
+}
diff --git a/modules/parquet-data-format/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec b/modules/parquet-data-format/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec
new file mode 100644
index 0000000000000..7d1e56cc25536
--- /dev/null
+++ b/modules/parquet-data-format/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec
@@ -0,0 +1 @@
+com.parquet.parquetdataformat.engine.read.ParquetDataSourceCodec
diff --git a/modules/parquet-data-format/src/main/rust/Cargo.toml b/modules/parquet-data-format/src/main/rust/Cargo.toml
new file mode 100644
index 0000000000000..21ba3950aa9ac
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/Cargo.toml
@@ -0,0 +1,63 @@
+[package]
+name = "rust"
+version = "0.1.0"
+edition = "2024"
+
+[lib]
+name = "parquet_dataformat_jni"
+crate-type = ["cdylib"]
+
+[dependencies]
+
+# DataFusion dependencies
+datafusion = "49.0.0"
+datafusion-substrait = "49.0.0"
+arrow = { version = "54.0.0", features = ["ffi"] }
+
+arrow-array = "54.0.0"
+arrow-schema = "54.0.0"
+arrow-buffer = "54.0.0"
+
+# JNI dependencies
+jni = "0.21"
+
+# Async runtime
+tokio = { version = "1.0", features = ["full"] }
+futures = "0.3"
+futures-util = "0.3"
+
+# Serialization
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+
+# Error handling
+anyhow = "1.0"
+thiserror = "1.0"
+
+# Logging
+log = "0.4"
+
+# Parquet support
+parquet = "54.0.0"
+
+# Object store for file access
+object_store = "0.11"
+url = "2.0"
+
+# Substrait support
+substrait = "0.47"
+prost = "0.13"
+
+# Temporary directory support
+tempfile = "3.0"
+
+#jni = "0.21.1"
+#arrow = { version = "53.0.0", features = ["ffi"] }
+#parquet = "53.0.0"
+lazy_static = "1.4.0"
+dashmap = "7.0.0-rc2"
+chrono = "0.4"
+
+
+[build-dependencies]
+cbindgen = "0.27"
diff --git a/modules/parquet-data-format/src/main/rust/src/context.rs b/modules/parquet-data-format/src/main/rust/src/context.rs
new file mode 100644
index 0000000000000..022912ed84c48
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/context.rs
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use datafusion::prelude::*;
+use datafusion::execution::context::SessionContext;
+use std::collections::HashMap;
+use std::sync::Arc;
+use anyhow::Result;
+
+/// Manages DataFusion session contexts
+pub struct SessionContextManager {
+    contexts: HashMap<*mut SessionContext, Arc<SessionContext>>,
+    next_runtime_id: u64,
+}
+
+impl SessionContextManager {
+    pub fn new() -> Self {
+        Self {
+            contexts: HashMap::new(),
+            next_runtime_id: 1,
+        }
+    }
+
+    pub async fn register_directory(
+        &mut self,
+        table_name: &str,
+        directory_path: &str,
+        options: HashMap<String, String>,
+    ) -> Result<u64> {
+        // Placeholder implementation - would register parquet directory as table
+        log::info!("Registering directory: {} at path: {} with options: {:?}",
+                   table_name, directory_path, options);
+
+        let runtime_id = self.next_runtime_id;
+        self.next_runtime_id += 1;
+        Ok(runtime_id)
+    }
+
+    pub async fn create_session_context(
+        &mut self,
+        config: HashMap<String, String>,
+    ) -> Result<*mut SessionContext> {
+        // Create actual DataFusion session context
+        let mut session_config = SessionConfig::new();
+
+        // Apply configuration options
+        if let Some(batch_size) = config.get("batch_size") {
+            if let Ok(size) = batch_size.parse::<usize>() {
+                session_config = session_config.with_batch_size(size);
+            }
+        }
+
+        let ctx = Arc::new(SessionContext::new_with_config(session_config));
+        let ctx_ptr = Arc::as_ptr(&ctx) as *mut SessionContext;
+
+        self.contexts.insert(ctx_ptr, ctx);
+
+        Ok(ctx_ptr)
+    }
+
+    pub async fn close_session_context(&mut self, ctx_ptr: *mut SessionContext) -> Result<()> {
+        self.contexts.remove(&ctx_ptr);
+        Ok(())
+    }
+
+    pub fn get_context(&self, ctx_ptr: *mut SessionContext) -> Option<&Arc<SessionContext>> {
+        self.contexts.get(&ctx_ptr)
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/lib.rs b/modules/parquet-data-format/src/main/rust/src/lib.rs
new file mode 100644
index 0000000000000..6ef32c8f5050a
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/lib.rs
@@ -0,0 +1,249 @@
+use jni::objects::{JClass, JString};
+use jni::sys::{jint, jlong};
+use jni::JNIEnv;
+use dashmap::DashMap;
+use arrow::record_batch::RecordBatch;
+use parquet::arrow::ArrowWriter;
+use std::fs::File;
+use std::sync::{Arc, Mutex};
+use lazy_static::lazy_static;
+use arrow::ffi::{FFI_ArrowSchema, FFI_ArrowArray};
+use std::fs::OpenOptions;
+use std::io::Write;
+use chrono::Utc;
+
+lazy_static! {
+    static ref WRITER_MANAGER: DashMap<String, Arc<Mutex<ArrowWriter<File>>>> = DashMap::new();
+    static ref FILE_MANAGER: DashMap<String, File> = DashMap::new();
+}
+
+struct NativeParquetWriter;
+
+impl NativeParquetWriter {
+
+    fn create_writer(filename: String, schema_address: i64) -> Result<(), Box<dyn std::error::Error>> {
+        let log_msg = format!("[RUST] create_writer called for file: {}, schema_address: {}\n", filename, schema_address);
+        println!("{}", log_msg.trim());
+        Self::log_to_file(&log_msg);
+        
+        let arrow_schema = unsafe { FFI_ArrowSchema::from_raw(schema_address as *mut _) };
+        let schema = Arc::new(arrow::datatypes::Schema::try_from(&arrow_schema)?);
+        
+        let schema_msg = format!("[RUST] Schema created with {} fields\n", schema.fields().len());
+        println!("{}", schema_msg.trim());
+        Self::log_to_file(&schema_msg);
+        
+        for (i, field) in schema.fields().iter().enumerate() {
+            let field_msg = format!("[RUST] Field {}: {} ({})\n", i, field.name(), field.data_type());
+            println!("{}", field_msg.trim());
+            Self::log_to_file(&field_msg);
+        }
+        
+        let file = File::create(&filename)?;
+        let file_clone = file.try_clone()?;
+        FILE_MANAGER.insert(filename.clone(), file_clone);
+        let writer = ArrowWriter::try_new(file, schema, None)?;
+        WRITER_MANAGER.insert(filename, Arc::new(Mutex::new(writer)));
+        Ok(())
+    }
+
+    fn write_data(filename: String, array_address: i64, schema_address: i64) -> Result<(), Box<dyn std::error::Error>> {
+        let log_msg = format!("[RUST] write_data called for file: {}, array_address: {}, schema_address: {}\n", filename, array_address, schema_address);
+        println!("{}", log_msg.trim());
+        Self::log_to_file(&log_msg);
+        
+        unsafe {
+            let arrow_schema = FFI_ArrowSchema::from_raw(schema_address as *mut _);
+            let arrow_array = FFI_ArrowArray::from_raw(array_address as *mut _);
+            
+            match arrow::ffi::from_ffi(arrow_array, &arrow_schema) {
+                Ok(array_data) => {
+                    let data_msg = format!("[RUST] Successfully imported array_data, length: {}\n", array_data.len());
+                    println!("{}", data_msg.trim());
+                    Self::log_to_file(&data_msg);
+                    
+                    let array: Arc<dyn arrow::array::Array> = arrow::array::make_array(array_data);
+                    let array_msg = format!("[RUST] Array type: {:?}, length: {}\n", array.data_type(), array.len());
+                    println!("{}", array_msg.trim());
+                    Self::log_to_file(&array_msg);
+                    
+                    if let Some(struct_array) = array.as_any().downcast_ref::<arrow::array::StructArray>() {
+                        let struct_msg = format!("[RUST] Successfully cast to StructArray with {} columns\n", struct_array.num_columns());
+                        println!("{}", struct_msg.trim());
+                        Self::log_to_file(&struct_msg);
+                        
+                        let schema = Arc::new(arrow::datatypes::Schema::new(
+                            struct_array.fields().clone()
+                        ));
+                        
+                        let record_batch = RecordBatch::try_new(
+                            schema.clone(),
+                            struct_array.columns().to_vec(),
+                        )?;
+                        
+                        let batch_msg = format!("[RUST] Created RecordBatch with {} rows and {} columns\n", record_batch.num_rows(), record_batch.num_columns());
+                        println!("{}", batch_msg.trim());
+                        Self::log_to_file(&batch_msg);
+                        
+                        if let Some(writer_arc) = WRITER_MANAGER.get(&filename) {
+                            let write_msg = "[RUST] Writing RecordBatch to file\n";
+                            println!("{}", write_msg.trim());
+                            Self::log_to_file(write_msg);
+                            let mut writer = writer_arc.lock().unwrap();
+                            writer.write(&record_batch)?;
+                            let success_msg = "[RUST] Successfully wrote RecordBatch\n";
+                            println!("{}", success_msg.trim());
+                            Self::log_to_file(success_msg);
+                        } else {
+                            let error_msg = format!("[RUST] ERROR: No writer found for file: {}\n", filename);
+                            println!("{}", error_msg.trim());
+                            Self::log_to_file(&error_msg);
+                        }
+                        Ok(())
+                    } else {
+                        let error_msg = format!("[RUST] ERROR: Array is not a StructArray, type: {:?}\n", array.data_type());
+                        println!("{}", error_msg.trim());
+                        Self::log_to_file(&error_msg);
+                        Err("Expected struct array from VectorSchemaRoot".into())
+                    }
+                }
+                Err(e) => {
+                    let error_msg = format!("[RUST] ERROR: Failed to import from FFI: {:?}\n", e);
+                    println!("{}", error_msg.trim());
+                    Self::log_to_file(&error_msg);
+                    Err(e.into())
+                }
+            }
+        }
+    }
+
+    fn close_writer(filename: String) -> Result<(), Box<dyn std::error::Error>> {
+        let log_msg = format!("[RUST] close_writer called for file: {}\n", filename);
+        println!("{}", log_msg.trim());
+        Self::log_to_file(&log_msg);
+        
+        if let Some((_, writer_arc)) = WRITER_MANAGER.remove(&filename) {
+            match Arc::try_unwrap(writer_arc) {
+                Ok(mutex) => {
+                    let mut writer = mutex.into_inner().unwrap();
+                    match writer.close() {
+                        Ok(_) => {
+                            let success_msg = format!("[RUST] Successfully closed writer for file: {}\n", filename);
+                            println!("{}", success_msg.trim());
+                            Self::log_to_file(&success_msg);
+                            Ok(())
+                        }
+                        Err(e) => {
+                            let error_msg = format!("[RUST] ERROR: Failed to close writer for file: {}\n", filename);
+                            println!("{}", error_msg.trim());
+                            Self::log_to_file(&error_msg);
+                            Err(e.into())
+                        }
+                    }
+                }
+                Err(_) => {
+                    let error_msg = format!("[RUST] ERROR: Writer still in use for file: {}\n", filename);
+                    println!("{}", error_msg.trim());
+                    Self::log_to_file(&error_msg);
+                    Err("Writer still in use".into())
+                }
+            }
+        } else {
+            Ok(())
+        }
+    }
+    
+    fn flush_to_disk(filename: String) -> Result<(), Box<dyn std::error::Error>> {
+        let log_msg = format!("[RUST] fsync_file called for file: {}\n", filename);
+        println!("{}", log_msg.trim());
+        Self::log_to_file(&log_msg);
+        
+        if let Some(mut file) = FILE_MANAGER.get_mut(&filename) {
+            match file.sync_all() {
+                Ok(_) => {
+                    let success_msg = format!("[RUST] Successfully fsynced file: {}\n", filename);
+                    println!("{}", success_msg.trim());
+                    Self::log_to_file(&success_msg);
+                    Ok(())
+                }
+                Err(e) => {
+                    let error_msg = format!("[RUST] ERROR: Failed to fsync file: {}\n", filename);
+                    println!("{}", error_msg.trim());
+                    Self::log_to_file(&error_msg);
+                    Err(e.into())
+                }
+            }
+        } else {
+            let error_msg = format!("[RUST] ERROR: File not found for fsync: {}\n", filename);
+            println!("{}", error_msg.trim());
+            Self::log_to_file(&error_msg);
+            Err("File not found".into())
+        }
+    }
+    
+    fn log_to_file(message: &str) {
+        if let Ok(mut file) = OpenOptions::new()
+            .create(true)
+            .append(true)
+            .open("/tmp/rust_parquet_debug.log") {
+            let timestamp = Utc::now().format("%Y-%m-%d %H:%M:%S%.3f UTC");
+            let timestamped_message = format!("[{}] {}", timestamp, message);
+            let _ = file.write_all(timestamped_message.as_bytes());
+        }
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_createWriter(
+    mut env: JNIEnv,
+    _class: JClass,
+    file: JString,
+    schema_address: jlong
+) -> jint {
+    let filename: String = env.get_string(&file).expect("Couldn't get java string!").into();
+    match NativeParquetWriter::create_writer(filename, schema_address as i64) {
+        Ok(_) => 0,
+        Err(_) => -1,
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_write(
+    mut env: JNIEnv,
+    _class: JClass,
+    file: JString,
+    array_address: jlong,
+    schema_address: jlong
+) -> jint {
+    let filename: String = env.get_string(&file).expect("Couldn't get java string!").into();
+    match NativeParquetWriter::write_data(filename, array_address as i64, schema_address as i64) {
+        Ok(_) => 0,
+        Err(_) => -1,
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_closeWriter(
+    mut env: JNIEnv,
+    _class: JClass,
+    file: JString
+) -> jint {
+    let filename: String = env.get_string(&file).expect("Couldn't get java string!").into();
+    match NativeParquetWriter::close_writer(filename) {
+        Ok(_) => 0,
+        Err(_) => -1,
+    }
+}
+
+#[unsafe(no_mangle)]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_flushToDisk(
+    mut env: JNIEnv,
+    _class: JClass,
+    file: JString
+) -> jint {
+    let filename: String = env.get_string(&file).expect("Couldn't get java string!").into();
+    match NativeParquetWriter::flush_to_disk(filename) {
+        Ok(_) => 0,
+        Err(_) => -1,
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/parquet_exec.rs b/modules/parquet-data-format/src/main/rust/src/parquet_exec.rs
new file mode 100644
index 0000000000000..9fee54317d09a
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/parquet_exec.rs
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use anyhow::Result;
+
+/// Parquet-specific execution utilities - placeholder implementation
+pub struct ParquetExecutor;
+
+impl ParquetExecutor {
+    pub fn new() -> Self {
+        Self
+    }
+
+    /// Create a listing table for Parquet files - placeholder
+    pub async fn create_parquet_table(
+        &self,
+        table_path: &str,
+    ) -> Result<u64> {
+        // Placeholder implementation
+        log::info!("Creating parquet table for path: {}", table_path);
+        Ok(1) // Return dummy table ID
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/read_lib.rs b/modules/parquet-data-format/src/main/rust/src/read_lib.rs
new file mode 100644
index 0000000000000..516e9acca9d06
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/read_lib.rs
@@ -0,0 +1,198 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+//! OpenSearch DataFusion parquet JNI Library
+//!
+//! This library provides JNI bindings for DataFusion query execution,
+
+use jni::JNIEnv;
+use jni::objects::{JClass, JString, JObjectArray, JByteArray};
+use jni::sys::{jlong, jstring};
+use std::ptr;
+use std::collections::HashMap;
+
+mod context;
+mod runtime;
+mod stream;
+mod substrait;
+mod util;
+mod parquet_exec;
+
+use context::SessionContextManager;
+use runtime::RuntimeManager;
+use stream::RecordBatchStreamWrapper;
+use substrait::SubstraitExecutor;
+use datafusion::execution::context::SessionContext;
+use datafusion::execution::runtime_env::RuntimeEnv;
+
+/**
+TODO : Put more thought into this
+**/
+static mut RUNTIME_MANAGER: Option<RuntimeManager> = None;
+
+static mut SESSION_MANAGER: Option<SessionContextManager> = None;
+
+/// Initialize the managers (call once)
+fn init_managers() {
+    unsafe {
+        if RUNTIME_MANAGER.is_none() {
+            RUNTIME_MANAGER = Some(RuntimeManager::new());
+        }
+        if SESSION_MANAGER.is_none() {
+            SESSION_MANAGER = Some(SessionContextManager::new());
+        }
+    }
+}
+static mut RUNTIME_ENVIRONMENTS: Option<HashMap<u64, String>> = None;
+
+
+/// Register a directory as a table in the global context and return runtime environment ID
+#[no_mangle]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeRegisterDirectory(
+    mut env: JNIEnv,
+    _class: JClass,
+    table_name: JString,
+    directory_path: JString,
+    files: JObjectArray,
+    runtime_id: jlong
+) {
+    let runtimeEnv = unsafe { &mut *(runtime_id as *mut RuntimeEnv) };
+    // placeholder
+}
+
+/// Create a new session context
+#[no_mangle]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeCreateSessionContext(
+    mut env: JNIEnv,
+    _class: JClass,
+    config_keys: JObjectArray,
+    config_values: JObjectArray,
+) -> jlong {
+    // Initialize managers if not already done
+    init_managers();
+
+    // PLACEHOLDER
+    // Parse configuration from JNI arrays
+    let config = match util::parse_string_map(&mut env, config_keys, config_values) {
+        Ok(cfg) => cfg,
+        Err(e) => {
+            util::throw_exception(&mut env, &format!("Failed to parse config: {}", e));
+            return 0;
+        }
+    };
+
+    // Create session context
+    match unsafe {
+        RUNTIME_MANAGER.as_ref().unwrap().block_on(async {
+            SESSION_MANAGER.as_mut().unwrap().create_session_context(config).await
+        })
+    } {
+        Ok(context_ptr) => context_ptr as jlong,
+        Err(e) => {
+            util::throw_exception(&mut env, &format!("Failed to create session context: {}", e));
+            0
+        }
+    }
+}
+
+/// Execute a Substrait query plan
+#[no_mangle]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeExecuteSubstraitQuery(
+    mut env: JNIEnv,
+    _class: JClass,
+    session_context_ptr: jlong,
+    substrait_plan: JByteArray,
+) -> jlong {
+
+    // Convert JByteArray to Vec<u8>
+    let substrait_plan_bytes = match env.convert_byte_array(substrait_plan) {
+        Ok(bytes) => bytes,
+        Err(e) => {
+            util::throw_exception(&mut env, &format!("Failed to convert substrait plan: {}", e));
+            return 0;
+        }
+    };
+
+    // Execute the query
+    match unsafe {
+        RUNTIME_MANAGER.as_ref().unwrap().block_on(async {
+            let executor = SubstraitExecutor::new();
+            executor.execute_plan(session_context_ptr as *mut SessionContext, &substrait_plan_bytes).await
+        })
+    } {
+        Ok(stream_ptr) => stream_ptr as jlong,
+        Err(e) => {
+            util::throw_exception(&mut env, &format!("Failed to execute query: {}", e));
+            0
+        }
+    }
+}
+
+/// Close a session context
+#[no_mangle]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeCloseSessionContext(
+    mut env: JNIEnv,
+    _class: JClass,
+    session_context_ptr: jlong,
+) {
+
+    if let Err(e) = unsafe {
+        RUNTIME_MANAGER.as_ref().unwrap().block_on(async {
+            SESSION_MANAGER.as_mut().unwrap()
+                .close_session_context(session_context_ptr as *mut SessionContext)
+                .await
+        })
+    } {
+        util::throw_exception(&mut env, &format!("Failed to close session context: {}", e));
+    }
+}
+
+/// Get the next record batch from a stream
+#[no_mangle]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeNextBatch(
+    mut env: JNIEnv,
+    _class: JClass,
+    stream_ptr: jlong,
+) -> jstring {
+
+    let stream = unsafe { &mut *(stream_ptr as *mut RecordBatchStreamWrapper) };
+
+    match unsafe {
+        RUNTIME_MANAGER.as_ref().unwrap().block_on(async {
+            stream.next_batch().await
+        })
+    } {
+        Ok(Some(batch_json)) => {
+            match env.new_string(&batch_json) {
+                Ok(jstr) => jstr.into_raw(),
+                Err(e) => {
+                    util::throw_exception(&mut env, &format!("Failed to create Java string: {}", e));
+                    ptr::null_mut()
+                }
+            }
+        }
+        Ok(None) => ptr::null_mut(), // End of stream
+        Err(e) => {
+            util::throw_exception(&mut env, &format!("Failed to get next batch: {}", e));
+            ptr::null_mut()
+        }
+    }
+}
+
+/// Close a record batch stream
+#[no_mangle]
+pub extern "system" fn Java_com_parquet_parquetdataformat_bridge_RustBridge_nativeCloseStream(
+    _env: JNIEnv,
+    _class: JClass,
+    stream_ptr: jlong,
+) {
+    if stream_ptr != 0 {
+        let stream = unsafe { Box::from_raw(stream_ptr as *mut RecordBatchStreamWrapper) };
+        drop(stream);
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/runtime.rs b/modules/parquet-data-format/src/main/rust/src/runtime.rs
new file mode 100644
index 0000000000000..bcd48a7dee58b
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/runtime.rs
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use tokio::runtime::Runtime;
+use std::future::Future;
+
+/// Manages the Tokio runtime for async operations
+pub struct RuntimeManager {
+    runtime: Runtime,
+}
+
+impl RuntimeManager {
+    pub fn new() -> Self {
+        // Placeholder
+
+        let runtime = Runtime::new().expect("Failed to create Tokio runtime");
+        Self { runtime }
+    }
+    
+    pub fn block_on<F>(&self, future: F) -> F::Output
+    where
+        F: Future,
+    {
+        self.runtime.block_on(future)
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/stream.rs b/modules/parquet-data-format/src/main/rust/src/stream.rs
new file mode 100644
index 0000000000000..2fe30f941223b
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/stream.rs
@@ -0,0 +1,43 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use anyhow::Result;
+use serde_json;
+
+/// Wrapper for DataFusion record batch streams - placeholder implementation
+pub struct RecordBatchStreamWrapper {
+    batch_count: u32,
+    is_placeholder: bool,
+}
+
+impl RecordBatchStreamWrapper {
+    pub fn new_placeholder() -> Self {
+        Self { 
+            batch_count: 0,
+            is_placeholder: true,
+        }
+    }
+    
+    pub async fn next_batch(&mut self) -> Result<Option<String>> {
+        // Return placeholder data for first few calls, then None
+        if self.is_placeholder {
+            if self.batch_count < 2 {
+                self.batch_count += 1;
+                let placeholder_data = serde_json::json!({
+                    "rows": [
+                        {"id": self.batch_count, "name": format!("placeholder_row_{}", self.batch_count)}
+                    ],
+                    "num_rows": 1,
+                    "num_columns": 2
+                });
+                Ok(Some(serde_json::to_string(&placeholder_data)?))
+            } else {
+                Ok(None) // End of stream
+            }
+        } else {
+            // Real implementation would go here
+            Ok(None)
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/substrait.rs b/modules/parquet-data-format/src/main/rust/src/substrait.rs
new file mode 100644
index 0000000000000..d8ca0f2846fd7
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/substrait.rs
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use datafusion::execution::context::SessionContext;
+use crate::stream::RecordBatchStreamWrapper;
+use anyhow::Result;
+
+/// Executes Substrait query plans
+pub struct SubstraitExecutor;
+
+impl SubstraitExecutor {
+    pub fn new() -> Self {
+        Self
+    }
+    
+    pub async fn execute_plan(
+        &self,
+        session_context_ptr: *mut SessionContext,
+        substrait_plan_bytes: &[u8],
+    ) -> Result<*mut RecordBatchStreamWrapper> {
+        // Placeholder implementation - would normally:
+        // 1. Parse Substrait plan from substrait_plan_bytes
+        // 2. Convert to DataFusion logical plan using datafusion-substrait
+        // 3. Execute using the session context
+        // 4. Return actual record batch stream
+        
+        log::info!("Executing Substrait plan with {} bytes for session: {:?}", 
+                   substrait_plan_bytes.len(), session_context_ptr);
+        
+        // For now, return a placeholder stream
+        let wrapper = RecordBatchStreamWrapper::new_placeholder();
+        let wrapper_ptr = Box::into_raw(Box::new(wrapper));
+        
+        Ok(wrapper_ptr)
+    }
+}
diff --git a/modules/parquet-data-format/src/main/rust/src/util.rs b/modules/parquet-data-format/src/main/rust/src/util.rs
new file mode 100644
index 0000000000000..5055c1312791a
--- /dev/null
+++ b/modules/parquet-data-format/src/main/rust/src/util.rs
@@ -0,0 +1,63 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use jni::JNIEnv;
+use jni::objects::{JObjectArray, JString};
+use std::collections::HashMap;
+use anyhow::Result;
+
+/// Parse a string map from JNI arrays
+pub fn parse_string_map(
+    env: &mut JNIEnv,
+    keys: JObjectArray,
+    values: JObjectArray,
+) -> Result<HashMap<String, String>> {
+    let mut map = HashMap::new();
+
+    let keys_len = env.get_array_length(&keys)?;
+    let values_len = env.get_array_length(&values)?;
+
+    if keys_len != values_len {
+        return Err(anyhow::anyhow!("Keys and values arrays must have the same length"));
+    }
+
+    for i in 0..keys_len {
+        let key_obj = env.get_object_array_element(&keys, i)?;
+        let value_obj = env.get_object_array_element(&values, i)?;
+
+        let key_jstring = JString::from(key_obj);
+        let value_jstring = JString::from(value_obj);
+
+        let key_str = env.get_string(&key_jstring)?;
+        let value_str = env.get_string(&value_jstring)?;
+
+        map.insert(key_str.to_string_lossy().to_string(), value_str.to_string_lossy().to_string());
+    }
+
+    Ok(map)
+}
+
+// Parse a string map from JNI arrays
+pub fn parse_string_arr(
+    env: &mut JNIEnv,
+    files: JObjectArray,
+) -> Result<Vec<String>> {
+    let length = env.get_array_length(&files).unwrap();
+    let mut rust_strings: Vec<String> = Vec::with_capacity(length as usize);
+    for i in 0..length {
+        let file_obj = env.get_object_array_element(&files, i).unwrap();
+        let jstring = JString::from(file_obj);
+        let rust_str: String = env
+            .get_string(&jstring)
+            .expect("Couldn't get java string!")
+            .into();
+        rust_strings.push(rust_str);
+    }
+    Ok(rust_strings)
+}
+
+/// Throw a Java exception
+pub fn throw_exception(env: &mut JNIEnv, message: &str) {
+    let _ = env.throw_new("java/lang/RuntimeException", message);
+}
diff --git a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatPluginIT.java b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatPluginIT.java
new file mode 100644
index 0000000000000..f4c123b8a96f4
--- /dev/null
+++ b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatPluginIT.java
@@ -0,0 +1,41 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+package com.parquet.parquetdataformat;
+
+import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
+import org.apache.hc.core5.http.ParseException;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.opensearch.client.Request;
+import org.opensearch.client.Response;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.test.OpenSearchIntegTestCase;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Collection;
+import java.util.Collections;
+
+import static org.hamcrest.Matchers.containsString;
+
+@ThreadLeakScope(ThreadLeakScope.Scope.NONE)
+@OpenSearchIntegTestCase.ClusterScope(scope = OpenSearchIntegTestCase.Scope.SUITE)
+public class ParquetDataFormatPluginIT extends OpenSearchIntegTestCase {
+
+    @Override
+    protected Collection<Class<? extends Plugin>> nodePlugins() {
+        return Collections.singletonList(ParquetDataFormatPlugin.class);
+    }
+
+    public void testPluginInstalled() throws IOException, ParseException {
+        Response response = getRestClient().performRequest(new Request("GET", "/_cat/plugins"));
+        String body = EntityUtils.toString(response.getEntity(), StandardCharsets.UTF_8);
+
+        logger.info("response body: {}", body);
+        assertThat(body, containsString("parquet"));
+    }
+}
diff --git a/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatTests.java b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatTests.java
new file mode 100644
index 0000000000000..b52466249d727
--- /dev/null
+++ b/modules/parquet-data-format/src/test/java/com/parquet/parquetdataformat/ParquetDataFormatTests.java
@@ -0,0 +1,30 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+package com.parquet.parquetdataformat;
+
+import com.parquet.parquetdataformat.bridge.RustBridge;
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.io.IOException;
+
+public class ParquetDataFormatTests extends OpenSearchTestCase {
+
+    public void testIngestion() throws IOException {
+        // Test only basic functionality without Arrow operations
+        try {
+            // Create plugin but don't call complex operations
+            ParquetDataFormatPlugin plugin = new ParquetDataFormatPlugin();
+            plugin.indexDataToParquetEngine();
+            
+        } catch (UnsatisfiedLinkError e) {
+            fail("Native library not loaded properly: " + e.getMessage());
+        } catch (Exception e) {
+            fail("Test failed: " + e.getMessage());
+        }
+    }
+}
diff --git a/modules/parquet-data-format/src/yamlRestTest/java/org.opensearch/parquetdataformat/ParquetDataFormatClientYamlTestSuiteIT.java b/modules/parquet-data-format/src/yamlRestTest/java/org.opensearch/parquetdataformat/ParquetDataFormatClientYamlTestSuiteIT.java
new file mode 100644
index 0000000000000..324c6ce3debd1
--- /dev/null
+++ b/modules/parquet-data-format/src/yamlRestTest/java/org.opensearch/parquetdataformat/ParquetDataFormatClientYamlTestSuiteIT.java
@@ -0,0 +1,26 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+package org.opensearch.parquetdataformat;
+
+import com.carrotsearch.randomizedtesting.annotations.Name;
+import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
+import org.opensearch.test.rest.yaml.ClientYamlTestCandidate;
+import org.opensearch.test.rest.yaml.OpenSearchClientYamlSuiteTestCase;
+
+
+public class ParquetDataFormatClientYamlTestSuiteIT extends OpenSearchClientYamlSuiteTestCase {
+
+    public ParquetDataFormatClientYamlTestSuiteIT(@Name("yaml") ClientYamlTestCandidate testCandidate) {
+        super(testCandidate);
+    }
+
+    @ParametersFactory
+    public static Iterable<Object[]> parameters() throws Exception {
+        return OpenSearchClientYamlSuiteTestCase.createParameters();
+    }
+}
diff --git a/modules/parquet-data-format/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml b/modules/parquet-data-format/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml
new file mode 100644
index 0000000000000..0399b16c51642
--- /dev/null
+++ b/modules/parquet-data-format/src/yamlRestTest/resources/rest-api-spec/test/10_basic.yml
@@ -0,0 +1,8 @@
+"Test that the plugin is loaded in OpenSearch":
+  - do:
+      cat.plugins:
+        local: true
+        h: component
+
+  - match:
+      $body: /^rename\n$/
diff --git a/plugins/engine-datafusion/.gitignore b/plugins/engine-datafusion/.gitignore
new file mode 100644
index 0000000000000..8e535981ee076
--- /dev/null
+++ b/plugins/engine-datafusion/.gitignore
@@ -0,0 +1,38 @@
+# Gradle
+.gradle/
+build/
+
+# Java
+*.class
+*.jar
+*.war
+*.ear
+hs_err_pid*
+
+# IDE
+.idea/
+*.iml
+*.ipr
+*.iws
+.vscode/
+.settings/
+.project
+.classpath
+
+# OS
+.DS_Store
+Thumbs.db
+
+# Rust
+jni/target/
+jni/Cargo.lock
+
+# Native libraries
+src/main/resources/native/
+
+# Logs
+*.log
+
+# Temporary files
+*.tmp
+*.temp
diff --git a/plugins/engine-datafusion/README.md b/plugins/engine-datafusion/README.md
new file mode 100644
index 0000000000000..bc4ad580df874
--- /dev/null
+++ b/plugins/engine-datafusion/README.md
@@ -0,0 +1,73 @@
+
+## Prerequisites
+
+1. Publish OpenSearch to maven local
+```
+./gradlew publishToMavenLocal
+```
+2. Publish SQL plugin to maven local
+```
+./gradlew publishToMavenLocal
+```
+3. Run opensearch with following parameters
+```
+ ./gradlew run --preserve-data -PremotePlugins="['org.opensearch.plugin:opensearch-job-scheduler:3.3.0.0-SNAPSHOT', 'org.opensearch.plugin:opensearch-sql-plugin:3.3.0.0-SNAPSHOT']" -PinstalledPlugins="['engine-datafusion']" --debug-jvm
+```
+
+
+## Steps to test indexing + search e2e
+
+TODO : need to remove hardcoded index name `index-7`
+
+1. Delete previous index if any
+```
+curl --location --request DELETE 'localhost:9200/index-7'
+```
+
+2. Create index with name : `index-7`
+```
+curl --location --request PUT 'http://localhost:9200/index-7' \
+--header 'Content-Type: application/json' \
+--data-raw '{
+    "settings": {
+        "number_of_shards": 1,
+        "number_of_replicas": 0,
+        "refresh_interval": -1
+    },
+    "mappings": {
+        "properties": {
+            "message": {
+                "type": "long"
+            },
+            "message2": {
+                "type": "long"
+            },
+            "message3": {
+                "type": "long"
+            }
+        }
+    }
+}'
+```
+3. Index docs
+```
+curl --location --request POST 'http://localhost:9200/_bulk' \
+--header 'Content-Type: application/json' \
+--data-raw '{"index":{"_index":"index-7"}}
+{"message": 2,"message2": 3,"message3": 4}
+{"index":{"_index":"index-7"}}
+{"message": 3,"message2": 4,"message3": 5}
+'
+```
+4. Refresh the index
+```
+curl localhost:9200/index-7/_refresh
+```
+5. Query
+```
+curl --location --request POST 'http://localhost:9200/_plugins/_ppl' \
+--header 'Content-Type: application/json' \
+--data-raw '{
+  "query": "source=index-7 | stats count(), min(message) as min, max(message2) as max"
+}'
+```
diff --git a/plugins/engine-datafusion/build.gradle b/plugins/engine-datafusion/build.gradle
new file mode 100644
index 0000000000000..82b5cd41fc757
--- /dev/null
+++ b/plugins/engine-datafusion/build.gradle
@@ -0,0 +1,225 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+apply plugin: 'java'
+apply plugin: 'idea'
+apply plugin: 'opensearch.internal-cluster-test'
+apply plugin: 'opensearch.yaml-rest-test'
+apply plugin: 'opensearch.pluginzip'
+
+def pluginName = 'engine-datafusion'
+def pluginDescription = 'OpenSearch plugin providing access to DataFusion via JNI'
+def projectPath = 'org.opensearch'
+def pathToPlugin = 'datafusion.DataFusionPlugin'
+def pluginClassName = 'DataFusionPlugin'
+
+opensearchplugin {
+    name = pluginName
+    description = pluginDescription
+    classname = "${projectPath}.${pathToPlugin}"
+    licenseFile = rootProject.file('LICENSE.txt')
+    noticeFile = rootProject.file('NOTICE.txt')
+}
+
+dependencies {
+    api project(':libs:opensearch-vectorized-exec-spi')
+    implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}"
+    implementation "org.apache.logging.log4j:log4j-core:${versions.log4j}"
+
+    // Bundle Jackson in the plugin JAR using 'api' like other OpenSearch plugins
+    api "com.fasterxml.jackson.core:jackson-core:${versions.jackson}"
+    api "com.fasterxml.jackson.core:jackson-databind:${versions.jackson_databind}"
+    api "com.fasterxml.jackson.core:jackson-annotations:${versions.jackson}"
+
+    // Apache Arrow dependencies for memory management
+    implementation "org.apache.arrow:arrow-memory-core:17.0.0"
+    implementation "org.apache.arrow:arrow-memory-unsafe:17.0.0"
+    implementation "org.apache.arrow:arrow-vector:17.0.0"
+    implementation "org.apache.arrow:arrow-c-data:17.0.0"
+    implementation "org.apache.arrow:arrow-format:17.0.0"
+    // SLF4J API for Arrow logging compatibility
+    implementation "org.slf4j:slf4j-api:${versions.slf4j}"
+    // CheckerFramework annotations required by Arrow 17.0.0
+    implementation "org.checkerframework:checker-qual:3.42.0"
+    // FlatBuffers dependency required by Arrow 17.0.0
+    implementation "com.google.flatbuffers:flatbuffers-java:${versions.flatbuffers}"
+
+    testImplementation "junit:junit:${versions.junit}"
+    testImplementation "org.hamcrest:hamcrest:${versions.hamcrest}"
+    testImplementation "org.mockito:mockito-core:${versions.mockito}"
+    testImplementation project(":modules:parquet-data-format")
+    // Add CSV plugin for testing
+    // testImplementation project(':plugins:dataformat-csv')
+}
+
+// Task to build the Rust JNI library
+task buildRustLibrary(type: Exec) {
+    description = 'Build the Rust JNI library using Cargo'
+    group = 'build'
+
+    workingDir file('jni')
+
+    // Determine the target directory and library name based on OS
+    def osName = System.getProperty('os.name').toLowerCase()
+    def libPrefix = osName.contains('windows') ? '' : 'lib'
+    def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so')
+
+    // Use debug build for development, release for production
+    def buildType = project.hasProperty('rustRelease') ? 'release' : 'debug'
+    def targetDir = "target/${buildType}"
+
+  // Find cargo executable - try common locations
+  def cargoExecutable = 'cargo'
+  def possibleCargoPaths = [
+    System.getenv('HOME') + '/.cargo/bin/cargo',
+    '/usr/local/bin/cargo',
+    'cargo'
+  ]
+
+  for (String path : possibleCargoPaths) {
+    if (new File(path).exists()) {
+      cargoExecutable = path
+      break
+    }
+  }
+
+    def cargoArgs = [cargoExecutable, 'build']
+    if (buildType == 'release') {
+        cargoArgs.add('--release')
+    }
+
+    if (osName.contains('windows')) {
+        commandLine cargoArgs
+    } else {
+        commandLine cargoArgs
+    }
+
+    // Set environment variables for cross-compilation if needed
+    environment 'CARGO_TARGET_DIR', file('jni/target').absolutePath
+
+    inputs.files fileTree('jni/src')
+    inputs.file 'jni/Cargo.toml'
+    outputs.files file("jni/${targetDir}/${libPrefix}opensearch_datafusion_jni${libExtension}")
+    System.out.println("Building Rust library in ${buildType} mode");
+}
+
+// Task to copy the native library to resources
+task copyNativeLibrary(type: Copy, dependsOn: buildRustLibrary) {
+    description = 'Copy the native library to Java resources'
+    group = 'build'
+
+    def osName = System.getProperty('os.name').toLowerCase()
+    def libPrefix = osName.contains('windows') ? '' : 'lib'
+    def libExtension = osName.contains('windows') ? '.dll' : (osName.contains('mac') ? '.dylib' : '.so')
+    def buildType = project.hasProperty('rustRelease') ? 'release' : 'debug'
+
+    from file("jni/target/${buildType}/${libPrefix}opensearch_datafusion_jni${libExtension}")
+    into file('src/main/resources/native')
+
+    // Rename to a standard name for Java to load
+    rename { filename ->
+        "libopensearch_datafusion_jni${libExtension}"
+    }
+
+    // Remove executable permissions to comply with OpenSearch file permission checks
+    filePermissions {
+        unix(0644)
+    }
+}
+
+// Ensure native library is built before Java compilation
+compileJava.dependsOn copyNativeLibrary
+
+// Ensure processResources depends on copyNativeLibrary
+processResources.dependsOn copyNativeLibrary
+sourcesJar.dependsOn copyNativeLibrary
+
+// Ensure filepermissions task depends on copyNativeLibrary
+tasks.named('filepermissions').configure {
+    dependsOn copyNativeLibrary
+}
+
+// Ensure sourcesJar depends on copyNativeLibrary since it includes resources
+sourcesJar.dependsOn copyNativeLibrary
+
+// Ensure filepermissions task depends on copyNativeLibrary
+tasks.named("filepermissions").configure {
+    dependsOn copyNativeLibrary
+}
+
+// Ensure forbiddenPatterns task depends on copyNativeLibrary
+tasks.named("forbiddenPatterns").configure {
+    dependsOn copyNativeLibrary
+    // Exclude native library files from pattern checking since they are binary
+    exclude '**/native/**'
+}
+
+// Ensure spotlessJava task has proper dependency ordering
+tasks.named("spotlessJava").configure {
+    mustRunAfter copyNativeLibrary
+}
+
+// Clean task should also clean Rust artifacts
+clean {
+    delete file('jni/target')
+    delete file('src/main/resources/native')
+}
+
+test {
+    // Set system property to help tests find the native library
+  jvmArgs += ["--add-opens", "java.base/java.nio=org.apache.arrow.memory.core,ALL-UNNAMED"]
+
+  systemProperty 'java.library.path', file('src/main/resources/native').absolutePath
+}
+
+yamlRestTest {
+    systemProperty 'tests.security.manager', 'false'
+    // Disable yamlRestTest since this plugin doesn't have REST API endpoints
+    enabled = false
+}
+
+tasks.named("dependencyLicenses").configure {
+    mapping from: /jackson-.*/, to: 'jackson'
+    mapping from: /arrow-.*/, to: 'arrow'
+    mapping from: /slf4j-.*/, to: 'slf4j-api'
+    mapping from: /checker-qual.*/, to: 'checker-qual'
+    mapping from: /flatbuffers-.*/, to: 'flatbuffers-java'
+}
+
+// Configure third party audit to handle Apache Arrow dependencies
+tasks.named('thirdPartyAudit').configure {
+    ignoreMissingClasses(
+        // Apache Commons Codec (missing dependency)
+        'org.apache.commons.codec.binary.Hex'
+    )
+    ignoreViolations(
+        // Apache Arrow internal classes that use Unsafe operations
+        'org.apache.arrow.memory.ArrowBuf',
+        'org.apache.arrow.memory.unsafe.UnsafeAllocationManager',
+        'org.apache.arrow.memory.util.ByteFunctionHelpers',
+        'org.apache.arrow.memory.util.MemoryUtil',
+        'org.apache.arrow.memory.util.MemoryUtil$1',
+        'org.apache.arrow.memory.util.hash.MurmurHasher',
+        'org.apache.arrow.memory.util.hash.SimpleHasher',
+        'org.apache.arrow.vector.BaseFixedWidthVector',
+        'org.apache.arrow.vector.BitVectorHelper',
+        'org.apache.arrow.vector.Decimal256Vector',
+        'org.apache.arrow.vector.DecimalVector',
+        'org.apache.arrow.vector.util.DecimalUtility',
+        'org.apache.arrow.vector.util.VectorAppender'
+    )
+}
+
+// Configure Javadoc to skip package documentation requirements ie package-info.java
+missingJavadoc {
+    javadocMissingIgnore = [
+        'org.opensearch.datafusion',
+        'org.opensearch.datafusion.action',
+        'org.opensearch.datafusion.core'
+    ]
+}
diff --git a/plugins/engine-datafusion/jni/Cargo.toml b/plugins/engine-datafusion/jni/Cargo.toml
new file mode 100644
index 0000000000000..4dbb5374f7443
--- /dev/null
+++ b/plugins/engine-datafusion/jni/Cargo.toml
@@ -0,0 +1,79 @@
+[package]
+name = "opensearch-datafusion-jni"
+version = "0.1.0"
+edition = "2021"
+description = "JNI bindings for DataFusion integration with OpenSearch"
+license = "Apache-2.0"
+
+[lib]
+name = "opensearch_datafusion_jni"
+crate-type = ["cdylib"]
+
+[dependencies]
+# DataFusion dependencies
+datafusion = "49.0.0"
+datafusion-expr = "49.0.0"
+datafusion-datasource = "49.0.0"
+arrow-json = "55.2"
+arrow = { version = "55.2", features = ["ffi", "ipc_compression"] }
+#arrow = "55.2.0"
+arrow-array = "55.2.0"
+arrow-schema = "55.2.0"
+arrow-buffer = "55.2.0"
+
+# JNI dependencies
+jni = "0.21"
+
+# Substrait support
+datafusion-substrait = "49.0.0"
+prost = "0.13"
+
+
+# Async runtime
+tokio = { version = "1.0", features = ["full"] }
+futures = "0.3"
+#tokio = { version = "1.0", features = ["rt", "rt-multi-thread", "macros"] }
+
+# Serialization
+serde = { version = "1.0", features = ["derive"] }
+serde_json = "1.0"
+
+# Error handling
+anyhow = "1.0"
+thiserror = "1.0"
+
+# Logging
+log = "0.4"
+# Parquet support
+parquet = "53.0.0"
+
+# Object store for file access
+object_store = "=0.12.3"
+url = "2.0"
+
+# Substrait support
+substrait = "0.47"
+
+# Temporary directory support
+tempfile = "3.0"
+chrono = "0.4.41"
+
+async-trait = "0.1.89"
+itertools = "0.14.0"
+rstest = "0.26.1"
+regex = "1.11.2"
+
+[build-dependencies]
+cbindgen = "0.27"
+
+
+[profile.release]
+lto = true
+codegen-units = 1
+panic = "abort"
+
+[profile.dev]
+opt-level = 1          # Some optimization for reasonable performance
+lto = false           # Disable LTO for faster builds
+codegen-units = 16    # More parallel compilation
+incremental = true    # Enable incremental compilation
diff --git a/plugins/engine-datafusion/jni/src/lib.rs b/plugins/engine-datafusion/jni/src/lib.rs
new file mode 100644
index 0000000000000..59bbc1fc912d6
--- /dev/null
+++ b/plugins/engine-datafusion/jni/src/lib.rs
@@ -0,0 +1,772 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+use std::collections::{BTreeSet, HashMap};
+use std::ptr::addr_of_mut;
+use jni::objects::{JByteArray, JClass, JLongArray, JObject};
+use jni::sys::{jbyteArray, jlong, jstring};
+use jni::JNIEnv;
+use std::sync::Arc;
+use arrow_array::{Array, StructArray};
+use arrow_array::ffi::FFI_ArrowArray;
+use arrow_schema::DataType;
+use arrow_schema::ffi::FFI_ArrowSchema;
+use datafusion::common::DataFusionError;
+
+mod util;
+mod row_id_optimizer;
+mod listing_table;
+
+use datafusion::execution::context::SessionContext;
+
+use crate::util::{create_file_metadata_from_filenames, parse_string_arr, set_object_result_error, set_object_result_ok};
+use datafusion::datasource::file_format::csv::CsvFormat;
+use datafusion::datasource::listing::{ListingTableUrl};
+use datafusion::execution::cache::cache_manager::CacheManagerConfig;
+use datafusion::execution::cache::cache_unit::DefaultListFilesCache;
+use datafusion::execution::cache::CacheAccessor;
+use datafusion::execution::runtime_env::{RuntimeEnv, RuntimeEnvBuilder};
+use datafusion::prelude::SessionConfig;
+use datafusion::DATAFUSION_VERSION;
+use datafusion::datasource::file_format::parquet::ParquetFormat;
+use datafusion::datasource::object_store::ObjectStoreUrl;
+use datafusion::datasource::physical_plan::parquet::{ParquetAccessPlan, RowGroupAccess};
+use datafusion::datasource::physical_plan::ParquetSource;
+use datafusion::execution::TaskContext;
+use datafusion::parquet::arrow::arrow_reader::RowSelector;
+use datafusion::physical_plan::{ExecutionPlan, SendableRecordBatchStream};
+use datafusion_datasource::file_groups::FileGroup;
+use datafusion_datasource::file_scan_config::FileScanConfigBuilder;
+use datafusion_datasource::PartitionedFile;
+use datafusion_datasource::source::DataSourceExec;
+use datafusion_substrait::logical_plan::consumer::from_substrait_plan;
+use datafusion_substrait::substrait::proto::Plan;
+use futures::TryStreamExt;
+use jni::objects::{JObjectArray, JString};
+use object_store::ObjectMeta;
+use prost::Message;
+use tokio::runtime::Runtime;
+use crate::listing_table::{ListingOptions, ListingTable, ListingTableConfig};
+use crate::row_id_optimizer::FilterRowIdOptimizer;
+
+/// Create a new DataFusion session context
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createContext(
+    _env: JNIEnv,
+    _class: JClass,
+) -> jlong {
+    let config = SessionConfig::new().with_repartition_aggregations(true);
+    let context = SessionContext::new_with_config(config);
+    let ctx = Box::into_raw(Box::new(context)) as jlong;
+    ctx
+}
+
+/// Close and cleanup a DataFusion context
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_closeContext(
+    _env: JNIEnv,
+    _class: JClass,
+    context_id: jlong,
+) {
+    let _ = unsafe { Box::from_raw(context_id as *mut SessionContext) };
+}
+
+/// Get version information
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_getVersionInfo(
+    env: JNIEnv,
+    _class: JClass,
+) -> jstring {
+    let version_info = format!(r#"{{"version": "{}", "codecs": ["CsvDataSourceCodec"]}}"#, DATAFUSION_VERSION);
+    env.new_string(version_info).expect("Couldn't create Java string").as_raw()
+}
+
+/// Get version information (legacy method name)
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_getVersion(
+    env: JNIEnv,
+    _class: JClass,
+) -> jstring {
+    env.new_string(DATAFUSION_VERSION).expect("Couldn't create Java string").as_raw()
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createTokioRuntime(
+    _env: JNIEnv,
+    _class: JClass,
+) -> jlong {
+    let rt = Runtime::new().unwrap();
+    let ctx = Box::into_raw(Box::new(rt)) as jlong;
+    ctx
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createGlobalRuntime(
+    _env: JNIEnv,
+    _class: JClass,
+) -> jlong {
+    let runtime_env = RuntimeEnvBuilder::default().build().unwrap();
+    /**
+    // We can copy global runtime to local runtime - file statistics cache, and most of the things
+    // will be shared across session contexts. But list files cache will be specific to session
+    // context
+
+    let fsCache = runtimeEnv.clone().cache_manager.get_file_statistic_cache().unwrap();
+    let localCacheManagerConfig = CacheManagerConfig::default().with_files_statistics_cache(Option::from(fsCache));
+    let localCacheManager = CacheManager::try_new(&localCacheManagerConfig);
+    let localRuntimeEnv = RuntimeEnvBuilder::new()
+        .with_cache_manager(localCacheManagerConfig)
+        .with_disk_manager(DiskManagerConfig::new_existing(runtimeEnv.disk_manager))
+        .with_memory_pool(runtimeEnv.memory_pool)
+        .with_object_store_registry(runtimeEnv.object_store_registry)
+        .build();
+    let config = SessionConfig::new().with_repartition_aggregations(true);
+    let context = SessionContext::new_with_config(config);
+    **/
+
+    let ctx = Box::into_raw(Box::new(runtime_env)) as jlong;
+    ctx
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createSessionContext(
+    _env: JNIEnv,
+    _class: JClass,
+    runtime_id: jlong,
+) -> jlong {
+    let runtimeEnv = unsafe { &mut *(runtime_id as *mut RuntimeEnv) };
+    let config = SessionConfig::new().with_repartition_aggregations(true);
+    let context = SessionContext::new_with_config_rt(config, Arc::new(runtimeEnv.clone()));
+    let ctx = Box::into_raw(Box::new(context)) as jlong;
+    ctx
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_closeSessionContext(
+    _env: JNIEnv,
+    _class: JClass,
+    context_id: jlong,
+) {
+    let _ = unsafe { Box::from_raw(context_id as *mut SessionContext) };
+}
+
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_createDatafusionReader(
+    mut env: JNIEnv,
+    _class: JClass,
+    table_path: JString,
+    files: JObjectArray
+) -> jlong {
+    let table_path: String = match env.get_string(&table_path) {
+        Ok(path) => path.into(),
+        Err(e) => {
+            let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid table path: {:?}", e));
+            return 0;
+        }
+    };
+
+    let files: Vec<String> = match parse_string_arr(&mut env, files) {
+        Ok(files) => files,
+        Err(e) => {
+            let _ = env.throw_new("java/lang/IllegalArgumentException", format!("Invalid file list: {}", e));
+            return 0;
+        }
+    };
+
+    let files_metadata = match create_file_metadata_from_filenames(&table_path, files.clone()) {
+        Ok(metadata) => metadata,
+        Err(err) => {
+            let _ = env.throw_new("java/lang/RuntimeException", format!("Failed to create metadata: {}", err));
+            return 0;
+        }
+    };
+
+    let table_url = match ListingTableUrl::parse(&table_path) {
+        Ok(url) => url,
+        Err(err) => {
+            let _ = env.throw_new("java/lang/RuntimeException", format!("Invalid table path: {}", err));
+            return 0;
+        }
+    };
+
+    let shard_view = ShardView::new(table_url, files_metadata);
+
+    Box::into_raw(Box::new(shard_view)) as jlong
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_destroyReader(
+    mut env: JNIEnv,
+    _class: JClass,
+    ptr: jlong
+)  {
+    let _ = unsafe { Box::from_raw(ptr as *mut ShardView) };
+}
+
+pub struct ShardView {
+    table_path: ListingTableUrl,
+    files_metadata: Arc<Vec<FileMetadata>>
+}
+
+impl ShardView {
+    pub fn new(table_path: ListingTableUrl, files_metadata: Vec<FileMetadata>) -> Self {
+        let files_metadata = Arc::new(files_metadata);
+        ShardView {
+            table_path,
+            files_metadata
+        }
+    }
+
+    pub fn table_path(&self) -> ListingTableUrl {
+        self.table_path.clone()
+    }
+
+    pub fn files_metadata(&self) -> Arc<Vec<FileMetadata>> {
+        self.files_metadata.clone()
+    }
+}
+
+#[derive(Debug, Clone)]
+struct FileMetadata {
+    row_group_row_counts: Arc<Vec<i64>>,
+    row_base: Arc<i64>,
+    object_meta: Arc<ObjectMeta>,
+}
+
+impl FileMetadata {
+    pub fn new(row_group_row_counts: Vec<i64>, row_base: i64, object_meta: ObjectMeta) -> Self {
+        let row_group_row_counts = Arc::new(row_group_row_counts);
+        let row_base = Arc::new(row_base);
+        let object_meta = Arc::new(object_meta);
+        FileMetadata {
+            row_group_row_counts,
+            row_base,
+            object_meta
+        }
+    }
+
+    pub fn row_group_row_counts(&self) -> Arc<Vec<i64>> {
+        self.row_group_row_counts.clone()
+    }
+
+    pub fn row_base(&self) -> Arc<i64> {
+        self.row_base.clone()
+    }
+
+    pub fn object_meta(&self) -> Arc<ObjectMeta> {
+        self.object_meta.clone()
+    }
+}
+
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_executeQueryPhase(
+    mut env: JNIEnv,
+    _class: JClass,
+    shard_view_ptr: jlong,
+    substrait_bytes: jbyteArray,
+    tokio_runtime_env_ptr: jlong,
+    // callback: JObject,
+) -> jlong {
+    let shard_view = unsafe { &*(shard_view_ptr as *const ShardView) };
+    let runtime_ptr = unsafe { &*(tokio_runtime_env_ptr as *const Runtime)};
+
+    let table_path = shard_view.table_path();
+    let files_metadata = shard_view.files_metadata();
+    let object_meta: Arc<Vec<ObjectMeta>> = Arc::new(files_metadata
+        .iter()
+        .map(|metadata| (*metadata.object_meta).clone())
+        .collect());
+
+    println!("Table path: {}", table_path);
+    println!("Files: {:?}", object_meta);
+
+    let list_file_cache = Arc::new(DefaultListFilesCache::default());
+    list_file_cache.put(table_path.prefix(), object_meta);
+
+    let runtime_env = RuntimeEnvBuilder::new()
+            .with_cache_manager(CacheManagerConfig::default()
+            .with_list_files_cache(Some(list_file_cache.clone()))
+        ).build().unwrap();
+
+    // TODO: get config from CSV DataFormat
+    let config = SessionConfig::new();
+    // config.options_mut().execution.parquet.pushdown_filters = true;
+
+    let state = datafusion::execution::SessionStateBuilder::new()
+        .with_config(config)
+        .with_runtime_env(Arc::from(runtime_env))
+        .with_default_features()
+        .with_physical_optimizer_rule(Arc::new(FilterRowIdOptimizer)) // TODO: enable only for query phase
+        .build();
+
+    let ctx = SessionContext::new_with_state(state);
+
+    // Create default parquet options
+    let file_format = ParquetFormat::new();
+    let listing_options = ListingOptions::new(Arc::new(file_format))
+        .with_file_extension(".parquet") // TODO: take this as parameter
+        .with_files_metadata(files_metadata)
+        .with_table_partition_cols(vec![("row_base".to_string(), DataType::Int32)]); // TODO: enable only for query phase
+
+    // Ideally the executor will give this
+    runtime_ptr.block_on(async {
+        let resolved_schema = listing_options
+            .infer_schema(&ctx.state(), &table_path.clone())
+            .await.unwrap();
+
+
+        let config = ListingTableConfig::new(table_path.clone())
+            .with_listing_options(listing_options)
+            .with_schema(resolved_schema);
+
+        // Create a new TableProvider
+        let provider = Arc::new(ListingTable::try_new(config).unwrap());
+        let shard_id = table_path.prefix().filename().expect("error in fetching Path");
+        ctx.register_table("index-7", provider)
+            .expect("Failed to attach the Table");
+
+    });
+
+    // TODO : how to close ctx ?
+    // Convert Java byte array to Rust Vec<u8>
+    let plan_bytes_obj = unsafe { JByteArray::from_raw(substrait_bytes) };
+    let plan_bytes_vec = match env.convert_byte_array(plan_bytes_obj) {
+        Ok(bytes) => bytes,
+        Err(e) => {
+            let error_msg = format!("Failed to convert plan bytes: {}", e);
+            env.throw_new("java/lang/Exception", error_msg);
+            return 0;
+        }
+    };
+
+    let substrait_plan = match Plan::decode(plan_bytes_vec.as_slice()) {
+        Ok(plan) => {
+            println!("SUBSTRAIT rust: Decoding is successful, Plan has {} relations", plan.relations.len());
+            plan
+        },
+        Err(e) => {
+            return 0;
+        }
+    };
+
+    //let runtime = unsafe { &mut *(runtime_ptr as *mut Runtime) };
+    runtime_ptr.block_on(async {
+
+        let logical_plan = match from_substrait_plan(&ctx.state(), &substrait_plan).await {
+            Ok(plan) => {
+                println!("SUBSTRAIT Rust: LogicalPlan: {:?}", plan);
+                plan
+            },
+            Err(e) => {
+                println!("SUBSTRAIT Rust: Failed to convert Substrait plan: {}", e);
+                return 0;
+            }
+        };
+
+        let dataframe = ctx.execute_logical_plan(logical_plan).await.unwrap();
+        let stream = dataframe.execute_stream().await.unwrap();
+        let stream_ptr = Box::into_raw(Box::new(stream)) as jlong;
+
+        stream_ptr
+    })
+}
+
+// If we need to create session context separately
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_nativeCreateSessionContext(
+    mut env: JNIEnv,
+    _class: JClass,
+    runtime_ptr: jlong,
+    shard_view_ptr: jlong,
+    global_runtime_env_ptr: jlong,
+) -> jlong {
+    let shard_view = unsafe { &*(shard_view_ptr as *const ShardView) };
+    let table_path = shard_view.table_path();
+    let files_metadata = shard_view.files_metadata();
+    let object_meta: Arc<Vec<ObjectMeta>> = Arc::new(files_metadata
+        .iter()
+        .map(|metadata| (*metadata.object_meta).clone())
+        .collect());
+    // Will use it once the global RunTime is defined
+    // let runtime_arc = unsafe {
+    //     let boxed = &*(runtime_env_ptr as *const Pin<Arc<RuntimeEnv>>);
+    //     (**boxed).clone()
+    // };
+
+    let list_file_cache = Arc::new(DefaultListFilesCache::default());
+    list_file_cache.put(table_path.prefix(), object_meta);
+
+    let runtime_env = RuntimeEnvBuilder::new()
+        .with_cache_manager(CacheManagerConfig::default()
+            .with_list_files_cache(Some(list_file_cache))).build().unwrap();
+
+
+
+    let ctx = SessionContext::new_with_config_rt(SessionConfig::new(), Arc::new(runtime_env));
+
+
+    // Create default parquet options
+    let file_format = CsvFormat::default();
+    let listing_options = ListingOptions::new(Arc::new(file_format))
+        .with_file_extension(".csv");
+
+
+    // let runtime = unsafe { &mut *(runtime_ptr as *mut Runtime) };
+    let mut session_context_ptr = 0;
+
+    // Ideally the executor will give this
+    Runtime::new().expect("Failed to create Tokio Runtime").block_on(async {
+        let resolved_schema = listing_options
+            .infer_schema(&ctx.state(), &table_path.clone())
+            .await.unwrap();
+
+
+        let config = ListingTableConfig::new(table_path.clone())
+            .with_listing_options(listing_options)
+            .with_schema(resolved_schema);
+
+        // Create a new TableProvider
+        let provider = Arc::new(ListingTable::try_new(config).unwrap());
+        let shard_id = table_path.prefix().filename().expect("error in fetching Path");
+        ctx.register_table(shard_id, provider)
+            .expect("Failed to attach the Table");
+
+        // Return back after wrapping in Box
+        session_context_ptr = Box::into_raw(Box::new(ctx)) as jlong
+    });
+
+    session_context_ptr
+}
+
+
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_RecordBatchStream_next(
+    mut env: JNIEnv,
+    _class: JClass,
+    runtime_ptr: jlong,
+    stream: jlong,
+    callback: JObject,
+) {
+    let runtime = unsafe { &mut *(runtime_ptr as *mut Runtime) };
+
+    let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) };
+    runtime.block_on(async {
+        //let fetch_start = std::time::Instant::now();
+        let next = stream.try_next().await;
+        //let fetch_time = fetch_start.elapsed();
+        match next {
+            Ok(Some(batch)) => {
+                //let convert_start = std::time::Instant::now();
+                // Convert to struct array for compatibility with FFI
+                //println!("Num rows : {}", batch.num_rows());
+                let struct_array: StructArray = batch.into();
+                let array_data = struct_array.into_data();
+                let mut ffi_array = FFI_ArrowArray::new(&array_data);
+                //let convert_time = convert_start.elapsed();
+                // ffi_array must remain alive until after the callback is called
+                // let callback_start = std::time::Instant::now();
+                set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_array));
+                // let callback_time = callback_start.elapsed();
+                // println!("Fetch: {:?}, Convert: {:?}, Callback: {:?}",
+                //          fetch_time, convert_time, callback_time);
+            }
+            Ok(None) => {
+                set_object_result_ok(&mut env, callback, 0 as *mut FFI_ArrowSchema);
+            }
+            Err(err) => {
+                set_object_result_error(&mut env, callback, &err);
+            }
+        }
+        //println!("Total time: {:?}", start.elapsed());
+    });
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_RecordBatchStream_getSchema(
+    mut env: JNIEnv,
+    _class: JClass,
+    stream: jlong,
+    callback: JObject,
+) {
+    let stream = unsafe { &mut *(stream as *mut SendableRecordBatchStream) };
+    let schema = stream.schema();
+    let ffi_schema = FFI_ArrowSchema::try_from(&*schema);
+    match ffi_schema {
+        Ok(mut ffi_schema) => {
+            // ffi_schema must remain alive until after the callback is called
+            set_object_result_ok(&mut env, callback, addr_of_mut!(ffi_schema));
+        }
+        Err(err) => {
+            set_object_result_error(&mut env, callback, &err);
+        }
+    }
+}
+
+#[no_mangle]
+pub extern "system" fn Java_org_opensearch_datafusion_DataFusionQueryJNI_executeFetchPhase(
+    mut env: JNIEnv,
+    _class: JClass,
+    shard_view_ptr: jlong,
+    values: JLongArray,
+    projections: JObjectArray,
+    tokio_runtime_env_ptr: jlong,
+    callback: JObject,
+) -> jlong{
+    let shard_view = unsafe { &*(shard_view_ptr as *const ShardView) };
+    let runtime_ptr = unsafe { &*(tokio_runtime_env_ptr as *const Runtime)};
+
+    let table_path = shard_view.table_path();
+    let files_metadata = shard_view.files_metadata();
+
+    let projections: Vec<String> = parse_string_arr(&mut env, projections).expect("Expected list of files");
+
+    // Safety checks first
+    if values.is_null() {
+        let _ = env.throw_new("java/lang/NullPointerException", "values array is null");
+        return 0;
+    }
+
+    // 2. Get array length
+    let array_length = match env.get_array_length(&values) {
+        Ok(len) => len,
+        Err(e) => {
+            let _ = env.throw_new("java/lang/RuntimeException",
+                                  format!("Failed to get array length: {:?}", e));
+            return 0;
+        }
+    };
+
+    // 3. Allocate Rust buffer
+    let mut row_ids: Vec<jlong> = vec![0; array_length as usize];
+
+    // 4. Copy Java array into Rust buffer
+    match env.get_long_array_region(values, 0, &mut row_ids[..]) {
+        Ok(_) => {
+            println!("✅ Received array: {:?}", row_ids);
+        }
+        Err(e) => {
+            let _ = env.throw_new("java/lang/RuntimeException",
+                                  format!("Failed to get array data: {:?}", e));
+            return 0;
+        }
+    }
+
+
+    // Safety checks
+    if tokio_runtime_env_ptr == 0 {
+        let error = DataFusionError::Execution("Null runtime pointer".to_string());
+        set_object_result_error(&mut env, callback, &error);
+        return 0;
+    }
+
+    let access_plans = create_access_plans(row_ids, files_metadata.clone());
+
+    let runtime_env = RuntimeEnvBuilder::new()
+        .with_cache_manager(CacheManagerConfig::default()
+                            //.with_list_files_cache(Some(list_file_cache)) TODO: //Fix this
+        ).build().unwrap();
+    let ctx = SessionContext::new_with_config_rt(SessionConfig::new(), Arc::new(runtime_env));
+
+    // Create default parquet options
+    let file_format = ParquetFormat::new();
+    let listing_options = ListingOptions::new(Arc::new(file_format))
+        .with_file_extension(".parquet"); // TODO: take this as parameter
+    // .with_table_partition_cols(vec![("row_base".to_string(), DataType::Int32)]); // TODO: enable only for query phase
+
+    // Ideally the executor will give this
+
+
+
+    runtime_ptr.block_on(async {
+
+        let parquet_schema = listing_options
+            .infer_schema(&ctx.state(), &table_path.clone())
+            .await.unwrap();
+
+        // let total_groups = files_metadata[0].row_group_row_counts.len();
+        // let mut access_plan = ParquetAccessPlan::new_all(total_groups);
+        // for i in 0..total_groups {
+        //     access_plan.skip(i);
+        // }
+
+        // let partitioned_files: Vec<PartitionedFile> = files_metadata
+        //     .iter()
+        //     .zip(access_plans.await.iter())
+        //     .map(|(meta, access_plan)| {
+        //         PartitionedFile::new(
+        //             format!("{}/{}",
+        //                     table_path.prefix().to_string().trim_end_matches('/'),
+        //                     meta.object_meta().location.to_string().trim_start_matches('/')
+        //             ),
+        //             meta.object_meta.size
+        //         ).with_extensions(Arc::new(access_plan.clone()))
+        //     })
+        //     .collect();
+
+
+        let access_plans = access_plans.await.unwrap();
+
+        let partitioned_files: Vec<PartitionedFile> = files_metadata
+            .iter()
+            .zip(access_plans.iter())
+            .map(|(meta, access_plan)| {
+                PartitionedFile::new(meta.object_meta().location.to_string(),
+                    meta.object_meta.size
+                ).with_extensions(Arc::new(access_plan.clone()))
+            })
+            .collect();
+
+        let file_group = FileGroup::new(partitioned_files);
+
+        let file_source = Arc::new(
+            ParquetSource::default()
+            // provide the factory to create parquet reader without re-reading metadata
+            //.with_parquet_file_reader_factory(Arc::new(reader_factory)),
+        );
+
+        let mut projection_index = vec![];
+
+        for field_name in projections.iter() {
+            projection_index.push(parquet_schema.index_of(field_name).ok().unwrap());
+        }
+
+        let file_scan_config =
+            FileScanConfigBuilder::new(ObjectStoreUrl::local_filesystem(),  parquet_schema.clone(), file_source)
+                //.with_limit(limit)
+                .with_projection(Option::from(projection_index.clone()))
+                .with_file_group(file_group)
+                .build();
+
+        let parquet_exec = DataSourceExec::from_data_source(file_scan_config);
+
+        // IMPORTANT: Only get one reference to each pointer
+        // let liquid_ctx = unsafe { &mut *(context_ptr as *mut SessionContext) };
+        // let session_ctx = unsafe { Box::from_raw(context_ptr as *mut SessionContext) };
+        let mut optimized_plan: Arc<dyn ExecutionPlan> = parquet_exec.clone();
+
+
+        let task_ctx = Arc::new(TaskContext::default());
+
+        let stream = optimized_plan.execute(0, task_ctx).unwrap();
+
+        let stream_ptr = Box::into_raw(Box::new(stream)) as jlong;
+
+        stream_ptr
+    })
+}
+
+async fn create_access_plans(
+    row_ids: Vec<jlong>,
+    files_metadata: Arc<Vec<FileMetadata>>,
+) -> Result<Vec<ParquetAccessPlan>, DataFusionError> {
+    let mut access_plans = Vec::new();
+
+    // Sort row_ids for better processing
+    let mut sorted_row_ids: Vec<i64> = row_ids.iter().map(|&id| id as i64).collect();
+    sorted_row_ids.sort_unstable();
+
+    // Process each file
+    for file_meta in files_metadata.iter() {
+        let row_base = *file_meta.row_base;
+        let total_row_groups = file_meta.row_group_row_counts.len();
+        let mut access_plan = ParquetAccessPlan::new_all(total_row_groups);
+
+        // Calculate file's row range
+        let file_total_rows: i64 = file_meta.row_group_row_counts.iter().map(|&x| x).sum();
+        let file_end_row: i64 = row_base + file_total_rows;
+        // Filter row IDs that belong to this file
+        let file_row_ids: Vec<i64> = sorted_row_ids
+            .iter()
+            .copied() // or .cloned() if it's not Copy
+            .filter(|&id| id >= row_base && id < file_end_row)
+            .map(|id| {
+                id - row_base })
+            .collect();
+
+        if file_row_ids.is_empty() {
+            // If no rows belong to this file, skip all row groups
+            for group_id in 0..total_row_groups {
+                access_plan.skip(group_id);
+            }
+        } else {
+            // Create cumulative row counts for row groups
+            let mut cumulative_group_rows: Vec<i64> = Vec::with_capacity(total_row_groups + 1);
+            cumulative_group_rows.push(0);
+            let mut current_sum = 0;
+            for &count in file_meta.row_group_row_counts.iter() {
+                current_sum += count;
+                cumulative_group_rows.push(current_sum);
+            }
+            // Group local row IDs by row group
+            let mut group_map: HashMap<usize, BTreeSet<i32>> = HashMap::new();
+            for &row_id in &file_row_ids {
+                // Find the appropriate row group using binary search
+                let group_id = cumulative_group_rows.windows(2)
+                    .position(|window| row_id >= window[0] as i64 && row_id < window[1] as i64)
+                    .unwrap();
+
+                // Calculate relative position within the row group
+                let relative_pos = row_id - cumulative_group_rows[group_id];
+                group_map.entry(group_id)
+                    .or_default()
+                    .insert(relative_pos as i32);
+            }
+
+            // Process each row group
+            for group_id in 0..total_row_groups {
+                let row_group_size = file_meta.row_group_row_counts[group_id] as usize;
+
+                if let Some(group_row_ids) = group_map.get(&group_id) {
+                    let mut relative_row_ids: Vec<usize> = group_row_ids.iter()
+                        .map(|&x| x as usize)
+                        .collect();
+                    relative_row_ids.sort_unstable();
+
+                    if relative_row_ids.is_empty() {
+                        access_plan.skip(group_id);
+                    } else if relative_row_ids.len() == row_group_size {
+                        access_plan.scan(group_id);
+                    } else {
+                        // Create selectors
+                        let mut selectors = Vec::new();
+                        let mut current_pos = 0;
+                        let mut i = 0;
+                        while i < relative_row_ids.len() {
+                            let mut target_pos = relative_row_ids[i];
+                            if target_pos > current_pos {
+                                selectors.push(RowSelector::skip(target_pos - current_pos));
+                            }
+                            let mut select_count = 1;
+                            while i + 1 < relative_row_ids.len() &&
+                                relative_row_ids[i + 1] == relative_row_ids[i] + 1 {
+                                select_count += 1;
+                                i += 1;
+                                target_pos = relative_row_ids[i];
+                            }
+                            selectors.push(RowSelector::select(select_count));
+                            current_pos = relative_row_ids[i] + 1;
+                            i += 1;
+                        }
+                        if current_pos < row_group_size {
+                            selectors.push(RowSelector::skip(row_group_size - current_pos));
+                        }
+                        access_plan.set(group_id, RowGroupAccess::Selection(selectors.into()));
+                    }
+                } else {
+                    access_plan.skip(group_id);
+                }
+            }
+        }
+
+        access_plans.push(access_plan);
+    }
+
+    Ok(access_plans)
+}
diff --git a/plugins/engine-datafusion/jni/src/listing_table.rs b/plugins/engine-datafusion/jni/src/listing_table.rs
new file mode 100644
index 0000000000000..27612175cf173
--- /dev/null
+++ b/plugins/engine-datafusion/jni/src/listing_table.rs
@@ -0,0 +1,1591 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+//! The table implementation.
+
+use datafusion::datasource::listing::{
+    helpers::{expr_applicable_for_cols, pruned_partition_list},
+    ListingTableUrl, PartitionedFile,
+};
+use datafusion::{
+    datasource::file_format::{file_compression_type::FileCompressionType, FileFormat},
+    datasource::{create_ordering, physical_plan::FileSinkConfig},
+    execution::context::SessionState,
+};
+use arrow::datatypes::{DataType, Field, SchemaBuilder, SchemaRef};
+use arrow_schema::Schema;
+use async_trait::async_trait;
+use datafusion::catalog::{Session, TableProvider};
+use datafusion::common::{config_datafusion_err, config_err, internal_err, plan_err, project_schema, stats::Precision, Constraints, DataFusionError, Result, ScalarValue, SchemaExt};
+use datafusion_datasource::{
+    compute_all_files_statistics,
+    file::FileSource,
+    file_groups::FileGroup,
+    file_scan_config::{FileScanConfig, FileScanConfigBuilder},
+    schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter, SchemaAdapterFactory},
+};
+use datafusion::execution::{
+    cache::{cache_manager::FileStatisticsCache, cache_unit::DefaultFileStatisticsCache},
+    config::SessionConfig,
+};
+use datafusion_expr::{
+    dml::InsertOp, Expr, SortExpr, TableProviderFilterPushDown, TableType,
+};
+use datafusion::physical_expr::schema_rewriter::PhysicalExprAdapterFactory;
+use datafusion::physical_expr_common::sort_expr::LexOrdering;
+use datafusion::physical_plan::{empty::EmptyExec, ExecutionPlan, Statistics};
+use futures::{future, stream, Stream, StreamExt, TryStreamExt};
+use itertools::Itertools;
+use object_store::ObjectStore;
+use std::{any::Any, collections::HashMap, str::FromStr, sync::Arc};
+use std::fs::File;
+use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use regex::Regex;
+use crate::FileMetadata;
+
+/// Indicates the source of the schema for a [`ListingTable`]
+// PartialEq required for assert_eq! in tests
+#[derive(Debug, Clone, Copy, PartialEq, Default)]
+pub enum SchemaSource {
+    /// Schema is not yet set (initial state)
+    #[default]
+    Unset,
+    /// Schema was inferred from first table_path
+    Inferred,
+    /// Schema was specified explicitly via with_schema
+    Specified,
+}
+
+/// Configuration for creating a [`ListingTable`]
+///
+/// # Schema Evolution Support
+///
+/// This configuration supports schema evolution through the optional
+/// [`SchemaAdapterFactory`]. You might want to override the default factory when you need:
+///
+/// - **Type coercion requirements**: When you need custom logic for converting between
+///   different Arrow data types (e.g., Int32 ↔ Int64, Utf8 ↔ LargeUtf8)
+/// - **Column mapping**: You need to map columns with a legacy name to a new name
+/// - **Custom handling of missing columns**: By default they are filled in with nulls, but you may e.g. want to fill them in with `0` or `""`.
+///
+/// If not specified, a [`DefaultSchemaAdapterFactory`] will be used, which handles
+/// basic schema compatibility cases.
+///
+#[derive(Debug, Clone, Default)]
+pub struct ListingTableConfig {
+    /// Paths on the `ObjectStore` for creating `ListingTable`.
+    /// They should share the same schema and object store.
+    pub table_paths: Vec<ListingTableUrl>,
+    /// Optional `SchemaRef` for the to be created `ListingTable`.
+    ///
+    /// See details on [`ListingTableConfig::with_schema`]
+    pub file_schema: Option<SchemaRef>,
+    /// Optional [`ListingOptions`] for the to be created [`ListingTable`].
+    ///
+    /// See details on [`ListingTableConfig::with_listing_options`]
+    pub options: Option<ListingOptions>,
+    /// Tracks the source of the schema information
+    schema_source: SchemaSource,
+    /// Optional [`SchemaAdapterFactory`] for creating schema adapters
+    schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
+    /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters
+    expr_adapter_factory: Option<Arc<dyn PhysicalExprAdapterFactory>>,
+}
+
+impl ListingTableConfig {
+    /// Creates new [`ListingTableConfig`] for reading the specified URL
+    pub fn new(table_path: ListingTableUrl) -> Self {
+        Self {
+            table_paths: vec![table_path],
+            ..Default::default()
+        }
+    }
+
+    /// Creates new [`ListingTableConfig`] with multiple table paths.
+    ///
+    /// See [`Self::infer_options`] for details on what happens with multiple paths
+    pub fn new_with_multi_paths(table_paths: Vec<ListingTableUrl>) -> Self {
+        Self {
+            table_paths,
+            ..Default::default()
+        }
+    }
+
+    /// Returns the source of the schema for this configuration
+    pub fn schema_source(&self) -> SchemaSource {
+        self.schema_source
+    }
+    /// Set the `schema` for the overall [`ListingTable`]
+    ///
+    /// [`ListingTable`] will automatically coerce, when possible, the schema
+    /// for individual files to match this schema.
+    ///
+    /// If a schema is not provided, it is inferred using
+    /// [`Self::infer_schema`].
+    ///
+    /// If the schema is provided, it must contain only the fields in the file
+    /// without the table partitioning columns.
+    ///
+    /// # Example: Specifying Table Schema
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # use arrow::datatypes::{Schema, Field, DataType};
+    /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
+    /// let schema = Arc::new(Schema::new(vec![
+    ///     Field::new("id", DataType::Int64, false),
+    ///     Field::new("name", DataType::Utf8, true),
+    /// ]));
+    ///
+    /// let config = ListingTableConfig::new(table_paths)
+    ///     .with_listing_options(listing_options)  // Set options first
+    ///     .with_schema(schema);                    // Then set schema
+    /// ```
+    pub fn with_schema(self, schema: SchemaRef) -> Self {
+        // Note: We preserve existing options state, but downstream code may expect
+        // options to be set. Consider calling with_listing_options() or infer_options()
+        // before operations that require options to be present.
+        debug_assert!(
+            self.options.is_some() || cfg!(test),
+            "ListingTableConfig::with_schema called without options set. \
+             Consider calling with_listing_options() or infer_options() first to avoid panics in downstream code."
+        );
+
+        Self {
+            file_schema: Some(schema),
+            schema_source: SchemaSource::Specified,
+            ..self
+        }
+    }
+
+    /// Add `listing_options` to [`ListingTableConfig`]
+    ///
+    /// If not provided, format and other options are inferred via
+    /// [`Self::infer_options`].
+    ///
+    /// # Example: Configuring Parquet Files with Custom Options
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// let options = ListingOptions::new(Arc::new(ParquetFormat::default()))
+    ///     .with_file_extension(".parquet")
+    ///     .with_collect_stat(true);
+    ///
+    /// let config = ListingTableConfig::new(table_paths)
+    ///     .with_listing_options(options);  // Configure file format and options
+    /// ```
+    pub fn with_listing_options(self, listing_options: ListingOptions) -> Self {
+        // Note: This method properly sets options, but be aware that downstream
+        // methods like infer_schema() and try_new() require both schema and options
+        // to be set to function correctly.
+        debug_assert!(
+            !self.table_paths.is_empty() || cfg!(test),
+            "ListingTableConfig::with_listing_options called without table_paths set. \
+             Consider calling new() or new_with_multi_paths() first to establish table paths."
+        );
+
+        Self {
+            options: Some(listing_options),
+            ..self
+        }
+    }
+
+    /// Returns a tuple of `(file_extension, optional compression_extension)`
+    ///
+    /// For example a path ending with blah.test.csv.gz returns `("csv", Some("gz"))`
+    /// For example a path ending with blah.test.csv returns `("csv", None)`
+    fn infer_file_extension_and_compression_type(
+        path: &str,
+    ) -> Result<(String, Option<String>)> {
+        let mut exts = path.rsplit('.');
+
+        let splitted = exts.next().unwrap_or("");
+
+        let file_compression_type = FileCompressionType::from_str(splitted)
+            .unwrap_or(FileCompressionType::UNCOMPRESSED);
+
+        if file_compression_type.is_compressed() {
+            let splitted2 = exts.next().unwrap_or("");
+            Ok((splitted2.to_string(), Some(splitted.to_string())))
+        } else {
+            Ok((splitted.to_string(), None))
+        }
+    }
+
+    /// Infer `ListingOptions` based on `table_path` and file suffix.
+    ///
+    /// The format is inferred based on the first `table_path`.
+    pub async fn infer_options(self, state: &dyn Session) -> Result<Self> {
+        let store = if let Some(url) = self.table_paths.first() {
+            state.runtime_env().object_store(url)?
+        } else {
+            return Ok(self);
+        };
+
+        let file = self
+            .table_paths
+            .first()
+            .unwrap()
+            .list_all_files(state, store.as_ref(), "")
+            .await?
+            .next()
+            .await
+            .ok_or_else(|| DataFusionError::Internal("No files for table".into()))??;
+
+        let (file_extension, maybe_compression_type) =
+            ListingTableConfig::infer_file_extension_and_compression_type(
+                file.location.as_ref(),
+            )?;
+
+        let mut format_options = HashMap::new();
+        if let Some(ref compression_type) = maybe_compression_type {
+            format_options
+                .insert("format.compression".to_string(), compression_type.clone());
+        }
+        let state = state.as_any().downcast_ref::<SessionState>().unwrap();
+        let file_format = state
+            .get_file_format_factory(&file_extension)
+            .ok_or(config_datafusion_err!(
+                "No file_format found with extension {file_extension}"
+            ))?
+            .create(state, &format_options)?;
+
+        let listing_file_extension =
+            if let Some(compression_type) = maybe_compression_type {
+                format!("{}.{}", &file_extension, &compression_type)
+            } else {
+                file_extension
+            };
+
+        let listing_options = ListingOptions::new(file_format)
+            .with_file_extension(listing_file_extension)
+            .with_target_partitions(state.config().target_partitions())
+            .with_collect_stat(state.config().collect_statistics());
+
+        Ok(Self {
+            table_paths: self.table_paths,
+            file_schema: self.file_schema,
+            options: Some(listing_options),
+            schema_source: self.schema_source,
+            schema_adapter_factory: self.schema_adapter_factory,
+            expr_adapter_factory: self.expr_adapter_factory,
+        })
+    }
+
+    /// Infer the [`SchemaRef`] based on `table_path`s.
+    ///
+    /// This method infers the table schema using the first `table_path`.
+    /// See [`ListingOptions::infer_schema`] for more details
+    ///
+    /// # Errors
+    /// * if `self.options` is not set. See [`Self::with_listing_options`]
+    pub async fn infer_schema(self, state: &dyn Session) -> Result<Self> {
+        match self.options {
+            Some(options) => {
+                let ListingTableConfig {
+                    table_paths,
+                    file_schema,
+                    options: _,
+                    schema_source,
+                    schema_adapter_factory,
+                    expr_adapter_factory: physical_expr_adapter_factory,
+                } = self;
+
+                let (schema, new_schema_source) = match file_schema {
+                    Some(schema) => (schema, schema_source), // Keep existing source if schema exists
+                    None => {
+                        if let Some(url) = table_paths.first() {
+                            (
+                                options.infer_schema(state, url).await?,
+                                SchemaSource::Inferred,
+                            )
+                        } else {
+                            (Arc::new(Schema::empty()), SchemaSource::Inferred)
+                        }
+                    }
+                };
+
+                Ok(Self {
+                    table_paths,
+                    file_schema: Some(schema),
+                    options: Some(options),
+                    schema_source: new_schema_source,
+                    schema_adapter_factory,
+                    expr_adapter_factory: physical_expr_adapter_factory,
+                })
+            }
+            None => internal_err!("No `ListingOptions` set for inferring schema"),
+        }
+    }
+
+    /// Convenience method to call both [`Self::infer_options`] and [`Self::infer_schema`]
+    pub async fn infer(self, state: &dyn Session) -> Result<Self> {
+        self.infer_options(state).await?.infer_schema(state).await
+    }
+
+    /// Infer the partition columns from `table_paths`.
+    ///
+    /// # Errors
+    /// * if `self.options` is not set. See [`Self::with_listing_options`]
+    pub async fn infer_partitions_from_path(self, state: &dyn Session) -> Result<Self> {
+        match self.options {
+            Some(options) => {
+                let Some(url) = self.table_paths.first() else {
+                    return config_err!("No table path found");
+                };
+                let partitions = options
+                    .infer_partitions(state, url)
+                    .await?
+                    .into_iter()
+                    .map(|col_name| {
+                        (
+                            col_name,
+                            DataType::Dictionary(
+                                Box::new(DataType::UInt16),
+                                Box::new(DataType::Utf8),
+                            ),
+                        )
+                    })
+                    .collect::<Vec<_>>();
+                let options = options.with_table_partition_cols(partitions);
+                Ok(Self {
+                    table_paths: self.table_paths,
+                    file_schema: self.file_schema,
+                    options: Some(options),
+                    schema_source: self.schema_source,
+                    schema_adapter_factory: self.schema_adapter_factory,
+                    expr_adapter_factory: self.expr_adapter_factory,
+                })
+            }
+            None => config_err!("No `ListingOptions` set for inferring schema"),
+        }
+    }
+
+    /// Set the [`SchemaAdapterFactory`] for the [`ListingTable`]
+    ///
+    /// The schema adapter factory is used to create schema adapters that can
+    /// handle schema evolution and type conversions when reading files with
+    /// different schemas than the table schema.
+    ///
+    /// If not provided, a default schema adapter factory will be used.
+    ///
+    /// # Example: Custom Schema Adapter for Type Coercion
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTableConfig, ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::schema_adapter::{SchemaAdapterFactory, SchemaAdapter};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
+    /// #
+    /// # #[derive(Debug)]
+    /// # struct MySchemaAdapterFactory;
+    /// # impl SchemaAdapterFactory for MySchemaAdapterFactory {
+    /// #     fn create(&self, _projected_table_schema: SchemaRef, _file_schema: SchemaRef) -> Box<dyn SchemaAdapter> {
+    /// #         unimplemented!()
+    /// #     }
+    /// # }
+    /// # let table_paths = ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// # let listing_options = ListingOptions::new(Arc::new(ParquetFormat::default()));
+    /// # let table_schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
+    /// let config = ListingTableConfig::new(table_paths)
+    ///     .with_listing_options(listing_options)
+    ///     .with_schema(table_schema)
+    ///     .with_schema_adapter_factory(Arc::new(MySchemaAdapterFactory));
+    /// ```
+    pub fn with_schema_adapter_factory(
+        self,
+        schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
+    ) -> Self {
+        Self {
+            schema_adapter_factory: Some(schema_adapter_factory),
+            ..self
+        }
+    }
+
+    /// Get the [`SchemaAdapterFactory`] for this configuration
+    pub fn schema_adapter_factory(&self) -> Option<&Arc<dyn SchemaAdapterFactory>> {
+        self.schema_adapter_factory.as_ref()
+    }
+
+    /// Set the [`PhysicalExprAdapterFactory`] for the [`ListingTable`]
+    ///
+    /// The expression adapter factory is used to create physical expression adapters that can
+    /// handle schema evolution and type conversions when evaluating expressions
+    /// with different schemas than the table schema.
+    ///
+    /// If not provided, a default physical expression adapter factory will be used unless a custom
+    /// `SchemaAdapterFactory` is set, in which case only the `SchemaAdapterFactory` will be used.
+    ///
+    /// See <https://github.com/apache/datafusion/issues/16800> for details on this transition.
+    pub fn with_expr_adapter_factory(
+        self,
+        expr_adapter_factory: Arc<dyn PhysicalExprAdapterFactory>,
+    ) -> Self {
+        Self {
+            expr_adapter_factory: Some(expr_adapter_factory),
+            ..self
+        }
+    }
+}
+
+/// Options for creating a [`ListingTable`]
+#[derive(Clone, Debug)]
+pub struct ListingOptions {
+    /// A suffix on which files should be filtered (leave empty to
+    /// keep all files on the path)
+    pub file_extension: String,
+    /// The file format
+    pub format: Arc<dyn FileFormat>,
+    /// The expected partition column names in the folder structure.
+    /// See [Self::with_table_partition_cols] for details
+    pub table_partition_cols: Vec<(String, DataType)>,
+    /// Set true to try to guess statistics from the files.
+    /// This can add a lot of overhead as it will usually require files
+    /// to be opened and at least partially parsed.
+    pub collect_stat: bool,
+    /// Group files to avoid that the number of partitions exceeds
+    /// this limit
+    pub target_partitions: usize,
+    /// Optional pre-known sort order(s). Must be `SortExpr`s.
+    ///
+    /// DataFusion may take advantage of this ordering to omit sorts
+    /// or use more efficient algorithms. Currently sortedness must be
+    /// provided if it is known by some external mechanism, but may in
+    /// the future be automatically determined, for example using
+    /// parquet metadata.
+    ///
+    /// See <https://github.com/apache/datafusion/issues/4177>
+    ///
+    /// NOTE: This attribute stores all equivalent orderings (the outer `Vec`)
+    ///       where each ordering consists of an individual lexicographic
+    ///       ordering (encapsulated by a `Vec<Expr>`). If there aren't
+    ///       multiple equivalent orderings, the outer `Vec` will have a
+    ///       single element.
+    pub file_sort_order: Vec<Vec<SortExpr>>,
+
+    pub files_metadata: Arc<Vec<FileMetadata>>
+}
+
+impl ListingOptions {
+    /// Creates an options instance with the given format
+    /// Default values:
+    /// - use default file extension filter
+    /// - no input partition to discover
+    /// - one target partition
+    /// - do not collect statistics
+    pub fn new(format: Arc<dyn FileFormat>) -> Self {
+        Self {
+            file_extension: format.get_ext(),
+            format,
+            table_partition_cols: vec![],
+            collect_stat: false,
+            target_partitions: 1,
+            file_sort_order: vec![],
+            files_metadata: Arc::new(vec![])
+        }
+    }
+
+    /// Set options from [`SessionConfig`] and returns self.
+    ///
+    /// Currently this sets `target_partitions` and `collect_stat`
+    /// but if more options are added in the future that need to be coordinated
+    /// they will be synchronized thorugh this method.
+    pub fn with_session_config_options(mut self, config: &SessionConfig) -> Self {
+        self = self.with_target_partitions(config.target_partitions());
+        self = self.with_collect_stat(config.collect_statistics());
+        self
+    }
+
+    /// Set file extension on [`ListingOptions`] and returns self.
+    ///
+    /// # Example
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use datafusion::prelude::SessionContext;
+    /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// let listing_options = ListingOptions::new(Arc::new(
+    ///     ParquetFormat::default()
+    ///   ))
+    ///   .with_file_extension(".parquet");
+    ///
+    /// assert_eq!(listing_options.file_extension, ".parquet");
+    /// ```
+    pub fn with_file_extension(mut self, file_extension: impl Into<String>) -> Self {
+        self.file_extension = file_extension.into();
+        self
+    }
+
+    pub fn with_files_metadata(mut self, files_metadata: Arc<Vec<FileMetadata>>) -> Self {
+        self.files_metadata = files_metadata.clone();
+        self
+    }
+
+    /// Optionally set file extension on [`ListingOptions`] and returns self.
+    ///
+    /// If `file_extension` is `None`, the file extension will not be changed
+    ///
+    /// # Example
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use datafusion::prelude::SessionContext;
+    /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    /// let extension = Some(".parquet");
+    /// let listing_options = ListingOptions::new(Arc::new(
+    ///     ParquetFormat::default()
+    ///   ))
+    ///   .with_file_extension_opt(extension);
+    ///
+    /// assert_eq!(listing_options.file_extension, ".parquet");
+    /// ```
+    pub fn with_file_extension_opt<S>(mut self, file_extension: Option<S>) -> Self
+    where
+        S: Into<String>,
+    {
+        if let Some(file_extension) = file_extension {
+            self.file_extension = file_extension.into();
+        }
+        self
+    }
+
+    /// Set `table partition columns` on [`ListingOptions`] and returns self.
+    ///
+    /// "partition columns," used to support [Hive Partitioning], are
+    /// columns added to the data that is read, based on the folder
+    /// structure where the data resides.
+    ///
+    /// For example, give the following files in your filesystem:
+    ///
+    /// ```text
+    /// /mnt/nyctaxi/year=2022/month=01/tripdata.parquet
+    /// /mnt/nyctaxi/year=2021/month=12/tripdata.parquet
+    /// /mnt/nyctaxi/year=2021/month=11/tripdata.parquet
+    /// ```
+    ///
+    /// A [`ListingTable`] created at `/mnt/nyctaxi/` with partition
+    /// columns "year" and "month" will include new `year` and `month`
+    /// columns while reading the files. The `year` column would have
+    /// value `2022` and the `month` column would have value `01` for
+    /// the rows read from
+    /// `/mnt/nyctaxi/year=2022/month=01/tripdata.parquet`
+    ///
+    ///# Notes
+    ///
+    /// - If only one level (e.g. `year` in the example above) is
+    ///   specified, the other levels are ignored but the files are
+    ///   still read.
+    ///
+    /// - Files that don't follow this partitioning scheme will be
+    ///   ignored.
+    ///
+    /// - Since the columns have the same value for all rows read from
+    ///   each individual file (such as dates), they are typically
+    ///   dictionary encoded for efficiency. You may use
+    ///   [`wrap_partition_type_in_dict`] to request a
+    ///   dictionary-encoded type.
+    ///
+    /// - The partition columns are solely extracted from the file path. Especially they are NOT part of the parquet files itself.
+    ///
+    /// # Example
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use arrow::datatypes::DataType;
+    /// # use datafusion::prelude::col;
+    /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// // listing options for files with paths such as  `/mnt/data/col_a=x/col_b=y/data.parquet`
+    /// // `col_a` and `col_b` will be included in the data read from those files
+    /// let listing_options = ListingOptions::new(Arc::new(
+    ///     ParquetFormat::default()
+    ///   ))
+    ///   .with_table_partition_cols(vec![("col_a".to_string(), DataType::Utf8),
+    ///       ("col_b".to_string(), DataType::Utf8)]);
+    ///
+    /// assert_eq!(listing_options.table_partition_cols, vec![("col_a".to_string(), DataType::Utf8),
+    ///     ("col_b".to_string(), DataType::Utf8)]);
+    /// ```
+    ///
+    /// [Hive Partitioning]: https://docs.cloudera.com/HDPDocuments/HDP2/HDP-2.1.3/bk_system-admin-guide/content/hive_partitioned_tables.html
+    /// [`wrap_partition_type_in_dict`]: crate::datasource::physical_plan::wrap_partition_type_in_dict
+    pub fn with_table_partition_cols(
+        mut self,
+        table_partition_cols: Vec<(String, DataType)>,
+    ) -> Self {
+        self.table_partition_cols = table_partition_cols;
+        self
+    }
+
+    /// Set stat collection on [`ListingOptions`] and returns self.
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// let listing_options = ListingOptions::new(Arc::new(
+    ///     ParquetFormat::default()
+    ///   ))
+    ///   .with_collect_stat(true);
+    ///
+    /// assert_eq!(listing_options.collect_stat, true);
+    /// ```
+    pub fn with_collect_stat(mut self, collect_stat: bool) -> Self {
+        self.collect_stat = collect_stat;
+        self
+    }
+
+    /// Set number of target partitions on [`ListingOptions`] and returns self.
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    /// let listing_options = ListingOptions::new(Arc::new(
+    ///     ParquetFormat::default()
+    ///   ))
+    ///   .with_target_partitions(8);
+    ///
+    /// assert_eq!(listing_options.target_partitions, 8);
+    /// ```
+    pub fn with_target_partitions(mut self, target_partitions: usize) -> Self {
+        self.target_partitions = target_partitions;
+        self
+    }
+
+    /// Set file sort order on [`ListingOptions`] and returns self.
+    ///
+    /// ```
+    /// # use std::sync::Arc;
+    /// # use datafusion::prelude::col;
+    /// # use datafusion::datasource::{listing::ListingOptions, file_format::parquet::ParquetFormat};
+    ///
+    ///  // Tell datafusion that the files are sorted by column "a"
+    ///  let file_sort_order = vec![vec![
+    ///    col("a").sort(true, true)
+    ///  ]];
+    ///
+    /// let listing_options = ListingOptions::new(Arc::new(
+    ///     ParquetFormat::default()
+    ///   ))
+    ///   .with_file_sort_order(file_sort_order.clone());
+    ///
+    /// assert_eq!(listing_options.file_sort_order, file_sort_order);
+    /// ```
+    pub fn with_file_sort_order(mut self, file_sort_order: Vec<Vec<SortExpr>>) -> Self {
+        self.file_sort_order = file_sort_order;
+        self
+    }
+
+    /// Infer the schema of the files at the given path on the provided object store.
+    ///
+    /// If the table_path contains one or more files (i.e. it is a directory /
+    /// prefix of files) their schema is merged by calling [`FileFormat::infer_schema`]
+    ///
+    /// Note: The inferred schema does not include any partitioning columns.
+    ///
+    /// This method is called as part of creating a [`ListingTable`].
+    pub async fn infer_schema<'a>(
+        &'a self,
+        state: &dyn Session,
+        table_path: &'a ListingTableUrl,
+    ) -> Result<SchemaRef> {
+        let store = state.runtime_env().object_store(table_path)?;
+
+        let files: Vec<_> = table_path
+            .list_all_files(state, store.as_ref(), &self.file_extension)
+            .await?
+            // Empty files cannot affect schema but may throw when trying to read for it
+            .try_filter(|object_meta| future::ready(object_meta.size > 0))
+            .try_collect()
+            .await?;
+
+        let schema = self.format.infer_schema(state, &store, &files).await?;
+
+        Ok(schema)
+    }
+
+    /// Infers the partition columns stored in `LOCATION` and compares
+    /// them with the columns provided in `PARTITIONED BY` to help prevent
+    /// accidental corrupts of partitioned tables.
+    ///
+    /// Allows specifying partial partitions.
+    pub async fn validate_partitions(
+        &self,
+        state: &dyn Session,
+        table_path: &ListingTableUrl,
+    ) -> Result<()> {
+        if self.table_partition_cols.is_empty() {
+            return Ok(());
+        }
+
+        if !table_path.is_collection() {
+            return plan_err!(
+                "Can't create a partitioned table backed by a single file, \
+                perhaps the URL is missing a trailing slash?"
+            );
+        }
+
+        let inferred = self.infer_partitions(state, table_path).await?;
+
+        // no partitioned files found on disk
+        if inferred.is_empty() {
+            return Ok(());
+        }
+
+        let table_partition_names = self
+            .table_partition_cols
+            .iter()
+            .map(|(col_name, _)| col_name.clone())
+            .collect_vec();
+
+        if inferred.len() < table_partition_names.len() {
+            return plan_err!(
+                "Inferred partitions to be {:?}, but got {:?}",
+                inferred,
+                table_partition_names
+            );
+        }
+
+        // match prefix to allow creating tables with partial partitions
+        for (idx, col) in table_partition_names.iter().enumerate() {
+            if &inferred[idx] != col {
+                return plan_err!(
+                    "Inferred partitions to be {:?}, but got {:?}",
+                    inferred,
+                    table_partition_names
+                );
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Infer the partitioning at the given path on the provided object store.
+    /// For performance reasons, it doesn't read all the files on disk
+    /// and therefore may fail to detect invalid partitioning.
+    pub(crate) async fn infer_partitions(
+        &self,
+        state: &dyn Session,
+        table_path: &ListingTableUrl,
+    ) -> Result<Vec<String>> {
+        let store = state.runtime_env().object_store(table_path)?;
+
+        // only use 10 files for inference
+        // This can fail to detect inconsistent partition keys
+        // A DFS traversal approach of the store can help here
+        let files: Vec<_> = table_path
+            .list_all_files(state, store.as_ref(), &self.file_extension)
+            .await?
+            .take(10)
+            .try_collect()
+            .await?;
+
+        let stripped_path_parts = files.iter().map(|file| {
+            table_path
+                .strip_prefix(&file.location)
+                .unwrap()
+                .collect_vec()
+        });
+
+        let partition_keys = stripped_path_parts
+            .map(|path_parts| {
+                path_parts
+                    .into_iter()
+                    .rev()
+                    .skip(1) // get parents only; skip the file itself
+                    .rev()
+                    .map(|s| s.split('=').take(1).collect())
+                    .collect_vec()
+            })
+            .collect_vec();
+
+        match partition_keys.into_iter().all_equal_value() {
+            Ok(v) => Ok(v),
+            Err(None) => Ok(vec![]),
+            Err(Some(diff)) => {
+                let mut sorted_diff = [diff.0, diff.1];
+                sorted_diff.sort();
+                plan_err!("Found mixed partition values on disk {:?}", sorted_diff)
+            }
+        }
+    }
+}
+
+/// Reads data from one or more files as a single table.
+///
+/// Implements [`TableProvider`], a DataFusion data source. The files are read
+/// using an  [`ObjectStore`] instance, for example from local files or objects
+/// from AWS S3.
+///
+/// # Reading Directories
+/// For example, given the `table1` directory (or object store prefix)
+///
+/// ```text
+/// table1
+///  ├── file1.parquet
+///  └── file2.parquet
+/// ```
+///
+/// A `ListingTable` would read the files `file1.parquet` and `file2.parquet` as
+/// a single table, merging the schemas if the files have compatible but not
+/// identical schemas.
+///
+/// Given the `table2` directory (or object store prefix)
+///
+/// ```text
+/// table2
+///  ├── date=2024-06-01
+///  │    ├── file3.parquet
+///  │    └── file4.parquet
+///  └── date=2024-06-02
+///       └── file5.parquet
+/// ```
+///
+/// A `ListingTable` would read the files `file3.parquet`, `file4.parquet`, and
+/// `file5.parquet` as a single table, again merging schemas if necessary.
+///
+/// Given the hive style partitioning structure (e.g,. directories named
+/// `date=2024-06-01` and `date=2026-06-02`), `ListingTable` also adds a `date`
+/// column when reading the table:
+/// * The files in `table2/date=2024-06-01` will have the value `2024-06-01`
+/// * The files in `table2/date=2024-06-02` will have the value `2024-06-02`.
+///
+/// If the query has a predicate like `WHERE date = '2024-06-01'`
+/// only the corresponding directory will be read.
+///
+/// `ListingTable` also supports limit, filter and projection pushdown for formats that
+/// support it as such as Parquet.
+///
+/// # See Also
+///
+/// 1. [`ListingTableConfig`]: Configuration options
+/// 1. [`DataSourceExec`]: `ExecutionPlan` used by `ListingTable`
+///
+/// [`DataSourceExec`]: crate::datasource::source::DataSourceExec
+///
+/// # Example: Read a directory of parquet files using a [`ListingTable`]
+///
+/// ```no_run
+/// # use datafusion::prelude::SessionContext;
+/// # use datafusion::error::Result;
+/// # use std::sync::Arc;
+/// # use datafusion::datasource::{
+/// #   listing::{
+/// #      ListingOptions, ListingTable, ListingTableConfig, ListingTableUrl,
+/// #   },
+/// #   file_format::parquet::ParquetFormat,
+/// # };
+/// # #[tokio::main]
+/// # async fn main() -> Result<()> {
+/// let ctx = SessionContext::new();
+/// let session_state = ctx.state();
+/// let table_path = "/path/to/parquet";
+///
+/// // Parse the path
+/// let table_path = ListingTableUrl::parse(table_path)?;
+///
+/// // Create default parquet options
+/// let file_format = ParquetFormat::new();
+/// let listing_options = ListingOptions::new(Arc::new(file_format))
+///   .with_file_extension(".parquet");
+///
+/// // Resolve the schema
+/// let resolved_schema = listing_options
+///    .infer_schema(&session_state, &table_path)
+///    .await?;
+///
+/// let config = ListingTableConfig::new(table_path)
+///   .with_listing_options(listing_options)
+///   .with_schema(resolved_schema);
+///
+/// // Create a new TableProvider
+/// let provider = Arc::new(ListingTable::try_new(config)?);
+///
+/// // This provider can now be read as a dataframe:
+/// let df = ctx.read_table(provider.clone());
+///
+/// // or registered as a named table:
+/// ctx.register_table("my_table", provider);
+///
+/// # Ok(())
+/// # }
+/// ```
+#[derive(Debug, Clone)]
+pub struct ListingTable {
+    table_paths: Vec<ListingTableUrl>,
+    /// `file_schema` contains only the columns physically stored in the data files themselves.
+    ///     - Represents the actual fields found in files like Parquet, CSV, etc.
+    ///     - Used when reading the raw data from files
+    file_schema: SchemaRef,
+    /// `table_schema` combines `file_schema` + partition columns
+    ///     - Partition columns are derived from directory paths (not stored in files)
+    ///     - These are columns like "year=2022/month=01" in paths like `/data/year=2022/month=01/file.parquet`
+    table_schema: SchemaRef,
+    /// Indicates how the schema was derived (inferred or explicitly specified)
+    schema_source: SchemaSource,
+    options: ListingOptions,
+    definition: Option<String>,
+    collected_statistics: FileStatisticsCache,
+    constraints: Constraints,
+    column_defaults: HashMap<String, Expr>,
+    /// Optional [`SchemaAdapterFactory`] for creating schema adapters
+    schema_adapter_factory: Option<Arc<dyn SchemaAdapterFactory>>,
+    /// Optional [`PhysicalExprAdapterFactory`] for creating physical expression adapters
+    expr_adapter_factory: Option<Arc<dyn PhysicalExprAdapterFactory>>,
+}
+
+impl ListingTable {
+    /// Create new [`ListingTable`]
+    ///
+    /// See documentation and example on [`ListingTable`] and [`ListingTableConfig`]
+    pub fn try_new(config: ListingTableConfig) -> Result<Self> {
+        // Extract schema_source before moving other parts of the config
+        let schema_source = config.schema_source();
+
+        let file_schema = config
+            .file_schema
+            .ok_or_else(|| DataFusionError::Internal("No schema provided.".into()))?;
+
+        let options = config.options.ok_or_else(|| {
+            DataFusionError::Internal("No ListingOptions provided".into())
+        })?;
+
+        // Add the partition columns to the file schema
+        let mut builder = SchemaBuilder::from(file_schema.as_ref().to_owned());
+        for (part_col_name, part_col_type) in &options.table_partition_cols {
+            builder.push(Field::new(part_col_name, part_col_type.clone(), false));
+        }
+
+        let table_schema = Arc::new(
+            builder
+                .finish()
+                .with_metadata(file_schema.metadata().clone()),
+        );
+
+        let table = Self {
+            table_paths: config.table_paths,
+            file_schema,
+            table_schema,
+            schema_source,
+            options,
+            definition: None,
+            collected_statistics: Arc::new(DefaultFileStatisticsCache::default()),
+            constraints: Constraints::default(),
+            column_defaults: HashMap::new(),
+            schema_adapter_factory: config.schema_adapter_factory,
+            expr_adapter_factory: config.expr_adapter_factory,
+        };
+
+        Ok(table)
+    }
+
+    /// Assign constraints
+    pub fn with_constraints(mut self, constraints: Constraints) -> Self {
+        self.constraints = constraints;
+        self
+    }
+
+    /// Assign column defaults
+    pub fn with_column_defaults(
+        mut self,
+        column_defaults: HashMap<String, Expr>,
+    ) -> Self {
+        self.column_defaults = column_defaults;
+        self
+    }
+
+    /// Set the [`FileStatisticsCache`] used to cache parquet file statistics.
+    ///
+    /// Setting a statistics cache on the `SessionContext` can avoid refetching statistics
+    /// multiple times in the same session.
+    ///
+    /// If `None`, creates a new [`DefaultFileStatisticsCache`] scoped to this query.
+    pub fn with_cache(mut self, cache: Option<FileStatisticsCache>) -> Self {
+        self.collected_statistics =
+            cache.unwrap_or_else(|| Arc::new(DefaultFileStatisticsCache::default()));
+        self
+    }
+
+    /// Specify the SQL definition for this table, if any
+    pub fn with_definition(mut self, definition: Option<String>) -> Self {
+        self.definition = definition;
+        self
+    }
+
+    /// Get paths ref
+    pub fn table_paths(&self) -> &Vec<ListingTableUrl> {
+        &self.table_paths
+    }
+
+    /// Get options ref
+    pub fn options(&self) -> &ListingOptions {
+        &self.options
+    }
+
+    /// Get the schema source
+    pub fn schema_source(&self) -> SchemaSource {
+        self.schema_source
+    }
+
+    /// Set the [`SchemaAdapterFactory`] for this [`ListingTable`]
+    ///
+    /// The schema adapter factory is used to create schema adapters that can
+    /// handle schema evolution and type conversions when reading files with
+    /// different schemas than the table schema.
+    ///
+    /// # Example: Adding Schema Evolution Support
+    /// ```rust
+    /// # use std::sync::Arc;
+    /// # use datafusion::datasource::listing::{ListingTable, ListingTableConfig, ListingOptions, ListingTableUrl};
+    /// # use datafusion::datasource::schema_adapter::{DefaultSchemaAdapterFactory, SchemaAdapter};
+    /// # use datafusion::datasource::file_format::parquet::ParquetFormat;
+    /// # use arrow::datatypes::{SchemaRef, Schema, Field, DataType};
+    /// # let table_path = ListingTableUrl::parse("file:///path/to/data").unwrap();
+    /// # let options = ListingOptions::new(Arc::new(ParquetFormat::default()));
+    /// # let schema = Arc::new(Schema::new(vec![Field::new("id", DataType::Int64, false)]));
+    /// # let config = ListingTableConfig::new(table_path).with_listing_options(options).with_schema(schema);
+    /// # let table = ListingTable::try_new(config).unwrap();
+    /// let table_with_evolution = table
+    ///     .with_schema_adapter_factory(Arc::new(DefaultSchemaAdapterFactory));
+    /// ```
+    /// See [`ListingTableConfig::with_schema_adapter_factory`] for an example of custom SchemaAdapterFactory.
+    pub fn with_schema_adapter_factory(
+        self,
+        schema_adapter_factory: Arc<dyn SchemaAdapterFactory>,
+    ) -> Self {
+        Self {
+            schema_adapter_factory: Some(schema_adapter_factory),
+            ..self
+        }
+    }
+
+    /// Get the [`SchemaAdapterFactory`] for this table
+    pub fn schema_adapter_factory(&self) -> Option<&Arc<dyn SchemaAdapterFactory>> {
+        self.schema_adapter_factory.as_ref()
+    }
+
+    /// Creates a schema adapter for mapping between file and table schemas
+    ///
+    /// Uses the configured schema adapter factory if available, otherwise falls back
+    /// to the default implementation.
+    fn create_schema_adapter(&self) -> Box<dyn SchemaAdapter> {
+        let table_schema = self.schema();
+        match &self.schema_adapter_factory {
+            Some(factory) => {
+                factory.create_with_projected_schema(Arc::clone(&table_schema))
+            }
+            None => DefaultSchemaAdapterFactory::from_schema(Arc::clone(&table_schema)),
+        }
+    }
+
+    /// Creates a file source and applies schema adapter factory if available
+    fn create_file_source_with_schema_adapter(&self) -> Result<Arc<dyn FileSource>> {
+        let mut source = self.options.format.file_source();
+        // Apply schema adapter to source if available
+        //
+        // The source will use this SchemaAdapter to adapt data batches as they flow up the plan.
+        // Note: ListingTable also creates a SchemaAdapter in `scan()` but that is only used to adapt collected statistics.
+        if let Some(factory) = &self.schema_adapter_factory {
+            source = source.with_schema_adapter_factory(Arc::clone(factory))?;
+        }
+        Ok(source)
+    }
+
+    /// If file_sort_order is specified, creates the appropriate physical expressions
+    fn try_create_output_ordering(&self) -> Result<Vec<LexOrdering>> {
+        create_ordering(&self.table_schema, &self.options.file_sort_order)
+    }
+
+    fn add_path_preserving_metadata(&self, file_groups: Vec<FileGroup>) -> Result<Vec<FileGroup>, DataFusionError> {
+        // First pass: calculate cumulative row bases
+        let mut cumulative_row_base = 0;
+        let mut file_row_bases: HashMap<String, i32> = HashMap::new();
+
+        // Process files in order to calculate cumulative row bases
+        for group in &file_groups {
+            for file in group.files() {
+                let location = file.object_meta.location.to_string();
+                let row_count = self.options.files_metadata.iter()
+                    .find(|meta| { location.contains(meta.object_meta.location.as_ref()) })
+                    .map(|meta| meta.row_group_row_counts().iter().sum::<i64>() as i32)
+                    // .unwrap_or_default();
+                    .expect(format!("Fail to get row count for file {}", location).as_str());
+
+                // Store current cumulative value as this file's row_base
+                file_row_bases.insert(location.to_string(), cumulative_row_base);
+                // Update cumulative count for next file
+                cumulative_row_base += row_count;
+            }
+        }
+
+        // Second pass: create new file groups with calculated row_bases
+        Ok(file_groups
+            .into_iter()
+            .map(|mut group| {
+                let new_files: Vec<PartitionedFile> = group
+                    .files()
+                    .iter()
+                    .map(|file| {
+                        let location = file.object_meta.location.as_ref();
+                        let row_base = *file_row_bases.get(location).unwrap_or(&0);
+
+                        PartitionedFile {
+                            object_meta: file.object_meta.clone(),
+                            partition_values: {
+                                let mut values = file.partition_values.clone();
+                                values.push(ScalarValue::Int32(Some(row_base)));
+                                values
+                            },
+                            range: file.range.clone(),
+                            statistics: file.statistics.clone(),
+                            extensions: file.extensions.clone(),
+                            metadata_size_hint: file.metadata_size_hint,
+                        }
+                    })
+                    .collect();
+
+                FileGroup::new(new_files)
+                    .with_statistics(Arc::new(group.statistics_mut().cloned().unwrap_or_default()))
+            })
+            .collect())
+    }
+}
+
+// Expressions can be used for parttion pruning if they can be evaluated using
+// only the partiton columns and there are partition columns.
+fn can_be_evaluted_for_partition_pruning(
+    partition_column_names: &[&str],
+    expr: &Expr,
+) -> bool {
+    !partition_column_names.is_empty()
+        && expr_applicable_for_cols(partition_column_names, expr)
+}
+
+#[async_trait]
+impl TableProvider for ListingTable {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn schema(&self) -> SchemaRef {
+        Arc::clone(&self.table_schema)
+    }
+
+    fn constraints(&self) -> Option<&Constraints> {
+        Some(&self.constraints)
+    }
+
+    fn table_type(&self) -> TableType {
+        TableType::Base
+    }
+
+
+
+    async fn scan(
+        &self,
+        state: &dyn Session,
+        projection: Option<&Vec<usize>>,
+        filters: &[Expr],
+        limit: Option<usize>,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        // extract types of partition columns
+        let table_partition_cols = self
+            .options
+            .table_partition_cols
+            .iter()
+            .map(|col| Ok(self.table_schema.field_with_name(&col.0)?.clone()))
+            .collect::<Result<Vec<_>>>()?;
+
+        // let table_partition_col_names = table_partition_cols
+        //     .iter()
+        //     .map(|field| field.name().as_str())
+        //     .collect::<Vec<_>>();
+        // // If the filters can be resolved using only partition cols, there is no need to
+        // // pushdown it to TableScan, otherwise, `unhandled` pruning predicates will be generated
+        // let (partition_filters, filters): (Vec<_>, Vec<_>) =
+        //     filters.iter().cloned().partition(|filter| {
+        //         can_be_evaluted_for_partition_pruning(&table_partition_col_names, filter)
+        //     });
+
+        // We should not limit the number of partitioned files to scan if there are filters and limit
+        // at the same time. This is because the limit should be applied after the filters are applied.
+        let statistic_file_limit = if filters.is_empty() { limit } else { None };
+
+        let (mut partitioned_file_lists, statistics) = self
+            .list_files_for_scan(state, &vec![], statistic_file_limit)
+            .await?;
+        //
+        // let (mut partitioned_file_lists, statistics) = self
+        //     .list_files_for_scan(state, &partition_filters, statistic_file_limit)
+        //     .await?;
+
+        // if no files need to be read, return an `EmptyExec`
+        if partitioned_file_lists.is_empty() {
+            let projected_schema = project_schema(&self.schema(), projection)?;
+            return Ok(Arc::new(EmptyExec::new(projected_schema)));
+        }
+
+        partitioned_file_lists = self.add_path_preserving_metadata(partitioned_file_lists).expect("Unable to update Metadata for partitioned files");
+
+        let output_ordering = self.try_create_output_ordering()?;
+        match state
+            .config_options()
+            .execution
+            .split_file_groups_by_statistics
+            .then(|| {
+                output_ordering.first().map(|output_ordering| {
+                    FileScanConfig::split_groups_by_statistics_with_target_partitions(
+                        &self.table_schema,
+                        &partitioned_file_lists,
+                        output_ordering,
+                        self.options.target_partitions,
+                    )
+                })
+            })
+            .flatten()
+        {
+            Some(Err(e)) => log::debug!("failed to split file groups by statistics: {e}"),
+            Some(Ok(new_groups)) => {
+                if new_groups.len() <= self.options.target_partitions {
+                    partitioned_file_lists = new_groups;
+                } else {
+                    log::debug!("attempted to split file groups by statistics, but there were more file groups than target_partitions; falling back to unordered")
+                }
+            }
+            None => {} // no ordering required
+        };
+
+        let Some(object_store_url) =
+            self.table_paths.first().map(ListingTableUrl::object_store)
+        else {
+            return Ok(Arc::new(EmptyExec::new(Arc::new(Schema::empty()))));
+        };
+
+        let file_source = self.create_file_source_with_schema_adapter()?;
+
+        // create the execution plan
+        self.options
+            .format
+            .create_physical_plan(
+                state,
+                FileScanConfigBuilder::new(
+                    object_store_url,
+                    Arc::clone(&self.file_schema),
+                    file_source,
+                )
+                    .with_file_groups(partitioned_file_lists)
+                    .with_constraints(self.constraints.clone())
+                    .with_statistics(statistics)
+                    .with_projection(projection.cloned())
+                    .with_limit(limit)
+                    .with_output_ordering(output_ordering)
+                    .with_table_partition_cols(table_partition_cols)
+                    .with_expr_adapter(self.expr_adapter_factory.clone())
+                    .build(),
+            )
+            .await
+    }
+
+    fn supports_filters_pushdown(
+        &self,
+        filters: &[&Expr],
+    ) -> Result<Vec<TableProviderFilterPushDown>> {
+        let partition_column_names = self
+            .options
+            .table_partition_cols
+            .iter()
+            .map(|col| col.0.as_str())
+            .collect::<Vec<_>>();
+        filters
+            .iter()
+            .map(|filter| {
+                if can_be_evaluted_for_partition_pruning(&partition_column_names, filter)
+                {
+                    // if filter can be handled by partition pruning, it is exact
+                    return Ok(TableProviderFilterPushDown::Exact);
+                }
+
+                Ok(TableProviderFilterPushDown::Inexact)
+            })
+            .collect()
+    }
+
+    fn get_table_definition(&self) -> Option<&str> {
+        self.definition.as_deref()
+    }
+
+    async fn insert_into(
+        &self,
+        state: &dyn Session,
+        input: Arc<dyn ExecutionPlan>,
+        insert_op: InsertOp,
+    ) -> Result<Arc<dyn ExecutionPlan>> {
+        // Check that the schema of the plan matches the schema of this table.
+        self.schema()
+            .logically_equivalent_names_and_types(&input.schema())?;
+
+        let table_path = &self.table_paths()[0];
+        if !table_path.is_collection() {
+            return plan_err!(
+                "Inserting into a ListingTable backed by a single file is not supported, URL is possibly missing a trailing `/`. \
+                To append to an existing file use StreamTable, e.g. by using CREATE UNBOUNDED EXTERNAL TABLE"
+            );
+        }
+
+        // Get the object store for the table path.
+        let store = state.runtime_env().object_store(table_path)?;
+
+        let file_list_stream = pruned_partition_list(
+            state,
+            store.as_ref(),
+            table_path,
+            &[],
+            &self.options.file_extension,
+            &self.options.table_partition_cols,
+        )
+            .await?;
+
+        let file_group = file_list_stream.try_collect::<Vec<_>>().await?.into();
+        let keep_partition_by_columns =
+            state.config_options().execution.keep_partition_by_columns;
+
+        // Sink related option, apart from format
+        let config = FileSinkConfig {
+            original_url: String::default(),
+            object_store_url: self.table_paths()[0].object_store(),
+            table_paths: self.table_paths().clone(),
+            file_group,
+            output_schema: self.schema(),
+            table_partition_cols: self.options.table_partition_cols.clone(),
+            insert_op,
+            keep_partition_by_columns,
+            file_extension: self.options().format.get_ext(),
+        };
+
+        let orderings = self.try_create_output_ordering()?;
+        // It is sufficient to pass only one of the equivalent orderings:
+        let order_requirements = orderings.into_iter().next().map(Into::into);
+
+        self.options()
+            .format
+            .create_writer_physical_plan(input, state, config, order_requirements)
+            .await
+    }
+
+    fn get_column_default(&self, column: &str) -> Option<&Expr> {
+        self.column_defaults.get(column)
+    }
+}
+
+impl ListingTable {
+    /// Get the list of files for a scan as well as the file level statistics.
+    /// The list is grouped to let the execution plan know how the files should
+    /// be distributed to different threads / executors.
+    async fn list_files_for_scan<'a>(
+        &'a self,
+        ctx: &'a dyn Session,
+        filters: &'a [Expr],
+        limit: Option<usize>,
+    ) -> Result<(Vec<FileGroup>, Statistics)> {
+        let store = if let Some(url) = self.table_paths.first() {
+            ctx.runtime_env().object_store(url)?
+        } else {
+            return Ok((vec![], Statistics::new_unknown(&self.file_schema)));
+        };
+        // list files (with partitions)
+        let table_partition_cols:  Vec<(String, DataType)> = vec![]; // Passing empty partition cols as current partition cols are not mapped to directory path
+        let file_list = future::try_join_all(self.table_paths.iter().map(|table_path| {
+            pruned_partition_list(
+                ctx,
+                store.as_ref(),
+                table_path,
+                filters,
+                &self.options.file_extension,
+                &table_partition_cols,
+            )
+        }))
+            .await?;
+        let meta_fetch_concurrency =
+            ctx.config_options().execution.meta_fetch_concurrency;
+        let file_list = stream::iter(file_list).flatten_unordered(meta_fetch_concurrency);
+        // collect the statistics if required by the config
+        let files = file_list
+            .map(|part_file| async {
+                let part_file = part_file?;
+                let statistics = if self.options.collect_stat {
+                    self.do_collect_statistics(ctx, &store, &part_file).await?
+                } else {
+                    Arc::new(Statistics::new_unknown(&self.file_schema))
+                };
+                Ok(part_file.with_statistics(statistics))
+            })
+            .boxed()
+            .buffer_unordered(ctx.config_options().execution.meta_fetch_concurrency);
+
+        let (file_group, inexact_stats) =
+            get_files_with_limit(files, limit, self.options.collect_stat).await?;
+
+        let file_groups = file_group.split_files(self.options.target_partitions);
+        let (mut file_groups, mut stats) = compute_all_files_statistics(
+            file_groups,
+            self.schema(),
+            self.options.collect_stat,
+            inexact_stats,
+        )?;
+
+        let schema_adapter = self.create_schema_adapter();
+        let (schema_mapper, _) = schema_adapter.map_schema(self.file_schema.as_ref())?;
+
+        stats.column_statistics =
+            schema_mapper.map_column_statistics(&stats.column_statistics)?;
+        file_groups.iter_mut().try_for_each(|file_group| {
+            if let Some(stat) = file_group.statistics_mut() {
+                stat.column_statistics =
+                    schema_mapper.map_column_statistics(&stat.column_statistics)?;
+            }
+            Ok::<_, DataFusionError>(())
+        })?;
+        Ok((file_groups, stats))
+    }
+
+    /// Collects statistics for a given partitioned file.
+    ///
+    /// This method first checks if the statistics for the given file are already cached.
+    /// If they are, it returns the cached statistics.
+    /// If they are not, it infers the statistics from the file and stores them in the cache.
+    async fn do_collect_statistics(
+        &self,
+        ctx: &dyn Session,
+        store: &Arc<dyn ObjectStore>,
+        part_file: &PartitionedFile,
+    ) -> Result<Arc<Statistics>> {
+        match self
+            .collected_statistics
+            .get_with_extra(&part_file.object_meta.location, &part_file.object_meta)
+        {
+            Some(statistics) => Ok(statistics),
+            None => {
+                let statistics = self
+                    .options
+                    .format
+                    .infer_stats(
+                        ctx,
+                        store,
+                        Arc::clone(&self.file_schema),
+                        &part_file.object_meta,
+                    )
+                    .await?;
+                let statistics = Arc::new(statistics);
+                self.collected_statistics.put_with_extra(
+                    &part_file.object_meta.location,
+                    Arc::clone(&statistics),
+                    &part_file.object_meta,
+                );
+                Ok(statistics)
+            }
+        }
+    }
+}
+
+/// Processes a stream of partitioned files and returns a `FileGroup` containing the files.
+///
+/// This function collects files from the provided stream until either:
+/// 1. The stream is exhausted
+/// 2. The accumulated number of rows exceeds the provided `limit` (if specified)
+///
+/// # Arguments
+/// * `files` - A stream of `Result<PartitionedFile>` items to process
+/// * `limit` - An optional row count limit. If provided, the function will stop collecting files
+///   once the accumulated number of rows exceeds this limit
+/// * `collect_stats` - Whether to collect and accumulate statistics from the files
+///
+/// # Returns
+/// A `Result` containing a `FileGroup` with the collected files
+/// and a boolean indicating whether the statistics are inexact.
+///
+/// # Note
+/// The function will continue processing files if statistics are not available or if the
+/// limit is not provided. If `collect_stats` is false, statistics won't be accumulated
+/// but files will still be collected.
+async fn get_files_with_limit(
+    files: impl Stream<Item = Result<PartitionedFile>>,
+    limit: Option<usize>,
+    collect_stats: bool,
+) -> Result<(FileGroup, bool)> {
+    let mut file_group = FileGroup::default();
+    // Fusing the stream allows us to call next safely even once it is finished.
+    let mut all_files = Box::pin(files.fuse());
+    enum ProcessingState {
+        ReadingFiles,
+        ReachedLimit,
+    }
+
+    let mut state = ProcessingState::ReadingFiles;
+    let mut num_rows = Precision::Absent;
+
+    while let Some(file_result) = all_files.next().await {
+        // Early exit if we've already reached our limit
+        if matches!(state, ProcessingState::ReachedLimit) {
+            break;
+        }
+
+        let file = file_result?;
+
+        // Update file statistics regardless of state
+        if collect_stats {
+            if let Some(file_stats) = &file.statistics {
+                num_rows = if file_group.is_empty() {
+                    // For the first file, just take its row count
+                    file_stats.num_rows
+                } else {
+                    // For subsequent files, accumulate the counts
+                    num_rows.add(&file_stats.num_rows)
+                };
+            }
+        }
+
+        // Always add the file to our group
+        file_group.push(file);
+
+        // Check if we've hit the limit (if one was specified)
+        if let Some(limit) = limit {
+            if let Precision::Exact(row_count) = num_rows {
+                if row_count > limit {
+                    state = ProcessingState::ReachedLimit;
+                }
+            }
+        }
+    }
+    // If we still have files in the stream, it means that the limit kicked
+    // in, and the statistic could have been different had we processed the
+    // files in a different order.
+    let inexact_stats = all_files.next().await.is_some();
+    Ok((file_group, inexact_stats))
+}
+
diff --git a/plugins/engine-datafusion/jni/src/row_id_optimizer.rs b/plugins/engine-datafusion/jni/src/row_id_optimizer.rs
new file mode 100644
index 0000000000000..2e57f8d1b064a
--- /dev/null
+++ b/plugins/engine-datafusion/jni/src/row_id_optimizer.rs
@@ -0,0 +1,219 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+use std::fs;
+use std::sync::Arc;
+use datafusion::common::tree_node::{Transformed, TreeNode, TreeNodeRecursion};
+use datafusion::config::ConfigOptions;
+use datafusion::datasource::physical_plan::{FileScanConfig, FileScanConfigBuilder};
+use datafusion::datasource::source::DataSourceExec;
+use datafusion::error::DataFusionError;
+use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use datafusion::physical_optimizer::PhysicalOptimizerRule;
+use datafusion::physical_plan::ExecutionPlan;
+use datafusion::physical_plan::filter::FilterExec;
+use arrow::datatypes::{DataType, Field, Fields, Schema};
+use arrow_schema::SchemaRef;
+use datafusion::logical_expr::Operator;
+use datafusion::physical_expr::PhysicalExpr;
+use datafusion::physical_expr::expressions::{BinaryExpr, Column};
+use datafusion::physical_plan::projection::ProjectionExec;
+
+#[derive(Debug)]
+pub struct FilterRowIdOptimizer;
+
+impl FilterRowIdOptimizer {
+
+    fn get_projection_exec_for_data_source_exec(&self, datasource_exec: &DataSourceExec, schema: SchemaRef) -> ProjectionExec {
+        let mut datasource = datasource_exec.data_source().as_ref().as_any().downcast_ref::<FileScanConfig>().expect("DataSource not found");
+        // let _ = datasource.projection.insert(vec![0]);
+        let mut new_projections = datasource.clone().projection.clone().unwrap();
+
+        let file_schema = ParquetRecordBatchReaderBuilder::try_new(fs::File::open("/".to_owned() + &datasource.file_groups[0].files()[0].path().to_string()).unwrap()).expect("FileSchema not found for file group");
+        new_projections.push(file_schema.schema().fields().len());
+
+        let mut fields = schema.fields().clone().to_vec();
+        fields.insert(fields.len(), Arc::new(Field::new("row_base", DataType::Int32, true)));
+        let new_schema = Arc::new(Schema { metadata: schema.metadata().clone(), fields: Fields::from(fields) });
+
+        let file_scan_config =
+            FileScanConfigBuilder::from(datasource.clone())
+                .with_source(datasource.clone().file_source.with_schema(new_schema.clone()))
+                .with_projection(Some(new_projections.clone()))
+                .build();
+
+        let new_datasource = DataSourceExec::from_data_source(file_scan_config);
+
+        // 3. Create ProjectionExec for sum operation
+        let mut projection_exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![];
+
+        // Get indices from filter's schema
+        let row_id_idx = new_schema.index_of("___row_id").expect("Field ___row_id not found in FileSchema");
+        let row_base_idx = new_schema.index_of("row_base").expect("Field row_base not found in FileSchema");
+
+        // Create sum expression
+        let row_id_col = Arc::new(Column::new("___row_id", row_id_idx));
+        let row_base_col = Arc::new(Column::new("row_base", row_base_idx));
+        let sum_expr = Arc::new(BinaryExpr::new(
+            row_id_col,
+            Operator::Plus,
+            row_base_col,
+        ));
+
+        // IMP: order of projections matters, should be same as schema column order
+        // Add other columns at the end of list
+        for field in schema.fields() {
+            if field.name() != "___row_id" && field.name() != "row_base" {
+                let idx = new_schema.index_of(field.name()).unwrap();
+                projection_exprs.push((
+                    Arc::new(Column::new(field.name(), idx)),
+                    field.name().to_string(),
+                ));
+            } else {
+                // Add sum expression as ___row_id
+                projection_exprs.push((sum_expr.clone(), "___row_id".to_string()));
+            }
+        }
+
+        // Create final ProjectionExec
+        let projection = ProjectionExec::try_new(
+            projection_exprs,
+            new_datasource,
+        ).expect("Unable to create ProjectionExec");
+
+        projection
+    }
+
+    fn get_projection_exec_for_filter_exec(&self, datasource_exec: &DataSourceExec, filter: &FilterExec, schema: SchemaRef) -> ProjectionExec {
+        let datasource = datasource_exec.data_source().as_ref().as_any().downcast_ref::<FileScanConfig>().expect("DataSource not found");
+        // let _ = datasource.projection.insert(vec![0]);
+        let mut new_projections = datasource.clone().projection.clone().unwrap();
+        let file_schema = ParquetRecordBatchReaderBuilder::try_new(fs::File::open("/".to_owned() + &datasource.file_groups[0].files()[0].path().to_string()).unwrap()).expect("FileSchema not found for file group");
+
+        new_projections.push(file_schema.schema().fields().len());
+
+        let mut fields = schema.fields().clone().to_vec();
+        fields.insert(fields.len(), Arc::new(Field::new("row_base", DataType::Int32, true)));
+        let new_schema = Arc::new(Schema { metadata: schema.metadata().clone(), fields: Fields::from(fields) });
+
+        let file_scan_config = FileScanConfigBuilder::from(datasource.clone())
+            .with_source(datasource.clone().file_source.with_schema(new_schema.clone()))
+            .with_projection(Some(new_projections.clone()))
+            .build();
+
+        let new_datasource = DataSourceExec::from_data_source(file_scan_config);
+
+        // 2. Create new FilterExec with updated input schema
+        let new_filter = FilterExec::try_new(
+            filter.predicate().clone(),
+            new_datasource.clone(),
+        ).expect("Unable to create FilterExec");
+        // 3. Create ProjectionExec for sum operation
+        let mut projection_exprs: Vec<(Arc<dyn PhysicalExpr>, String)> = vec![];
+
+        // Get indices from filter's schema
+        let row_id_idx = new_schema.index_of("___row_id").expect("Field ___row_id not found in FileSchema");
+        let row_base_idx = new_schema.index_of("row_base").expect("Field row_base not found in FileSchema");
+
+        // Create sum expression
+        let row_id_col = Arc::new(Column::new("___row_id", row_id_idx));
+        let row_base_col = Arc::new(Column::new("row_base", row_base_idx));
+        let sum_expr = Arc::new(BinaryExpr::new(
+            row_id_col,
+            Operator::Plus,
+            row_base_col,
+        ));
+
+        // IMP: order of projections matters, should be same as schema column order
+        // Add other columns at the end of list
+        for field in schema.fields() {
+            if field.name() != "___row_id" && field.name() != "row_base" {
+                let idx = new_schema.index_of(field.name()).unwrap();
+                projection_exprs.push((
+                    Arc::new(Column::new(field.name(), idx)),
+                    field.name().to_string(),
+                ));
+            } else {
+                // Add sum expression as ___row_id
+                projection_exprs.push((sum_expr.clone(), "___row_id".to_string()));
+            }
+        }
+        // println!("projection_exprs :{:?}", projection_exprs);
+
+        // Create final ProjectionExec
+        let projection = ProjectionExec::try_new(
+            projection_exprs,
+            Arc::new(new_filter),
+        ).expect("Unable to create ProjectionExec");
+
+        projection
+    }
+}
+
+impl PhysicalOptimizerRule for FilterRowIdOptimizer {
+    fn optimize(
+        &self,
+        plan: Arc<dyn ExecutionPlan>,
+        _config: &ConfigOptions,
+    ) -> Result<Arc<dyn ExecutionPlan>, DataFusionError> {
+        let mut is_optimized = false;
+        let rewritten = plan.transform_up(|node| {
+            if let Some(filter_exec) = node.as_any().downcast_ref::<FilterExec>() {
+                // Check if input is DataSourceExec
+                if let Some(datasource_exec) = filter_exec.input().as_any().downcast_ref::<DataSourceExec>() {
+                    if !filter_exec.predicate().to_string().contains("___row_id") {
+                        // Check if ___row_id is present
+                        let schema = datasource_exec.schema();
+                        let has_row_id = schema.field_with_name("___row_id").is_ok();
+
+                        if has_row_id {
+                            let projection = self.get_projection_exec_for_filter_exec(datasource_exec, filter_exec, schema);
+                            // println!("projection :{:?}", projection);
+                            is_optimized = true;
+                            return Ok(Transformed::new(Arc::new(projection), true, TreeNodeRecursion::Continue));
+                        }
+                    } else {
+                        if(!is_optimized) {
+
+                            let schema = datasource_exec.schema();
+                            let has_row_id = schema.field_with_name("___row_id").is_ok();
+
+                            if has_row_id {
+                                let projection = self.get_projection_exec_for_data_source_exec(datasource_exec, schema);
+                                is_optimized = true;
+                                return Ok(Transformed::new(Arc::new(projection), true, TreeNodeRecursion::Continue));
+                            }
+                        }
+                    }
+                }
+            } else if let Some(datasource_exec) = node.as_any().downcast_ref::<DataSourceExec>() {
+                if(!is_optimized) {
+
+                    let schema = datasource_exec.schema();
+                    let has_row_id = schema.field_with_name("___row_id").is_ok();
+
+                    if has_row_id {
+                        let projection = self.get_projection_exec_for_data_source_exec(datasource_exec, schema);
+                        is_optimized = true;
+                        return Ok(Transformed::new(Arc::new(projection), true, TreeNodeRecursion::Continue));
+                    }
+                }
+            }
+            Ok(Transformed::no(node))
+        })?;
+
+        Ok(rewritten.data)
+    }
+
+    fn name(&self) -> &str {
+        "filter_row_id_optimizer"
+    }
+
+    fn schema_check(&self) -> bool {
+        true
+    }
+}
diff --git a/plugins/engine-datafusion/jni/src/util.rs b/plugins/engine-datafusion/jni/src/util.rs
new file mode 100644
index 0000000000000..575e654680e8a
--- /dev/null
+++ b/plugins/engine-datafusion/jni/src/util.rs
@@ -0,0 +1,210 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+use anyhow::Result;
+use chrono::{DateTime, Utc};
+use datafusion::arrow::array::RecordBatch;
+use jni::objects::{JObject, JObjectArray, JString};
+use jni::sys::jlong;
+use jni::JNIEnv;
+use object_store::{path::Path as ObjectPath, ObjectMeta};
+use std::collections::HashMap;
+use std::error::Error;
+use std::fs;
+use datafusion::error::DataFusionError;
+use datafusion::parquet::arrow::arrow_reader::ParquetRecordBatchReaderBuilder;
+use crate::FileMetadata;
+
+/// Set error message from a result using a Consumer<String> Java callback
+pub fn set_error_message_batch<Err: Error>(env: &mut JNIEnv, callback: JObject, result: Result<Vec<RecordBatch>, Err>) {
+    if result.is_err() {
+        set_error_message(env, callback, Result::Err(result.unwrap_err()));
+    } else {
+        let res : Result<(), Err> = Result::Ok(());
+        set_error_message(env, callback, res);
+    }
+
+}
+
+pub fn set_error_message<Err: Error>(env: &mut JNIEnv, callback: JObject, result: Result<(), Err>) {
+    match result {
+        Ok(_) => {
+            let err_message = JObject::null();
+            env.call_method(
+                callback,
+                "accept",
+                "(Ljava/lang/Object;)V",
+                &[(&err_message).into()],
+            )
+                .expect("Failed to call error handler with null message");
+        }
+        Err(err) => {
+            let err_message = env
+                .new_string(err.to_string())
+                .expect("Couldn't create java string for error message");
+            env.call_method(
+                callback,
+                "accept",
+                "(Ljava/lang/Object;)V",
+                &[(&err_message).into()],
+            )
+                .expect("Failed to call error handler with error message");
+        }
+    };
+}
+
+/// Call an ObjectResultCallback to return either a pointer to a newly created object or an error message
+pub fn set_object_result<T, Err: Error>(
+    env: &mut JNIEnv,
+    callback: JObject,
+    address: Result<*mut T, Err>,
+) {
+    match address {
+        Ok(address) => set_object_result_ok(env, callback, address),
+        Err(err) => set_object_result_error(env, callback, &err),
+    };
+}
+
+/// Set success result by calling an ObjectResultCallback
+pub fn set_object_result_ok<T>(env: &mut JNIEnv, callback: JObject, address: *mut T) {
+    let err_message = JObject::null();
+    env.call_method(
+        callback,
+        "callback",
+        "(Ljava/lang/String;J)V",
+        &[(&err_message).into(), (address as jlong).into()],
+    )
+        .expect("Failed to call object result callback with address");
+}
+
+/// Set error result by calling an ObjectResultCallback
+pub fn set_object_result_error<T: Error>(env: &mut JNIEnv, callback: JObject, error: &T) {
+    let err_message = env
+        .new_string(error.to_string())
+        .expect("Couldn't create java string for error message");
+    let address = -1 as jlong;
+    env.call_method(
+        callback,
+        "callback",
+        "(Ljava/lang/String;J)V",
+        &[(&err_message).into(), address.into()],
+    )
+        .expect("Failed to call object result callback with error");
+}
+
+
+/// Parse a string map from JNI arrays
+pub fn parse_string_map(
+    env: &mut JNIEnv,
+    keys: JObjectArray,
+    values: JObjectArray,
+) -> Result<HashMap<String, String>> {
+    let mut map = HashMap::new();
+
+    let keys_len = env.get_array_length(&keys)?;
+    let values_len = env.get_array_length(&values)?;
+
+    if keys_len != values_len {
+        return Err(anyhow::anyhow!("Keys and values arrays must have the same length"));
+    }
+
+    for i in 0..keys_len {
+        let key_obj = env.get_object_array_element(&keys, i)?;
+        let value_obj = env.get_object_array_element(&values, i)?;
+
+        let key_jstring = JString::from(key_obj);
+        let value_jstring = JString::from(value_obj);
+
+        let key_str = env.get_string(&key_jstring)?;
+        let value_str = env.get_string(&value_jstring)?;
+
+        map.insert(key_str.to_string_lossy().to_string(), value_str.to_string_lossy().to_string());
+    }
+
+    Ok(map)
+}
+
+// Parse a string map from JNI arrays
+pub fn parse_string_arr(
+    env: &mut JNIEnv,
+    files: JObjectArray,
+) -> Result<Vec<String>> {
+    let length = env.get_array_length(&files).unwrap();
+    let mut rust_strings: Vec<String> = Vec::with_capacity(length as usize);
+    for i in 0..length {
+        let file_obj = env.get_object_array_element(&files, i).unwrap();
+        let jstring = JString::from(file_obj);
+        let rust_str: String = env
+            .get_string(&jstring)
+            .expect("Couldn't get java string!")
+            .into();
+        rust_strings.push(rust_str);
+    }
+    Ok(rust_strings)
+}
+
+pub fn parse_string(
+    env: &mut JNIEnv,
+    file: JString
+) -> Result<String> {
+    let rust_str: String = env.get_string(&file)
+        .expect("Couldn't get java string")
+        .into();
+
+    Ok(rust_str)
+}
+
+/// Throw a Java exception
+pub fn throw_exception(env: &mut JNIEnv, message: &str) {
+    let _ = env.throw_new("java/lang/RuntimeException", message);
+}
+
+pub fn create_file_metadata_from_filenames(base_path: &str, filenames: Vec<String>) -> Result<Vec<FileMetadata>, DataFusionError> {
+    let mut row_base: i64 =0;
+    filenames.into_iter().map(|filename| {
+        let filename = filename.as_str();
+
+        // Handle both full paths and relative filenames
+        let full_path = if filename.starts_with('/') || filename.contains(base_path) {
+            // Already a full path
+            filename.to_string()
+        } else {
+            // Just a filename, needs base_path
+            format!("{}/{}", base_path.trim_end_matches('/'), filename)
+        };
+
+        let file_size = fs::metadata(&full_path).map(|m| m.len()).unwrap_or(0);
+        let file_result = fs::File::open(&full_path.clone());
+        if(file_result.is_err()) {
+            return Err(DataFusionError::Execution(format!("{} {}", file_result.unwrap_err().to_string(), full_path)))
+        }
+        let file = file_result.unwrap();
+        let parquet_metadata = ParquetRecordBatchReaderBuilder::try_new(file).unwrap();
+        let row_group_row_counts: Vec<i64> = parquet_metadata.metadata().row_groups()
+            .iter()
+            .map(|row_group| row_group.num_rows())
+            .collect();
+
+
+        let modified = fs::metadata(&full_path)
+            .and_then(|m| m.modified())
+            .map(|t| DateTime::<Utc>::from(t))
+            .unwrap_or_else(|_| Utc::now());
+
+        let file_meta = FileMetadata::new(
+            row_group_row_counts.clone(),
+            row_base,
+            ObjectMeta {
+                location: ObjectPath::from(full_path),
+                last_modified: modified,
+                size: file_size,
+                e_tag: None,
+                version: None,
+            }
+        );
+        //TODO: ensure ordering of files
+        row_base += row_group_row_counts.iter().sum::<i64>();
+        Ok(file_meta)
+    }).collect()
+}
diff --git a/plugins/engine-datafusion/licenses/arrow-LICENSE.txt b/plugins/engine-datafusion/licenses/arrow-LICENSE.txt
new file mode 100644
index 0000000000000..7bb1330a1002b
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-LICENSE.txt
@@ -0,0 +1,2261 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+--------------------------------------------------------------------------------
+
+src/arrow/util (some portions): Apache 2.0, and 3-clause BSD
+
+Some portions of this module are derived from code in the Chromium project,
+copyright (c) Google inc and (c) The Chromium Authors and licensed under the
+Apache 2.0 License or the under the 3-clause BSD license:
+
+  Copyright (c) 2013 The Chromium Authors. All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions are
+  met:
+
+     * Redistributions of source code must retain the above copyright
+  notice, this list of conditions and the following disclaimer.
+     * Redistributions in binary form must reproduce the above
+  copyright notice, this list of conditions and the following disclaimer
+  in the documentation and/or other materials provided with the
+  distribution.
+     * Neither the name of Google Inc. nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+This project includes code from Daniel Lemire's FrameOfReference project.
+
+https://github.com/lemire/FrameOfReference/blob/6ccaf9e97160f9a3b299e23a8ef739e711ef0c71/src/bpacking.cpp
+https://github.com/lemire/FrameOfReference/blob/146948b6058a976bc7767262ad3a2ce201486b93/scripts/turbopacking64.py
+
+Copyright: 2013 Daniel Lemire
+Home page: http://lemire.me/en/
+Project page: https://github.com/lemire/FrameOfReference
+License: Apache License Version 2.0 http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
+This project includes code from the TensorFlow project
+
+Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the NumPy project.
+
+https://github.com/numpy/numpy/blob/e1f191c46f2eebd6cb892a4bfe14d9dd43a06c4e/numpy/core/src/multiarray/multiarraymodule.c#L2910
+
+https://github.com/numpy/numpy/blob/68fd82271b9ea5a9e50d4e761061dfcca851382a/numpy/core/src/multiarray/datetime.c
+
+Copyright (c) 2005-2017, NumPy Developers.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+       copyright notice, this list of conditions and the following
+       disclaimer in the documentation and/or other materials provided
+       with the distribution.
+
+    * Neither the name of the NumPy Developers nor the names of any
+       contributors may be used to endorse or promote products derived
+       from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the Boost project
+
+Boost Software License - Version 1.0 - August 17th, 2003
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the FlatBuffers project
+
+Copyright 2014 Google Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the tslib project
+
+Copyright 2015 Microsoft Corporation. All rights reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the jemalloc project
+
+https://github.com/jemalloc/jemalloc
+
+Copyright (C) 2002-2017 Jason Evans <jasone@canonware.com>.
+All rights reserved.
+Copyright (C) 2007-2012 Mozilla Foundation.  All rights reserved.
+Copyright (C) 2009-2017 Facebook, Inc.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+1. Redistributions of source code must retain the above copyright notice(s),
+   this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright notice(s),
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY EXPRESS
+OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
+EVENT SHALL THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+--------------------------------------------------------------------------------
+
+This project includes code from the Go project, BSD 3-clause license + PATENTS
+weak patent termination clause
+(https://github.com/golang/go/blob/master/PATENTS).
+
+Copyright (c) 2009 The Go Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+   * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+   * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+   * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+This project includes code from the hs2client
+
+https://github.com/cloudera/hs2client
+
+Copyright 2016 Cloudera Inc.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+The script ci/scripts/util_wait_for_it.sh has the following license
+
+Copyright (c) 2016 Giles Hall
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The script r/configure has the following license (MIT)
+
+Copyright (c) 2017, Jeroen Ooms and Jim Hester
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the "Software"), to deal in
+the Software without restriction, including without limitation the rights to
+use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
+of the Software, and to permit persons to whom the Software is furnished to do
+so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+cpp/src/arrow/util/logging.cc, cpp/src/arrow/util/logging.h and
+cpp/src/arrow/util/logging-test.cc are adapted from
+Ray Project (https://github.com/ray-project/ray) (Apache 2.0).
+
+Copyright (c) 2016 Ray Project (https://github.com/ray-project/ray)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+The files cpp/src/arrow/vendored/datetime/date.h, cpp/src/arrow/vendored/datetime/tz.h,
+cpp/src/arrow/vendored/datetime/tz_private.h, cpp/src/arrow/vendored/datetime/ios.h,
+cpp/src/arrow/vendored/datetime/ios.mm,
+cpp/src/arrow/vendored/datetime/tz.cpp are adapted from
+Howard Hinnant's date library (https://github.com/HowardHinnant/date)
+It is licensed under MIT license.
+
+The MIT License (MIT)
+Copyright (c) 2015, 2016, 2017 Howard Hinnant
+Copyright (c) 2016 Adrian Colomitchi
+Copyright (c) 2017 Florian Dang
+Copyright (c) 2017 Paul Thompson
+Copyright (c) 2018 Tomasz Kamiński
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The file cpp/src/arrow/util/utf8.h includes code adapted from the page
+  https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+with the following license (MIT)
+
+Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/xxhash/ have the following license
+(BSD 2-Clause License)
+
+xxHash Library
+Copyright (c) 2012-2014, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+You can contact the author at :
+- xxHash homepage: http://www.xxhash.com
+- xxHash source repository : https://github.com/Cyan4973/xxHash
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/double-conversion/ have the following license
+(BSD 3-Clause License)
+
+Copyright 2006-2011, the V8 project authors. All rights reserved.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/uriparser/ have the following license
+(BSD 3-Clause License)
+
+uriparser - RFC 3986 URI parsing library
+
+Copyright (C) 2007, Weijia Song <songweijia@gmail.com>
+Copyright (C) 2007, Sebastian Pipping <sebastian@pipping.org>
+All rights reserved.
+
+Redistribution  and use in source and binary forms, with or without
+modification,  are permitted provided that the following conditions
+are met:
+
+    * Redistributions   of  source  code  must  retain  the   above
+      copyright  notice, this list of conditions and the  following
+      disclaimer.
+
+    * Redistributions  in  binary  form must  reproduce  the  above
+      copyright  notice, this list of conditions and the  following
+      disclaimer   in  the  documentation  and/or  other  materials
+      provided with the distribution.
+
+    * Neither  the name of the <ORGANIZATION> nor the names of  its
+      contributors  may  be  used to endorse  or  promote  products
+      derived  from  this software without specific  prior  written
+      permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS  IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT  NOT
+LIMITED  TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND  FITNESS
+FOR  A  PARTICULAR  PURPOSE ARE DISCLAIMED. IN NO EVENT  SHALL  THE
+COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL,    SPECIAL,   EXEMPLARY,   OR   CONSEQUENTIAL   DAMAGES
+(INCLUDING,  BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES;  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+STRICT  LIABILITY,  OR  TORT (INCLUDING  NEGLIGENCE  OR  OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+The files under dev/tasks/conda-recipes have the following license
+
+BSD 3-clause license
+Copyright (c) 2015-2018, conda-forge
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its contributors
+   may be used to endorse or promote products derived from this software without
+   specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
+TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/utfcpp/ have the following license
+
+Copyright 2006-2018 Nemanja Trifunovic
+
+Permission is hereby granted, free of charge, to any person or organization
+obtaining a copy of the software and accompanying documentation covered by
+this license (the "Software") to use, reproduce, display, distribute,
+execute, and transmit the Software, and to prepare derivative works of the
+Software, and to permit third-parties to whom the Software is furnished to
+do so, all subject to the following:
+
+The copyright notices in the Software and this entire statement, including
+the above license grant, this restriction and the following disclaimer,
+must be included in all copies of the Software, in whole or in part, and
+all derivative works of the Software, unless such copies or derivative
+works are solely in the form of machine-executable object code generated by
+a source language processor.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
+SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
+FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
+ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+This project includes code from Apache Kudu.
+
+ * cpp/cmake_modules/CompilerInfo.cmake is based on Kudu's cmake_modules/CompilerInfo.cmake
+
+Copyright: 2016 The Apache Software Foundation.
+Home page: https://kudu.apache.org/
+License: http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
+This project includes code from Apache Impala (incubating), formerly
+Impala. The Impala code and rights were donated to the ASF as part of the
+Incubator process after the initial code imports into Apache Parquet.
+
+Copyright: 2012 Cloudera, Inc.
+Copyright: 2016 The Apache Software Foundation.
+Home page: http://impala.apache.org/
+License: http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
+This project includes code from Apache Aurora.
+
+* dev/release/{release,changelog,release-candidate} are based on the scripts from
+  Apache Aurora
+
+Copyright: 2016 The Apache Software Foundation.
+Home page: https://aurora.apache.org/
+License: http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
+This project includes code from the Google styleguide.
+
+* cpp/build-support/cpplint.py is based on the scripts from the Google styleguide.
+
+Copyright: 2009 Google Inc. All rights reserved.
+Homepage: https://github.com/google/styleguide
+License: 3-clause BSD
+
+--------------------------------------------------------------------------------
+
+This project includes code from Snappy.
+
+* cpp/cmake_modules/{SnappyCMakeLists.txt,SnappyConfig.h} are based on code
+  from Google's Snappy project.
+
+Copyright: 2009 Google Inc. All rights reserved.
+Homepage: https://github.com/google/snappy
+License: 3-clause BSD
+
+--------------------------------------------------------------------------------
+
+This project includes code from the manylinux project.
+
+* python/manylinux1/scripts/{build_python.sh,python-tag-abi-tag.py,
+  requirements.txt} are based on code from the manylinux project.
+
+Copyright: 2016 manylinux
+Homepage: https://github.com/pypa/manylinux
+License: The MIT License (MIT)
+
+--------------------------------------------------------------------------------
+
+This project includes code from the cymove project:
+
+* python/pyarrow/includes/common.pxd includes code from the cymove project
+
+The MIT License (MIT)
+Copyright (c) 2019 Omer Ozarslan
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
+DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
+OR OTHER DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The projects includes code from the Ursabot project under the dev/archery
+directory.
+
+License: BSD 2-Clause
+
+Copyright 2019 RStudio, Inc.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+1. Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+This project include code from mingw-w64.
+
+* cpp/src/arrow/util/cpu-info.cc has a polyfill for mingw-w64 < 5
+
+Copyright (c) 2009 - 2013 by the mingw-w64 project
+Homepage: https://mingw-w64.org
+License: Zope Public License (ZPL) Version 2.1.
+
+---------------------------------------------------------------------------------
+
+This project include code from Google's Asylo project.
+
+* cpp/src/arrow/result.h is based on status_or.h
+
+Copyright (c)  Copyright 2017 Asylo authors
+Homepage: https://asylo.dev/
+License: Apache 2.0
+
+--------------------------------------------------------------------------------
+
+This project includes code from Google's protobuf project
+
+* cpp/src/arrow/result.h ARROW_ASSIGN_OR_RAISE is based off ASSIGN_OR_RETURN
+* cpp/src/arrow/util/bit_stream_utils.h contains code from wire_format_lite.h
+
+Copyright 2008 Google Inc.  All rights reserved.
+Homepage: https://developers.google.com/protocol-buffers/
+License:
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Code generated by the Protocol Buffer compiler is owned by the owner
+of the input file used when generating it.  This code is not
+standalone and requires a support library to be linked with it.  This
+support library is itself covered by the above license.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency LLVM is statically linked in certain binary distributions.
+Additionally some sections of source code have been derived from sources in LLVM
+and have been clearly labeled as such. LLVM has the following license:
+
+==============================================================================
+The LLVM Project is under the Apache License v2.0 with LLVM Exceptions:
+==============================================================================
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+    1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+    2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+    3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+    4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+    5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+    6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+    7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+    8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+    9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+    END OF TERMS AND CONDITIONS
+
+    APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+    Copyright [yyyy] [name of copyright owner]
+
+    Licensed under the Apache License, Version 2.0 (the "License");
+    you may not use this file except in compliance with the License.
+    You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+
+
+---- LLVM Exceptions to the Apache 2.0 License ----
+
+As an exception, if, as a result of your compiling your source code, portions
+of this Software are embedded into an Object form of such source code, you
+may redistribute such embedded portions in such Object form without complying
+with the conditions of Sections 4(a), 4(b) and 4(d) of the License.
+
+In addition, if you combine or link compiled forms of this Software with
+software that is licensed under the GPLv2 ("Combined Software") and if a
+court of competent jurisdiction determines that the patent provision (Section
+3), the indemnity provision (Section 9) or other Section of the License
+conflicts with the conditions of the GPLv2, you may retroactively and
+prospectively choose to deem waived or otherwise exclude such Section(s) of
+the License, but only in their entirety and only with respect to the Combined
+Software.
+
+==============================================================================
+Software from third parties included in the LLVM Project:
+==============================================================================
+The LLVM Project contains third party software which is under different license
+terms. All such code will be identified clearly using at least one of two
+mechanisms:
+1) It will be in a separate directory tree with its own `LICENSE.txt` or
+   `LICENSE` file at the top containing the specific license and restrictions
+   which apply to that software, or
+2) It will contain specific license and restriction terms at the top of every
+   file.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency gRPC is statically linked in certain binary
+distributions, like the python wheels. gRPC has the following license:
+
+Copyright 2014 gRPC authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency Apache Thrift is statically linked in certain binary
+distributions, like the python wheels. Apache Thrift has the following license:
+
+Apache Thrift
+Copyright (C) 2006 - 2019, The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency Apache ORC is statically linked in certain binary
+distributions, like the python wheels. Apache ORC has the following license:
+
+Apache ORC
+Copyright 2013-2019 The Apache Software Foundation
+
+This product includes software developed by The Apache Software
+Foundation (http://www.apache.org/).
+
+This product includes software developed by Hewlett-Packard:
+(c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency zstd is statically linked in certain binary
+distributions, like the python wheels. ZSTD has the following license:
+
+BSD License
+
+For Zstandard software
+
+Copyright (c) 2016-present, Facebook, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice, this
+   list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright notice,
+   this list of conditions and the following disclaimer in the documentation
+   and/or other materials provided with the distribution.
+
+ * Neither the name Facebook nor the names of its contributors may be used to
+   endorse or promote products derived from this software without specific
+   prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency lz4 is statically linked in certain binary
+distributions, like the python wheels. lz4 has the following license:
+
+LZ4 Library
+Copyright (c) 2011-2016, Yann Collet
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without modification,
+are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice, this
+  list of conditions and the following disclaimer in the documentation and/or
+  other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
+ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
+ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency Brotli is statically linked in certain binary
+distributions, like the python wheels. Brotli has the following license:
+
+Copyright (c) 2009, 2010, 2013-2016 by the Brotli Authors.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency rapidjson is statically linked in certain binary
+distributions, like the python wheels. rapidjson and its dependencies have the
+following licenses:
+
+Tencent is pleased to support the open source community by making RapidJSON
+available.
+
+Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
+All rights reserved.
+
+If you have downloaded a copy of the RapidJSON binary from Tencent, please note
+that the RapidJSON binary is licensed under the MIT License.
+If you have downloaded a copy of the RapidJSON source code from Tencent, please
+note that RapidJSON source code is licensed under the MIT License, except for
+the third-party components listed below which are subject to different license
+terms.  Your integration of RapidJSON into your own projects may require
+compliance with the MIT License, as well as the other licenses applicable to
+the third-party components included within RapidJSON. To avoid the problematic
+JSON license in your own projects, it's sufficient to exclude the
+bin/jsonchecker/ directory, as it's the only code under the JSON license.
+A copy of the MIT License is included in this file.
+
+Other dependencies and licenses:
+
+    Open Source Software Licensed Under the BSD License:
+    --------------------------------------------------------------------
+
+    The msinttypes r29
+    Copyright (c) 2006-2013 Alexander Chemeris
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are met:
+
+    * Redistributions of source code must retain the above copyright notice,
+    this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+    * Neither the name of  copyright holder nor the names of its contributors
+    may be used to endorse or promote products derived from this software
+    without specific prior written permission.
+
+    THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+    EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+    WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+    DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR
+    ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+    DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+    CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+    LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+    OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
+    DAMAGE.
+
+    Terms of the MIT License:
+    --------------------------------------------------------------------
+
+    Permission is hereby granted, free of charge, to any person obtaining a
+    copy of this software and associated documentation files (the "Software"),
+    to deal in the Software without restriction, including without limitation
+    the rights to use, copy, modify, merge, publish, distribute, sublicense,
+    and/or sell copies of the Software, and to permit persons to whom the
+    Software is furnished to do so, subject to the following conditions:
+
+    The above copyright notice and this permission notice shall be included
+    in all copies or substantial portions of the Software.
+
+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+    DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency snappy is statically linked in certain binary
+distributions, like the python wheels. snappy has the following license:
+
+Copyright 2011, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright notice,
+      this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright notice,
+      this list of conditions and the following disclaimer in the documentation
+      and/or other materials provided with the distribution.
+    * Neither the name of Google Inc. nor the names of its contributors may be
+      used to endorse or promote products derived from this software without
+      specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+===
+
+Some of the benchmark data in testdata/ is licensed differently:
+
+ - fireworks.jpeg is Copyright 2013 Steinar H. Gunderson, and
+   is licensed under the Creative Commons Attribution 3.0 license
+   (CC-BY-3.0). See https://creativecommons.org/licenses/by/3.0/
+   for more information.
+
+ - kppkn.gtb is taken from the Gaviota chess tablebase set, and
+   is licensed under the MIT License. See
+   https://sites.google.com/site/gaviotachessengine/Home/endgame-tablebases-1
+   for more information.
+
+ - paper-100k.pdf is an excerpt (bytes 92160 to 194560) from the paper
+   “Combinatorial Modeling of Chromatin Features Quantitatively Predicts DNA
+   Replication Timing in _Drosophila_” by Federico Comoglio and Renato Paro,
+   which is licensed under the CC-BY license. See
+   http://www.ploscompbiol.org/static/license for more ifnormation.
+
+ - alice29.txt, asyoulik.txt, plrabn12.txt and lcet10.txt are from Project
+   Gutenberg. The first three have expired copyrights and are in the public
+   domain; the latter does not have expired copyright, but is still in the
+   public domain according to the license information
+   (http://www.gutenberg.org/ebooks/53).
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency gflags is statically linked in certain binary
+distributions, like the python wheels. gflags has the following license:
+
+Copyright (c) 2006, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency glog is statically linked in certain binary
+distributions, like the python wheels. glog has the following license:
+
+Copyright (c) 2008, Google Inc.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+    * Neither the name of Google Inc. nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+A function gettimeofday in utilities.cc is based on
+
+http://www.google.com/codesearch/p?hl=en#dR3YEbitojA/COPYING&q=GetSystemTimeAsFileTime%20license:bsd
+
+The license of this code is:
+
+Copyright (c) 2003-2008, Jouni Malinen <j@w1.fi> and contributors
+All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+3. Neither the name(s) of the above-listed copyright holder(s) nor the
+   names of its contributors may be used to endorse or promote products
+   derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency re2 is statically linked in certain binary
+distributions, like the python wheels. re2 has the following license:
+
+Copyright (c) 2009 The RE2 Authors. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above
+      copyright notice, this list of conditions and the following
+      disclaimer in the documentation and/or other materials provided
+      with the distribution.
+    * Neither the name of Google Inc. nor the names of its contributors
+      may be used to endorse or promote products derived from this
+      software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency c-ares is statically linked in certain binary
+distributions, like the python wheels. c-ares has the following license:
+
+# c-ares license
+
+Copyright (c) 2007 - 2018, Daniel Stenberg with many contributors, see AUTHORS
+file.
+
+Copyright 1998 by the Massachusetts Institute of Technology.
+
+Permission to use, copy, modify, and distribute this software and its
+documentation for any purpose and without fee is hereby granted, provided that
+the above copyright notice appear in all copies and that both that copyright
+notice and this permission notice appear in supporting documentation, and that
+the name of M.I.T. not be used in advertising or publicity pertaining to
+distribution of the software without specific, written prior permission.
+M.I.T. makes no representations about the suitability of this software for any
+purpose.  It is provided "as is" without express or implied warranty.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency zlib is redistributed as a dynamically linked shared
+library in certain binary distributions, like the python wheels. In the future
+this will likely change to static linkage. zlib has the following license:
+
+zlib.h -- interface of the 'zlib' general purpose compression library
+  version 1.2.11, January 15th, 2017
+
+  Copyright (C) 1995-2017 Jean-loup Gailly and Mark Adler
+
+  This software is provided 'as-is', without any express or implied
+  warranty.  In no event will the authors be held liable for any damages
+  arising from the use of this software.
+
+  Permission is granted to anyone to use this software for any purpose,
+  including commercial applications, and to alter it and redistribute it
+  freely, subject to the following restrictions:
+
+  1. The origin of this software must not be misrepresented; you must not
+     claim that you wrote the original software. If you use this software
+     in a product, an acknowledgment in the product documentation would be
+     appreciated but is not required.
+  2. Altered source versions must be plainly marked as such, and must not be
+     misrepresented as being the original software.
+  3. This notice may not be removed or altered from any source distribution.
+
+  Jean-loup Gailly        Mark Adler
+  jloup@gzip.org          madler@alumni.caltech.edu
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency openssl is redistributed as a dynamically linked shared
+library in certain binary distributions, like the python wheels. openssl
+preceding version 3 has the following license:
+
+  LICENSE ISSUES
+  ==============
+
+  The OpenSSL toolkit stays under a double license, i.e. both the conditions of
+  the OpenSSL License and the original SSLeay license apply to the toolkit.
+  See below for the actual license texts.
+
+  OpenSSL License
+  ---------------
+
+/* ====================================================================
+ * Copyright (c) 1998-2019 The OpenSSL Project.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *
+ * 3. All advertising materials mentioning features or use of this
+ *    software must display the following acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
+ *
+ * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
+ *    endorse or promote products derived from this software without
+ *    prior written permission. For written permission, please contact
+ *    openssl-core@openssl.org.
+ *
+ * 5. Products derived from this software may not be called "OpenSSL"
+ *    nor may "OpenSSL" appear in their names without prior written
+ *    permission of the OpenSSL Project.
+ *
+ * 6. Redistributions of any form whatsoever must retain the following
+ *    acknowledgment:
+ *    "This product includes software developed by the OpenSSL Project
+ *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
+ * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+ * OF THE POSSIBILITY OF SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This product includes cryptographic software written by Eric Young
+ * (eay@cryptsoft.com).  This product includes software written by Tim
+ * Hudson (tjh@cryptsoft.com).
+ *
+ */
+
+ Original SSLeay License
+ -----------------------
+
+/* Copyright (C) 1995-1998 Eric Young (eay@cryptsoft.com)
+ * All rights reserved.
+ *
+ * This package is an SSL implementation written
+ * by Eric Young (eay@cryptsoft.com).
+ * The implementation was written so as to conform with Netscapes SSL.
+ *
+ * This library is free for commercial and non-commercial use as long as
+ * the following conditions are aheared to.  The following conditions
+ * apply to all code found in this distribution, be it the RC4, RSA,
+ * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
+ * included with this distribution is covered by the same copyright terms
+ * except that the holder is Tim Hudson (tjh@cryptsoft.com).
+ *
+ * Copyright remains Eric Young's, and as such any Copyright notices in
+ * the code are not to be removed.
+ * If this package is used in a product, Eric Young should be given attribution
+ * as the author of the parts of the library used.
+ * This can be in the form of a textual message at program startup or
+ * in documentation (online or textual) provided with the package.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. All advertising materials mentioning features or use of this software
+ *    must display the following acknowledgement:
+ *    "This product includes cryptographic software written by
+ *     Eric Young (eay@cryptsoft.com)"
+ *    The word 'cryptographic' can be left out if the rouines from the library
+ *    being used are not cryptographic related :-).
+ * 4. If you include any Windows specific code (or a derivative thereof) from
+ *    the apps directory (application code) you must include an acknowledgement:
+ *    "This product includes software written by Tim Hudson (tjh@cryptsoft.com)"
+ *
+ * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * The licence and distribution terms for any publically available version or
+ * derivative of this code cannot be changed.  i.e. this code cannot simply be
+ * copied and put under another distribution licence
+ * [including the GNU Public Licence.]
+ */
+
+--------------------------------------------------------------------------------
+
+This project includes code from the rtools-backports project.
+
+* ci/scripts/PKGBUILD and ci/scripts/r_windows_build.sh are based on code
+  from the rtools-backports project.
+
+Copyright: Copyright (c) 2013 - 2019, Алексей and Jeroen Ooms.
+All rights reserved.
+Homepage: https://github.com/r-windows/rtools-backports
+License: 3-clause BSD
+
+--------------------------------------------------------------------------------
+
+Some code from pandas has been adapted for the pyarrow codebase. pandas is
+available under the 3-clause BSD license, which follows:
+
+pandas license
+==============
+
+Copyright (c) 2011-2012, Lambda Foundry, Inc. and PyData Development Team
+All rights reserved.
+
+Copyright (c) 2008-2011 AQR Capital Management, LLC
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+    * Redistributions of source code must retain the above copyright
+       notice, this list of conditions and the following disclaimer.
+
+    * Redistributions in binary form must reproduce the above
+       copyright notice, this list of conditions and the following
+       disclaimer in the documentation and/or other materials provided
+       with the distribution.
+
+    * Neither the name of the copyright holder nor the names of any
+       contributors may be used to endorse or promote products derived
+       from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+Some bits from DyND, in particular aspects of the build system, have been
+adapted from libdynd and dynd-python under the terms of the BSD 2-clause
+license
+
+The BSD 2-Clause License
+
+    Copyright (C) 2011-12, Dynamic NDArray Developers
+    All rights reserved.
+
+    Redistribution and use in source and binary forms, with or without
+    modification, are permitted provided that the following conditions are
+    met:
+
+        * Redistributions of source code must retain the above copyright
+           notice, this list of conditions and the following disclaimer.
+
+        * Redistributions in binary form must reproduce the above
+           copyright notice, this list of conditions and the following
+           disclaimer in the documentation and/or other materials provided
+           with the distribution.
+
+    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+    "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+    A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+    OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+    SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+    LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+    DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+    THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+    (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+    OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+Dynamic NDArray Developers list:
+
+ * Mark Wiebe
+ * Continuum Analytics
+
+--------------------------------------------------------------------------------
+
+Some source code from Ibis (https://github.com/cloudera/ibis) has been adapted
+for PyArrow. Ibis is released under the Apache License, Version 2.0.
+
+--------------------------------------------------------------------------------
+
+dev/tasks/homebrew-formulae/apache-arrow.rb has the following license:
+
+BSD 2-Clause License
+
+Copyright (c) 2009-present, Homebrew contributors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
+
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+----------------------------------------------------------------------
+
+cpp/src/arrow/vendored/base64.cpp has the following license
+
+ZLIB License
+
+Copyright (C) 2004-2017 René Nyffenegger
+
+This source code is provided 'as-is', without any express or implied
+warranty. In no event will the author be held liable for any damages arising
+from the use of this software.
+
+Permission is granted to anyone to use this software for any purpose, including
+commercial applications, and to alter it and redistribute it freely, subject to
+the following restrictions:
+
+1. The origin of this source code must not be misrepresented; you must not
+   claim that you wrote the original source code. If you use this source code
+   in a product, an acknowledgment in the product documentation would be
+   appreciated but is not required.
+
+2. Altered source versions must be plainly marked as such, and must not be
+   misrepresented as being the original source code.
+
+3. This notice may not be removed or altered from any source distribution.
+
+René Nyffenegger rene.nyffenegger@adp-gmbh.ch
+
+--------------------------------------------------------------------------------
+
+This project includes code from Folly.
+
+ * cpp/src/arrow/vendored/ProducerConsumerQueue.h
+
+is based on Folly's
+
+ * folly/Portability.h
+ * folly/lang/Align.h
+ * folly/ProducerConsumerQueue.h
+
+Copyright: Copyright (c) Facebook, Inc. and its affiliates.
+Home page: https://github.com/facebook/folly
+License: http://www.apache.org/licenses/LICENSE-2.0
+
+--------------------------------------------------------------------------------
+
+The file cpp/src/arrow/vendored/musl/strptime.c has the following license
+
+Copyright © 2005-2020 Rich Felker, et al.
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The file cpp/cmake_modules/BuildUtils.cmake contains code from
+
+https://gist.github.com/cristianadam/ef920342939a89fae3e8a85ca9459b49
+
+which is made available under the MIT license
+
+Copyright (c) 2019 Cristian Adam
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/portable-snippets/ contain code from
+
+https://github.com/nemequ/portable-snippets
+
+and have the following copyright notice:
+
+Each source file contains a preamble explaining the license situation
+for that file, which takes priority over this file.  With the
+exception of some code pulled in from other repositories (such as
+µnit, an MIT-licensed project which is used for testing), the code is
+public domain, released using the CC0 1.0 Universal dedication (*).
+
+(*) https://creativecommons.org/publicdomain/zero/1.0/legalcode
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/fast_float/ contain code from
+
+https://github.com/lemire/fast_float
+
+which is made available under the Apache License 2.0.
+
+--------------------------------------------------------------------------------
+
+The file python/pyarrow/vendored/docscrape.py contains code from
+
+https://github.com/numpy/numpydoc/
+
+which is made available under the BSD 2-clause license.
+
+--------------------------------------------------------------------------------
+
+The file python/pyarrow/vendored/version.py contains code from
+
+https://github.com/pypa/packaging/
+
+which is made available under both the Apache license v2.0 and the
+BSD 2-clause license.
+
+--------------------------------------------------------------------------------
+
+The files in cpp/src/arrow/vendored/pcg contain code from
+
+https://github.com/imneme/pcg-cpp
+
+and have the following copyright notice:
+
+Copyright 2014-2019 Melissa O'Neill <oneill@pcg-random.org>,
+                    and the PCG Project contributors.
+
+SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+Licensed under the Apache License, Version 2.0 (provided in
+LICENSE-APACHE.txt and at http://www.apache.org/licenses/LICENSE-2.0)
+or under the MIT license (provided in LICENSE-MIT.txt and at
+http://opensource.org/licenses/MIT), at your option. This file may not
+be copied, modified, or distributed except according to those terms.
+
+Distributed on an "AS IS" BASIS, WITHOUT WARRANTY OF ANY KIND, either
+express or implied.  See your chosen license for details.
+
+--------------------------------------------------------------------------------
+r/R/dplyr-count-tally.R (some portions)
+
+Some portions of this file are derived from code from
+
+https://github.com/tidyverse/dplyr/
+
+which is made available under the MIT license
+
+Copyright (c) 2013-2019 RStudio and others.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of
+this software and associated documentation files (the “Software”), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+The file src/arrow/util/io_util.cc contains code from the CPython project
+which is made available under the Python Software Foundation License Version 2.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency opentelemetry-cpp is statically linked in certain binary
+distributions. opentelemetry-cpp is made available under the Apache License 2.0.
+
+Copyright The OpenTelemetry Authors
+SPDX-License-Identifier: Apache-2.0
+
+--------------------------------------------------------------------------------
+
+ci/conan/ is based on code from Conan Package and Dependency Manager.
+
+Copyright (c) 2019 Conan.io
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+
+3rdparty dependency UCX is redistributed as a dynamically linked shared
+library in certain binary distributions. UCX has the following license:
+
+Copyright (c) 2014-2015      UT-Battelle, LLC. All rights reserved.
+Copyright (C) 2014-2020      Mellanox Technologies Ltd. All rights reserved.
+Copyright (C) 2014-2015      The University of Houston System. All rights reserved.
+Copyright (C) 2015           The University of Tennessee and The University
+                             of Tennessee Research Foundation. All rights reserved.
+Copyright (C) 2016-2020      ARM Ltd. All rights reserved.
+Copyright (c) 2016           Los Alamos National Security, LLC. All rights reserved.
+Copyright (C) 2016-2020      Advanced Micro Devices, Inc.  All rights reserved.
+Copyright (C) 2019           UChicago Argonne, LLC.  All rights reserved.
+Copyright (c) 2018-2020      NVIDIA CORPORATION. All rights reserved.
+Copyright (C) 2020           Huawei Technologies Co., Ltd. All rights reserved.
+Copyright (C) 2016-2020      Stony Brook University. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+--------------------------------------------------------------------------------
+
+The file dev/tasks/r/github.packages.yml contains code from
+
+https://github.com/ursa-labs/arrow-r-nightly
+
+which is made available under the Apache License 2.0.
+
+--------------------------------------------------------------------------------
+.github/actions/sync-nightlies/action.yml  (some portions)
+
+Some portions of this file are derived from code from
+
+https://github.com/JoshPiper/rsync-docker
+
+which is made available under the MIT license
+
+Copyright (c) 2020 Joshua Piper
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+.github/actions/sync-nightlies/action.yml (some portions)
+
+Some portions of this file are derived from code from
+
+https://github.com/burnett01/rsync-deployments
+
+which is made available under the MIT license
+
+Copyright (c) 2019-2022 Contention
+Copyright (c) 2019-2022 Burnett01
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--------------------------------------------------------------------------------
+java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectHashMap.java
+java/vector/src/main/java/org/apache/arrow/vector/util/IntObjectMap.java
+
+These file are derived from code from Netty, which is made available under the
+Apache License 2.0.
diff --git a/plugins/engine-datafusion/licenses/arrow-NOTICE.txt b/plugins/engine-datafusion/licenses/arrow-NOTICE.txt
new file mode 100644
index 0000000000000..2089c6fb20358
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-NOTICE.txt
@@ -0,0 +1,84 @@
+Apache Arrow
+Copyright 2016-2024 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).
+
+This product includes software from the SFrame project (BSD, 3-clause).
+* Copyright (C) 2015 Dato, Inc.
+* Copyright (c) 2009 Carnegie Mellon University.
+
+This product includes software from the Feather project (Apache 2.0)
+https://github.com/wesm/feather
+
+This product includes software from the DyND project (BSD 2-clause)
+https://github.com/libdynd
+
+This product includes software from the LLVM project
+ * distributed under the University of Illinois Open Source
+
+This product includes software from the google-lint project
+ * Copyright (c) 2009 Google Inc. All rights reserved.
+
+This product includes software from the mman-win32 project
+ * Copyright https://code.google.com/p/mman-win32/
+ * Licensed under the MIT License;
+
+This product includes software from the LevelDB project
+ * Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+ * Use of this source code is governed by a BSD-style license that can be
+ * Moved from Kudu http://github.com/cloudera/kudu
+
+This product includes software from the CMake project
+ * Copyright 2001-2009 Kitware, Inc.
+ * Copyright 2012-2014 Continuum Analytics, Inc.
+ * All rights reserved.
+
+This product includes software from https://github.com/matthew-brett/multibuild (BSD 2-clause)
+ * Copyright (c) 2013-2016, Matt Terry and Matthew Brett; all rights reserved.
+
+This product includes software from the Ibis project (Apache 2.0)
+ * Copyright (c) 2015 Cloudera, Inc.
+ * https://github.com/cloudera/ibis
+
+This product includes software from Dremio (Apache 2.0)
+  * Copyright (C) 2017-2018 Dremio Corporation
+  * https://github.com/dremio/dremio-oss
+
+This product includes software from Google Guava (Apache 2.0)
+  * Copyright (C) 2007 The Guava Authors
+  * https://github.com/google/guava
+
+This product include software from CMake (BSD 3-Clause)
+  * CMake - Cross Platform Makefile Generator
+  * Copyright 2000-2019 Kitware, Inc. and Contributors
+
+The web site includes files generated by Jekyll.
+
+--------------------------------------------------------------------------------
+
+This product includes code from Apache Kudu, which includes the following in
+its NOTICE file:
+
+  Apache Kudu
+  Copyright 2016 The Apache Software Foundation
+
+  This product includes software developed at
+  The Apache Software Foundation (http://www.apache.org/).
+
+  Portions of this software were developed at
+  Cloudera, Inc (http://www.cloudera.com/).
+
+--------------------------------------------------------------------------------
+
+This product includes code from Apache ORC, which includes the following in
+its NOTICE file:
+
+  Apache ORC
+  Copyright 2013-2019 The Apache Software Foundation
+
+  This product includes software developed by The Apache Software
+  Foundation (http://www.apache.org/).
+
+  This product includes software developed by Hewlett-Packard:
+  (c) Copyright [2014-2015] Hewlett-Packard Development Company, L.P
diff --git a/plugins/engine-datafusion/licenses/arrow-c-data-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-c-data-17.0.0.jar.sha1
new file mode 100644
index 0000000000000..8586384ac28c3
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-c-data-17.0.0.jar.sha1
@@ -0,0 +1 @@
+ccef140b279af80c6dda78a19c75872799c00dfb
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/arrow-format-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-format-17.0.0.jar.sha1
new file mode 100644
index 0000000000000..34fd4704eac91
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-format-17.0.0.jar.sha1
@@ -0,0 +1 @@
+5d052f20fd1193840eb59818515e710156c364b2
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/arrow-memory-core-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-memory-core-17.0.0.jar.sha1
new file mode 100644
index 0000000000000..ea312f4f5e51a
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-memory-core-17.0.0.jar.sha1
@@ -0,0 +1 @@
+51c5287ef5a624656bb38da7684078905b1a88c9
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/arrow-memory-unsafe-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-memory-unsafe-17.0.0.jar.sha1
new file mode 100644
index 0000000000000..14abbb6b6b3f4
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-memory-unsafe-17.0.0.jar.sha1
@@ -0,0 +1 @@
+c2e4966dcf68f0978d3cc935844191d2d68c61e8
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/arrow-vector-17.0.0.jar.sha1 b/plugins/engine-datafusion/licenses/arrow-vector-17.0.0.jar.sha1
new file mode 100644
index 0000000000000..8f9fddc882396
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/arrow-vector-17.0.0.jar.sha1
@@ -0,0 +1 @@
+16685545e4734382c1fcdaf12ac9b0a7d1fc06c0
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/checker-qual-3.42.0.jar.sha1 b/plugins/engine-datafusion/licenses/checker-qual-3.42.0.jar.sha1
new file mode 100644
index 0000000000000..5a5268f9d126f
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/checker-qual-3.42.0.jar.sha1
@@ -0,0 +1 @@
+638ec33f363a94d41a4f03c3e7d3dcfba64e402d
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/checker-qual-LICENSE.txt b/plugins/engine-datafusion/licenses/checker-qual-LICENSE.txt
new file mode 100644
index 0000000000000..9837c6b69fdab
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/checker-qual-LICENSE.txt
@@ -0,0 +1,22 @@
+Checker Framework qualifiers
+Copyright 2004-present by the Checker Framework developers
+
+MIT License:
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
diff --git a/plugins/engine-datafusion/licenses/checker-qual-NOTICE.txt b/plugins/engine-datafusion/licenses/checker-qual-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/plugins/engine-datafusion/licenses/flatbuffers-java-23.5.26.jar.sha1 b/plugins/engine-datafusion/licenses/flatbuffers-java-23.5.26.jar.sha1
new file mode 100644
index 0000000000000..939c91b488691
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/flatbuffers-java-23.5.26.jar.sha1
@@ -0,0 +1 @@
+e6320185c75767ba32c52ace087425a5a4275a50
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/flatbuffers-java-LICENSE.txt b/plugins/engine-datafusion/licenses/flatbuffers-java-LICENSE.txt
new file mode 100644
index 0000000000000..d645695673349
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/flatbuffers-java-LICENSE.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
diff --git a/plugins/engine-datafusion/licenses/flatbuffers-java-NOTICE.txt b/plugins/engine-datafusion/licenses/flatbuffers-java-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/plugins/engine-datafusion/licenses/jackson-LICENSE.txt b/plugins/engine-datafusion/licenses/jackson-LICENSE.txt
new file mode 100644
index 0000000000000..f5f45d26a49d6
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/jackson-LICENSE.txt
@@ -0,0 +1,8 @@
+This copy of Jackson JSON processor streaming parser/generator is licensed under the
+Apache (Software) License, version 2.0 ("the License").
+See the License for details about distribution rights, and the
+specific rights regarding derivate works.
+
+You may obtain a copy of the License at:
+
+http://www.apache.org/licenses/LICENSE-2.0
diff --git a/plugins/engine-datafusion/licenses/jackson-NOTICE.txt b/plugins/engine-datafusion/licenses/jackson-NOTICE.txt
new file mode 100644
index 0000000000000..4c976b7b4cc58
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/jackson-NOTICE.txt
@@ -0,0 +1,20 @@
+# Jackson JSON processor
+
+Jackson is a high-performance, Free/Open Source JSON processing library.
+It was originally written by Tatu Saloranta (tatu.saloranta@iki.fi), and has
+been in development since 2007.
+It is currently developed by a community of developers, as well as supported
+commercially by FasterXML.com.
+
+## Licensing
+
+Jackson core and extension components may licensed under different licenses.
+To find the details that apply to this artifact see the accompanying LICENSE file.
+For more information, including possible other licensing options, contact
+FasterXML.com (http://fasterxml.com).
+
+## Credits
+
+A list of contributors may be found from CREDITS file, which is included
+in some artifacts (usually source distributions); but is always available
+from the source code management (SCM) system project uses.
diff --git a/plugins/engine-datafusion/licenses/jackson-annotations-2.18.2.jar.sha1 b/plugins/engine-datafusion/licenses/jackson-annotations-2.18.2.jar.sha1
new file mode 100644
index 0000000000000..a06e1d5f28425
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/jackson-annotations-2.18.2.jar.sha1
@@ -0,0 +1 @@
+985d77751ebc7fce5db115a986bc9aa82f973f4a
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/jackson-databind-2.18.2.jar.sha1 b/plugins/engine-datafusion/licenses/jackson-databind-2.18.2.jar.sha1
new file mode 100644
index 0000000000000..eedbfff66c705
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/jackson-databind-2.18.2.jar.sha1
@@ -0,0 +1 @@
+deef8697b92141fb6caf7aa86966cff4eec9b04f
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/slf4j-api-2.0.17.jar.sha1 b/plugins/engine-datafusion/licenses/slf4j-api-2.0.17.jar.sha1
new file mode 100644
index 0000000000000..435f6c13a28b6
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/slf4j-api-2.0.17.jar.sha1
@@ -0,0 +1 @@
+d9e58ac9c7779ba3bf8142aff6c830617a7fe60f
\ No newline at end of file
diff --git a/plugins/engine-datafusion/licenses/slf4j-api-LICENSE.txt b/plugins/engine-datafusion/licenses/slf4j-api-LICENSE.txt
new file mode 100644
index 0000000000000..1a3d053237bec
--- /dev/null
+++ b/plugins/engine-datafusion/licenses/slf4j-api-LICENSE.txt
@@ -0,0 +1,24 @@
+Copyright (c) 2004-2022 QOS.ch Sarl (Switzerland)
+All rights reserved.
+
+Permission is hereby granted, free  of charge, to any person obtaining
+a  copy  of this  software  and  associated  documentation files  (the
+"Software"), to  deal in  the Software without  restriction, including
+without limitation  the rights to  use, copy, modify,  merge, publish,
+distribute,  sublicense, and/or sell  copies of  the Software,  and to
+permit persons to whom the Software  is furnished to do so, subject to
+the following conditions:
+
+The  above  copyright  notice  and  this permission  notice  shall  be
+included in all copies or substantial portions of the Software.
+
+THE  SOFTWARE IS  PROVIDED  "AS  IS", WITHOUT  WARRANTY  OF ANY  KIND,
+EXPRESS OR  IMPLIED, INCLUDING  BUT NOT LIMITED  TO THE  WARRANTIES OF
+MERCHANTABILITY,    FITNESS    FOR    A   PARTICULAR    PURPOSE    AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE,  ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+
+
diff --git a/plugins/engine-datafusion/licenses/slf4j-api-NOTICE.txt b/plugins/engine-datafusion/licenses/slf4j-api-NOTICE.txt
new file mode 100644
index 0000000000000..e69de29bb2d1d
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionPlugin.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionPlugin.java
new file mode 100644
index 0000000000000..45a2da3e6afa3
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionPlugin.java
@@ -0,0 +1,170 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import org.opensearch.cluster.metadata.IndexNameExpressionResolver;
+import org.opensearch.cluster.node.DiscoveryNodes;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.settings.ClusterSettings;
+import org.opensearch.common.settings.IndexScopedSettings;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.common.settings.SettingsFilter;
+import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
+import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.datafusion.action.DataFusionAction;
+import org.opensearch.datafusion.action.NodesDataFusionInfoAction;
+import org.opensearch.datafusion.action.TransportNodesDataFusionInfoAction;
+import org.opensearch.datafusion.search.DatafusionContext;
+import org.opensearch.datafusion.search.DatafusionQuery;
+import org.opensearch.datafusion.search.DatafusionReaderManager;
+import org.opensearch.datafusion.search.DatafusionSearcher;
+import org.opensearch.env.Environment;
+import org.opensearch.env.NodeEnvironment;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.search.ContextEngineSearcher;
+import org.opensearch.index.engine.SearchExecEngine;
+import org.opensearch.index.engine.exec.FileMetadata;
+import org.opensearch.plugins.ActionPlugin;
+import org.opensearch.plugins.SearchEnginePlugin;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.rest.RestController;
+import org.opensearch.rest.RestHandler;
+import org.opensearch.script.ScriptService;
+import org.opensearch.threadpool.ThreadPool;
+import org.opensearch.transport.client.Client;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+import org.opensearch.watcher.ResourceWatcherService;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Supplier;
+
+/**
+ * Main plugin class for OpenSearch DataFusion integration.
+ *
+ */
+public class DataFusionPlugin extends Plugin implements ActionPlugin, SearchEnginePlugin {
+
+    private DataFusionService dataFusionService;
+    private final boolean isDataFusionEnabled;
+
+    /**
+     * Constructor for DataFusionPlugin.
+     * @param settings The settings for the DataFusionPlugin.
+     */
+    public DataFusionPlugin(Settings settings) {
+        // For now, DataFusion is always enabled if the plugin is loaded
+        // In the future, this could be controlled by a feature flag
+        this.isDataFusionEnabled = true;
+    }
+
+    /**
+     * Creates components for the DataFusion plugin.
+     * @param client The client instance.
+     * @param clusterService The cluster service instance.
+     * @param threadPool The thread pool instance.
+     * @param resourceWatcherService The resource watcher service instance.
+     * @param scriptService The script service instance.
+     * @param xContentRegistry The named XContent registry.
+     * @param environment The environment instance.
+     * @param nodeEnvironment The node environment instance.
+     * @param namedWriteableRegistry The named writeable registry.
+     * @param indexNameExpressionResolver The index name expression resolver instance.
+     * @param repositoriesServiceSupplier The supplier for the repositories service.
+     * @return Collection of created components
+     */
+    @Override
+    public Collection<Object> createComponents(
+        Client client,
+        ClusterService clusterService,
+        ThreadPool threadPool,
+        ResourceWatcherService resourceWatcherService,
+        ScriptService scriptService,
+        NamedXContentRegistry xContentRegistry,
+        Environment environment,
+        NodeEnvironment nodeEnvironment,
+        NamedWriteableRegistry namedWriteableRegistry,
+        IndexNameExpressionResolver indexNameExpressionResolver,
+        Supplier<RepositoriesService> repositoriesServiceSupplier,
+        Map<DataFormat, DataSourceCodec> dataSourceCodecs
+    ) {
+        if (!isDataFusionEnabled) {
+            return Collections.emptyList();
+        }
+        dataFusionService = new DataFusionService(dataSourceCodecs);
+
+        for(DataFormat format : this.getSupportedFormats()) {
+            dataSourceCodecs.get(format);
+        }
+        // return Collections.emptyList();
+        return Collections.singletonList(dataFusionService);
+    }
+
+    @Override
+    public List<DataFormat> getSupportedFormats() {
+        return List.of(DataFormat.CSV);
+    }
+
+    /**
+     * Create engine per shard per format with initial view of catalog
+     */
+    // TODO : one engine per format, does that make sense ?
+    // TODO : Engine shouldn't just be SearcherOperations, it can be more ?
+    @Override
+    public SearchExecEngine<DatafusionContext, DatafusionSearcher,
+            DatafusionReaderManager, DatafusionQuery>
+        createEngine(DataFormat dataFormat,Collection<FileMetadata> formatCatalogSnapshot, ShardPath shardPath) throws IOException {
+        return new DatafusionEngine(dataFormat, formatCatalogSnapshot, dataFusionService, shardPath);
+    }
+
+    /**
+     * Gets the REST handlers for the DataFusion plugin.
+     * @param settings The settings for the plugin.
+     * @param restController The REST controller instance.
+     * @param clusterSettings The cluster settings instance.
+     * @param indexScopedSettings The index scoped settings instance.
+     * @param settingsFilter The settings filter instance.
+     * @param indexNameExpressionResolver The index name expression resolver instance.
+     * @param nodesInCluster The supplier for the discovery nodes.
+     * @return A list of REST handlers.
+     */
+    @Override
+    public List<RestHandler> getRestHandlers(
+        Settings settings,
+        RestController restController,
+        ClusterSettings clusterSettings,
+        IndexScopedSettings indexScopedSettings,
+        SettingsFilter settingsFilter,
+        IndexNameExpressionResolver indexNameExpressionResolver,
+        Supplier<DiscoveryNodes> nodesInCluster
+    ) {
+        if (!isDataFusionEnabled) {
+            return Collections.emptyList();
+        }
+        return List.of(new DataFusionAction());
+    }
+
+    /**
+     * Gets the list of action handlers for the DataFusion plugin.
+     * @return A list of action handlers.
+     */
+    @Override
+    public List<ActionHandler<?, ?>> getActions() {
+        if (!isDataFusionEnabled) {
+            return Collections.emptyList();
+        }
+        return List.of(new ActionHandler<>(NodesDataFusionInfoAction.INSTANCE, TransportNodesDataFusionInfoAction.class));
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionQueryJNI.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionQueryJNI.java
new file mode 100644
index 0000000000000..ddb06b6b9e8dc
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionQueryJNI.java
@@ -0,0 +1,158 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+/**
+ * JNI wrapper for DataFusion operations
+ */
+public class DataFusionQueryJNI {
+
+    private static boolean libraryLoaded = false;
+
+    static {
+        loadNativeLibrary();
+    }
+
+    /**
+     * Private constructor to prevent instantiation of utility class.
+     */
+    private DataFusionQueryJNI() {
+        // Utility class
+    }
+
+    /**
+     * Load the native library from resources
+     */
+    private static synchronized void loadNativeLibrary() {
+        if (libraryLoaded) {
+            return;
+        }
+
+        try {
+            // Try to load the library directly
+            System.loadLibrary("opensearch_datafusion_jni");
+            libraryLoaded = true;
+        } catch (UnsatisfiedLinkError e) {
+            // Try loading from resources
+            try {
+                String osName = System.getProperty("os.name").toLowerCase();
+                String libExtension = osName.contains("windows") ? ".dll" : (osName.contains("mac") ? ".dylib" : ".so");
+                String libName = "libopensearch_datafusion_jni" + libExtension;
+
+                java.io.InputStream is = DataFusionQueryJNI.class.getResourceAsStream("/native/" + libName);
+                if (is != null) {
+                    java.io.File tempFile = java.io.File.createTempFile("libopensearch_datafusion_jni", libExtension);
+                    tempFile.deleteOnExit();
+
+                    try (java.io.FileOutputStream fos = new java.io.FileOutputStream(tempFile)) {
+                        byte[] buffer = new byte[8192];
+                        int bytesRead;
+                        while ((bytesRead = is.read(buffer)) != -1) {
+                            fos.write(buffer, 0, bytesRead);
+                        }
+                    }
+
+                    System.load(tempFile.getAbsolutePath());
+                    libraryLoaded = true;
+                } else {
+                    throw new RuntimeException("Native library not found: " + libName, e);
+                }
+            } catch (Exception ex) {
+                throw new RuntimeException("Failed to load native library", ex);
+            }
+        }
+    }
+
+    /**
+     * Create a new global runtime environment
+     * @return runtime env pointer for subsequent operations
+     */
+    public static native long createGlobalRuntime();
+
+    public static native long createTokioRuntime();
+
+    /**
+     * Closes global runtime environment
+     * @param pointer the runtime environment pointer to close
+     * @return status code
+     */
+    public static native long closeGlobalRuntime(long pointer);
+
+    /**
+     * Get version information
+     * @return JSON string with version information
+     */
+    public static native String getVersionInfo();
+
+    /**
+     * Create a new DataFusion session context
+     * @param runtimeId the global runtime environment ID
+     * @return context ID for subsequent operations
+     */
+    public static native long createSessionContext(long runtimeId);
+
+    /**
+     * Close and cleanup a DataFusion context
+     * @param contextId the context ID to close
+     */
+    public static native void closeSessionContext(long contextId);
+
+    /**
+     * Execute a Substrait query plan
+     * @param cachePtr the session context ID
+     * @param substraitPlan the serialized Substrait query plan
+     * @return stream pointer for result iteration
+     */
+    public static native long executeQueryPhase(long cachePtr, byte[] substraitPlan, long runtimePtr);
+
+    /**
+     * Execute a Substrait query plan
+     * @param cachePtr the session context ID
+     * @param rowIds row ids for which record needs to fetch
+     * @param runtimePtr runtime pointer
+     * @return stream pointer for result iteration
+     */
+
+    // TODO: tie this to actual FetchPhase
+    public static native long executeFetchPhase(long cachePtr, long[] rowIds, String[] projections, long runtimePtr);
+
+    public static native long createDatafusionReader(String path, String[] files);
+
+    public static native void closeDatafusionReader(long ptr);
+
+    /**
+     * Register a directory with CSV files
+     * @param contextId the session context ID
+     * @param tableName the table name to register
+     * @param directoryPath the directory path containing CSV files
+     * @param fileNames array of file names to register
+     * @return status code
+     */
+    public static native int registerCsvDirectory(long contextId, String tableName, String directoryPath, String[] fileNames);
+
+    /**
+     * Check if stream has more data
+     * @param streamPtr the stream pointer
+     * @return true if more data available
+     */
+    public static native boolean streamHasNext(long streamPtr);
+
+    /**
+     * Get next batch from stream
+     * @param streamPtr the stream pointer
+     * @return byte array containing the next batch, or null if no more data
+     */
+    public static native byte[] streamNext(long streamPtr);
+
+    /**
+     * Close and cleanup a result stream
+     * @param streamPtr the stream pointer to close
+     */
+    public static native void closeStream(long streamPtr);
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionService.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionService.java
new file mode 100644
index 0000000000000..9548ced599723
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataFusionService.java
@@ -0,0 +1,210 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
+import org.opensearch.common.util.concurrent.ConcurrentCollections;
+import org.opensearch.common.util.concurrent.ConcurrentMapLong;
+import org.opensearch.datafusion.core.GlobalRuntimeEnv;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.CompletableFuture;
+
+/**
+ * Service for managing DataFusion contexts and operations - essentially like SearchService
+ */
+public class DataFusionService extends AbstractLifecycleComponent {
+
+    private static final Logger logger = LogManager.getLogger(DataFusionService.class);
+    private final ConcurrentMapLong<DataSourceCodec> sessionEngines = ConcurrentCollections.newConcurrentMapLongWithAggressiveConcurrency();
+
+    private final DataSourceRegistry dataSourceRegistry;
+    private final GlobalRuntimeEnv globalRuntimeEnv;
+
+    /**
+     * Creates a new DataFusion service instance.
+     */
+    public DataFusionService(Map<DataFormat, DataSourceCodec> dataSourceCodecs) {
+        this.dataSourceRegistry = new DataSourceRegistry(dataSourceCodecs);
+
+        // to verify jni
+        String version = DataFusionQueryJNI.getVersionInfo();
+        this.globalRuntimeEnv = new GlobalRuntimeEnv();
+    }
+
+    @Override
+    protected void doStart() {
+        logger.info("Starting DataFusion service");
+        try {
+            // Initialize the data source registry
+            // Test that at least one data source is available
+            if (!dataSourceRegistry.hasCodecs()) {
+                logger.warn("No data sources available");
+            } else {
+                logger.info(
+                    "DataFusion service started successfully with {} data sources: {}",
+                    dataSourceRegistry.getCodecNames().size(),
+                    dataSourceRegistry.getCodecNames()
+                );
+
+            }
+        } catch (Exception e) {
+            logger.error("Failed to start DataFusion service", e);
+            throw new RuntimeException("Failed to initialize DataFusion service", e);
+        }
+    }
+
+    @Override
+    protected void doStop() {
+        logger.info("Stopping DataFusion service");
+
+        // Close all session contexts
+        for (Long sessionId : sessionEngines.keySet()) {
+            try {
+                closeSessionContext(sessionId).get();
+            } catch (Exception e) {
+                logger.warn("Error closing session context {}", sessionId, e);
+            }
+        }
+        sessionEngines.clear();
+        globalRuntimeEnv.close();
+        logger.info("DataFusion service stopped");
+    }
+
+    @Override
+    protected void doClose() {
+        doStop();
+    }
+
+    /**
+     * Register a directory with list of files to create a runtime environment
+     * with listing files cache of DataFusion
+     *
+     * @param directoryPath path to the directory containing files
+     * @param fileNames list of file names in the directory
+     * @return runtime environment ID
+     */
+    public CompletableFuture<Void> registerDirectory(String directoryPath, List<String> fileNames) {
+        DataSourceCodec engine = dataSourceRegistry.getDefaultEngine();
+        if (engine == null) {
+            return CompletableFuture.failedFuture(new IllegalStateException("No DataFusion engine available"));
+        }
+
+        logger.debug(
+            "Registering directory {} with {} files using engine {}",
+            directoryPath,
+            fileNames.size(),
+            engine.getClass().getSimpleName()
+        );
+
+        return engine.registerDirectory(directoryPath, fileNames, globalRuntimeEnv.getPointer());
+    }
+
+    /**
+     * Create a session context
+     *
+     * @return session context ID
+     */
+    public CompletableFuture<Long> createSessionContext() {
+        long runtimeEnvironmentId = globalRuntimeEnv.getPointer();
+        DataSourceCodec codec = dataSourceRegistry.getDefaultEngine();
+        if (codec == null) {
+            return CompletableFuture.failedFuture(new IllegalArgumentException("Runtime environment not found: " + runtimeEnvironmentId));
+        }
+
+        logger.debug(
+            "Creating session context for runtime environment {} using engine {}",
+            runtimeEnvironmentId,
+            codec.getClass().getSimpleName()
+        );
+
+        return codec.createSessionContext(runtimeEnvironmentId).thenApply(sessionId -> {
+            // Track which engine created this session context
+            sessionEngines.put(sessionId, codec);
+            logger.debug("Created session context {} with engine {}", sessionId, codec.getClass().getSimpleName());
+            return sessionId;
+        });
+    }
+
+    /**
+     * Execute a query accepting substrait plan bytes and run via session context
+     *
+     * @param sessionContextId the session context ID
+     * @param substraitPlanBytes the substrait plan as byte array
+     * @return record batch stream containing query results
+     */
+    public CompletableFuture<RecordBatchStream> executeSubstraitQuery(long sessionContextId, byte[] substraitPlanBytes) {
+        DataSourceCodec engine = sessionEngines.get(sessionContextId);
+        if (engine == null) {
+            return CompletableFuture.failedFuture(new IllegalArgumentException("Session context not found: " + sessionContextId));
+        }
+
+        logger.debug(
+            "Executing substrait query for session {} with plan size {} bytes using engine {}",
+            sessionContextId,
+            substraitPlanBytes.length,
+            engine.getClass().getSimpleName()
+        );
+
+        return engine.executeSubstraitQuery(sessionContextId, substraitPlanBytes);
+    }
+
+    public long getRuntimePointer() {
+        return globalRuntimeEnv.getPointer();
+    }
+
+    public long getTokioRuntimePointer() {
+        return globalRuntimeEnv.getTokioRuntimePtr();
+    }
+
+    /**
+     * Close the session context and clean up resources
+     *
+     * @param sessionContextId the session context ID to close
+     * @return future that completes when cleanup is done
+     */
+    public CompletableFuture<Void> closeSessionContext(long sessionContextId) {
+        DataSourceCodec engine = sessionEngines.remove(sessionContextId);
+        if (engine == null) {
+            logger.debug("Session context {} not found or already closed", sessionContextId);
+            return CompletableFuture.completedFuture(null);
+        }
+
+        logger.debug("Closing session context {} using engine {}", sessionContextId, engine.getClass().getSimpleName());
+
+        return engine.closeSessionContext(sessionContextId);
+    }
+
+    /**
+     * Get version information from available codecs
+     * @return JSON version string
+     */
+    public String getVersion() {
+        StringBuilder version = new StringBuilder();
+        version.append("{\"codecs\":[");
+
+        boolean first = true;
+        for (DataFormat engineName : this.dataSourceRegistry.getCodecNames()) {
+            if (!first) {
+                version.append(",");
+            }
+            version.append("{\"name\":\"").append(engineName).append("\"}");
+            first = false;
+        }
+
+        version.append("]}");
+        return version.toString();
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataSourceRegistry.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataSourceRegistry.java
new file mode 100644
index 0000000000000..1d274116aac94
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DataSourceRegistry.java
@@ -0,0 +1,73 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+/**
+ * Registry for DataFusion data source codecs.
+ */
+public class DataSourceRegistry {
+
+    private static final Logger logger = LogManager.getLogger(DataSourceRegistry.class);
+
+    private final ConcurrentHashMap<DataFormat, DataSourceCodec> codecs = new ConcurrentHashMap<>();
+
+    public DataSourceRegistry(Map<DataFormat, DataSourceCodec> dataSourceCodecMap) {
+        codecs.putAll(dataSourceCodecMap);
+    }
+
+    /**
+     * Check if any codecs are available.
+     *
+     * @return true if codecs are available, false otherwise
+     */
+    public boolean hasCodecs() {
+        return !codecs.isEmpty();
+    }
+
+    /**
+     * Get the names of all registered codecs.
+     *
+     * @return list of codec names
+     */
+    public List<DataFormat> getCodecNames() {
+        return new ArrayList<>(codecs.keySet());
+    }
+
+    /**
+     * Get the default codec (first available codec).
+     *
+     * @return the default codec, or null if none available
+     */
+    public DataSourceCodec getDefaultEngine() {
+        if (codecs.isEmpty()) {
+            return null;
+        }
+        return codecs.values().iterator().next();
+    }
+
+    /**
+     * Get a codec by name.
+     *
+     * @param name the codec name
+     * @return the codec, or null if not found
+     */
+    public DataSourceCodec getCodec(String name) {
+        return codecs.get(name);
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionEngine.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionEngine.java
new file mode 100644
index 0000000000000..892cb70f22b4b
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionEngine.java
@@ -0,0 +1,267 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.FieldVector;
+import org.apache.arrow.vector.IntVector;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.OpenSearchException;
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.common.lease.Releasables;
+import org.opensearch.common.util.BigArrays;
+import org.opensearch.common.xcontent.XContentFactory;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.core.xcontent.XContentBuilder;
+import org.opensearch.datafusion.search.DatafusionContext;
+import org.opensearch.datafusion.search.DatafusionQuery;
+import org.opensearch.datafusion.search.DatafusionQueryPhaseExecutor;
+import org.opensearch.datafusion.search.DatafusionReader;
+import org.opensearch.datafusion.search.DatafusionReaderManager;
+import org.opensearch.datafusion.search.DatafusionSearcher;
+import org.opensearch.datafusion.search.DatafusionSearcherSupplier;
+import org.opensearch.index.engine.CatalogSnapshotAwareRefreshListener;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineException;
+import org.opensearch.index.engine.EngineSearcherSupplier;
+import org.opensearch.index.engine.SearchExecEngine;
+import org.opensearch.index.engine.exec.FileMetadata;
+import org.opensearch.index.mapper.*;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.search.SearchShardTarget;
+import org.opensearch.search.aggregations.SearchResultsCollector;
+import org.opensearch.search.internal.ReaderContext;
+import org.opensearch.search.internal.ShardSearchRequest;
+import org.opensearch.search.query.QueryPhaseExecutor;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.search.query.GenericQueryPhaseSearcher;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.*;
+import java.util.function.Function;
+
+public class DatafusionEngine extends SearchExecEngine<DatafusionContext, DatafusionSearcher,
+    DatafusionReaderManager, DatafusionQuery> {
+
+    private static final Logger logger = LogManager.getLogger(DatafusionEngine.class);
+
+    private DataFormat dataFormat;
+    private DatafusionReaderManager datafusionReaderManager;
+    private DataFusionService datafusionService;
+
+    public DatafusionEngine(DataFormat dataFormat, Collection<FileMetadata> formatCatalogSnapshot, DataFusionService dataFusionService, ShardPath shardPath) throws IOException {
+        this.dataFormat = dataFormat;
+
+        this.datafusionReaderManager = new DatafusionReaderManager(shardPath.getDataPath().toString(), formatCatalogSnapshot, dataFormat.getName());
+        this.datafusionService = dataFusionService;
+    }
+
+    @Override
+    public GenericQueryPhaseSearcher<DatafusionContext, DatafusionSearcher, DatafusionQuery> getQueryPhaseSearcher() {
+        return new DatafusionQueryPhaseSearcher();
+    }
+
+    @Override
+    public QueryPhaseExecutor<DatafusionContext> getQueryPhaseExecutor() {
+        return new DatafusionQueryPhaseExecutor();
+    }
+
+    @Override
+    public DatafusionContext createContext(ReaderContext readerContext, ShardSearchRequest request, SearchShardTarget searchShardTarget, SearchShardTask task, BigArrays bigArrays) throws IOException {
+        DatafusionContext datafusionContext = new DatafusionContext(readerContext, request, searchShardTarget, task, this, bigArrays);
+        // Parse source
+        datafusionContext.datafusionQuery(new DatafusionQuery(request.source().queryPlanIR(), new ArrayList<>()));
+        return datafusionContext;
+    }
+
+    @Override
+    public EngineSearcherSupplier<DatafusionSearcher> acquireSearcherSupplier(Function<DatafusionSearcher, DatafusionSearcher> wrapper) throws EngineException {
+        return acquireSearcherSupplier(wrapper, Engine.SearcherScope.EXTERNAL);
+    }
+
+    @Override
+    public EngineSearcherSupplier<DatafusionSearcher> acquireSearcherSupplier(Function<DatafusionSearcher, DatafusionSearcher> wrapper, Engine.SearcherScope scope) throws EngineException {
+        // TODO : wrapper is ignored
+        EngineSearcherSupplier<DatafusionSearcher> searcher = null;
+        // TODO : refcount needs to be revisited - add proper tests for exception etc
+        try {
+            DatafusionReader reader = datafusionReaderManager.acquire();
+            searcher = new DatafusionSearcherSupplier(null) {
+                @Override
+                protected DatafusionSearcher acquireSearcherInternal(String source) {
+                    return new DatafusionSearcher(source, reader, () -> {});
+                }
+
+                @Override
+                protected void doClose() {
+                    try {
+                        reader.decRef();
+                    } catch (IOException e) {
+                        throw new UncheckedIOException(e);
+                    }
+                }
+            };
+        } catch (Exception ex) {
+            // TODO
+        }
+        return searcher;
+    }
+
+    @Override
+    public DatafusionSearcher acquireSearcher(String source) throws EngineException {
+        return acquireSearcher(source, Engine.SearcherScope.EXTERNAL);
+    }
+
+    @Override
+    public DatafusionSearcher acquireSearcher(String source, Engine.SearcherScope scope) throws EngineException {
+        return acquireSearcher(source, scope, Function.identity());
+    }
+
+    @Override
+    public DatafusionSearcher acquireSearcher(String source, Engine.SearcherScope scope, Function<DatafusionSearcher, DatafusionSearcher> wrapper) throws EngineException {
+        DatafusionSearcherSupplier releasable = null;
+        try {
+            DatafusionSearcherSupplier searcherSupplier = releasable = (DatafusionSearcherSupplier) acquireSearcherSupplier(wrapper, scope);
+            DatafusionSearcher searcher = searcherSupplier.acquireSearcher(source);
+            releasable = null;
+            return new DatafusionSearcher(
+                source,
+                searcher.getReader(),
+                () -> Releasables.close(searcher, searcherSupplier)
+            );
+        } finally {
+            Releasables.close(releasable);
+        }
+    }
+
+    @Override
+    public DatafusionReaderManager getReferenceManager(Engine.SearcherScope scope) {
+        return datafusionReaderManager;
+    }
+
+    @Override
+    public CatalogSnapshotAwareRefreshListener getRefreshListener(Engine.SearcherScope scope) {
+        return datafusionReaderManager;
+    }
+
+    @Override
+    public boolean assertSearcherIsWarmedUp(String source, Engine.SearcherScope scope) {
+        return false;
+    }
+
+    @Override
+    public void executeQueryPhase(DatafusionContext context) {
+        Map<String, Object[]> finalRes = new HashMap<>();
+        ArrayList<Long> rowIdResult = new ArrayList<>();
+
+        try {
+            DatafusionSearcher datafusionSearcher = context.getEngineSearcher();
+            long streamPointer = datafusionSearcher.search(context.getDatafusionQuery(), datafusionService.getTokioRuntimePointer());
+            RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+            RecordBatchStream stream = new RecordBatchStream(streamPointer, datafusionService.getTokioRuntimePointer() , allocator);
+
+            // We can have some collectors passed like this which can collect the results and convert to InternalAggregation
+            // Is the possible? need to check
+
+            SearchResultsCollector<RecordBatchStream> collector = new SearchResultsCollector<RecordBatchStream>() {
+                @Override
+                public void collect(RecordBatchStream value) {
+                    VectorSchemaRoot root = value.getVectorSchemaRoot();
+                    for (Field field : root.getSchema().getFields()) {
+                        String fieldName = field.getName();
+                        FieldVector fieldVector = root.getVector(fieldName);
+                        Object[] fieldValues = new Object[fieldVector.getValueCount()];
+                        if (fieldName.equals("___row_id")) {
+                            IntVector rowIdVector = (IntVector) root.getVector(fieldName);
+                            for(int i=0; i<fieldVector.getValueCount(); i++) {
+                                rowIdResult.add((long) rowIdVector.get(i));
+                                fieldValues[i] = fieldVector.getObject(i);
+                            }
+                        }
+                        else {
+                            for (int i = 0; i < fieldVector.getValueCount(); i++) {
+                                fieldValues[i] = fieldVector.getObject(i);
+                            }
+                        }
+                        finalRes.put(fieldName, fieldValues);
+                    }
+                }
+            };
+
+            while (stream.loadNextBatch().join()) {
+                collector.collect(stream);
+            }
+
+            for (Map.Entry<String, Object[]> entry : finalRes.entrySet()) {
+                logger.info("{}: {}", entry.getKey(), java.util.Arrays.toString(entry.getValue()));
+            }
+
+        } catch (Exception exception) {
+            logger.error("Failed to execute Substrait query plan", exception);
+        }
+        context.setDfQueryPhaseResult(rowIdResult);
+        context.setDFResults(finalRes);
+    }
+
+
+    /**
+     * Executes fetch phase, DataFusion query should contain projections for fields
+     * @param context DataFusion context
+     * @throws IOException
+     */
+    @Override
+    public void executeFetchPhase(DatafusionContext context) throws IOException {
+        List<Long> rowIds = context.getDfQueryResult();
+
+        // preprocess
+        context.getDatafusionQuery().setFetchPhaseContext(rowIds);
+        DatafusionSearcher datafusionSearcher = context.getEngineSearcher();
+        long streamPointer = datafusionSearcher.search(context.getDatafusionQuery(), datafusionService.getTokioRuntimePointer()); // update to handle fetchPhase query
+        RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+        RecordBatchStream stream = new RecordBatchStream(streamPointer, datafusionService.getTokioRuntimePointer() , allocator);
+
+        // postprocess
+        context.setDfFetchPhaseResult(generateByteRefs(context, stream));
+    }
+
+    private List<BytesReference> generateByteRefs(DatafusionContext context, RecordBatchStream recordBatchStream) throws IOException {
+        MapperService mapperService = context.mapperService();
+        List<BytesReference> byteRefs = new ArrayList<>();
+        while(recordBatchStream.loadNextBatch().join()) {
+            VectorSchemaRoot vectorSchemaRoot = recordBatchStream.getVectorSchemaRoot();
+            List<FieldVector> fieldVectorList = vectorSchemaRoot.getFieldVectors();
+            for(int i=0; i<vectorSchemaRoot.getRowCount(); i++) {
+                XContentBuilder builder = XContentFactory.jsonBuilder().startObject();
+
+                try {
+                    for (FieldVector valueVectors : fieldVectorList) {
+
+                        MappingLookup mappingLookup = mapperService.documentMapper().mappers();
+                        Mapper mapper = mappingLookup.getMapper(valueVectors.getName());
+                        DerivedFieldGenerator derivedFieldGenerator = mapper.derivedFieldGenerator();
+
+                        derivedFieldGenerator.generate(builder, List.of(valueVectors.getObject(i)));
+                    }
+                } catch (Exception e) {
+                    throw new OpenSearchException("Failed to derive source for doc id [" + i + "]", e);
+                } finally {
+                    builder.endObject();
+                }
+                System.out.println("Object: ");
+                System.out.println(builder.toString());
+                byteRefs.add(BytesReference.bytes(builder));
+            }
+        }
+        return byteRefs;
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionQueryPhaseSearcher.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionQueryPhaseSearcher.java
new file mode 100644
index 0000000000000..a9253f9e9d3b2
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/DatafusionQueryPhaseSearcher.java
@@ -0,0 +1,44 @@
+package org.opensearch.datafusion;
+
+import org.opensearch.datafusion.search.DatafusionContext;
+import org.opensearch.datafusion.search.DatafusionQuery;
+import org.opensearch.datafusion.search.DatafusionSearcher;
+import org.opensearch.index.engine.EngineSearcher;
+import org.opensearch.search.query.GenericQueryPhaseSearcher;
+import org.opensearch.search.query.QueryCollectorContext;
+import org.opensearch.search.aggregations.SearchResultsCollector;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.io.IOException;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * DataFusion-specific query phase searcher using Substrait queries
+ *
+ */
+public class DatafusionQueryPhaseSearcher implements GenericQueryPhaseSearcher<DatafusionContext,DatafusionSearcher, DatafusionQuery> {
+
+    // How to pass table providers that search other engines such as Lucene ?
+    @Override
+    public boolean searchWith(
+        DatafusionContext context,
+        DatafusionSearcher searcher,
+        DatafusionQuery datafusionQuery,
+        LinkedList<QueryCollectorContext> collectors,
+        boolean hasFilterCollector,
+        boolean hasTimeout
+    ) throws IOException {
+
+        List<SearchResultsCollector<RecordBatchStream>> searchCollectors = new ArrayList<>(); // TODO : derive from collectors ?
+
+        // Execute DataFusion query with Substrait plan
+        searcher.search(datafusionQuery, searchCollectors);
+
+        // Process results into QuerySearchResult
+        context.queryResult().searchTimedOut(false);
+
+        return false; // No rescoring for DataFusion
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ErrorUtil.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ErrorUtil.java
new file mode 100644
index 0000000000000..6d0486d213a55
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ErrorUtil.java
@@ -0,0 +1,20 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+/**
+ * Utility class for error handling in DataFusion operations.
+ */
+public class ErrorUtil {
+    private ErrorUtil() {}
+
+    static boolean containsError(String errString) {
+        return errString != null && !errString.isEmpty();
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ObjectResultCallback.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ObjectResultCallback.java
new file mode 100644
index 0000000000000..d6de1fdace339
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/ObjectResultCallback.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+interface ObjectResultCallback {
+    void callback(String errMessage, long value);
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/RecordBatchStream.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/RecordBatchStream.java
new file mode 100644
index 0000000000000..ea90468215012
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/RecordBatchStream.java
@@ -0,0 +1,139 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import org.apache.arrow.c.ArrowArray;
+import org.apache.arrow.c.ArrowSchema;
+import org.apache.arrow.c.CDataDictionaryProvider;
+import org.apache.arrow.c.Data;
+import org.apache.arrow.memory.BufferAllocator;
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.ArrowType;
+import org.apache.arrow.vector.types.pojo.Field;
+import org.apache.arrow.vector.types.pojo.Schema;
+import org.opensearch.datafusion.core.SessionContext;
+
+import java.util.concurrent.CompletableFuture;
+
+import static org.apache.arrow.c.Data.importField;
+
+/**
+ * Represents a stream of Apache Arrow record batches from DataFusion query execution.
+ * Provides a Java interface to iterate through query results in a memory-efficient way.
+ */
+public class RecordBatchStream {
+
+    private final long streamPointer;
+    private final BufferAllocator allocator;
+    private final CDataDictionaryProvider dictionaryProvider;
+    private boolean initialized = false;
+    private VectorSchemaRoot vectorSchemaRoot = null;
+    private long runtimePtr;
+
+    /**
+     * Creates a new RecordBatchStream for the given stream pointer
+     * @param streamId the stream pointer
+     * @param allocator memory allocator for Arrow vectors
+     */
+    public RecordBatchStream(long streamId, long runtimePtr, BufferAllocator allocator) {
+        this.streamPointer = streamId;
+        this.allocator = allocator;
+        this.runtimePtr = runtimePtr;
+        this.dictionaryProvider = new CDataDictionaryProvider();
+    }
+
+    /**
+     * Gets the Arrow VectorSchemaRoot for accessing the current batch data
+     * @return the VectorSchemaRoot containing the current batch
+     */
+    public VectorSchemaRoot getVectorSchemaRoot() {
+        ensureInitialized();
+        return vectorSchemaRoot;
+    }
+
+    private Schema getSchema() {
+        // Native method is not async, but use a future to store the result for convenience
+        CompletableFuture<Schema> result = new CompletableFuture<>();
+        getSchema(streamPointer, (errString, arrowSchemaAddress) -> {
+            if (ErrorUtil.containsError(errString)) {
+                result.completeExceptionally(new RuntimeException(errString));
+            } else {
+                try {
+                    ArrowSchema arrowSchema = ArrowSchema.wrap(arrowSchemaAddress);
+                    Schema schema = importSchema(allocator, arrowSchema, dictionaryProvider);
+                    result.complete(schema);
+                } catch (Exception e) {
+                    result.completeExceptionally(e);
+                }
+            }
+        });
+        return result.join();
+    }
+
+    private Schema importSchema(BufferAllocator allocator, ArrowSchema schema, CDataDictionaryProvider provider) {
+        Field structField = importField(allocator, schema, provider);
+        if (structField.getType().getTypeID() != ArrowType.ArrowTypeID.Struct) {
+            throw new IllegalArgumentException("Cannot import schema: ArrowSchema describes non-struct type");
+        }
+        return new Schema(structField.getChildren(), structField.getMetadata());
+    }
+
+    private void ensureInitialized() {
+        if (!initialized) {
+            Schema schema = getSchema();
+            this.vectorSchemaRoot = VectorSchemaRoot.create(schema, allocator);
+        }
+        initialized = true;
+    }
+
+    /**
+     * Loads the next batch of data from the stream
+     * @return a CompletableFuture that completes with true if more data is available, false if end of stream
+     */
+    public CompletableFuture<Boolean> loadNextBatch() {
+        ensureInitialized();
+        long runtimePointer = this.runtimePtr;
+        CompletableFuture<Boolean> result = new CompletableFuture<>();
+        next(runtimePointer, streamPointer, (errString, arrowArrayAddress) -> {
+            if (ErrorUtil.containsError(errString)) {
+                result.completeExceptionally(new RuntimeException(errString));
+            } else if (arrowArrayAddress == 0) {
+                // Reached end of stream
+                result.complete(false);
+            } else {
+                try {
+                    ArrowArray arrowArray = ArrowArray.wrap(arrowArrayAddress);
+                    Data.importIntoVectorSchemaRoot(allocator, arrowArray, vectorSchemaRoot, dictionaryProvider);
+                    result.complete(true);
+                } catch (Exception e) {
+                    result.completeExceptionally(e);
+                }
+            }
+        });
+        return result;
+    }
+
+    /**
+     * Closes the stream and releases all associated resources
+     * @throws Exception if an error occurs during cleanup
+     */
+    public void close() throws Exception {
+        closeStream(streamPointer);
+        dictionaryProvider.close();
+        if (initialized) {
+            vectorSchemaRoot.close();
+        }
+    }
+
+    private static native void next(long runtime, long pointer, ObjectResultCallback callback);
+
+    private static native void getSchema(long pointer, ObjectResultCallback callback);
+
+    private static native void closeStream(long pointer);
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/DataFusionAction.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/DataFusionAction.java
new file mode 100644
index 0000000000000..99695d2c96266
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/DataFusionAction.java
@@ -0,0 +1,67 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.action;
+
+import org.opensearch.rest.BaseRestHandler;
+import org.opensearch.rest.RestRequest;
+import org.opensearch.rest.action.RestToXContentListener;
+import org.opensearch.transport.client.node.NodeClient;
+
+import java.util.List;
+
+import static org.opensearch.rest.RestRequest.Method.GET;
+
+/**
+ * REST handler for DataFusion information operations.
+ * It handles GET requests for retrieving DataFusion server information.
+ */
+public class DataFusionAction extends BaseRestHandler {
+
+    /**
+     * Constructor for DataFusionRestHandler.
+     */
+    public DataFusionAction() {}
+
+    /**
+     * Returns the name of the action.
+     * @return The name of the action.
+     */
+    @Override
+    public String getName() {
+        return "datafusion_info_action";
+    }
+
+    /**
+     * Returns the list of routes for the action.
+     * @return The list of routes for the action.
+     */
+    @Override
+    public List<Route> routes() {
+        return List.of(new Route(GET, "/_plugins/datafusion/info"), new Route(GET, "/_plugins/datafusion/info/{nodeId}"));
+    }
+
+    /**
+     * Prepares the request for the action.
+     * @param request The REST request.
+     * @param client The node client.
+     * @return The rest channel consumer.
+     */
+    @Override
+    protected RestChannelConsumer prepareRequest(RestRequest request, NodeClient client) {
+        String nodeId = request.param("nodeId");
+        if (nodeId != null) {
+            // Query specific node
+            NodesDataFusionInfoRequest nodesRequest = new NodesDataFusionInfoRequest(nodeId);
+            return channel -> client.execute(NodesDataFusionInfoAction.INSTANCE, nodesRequest, new RestToXContentListener<>(channel));
+        } else {
+            NodesDataFusionInfoRequest nodesRequest = new NodesDataFusionInfoRequest();
+            return channel -> client.execute(NodesDataFusionInfoAction.INSTANCE, nodesRequest, new RestToXContentListener<>(channel));
+        }
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodeDataFusionInfo.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodeDataFusionInfo.java
new file mode 100644
index 0000000000000..5512110c576da
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodeDataFusionInfo.java
@@ -0,0 +1,82 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.action;
+
+import org.opensearch.action.support.nodes.BaseNodeResponse;
+import org.opensearch.cluster.node.DiscoveryNode;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.xcontent.ToXContentFragment;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+import java.io.IOException;
+
+/**
+ * Information about DataFusion on a specific node
+ */
+public class NodeDataFusionInfo extends BaseNodeResponse implements ToXContentFragment {
+
+    private final String dataFusionVersion;
+
+    /**
+     * Constructor for NodeDataFusionInfo.
+     * @param node The discovery node.
+     * @param dataFusionVersion The DataFusion version.
+     */
+    public NodeDataFusionInfo(DiscoveryNode node, String dataFusionVersion) {
+        super(node);
+        this.dataFusionVersion = dataFusionVersion;
+    }
+
+    /**
+     * Constructor for NodeDataFusionInfo from stream input.
+     * @param in The stream input.
+     * @throws IOException If an I/O error occurs.
+     */
+    public NodeDataFusionInfo(StreamInput in) throws IOException {
+        super(in);
+        this.dataFusionVersion = in.readString();
+    }
+
+    /**
+     * Writes the node info to the stream output.
+     * @param out The stream output.
+     * @throws IOException If an I/O error occurs.
+     */
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        super.writeTo(out);
+        out.writeString(dataFusionVersion);
+    }
+
+    /**
+     * Converts the node info to XContent.
+     * @param builder The XContent builder.
+     * @param params The parameters.
+     * @return The XContent builder.
+     * @throws IOException If an I/O error occurs.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.startObject("data_fusion_info");
+        builder.field("datafusion_version", dataFusionVersion);
+        builder.endObject();
+        builder.endObject();
+        return builder;
+    }
+
+    /**
+     * Gets the DataFusion version.
+     * @return The DataFusion version.
+     */
+    public String getDataFusionVersion() {
+        return dataFusionVersion;
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoAction.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoAction.java
new file mode 100644
index 0000000000000..198c7973e6a9c
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoAction.java
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.action;
+
+import org.opensearch.action.ActionType;
+
+/**
+ * Action to retrieve DataFusion info from nodes
+ */
+public class NodesDataFusionInfoAction extends ActionType<NodesDataFusionInfoResponse> {
+    /**
+     * Singleton instance of NodesDataFusionInfoAction.
+     */
+    public static final NodesDataFusionInfoAction INSTANCE = new NodesDataFusionInfoAction();
+    /**
+     * Name of this action.
+     */
+    public static final String NAME = "cluster:admin/datafusion/info";
+
+    NodesDataFusionInfoAction() {
+        super(NAME, NodesDataFusionInfoResponse::new);
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoRequest.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoRequest.java
new file mode 100644
index 0000000000000..4e32bb3b0f18c
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoRequest.java
@@ -0,0 +1,75 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.action;
+
+import org.opensearch.action.support.nodes.BaseNodesRequest;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+
+import java.io.IOException;
+
+/**
+ * Request for retrieving DataFusion information from nodes
+ */
+public class NodesDataFusionInfoRequest extends BaseNodesRequest<NodesDataFusionInfoRequest> {
+
+    /**
+     * Default constructor for NodesDataFusionInfoRequest.
+     */
+    public NodesDataFusionInfoRequest() {
+        super((String[]) null);
+    }
+
+    /**
+     * Constructor for NodesDataFusionInfoRequest with specific node IDs.
+     * @param nodeIds The node IDs to query.
+     */
+    public NodesDataFusionInfoRequest(String... nodeIds) {
+        super(nodeIds);
+    }
+
+    /**
+     * Constructor for NodesDataFusionInfoRequest from stream input.
+     * @param in The stream input.
+     * @throws IOException If an I/O error occurs.
+     */
+    public NodesDataFusionInfoRequest(StreamInput in) throws IOException {
+        super(in);
+    }
+
+    /**
+     * Writes the request to the stream output.
+     * @param out The stream output.
+     * @throws IOException If an I/O error occurs.
+     */
+    @Override
+    public void writeTo(StreamOutput out) throws IOException {
+        super.writeTo(out);
+    }
+
+    /**
+     * Node-level request for DataFusion information
+     */
+    public static class NodeDataFusionInfoRequest extends org.opensearch.transport.TransportRequest {
+
+        /**
+         * Default constructor for NodeDataFusionInfoRequest.
+         */
+        public NodeDataFusionInfoRequest() {}
+
+        /**
+         * Constructor for NodeDataFusionInfoRequest from stream input.
+         * @param in The stream input.
+         * @throws IOException If an I/O error occurs.
+         */
+        public NodeDataFusionInfoRequest(StreamInput in) throws IOException {
+            super(in);
+        }
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoResponse.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoResponse.java
new file mode 100644
index 0000000000000..61a13fd263ee9
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/NodesDataFusionInfoResponse.java
@@ -0,0 +1,94 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.action;
+
+import org.opensearch.action.FailedNodeException;
+import org.opensearch.action.support.nodes.BaseNodesResponse;
+import org.opensearch.cluster.ClusterName;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.core.common.io.stream.StreamOutput;
+import org.opensearch.core.xcontent.ToXContentObject;
+import org.opensearch.core.xcontent.XContentBuilder;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Response containing DataFusion information from multiple nodes
+ */
+public class NodesDataFusionInfoResponse extends BaseNodesResponse<NodeDataFusionInfo> implements ToXContentObject {
+
+    /**
+     * Constructor for NodesDataFusionInfoResponse.
+     * @param clusterName The cluster name.
+     * @param nodes The list of node DataFusion info.
+     * @param failures The list of failed node exceptions.
+     */
+    public NodesDataFusionInfoResponse(ClusterName clusterName, List<NodeDataFusionInfo> nodes, List<FailedNodeException> failures) {
+        super(clusterName, nodes, failures);
+    }
+
+    @Override
+    protected List<NodeDataFusionInfo> readNodesFrom(StreamInput in) throws IOException {
+        return in.readList(NodeDataFusionInfo::new);
+    }
+
+    /**
+     * Constructor for NodesDataFusionInfoResponse from stream input.
+     * @param in The stream input.
+     * @throws IOException If an I/O error occurs.
+     */
+    public NodesDataFusionInfoResponse(StreamInput in) throws IOException {
+        super(in);
+    }
+
+    /**
+     * Writes the node response to stream output.
+     * @param out The stream output.
+     * @param nodes The list of nodes to write.
+     * @throws IOException If an I/O error occurs.
+     */
+    @Override
+    protected void writeNodesTo(StreamOutput out, List<NodeDataFusionInfo> nodes) throws IOException {
+        out.writeList(nodes);
+    }
+
+    /**
+     * Converts the response to XContent.
+     * @param builder The XContent builder.
+     * @param params The parameters.
+     * @return The XContent builder.
+     * @throws IOException If an I/O error occurs.
+     */
+    @Override
+    public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject();
+        builder.startObject("nodes");
+        for (NodeDataFusionInfo nodeInfo : getNodes()) {
+            builder.field(nodeInfo.getNode().getId());
+            // builder.field("name", nodeInfo.getNode().getName());
+            // builder.field("transport_address", nodeInfo.getNode().getAddress().toString());
+            nodeInfo.toXContent(builder, params);
+        }
+        builder.endObject();
+
+        if (!failures().isEmpty()) {
+            builder.startArray("failures");
+            for (FailedNodeException failure : failures()) {
+                builder.startObject();
+                builder.field("node_id", failure.nodeId());
+                builder.field("reason", failure.getMessage());
+                builder.endObject();
+            }
+            builder.endArray();
+        }
+        builder.endObject();
+        return builder;
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/TransportNodesDataFusionInfoAction.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/TransportNodesDataFusionInfoAction.java
new file mode 100644
index 0000000000000..8a659f29230d6
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/TransportNodesDataFusionInfoAction.java
@@ -0,0 +1,110 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.action;
+
+import org.opensearch.action.FailedNodeException;
+import org.opensearch.action.support.ActionFilters;
+import org.opensearch.action.support.nodes.TransportNodesAction;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.common.inject.Inject;
+import org.opensearch.core.common.io.stream.StreamInput;
+import org.opensearch.datafusion.DataFusionService;
+import org.opensearch.threadpool.ThreadPool;
+import org.opensearch.transport.TransportService;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Transport action for retrieving DataFusion information from nodes
+ */
+public class TransportNodesDataFusionInfoAction extends TransportNodesAction<
+    NodesDataFusionInfoRequest,
+    NodesDataFusionInfoResponse,
+    NodesDataFusionInfoRequest.NodeDataFusionInfoRequest,
+    NodeDataFusionInfo> {
+
+    private final DataFusionService dataFusionService;
+
+    /**
+     * Constructor for TransportNodesDataFusionInfoAction.
+     * @param threadPool The thread pool.
+     * @param clusterService The cluster service.
+     * @param transportService The transport service.
+     * @param actionFilters The action filters.
+     * @param dataFusionService The DataFusion service.
+     */
+    @Inject
+    public TransportNodesDataFusionInfoAction(
+        ThreadPool threadPool,
+        ClusterService clusterService,
+        TransportService transportService,
+        ActionFilters actionFilters,
+        DataFusionService dataFusionService
+    ) {
+        super(
+            NodesDataFusionInfoAction.NAME,
+            threadPool,
+            clusterService,
+            transportService,
+            actionFilters,
+            NodesDataFusionInfoRequest::new,
+            NodesDataFusionInfoRequest.NodeDataFusionInfoRequest::new,
+            ThreadPool.Names.MANAGEMENT,
+            NodeDataFusionInfo.class
+        );
+        this.dataFusionService = dataFusionService;
+    }
+
+    /**
+     * Creates a new nodes response.
+     * @param request The nodes request.
+     * @param responses The list of node responses.
+     * @param failures The list of failed node exceptions.
+     * @return The nodes response.
+     */
+    @Override
+    protected NodesDataFusionInfoResponse newResponse(
+        NodesDataFusionInfoRequest request,
+        List<NodeDataFusionInfo> responses,
+        List<FailedNodeException> failures
+    ) {
+        return new NodesDataFusionInfoResponse(clusterService.getClusterName(), responses, failures);
+    }
+
+    /**
+     * Creates a new node request.
+     * @param request The nodes request.
+     * @return The node request.
+     */
+    @Override
+    protected NodesDataFusionInfoRequest.NodeDataFusionInfoRequest newNodeRequest(NodesDataFusionInfoRequest request) {
+        return new NodesDataFusionInfoRequest.NodeDataFusionInfoRequest();
+    }
+
+    @Override
+    protected NodeDataFusionInfo newNodeResponse(StreamInput in) throws IOException {
+        return new NodeDataFusionInfo(in);
+    }
+
+    /**
+     * Handles the node request and returns the node response.
+     * @param request The node request.
+     * @return The node response.
+     */
+    @Override
+    protected NodeDataFusionInfo nodeOperation(NodesDataFusionInfoRequest.NodeDataFusionInfoRequest request) {
+        try {
+            System.out.println(this.dataFusionService.getVersion());
+            return new NodeDataFusionInfo(clusterService.localNode(), dataFusionService.getVersion());
+        } catch (Exception e) {
+            return new NodeDataFusionInfo(clusterService.localNode(), "unknown");
+        }
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/package-info.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/package-info.java
new file mode 100644
index 0000000000000..d3542f4dfe9dc
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/action/package-info.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * REST actions and transport handlers for DataFusion plugin.
+ * Provides API endpoints for DataFusion functionality.
+ */
+package org.opensearch.datafusion.action;
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/DefaultRecordBatchStream.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/DefaultRecordBatchStream.java
new file mode 100644
index 0000000000000..5603660ed760a
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/DefaultRecordBatchStream.java
@@ -0,0 +1,114 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.core;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.util.concurrent.CompletableFuture;
+
+public class DefaultRecordBatchStream implements RecordBatchStream {
+
+    private static final Logger logger = LogManager.getLogger(DefaultRecordBatchStream.class);
+
+    private final long nativeStreamPtr;
+    private volatile boolean closed = false;
+    private volatile boolean hasNextCached = false;
+    private volatile boolean hasNextValue = false;
+
+    /**
+     * Creates a new wrapping the given native stream pointer.
+     *
+     * @param nativeStreamPtr Pointer to the native DataFusion RecordBatch stream
+     */
+    public DefaultRecordBatchStream(long nativeStreamPtr) {
+        if (nativeStreamPtr == 0) {
+            throw new IllegalArgumentException("Invalid native stream pointer");
+        }
+        this.nativeStreamPtr = nativeStreamPtr;
+        logger.debug("Created default record batch stream with pointer: {}", nativeStreamPtr);
+    }
+
+    @Override
+    public Object getSchema() {
+        return "schema"; // Placeholder
+    }
+
+    @Override
+    public CompletableFuture<Object> next() {
+        // PlaceholderImpl
+        return CompletableFuture.supplyAsync(() -> {
+            if (closed) {
+                return null;
+            }
+
+            try {
+                // Get the next batch from native code
+                String batch = nativeNextBatch(nativeStreamPtr);
+
+                // Reset cached hasNext value since we consumed a batch
+                hasNextCached = false;
+
+                logger.trace("Retrieved next batch from stream pointer: {}", nativeStreamPtr);
+                return batch;
+            } catch (Exception e) {
+                logger.error("Error getting next batch from stream", e);
+                return null;
+            }
+        });
+    }
+
+    @Override
+    public boolean hasNext() {
+        // Placeholder impl
+        if (closed) {
+            return false;
+        }
+
+        if (hasNextCached) {
+            return hasNextValue;
+        }
+
+        try {
+            // Check if there's a next batch available
+            // This is a simplified implementation - in practice, you might want to
+            // peek at the stream without consuming the batch
+            String nextBatch = nativeNextBatch(nativeStreamPtr);
+            hasNextValue = (nextBatch != null);
+            hasNextCached = true;
+
+            logger.trace("hasNext() = {} for stream pointer: {}", hasNextValue, nativeStreamPtr);
+            return hasNextValue;
+        } catch (Exception e) {
+            logger.error("Error checking for next batch in stream", e);
+            return false;
+        }
+    }
+
+    @Override
+    public void close() {
+        if (!closed) {
+            logger.debug("Closing RecordBatchStream with pointer: {}", nativeStreamPtr);
+            try {
+                nativeCloseStream(nativeStreamPtr);
+                closed = true;
+                logger.debug("Successfully closed RecordBatchStream");
+            } catch (Exception e) {
+                logger.error("Error closing RecordBatchStream", e);
+                throw e;
+            }
+        }
+    }
+
+    // Native method declarations
+    private static native String nativeNextBatch(long streamPtr);
+
+    private static native void nativeCloseStream(long streamPtr);
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/GlobalRuntimeEnv.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/GlobalRuntimeEnv.java
new file mode 100644
index 0000000000000..547539d5ff4d1
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/GlobalRuntimeEnv.java
@@ -0,0 +1,48 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.core;
+
+import static org.opensearch.datafusion.DataFusionQueryJNI.closeGlobalRuntime;
+import static org.opensearch.datafusion.DataFusionQueryJNI.createGlobalRuntime;
+import static org.opensearch.datafusion.DataFusionQueryJNI.createTokioRuntime;
+
+/**
+ * Global runtime environment for DataFusion operations.
+ * Manages the lifecycle of the native DataFusion runtime.
+ */
+public class GlobalRuntimeEnv implements AutoCloseable {
+    // ptr to runtime environment in df
+    private final long ptr;
+    private final long tokio_runtime_ptr;
+
+    /**
+     * Creates a new global runtime environment.
+     */
+    public GlobalRuntimeEnv() {
+        this.ptr = createGlobalRuntime();
+        this.tokio_runtime_ptr = createTokioRuntime();
+    }
+
+    /**
+     * Gets the native pointer to the runtime environment.
+     * @return the native pointer
+     */
+    public long getPointer() {
+        return ptr;
+    }
+
+    public long getTokioRuntimePtr() {
+        return tokio_runtime_ptr;
+    }
+
+    @Override
+    public void close() {
+        closeGlobalRuntime(this.ptr);
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/SessionContext.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/SessionContext.java
new file mode 100644
index 0000000000000..956aa78fdaa30
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/SessionContext.java
@@ -0,0 +1,42 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.core;
+
+/**
+ * Session context for datafusion
+ */
+public class SessionContext implements AutoCloseable {
+
+    // ptr to context in df
+    private final long ptr;
+
+    /**
+     * Create a new DataFusion session context
+     * @return context ID for subsequent operations
+     */
+    static native long createContext();
+
+    /**
+     * Close and cleanup a DataFusion context
+     * @param contextId the context ID to close
+     */
+    public static native void closeContext(long contextId);
+
+    /**
+     * Creates a new session context.
+     */
+    public SessionContext() {
+        this.ptr = createContext();
+    }
+
+    @Override
+    public void close() throws Exception {
+        closeContext(this.ptr);
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/package-info.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/package-info.java
new file mode 100644
index 0000000000000..2c6e72ef3a582
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/core/package-info.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * Core DataFusion runtime and session management classes.
+ * Provides runtime environment and session context management.
+ */
+package org.opensearch.datafusion.core;
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/package-info.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/package-info.java
new file mode 100644
index 0000000000000..81017da49c16c
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/package-info.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * DataFusion query engine integration for OpenSearch.
+ * Provides the main plugin and service classes for DataFusion functionality.
+ */
+package org.opensearch.datafusion;
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionContext.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionContext.java
new file mode 100644
index 0000000000000..8f7d4f914d64e
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionContext.java
@@ -0,0 +1,843 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.CollectorManager;
+import org.apache.lucene.search.FieldDoc;
+import org.apache.lucene.search.Query;
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.action.search.SearchType;
+import org.opensearch.common.unit.TimeValue;
+import org.opensearch.common.util.BigArrays;
+import org.opensearch.core.common.bytes.BytesReference;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.cache.bitset.BitsetFilterCache;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.mapper.ObjectMapper;
+import org.opensearch.index.query.ParsedQuery;
+import org.opensearch.index.query.QueryShardContext;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.index.similarity.SimilarityService;
+import org.opensearch.search.SearchExtBuilder;
+import org.opensearch.search.SearchShardTarget;
+import org.opensearch.search.aggregations.BucketCollectorProcessor;
+import org.opensearch.search.aggregations.InternalAggregation;
+import org.opensearch.search.aggregations.SearchContextAggregations;
+import org.opensearch.search.collapse.CollapseContext;
+import org.opensearch.search.dfs.DfsSearchResult;
+import org.opensearch.search.fetch.FetchPhase;
+import org.opensearch.search.fetch.FetchSearchResult;
+import org.opensearch.search.fetch.StoredFieldsContext;
+import org.opensearch.search.fetch.subphase.FetchDocValuesContext;
+import org.opensearch.search.fetch.subphase.FetchFieldsContext;
+import org.opensearch.search.fetch.subphase.FetchSourceContext;
+import org.opensearch.search.fetch.subphase.ScriptFieldsContext;
+import org.opensearch.search.fetch.subphase.highlight.SearchHighlightContext;
+import org.opensearch.search.internal.ContextIndexSearcher;
+import org.opensearch.search.internal.ReaderContext;
+import org.opensearch.search.internal.ScrollContext;
+import org.opensearch.search.internal.SearchContext;
+import org.opensearch.search.internal.ShardSearchContextId;
+import org.opensearch.search.internal.ShardSearchRequest;
+import org.opensearch.datafusion.DatafusionEngine;
+import org.opensearch.search.ContextEngineSearcher;
+import org.opensearch.search.profile.Profilers;
+import org.opensearch.search.query.QuerySearchResult;
+import org.opensearch.search.query.ReduceableSearchResult;
+import org.opensearch.search.rescore.RescoreContext;
+import org.opensearch.search.sort.SortAndFormats;
+import org.opensearch.search.suggest.SuggestionSearchContext;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Search context for Datafusion engine
+ */
+public class DatafusionContext extends SearchContext {
+    private final ReaderContext readerContext;
+    private final ShardSearchRequest request;
+    private final SearchShardTask task;
+    private final DatafusionEngine readEngine;
+    private final DatafusionSearcher engineSearcher;
+    private final IndexShard indexShard;
+    private final QuerySearchResult queryResult;
+    private final FetchSearchResult fetchResult;
+    private final IndexService indexService;
+    private final QueryShardContext queryShardContext;
+    private DatafusionQuery datafusionQuery;
+    private Map<String, Object[]> dfResults;
+
+    private List<Long> dfQueryResult;
+    private List<BytesReference> dfFetchResult; // TODO: make this Map<docIds, BytesRef>?
+    private SearchContextAggregations aggregations;
+    private final BigArrays bigArrays;
+    private final Map<Class<?>, CollectorManager<? extends Collector, ReduceableSearchResult>> queryCollectorManagers = new HashMap<>();
+
+    /**
+     * Constructor
+     * @param readerContext The reader context
+     * @param request The shard search request
+     * @param task The search shard task
+     * @param engine The datafusion engine
+     */
+    public DatafusionContext(
+        ReaderContext readerContext,
+        ShardSearchRequest request,
+        SearchShardTarget searchShardTarget,
+        SearchShardTask task,
+        DatafusionEngine engine,
+        BigArrays bigArrays) {
+        this.readerContext = readerContext;
+        this.indexShard = readerContext.indexShard();
+        this.request = request;
+        this.task = task;
+        this.readEngine = engine;
+        this.engineSearcher = engine.acquireSearcher("search");//null;//TODO readerContext.contextEngineSearcher();
+        this.queryResult = new QuerySearchResult(readerContext.id(), searchShardTarget, request);
+        this.fetchResult = new FetchSearchResult(readerContext.id(), searchShardTarget);
+        this.dfQueryResult = null;
+        this.dfFetchResult = null;
+        this.indexService = readerContext.indexService();
+        this.queryShardContext = indexService.newQueryShardContext(
+            request.shardId().id(),
+            null, // TOOD : index searcher is null
+            request::nowInMillis,
+            searchShardTarget.getClusterAlias(),
+            false, // reevaluate the usage
+            false // specific to lucene
+        );
+        this.bigArrays = bigArrays;
+    }
+
+    /**
+     * Gets the read engine
+     * @return The datafusion engine
+     */
+    public DatafusionEngine readEngine() {
+        return readEngine;
+    }
+
+    /**
+     * Sets datafusion query
+     * @param datafusionQuery The datafusion query
+     */
+    public DatafusionContext datafusionQuery(DatafusionQuery datafusionQuery) {
+        this.datafusionQuery = datafusionQuery;
+        return this;
+    }
+
+    /**
+     * Sets datafusion query phase row ids
+     * @param dfQueryResult The datafusion query phase result
+     */
+    public void setDfQueryPhaseResult(List<Long> dfQueryResult) {
+        this.dfQueryResult = dfQueryResult;
+    }
+
+    /**
+     * Sets datafusion fetch phase row ids
+     * @param dfFetchResult The datafusion fetch phase result
+     */
+    public void setDfFetchPhaseResult(List<BytesReference> dfFetchResult) {
+        this.dfFetchResult = dfFetchResult;
+    }
+
+    /**
+     * Gets the datafusion query
+     * @return The datafusion query
+     */
+    public DatafusionQuery getDatafusionQuery() {
+        return datafusionQuery;
+    }
+
+    /**
+     * Gets the engine searcher
+     * @return The datafusion searcher
+     */
+    public DatafusionSearcher getEngineSearcher() {
+        return engineSearcher;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param task The search shard task
+     */
+    @Override
+    public void setTask(SearchShardTask task) {
+
+    }
+
+    @Override
+    public SearchShardTask getTask() {
+        return null;
+    }
+
+
+    /**
+     * Gets df query result.
+     *
+     * @return the df query result
+     */
+    public List<Long> getDfQueryResult() {
+        return dfQueryResult;
+    }
+
+    /**
+     * Gets df fetch result.
+     *
+     * @return the df fetch result
+     */
+    public List<BytesReference> getDfFetchResult() {
+        return dfFetchResult;
+    }
+
+    @Override
+    public boolean isCancelled() {
+        return false;
+    }
+
+    @Override
+    protected void doClose() {
+
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param rewrite Whether to rewrite
+     */
+    @Override
+    public void preProcess(boolean rewrite) {
+
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param query The query
+     */
+    @Override
+    public Query buildFilteredQuery(Query query) {
+        return null;
+    }
+
+    @Override
+    public ShardSearchContextId id() {
+        return null;
+    }
+
+    @Override
+    public String source() {
+        return "";
+    }
+
+    @Override
+    public ShardSearchRequest request() {
+        return request;
+    }
+
+    @Override
+    public SearchType searchType() {
+        return null;
+    }
+
+    @Override
+    public SearchShardTarget shardTarget() {
+        return null;
+    }
+
+    @Override
+    public int numberOfShards() {
+        return 0;
+    }
+
+    @Override
+    public float queryBoost() {
+        return 0;
+    }
+
+    @Override
+    public ScrollContext scrollContext() {
+        return null;
+    }
+
+    @Override
+    public SearchContextAggregations aggregations() {
+        return aggregations;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param aggregations The search context aggregations
+     */
+    @Override
+    public SearchContext aggregations(SearchContextAggregations aggregations) {
+        this.aggregations = aggregations;
+        return this;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param searchExtBuilder The search extension builder
+     */
+    @Override
+    public void addSearchExt(SearchExtBuilder searchExtBuilder) {
+
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param name The name
+     */
+    @Override
+    public SearchExtBuilder getSearchExt(String name) {
+        return null;
+    }
+
+    @Override
+    public SearchHighlightContext highlight() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param highlight The search highlight context
+     */
+    @Override
+    public void highlight(SearchHighlightContext highlight) {
+
+    }
+
+    @Override
+    public SuggestionSearchContext suggest() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param suggest The suggestion search context
+     */
+    @Override
+    public void suggest(SuggestionSearchContext suggest) {
+
+    }
+
+    @Override
+    public List<RescoreContext> rescore() {
+        return List.of();
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param rescore The rescore context
+     */
+    @Override
+    public void addRescore(RescoreContext rescore) {
+
+    }
+
+    @Override
+    public boolean hasScriptFields() {
+        return false;
+    }
+
+    @Override
+    public ScriptFieldsContext scriptFields() {
+        return null;
+    }
+
+    @Override
+    public boolean sourceRequested() {
+        return false;
+    }
+
+    @Override
+    public boolean hasFetchSourceContext() {
+        return false;
+    }
+
+    @Override
+    public FetchSourceContext fetchSourceContext() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param fetchSourceContext The fetch source context
+     */
+    @Override
+    public SearchContext fetchSourceContext(FetchSourceContext fetchSourceContext) {
+        return null;
+    }
+
+    @Override
+    public FetchDocValuesContext docValuesContext() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param docValuesContext The fetch doc values context
+     */
+    @Override
+    public SearchContext docValuesContext(FetchDocValuesContext docValuesContext) {
+        return null;
+    }
+
+    @Override
+    public FetchFieldsContext fetchFieldsContext() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param fetchFieldsContext The fetch fields context
+     */
+    @Override
+    public SearchContext fetchFieldsContext(FetchFieldsContext fetchFieldsContext) {
+        return null;
+    }
+
+    @Override
+    public ContextIndexSearcher searcher() {
+        return null;
+    }
+
+    @Override
+    public IndexShard indexShard() {
+        return this.indexShard;
+    }
+
+    @Override
+    public MapperService mapperService() {
+        return indexService.mapperService();
+    }
+
+    @Override
+    public SimilarityService similarityService() {
+        return null;
+    }
+
+    @Override
+    public BigArrays bigArrays() {
+        return bigArrays;
+    }
+
+    @Override
+    public BitsetFilterCache bitsetFilterCache() {
+        return null;
+    }
+
+    @Override
+    public TimeValue timeout() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param timeout The timeout value
+     */
+    @Override
+    public void timeout(TimeValue timeout) {
+
+    }
+
+    @Override
+    public int terminateAfter() {
+        return 0;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param terminateAfter The terminate after value
+     */
+    @Override
+    public void terminateAfter(int terminateAfter) {
+
+    }
+
+    @Override
+    public boolean lowLevelCancellation() {
+        return false;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param minimumScore The minimum score
+     */
+    @Override
+    public SearchContext minimumScore(float minimumScore) {
+        return null;
+    }
+
+    @Override
+    public Float minimumScore() {
+        return 0f;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param sort The sort and formats
+     */
+    @Override
+    public SearchContext sort(SortAndFormats sort) {
+        return null;
+    }
+
+    @Override
+    public SortAndFormats sort() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param trackScores Whether to track scores
+     */
+    @Override
+    public SearchContext trackScores(boolean trackScores) {
+        return null;
+    }
+
+    @Override
+    public boolean trackScores() {
+        return false;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param trackTotalHits The track total hits value
+     */
+    @Override
+    public SearchContext trackTotalHitsUpTo(int trackTotalHits) {
+        return null;
+    }
+
+    @Override
+    public int trackTotalHitsUpTo() {
+        return 0;
+    }
+
+    @Override
+    /**
+     * {@inheritDoc}
+     * @param searchAfter The field doc for search after
+     */
+    public SearchContext searchAfter(FieldDoc searchAfter) {
+        return null;
+    }
+
+    @Override
+    public FieldDoc searchAfter() {
+        return null;
+    }
+
+    @Override
+    /**
+     * {@inheritDoc}
+     * @param collapse The collapse context
+     */
+    public SearchContext collapse(CollapseContext collapse) {
+        return null;
+    }
+
+    @Override
+    public CollapseContext collapse() {
+        return null;
+    }
+
+    @Override
+    /**
+     * {@inheritDoc}
+     * @param postFilter The parsed post filter query
+     */
+    public SearchContext parsedPostFilter(ParsedQuery postFilter) {
+        return null;
+    }
+
+    @Override
+    public ParsedQuery parsedPostFilter() {
+        return null;
+    }
+
+    @Override
+    public Query aliasFilter() {
+        return null;
+    }
+
+    @Override
+    /**
+     * {@inheritDoc}
+     * @param query The parsed query
+     */
+    public SearchContext parsedQuery(ParsedQuery query) {
+        return null;
+    }
+
+    @Override
+    public ParsedQuery parsedQuery() {
+        return null;
+    }
+
+    // TODO : fix this
+    public Query query() {
+        // Extract query from request
+        return null;
+    }
+
+    @Override
+    public int from() {
+        return 0;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param from The from value
+     */
+    @Override
+    public SearchContext from(int from) {
+        return null;
+    }
+
+    @Override
+    public int size() {
+        return 0;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param size The size value
+     */
+    @Override
+    public SearchContext size(int size) {
+        return null;
+    }
+
+    @Override
+    public boolean hasStoredFields() {
+        return false;
+    }
+
+    @Override
+    public boolean hasStoredFieldsContext() {
+        return false;
+    }
+
+    @Override
+    public boolean storedFieldsRequested() {
+        return false;
+    }
+
+    @Override
+    public StoredFieldsContext storedFieldsContext() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param storedFieldsContext The stored fields context
+     */
+    @Override
+    public SearchContext storedFieldsContext(StoredFieldsContext storedFieldsContext) {
+        return null;
+    }
+
+    @Override
+    public boolean explain() {
+        return false;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param explain Whether to explain
+     */
+    @Override
+    public void explain(boolean explain) {
+
+    }
+
+    @Override
+    public List<String> groupStats() {
+        return List.of();
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param groupStats The group stats
+     */
+    @Override
+    public void groupStats(List<String> groupStats) {
+
+    }
+
+    @Override
+    public boolean version() {
+        return false;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param version Whether to include version
+     */
+    @Override
+    public void version(boolean version) {
+
+    }
+
+    @Override
+    public boolean seqNoAndPrimaryTerm() {
+        return false;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param seqNoAndPrimaryTerm Whether to include sequence number and primary term
+     */
+    @Override
+    public void seqNoAndPrimaryTerm(boolean seqNoAndPrimaryTerm) {
+
+    }
+
+    @Override
+    public int[] docIdsToLoad() {
+        return new int[0];
+    }
+
+    @Override
+    public int docIdsToLoadFrom() {
+        return 0;
+    }
+
+    @Override
+    public int docIdsToLoadSize() {
+        return 0;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param docIdsToLoad The document IDs to load
+     * @param docsIdsToLoadFrom The starting index for document IDs to load
+     * @param docsIdsToLoadSize The size of document IDs to load
+     */
+    @Override
+    public SearchContext docIdsToLoad(int[] docIdsToLoad, int docsIdsToLoadFrom, int docsIdsToLoadSize) {
+        return null;
+    }
+
+    @Override
+    public DfsSearchResult dfsResult() {
+        return null;
+    }
+
+    @Override
+    public QuerySearchResult queryResult() {
+        return this.queryResult;
+    }
+
+    @Override
+    public FetchPhase fetchPhase() {
+        return null;
+    }
+
+    @Override
+    public FetchSearchResult fetchResult() {
+        return this.fetchResult;
+    }
+
+    @Override
+    public Profilers getProfilers() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param name The field name
+     */
+    @Override
+    public MappedFieldType fieldType(String name) {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param name The object mapper name
+     */
+    @Override
+    public ObjectMapper getObjectMapper(String name) {
+        return null;
+    }
+
+    @Override
+    public long getRelativeTimeInMillis() {
+        return 0;
+    }
+
+    @Override
+    public Map<Class<?>, CollectorManager<? extends Collector, ReduceableSearchResult>> queryCollectorManagers() {
+        return queryCollectorManagers;
+    }
+
+    @Override
+    public QueryShardContext getQueryShardContext() {
+        return queryShardContext;
+    }
+
+    @Override
+    public ReaderContext readerContext() {
+        return null;
+    }
+
+    @Override
+    public InternalAggregation.ReduceContext partialOnShard() {
+        return null;
+    }
+
+    /**
+     * {@inheritDoc}
+     * @param bucketCollectorProcessor The bucket collector processor
+     */
+    @Override
+    public void setBucketCollectorProcessor(BucketCollectorProcessor bucketCollectorProcessor) {
+
+    }
+
+    @Override
+    public BucketCollectorProcessor bucketCollectorProcessor() {
+        return null;
+    }
+
+    @Override
+    public int getTargetMaxSliceCount() {
+        return 0;
+    }
+
+    @Override
+    public boolean shouldUseTimeSeriesDescSortOptimization() {
+        return false;
+    }
+
+    /**
+     * Gets the context engine searcher
+     * @return The context engine searcher
+     */
+    public ContextEngineSearcher<DatafusionQuery, RecordBatchStream> contextEngineSearcher() {
+        return new ContextEngineSearcher<>(this.engineSearcher, this);
+    }
+
+    public void setDFResults(Map<String, Object[]> dfResults) {
+        this.dfResults = dfResults;
+    }
+
+    public Map<String, Object[]> getDFResults() {
+        return dfResults;
+    }
+
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQuery.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQuery.java
new file mode 100644
index 0000000000000..f34935aa1a205
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQuery.java
@@ -0,0 +1,57 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import java.util.Iterator;
+import java.util.List;
+
+public class DatafusionQuery {
+    private final byte[] substraitBytes;
+
+    // List of Search executors which returns a result iterator which contains row id which can be joined in datafusion
+    private final List<SearchExecutor> searchExecutors;
+    private Boolean isFetchPhase;
+    private List<Long> queryPhaseRowIds;
+    private List<String> projections;
+
+    public DatafusionQuery(byte[] substraitBytes, List<SearchExecutor> searchExecutors) {
+        this.substraitBytes = substraitBytes;
+        this.searchExecutors = searchExecutors;
+        this.isFetchPhase = false;
+    }
+
+    public void setProjections(List<String> projections) {
+        this.projections = projections;
+    }
+
+    public void setFetchPhaseContext(List<Long> queryPhaseRowIds) {
+        this.queryPhaseRowIds = queryPhaseRowIds;
+        this.isFetchPhase = true;
+    }
+
+    public boolean isFetchPhase() {
+        return this.isFetchPhase;
+    }
+
+    public List<Long> getQueryPhaseRowIds() {
+        return this.queryPhaseRowIds;
+    }
+
+    public List<String> getProjections() {
+        return this.projections;
+    }
+
+    public byte[] getSubstraitBytes() {
+        return substraitBytes;
+    }
+
+    public List<SearchExecutor> getSearchExecutors() {
+        return searchExecutors;
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQueryPhaseExecutor.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQueryPhaseExecutor.java
new file mode 100644
index 0000000000000..8de7c7e397715
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionQueryPhaseExecutor.java
@@ -0,0 +1,49 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import org.opensearch.index.engine.EngineSearcher;
+import org.opensearch.search.query.QueryPhaseExecutor;
+import org.opensearch.search.query.QueryPhaseExecutionException;
+import org.opensearch.datafusion.search.DatafusionContext;
+import org.opensearch.datafusion.search.DatafusionQuery;
+import org.opensearch.search.ContextEngineSearcher;
+import org.opensearch.search.query.GenericQueryPhase;
+import org.opensearch.search.query.GenericQueryPhaseSearcher;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+/**
+ * Query phase executor for Datafusion engine
+ */
+public class DatafusionQueryPhaseExecutor implements QueryPhaseExecutor<DatafusionContext> {
+
+    @Override
+    public boolean execute(DatafusionContext context) throws QueryPhaseExecutionException {
+        if (!canHandle(context)) {
+            // TODO : throw new QueryPhaseExecutionException("Cannot handle datafusion context");
+        }
+
+        GenericQueryPhaseSearcher<DatafusionContext, DatafusionSearcher, DatafusionQuery> searcher =
+            context.readEngine().getQueryPhaseSearcher();
+
+        GenericQueryPhase<DatafusionContext, DatafusionSearcher, DatafusionQuery> queryPhase =
+            new GenericQueryPhase<>(searcher);
+
+        DatafusionQuery query = context.getDatafusionQuery();
+        // TODO : rework interfaces as context itself has many objects
+        return queryPhase.executeInternal(context, context.getEngineSearcher(), query);
+    }
+
+    @Override
+    public boolean canHandle(DatafusionContext context) {
+        return context != null &&
+            context.readEngine() != null &&
+            context.query() != null;
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReader.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReader.java
new file mode 100644
index 0000000000000..ec01a01b57720
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReader.java
@@ -0,0 +1,102 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import org.opensearch.datafusion.DataFusionQueryJNI;
+import org.opensearch.index.engine.exec.WriterFileSet;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import static org.opensearch.datafusion.DataFusionQueryJNI.closeDatafusionReader;
+
+/**
+ * DataFusion reader for JNI operations.
+ */
+public class DatafusionReader implements Closeable {
+    /**
+     * The directory path.
+     */
+    public String directoryPath;
+    /**
+     * The file metadata collection.
+     */
+    public Collection<WriterFileSet> files;
+    /**
+     * The cache pointer.
+     */
+    public long cachePtr;
+    private AtomicInteger refCount = new AtomicInteger(0);
+
+    /**
+     * Constructor
+     * @param directoryPath The directory path
+     * @param files The file metadata collection
+     */
+    public DatafusionReader(String directoryPath, Collection<WriterFileSet> files) {
+        this.directoryPath = directoryPath;
+        this.files = files;
+        String[] fileNames = new String[0];
+        if(files != null) {
+            System.out.println("Got the files!!!!!");
+            fileNames = files.stream()
+                .flatMap(writerFileSet -> writerFileSet.getFiles().stream())
+                .toArray(String[]::new);
+        }
+        System.out.println("File names: " + Arrays.toString(fileNames));
+        System.out.println("Directory path: " + directoryPath);
+
+        this.cachePtr = DataFusionQueryJNI.createDatafusionReader(directoryPath, fileNames);
+        incRef();
+    }
+
+    /**
+     * Gets the cache pointer.
+     * @return the cache pointer
+     */
+    public long getCachePtr() {
+        return cachePtr;
+    }
+
+    /**
+     * Increments the reference count.
+     */
+    public void incRef() {
+        refCount.getAndIncrement();
+    }
+
+    /**
+     * Decrements the reference count.
+     * @throws IOException if an I/O error occurs
+     */
+    public void decRef() throws IOException {
+        if(refCount.get() == 0) {
+            throw new IllegalStateException("Listing table has been already closed");
+        }
+
+        int currRefCount = refCount.decrementAndGet();
+        if(currRefCount == 0) {
+            this.close();
+        }
+
+    }
+
+    @Override
+    public void close() throws IOException {
+        if(cachePtr == -1L) {
+            throw new IllegalStateException("Listing table has been already closed");
+        }
+
+//        closeDatafusionReader(this.cachePtr);
+        this.cachePtr = -1;
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReaderManager.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReaderManager.java
new file mode 100644
index 0000000000000..ba14055170dad
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionReaderManager.java
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import org.opensearch.index.engine.CatalogSnapshotAwareRefreshListener;
+import org.opensearch.index.engine.EngineReaderManager;
+import org.opensearch.index.engine.exec.FileMetadata;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.engine.exec.coord.CatalogSnapshot;
+
+import java.io.IOException;
+import java.net.URI;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.List;
+
+public class DatafusionReaderManager implements EngineReaderManager<DatafusionReader>, CatalogSnapshotAwareRefreshListener {
+    private DatafusionReader current;
+    private String path;
+    private String dataFormat;
+//    private final Lock refreshLock = new ReentrantLock();
+//    private final List<ReferenceManager.RefreshListener> refreshListeners = new CopyOnWriteArrayList();
+
+    public DatafusionReaderManager(String path, Collection<FileMetadata> files, String dataFormat) throws IOException {
+        WriterFileSet writerFileSet = new WriterFileSet(Path.of(URI.create("file:///" + path)), 1);
+        files.forEach(fileMetadata -> writerFileSet.add(fileMetadata.file()));
+        this.current = new DatafusionReader(path, List.of(writerFileSet));;
+        this.path = path;
+        this.dataFormat = dataFormat;
+    }
+
+    @Override
+    public DatafusionReader acquire() throws IOException {
+        if (current == null) {
+            throw new RuntimeException("Invalid state for datafusion reader");
+        }
+        current.incRef();
+        return current;
+    }
+
+    @Override
+    public void release(DatafusionReader reference) throws IOException {
+        assert reference != null : "Shard view can't be null";
+        reference.decRef();
+    }
+
+
+    @Override
+    public void beforeRefresh() throws IOException {
+        // no op
+    }
+
+    @Override
+    public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException {
+        if (didRefresh && catalogSnapshot != null) {
+            DatafusionReader old = this.current;
+            if(old !=null) {
+                release(old);
+            }
+            this.current = new DatafusionReader(this.path, catalogSnapshot.getSearchableFiles(dataFormat));
+            this.current.incRef();
+        }
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcher.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcher.java
new file mode 100644
index 0000000000000..49f05321620a8
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcher.java
@@ -0,0 +1,86 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import org.apache.lucene.store.AlreadyClosedException;
+import org.opensearch.datafusion.DataFusionQueryJNI;
+import org.opensearch.datafusion.core.DefaultRecordBatchStream;
+import org.opensearch.index.engine.EngineSearcher;
+import org.opensearch.search.aggregations.SearchResultsCollector;
+import org.opensearch.vectorized.execution.search.spi.RecordBatchStream;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Objects;
+
+public class DatafusionSearcher implements EngineSearcher<DatafusionQuery, RecordBatchStream> {
+    private final String source;
+    private DatafusionReader reader;
+    private Closeable closeable;
+    public DatafusionSearcher(String source, DatafusionReader reader, Closeable close) {
+        this.source = source;
+        this.reader = reader;
+    }
+
+    @Override
+    public String source() {
+        return source;
+    }
+
+    @Override
+    public void search(DatafusionQuery datafusionQuery, List<SearchResultsCollector<RecordBatchStream>> collectors) throws IOException {
+        // TODO : call search here to native
+        // TODO : change RunTimePtr
+        long nativeStreamPtr = DataFusionQueryJNI.executeQueryPhase(reader.getCachePtr(), datafusionQuery.getSubstraitBytes(), 0);
+        RecordBatchStream stream = new DefaultRecordBatchStream(nativeStreamPtr);
+        while(stream.hasNext()) {
+            for(SearchResultsCollector<RecordBatchStream> collector : collectors) {
+                collector.collect(stream);
+            }
+        }
+    }
+
+    @Override
+    public long search(DatafusionQuery datafusionQuery, Long contextPtr) {
+        if (datafusionQuery.isFetchPhase()) {
+            long[] row_ids = datafusionQuery.getQueryPhaseRowIds()
+                .stream()
+                .mapToLong(Long::longValue)
+                .toArray();
+            String[] projections = Objects.isNull(datafusionQuery.getProjections()) ? new String[]{} : datafusionQuery.getProjections().toArray(String[]::new);
+
+            System.out.println("row_ids");
+            System.out.println(Arrays.toString(row_ids));
+            return DataFusionQueryJNI.executeFetchPhase(reader.getCachePtr(), row_ids, projections, contextPtr);
+        }
+        return DataFusionQueryJNI.executeQueryPhase(reader.getCachePtr(), datafusionQuery.getSubstraitBytes(), contextPtr);
+    }
+
+    public DatafusionReader getReader() {
+        return reader;
+    }
+
+    @Override
+    public void close() {
+        try {
+            if (closeable != null) {
+                closeable.close();
+            }
+        } catch (IOException e) {
+            throw new UncheckedIOException("failed to close", e);
+        } catch (AlreadyClosedException e) {
+            // This means there's a bug somewhere: don't suppress it
+            throw new AssertionError(e);
+        }
+
+    }
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcherSupplier.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcherSupplier.java
new file mode 100644
index 0000000000000..6ff7526b0fdea
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/DatafusionSearcherSupplier.java
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import org.apache.lucene.store.AlreadyClosedException;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineSearcherSupplier;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.function.Function;
+
+public abstract class DatafusionSearcherSupplier extends EngineSearcherSupplier<DatafusionSearcher> {
+
+    private final Function<DatafusionSearcher, DatafusionSearcher> wrapper;
+    private final AtomicBoolean released = new AtomicBoolean(false);
+
+    public DatafusionSearcherSupplier(Function<DatafusionSearcher, DatafusionSearcher> wrapper) {
+        this.wrapper = wrapper;
+    }
+
+    public final DatafusionSearcher acquireSearcher(String source) {
+        if (released.get()) {
+            throw new AlreadyClosedException("SearcherSupplier was closed");
+        }
+        final DatafusionSearcher searcher = acquireSearcherInternal(source);
+        return searcher;
+        // TODO apply wrapper
+    }
+
+    @Override
+    public final void close() {
+        if (released.compareAndSet(false, true)) {
+            doClose();
+        } else {
+            assert false : "SearchSupplier was released twice";
+        }
+    }
+
+    protected abstract void doClose();
+
+    protected abstract DatafusionSearcher acquireSearcherInternal(String source);
+
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchExecutor.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchExecutor.java
new file mode 100644
index 0000000000000..ff3b5953c119e
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchExecutor.java
@@ -0,0 +1,15 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+// Functional interface to execute search and get iterator
+@FunctionalInterface
+public interface SearchExecutor {
+    SearchResultIterator execute();
+}
diff --git a/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchResultIterator.java b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchResultIterator.java
new file mode 100644
index 0000000000000..27fe2d54f76d9
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/java/org/opensearch/datafusion/search/SearchResultIterator.java
@@ -0,0 +1,18 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion.search;
+
+import java.util.Iterator;
+
+// Interface for the iterator that Datafusion expects
+public interface SearchResultIterator extends Iterator<Record> {
+    // Basic Iterator methods
+    boolean hasNext();
+    Record next();
+}
diff --git a/plugins/engine-datafusion/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec b/plugins/engine-datafusion/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec
new file mode 100644
index 0000000000000..9b1ec055f7ea2
--- /dev/null
+++ b/plugins/engine-datafusion/src/main/resources/META-INF/services/org.opensearch.vectorized.execution.search.spi.DataSourceCodec
@@ -0,0 +1,5 @@
+# DataFusion Engine implementations
+# Add your custom implementations here, e.g.:
+# com.example.CustomCsvDataFusionEngine
+
+# Note: Built-in csv engine is now in separate library
diff --git a/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/DataFusionServiceTests.java b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/DataFusionServiceTests.java
new file mode 100644
index 0000000000000..e2a285f2a36af
--- /dev/null
+++ b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/DataFusionServiceTests.java
@@ -0,0 +1,372 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import com.parquet.parquetdataformat.ParquetDataFormatPlugin;
+import org.apache.arrow.memory.RootAllocator;
+import org.apache.arrow.vector.*;
+import org.opensearch.action.OriginalIndices;
+import org.opensearch.action.search.SearchRequest;
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.cluster.metadata.IndexMetadata;
+import org.opensearch.common.UUIDs;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.common.lease.Releasables;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.core.common.Strings;
+import org.opensearch.core.index.Index;
+import org.opensearch.core.index.shard.ShardId;
+import org.opensearch.datafusion.search.DatafusionContext;
+import org.opensearch.datafusion.search.DatafusionQuery;
+import org.opensearch.datafusion.search.DatafusionSearcher;
+import org.opensearch.env.Environment;
+import org.opensearch.index.IndexService;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.engine.EngineSearcherSupplier;
+import org.opensearch.index.engine.exec.FileMetadata;
+import org.opensearch.index.shard.IndexShard;
+import org.opensearch.index.shard.SearchOperationListener;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.index.store.Store;
+import org.opensearch.indices.replication.common.ReplicationType;
+import org.opensearch.plugins.Plugin;
+import org.opensearch.search.SearchShardTarget;
+import org.opensearch.search.aggregations.SearchResultsCollector;
+import org.opensearch.search.internal.*;
+import org.opensearch.tasks.Task;
+import org.opensearch.test.IndexSettingsModule;
+import org.opensearch.test.OpenSearchSingleNodeTestCase;
+import org.junit.Before;
+
+import org.mockito.Mock;
+import org.mockito.MockitoAnnotations;
+import org.opensearch.threadpool.TestThreadPool;
+import org.opensearch.threadpool.ThreadPool;
+import org.opensearch.vectorized.execution.search.DataFormat;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URISyntaxException;
+import java.net.URL;
+import java.nio.file.Path;
+import java.util.*;
+import java.util.concurrent.ExecutionException;
+import java.util.concurrent.atomic.AtomicLong;
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.opensearch.common.unit.TimeValue.timeValueMinutes;
+import static org.opensearch.common.xcontent.XContentFactory.jsonBuilder;
+
+import org.apache.arrow.vector.VectorSchemaRoot;
+import org.apache.arrow.vector.types.pojo.Field;
+/**
+ * Unit tests for DataFusionService
+ *
+ * Note: These tests require the native library to be available.
+ * They are disabled by default and can be enabled by setting the system property:
+ * -Dtest.native.enabled=true
+ */
+public class DataFusionServiceTests extends OpenSearchSingleNodeTestCase {
+
+    private DataFusionService service;
+
+    @Mock
+    private Environment mockEnvironment;
+
+    @Before
+    public void setup() {
+        MockitoAnnotations.openMocks(this);
+        Settings mockSettings = Settings.builder().put("path.data", "/tmp/test-data").build();
+
+        when(mockEnvironment.settings()).thenReturn(mockSettings);
+        service = new DataFusionService(Map.of());
+        service.doStart();
+    }
+
+    public void testGetVersion() {
+        String version = service.getVersion();
+        assertNotNull(version);
+        assertTrue(version.contains("datafusion_version"));
+        assertTrue(version.contains("substrait_version"));
+    }
+
+//    public void testCreateAndCloseContext() {
+//        // Create context
+//        SessionContext defaultContext = service.getDefaultContext();
+//        assertNotNull(defaultContext);
+//        assertTrue(defaultContext.getContext() > 0);
+//
+//        // Verify context exists
+//        SessionContext context = service.getContext(defaultContext.getContext());
+//        assertNotNull(context);
+//        assertEquals(defaultContext.getContext(), context.getContext());
+//
+//        // Close context
+//        boolean closed = service.closeContext(defaultContext.getContext());
+//        assertTrue(closed);
+//
+//        // Verify context is gone
+//        assertNull(service.getContext(defaultContext.getContext()));
+//    }
+
+    public void testQueryPhaseExecutor() throws IOException {
+        Map<String, Object[]> finalRes = new HashMap<>();
+        DatafusionSearcher datafusionSearcher = null;
+        try {
+            URL resourceUrl = getClass().getClassLoader().getResource("data/");
+            Index index = new Index("index-7", "index-7");
+            final Path path = Path.of(resourceUrl.toURI()).resolve("index-7").resolve("0");
+            ShardPath shardPath = new ShardPath(false, path, path, new ShardId(index, 0));
+            DatafusionEngine engine = new DatafusionEngine(DataFormat.CSV, List.of(new FileMetadata(DataFormat.CSV.toString(), "generation-1.parquet")), service, shardPath);
+            datafusionSearcher = engine.acquireSearcher("search");
+
+            byte[] protoContent;
+            try (InputStream is = getClass().getResourceAsStream("/substrait_plan.pb")) {
+                protoContent = is.readAllBytes();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+
+            long streamPointer = datafusionSearcher.search(new DatafusionQuery(protoContent, new ArrayList<>()), service.getTokioRuntimePointer());
+            RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+            RecordBatchStream stream = new RecordBatchStream(streamPointer, service.getTokioRuntimePointer() , allocator);
+
+            // We can have some collectors passed like this which can collect the results and convert to InternalAggregation
+            // Is the possible? need to check
+
+            SearchResultsCollector<RecordBatchStream> collector = new SearchResultsCollector<RecordBatchStream>() {
+                @Override
+                public void collect(RecordBatchStream value) {
+                    VectorSchemaRoot root = value.getVectorSchemaRoot();
+                    for (Field field : root.getSchema().getFields()) {
+                        String filedName = field.getName();
+                        FieldVector fieldVector = root.getVector(filedName);
+                        Object[] fieldValues = new Object[fieldVector.getValueCount()];
+                        for (int i = 0; i < fieldVector.getValueCount(); i++) {
+                            fieldValues[i] = fieldVector.getObject(i);
+                        }
+                        finalRes.put(filedName, fieldValues);
+                    }
+                }
+            };
+
+            while (stream.loadNextBatch().join()) {
+                collector.collect(stream);
+            }
+
+            logger.info("Final Results:");
+            for (Map.Entry<String, Object[]> entry : finalRes.entrySet()) {
+                logger.info("{}: {}", entry.getKey(), java.util.Arrays.toString(entry.getValue()));
+            }
+
+        } catch (Exception exception) {
+            logger.error("Failed to execute Substrait query plan", exception);
+        }
+        finally {
+            if(datafusionSearcher != null) {
+                datafusionSearcher.close();
+            }
+        }
+    }
+
+    public void testQueryThenFetchExecutor() throws IOException, URISyntaxException {
+        DatafusionSearcher datafusionSearcher = null;
+        try {
+            URL resourceUrl = getClass().getClassLoader().getResource("data/");
+            Index index = new Index("index-7", "index-7");
+            final Path path = Path.of(resourceUrl.toURI()).resolve("index-7").resolve("0");
+            ShardPath shardPath = new ShardPath(false, path, path, new ShardId(index, 0));
+            DatafusionEngine engine = new DatafusionEngine(DataFormat.CSV, List.of(new FileMetadata(DataFormat.CSV.toString(), "generation-1.parquet"), new FileMetadata(DataFormat.CSV.toString(), "generation-2.parquet")), service, shardPath);
+            datafusionSearcher = engine.acquireSearcher("Search");
+
+            byte[] protoContent;
+            try (InputStream is = getClass().getResourceAsStream("/substrait_plan.pb")) {
+                protoContent = is.readAllBytes();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+
+            DatafusionQuery query = new DatafusionQuery(protoContent, new ArrayList<>());
+            long streamPointer = datafusionSearcher.search(query, service.getTokioRuntimePointer());
+            RootAllocator allocator = new RootAllocator(Long.MAX_VALUE);
+            RecordBatchStream stream = new RecordBatchStream(streamPointer, service.getTokioRuntimePointer() , allocator);
+
+            ArrayList<Long> row_ids_res = new ArrayList<>();
+
+            while (stream.loadNextBatch().join()) {
+                VectorSchemaRoot root = stream.getVectorSchemaRoot();
+                for (Field field : root.getSchema().getFields()) {
+                    String fieldName = field.getName();
+                    if (fieldName.equals("___row_id")) {
+                        IntVector fieldVector = (IntVector) root.getVector(fieldName);
+                        for(int i=0; i<fieldVector.getValueCount(); i++) {
+                            row_ids_res.add((long) fieldVector.get(i));
+                        }
+                    }
+                }
+            }
+
+            logger.info("Final row_ids count: {}", row_ids_res);
+
+            List<String> projections = List.of("target_ip");
+            query.setProjections(projections);
+            query.setFetchPhaseContext(row_ids_res);
+            long fetchPhaseStreamPointer = datafusionSearcher.search(query, service.getTokioRuntimePointer());
+
+            RecordBatchStream fetchPhaseStream = new RecordBatchStream(fetchPhaseStreamPointer, service.getTokioRuntimePointer() , allocator);
+            int total_fetch_results = 0;
+            ArrayList<Long> fetch_row_ids_res = new ArrayList<>();
+
+            while(fetchPhaseStream.loadNextBatch().join()) {
+                VectorSchemaRoot root = fetchPhaseStream.getVectorSchemaRoot();
+                assertEquals(projections.size(), root.getSchema().getFields().size());
+                for (Field field : root.getSchema().getFields()) {
+                    assertTrue("Field was not passed in projections list", projections.contains(field.getName()));
+                    if(field.getName().equals("___row_id")) {
+                        IntVector fieldVector = (IntVector) root.getVector(field.getName());
+                        for(int i=0; i<root.getSchema().getFields().size(); i++) {
+                            fetch_row_ids_res.add((long) fieldVector.get(i));
+                        }
+                    } else if(field.getName().equals("target_ip")) {
+                        ViewVarBinaryVector fieldVector = (ViewVarBinaryVector) root.getVector(field.getName());
+
+                    }
+                }
+                total_fetch_results += root.getRowCount();
+            }
+
+            assertEquals(row_ids_res.size(), total_fetch_results);
+        } catch (Exception exception) {
+            logger.error("Failed to execute Substrait query plan", exception);
+            throw exception;
+        } finally {
+            if(datafusionSearcher != null) {
+                datafusionSearcher.close();
+            }
+        }
+    }
+
+    protected Collection<Class<? extends Plugin>> getPlugins() {
+        return pluginList(ParquetDataFormatPlugin.class);
+    }
+
+    public void testQueryThenFetchE2ETest() throws IOException, URISyntaxException, InterruptedException, ExecutionException {
+        URL resourceUrl = getClass().getClassLoader().getResource("data/");
+        Index index = new Index("index-7", "index-7");
+        final Path path = Path.of(resourceUrl.toURI()).resolve("index-7").resolve("0");
+        ShardPath shardPath = new ShardPath(false, path, path, new ShardId(index, 0));
+        DatafusionEngine engine = new DatafusionEngine(DataFormat.CSV, List.of(new FileMetadata(DataFormat.CSV.toString(), "generation-1.parquet"), new FileMetadata(DataFormat.CSV.toString(), "generation-2.parquet")), service, shardPath);
+
+        SearchRequest searchRequest = new SearchRequest().allowPartialSearchResults(true);
+        ShardSearchRequest shardSearchRequest = new ShardSearchRequest(
+            OriginalIndices.NONE,
+            searchRequest,
+            new ShardId(index, 0),
+            1,
+            new AliasFilter(null, Strings.EMPTY_ARRAY),
+            1.0f,
+            -1,
+            null,
+            null
+        );
+
+        IndexService indexService = createIndex("index-7", Settings.EMPTY, jsonBuilder().startObject()
+            .startObject("properties")
+            .startObject("target_status_code")
+            .field("type", "integer")
+            .endObject()
+            .endObject()
+            .endObject()
+        );
+        ThreadPool threadPool = new TestThreadPool(this.getClass().getName());
+        IndexShard indexShard = createIndexShard(shardPath.getShardId(), true);
+        when(indexShard.getThreadPool()).thenReturn(threadPool);
+        SearchOperationListener searchOperationListener = new SearchOperationListener() {
+        };
+        when(indexShard.getSearchOperationListener()).thenReturn(searchOperationListener);
+
+        EngineSearcherSupplier<?> reader = indexShard.acquireSearcherSupplier();
+        ReaderContext readerContext = createAndPutReaderContext(shardSearchRequest, indexService, indexShard, reader);
+        SearchShardTarget searchShardTarget = new SearchShardTarget("node_1", new ShardId("index-7", "index-7", 0), null, OriginalIndices.NONE);
+        SearchShardTask searchShardTask = new SearchShardTask(0, "n/a", "n/a", "test", null, Collections.singletonMap(Task.X_OPAQUE_ID, "my_id"));
+        DatafusionContext datafusionContext = new DatafusionContext(readerContext, shardSearchRequest, searchShardTarget, searchShardTask, engine, null);
+
+        byte[] protoContent;
+        try (InputStream is = getClass().getResourceAsStream("/substrait_plan.pb")) {
+            protoContent = is.readAllBytes();
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+
+        DatafusionQuery query = new DatafusionQuery(protoContent, new ArrayList<>());
+        List<String> projections = List.of("target_status_code");
+        query.setProjections(projections);
+
+        datafusionContext.datafusionQuery(query);
+
+        engine.executeQueryPhase(datafusionContext);
+        engine.executeFetchPhase(datafusionContext);
+
+        assertEquals(datafusionContext.getDfQueryResult().size(), datafusionContext.getDfFetchResult().size());
+    }
+
+    final AtomicLong idGenerator = new AtomicLong();
+
+
+    final ReaderContext createAndPutReaderContext(
+        ShardSearchRequest request,
+        IndexService indexService,
+        IndexShard shard,
+        EngineSearcherSupplier<?> reader
+    ) {
+        assert request.readerId() == null;
+        assert request.keepAlive() == null;
+        ReaderContext readerContext = null;
+        Releasable decreaseScrollContexts = null;
+        try {
+
+            final long keepAlive = request.keepAlive() != null ? request.keepAlive().getMillis() : request.readerId() == null ? timeValueMinutes(5).getMillis() : -1;
+
+            final ShardSearchContextId id = new ShardSearchContextId(UUIDs.randomBase64UUID(), idGenerator.incrementAndGet());
+
+            readerContext = new ReaderContext(id, indexService, shard, reader, keepAlive, request.keepAlive() == null);
+            reader = null;
+            final ReaderContext finalReaderContext = readerContext;
+            final SearchOperationListener searchOperationListener = shard.getSearchOperationListener();
+            searchOperationListener.onNewReaderContext(finalReaderContext);
+            readerContext.addOnClose(() -> {
+                try {
+                    if (finalReaderContext.scrollContext() != null) {
+                        searchOperationListener.onFreeScrollContext(finalReaderContext);
+                    }
+                } finally {
+                    searchOperationListener.onFreeReaderContext(finalReaderContext);
+                }
+            });
+            readerContext = null;
+            return finalReaderContext;
+        } finally {
+            Releasables.close(reader, readerContext, decreaseScrollContexts);
+        }
+    }
+
+    static IndexShard createIndexShard(ShardId shardId, boolean remoteStoreEnabled) {
+        Settings settings = Settings.builder()
+            .put(IndexMetadata.SETTING_REPLICATION_TYPE, ReplicationType.SEGMENT)
+            .put(IndexMetadata.SETTING_REMOTE_STORE_ENABLED, String.valueOf(remoteStoreEnabled))
+            .build();
+        IndexSettings indexSettings = IndexSettingsModule.newIndexSettings("test_index", settings);
+        Store store = mock(Store.class);
+        IndexShard indexShard = mock(IndexShard.class);
+        when(indexShard.indexSettings()).thenReturn(indexSettings);
+        when(indexShard.shardId()).thenReturn(shardId);
+        when(indexShard.store()).thenReturn(store);
+        return indexShard;
+    }
+}
diff --git a/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/TestDataFusionServiceTests.java b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/TestDataFusionServiceTests.java
new file mode 100644
index 0000000000000..395e2fae52e2f
--- /dev/null
+++ b/plugins/engine-datafusion/src/test/java/org/opensearch/datafusion/TestDataFusionServiceTests.java
@@ -0,0 +1,55 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.datafusion;
+
+import org.opensearch.test.OpenSearchTestCase;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Unit tests for DataFusionService
+ *
+ * Note: These tests require the native library to be available.
+ * They are disabled by default and can be enabled by setting the system property:
+ * -Dtest.native.enabled=true
+ */
+public class TestDataFusionServiceTests extends OpenSearchTestCase {
+
+    private DataFusionService service;
+
+    @Override
+    public void setUp() throws Exception {
+        super.setUp();
+        service = new DataFusionService(Collections.emptyMap());
+        service.doStart();
+    }
+
+    public void testGetVersion() {
+        String version = service.getVersion();
+        assertNotNull(version);
+        // The service returns codec information in JSON format
+        assertTrue("Version should contain codecs", version.contains("codecs"));
+        assertTrue("Version should contain CsvDataSourceCodec", version.contains("CsvDataSourceCodec"));
+    }
+
+    public void testCreateAndCloseContext() {
+        service.registerDirectory("/Users/gbh/Documents", List.of("parquet-nested.csv"));
+        long contextId = service.createSessionContext().join();
+        // Create context
+        assertTrue(contextId > 0);
+
+        service.getVersion();
+    }
+
+    public void testCodecDiscovery() {
+        // Test that the CSV codec can be discovered via SPI
+        // TODO : test with dummy plugin and dummy codec
+    }
+}
diff --git a/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-1.parquet b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-1.parquet
new file mode 100644
index 0000000000000..695b6429ad7a4
Binary files /dev/null and b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-1.parquet differ
diff --git a/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-2.parquet b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-2.parquet
new file mode 100644
index 0000000000000..695b6429ad7a4
Binary files /dev/null and b/plugins/engine-datafusion/src/test/resources/data/index-7/0/generation-2.parquet differ
diff --git a/plugins/engine-datafusion/src/test/resources/substrait_plan.pb b/plugins/engine-datafusion/src/test/resources/substrait_plan.pb
new file mode 100644
index 0000000000000..80776758aa1eb
Binary files /dev/null and b/plugins/engine-datafusion/src/test/resources/substrait_plan.pb differ
diff --git a/server/build.gradle b/server/build.gradle
index 69f3c59556f5b..917d44aec4664 100644
--- a/server/build.gradle
+++ b/server/build.gradle
@@ -72,11 +72,14 @@ dependencies {
   api project(":libs:opensearch-geo")
   api project(":libs:opensearch-telemetry")
   api project(":libs:opensearch-task-commons")
+  api project(':libs:opensearch-vectorized-exec-spi')
 
   compileOnly project(":libs:agent-sm:bootstrap")
   compileOnly project(':libs:opensearch-plugin-classloader')
   testRuntimeOnly project(':libs:opensearch-plugin-classloader')
 
+  implementation 'org.apache.commons:commons-lang3:3.17.0'
+
   api libs.bundles.lucene
 
   // utilities
@@ -115,6 +118,7 @@ dependencies {
   api libs.protobuf
   api libs.jakartaannotation
 
+
   // https://mvnrepository.com/artifact/org.roaringbitmap/RoaringBitmap
   api libs.roaringbitmap
   testImplementation 'org.awaitility:awaitility:4.3.0'
@@ -135,8 +139,7 @@ tasks.withType(JavaCompile).configureEach {
 }
 
 compileJava {
-  options.compilerArgs += ['-processor', ['org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor',
-    'org.opensearch.common.annotation.processor.ApiAnnotationProcessor'].join(',')]
+  options.compilerArgs += ['-processor', ['org.apache.logging.log4j.core.config.plugins.processor.PluginProcessor'].join(',')]
 }
 
 tasks.named("internalClusterTest").configure {
diff --git a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java
index 8cd6fb7ed5aa6..70e0002608fe2 100644
--- a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java
+++ b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java
@@ -732,7 +732,9 @@ public static final IndexShard newIndexShard(
             indexService.getRefreshMutex(),
             clusterService.getClusterApplierService(),
             MergedSegmentPublisher.EMPTY,
-            ReferencedSegmentsPublisher.EMPTY
+            ReferencedSegmentsPublisher.EMPTY,
+            null,
+            null
         );
     }
 
diff --git a/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java b/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java
index 13f7211d48e9a..5412aa00fe49a 100644
--- a/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java
+++ b/server/src/main/java/org/opensearch/action/admin/cluster/node/info/PluginsAndModules.java
@@ -32,6 +32,7 @@
 
 package org.opensearch.action.admin.cluster.node.info;
 
+import org.opensearch.common.annotation.ExperimentalApi;
 import org.opensearch.core.common.io.stream.StreamInput;
 import org.opensearch.core.common.io.stream.StreamOutput;
 import org.opensearch.core.service.ReportingService;
@@ -49,6 +50,7 @@
  *
  * @opensearch.internal
  */
+@ExperimentalApi // TODO : this cannot be experimental, just marking it to bypass for now
 public class PluginsAndModules implements ReportingService.Info {
     private final List<PluginInfo> plugins;
     private final List<PluginInfo> modules;
diff --git a/server/src/main/java/org/opensearch/action/search/SearchRequest.java b/server/src/main/java/org/opensearch/action/search/SearchRequest.java
index 4a4a309b45a2e..e2cc921ba9a1c 100644
--- a/server/src/main/java/org/opensearch/action/search/SearchRequest.java
+++ b/server/src/main/java/org/opensearch/action/search/SearchRequest.java
@@ -713,6 +713,18 @@ public String pipeline() {
         return pipeline;
     }
 
+    public SearchRequest queryPlanIR(byte[] queryPlanIR) {
+        if (this.source == null) {
+            this.source = new SearchSourceBuilder();
+        }
+        this.source.queryPlanIR(queryPlanIR);
+        return this;
+    }
+
+    public byte[] queryPlanIR() {
+        return this.source != null ? this.source.queryPlanIR() : null;
+    }
+
     @Override
     public SearchTask createTask(long id, String type, String action, TaskId parentTaskId, Map<String, String> headers) {
         return new SearchTask(id, type, action, this::buildDescription, parentTaskId, headers, cancelAfterTimeInterval);
diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java
index 7a8eee076fa37..f715dd13cd25f 100644
--- a/server/src/main/java/org/opensearch/index/IndexModule.java
+++ b/server/src/main/java/org/opensearch/index/IndexModule.java
@@ -90,6 +90,8 @@
 import org.opensearch.indices.recovery.RecoverySettings;
 import org.opensearch.indices.recovery.RecoveryState;
 import org.opensearch.plugins.IndexStorePlugin;
+import org.opensearch.plugins.PluginsService;
+import org.opensearch.plugins.SearchEnginePlugin;
 import org.opensearch.repositories.RepositoriesService;
 import org.opensearch.script.ScriptService;
 import org.opensearch.search.aggregations.support.ValuesSourceRegistry;
@@ -493,6 +495,23 @@ public void addSimilarity(String name, TriFunction<Settings, Version, ScriptServ
      * The returned reader is closed once it goes out of scope.
      * </p>
      */
+    /**
+     * indexModule.setReaderWrapper(
+     *                 indexService -> new SecurityFlsDlsIndexSearcherWrapper(
+     *                     indexService,
+     *                     settings,
+     *                     adminDns,
+     *                     cs,
+     *                     auditLog,
+     *                     ciol,
+     *                     evaluator,
+     *                     dlsFlsValve::getCurrentConfig,
+     *                     dlsFlsBaseContext
+     *                 )
+     *             );
+     * Example reader wrapper used in security plugin
+     * @param indexReaderWrapperFactory
+     */
     public void setReaderWrapper(
         Function<IndexService, CheckedFunction<DirectoryReader, DirectoryReader, IOException>> indexReaderWrapperFactory
     ) {
@@ -668,7 +687,9 @@ public IndexService newIndexService(
         Supplier<Boolean> shardLevelRefreshEnabled,
         RecoverySettings recoverySettings,
         RemoteStoreSettings remoteStoreSettings,
-        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier
+        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier,
+        PluginsService pluginsService,
+        SearchEnginePlugin searchEnginePlugin
     ) throws IOException {
         return newIndexService(
             indexCreationContext,
@@ -696,7 +717,9 @@ public IndexService newIndexService(
             remoteStoreSettings,
             (s) -> {},
             shardId -> ReplicationStats.empty(),
-            clusterDefaultMaxMergeAtOnceSupplier
+            clusterDefaultMaxMergeAtOnceSupplier,
+            searchEnginePlugin,
+            pluginsService
         );
     }
 
@@ -726,7 +749,9 @@ public IndexService newIndexService(
         RemoteStoreSettings remoteStoreSettings,
         Consumer<IndexShard> replicator,
         Function<ShardId, ReplicationStats> segmentReplicationStatsProvider,
-        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier
+        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier,
+        SearchEnginePlugin searchEnginePlugin,
+        PluginsService pluginsService
     ) throws IOException {
         final IndexEventListener eventListener = freeze();
         Function<IndexService, CheckedFunction<DirectoryReader, DirectoryReader, IOException>> readerWrapperFactory = indexReaderWrapper
@@ -798,7 +823,9 @@ public IndexService newIndexService(
                 compositeIndexSettings,
                 replicator,
                 segmentReplicationStatsProvider,
-                clusterDefaultMaxMergeAtOnceSupplier
+                clusterDefaultMaxMergeAtOnceSupplier,
+                searchEnginePlugin,
+                pluginsService
             );
             success = true;
             return indexService;
diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java
index 22441df923bf8..277daf2696b17 100644
--- a/server/src/main/java/org/opensearch/index/IndexService.java
+++ b/server/src/main/java/org/opensearch/index/IndexService.java
@@ -110,6 +110,8 @@
 import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher;
 import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
 import org.opensearch.plugins.IndexStorePlugin;
+import org.opensearch.plugins.PluginsService;
+import org.opensearch.plugins.SearchEnginePlugin;
 import org.opensearch.repositories.RepositoriesService;
 import org.opensearch.script.ScriptService;
 import org.opensearch.search.aggregations.support.ValuesSourceRegistry;
@@ -206,7 +208,9 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust
     private final Object refreshMutex = new Object();
     private volatile TimeValue refreshInterval;
     private volatile boolean shardLevelRefreshEnabled;
+    private final SearchEnginePlugin searchEnginePlugin;
     private final IndexStorePlugin.StoreFactory storeFactory;
+    private final PluginsService pluginsService;
 
     @InternalApi
     public IndexService(
@@ -252,7 +256,9 @@ public IndexService(
         CompositeIndexSettings compositeIndexSettings,
         Consumer<IndexShard> replicator,
         Function<ShardId, ReplicationStats> segmentReplicationStatsProvider,
-        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier
+        Supplier<Integer> clusterDefaultMaxMergeAtOnceSupplier,
+        SearchEnginePlugin searchEnginePlugin,
+        PluginsService pluginsService
     ) {
         super(indexSettings);
         this.storeFactory = storeFactory;
@@ -359,6 +365,8 @@ public IndexService(
                 startIndexLevelRefreshTask();
             }
         }
+        this.searchEnginePlugin = searchEnginePlugin;
+        this.pluginsService = pluginsService;
     }
 
     @InternalApi
@@ -400,7 +408,9 @@ public IndexService(
         boolean shardLevelRefreshEnabled,
         RecoverySettings recoverySettings,
         RemoteStoreSettings remoteStoreSettings,
-        Supplier<Integer> clusterDefaultMaxMergeAtOnce
+        Supplier<Integer> clusterDefaultMaxMergeAtOnce,
+        SearchEnginePlugin searchEnginePlugin,
+        PluginsService pluginsService
     ) {
         this(
             indexSettings,
@@ -445,7 +455,9 @@ public IndexService(
             null,
             s -> {},
             (shardId) -> ReplicationStats.empty(),
-            clusterDefaultMaxMergeAtOnce
+            clusterDefaultMaxMergeAtOnce,
+            searchEnginePlugin,
+            pluginsService
         );
     }
 
@@ -794,7 +806,8 @@ protected void closeInternal() {
                 refreshMutex,
                 clusterService.getClusterApplierService(),
                 this.indexSettings.isSegRepEnabledOrRemoteNode() ? mergedSegmentPublisher : null,
-                this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null
+                this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null,
+                pluginsService
             );
             eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created");
             eventListener.afterIndexShardCreated(indexShard);
diff --git a/server/src/main/java/org/opensearch/index/engine/CatalogSnapshotAwareRefreshListener.java b/server/src/main/java/org/opensearch/index/engine/CatalogSnapshotAwareRefreshListener.java
new file mode 100644
index 0000000000000..11c0ce293eae9
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/CatalogSnapshotAwareRefreshListener.java
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.opensearch.index.engine.exec.coord.CatalogSnapshot;
+
+import java.io.IOException;
+
+public interface CatalogSnapshotAwareRefreshListener {
+    /**
+     * Called before refresh operation.
+     */
+    void beforeRefresh() throws IOException;
+
+    /**
+     * Called after refresh operation with catalog snapshot.
+     * @param didRefresh whether refresh actually occurred
+     * @param catalogSnapshot the current catalog snapshot with file information
+     */
+    void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatPlugin.java b/server/src/main/java/org/opensearch/index/engine/DataFormatPlugin.java
new file mode 100644
index 0000000000000..2bb09a50dee52
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/DataFormatPlugin.java
@@ -0,0 +1,21 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.shard.ShardPath;
+
+public interface DataFormatPlugin  {
+
+    <T extends DataFormat> IndexingExecutionEngine<T> indexingEngine(MapperService mapperService, ShardPath shardPath);
+
+    DataFormat getDataFormat();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/Engine.java b/server/src/main/java/org/opensearch/index/engine/Engine.java
index 82d8871b73fba..c17927990df7f 100644
--- a/server/src/main/java/org/opensearch/index/engine/Engine.java
+++ b/server/src/main/java/org/opensearch/index/engine/Engine.java
@@ -79,6 +79,11 @@
 import org.opensearch.core.common.unit.ByteSizeValue;
 import org.opensearch.core.index.shard.ShardId;
 import org.opensearch.index.VersionType;
+import org.opensearch.index.engine.exec.bridge.CheckpointState;
+import org.opensearch.index.engine.exec.bridge.Indexer;
+import org.opensearch.index.engine.exec.bridge.IndexingThrottler;
+import org.opensearch.index.engine.exec.bridge.StatsHolder;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
 import org.opensearch.index.mapper.IdFieldMapper;
 import org.opensearch.index.mapper.Mapping;
 import org.opensearch.index.mapper.ParseContext.Document;
@@ -130,7 +135,7 @@
  * @opensearch.api
  */
 @PublicApi(since = "1.0.0")
-public abstract class Engine implements LifecycleAware, Closeable {
+public abstract class Engine implements LifecycleAware, Closeable, Indexer, CheckpointState, StatsHolder, IndexingThrottler, SearcherOperations<Engine.Searcher, ReferenceManager<OpenSearchDirectoryReader>> {
 
     public static final String SYNC_COMMIT_ID = "sync_id";  // TODO: remove sync_id in 3.0
     public static final String HISTORY_UUID_KEY = "history_uuid";
@@ -762,6 +767,7 @@ public SearcherSupplier acquireSearcherSupplier(Function<Searcher, Searcher> wra
             SearcherSupplier reader = new SearcherSupplier(wrapper) {
                 @Override
                 public Searcher acquireSearcherInternal(String source) {
+                    // TODO : this should return
                     assert assertSearcherIsWarmedUp(source, scope);
                     return new Searcher(
                         source,
@@ -828,9 +834,9 @@ public Searcher acquireSearcher(String source, SearcherScope scope, Function<Sea
         }
     }
 
-    protected abstract ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope);
+    public abstract ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope);
 
-    boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) {
+    public boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) {
         return true;
     }
 
@@ -1404,7 +1410,7 @@ default void onFailedEngine(String reason, @Nullable Exception e) {}
      * @opensearch.api
      */
     @PublicApi(since = "1.0.0")
-    public abstract static class SearcherSupplier implements Releasable {
+    public abstract static class SearcherSupplier extends EngineSearcherSupplier<Searcher> {
         private final Function<Searcher, Searcher> wrapper;
         private final AtomicBoolean released = new AtomicBoolean(false);
 
@@ -1439,8 +1445,10 @@ public final void close() {
      *
      * @opensearch.api
      */
+
     @PublicApi(since = "1.0.0")
-    public static final class Searcher extends IndexSearcher implements Releasable {
+    public static final class Searcher extends IndexSearcher implements Releasable, EngineSearcher {
+        // TODO : this extends index searcher
         private final String source;
         private final Closeable onClose;
 
@@ -1607,6 +1615,7 @@ public static class Index extends Operation {
         private final boolean isRetry;
         private final long ifSeqNo;
         private final long ifPrimaryTerm;
+        public CompositeDataFormatWriter.CompositeDocumentInput documentInput;
 
         public Index(
             Term uid,
@@ -1633,6 +1642,7 @@ public Index(
             this.autoGeneratedIdTimestamp = autoGeneratedIdTimestamp;
             this.ifSeqNo = ifSeqNo;
             this.ifPrimaryTerm = ifPrimaryTerm;
+            this.documentInput = doc.getDocumentInput();
         }
 
         public Index(Term uid, long primaryTerm, ParsedDocument doc) {
diff --git a/server/src/main/java/org/opensearch/index/engine/EngineLucene.java b/server/src/main/java/org/opensearch/index/engine/EngineLucene.java
new file mode 100644
index 0000000000000..f12f8cda0555e
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/EngineLucene.java
@@ -0,0 +1,57 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.apache.lucene.search.ReferenceManager;
+import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
+
+import java.util.function.Function;
+
+// Dummy impl
+public class EngineLucene implements SearcherOperations<Engine.Searcher, ReferenceManager<OpenSearchDirectoryReader>>{
+    @Override
+    public EngineSearcherSupplier<Engine.Searcher> acquireSearcherSupplier(Function<Engine.Searcher, Engine.Searcher> wrapper) throws EngineException {
+        return null;
+    }
+
+    @Override
+    public EngineSearcherSupplier<Engine.Searcher> acquireSearcherSupplier(Function<Engine.Searcher, Engine.Searcher> wrapper, Engine.SearcherScope scope) throws EngineException {
+        return null;
+    }
+
+    @Override
+    public Engine.Searcher acquireSearcher(String source) throws EngineException {
+        return null;
+    }
+
+    @Override
+    public Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope) throws EngineException {
+        return null;
+    }
+
+    @Override
+    public Engine.Searcher acquireSearcher(String source, Engine.SearcherScope scope, Function<Engine.Searcher, Engine.Searcher> wrapper) throws EngineException {
+        return null;
+    }
+
+    @Override
+    public ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(Engine.SearcherScope scope) {
+        return null;
+    }
+
+    @Override
+    public boolean assertSearcherIsWarmedUp(String source, Engine.SearcherScope scope) {
+        return false;
+    }
+
+    @Override
+    public CatalogSnapshotAwareRefreshListener getRefreshListener(Engine.SearcherScope searcherScope) {
+        return null;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/EngineReaderManager.java b/server/src/main/java/org/opensearch/index/engine/EngineReaderManager.java
new file mode 100644
index 0000000000000..992e835a5204d
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/EngineReaderManager.java
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.apache.lucene.search.ReferenceManager;
+
+import java.io.IOException;
+
+public interface EngineReaderManager<T> {
+    T acquire() throws IOException;
+
+    void release(T reader) throws IOException;
+
+    default void addListener(ReferenceManager.RefreshListener listener) {
+        // no-op
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/EngineSearcher.java b/server/src/main/java/org/opensearch/index/engine/EngineSearcher.java
new file mode 100644
index 0000000000000..7471fd3fbeb5f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/EngineSearcher.java
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.search.aggregations.SearchResultsCollector;
+
+import java.io.IOException;
+import java.io.UnsupportedEncodingException;
+import java.util.List;
+
+@ExperimentalApi
+// TODO make this <Query, Collector> generic type
+public interface EngineSearcher<Q,C> extends Releasable {
+
+    /**
+     * The source that caused this searcher to be acquired.
+     */
+    String source();
+
+    /**
+     * Search using substrait query plan bytes and call the result collectors
+     */
+    default void search(Q query, List<SearchResultsCollector<C>> collectors) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    default long search(Q query, Long runtimePtr) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/EngineSearcherSupplier.java b/server/src/main/java/org/opensearch/index/engine/EngineSearcherSupplier.java
new file mode 100644
index 0000000000000..df66b5265ce9e
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/EngineSearcherSupplier.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.apache.lucene.store.AlreadyClosedException;
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.lease.Releasable;
+
+import java.util.concurrent.atomic.AtomicBoolean;
+
+@ExperimentalApi
+public abstract class EngineSearcherSupplier<T extends EngineSearcher> implements Releasable {
+    private final AtomicBoolean released = new AtomicBoolean(false);
+
+    /**
+     * Acquire a searcher for the given source.
+     */
+    public T acquireSearcher(String source) {
+        if (released.get()) {
+            throw new AlreadyClosedException("SearcherSupplier was closed");
+        }
+        return acquireSearcherInternal(source);
+    }
+
+    protected abstract T acquireSearcherInternal(String source);
+
+    protected abstract void doClose();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java
index fcc81335d4363..b291c32b8c985 100644
--- a/server/src/main/java/org/opensearch/index/engine/InternalEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/InternalEngine.java
@@ -163,7 +163,7 @@ public class InternalEngine extends Engine {
     protected volatile long lastDeleteVersionPruneTimeMSec;
 
     protected final TranslogManager translogManager;
-    protected final IndexWriter indexWriter;
+    public final IndexWriter indexWriter;
     protected final LocalCheckpointTracker localCheckpointTracker;
     protected final AtomicLong maxUnsafeAutoIdTimestamp = new AtomicLong(-1);
     protected final SoftDeletesPolicy softDeletesPolicy;
@@ -429,7 +429,8 @@ public CompletionStats completionStats(String... fieldNamePatterns) {
      * @opensearch.internal
      */
     @SuppressForbidden(reason = "reference counting is required here")
-    private static final class ExternalReaderManager extends ReferenceManager<OpenSearchDirectoryReader> {
+    private static final class
+    ExternalReaderManager extends ReferenceManager<OpenSearchDirectoryReader> {
         private final BiConsumer<OpenSearchDirectoryReader, OpenSearchDirectoryReader> refreshListener;
         private final OpenSearchReaderManager internalReaderManager;
         private boolean isWarmedUp; // guarded by refreshLock
@@ -443,6 +444,13 @@ private static final class ExternalReaderManager extends ReferenceManager<OpenSe
             this.current = internalReaderManager.acquire(); // steal the reference without warming up
         }
 
+        // t0 - i, e
+        // t1 - i
+        // t2 - i
+        // t3 - i
+        // t4 - i
+        // t5 - i,  e
+
         @Override
         protected OpenSearchDirectoryReader refreshIfNeeded(OpenSearchDirectoryReader referenceToRefresh) throws IOException {
             // we simply run a blocking refresh on the internal reference manager and then steal it's reader
@@ -488,7 +496,7 @@ protected void decRef(OpenSearchDirectoryReader reference) throws IOException {
     }
 
     @Override
-    final boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) {
+    public final boolean assertSearcherIsWarmedUp(String source, SearcherScope scope) {
         if (scope == SearcherScope.EXTERNAL) {
             switch (source) {
                 // we can access segment_stats while a shard is still in the recovering state.
@@ -2300,7 +2308,7 @@ protected final void closeNoLock(String reason, CountDownLatch closedLatch) {
     }
 
     @Override
-    protected final ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope) {
+    public final ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope) {
         switch (scope) {
             case INTERNAL:
                 return internalReaderManager;
diff --git a/server/src/main/java/org/opensearch/index/engine/LuceneReaderManager.java b/server/src/main/java/org/opensearch/index/engine/LuceneReaderManager.java
new file mode 100644
index 0000000000000..b3d2fe19b1b9d
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/LuceneReaderManager.java
@@ -0,0 +1,38 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.apache.lucene.search.ReferenceManager;
+import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
+
+import java.io.IOException;
+
+public class LuceneReaderManager implements EngineReaderManager<OpenSearchDirectoryReader> {
+    private final ReferenceManager<OpenSearchDirectoryReader> referenceManager;
+
+    public LuceneReaderManager(ReferenceManager<OpenSearchDirectoryReader> referenceManager) {
+        this.referenceManager = referenceManager;
+    }
+
+
+    @Override
+    public OpenSearchDirectoryReader acquire() throws IOException {
+        return referenceManager.acquire();
+    }
+
+    @Override
+    public void release(OpenSearchDirectoryReader reader) throws IOException {
+        referenceManager.release(reader);
+    }
+
+    @Override
+    public void addListener(ReferenceManager.RefreshListener listener) {
+
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
index 1fab651078cc4..b97d9931d1139 100644
--- a/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/NRTReplicationEngine.java
@@ -276,7 +276,7 @@ public GetResult get(Get get, BiFunction<String, SearcherScope, Searcher> search
     }
 
     @Override
-    protected ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope) {
+    public ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope) {
         return readerManager;
     }
 
diff --git a/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java b/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java
index eba074e27f764..ad3cea6291eeb 100644
--- a/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java
+++ b/server/src/main/java/org/opensearch/index/engine/ReadOnlyEngine.java
@@ -277,7 +277,7 @@ public GetResult get(Get get, BiFunction<String, SearcherScope, Engine.Searcher>
     }
 
     @Override
-    protected ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope) {
+    public ReferenceManager<OpenSearchDirectoryReader> getReferenceManager(SearcherScope scope) {
         return readerManager;
     }
 
diff --git a/server/src/main/java/org/opensearch/index/engine/SearchExecEngine.java b/server/src/main/java/org/opensearch/index/engine/SearchExecEngine.java
new file mode 100644
index 0000000000000..0edf34eed7663
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/SearchExecEngine.java
@@ -0,0 +1,58 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.util.BigArrays;
+import org.opensearch.search.SearchShardTarget;
+import org.opensearch.search.internal.ReaderContext;
+import org.opensearch.search.internal.SearchContext;
+import org.opensearch.search.internal.ShardSearchRequest;
+import org.opensearch.search.query.GenericQueryPhaseSearcher;
+import org.opensearch.search.query.QueryPhaseExecutor;
+
+import java.io.IOException;
+
+/**
+ * Generic read engine interface that provides searcher operations and query phase execution
+ * @param <C> Context type for query execution
+ * @param <S> Searcher type that extends EngineSearcher
+ * @param <R> Reference manager type
+ * @param <Q> Query type
+ */
+@ExperimentalApi
+// TODO too many templatized types
+public abstract class SearchExecEngine<C extends SearchContext, S extends EngineSearcher<?,?>, R, Q> implements SearcherOperations<S, R> {
+
+    /**
+     * Get the query phase searcher for this engine
+     */
+    public abstract GenericQueryPhaseSearcher<C,S, Q> getQueryPhaseSearcher();
+
+    /**
+     * Get the query phase executor for this engine
+     */
+    public abstract QueryPhaseExecutor<C> getQueryPhaseExecutor();
+
+    /**
+     * Create a search context for this engine
+     */
+    public abstract C createContext(ReaderContext readerContext, ShardSearchRequest request, SearchShardTarget searchShardTarget, SearchShardTask task, BigArrays bigArrays) throws IOException;
+
+    /**
+     * execute Query Phase
+     */
+    public abstract void executeQueryPhase(C context) throws IOException;
+
+    /**
+     * execute Fetch Phase
+     */
+    public abstract void executeFetchPhase(C context) throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/SearchExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/SearchExecutionEngine.java
new file mode 100644
index 0000000000000..1834e8cd1e82f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/SearchExecutionEngine.java
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+
+import java.util.Map;
+
+/**
+ * SearchExecutionEngine
+ * @opensearch.internal
+ */
+@ExperimentalApi
+public interface SearchExecutionEngine {
+    /**
+     * execute
+     * @param queryPlanIR
+     * @return
+     */
+    Map<String, Object[]> execute(byte[] queryPlanIR);
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/SearcherOperations.java b/server/src/main/java/org/opensearch/index/engine/SearcherOperations.java
new file mode 100644
index 0000000000000..32b2d882401fb
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/SearcherOperations.java
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine;
+
+import org.apache.lucene.search.ReferenceManager;
+import org.opensearch.common.lucene.index.OpenSearchDirectoryReader;
+
+import java.util.function.Function;
+
+public interface SearcherOperations<S extends EngineSearcher, R> {
+    /**
+     * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand.
+     */
+    EngineSearcherSupplier<S> acquireSearcherSupplier(Function<S, S> wrapper) throws EngineException;
+    /**
+     * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand.
+     */
+    EngineSearcherSupplier<S> acquireSearcherSupplier(Function<S, S> wrapper, Engine.SearcherScope scope) throws EngineException;
+
+    S acquireSearcher(String source) throws EngineException;
+
+    S acquireSearcher(String source, Engine.SearcherScope scope) throws EngineException;
+
+    S acquireSearcher(String source, Engine.SearcherScope scope, Function<S, S> wrapper) throws EngineException;
+
+    R getReferenceManager(Engine.SearcherScope scope);
+
+    boolean assertSearcherIsWarmedUp(String source, Engine.SearcherScope scope);
+
+    default CatalogSnapshotAwareRefreshListener getRefreshListener(Engine.SearcherScope searcherScope) {
+        // default is no-op, TODO : revisit this
+        return null;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java
new file mode 100644
index 0000000000000..ef1ad24992256
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormat.java
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.index.engine.exec.text.TextDF;
+
+@ExperimentalApi
+public interface DataFormat {
+    Setting<Settings> dataFormatSettings();
+
+    Setting<Settings> clusterLeveldataFormatSettings();
+
+    String name();
+
+    void configureStore();
+
+    static class LuceneDataFormat implements DataFormat {
+        @Override
+        public Setting<Settings> dataFormatSettings() {
+            return null;
+        }
+
+        @Override
+        public Setting<Settings> clusterLeveldataFormatSettings() {
+            return null;
+        }
+
+        @Override
+        public String name() {
+            return "";
+        }
+
+        @Override
+        public void configureStore() {
+
+        }
+    }
+
+    DataFormat LUCENE = new LuceneDataFormat();
+
+    DataFormat TEXT = new TextDF();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java
new file mode 100644
index 0000000000000..0f24ca036741d
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/DocumentInput.java
@@ -0,0 +1,23 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.mapper.MappedFieldType;
+
+import java.io.IOException;
+@ExperimentalApi
+public interface DocumentInput<T> extends AutoCloseable {
+
+    void addField(MappedFieldType fieldType, Object value);
+
+    T getFinalInput();
+
+    WriteResult addToWriter() throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileInfos.java b/server/src/main/java/org/opensearch/index/engine/exec/FileInfos.java
new file mode 100644
index 0000000000000..436df520fd67b
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/FileInfos.java
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import java.util.Optional;
+
+public final class FileInfos {
+
+    private final Map<DataFormat, WriterFileSet> writerFilesMap;
+
+    public FileInfos() {
+        this.writerFilesMap = new HashMap<>();
+    }
+
+    public Map<DataFormat, WriterFileSet> getWriterFilesMap() {
+        return Collections.unmodifiableMap(writerFilesMap);
+    }
+
+    public void putWriterFileSet(DataFormat format, WriterFileSet writerFileSet) {
+        writerFilesMap.put(format, writerFileSet);
+    }
+
+    public Optional<WriterFileSet> getWriterFileSet(DataFormat format) {
+        return Optional.ofNullable(writerFilesMap.get(format));
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java
new file mode 100644
index 0000000000000..41efd124fa437
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+public record FileMetadata(String directory, String file) {
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FlushIn.java b/server/src/main/java/org/opensearch/index/engine/exec/FlushIn.java
new file mode 100644
index 0000000000000..5d119a575d1aa
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/FlushIn.java
@@ -0,0 +1,13 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+public interface FlushIn {
+
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java
new file mode 100644
index 0000000000000..2c3f63fcf0da2
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexingExecutionEngine.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.index.mapper.MappedFieldType;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+public interface IndexingExecutionEngine<T extends DataFormat> {
+
+    List<String> supportedFieldTypes();
+
+    Writer<? extends DocumentInput<?>> createWriter(long writerGeneration)
+        throws IOException; // A writer responsible for data format vended by this engine.
+
+    RefreshResult refresh(RefreshInput refreshInput) throws IOException;
+
+    DataFormat getDataFormat();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/RefreshInput.java b/server/src/main/java/org/opensearch/index/engine/exec/RefreshInput.java
new file mode 100644
index 0000000000000..135df6f0855fa
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/RefreshInput.java
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class RefreshInput {
+
+    private final List<WriterFileSet> writerFiles;
+
+    public RefreshInput() {
+        this.writerFiles = new ArrayList<>();
+    }
+
+    public void add(WriterFileSet writerFileSetGroup) {
+        this.writerFiles.add(writerFileSetGroup);
+    }
+
+    public List<WriterFileSet> getWriterFiles() {
+        return writerFiles;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/RefreshResult.java b/server/src/main/java/org/opensearch/index/engine/exec/RefreshResult.java
new file mode 100644
index 0000000000000..8357529d7acc7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/RefreshResult.java
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+public class RefreshResult {
+
+    private final Map<DataFormat, List<WriterFileSet>> refreshedFiles;
+
+    public RefreshResult() {
+        this.refreshedFiles = new HashMap<>();
+    }
+
+    public void add(DataFormat df, List<WriterFileSet> writerFiles) {
+        writerFiles.forEach(writerFileSet -> refreshedFiles.computeIfAbsent(df, dataFormat -> new ArrayList<>()).add(writerFileSet));
+    }
+
+    public List<WriterFileSet> getRefreshedFiles(DataFormat dataFormat) {
+        return Collections.unmodifiableList(refreshedFiles.get(dataFormat));
+    }
+
+    public Map<DataFormat, List<WriterFileSet>> getRefreshedFiles() {
+        return Map.copyOf(refreshedFiles);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Reportable.java b/server/src/main/java/org/opensearch/index/engine/exec/Reportable.java
new file mode 100644
index 0000000000000..620539c877c76
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/Reportable.java
@@ -0,0 +1,14 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+public interface Reportable {
+
+    long ramBytesUsed();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriteResult.java b/server/src/main/java/org/opensearch/index/engine/exec/WriteResult.java
new file mode 100644
index 0000000000000..666576e85cd0f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/WriteResult.java
@@ -0,0 +1,12 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+public record WriteResult(boolean success, Exception e, long version, long term, long seqNo) {
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/Writer.java b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java
new file mode 100644
index 0000000000000..d0ad4d35b3fc2
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/Writer.java
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import java.io.IOException;
+
+public interface Writer<P extends DocumentInput<?>> {
+
+    WriteResult addDoc(P d) throws IOException;
+
+    FileInfos flush(FlushIn flushIn) throws IOException;
+
+    void sync() throws IOException;
+
+    void close();
+
+    P newDocumentInput();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java
new file mode 100644
index 0000000000000..9ab00b4753d74
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/WriterFileSet.java
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import java.io.Serializable;
+import java.nio.file.Path;
+import java.util.HashSet;
+import java.util.Set;
+
+public class WriterFileSet implements Serializable {
+
+    private final String directory;
+    private final long writerGeneration;
+    private final Set<String> files;
+
+    public WriterFileSet(Path directory, long writerGeneration) {
+        this.files = new HashSet<>();
+        this.writerGeneration = writerGeneration;
+        this.directory = directory.toString();
+    }
+
+    public void add(String file) {
+        this.files.add(file);
+    }
+
+    public Set<String> getFiles() {
+        return files;
+    }
+
+    public String getDirectory() {
+        return directory;
+    }
+
+    public long getWriterGeneration() {
+        return writerGeneration;
+    }
+
+    @Override
+    public String toString() {
+        return "WriterFileSet{" +
+            "directory=" + directory +
+            ", writerGeneration=" + writerGeneration +
+            ", files=" + files +
+            '}';
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/CheckpointState.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/CheckpointState.java
new file mode 100644
index 0000000000000..52784d834d837
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/CheckpointState.java
@@ -0,0 +1,39 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.bridge;
+
+import org.opensearch.common.annotation.PublicApi;
+import org.opensearch.index.seqno.SeqNoStats;
+
+@PublicApi(since = "1.0.0")
+public interface CheckpointState {
+
+    /**
+     * @return the persisted local checkpoint for this Engine
+     */
+    long getPersistedLocalCheckpoint();
+
+    /**
+     * @return the latest checkpoint that has been processed but not necessarily persisted.
+     * Also see {@link #getPersistedLocalCheckpoint()}
+     */
+    long getProcessedLocalCheckpoint();
+
+    /**
+     * @return a {@link SeqNoStats} object, using local state and the supplied global checkpoint
+     */
+    SeqNoStats getSeqNoStats(long globalCheckpoint);
+
+    /**
+     * Returns the latest global checkpoint value that has been persisted in the underlying storage (i.e. translog's checkpoint)
+     */
+    long getLastSyncedGlobalCheckpoint();
+
+    long getMinRetainedSeqNo();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/Indexer.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/Indexer.java
new file mode 100644
index 0000000000000..39f8929fe703c
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/Indexer.java
@@ -0,0 +1,94 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.bridge;
+
+import org.opensearch.common.annotation.PublicApi;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineException;
+import org.opensearch.index.engine.SafeCommitInfo;
+import org.opensearch.index.engine.Segment;
+import org.opensearch.index.translog.Translog;
+import org.opensearch.index.translog.TranslogManager;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.List;
+
+@PublicApi(since = "1.0.0")
+public interface Indexer {
+
+    Engine.IndexResult index(Engine.Index index) throws IOException;
+
+    Engine.DeleteResult delete(Engine.Delete delete) throws IOException;
+
+    Engine.NoOpResult noOp(Engine.NoOp noOp) throws IOException;
+
+    /**
+     * Counts the number of history operations in the given sequence number range
+     * @param source       source of the request
+     * @param fromSeqNo    from sequence number; included
+     * @param toSeqNumber  to sequence number; included
+     * @return             number of history operations
+     */
+    int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException;
+
+    boolean hasCompleteOperationHistory(String reason, long startingSeqNo);
+
+    long getIndexBufferRAMBytesUsed();
+
+    List<Segment> segments(boolean verbose);
+
+    /**
+     * Returns the maximum auto_id_timestamp of all append-only index requests have been processed by this engine
+     * or the auto_id_timestamp received from its primary shard via {@link #updateMaxUnsafeAutoIdTimestamp(long)}.
+     * Notes this method returns the auto_id_timestamp of all append-only requests, not max_unsafe_auto_id_timestamp.
+     */
+    long getMaxSeenAutoIdTimestamp();
+
+    /**
+     * Forces this engine to advance its max_unsafe_auto_id_timestamp marker to at least the given timestamp.
+     * The engine will disable optimization for all append-only whose timestamp at most {@code newTimestamp}.
+     */
+    void updateMaxUnsafeAutoIdTimestamp(long newTimestamp);
+
+    int fillSeqNoGaps(long primaryTerm) throws IOException;
+
+    // File format methods follow below
+    void forceMerge(
+        boolean flush,
+        int maxNumSegments,
+        boolean onlyExpungeDeletes,
+        boolean upgrade,
+        boolean upgradeOnlyAncientSegments,
+        String forceMergeUUID
+    ) throws EngineException, IOException;
+
+    void writeIndexingBuffer() throws EngineException;
+
+    void refresh(String source) throws EngineException;
+
+    void flush(boolean force, boolean waitIfOngoing) throws EngineException;
+
+    SafeCommitInfo getSafeCommitInfo();
+
+    // Translog methods follow below
+    TranslogManager translogManager();
+
+    Closeable acquireHistoryRetentionLock();
+
+    Translog.Snapshot newChangesSnapshot(
+        String source,
+        long fromSeqNo,
+        long toSeqNo,
+        boolean requiredFullRange,
+        boolean accurateCount
+    ) throws IOException;
+
+    String getHistoryUUID();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/IndexingThrottler.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/IndexingThrottler.java
new file mode 100644
index 0000000000000..050dc07d1011b
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/IndexingThrottler.java
@@ -0,0 +1,37 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.bridge;
+
+import org.opensearch.common.annotation.PublicApi;
+
+@PublicApi(since = "1.0.0")
+public interface IndexingThrottler {
+
+    /**
+     * Returns the number of milliseconds this engine was under index throttling.
+     */
+    long getIndexThrottleTimeInMillis();
+
+    /**
+     * Returns the <code>true</code> iff this engine is currently under index throttling.
+     * @see #getIndexThrottleTimeInMillis()
+     */
+    boolean isThrottled();
+
+    /**
+     * Request that this engine throttle incoming indexing requests to one thread.
+     * Must be matched by a later call to {@link #deactivateThrottling()}.
+     */
+    void activateThrottling();
+
+    /**
+     * Reverses a previous {@link #activateThrottling} call.
+     */
+    void deactivateThrottling();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/bridge/StatsHolder.java b/server/src/main/java/org/opensearch/index/engine/exec/bridge/StatsHolder.java
new file mode 100644
index 0000000000000..27d0c099aaa53
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/bridge/StatsHolder.java
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.bridge;
+
+import org.opensearch.common.annotation.PublicApi;
+import org.opensearch.index.engine.CommitStats;
+import org.opensearch.index.engine.SegmentsStats;
+import org.opensearch.index.merge.MergeStats;
+import org.opensearch.index.shard.DocsStats;
+import org.opensearch.indices.pollingingest.PollingIngestStats;
+import org.opensearch.search.suggest.completion.CompletionStats;
+
+@PublicApi(since = "1.0.0")
+public interface StatsHolder {
+
+    CommitStats commitStats();
+
+    DocsStats docStats();
+
+    SegmentsStats segmentsStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments);
+
+    CompletionStats completionStats(String... fieldNamePatterns);
+
+    PollingIngestStats pollingIngestStats();
+
+    MergeStats getMergeStats();
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/CommitPoint.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/CommitPoint.java
new file mode 100644
index 0000000000000..b3791660206d2
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/CommitPoint.java
@@ -0,0 +1,96 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.commit;
+
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.Map;
+
+public final class CommitPoint {
+
+    private final String commitFileName;
+    private final long generation;
+    private final Collection<String> fileNames;
+    private final Path directory;
+    private final Map<String, String> commitData;
+
+    private CommitPoint(Builder builder) {
+        this.commitFileName = builder.commitFileName;
+        this.generation = builder.generation;
+        this.fileNames = builder.fileNames;
+        this.directory = builder.directory;
+        this.commitData = builder.commitData;
+    }
+
+    public String getCommitFileName() {
+        return commitFileName;
+    }
+
+    public long getGeneration() {
+        return generation;
+    }
+
+    public Collection<String> getFileNames() {
+        return fileNames;
+    }
+
+    public Path getDirectory() {
+        return directory;
+    }
+
+    public Map<String, String> getCommitData() {
+        return commitData;
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    public static final class Builder {
+
+        private String commitFileName;
+        private long generation;
+        private Collection<String> fileNames;
+        private Path directory;
+        private Map<String, String> commitData;
+
+        private Builder() {
+        }
+
+        public Builder commitFileName(String commitFileName) {
+            this.commitFileName = commitFileName;
+            return this;
+        }
+
+        public Builder generation(long generation) {
+            this.generation = generation;
+            return this;
+        }
+
+        public Builder fileNames(Collection<String> fileNames) {
+            this.fileNames = fileNames;
+            return this;
+        }
+
+        public Builder directory(Path directory) {
+            this.directory = directory;
+            return this;
+        }
+
+        public Builder commitData(Map<String, String> commitData) {
+            this.commitData = commitData;
+            return this;
+        }
+
+        public CommitPoint build() {
+            return new CommitPoint(this);
+        }
+    }
+
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java
new file mode 100644
index 0000000000000..8c56bd6c8c983
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/Committer.java
@@ -0,0 +1,18 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.commit;
+
+import org.opensearch.index.engine.exec.coord.CatalogSnapshot;
+
+public interface Committer {
+
+    void addLuceneIndexes(CatalogSnapshot catalogSnapshot);
+
+    CommitPoint commit(CatalogSnapshot catalogSnapshot);
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java
new file mode 100644
index 0000000000000..6a09850fdbfbb
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneCommitEngine.java
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.commit;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.commons.lang3.SerializationUtils;
+import org.apache.lucene.index.IndexCommit;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.NIOFSDirectory;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.engine.exec.coord.CatalogSnapshot;
+
+public class LuceneCommitEngine implements Committer {
+
+    private final IndexWriter indexWriter;
+    private final LuceneIndexDeletionPolicy indexDeletionPolicy;
+
+    public LuceneCommitEngine(Path commitPath) throws IOException {
+        Directory directory = new NIOFSDirectory(commitPath);
+        indexDeletionPolicy = new LuceneIndexDeletionPolicy();
+        IndexWriterConfig indexWriterConfig = new IndexWriterConfig();
+        indexWriterConfig.setIndexDeletionPolicy(indexDeletionPolicy);
+        this.indexWriter = new IndexWriter(directory, indexWriterConfig);
+    }
+
+    @Override
+    public void addLuceneIndexes(CatalogSnapshot catalogSnapshot) {
+        Collection<WriterFileSet> luceneFileCollection = catalogSnapshot.getSearchableFiles(DataFormat.LUCENE.name());
+        luceneFileCollection.forEach(writerFileSet -> {
+            try {
+                indexWriter.addIndexes(new NIOFSDirectory(Path.of(writerFileSet.getDirectory())));
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+        Map<String, String> userData = new HashMap<>();
+        catalogSnapshot.getSegments().forEach(segment -> userData.put(String.valueOf(segment.getGeneration()),
+            new String(SerializationUtils.serialize(segment))));
+        indexWriter.setLiveCommitData(userData.entrySet());
+    }
+
+    @Override
+    public CommitPoint commit(CatalogSnapshot catalogSnapshot) {
+        addLuceneIndexes(catalogSnapshot);
+        try {
+            indexWriter.commit();
+            IndexCommit indexCommit = indexDeletionPolicy.getLatestIndexCommit();
+            return CommitPoint.builder().commitFileName(indexCommit.getSegmentsFileName())
+                .fileNames(indexCommit.getFileNames()).commitData(indexCommit.getUserData())
+                .generation(indexCommit.getGeneration())
+                .directory(Path.of(indexCommit.getSegmentsFileName()).getParent()).build();
+        } catch (IOException e) {
+            throw new RuntimeException("lucene commit engine failed", e);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneIndexDeletionPolicy.java b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneIndexDeletionPolicy.java
new file mode 100644
index 0000000000000..5a6d14d74a191
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/commit/LuceneIndexDeletionPolicy.java
@@ -0,0 +1,33 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.commit;
+
+import java.io.IOException;
+import java.util.List;
+import org.apache.lucene.index.IndexCommit;
+import org.apache.lucene.index.IndexDeletionPolicy;
+
+public final class LuceneIndexDeletionPolicy extends IndexDeletionPolicy {
+
+    private IndexCommit latestIndexCommit;
+
+    @Override
+    public void onInit(List<? extends IndexCommit> commits) throws IOException {
+
+    }
+
+    @Override
+    public void onCommit(List<? extends IndexCommit> commits) throws IOException {
+        latestIndexCommit = commits.getLast();
+    }
+
+    public IndexCommit getLatestIndexCommit() {
+        return latestIndexCommit;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java
new file mode 100644
index 0000000000000..58a224d0fe9ae
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeDataFormatWriter.java
@@ -0,0 +1,188 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.composite;
+
+import org.apache.commons.lang3.tuple.ImmutablePair;
+import org.apache.lucene.util.SetOnce;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.DocumentInput;
+import org.opensearch.index.engine.exec.FileInfos;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.engine.exec.FlushIn;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.engine.exec.Writer;
+import org.opensearch.index.mapper.MappedFieldType;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Optional;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.locks.Condition;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.stream.Collectors;
+
+public class CompositeDataFormatWriter implements Writer<CompositeDataFormatWriter.CompositeDocumentInput>, Lock {
+
+    private final List<ImmutablePair<DataFormat, Writer<? extends DocumentInput<?>>>> writers;
+    private final Runnable postWrite;
+    private final ReentrantLock lock;
+    private final SetOnce<Boolean> flushPending = new SetOnce<>();
+    private final SetOnce<Boolean> hasFlushed = new SetOnce<>();
+    private final long writerGeneration;
+    private boolean aborted;
+
+    public CompositeDataFormatWriter(CompositeIndexingExecutionEngine engine,
+        long writerGeneration) {
+        this.writers = new ArrayList<>();
+        this.lock = new ReentrantLock();
+        this.aborted = false;
+        this.writerGeneration = writerGeneration;
+        engine.getDelegates().forEach(delegate -> {
+            try {
+                writers.add(ImmutablePair.of(delegate.getDataFormat(), delegate.createWriter(writerGeneration)));
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+        this.postWrite = () -> {
+            engine.getDataFormatWriterPool().releaseAndUnlock(this);
+        };
+    }
+
+    @Override
+    public WriteResult addDoc(CompositeDocumentInput d) throws IOException {
+        return d.addToWriter();
+    }
+
+    @Override
+    public FileInfos flush(FlushIn flushIn) throws IOException {
+        FileInfos fileInfos = new FileInfos();
+        for (ImmutablePair<DataFormat, Writer<? extends DocumentInput<?>>> writerPair : writers) {
+            Optional<WriterFileSet> fileMetadataOptional = writerPair.getRight().flush(flushIn)
+                .getWriterFileSet(writerPair.getLeft());
+            fileMetadataOptional.ifPresent(
+                fileMetadata -> fileInfos.putWriterFileSet(writerPair.getLeft(), fileMetadata));
+        }
+        hasFlushed.set(true);
+        return fileInfos;
+    }
+
+    @Override
+    public void sync() throws IOException {
+
+    }
+
+    @Override
+    public void close() {
+
+    }
+
+    @Override
+    public CompositeDocumentInput newDocumentInput() {
+        return new CompositeDocumentInput(
+            writers.stream().map(ImmutablePair::getRight).map(Writer::newDocumentInput).collect(Collectors.toList()),
+            this, postWrite);
+    }
+
+    void abort() throws IOException {
+        aborted = true;
+    }
+
+    public void setFlushPending() {
+        flushPending.set(Boolean.TRUE);
+    }
+
+    public boolean hasFlushed() {
+        return hasFlushed.get() == Boolean.TRUE;
+    }
+
+    public boolean isFlushPending() {
+        return flushPending.get() == Boolean.TRUE;
+    }
+
+    public boolean isAborted() {
+        return aborted;
+    }
+
+    @Override
+    public void lock() {
+        lock.lock();
+    }
+
+    @Override
+    public void lockInterruptibly() throws InterruptedException {
+        lock.lockInterruptibly();
+    }
+
+    @Override
+    public boolean tryLock() {
+        return lock.tryLock();
+    }
+
+    @Override
+    public boolean tryLock(long time, TimeUnit unit) throws InterruptedException {
+        return lock.tryLock(time, unit);
+    }
+
+    @Override
+    public void unlock() {
+        lock.unlock();
+    }
+
+    boolean isHeldByCurrentThread() {
+        return lock.isHeldByCurrentThread();
+    }
+
+    @Override
+    public Condition newCondition() {
+        throw new UnsupportedOperationException();
+    }
+
+    public static class CompositeDocumentInput implements DocumentInput<List<? extends DocumentInput<?>>> {
+
+        List<? extends DocumentInput<?>> inputs;
+        CompositeDataFormatWriter writer;
+        Runnable onClose;
+
+        public CompositeDocumentInput(List<? extends DocumentInput<?>> inputs, CompositeDataFormatWriter writer,
+            Runnable onClose) {
+            this.inputs = inputs;
+            this.writer = writer;
+            this.onClose = onClose;
+        }
+
+        @Override
+        public void addField(MappedFieldType fieldType, Object value) {
+            for (DocumentInput<?> input : inputs) {
+                input.addField(fieldType, value);
+            }
+        }
+
+        @Override
+        public List<? extends DocumentInput<?>> getFinalInput() {
+            return null;
+        }
+
+        @Override
+        public WriteResult addToWriter() throws IOException {
+            WriteResult writeResult = null;
+            for (DocumentInput<?> input : inputs) {
+                writeResult = input.addToWriter();
+            }
+            return writeResult;
+        }
+
+        @Override
+        public void close() throws Exception {
+            onClose.run();
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java
new file mode 100644
index 0000000000000..cd45d24432553
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/composite/CompositeIndexingExecutionEngine.java
@@ -0,0 +1,127 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.composite;
+
+import java.util.Collections;
+import java.util.concurrent.atomic.AtomicLong;
+
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.FileInfos;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import org.opensearch.index.engine.exec.RefreshInput;
+import org.opensearch.index.engine.exec.RefreshResult;
+import org.opensearch.index.engine.exec.Writer;
+import org.opensearch.index.engine.exec.coord.Any;
+import org.opensearch.index.engine.exec.coord.CompositeDataFormatWriterPool;
+import org.opensearch.index.engine.exec.text.TextEngine;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.plugins.DataSourcePlugin;
+import org.opensearch.plugins.PluginsService;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+public class CompositeIndexingExecutionEngine implements IndexingExecutionEngine<Any> {
+
+    private final CompositeDataFormatWriterPool dataFormatWriterPool;
+    private DataFormat dataFormat;
+    private final AtomicLong writerGeneration;
+    private final List<IndexingExecutionEngine<?>> delegates = new ArrayList<>();
+
+    public CompositeIndexingExecutionEngine(MapperService mapperService, PluginsService pluginsService, Any dataformat, ShardPath shardPath, long initialWriterGeneration) {
+        this.dataFormat = dataformat;
+        this.writerGeneration = new AtomicLong(initialWriterGeneration);
+        try {
+            for (DataFormat dataFormat : dataformat.getDataFormats()) {
+                DataSourcePlugin plugin = pluginsService.filterPlugins(DataSourcePlugin.class).stream().filter(curr -> curr.getDataFormat().equals(dataFormat)).findFirst().orElseThrow(() -> new IllegalArgumentException("dataformat [" + dataFormat + "] is not registered."));
+                delegates.add(plugin.indexingEngine(mapperService, shardPath));
+            }
+        } catch (NullPointerException e) {
+            // my own testing
+            delegates.add(new TextEngine());
+        }
+        this.dataFormatWriterPool = new CompositeDataFormatWriterPool(() -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), ConcurrentLinkedQueue::new, Runtime.getRuntime().availableProcessors());
+    }
+
+    public CompositeIndexingExecutionEngine(MapperService mapperService, PluginsService pluginsService, ShardPath shardPath, long initialWriterGeneration) {
+        this.writerGeneration = new AtomicLong(initialWriterGeneration);
+        try {
+            DataSourcePlugin plugin = pluginsService.filterPlugins(DataSourcePlugin.class).stream().findAny().orElseThrow(() -> new IllegalArgumentException("dataformat [" + DataFormat.TEXT + "] is not registered."));
+            delegates.add(plugin.indexingEngine(mapperService, shardPath));
+        } catch (NullPointerException e) {
+            delegates.add(new TextEngine());
+        }
+        this.dataFormatWriterPool = new CompositeDataFormatWriterPool(() -> new CompositeDataFormatWriter(this, writerGeneration.getAndIncrement()), ConcurrentLinkedQueue::new, Runtime.getRuntime().availableProcessors());
+    }
+
+    @Override
+    public DataFormat getDataFormat() {
+        return dataFormat;
+    }
+
+    @Override
+    public List<String> supportedFieldTypes() {
+        throw new UnsupportedOperationException();
+    }
+
+    @Override
+    public Writer<CompositeDataFormatWriter.CompositeDocumentInput> createWriter(long generation) throws IOException {
+        throw new UnsupportedOperationException();
+    }
+
+    public Writer<CompositeDataFormatWriter.CompositeDocumentInput> createCompositeWriter() {
+        return dataFormatWriterPool.getAndLock();
+    }
+
+    @Override
+    public RefreshResult refresh(RefreshInput ignore) throws IOException {
+        RefreshResult finalResult = new RefreshResult();
+        Map<DataFormat, RefreshInput> refreshInputs = new HashMap<>();
+        try {
+            List<CompositeDataFormatWriter> dataFormatWriters = dataFormatWriterPool.checkoutAll();
+
+            // flush to disk
+            for (CompositeDataFormatWriter dataFormatWriter : dataFormatWriters) {
+                FileInfos fileInfos = dataFormatWriter.flush(null);
+                fileInfos.getWriterFilesMap().forEach((key, value) -> refreshInputs.computeIfAbsent(key, dataFormat -> new RefreshInput()).add(value));
+            }
+
+            if (refreshInputs.isEmpty()) {
+                return null;
+            }
+
+            // make indexing engines aware of everything
+            for (IndexingExecutionEngine<?> delegate : delegates) {
+                RefreshInput refreshInput = refreshInputs.get(delegate.getDataFormat());
+                if (refreshInput != null) {
+                    RefreshResult result = delegate.refresh(refreshInput);
+                    finalResult.add(delegate.getDataFormat(), result.getRefreshedFiles(delegate.getDataFormat()));
+                }
+            }
+
+            // provide a view to the upper layer
+            return finalResult;
+        } catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }
+    }
+
+    public List<IndexingExecutionEngine<?>> getDelegates() {
+        return Collections.unmodifiableList(delegates);
+    }
+
+    public CompositeDataFormatWriterPool getDataFormatWriterPool() {
+        return dataFormatWriterPool;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java
new file mode 100644
index 0000000000000..c55834ec337d1
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/Any.java
@@ -0,0 +1,50 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.coord;
+
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.index.engine.exec.DataFormat;
+
+import java.util.List;
+
+public class Any implements DataFormat {
+
+    private List<DataFormat> dataFormats;
+
+    public Any(List<DataFormat> dataFormats) {
+        this.dataFormats = dataFormats;
+    }
+
+    @Override
+    public Setting<Settings> dataFormatSettings() {
+        return null;
+    }
+
+    @Override
+    public Setting<Settings> clusterLeveldataFormatSettings() {
+        return null;
+    }
+
+    @Override
+    public String name() {
+        return "all";
+    }
+
+    public List<DataFormat> getDataFormats() {
+        return dataFormats;
+    }
+
+    @Override
+    public void configureStore() {
+        for (DataFormat dataFormat : dataFormats) {
+            dataFormat.configureStore();
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshot.java
new file mode 100644
index 0000000000000..680f325d84a69
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CatalogSnapshot.java
@@ -0,0 +1,91 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.coord;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.util.concurrent.AbstractRefCounted;
+import org.opensearch.index.engine.exec.RefreshResult;
+import org.opensearch.index.engine.exec.WriterFileSet;
+
+import java.io.Serializable;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+
+@ExperimentalApi
+public class CatalogSnapshot extends AbstractRefCounted {
+
+    private final long id;
+    private final Map<String, Collection<WriterFileSet>> dfGroupedSearchableFiles;
+
+    public CatalogSnapshot(RefreshResult refreshResult, long id) {
+        super("catalog_snapshot");
+        this.id = id;
+        this.dfGroupedSearchableFiles = new HashMap<>();
+        refreshResult.getRefreshedFiles().forEach((dataFormat, writerFiles) -> dfGroupedSearchableFiles.put(dataFormat.name(), writerFiles));
+    }
+
+    public Collection<WriterFileSet> getSearchableFiles(String dataFormat) {
+        if (dfGroupedSearchableFiles.containsKey(dataFormat)) {
+            return dfGroupedSearchableFiles.get(dataFormat);
+        }
+        return Collections.emptyList();
+    }
+
+    public Collection<Segment> getSegments() {
+        Map<Long, Segment> segmentMap = new HashMap<>();
+        dfGroupedSearchableFiles.forEach((dataFormat, writerFileSets) -> writerFileSets.forEach(writerFileSet -> {
+            Segment segment = segmentMap.computeIfAbsent(writerFileSet.getWriterGeneration(), Segment::new);
+            segment.addSearchableFiles(dataFormat, writerFileSet);
+        }));
+        return Collections.unmodifiableCollection(segmentMap.values());
+    }
+
+    @Override
+    protected void closeInternal() {
+        // notify to file deleter, search, etc
+    }
+
+    public long getId() {
+        return id;
+    }
+
+    @Override
+    public String toString() {
+        return "CatalogSnapshot{" +
+            "id=" + id +
+            ", dfGroupedSearchableFiles=" + dfGroupedSearchableFiles +
+            '}';
+    }
+
+    public static class Segment implements Serializable {
+
+        private final long generation;
+        private final Map<String, WriterFileSet> dfGroupedSearchableFiles;
+
+        public Segment(long generation) {
+            this.dfGroupedSearchableFiles = new HashMap<>();
+            this.generation = generation;
+        }
+
+        public void addSearchableFiles(String dataFormat, WriterFileSet writerFileSetGroup) {
+            dfGroupedSearchableFiles.put(dataFormat, writerFileSetGroup);
+        }
+
+        public long getGeneration() {
+            return generation;
+        }
+
+        @Override
+        public String toString() {
+            return "Segment{" + "generation=" + generation + ", dfGroupedSearchableFiles=" + dfGroupedSearchableFiles + '}';
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeDataFormatWriterPool.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeDataFormatWriterPool.java
new file mode 100644
index 0000000000000..2934b4b4b50fc
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeDataFormatWriterPool.java
@@ -0,0 +1,127 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.coord;
+
+import org.apache.lucene.store.AlreadyClosedException;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
+import org.opensearch.index.engine.exec.queue.LockableConcurrentQueue;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Objects;
+import java.util.Queue;
+import java.util.Set;
+import java.util.function.Supplier;
+
+public class CompositeDataFormatWriterPool implements Iterable<CompositeDataFormatWriter>, Closeable {
+
+    private final Set<CompositeDataFormatWriter> writers;
+    private final LockableConcurrentQueue<CompositeDataFormatWriter> availableWriters;
+    private final Supplier<CompositeDataFormatWriter> writerSupplier;
+    private volatile boolean closed;
+
+    public CompositeDataFormatWriterPool(
+        Supplier<CompositeDataFormatWriter> writerSupplier,
+        Supplier<Queue<CompositeDataFormatWriter>> queueSupplier,
+        int concurrency
+    ) {
+        this.writers = Collections.newSetFromMap(new IdentityHashMap<>());
+        this.writerSupplier = writerSupplier;
+        this.availableWriters = new LockableConcurrentQueue<>(queueSupplier, concurrency);
+    }
+
+    /**
+     * This method is used by CompositeIndexingExecutionEngine to grab a writer from the pool to perform an indexing
+     * operation.
+     *
+     * @return a pooled CompositeDataFormatWriter if available, or a newly created instance if none are available
+     */
+    public CompositeDataFormatWriter getAndLock() {
+        ensureOpen();
+        CompositeDataFormatWriter compositeDataFormatWriter = availableWriters.lockAndPoll();
+        return Objects.requireNonNullElseGet(compositeDataFormatWriter, this::fetchWriter);
+    }
+
+    /**
+     * Create a new {@link CompositeDataFormatWriter} to be added to this pool.
+     *
+     * @return a new instance of {@link CompositeDataFormatWriter}
+     */
+    private synchronized CompositeDataFormatWriter fetchWriter() {
+        ensureOpen();
+        CompositeDataFormatWriter compositeDataFormatWriter = writerSupplier.get();
+        compositeDataFormatWriter.lock();
+        writers.add(compositeDataFormatWriter);
+        return compositeDataFormatWriter;
+    }
+
+    /**
+     * Release the given {@link CompositeDataFormatWriter} to this pool for reuse if it is currently managed by this
+     * pool.
+     *
+     * @param state {@link CompositeDataFormatWriter} to release to the pool.
+     */
+    public void releaseAndUnlock(CompositeDataFormatWriter state) {
+        assert
+            !state.isFlushPending() && !state.isAborted() :
+            "CompositeDataFormatWriter has pending flush: " + state.isFlushPending() + " aborted=" + state.isAborted();
+        assert isRegistered(state) : "CompositeDocumentWriterPool doesn't know about this CompositeDataFormatWriter";
+        availableWriters.addAndUnlock(state);
+    }
+
+    /**
+     * Lock and checkout all CompositeDataFormatWriters from the pool for flush.
+     *
+     * @return Unmodifiable list of all CompositeDataFormatWriters locked by current thread.
+     */
+    public synchronized List<CompositeDataFormatWriter> checkoutAll() {
+        List<CompositeDataFormatWriter> checkedOutWriters = new ArrayList<>();
+        for (CompositeDataFormatWriter compositeDataFormatWriter : this) {
+            compositeDataFormatWriter.lock();
+            if (isRegistered(compositeDataFormatWriter) && writers.remove(compositeDataFormatWriter)) {
+                availableWriters.remove(compositeDataFormatWriter);
+                checkedOutWriters.add(compositeDataFormatWriter);
+            } else {
+                compositeDataFormatWriter.unlock();
+            }
+        }
+        return Collections.unmodifiableList(checkedOutWriters);
+    }
+
+    /**
+     * Check if {@link CompositeDataFormatWriter} is part of this pool.
+     *
+     * @param perThread {@link CompositeDataFormatWriter} to validate.
+     * @return true if {@link CompositeDataFormatWriter} is part of this pool, false otherwise.
+     */
+    synchronized boolean isRegistered(CompositeDataFormatWriter perThread) {
+        return writers.contains(perThread);
+    }
+
+    private void ensureOpen() {
+        if (closed) {
+            throw new AlreadyClosedException("CompositeDocumentWriterPool is already closed");
+        }
+    }
+
+    @Override
+    public synchronized Iterator<CompositeDataFormatWriter> iterator() {
+        return List.copyOf(writers).iterator();
+    }
+
+    @Override
+    public void close() throws IOException {
+        this.closed = true;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java
new file mode 100644
index 0000000000000..1329d7879d1d0
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/CompositeEngine.java
@@ -0,0 +1,304 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.coord;
+
+import org.apache.lucene.search.ReferenceManager;
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.engine.CatalogSnapshotAwareRefreshListener;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineException;
+import org.opensearch.index.engine.SafeCommitInfo;
+import org.opensearch.index.engine.SearchExecEngine;
+import org.opensearch.index.engine.Segment;
+import org.opensearch.index.engine.exec.RefreshInput;
+import org.opensearch.index.engine.exec.RefreshResult;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.engine.exec.bridge.Indexer;
+import org.opensearch.index.engine.exec.commit.Committer;
+import org.opensearch.index.engine.exec.commit.LuceneCommitEngine;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
+import org.opensearch.index.engine.exec.composite.CompositeIndexingExecutionEngine;
+import org.opensearch.index.mapper.KeywordFieldMapper;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.index.translog.Translog;
+import org.opensearch.index.translog.TranslogManager;
+import org.opensearch.plugins.PluginsService;
+import org.opensearch.plugins.SearchEnginePlugin;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+@ExperimentalApi
+public class CompositeEngine implements Indexer {
+
+    private final CompositeIndexingExecutionEngine engine;
+    private final Committer compositeEngineCommitter;
+    private final List<ReferenceManager.RefreshListener> refreshListeners = new ArrayList<>();
+    private CatalogSnapshot catalogSnapshot;
+    private final List<CatalogSnapshotAwareRefreshListener> catalogSnapshotAwareRefreshListeners = new ArrayList<>();
+    private final Map<org.opensearch.vectorized.execution.search.DataFormat, List<SearchExecEngine<?, ?, ?, ?>>> readEngines = new HashMap<>();
+
+    public CompositeEngine(MapperService mapperService, PluginsService pluginsService, ShardPath shardPath) throws IOException {
+        List<SearchEnginePlugin> searchEnginePlugins = pluginsService.filterPlugins(SearchEnginePlugin.class);
+        // How to bring the Dataformat here? Currently this means only Text and LuceneFormat can be used
+        this.engine = new CompositeIndexingExecutionEngine(mapperService, pluginsService, shardPath, 0);
+        Path committerPath = Files.createTempDirectory("lucene-committer-index");
+        this.compositeEngineCommitter = new LuceneCommitEngine(committerPath);
+
+        // Refresh here so that catalog snapshot gets initialized
+        // TODO : any better way to do this ?
+        refresh("start");
+        // TODO : how to extend this for Lucene ? where engine is a r/w engine
+        // Create read specific engines for each format which is associated with shard
+        for (SearchEnginePlugin searchEnginePlugin : searchEnginePlugins) {
+            for (org.opensearch.vectorized.execution.search.DataFormat dataFormat : searchEnginePlugin.getSupportedFormats()) {
+                List<SearchExecEngine<?, ?, ?, ?>> currentSearchEngines = readEngines.getOrDefault(dataFormat, new ArrayList<>());
+                SearchExecEngine<?, ?, ?, ?> newSearchEngine = searchEnginePlugin.createEngine(dataFormat,
+                        Collections.emptyList(),
+                        shardPath);
+
+                currentSearchEngines.add(newSearchEngine);
+                readEngines.put(dataFormat, currentSearchEngines);
+
+                // TODO : figure out how to do internal and external refresh listeners
+                // Maybe external refresh should be managed in opensearch core and plugins should always give
+                // internal refresh managers
+                // 60s as refresh interval -> ExternalReaderManager acquires a view every 60 seconds
+                // InternalReaderManager -> IndexingMemoryController , it keeps on refreshing internal maanger
+                //
+                if (newSearchEngine.getRefreshListener(Engine.SearcherScope.INTERNAL) != null) {
+                    catalogSnapshotAwareRefreshListeners.add(newSearchEngine.getRefreshListener(Engine.SearcherScope.INTERNAL));
+                }
+            }
+        }
+    }
+
+    public SearchExecEngine<?, ?, ?, ?> getReadEngine(org.opensearch.vectorized.execution.search.DataFormat dataFormat) {
+        return readEngines.getOrDefault(dataFormat, new ArrayList<>()).getFirst();
+    }
+
+    public SearchExecEngine<?, ?, ?, ?> getPrimaryReadEngine() {
+        // Return the first available ReadEngine as primary
+        return readEngines.values().stream()
+                .filter(list -> !list.isEmpty())
+                .findFirst()
+                .map(List::getFirst)
+                .orElse(null);
+    }
+
+    public CompositeDataFormatWriter.CompositeDocumentInput documentInput() throws IOException {
+        return engine.createCompositeWriter().newDocumentInput();
+    }
+
+    public Engine.IndexResult index(Engine.Index index) throws IOException {
+        WriteResult writeResult = index.documentInput.addToWriter();
+        // translog, checkpoint, other checks
+        return new Engine.IndexResult(writeResult.version(), writeResult.seqNo(), writeResult.term(), writeResult.success());
+    }
+
+    public synchronized void refresh(String source) throws EngineException {
+        refreshListeners.forEach(ref -> {
+            try {
+                ref.beforeRefresh();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+
+        long id = 0L;
+        if (catalogSnapshot != null) {
+            id = catalogSnapshot.getId();
+        }
+        CatalogSnapshot newCatSnap;
+        try {
+            RefreshResult refreshResult = engine.refresh(new RefreshInput());
+            if (refreshResult == null) {
+                return;
+            }
+            newCatSnap = new CatalogSnapshot(refreshResult, id + 1L);
+            System.out.println("CATALOG SNAPSHOT: " + newCatSnap);
+        } catch (IOException ex) {
+            throw new RuntimeException(ex);
+        }
+
+        newCatSnap.incRef();
+        if (catalogSnapshot != null) {
+            catalogSnapshot.decRef();
+        }
+        catalogSnapshot = newCatSnap;
+        compositeEngineCommitter.addLuceneIndexes(catalogSnapshot);
+
+        catalogSnapshotAwareRefreshListeners.forEach(ref -> {
+            try {
+                ref.afterRefresh(true, catalogSnapshot);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+        refreshListeners.forEach(ref -> {
+            try {
+                ref.afterRefresh(true);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+    }
+
+    public CatalogSnapshot catalogSnapshot() {
+        return catalogSnapshot;
+    }
+
+    // This should get wired into searcher acquireSnapshot for initializing reader context later
+    // this now becomes equivalent of the reader
+    // Each search side specific impl can decide on how to init specific reader instances using this pit snapshot provided by writers
+    public ReleasableRef<CatalogSnapshot> acquireSnapshot() {
+        catalogSnapshot.incRef(); // this should be package-private
+        return new ReleasableRef<CatalogSnapshot>(catalogSnapshot) {
+            @Override
+            public void close() throws Exception {
+                catalogSnapshot.decRef(); // this should be package-private
+            }
+        };
+    }
+
+    @ExperimentalApi
+    public static abstract class ReleasableRef<T> implements AutoCloseable {
+
+        private T t;
+
+        public ReleasableRef(T t) {
+            this.t = t;
+        }
+
+        public T getRef() {
+            return t;
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        CompositeEngine coordinator = new CompositeEngine(null, null, null);
+
+        for (int i = 0; i < 5; i++) {
+
+            // Ingestion into one generation
+            for (int k = 0; k < 10; k++) {
+                try (CompositeDataFormatWriter.CompositeDocumentInput doc = coordinator.documentInput()) {
+
+                    // Mapper part
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f1"), k + "_v1");
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f2"), k + "_v2");
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f3"), k + "_v3");
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f4"), k + "_v4");
+                    Engine.Index index = new Engine.Index(null, 1L, null);
+                    index.documentInput = doc;
+
+                    // applyIndexOperation part
+                    coordinator.index(index);
+                }
+            }
+
+            // Refresh until generation
+            coordinator.refresh("_manual_test");
+            System.out.println(coordinator.catalogSnapshot);
+        }
+    }
+
+    @Override
+    public Engine.DeleteResult delete(Engine.Delete delete) throws IOException {
+        return null;
+    }
+
+    @Override
+    public Engine.NoOpResult noOp(Engine.NoOp noOp) throws IOException {
+        return null;
+    }
+
+    @Override
+    public int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNumber) throws IOException {
+        return 0;
+    }
+
+    @Override
+    public boolean hasCompleteOperationHistory(String reason, long startingSeqNo) {
+        return false;
+    }
+
+    @Override
+    public long getIndexBufferRAMBytesUsed() {
+        return 0;
+    }
+
+    @Override
+    public List<Segment> segments(boolean verbose) {
+        return List.of();
+    }
+
+    @Override
+    public long getMaxSeenAutoIdTimestamp() {
+        return 0;
+    }
+
+    @Override
+    public void updateMaxUnsafeAutoIdTimestamp(long newTimestamp) {
+
+    }
+
+    @Override
+    public int fillSeqNoGaps(long primaryTerm) throws IOException {
+        return 0;
+    }
+
+    @Override
+    public void forceMerge(boolean flush, int maxNumSegments, boolean onlyExpungeDeletes, boolean upgrade, boolean upgradeOnlyAncientSegments, String forceMergeUUID) throws EngineException, IOException {
+
+    }
+
+    @Override
+    public void writeIndexingBuffer() throws EngineException {
+
+    }
+
+    @Override
+    public void flush(boolean force, boolean waitIfOngoing) throws EngineException {
+        compositeEngineCommitter.commit(catalogSnapshot);
+    }
+
+    @Override
+    public SafeCommitInfo getSafeCommitInfo() {
+        return null;
+    }
+
+    @Override
+    public TranslogManager translogManager() {
+        return null;
+    }
+
+    @Override
+    public Closeable acquireHistoryRetentionLock() {
+        return null;
+    }
+
+    @Override
+    public Translog.Snapshot newChangesSnapshot(String source, long fromSeqNo, long toSeqNo, boolean requiredFullRange, boolean accurateCount) throws IOException {
+        return null;
+    }
+
+    @Override
+    public String getHistoryUUID() {
+        return "";
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexingManager.java b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexingManager.java
new file mode 100644
index 0000000000000..3e6a751caef2a
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/coord/IndexingManager.java
@@ -0,0 +1,137 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.coord;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.util.ArrayList;
+import java.util.List;
+import org.apache.lucene.search.ReferenceManager;
+import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineException;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.RefreshInput;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.engine.exec.commit.Committer;
+import org.opensearch.index.engine.exec.commit.LuceneCommitEngine;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
+import org.opensearch.index.engine.exec.composite.CompositeIndexingExecutionEngine;
+import org.opensearch.index.mapper.KeywordFieldMapper;
+import org.opensearch.index.mapper.MapperService;
+
+public class IndexingManager {
+
+    private final CompositeIndexingExecutionEngine engine;
+    private final List<ReferenceManager.RefreshListener> refreshListeners = new ArrayList<>();
+    private final Committer committer;
+    private CatalogSnapshot catalogSnapshot;
+
+    public IndexingManager(Path indexPath, MapperService mapperService/*, EngineConfig engineConfig*/)
+        throws IOException {
+        this.engine = new CompositeIndexingExecutionEngine(mapperService, null, new Any(List.of(DataFormat.TEXT)), null,
+            0);
+        this.committer = new LuceneCommitEngine(indexPath);
+    }
+
+    public CompositeDataFormatWriter.CompositeDocumentInput documentInput() throws IOException {
+        return engine.createCompositeWriter().newDocumentInput();
+    }
+
+    public Engine.IndexResult index(Engine.Index index) throws Exception {
+        WriteResult writeResult = index.documentInput.addToWriter();
+        // translog, checkpoint, other checks
+        return new Engine.IndexResult(writeResult.version(), writeResult.seqNo(), writeResult.term(),
+            writeResult.success());
+    }
+
+    public synchronized void refresh(String source) throws EngineException, IOException {
+        refreshListeners.forEach(ref -> {
+            try {
+                ref.beforeRefresh();
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+
+        long id = 0L;
+        if (catalogSnapshot != null) {
+            id = catalogSnapshot.getId();
+        }
+        CatalogSnapshot newCatSnap = new CatalogSnapshot(engine.refresh(new RefreshInput()), id + 1L);
+        newCatSnap.incRef();
+        if (catalogSnapshot != null) {
+            catalogSnapshot.decRef();
+        }
+        catalogSnapshot = newCatSnap;
+
+        refreshListeners.forEach(ref -> {
+            try {
+                ref.afterRefresh(true);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        });
+    }
+
+    // This should get wired into searcher acquireSnapshot for initializing reader context later
+    // this now becomes equivalent of the reader
+    // Each search side specific impl can decide on how to init specific reader instances using this pit snapshot provided by writers
+    public ReleasableRef<CatalogSnapshot> acquireSnapshot() {
+        catalogSnapshot.incRef(); // this should be package-private
+        return new ReleasableRef<>(catalogSnapshot) {
+            @Override
+            public void close() throws Exception {
+                catalogSnapshot.decRef(); // this should be package-private
+            }
+        };
+    }
+
+    public static abstract class ReleasableRef<T> implements AutoCloseable {
+
+        private final T t;
+
+        public ReleasableRef(T t) {
+            this.t = t;
+        }
+
+        public T getRef() {
+            return t;
+        }
+    }
+
+    public static void main(String[] args) throws Exception {
+        IndexingManager coordinator = new IndexingManager(
+            Path.of("/Users/shnkgo/Downloads/mustang/lucene-committer-index/"), null);
+
+        for (int i = 0; i < 5; i++) {
+
+            // Ingestion into one generation
+            for (int k = 0; k < 10; k++) {
+                try (CompositeDataFormatWriter.CompositeDocumentInput doc = coordinator.documentInput()) {
+
+                    // Mapper part
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f1"), k + "_v1");
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f2"), k + "_v2");
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f3"), k + "_v3");
+                    doc.addField(new KeywordFieldMapper.KeywordFieldType("f4"), k + "_v4");
+                    Engine.Index index = new Engine.Index(null, 1L, null);
+                    index.documentInput = doc;
+
+                    // applyIndexOperation part
+                    coordinator.index(index);
+                }
+            }
+
+            // Refresh until generation
+            coordinator.refresh("_manual_test");
+            System.out.println(coordinator.catalogSnapshot);
+        }
+    }
+
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java
new file mode 100644
index 0000000000000..8afdc4f9901d4
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/lucene/LuceneIEEngine.java
@@ -0,0 +1,130 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.lucene;
+
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.util.BytesRef;
+import org.opensearch.index.engine.InternalEngine;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.DocumentInput;
+import org.opensearch.index.engine.exec.FileInfos;
+import org.opensearch.index.engine.exec.FlushIn;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import org.opensearch.index.engine.exec.RefreshInput;
+import org.opensearch.index.engine.exec.RefreshResult;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.engine.exec.Writer;
+import org.opensearch.index.mapper.KeywordFieldMapper;
+import org.opensearch.index.mapper.MappedFieldType;
+import org.opensearch.index.mapper.ParseContext;
+
+import java.io.IOException;
+import java.util.List;
+
+public class LuceneIEEngine implements IndexingExecutionEngine<DataFormat.LuceneDataFormat> {
+
+    private final InternalEngine internalEngine;
+
+    public LuceneIEEngine(InternalEngine internalEngine) {
+        this.internalEngine = internalEngine;
+    }
+
+    @Override
+    public List<String> supportedFieldTypes() {
+        return List.of();
+    }
+
+
+    @Override
+    public Writer<? extends DocumentInput<?>> createWriter(long writerGeneration) throws IOException {
+        return new LuceneWriter(internalEngine.indexWriter, writerGeneration);
+    }
+
+    @Override
+    public RefreshResult refresh(RefreshInput refreshInput) throws IOException {
+        internalEngine.refresh(refreshInput.getClass().getName());
+        return null;
+    }
+
+    @Override
+    public DataFormat getDataFormat() {
+        return DataFormat.LUCENE;
+    }
+
+
+    public static class LuceneDocumentInput implements DocumentInput<ParseContext.Document> {
+
+        private final ParseContext.Document doc;
+        private final IndexWriter writer;
+
+        public LuceneDocumentInput(ParseContext.Document doc, IndexWriter w) {
+            this.doc = doc;
+            this.writer = w;
+        }
+
+        @Override
+        public void addField(MappedFieldType fieldType, Object value) {
+            doc.add(new KeywordFieldMapper.KeywordField("f1", new BytesRef("good_field"), null));
+        }
+
+        @Override
+        public ParseContext.Document getFinalInput() {
+            return doc;
+        }
+
+        @Override
+        public WriteResult addToWriter() throws IOException {
+            writer.addDocument(doc);
+            return null;
+        }
+
+        @Override
+        public void close() throws Exception {
+            // no-op, reuse writer
+        }
+    }
+
+    public static class LuceneWriter implements Writer<LuceneDocumentInput> {
+
+        private final IndexWriter writer;
+        private final long writerGeneration;
+
+        public LuceneWriter(IndexWriter writer, long writerGeneration) {
+            this.writer = writer;
+            this.writerGeneration = writerGeneration;
+        }
+
+        @Override
+        public WriteResult addDoc(LuceneDocumentInput d) throws IOException {
+            writer.addDocument(d.doc);
+            return null;
+        }
+
+        @Override
+        public FileInfos flush(FlushIn flushIn) throws IOException {
+            writer.flush();
+            return null;
+        }
+
+        @Override
+        public void sync() throws IOException {
+            writer.flush();
+        }
+
+        @Override
+        public void close() {
+            // no-op
+        }
+
+        @Override
+        public LuceneDocumentInput newDocumentInput() {
+            return new LuceneDocumentInput(new ParseContext.Document(), writer);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/queue/ConcurrentQueue.java b/server/src/main/java/org/opensearch/index/engine/exec/queue/ConcurrentQueue.java
new file mode 100644
index 0000000000000..9b8b774063a87
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/queue/ConcurrentQueue.java
@@ -0,0 +1,123 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.queue;
+
+import java.util.Queue;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.function.Predicate;
+import java.util.function.Supplier;
+
+public final class ConcurrentQueue<T> {
+
+    static final int MIN_CONCURRENCY = 1;
+    static final int MAX_CONCURRENCY = 256;
+
+    private final int concurrency;
+    private final Lock[] locks;
+    private final Queue<T>[] queues;
+    private final Supplier<Queue<T>> queueSupplier;
+
+    ConcurrentQueue(Supplier<Queue<T>> queueSupplier, int concurrency) {
+        if (concurrency < MIN_CONCURRENCY || concurrency > MAX_CONCURRENCY) {
+            throw new IllegalArgumentException(
+                "concurrency must be in [" + MIN_CONCURRENCY + ", " + MAX_CONCURRENCY + "], got " + concurrency);
+        }
+        this.concurrency = concurrency;
+        this.queueSupplier = queueSupplier;
+        locks = new Lock[concurrency];
+        @SuppressWarnings({"rawtypes", "unchecked"}) Queue<T>[] queues = new Queue[concurrency];
+        this.queues = queues;
+        for (int i = 0; i < concurrency; ++i) {
+            locks[i] = new ReentrantLock();
+            queues[i] = queueSupplier.get();
+        }
+    }
+
+    void add(T entry) {
+        // Seed the order in which to look at entries based on the current thread. This helps distribute
+        // entries across queues and gives a bit of thread affinity between entries and threads, which
+        // can't hurt.
+        final int threadHash = Thread.currentThread().hashCode() & 0xFFFF;
+        for (int i = 0; i < concurrency; ++i) {
+            final int index = (threadHash + i) % concurrency;
+            final Lock lock = locks[index];
+            final Queue<T> queue = queues[index];
+            if (lock.tryLock()) {
+                try {
+                    queue.add(entry);
+                    return;
+                } finally {
+                    lock.unlock();
+                }
+            }
+        }
+        final int index = threadHash % concurrency;
+        final Lock lock = locks[index];
+        final Queue<T> queue = queues[index];
+        lock.lock();
+        try {
+            queue.add(entry);
+        } finally {
+            lock.unlock();
+        }
+    }
+
+    T poll(Predicate<T> predicate) {
+        final int threadHash = Thread.currentThread().hashCode() & 0xFFFF;
+        for (int i = 0; i < concurrency; ++i) {
+            final int index = (threadHash + i) % concurrency;
+            final Lock lock = locks[index];
+            final Queue<T> queue = queues[index];
+            if (lock.tryLock()) {
+                try {
+                    for (T entry : queue) {
+                        if (predicate.test(entry)) {
+                            return entry;
+                        }
+                    }
+                } finally {
+                    lock.unlock();
+                }
+            }
+        }
+        for (int i = 0; i < concurrency; ++i) {
+            final int index = (threadHash + i) % concurrency;
+            final Lock lock = locks[index];
+            final Queue<T> queue = queues[index];
+            lock.lock();
+            try {
+                for (T entry : queue) {
+                    if (predicate.test(entry)) {
+                        return entry;
+                    }
+                }
+            } finally {
+                lock.unlock();
+            }
+        }
+        return null;
+    }
+
+    boolean remove(T entry) {
+        for (int i = 0; i < concurrency; ++i) {
+            final Lock lock = locks[i];
+            final Queue<T> queue = queues[i];
+            lock.lock();
+            try {
+                if (queue.remove(entry)) {
+                    return true;
+                }
+            } finally {
+                lock.unlock();
+            }
+        }
+        return false;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/queue/LockableConcurrentQueue.java b/server/src/main/java/org/opensearch/index/engine/exec/queue/LockableConcurrentQueue.java
new file mode 100644
index 0000000000000..e46ec5137308a
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/queue/LockableConcurrentQueue.java
@@ -0,0 +1,54 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.queue;
+
+import java.util.Queue;
+import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.locks.Lock;
+import java.util.function.Supplier;
+
+public final class LockableConcurrentQueue<T extends Lock> {
+
+    private final ConcurrentQueue<T> queue;
+    private final AtomicInteger addAndUnlockCounter = new AtomicInteger();
+
+    public LockableConcurrentQueue(Supplier<Queue<T>> queueSupplier, int concurrency) {
+        this.queue = new ConcurrentQueue<>(queueSupplier, concurrency);
+    }
+
+    /**
+     * Lock an entry, and poll it from the queue, in that order. If no entry can be found and locked,
+     * {@code null} is returned.
+     */
+    public T lockAndPoll() {
+        int addAndUnlockCount;
+        do {
+            addAndUnlockCount = addAndUnlockCounter.get();
+            T entry = queue.poll(Lock::tryLock);
+            if (entry != null) {
+                return entry;
+            }
+            // If an entry has been added to the queue in the meantime, try again.
+        } while (addAndUnlockCount != addAndUnlockCounter.get());
+
+        return null;
+    }
+
+    /** Remove an entry from the queue. */
+    public boolean remove(T entry) {
+        return queue.remove(entry);
+    }
+
+    /** Add an entry to the queue and unlock it, in that order. */
+    public void addAndUnlock(T entry) {
+        queue.add(entry);
+        entry.unlock();
+        addAndUnlockCounter.incrementAndGet();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java b/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java
new file mode 100644
index 0000000000000..b19a6c893cc11
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/text/TextDF.java
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.text;
+
+import org.opensearch.common.settings.Setting;
+import org.opensearch.common.settings.Settings;
+import org.opensearch.index.engine.exec.DataFormat;
+
+
+public class TextDF implements DataFormat {
+    @Override
+    public Setting<Settings> dataFormatSettings() {
+        return null;
+    }
+
+    @Override
+    public Setting<Settings> clusterLeveldataFormatSettings() {
+        return null;
+    }
+
+    @Override
+    public String name() {
+        return "text";
+    }
+
+    @Override
+    public void configureStore() {
+
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java
new file mode 100644
index 0000000000000..7e7743c17f6e7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/text/TextEngine.java
@@ -0,0 +1,147 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.text;
+
+import java.nio.file.Path;
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.DocumentInput;
+import org.opensearch.index.engine.exec.FileInfos;
+import org.opensearch.index.engine.exec.WriterFileSet;
+import org.opensearch.index.engine.exec.FlushIn;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import org.opensearch.index.engine.exec.RefreshInput;
+import org.opensearch.index.engine.exec.RefreshResult;
+import org.opensearch.index.engine.exec.WriteResult;
+import org.opensearch.index.engine.exec.Writer;
+import org.opensearch.index.mapper.MappedFieldType;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicLong;
+
+public class TextEngine implements IndexingExecutionEngine<TextDF> {
+
+    private final AtomicLong counter = new AtomicLong();
+    private final Set<TextWriter> openWriters = new HashSet<>();
+    private final List<WriterFileSet> openFiles = new ArrayList<>();
+
+    @Override
+    public List<String> supportedFieldTypes() {
+        return List.of();
+    }
+
+    @Override
+    public Writer<? extends DocumentInput<?>> createWriter(long writerGeneration) throws IOException {
+        return new TextWriter("text_file" + counter.getAndIncrement(), this, writerGeneration);
+    }
+
+    @Override
+    public DataFormat getDataFormat() {
+        return DataFormat.TEXT;
+    }
+
+    @Override
+    public RefreshResult refresh(RefreshInput refreshInput) throws IOException {
+        openFiles.addAll(refreshInput.getWriterFiles());
+        RefreshResult refreshResult = new RefreshResult();
+        refreshResult.add(DataFormat.TEXT, openFiles);
+        return refreshResult;
+    }
+
+    public static class TextInput implements DocumentInput<String> {
+
+        private final StringBuilder sb = new StringBuilder();
+        private final TextWriter writer;
+
+        public TextInput(TextWriter writer) {
+            this.writer = writer;
+        }
+
+        @Override
+        public void addField(MappedFieldType fieldType, Object value) {
+            sb.append(fieldType.name()).append("=").append(value).append(";");
+        }
+
+        @Override
+        public String getFinalInput() {
+            return sb.append("\n").toString();
+        }
+
+        @Override
+        public WriteResult addToWriter() throws IOException {
+            return writer.addDoc(this);
+        }
+
+        @Override
+        public void close() throws Exception {
+            //no op
+        }
+    }
+
+    public static class TextWriter implements Writer<TextInput> {
+
+        private final StringBuilder sb = new StringBuilder();
+        private final File currentFile;
+        private final AtomicBoolean flushed = new AtomicBoolean(false);
+        private final Runnable onClose;
+        private final long writerGeneration;
+
+        public TextWriter(String currentFile, TextEngine engine, long writerGeneration) throws IOException {
+            this.currentFile = new File("/Users/shnkgo/mustang" + currentFile);
+            this.currentFile.createNewFile();
+            this.writerGeneration = writerGeneration;
+            boolean canWrite = this.currentFile.setWritable(true);
+            if (!canWrite) {
+                throw new IllegalStateException("Cannot write to file [" + currentFile + "]");
+            }
+            engine.openWriters.add(this);
+            onClose = () -> engine.openWriters.remove(this);
+        }
+
+        @Override
+        public WriteResult addDoc(TextInput d) throws IOException {
+            sb.append(d.getFinalInput());
+            return new WriteResult(true, null, 1, 1, 1);
+        }
+
+        @Override
+        public FileInfos flush(FlushIn flushIn) throws IOException {
+            try (FileWriter fw = new FileWriter(currentFile)) {
+                fw.write(sb.toString());
+            }
+            flushed.set(true);
+            FileInfos fileInfos = new FileInfos();
+            WriterFileSet writerFileSet = new WriterFileSet(currentFile.toPath().getParent(), writerGeneration);
+            writerFileSet.add(currentFile.getName());
+            fileInfos.putWriterFileSet(DataFormat.TEXT, writerFileSet);
+            return fileInfos;
+        }
+
+        @Override
+        public void sync() throws IOException {
+        }
+
+        @Override
+        public void close() {
+            onClose.run();
+        }
+
+        @Override
+        public TextInput newDocumentInput() {
+            return new TextInput(this);
+        }
+
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/util/SetOnce.java b/server/src/main/java/org/opensearch/index/engine/exec/util/SetOnce.java
new file mode 100644
index 0000000000000..189e49cef8458
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/util/SetOnce.java
@@ -0,0 +1,73 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec.util;
+
+import java.util.concurrent.atomic.AtomicReference;
+
+public final class SetOnce<T> implements Cloneable {
+
+    /** Thrown when {@link SetOnce#set(Object)} is called more than once. */
+    public static final class AlreadySetException extends IllegalStateException {
+        public AlreadySetException() {
+            super("The object cannot be set twice!");
+        }
+    }
+
+    /** Holding object and marking that it was already set */
+    private static final class Wrapper<T> {
+        private T object;
+
+        private Wrapper(T object) {
+            this.object = object;
+        }
+    }
+
+    private final AtomicReference<Wrapper<T>> set;
+
+    /**
+     * A default constructor which does not set the internal object, and allows setting it by calling
+     * {@link #set(Object)}.
+     */
+    public SetOnce() {
+        set = new AtomicReference<>();
+    }
+
+    /**
+     * Creates a new instance with the internal object set to the given object. Note that any calls to
+     * {@link #set(Object)} afterwards will result in {@link AlreadySetException}
+     *
+     * @throws AlreadySetException if called more than once
+     * @see #set(Object)
+     */
+    public SetOnce(T obj) {
+        set = new AtomicReference<>(new Wrapper<>(obj));
+    }
+
+    /** Sets the given object. If the object has already been set, an exception is thrown. */
+    public final void set(T obj) {
+        if (!trySet(obj)) {
+            throw new AlreadySetException();
+        }
+    }
+
+    /**
+     * Sets the given object if none was set before.
+     *
+     * @return true if object was set successfully, false otherwise
+     */
+    public final boolean trySet(T obj) {
+        return set.compareAndSet(null, new Wrapper<>(obj));
+    }
+
+    /** Returns the object set by {@link #set(Object)}. */
+    public final T get() {
+        Wrapper<T> wrapper = set.get();
+        return wrapper == null ? null : wrapper.object;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java
index ea4cff42ca905..8a9e8e3d9f517 100644
--- a/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/BooleanFieldMapper.java
@@ -389,17 +389,19 @@ protected void parseCreateField(ParseContext context) throws IOException {
         if (value == null) {
             return;
         }
-        if (indexed) {
-            context.doc().add(new Field(fieldType().name(), value ? "T" : "F", Defaults.FIELD_TYPE));
-        }
-        if (stored) {
-            context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F"));
-        }
-        if (hasDocValues) {
-            context.doc().add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
-        } else {
-            createFieldNamesField(context);
-        }
+
+        context.compositeDocumentInput().addField(fieldType(), value);
+//        if (indexed) {
+//            context.doc().add(new Field(fieldType().name(), value ? "T" : "F", Defaults.FIELD_TYPE));
+//        }
+//        if (stored) {
+//            context.doc().add(new StoredField(fieldType().name(), value ? "T" : "F"));
+//        }
+//        if (hasDocValues) {
+//            context.doc().add(new SortedNumericDocValuesField(fieldType().name(), value ? 1 : 0));
+//        } else {
+//            createFieldNamesField(context);
+//        }
     }
 
     @Override
@@ -430,7 +432,7 @@ protected void canDeriveSourceInternal() {
      *    2. When using stored field, for multi value field order would be preserved
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) {
             @Override
             public Object convert(Object value) {
diff --git a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java
index 270a4606b11c6..d3d3784f1295c 100644
--- a/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/DateFieldMapper.java
@@ -247,7 +247,7 @@ protected void canDeriveSourceInternal() {
      *        "format"
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) {
             @Override
             public Object convert(Object value) {
@@ -842,21 +842,23 @@ protected void parseCreateField(ParseContext context) throws IOException {
             }
         }
 
-        if (indexed) {
-            context.doc().add(new LongPoint(fieldType().name(), timestamp));
-        }
-        if (hasDocValues) {
-            if (skiplist) {
-                context.doc().add(SortedNumericDocValuesField.indexedField(fieldType().name(), timestamp));
-            } else {
-                context.doc().add(new SortedNumericDocValuesField(fieldType().name(), timestamp));
-            }
-        } else if (store || indexed) {
-            createFieldNamesField(context);
-        }
-        if (store) {
-            context.doc().add(new StoredField(fieldType().name(), timestamp));
-        }
+        context.compositeDocumentInput().addField(fieldType(), timestamp);
+
+//        if (indexed) {
+//            context.doc().add(new LongPoint(fieldType().name(), timestamp));
+//        }
+//        if (hasDocValues) {
+//            if (skiplist) {
+//                context.doc().add(SortedNumericDocValuesField.indexedField(fieldType().name(), timestamp));
+//            } else {
+//                context.doc().add(new SortedNumericDocValuesField(fieldType().name(), timestamp));
+//            }
+//        } else if (store || indexed) {
+//            createFieldNamesField(context);
+//        }
+//        if (store) {
+//            context.doc().add(new StoredField(fieldType().name(), timestamp));
+//        }
     }
 
     public Long getNullValue() {
diff --git a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java
index 383bd25dc7d0c..9f6de67843932 100644
--- a/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java
+++ b/server/src/main/java/org/opensearch/index/mapper/DerivedFieldGenerator.java
@@ -12,6 +12,7 @@
 import org.opensearch.core.xcontent.XContentBuilder;
 
 import java.io.IOException;
+import java.util.List;
 import java.util.Objects;
 
 /**
@@ -58,4 +59,13 @@ public FieldValueType getDerivedFieldPreference() {
     public void generate(XContentBuilder builder, LeafReader reader, int docId) throws IOException {
         fieldValueFetcher.write(builder, fieldValueFetcher.fetch(reader, docId));
     }
+
+    /**
+     * Generate the derived field value based on the preference of derived field and field value type
+     * @param builder - builder to store the derived source filed
+     * @param values - values for which we want to generate the source
+     */
+    public void generate(XContentBuilder builder, List<Object> values) throws IOException {
+        fieldValueFetcher.write(builder, values);
+    }
 }
diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java
index cb7e08f062d6d..cd520eb5eb1e2 100644
--- a/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/DocumentMapper.java
@@ -51,6 +51,7 @@
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.IndexSortConfig;
 import org.opensearch.index.analysis.IndexAnalyzers;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
 import org.opensearch.index.mapper.MapperService.MergeReason;
 import org.opensearch.index.mapper.MetadataFieldMapper.TypeParser;
 import org.opensearch.index.query.NestedQueryBuilder;
@@ -253,6 +254,10 @@ public ParsedDocument parse(SourceToParse source) throws MapperParsingException
         return documentParser.parseDocument(source, mapping.metadataMappers);
     }
 
+    public ParsedDocument parse(SourceToParse source, CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput) throws MapperParsingException {
+        return documentParser.parseDocument(source, mapping.metadataMappers, compositeDocumentInput);
+    }
+
     public ParsedDocument createDeleteTombstoneDoc(String index, String id) throws MapperParsingException {
         final SourceToParse emptySource = new SourceToParse(index, id, new BytesArray("{}"), MediaTypeRegistry.JSON);
         return documentParser.parseDocument(emptySource, deleteTombstoneMetadataFieldMappers).toTombstone();
diff --git a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java
index 213fb48595b8b..b81b3dfde7951 100644
--- a/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java
+++ b/server/src/main/java/org/opensearch/index/mapper/DocumentParser.java
@@ -46,6 +46,7 @@
 import org.opensearch.core.xcontent.MediaType;
 import org.opensearch.core.xcontent.XContentParser;
 import org.opensearch.index.IndexSettings;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
 import org.opensearch.index.mapper.DynamicTemplate.XContentFieldType;
 
 import java.io.IOException;
@@ -76,6 +77,10 @@ final class DocumentParser {
     }
 
     ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadataFieldsMappers) throws MapperParsingException {
+        return parseDocument(source, metadataFieldsMappers, null);
+    }
+
+    ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadataFieldsMappers, CompositeDataFormatWriter.CompositeDocumentInput documentInput) throws MapperParsingException {
         final Mapping mapping = docMapper.mapping();
         final ParseContext.InternalParseContext context;
         final MediaType mediaType = source.getMediaType();
@@ -88,7 +93,7 @@ ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadat
                 mediaType
             )
         ) {
-            context = new ParseContext.InternalParseContext(indexSettings, docMapperParser, docMapper, source, parser);
+            context = new ParseContext.InternalParseContext(indexSettings, docMapperParser, docMapper, source, parser, documentInput);
             validateStart(parser);
             internalParseDocument(mapping, metadataFieldsMappers, context, parser);
             validateEnd(parser);
@@ -102,7 +107,7 @@ ParsedDocument parseDocument(SourceToParse source, MetadataFieldMapper[] metadat
 
         context.postParse();
 
-        return parsedDocument(source, context, createDynamicUpdate(mapping, docMapper, context.getDynamicMappers()));
+        return parsedDocument(source, context, createDynamicUpdate(mapping, docMapper, context.getDynamicMappers()), documentInput);
     }
 
     private static boolean containsDisabledObjectMapper(ObjectMapper objectMapper, String[] subfields) {
@@ -176,7 +181,7 @@ private static boolean isEmptyDoc(Mapping mapping, XContentParser parser) throws
         return false;
     }
 
-    private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.InternalParseContext context, Mapping update) {
+    private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.InternalParseContext context, Mapping update, CompositeDataFormatWriter.CompositeDocumentInput documentInput) {
         return new ParsedDocument(
             context.version(),
             context.seqID(),
@@ -185,7 +190,8 @@ private static ParsedDocument parsedDocument(SourceToParse source, ParseContext.
             context.docs(),
             context.sourceToParse().source(),
             context.sourceToParse().getMediaType(),
-            update
+            update,
+            documentInput
         );
     }
 
diff --git a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java
index aaa2c9c029974..fee2194e14976 100644
--- a/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/FieldMapper.java
@@ -600,7 +600,7 @@ protected Explicit<Boolean> ignoreMalformed() {
      * Method to create derived source generator for this field mapper, it is illegal to enable the
      * derived source feature and not implement this method for a field mapper
      */
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return null;
     }
 
diff --git a/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java
index 2910bd2856d2f..89844e14a351d 100644
--- a/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/GeoPointFieldMapper.java
@@ -219,7 +219,7 @@ protected void canDeriveSourceInternal() {
      *    4. When using stored field, order and duplicate values would be preserved
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) {
             @Override
             public Object convert(Object value) {
diff --git a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java
index b2e8f75a4f444..041da68ea0cdf 100644
--- a/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/IpFieldMapper.java
@@ -199,7 +199,7 @@ protected void canDeriveSourceInternal() {
      *    2. When using stored field, order and duplicate values would be preserved
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(
             mappedFieldType,
             new SortedSetDocValuesFetcher(mappedFieldType, simpleName()),
diff --git a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java
index 7ace516459763..7897d232519a3 100644
--- a/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/KeywordFieldMapper.java
@@ -294,7 +294,7 @@ protected void canDeriveSourceInternal() {
      *    2. When using stored field, order and duplicate values would be preserved
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(
             mappedFieldType,
             new SortedSetDocValuesFetcher(mappedFieldType, simpleName()),
@@ -862,20 +862,22 @@ protected void parseCreateField(ParseContext context) throws IOException {
             value = normalizeValue(normalizer, name(), value);
         }
 
-        // convert to utf8 only once before feeding postings/dv/stored fields
-        final BytesRef binaryValue = new BytesRef(value);
-        if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) {
-            Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
-            context.doc().add(field);
-
-            if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
-                createFieldNamesField(context);
-            }
-        }
+        context.compositeDocumentInput().addField(fieldType(), value);
 
-        if (fieldType().hasDocValues()) {
-            context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
-        }
+        // convert to utf8 only once before feeding postings/dv/stored fields
+//        final BytesRef binaryValue = new BytesRef(value);
+//        if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) {
+//            Field field = new KeywordField(fieldType().name(), binaryValue, fieldType);
+//            context.doc().add(field);
+//
+//            if (fieldType().hasDocValues() == false && fieldType.omitNorms()) {
+//                createFieldNamesField(context);
+//            }
+//        }
+//
+//        if (fieldType().hasDocValues()) {
+//            context.doc().add(new SortedSetDocValuesField(fieldType().name(), binaryValue));
+//        }
     }
 
     static String normalizeValue(NamedAnalyzer normalizer, String field, String value) throws IOException {
diff --git a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java
index a3ea6b5764913..751b56cec6248 100644
--- a/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java
+++ b/server/src/main/java/org/opensearch/index/mapper/MappedFieldType.java
@@ -87,6 +87,7 @@ public abstract class MappedFieldType {
     private final boolean docValues;
     private final boolean isIndexed;
     private final boolean isStored;
+    private final boolean isColumnar;
     private final TextSearchInfo textSearchInfo;
     private final Map<String, String> meta;
     private float boost;
@@ -101,6 +102,8 @@ public MappedFieldType(
         TextSearchInfo textSearchInfo,
         Map<String, String> meta
     ) {
+        // TODO: take the value from user input
+        this.isColumnar = true;
         this.boost = 1.0f;
         this.name = Objects.requireNonNull(name);
         this.isIndexed = isIndexed;
@@ -185,6 +188,13 @@ public boolean isStored() {
         return isStored;
     }
 
+    /**
+     * Returns true if the field is columnar.
+     */
+    public boolean isColumnar() {
+        return isColumnar;
+    }
+
     /**
      * If the field supports using the indexed data to speed up operations related to ordering of data, such as sorting or aggs, return
      * a function for doing that.  If it is unsupported for this field type, there is no need to override this method.
diff --git a/server/src/main/java/org/opensearch/index/mapper/Mapper.java b/server/src/main/java/org/opensearch/index/mapper/Mapper.java
index 3b9024162656f..d6f5bdcbd9af2 100644
--- a/server/src/main/java/org/opensearch/index/mapper/Mapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/Mapper.java
@@ -319,4 +319,8 @@ public void canDeriveSource() {
     public void deriveSource(XContentBuilder builder, LeafReader leafReader, int docId) throws IOException {
         throw new UnsupportedOperationException("Derived source field is not supported for [" + name() + "] field");
     }
+
+    public DerivedFieldGenerator derivedFieldGenerator() throws IOException {
+        throw new UnsupportedOperationException("Converting [" + name() + "] is not supported for [" + name() + "] field");
+    }
 }
diff --git a/server/src/main/java/org/opensearch/index/mapper/MapperService.java b/server/src/main/java/org/opensearch/index/mapper/MapperService.java
index b0acdceeff9ce..3c7d9374fa257 100644
--- a/server/src/main/java/org/opensearch/index/mapper/MapperService.java
+++ b/server/src/main/java/org/opensearch/index/mapper/MapperService.java
@@ -141,7 +141,7 @@ public enum MergeReason {
     );
     public static final Setting<Long> INDEX_MAPPING_TOTAL_FIELDS_LIMIT_SETTING = Setting.longSetting(
         "index.mapping.total_fields.limit",
-        1000L,
+        10000L,
         0,
         Property.Dynamic,
         Property.IndexScope
diff --git a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java
index a04f8888a2347..ba2a215a8ad41 100644
--- a/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/NumberFieldMapper.java
@@ -209,10 +209,21 @@ public boolean isDataCubeMetricSupported() {
      *       compared to stored field(stored as float)
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(mappedFieldType, new SortedNumericDocValuesFetcher(mappedFieldType, simpleName()) {
             @Override
             public Object convert(Object value) {
+                if(value instanceof Integer) {
+                    Integer val = (Integer) value;
+
+                    return switch (type) {
+                        case HALF_FLOAT -> HalfFloatPoint.sortableShortToHalfFloat(val.shortValue());
+                        case FLOAT -> NumericUtils.sortableIntToFloat(val);
+                        case DOUBLE -> NumericUtils.sortableLongToDouble(val);
+                        case BYTE, SHORT, INTEGER, LONG -> val;
+                        case UNSIGNED_LONG -> Numbers.toUnsignedBigInteger(val);
+                    };
+                }
                 Long val = (Long) value;
                 if (val == null) {
                     return null;
@@ -2171,7 +2182,9 @@ protected void parseCreateField(ParseContext context) throws IOException {
             numericValue = fieldType().type.parse(value, coerce.value());
         }
 
-        context.doc().addAll(fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, skiplist, stored));
+        context.compositeDocumentInput().addField(fieldType(), numericValue);
+
+//        context.doc().addAll(fieldType().type.createFields(fieldType().name(), numericValue, indexed, hasDocValues, skiplist, stored));
 
         if (hasDocValues == false && (stored || indexed)) {
             createFieldNamesField(context);
diff --git a/server/src/main/java/org/opensearch/index/mapper/ParseContext.java b/server/src/main/java/org/opensearch/index/mapper/ParseContext.java
index 5d382ff28bcf9..5ef7e892a7ce5 100644
--- a/server/src/main/java/org/opensearch/index/mapper/ParseContext.java
+++ b/server/src/main/java/org/opensearch/index/mapper/ParseContext.java
@@ -39,6 +39,7 @@
 import org.opensearch.common.annotation.PublicApi;
 import org.opensearch.core.xcontent.XContentParser;
 import org.opensearch.index.IndexSettings;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
 
 import java.util.ArrayList;
 import java.util.Collection;
@@ -242,6 +243,11 @@ public Document doc() {
             return in.doc();
         }
 
+        @Override
+        public CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput() {
+            return in.compositeDocumentInput();
+        }
+
         @Override
         protected void addDoc(Document doc) {
             in.addDoc(doc);
@@ -393,12 +399,25 @@ public static class InternalParseContext extends ParseContext {
 
         private final Set<String> ignoredFields = new HashSet<>();
 
+        private CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput;
+
         public InternalParseContext(
             IndexSettings indexSettings,
             DocumentMapperParser docMapperParser,
             DocumentMapper docMapper,
             SourceToParse source,
             XContentParser parser
+        ) {
+            this(indexSettings, docMapperParser, docMapper, source, parser, null);
+        }
+
+        public InternalParseContext(
+            IndexSettings indexSettings,
+            DocumentMapperParser docMapperParser,
+            DocumentMapper docMapper,
+            SourceToParse source,
+            XContentParser parser,
+            CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput
         ) {
             this.indexSettings = indexSettings;
             this.docMapper = docMapper;
@@ -417,6 +436,7 @@ public InternalParseContext(
             this.currentArrayDepth = 0L;
             this.maxAllowedFieldDepth = indexSettings.getMappingDepthLimit();
             this.maxAllowedArrayDepth = indexSettings.getMappingDepthLimit();
+            this.compositeDocumentInput = compositeDocumentInput;
         }
 
         @Override
@@ -458,6 +478,11 @@ public Document doc() {
             return this.document;
         }
 
+        @Override
+        public CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput() {
+            return compositeDocumentInput;
+        }
+
         @Override
         protected void addDoc(Document doc) {
             numNestedDocs++;
@@ -718,6 +743,7 @@ public boolean isWithinMultiFields() {
     public abstract Document rootDoc();
 
     public abstract Document doc();
+    public abstract CompositeDataFormatWriter.CompositeDocumentInput compositeDocumentInput();
 
     protected abstract void addDoc(Document doc);
 
diff --git a/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java b/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java
index 16e38980f8600..bcbf6a5fb38f3 100644
--- a/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java
+++ b/server/src/main/java/org/opensearch/index/mapper/ParsedDocument.java
@@ -37,6 +37,8 @@
 import org.opensearch.common.xcontent.XContentType;
 import org.opensearch.core.common.bytes.BytesReference;
 import org.opensearch.core.xcontent.MediaType;
+import org.opensearch.index.engine.exec.DocumentInput;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
 import org.opensearch.index.mapper.MapperService.MergeReason;
 import org.opensearch.index.mapper.ParseContext.Document;
 
@@ -64,6 +66,12 @@ public class ParsedDocument {
 
     private Mapping dynamicMappingsUpdate;
 
+    private CompositeDataFormatWriter.CompositeDocumentInput documentInput;
+
+    public CompositeDataFormatWriter.CompositeDocumentInput getDocumentInput() {
+        return documentInput;
+    }
+
     public ParsedDocument(
         Field version,
         SeqNoFieldMapper.SequenceIDFields seqID,
@@ -73,6 +81,22 @@ public ParsedDocument(
         BytesReference source,
         MediaType mediaType,
         Mapping dynamicMappingsUpdate
+    ) {
+        this(
+            version, seqID, id, routing, documents, source, mediaType, dynamicMappingsUpdate, null
+        );
+    }
+
+    public ParsedDocument(
+        Field version,
+        SeqNoFieldMapper.SequenceIDFields seqID,
+        String id,
+        String routing,
+        List<Document> documents,
+        BytesReference source,
+        MediaType mediaType,
+        Mapping dynamicMappingsUpdate,
+        CompositeDataFormatWriter.CompositeDocumentInput documentInput
     ) {
         this.version = version;
         this.seqID = seqID;
@@ -82,6 +106,7 @@ public ParsedDocument(
         this.source = source;
         this.dynamicMappingsUpdate = dynamicMappingsUpdate;
         this.mediaType = mediaType;
+        this.documentInput = documentInput;
     }
 
     public String id() {
diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
index bb726893b3d17..66107621f1049 100644
--- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
@@ -1238,7 +1238,7 @@ protected void canDeriveSourceInternal() {}
      * Derive source using stored field, which would always be present for derived source enabled index field
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(mappedFieldType, null, new StoredFieldFetcher(mappedFieldType, simpleName())) {
             @Override
             public FieldValueType getDerivedFieldPreference() {
diff --git a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java
index b10371f301a59..2c1e532542c63 100644
--- a/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java
+++ b/server/src/main/java/org/opensearch/index/mapper/WildcardFieldMapper.java
@@ -928,7 +928,7 @@ protected void canDeriveSourceInternal() {
      *    1. When using doc values, for multi value field, result would be deduplicated and in sorted order
      */
     @Override
-    protected DerivedFieldGenerator derivedFieldGenerator() {
+    public DerivedFieldGenerator derivedFieldGenerator() {
         return new DerivedFieldGenerator(mappedFieldType, new SortedSetDocValuesFetcher(mappedFieldType, simpleName()) {
             @Override
             public Object convert(Object value) {
diff --git a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java
index f2c278f04b021..e444bd8f858b0 100644
--- a/server/src/main/java/org/opensearch/index/query/QueryShardContext.java
+++ b/server/src/main/java/org/opensearch/index/query/QueryShardContext.java
@@ -570,6 +570,7 @@ public boolean indexSortedOnField(String field) {
         return indexSortConfig.hasPrimarySortOnField(field);
     }
 
+    // This converts the QB to query
     public ParsedQuery toQuery(QueryBuilder queryBuilder) {
         return toQuery(queryBuilder, q -> {
             Query query = q.toQuery(this);
@@ -580,6 +581,7 @@ public ParsedQuery toQuery(QueryBuilder queryBuilder) {
         });
     }
 
+    // This converts the QB to query
     private ParsedQuery toQuery(QueryBuilder queryBuilder, CheckedFunction<QueryBuilder, Query, IOException> filterOrQuery) {
         reset();
         try {
diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java
index 609a6290d36ce..1212ed617c93a 100644
--- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java
+++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java
@@ -81,6 +81,7 @@
 import org.opensearch.common.CheckedConsumer;
 import org.opensearch.common.CheckedFunction;
 import org.opensearch.common.CheckedRunnable;
+import org.opensearch.common.CheckedSupplier;
 import org.opensearch.common.Nullable;
 import org.opensearch.common.SetOnce;
 import org.opensearch.common.annotation.ExperimentalApi;
@@ -134,6 +135,7 @@
 import org.opensearch.index.engine.EngineConfigFactory;
 import org.opensearch.index.engine.EngineException;
 import org.opensearch.index.engine.EngineFactory;
+import org.opensearch.index.engine.EngineSearcherSupplier;
 import org.opensearch.index.engine.IngestionEngine;
 import org.opensearch.index.engine.MergedSegmentWarmerFactory;
 import org.opensearch.index.engine.NRTReplicationEngine;
@@ -142,6 +144,12 @@
 import org.opensearch.index.engine.SafeCommitInfo;
 import org.opensearch.index.engine.Segment;
 import org.opensearch.index.engine.SegmentsStats;
+import org.opensearch.index.engine.exec.bridge.CheckpointState;
+import org.opensearch.index.engine.exec.bridge.Indexer;
+import org.opensearch.index.engine.exec.bridge.IndexingThrottler;
+import org.opensearch.index.engine.exec.bridge.StatsHolder;
+import org.opensearch.index.engine.exec.composite.CompositeDataFormatWriter;
+import org.opensearch.index.engine.exec.coord.CompositeEngine;
 import org.opensearch.index.fielddata.FieldDataStats;
 import org.opensearch.index.fielddata.ShardFieldData;
 import org.opensearch.index.flush.FlushStats;
@@ -210,6 +218,7 @@
 import org.opensearch.indices.replication.checkpoint.ReplicationCheckpoint;
 import org.opensearch.indices.replication.checkpoint.SegmentReplicationCheckpointPublisher;
 import org.opensearch.indices.replication.common.ReplicationTimer;
+import org.opensearch.plugins.PluginsService;
 import org.opensearch.repositories.RepositoriesService;
 import org.opensearch.repositories.Repository;
 import org.opensearch.search.suggest.completion.CompletionStats;
@@ -389,7 +398,7 @@ Runnable getGlobalCheckpointSyncer() {
     private final MergedSegmentPublisher mergedSegmentPublisher;
     private final ReferencedSegmentsPublisher referencedSegmentsPublisher;
     private final Set<MergedSegmentCheckpoint> pendingMergedSegmentCheckpoints = Sets.newConcurrentHashSet();
-
+    private final CompositeEngine compositeEngine;
     @InternalApi
     public IndexShard(
         final ShardRouting shardRouting,
@@ -429,7 +438,8 @@ public IndexShard(
         final Object refreshMutex,
         final ClusterApplierService clusterApplierService,
         @Nullable final MergedSegmentPublisher mergedSegmentPublisher,
-        @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher
+        @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher,
+        PluginsService pluginsService
     ) throws IOException {
         super(shardRouting.shardId(), indexSettings);
         assert shardRouting.initializing();
@@ -448,7 +458,7 @@ public IndexShard(
         this.translogSyncProcessor = createTranslogSyncProcessor(
             logger,
             threadPool,
-            this::getEngine,
+            this::getIndexer,
             indexSettings.isAssignedOnRemoteNode(),
             () -> getRemoteTranslogUploadBufferInterval(remoteStoreSettings::getClusterRemoteTranslogBufferInterval)
         );
@@ -554,8 +564,12 @@ public boolean shouldCache(Query query) {
                 startRefreshTask();
             }
         }
+        this.compositeEngine = new CompositeEngine(mapperService, pluginsService, path);
     }
 
+    public CompositeEngine getIndexingExecutionCoordinator() {
+        return compositeEngine;
+    }
     /**
      * By default, UNASSIGNED_SEQ_NO is used as the initial global checkpoint for new shard initialization. Ingestion
      * source does not track sequence numbers explicitly and hence defaults to NO_OPS_PERFORMED for compatibility.
@@ -837,21 +851,21 @@ public void updateShardState(
                         assert getOperationPrimaryTerm() == newPrimaryTerm;
                         try {
                             if (indexSettings.isSegRepEnabledOrRemoteNode()) {
-                                // this Shard's engine was read only, we need to update its engine before restoring local history from xlog.
+                                // this Shard's indexer was read only, we need to update its indexer before restoring local history from xlog.
                                 assert newRouting.primary() && currentRouting.primary() == false;
                                 ReplicationTimer timer = new ReplicationTimer();
                                 timer.start();
                                 logger.debug(
-                                    "Resetting engine on promotion of shard [{}] to primary, startTime {}\n",
+                                    "Resetting indexer on promotion of shard [{}] to primary, startTime {}\n",
                                     shardId,
                                     timer.startTime()
                                 );
                                 resetEngineToGlobalCheckpoint();
                                 timer.stop();
-                                logger.info("Completed engine failover for shard [{}] in: {} ms", shardId, timer.time());
-                                // It is possible an engine can open with a SegmentInfos on a higher gen but the reader does not refresh to
+                                logger.info("Completed indexer failover for shard [{}] in: {} ms", shardId, timer.time());
+                                // It is possible an indexer can open with a SegmentInfos on a higher gen but the reader does not refresh to
                                 // trigger our refresh listener.
-                                // Force update the checkpoint post engine reset.
+                                // Force update the checkpoint post indexer reset.
                                 updateReplicationCheckpoint();
                             }
 
@@ -870,19 +884,20 @@ public void updateShardState(
                              * primary/replica re-sync completes successfully and we are now being promoted, we have to restore
                              * the reverted operations on this shard by replaying the translog to avoid losing acknowledged writes.
                              */
-                            final Engine engine = getEngine();
-                            engine.translogManager()
+                            final Indexer indexer = getIndexer();
+                            final CheckpointState checkpointState = getCheckpointState();
+                            indexer.translogManager()
                                 .restoreLocalHistoryFromTranslog(
-                                    engine.getProcessedLocalCheckpoint(),
-                                    (snapshot) -> runTranslogRecovery(engine, snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {})
+                                    checkpointState.getProcessedLocalCheckpoint(),
+                                    (snapshot) -> runTranslogRecovery(indexer, snapshot, Engine.Operation.Origin.LOCAL_RESET, () -> {})
                                 );
                             /* Rolling the translog generation is not strictly needed here (as we will never have collisions between
                              * sequence numbers in a translog generation in a new primary as it takes the last known sequence number
                              * as a starting point), but it simplifies reasoning about the relationship between primary terms and
                              * translog generations.
                              */
-                            engine.translogManager().rollTranslogGeneration();
-                            engine.fillSeqNoGaps(newPrimaryTerm);
+                            indexer.translogManager().rollTranslogGeneration();
+                            indexer.fillSeqNoGaps(newPrimaryTerm);
                             replicationTracker.updateLocalCheckpoint(currentRouting.allocationId().getId(), getLocalCheckpoint());
                             primaryReplicaSyncer.accept(this, new ActionListener<ResyncTask>() {
                                 @Override
@@ -1006,7 +1021,7 @@ public void relocated(
                 }
 
                 // Ensure all in-flight remote store translog upload drains, before we perform the performSegRep.
-                releasablesOnHandoffFailures.add(getEngine().translogManager().drainSync());
+                releasablesOnHandoffFailures.add(getIndexer().translogManager().drainSync());
 
                 // no shard operation permits are being held here, move state from started to relocated
                 assert indexShardOperationPermits.getActiveOperationsCount() == OPERATIONS_BLOCKED
@@ -1117,7 +1132,7 @@ public Engine.IndexResult applyIndexOperationOnPrimary(
     ) throws IOException {
         assert versionType.validateVersionForWrites(version);
         return applyIndexOperation(
-            getEngine(),
+            getIndexingExecutionCoordinator(),
             UNASSIGNED_SEQ_NO,
             getOperationPrimaryTerm(),
             version,
@@ -1128,7 +1143,8 @@ public Engine.IndexResult applyIndexOperationOnPrimary(
             isRetry,
             Engine.Operation.Origin.PRIMARY,
             sourceToParse,
-            null
+            null,
+            compositeEngine::documentInput
         );
     }
 
@@ -1142,7 +1158,7 @@ public Engine.IndexResult applyIndexOperationOnReplica(
         SourceToParse sourceToParse
     ) throws IOException {
         return applyIndexOperation(
-            getEngine(),
+            getIndexer(),
             seqNo,
             opPrimaryTerm,
             version,
@@ -1153,12 +1169,13 @@ public Engine.IndexResult applyIndexOperationOnReplica(
             isRetry,
             Engine.Operation.Origin.REPLICA,
             sourceToParse,
-            id
+            id,
+            null
         );
     }
 
     private Engine.IndexResult applyIndexOperation(
-        Engine engine,
+        Indexer engine,
         long seqNo,
         long opPrimaryTerm,
         long version,
@@ -1169,7 +1186,8 @@ private Engine.IndexResult applyIndexOperation(
         boolean isRetry,
         Engine.Operation.Origin origin,
         SourceToParse sourceToParse,
-        String id
+        String id,
+        CheckedSupplier<CompositeDataFormatWriter.CompositeDocumentInput, IOException> documentInputSupplier
     ) throws IOException {
 
         // For Segment Replication enabled replica shards we can be skip parsing the documents as we directly copy segments from primary
@@ -1189,7 +1207,7 @@ private Engine.IndexResult applyIndexOperation(
                 UNASSIGNED_SEQ_NO,
                 0
             );
-            return getEngine().index(index);
+            return getIndexer().index(index);
         }
         assert opPrimaryTerm <= getOperationPrimaryTerm() : "op term [ "
             + opPrimaryTerm
@@ -1198,7 +1216,7 @@ private Engine.IndexResult applyIndexOperation(
             + "]";
         ensureWriteAllowed(origin);
         Engine.Index operation;
-        try {
+        try (CompositeDataFormatWriter.CompositeDocumentInput documentInput = documentInputSupplier.get()) {
             operation = prepareIndex(
                 docMapper(),
                 sourceToParse,
@@ -1210,12 +1228,14 @@ private Engine.IndexResult applyIndexOperation(
                 autoGeneratedTimeStamp,
                 isRetry,
                 ifSeqNo,
-                ifPrimaryTerm
+                ifPrimaryTerm,
+                documentInput
             );
             Mapping update = operation.parsedDoc().dynamicMappingsUpdate();
             if (update != null) {
                 return new Engine.IndexResult(update);
             }
+            return index(engine, operation);
         } catch (Exception e) {
             // We treat any exception during parsing and or mapping update as a document level failure
             // with the exception side effects of closing the shard. Since we don't have the shard, we
@@ -1224,8 +1244,6 @@ private Engine.IndexResult applyIndexOperation(
             verifyNotClosed(e);
             return new Engine.IndexResult(e, version, opPrimaryTerm, seqNo);
         }
-
-        return index(engine, operation);
     }
 
     public static Engine.Index prepareIndex(
@@ -1239,10 +1257,11 @@ public static Engine.Index prepareIndex(
         long autoGeneratedIdTimestamp,
         boolean isRetry,
         long ifSeqNo,
-        long ifPrimaryTerm
+        long ifPrimaryTerm,
+        CompositeDataFormatWriter.CompositeDocumentInput documentInput
     ) {
         long startTime = System.nanoTime();
-        ParsedDocument doc = docMapper.getDocumentMapper().parse(source);
+        ParsedDocument doc = docMapper.getDocumentMapper().parse(source, documentInput);;
         if (docMapper.getMapping() != null) {
             doc.addDynamicMappingsUpdate(docMapper.getMapping());
         }
@@ -1263,7 +1282,7 @@ public static Engine.Index prepareIndex(
         );
     }
 
-    private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOException {
+    private Engine.IndexResult index(Indexer engine, Engine.Index index) throws IOException {
         active.set(true);
         final Engine.IndexResult result;
         index = indexingOperationListeners.preIndex(shardId, index);
@@ -1319,10 +1338,10 @@ private Engine.IndexResult index(Engine engine, Engine.Index index) throws IOExc
     }
 
     public Engine.NoOpResult markSeqNoAsNoop(long seqNo, long opPrimaryTerm, String reason) throws IOException {
-        return markSeqNoAsNoop(getEngine(), seqNo, opPrimaryTerm, reason, Engine.Operation.Origin.REPLICA);
+        return markSeqNoAsNoop(getIndexer(), seqNo, opPrimaryTerm, reason, Engine.Operation.Origin.REPLICA);
     }
 
-    private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin)
+    private Engine.NoOpResult markSeqNoAsNoop(Indexer engine, long seqNo, long opPrimaryTerm, String reason, Engine.Operation.Origin origin)
         throws IOException {
         assert opPrimaryTerm <= getOperationPrimaryTerm() : "op term [ "
             + opPrimaryTerm
@@ -1335,7 +1354,7 @@ private Engine.NoOpResult markSeqNoAsNoop(Engine engine, long seqNo, long opPrim
         return noOp(engine, noOp);
     }
 
-    private Engine.NoOpResult noOp(Engine engine, Engine.NoOp noOp) throws IOException {
+    private Engine.NoOpResult noOp(Indexer engine, Engine.NoOp noOp) throws IOException {
         active.set(true);
         if (logger.isTraceEnabled()) {
             logger.trace("noop (seq# [{}])", noOp.seqNo());
@@ -1360,7 +1379,7 @@ public Engine.DeleteResult applyDeleteOperationOnPrimary(
     ) throws IOException {
         assert versionType.validateVersionForWrites(version);
         return applyDeleteOperation(
-            getEngine(),
+            getIndexer(),
             UNASSIGNED_SEQ_NO,
             getOperationPrimaryTerm(),
             version,
@@ -1386,10 +1405,10 @@ public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long opPrim
                 UNASSIGNED_SEQ_NO,
                 0
             );
-            return getEngine().delete(delete);
+            return getIndexer().delete(delete);
         }
         return applyDeleteOperation(
-            getEngine(),
+            getIndexer(),
             seqNo,
             opPrimaryTerm,
             version,
@@ -1402,7 +1421,7 @@ public Engine.DeleteResult applyDeleteOperationOnReplica(long seqNo, long opPrim
     }
 
     private Engine.DeleteResult applyDeleteOperation(
-        Engine engine,
+        Indexer engine,
         long seqNo,
         long opPrimaryTerm,
         long version,
@@ -1437,7 +1456,7 @@ public static Engine.Delete prepareDelete(
         return new Engine.Delete(id, uid, seqNo, primaryTerm, version, versionType, origin, startTime, ifSeqNo, ifPrimaryTerm);
     }
 
-    private Engine.DeleteResult delete(Engine engine, Engine.Delete delete) throws IOException {
+    private Engine.DeleteResult delete(Indexer engine, Engine.Delete delete) throws IOException {
         active.set(true);
         final Engine.DeleteResult result;
         delete = indexingOperationListeners.preDelete(shardId, delete);
@@ -1460,7 +1479,7 @@ public Engine.GetResult get(Engine.Get get) {
         if (mapper == null) {
             return GetResult.NOT_EXISTS;
         }
-        return getEngine().get(get, this::acquireSearcher);
+        return getEngine().get(get, this::acquireSearcher); // TODO: READER INTERFACE
     }
 
     /**
@@ -1471,7 +1490,8 @@ public void refresh(String source) {
         if (logger.isTraceEnabled()) {
             logger.trace("refresh with source [{}]", source);
         }
-        getEngine().refresh(source);
+        getIndexingExecutionCoordinator().refresh(source);
+//        getIndexer().refresh(source);
     }
 
     /**
@@ -1502,7 +1522,7 @@ public FlushStats flushStats() {
 
     public DocsStats docStats() {
         readAllowed();
-        return getEngine().docStats();
+        return getStatsHolder().docStats();
     }
 
     /**
@@ -1510,7 +1530,7 @@ public DocsStats docStats() {
      * @throws AlreadyClosedException if shard is closed
      */
     public CommitStats commitStats() {
-        return getEngine().commitStats();
+        return getStatsHolder().commitStats();
     }
 
     /**
@@ -1518,11 +1538,11 @@ public CommitStats commitStats() {
      * @throws AlreadyClosedException if shard is closed
      */
     public SeqNoStats seqNoStats() {
-        return getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint());
+        return getCheckpointState().getSeqNoStats(replicationTracker.getGlobalCheckpoint());
     }
 
     public IndexingStats indexingStats() {
-        Engine engine = getEngineOrNull();
+        IndexingThrottler engine = getIndexingThrottler();
         final boolean throttled;
         final long throttleTimeInMillis;
         if (engine == null) {
@@ -1555,17 +1575,17 @@ public StoreStats storeStats() {
     }
 
     public MergeStats mergeStats() {
-        final Engine engine = getEngineOrNull();
+        final StatsHolder engine = getStatsHolderOrNull();
         if (engine == null) {
             return new MergeStats();
         }
         final MergeStats mergeStats = engine.getMergeStats();
-        mergeStats.addUnreferencedFileCleanUpStats(engine.unreferencedFileCleanUpsPerformed());
+//        mergeStats.addUnreferencedFileCleanUpStats(engine.unreferencedFileCleanUpsPerformed());
         return mergeStats;
     }
 
     public SegmentsStats segmentStats(boolean includeSegmentFileSizes, boolean includeUnloadedSegments) {
-        SegmentsStats segmentsStats = getEngine().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments);
+        SegmentsStats segmentsStats = getStatsHolder().segmentsStats(includeSegmentFileSizes, includeUnloadedSegments);
         segmentsStats.addBitsetMemoryInBytes(shardBitsetFilterCache.getMemorySizeInBytes());
         // Populate remote_store stats only if the index is remote store backed
         if (indexSettings().isAssignedOnRemoteNode()) {
@@ -1588,7 +1608,7 @@ public FieldDataStats fieldDataStats(String... fields) {
     }
 
     public TranslogStats translogStats() {
-        TranslogStats translogStats = getEngine().translogManager().getTranslogStats();
+        TranslogStats translogStats = getIndexer().translogManager().getTranslogStats();
         // Populate remote_store stats only if the index is remote store backed
         if (indexSettings.isAssignedOnRemoteNode()) {
             translogStats.addRemoteTranslogStats(
@@ -1601,11 +1621,11 @@ public TranslogStats translogStats() {
 
     public CompletionStats completionStats(String... fields) {
         readAllowed();
-        return getEngine().completionStats(fields);
+        return getStatsHolder().completionStats(fields);
     }
 
     public PollingIngestStats pollingIngestStats() {
-        return getEngine().pollingIngestStats();
+        return getStatsHolder().pollingIngestStats();
     }
 
     /**
@@ -1624,7 +1644,7 @@ public void flush(FlushRequest request) {
          */
         verifyNotClosed();
         final long time = System.nanoTime();
-        getEngine().flush(force, waitIfOngoing);
+        getIndexingExecutionCoordinator().flush(force, waitIfOngoing);
         flushMetric.inc(System.nanoTime() - time);
     }
 
@@ -1637,15 +1657,14 @@ public void trimTranslog() {
             return;
         }
         verifyNotClosed();
-        final Engine engine = getEngine();
-        engine.translogManager().trimUnreferencedTranslogFiles();
+        getIndexer().translogManager().trimUnreferencedTranslogFiles();
     }
 
     /**
      * Rolls the tranlog generation and cleans unneeded.
      */
     public void rollTranslogGeneration() throws IOException {
-        final Engine engine = getEngine();
+        final Indexer engine = getIndexer();
         engine.translogManager().rollTranslogGeneration();
     }
 
@@ -1654,7 +1673,7 @@ public void forceMerge(ForceMergeRequest forceMerge) throws IOException {
         if (logger.isTraceEnabled()) {
             logger.trace("force merge with {}", forceMerge);
         }
-        Engine engine = getEngine();
+        Indexer engine = getIndexer();
         engine.forceMerge(
             forceMerge.flush(),
             forceMerge.maxNumSegments(),
@@ -1675,7 +1694,7 @@ public org.apache.lucene.util.Version upgrade(UpgradeRequest upgrade) throws IOE
         }
         org.apache.lucene.util.Version previousVersion = minimumCompatibleVersion();
         // we just want to upgrade the segments, not actually forge merge to a single segment
-        final Engine engine = getEngine();
+        final Indexer engine = getIndexer();
         engine.forceMerge(
             true,  // we need to flush at the end to make sure the upgrade is durable
             Integer.MAX_VALUE, // we just want to upgrade the segments, not actually optimize to a single segment
@@ -1694,7 +1713,7 @@ public org.apache.lucene.util.Version upgrade(UpgradeRequest upgrade) throws IOE
 
     public org.apache.lucene.util.Version minimumCompatibleVersion() {
         org.apache.lucene.util.Version luceneVersion = null;
-        for (Segment segment : getEngine().segments(false)) {
+        for (Segment segment : getIndexer().segments(false)) {
             if (luceneVersion == null || luceneVersion.onOrAfter(segment.getVersion())) {
                 luceneVersion = segment.getVersion();
             }
@@ -1724,19 +1743,21 @@ public RemoteSegmentMetadata fetchLastRemoteUploadedSegmentMetadata() throws IOE
      *
      * @param flushFirst <code>true</code> if the index should first be flushed to disk / a low level lucene commit should be executed
      */
+    // TODO: This full method changes
     public GatedCloseable<IndexCommit> acquireLastIndexCommit(boolean flushFirst) throws EngineException {
         final IndexShardState state = this.state; // one time volatile read
         // we allow snapshot on closed index shard, since we want to do one after we close the shard and before we close the engine
         if (state == IndexShardState.STARTED || state == IndexShardState.CLOSED) {
-            return getEngine().acquireLastIndexCommit(flushFirst);
+            return getEngine().acquireLastIndexCommit(flushFirst); // TODO: READER, SNAPSHOTTER?
         } else {
             throw new IllegalIndexShardStateException(shardId, state, "snapshot is not allowed");
         }
     }
 
+    // TODO: This full method changes
     public GatedCloseable<IndexCommit> acquireLastIndexCommitAndRefresh(boolean flushFirst) throws EngineException {
         GatedCloseable<IndexCommit> indexCommit = acquireLastIndexCommit(flushFirst);
-        getEngine().refresh("Snapshot for Remote Store based Shard");
+        getIndexer().refresh("Snapshot for Remote Store based Shard");
         return indexCommit;
     }
 
@@ -1865,6 +1886,7 @@ public Set<MergedSegmentCheckpoint> getPendingMergedSegmentCheckpoints() {
     /**
      * Snapshots the most recent safe index commit from the currently running engine.
      * All index files referenced by this index commit won't be freed until the commit/snapshot is closed.
+     * TODO: This method changes
      */
     public GatedCloseable<IndexCommit> acquireSafeIndexCommit() throws EngineException {
         final IndexShardState state = this.state; // one time volatile read
@@ -1927,6 +1949,7 @@ public Tuple<GatedCloseable<SegmentInfos>, ReplicationCheckpoint> getLatestSegme
      * @param segmentInfos {@link SegmentInfos} infos to use to compute.
      * @return {@link ReplicationCheckpoint} Checkpoint computed from the infos.
      * @throws IOException When there is an error computing segment metadata from the store.
+     * TODO: SegRep changes for decoupling. looks to depend on codec.
      */
     ReplicationCheckpoint computeReplicationCheckpoint(SegmentInfos segmentInfos) throws IOException {
         if (segmentInfos == null) {
@@ -2154,7 +2177,7 @@ public void failShard(String reason, @Nullable Exception e) {
     /**
      * Acquires a point-in-time reader that can be used to create {@link Engine.Searcher}s on demand.
      */
-    public Engine.SearcherSupplier acquireSearcherSupplier() {
+    public EngineSearcherSupplier<Engine.Searcher> acquireSearcherSupplier() {
         return acquireSearcherSupplier(Engine.SearcherScope.EXTERNAL);
     }
 
@@ -2165,6 +2188,7 @@ public Engine.SearcherSupplier acquireSearcherSupplier(Engine.SearcherScope scop
         readAllowed();
         markSearcherAccessed();
         final Engine engine = getEngine();
+        compositeEngine.getPrimaryReadEngine().acquireSearcherSupplier(null, scope);
         return engine.acquireSearcherSupplier(this::wrapSearcher, scope);
     }
 
@@ -2196,6 +2220,7 @@ private Engine.Searcher wrapSearcher(Engine.Searcher searcher) {
             throw new OpenSearchException("failed to wrap searcher", ex);
         } finally {
             if (success == false) {
+                // TODO important
                 Releasables.close(success, searcher);
             }
         }
@@ -2434,7 +2459,7 @@ public void postRecovery(String reason) throws IndexShardStartedException, Index
             // we may not expose operations that were indexed with a refresh listener that was immediately
             // responded to in addRefreshListener. The refresh must happen under the same mutex used in addRefreshListener
             // and before moving this shard to POST_RECOVERY state (i.e., allow to read from this shard).
-            getEngine().refresh("post_recovery");
+            getIndexer().refresh("post_recovery");
             synchronized (mutex) {
                 if (state == IndexShardState.CLOSED) {
                     throw new IndexShardClosedException(shardId);
@@ -2511,7 +2536,7 @@ private long recoverLocallyUpToGlobalCheckpoint() {
                 final TranslogRecoveryRunner translogRecoveryRunner = (snapshot) -> {
                     recoveryState.getTranslog().totalLocal(snapshot.totalOperations());
                     final int recoveredOps = runTranslogRecovery(
-                        getEngine(),
+                        getIndexer(),
                         snapshot,
                         Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY,
                         recoveryState.getTranslog()::incrementRecoveredOperations
@@ -2520,9 +2545,9 @@ private long recoverLocallyUpToGlobalCheckpoint() {
                     return recoveredOps;
                 };
                 innerOpenEngineAndTranslog(() -> globalCheckpoint);
-                getEngine().translogManager()
-                    .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), globalCheckpoint);
-                logger.trace("shard locally recovered up to {}", getEngine().getSeqNoStats(globalCheckpoint));
+                getIndexer().translogManager()
+                    .recoverFromTranslog(translogRecoveryRunner, getCheckpointState().getProcessedLocalCheckpoint(), globalCheckpoint);
+                logger.trace("shard locally recovered up to {}", getCheckpointState().getSeqNoStats(globalCheckpoint));
             } finally {
                 synchronized (engineMutex) {
                     IOUtils.close(currentEngineReference.getAndSet(null));
@@ -2598,7 +2623,7 @@ private void validateLocalRecoveryState() {
     }
 
     public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) {
-        getEngine().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo);
+        getIndexer().translogManager().trimOperationsFromTranslog(getOperationPrimaryTerm(), aboveSeqNo);
     }
 
     /**
@@ -2608,7 +2633,7 @@ public void trimOperationOfPreviousPrimaryTerms(long aboveSeqNo) {
      * @see #updateMaxUnsafeAutoIdTimestamp(long)
      */
     public long getMaxSeenAutoIdTimestamp() {
-        return getEngine().getMaxSeenAutoIdTimestamp();
+        return getIndexer().getMaxSeenAutoIdTimestamp();
     }
 
     /**
@@ -2621,14 +2646,14 @@ public long getMaxSeenAutoIdTimestamp() {
      * a retry append-only (without timestamp) via recovery, then an original append-only (with timestamp) via replication.
      */
     public void updateMaxUnsafeAutoIdTimestamp(long maxSeenAutoIdTimestampFromPrimary) {
-        getEngine().updateMaxUnsafeAutoIdTimestamp(maxSeenAutoIdTimestampFromPrimary);
+        getIndexer().updateMaxUnsafeAutoIdTimestamp(maxSeenAutoIdTimestampFromPrimary);
     }
 
     public Engine.Result applyTranslogOperation(Translog.Operation operation, Engine.Operation.Origin origin) throws IOException {
-        return applyTranslogOperation(getEngine(), operation, origin);
+        return applyTranslogOperation(getIndexer(), operation, origin);
     }
 
-    private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation operation, Engine.Operation.Origin origin)
+    private Engine.Result applyTranslogOperation(Indexer engine, Translog.Operation operation, Engine.Operation.Origin origin)
         throws IOException {
         // If a translog op is replayed on the primary (eg. ccr), we need to use external instead of null for its version type.
         final VersionType versionType = (origin == Engine.Operation.Origin.PRIMARY) ? VersionType.EXTERNAL : null;
@@ -2656,7 +2681,8 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o
                         MediaTypeRegistry.xContentType(index.source()),
                         index.routing()
                     ),
-                    index.id()
+                    index.id(),
+                    null
                 );
                 break;
             case DELETE:
@@ -2687,7 +2713,7 @@ private Engine.Result applyTranslogOperation(Engine engine, Translog.Operation o
      * Replays translog operations from the provided translog {@code snapshot} to the current engine using the given {@code origin}.
      * The callback {@code onOperationRecovered} is notified after each translog operation is replayed successfully.
      */
-    int runTranslogRecovery(Engine engine, Translog.Snapshot snapshot, Engine.Operation.Origin origin, Runnable onOperationRecovered)
+    int runTranslogRecovery(Indexer engine, Translog.Snapshot snapshot, Engine.Operation.Origin origin, Runnable onOperationRecovered)
         throws IOException {
         int opsRecovered = 0;
         Translog.Operation operation;
@@ -2747,7 +2773,7 @@ public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOEx
             translogRecoveryStats.totalOperations(snapshot.totalOperations());
             translogRecoveryStats.totalOperationsOnStart(snapshot.totalOperations());
             return runTranslogRecovery(
-                getEngine(),
+                getIndexer(),
                 snapshot,
                 Engine.Operation.Origin.LOCAL_TRANSLOG_RECOVERY,
                 translogRecoveryStats::incrementRecoveredOperations
@@ -2771,8 +2797,8 @@ public void openEngineAndRecoverFromTranslog(boolean syncFromRemote) throws IOEx
             translogConfig.setDownloadRemoteTranslogOnInit(true);
         }
 
-        getEngine().translogManager()
-            .recoverFromTranslog(translogRecoveryRunner, getEngine().getProcessedLocalCheckpoint(), Long.MAX_VALUE);
+        getIndexer().translogManager()
+            .recoverFromTranslog(translogRecoveryRunner, getCheckpointState().getProcessedLocalCheckpoint(), Long.MAX_VALUE);
     }
 
     /**
@@ -2799,7 +2825,7 @@ void openEngineAndSkipTranslogRecovery(boolean syncFromRemote) throws IOExceptio
         innerOpenEngineAndTranslog(replicationTracker, syncFromRemote);
         assert routingEntry().isSearchOnly() == false || translogStats().estimatedNumberOfOperations() == 0
             : "Translog is expected to be empty but holds " + translogStats().estimatedNumberOfOperations() + "Operations.";
-        getEngine().translogManager().skipTranslogRecovery();
+        getIndexer().translogManager().skipTranslogRecovery();
     }
 
     private void innerOpenEngineAndTranslog(LongSupplier globalCheckpointSupplier) throws IOException {
@@ -2961,9 +2987,9 @@ public RecoveryState recoveryState() {
      */
     public void finalizeRecovery() {
         recoveryState().setStage(RecoveryState.Stage.FINALIZE);
-        Engine engine = getEngine();
+        Indexer engine = getIndexer();
         engine.refresh("recovery_finalization");
-        engine.config().setEnableGcDeletes(true);
+        //engine.config().setEnableGcDeletes(true);
     }
 
     /**
@@ -3284,7 +3310,7 @@ protected void doRun() {
      * Acquires a lock on the translog files and Lucene soft-deleted documents to prevent them from being trimmed
      */
     public Closeable acquireHistoryRetentionLock() {
-        return getEngine().acquireHistoryRetentionLock();
+        return getIndexer().acquireHistoryRetentionLock();
     }
 
     /**
@@ -3294,7 +3320,7 @@ public Closeable acquireHistoryRetentionLock() {
      */
     public Translog.Snapshot getHistoryOperations(String reason, long startingSeqNo, long endSeqNo, boolean accurateCount)
         throws IOException {
-        return getEngine().newChangesSnapshot(reason, startingSeqNo, endSeqNo, true, accurateCount);
+        return getIndexer().newChangesSnapshot(reason, startingSeqNo, endSeqNo, true, accurateCount);
     }
 
     /**
@@ -3305,7 +3331,7 @@ public Translog.Snapshot getHistoryOperations(String reason, long startingSeqNo,
     public Translog.Snapshot getHistoryOperationsFromTranslog(long startingSeqNo, long endSeqNo) throws IOException {
         assert indexSettings.isSegRepEnabledOrRemoteNode() == false
             : "unsupported operation for segment replication enabled indices or remote store backed indices";
-        return getEngine().translogManager().newChangesSnapshot(startingSeqNo, endSeqNo, true);
+        return getIndexer().translogManager().newChangesSnapshot(startingSeqNo, endSeqNo, true);
     }
 
     /**
@@ -3313,7 +3339,7 @@ public Translog.Snapshot getHistoryOperationsFromTranslog(long startingSeqNo, lo
      * This method should be called after acquiring the retention lock; See {@link #acquireHistoryRetentionLock()}
      */
     public boolean hasCompleteHistoryOperations(String reason, long startingSeqNo) {
-        return getEngine().hasCompleteOperationHistory(reason, startingSeqNo);
+        return getIndexer().hasCompleteOperationHistory(reason, startingSeqNo);
     }
 
     /**
@@ -3322,7 +3348,7 @@ public boolean hasCompleteHistoryOperations(String reason, long startingSeqNo) {
      * @return the minimum retained sequence number
      */
     public long getMinRetainedSeqNo() {
-        return getEngine().getMinRetainedSeqNo();
+        return getCheckpointState().getMinRetainedSeqNo();
     }
 
     /**
@@ -3333,7 +3359,7 @@ public long getMinRetainedSeqNo() {
      * @return           number of history operations in the sequence number range
      */
     public int countNumberOfHistoryOperations(String source, long fromSeqNo, long toSeqNo) throws IOException {
-        return getEngine().countNumberOfHistoryOperations(source, fromSeqNo, toSeqNo);
+        return getIndexer().countNumberOfHistoryOperations(source, fromSeqNo, toSeqNo);
     }
 
     /**
@@ -3354,15 +3380,15 @@ public Translog.Snapshot newChangesSnapshot(
         boolean requiredFullRange,
         boolean accurateCount
     ) throws IOException {
-        return getEngine().newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount);
+        return getIndexer().newChangesSnapshot(source, fromSeqNo, toSeqNo, requiredFullRange, accurateCount);
     }
 
     public List<Segment> segments(boolean verbose) {
-        return getEngine().segments(verbose);
+        return getIndexer().segments(verbose);
     }
 
     public String getHistoryUUID() {
-        return getEngine().getHistoryUUID();
+        return getIndexer().getHistoryUUID();
     }
 
     public IndexEventListener getIndexEventListener() {
@@ -3371,7 +3397,7 @@ public IndexEventListener getIndexEventListener() {
 
     public void activateThrottling() {
         try {
-            getEngine().activateThrottling();
+            getIndexingThrottler().activateThrottling();
         } catch (AlreadyClosedException ex) {
             // ignore
         }
@@ -3379,7 +3405,7 @@ public void activateThrottling() {
 
     public void deactivateThrottling() {
         try {
-            getEngine().deactivateThrottling();
+            getIndexingThrottler().deactivateThrottling();
         } catch (AlreadyClosedException ex) {
             // ignore
         }
@@ -3413,8 +3439,7 @@ private void handleRefreshException(Exception e) {
      */
     public void writeIndexingBuffer() {
         try {
-            Engine engine = getEngine();
-            engine.writeIndexingBuffer();
+            getIndexer().writeIndexingBuffer();
         } catch (Exception e) {
             handleRefreshException(e);
         }
@@ -3697,7 +3722,7 @@ public void markAllocationIdAsInSync(final String allocationId, final long local
      * @return the local checkpoint
      */
     public long getLocalCheckpoint() {
-        return getEngine().getPersistedLocalCheckpoint();
+        return getCheckpointState().getPersistedLocalCheckpoint();
     }
 
     /**
@@ -3705,7 +3730,7 @@ public long getLocalCheckpoint() {
      * Also see {@link #getLocalCheckpoint()}.
      */
     public long getProcessedLocalCheckpoint() {
-        return getEngine().getProcessedLocalCheckpoint();
+        return getCheckpointState().getProcessedLocalCheckpoint();
     }
 
     /**
@@ -3721,7 +3746,7 @@ public long getLastKnownGlobalCheckpoint() {
      * Returns the latest global checkpoint value that has been persisted in the underlying storage (i.e. translog's checkpoint)
      */
     public long getLastSyncedGlobalCheckpoint() {
-        return getEngine().getLastSyncedGlobalCheckpoint();
+        return getCheckpointState().getLastSyncedGlobalCheckpoint();
     }
 
     /**
@@ -3747,7 +3772,7 @@ public void maybeSyncGlobalCheckpoint(final String reason) {
         }
         assert assertPrimaryMode();
         // only sync if there are no operations in flight, or when using async durability
-        final SeqNoStats stats = getEngine().getSeqNoStats(replicationTracker.getGlobalCheckpoint());
+        final SeqNoStats stats = getCheckpointState().getSeqNoStats(replicationTracker.getGlobalCheckpoint());
         final boolean asyncDurability = indexSettings().getTranslogDurability() == Durability.ASYNC;
         if (stats.getMaxSeqNo() == stats.getGlobalCheckpoint() || asyncDurability) {
             final Map<String, Long> globalCheckpoints = getInSyncGlobalCheckpoints();
@@ -3867,7 +3892,7 @@ private void postActivatePrimaryMode() {
             // This helps to get a consistent state in remote store where both remote segment store and remote
             // translog contains data.
             try {
-                getEngine().translogManager().syncTranslog();
+                getIndexer().translogManager().syncTranslog();
             } catch (IOException e) {
                 logger.error("Failed to sync translog to remote from new primary", e);
             }
@@ -3976,7 +4001,24 @@ private void doCheckIndex() throws IOException {
         recoveryState.getVerifyIndex().checkIndexTime(Math.max(0, TimeValue.nsecToMSec(System.nanoTime() - timeNS)));
     }
 
-    Engine getEngine() {
+
+    public Indexer getIndexer() {
+        return getEngine();
+    }
+
+    public CheckpointState getCheckpointState() {
+        return getEngine();
+    }
+
+    public StatsHolder getStatsHolder() {
+        return getEngine();
+    }
+
+    public IndexingThrottler getIndexingThrottler() {
+        return getEngine();
+    }
+
+    public Engine getEngine() {
         Engine engine = getEngineOrNull();
         if (engine == null) {
             throw new AlreadyClosedException("engine is closed");
@@ -3984,6 +4026,23 @@ Engine getEngine() {
         return engine;
     }
 
+
+    protected Indexer getIndexerOrNull() {
+        return getEngineOrNull();
+    }
+
+    public CheckpointState getCheckpointStateOrNull() {
+        return getEngineOrNull();
+    }
+
+    public StatsHolder getStatsHolderOrNull() {
+        return getEngineOrNull();
+    }
+
+    public IndexingThrottler getIndexingThrottlerOrNull() {
+        return getEngineOrNull();
+    }
+
     /**
      * NOTE: returns null if engine is not yet started (e.g. recovery phase 1, copying over index files, is still running), or if engine is
      * closed.
@@ -4174,7 +4233,7 @@ public boolean useRetentionLeasesInPeerRecovery() {
 
     private SafeCommitInfo getSafeCommitInfo() {
         final Engine engine = getEngineOrNull();
-        return engine == null ? SafeCommitInfo.EMPTY : engine.getSafeCommitInfo();
+        return engine == null ? SafeCommitInfo.EMPTY : getIndexer().getSafeCommitInfo();
     }
 
     class ShardEventListener implements Engine.EventListener {
@@ -4252,10 +4311,12 @@ private EngineConfig newEngineConfig(LongSupplier globalCheckpointSupplier) thro
         if (indexSettings.isSegRepEnabledOrRemoteNode()) {
             internalRefreshListener.add(new ReplicationCheckpointUpdater());
         }
+        // HERE
         if (this.checkpointPublisher != null && shardRouting.primary() && indexSettings.isSegRepLocalEnabled()) {
             internalRefreshListener.add(new CheckpointRefreshListener(this, this.checkpointPublisher));
         }
 
+        // HERE
         if (isRemoteStoreEnabled() || isMigratingToRemote()) {
             internalRefreshListener.add(
                 new RemoteStoreRefreshListener(
@@ -4709,7 +4770,7 @@ public List<String> getActiveOperations() {
     private static AsyncIOProcessor<Translog.Location> createTranslogSyncProcessor(
         Logger logger,
         ThreadPool threadPool,
-        Supplier<Engine> engineSupplier,
+        Supplier<Indexer> engineSupplier,
         boolean bufferAsyncIoProcessor,
         Supplier<TimeValue> bufferIntervalSupplier
     ) {
@@ -4908,7 +4969,7 @@ ReplicationTracker getReplicationTracker() {
     public boolean scheduledRefresh() {
         verifyNotClosed();
         boolean listenerNeedsRefresh = refreshListeners.refreshNeeded();
-        if (isReadAllowed() && (listenerNeedsRefresh || getEngine().refreshNeeded())) {
+        if (isReadAllowed() && (listenerNeedsRefresh || true)) {
             if (listenerNeedsRefresh == false // if we have a listener that is waiting for a refresh we need to force it
                 && isSearchIdleSupported()
                 && isSearchIdle()
@@ -4917,15 +4978,19 @@ && isSearchIdle()
                 // lets skip this refresh since we are search idle and
                 // don't necessarily need to refresh. the next searcher access will register a refreshListener and that will
                 // cause the next schedule to refresh.
-                final Engine engine = getEngine();
-                engine.maybePruneDeletes(); // try to prune the deletes in the engine if we accumulated some
-                setRefreshPending(engine);
-                return false;
+//                final Engine engine = getEngine();
+//                engine.maybePruneDeletes(); // try to prune the deletes in the engine if we accumulated some
+//                setRefreshPending(engine);
+//                return false;
+                getIndexingExecutionCoordinator().refresh("schedule");
+                return true;
             } else {
                 if (logger.isTraceEnabled()) {
                     logger.trace("refresh with source [schedule]");
                 }
-                return getEngine().maybeRefresh("schedule");
+                getIndexingExecutionCoordinator().refresh("schedule");
+                return true;
+//                return getEngine().maybeRefresh("schedule");
             }
         }
         final Engine engine = getEngine();
diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java
index 59f967744cc77..5b3be542eb160 100644
--- a/server/src/main/java/org/opensearch/indices/IndicesService.java
+++ b/server/src/main/java/org/opensearch/indices/IndicesService.java
@@ -164,6 +164,7 @@
 import org.opensearch.node.remotestore.RemoteStoreNodeAttribute;
 import org.opensearch.plugins.IndexStorePlugin;
 import org.opensearch.plugins.PluginsService;
+import org.opensearch.plugins.SearchEnginePlugin;
 import org.opensearch.repositories.RepositoriesService;
 import org.opensearch.script.ScriptService;
 import org.opensearch.search.aggregations.support.ValuesSourceRegistry;
@@ -1101,7 +1102,9 @@ private synchronized IndexService createIndexService(
             this.remoteStoreSettings,
             replicator,
             segmentReplicationStatsProvider,
-            this::getClusterDefaultMaxMergeAtOnce
+            this::getClusterDefaultMaxMergeAtOnce,
+            getSearchEnginePlugin(),
+            this.pluginsService
         );
     }
 
@@ -1109,6 +1112,13 @@ private EngineConfigFactory getEngineConfigFactory(final IndexSettings idxSettin
         return new EngineConfigFactory(this.pluginsService, idxSettings);
     }
 
+    private SearchEnginePlugin getSearchEnginePlugin() throws IOException {
+        List<SearchEnginePlugin> searchEnginePlugins = pluginsService.filterPlugins(SearchEnginePlugin.class);
+        return !searchEnginePlugins.isEmpty()
+            ? searchEnginePlugins.getFirst()
+            : null;
+    }
+
     private IngestionConsumerFactory getIngestionConsumerFactory(final IndexSettings idxSettings) {
         final IndexMetadata indexMetadata = idxSettings.getIndexMetadata();
         if (indexMetadata == null) {
diff --git a/server/src/main/java/org/opensearch/node/Node.java b/server/src/main/java/org/opensearch/node/Node.java
index ae8299ee7ccb5..416237111ff7b 100644
--- a/server/src/main/java/org/opensearch/node/Node.java
+++ b/server/src/main/java/org/opensearch/node/Node.java
@@ -218,6 +218,8 @@
 import org.opensearch.plugins.ClusterPlugin;
 import org.opensearch.plugins.CryptoKeyProviderPlugin;
 import org.opensearch.plugins.CryptoPlugin;
+import org.opensearch.plugins.SearchEnginePlugin;
+import org.opensearch.plugins.DataSourcePlugin;
 import org.opensearch.plugins.DiscoveryPlugin;
 import org.opensearch.plugins.EnginePlugin;
 import org.opensearch.plugins.ExtensionAwarePlugin;
@@ -294,6 +296,8 @@
 import org.opensearch.transport.client.Client;
 import org.opensearch.transport.client.node.NodeClient;
 import org.opensearch.usage.UsageService;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
 import org.opensearch.watcher.ResourceWatcherService;
 import org.opensearch.wlm.WorkloadGroupService;
 import org.opensearch.wlm.WorkloadGroupsStateAccessor;
@@ -1111,10 +1115,40 @@ protected Node(final Environment initialEnvironment, Collection<PluginInfo> clas
                     ).stream()
                 )
                 .collect(Collectors.toList());
-
             // Add the telemetryAwarePlugin components to the existing pluginComponents collection.
             pluginComponents.addAll(telemetryAwarePluginComponents);
 
+            Map<DataFormat, DataSourceCodec> dataSourceCodecMap = new HashMap<>();
+            for (DataSourcePlugin dataSourcePlugin : pluginsService.filterPlugins(DataSourcePlugin.class)) {
+                if (dataSourcePlugin.getDataSourceCodecs().isPresent()) {
+                    dataSourceCodecMap.putAll(dataSourcePlugin.getDataSourceCodecs().get());
+                }
+            }
+
+            // TODO : compilation issue
+
+            Collection<Object> dataSourceAwareComponents = pluginsService.filterPlugins(SearchEnginePlugin.class)
+                .stream()
+                .flatMap(
+                    p -> p.createComponents(
+                        client,
+                        clusterService,
+                        threadPool,
+                        resourceWatcherService,
+                        scriptService,
+                        xContentRegistry,
+                        environment,
+                        nodeEnvironment,
+                        namedWriteableRegistry,
+                        clusterModule.getIndexNameExpressionResolver(),
+                        repositoriesServiceReference::get,
+                        dataSourceCodecMap
+                    ).stream()
+                )
+                .collect(Collectors.toList());
+
+            // Add all dataSourceAwarePlugin components to the existing pluginComponents
+            pluginComponents.addAll(dataSourceAwareComponents);
             List<IdentityAwarePlugin> identityAwarePlugins = pluginsService.filterPlugins(IdentityAwarePlugin.class);
             identityService.initializeIdentityAwarePlugins(identityAwarePlugins);
 
@@ -1525,7 +1559,8 @@ protected Node(final Environment initialEnvironment, Collection<PluginInfo> clas
                 searchModule.getIndexSearcherExecutor(threadPool),
                 taskResourceTrackingService,
                 searchModule.getConcurrentSearchRequestDeciderFactories(),
-                searchModule.getPluginProfileMetricsProviders()
+                searchModule.getPluginProfileMetricsProviders(),
+                pluginsService.filterPlugins(DataSourcePlugin.class)
             );
 
             final List<PersistentTasksExecutor<?>> tasksExecutors = pluginsService.filterPlugins(PersistentTaskPlugin.class)
@@ -2256,7 +2291,8 @@ protected SearchService newSearchService(
         Executor indexSearcherExecutor,
         TaskResourceTrackingService taskResourceTrackingService,
         Collection<ConcurrentSearchRequestDecider.Factory> concurrentSearchDeciderFactories,
-        List<SearchPlugin.ProfileMetricsProvider> pluginProfilers
+        List<SearchPlugin.ProfileMetricsProvider> pluginProfilers,
+        List<DataSourcePlugin> dataSourcePluginList
     ) {
         return new SearchService(
             clusterService,
@@ -2271,7 +2307,8 @@ protected SearchService newSearchService(
             indexSearcherExecutor,
             taskResourceTrackingService,
             concurrentSearchDeciderFactories,
-            pluginProfilers
+            pluginProfilers,
+            dataSourcePluginList
         );
     }
 
diff --git a/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java
new file mode 100644
index 0000000000000..cf008d3098fcd
--- /dev/null
+++ b/server/src/main/java/org/opensearch/plugins/DataSourcePlugin.java
@@ -0,0 +1,28 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugins;
+
+import org.opensearch.index.engine.exec.DataFormat;
+import org.opensearch.index.engine.exec.IndexingExecutionEngine;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+
+import java.util.Map;
+import java.util.Optional;
+
+public interface DataSourcePlugin {
+    default Optional<Map<org.opensearch.vectorized.execution.search.DataFormat, DataSourceCodec>> getDataSourceCodecs() {
+        return Optional.empty();
+    }
+
+    <T extends DataFormat> IndexingExecutionEngine<T> indexingEngine(MapperService mapperService, ShardPath shardPath);
+
+    DataFormat getDataFormat();
+}
diff --git a/server/src/main/java/org/opensearch/plugins/PluginsService.java b/server/src/main/java/org/opensearch/plugins/PluginsService.java
index 5e382584dbe0e..ccbc10f77cb14 100644
--- a/server/src/main/java/org/opensearch/plugins/PluginsService.java
+++ b/server/src/main/java/org/opensearch/plugins/PluginsService.java
@@ -42,6 +42,7 @@
 import org.opensearch.OpenSearchException;
 import org.opensearch.Version;
 import org.opensearch.action.admin.cluster.node.info.PluginsAndModules;
+import org.opensearch.common.annotation.ExperimentalApi;
 import org.opensearch.common.bootstrap.JarHell;
 import org.opensearch.common.collect.Tuple;
 import org.opensearch.common.inject.Module;
@@ -88,6 +89,7 @@
  *
  * @opensearch.internal
  */
+@ExperimentalApi // TODO : this cannot be experimental, just marking it to bypass for now
 public class PluginsService implements ReportingService<PluginsAndModules> {
 
     private static final Logger logger = LogManager.getLogger(PluginsService.class);
diff --git a/server/src/main/java/org/opensearch/plugins/SearchEnginePlugin.java b/server/src/main/java/org/opensearch/plugins/SearchEnginePlugin.java
new file mode 100644
index 0000000000000..e1c68761dd0a7
--- /dev/null
+++ b/server/src/main/java/org/opensearch/plugins/SearchEnginePlugin.java
@@ -0,0 +1,60 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.plugins;
+
+import org.opensearch.cluster.metadata.IndexNameExpressionResolver;
+import org.opensearch.cluster.service.ClusterService;
+import org.opensearch.core.common.io.stream.NamedWriteableRegistry;
+import org.opensearch.core.xcontent.NamedXContentRegistry;
+import org.opensearch.env.Environment;
+import org.opensearch.env.NodeEnvironment;
+import org.opensearch.index.engine.SearchExecEngine;
+import org.opensearch.index.engine.exec.FileMetadata;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.repositories.RepositoriesService;
+import org.opensearch.script.ScriptService;
+import org.opensearch.threadpool.ThreadPool;
+import org.opensearch.transport.client.Client;
+import org.opensearch.vectorized.execution.search.DataFormat;
+import org.opensearch.vectorized.execution.search.spi.DataSourceCodec;
+import org.opensearch.watcher.ResourceWatcherService;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+import java.util.function.Supplier;
+
+public interface SearchEnginePlugin extends SearchPlugin{
+
+    /**
+     * Make dataSourceCodecs available for the DataSourceAwarePlugin(s)
+     */
+    default Collection<Object> createComponents(
+        Client client,
+        ClusterService clusterService,
+        ThreadPool threadPool,
+        ResourceWatcherService resourceWatcherService,
+        ScriptService scriptService,
+        NamedXContentRegistry xContentRegistry,
+        Environment environment,
+        NodeEnvironment nodeEnvironment,
+        NamedWriteableRegistry namedWriteableRegistry,
+        IndexNameExpressionResolver indexNameExpressionResolver,
+        Supplier<RepositoriesService> repositoriesServiceSupplier,
+        Map<DataFormat, DataSourceCodec> dataSourceCodecs
+    ) {
+        return Collections.emptyList();
+    }
+
+    List<DataFormat> getSupportedFormats();
+
+    SearchExecEngine<?,?,?,?> createEngine(DataFormat dataFormat, Collection<FileMetadata> formatCatalogSnapshot, ShardPath shardPath) throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/search/ContextEngineSearcher.java b/server/src/main/java/org/opensearch/search/ContextEngineSearcher.java
new file mode 100644
index 0000000000000..85809b993b165
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/ContextEngineSearcher.java
@@ -0,0 +1,31 @@
+package org.opensearch.search;
+
+import org.opensearch.index.engine.EngineSearcher;
+import org.opensearch.search.aggregations.SearchResultsCollector;
+import org.opensearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Engine-agnostic equivalent of ContextIndexSearcher that wraps EngineSearcher
+ * and provides search context awareness
+ */
+public record ContextEngineSearcher<Q,C>(EngineSearcher<Q,C> engineSearcher,
+                                       SearchContext searchContext) implements EngineSearcher<Q,C> {
+
+    @Override
+    public String source() {
+        return engineSearcher.source();
+    }
+
+    @Override
+    public void search(Q query, List<SearchResultsCollector<C>> collectors) throws IOException {
+        engineSearcher.search(query, collectors);
+    }
+
+    @Override
+    public void close() {
+        engineSearcher.close();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java
index dda3e203c0667..99371456499b4 100644
--- a/server/src/main/java/org/opensearch/search/DefaultSearchContext.java
+++ b/server/src/main/java/org/opensearch/search/DefaultSearchContext.java
@@ -221,6 +221,7 @@ final class DefaultSearchContext extends SearchContext {
 
     private final boolean isStreamSearch;
     private StreamSearchChannelListener listener;
+    private Map<String, Object[]> dfResults;
 
     DefaultSearchContext(
         ReaderContext readerContext,
@@ -252,7 +253,7 @@ final class DefaultSearchContext extends SearchContext {
         this.indexService = readerContext.indexService();
         this.indexShard = readerContext.indexShard();
         this.clusterService = clusterService;
-        this.engineSearcher = readerContext.acquireSearcher("search");
+        this.engineSearcher = (Engine.Searcher) readerContext.acquireSearcher("search");
         this.concurrentSearchMode = evaluateConcurrentSearchMode(executor);
         this.searcher = new ContextIndexSearcher(
             engineSearcher.getIndexReader(),
@@ -1277,4 +1278,12 @@ public StreamSearchChannelListener getStreamChannelListener() {
     public boolean isStreamSearch() {
         return isStreamSearch;
     }
+
+    public void setDFResults(Map<String, Object[]> dfResults) {
+        this.dfResults = dfResults;
+    }
+
+    public Map<String, Object[]> getDFResults() {
+        return dfResults;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/SearchService.java b/server/src/main/java/org/opensearch/search/SearchService.java
index eeb4978d4c1f8..ae2d9682bc4fc 100644
--- a/server/src/main/java/org/opensearch/search/SearchService.java
+++ b/server/src/main/java/org/opensearch/search/SearchService.java
@@ -83,6 +83,8 @@
 import org.opensearch.index.IndexService;
 import org.opensearch.index.IndexSettings;
 import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineSearcherSupplier;
+import org.opensearch.index.engine.SearchExecEngine;
 import org.opensearch.index.mapper.DerivedFieldResolver;
 import org.opensearch.index.mapper.DerivedFieldResolverFactory;
 import org.opensearch.index.query.InnerHitContextBuilder;
@@ -99,6 +101,7 @@
 import org.opensearch.indices.IndicesService;
 import org.opensearch.indices.cluster.IndicesClusterStateService.AllocatedIndices.IndexRemovalReason;
 import org.opensearch.node.ResponseCollectorService;
+import org.opensearch.plugins.DataSourcePlugin;
 import org.opensearch.plugins.SearchPlugin;
 import org.opensearch.script.FieldScript;
 import org.opensearch.script.ScriptService;
@@ -423,6 +426,7 @@ public class SearchService extends AbstractLifecycleComponent implements IndexEv
 
     private final FetchPhase fetchPhase;
     private final Collection<ConcurrentSearchRequestDecider.Factory> concurrentSearchDeciderFactories;
+    private final List<DataSourcePlugin> dataSourcePluginList;
 
     private volatile long defaultKeepAlive;
 
@@ -471,7 +475,8 @@ public SearchService(
         Executor indexSearcherExecutor,
         TaskResourceTrackingService taskResourceTrackingService,
         Collection<ConcurrentSearchRequestDecider.Factory> concurrentSearchDeciderFactories,
-        List<SearchPlugin.ProfileMetricsProvider> pluginProfilers
+        List<SearchPlugin.ProfileMetricsProvider> pluginProfilers,
+        List<DataSourcePlugin> dataSourcePluginList
     ) {
         Settings settings = clusterService.getSettings();
         this.threadPool = threadPool;
@@ -499,7 +504,7 @@ public SearchService(
                 this::setPitKeepAlives,
                 this::validatePitKeepAlives
             );
-
+        this.dataSourcePluginList = dataSourcePluginList;
         clusterService.getClusterSettings()
             .addSettingsUpdateConsumer(DEFAULT_KEEPALIVE_SETTING, MAX_KEEPALIVE_SETTING, this::setKeepAlives, this::validateKeepAlives);
 
@@ -803,18 +808,54 @@ private SearchPhaseResult executeQueryPhase(
         boolean isStreamSearch,
         ActionListener<SearchPhaseResult> listener
     ) throws Exception {
+        // Till here things are generic but for datafusion , we need to abstract out and get the read engine specific implementation
+        // it could be reusing existing
         final ReaderContext readerContext = createOrGetReaderContext(request, keepStatesInContext);
+        @SuppressWarnings("unchecked")
+        SearchExecEngine searchExecEngine = readerContext.indexShard()
+            .getIndexingExecutionCoordinator()
+            .getPrimaryReadEngine();
+        SearchShardTarget shardTarget = new SearchShardTarget(
+            clusterService.localNode().getId(),
+            readerContext.indexShard().shardId(),
+            request.getClusterAlias(),
+            OriginalIndices.NONE
+        );
         try (
             Releasable ignored = readerContext.markAsUsed(getKeepAlive(request));
-            SearchContext context = createContext(readerContext, request, task, true, isStreamSearch)
+            // Get engine-specific executor and context
+            // TODO : move this logic to work with Lucene
+
+            SearchContext context = createContext(readerContext, request, task, true, isStreamSearch, searchExecEngine);
+
+            //SearchContext context = createContext(readerContext, request, task, true)
         ) {
+            // TODO : this is not correct - need to tie source to plugin context above
+            //context.aggregations(context1.aggregations());
+            // TODO Execute plan here
+            // TODO : figure out how to tie this
+            byte[] substraitQuery = request.source().queryPlanIR();
+            if (substraitQuery != null) {
+                // setDFResults in context
+                searchExecEngine.executeQueryPhase(context);
+            }
+
             if (isStreamSearch) {
                 assert listener instanceof StreamSearchChannelListener : "Stream search expects StreamSearchChannelListener";
                 context.setStreamChannelListener((StreamSearchChannelListener<SearchPhaseResult, ShardSearchRequest>) listener);
             }
             final long afterQueryTime;
             try (SearchOperationListenerExecutor executor = new SearchOperationListenerExecutor(context)) {
+                // TODO check for this
+//                @SuppressWarnings("unchecked")
+//                QueryPhaseExecutor<SearchContext> queryPhaseExecutor =
+//                    (QueryPhaseExecutor<SearchContext>) searchExecEngine.getQueryPhaseExecutor();
+
+                //QueryPhaseExecutor<?> queryPhaseExecutor = readEngine.getQueryPhaseExecutor();
+//                boolean success = queryPhaseExecutor.execute(context);
                 loadOrExecuteQueryPhase(request, context);
+                //queryPhase.execute(context);
+                // loadOrExecuteQueryPhase(request, context);
                 if (context.queryResult().hasSearchContext() == false && readerContext.singleSession()) {
                     freeReaderContext(readerContext.id());
                 }
@@ -1057,7 +1098,8 @@ final ReaderContext createOrGetReaderContext(ShardSearchRequest request, boolean
         }
         IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
         IndexShard shard = indexService.getShard(request.shardId().id());
-        Engine.SearcherSupplier reader = shard.acquireSearcherSupplier();
+        // TODO acquire search supplier
+        EngineSearcherSupplier<?> reader = shard.acquireSearcherSupplier();
         return createAndPutReaderContext(request, indexService, shard, reader, keepStatesInContext);
     }
 
@@ -1065,7 +1107,7 @@ final ReaderContext createAndPutReaderContext(
         ShardSearchRequest request,
         IndexService indexService,
         IndexShard shard,
-        Engine.SearcherSupplier reader,
+        EngineSearcherSupplier<?> reader,
         boolean keepStatesInContext
     ) {
         assert request.readerId() == null;
@@ -1131,7 +1173,7 @@ public void createPitReaderContext(ShardId shardId, TimeValue keepAlive, ActionL
         final IndexShard shard = indexService.getShard(shardId.id());
         final SearchOperationListener searchOperationListener = shard.getSearchOperationListener();
         shard.awaitShardSearchActive(ignored -> {
-            Engine.SearcherSupplier searcherSupplier = null;
+            EngineSearcherSupplier<?> searcherSupplier = null;
             ReaderContext readerContext = null;
             Releasable decreasePitContexts = openPitContexts::decrementAndGet;
             try {
@@ -1226,7 +1268,7 @@ final SearchContext createContext(
         SearchShardTask task,
         boolean includeAggregations
     ) throws IOException {
-        return createContext(readerContext, request, task, includeAggregations, false);
+        return createContext(readerContext, request, task, includeAggregations, false, null);
     }
 
     private SearchContext createContext(
@@ -1234,9 +1276,18 @@ private SearchContext createContext(
         ShardSearchRequest request,
         SearchShardTask task,
         boolean includeAggregations,
-        boolean isStreamSearch
+        boolean isStreamSearch,
+        SearchExecEngine searchExecEngine
     ) throws IOException {
-        final DefaultSearchContext context = createSearchContext(readerContext, request, defaultSearchTimeout, false, isStreamSearch);
+        //final DefaultSearchContext originalContext = createSearchContext(readerContext, request, defaultSearchTimeout, false, isStreamSearch);
+
+        SearchShardTarget shardTarget = new SearchShardTarget(
+            clusterService.localNode().getId(),
+            readerContext.indexShard().shardId(),
+            request.getClusterAlias(),
+            OriginalIndices.NONE
+        );
+        SearchContext context = searchExecEngine.createContext(readerContext, request, shardTarget, task, bigArrays);
         try {
             if (request.scroll() != null) {
                 context.scrollContext().scroll = request.scroll();
@@ -1265,7 +1316,7 @@ private SearchContext createContext(
     public DefaultSearchContext createSearchContext(ShardSearchRequest request, TimeValue timeout, boolean validate) throws IOException {
         final IndexService indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
         final IndexShard indexShard = indexService.getShard(request.shardId().getId());
-        final Engine.SearcherSupplier reader = indexShard.acquireSearcherSupplier();
+        final EngineSearcherSupplier<?> reader = indexShard.acquireSearcherSupplier();
         final ShardSearchContextId id = new ShardSearchContextId(sessionId, idGenerator.incrementAndGet());
         try (ReaderContext readerContext = new ReaderContext(id, indexService, indexShard, reader, -1L, true)) {
             DefaultSearchContext searchContext = createSearchContext(readerContext, request, timeout, validate);
@@ -1502,10 +1553,10 @@ private void processFailure(ReaderContext context, Exception exc) {
         }
     }
 
-    private void parseSource(DefaultSearchContext context, SearchSourceBuilder source, boolean includeAggregations) {
+    private void parseSource(SearchContext context, SearchSourceBuilder source, boolean includeAggregations) {
         // nothing to parse...
         if (source == null) {
-            context.evaluateRequestShouldUseConcurrentSearch();
+           // context.evaluateRequestShouldUseConcurrentSearch(); // TODO : specific to default search context
             return;
         }
 
@@ -1662,7 +1713,7 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
             if (context.scrollContext() == null && !(context.readerContext() instanceof PitReaderContext)) {
                 throw new SearchException(shardTarget, "`slice` cannot be used outside of a scroll context or PIT context");
             }
-            context.sliceBuilder(source.slice());
+            // context.sliceBuilder(source.slice());  // TODO : specific to default search context
         }
 
         if (source.storedFields() != null) {
@@ -1696,13 +1747,13 @@ private void parseSource(DefaultSearchContext context, SearchSourceBuilder sourc
             final CollapseContext collapseContext = source.collapse().build(queryShardContext);
             context.collapse(collapseContext);
         }
-        context.evaluateRequestShouldUseConcurrentSearch();
+        // context.evaluateRequestShouldUseConcurrentSearch();  // TODO : specific to default search context
         if (source.profile()) {
             final Function<Query, Collection<Supplier<ProfileMetric>>> pluginProfileMetricsSupplier = (query) -> pluginProfilers.stream()
                 .flatMap(p -> p.getQueryProfileMetrics(context, query).stream())
                 .toList();
             Profilers profilers = new Profilers(context.searcher(), context.shouldUseConcurrentSearch(), pluginProfileMetricsSupplier);
-            context.setProfilers(profilers);
+            // context.setProfilers(profilers); // TODO : specific to default search context
         }
 
         if (context.getStarTreeIndexEnabled() && StarTreeQueryHelper.isStarTreeSupported(context)) {
@@ -1820,7 +1871,7 @@ private CanMatchResponse canMatch(ShardSearchRequest request, boolean checkRefre
             final boolean hasRefreshPending;
             if (readerContext != null) {
                 indexService = readerContext.indexService();
-                canMatchSearcher = readerContext.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE);
+                canMatchSearcher = (Engine.Searcher) readerContext.acquireSearcher(Engine.CAN_MATCH_SEARCH_SOURCE);
                 hasRefreshPending = false;
             } else {
                 indexService = indicesService.indexServiceSafe(request.shardId().getIndex());
diff --git a/server/src/main/java/org/opensearch/search/aggregations/SearchResultsCollector.java b/server/src/main/java/org/opensearch/search/aggregations/SearchResultsCollector.java
new file mode 100644
index 0000000000000..836fa4509531f
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/aggregations/SearchResultsCollector.java
@@ -0,0 +1,24 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.aggregations;
+import org.opensearch.common.annotation.ExperimentalApi;
+
+/**
+ * Experimental
+ * @opensearch.internal
+ */
+// TODO : account for sub collectors
+@ExperimentalApi
+public interface SearchResultsCollector<T> {
+
+    /**
+     * collect
+     */
+    void collect(T value);
+}
diff --git a/server/src/main/java/org/opensearch/search/aggregations/ShardResultConvertor.java b/server/src/main/java/org/opensearch/search/aggregations/ShardResultConvertor.java
new file mode 100644
index 0000000000000..5568b7051246b
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/aggregations/ShardResultConvertor.java
@@ -0,0 +1,18 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.aggregations;
+
+import java.util.List;
+import java.util.Map;
+
+public interface ShardResultConvertor {
+
+    List<InternalAggregation> convert(Map<String, Object[]> shardResult);
+
+}
diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java
index 5f99a9cc05558..2ad44cd33aa74 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/AvgAggregator.java
@@ -51,6 +51,7 @@
 import org.opensearch.search.aggregations.InternalAggregation;
 import org.opensearch.search.aggregations.LeafBucketCollector;
 import org.opensearch.search.aggregations.LeafBucketCollectorBase;
+import org.opensearch.search.aggregations.ShardResultConvertor;
 import org.opensearch.search.aggregations.StarTreeBucketCollector;
 import org.opensearch.search.aggregations.StarTreePreComputeCollector;
 import org.opensearch.search.aggregations.support.ValuesSource;
@@ -59,6 +60,8 @@
 import org.opensearch.search.startree.StarTreeQueryHelper;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 
 import static org.opensearch.search.startree.StarTreeQueryHelper.getStarTreeFilteredValues;
@@ -69,7 +72,7 @@
  *
  * @opensearch.internal
  */
-class AvgAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector {
+class AvgAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor {
 
     final ValuesSource.Numeric valuesSource;
 
@@ -275,4 +278,15 @@ public void collectStarTreeEntry(int starTreeEntryBit, long bucket) throws IOExc
             }
         };
     }
+
+    @Override
+    public List<InternalAggregation> convert(Map<String, Object[]> shardResult) {
+        Object[] counts = shardResult.get(name + "_count");
+        Object[] sums = shardResult.get(name + "_sum");
+        List<InternalAggregation> results = new ArrayList<>(counts.length);
+        for (int i = 0; i < counts.length; i++) {
+            results.add(new InternalAvg(name, (Long) counts[i], (Long) sums[i], format, metadata()));
+        }
+        return results;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java
index 93192411ea0f8..341f905e78ef0 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MaxAggregator.java
@@ -51,6 +51,7 @@
 import org.opensearch.search.aggregations.InternalAggregation;
 import org.opensearch.search.aggregations.LeafBucketCollector;
 import org.opensearch.search.aggregations.LeafBucketCollectorBase;
+import org.opensearch.search.aggregations.ShardResultConvertor;
 import org.opensearch.search.aggregations.StarTreeBucketCollector;
 import org.opensearch.search.aggregations.StarTreePreComputeCollector;
 import org.opensearch.search.aggregations.support.ValuesSource;
@@ -59,7 +60,9 @@
 import org.opensearch.search.startree.StarTreeQueryHelper;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Function;
@@ -71,7 +74,7 @@
  *
  * @opensearch.internal
  */
-class MaxAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector {
+class MaxAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor {
 
     final ValuesSource.Numeric valuesSource;
     final DocValueFormat formatter;
@@ -280,4 +283,14 @@ public StarTreeBucketCollector getStarTreeBucketCollector(
     public void doReset() {
         maxes.fill(0, maxes.size(), Double.NEGATIVE_INFINITY);
     }
+
+    @Override
+    public List<InternalAggregation> convert(Map<String, Object[]> shardResult) {
+            Object[] values = shardResult.get(name);
+            List<InternalAggregation> results = new ArrayList<>(values.length);
+            for (Object value : values) {
+                results.add(new InternalMax(name, (Long) value, formatter, metadata()));
+            }
+            return results;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java
index 22749382216dd..3652e36453263 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/MinAggregator.java
@@ -51,6 +51,7 @@
 import org.opensearch.search.aggregations.InternalAggregation;
 import org.opensearch.search.aggregations.LeafBucketCollector;
 import org.opensearch.search.aggregations.LeafBucketCollectorBase;
+import org.opensearch.search.aggregations.ShardResultConvertor;
 import org.opensearch.search.aggregations.StarTreeBucketCollector;
 import org.opensearch.search.aggregations.StarTreePreComputeCollector;
 import org.opensearch.search.aggregations.support.ValuesSource;
@@ -59,6 +60,8 @@
 import org.opensearch.search.startree.StarTreeQueryHelper;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
 import java.util.function.Function;
@@ -70,7 +73,7 @@
  *
  * @opensearch.internal
  */
-class MinAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector {
+class MinAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor {
     private static final int MAX_BKD_LOOKUPS = 1024;
 
     final ValuesSource.Numeric valuesSource;
@@ -271,4 +274,14 @@ public StarTreeBucketCollector getStarTreeBucketCollector(
             (bucket, metricValue) -> mins.set(bucket, Math.min(mins.get(bucket), NumericUtils.sortableLongToDouble(metricValue)))
         );
     }
+
+    @Override
+    public List<InternalAggregation> convert(Map<String, Object[]> shardResult) {
+            Object[] values = shardResult.get(name);
+            List<InternalAggregation> results = new ArrayList<>(values.length);
+            for (Object value : values) {
+                results.add(new InternalMin(name, (Long) value, format, metadata()));
+            }
+            return results;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java
index ba32592f75ea1..0a611329a2fa8 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/SumAggregator.java
@@ -45,6 +45,7 @@
 import org.opensearch.search.aggregations.InternalAggregation;
 import org.opensearch.search.aggregations.LeafBucketCollector;
 import org.opensearch.search.aggregations.LeafBucketCollectorBase;
+import org.opensearch.search.aggregations.ShardResultConvertor;
 import org.opensearch.search.aggregations.StarTreeBucketCollector;
 import org.opensearch.search.aggregations.StarTreePreComputeCollector;
 import org.opensearch.search.aggregations.support.ValuesSource;
@@ -53,6 +54,8 @@
 import org.opensearch.search.startree.StarTreeQueryHelper;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 
 import static org.opensearch.search.startree.StarTreeQueryHelper.getSupportedStarTree;
@@ -62,7 +65,7 @@
  *
  * @opensearch.internal
  */
-public class SumAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector {
+public class SumAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor {
 
     private final ValuesSource.Numeric valuesSource;
     private final DocValueFormat format;
@@ -215,4 +218,14 @@ public InternalAggregation buildEmptyAggregation() {
     public void doClose() {
         Releasables.close(sums, compensations);
     }
+
+    @Override
+    public List<InternalAggregation> convert(Map<String, Object[]> shardResult) {
+            Object[] values = shardResult.get(name);
+            List<InternalAggregation> results = new ArrayList<>(values.length);
+            for (Object value : values) {
+                results.add(new InternalSum(name, (Long) value, format, metadata()));
+            }
+            return results;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java
index 3541753d94e6f..76c5bb31fd166 100644
--- a/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java
+++ b/server/src/main/java/org/opensearch/search/aggregations/metrics/ValueCountAggregator.java
@@ -45,6 +45,7 @@
 import org.opensearch.search.aggregations.InternalAggregation;
 import org.opensearch.search.aggregations.LeafBucketCollector;
 import org.opensearch.search.aggregations.LeafBucketCollectorBase;
+import org.opensearch.search.aggregations.ShardResultConvertor;
 import org.opensearch.search.aggregations.StarTreeBucketCollector;
 import org.opensearch.search.aggregations.StarTreePreComputeCollector;
 import org.opensearch.search.aggregations.support.ValuesSource;
@@ -53,6 +54,8 @@
 import org.opensearch.search.startree.StarTreeQueryHelper;
 
 import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
 import java.util.Map;
 
 import static org.opensearch.search.startree.StarTreeQueryHelper.getSupportedStarTree;
@@ -65,7 +68,7 @@
  *
  * @opensearch.internal
  */
-public class ValueCountAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector {
+public class ValueCountAggregator extends NumericMetricsAggregator.SingleValue implements StarTreePreComputeCollector, ShardResultConvertor {
 
     final ValuesSource valuesSource;
 
@@ -209,4 +212,14 @@ public StarTreeBucketCollector getStarTreeBucketCollector(
             (bucket, metricValue) -> counts.increment(bucket, metricValue)
         );
     }
+
+    @Override
+    public List<InternalAggregation> convert(Map<String, Object[]> shardResult) {
+            Object[] values = shardResult.get(name);
+            List<InternalAggregation> results = new ArrayList<>(values.length);
+            for (Object value : values) {
+                results.add(new InternalValueCount(name, (Long) value, metadata()));
+            }
+            return results;
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java
index 90dfc1e086602..442d81f585015 100644
--- a/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java
+++ b/server/src/main/java/org/opensearch/search/builder/SearchSourceBuilder.java
@@ -42,6 +42,8 @@
 import org.opensearch.core.ParseField;
 import org.opensearch.core.common.ParsingException;
 import org.opensearch.core.common.Strings;
+import org.opensearch.core.common.bytes.BytesArray;
+import org.opensearch.core.common.bytes.BytesReference;
 import org.opensearch.core.common.io.stream.StreamInput;
 import org.opensearch.core.common.io.stream.StreamOutput;
 import org.opensearch.core.common.io.stream.Writeable;
@@ -78,6 +80,7 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collections;
 import java.util.List;
 import java.util.Map;
@@ -137,6 +140,7 @@ public final class SearchSourceBuilder implements Writeable, ToXContentObject, R
     public static final ParseField POINT_IN_TIME = new ParseField("pit");
     public static final ParseField SEARCH_PIPELINE = new ParseField("search_pipeline");
     public static final ParseField VERBOSE_SEARCH_PIPELINE = new ParseField("verbose_pipeline");
+    public static final ParseField QUERY_PLAN_IR = new ParseField("query_plan_ir");
 
     public static SearchSourceBuilder fromXContent(XContentParser parser) throws IOException {
         return fromXContent(parser, true);
@@ -229,6 +233,8 @@ public static HighlightBuilder highlight() {
 
     private boolean verbosePipeline = false;
 
+    private byte[] queryPlanIR;
+
     /**
      * Constructs a new search source builder.
      */
@@ -308,6 +314,10 @@ public SearchSourceBuilder(StreamInput in) throws IOException {
         if (in.getVersion().onOrAfter(Version.V_2_19_0)) {
             verbosePipeline = in.readBoolean();
         }
+        if (in.getVersion().onOrAfter(Version.V_3_0_0)) {
+            BytesReference bytesRef = in.readOptionalBytesReference();
+            queryPlanIR = bytesRef != null ? BytesReference.toBytes(bytesRef) : null;
+        }
     }
 
     @Override
@@ -394,6 +404,9 @@ public void writeTo(StreamOutput out) throws IOException {
         if (out.getVersion().onOrAfter(Version.V_2_19_0)) {
             out.writeBoolean(verbosePipeline);
         }
+        if (out.getVersion().onOrAfter(Version.V_3_0_0)) {
+            out.writeOptionalBytesReference(queryPlanIR != null ? new BytesArray(queryPlanIR) : null);
+        }
     }
 
     /**
@@ -1171,6 +1184,21 @@ public Boolean verbosePipeline() {
         return verbosePipeline;
     }
 
+    /**
+     * Sets the query plan intermediate representation for this search request.
+     */
+    public SearchSourceBuilder queryPlanIR(byte[] queryPlanIR) {
+        this.queryPlanIR = queryPlanIR;
+        return this;
+    }
+
+    /**
+     * Gets the query plan intermediate representation for this search request.
+     */
+    public byte[] queryPlanIR() {
+        return queryPlanIR;
+    }
+
     /**
      * Rewrites this search source builder into its primitive form. e.g. by
      * rewriting the QueryBuilder. If the builder did not change the identity
@@ -1270,6 +1298,7 @@ private SearchSourceBuilder shallowCopy(
         rewrittenBuilder.derivedFields = derivedFields;
         rewrittenBuilder.searchPipeline = searchPipeline;
         rewrittenBuilder.verbosePipeline = verbosePipeline;
+        rewrittenBuilder.queryPlanIR = queryPlanIR;
         return rewrittenBuilder;
     }
 
@@ -1341,6 +1370,8 @@ public void parseXContent(XContentParser parser, boolean checkTrailingTokens) th
                     searchPipeline = parser.text();
                 } else if (VERBOSE_SEARCH_PIPELINE.match(currentFieldName, parser.getDeprecationHandler())) {
                     verbosePipeline = parser.booleanValue();
+                } else if (QUERY_PLAN_IR.match(currentFieldName, parser.getDeprecationHandler())) {
+                    queryPlanIR = parser.binaryValue();
                 } else {
                     throw new ParsingException(
                         parser.getTokenLocation(),
@@ -1678,6 +1709,10 @@ public XContentBuilder innerToXContent(XContentBuilder builder, Params params) t
             builder.field(VERBOSE_SEARCH_PIPELINE.getPreferredName(), verbosePipeline);
         }
 
+        if (queryPlanIR != null) {
+            builder.field(QUERY_PLAN_IR.getPreferredName(), queryPlanIR);
+        }
+
         return builder;
     }
 
@@ -1957,7 +1992,8 @@ public int hashCode() {
             derivedFieldsObject,
             derivedFields,
             searchPipeline,
-            verbosePipeline
+            verbosePipeline,
+            Arrays.hashCode(queryPlanIR)
         );
     }
 
@@ -2004,7 +2040,8 @@ public boolean equals(Object obj) {
             && Objects.equals(derivedFieldsObject, other.derivedFieldsObject)
             && Objects.equals(derivedFields, other.derivedFields)
             && Objects.equals(searchPipeline, other.searchPipeline)
-            && Objects.equals(verbosePipeline, other.verbosePipeline);
+            && Objects.equals(verbosePipeline, other.verbosePipeline)
+            && Arrays.equals(queryPlanIR, other.queryPlanIR);
     }
 
     @Override
diff --git a/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java b/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java
index 05ab12d5ae809..4a4b96113930c 100644
--- a/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java
+++ b/server/src/main/java/org/opensearch/search/internal/LegacyReaderContext.java
@@ -34,6 +34,8 @@
 
 import org.opensearch.index.IndexService;
 import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineSearcher;
+import org.opensearch.index.engine.EngineSearcherSupplier;
 import org.opensearch.index.shard.IndexShard;
 import org.opensearch.search.RescoreDocIds;
 import org.opensearch.search.dfs.AggregatedDfs;
@@ -57,7 +59,7 @@ public LegacyReaderContext(
         ShardSearchContextId id,
         IndexService indexService,
         IndexShard indexShard,
-        Engine.SearcherSupplier reader,
+        EngineSearcherSupplier<?> reader,
         ShardSearchRequest shardSearchRequest,
         long keepAliveInMillis
     ) {
@@ -70,7 +72,7 @@ public LegacyReaderContext(
             // to reuse the searcher created on the request that initialized the scroll.
             // This ensures that we wrap the searcher's reader with the user's permissions
             // when they are available.
-            final Engine.Searcher delegate = searcherSupplier.acquireSearcher("search");
+            final Engine.Searcher delegate = (Engine.Searcher) searcherSupplier.acquireSearcher("search");
             addOnClose(delegate);
             // wrap the searcher so that closing is a noop, the actual closing happens when this context is closed
             this.searcher = new Engine.Searcher(
@@ -89,7 +91,7 @@ public LegacyReaderContext(
     }
 
     @Override
-    public Engine.Searcher acquireSearcher(String source) {
+    public EngineSearcher<?,?> acquireSearcher(String source) {
         if (scrollContext != null) {
             assert Engine.SEARCH_SOURCE.equals(source) : "scroll context should not acquire searcher for " + source;
             return searcher;
diff --git a/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java b/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java
index 5c2a9f82f98e4..b09f40f35172f 100644
--- a/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java
+++ b/server/src/main/java/org/opensearch/search/internal/PitReaderContext.java
@@ -14,6 +14,7 @@
 import org.opensearch.common.lease.Releasables;
 import org.opensearch.index.IndexService;
 import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineSearcherSupplier;
 import org.opensearch.index.engine.Segment;
 import org.opensearch.index.shard.IndexShard;
 
@@ -43,7 +44,7 @@ public PitReaderContext(
         ShardSearchContextId id,
         IndexService indexService,
         IndexShard indexShard,
-        Engine.SearcherSupplier searcherSupplier,
+        EngineSearcherSupplier<?> searcherSupplier,
         long keepAliveInMillis,
         boolean singleSession
     ) {
diff --git a/server/src/main/java/org/opensearch/search/internal/ReaderContext.java b/server/src/main/java/org/opensearch/search/internal/ReaderContext.java
index 776e92d325ae4..1293032f7932e 100644
--- a/server/src/main/java/org/opensearch/search/internal/ReaderContext.java
+++ b/server/src/main/java/org/opensearch/search/internal/ReaderContext.java
@@ -38,6 +38,8 @@
 import org.opensearch.common.util.concurrent.AbstractRefCounted;
 import org.opensearch.index.IndexService;
 import org.opensearch.index.engine.Engine;
+import org.opensearch.index.engine.EngineSearcher;
+import org.opensearch.index.engine.EngineSearcherSupplier;
 import org.opensearch.index.shard.IndexShard;
 import org.opensearch.search.RescoreDocIds;
 import org.opensearch.search.dfs.AggregatedDfs;
@@ -65,7 +67,7 @@ public class ReaderContext implements Releasable {
     private final ShardSearchContextId id;
     private final IndexService indexService;
     private final IndexShard indexShard;
-    protected final Engine.SearcherSupplier searcherSupplier;
+    protected final EngineSearcherSupplier<?> searcherSupplier;
     private final AtomicBoolean closed = new AtomicBoolean(false);
     private final boolean singleSession;
 
@@ -84,7 +86,7 @@ public ReaderContext(
         ShardSearchContextId id,
         IndexService indexService,
         IndexShard indexShard,
-        Engine.SearcherSupplier searcherSupplier,
+        EngineSearcherSupplier<?> searcherSupplier,
         long keepAliveInMillis,
         boolean singleSession
     ) {
@@ -150,7 +152,7 @@ public IndexShard indexShard() {
         return indexShard;
     }
 
-    public Engine.Searcher acquireSearcher(String source) {
+    public EngineSearcher<?,?> acquireSearcher(String source) {
         return searcherSupplier.acquireSearcher(source);
     }
 
diff --git a/server/src/main/java/org/opensearch/search/internal/SearchContext.java b/server/src/main/java/org/opensearch/search/internal/SearchContext.java
index 4eadd8817a5c3..ec392b4e0cf9b 100644
--- a/server/src/main/java/org/opensearch/search/internal/SearchContext.java
+++ b/server/src/main/java/org/opensearch/search/internal/SearchContext.java
@@ -83,6 +83,7 @@
 import org.opensearch.search.suggest.SuggestionSearchContext;
 
 import java.util.Collection;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
@@ -561,4 +562,12 @@ public StreamSearchChannelListener<SearchPhaseResult, ShardSearchRequest> getStr
     public boolean isStreamSearch() {
         return false;
     }
+
+    public void setDFResults(Map<String, Object[]> dfResults) {
+
+    }
+
+    public Map<String, Object[]> getDFResults() {
+        return Collections.emptyMap();
+    }
 }
diff --git a/server/src/main/java/org/opensearch/search/query/GenericQueryPhase.java b/server/src/main/java/org/opensearch/search/query/GenericQueryPhase.java
new file mode 100644
index 0000000000000..533ef9b328c99
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/query/GenericQueryPhase.java
@@ -0,0 +1,25 @@
+package org.opensearch.search.query;
+
+import java.util.LinkedList;
+
+/**
+ * Generic query phase that can work with different context and searcher types
+ * @param <C> Context type
+ * @param <S> Searcher type
+ * @param <Q> Query type
+ */
+public class GenericQueryPhase<C, S, Q> {
+    private final GenericQueryPhaseSearcher<C, S, Q> queryPhaseSearcher;
+
+    public GenericQueryPhase(GenericQueryPhaseSearcher<C, S, Q> queryPhaseSearcher) {
+        this.queryPhaseSearcher = queryPhaseSearcher;
+    }
+
+    public boolean executeInternal(C context, S searcher, Q query) throws QueryPhaseExecutionException {
+        try {
+            return queryPhaseSearcher.searchWith(context, searcher, query, new LinkedList<>() /* Figure out how to pass collectors */, false, false);
+        } catch (Exception e) {
+            throw new QueryPhaseExecutionException(null, "Failed to execute query", e);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/search/query/GenericQueryPhaseSearcher.java b/server/src/main/java/org/opensearch/search/query/GenericQueryPhaseSearcher.java
new file mode 100644
index 0000000000000..65a8c9a6b6ff5
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/query/GenericQueryPhaseSearcher.java
@@ -0,0 +1,31 @@
+package org.opensearch.search.query;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.search.aggregations.AggregationProcessor;
+
+import java.io.IOException;
+import java.util.LinkedList;
+
+/**
+ * Generic query phase searcher that can work with different context and searcher types
+ * @param <C> Context type (SearchContext for Lucene, EngineReaderContext for DataFusion)
+ * @param <S> Searcher type (ContextIndexSearcher for Lucene, ContextEngineSearcher for DataFusion)
+ * @param <Q> Query type (Query for Lucene, byte[] for DataFusion Substrait)
+ */
+// TODO make this part of QueryPhaseSearcher
+    @ExperimentalApi
+public interface GenericQueryPhaseSearcher<C, S, Q> {
+
+    boolean searchWith(
+        C context,
+        S searcher,
+        Q query,
+        LinkedList<QueryCollectorContext> collectors,
+        boolean hasFilterCollector,
+        boolean hasTimeout
+    ) throws IOException;
+
+    default AggregationProcessor aggregationProcessor(C context) {
+        return new org.opensearch.search.aggregations.DefaultAggregationProcessor();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/search/query/LuceneQueryPhaseExecutor.java b/server/src/main/java/org/opensearch/search/query/LuceneQueryPhaseExecutor.java
new file mode 100644
index 0000000000000..59493a8991733
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/query/LuceneQueryPhaseExecutor.java
@@ -0,0 +1,19 @@
+package org.opensearch.search.query;
+
+import org.opensearch.search.internal.SearchContext;
+
+/**
+ * Lucene-specific query phase executor
+ */
+public class LuceneQueryPhaseExecutor implements QueryPhaseExecutor<SearchContext> {
+    
+    @Override
+    public boolean execute(SearchContext context) throws QueryPhaseExecutionException {
+        return QueryPhase.executeInternal(context);
+    }
+    
+    @Override
+    public boolean canHandle(SearchContext context) {
+        return context != null;
+    }
+}
diff --git a/server/src/main/java/org/opensearch/search/query/QueryExecutionContext.java b/server/src/main/java/org/opensearch/search/query/QueryExecutionContext.java
new file mode 100644
index 0000000000000..f1501458f5211
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/query/QueryExecutionContext.java
@@ -0,0 +1,13 @@
+package org.opensearch.search.query;
+
+/**
+ * Common interface for query execution contexts
+ */
+public interface QueryExecutionContext {
+    
+    /**
+     * Execute query phase for this context
+     * @return whether rescoring phase should be executed
+     */
+    boolean executeQueryPhase() throws QueryPhaseExecutionException;
+}
diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhase.java b/server/src/main/java/org/opensearch/search/query/QueryPhase.java
index f8427440a6c13..25cceae77bfd5 100644
--- a/server/src/main/java/org/opensearch/search/query/QueryPhase.java
+++ b/server/src/main/java/org/opensearch/search/query/QueryPhase.java
@@ -60,6 +60,7 @@
 import org.opensearch.search.aggregations.AggregationProcessor;
 import org.opensearch.search.aggregations.DefaultAggregationProcessor;
 import org.opensearch.search.aggregations.GlobalAggCollectorManager;
+import org.opensearch.search.aggregations.InternalAggregations;
 import org.opensearch.search.internal.ContextIndexSearcher;
 import org.opensearch.search.internal.ScrollContext;
 import org.opensearch.search.internal.SearchContext;
@@ -98,6 +99,7 @@ public class QueryPhase {
     // TODO: remove this property
     public static final boolean SYS_PROP_REWRITE_SORT = Booleans.parseBoolean(System.getProperty("opensearch.search.rewrite_sort", "true"));
     public static final QueryPhaseSearcher DEFAULT_QUERY_PHASE_SEARCHER = new DefaultQueryPhaseSearcher();
+
     private final QueryPhaseSearcher queryPhaseSearcher;
     private final SuggestProcessor suggestProcessor;
     private final RescoreProcessor rescoreProcessor;
@@ -148,18 +150,29 @@ public void execute(SearchContext searchContext) throws QueryPhaseExecutionExcep
             LOGGER.trace("{}", new SearchContextSourcePrinter(searchContext));
         }
 
+        // Keeping AggregationProcessor and preProcess uncommented since it builds aggregation nesting
         final AggregationProcessor aggregationProcessor = queryPhaseSearcher.aggregationProcessor(searchContext);
         // Pre-process aggregations as late as possible. In the case of a DFS_Q_T_F
         // request, preProcess is called on the DFS phase phase, this is why we pre-process them
         // here to make sure it happens during the QUERY phase
         aggregationProcessor.preProcess(searchContext);
-        boolean rescore = executeInternal(searchContext, queryPhaseSearcher);
 
-        if (rescore) { // only if we do a regular search
-            rescoreProcessor.process(searchContext);
-        }
-        suggestProcessor.process(searchContext);
-        aggregationProcessor.postProcess(searchContext);
+        searchContext.queryResult()
+            .topDocs(
+                new TopDocsAndMaxScore(new TopDocs(new TotalHits(0, TotalHits.Relation.EQUAL_TO), Lucene.EMPTY_SCORE_DOCS), Float.NaN),
+                new DocValueFormat[0]
+            );
+
+        // boolean rescore = executeInternal(searchContext, queryPhaseSearcher);
+
+        // Post process
+        SearchEngineResultConversionUtils.convertDFResultGeneric(searchContext);
+
+        // if (rescore) { // only if we do a regular search
+        // rescoreProcessor.process(searchContext);
+        // }
+        // suggestProcessor.process(searchContext);
+         aggregationProcessor.postProcess(searchContext);
 
         if (searchContext.getProfilers() != null) {
             ProfileShardResult shardResults = SearchProfileShardResults.buildShardResults(
diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhaseExecutor.java b/server/src/main/java/org/opensearch/search/query/QueryPhaseExecutor.java
new file mode 100644
index 0000000000000..f9ae60a5c2bfa
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/query/QueryPhaseExecutor.java
@@ -0,0 +1,15 @@
+package org.opensearch.search.query;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.search.internal.SearchContext;
+
+/**
+ * Strategy interface for executing query phases across different engines
+ */
+@ExperimentalApi
+public interface QueryPhaseExecutor<C extends SearchContext> {
+
+    boolean execute(C context) throws QueryPhaseExecutionException;
+
+    boolean canHandle(C context);
+}
diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java
index 38e45a5212c81..790558db5228d 100644
--- a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java
+++ b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcher.java
@@ -23,6 +23,8 @@
  * The extension point which allows to plug in custom search implementation to be
  * used at {@link QueryPhase}.
  *
+ * TODO : Change this ? query phase searcher shouldn't rely on Lucene
+ *
  * @opensearch.api
  */
 @PublicApi(since = "2.0.0")
diff --git a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java
index 19a59e9f7bebe..80ed92500fc49 100644
--- a/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java
+++ b/server/src/main/java/org/opensearch/search/query/QueryPhaseSearcherWrapper.java
@@ -54,11 +54,13 @@ public boolean searchWith(
         boolean hasFilterCollector,
         boolean hasTimeout
     ) throws IOException {
-        if (searchContext.shouldUseConcurrentSearch()) {
-            return concurrentQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout);
-        } else {
-            return defaultQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout);
-        }
+        // if (searchContext.shouldUseConcurrentSearch()) {
+        // return concurrentQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout);
+        // } else {
+        // return defaultQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout);
+        // }
+        //
+        return defaultQueryPhaseSearcher.searchWith(searchContext, searcher, query, collectors, hasFilterCollector, hasTimeout);
     }
 
     /**
@@ -68,10 +70,11 @@ public boolean searchWith(
      */
     @Override
     public AggregationProcessor aggregationProcessor(SearchContext searchContext) {
-        if (searchContext.shouldUseConcurrentSearch()) {
-            return concurrentQueryPhaseSearcher.aggregationProcessor(searchContext);
-        } else {
-            return defaultQueryPhaseSearcher.aggregationProcessor(searchContext);
-        }
+        // if (searchContext.shouldUseConcurrentSearch()) {
+        // return concurrentQueryPhaseSearcher.aggregationProcessor(searchContext);
+        // } else {
+        // return defaultQueryPhaseSearcher.aggregationProcessor(searchContext);
+        // }
+        return defaultQueryPhaseSearcher.aggregationProcessor(searchContext);
     }
 }
diff --git a/server/src/main/java/org/opensearch/search/query/SearchEngineResultConversionUtils.java b/server/src/main/java/org/opensearch/search/query/SearchEngineResultConversionUtils.java
new file mode 100644
index 0000000000000..9e9ac280453e3
--- /dev/null
+++ b/server/src/main/java/org/opensearch/search/query/SearchEngineResultConversionUtils.java
@@ -0,0 +1,64 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.search.query;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.search.aggregations.Aggregator;
+import org.opensearch.search.aggregations.InternalAggregations;
+import org.opensearch.search.aggregations.ShardResultConvertor;
+import org.opensearch.search.internal.SearchContext;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.stream.Collectors;
+
+public class SearchEngineResultConversionUtils {
+
+    private static final Logger LOGGER = LogManager.getLogger(SearchEngineResultConversionUtils.class);
+
+    public static void convertDFResultGeneric(SearchContext searchContext) {
+        if (searchContext.aggregations() != null) {
+            Map<String, Object[]> dfResult = searchContext.getDFResults();
+
+            // Create aggregators which will process the result from DataFusion
+            try {
+
+                List<Aggregator> aggregators = new ArrayList<>();
+
+                if (searchContext.aggregations().factories().hasGlobalAggregator()) {
+                    aggregators.addAll(searchContext.aggregations().factories().createTopLevelGlobalAggregators(searchContext));
+                }
+
+                if (searchContext.aggregations().factories().hasNonGlobalAggregator()) {
+                    aggregators.addAll(searchContext.aggregations().factories().createTopLevelNonGlobalAggregators(searchContext));
+                }
+
+                List<ShardResultConvertor> shardResultConvertors = aggregators.stream().map(x -> {
+                    if (x instanceof ShardResultConvertor) {
+                        return ((ShardResultConvertor) x);
+                    } else {
+                        throw new UnsupportedOperationException("Aggregator doesn't support converting results from shard: " + x);
+                    }
+                }).toList();
+
+                InternalAggregations internalAggregations = InternalAggregations.from(
+                    shardResultConvertors.stream().flatMap(x -> x.convert(dfResult).stream()).collect(Collectors.toList())
+                );
+                LOGGER.info("Internal Aggregations converted {}", internalAggregations.asMap());
+                searchContext.queryResult().aggregations(internalAggregations);
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+    }
+
+}
diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java
index 29dd60c3e638f..6bb68a263b46a 100644
--- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java
+++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java
@@ -270,7 +270,9 @@ private IndexService newIndexService(IndexModule module) throws IOException {
             DefaultRemoteStoreSettings.INSTANCE,
             s -> {},
             null,
-            () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE
+            () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE,
+            null,
+            null
         );
     }
 
diff --git a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
index a936d4ce79ec2..cdaf3293cfb64 100644
--- a/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/opensearch/snapshots/SnapshotResiliencyTests.java
@@ -2367,6 +2367,7 @@ public void onFailure(final Exception e) {
                     null,
                     new TaskResourceTrackingService(settings, clusterSettings, threadPool),
                     Collections.emptyList(),
+                    Collections.emptyList(),
                     Collections.emptyList()
                 );
                 SearchPhaseController searchPhaseController = new SearchPhaseController(
diff --git a/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java b/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java
index 9e4e59d9a4d15..064bc6281d997 100644
--- a/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java
+++ b/test/framework/src/main/java/org/opensearch/index/engine/TranslogHandler.java
@@ -153,6 +153,7 @@ public Engine.Operation convertToEngineOp(Translog.Operation operation, Engine.O
                     true,
                     SequenceNumbers.UNASSIGNED_SEQ_NO,
                     SequenceNumbers.UNASSIGNED_PRIMARY_TERM
+                    ,null // TODO
                 );
                 return engineIndex;
             case DELETE:
diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java
index a300e2c9cc717..7513db2d13ab7 100644
--- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java
+++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java
@@ -738,7 +738,8 @@ protected IndexShard newShard(
                 new Object(),
                 clusterService.getClusterApplierService(),
                 MergedSegmentPublisher.EMPTY,
-                ReferencedSegmentsPublisher.EMPTY
+                ReferencedSegmentsPublisher.EMPTY,
+                null
             );
             indexShard.addShardFailureCallback(DEFAULT_SHARD_FAILURE_HANDLER);
             if (remoteStoreStatsTrackerFactory != null) {
diff --git a/test/framework/src/main/java/org/opensearch/node/MockNode.java b/test/framework/src/main/java/org/opensearch/node/MockNode.java
index 8297e6b066cde..8dcf2cb66e4ab 100644
--- a/test/framework/src/main/java/org/opensearch/node/MockNode.java
+++ b/test/framework/src/main/java/org/opensearch/node/MockNode.java
@@ -51,6 +51,7 @@
 import org.opensearch.env.Environment;
 import org.opensearch.http.HttpServerTransport;
 import org.opensearch.indices.IndicesService;
+import org.opensearch.plugins.DataSourcePlugin;
 import org.opensearch.plugins.Plugin;
 import org.opensearch.plugins.PluginInfo;
 import org.opensearch.plugins.SearchPlugin;
@@ -175,7 +176,8 @@ protected SearchService newSearchService(
         Executor indexSearcherExecutor,
         TaskResourceTrackingService taskResourceTrackingService,
         Collection<ConcurrentSearchRequestDecider.Factory> concurrentSearchDeciderFactories,
-        List<SearchPlugin.ProfileMetricsProvider> pluginProfilers
+        List<SearchPlugin.ProfileMetricsProvider> pluginProfilers,
+        List<DataSourcePlugin> dataSourcePluginList
     ) {
         if (getPluginsService().filterPlugins(MockSearchService.TestPlugin.class).isEmpty()) {
             return super.newSearchService(
@@ -191,7 +193,8 @@ protected SearchService newSearchService(
                 indexSearcherExecutor,
                 taskResourceTrackingService,
                 concurrentSearchDeciderFactories,
-                pluginProfilers
+                pluginProfilers,
+                null // TODO
             );
         }
         return new MockSearchService(
diff --git a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java
index e3bc166e56d6b..0bf59b30ff011 100644
--- a/test/framework/src/main/java/org/opensearch/search/MockSearchService.java
+++ b/test/framework/src/main/java/org/opensearch/search/MockSearchService.java
@@ -114,7 +114,8 @@ public MockSearchService(
             indexSearcherExecutor,
             taskResourceTrackingService,
             Collections.emptyList(),
-            Collections.emptyList()
+            Collections.emptyList(),
+            null // TODO
         );
     }