diff --git a/.github/actions/build-faiss-native/action.yml b/.github/actions/build-faiss-native/action.yml
new file mode 100644
index 000000000000..d5058759d3af
--- /dev/null
+++ b/.github/actions/build-faiss-native/action.yml
@@ -0,0 +1,113 @@
+name: 'Build FAISS Native Library'
+description: 'Build FAISS native library for specified platform'
+inputs:
+ platform:
+ description: 'Target platform (linux-amd64, linux-aarch64, darwin-aarch64)'
+ required: true
+ faiss-version:
+ description: 'FAISS version to build (e.g., 1.7.4)'
+ required: false
+ default: '1.7.4'
+ use-homebrew:
+ description: 'Whether to use Homebrew for dependencies (macOS)'
+ required: false
+ default: 'false'
+
+runs:
+ using: 'composite'
+ steps:
+ - name: Install native dependencies (Linux)
+ if: inputs.use-homebrew == 'false'
+ shell: bash
+ run: |
+ sudo apt-get update
+ sudo apt-get install -y \
+ build-essential \
+ libopenblas-dev \
+ liblapack-dev \
+ patchelf \
+ libgomp1 \
+ wget
+
+ - name: Install GCC 9 (Linux)
+ if: inputs.use-homebrew == 'false'
+ shell: bash
+ run: |
+ sudo apt-get install -y gcc-9 g++-9 || sudo apt-get install -y gcc g++
+ if command -v gcc-9 &>/dev/null; then
+ sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90
+ sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 90
+ fi
+ gcc --version
+
+ - name: Install CMake 3.30.1 (Linux)
+ if: inputs.use-homebrew == 'false'
+ shell: bash
+ run: |
+ CMAKE_VERSION="3.30.1"
+ if [[ "${{ inputs.platform }}" == *"amd64"* ]]; then
+ CMAKE_ARCH="x86_64"
+ else
+ CMAKE_ARCH="aarch64"
+ fi
+ wget -q https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz
+ tar -xzf cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH}.tar.gz
+ sudo mv cmake-${CMAKE_VERSION}-linux-${CMAKE_ARCH} /opt/cmake
+ sudo ln -sf /opt/cmake/bin/cmake /usr/local/bin/cmake
+ cmake --version
+
+ - name: Install FAISS (Linux)
+ if: inputs.use-homebrew == 'false'
+ shell: bash
+ run: |
+ git clone --depth 1 --branch v${{ inputs.faiss-version }} https://github.com/facebookresearch/faiss.git /tmp/faiss
+ cd /tmp/faiss
+ cmake -B build \
+ -DFAISS_ENABLE_GPU=OFF \
+ -DFAISS_ENABLE_PYTHON=OFF \
+ -DBUILD_TESTING=OFF \
+ -DCMAKE_BUILD_TYPE=Release
+ cmake --build build -j $(nproc)
+ sudo cmake --install build
+
+ - name: Install dependencies (macOS)
+ if: inputs.use-homebrew == 'true'
+ shell: bash
+ run: |
+ brew install cmake libomp openblas faiss
+
+ - name: Build native library
+ shell: bash
+ run: |
+ cd paimon-faiss-jni
+ ./scripts/build-native.sh --clean --fat-lib --faiss-version ${{ inputs.faiss-version }}
+
+ - name: List built libraries (Linux)
+ if: inputs.use-homebrew == 'false' && inputs.platform == 'linux-amd64'
+ shell: bash
+ run: |
+ echo "=== Built libraries ==="
+ ls -la paimon-faiss-jni/src/main/resources/linux/amd64/
+ echo ""
+ echo "=== Library dependencies ==="
+ ldd paimon-faiss-jni/src/main/resources/linux/amd64/libpaimon_faiss_jni.so || true
+
+ - name: List built libraries (Linux AARCH64)
+ if: inputs.use-homebrew == 'false' && inputs.platform == 'linux-aarch64'
+ shell: bash
+ run: |
+ echo "=== Built libraries ==="
+ ls -la paimon-faiss-jni/src/main/resources/linux/aarch64/
+ echo ""
+ echo "=== Library dependencies ==="
+ ldd paimon-faiss-jni/src/main/resources/linux/aarch64/libpaimon_faiss_jni.so || true
+
+ - name: List built libraries (macOS)
+ if: inputs.use-homebrew == 'true'
+ shell: bash
+ run: |
+ echo "=== Built libraries ==="
+ ls -la paimon-faiss-jni/src/main/resources/darwin/aarch64/
+ echo ""
+ echo "=== Library dependencies ==="
+ otool -L paimon-faiss-jni/src/main/resources/darwin/aarch64/libpaimon_faiss_jni.dylib || true
diff --git a/.github/workflows/faiss-vector-index-tests.yml b/.github/workflows/faiss-vector-index-tests.yml
new file mode 100644
index 000000000000..11baeff32113
--- /dev/null
+++ b/.github/workflows/faiss-vector-index-tests.yml
@@ -0,0 +1,91 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+name: Faiss Vector Index Tests
+
+on:
+ push:
+ paths:
+ - 'paimon-faiss/**'
+ - 'paimon-faiss-jni/**'
+ - '.github/workflows/faiss-vector-index-tests.yml'
+ pull_request:
+ paths:
+ - 'paimon-faiss/**'
+ - 'paimon-faiss-jni/**'
+ - '.github/workflows/faiss-vector-index-tests.yml'
+
+env:
+ JDK_VERSION: 8
+ MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 -Dmaven.wagon.http.retryHandler.requestSentEnabled=true
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ build_test:
+ runs-on: ubuntu-latest
+ timeout-minutes: 90
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up JDK ${{ env.JDK_VERSION }}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ env.JDK_VERSION }}
+ distribution: 'temurin'
+
+ - name: Build FAISS native library
+ uses: ./.github/actions/build-faiss-native
+ with:
+ platform: linux-amd64
+
+ - name: Build paimon-faiss-jni
+ shell: bash
+ run: |
+ mvn -B clean install -pl paimon-faiss-jni -am -DskipTests -Ppaimon-faiss-vector
+
+ - name: Build paimon-faiss
+ shell: bash
+ run: |
+ mvn -B clean install -pl paimon-faiss -am -DskipTests -Ppaimon-faiss-vector
+
+ - name: Test paimon-faiss-jni
+ timeout-minutes: 10
+ run: |
+ mvn -T 1C -B test -pl paimon-faiss-jni -DskipFaissTests=false -Ppaimon-faiss-vector
+ env:
+ MAVEN_OPTS: -Xmx2048m
+
+ - name: Test paimon-faiss
+ timeout-minutes: 30
+ run: |
+ mvn -T 1C -B test -pl paimon-faiss -Ppaimon-faiss-vector
+ env:
+ MAVEN_OPTS: -Xmx4096m
+
+ - name: Build Vector E2E Test Module
+ run: mvn -T 2C -B clean install -DskipTests -Pspark3,flink1,paimon-faiss-vector -pl paimon-vector-e2e-test -am
+
+ - name: Run Vector E2E Tests
+ timeout-minutes: 30
+ run: mvn -T 2C -B verify -Pspark3,flink1,paimon-faiss-vector -pl paimon-vector-e2e-test
+ env:
+ MAVEN_OPTS: -Xmx4096m
diff --git a/.github/workflows/publish-faiss-snapshot.yml b/.github/workflows/publish-faiss-snapshot.yml
new file mode 100644
index 000000000000..7ed90c311a6d
--- /dev/null
+++ b/.github/workflows/publish-faiss-snapshot.yml
@@ -0,0 +1,187 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+name: Publish Faiss Snapshot
+
+on:
+ schedule:
+ # At the end of every day
+ - cron: '0 1 * * *'
+ workflow_dispatch:
+ push:
+ paths:
+ - 'paimon-faiss/**'
+ - 'paimon-faiss-jni/**'
+ branches:
+ - master
+
+env:
+ JDK_VERSION: 8
+ MAVEN_OPTS: -Dmaven.wagon.httpconnectionManager.ttlSeconds=30 -Dmaven.wagon.http.retryHandler.requestSentEnabled=true
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.run_id }}
+ cancel-in-progress: true
+
+jobs:
+ # Build native library for Linux AMD64
+ build-native-linux-amd64:
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up JDK ${{ env.JDK_VERSION }}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ env.JDK_VERSION }}
+ distribution: 'temurin'
+
+ - name: Build FAISS native library
+ uses: ./.github/actions/build-faiss-native
+ with:
+ platform: linux-amd64
+
+ - name: Upload native library
+ uses: actions/upload-artifact@v4
+ with:
+ name: native-linux-amd64
+ path: paimon-faiss-jni/src/main/resources/linux/amd64/
+ retention-days: 1
+
+ # Build native library for Linux AARCH64
+ build-native-linux-aarch64:
+ runs-on: ubuntu-24.04-arm
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up JDK ${{ env.JDK_VERSION }}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ env.JDK_VERSION }}
+ distribution: 'temurin'
+
+ - name: Build FAISS native library
+ uses: ./.github/actions/build-faiss-native
+ with:
+ platform: linux-aarch64
+
+ - name: Upload native library
+ uses: actions/upload-artifact@v4
+ with:
+ name: native-linux-aarch64
+ path: paimon-faiss-jni/src/main/resources/linux/aarch64/
+ retention-days: 1
+
+ # Build native library for macOS ARM (Apple Silicon)
+ build-native-macos-arm:
+ runs-on: macos-14
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up JDK ${{ env.JDK_VERSION }}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ env.JDK_VERSION }}
+ distribution: 'temurin'
+
+ - name: Build FAISS native library
+ uses: ./.github/actions/build-faiss-native
+ with:
+ platform: darwin-aarch64
+ use-homebrew: 'true'
+
+ - name: Upload native library
+ uses: actions/upload-artifact@v4
+ with:
+ name: native-darwin-aarch64
+ path: paimon-faiss-jni/src/main/resources/darwin/aarch64/
+ retention-days: 1
+
+ # Package and publish
+ package-and-publish:
+ if: github.repository == 'apache/paimon'
+ needs: [build-native-linux-amd64, build-native-linux-aarch64, build-native-macos-arm]
+ runs-on: ubuntu-latest
+ steps:
+ - name: Checkout code
+ uses: actions/checkout@v4
+
+ - name: Set up JDK ${{ env.JDK_VERSION }}
+ uses: actions/setup-java@v4
+ with:
+ java-version: ${{ env.JDK_VERSION }}
+ distribution: 'temurin'
+
+ - name: Download Linux AMD64 native library
+ uses: actions/download-artifact@v4
+ with:
+ name: native-linux-amd64
+ path: paimon-faiss-jni/src/main/resources/linux/amd64/
+
+ - name: Download Linux AARCH64 native library
+ uses: actions/download-artifact@v4
+ with:
+ name: native-linux-aarch64
+ path: paimon-faiss-jni/src/main/resources/linux/aarch64/
+
+ - name: Download macOS ARM native library
+ uses: actions/download-artifact@v4
+ with:
+ name: native-darwin-aarch64
+ path: paimon-faiss-jni/src/main/resources/darwin/aarch64/
+
+ - name: List all native libraries
+ run: |
+ echo "=== All native libraries ==="
+ find paimon-faiss-jni/src/main/resources -type f \( -name "*.so" -o -name "*.so.*" -o -name "*.dylib" \) -exec ls -la {} \;
+
+ - name: Cache local Maven repository
+ uses: actions/cache@v4
+ with:
+ path: ~/.m2/repository
+ key: faiss-snapshot-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ faiss-snapshot-maven-
+
+ - name: Build and package paimon-faiss-jni
+ run: |
+ mvn -B clean install -pl paimon-faiss-jni -am -DskipTests -Ppaimon-faiss-vector -Drat.skip
+
+ - name: Build and package paimon-faiss
+ run: |
+ mvn -B clean install -pl paimon-faiss -am -DskipTests -Ppaimon-faiss-vector -Drat.skip
+
+ - name: Publish snapshot
+ env:
+ ASF_USERNAME: ${{ secrets.NEXUS_USER }}
+ ASF_PASSWORD: ${{ secrets.NEXUS_PW }}
+ MAVEN_OPTS: -Xmx4096m
+ run: |
+ tmp_settings="tmp-settings.xml"
+ echo "" > $tmp_settings
+ echo "apache.snapshots.https$ASF_USERNAME" >> $tmp_settings
+ echo "$ASF_PASSWORD" >> $tmp_settings
+ echo "" >> $tmp_settings
+
+ mvn --settings $tmp_settings deploy -pl paimon-faiss-jni,paimon-faiss -Dgpg.skip -Drat.skip -DskipTests -Ppaimon-faiss-vector
+
+ rm $tmp_settings
+
diff --git a/.github/workflows/utitcase.yml b/.github/workflows/utitcase.yml
index b24ab35386d3..50b6d07660a4 100644
--- a/.github/workflows/utitcase.yml
+++ b/.github/workflows/utitcase.yml
@@ -27,6 +27,9 @@ on:
- 'paimon-python/**'
- '.github/workflows/paimon-python-checks.yml'
- 'paimon-lucene/**'
+ - 'paimon-faiss/**'
+ - 'paimon-faiss-jni/**'
+ - '.github/workflows/utitcase-faiss.yml'
env:
JDK_VERSION: 8
@@ -62,7 +65,8 @@ jobs:
jvm_timezone=$(random_timezone)
echo "JVM timezone is set to $jvm_timezone"
- TEST_MODULES="!paimon-e2e-tests,"
+ # Exclude modules that have their own dedicated test workflows
+ TEST_MODULES="!paimon-e2e-tests,!paimon-vector-e2e-test,!paimon-faiss-jni,!paimon-faiss,"
for suffix in ut 3.5 3.4 3.3 3.2; do
TEST_MODULES+="!org.apache.paimon:paimon-spark-${suffix}_2.12,"
done
diff --git a/.gitignore b/.gitignore
index d961d026a773..942e109a772d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -26,6 +26,12 @@ paimon-python/*.egg-info/
paimon-python/dev/log
paimon-lucene/.idea/
+### Native build artifacts ###
+paimon-faiss-jni/build/
+paimon-faiss-jni/src/main/resources/darwin*
+paimon-faiss-jni/src/main/resources/linux*
+paimon-faiss-jni/src/main/native/cmake-build-debug/
+
### VS Code ###
.vscode/
diff --git a/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMetaSerializer.java b/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMetaSerializer.java
index 9e3ead690919..6d98e61248bb 100644
--- a/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMetaSerializer.java
+++ b/paimon-core/src/main/java/org/apache/paimon/index/IndexFileMetaSerializer.java
@@ -47,7 +47,7 @@ public InternalRow toRow(IndexFileMeta record) {
globalIndexMeta.rowRangeStart(),
globalIndexMeta.rowRangeEnd(),
globalIndexMeta.indexFieldId(),
- globalIndexMeta.indexMeta() == null
+ globalIndexMeta.extraFieldIds() == null
? null
: new GenericArray(globalIndexMeta.extraFieldIds()),
globalIndexMeta.indexMeta());
diff --git a/paimon-core/src/main/java/org/apache/paimon/manifest/IndexManifestEntrySerializer.java b/paimon-core/src/main/java/org/apache/paimon/manifest/IndexManifestEntrySerializer.java
index cd7a1efd84ef..98ab4df6f136 100644
--- a/paimon-core/src/main/java/org/apache/paimon/manifest/IndexManifestEntrySerializer.java
+++ b/paimon-core/src/main/java/org/apache/paimon/manifest/IndexManifestEntrySerializer.java
@@ -54,7 +54,7 @@ public InternalRow convertTo(IndexManifestEntry record) {
globalIndexMeta.rowRangeStart(),
globalIndexMeta.rowRangeEnd(),
globalIndexMeta.indexFieldId(),
- globalIndexMeta.indexMeta() == null
+ globalIndexMeta.extraFieldIds() == null
? null
: new GenericArray(globalIndexMeta.extraFieldIds()),
globalIndexMeta.indexMeta());
diff --git a/paimon-faiss-jni/NOTICE b/paimon-faiss-jni/NOTICE
new file mode 100644
index 000000000000..349f16941e1c
--- /dev/null
+++ b/paimon-faiss-jni/NOTICE
@@ -0,0 +1,9 @@
+Apache Paimon Faiss JNI
+Copyright 2024-2026 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (https://www.apache.org/).
+
+This product includes Faiss (https://github.com/facebookresearch/faiss)
+developed by Facebook AI Research under the MIT License.
+
diff --git a/paimon-faiss-jni/pom.xml b/paimon-faiss-jni/pom.xml
new file mode 100644
index 000000000000..65f27b6f7a3a
--- /dev/null
+++ b/paimon-faiss-jni/pom.xml
@@ -0,0 +1,135 @@
+
+
+
+ 4.0.0
+
+
+ paimon-parent
+ org.apache.paimon
+ 1.4-SNAPSHOT
+
+
+ paimon-faiss-jni
+ Paimon : Faiss JNI
+
+
+ 1.7.4
+ true
+
+
+
+
+ org.slf4j
+ slf4j-api
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ test
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+
+
+ generate-jni-headers
+ compile
+
+ run
+
+
+
+
+
+
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+
+ so
+ dylib
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-surefire-plugin
+
+ ${skipFaissTests}
+
+
+
+
+
+
+
+
+ release
+
+
+
+
+ org.apache.maven.plugins
+ maven-resources-plugin
+
+
+ copy-native-libs
+ prepare-package
+
+ copy-resources
+
+
+ ${project.build.outputDirectory}
+
+
+ ${project.basedir}/src/main/resources
+
+ **/*.so
+ **/*.dylib
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/paimon-faiss-jni/scripts/build-native.sh b/paimon-faiss-jni/scripts/build-native.sh
new file mode 100755
index 000000000000..672cea9c09a8
--- /dev/null
+++ b/paimon-faiss-jni/scripts/build-native.sh
@@ -0,0 +1,343 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+set -e
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
+NATIVE_DIR="$PROJECT_DIR/src/main/native"
+BUILD_DIR="$PROJECT_DIR/build/native"
+
+# Parse arguments
+OPT_LEVEL="generic"
+CLEAN=false
+FAT_LIB=true # Default to fat lib
+FAISS_VERSION="1.7.4" # Default FAISS version
+
+while [[ $# -gt 0 ]]; do
+ case $1 in
+ --opt-level)
+ OPT_LEVEL="$2"
+ shift 2
+ ;;
+ --faiss-version)
+ FAISS_VERSION="$2"
+ shift 2
+ ;;
+ --clean)
+ CLEAN=true
+ shift
+ ;;
+ --fat-lib)
+ FAT_LIB=true
+ shift
+ ;;
+ --no-fat-lib)
+ FAT_LIB=false
+ shift
+ ;;
+ --help)
+ echo "Usage: $0 [options]"
+ echo ""
+ echo "Options:"
+ echo " --opt-level LEVEL Optimization level: generic, avx2, avx512 (default: generic)"
+ echo " --faiss-version VER FAISS version to use (default: 1.7.4)"
+ echo " --fat-lib Build fat library with all dependencies (default: enabled)"
+ echo " --no-fat-lib Build without bundling dependencies"
+ echo " --clean Clean build directory before building"
+ echo " --help Show this help message"
+ echo ""
+ echo "Environment variables:"
+ echo " FAISS_ROOT Path to Faiss installation"
+ echo " JAVA_HOME Path to Java installation"
+ echo " OPENBLAS_ROOT Path to OpenBLAS installation"
+ echo ""
+ echo "Example:"
+ echo " FAISS_ROOT=/opt/faiss $0 --clean --fat-lib --faiss-version 1.8.0"
+ exit 0
+ ;;
+ *)
+ echo "Unknown option: $1"
+ exit 1
+ ;;
+ esac
+done
+
+echo "================================================"
+echo "Building Paimon Faiss JNI - Native Library"
+echo "================================================"
+echo "FAISS version: $FAISS_VERSION"
+echo "Optimization level: $OPT_LEVEL"
+echo "Fat library: $FAT_LIB"
+echo ""
+
+# Clean if requested
+if [ "$CLEAN" = true ]; then
+ echo "Cleaning build directory..."
+ rm -rf "$BUILD_DIR"
+fi
+
+# Create build directory
+mkdir -p "$BUILD_DIR"
+cd "$BUILD_DIR"
+
+# Check for CMake cache from different source directory (cross-machine builds)
+if [ -f "CMakeCache.txt" ]; then
+ CACHED_SOURCE=$(grep "CMAKE_HOME_DIRECTORY:INTERNAL=" CMakeCache.txt 2>/dev/null | cut -d'=' -f2)
+ if [ -n "$CACHED_SOURCE" ] && [ "$CACHED_SOURCE" != "$NATIVE_DIR" ]; then
+ echo "Detected CMake cache from different source directory."
+ echo " Cached: $CACHED_SOURCE"
+ echo " Current: $NATIVE_DIR"
+ echo "Cleaning build directory to avoid conflicts..."
+ rm -rf "$BUILD_DIR"/*
+ fi
+fi
+
+# Detect platform
+OS=$(uname -s)
+ARCH=$(uname -m)
+
+echo "Detected platform: $OS $ARCH"
+
+# macOS specific: check for libomp
+if [ "$OS" = "Darwin" ]; then
+ if ! brew list libomp &>/dev/null; then
+ echo ""
+ echo "WARNING: libomp not found. Installing via Homebrew..."
+ echo "Run: brew install libomp"
+ echo ""
+ echo "If you don't have Homebrew, install it from https://brew.sh"
+ echo "Or install libomp manually and set OPENMP_ROOT environment variable."
+ echo ""
+
+ # Try to install automatically
+ if command -v brew &>/dev/null; then
+ brew install libomp
+ else
+ echo "ERROR: Homebrew not found. Please install libomp manually."
+ exit 1
+ fi
+ else
+ echo "Found libomp via Homebrew"
+ fi
+fi
+
+# Run CMake
+echo ""
+echo "Configuring with CMake..."
+
+CMAKE_ARGS=(
+ -DCMAKE_BUILD_TYPE=Release
+ -DFAISS_VERSION="$FAISS_VERSION"
+ -DFAISS_OPT_LEVEL="$OPT_LEVEL"
+ -DBUILD_FAT_LIB="$FAT_LIB"
+)
+
+# Add platform-specific options
+if [ "$OS" = "Darwin" ]; then
+ # On macOS, we might need to specify the SDK
+ if [ -n "$SDKROOT" ]; then
+ CMAKE_ARGS+=(-DCMAKE_OSX_SYSROOT="$SDKROOT")
+ fi
+
+ # For Apple Silicon, we might want universal binary
+ if [ "$ARCH" = "arm64" ]; then
+ CMAKE_ARGS+=(-DCMAKE_OSX_ARCHITECTURES="arm64")
+ fi
+fi
+
+# If FAISS_ROOT is set, pass it to CMake
+if [ -n "$FAISS_ROOT" ]; then
+ CMAKE_ARGS+=(-DFAISS_ROOT="$FAISS_ROOT")
+ echo "Using FAISS_ROOT: $FAISS_ROOT"
+fi
+
+# If OPENBLAS_ROOT is set, pass it to CMake
+if [ -n "$OPENBLAS_ROOT" ]; then
+ CMAKE_ARGS+=(-DOPENBLAS_ROOT="$OPENBLAS_ROOT")
+ echo "Using OPENBLAS_ROOT: $OPENBLAS_ROOT"
+fi
+
+# If JAVA_HOME is set, use it
+if [ -n "$JAVA_HOME" ]; then
+ CMAKE_ARGS+=(-DJAVA_HOME="$JAVA_HOME")
+ echo "Using JAVA_HOME: $JAVA_HOME"
+fi
+
+cmake "${CMAKE_ARGS[@]}" "$NATIVE_DIR"
+
+# Build
+echo ""
+echo "Building..."
+cmake --build . --config Release -j "$(nproc 2>/dev/null || sysctl -n hw.ncpu)"
+
+echo ""
+echo "============================================"
+echo "Build completed successfully!"
+echo "============================================"
+
+# Determine output directory based on platform
+if [ "$OS" = "Linux" ]; then
+ PLATFORM_OS="linux"
+ if [ "$ARCH" = "x86_64" ] || [ "$ARCH" = "amd64" ]; then
+ PLATFORM_ARCH="amd64"
+ else
+ PLATFORM_ARCH="aarch64"
+ fi
+elif [ "$OS" = "Darwin" ]; then
+ PLATFORM_OS="darwin"
+ if [ "$ARCH" = "arm64" ]; then
+ PLATFORM_ARCH="aarch64"
+ else
+ PLATFORM_ARCH="amd64"
+ fi
+fi
+OUTPUT_DIR="$PROJECT_DIR/src/main/resources/$PLATFORM_OS/$PLATFORM_ARCH"
+
+# Bundle dependency libraries if building fat lib and they're dynamically linked
+if [ "$FAT_LIB" = true ] && [ "$OS" = "Linux" ]; then
+ echo ""
+ echo "Checking for dynamic dependencies to bundle..."
+
+ JNI_LIB="$OUTPUT_DIR/libpaimon_faiss_jni.so"
+ if [ -f "$JNI_LIB" ]; then
+ # Function to bundle a library and its dependencies
+ bundle_lib() {
+ local lib_path="$1"
+ local target_name="$2"
+
+ if [ -z "$lib_path" ] || [ ! -f "$lib_path" ]; then
+ return 1
+ fi
+
+ local real_path=$(readlink -f "$lib_path")
+ if [ -f "$OUTPUT_DIR/$target_name" ]; then
+ echo " Already bundled: $target_name"
+ return 0
+ fi
+
+ cp "$real_path" "$OUTPUT_DIR/$target_name"
+ chmod +x "$OUTPUT_DIR/$target_name"
+ echo " Bundled: $real_path -> $target_name"
+ return 0
+ }
+
+ # Libraries to bundle (pattern -> target name)
+ # We check the JNI lib and all bundled libs recursively
+ LIBS_TO_CHECK="$JNI_LIB"
+ LIBS_CHECKED=""
+
+ while [ -n "$LIBS_TO_CHECK" ]; do
+ CURRENT_LIB=$(echo "$LIBS_TO_CHECK" | awk '{print $1}')
+ LIBS_TO_CHECK=$(echo "$LIBS_TO_CHECK" | cut -d' ' -f2-)
+ [ "$LIBS_TO_CHECK" = "$CURRENT_LIB" ] && LIBS_TO_CHECK=""
+
+ # Skip if already checked
+ echo "$LIBS_CHECKED" | grep -q "$CURRENT_LIB" && continue
+ LIBS_CHECKED="$LIBS_CHECKED $CURRENT_LIB"
+
+ echo "Checking dependencies of: $(basename "$CURRENT_LIB")"
+
+ # Get all dependencies
+ DEPS=$(ldd "$CURRENT_LIB" 2>/dev/null | grep "=>" | awk '{print $1 " " $3}')
+
+ while IFS= read -r dep_line; do
+ [ -z "$dep_line" ] && continue
+ DEP_NAME=$(echo "$dep_line" | awk '{print $1}')
+ DEP_PATH=$(echo "$dep_line" | awk '{print $2}')
+
+ # Skip system libraries that are universally available
+ case "$DEP_NAME" in
+ linux-vdso.so*|libc.so*|libm.so*|libpthread.so*|libdl.so*|librt.so*|ld-linux*)
+ continue
+ ;;
+ esac
+
+ # Bundle specific libraries we know are problematic
+ case "$DEP_NAME" in
+ libopenblas*)
+ if bundle_lib "$DEP_PATH" "libopenblas.so.0"; then
+ LIBS_TO_CHECK="$LIBS_TO_CHECK $OUTPUT_DIR/libopenblas.so.0"
+ fi
+ ;;
+ libgfortran*)
+ # Keep the original versioned name
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ libgomp*)
+ bundle_lib "$DEP_PATH" "libgomp.so.1"
+ ;;
+ libquadmath*)
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ libgcc_s*)
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ libblas*|liblapack*)
+ bundle_lib "$DEP_PATH" "$DEP_NAME"
+ ;;
+ esac
+ done <<< "$DEPS"
+ done
+
+ # Set rpath to $ORIGIN for all bundled libraries
+ if command -v patchelf &>/dev/null; then
+ echo ""
+ echo "Setting rpath to \$ORIGIN for all libraries..."
+ for lib in "$OUTPUT_DIR"/*.so*; do
+ if [ -f "$lib" ]; then
+ patchelf --set-rpath '$ORIGIN' "$lib" 2>/dev/null || true
+ fi
+ done
+ echo "Done setting rpath"
+ else
+ echo ""
+ echo "WARNING: patchelf not found, cannot set rpath"
+ echo "Install with: sudo apt-get install patchelf"
+ fi
+ fi
+fi
+
+echo ""
+echo "Native library location:"
+BUILT_LIBS=$(find "$PROJECT_DIR/src/main/resources" -type f \( -name "*.so" -o -name "*.so.*" -o -name "*.dylib" \) 2>/dev/null)
+
+if [ -n "$BUILT_LIBS" ]; then
+ for lib in $BUILT_LIBS; do
+ echo ""
+ echo "Library: $lib"
+ ls -la "$lib"
+
+ # Show library dependencies
+ echo ""
+ echo "Dependencies:"
+ if [ "$OS" = "Darwin" ]; then
+ otool -L "$lib" 2>/dev/null | head -20 || true
+ elif [ "$OS" = "Linux" ]; then
+ ldd "$lib" 2>/dev/null | head -20 || readelf -d "$lib" 2>/dev/null | grep NEEDED | head -20 || true
+ fi
+ done
+else
+ echo " (no libraries found)"
+ ls -la "$PROJECT_DIR/src/main/resources/"*/*/ 2>/dev/null || true
+fi
+
+echo ""
+echo "To package the JAR with native libraries, run:"
+echo " mvn package"
+
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Faiss.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Faiss.java
new file mode 100644
index 000000000000..1d08e1b9c7f9
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Faiss.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/** Global Faiss configuration and utilities. */
+public final class Faiss {
+
+ static {
+ try {
+ NativeLibraryLoader.load();
+ } catch (FaissException e) {
+ throw new ExceptionInInitializerError(e);
+ }
+ }
+
+ public static String getVersion() {
+ return FaissNative.getVersion();
+ }
+
+ /**
+ * Set the number of threads for parallel operations.
+ *
+ *
This affects operations like index training, adding vectors, and searching. Set to 1 to
+ * disable parallelism.
+ *
+ * @param numThreads the number of threads (must be positive)
+ */
+ public static void setNumThreads(int numThreads) {
+ if (numThreads <= 0) {
+ throw new IllegalArgumentException("Number of threads must be positive: " + numThreads);
+ }
+ FaissNative.setNumThreads(numThreads);
+ }
+
+ /**
+ * Get the number of threads for parallel operations.
+ *
+ * @return the current number of threads
+ */
+ public static int getNumThreads() {
+ return FaissNative.getNumThreads();
+ }
+
+ /**
+ * Ensure the native library is loaded.
+ *
+ *
This method is called automatically when any Faiss class is used. It can be called
+ * explicitly to load the library early and catch any loading errors.
+ *
+ * @throws FaissException if the native library cannot be loaded
+ */
+ public static void loadLibrary() throws FaissException {
+ NativeLibraryLoader.load();
+ }
+
+ /**
+ * Check if the native library has been loaded.
+ *
+ * @return true if the library is loaded
+ */
+ public static boolean isLibraryLoaded() {
+ return NativeLibraryLoader.isLoaded();
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissException.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissException.java
new file mode 100644
index 000000000000..c670b619e899
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissException.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Exception thrown when a Faiss operation fails.
+ *
+ *
This exception wraps errors from the native Faiss library as well as errors that occur during
+ * JNI operations.
+ */
+public class FaissException extends Exception {
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Creates a new FaissException with the specified message.
+ *
+ * @param message the error message
+ */
+ public FaissException(String message) {
+ super(message);
+ }
+
+ /**
+ * Creates a new FaissException with the specified message and cause.
+ *
+ * @param message the error message
+ * @param cause the underlying cause
+ */
+ public FaissException(String message, Throwable cause) {
+ super(message, cause);
+ }
+
+ /**
+ * Creates a new FaissException with the specified cause.
+ *
+ * @param cause the underlying cause
+ */
+ public FaissException(Throwable cause) {
+ super(cause);
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissNative.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissNative.java
new file mode 100644
index 000000000000..f98a15b47f30
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/FaissNative.java
@@ -0,0 +1,328 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.nio.ByteBuffer;
+
+/**
+ * Native method declarations for Faiss JNI with zero-copy support.
+ *
+ *
This class contains all the native method declarations that are implemented in the JNI C++
+ * layer. These methods directly map to Faiss C++ API calls.
+ *
+ *
Users should not call these methods directly. Instead, use the high-level Java API classes
+ * like {@link Index} and {@link IndexFactory}.
+ *
+ *
All vector operations use {@link ByteBuffer#allocateDirect(int)} buffers to achieve zero-copy
+ * data transfer between Java and native code. This eliminates memory duplication and improves
+ * performance for large-scale vector operations.
+ */
+final class FaissNative {
+
+ static {
+ try {
+ NativeLibraryLoader.load();
+ } catch (FaissException e) {
+ throw new ExceptionInInitializerError(e);
+ }
+ }
+
+ /**
+ * Create an index using an index factory string.
+ *
+ * @param dimension the dimension of the vectors
+ * @param description the index description string (e.g., "Flat", "IVF100,Flat", "HNSW32")
+ * @param metricType the metric type (0 = L2, 1 = Inner Product)
+ * @return the native handle of the created index
+ */
+ static native long indexFactoryCreate(int dimension, String description, int metricType);
+
+ /**
+ * Destroy an index and free its resources.
+ *
+ * @param handle the native handle of the index
+ */
+ static native void indexDestroy(long handle);
+
+ /**
+ * Get the dimension of an index.
+ *
+ * @param handle the native handle of the index
+ * @return the dimension
+ */
+ static native int indexGetDimension(long handle);
+
+ /**
+ * Get the number of vectors in an index.
+ *
+ * @param handle the native handle of the index
+ * @return the number of vectors
+ */
+ static native long indexGetCount(long handle);
+
+ /**
+ * Check if an index is trained.
+ *
+ * @param handle the native handle of the index
+ * @return true if trained
+ */
+ static native boolean indexIsTrained(long handle);
+
+ /**
+ * Get the metric type of an index.
+ *
+ * @param handle the native handle of the index
+ * @return the metric type (0 = L2, 1 = Inner Product)
+ */
+ static native int indexGetMetricType(long handle);
+
+ /**
+ * Reset an index (remove all vectors).
+ *
+ * @param handle the native handle of the index
+ */
+ static native void indexReset(long handle);
+
+ /**
+ * Write an index to a file.
+ *
+ * @param handle the native handle of the index
+ * @param path the file path to write to
+ */
+ static native void indexWriteToFile(long handle, String path);
+
+ /**
+ * Read an index from a file.
+ *
+ * @param path the file path to read from
+ * @return the native handle of the loaded index
+ */
+ static native long indexReadFromFile(String path);
+
+ /**
+ * Add vectors to an index using a direct ByteBuffer (zero-copy).
+ *
+ *
The buffer must be a direct buffer created via {@link ByteBuffer#allocateDirect(int)} and
+ * must contain n * dimension floats in native byte order.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of vectors to add
+ * @param vectorBuffer direct ByteBuffer containing vectors (n * dimension * 4 bytes)
+ */
+ static native void indexAdd(long handle, long n, ByteBuffer vectorBuffer);
+
+ /**
+ * Add vectors with IDs to an index using direct ByteBuffers (zero-copy).
+ *
+ * @param handle the native handle of the index
+ * @param n the number of vectors to add
+ * @param vectorBuffer direct ByteBuffer containing vectors (n * dimension * 4 bytes)
+ * @param idBuffer direct ByteBuffer containing IDs (n * 8 bytes)
+ */
+ static native void indexAddWithIds(
+ long handle, long n, ByteBuffer vectorBuffer, ByteBuffer idBuffer);
+
+ /**
+ * Search for the k nearest neighbors.
+ *
+ * @param handle the native handle of the index
+ * @param n the number of query vectors
+ * @param queryVectors the query vectors (n * dimension floats)
+ * @param k the number of nearest neighbors to find
+ * @param distances output array for distances (n * k floats)
+ * @param labels output array for labels (n * k longs)
+ */
+ static native void indexSearch(
+ long handle, long n, float[] queryVectors, int k, float[] distances, long[] labels);
+
+ /**
+ * Train an index using a direct ByteBuffer (zero-copy).
+ *
+ * @param handle the native handle of the index
+ * @param n the number of training vectors
+ * @param vectorBuffer direct ByteBuffer containing training vectors (n * dimension * 4 bytes)
+ */
+ static native void indexTrain(long handle, long n, ByteBuffer vectorBuffer);
+
+ /**
+ * Search for neighbors within a given radius using direct ByteBuffer (zero-copy).
+ *
+ * @param handle the native handle of the index
+ * @param n the number of query vectors
+ * @param queryBuffer direct ByteBuffer containing query vectors (n * dimension * 4 bytes)
+ * @param radius the search radius
+ * @return a range search result handle
+ */
+ static native long indexRangeSearch(long handle, long n, ByteBuffer queryBuffer, float radius);
+
+ /**
+ * Serialize an index to a direct ByteBuffer (zero-copy).
+ *
+ *
Returns the number of bytes written. The caller must provide a buffer large enough to hold
+ * the serialized index. Use {@link #indexSerializeSize(long)} to get the required size.
+ *
+ * @param handle the native handle of the index
+ * @param buffer direct ByteBuffer to write the serialized index to
+ * @return the number of bytes written
+ */
+ static native long indexSerialize(long handle, ByteBuffer buffer);
+
+ /**
+ * Get the size in bytes needed to serialize an index.
+ *
+ * @param handle the native handle of the index
+ * @return the size in bytes
+ */
+ static native long indexSerializeSize(long handle);
+
+ /**
+ * Deserialize an index from a byte array.
+ *
+ * @param data the serialized index data
+ * @param length the number of bytes to read
+ * @return the native handle of the loaded index
+ */
+ static native long indexDeserialize(byte[] data, long length);
+
+ /**
+ * Destroy a range search result.
+ *
+ * @param handle the native handle of the range search result
+ */
+ static native void rangeSearchResultDestroy(long handle);
+
+ /**
+ * Get the number of results for each query in a range search.
+ *
+ * @param handle the native handle of the range search result
+ * @param limitsBuffer direct ByteBuffer for limits output ((nq + 1) * 8 bytes)
+ */
+ static native void rangeSearchResultGetLimits(long handle, ByteBuffer limitsBuffer);
+
+ /**
+ * Get the total number of results in a range search.
+ *
+ * @param handle the native handle of the range search result
+ * @return the total number of results
+ */
+ static native long rangeSearchResultGetTotalSize(long handle);
+
+ /**
+ * Get all labels from a range search result.
+ *
+ * @param handle the native handle of the range search result
+ * @param labelsBuffer direct ByteBuffer for labels output
+ */
+ static native void rangeSearchResultGetLabels(long handle, ByteBuffer labelsBuffer);
+
+ /**
+ * Get all distances from a range search result.
+ *
+ * @param handle the native handle of the range search result
+ * @param distancesBuffer direct ByteBuffer for distances output
+ */
+ static native void rangeSearchResultGetDistances(long handle, ByteBuffer distancesBuffer);
+
+ /**
+ * Get the number of queries in a range search result.
+ *
+ * @param handle the native handle of the range search result
+ * @return the number of queries
+ */
+ static native int rangeSearchResultGetNumQueries(long handle);
+
+ /**
+ * Get the number of probe lists for an IVF index.
+ *
+ * @param handle the native handle of the index
+ * @return the number of probe lists (nprobe)
+ */
+ static native int ivfGetNprobe(long handle);
+
+ /**
+ * Set the number of probe lists for an IVF index.
+ *
+ * @param handle the native handle of the index
+ * @param nprobe the number of probe lists
+ */
+ static native void ivfSetNprobe(long handle, int nprobe);
+
+ /**
+ * Get the number of lists (clusters) in an IVF index.
+ *
+ * @param handle the native handle of the index
+ * @return the number of lists
+ */
+ static native int ivfGetNlist(long handle);
+
+ /**
+ * Get the efSearch parameter of an HNSW index.
+ *
+ * @param handle the native handle of the index
+ * @return the efSearch value
+ */
+ static native int hnswGetEfSearch(long handle);
+
+ /**
+ * Set the efSearch parameter of an HNSW index.
+ *
+ * @param handle the native handle of the index
+ * @param efSearch the efSearch value
+ */
+ static native void hnswSetEfSearch(long handle, int efSearch);
+
+ /**
+ * Get the efConstruction parameter of an HNSW index.
+ *
+ * @param handle the native handle of the index
+ * @return the efConstruction value
+ */
+ static native int hnswGetEfConstruction(long handle);
+
+ /**
+ * Set the efConstruction parameter of an HNSW index.
+ *
+ *
This must be set before adding any vectors to the index.
+ *
+ * @param handle the native handle of the index
+ * @param efConstruction the efConstruction value
+ */
+ static native void hnswSetEfConstruction(long handle, int efConstruction);
+
+ /**
+ * Get the Faiss library version.
+ *
+ * @return the version string
+ */
+ static native String getVersion();
+
+ /**
+ * Set the number of threads for parallel operations.
+ *
+ * @param numThreads the number of threads
+ */
+ static native void setNumThreads(int numThreads);
+
+ /**
+ * Get the number of threads for parallel operations.
+ *
+ * @return the number of threads
+ */
+ static native int getNumThreads();
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java
new file mode 100644
index 000000000000..0ccf0a9f0d1c
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/Index.java
@@ -0,0 +1,354 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * A Faiss index for similarity search with zero-copy support.
+ *
+ *
This class wraps a native Faiss index and provides methods for adding vectors, searching for
+ * nearest neighbors, and managing the index. All vector operations use direct ByteBuffers for
+ * zero-copy data transfer between Java and native code.
+ *
+ *
Thread Safety: Index instances are NOT thread-safe. External synchronization is required if an
+ * index is accessed from multiple threads.
+ *
+ * @see IndexFactory
+ */
+public class Index implements AutoCloseable {
+
+ /** Native handle to the Faiss index. */
+ private long nativeHandle;
+
+ /** The dimension of vectors in this index. */
+ private final int dimension;
+
+ /** Whether this index has been closed. */
+ private volatile boolean closed = false;
+
+ /**
+ * Create an Index wrapper around a native handle.
+ *
+ * @param nativeHandle the native handle
+ * @param dimension the vector dimension
+ */
+ Index(long nativeHandle, int dimension) {
+ this.nativeHandle = nativeHandle;
+ this.dimension = dimension;
+ }
+
+ /**
+ * Get the dimension of vectors in this index.
+ *
+ * @return the vector dimension
+ */
+ public int getDimension() {
+ return dimension;
+ }
+
+ /**
+ * Get the number of vectors in this index.
+ *
+ * @return the number of vectors
+ */
+ public long getCount() {
+ checkNotClosed();
+ return FaissNative.indexGetCount(nativeHandle);
+ }
+
+ /**
+ * Check if this index is trained.
+ *
+ *
Some index types (like IVF) require training before vectors can be added. Flat indexes are
+ * always considered trained.
+ *
+ * @return true if the index is trained
+ */
+ public boolean isTrained() {
+ checkNotClosed();
+ return FaissNative.indexIsTrained(nativeHandle);
+ }
+
+ /**
+ * Get the metric type used by this index.
+ *
+ * @return the metric type
+ */
+ public MetricType getMetricType() {
+ checkNotClosed();
+ return MetricType.fromValue(FaissNative.indexGetMetricType(nativeHandle));
+ }
+
+ /**
+ * Train the index on a set of training vectors (zero-copy).
+ *
+ *
This is required for some index types (like IVF) before adding vectors. For flat indexes,
+ * this is a no-op.
+ *
+ * @param n the number of training vectors
+ * @param vectorBuffer direct ByteBuffer containing training vectors (n * dimension floats)
+ */
+ public void train(long n, ByteBuffer vectorBuffer) {
+ checkNotClosed();
+ validateDirectBuffer(vectorBuffer, n * dimension * Float.BYTES, "vector");
+ FaissNative.indexTrain(nativeHandle, n, vectorBuffer);
+ }
+
+ /**
+ * Add vectors to the index (zero-copy).
+ *
+ *
The vectors are assigned sequential IDs starting from the current count.
+ *
+ * @param n the number of vectors to add
+ * @param vectorBuffer direct ByteBuffer containing vectors (n * dimension floats)
+ */
+ public void add(long n, ByteBuffer vectorBuffer) {
+ checkNotClosed();
+ validateDirectBuffer(vectorBuffer, n * dimension * Float.BYTES, "vector");
+ FaissNative.indexAdd(nativeHandle, n, vectorBuffer);
+ }
+
+ /**
+ * Add vectors with explicit IDs to the index (zero-copy).
+ *
+ *
Note: Not all index types support this operation. Flat indexes and IndexIDMap wrapped
+ * indexes support it.
+ *
+ * @param n the number of vectors to add
+ * @param vectorBuffer direct ByteBuffer containing vectors (n * dimension floats)
+ * @param idBuffer direct ByteBuffer containing IDs (n longs)
+ */
+ public void addWithIds(long n, ByteBuffer vectorBuffer, ByteBuffer idBuffer) {
+ checkNotClosed();
+ validateDirectBuffer(vectorBuffer, n * dimension * Float.BYTES, "vector");
+ validateDirectBuffer(idBuffer, n * Long.BYTES, "id");
+ FaissNative.indexAddWithIds(nativeHandle, n, vectorBuffer, idBuffer);
+ }
+
+ /**
+ * Search for the k nearest neighbors of query vectors.
+ *
+ * @param n the number of query vectors
+ * @param queryVectors array containing query vectors (n * dimension floats)
+ * @param k the number of nearest neighbors to find
+ * @param distances output array for distances (n * k floats)
+ * @param labels output array for labels (n * k longs)
+ */
+ public void search(long n, float[] queryVectors, int k, float[] distances, long[] labels) {
+ checkNotClosed();
+ if (queryVectors.length < n * dimension) {
+ throw new IllegalArgumentException(
+ "Query vectors array too small: required "
+ + (n * dimension)
+ + ", got "
+ + queryVectors.length);
+ }
+ if (distances.length < n * k) {
+ throw new IllegalArgumentException(
+ "Distances array too small: required " + (n * k) + ", got " + distances.length);
+ }
+ if (labels.length < n * k) {
+ throw new IllegalArgumentException(
+ "Labels array too small: required " + (n * k) + ", got " + labels.length);
+ }
+ FaissNative.indexSearch(nativeHandle, n, queryVectors, k, distances, labels);
+ }
+
+ /**
+ * Search for all neighbors within a given radius (zero-copy).
+ *
+ * @param n the number of query vectors
+ * @param queryBuffer direct ByteBuffer containing query vectors (n * dimension floats)
+ * @param radius the search radius
+ * @return the range search result
+ */
+ public RangeSearchResult rangeSearch(long n, ByteBuffer queryBuffer, float radius) {
+ checkNotClosed();
+ validateDirectBuffer(queryBuffer, n * dimension * Float.BYTES, "query");
+ long resultHandle = FaissNative.indexRangeSearch(nativeHandle, n, queryBuffer, radius);
+ return new RangeSearchResult(resultHandle, (int) n);
+ }
+
+ /** Reset the index (remove all vectors). */
+ public void reset() {
+ checkNotClosed();
+ FaissNative.indexReset(nativeHandle);
+ }
+
+ /**
+ * Write the index to a file.
+ *
+ * @param path the file path
+ */
+ public void writeToFile(String path) {
+ checkNotClosed();
+ FaissNative.indexWriteToFile(nativeHandle, path);
+ }
+
+ /**
+ * Write the index to a file.
+ *
+ * @param file the file
+ */
+ public void writeToFile(File file) {
+ writeToFile(file.getAbsolutePath());
+ }
+
+ /**
+ * Read an index from a file.
+ *
+ * @param path the file path
+ * @return the loaded index
+ */
+ public static Index readFromFile(String path) {
+ long handle = FaissNative.indexReadFromFile(path);
+ int dimension = FaissNative.indexGetDimension(handle);
+ return new Index(handle, dimension);
+ }
+
+ /**
+ * Read an index from a file.
+ *
+ * @param file the file
+ * @return the loaded index
+ */
+ public static Index readFromFile(File file) {
+ return readFromFile(file.getAbsolutePath());
+ }
+
+ /**
+ * Get the size in bytes needed to serialize this index.
+ *
+ * @return the serialization size
+ */
+ public long serializeSize() {
+ checkNotClosed();
+ return FaissNative.indexSerializeSize(nativeHandle);
+ }
+
+ /**
+ * Serialize the index to a direct ByteBuffer (zero-copy).
+ *
+ * @param buffer direct ByteBuffer to write to (must have sufficient capacity)
+ * @return the number of bytes written
+ */
+ public long serialize(ByteBuffer buffer) {
+ checkNotClosed();
+ if (!buffer.isDirect()) {
+ throw new IllegalArgumentException("Buffer must be a direct buffer");
+ }
+ return FaissNative.indexSerialize(nativeHandle, buffer);
+ }
+
+ /**
+ * Deserialize an index from a byte array.
+ *
+ * @param data the serialized index data
+ * @return the deserialized index
+ */
+ public static Index deserialize(byte[] data) {
+ long handle = FaissNative.indexDeserialize(data, data.length);
+ int dimension = FaissNative.indexGetDimension(handle);
+ return new Index(handle, dimension);
+ }
+
+ /**
+ * Allocate a direct ByteBuffer suitable for vector data.
+ *
+ * @param numVectors number of vectors
+ * @param dimension vector dimension
+ * @return a direct ByteBuffer in native byte order
+ */
+ public static ByteBuffer allocateVectorBuffer(int numVectors, int dimension) {
+ return ByteBuffer.allocateDirect(numVectors * dimension * Float.BYTES)
+ .order(ByteOrder.nativeOrder());
+ }
+
+ /**
+ * Allocate a direct ByteBuffer suitable for ID data.
+ *
+ * @param numIds number of IDs
+ * @return a direct ByteBuffer in native byte order
+ */
+ public static ByteBuffer allocateIdBuffer(int numIds) {
+ return ByteBuffer.allocateDirect(numIds * Long.BYTES).order(ByteOrder.nativeOrder());
+ }
+
+ private void validateDirectBuffer(ByteBuffer buffer, long requiredBytes, String name) {
+ if (!buffer.isDirect()) {
+ throw new IllegalArgumentException(name + " buffer must be a direct buffer");
+ }
+ if (buffer.capacity() < requiredBytes) {
+ throw new IllegalArgumentException(
+ name
+ + " buffer too small: required "
+ + requiredBytes
+ + " bytes, got "
+ + buffer.capacity());
+ }
+ }
+
+ /**
+ * Get the native handle.
+ *
+ *
This is for internal use only.
+ *
+ * @return the native handle
+ */
+ long nativeHandle() {
+ return nativeHandle;
+ }
+
+ private void checkNotClosed() {
+ if (closed) {
+ throw new IllegalStateException("Index has been closed");
+ }
+ }
+
+ @Override
+ public void close() {
+ if (!closed) {
+ closed = true;
+ if (nativeHandle != 0) {
+ FaissNative.indexDestroy(nativeHandle);
+ nativeHandle = 0;
+ }
+ }
+ }
+
+ @Override
+ public String toString() {
+ if (closed) {
+ return "Index[closed]";
+ }
+ return "Index{"
+ + "dimension="
+ + dimension
+ + ", count="
+ + getCount()
+ + ", trained="
+ + isTrained()
+ + ", metricType="
+ + getMetricType()
+ + '}';
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexFactory.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexFactory.java
new file mode 100644
index 000000000000..09f081235a52
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexFactory.java
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/** Factory for creating Faiss indexes. */
+public final class IndexFactory {
+
+ /**
+ * Create a Faiss index using the index factory.
+ *
+ * @param dimension the dimension of the vectors
+ * @param description the index description string
+ * @param metricType the metric type for similarity computation
+ * @return the created index
+ */
+ public static Index create(int dimension, String description, MetricType metricType) {
+ if (dimension <= 0) {
+ throw new IllegalArgumentException("Dimension must be positive: " + dimension);
+ }
+ if (description == null || description.isEmpty()) {
+ throw new IllegalArgumentException("Index description cannot be null or empty");
+ }
+ if (metricType == null) {
+ throw new IllegalArgumentException("Metric type cannot be null");
+ }
+
+ long handle = FaissNative.indexFactoryCreate(dimension, description, metricType.getValue());
+ return new Index(handle, dimension);
+ }
+
+ /**
+ * Create a Faiss index with L2 (Euclidean) metric.
+ *
+ * @param dimension the dimension of the vectors
+ * @param description the index description string
+ * @return the created index
+ */
+ public static Index create(int dimension, String description) {
+ return create(dimension, description, MetricType.L2);
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexHNSW.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexHNSW.java
new file mode 100644
index 000000000000..1e4744f8e748
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexHNSW.java
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Utility class for HNSW (Hierarchical Navigable Small World) index operations.
+ *
+ *
HNSW indexes build a graph structure for fast approximate nearest neighbor search. The key
+ * parameters are:
+ *
+ *
+ *
{@code M} - The number of neighbors in the graph. Higher values increase memory usage and
+ * build time but improve search accuracy.
+ *
{@code efConstruction} - The size of the dynamic candidate list during construction. Higher
+ * values increase build time but can improve the graph quality.
+ *
{@code efSearch} - The size of the dynamic candidate list during search. Higher values
+ * increase search accuracy at the cost of speed.
+ *
+ */
+public final class IndexHNSW {
+
+ /**
+ * Get the efSearch parameter.
+ *
+ *
This controls the size of the dynamic candidate list during search. Higher values give
+ * more accurate results but slower search.
+ *
+ * @param index the HNSW index
+ * @return the current efSearch value
+ * @throws IllegalArgumentException if the index is not an HNSW index
+ */
+ public static int getEfSearch(Index index) {
+ return FaissNative.hnswGetEfSearch(index.nativeHandle());
+ }
+
+ /**
+ * Set the efSearch parameter.
+ *
+ *
This should be at least k (the number of neighbors requested in search). Typical values
+ * range from 16 to 256. Higher values give more accurate results but slower search.
+ *
+ * @param index the HNSW index
+ * @param efSearch the efSearch value
+ * @throws IllegalArgumentException if the index is not an HNSW index
+ */
+ public static void setEfSearch(Index index, int efSearch) {
+ if (efSearch <= 0) {
+ throw new IllegalArgumentException("efSearch must be positive: " + efSearch);
+ }
+ FaissNative.hnswSetEfSearch(index.nativeHandle(), efSearch);
+ }
+
+ /**
+ * Get the efConstruction parameter.
+ *
+ *
This was the size of the dynamic candidate list during index construction. It cannot be
+ * changed after the index is built.
+ *
+ * @param index the HNSW index
+ * @return the efConstruction value
+ * @throws IllegalArgumentException if the index is not an HNSW index
+ */
+ public static int getEfConstruction(Index index) {
+ return FaissNative.hnswGetEfConstruction(index.nativeHandle());
+ }
+
+ /**
+ * Set the efConstruction parameter.
+ *
+ *
This controls the size of the dynamic candidate list during construction. It must be set
+ * before adding any vectors to the index. Higher values give more accurate results but slower
+ * construction. Typical values range from 40 to 400.
+ *
+ * @param index the HNSW index
+ * @param efConstruction the efConstruction value
+ * @throws IllegalArgumentException if the index is not an HNSW index or efConstruction is not
+ * positive
+ */
+ public static void setEfConstruction(Index index, int efConstruction) {
+ if (efConstruction <= 0) {
+ throw new IllegalArgumentException(
+ "efConstruction must be positive: " + efConstruction);
+ }
+ FaissNative.hnswSetEfConstruction(index.nativeHandle(), efConstruction);
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexIVF.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexIVF.java
new file mode 100644
index 000000000000..7cf98dfbd0f1
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/IndexIVF.java
@@ -0,0 +1,62 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/** Utility class for IVF (Inverted File) index operations. */
+public final class IndexIVF {
+
+ /**
+ * Get the number of clusters to probe during search (nprobe).
+ *
+ * @param index the IVF index
+ * @return the current nprobe value
+ * @throws IllegalArgumentException if the index is not an IVF index
+ */
+ public static int getNprobe(Index index) {
+ return FaissNative.ivfGetNprobe(index.nativeHandle());
+ }
+
+ /**
+ * Set the number of clusters to probe during search (nprobe).
+ *
+ *
Higher values increase accuracy but decrease search speed. A good starting point is 1-10%
+ * of the total number of clusters.
+ *
+ * @param index the IVF index
+ * @param nprobe the number of clusters to probe
+ * @throws IllegalArgumentException if the index is not an IVF index
+ */
+ public static void setNprobe(Index index, int nprobe) {
+ if (nprobe <= 0) {
+ throw new IllegalArgumentException("nprobe must be positive: " + nprobe);
+ }
+ FaissNative.ivfSetNprobe(index.nativeHandle(), nprobe);
+ }
+
+ /**
+ * Get the total number of clusters (nlist) in the index.
+ *
+ * @param index the IVF index
+ * @return the number of clusters
+ * @throws IllegalArgumentException if the index is not an IVF index
+ */
+ public static int getNlist(Index index) {
+ return FaissNative.ivfGetNlist(index.nativeHandle());
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/MetricType.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/MetricType.java
new file mode 100644
index 000000000000..cd20bef07cbe
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/MetricType.java
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+/**
+ * Metric type for similarity search.
+ *
+ *
Faiss supports two main metric types for measuring similarity between vectors:
+ *
+ *
+ *
{@link #L2} - Euclidean distance (L2 norm). Smaller values indicate more similar vectors.
+ *
{@link #INNER_PRODUCT} - Inner product (dot product). Larger values indicate more similar
+ * vectors.
+ *
+ */
+public enum MetricType {
+ L2(0),
+ INNER_PRODUCT(1);
+
+ private final int value;
+
+ MetricType(int value) {
+ this.value = value;
+ }
+
+ /**
+ * Get the numeric value of this metric type.
+ *
+ * @return the numeric value
+ */
+ public int getValue() {
+ return value;
+ }
+
+ /**
+ * Get a MetricType from its numeric value.
+ *
+ * @param value the numeric value
+ * @return the corresponding MetricType
+ * @throws IllegalArgumentException if the value is not valid
+ */
+ public static MetricType fromValue(int value) {
+ for (MetricType type : values()) {
+ if (type.value == value) {
+ return type;
+ }
+ }
+ throw new IllegalArgumentException("Unknown metric type value: " + value);
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/NativeLibraryLoader.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/NativeLibraryLoader.java
new file mode 100644
index 000000000000..58c8806378c2
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/NativeLibraryLoader.java
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Path;
+
+/**
+ * Native library loader for Faiss JNI.
+ *
+ *
This class is responsible for loading the native Faiss library from the JAR file or system
+ * path. It follows a similar pattern to RocksDB's native library loading mechanism.
+ *
+ *
The loader attempts to load the library in the following order:
+ *
+ *
+ *
From the path specified by the {@code paimon.faiss.lib.path} system property
+ *
From the system library path using {@code System.loadLibrary}
+ *
From the JAR file bundled with the distribution
+ *
+ */
+public class NativeLibraryLoader {
+ private static final Logger LOG = LoggerFactory.getLogger(NativeLibraryLoader.class);
+
+ /** The name of the native library. */
+ private static final String JNI_LIBRARY_NAME = "paimon_faiss_jni";
+
+ /** System property to specify a custom path to the native library. */
+ private static final String LIBRARY_PATH_PROPERTY = "paimon.faiss.lib.path";
+
+ /**
+ * Dependency libraries that need to be loaded before the main JNI library. These are bundled in
+ * the JAR when the main library cannot be statically linked.
+ *
+ *
Order matters! Libraries must be loaded before the libraries that depend on them.
+ */
+ private static final String[] DEPENDENCY_LIBRARIES = {
+ // GCC runtime libraries (must be loaded first as others depend on them)
+ "libgcc_s.so.1",
+ // Quadmath library (needed by gfortran)
+ "libquadmath.so.0",
+ // Fortran runtime (needed by OpenBLAS) - try multiple versions
+ "libgfortran.so.5",
+ "libgfortran.so.4",
+ "libgfortran.so.3",
+ // OpenMP runtime
+ "libgomp.so.1",
+ // BLAS/LAPACK
+ "libblas.so.3",
+ "liblapack.so.3",
+ // OpenBLAS for FAISS (load last as it depends on above)
+ "libopenblas.so.0",
+ };
+
+ /** Whether the native library has been loaded. */
+ private static volatile boolean libraryLoaded = false;
+
+ /** Lock for thread-safe library loading. */
+ private static final Object LOAD_LOCK = new Object();
+
+ /** Temporary directory for extracting native libraries. */
+ private static Path tempDir;
+
+ private NativeLibraryLoader() {
+ // Utility class, no instantiation
+ }
+
+ /**
+ * Load the native library.
+ *
+ * @throws FaissException if the library cannot be loaded
+ */
+ public static void load() throws FaissException {
+ if (libraryLoaded) {
+ return;
+ }
+
+ synchronized (LOAD_LOCK) {
+ if (libraryLoaded) {
+ return;
+ }
+
+ try {
+ loadNativeLibrary();
+ libraryLoaded = true;
+ LOG.info("Faiss native library loaded successfully");
+ } catch (Exception e) {
+ throw new FaissException("Failed to load Faiss native library", e);
+ }
+ }
+ }
+
+ /**
+ * Check if the native library has been loaded.
+ *
+ * @return true if the library is loaded
+ */
+ public static boolean isLoaded() {
+ return libraryLoaded;
+ }
+
+ private static void loadNativeLibrary() throws IOException {
+ // First, try loading from custom path
+ String customPath = System.getProperty(LIBRARY_PATH_PROPERTY);
+ if (customPath != null && !customPath.isEmpty()) {
+ File customLibrary = new File(customPath);
+ if (customLibrary.exists()) {
+ System.load(customLibrary.getAbsolutePath());
+ LOG.info("Loaded Faiss native library from custom path: {}", customPath);
+ return;
+ } else {
+ LOG.warn("Custom library path specified but file not found: {}", customPath);
+ }
+ }
+
+ // Second, try loading from system library path
+ try {
+ System.loadLibrary(JNI_LIBRARY_NAME);
+ LOG.info("Loaded Faiss native library from system path");
+ return;
+ } catch (UnsatisfiedLinkError e) {
+ LOG.debug(
+ "Could not load from system path, trying bundled library: {}", e.getMessage());
+ }
+
+ // Third, try loading from JAR
+ loadFromJar();
+ }
+
+ private static void loadFromJar() throws IOException {
+ String libraryPath = getLibraryResourcePath();
+ LOG.debug("Attempting to load native library from JAR: {}", libraryPath);
+
+ try (InputStream is = NativeLibraryLoader.class.getResourceAsStream(libraryPath)) {
+ if (is == null) {
+ throw new IOException(
+ "Native library not found in JAR: "
+ + libraryPath
+ + ". "
+ + "Make sure you are using the correct JAR for your platform ("
+ + getPlatformIdentifier()
+ + ")");
+ }
+
+ // Create temp directory if needed
+ if (tempDir == null) {
+ tempDir = Files.createTempDirectory("paimon-faiss-native");
+ tempDir.toFile().deleteOnExit();
+ }
+
+ // First, extract and load dependency libraries (if bundled)
+ loadDependencyLibraries();
+
+ // Extract native library to temp file
+ String fileName = System.mapLibraryName(JNI_LIBRARY_NAME);
+ File tempFile = new File(tempDir.toFile(), fileName);
+ tempFile.deleteOnExit();
+
+ try (OutputStream os = new FileOutputStream(tempFile)) {
+ byte[] buffer = new byte[8192];
+ int bytesRead;
+ while ((bytesRead = is.read(buffer)) != -1) {
+ os.write(buffer, 0, bytesRead);
+ }
+ }
+
+ // Make the file executable (for Unix-like systems)
+ if (!tempFile.setExecutable(true)) {
+ LOG.warn("Could not set executable permission on native library");
+ }
+
+ // Load the library
+ System.load(tempFile.getAbsolutePath());
+ LOG.info("Loaded Faiss native library from JAR: {}", libraryPath);
+ }
+ }
+
+ /**
+ * Extract and load dependency libraries that are bundled in the JAR. These must be loaded
+ * before the main JNI library to satisfy its dynamic linking requirements.
+ */
+ private static void loadDependencyLibraries() {
+ String os = getOsName();
+ String arch = getArchName();
+
+ for (String depLib : DEPENDENCY_LIBRARIES) {
+ String resourcePath = "/" + os + "/" + arch + "/" + depLib;
+ try (InputStream is = NativeLibraryLoader.class.getResourceAsStream(resourcePath)) {
+ if (is == null) {
+ LOG.debug("Dependency library not bundled: {}", depLib);
+ continue;
+ }
+
+ File tempFile = new File(tempDir.toFile(), depLib);
+ tempFile.deleteOnExit();
+
+ try (OutputStream fos = new FileOutputStream(tempFile)) {
+ byte[] buffer = new byte[8192];
+ int bytesRead;
+ while ((bytesRead = is.read(buffer)) != -1) {
+ fos.write(buffer, 0, bytesRead);
+ }
+ }
+
+ if (!tempFile.setExecutable(true)) {
+ LOG.warn("Could not set executable permission on: {}", depLib);
+ }
+
+ // Load the dependency library
+ System.load(tempFile.getAbsolutePath());
+ LOG.info("Loaded bundled dependency library: {}", depLib);
+ } catch (UnsatisfiedLinkError e) {
+ // Library might already be loaded or not needed
+ LOG.debug("Could not load dependency {}: {}", depLib, e.getMessage());
+ } catch (IOException e) {
+ LOG.debug("Could not extract dependency {}: {}", depLib, e.getMessage());
+ }
+ }
+ }
+
+ private static String getLibraryResourcePath() {
+ String os = getOsName();
+ String arch = getArchName();
+ String libraryFileName = System.mapLibraryName(JNI_LIBRARY_NAME);
+ return "/" + os + "/" + arch + "/" + libraryFileName;
+ }
+
+ /**
+ * Get the platform identifier for the current system.
+ *
+ * @return platform identifier string (e.g., "linux/amd64", "darwin/aarch64")
+ */
+ static String getPlatformIdentifier() {
+ return getOsName() + "/" + getArchName();
+ }
+
+ /**
+ * Get the normalized OS name for the current system.
+ *
+ * @return OS name string (e.g., "linux", "darwin")
+ */
+ private static String getOsName() {
+ String osName = System.getProperty("os.name").toLowerCase();
+
+ if (osName.contains("linux")) {
+ return "linux";
+ } else if (osName.contains("mac") || osName.contains("darwin")) {
+ return "darwin";
+ } else {
+ throw new UnsupportedOperationException(
+ "Unsupported operating system: "
+ + osName
+ + ". Only Linux and macOS are supported.");
+ }
+ }
+
+ /**
+ * Get the normalized architecture name for the current system.
+ *
+ * @return architecture name string (e.g., "amd64", "aarch64")
+ */
+ private static String getArchName() {
+ String osArch = System.getProperty("os.arch").toLowerCase();
+
+ if (osArch.equals("amd64") || osArch.equals("x86_64")) {
+ return "amd64";
+ } else if (osArch.equals("aarch64") || osArch.equals("arm64")) {
+ return "aarch64";
+ } else {
+ throw new UnsupportedOperationException("Unsupported architecture: " + osArch);
+ }
+ }
+
+ /**
+ * Get the name of the JNI library.
+ *
+ * @return the library name
+ */
+ public static String getLibraryName() {
+ return JNI_LIBRARY_NAME;
+ }
+}
diff --git a/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java
new file mode 100644
index 000000000000..8cbecf568fa2
--- /dev/null
+++ b/paimon-faiss-jni/src/main/java/org/apache/paimon/faiss/RangeSearchResult.java
@@ -0,0 +1,204 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+import java.nio.LongBuffer;
+
+/**
+ * Result of a range search operation with zero-copy support.
+ *
+ *
Unlike k-NN search which returns a fixed number of neighbors per query, range search returns
+ * all neighbors within a given radius, which can vary per query.
+ *
+ *
This class uses direct ByteBuffers to avoid memory copies when retrieving results from native
+ * code.
+ */
+public class RangeSearchResult implements AutoCloseable {
+
+ private long nativeHandle;
+ private final int numQueries;
+ private ByteBuffer limitsBuffer;
+ private ByteBuffer labelsBuffer;
+ private ByteBuffer distancesBuffer;
+ private long totalSize = -1;
+
+ /**
+ * Create a new RangeSearchResult from a native handle.
+ *
+ * @param nativeHandle the native handle
+ * @param numQueries the number of query vectors
+ */
+ RangeSearchResult(long nativeHandle, int numQueries) {
+ this.nativeHandle = nativeHandle;
+ this.numQueries = numQueries;
+ }
+
+ /**
+ * Get the number of query vectors.
+ *
+ * @return the number of queries
+ */
+ public int getNumQueries() {
+ return numQueries;
+ }
+
+ /**
+ * Get the number of results for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the number of results
+ */
+ public long getResultCount(int queryIndex) {
+ ensureLimitsLoaded();
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ LongBuffer limits = limitsBuffer.asLongBuffer();
+ return limits.get(queryIndex + 1) - limits.get(queryIndex);
+ }
+
+ /**
+ * Get the total number of results across all queries.
+ *
+ * @return the total number of results
+ */
+ public long getTotalResultCount() {
+ if (totalSize < 0 && nativeHandle != 0) {
+ totalSize = FaissNative.rangeSearchResultGetTotalSize(nativeHandle);
+ }
+ return totalSize;
+ }
+
+ /**
+ * Get the limits buffer containing the start/end indices for each query.
+ *
+ *
The limits buffer has (numQueries + 1) longs. For query i, results are in the range
+ * [limits[i], limits[i+1]).
+ *
+ * @return the limits buffer as a LongBuffer view
+ */
+ public LongBuffer getLimitsBuffer() {
+ ensureLimitsLoaded();
+ limitsBuffer.rewind();
+ return limitsBuffer.asLongBuffer();
+ }
+
+ /**
+ * Get the labels buffer containing all result labels.
+ *
+ * @return the labels buffer as a LongBuffer view
+ */
+ public LongBuffer getLabelsBuffer() {
+ ensureFullyLoaded();
+ labelsBuffer.rewind();
+ return labelsBuffer.asLongBuffer();
+ }
+
+ /**
+ * Get the distances buffer containing all result distances.
+ *
+ * @return the distances buffer as a FloatBuffer view
+ */
+ public FloatBuffer getDistancesBuffer() {
+ ensureFullyLoaded();
+ distancesBuffer.rewind();
+ return distancesBuffer.asFloatBuffer();
+ }
+
+ /**
+ * Get the labels for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the labels for this query
+ */
+ public long[] getLabelsForQuery(int queryIndex) {
+ ensureFullyLoaded();
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ LongBuffer limits = limitsBuffer.asLongBuffer();
+ int start = (int) limits.get(queryIndex);
+ int end = (int) limits.get(queryIndex + 1);
+ int count = end - start;
+
+ long[] result = new long[count];
+ LongBuffer labels = labelsBuffer.asLongBuffer();
+ labels.position(start);
+ labels.get(result);
+ return result;
+ }
+
+ /**
+ * Get the distances for a specific query.
+ *
+ * @param queryIndex the query index
+ * @return the distances for this query
+ */
+ public float[] getDistancesForQuery(int queryIndex) {
+ ensureFullyLoaded();
+ if (queryIndex < 0 || queryIndex >= numQueries) {
+ throw new IndexOutOfBoundsException("Query index out of bounds: " + queryIndex);
+ }
+ LongBuffer limits = limitsBuffer.asLongBuffer();
+ int start = (int) limits.get(queryIndex);
+ int end = (int) limits.get(queryIndex + 1);
+ int count = end - start;
+
+ float[] result = new float[count];
+ FloatBuffer distances = distancesBuffer.asFloatBuffer();
+ distances.position(start);
+ distances.get(result);
+ return result;
+ }
+
+ private void ensureLimitsLoaded() {
+ if (limitsBuffer == null && nativeHandle != 0) {
+ limitsBuffer =
+ ByteBuffer.allocateDirect((numQueries + 1) * Long.BYTES)
+ .order(ByteOrder.nativeOrder());
+ FaissNative.rangeSearchResultGetLimits(nativeHandle, limitsBuffer);
+ }
+ }
+
+ private void ensureFullyLoaded() {
+ ensureLimitsLoaded();
+ if (labelsBuffer == null && nativeHandle != 0) {
+ long total = getTotalResultCount();
+ labelsBuffer =
+ ByteBuffer.allocateDirect((int) total * Long.BYTES)
+ .order(ByteOrder.nativeOrder());
+ distancesBuffer =
+ ByteBuffer.allocateDirect((int) total * Float.BYTES)
+ .order(ByteOrder.nativeOrder());
+ FaissNative.rangeSearchResultGetLabels(nativeHandle, labelsBuffer);
+ FaissNative.rangeSearchResultGetDistances(nativeHandle, distancesBuffer);
+ }
+ }
+
+ @Override
+ public void close() {
+ if (nativeHandle != 0) {
+ FaissNative.rangeSearchResultDestroy(nativeHandle);
+ nativeHandle = 0;
+ }
+ }
+}
diff --git a/paimon-faiss-jni/src/main/native/CMakeLists.txt b/paimon-faiss-jni/src/main/native/CMakeLists.txt
new file mode 100644
index 000000000000..b7f3c8713975
--- /dev/null
+++ b/paimon-faiss-jni/src/main/native/CMakeLists.txt
@@ -0,0 +1,457 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cmake_minimum_required(VERSION 3.30.1)
+project(paimon_faiss_jni VERSION 0.1.0 LANGUAGES CXX)
+
+# FAISS version
+set(FAISS_VERSION "1.7.4" CACHE STRING "FAISS library version")
+add_definitions(-DFAISS_VERSION="${FAISS_VERSION}")
+
+# Check GCC version (must be >= 9.3.0)
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
+ if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS "9.3.0")
+ message(FATAL_ERROR "GCC version must be >= 9.3.0. Found: ${CMAKE_CXX_COMPILER_VERSION}")
+ endif()
+ message(STATUS "Using GCC ${CMAKE_CXX_COMPILER_VERSION}")
+endif()
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+set(CMAKE_POSITION_INDEPENDENT_CODE ON)
+
+# Options
+option(FAISS_ENABLE_GPU "Build with GPU support" OFF)
+option(FAISS_OPT_LEVEL "Optimization level (generic, avx2, avx512)" "generic")
+option(BUILD_FAT_LIB "Build fat library with all dependencies statically linked" ON)
+
+# Find JNI
+find_package(JNI REQUIRED)
+include_directories(${JNI_INCLUDE_DIRS})
+
+# Find OpenMP (with special handling for macOS)
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ # macOS requires special handling for OpenMP
+ # First try to find libomp from Homebrew
+ execute_process(
+ COMMAND brew --prefix libomp
+ OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX
+ OUTPUT_STRIP_TRAILING_WHITESPACE
+ ERROR_QUIET
+ )
+
+ if(HOMEBREW_LIBOMP_PREFIX)
+ message(STATUS "Found Homebrew libomp: ${HOMEBREW_LIBOMP_PREFIX}")
+ set(OpenMP_C_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+ set(OpenMP_CXX_FLAGS "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include")
+ set(OpenMP_C_LIB_NAMES "omp")
+ set(OpenMP_CXX_LIB_NAMES "omp")
+ set(OpenMP_omp_LIBRARY "${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib")
+
+ # Create imported target manually
+ if(NOT TARGET OpenMP::OpenMP_CXX)
+ add_library(OpenMP::OpenMP_CXX SHARED IMPORTED)
+ set_target_properties(OpenMP::OpenMP_CXX PROPERTIES
+ IMPORTED_LOCATION "${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib"
+ INTERFACE_INCLUDE_DIRECTORIES "${HOMEBREW_LIBOMP_PREFIX}/include"
+ INTERFACE_COMPILE_OPTIONS "-Xpreprocessor;-fopenmp"
+ )
+ endif()
+ set(OpenMP_FOUND TRUE)
+ else()
+ message(WARNING "libomp not found via Homebrew. Trying standard OpenMP detection...")
+ find_package(OpenMP)
+ endif()
+else()
+ find_package(OpenMP REQUIRED)
+endif()
+
+if(NOT OpenMP_FOUND AND NOT TARGET OpenMP::OpenMP_CXX)
+ message(WARNING "OpenMP not found. Building without OpenMP support.")
+ message(WARNING "On macOS, install libomp: brew install libomp")
+endif()
+
+# Find Faiss
+# For fat lib, prefer static libraries
+if(BUILD_FAT_LIB)
+ message(STATUS "Building fat library - preferring static libraries")
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a" ".so" ".dylib")
+ set(FAISS_STATIC_PREFERRED TRUE)
+else()
+ set(FAISS_STATIC_PREFERRED FALSE)
+endif()
+
+# First try to find Faiss via CMake config
+find_package(faiss CONFIG QUIET)
+
+if(NOT faiss_FOUND)
+ # Try pkg-config
+ find_package(PkgConfig QUIET)
+ if(PKG_CONFIG_FOUND)
+ pkg_check_modules(FAISS QUIET faiss)
+ endif()
+
+ if(NOT FAISS_FOUND)
+ # Manual search - look in common locations
+ find_path(FAISS_INCLUDE_DIR
+ NAMES faiss/Index.h
+ PATHS
+ /usr/local/include
+ /usr/include
+ ${FAISS_ROOT}/include
+ $ENV{FAISS_ROOT}/include
+ )
+
+ # For fat lib, try to find static library first
+ if(BUILD_FAT_LIB)
+ find_library(FAISS_LIBRARY_STATIC
+ NAMES libfaiss.a faiss_static
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ ${FAISS_ROOT}/lib
+ ${FAISS_ROOT}/lib64
+ $ENV{FAISS_ROOT}/lib
+ $ENV{FAISS_ROOT}/lib64
+ )
+ if(FAISS_LIBRARY_STATIC)
+ set(FAISS_LIBRARY ${FAISS_LIBRARY_STATIC})
+ message(STATUS "Found Faiss static library: ${FAISS_LIBRARY}")
+ endif()
+ endif()
+
+ # If static not found or not building fat lib, find any library
+ if(NOT FAISS_LIBRARY)
+ find_library(FAISS_LIBRARY
+ NAMES faiss
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ ${FAISS_ROOT}/lib
+ ${FAISS_ROOT}/lib64
+ $ENV{FAISS_ROOT}/lib
+ $ENV{FAISS_ROOT}/lib64
+ )
+ endif()
+
+ if(FAISS_INCLUDE_DIR AND FAISS_LIBRARY)
+ set(FAISS_FOUND TRUE)
+ set(FAISS_INCLUDE_DIRS ${FAISS_INCLUDE_DIR})
+ set(FAISS_LIBRARIES ${FAISS_LIBRARY})
+ message(STATUS "Found Faiss: ${FAISS_LIBRARY}")
+ else()
+ message(FATAL_ERROR "Faiss not found. Please install Faiss or set FAISS_ROOT environment variable.")
+ endif()
+ endif()
+endif()
+
+# Find BLAS/LAPACK for static linking (Faiss depends on them)
+if(BUILD_FAT_LIB)
+ # Save original suffixes
+ set(_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES ${CMAKE_FIND_LIBRARY_SUFFIXES})
+
+ # Force static library search only
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
+
+ # Try to find OpenBLAS static library
+ find_library(OPENBLAS_STATIC_LIBRARY
+ NAMES openblas openblas_static
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ /usr/lib/x86_64-linux-gnu
+ /usr/lib/aarch64-linux-gnu
+ ${OPENBLAS_ROOT}/lib
+ $ENV{OPENBLAS_ROOT}/lib
+ NO_DEFAULT_PATH
+ )
+
+ # Also try default paths
+ if(NOT OPENBLAS_STATIC_LIBRARY)
+ find_library(OPENBLAS_STATIC_LIBRARY
+ NAMES openblas openblas_static
+ )
+ endif()
+
+ if(OPENBLAS_STATIC_LIBRARY AND OPENBLAS_STATIC_LIBRARY MATCHES "\\.a$")
+ message(STATUS "Found OpenBLAS static library: ${OPENBLAS_STATIC_LIBRARY}")
+ set(OPENBLAS_USE_STATIC TRUE)
+ list(APPEND FAISS_STATIC_LIBS ${OPENBLAS_STATIC_LIBRARY})
+ else()
+ message(STATUS "OpenBLAS static library not found, trying shared library")
+ set(OPENBLAS_USE_STATIC FALSE)
+
+ # Restore suffixes and find shared library
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
+ find_library(OPENBLAS_SHARED_LIBRARY
+ NAMES openblas
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ /usr/lib/x86_64-linux-gnu
+ /usr/lib/aarch64-linux-gnu
+ ${OPENBLAS_ROOT}/lib
+ $ENV{OPENBLAS_ROOT}/lib
+ )
+ if(OPENBLAS_SHARED_LIBRARY)
+ message(STATUS "Found OpenBLAS shared library: ${OPENBLAS_SHARED_LIBRARY}")
+ list(APPEND FAISS_EXTRA_LIBS ${OPENBLAS_SHARED_LIBRARY})
+ # Mark that we need to bundle this library
+ set(BUNDLE_OPENBLAS TRUE)
+ set(BUNDLE_OPENBLAS_PATH ${OPENBLAS_SHARED_LIBRARY})
+ else()
+ # Try to find any BLAS
+ find_package(BLAS QUIET)
+ if(BLAS_FOUND)
+ list(APPEND FAISS_EXTRA_LIBS ${BLAS_LIBRARIES})
+ message(STATUS "Found BLAS: ${BLAS_LIBRARIES}")
+ endif()
+ endif()
+ endif()
+
+ # Restore suffixes for static search
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ".a")
+
+ # Find LAPACK static library
+ find_library(LAPACK_STATIC_LIBRARY
+ NAMES lapack
+ PATHS
+ /usr/local/lib
+ /usr/lib
+ /usr/local/lib64
+ /usr/lib64
+ /usr/lib/x86_64-linux-gnu
+ /usr/lib/aarch64-linux-gnu
+ )
+ if(LAPACK_STATIC_LIBRARY AND LAPACK_STATIC_LIBRARY MATCHES "\\.a$")
+ message(STATUS "Found LAPACK static library: ${LAPACK_STATIC_LIBRARY}")
+ list(APPEND FAISS_STATIC_LIBS ${LAPACK_STATIC_LIBRARY})
+ endif()
+
+ # Find gfortran static library (needed by OpenBLAS)
+ find_library(GFORTRAN_STATIC_LIBRARY
+ NAMES gfortran
+ )
+ if(GFORTRAN_STATIC_LIBRARY AND GFORTRAN_STATIC_LIBRARY MATCHES "\\.a$")
+ message(STATUS "Found gfortran static library: ${GFORTRAN_STATIC_LIBRARY}")
+ list(APPEND FAISS_STATIC_LIBS ${GFORTRAN_STATIC_LIBRARY})
+ endif()
+
+ # Restore original suffixes
+ set(CMAKE_FIND_LIBRARY_SUFFIXES ${_ORIG_CMAKE_FIND_LIBRARY_SUFFIXES})
+
+ # On Linux, we may need pthread, dl, and m (these are typically dynamically linked)
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ list(APPEND FAISS_EXTRA_LIBS pthread dl m)
+
+ # Try to find gfortran shared if static not found
+ if(NOT GFORTRAN_STATIC_LIBRARY)
+ find_library(GFORTRAN_LIBRARY gfortran)
+ if(GFORTRAN_LIBRARY)
+ list(APPEND FAISS_EXTRA_LIBS ${GFORTRAN_LIBRARY})
+ endif()
+ endif()
+ endif()
+endif()
+
+# Platform detection - using {os}/{arch} directory structure
+if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ set(PLATFORM_OS "linux")
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+ set(PLATFORM_ARCH "amd64")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64|arm64")
+ set(PLATFORM_ARCH "aarch64")
+ else()
+ message(FATAL_ERROR "Unsupported Linux architecture: ${CMAKE_SYSTEM_PROCESSOR}")
+ endif()
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ set(PLATFORM_OS "darwin")
+ if(CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|amd64")
+ set(PLATFORM_ARCH "amd64")
+ elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "arm64|aarch64")
+ set(PLATFORM_ARCH "aarch64")
+ else()
+ message(FATAL_ERROR "Unsupported macOS architecture: ${CMAKE_SYSTEM_PROCESSOR}")
+ endif()
+else()
+ message(FATAL_ERROR "Unsupported operating system: ${CMAKE_SYSTEM_NAME}. Only Linux and macOS are supported.")
+endif()
+
+set(PLATFORM_DIR "${PLATFORM_OS}/${PLATFORM_ARCH}")
+message(STATUS "Building for platform: ${PLATFORM_DIR}")
+
+# Build the JNI library
+add_library(paimon_faiss_jni SHARED
+ paimon_faiss_jni.cpp
+)
+
+# Include directories
+if(TARGET faiss)
+ target_link_libraries(paimon_faiss_jni PRIVATE faiss)
+else()
+ target_include_directories(paimon_faiss_jni PRIVATE ${FAISS_INCLUDE_DIRS})
+ target_link_libraries(paimon_faiss_jni PRIVATE ${FAISS_LIBRARIES})
+endif()
+
+# Link extra libraries for fat lib (BLAS, LAPACK, etc.)
+if(BUILD_FAT_LIB)
+ # Link static libraries with --whole-archive to embed all symbols
+ if(FAISS_STATIC_LIBS AND CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ message(STATUS "Linking static libraries with --whole-archive: ${FAISS_STATIC_LIBS}")
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,--whole-archive"
+ )
+ target_link_libraries(paimon_faiss_jni PRIVATE ${FAISS_STATIC_LIBS})
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,--no-whole-archive"
+ )
+ elseif(FAISS_STATIC_LIBS)
+ # macOS doesn't use --whole-archive, use -force_load instead
+ foreach(static_lib ${FAISS_STATIC_LIBS})
+ target_link_options(paimon_faiss_jni PRIVATE "-Wl,-force_load,${static_lib}")
+ endforeach()
+ message(STATUS "Linking static libraries with -force_load: ${FAISS_STATIC_LIBS}")
+ endif()
+
+ # Link remaining shared libraries
+ if(FAISS_EXTRA_LIBS)
+ target_link_libraries(paimon_faiss_jni PRIVATE ${FAISS_EXTRA_LIBS})
+ message(STATUS "Linking extra libraries: ${FAISS_EXTRA_LIBS}")
+ endif()
+endif()
+
+# Link OpenMP - always use dynamic linking for OpenMP (static libgomp.a often lacks -fPIC)
+if(TARGET OpenMP::OpenMP_CXX)
+ target_link_libraries(paimon_faiss_jni PRIVATE OpenMP::OpenMP_CXX)
+ message(STATUS "Linking OpenMP via imported target")
+elseif(OpenMP_FOUND)
+ target_compile_options(paimon_faiss_jni PRIVATE ${OpenMP_CXX_FLAGS})
+ # Link against the shared gomp library
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ find_library(GOMP_SHARED_LIBRARY NAMES gomp PATHS /usr/lib /usr/lib64 /usr/lib/x86_64-linux-gnu)
+ if(GOMP_SHARED_LIBRARY)
+ target_link_libraries(paimon_faiss_jni PRIVATE ${GOMP_SHARED_LIBRARY})
+ message(STATUS "Linking OpenMP shared library: ${GOMP_SHARED_LIBRARY}")
+ else()
+ target_link_libraries(paimon_faiss_jni PRIVATE gomp)
+ message(STATUS "Linking OpenMP: gomp")
+ endif()
+ else()
+ target_link_libraries(paimon_faiss_jni PRIVATE ${OpenMP_CXX_FLAGS})
+ endif()
+endif()
+
+# Platform-specific settings
+if(CMAKE_SYSTEM_NAME STREQUAL "Darwin")
+ # macOS specific settings
+ set_target_properties(paimon_faiss_jni PROPERTIES
+ SUFFIX ".dylib"
+ INSTALL_NAME_DIR "@rpath"
+ BUILD_WITH_INSTALL_RPATH TRUE
+ )
+
+ # Link against libc++
+ target_link_libraries(paimon_faiss_jni PRIVATE c++)
+
+ # For fat lib on macOS, embed OpenMP library path
+ if(BUILD_FAT_LIB AND HOMEBREW_LIBOMP_PREFIX)
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,-rpath,@loader_path"
+ "-Wl,-rpath,${HOMEBREW_LIBOMP_PREFIX}/lib"
+ )
+ endif()
+
+elseif(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ # Linux specific settings
+ set_target_properties(paimon_faiss_jni PROPERTIES
+ SUFFIX ".so"
+ )
+
+ if(BUILD_FAT_LIB)
+ # For fat lib, use static libstdc++ and libgcc
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-static-libstdc++"
+ "-static-libgcc"
+ "-Wl,--exclude-libs,ALL"
+ )
+ message(STATUS "Using static libstdc++ and libgcc for fat lib")
+ else()
+ target_link_libraries(paimon_faiss_jni PRIVATE stdc++)
+ endif()
+
+endif()
+
+# Set output directory - output to src/main/resources/{os}/{arch}/
+set(OUTPUT_DIR "${CMAKE_SOURCE_DIR}/../resources/${PLATFORM_DIR}")
+set_target_properties(paimon_faiss_jni PROPERTIES
+ LIBRARY_OUTPUT_DIRECTORY ${OUTPUT_DIR}
+ RUNTIME_OUTPUT_DIRECTORY ${OUTPUT_DIR}
+)
+
+# Optimization level
+if(FAISS_OPT_LEVEL STREQUAL "avx2")
+ target_compile_options(paimon_faiss_jni PRIVATE -mavx2 -mfma)
+ message(STATUS "Building with AVX2 optimizations")
+elseif(FAISS_OPT_LEVEL STREQUAL "avx512")
+ target_compile_options(paimon_faiss_jni PRIVATE -mavx512f -mavx512dq -mavx512bw -mavx512vl)
+ message(STATUS "Building with AVX-512 optimizations")
+else()
+ message(STATUS "Building with generic optimizations")
+endif()
+
+# Copy bundled shared libraries to output directory and set rpath
+if(BUILD_FAT_LIB AND BUNDLE_OPENBLAS AND BUNDLE_OPENBLAS_PATH)
+ message(STATUS "Will bundle OpenBLAS shared library: ${BUNDLE_OPENBLAS_PATH}")
+
+ # Get the actual library file (resolve symlinks)
+ get_filename_component(OPENBLAS_REALPATH ${BUNDLE_OPENBLAS_PATH} REALPATH)
+ get_filename_component(OPENBLAS_FILENAME ${OPENBLAS_REALPATH} NAME)
+
+ # Copy OpenBLAS to output directory after build
+ add_custom_command(TARGET paimon_faiss_jni POST_BUILD
+ COMMAND ${CMAKE_COMMAND} -E copy_if_different
+ ${OPENBLAS_REALPATH}
+ ${OUTPUT_DIR}/libopenblas.so.0
+ COMMENT "Bundling OpenBLAS shared library"
+ )
+
+ # Set rpath to look in the same directory as the library
+ if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
+ target_link_options(paimon_faiss_jni PRIVATE
+ "-Wl,-rpath,$ORIGIN"
+ )
+ # Also patch the library after build to use the bundled libopenblas
+ add_custom_command(TARGET paimon_faiss_jni POST_BUILD
+ COMMAND patchelf --set-rpath "$$ORIGIN" ${OUTPUT_DIR}/libpaimon_faiss_jni.so || true
+ COMMENT "Setting rpath to $ORIGIN"
+ )
+ endif()
+endif()
+
+# Install target
+install(TARGETS paimon_faiss_jni
+ LIBRARY DESTINATION ${PLATFORM_DIR}
+ RUNTIME DESTINATION ${PLATFORM_DIR}
+)
+
diff --git a/paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp
new file mode 100644
index 000000000000..72e8cc66808e
--- /dev/null
+++ b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.cpp
@@ -0,0 +1,475 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "paimon_faiss_jni.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#ifdef _OPENMP
+#include
+#endif
+
+#include
+#include
+#include
+#include
+#include
+
+// Helper macro for exception handling
+#define FAISS_TRY try {
+#define FAISS_CATCH(env) \
+ } catch (const std::exception& e) { \
+ jclass exceptionClass = env->FindClass("org/apache/paimon/faiss/FaissException"); \
+ if (exceptionClass != nullptr) { \
+ env->ThrowNew(exceptionClass, e.what()); \
+ } \
+ } catch (...) { \
+ jclass exceptionClass = env->FindClass("org/apache/paimon/faiss/FaissException"); \
+ if (exceptionClass != nullptr) { \
+ env->ThrowNew(exceptionClass, "Unknown native exception"); \
+ } \
+ }
+
+// Helper function to convert jstring to std::string
+static std::string jstringToString(JNIEnv* env, jstring jstr) {
+ if (jstr == nullptr) {
+ return "";
+ }
+ const char* chars = env->GetStringUTFChars(jstr, nullptr);
+ std::string result(chars);
+ env->ReleaseStringUTFChars(jstr, chars);
+ return result;
+}
+
+// Helper function to get index pointer from handle
+static faiss::Index* getIndex(jlong handle) {
+ return reinterpret_cast(handle);
+}
+
+// Helper to get IVF index
+static faiss::IndexIVF* getIndexIVF(jlong handle) {
+ faiss::Index* index = getIndex(handle);
+
+ // Try direct cast
+ faiss::IndexIVF* ivf = dynamic_cast(index);
+ if (ivf != nullptr) {
+ return ivf;
+ }
+
+ // Try through IDMap wrapper
+ faiss::IndexIDMap* idmap = dynamic_cast(index);
+ if (idmap != nullptr) {
+ ivf = dynamic_cast(idmap->index);
+ if (ivf != nullptr) {
+ return ivf;
+ }
+ }
+
+ throw std::runtime_error("Index is not an IVF index");
+}
+
+// Helper to get HNSW index
+static faiss::IndexHNSW* getIndexHNSW(jlong handle) {
+ faiss::Index* index = getIndex(handle);
+
+ // Try direct cast
+ faiss::IndexHNSW* hnsw = dynamic_cast(index);
+ if (hnsw != nullptr) {
+ return hnsw;
+ }
+
+ // Try through IDMap wrapper
+ faiss::IndexIDMap* idmap = dynamic_cast(index);
+ if (idmap != nullptr) {
+ hnsw = dynamic_cast(idmap->index);
+ if (hnsw != nullptr) {
+ return hnsw;
+ }
+ }
+
+ throw std::runtime_error("Index is not an HNSW index");
+}
+
+// Helper to get direct buffer address with validation
+static void* getDirectBufferAddress(JNIEnv* env, jobject buffer, const char* name) {
+ void* addr = env->GetDirectBufferAddress(buffer);
+ if (addr == nullptr) {
+ std::string msg = std::string(name) + " buffer is not a direct buffer or address is unavailable";
+ throw std::runtime_error(msg);
+ }
+ return addr;
+}
+
+// Range search result wrapper
+struct RangeSearchResultWrapper {
+ faiss::RangeSearchResult result;
+ int nq;
+
+ RangeSearchResultWrapper(int nq_) : result(nq_), nq(nq_) {}
+};
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexFactoryCreate
+ (JNIEnv* env, jclass, jint dimension, jstring description, jint metricType) {
+ FAISS_TRY
+ std::string desc = jstringToString(env, description);
+ faiss::MetricType metric = (metricType == 0) ? faiss::METRIC_L2 : faiss::METRIC_INNER_PRODUCT;
+ faiss::Index* index = faiss::index_factory(dimension, desc.c_str(), metric);
+ return reinterpret_cast(index);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDestroy
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ delete getIndex(handle);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetDimension
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndex(handle)->d);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetCount
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndex(handle)->ntotal);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jboolean JNICALL Java_org_apache_paimon_faiss_FaissNative_indexIsTrained
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return getIndex(handle)->is_trained ? JNI_TRUE : JNI_FALSE;
+ FAISS_CATCH(env)
+ return JNI_FALSE;
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetMetricType
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ faiss::MetricType metric = getIndex(handle)->metric_type;
+ return (metric == faiss::METRIC_L2) ? 0 : 1;
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReset
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ getIndex(handle)->reset();
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexTrain
+ (JNIEnv* env, jclass, jlong handle, jlong n, jobject vectorBuffer) {
+ FAISS_TRY
+ float* vectorData = static_cast(getDirectBufferAddress(env, vectorBuffer, "vector"));
+ getIndex(handle)->train(n, vectorData);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAdd
+ (JNIEnv* env, jclass, jlong handle, jlong n, jobject vectorBuffer) {
+ FAISS_TRY
+ float* vectorData = static_cast(getDirectBufferAddress(env, vectorBuffer, "vector"));
+ getIndex(handle)->add(n, vectorData);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAddWithIds
+ (JNIEnv* env, jclass, jlong handle, jlong n, jobject vectorBuffer, jobject idBuffer) {
+ FAISS_TRY
+ float* vectorData = static_cast(getDirectBufferAddress(env, vectorBuffer, "vector"));
+ int64_t* idData = static_cast(getDirectBufferAddress(env, idBuffer, "id"));
+
+ // Convert to faiss::idx_t if size differs
+ if (sizeof(faiss::idx_t) == sizeof(int64_t)) {
+ getIndex(handle)->add_with_ids(n, vectorData, reinterpret_cast(idData));
+ } else {
+ std::vector faissIds(n);
+ for (jlong i = 0; i < n; i++) {
+ faissIds[i] = static_cast(idData[i]);
+ }
+ getIndex(handle)->add_with_ids(n, vectorData, faissIds.data());
+ }
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSearch
+ (JNIEnv* env, jclass, jlong handle, jlong n, jfloatArray queryVectors, jint k,
+ jfloatArray distances, jlongArray labels) {
+ FAISS_TRY
+ // Get query vectors
+ jfloat* queryData = env->GetFloatArrayElements(queryVectors, nullptr);
+ if (queryData == nullptr) {
+ throw std::runtime_error("Failed to get query vectors");
+ }
+
+ // Allocate temporary arrays for results
+ std::vector distTemp(n * k);
+ std::vector labelTemp(n * k);
+
+ // Perform search
+ getIndex(handle)->search(n, queryData, k, distTemp.data(), labelTemp.data());
+
+ // Release query array
+ env->ReleaseFloatArrayElements(queryVectors, queryData, JNI_ABORT);
+
+ // Copy results to output arrays
+ env->SetFloatArrayRegion(distances, 0, n * k, distTemp.data());
+
+ // Convert labels to jlong
+ std::vector jlongLabels(n * k);
+ for (jlong i = 0; i < n * k; i++) {
+ jlongLabels[i] = static_cast(labelTemp[i]);
+ }
+ env->SetLongArrayRegion(labels, 0, n * k, jlongLabels.data());
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRangeSearch
+ (JNIEnv* env, jclass, jlong handle, jlong n, jobject queryBuffer, jfloat radius) {
+ FAISS_TRY
+ float* queryData = static_cast(getDirectBufferAddress(env, queryBuffer, "query"));
+
+ RangeSearchResultWrapper* wrapper = new RangeSearchResultWrapper(static_cast(n));
+ getIndex(handle)->range_search(n, queryData, radius, &wrapper->result);
+
+ return reinterpret_cast(wrapper);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexWriteToFile
+ (JNIEnv* env, jclass, jlong handle, jstring path) {
+ FAISS_TRY
+ std::string filePath = jstringToString(env, path);
+ faiss::write_index(getIndex(handle), filePath.c_str());
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReadFromFile
+ (JNIEnv* env, jclass, jstring path) {
+ FAISS_TRY
+ std::string filePath = jstringToString(env, path);
+ faiss::Index* index = faiss::read_index(filePath.c_str(), faiss::IO_FLAG_MMAP);
+ return reinterpret_cast(index);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerializeSize
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ faiss::VectorIOWriter writer;
+ faiss::write_index(getIndex(handle), &writer);
+ return static_cast(writer.data.size());
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerialize
+ (JNIEnv* env, jclass, jlong handle, jobject buffer) {
+ FAISS_TRY
+ void* bufferData = getDirectBufferAddress(env, buffer, "output");
+
+ faiss::VectorIOWriter writer;
+ faiss::write_index(getIndex(handle), &writer);
+
+ jlong capacity = env->GetDirectBufferCapacity(buffer);
+ if (static_cast(capacity) < writer.data.size()) {
+ throw std::runtime_error("Buffer too small for serialized index");
+ }
+
+ memcpy(bufferData, writer.data.data(), writer.data.size());
+ return static_cast(writer.data.size());
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDeserialize
+ (JNIEnv* env, jclass, jbyteArray data, jlong length) {
+ FAISS_TRY
+ jbyte* dataPtr = env->GetByteArrayElements(data, nullptr);
+ if (dataPtr == nullptr) {
+ throw std::runtime_error("Failed to get byte array elements");
+ }
+
+ faiss::VectorIOReader reader;
+ reader.data.resize(length);
+ memcpy(reader.data.data(), dataPtr, length);
+
+ env->ReleaseByteArrayElements(data, dataPtr, JNI_ABORT);
+
+ faiss::Index* index = faiss::read_index(&reader);
+ return reinterpret_cast(index);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultDestroy
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ delete reinterpret_cast(handle);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetTotalSize
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ return static_cast(wrapper->result.lims[wrapper->nq]);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetNumQueries
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ return static_cast(wrapper->nq);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLimits
+ (JNIEnv* env, jclass, jlong handle, jobject limitsBuffer) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ int64_t* limitsData = static_cast(getDirectBufferAddress(env, limitsBuffer, "limits"));
+
+ for (int i = 0; i <= wrapper->nq; i++) {
+ limitsData[i] = static_cast(wrapper->result.lims[i]);
+ }
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLabels
+ (JNIEnv* env, jclass, jlong handle, jobject labelsBuffer) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ int64_t* labelsData = static_cast(getDirectBufferAddress(env, labelsBuffer, "labels"));
+
+ size_t total = wrapper->result.lims[wrapper->nq];
+ for (size_t i = 0; i < total; i++) {
+ labelsData[i] = static_cast(wrapper->result.labels[i]);
+ }
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetDistances
+ (JNIEnv* env, jclass, jlong handle, jobject distancesBuffer) {
+ FAISS_TRY
+ RangeSearchResultWrapper* wrapper = reinterpret_cast(handle);
+ float* distData = static_cast(getDirectBufferAddress(env, distancesBuffer, "distances"));
+
+ size_t total = wrapper->result.lims[wrapper->nq];
+ memcpy(distData, wrapper->result.distances, total * sizeof(float));
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNprobe
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexIVF(handle)->nprobe);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfSetNprobe
+ (JNIEnv* env, jclass, jlong handle, jint nprobe) {
+ FAISS_TRY
+ getIndexIVF(handle)->nprobe = static_cast(nprobe);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNlist
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexIVF(handle)->nlist);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfSearch
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexHNSW(handle)->hnsw.efSearch);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfSearch
+ (JNIEnv* env, jclass, jlong handle, jint efSearch) {
+ FAISS_TRY
+ getIndexHNSW(handle)->hnsw.efSearch = static_cast(efSearch);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfConstruction
+ (JNIEnv* env, jclass, jlong handle) {
+ FAISS_TRY
+ return static_cast(getIndexHNSW(handle)->hnsw.efConstruction);
+ FAISS_CATCH(env)
+ return 0;
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfConstruction
+ (JNIEnv* env, jclass, jlong handle, jint efConstruction) {
+ FAISS_TRY
+ getIndexHNSW(handle)->hnsw.efConstruction = static_cast(efConstruction);
+ FAISS_CATCH(env)
+}
+
+JNIEXPORT jstring JNICALL Java_org_apache_paimon_faiss_FaissNative_getVersion
+ (JNIEnv* env, jclass) {
+ // Return FAISS version defined in CMakeLists.txt
+ return env->NewStringUTF(FAISS_VERSION);
+}
+
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_setNumThreads
+ (JNIEnv* env, jclass, jint numThreads) {
+#ifdef _OPENMP
+ omp_set_num_threads(numThreads);
+#else
+ // OpenMP not available, ignore
+ (void)numThreads;
+#endif
+}
+
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_getNumThreads
+ (JNIEnv* env, jclass) {
+#ifdef _OPENMP
+ return omp_get_max_threads();
+#else
+ return 1;
+#endif
+}
diff --git a/paimon-faiss-jni/src/main/native/paimon_faiss_jni.h b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.h
new file mode 100644
index 000000000000..c5739dbe8e82
--- /dev/null
+++ b/paimon-faiss-jni/src/main/native/paimon_faiss_jni.h
@@ -0,0 +1,298 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef PAIMON_FAISS_JNI_H
+#define PAIMON_FAISS_JNI_H
+
+#include
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexFactoryCreate
+ * Signature: (ILjava/lang/String;I)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexFactoryCreate
+ (JNIEnv *, jclass, jint, jstring, jint);
+
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexDestroy
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDestroy
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexGetDimension
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetDimension
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexGetCount
+ * Signature: (J)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetCount
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexIsTrained
+ * Signature: (J)Z
+ */
+JNIEXPORT jboolean JNICALL Java_org_apache_paimon_faiss_FaissNative_indexIsTrained
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexGetMetricType
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_indexGetMetricType
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexReset
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReset
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexTrain
+ * Signature: (JJLjava/nio/ByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexTrain
+ (JNIEnv *, jclass, jlong, jlong, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexAdd
+ * Signature: (JJLjava/nio/ByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAdd
+ (JNIEnv *, jclass, jlong, jlong, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexAddWithIds
+ * Signature: (JJLjava/nio/ByteBuffer;Ljava/nio/ByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexAddWithIds
+ (JNIEnv *, jclass, jlong, jlong, jobject, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexSearch
+ * Signature: (JJ[FI[F[J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSearch
+ (JNIEnv *, jclass, jlong, jlong, jfloatArray, jint, jfloatArray, jlongArray);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexRangeSearch
+ * Signature: (JJLjava/nio/ByteBuffer;F)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexRangeSearch
+ (JNIEnv *, jclass, jlong, jlong, jobject, jfloat);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexWriteToFile
+ * Signature: (JLjava/lang/String;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_indexWriteToFile
+ (JNIEnv *, jclass, jlong, jstring);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexReadFromFile
+ * Signature: (Ljava/lang/String;)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexReadFromFile
+ (JNIEnv *, jclass, jstring);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexSerializeSize
+ * Signature: (J)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerializeSize
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexSerialize
+ * Signature: (JLjava/nio/ByteBuffer;)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexSerialize
+ (JNIEnv *, jclass, jlong, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: indexDeserialize
+ * Signature: ([BJ)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_indexDeserialize
+ (JNIEnv *, jclass, jbyteArray, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultDestroy
+ * Signature: (J)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultDestroy
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetTotalSize
+ * Signature: (J)J
+ */
+JNIEXPORT jlong JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetTotalSize
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetNumQueries
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetNumQueries
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetLimits
+ * Signature: (JLjava/nio/ByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLimits
+ (JNIEnv *, jclass, jlong, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetLabels
+ * Signature: (JLjava/nio/ByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetLabels
+ (JNIEnv *, jclass, jlong, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: rangeSearchResultGetDistances
+ * Signature: (JLjava/nio/ByteBuffer;)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_rangeSearchResultGetDistances
+ (JNIEnv *, jclass, jlong, jobject);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: ivfGetNprobe
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNprobe
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: ivfSetNprobe
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfSetNprobe
+ (JNIEnv *, jclass, jlong, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: ivfGetNlist
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_ivfGetNlist
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswGetEfSearch
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfSearch
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswSetEfSearch
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfSearch
+ (JNIEnv *, jclass, jlong, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswGetEfConstruction
+ * Signature: (J)I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswGetEfConstruction
+ (JNIEnv *, jclass, jlong);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: hnswSetEfConstruction
+ * Signature: (JI)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_hnswSetEfConstruction
+ (JNIEnv *, jclass, jlong, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: getVersion
+ * Signature: ()Ljava/lang/String;
+ */
+JNIEXPORT jstring JNICALL Java_org_apache_paimon_faiss_FaissNative_getVersion
+ (JNIEnv *, jclass);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: setNumThreads
+ * Signature: (I)V
+ */
+JNIEXPORT void JNICALL Java_org_apache_paimon_faiss_FaissNative_setNumThreads
+ (JNIEnv *, jclass, jint);
+
+/*
+ * Class: org_apache_paimon_faiss_FaissNative
+ * Method: getNumThreads
+ * Signature: ()I
+ */
+JNIEXPORT jint JNICALL Java_org_apache_paimon_faiss_FaissNative_getNumThreads
+ (JNIEnv *, jclass);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* PAIMON_FAISS_JNI_H */
diff --git a/paimon-faiss-jni/src/test/java/org/apache/paimon/faiss/IndexTest.java b/paimon-faiss-jni/src/test/java/org/apache/paimon/faiss/IndexTest.java
new file mode 100644
index 000000000000..8f341465c70b
--- /dev/null
+++ b/paimon-faiss-jni/src/test/java/org/apache/paimon/faiss/IndexTest.java
@@ -0,0 +1,397 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss;
+
+import org.junit.jupiter.api.Test;
+import org.junit.jupiter.api.io.TempDir;
+
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.nio.FloatBuffer;
+import java.nio.file.Path;
+import java.util.Random;
+
+import static org.junit.jupiter.api.Assertions.assertArrayEquals;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+/**
+ * Tests for the Faiss Index class.
+ *
+ *
Note: These tests require the native library to be built and available. They will be skipped
+ * if the native library is not found.
+ */
+class IndexTest {
+
+ private static final int DIMENSION = 128;
+ private static final int NUM_VECTORS = 1000;
+ private static final int K = 10;
+
+ @Test
+ void testFlatIndexBasicOperations() {
+ try (Index index = createFlatIndexWithMetric(MetricType.L2)) {
+ assertEquals(DIMENSION, index.getDimension());
+ assertEquals(0, index.getCount());
+ assertTrue(index.isTrained());
+ assertEquals(MetricType.L2, index.getMetricType());
+
+ // Add vectors using zero-copy API
+ ByteBuffer vectorBuffer = createVectorBuffer(NUM_VECTORS, DIMENSION);
+ index.add(NUM_VECTORS, vectorBuffer);
+ assertEquals(NUM_VECTORS, index.getCount());
+
+ // Search using array API
+ float[] queryVectors = createQueryVectors(1, DIMENSION);
+ float[] distances = new float[K];
+ long[] labels = new long[K];
+
+ index.search(1, queryVectors, K, distances, labels);
+
+ // Verify labels are in valid range
+ for (int i = 0; i < K; i++) {
+ assertTrue(
+ labels[i] >= 0 && labels[i] < NUM_VECTORS,
+ "Label " + labels[i] + " out of range");
+ }
+
+ // Verify distances are non-negative for L2
+ for (int i = 0; i < K; i++) {
+ assertTrue(distances[i] >= 0, "Distance should be non-negative for L2");
+ }
+ }
+ }
+
+ @Test
+ void testFlatIndexWithIds() {
+ try (Index index = IndexFactory.create(DIMENSION, "IDMap,Flat", MetricType.L2)) {
+ ByteBuffer vectorBuffer = createVectorBuffer(NUM_VECTORS, DIMENSION);
+ ByteBuffer idBuffer = Index.allocateIdBuffer(NUM_VECTORS);
+ idBuffer.asLongBuffer();
+ for (int i = 0; i < NUM_VECTORS; i++) {
+ idBuffer.putLong(i * Long.BYTES, i * 100L); // Use custom IDs
+ }
+
+ index.addWithIds(NUM_VECTORS, vectorBuffer, idBuffer);
+ assertEquals(NUM_VECTORS, index.getCount());
+
+ // Search should return our custom IDs
+ float[] queryVectors = createQueryVectors(1, DIMENSION);
+ float[] distances = new float[K];
+ long[] labels = new long[K];
+
+ index.search(1, queryVectors, K, distances, labels);
+
+ for (int i = 0; i < K; i++) {
+ assertTrue(labels[i] % 100 == 0, "Label should be a multiple of 100");
+ }
+ }
+ }
+
+ @Test
+ void testBatchSearch() {
+ try (Index index = createFlatIndexWithMetric(MetricType.L2)) {
+ ByteBuffer vectorBuffer = createVectorBuffer(NUM_VECTORS, DIMENSION);
+ index.add(NUM_VECTORS, vectorBuffer);
+
+ int numQueries = 5;
+ float[] queryVectors = createQueryVectors(numQueries, DIMENSION);
+ float[] distances = new float[numQueries * K];
+ long[] labels = new long[numQueries * K];
+
+ index.search(numQueries, queryVectors, K, distances, labels);
+
+ // Read results for each query
+ for (int q = 0; q < numQueries; q++) {
+ for (int n = 0; n < K; n++) {
+ int idx = q * K + n;
+ assertTrue(labels[idx] >= 0 && labels[idx] < NUM_VECTORS);
+ assertTrue(distances[idx] >= 0);
+ }
+ }
+ }
+ }
+
+ private Index createFlatIndexWithMetric(MetricType metricType) {
+ return IndexFactory.create(DIMENSION, "Flat", metricType);
+ }
+
+ private Index createFlatIndex() {
+ return IndexFactory.create(DIMENSION, "Flat", MetricType.L2);
+ }
+
+ @Test
+ void testInnerProductMetric() {
+ try (Index index = createFlatIndexWithMetric(MetricType.INNER_PRODUCT)) {
+ assertEquals(MetricType.INNER_PRODUCT, index.getMetricType());
+
+ ByteBuffer vectorBuffer = createVectorBuffer(NUM_VECTORS, DIMENSION);
+ index.add(NUM_VECTORS, vectorBuffer);
+
+ float[] queryVectors = createQueryVectors(1, DIMENSION);
+ float[] distances = new float[K];
+ long[] labels = new long[K];
+
+ index.search(1, queryVectors, K, distances, labels);
+
+ // For inner product, higher is better, so first result should have highest score
+ for (int i = 1; i < K; i++) {
+ assertTrue(
+ distances[i - 1] >= distances[i],
+ "Distances should be sorted in descending order for inner product");
+ }
+ }
+ }
+
+ @Test
+ void testIndexReset() {
+ try (Index index = createFlatIndex()) {
+ ByteBuffer vectorBuffer = createVectorBuffer(100, DIMENSION);
+ index.add(100, vectorBuffer);
+ assertEquals(100, index.getCount());
+
+ index.reset();
+ assertEquals(0, index.getCount());
+
+ // Can add again after reset
+ index.add(100, vectorBuffer);
+ assertEquals(100, index.getCount());
+ }
+ }
+
+ @Test
+ void testIndexSerialization(@TempDir Path tempDir) {
+ ByteBuffer vectorBuffer = createVectorBuffer(NUM_VECTORS, DIMENSION);
+ float[] queryVectors = createQueryVectors(1, DIMENSION);
+ long[] originalLabels = new long[K];
+ float[] originalDistances = new float[K];
+
+ // Create and populate index
+ try (Index index = createFlatIndex()) {
+ index.add(NUM_VECTORS, vectorBuffer);
+
+ // Perform search and save original results
+ index.search(1, queryVectors, K, originalDistances, originalLabels);
+
+ // Test file I/O
+ File indexFile = tempDir.resolve("test.index").toFile();
+ index.writeToFile(indexFile);
+
+ try (Index loadedIndex = Index.readFromFile(indexFile)) {
+ assertEquals(DIMENSION, loadedIndex.getDimension());
+ assertEquals(NUM_VECTORS, loadedIndex.getCount());
+
+ float[] loadedDistances = new float[K];
+ long[] loadedLabels = new long[K];
+ loadedIndex.search(1, queryVectors, K, loadedDistances, loadedLabels);
+
+ assertArrayEquals(originalLabels, loadedLabels);
+ }
+ }
+
+ // Test ByteBuffer serialization (zero-copy for write) and byte[] deserialization
+ try (Index index = createFlatIndex()) {
+ index.add(NUM_VECTORS, vectorBuffer);
+
+ long serializeSize = index.serializeSize();
+ assertTrue(serializeSize > 0);
+
+ ByteBuffer serialized =
+ ByteBuffer.allocateDirect((int) serializeSize).order(ByteOrder.nativeOrder());
+ long bytesWritten = index.serialize(serialized);
+ assertEquals(serializeSize, bytesWritten);
+
+ // Convert to byte array for deserialization
+ serialized.rewind();
+ byte[] serializedBytes = new byte[(int) bytesWritten];
+ serialized.get(serializedBytes);
+
+ try (Index deserializedIndex = Index.deserialize(serializedBytes)) {
+ assertEquals(DIMENSION, deserializedIndex.getDimension());
+ assertEquals(NUM_VECTORS, deserializedIndex.getCount());
+
+ float[] deserializedDistances = new float[K];
+ long[] deserializedLabels = new long[K];
+ deserializedIndex.search(
+ 1, queryVectors, K, deserializedDistances, deserializedLabels);
+
+ assertArrayEquals(originalLabels, deserializedLabels);
+ }
+ }
+ }
+
+ @Test
+ void testIndexFactoryDescriptions() {
+ // Test various index factory strings
+ String[] descriptions = {"Flat", "IDMap,Flat", "HNSW32", "HNSW32,Flat"};
+
+ for (String desc : descriptions) {
+ try (Index index = IndexFactory.create(DIMENSION, desc, MetricType.L2)) {
+ assertEquals(DIMENSION, index.getDimension());
+ assertNotNull(index.toString());
+ }
+ }
+ }
+
+ @Test
+ void testHNSWIndex() {
+ try (Index index = IndexFactory.create(DIMENSION, "HNSW" + 32, MetricType.L2)) {
+ assertTrue(index.isTrained()); // HNSW doesn't need training
+
+ ByteBuffer vectorBuffer = createVectorBuffer(NUM_VECTORS, DIMENSION);
+ index.add(NUM_VECTORS, vectorBuffer);
+
+ // Get and set efSearch
+ int efSearch = IndexHNSW.getEfSearch(index);
+ assertTrue(efSearch > 0);
+
+ IndexHNSW.setEfSearch(index, 64);
+ assertEquals(64, IndexHNSW.getEfSearch(index));
+
+ // Search
+ float[] queryVectors = createQueryVectors(1, DIMENSION);
+ float[] distances = new float[K];
+ long[] labels = new long[K];
+
+ index.search(1, queryVectors, K, distances, labels);
+
+ for (int i = 0; i < K; i++) {
+ assertTrue(labels[i] >= 0);
+ }
+ }
+ }
+
+ @Test
+ void testErrorHandling() {
+ // Test invalid dimension
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ IndexFactory.create(0, "Flat", MetricType.L2);
+ });
+
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ IndexFactory.create(-1, "Flat", MetricType.L2);
+ });
+
+ // Test null description
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ IndexFactory.create(DIMENSION, null, MetricType.L2);
+ });
+
+ // Test buffer validation - wrong size buffer
+ try (Index index = createFlatIndex()) {
+ ByteBuffer wrongSizeBuffer =
+ ByteBuffer.allocateDirect(10).order(ByteOrder.nativeOrder());
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ index.add(1, wrongSizeBuffer); // Buffer too small for 1 vector
+ });
+ }
+
+ // Test non-direct buffer
+ try (Index index = createFlatIndex()) {
+ ByteBuffer heapBuffer = ByteBuffer.allocate(DIMENSION * Float.BYTES);
+ assertThrows(
+ IllegalArgumentException.class,
+ () -> {
+ index.add(1, heapBuffer); // Not a direct buffer
+ });
+ }
+
+ // Test closed index
+ Index closedIndex = createFlatIndex();
+ closedIndex.close();
+ assertThrows(
+ IllegalStateException.class,
+ () -> {
+ closedIndex.getCount();
+ });
+ }
+
+ @Test
+ void testSearchResultArrays() {
+ try (Index index = createFlatIndex()) {
+ ByteBuffer vectorBuffer = createVectorBuffer(100, DIMENSION);
+ index.add(100, vectorBuffer);
+
+ int numQueries = 3;
+ int k = 5;
+ float[] queryVectors = createQueryVectors(numQueries, DIMENSION);
+ float[] distances = new float[numQueries * k];
+ long[] labels = new long[numQueries * k];
+
+ index.search(numQueries, queryVectors, k, distances, labels);
+
+ // Test reading individual results
+ for (int q = 0; q < numQueries; q++) {
+ for (int n = 0; n < k; n++) {
+ int idx = q * k + n;
+ assertTrue(labels[idx] >= 0 && labels[idx] < 100);
+ assertTrue(distances[idx] >= 0);
+ }
+ }
+ }
+ }
+
+ @Test
+ void testBufferAllocationHelpers() {
+ // Test vector buffer allocation
+ ByteBuffer vectorBuffer = Index.allocateVectorBuffer(10, DIMENSION);
+ assertTrue(vectorBuffer.isDirect());
+ assertEquals(ByteOrder.nativeOrder(), vectorBuffer.order());
+ assertEquals(10 * DIMENSION * Float.BYTES, vectorBuffer.capacity());
+
+ // Test ID buffer allocation
+ ByteBuffer idBuffer = Index.allocateIdBuffer(10);
+ assertTrue(idBuffer.isDirect());
+ assertEquals(ByteOrder.nativeOrder(), idBuffer.order());
+ assertEquals(10 * Long.BYTES, idBuffer.capacity());
+ }
+
+ /** Create a direct ByteBuffer with random vectors. */
+ private ByteBuffer createVectorBuffer(int n, int d) {
+ ByteBuffer buffer = Index.allocateVectorBuffer(n, d);
+ FloatBuffer floatView = buffer.asFloatBuffer();
+
+ Random random = new Random(42);
+ for (int i = 0; i < n * d; i++) {
+ floatView.put(i, random.nextFloat());
+ }
+
+ return buffer;
+ }
+
+ /** Create a float array with random query vectors. */
+ private float[] createQueryVectors(int n, int d) {
+ float[] vectors = new float[n * d];
+ Random random = new Random(42);
+ for (int i = 0; i < n * d; i++) {
+ vectors[i] = random.nextFloat();
+ }
+ return vectors;
+ }
+}
diff --git a/paimon-faiss/pom.xml b/paimon-faiss/pom.xml
new file mode 100644
index 000000000000..12d8a5e5c8d5
--- /dev/null
+++ b/paimon-faiss/pom.xml
@@ -0,0 +1,111 @@
+
+
+
+ 4.0.0
+
+
+ paimon-parent
+ org.apache.paimon
+ 1.4-SNAPSHOT
+
+
+ paimon-faiss
+ Paimon : Faiss
+
+
+
+ org.apache.paimon
+ paimon-common
+ ${project.version}
+
+
+
+ org.apache.paimon
+ paimon-faiss-jni
+ ${project.version}
+
+
+
+
+
+ org.junit.jupiter
+ junit-jupiter
+ ${junit5.version}
+ test
+
+
+
+ org.apache.paimon
+ paimon-core
+ ${project.version}
+ test
+
+
+
+ org.apache.paimon
+ paimon-format
+ ${project.version}
+ test
+
+
+
+ org.apache.paimon
+ paimon-test-utils
+ ${project.version}
+ test
+
+
+
+ org.apache.hadoop
+ hadoop-client
+ ${hadoop.version}
+ test
+
+
+ log4j
+ log4j
+
+
+ org.slf4j
+ slf4j-log4j12
+
+
+
+
+
+
+
+
+ maven-jar-plugin
+
+
+
+ test-jar
+
+
+
+
+
+
+
+
+
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndex.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndex.java
new file mode 100644
index 000000000000..2aa9312a5faa
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndex.java
@@ -0,0 +1,404 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+import org.apache.paimon.faiss.Index;
+import org.apache.paimon.faiss.IndexFactory;
+import org.apache.paimon.faiss.IndexHNSW;
+import org.apache.paimon.faiss.IndexIVF;
+import org.apache.paimon.faiss.MetricType;
+
+import java.io.Closeable;
+import java.io.File;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+
+/**
+ * A wrapper class for FAISS index with zero-copy support.
+ *
+ *
This class provides a safe Java API for interacting with native FAISS indices using direct
+ * ByteBuffers for zero-copy data transfer. This eliminates memory duplication and improves
+ * performance for large-scale vector operations.
+ */
+public class FaissIndex implements Closeable {
+
+ private final Index index;
+ private final int dimension;
+ private final FaissVectorMetric metric;
+ private final FaissIndexType indexType;
+ private volatile boolean closed = false;
+
+ private FaissIndex(
+ Index index, int dimension, FaissVectorMetric metric, FaissIndexType indexType) {
+ this.index = index;
+ this.dimension = dimension;
+ this.metric = metric;
+ this.indexType = indexType;
+ }
+
+ /**
+ * Create a flat index (exact search).
+ *
+ * @param dimension the dimension of vectors
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createFlatIndex(int dimension, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ Index index = IndexFactory.create(dimension, "IDMap,Flat", metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.FLAT);
+ }
+
+ /**
+ * Create an HNSW index.
+ *
+ * @param dimension the dimension of vectors
+ * @param m the number of connections per layer
+ * @param efConstruction the size of the dynamic candidate list for construction
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createHnswIndex(
+ int dimension, int m, int efConstruction, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ String description = String.format("IDMap2,HNSW%d", m);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ // Set efConstruction before adding any vectors
+ IndexHNSW.setEfConstruction(index, efConstruction);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.HNSW);
+ }
+
+ /**
+ * Create an IVF index.
+ *
+ * @param dimension the dimension of vectors
+ * @param nlist the number of inverted lists (clusters)
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createIvfIndex(int dimension, int nlist, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ String description = String.format("IDMap,IVF%d,Flat", nlist);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.IVF);
+ }
+
+ /**
+ * Create an IVF-PQ index.
+ *
+ * @param dimension the dimension of vectors
+ * @param nlist the number of inverted lists (clusters)
+ * @param m the number of sub-quantizers
+ * @param nbits the number of bits per sub-quantizer
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createIvfPqIndex(
+ int dimension, int nlist, int m, int nbits, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ String description = String.format("IDMap,IVF%d,PQ%dx%d", nlist, m, nbits);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.IVF_PQ);
+ }
+
+ /**
+ * Create an IVF-SQ8 index (Inverted File with Scalar Quantization 8-bit).
+ *
+ * @param dimension the dimension of vectors
+ * @param nlist the number of inverted lists (clusters)
+ * @param metric the distance metric
+ * @return the created index
+ */
+ public static FaissIndex createIvfSq8Index(int dimension, int nlist, FaissVectorMetric metric) {
+ MetricType metricType = toMetricType(metric);
+ String description = String.format("IDMap,IVF%d,SQ8", nlist);
+ Index index = IndexFactory.create(dimension, description, metricType);
+ return new FaissIndex(index, dimension, metric, FaissIndexType.IVF_SQ8);
+ }
+
+ /**
+ * Load an index from a local file using memory-mapped I/O.
+ *
+ *
This method uses FAISS's native file reading which can leverage mmap for efficient memory
+ * usage with large indices.
+ *
+ * @param file the local file containing the FAISS index
+ * @return the loaded index
+ */
+ public static FaissIndex fromFile(File file) {
+ Index index = Index.readFromFile(file);
+ return new FaissIndex(
+ index, index.getDimension(), FaissVectorMetric.L2, FaissIndexType.UNKNOWN);
+ }
+
+ /**
+ * Search for k nearest neighbors.
+ *
+ * @param queryVectors array containing query vectors (n * dimension floats)
+ * @param n the number of queries
+ * @param k the number of nearest neighbors
+ * @param distances output array for distances (n * k floats)
+ * @param labels output array for labels (n * k longs)
+ */
+ public void search(float[] queryVectors, int n, int k, float[] distances, long[] labels) {
+ ensureOpen();
+ if (queryVectors.length < n * dimension) {
+ throw new IllegalArgumentException(
+ "Query vectors array too small: required "
+ + (n * dimension)
+ + ", got "
+ + queryVectors.length);
+ }
+ if (distances.length < n * k) {
+ throw new IllegalArgumentException(
+ "Distances array too small: required " + (n * k) + ", got " + distances.length);
+ }
+ if (labels.length < n * k) {
+ throw new IllegalArgumentException(
+ "Labels array too small: required " + (n * k) + ", got " + labels.length);
+ }
+ index.search(n, queryVectors, k, distances, labels);
+ }
+
+ /**
+ * Train the index (zero-copy).
+ *
+ *
Required for IVF-based indices before adding vectors.
+ *
+ * @param vectorBuffer direct ByteBuffer containing training vectors (n * dimension floats)
+ * @param n the number of training vectors
+ */
+ public void train(ByteBuffer vectorBuffer, int n) {
+ ensureOpen();
+ validateVectorBuffer(vectorBuffer, n);
+ index.train(n, vectorBuffer);
+ }
+
+ /**
+ * Add vectors to the index (zero-copy).
+ *
+ * @param vectorBuffer direct ByteBuffer containing vectors (n * dimension floats)
+ * @param n the number of vectors
+ */
+ public void add(ByteBuffer vectorBuffer, int n) {
+ ensureOpen();
+ validateVectorBuffer(vectorBuffer, n);
+ index.add(n, vectorBuffer);
+ }
+
+ /**
+ * Add vectors with IDs to the index (zero-copy).
+ *
+ * @param vectorBuffer direct ByteBuffer containing vectors (n * dimension floats)
+ * @param idBuffer direct ByteBuffer containing IDs (n longs)
+ * @param n the number of vectors
+ */
+ public void addWithIds(ByteBuffer vectorBuffer, ByteBuffer idBuffer, int n) {
+ ensureOpen();
+ validateVectorBuffer(vectorBuffer, n);
+ validateIdBuffer(idBuffer, n);
+ index.addWithIds(n, vectorBuffer, idBuffer);
+ }
+
+ /**
+ * Check if the index is trained.
+ *
+ * @return true if the index is trained
+ */
+ public boolean isTrained() {
+ ensureOpen();
+ return index.isTrained();
+ }
+
+ /**
+ * Get the number of vectors in the index.
+ *
+ * @return the number of vectors
+ */
+ public long size() {
+ ensureOpen();
+ return index.getCount();
+ }
+
+ /** Reset the index (remove all vectors). */
+ public void reset() {
+ ensureOpen();
+ index.reset();
+ }
+
+ // ==================== Index Configuration ====================
+
+ /**
+ * Set HNSW search parameter efSearch.
+ *
+ * @param efSearch the size of the dynamic candidate list for search
+ */
+ public void setHnswEfSearch(int efSearch) {
+ ensureOpen();
+ IndexHNSW.setEfSearch(index, efSearch);
+ }
+
+ /**
+ * Set IVF search parameter nprobe.
+ *
+ * @param nprobe the number of clusters to visit during search
+ */
+ public void setIvfNprobe(int nprobe) {
+ ensureOpen();
+ IndexIVF.setNprobe(index, nprobe);
+ }
+
+ // ==================== Index Properties ====================
+
+ /**
+ * Get the dimension of vectors in the index.
+ *
+ * @return the dimension
+ */
+ public int dimension() {
+ return dimension;
+ }
+
+ /**
+ * Get the metric used by this index.
+ *
+ * @return the metric
+ */
+ public FaissVectorMetric metric() {
+ return metric;
+ }
+
+ /**
+ * Get the type of this index.
+ *
+ * @return the index type
+ */
+ public FaissIndexType indexType() {
+ return indexType;
+ }
+
+ // ==================== Serialization ====================
+
+ /**
+ * Get the size in bytes needed to serialize this index.
+ *
+ * @return the serialization size
+ */
+ public long serializeSize() {
+ ensureOpen();
+ return index.serializeSize();
+ }
+
+ /**
+ * Serialize the index to a direct ByteBuffer (zero-copy).
+ *
+ * @param buffer direct ByteBuffer to write to (must have sufficient capacity)
+ * @return the number of bytes written
+ */
+ public long serialize(ByteBuffer buffer) {
+ ensureOpen();
+ if (!buffer.isDirect()) {
+ throw new IllegalArgumentException("Buffer must be a direct buffer");
+ }
+ return index.serialize(buffer);
+ }
+
+ // ==================== Buffer Allocation Utilities ====================
+
+ /**
+ * Allocate a direct ByteBuffer suitable for vector data.
+ *
+ * @param numVectors number of vectors
+ * @param dimension vector dimension
+ * @return a direct ByteBuffer in native byte order
+ */
+ public static ByteBuffer allocateVectorBuffer(int numVectors, int dimension) {
+ return ByteBuffer.allocateDirect(numVectors * dimension * Float.BYTES)
+ .order(ByteOrder.nativeOrder());
+ }
+
+ /**
+ * Allocate a direct ByteBuffer suitable for ID data.
+ *
+ * @param numIds number of IDs
+ * @return a direct ByteBuffer in native byte order
+ */
+ public static ByteBuffer allocateIdBuffer(int numIds) {
+ return ByteBuffer.allocateDirect(numIds * Long.BYTES).order(ByteOrder.nativeOrder());
+ }
+
+ // ==================== Internal Methods ====================
+
+ private void validateVectorBuffer(ByteBuffer buffer, int numVectors) {
+ if (!buffer.isDirect()) {
+ throw new IllegalArgumentException("Vector buffer must be a direct buffer");
+ }
+ int requiredBytes = numVectors * dimension * Float.BYTES;
+ if (buffer.capacity() < requiredBytes) {
+ throw new IllegalArgumentException(
+ "Vector buffer too small: required "
+ + requiredBytes
+ + " bytes, got "
+ + buffer.capacity());
+ }
+ }
+
+ private void validateIdBuffer(ByteBuffer buffer, int numIds) {
+ if (!buffer.isDirect()) {
+ throw new IllegalArgumentException("ID buffer must be a direct buffer");
+ }
+ int requiredBytes = numIds * Long.BYTES;
+ if (buffer.capacity() < requiredBytes) {
+ throw new IllegalArgumentException(
+ "ID buffer too small: required "
+ + requiredBytes
+ + " bytes, got "
+ + buffer.capacity());
+ }
+ }
+
+ private void ensureOpen() {
+ if (closed) {
+ throw new IllegalStateException("Index has been closed");
+ }
+ }
+
+ private static MetricType toMetricType(FaissVectorMetric metric) {
+ switch (metric) {
+ case L2:
+ return MetricType.L2;
+ case INNER_PRODUCT:
+ return MetricType.INNER_PRODUCT;
+ default:
+ throw new IllegalArgumentException("Unknown metric: " + metric);
+ }
+ }
+
+ @Override
+ public void close() {
+ if (!closed) {
+ synchronized (this) {
+ if (!closed) {
+ index.close();
+ closed = true;
+ }
+ }
+ }
+ }
+}
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexMeta.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexMeta.java
new file mode 100644
index 000000000000..78a8362606a9
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexMeta.java
@@ -0,0 +1,111 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.io.Serializable;
+
+/** Metadata for FAISS vector index. */
+public class FaissIndexMeta implements Serializable {
+
+ private static final long serialVersionUID = 1L;
+
+ private static final int VERSION = 1;
+
+ private final int dim;
+ private final int metricValue;
+ private final int indexTypeOrdinal;
+ private final long numVectors;
+ private final long minId;
+ private final long maxId;
+
+ public FaissIndexMeta(
+ int dim,
+ int metricValue,
+ int indexTypeOrdinal,
+ long numVectors,
+ long minId,
+ long maxId) {
+ this.dim = dim;
+ this.metricValue = metricValue;
+ this.indexTypeOrdinal = indexTypeOrdinal;
+ this.numVectors = numVectors;
+ this.minId = minId;
+ this.maxId = maxId;
+ }
+
+ public int dim() {
+ return dim;
+ }
+
+ public int metricValue() {
+ return metricValue;
+ }
+
+ public int indexTypeOrdinal() {
+ return indexTypeOrdinal;
+ }
+
+ public long numVectors() {
+ return numVectors;
+ }
+
+ public long minId() {
+ return minId;
+ }
+
+ public long maxId() {
+ return maxId;
+ }
+
+ /** Serialize metadata to byte array. */
+ public byte[] serialize() throws IOException {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ DataOutputStream out = new DataOutputStream(baos);
+ out.writeInt(VERSION);
+ out.writeInt(dim);
+ out.writeInt(metricValue);
+ out.writeInt(indexTypeOrdinal);
+ out.writeLong(numVectors);
+ out.writeLong(minId);
+ out.writeLong(maxId);
+ out.flush();
+ return baos.toByteArray();
+ }
+
+ /** Deserialize metadata from byte array. */
+ public static FaissIndexMeta deserialize(byte[] data) throws IOException {
+ DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));
+ int version = in.readInt();
+ if (version != VERSION) {
+ throw new IOException("Unsupported FAISS index meta version: " + version);
+ }
+ int dim = in.readInt();
+ int metricValue = in.readInt();
+ int indexTypeOrdinal = in.readInt();
+ long numVectors = in.readLong();
+ long minId = in.readLong();
+ long maxId = in.readLong();
+ return new FaissIndexMeta(dim, metricValue, indexTypeOrdinal, numVectors, minId, maxId);
+ }
+}
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexType.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexType.java
new file mode 100644
index 000000000000..dbe615897bb0
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissIndexType.java
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+/** Enumeration of supported FAISS index types. */
+public enum FaissIndexType {
+
+ /** Flat index - exact brute-force search. */
+ FLAT("Flat"),
+
+ /** HNSW (Hierarchical Navigable Small World) graph-based index. */
+ HNSW("HNSW"),
+
+ /** IVF (Inverted File) index with flat vectors. */
+ IVF("IVF"),
+
+ /** IVF-PQ (Inverted File with Product Quantization) index. */
+ IVF_PQ("IVF_PQ"),
+
+ /** IVF-SQ8 (Inverted File with Scalar Quantization 8-bit) index. */
+ IVF_SQ8("IVF_SQ8"),
+
+ /** Unknown index type (e.g., loaded from serialized data). */
+ UNKNOWN("Unknown");
+
+ private final String name;
+
+ FaissIndexType(String name) {
+ this.name = name;
+ }
+
+ public String getName() {
+ return name;
+ }
+
+ public static FaissIndexType fromString(String name) {
+ for (FaissIndexType type : values()) {
+ if (type.name.equalsIgnoreCase(name)) {
+ return type;
+ }
+ }
+ throw new IllegalArgumentException("Unknown index type: " + name);
+ }
+}
diff --git a/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
new file mode 100644
index 000000000000..79ec0cda1f63
--- /dev/null
+++ b/paimon-faiss/src/main/java/org/apache/paimon/faiss/index/FaissVectorGlobalIndexReader.java
@@ -0,0 +1,488 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.paimon.faiss.index;
+
+import org.apache.paimon.fs.SeekableInputStream;
+import org.apache.paimon.globalindex.GlobalIndexIOMeta;
+import org.apache.paimon.globalindex.GlobalIndexReader;
+import org.apache.paimon.globalindex.GlobalIndexResult;
+import org.apache.paimon.globalindex.io.GlobalIndexFileReader;
+import org.apache.paimon.predicate.FieldRef;
+import org.apache.paimon.predicate.VectorSearch;
+import org.apache.paimon.types.ArrayType;
+import org.apache.paimon.types.DataType;
+import org.apache.paimon.types.FloatType;
+import org.apache.paimon.utils.IOUtils;
+import org.apache.paimon.utils.RoaringNavigableMap64;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Optional;
+import java.util.PriorityQueue;
+import java.util.UUID;
+
+/**
+ * Vector global index reader using FAISS.
+ *
+ *
This implementation uses FAISS for efficient approximate nearest neighbor search.
+ */
+public class FaissVectorGlobalIndexReader implements GlobalIndexReader {
+
+ private final List indices;
+ private final List indexMetas;
+ private final List localIndexFiles;
+ private final List ioMetas;
+ private final GlobalIndexFileReader fileReader;
+ private final DataType fieldType;
+ private final FaissVectorIndexOptions options;
+ private volatile boolean metasLoaded = false;
+ private volatile boolean indicesLoaded = false;
+
+ public FaissVectorGlobalIndexReader(
+ GlobalIndexFileReader fileReader,
+ List ioMetas,
+ DataType fieldType,
+ FaissVectorIndexOptions options) {
+ this.fileReader = fileReader;
+ this.ioMetas = ioMetas;
+ this.fieldType = fieldType;
+ this.options = options;
+ this.indices = new ArrayList<>();
+ this.indexMetas = new ArrayList<>();
+ this.localIndexFiles = new ArrayList<>();
+ }
+
+ @Override
+ public Optional visitVectorSearch(VectorSearch vectorSearch) {
+ try {
+ // First load only metadata
+ ensureLoadMetas();
+
+ RoaringNavigableMap64 includeRowIds = vectorSearch.includeRowIds();
+
+ // If includeRowIds is specified, check which indices contain matching rows
+ if (includeRowIds != null) {
+ List matchingIndices = new ArrayList<>();
+ for (int i = 0; i < indexMetas.size(); i++) {
+ FaissIndexMeta meta = indexMetas.get(i);
+ // Check if the index's rowId range overlaps with includeRowIds
+ if (hasOverlap(meta.minId(), meta.maxId(), includeRowIds)) {
+ matchingIndices.add(i);
+ }
+ }
+ // If no index contains matching rowIds, return empty
+ if (matchingIndices.isEmpty()) {
+ return Optional.empty();
+ }
+ // Load only matching indices
+ ensureLoadIndices(matchingIndices);
+ } else {
+ // Load all indices
+ ensureLoadAllIndices();
+ }
+
+ return Optional.ofNullable(search(vectorSearch));
+ } catch (IOException e) {
+ throw new RuntimeException(
+ String.format(
+ "Failed to search FAISS vector index with fieldName=%s, limit=%d",
+ vectorSearch.fieldName(), vectorSearch.limit()),
+ e);
+ }
+ }
+
+ /** Check if the range [minId, maxId] has any overlap with includeRowIds. */
+ private boolean hasOverlap(long minId, long maxId, RoaringNavigableMap64 includeRowIds) {
+ // Check if any ID in includeRowIds falls within [minId, maxId]
+ for (Long id : includeRowIds) {
+ if (id >= minId && id <= maxId) {
+ return true;
+ }
+ // Since iterator returns sorted values, we can stop early if we've passed maxId
+ if (id > maxId) {
+ break;
+ }
+ }
+ return false;
+ }
+
+ private GlobalIndexResult search(VectorSearch vectorSearch) throws IOException {
+ validateVectorType(vectorSearch.vector());
+ float[] queryVector = ((float[]) vectorSearch.vector()).clone();
+ // L2 normalize the query vector if enabled
+ if (options.normalize()) {
+ normalizeL2(queryVector);
+ }
+ int limit = vectorSearch.limit();
+
+ // Collect results from all indices using a min-heap
+ PriorityQueue result =
+ new PriorityQueue<>(Comparator.comparingDouble(sr -> sr.score));
+
+ RoaringNavigableMap64 includeRowIds = vectorSearch.includeRowIds();
+
+ // When filtering is enabled, we need to fetch more results to ensure
+ // we have enough after filtering. Use a multiplier based on index size.
+ int searchK = limit;
+ if (includeRowIds != null) {
+ // Fetch more results when filtering - up to searchFactor * limit or all filtered IDs
+ searchK =
+ Math.max(
+ limit * options.searchFactor(),
+ (int) includeRowIds.getLongCardinality());
+ }
+
+ for (FaissIndex index : indices) {
+ // Skip null indices (not loaded)
+ if (index == null) {
+ continue;
+ }
+
+ // Configure search parameters based on index type
+ configureSearchParams(index);
+
+ // Limit searchK to the index size
+ int effectiveK = (int) Math.min(searchK, index.size());
+ if (effectiveK <= 0) {
+ continue;
+ }
+
+ // Allocate result arrays
+ float[] distances = new float[effectiveK];
+ long[] labels = new long[effectiveK];
+
+ // Perform search
+ index.search(queryVector, 1, effectiveK, distances, labels);
+
+ for (int i = 0; i < effectiveK; i++) {
+ long rowId = labels[i];
+ if (rowId < 0) {
+ // Invalid result (not enough neighbors)
+ continue;
+ }
+
+ // Filter by include row IDs if specified
+ if (includeRowIds != null && !includeRowIds.contains(rowId)) {
+ continue;
+ }
+
+ // Convert distance to score (higher is better for similarity)
+ float score = convertDistanceToScore(distances[i]);
+
+ if (result.size() < limit) {
+ result.offer(new ScoredRow(rowId, score));
+ } else {
+ if (result.peek() != null && score > result.peek().score) {
+ result.poll();
+ result.offer(new ScoredRow(rowId, score));
+ }
+ }
+ }
+ }
+
+ RoaringNavigableMap64 roaringBitmap64 = new RoaringNavigableMap64();
+ HashMap id2scores = new HashMap<>(result.size());
+ for (ScoredRow scoredRow : result) {
+ id2scores.put(scoredRow.rowId, scoredRow.score);
+ roaringBitmap64.add(scoredRow.rowId);
+ }
+ return new FaissVectorSearchGlobalIndexResult(roaringBitmap64, id2scores);
+ }
+
+ private void configureSearchParams(FaissIndex index) {
+ switch (index.indexType()) {
+ case HNSW:
+ index.setHnswEfSearch(options.efSearch());
+ break;
+ case IVF:
+ case IVF_PQ:
+ case IVF_SQ8:
+ // For small indices, use higher nprobe to ensure enough results
+ // Use at least nprobe or 10% of index size, whichever is larger
+ int effectiveNprobe =
+ Math.max(options.nprobe(), (int) Math.max(1, index.size() / 10));
+ index.setIvfNprobe(effectiveNprobe);
+ break;
+ default:
+ // No special configuration needed
+ break;
+ }
+ }
+
+ private float convertDistanceToScore(float distance) {
+ // For L2 distance, smaller is better, so we invert it
+ // For inner product, larger is better (already a similarity)
+ if (options.metric() == FaissVectorMetric.L2) {
+ // Convert L2 distance to similarity score
+ return 1.0f / (1.0f + distance);
+ } else {
+ // Inner product is already a similarity
+ return distance;
+ }
+ }
+
+ private void validateVectorType(Object vector) {
+ if (!(vector instanceof float[])) {
+ throw new IllegalArgumentException(
+ "Expected float[] vector but got: " + vector.getClass());
+ }
+ if (!(fieldType instanceof ArrayType)
+ || !(((ArrayType) fieldType).getElementType() instanceof FloatType)) {
+ throw new IllegalArgumentException(
+ "FAISS currently only supports float arrays, but field type is: " + fieldType);
+ }
+ }
+
+ /** Load only metadata from all index files. */
+ private void ensureLoadMetas() throws IOException {
+ if (!metasLoaded) {
+ synchronized (this) {
+ if (!metasLoaded) {
+ for (GlobalIndexIOMeta ioMeta : ioMetas) {
+ byte[] metaBytes = ioMeta.metadata();
+ FaissIndexMeta meta = FaissIndexMeta.deserialize(metaBytes);
+ indexMetas.add(meta);
+ }
+ metasLoaded = true;
+ }
+ }
+ }
+ }
+
+ /** Load all indices. */
+ private void ensureLoadAllIndices() throws IOException {
+ if (!indicesLoaded) {
+ synchronized (this) {
+ if (!indicesLoaded) {
+ for (int i = 0; i < ioMetas.size(); i++) {
+ loadIndexAt(i);
+ }
+ indicesLoaded = true;
+ }
+ }
+ }
+ }
+
+ /** Load only the specified indices by their positions. */
+ private void ensureLoadIndices(List positions) throws IOException {
+ synchronized (this) {
+ // Ensure indices list is large enough
+ while (indices.size() < ioMetas.size()) {
+ indices.add(null);
+ }
+ for (int pos : positions) {
+ if (indices.get(pos) == null) {
+ loadIndexAt(pos);
+ }
+ }
+ }
+ }
+
+ /** Load a single index at the specified position. */
+ private void loadIndexAt(int position) throws IOException {
+ GlobalIndexIOMeta ioMeta = ioMetas.get(position);
+ FaissIndex index = null;
+ try (SeekableInputStream in = fileReader.getInputStream(ioMeta.fileName())) {
+ index = loadIndex(in);
+ if (indices.size() <= position) {
+ while (indices.size() < position) {
+ indices.add(null);
+ }
+ indices.add(index);
+ } else {
+ indices.set(position, index);
+ }
+ } catch (Exception e) {
+ IOUtils.closeQuietly(index);
+ throw e;
+ }
+ }
+
+ private FaissIndex loadIndex(SeekableInputStream in) throws IOException {
+
+ // Create a temp file for the raw FAISS index data (for mmap loading)
+ File rawIndexFile =
+ Files.createTempFile("paimon-faiss-" + UUID.randomUUID(), ".faiss").toFile();
+ localIndexFiles.add(rawIndexFile);
+
+ // Copy raw FAISS index data to the temp file
+ try (FileOutputStream fos = new FileOutputStream(rawIndexFile)) {
+ byte[] buffer = new byte[32768];
+ int bytesRead;
+ while ((bytesRead = in.read(buffer)) != -1) {
+ fos.write(buffer, 0, bytesRead);
+ }
+ }
+ // Load via mmap from the raw index file
+ return FaissIndex.fromFile(rawIndexFile);
+ }
+
+ /**
+ * L2 normalize the vector in place.
+ *
+ * @param vector the vector to normalize
+ */
+ private void normalizeL2(float[] vector) {
+ float norm = 0.0f;
+ for (float v : vector) {
+ norm += v * v;
+ }
+ norm = (float) Math.sqrt(norm);
+ if (norm > 0) {
+ for (int i = 0; i < vector.length; i++) {
+ vector[i] /= norm;
+ }
+ }
+ }
+
+ @Override
+ public void close() throws IOException {
+ Throwable firstException = null;
+
+ // Close all FAISS indices
+ for (FaissIndex index : indices) {
+ try {
+ index.close();
+ } catch (Throwable t) {
+ if (firstException == null) {
+ firstException = t;
+ } else {
+ firstException.addSuppressed(t);
+ }
+ }
+ }
+ indices.clear();
+
+ // Delete local temporary files
+ for (File localFile : localIndexFiles) {
+ try {
+ if (localFile != null && localFile.exists()) {
+ localFile.delete();
+ }
+ } catch (Throwable t) {
+ if (firstException == null) {
+ firstException = t;
+ } else {
+ firstException.addSuppressed(t);
+ }
+ }
+ }
+ localIndexFiles.clear();
+
+ if (firstException != null) {
+ if (firstException instanceof IOException) {
+ throw (IOException) firstException;
+ } else if (firstException instanceof RuntimeException) {
+ throw (RuntimeException) firstException;
+ } else {
+ throw new RuntimeException(
+ "Failed to close FAISS vector global index reader", firstException);
+ }
+ }
+ }
+
+ /** Helper class to store row ID with its score. */
+ private static class ScoredRow {
+ final long rowId;
+ final float score;
+
+ ScoredRow(long rowId, float score) {
+ this.rowId = rowId;
+ this.score = score;
+ }
+ }
+
+ // =================== unsupported =====================
+
+ @Override
+ public Optional visitIsNotNull(FieldRef fieldRef) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitIsNull(FieldRef fieldRef) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitStartsWith(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitEndsWith(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitContains(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitLike(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitLessThan(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitGreaterOrEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitNotEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitLessOrEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitEqual(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitGreaterThan(FieldRef fieldRef, Object literal) {
+ return Optional.empty();
+ }
+
+ @Override
+ public Optional visitIn(FieldRef fieldRef, List