Skip to content

Commit 4de0e9c

Browse files
committed
support vector index by faiss
1 parent 708ea8f commit 4de0e9c

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+7774
-0
lines changed
Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
# Action metadata: display name and short description of this action.
name: 'Setup FAISS'
2+
description: 'Install native dependencies and build FAISS library'
3+
4+
# `using: "composite"` declares this as a composite action; the individual
# steps that make up the action are listed under `steps:`.
runs:
5+
using: "composite"
6+
steps:
7+
# Refreshes the apt index and installs the system packages needed by the
# following steps (wget is used by the CMake install step).
# NOTE(review): patchelf is presumably consumed by the native build script;
# confirm against paimon-faiss-jni/scripts/build-native.sh.
- name: Install native dependencies
  shell: bash
  run: |
    apt_packages=(
      build-essential
      libopenblas-dev
      liblapack-dev
      patchelf
      libgomp1
      wget
    )
    sudo apt-get update
    sudo apt-get install -y "${apt_packages[@]}"
18+
19+
# Installs GCC/G++ 9, makes them the active `gcc`/`g++`, and fails the step
# when the active compiler is older than the documented minimum (9.3.0).
- name: Install GCC 9
  shell: bash
  run: |
    # Minimum compiler version accepted by this step.
    MIN_GCC_VERSION="9.3.0"

    sudo apt-get install -y gcc-9 g++-9

    # Register GCC 9 as an alternative and select it explicitly, so `gcc` and
    # `g++` resolve to it even when another alternative is already registered
    # with a higher priority or the group is in manual mode.
    sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-9 90
    sudo update-alternatives --install /usr/bin/g++ g++ /usr/bin/g++-9 90
    sudo update-alternatives --set gcc /usr/bin/gcc-9
    sudo update-alternatives --set g++ /usr/bin/g++-9
    gcc --version
    g++ --version

    # Use -dumpfullversion for the full version, fall back to -dumpversion.
    GCC_VERSION=$(gcc -dumpfullversion 2>/dev/null || gcc -dumpversion)
    echo "GCC version: $GCC_VERSION"

    # `sort -V` orders version strings; the check passes only when the
    # required minimum sorts first (i.e. installed >= minimum).
    LOWEST=$(printf '%s\n' "$MIN_GCC_VERSION" "$GCC_VERSION" | sort -V | head -n1)
    if [[ "$LOWEST" != "$MIN_GCC_VERSION" ]]; then
      echo "ERROR: GCC version must be >= $MIN_GCC_VERSION, got $GCC_VERSION"
      exit 1
    fi
    echo "GCC version check passed: $GCC_VERSION"
38+
39+
# Downloads the official CMake binary release, installs it under /opt/cmake,
# links cmake/ctest/cpack into /usr/local/bin and verifies the active version.
- name: Install CMake 3.30.1
  shell: bash
  run: |
    CMAKE_VERSION="3.30.1"
    CMAKE_DIST="cmake-${CMAKE_VERSION}-linux-x86_64"

    # Download and unpack in a scratch directory so that neither the tarball
    # nor the extracted tree is left behind in the checked-out workspace.
    WORK_DIR=$(mktemp -d)
    trap 'rm -rf "$WORK_DIR"' EXIT
    wget -q -P "$WORK_DIR" \
      "https://github.com/Kitware/CMake/releases/download/v${CMAKE_VERSION}/${CMAKE_DIST}.tar.gz"
    tar -xzf "${WORK_DIR}/${CMAKE_DIST}.tar.gz" -C "$WORK_DIR"

    # Remove any previous install first: moving into an existing /opt/cmake
    # would nest the new tree inside it instead of replacing it.
    sudo rm -rf /opt/cmake
    sudo mv "${WORK_DIR}/${CMAKE_DIST}" /opt/cmake
    for tool in cmake ctest cpack; do
      sudo ln -sf "/opt/cmake/bin/${tool}" "/usr/local/bin/${tool}"
    done
    cmake --version

    # Verify that the `cmake` now on PATH is at least the version installed
    # above; `sort -V` puts the smaller version string first.
    CMAKE_INSTALLED=$(cmake --version | head -n1 | awk '{print $3}')
    echo "CMake version: $CMAKE_INSTALLED"
    if [[ "$(printf '%s\n' "$CMAKE_VERSION" "$CMAKE_INSTALLED" | sort -V | head -n1)" != "$CMAKE_VERSION" ]]; then
      echo "ERROR: CMake version must be >= $CMAKE_VERSION, got $CMAKE_INSTALLED"
      exit 1
    fi
57+
58+
# Clones FAISS v1.7.4, builds it in Release mode without GPU support, Python
# bindings or tests, and installs it system-wide.
- name: Install FAISS
  shell: bash
  run: |
    FAISS_TAG="v1.7.4"
    FAISS_SRC="/tmp/faiss"

    # Start from a clean directory: `git clone` refuses to clone into an
    # existing non-empty path, which would break re-runs on a reused machine.
    sudo rm -rf "$FAISS_SRC"
    git clone --depth 1 --branch "$FAISS_TAG" \
      https://github.com/facebookresearch/faiss.git "$FAISS_SRC"

    # Configure, build with one job per available CPU, then install.
    cmake -S "$FAISS_SRC" -B "${FAISS_SRC}/build" \
      -DFAISS_ENABLE_GPU=OFF \
      -DFAISS_ENABLE_PYTHON=OFF \
      -DBUILD_TESTING=OFF \
      -DCMAKE_BUILD_TYPE=Release
    cmake --build "${FAISS_SRC}/build" -j "$(nproc)"
    sudo cmake --install "${FAISS_SRC}/build"
71+
72+
# Runs the module's native build script from inside paimon-faiss-jni with the
# --clean and --fat-lib options.
- name: Build native library
  shell: bash
  working-directory: paimon-faiss-jni
  run: ./scripts/build-native.sh --clean --fat-lib
77+
78+
# Installs paimon-faiss-jni and the modules it depends on (-am) into the local
# Maven repository, skipping tests, with the paimon-faiss-vector profile.
- name: Build paimon-faiss-jni
  shell: bash
  run: >-
    mvn -B clean install
    -pl paimon-faiss-jni -am
    -DskipTests
    -Ppaimon-faiss-vector
82+
83+
# Installs paimon-faiss and the modules it depends on (-am) into the local
# Maven repository, skipping tests, with the paimon-faiss-vector profile.
- name: Build paimon-faiss
  shell: bash
  run: >-
    mvn -B clean install
    -pl paimon-faiss -am
    -DskipTests
    -Ppaimon-faiss-vector
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
################################################################################
2+
# Licensed to the Apache Software Foundation (ASF) under one
3+
# or more contributor license agreements. See the NOTICE file
4+
# distributed with this work for additional information
5+
# regarding copyright ownership. The ASF licenses this file
6+
# to you under the Apache License, Version 2.0 (the
7+
# "License"); you may not use this file except in compliance
8+
# with the License. You may obtain a copy of the License at
9+
#
10+
# http://www.apache.org/licenses/LICENSE-2.0
11+
#
12+
# Unless required by applicable law or agreed to in writing, software
13+
# distributed under the License is distributed on an "AS IS" BASIS,
14+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
# See the License for the specific language governing permissions and
16+
# limitations under the License.
17+
################################################################################
18+
19+
name: Faiss Vector Index Tests

# Run on pushes and pull requests that touch anything this workflow builds or
# executes: the two FAISS modules, the e2e test module built and verified by
# the last steps, the composite setup action and helper script it uses, and
# the workflow definition itself.
on:
  push:
    paths:
      - 'paimon-faiss/**'
      - 'paimon-faiss-jni/**'
      - 'paimon-vector-e2e-test/**'
      - '.github/actions/setup-faiss/**'
      - '.github/workflows/utils.sh'
      - '.github/workflows/faiss-vector-index-tests.yml'
  pull_request:
    paths:
      - 'paimon-faiss/**'
      - 'paimon-faiss-jni/**'
      - 'paimon-vector-e2e-test/**'
      - '.github/actions/setup-faiss/**'
      - '.github/workflows/utils.sh'
      - '.github/workflows/faiss-vector-index-tests.yml'
32+
33+
# Workflow-level environment: the JDK version handed to setup-java and the
# default Maven options (HTTP connection TTL and retry behaviour).
# NOTE(review): steps that define their own MAVEN_OPTS replace this value
# rather than extend it; confirm that is intended.
env:
  JDK_VERSION: '8'
  MAVEN_OPTS: >-
    -Dmaven.wagon.httpconnectionManager.ttlSeconds=30
    -Dmaven.wagon.http.retryHandler.requestSentEnabled=true
36+
37+
# Runs sharing the same workflow, event name and event number are grouped
# (falling back to the unique run id when there is no event number), and a
# newer run cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: '${{ github.workflow }}-${{ github.event_name }}-${{ github.event.number || github.run_id }}'
40+
41+
# Single job `build_test`: runs on a GitHub-hosted `ubuntu-latest` runner and
# is limited to 90 minutes overall; its steps are listed under `steps:`.
jobs:
42+
build_test:
43+
runs-on: ubuntu-latest
44+
timeout-minutes: 90
45+
steps:
46+
# Fetch the repository contents.
- uses: actions/checkout@v4
  name: Checkout code

# Install the Temurin JDK whose version is taken from the JDK_VERSION env var.
- uses: actions/setup-java@v4
  name: Set up JDK ${{ env.JDK_VERSION }}
  with:
    distribution: temurin
    java-version: ${{ env.JDK_VERSION }}

# Run the repository-local composite action in .github/actions/setup-faiss.
- uses: ./.github/actions/setup-faiss
  name: Setup FAISS and build paimon-faiss
57+
58+
# Diagnostic step: lists the native libraries under the module's
# linux/amd64 resource directory and prints the JNI library's dynamic
# dependencies. A failing `ldd` is tolerated (`|| true`).
- name: List bundled libraries
  run: |
    native_dir=paimon-faiss-jni/src/main/resources/linux/amd64

    echo "=== Bundled libraries ==="
    ls -la "${native_dir}/"
    echo ""
    echo "=== Library dependencies ==="
    ldd "${native_dir}/libpaimon_faiss_jni.so" || true
65+
66+
# Runs the paimon-faiss-jni tests with -DskipFaissTests=false, one Maven
# thread per core, a 2 GiB heap and a 10 minute limit.
# NOTE(review): this step-level MAVEN_OPTS replaces the workflow-level value,
# so the wagon HTTP options are not in effect here; confirm that is intended.
- name: Test paimon-faiss-jni
  timeout-minutes: 10
  env:
    MAVEN_OPTS: -Xmx2048m
  run: >-
    mvn -T 1C -B test
    -pl paimon-faiss-jni
    -DskipFaissTests=false
    -Ppaimon-faiss-vector
72+
73+
# Runs the paimon-faiss tests with one Maven thread per core, a 4 GiB heap
# and a 30 minute limit.
# NOTE(review): this step-level MAVEN_OPTS replaces the workflow-level value,
# so the wagon HTTP options are not in effect here; confirm that is intended.
- name: Test paimon-faiss
  timeout-minutes: 30
  env:
    MAVEN_OPTS: -Xmx4096m
  run: >-
    mvn -T 1C -B test
    -pl paimon-faiss
    -Ppaimon-faiss-vector
79+
80+
# Installs paimon-vector-e2e-test and the modules it depends on (-am) without
# running tests, using the spark3, flink1 and paimon-faiss-vector profiles.
- name: Build Vector E2E Test Module
  run: >-
    mvn -T 2C -B clean install
    -DskipTests
    -Pspark3,flink1,paimon-faiss-vector
    -pl paimon-vector-e2e-test -am
82+
83+
# Runs `mvn verify` for the vector e2e test module under a randomly chosen JVM
# timezone, with a 4 GiB heap and a 30 minute limit.
- name: Run Vector E2E Tests
  timeout-minutes: 30
  run: |
    # Run tests with a random timezone to find timezone-related bugs.
    # random_timezone is expected to be provided by the sourced utils.sh.
    . .github/workflows/utils.sh
    jvm_timezone=$(random_timezone)
    echo "JVM timezone is set to $jvm_timezone"
    # Quote the expansion so the timezone value is passed to Maven as a single
    # argument and is never word-split or glob-expanded by the shell.
    mvn -T 2C -B verify -Pspark3,flink1,paimon-faiss-vector -pl paimon-vector-e2e-test "-Duser.timezone=${jvm_timezone}"
  env:
    MAVEN_OPTS: -Xmx4096m

0 commit comments

Comments
 (0)