diff --git a/ansible/tasks/stage2-setup-postgres.yml b/ansible/tasks/stage2-setup-postgres.yml
index 911e1eae5..a9033e887 100644
--- a/ansible/tasks/stage2-setup-postgres.yml
+++ b/ansible/tasks/stage2-setup-postgres.yml
@@ -13,7 +13,13 @@
 - name: Install pg_prove from nix binary cache
   become: yes
   shell: |
-    sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/sam/2-stage-ami-nix#pg_prove"
+    sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#pg_prove"
+  when: stage2_nix
+
+- name: Install supabase-groonga from nix binary cache
+  become: yes
+  shell: |
+    sudo -u postgres bash -c ". /nix/var/nix/profiles/default/etc/profile.d/nix-daemon.sh && nix profile install github:supabase/postgres/{{ git_commit_sha }}#supabase-groonga"
   when: stage2_nix
 
 - name: Set ownership and permissions for /etc/ssl/private
@@ -220,3 +226,9 @@
     # script is expected to be placed by finalization tasks for different target platforms
     line: pgsodium.getkey_script= '{{ pg_bindir }}/pgsodium_getkey.sh'
   when: stage2_nix
+
+- name: Append GRN_PLUGINS_DIR to /etc/environment.d/postgresql.env
+  ansible.builtin.lineinfile:
+    path: /etc/environment.d/postgresql.env
+    line: 'GRN_PLUGINS_DIR=/var/lib/postgresql/.nix-profile/lib/groonga/plugins'
+  become: yes
\ No newline at end of file
diff --git a/flake.nix b/flake.nix
index d6be3a172..1f9d5f64d 100644
--- a/flake.nix
+++ b/flake.nix
@@ -77,7 +77,8 @@
         postgresql = pkgs.postgresql.postgresql_15;
         sfcgal = pkgs.callPackage ./nix/ext/sfcgal/sfcgal.nix { };
         pg_regress = pkgs.callPackage ./nix/ext/pg_regress.nix { inherit postgresql; };
-
+        supabase-groonga = pkgs.callPackage ./nix/supabase-groonga.nix { };
+        mecab-naist-jdic = pkgs.callPackage ./nix/ext/mecab-naist-jdic/default.nix { };
         # Our list of PostgreSQL extensions which come from upstream Nixpkgs.
         # These are maintained upstream and can easily be used here just by
         # listing their name. Anytime the version of nixpkgs is upgraded, these
@@ -284,6 +285,7 @@
         # name in 'nix flake show' in order to make sure exactly what name you
         # want.
         basePackages = {
+          supabase-groonga = supabase-groonga;
           # PostgreSQL versions.
           psql_15 = makePostgres "15";
           #psql_16 = makePostgres "16";
@@ -315,6 +317,8 @@
               platforms = platforms.all;
             };
           };
+          mecab_naist_jdic = mecab-naist-jdic;
+          supabase_groonga = supabase-groonga;
           # Start a version of the server.
           start-server =
             let
@@ -377,8 +381,10 @@
                   --subst-var-by 'PG_HBA' "$out/etc/postgresql/pg_hba.conf" \
                   --subst-var-by 'PG_IDENT' "$out/etc/postgresql/pg_ident.conf" \
                   --subst-var-by 'LOCALES' '${localeArchive}' \
-                  --subst-var-by 'EXTENSION_CUSTOM_SCRIPTS_DIR' "$out/extension-custom-scripts"
-
+                  --subst-var-by 'EXTENSION_CUSTOM_SCRIPTS_DIR' "$out/extension-custom-scripts" \
+                  --subst-var-by 'MECAB_LIB' '${basePackages.psql_15.exts.pgroonga}/lib/groonga/plugins/tokenizers/tokenizer_mecab.so' \
+                  --subst-var-by 'GROONGA_DIR' '${supabase-groonga}'
+
                 chmod +x $out/bin/start-postgres-server
               '';
 
@@ -448,10 +454,11 @@
             let
               sqlTests = ./nix/tests/smoke;
               pg_prove = pkgs.perlPackages.TAPParserSourceHandlerpgTAP;
+              supabase-groonga = pkgs.callPackage ./nix/supabase-groonga.nix { };
             in
             pkgs.runCommand "postgres-${pgpkg.version}-check-harness"
               {
-                nativeBuildInputs = with pkgs; [ coreutils bash pgpkg pg_prove pg_regress procps ];
+                nativeBuildInputs = with pkgs; [ coreutils bash pgpkg pg_prove pg_regress procps supabase-groonga ];
               } ''
               TMPDIR=$(mktemp -d)
               if [ $? -ne 0 ]; then
@@ -469,7 +476,7 @@
               mkdir -p $TMPDIR/logfile
               # Generate a random key and store it in an environment variable
               export PGSODIUM_KEY=$(head -c 32 /dev/urandom | od -A n -t x1 | tr -d ' \n')
-
+              export GRN_PLUGINS_DIR=${supabase-groonga}/lib/groonga/plugins
               # Create a simple script to echo the key
               echo '#!/bin/sh' > $TMPDIR/getkey.sh
               echo 'echo $PGSODIUM_KEY' >> $TMPDIR/getkey.sh
diff --git a/nix/do-not-use-vendored-libraries.patch b/nix/do-not-use-vendored-libraries.patch
new file mode 100644
index 000000000..6a005349d
--- /dev/null
+++ b/nix/do-not-use-vendored-libraries.patch
@@ -0,0 +1,15 @@
+Do not use vendored libraries
+
+--- a/vendor/CMakeLists.txt
++++ b/vendor/CMakeLists.txt
+@@ -14,10 +14,7 @@
+ # License along with this library; if not, write to the Free Software
+ # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ 
+ add_subdirectory(onigmo)
+-add_subdirectory(mruby)
+-add_subdirectory(mecab)
+-add_subdirectory(message_pack)
+ if(GRN_WITH_MRUBY)
+   add_subdirectory(groonga-log)
+ endif()
\ No newline at end of file
diff --git a/nix/ext/mecab-naist-jdic/default.nix b/nix/ext/mecab-naist-jdic/default.nix
new file mode 100644
index 000000000..d7ea6c541
--- /dev/null
+++ b/nix/ext/mecab-naist-jdic/default.nix
@@ -0,0 +1,42 @@
+# NAIST Japanese dictionary for MeCab, compiled with mecab-dict-index at build time.
+{ lib, stdenv, fetchurl, mecab }:
+
+stdenv.mkDerivation rec {
+  pname = "mecab-naist-jdic";
+  version = "0.6.3b-20111013";
+
+  src = fetchurl {
+    url = "https://github.com/supabase/mecab-naist-jdic/raw/main/mecab-naist-jdic-${version}.tar.gz";
+    sha256 = "sha256-yzdwDcmne5U/K/OxW0nP7NZ4SFMKLPirywm1lMpWKMw=";
+  };
+
+  buildInputs = [ mecab ];
+
+  configureFlags = [
+    "--with-charset=utf8"
+  ];
+
+  buildPhase = ''
+    runHook preBuild
+    make
+    ${mecab}/libexec/mecab/mecab-dict-index -d . -o . -f UTF-8 -t utf-8
+    runHook postBuild
+  '';
+
+  installPhase = ''
+    runHook preInstall
+
+    mkdir -p $out/lib/mecab/dic/naist-jdic
+    cp *.dic *.bin *.def $out/lib/mecab/dic/naist-jdic/
+
+    runHook postInstall
+  '';
+
+  meta = with lib; {
+    description = "Naist Japanese Dictionary for MeCab";
+    homepage = "https://taku910.github.io/mecab/";
+    license = licenses.gpl2;
+    platforms = platforms.unix;
+    maintainers = with maintainers; [ samrose ];
+  };
+}
\ No newline at end of file
diff --git a/nix/ext/pgroonga.nix b/nix/ext/pgroonga.nix
index bcd662727..884b54582 100644
--- a/nix/ext/pgroonga.nix
+++ b/nix/ext/pgroonga.nix
@@ -1,30 +1,47 @@
-{ lib, stdenv, fetchurl, pkg-config, postgresql, msgpack-c, groonga }:
-
+{ lib, stdenv, fetchurl, pkg-config, postgresql, msgpack-c, callPackage, mecab, makeWrapper }:
+let
+  supabase-groonga = callPackage ../supabase-groonga.nix { };
+in
 stdenv.mkDerivation rec {
   pname = "pgroonga";
   version = "3.0.7";
-
   src = fetchurl {
     url = "https://packages.groonga.org/source/${pname}/${pname}-${version}.tar.gz";
     sha256 = "sha256-iF/zh4zDDpAw5fxW1WG8i2bfPt4VYsnYArwOoE/lwgM=";
   };
-
-  nativeBuildInputs = [ pkg-config ];
-  buildInputs = [ postgresql msgpack-c groonga ];
+  nativeBuildInputs = [ pkg-config makeWrapper ];
+  buildInputs = [ postgresql msgpack-c supabase-groonga mecab ];
+  propagatedBuildInputs = [ supabase-groonga ];
+  configureFlags = [
+    "--with-mecab=${mecab}"
+    "--enable-mecab"
+    "--with-groonga=${supabase-groonga}"
+    "--with-groonga-plugin-dir=${supabase-groonga}/lib/groonga/plugins"
+  ];
 
   makeFlags = [
     "HAVE_MSGPACK=1"
     "MSGPACK_PACKAGE_NAME=msgpack-c"
+    "HAVE_MECAB=1"
   ];
 
+  preConfigure = ''
+    export GROONGA_LIBS="-L${supabase-groonga}/lib -lgroonga"
+    export GROONGA_CFLAGS="-I${supabase-groonga}/include"
+    export MECAB_CONFIG="${mecab}/bin/mecab-config"
+  '';
+
   installPhase = ''
+    mkdir -p $out/lib $out/share/postgresql/extension $out/bin
     install -D pgroonga${postgresql.dlSuffix} -t $out/lib/
     install -D pgroonga.control -t $out/share/postgresql/extension
     install -D data/pgroonga-*.sql -t $out/share/postgresql/extension
-
     install -D pgroonga_database${postgresql.dlSuffix} -t $out/lib/
     install -D pgroonga_database.control -t $out/share/postgresql/extension
     install -D data/pgroonga_database-*.sql -t $out/share/postgresql/extension
+
+    echo "Debug: Groonga plugins directory contents:"
+    ls -l ${supabase-groonga}/lib/groonga/plugins/tokenizers/
   '';
 
   meta = with lib; {
@@ -41,4 +58,4 @@ stdenv.mkDerivation rec {
     platforms = postgresql.meta.platforms;
     maintainers = with maintainers; [ samrose ];
   };
-}
+}
\ No newline at end of file
diff --git a/nix/ext/use-system-groonga.patch b/nix/ext/use-system-groonga.patch
new file mode 100644
index 000000000..6d3042bc8
--- /dev/null
+++ b/nix/ext/use-system-groonga.patch
@@ -0,0 +1,21 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 33b34477..f4ffefe5 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -12,7 +12,6 @@ if(MSVC_VERSION LESS 1800)
+   message(FATAL_ERROR "PGroonga supports only MSVC 2013 or later")
+ endif()
+ 
+-add_subdirectory(vendor/groonga)
+ 
+ set(PGRN_POSTGRESQL_DIR "${CMAKE_INSTALL_PREFIX}"
+   CACHE PATH "PostgreSQL binary directory")
+@@ -52,8 +51,6 @@ string(REGEX REPLACE "([0-9]+)\\.([0-9]+)\\.([0-9]+)" "\\3"
+ string(REGEX REPLACE ".*comment = '([^']+)'.*" "\\1"
+   PGRN_DESCRIPTION "${PGRN_CONTROL}")
+ 
+-file(READ "${CMAKE_CURRENT_SOURCE_DIR}/vendor/groonga/bundled_message_pack_version"
+-  PGRN_BUNDLED_MESSAGE_PACK_VERSION)
+ string(STRIP
+   "${PGRN_BUNDLED_MESSAGE_PACK_VERSION}"
+   PGRN_BUNDLED_MESSAGE_PACK_VERSION)
\ No newline at end of file
diff --git a/nix/fix-cmake-install-path.patch b/nix/fix-cmake-install-path.patch
new file mode 100644
index 000000000..1fe317b6c
--- /dev/null
+++ b/nix/fix-cmake-install-path.patch
@@ -0,0 +1,21 @@
+Fix CMake install path
+
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -1141,11 +1141,11 @@
+ 
+ set(prefix "${CMAKE_INSTALL_PREFIX}")
+ set(exec_prefix "\${prefix}")
+-set(bindir "\${exec_prefix}/${CMAKE_INSTALL_BINDIR}")
+-set(sbindir "\${exec_prefix}/${CMAKE_INSTALL_SBINDIR}")
+-set(libdir "\${prefix}/${CMAKE_INSTALL_LIBDIR}")
+-set(includedir "\${prefix}/${CMAKE_INSTALL_INCLUDEDIR}")
+-set(datarootdir "\${prefix}/${CMAKE_INSTALL_DATAROOTDIR}")
++set(bindir "${CMAKE_INSTALL_FULL_BINDIR}")
++set(sbindir "${CMAKE_INSTALL_FULL_SBINDIR}")
++set(libdir "${CMAKE_INSTALL_FULL_LIBDIR}")
++set(includedir "${CMAKE_INSTALL_FULL_INCLUDEDIR}")
++set(datarootdir "${CMAKE_INSTALL_FULL_DATAROOTDIR}")
+ set(datadir "\${datarootdir}")
+ set(expanded_pluginsdir "${GRN_PLUGINS_DIR}")
+ set(GRN_EXPANDED_DEFAULT_DOCUMENT_ROOT "${GRN_DEFAULT_DOCUMENT_ROOT}")
\ No newline at end of file
diff --git a/nix/supabase-groonga.nix b/nix/supabase-groonga.nix
new file mode 100644
index 000000000..410bab067
--- /dev/null
+++ b/nix/supabase-groonga.nix
@@ -0,0 +1,81 @@
+# Groonga built with the MeCab tokenizer enabled (see cmakeFlags) and the
+# naist-jdic dictionary; nix/ext/pgroonga.nix builds against this package.
+{ lib, stdenv, cmake, fetchurl, kytea, msgpack-c, mecab, pkg-config, rapidjson
+, testers, xxHash, zstd, postgresqlPackages, makeWrapper, suggestSupport ? false
+, zeromq, libevent, openssl, lz4Support ? false, lz4, zlibSupport ? true, zlib
+, writeShellScriptBin, callPackage }:
+let mecab-naist-jdic = callPackage ./ext/mecab-naist-jdic { };
+in stdenv.mkDerivation (finalAttrs: {
+  pname = "supabase-groonga";
+  version = "14.0.5";
+  src = fetchurl {
+    url =
+      "https://packages.groonga.org/source/groonga/groonga-${finalAttrs.version}.tar.gz";
+    hash = "sha256-y4UGnv8kK0z+br8wXpPf57NMXkdEJHcLCuTvYiubnIc=";
+  };
+  patches =
+    [ ./fix-cmake-install-path.patch ./do-not-use-vendored-libraries.patch ];
+  nativeBuildInputs = [ cmake pkg-config makeWrapper ];
+  buildInputs = [ rapidjson xxHash zstd mecab kytea msgpack-c ]
+    ++ lib.optionals lz4Support [ lz4 ] ++ lib.optionals zlibSupport [ zlib ]
+    ++ lib.optionals suggestSupport [ zeromq libevent ];
+  cmakeFlags = [
+    "-DWITH_MECAB=ON"
+    "-DMECAB_DICDIR=${mecab-naist-jdic}/lib/mecab/dic/naist-jdic"
+    "-DMECAB_CONFIG=${mecab}/bin/mecab-config"
+    "-DENABLE_MECAB_TOKENIZER=ON"
+    "-DMECAB_INCLUDE_DIR=${mecab}/include"
+    "-DMECAB_LIBRARY=${mecab}/lib/libmecab.so"
+    "-DGROONGA_ENABLE_TOKENIZER_MECAB=YES"
+    "-DGRN_WITH_MECAB=YES"
+  ];
+  preConfigure = ''
+    export MECAB_DICDIR=${mecab-naist-jdic}/lib/mecab/dic/naist-jdic
+    echo "MeCab dictionary directory is: $MECAB_DICDIR"
+  '';
+  # Build, then fail early if the CMake cache contains no MeCab entries at all.
+  buildPhase = ''
+    runHook preBuild
+    cmake --build . -- VERBOSE=1
+    grep -i mecab CMakeCache.txt || (echo "MeCab not detected in CMake cache" && exit 1)
+    echo "CMake cache contents related to MeCab:"
+    grep -i mecab CMakeCache.txt
+    runHook postBuild
+  '';
+
+  # installPhase = ''
+  #   mkdir -p $out/bin $out/lib/groonga/plugins
+  #   cp -r lib/groonga/plugins/* $out/lib/groonga/plugins
+  #   cp -r bin/* $out/bin
+  #   echo "Installed Groonga plugins:"
+  #   ls -l $out/lib/groonga/plugins
+  # '';
+
+  # Wrap the groonga CLI so GRN_PLUGINS_DIR points at the plugins in this output.
+  postInstall = ''
+    echo "Searching for MeCab-related files:"
+    find $out -name "*mecab*"
+
+    echo "Checking Groonga plugins directory:"
+    ls -l $out/lib/groonga/plugins
+
+    echo "Wrapping Groonga binary:"
+    wrapProgram $out/bin/groonga \
+      --set GRN_PLUGINS_DIR $out/lib/groonga/plugins
+
+  '';
+  env.NIX_CFLAGS_COMPILE =
+    lib.optionalString zlibSupport "-I${zlib.dev}/include";
+
+  meta = with lib; {
+    homepage = "https://groonga.org/";
+    description = "Open-source fulltext search engine and column store";
+    license = licenses.lgpl21;
+    maintainers = [ maintainers.samrose ];
+    platforms = platforms.all;
+    longDescription = ''
+      Groonga is an open-source fulltext search engine and column store.
+      It lets you write high-performance applications that requires fulltext search.
+    '';
+  };
+})
diff --git a/nix/tests/expected/pgroonga.out b/nix/tests/expected/pgroonga.out
new file mode 100644
index 000000000..5ceeed254
--- /dev/null
+++ b/nix/tests/expected/pgroonga.out
@@ -0,0 +1,76 @@
+create schema v;
+create table v.roon(
+    id serial primary key,
+    content text
+);
+with tokenizers as (
+    select
+        x
+    from
+        jsonb_array_elements(
+            (select pgroonga_command('tokenizer_list'))::jsonb
+        ) x(val)
+    limit
+        1
+    offset
+        1 -- first record is unrelated and not stable
+)
+select
+    t.x::jsonb ->> 'name'
+from
+    jsonb_array_elements((select * from tokenizers)) t(x)
+order by
+    t.x::jsonb ->> 'name';
+                  ?column?                   
+---------------------------------------------
+ TokenBigram
+ TokenBigramIgnoreBlank
+ TokenBigramIgnoreBlankSplitSymbol
+ TokenBigramIgnoreBlankSplitSymbolAlpha
+ TokenBigramIgnoreBlankSplitSymbolAlphaDigit
+ TokenBigramSplitSymbol
+ TokenBigramSplitSymbolAlpha
+ TokenBigramSplitSymbolAlphaDigit
+ TokenDelimit
+ TokenDelimitNull
+ TokenDocumentVectorBM25
+ TokenDocumentVectorTFIDF
+ TokenMecab
+ TokenNgram
+ TokenPattern
+ TokenRegexp
+ TokenTable
+ TokenTrigram
+ TokenUnigram
+(19 rows)
+
+insert into v.roon (content)
+values
+    ('Hello World'),
+    ('PostgreSQL with PGroonga is a thing'),
+    ('This is a full-text search test'),
+    ('PGroonga supports various languages');
+-- Create default index
+create index pgroonga_index on v.roon using pgroonga (content);
+-- Create mecab tokenizer index since we had a bug with this one once
+create index pgroonga_index_mecab on v.roon using pgroonga (content) with (tokenizer='TokenMecab');
+-- Run some queries to test the index
+select * from v.roon where content &@~ 'Hello';
+ id |   content   
+----+-------------
+  1 | Hello World
+(1 row)
+
+select * from v.roon where content &@~ 'powerful';
+ id | content 
+----+---------
+(0 rows)
+
+select * from v.roon where content &@~ 'supports';
+ id |               content               
+----+-------------------------------------
+  4 | PGroonga supports various languages
+(1 row)
+
+drop schema v cascade;
+NOTICE:  drop cascades to table v.roon
diff --git a/nix/tests/smoke/0005-test_pgroonga_mecab.sql b/nix/tests/smoke/0005-test_pgroonga_mecab.sql
new file mode 100644
index 000000000..7341d5f6f
--- /dev/null
+++ b/nix/tests/smoke/0005-test_pgroonga_mecab.sql
@@ -0,0 +1,36 @@
+-- File: 0005-test_pgroonga_mecab.sql
+
+begin;
+    -- Plan for 3 tests: extension, table, and index
+    select plan(3);
+
+    -- Create the PGroonga extension
+    create extension if not exists pgroonga;
+
+    -- Test 1: Check if PGroonga extension exists
+    select has_extension('pgroonga', 'The pgroonga extension should exist.');
+
+    -- Create the table
+    create table notes(
+        id integer primary key,
+        content text
+    );
+
+    -- Test 2: Check if the table was created
+    SELECT has_table('public', 'notes', 'The notes table should exist.');
+    -- Create the PGroonga index
+    CREATE INDEX pgroonga_content_index
+    ON notes
+    USING pgroonga (content)
+    WITH (tokenizer='TokenMecab');
+
+    -- Test 3: Check if the index was created
+    SELECT has_index('public', 'notes', 'pgroonga_content_index', 'The pgroonga_content_index should exist.');
+
+    -- Cleanup (this won't affect the test results as they've already been checked)
+    DROP INDEX IF EXISTS pgroonga_content_index;
+    DROP TABLE IF EXISTS notes;
+
+    -- Finish the test plan
+    select * from finish();
+rollback;
\ No newline at end of file
diff --git a/nix/tests/sql/pgroonga.sql b/nix/tests/sql/pgroonga.sql
new file mode 100644
index 000000000..503f2665c
--- /dev/null
+++ b/nix/tests/sql/pgroonga.sql
@@ -0,0 +1,48 @@
+create schema v;
+
+create table v.roon(
+    id serial primary key,
+    content text
+);
+
+
+with tokenizers as (
+    select
+        x
+    from
+        jsonb_array_elements(
+            (select pgroonga_command('tokenizer_list'))::jsonb
+        ) x(val)
+    limit
+        1
+    offset
+        1 -- first record is unrelated and not stable
+)
+select
+    t.x::jsonb ->> 'name'
+from
+    jsonb_array_elements((select * from tokenizers)) t(x)
+order by
+    t.x::jsonb ->> 'name';
+
+
+insert into v.roon (content)
+values
+    ('Hello World'),
+    ('PostgreSQL with PGroonga is a thing'),
+    ('This is a full-text search test'),
+    ('PGroonga supports various languages');
+
+-- Create default index
+create index pgroonga_index on v.roon using pgroonga (content);
+
+-- Create mecab tokenizer index since we had a bug with this one once
+create index pgroonga_index_mecab on v.roon using pgroonga (content) with (tokenizer='TokenMecab');
+
+-- Run some queries to test the index
+select * from v.roon where content &@~ 'Hello';
+select * from v.roon where content &@~ 'powerful';
+select * from v.roon where content &@~ 'supports';
+
+
+
+drop schema v cascade;
diff --git a/nix/tools/run-server.sh.in b/nix/tools/run-server.sh.in
index 836407cfe..977a437fb 100644
--- a/nix/tools/run-server.sh.in
+++ b/nix/tools/run-server.sh.in
@@ -29,6 +29,7 @@ READREPL_CONFIG_FILE=@READREPL_CONF_FILE@
 PG_HBA_FILE=@PG_HBA@
 PG_IDENT_FILE=@PG_IDENT@
 EXTENSION_CUSTOM_SCRIPTS=@EXTENSION_CUSTOM_SCRIPTS_DIR@
+GROONGA=@GROONGA_DIR@
 DATDIR=$(mktemp -d)
 LOCALE_ARCHIVE=@LOCALES@
 export LOCALE_ARCHIVE
@@ -60,4 +61,5 @@ pgsodium.getkey_script = '$PGSODIUM_GETKEY_SCRIPT'" \
     -e "\$a\\
 session_preload_libraries = 'supautils'" \
     "$PSQL_CONF_FILE" > "$DATDIR/postgresql.conf"
-postgres --config-file="$DATDIR/postgresql.conf" -p "$PORTNO" -D "$DATDIR" -k /tmp
\ No newline at end of file
+export GRN_PLUGINS_DIR="$GROONGA/lib/groonga/plugins"
+postgres --config-file="$DATDIR/postgresql.conf" -p "$PORTNO" -D "$DATDIR" -k /tmp