From 64f14e88811a4f5ce16397771be222ade2f4cde2 Mon Sep 17 00:00:00 2001 From: Oliver Rice Date: Wed, 21 Aug 2024 10:56:16 -0500 Subject: [PATCH] add pgvector tests --- nix/tests/expected/pgvector.out | 90 +++++++++++++++++++++++++++++++++ nix/tests/sql/pgvector.sql | 72 ++++++++++++++++++++++++++ 2 files changed, 162 insertions(+) create mode 100644 nix/tests/expected/pgvector.out create mode 100644 nix/tests/sql/pgvector.sql diff --git a/nix/tests/expected/pgvector.out b/nix/tests/expected/pgvector.out new file mode 100644 index 000000000..6564be5f4 --- /dev/null +++ b/nix/tests/expected/pgvector.out @@ -0,0 +1,90 @@ +create schema v; +create table v.items( + id serial primary key, + embedding vector(3), + half_embedding halfvec(3), + bit_embedding bit(3), + sparse_embedding sparsevec(3) +); +-- vector ops +create index on v.items using hnsw (embedding vector_l2_ops); +create index on v.items using hnsw (embedding vector_cosine_ops); +create index on v.items using hnsw (embedding vector_l1_ops); +create index on v.items using ivfflat (embedding vector_l2_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +create index on v.items using ivfflat (embedding vector_cosine_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +-- halfvec ops +create index on v.items using hnsw (half_embedding halfvec_l2_ops); +create index on v.items using hnsw (half_embedding halfvec_cosine_ops); +create index on v.items using hnsw (half_embedding halfvec_l1_ops); +create index on v.items using ivfflat (half_embedding halfvec_l2_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +create index on v.items using ivfflat (half_embedding halfvec_cosine_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +-- sparsevec +create index on v.items using hnsw (sparse_embedding sparsevec_l2_ops); +create index on v.items using hnsw (sparse_embedding sparsevec_cosine_ops); +create index on v.items using hnsw (sparse_embedding sparsevec_l1_ops); +-- bit ops +create index on v.items using hnsw (bit_embedding bit_hamming_ops); +create index on v.items using ivfflat (bit_embedding bit_hamming_ops); +NOTICE: ivfflat index created with little data +DETAIL: This will cause low recall. +HINT: Drop the index until the table has more data. +-- Populate some records +insert into v.items( + embedding, + half_embedding, + bit_embedding, + sparse_embedding +) +values + ('[1,2,3]', '[1,2,3]', '101', '{1:4}/3'), + ('[2,3,4]', '[2,3,4]', '010', '{1:7,3:0}/3'); +-- Test op types +select + * +from + v.items +order by + embedding <-> '[2,3,5]', + embedding <=> '[2,3,5]', + embedding <+> '[2,3,5]', + embedding <#> '[2,3,5]', + half_embedding <-> '[2,3,5]', + half_embedding <=> '[2,3,5]', + half_embedding <+> '[2,3,5]', + half_embedding <#> '[2,3,5]', + sparse_embedding <-> '{2:4,3:1}/3', + sparse_embedding <=> '{2:4,3:1}/3', + sparse_embedding <+> '{2:4,3:1}/3', + sparse_embedding <#> '{2:4,3:1}/3', + bit_embedding <~> '011'; + id | embedding | half_embedding | bit_embedding | sparse_embedding +----+-----------+----------------+---------------+------------------ + 2 | [2,3,4] | [2,3,4] | 010 | {1:7}/3 + 1 | [1,2,3] | [1,2,3] | 101 | {1:4}/3 +(2 rows) + +select + avg(embedding), + avg(half_embedding) +from + v.items; + avg | avg +---------------+--------------- + [1.5,2.5,3.5] | [1.5,2.5,3.5] +(1 row) + +-- Cleanup +drop schema v cascade; +NOTICE: drop cascades to table v.items diff --git a/nix/tests/sql/pgvector.sql b/nix/tests/sql/pgvector.sql new file mode 100644 index 000000000..f2de30572 --- /dev/null +++ b/nix/tests/sql/pgvector.sql @@ -0,0 +1,72 @@ +create schema v; + +create table v.items( + id serial primary key, + embedding vector(3), + half_embedding halfvec(3), + bit_embedding bit(3), + sparse_embedding sparsevec(3) +); + +-- vector ops +create index on v.items using hnsw (embedding vector_l2_ops); +create index on v.items using hnsw (embedding vector_cosine_ops); +create index on v.items using hnsw (embedding vector_l1_ops); +create index on v.items using ivfflat (embedding vector_l2_ops); +create index on v.items using ivfflat (embedding vector_cosine_ops); + +-- halfvec ops +create index on v.items using hnsw (half_embedding halfvec_l2_ops); +create index on v.items using hnsw (half_embedding halfvec_cosine_ops); +create index on v.items using hnsw (half_embedding halfvec_l1_ops); +create index on v.items using ivfflat (half_embedding halfvec_l2_ops); +create index on v.items using ivfflat (half_embedding halfvec_cosine_ops); + +-- sparsevec +create index on v.items using hnsw (sparse_embedding sparsevec_l2_ops); +create index on v.items using hnsw (sparse_embedding sparsevec_cosine_ops); +create index on v.items using hnsw (sparse_embedding sparsevec_l1_ops); + +-- bit ops +create index on v.items using hnsw (bit_embedding bit_hamming_ops); +create index on v.items using ivfflat (bit_embedding bit_hamming_ops); + +-- Populate some records +insert into v.items( + embedding, + half_embedding, + bit_embedding, + sparse_embedding +) +values + ('[1,2,3]', '[1,2,3]', '101', '{1:4}/3'), + ('[2,3,4]', '[2,3,4]', '010', '{1:7,3:0}/3'); + +-- Test op types +select + * +from + v.items +order by + embedding <-> '[2,3,5]', + embedding <=> '[2,3,5]', + embedding <+> '[2,3,5]', + embedding <#> '[2,3,5]', + half_embedding <-> '[2,3,5]', + half_embedding <=> '[2,3,5]', + half_embedding <+> '[2,3,5]', + half_embedding <#> '[2,3,5]', + sparse_embedding <-> '{2:4,3:1}/3', + sparse_embedding <=> '{2:4,3:1}/3', + sparse_embedding <+> '{2:4,3:1}/3', + sparse_embedding <#> '{2:4,3:1}/3', + bit_embedding <~> '011'; + +select + avg(embedding), + avg(half_embedding) +from + v.items; + +-- Cleanup +drop schema v cascade;