All operators use SIMD-optimized zero-copy access automatically.
-- L2 (Euclidean) Distance
SELECT * FROM items ORDER BY embedding <-> '[1,2,3]' LIMIT 10;
-- Inner Product (Maximum similarity)
SELECT * FROM items ORDER BY embedding <#> '[1,2,3]' LIMIT 10;
-- Cosine Distance (Semantic similarity)
SELECT * FROM items ORDER BY embedding <=> '[1,2,3]' LIMIT 10;
-- L1 (Manhattan) Distance
SELECT * FROM items ORDER BY embedding <+> '[1,2,3]' LIMIT 10;

-- When you need the distance value explicitly
SELECT
id,
ruvector_l2_distance(embedding, '[1,2,3]') as l2_dist,
ruvector_ip_distance(embedding, '[1,2,3]') as ip_dist,
ruvector_cosine_distance(embedding, '[1,2,3]') as cos_dist,
ruvector_l1_distance(embedding, '[1,2,3]') as l1_dist
FROM items;

| Operator | Math Formula | Range | Best For |
|---|---|---|---|
| `<->` | √(Σ(aᵢ-bᵢ)²) | [0, ∞) | General similarity, geometry |
| `<#>` | -Σ(aᵢ×bᵢ) | (-∞, ∞) | MIPS, recommendations |
| `<=>` | 1-(a·b)/(‖a‖‖b‖) | [0, 2] | Text, semantic search |
| `<+>` | Σ\|aᵢ-bᵢ\| | [0, ∞) | Sparse vectors, L1 norm |

-- Find 10 nearest neighbors
SELECT id, content, embedding <-> $query AS dist
FROM documents
ORDER BY embedding <-> $query
LIMIT 10;

-- Search within a category
SELECT * FROM products
WHERE category = 'electronics'
ORDER BY embedding <=> $query
LIMIT 20;

-- Find all items within distance 0.5
SELECT * FROM items
WHERE embedding <-> $query < 0.5;

-- Compare one vector against many
SELECT id, embedding <-> '[1,2,3]' AS distance
FROM items
WHERE id IN (1, 2, 3, 4, 5);

-- HNSW index (best for most cases)
CREATE INDEX ON items USING hnsw (embedding ruvector_l2_ops)
WITH (m = 16, ef_construction = 64);
-- IVFFlat index (good for large datasets)
CREATE INDEX ON items USING ivfflat (embedding ruvector_cosine_ops)
WITH (lists = 100);

- Use RuVector type, not arrays: the `ruvector` type enables zero-copy access
- Create indexes: Essential for large datasets
- Normalize for cosine: Pre-normalize vectors if using cosine often
- Check SIMD: Run `SELECT ruvector_simd_info()` to verify acceleration

RuVector operators are drop-in compatible with pgvector:
-- pgvector syntax works unchanged
SELECT * FROM items ORDER BY embedding <-> '[1,2,3]' LIMIT 10;
-- Just change the type from 'vector' to 'ruvector'
ALTER TABLE items ALTER COLUMN embedding TYPE ruvector(384);

- Maximum: 16,000 dimensions
- Recommended: 128-2048 for most use cases
- Performance: Optimal at multiples of 16 (AVX-512) or 8 (AVX2)
-- Check SIMD support
SELECT ruvector_simd_info();
-- Verify vector dimensions
SELECT array_length(embedding::float4[], 1) FROM items LIMIT 1;
-- Test distance calculation
SELECT '[1,2,3]'::ruvector <-> '[4,5,6]'::ruvector;
-- Should return: 5.196152 (≈√27)

| Your Data | Recommended Operator |
|---|---|
| Text embeddings (BERT, OpenAI) | <=> (cosine) |
| Image features (ResNet, CLIP) | <-> (L2) |
| Recommender systems | <#> (inner product) |
| Document vectors (TF-IDF) | <=> (cosine) |
| Sparse features | <+> (L1) |
| General floating-point | <-> (L2) |
-- Test basic functionality
CREATE TEMP TABLE test_vectors (v ruvector(3));
INSERT INTO test_vectors VALUES ('[1,2,3]'), ('[4,5,6]');
-- Should return distances
SELECT a.v <-> b.v AS l2,
a.v <#> b.v AS ip,
a.v <=> b.v AS cosine,
a.v <+> b.v AS l1
FROM test_vectors a, test_vectors b
WHERE a.v <> b.v;

Expected output:
l2 | ip | cosine | l1
---------+---------+----------+------
5.19615 | -32.000 | 0.025368 | 9.00
5.19615 | -32.000 | 0.025368 | 9.00
(2 rows)