RuVector/docs/postgres/zero-copy/examples.rs at main · ruvnet/RuVector · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
// Example code demonstrating zero-copy memory optimization in ruvector-postgres
// This file is for documentation purposes and shows how to use the new APIs

use ruvector_postgres::types::{
    RuVector, VectorData, HnswSharedMem, IvfFlatSharedMem,
    ToastStrategy, estimate_compressibility, get_memory_stats,
    palloc_vector, palloc_vector_aligned, pfree_vector,
    VectorStorage, MemoryStats, PgVectorContext,
};
use std::sync::atomic::Ordering;

// ============================================================================
// Example 1: Zero-Copy Vector Access
// ============================================================================

fn example_zero_copy_access() {
    let vec = RuVector::from_slice(&[1.0, 2.0, 3.0, 4.0]);

    // Zero-copy access to underlying data
    unsafe {
        let ptr = vec.data_ptr();
        let dims = vec.dimensions();

        // Can pass directly to SIMD functions
        // simd_euclidean_distance(ptr, other_ptr, dims);
        println!("Vector pointer: {:?}, dimensions: {}", ptr, dims);
    }

    // Check SIMD alignment
    if vec.is_simd_aligned() {
        println!("Vector is aligned for AVX-512 operations");
    }

    // Get slice without copying
    let slice = vec.as_slice();
    println!("Vector data: {:?}", slice);
}

// ============================================================================
// Example 2: PostgreSQL Memory Context
// ============================================================================

unsafe fn example_pg_memory_context() {
    // Allocate in PostgreSQL memory context
    let dims = 1536;
    let ptr = palloc_vector_aligned(dims);

    // Memory is automatically freed when transaction ends
    // No need for manual cleanup!

    // For manual cleanup (if needed before transaction end):
    // pfree_vector(ptr, dims);

    println!("Allocated {} dimensions at {:?}", dims, ptr);
}

// ============================================================================
// Example 3: Shared Memory Index Access
// ============================================================================

fn example_hnsw_shared_memory() {
    let shmem = HnswSharedMem::new(16, 64);

    // Multiple backends can read concurrently
    shmem.lock_shared();
    let entry_point = shmem.entry_point.load(Ordering::Acquire);
    let node_count = shmem.node_count.load(Ordering::Relaxed);
    println!("HNSW: entry={}, nodes={}", entry_point, node_count);
    shmem.unlock_shared();

    // Exclusive write access
    if shmem.try_lock_exclusive() {
        // Perform insertion
        shmem.node_count.fetch_add(1, Ordering::Relaxed);
        shmem.entry_point.store(42, Ordering::Release);

        // Increment version for MVCC
        let new_version = shmem.increment_version();
        println!("Updated to version {}", new_version);

        shmem.unlock_exclusive();
    }

    // Check locking state
    println!("Locked: {}, Readers: {}",
             shmem.is_locked_exclusive(),
             shmem.shared_lock_count());
}

// ============================================================================
// Example 4: IVFFlat Shared Memory
// ============================================================================

fn example_ivfflat_shared_memory() {
    let shmem = IvfFlatSharedMem::new(100, 1536);

    // Read cluster configuration
    shmem.lock_shared();
    let nlists = shmem.nlists.load(Ordering::Relaxed);
    let dims = shmem.dimensions.load(Ordering::Relaxed);
    println!("IVFFlat: {} lists, {} dims", nlists, dims);
    shmem.unlock_shared();

    // Update vector count after insertion
    if shmem.try_lock_exclusive() {
        shmem.vector_count.fetch_add(1, Ordering::Relaxed);
        shmem.unlock_exclusive();
    }
}

// ============================================================================
// Example 5: TOAST Strategy Selection
// ============================================================================

fn example_toast_strategy() {
    // Small vector: inline storage
    let small_vec = vec![1.0; 64];
    let comp = estimate_compressibility(&small_vec);
    let strategy = ToastStrategy::for_vector(64, comp);
    println!("Small vector (64-d): {:?}", strategy);

    // Large sparse vector: compression beneficial
    let mut sparse = vec![0.0; 10000];
    sparse[100] = 1.0;
    sparse[500] = 2.0;
    let comp = estimate_compressibility(&sparse);
    let strategy = ToastStrategy::for_vector(10000, comp);
    println!("Sparse vector (10K-d): {:?}, compressibility: {:.2}", strategy, comp);

    // Large dense vector: external storage
    let dense = vec![1.0; 10000];
    let comp = estimate_compressibility(&dense);
    let strategy = ToastStrategy::for_vector(10000, comp);
    println!("Dense vector (10K-d): {:?}, compressibility: {:.2}", strategy, comp);
}

// ============================================================================
// Example 6: Compressibility Estimation
// ============================================================================

fn example_compressibility_estimation() {
    // Highly compressible (all zeros)
    let zeros = vec![0.0; 1000];
    let comp = estimate_compressibility(&zeros);
    println!("All zeros: compressibility = {:.2}", comp);

    // Sparse vector
    let mut sparse = vec![0.0; 1000];
    for i in (0..1000).step_by(100) {
        sparse[i] = i as f32;
    }
    let comp = estimate_compressibility(&sparse);
    println!("Sparse (10% nnz): compressibility = {:.2}", comp);

    // Dense random
    let random: Vec<f32> = (0..1000).map(|i| (i as f32) * 0.123).collect();
    let comp = estimate_compressibility(&random);
    println!("Dense random: compressibility = {:.2}", comp);

    // Repeated values
    let repeated = vec![1.0; 1000];
    let comp = estimate_compressibility(&repeated);
    println!("Repeated values: compressibility = {:.2}", comp);
}

// ============================================================================
// Example 7: Vector Storage Tracking
// ============================================================================

fn example_vector_storage() {
    // Inline storage
    let inline_storage = VectorStorage::inline(512);
    println!("Inline: {} bytes", inline_storage.stored_size);

    // Compressed storage
    let compressed_storage = VectorStorage::compressed(10000, 2000);
    println!("Compressed: {} → {} bytes ({:.1}% compression)",
             compressed_storage.original_size,
             compressed_storage.stored_size,
             (1.0 - compressed_storage.compression_ratio()) * 100.0);
    println!("Space saved: {} bytes", compressed_storage.space_saved());

    // External storage
    let external_storage = VectorStorage::external(40000);
    println!("External: {} bytes (stored in TOAST table)",
             external_storage.stored_size);
}

// ============================================================================
// Example 8: Memory Statistics Tracking
// ============================================================================

fn example_memory_statistics() {
    let stats = get_memory_stats();

    println!("Current memory: {:.2} MB", stats.current_mb());
    println!("Peak memory: {:.2} MB", stats.peak_mb());
    println!("Cache memory: {:.2} MB", stats.cache_mb());
    println!("Total memory: {:.2} MB", stats.total_mb());
    println!("Vector count: {}", stats.vector_count);

    // Detailed breakdown
    println!("\nDetailed breakdown:");
    println!("  Current: {} bytes", stats.current_bytes);
    println!("  Peak: {} bytes", stats.peak_bytes);
    println!("  Cache: {} bytes", stats.cache_bytes);
}

// ============================================================================
// Example 9: Memory Context Tracking
// ============================================================================

fn example_memory_context_tracking() {
    let ctx = PgVectorContext::new();

    // Simulate allocations
    ctx.track_alloc(1024);
    println!("After 1KB alloc: {} bytes, {} vectors",
             ctx.current_bytes(), ctx.count());

    ctx.track_alloc(2048);
    println!("After 2KB alloc: {} bytes, {} vectors",
             ctx.current_bytes(), ctx.count());

    println!("Peak usage: {} bytes", ctx.peak_bytes());

    // Simulate deallocation
    ctx.track_dealloc(1024);
    println!("After 1KB free: {} bytes (peak: {})",
             ctx.current_bytes(), ctx.peak_bytes());
}

// ============================================================================
// Example 10: Production Usage Pattern
// ============================================================================

fn example_production_usage() {
    // Typical production workflow

    // 1. Create vector
    let embedding = RuVector::from_slice(&vec![0.1; 1536]);

    // 2. Check storage requirements
    let data = embedding.as_slice();
    let compressibility = estimate_compressibility(data);
    let strategy = ToastStrategy::for_vector(embedding.dimensions(), compressibility);

    println!("Storage strategy: {:?}", strategy);

    // 3. Initialize shared memory index
    let hnsw_shmem = HnswSharedMem::new(16, 64);

    // 4. Insert with locking
    if hnsw_shmem.try_lock_exclusive() {
        // Perform insertion
        let new_node_id = 12345; // Simulated insertion

        hnsw_shmem.node_count.fetch_add(1, Ordering::Relaxed);
        hnsw_shmem.entry_point.store(new_node_id, Ordering::Release);
        hnsw_shmem.increment_version();

        hnsw_shmem.unlock_exclusive();
    }

    // 5. Search with concurrent access
    hnsw_shmem.lock_shared();
    let entry = hnsw_shmem.entry_point.load(Ordering::Acquire);
    println!("Search starting from node {}", entry);
    hnsw_shmem.unlock_shared();

    // 6. Monitor memory
    let stats = get_memory_stats();
    if stats.current_mb() > 1000.0 {
        println!("WARNING: High memory usage: {:.2} MB", stats.current_mb());
    }
}

// ============================================================================
// Example 11: SIMD-Aligned Operations
// ============================================================================

fn example_simd_aligned_operations() {
    // Create vectors with different alignment
    let vec1 = RuVector::from_slice(&vec![1.0; 1536]);

    unsafe {
        // Check alignment
        if vec1.is_simd_aligned() {
            let ptr = vec1.data_ptr();
            println!("Vector is aligned for AVX-512");

            // Can use aligned SIMD loads
            // let result = _mm512_load_ps(ptr);
        } else {
            let ptr = vec1.data_ptr();
            println!("Vector requires unaligned loads");

            // Use unaligned SIMD loads
            // let result = _mm512_loadu_ps(ptr);
        }
    }

    // Check memory layout
    println!("Memory size: {} bytes", vec1.memory_size());
    println!("Data size: {} bytes", vec1.data_size());
    println!("Is inline: {}", vec1.is_inline());
}

// ============================================================================
// Example 12: Concurrent Index Operations
// ============================================================================

fn example_concurrent_operations() {
    let shmem = HnswSharedMem::new(16, 64);

    // Simulate multiple concurrent readers
    println!("Concurrent reads:");
    for i in 0..5 {
        shmem.lock_shared();
        let entry = shmem.entry_point.load(Ordering::Acquire);
        println!("  Reader {}: entry_point = {}", i, entry);
        shmem.unlock_shared();
    }

    // Single writer
    println!("\nExclusive write:");
    if shmem.try_lock_exclusive() {
        println!("  Acquired exclusive lock");
        shmem.entry_point.store(999, Ordering::Release);
        let version = shmem.increment_version();
        println!("  Updated to version {}", version);
        shmem.unlock_exclusive();
        println!("  Released exclusive lock");
    }

    // Verify update
    shmem.lock_shared();
    let entry = shmem.entry_point.load(Ordering::Acquire);
    let version = shmem.version();
    println!("\nAfter update: entry={}, version={}", entry, version);
    shmem.unlock_shared();
}

// ============================================================================
// Main function (for demonstration)
// ============================================================================

#[cfg(test)]
mod examples {
    use super::*;

    #[test]
    fn run_all_examples() {
        println!("\n=== Example 1: Zero-Copy Vector Access ===");
        example_zero_copy_access();

        // Skip unsafe examples in tests
        // unsafe { example_pg_memory_context(); }

        println!("\n=== Example 3: HNSW Shared Memory ===");
        example_hnsw_shared_memory();

        println!("\n=== Example 4: IVFFlat Shared Memory ===");
        example_ivfflat_shared_memory();

        println!("\n=== Example 5: TOAST Strategy ===");
        example_toast_strategy();

        println!("\n=== Example 6: Compressibility ===");
        example_compressibility_estimation();

        println!("\n=== Example 7: Vector Storage ===");
        example_vector_storage();

        println!("\n=== Example 8: Memory Statistics ===");
        example_memory_statistics();

        println!("\n=== Example 9: Memory Context ===");
        example_memory_context_tracking();

        println!("\n=== Example 10: Production Usage ===");
        example_production_usage();

        println!("\n=== Example 11: SIMD Alignment ===");
        example_simd_aligned_operations();

        println!("\n=== Example 12: Concurrent Operations ===");
        example_concurrent_operations();
    }
}