using ClickHouse.Driver.ADO;
using ClickHouse.Driver.Utility;

namespace ClickHouse.Driver.Examples;

/// <summary>
/// Demonstrates using QBit vectors for similarity search in ClickHouse.
/// QBit is a quantized vector type that provides efficient storage with configurable precision.
/// This example shows semantic similarity search with different precision levels using L2DistanceTransposed.
/// </summary>
public static class QBitSimilaritySearch
{
    /// <summary>Name of the scratch table the example creates and drops.</summary>
    private const string TableName = "example_qbit_similarity";

    /// <summary>
    /// Runs the example end to end: creates a table with a <c>QBit(Float32, 5)</c> column,
    /// inserts six sample word embeddings, ranks the words by distance to a query vector at
    /// two precision levels, reads the stored vectors back as <c>float[]</c>, and drops the table.
    /// </summary>
    /// <returns>A task that completes when the example has finished and the table is dropped.</returns>
    public static async Task Run()
    {
        using var connection = new ClickHouseConnection("Host=localhost");
        await connection.OpenAsync();

        Console.WriteLine("=== QBit Similarity Search with Different Precision Levels ===\n");

        // Start from a clean slate so the example can be re-run after an aborted run.
        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {TableName}");
        await connection.ExecuteStatementAsync($@"
            CREATE TABLE {TableName}
            (
                word String,
                vec QBit(Float32, 5)
            )
            ENGINE = MergeTree
            ORDER BY word
        ");

        // Insert sample word embeddings (simplified 5-dimensional vectors).
        // In practice, these would come from an embedding model.
        await connection.ExecuteStatementAsync($@"
            INSERT INTO {TableName} VALUES
            ('apple', [0.9, 0.1, 0.8, 0.2, 0.7]),
            ('banana', [0.85, 0.15, 0.75, 0.25, 0.65]),
            ('orange', [0.88, 0.12, 0.78, 0.22, 0.68]),
            ('dog', [0.1, 0.9, 0.2, 0.8, 0.3]),
            ('horse', [0.15, 0.85, 0.25, 0.75, 0.35]),
            ('cat', [0.12, 0.88, 0.22, 0.78, 0.32])
        ");

        Console.WriteLine("Inserted 6 words with 5-dimensional QBit(Float32, 5) embeddings\n");

        // Query vector: looking for words similar to "apple".
        // It is a fixed literal defined here, so interpolating it into the SQL text is safe.
        var queryVector = "[0.9, 0.1, 0.8, 0.2, 0.7]";

        // Example 1: High precision search (32 bits per component)
        Console.WriteLine("=== High Precision Search (32 bits) ===");
        await PrintNearestWordsAsync(connection, queryVector, 32);

        // Example 2: Low precision search (12 bits per component) - faster but less accurate
        Console.WriteLine("\n=== Low Precision Search (12 bits) ===");
        await PrintNearestWordsAsync(connection, queryVector, 12);

        // Read vector data back as float[]
        Console.WriteLine("\n=== Reading QBit Data ===\n");
        using (var reader = await connection.ExecuteReaderAsync($"SELECT word, vec FROM {TableName} LIMIT 3"))
        {
            while (await reader.ReadAsync())
            {
                var word = reader.GetString(0);
                var vec = (float[])reader.GetValue(1);
                Console.WriteLine($"{word}: [{string.Join(", ", vec.Select(v => v.ToString("F2")))}]");
            }
        }

        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {TableName}");
        Console.WriteLine($"\nCleaned up table '{TableName}'");
    }

    /// <summary>
    /// Queries every word in the example table ordered by
    /// <c>L2DistanceTransposed(vec, queryVector, precision)</c> and prints one
    /// "word / distance" row per result.
    /// </summary>
    /// <param name="connection">An open connection to the server holding the example table.</param>
    /// <param name="queryVector">The query vector as a SQL array literal, e.g. <c>[0.9, 0.1, ...]</c>.</param>
    /// <param name="precision">Value passed as the third argument of <c>L2DistanceTransposed</c> (bits per component).</param>
    private static async Task PrintNearestWordsAsync(
        ClickHouseConnection connection,
        string queryVector,
        int precision)
    {
        Console.WriteLine($"Using L2DistanceTransposed with precision={precision}\n");

        using var reader = await connection.ExecuteReaderAsync($@"
            SELECT
                word,
                L2DistanceTransposed(vec, {queryVector}, {precision}) AS distance
            FROM {TableName}
            ORDER BY distance
        ");

        Console.WriteLine("Word\t\tDistance");
        Console.WriteLine("----\t\t--------");

        // Use the async read so the async method never blocks on network I/O.
        while (await reader.ReadAsync())
        {
            var word = reader.GetString(0);
            var distance = reader.GetFloat(1);
            Console.WriteLine($"{word,-12}\t{distance:F6}");
        }
    }
}