diff --git a/ClickHouse.Driver.Tests/BulkCopy/BulkCopyTests.cs b/ClickHouse.Driver.Tests/BulkCopy/BulkCopyTests.cs
index 4f99d03..5a698d6 100644
--- a/ClickHouse.Driver.Tests/BulkCopy/BulkCopyTests.cs
+++ b/ClickHouse.Driver.Tests/BulkCopy/BulkCopyTests.cs
@@ -681,5 +681,103 @@ public void WriteToServerAsync_WithNullDestinationTableName_ThrowsInvalidOperati
 
         Assert.That(ex.Message, Does.Contain("Destination table not set"));
     }
+
+    /// <summary>
+    /// Bulk-inserts a single QBit(Float32, 9) vector and checks that it reads back unchanged as a float[].
+    /// </summary>
+    [Test]
+    [RequiredFeature(Feature.QBit)]
+    public async Task ShouldInsertQBitFloat32()
+    {
+        var targetTable = "test." + SanitizeTableName("bulk_qbit_float32");
+
+        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {targetTable}");
+        await connection.ExecuteStatementAsync($"CREATE TABLE IF NOT EXISTS {targetTable} (vec QBit(Float32, 9)) ENGINE Memory");
+
+        using var bulkCopy = new ClickHouseBulkCopy(connection)
+        {
+            DestinationTableName = targetTable,
+        };
+
+        var testData = new float[] { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f };
+
+        await bulkCopy.InitAsync();
+        await bulkCopy.WriteToServerAsync([[(object)testData]]);
+
+        Assert.That(bulkCopy.RowsWritten, Is.EqualTo(1));
+
+        using var reader = await connection.ExecuteReaderAsync($"SELECT vec FROM {targetTable}");
+        Assert.That(reader.Read(), Is.True);
+        var result = (float[])reader.GetValue(0);
+        Assert.That(result, Is.EqualTo(testData));
+        // The table was recreated above, so exactly one row must come back.
+        Assert.That(reader.Read(), Is.False);
+    }
+
+    /// <summary>
+    /// Bulk-inserts a single QBit(Float64, 8) vector and checks that it reads back unchanged as a double[].
+    /// </summary>
+    [Test]
+    [RequiredFeature(Feature.QBit)]
+    public async Task ShouldInsertQBitFloat64()
+    {
+        var targetTable = "test." + SanitizeTableName("bulk_qbit_float64");
+
+        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {targetTable}");
+        await connection.ExecuteStatementAsync($"CREATE TABLE IF NOT EXISTS {targetTable} (vec QBit(Float64, 8)) ENGINE Memory");
+
+        using var bulkCopy = new ClickHouseBulkCopy(connection)
+        {
+            DestinationTableName = targetTable,
+        };
+
+        var testData = new double[] { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 };
+
+        await bulkCopy.InitAsync();
+        await bulkCopy.WriteToServerAsync([[(object)testData]]);
+
+        Assert.That(bulkCopy.RowsWritten, Is.EqualTo(1));
+
+        using var reader = await connection.ExecuteReaderAsync($"SELECT vec FROM {targetTable}");
+        Assert.That(reader.Read(), Is.True);
+        var result = (double[])reader.GetValue(0);
+        Assert.That(result, Is.EqualTo(testData));
+        // The table was recreated above, so exactly one row must come back.
+        Assert.That(reader.Read(), Is.False);
+    }
+
+    /// <summary>
+    /// Bulk-inserts a single QBit(BFloat16, 6) vector and checks that it reads back as a float[] within tolerance.
+    /// </summary>
+    [Test]
+    [RequiredFeature(Feature.QBit)]
+    public async Task ShouldInsertQBitBFloat16()
+    {
+        var targetTable = "test." + SanitizeTableName("bulk_qbit_bfloat16");
+
+        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {targetTable}");
+        await connection.ExecuteStatementAsync($"CREATE TABLE IF NOT EXISTS {targetTable} (vec QBit(BFloat16, 6)) ENGINE Memory");
+
+        using var bulkCopy = new ClickHouseBulkCopy(connection)
+        {
+            DestinationTableName = targetTable,
+        };
+
+        var testData = new float[] { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f };
+
+        await bulkCopy.InitAsync();
+        await bulkCopy.WriteToServerAsync([[(object)testData]]);
+
+        Assert.That(bulkCopy.RowsWritten, Is.EqualTo(1));
+
+        using var reader = await connection.ExecuteReaderAsync($"SELECT vec FROM {targetTable}");
+        Assert.That(reader.Read(), Is.True);
+        var result = (float[])reader.GetValue(0);
+        // BFloat16 has reduced precision, so compare element-wise with a tolerance;
+        // the collection constraint also fails when the lengths differ.
+        Assert.That(result, Is.EqualTo(testData).Within(0.01f));
+        // The table was recreated above, so exactly one row must come back.
+        Assert.That(reader.Read(), Is.False);
+    }
 }
 
diff --git a/ClickHouse.Driver.Tests/SQL/SqlSimpleSelectTests.cs
b/ClickHouse.Driver.Tests/SQL/SqlSimpleSelectTests.cs index 1b9d211..6541c27 100644 --- a/ClickHouse.Driver.Tests/SQL/SqlSimpleSelectTests.cs +++ b/ClickHouse.Driver.Tests/SQL/SqlSimpleSelectTests.cs @@ -244,9 +244,17 @@ public async Task ShouldGetValueDecimal() [TestCaseSource(typeof(SqlSimpleSelectTests), nameof(SimpleSelectTypes))] public async Task ShouldExecuteRandomDataSelectQuery(string type) { - if (type.StartsWith("Nested") || type == "Nothing" || type.StartsWith("Variant") || type.StartsWith("Json") || type.Contains("BFloat16") || type.StartsWith("Time")) + if (type.StartsWith("Nested") || + type == "Nothing" || + type.StartsWith("Variant") || + type.StartsWith("Json") || + type.Contains("BFloat16") || + type.StartsWith("Time") || + type.StartsWith("QBit")) + { Assert.Ignore($"Type {type} not supported by generateRandom"); - + } + using var reader = await connection.ExecuteReaderAsync($"SELECT * FROM generateRandom('value {type.Replace("'", "\\'")}', 10, 10, 10) LIMIT 100"); reader.AssertHasFieldCount(1); } diff --git a/ClickHouse.Driver.Tests/Utilities/TestUtilities.cs b/ClickHouse.Driver.Tests/Utilities/TestUtilities.cs index 1d44a53..d056111 100644 --- a/ClickHouse.Driver.Tests/Utilities/TestUtilities.cs +++ b/ClickHouse.Driver.Tests/Utilities/TestUtilities.cs @@ -98,6 +98,10 @@ public static ClickHouseConnection GetTestClickHouseConnection(bool compression { builder["set_enable_time_time64_type"] = 1; } + if (SupportedFeatures.HasFlag(Feature.QBit)) + { + builder["set_allow_experimental_qbit_type"] = 1; + } var settings = new ClickHouseClientSettings(builder) { @@ -400,6 +404,13 @@ public static IEnumerable GetDataTypeSamples() yield return new DataTypeSample("Time64(6)", typeof(TimeSpan), "'-5:25:05.123456'::Time64(6)", (new TimeSpan(5, 25, 5).Add(TimeSpan.FromMilliseconds(123.456)).Negate())); } + if (SupportedFeatures.HasFlag(Feature.QBit)) + { + yield return new DataTypeSample("QBit(Float32, 4)", typeof(float[]), "[1.0, 2.0, 3.0, 
4.0]::QBit(Float32, 4)", new float[] { 1f, 2f, 3f, 4f }); + yield return new DataTypeSample("QBit(Float64, 5)", typeof(double[]), "[1.0, 2.0, 3.0, 4.0, 5.0]::QBit(Float64, 5)", new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 }); + yield return new DataTypeSample("QBit(BFloat16, 6)", typeof(float[]), "[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]::QBit(BFloat16, 6)", new float[] { 1f, 2f, 3f, 4f, 5f, 6f }); + } + // Generate composite type tests for ALL base types that FromByteCode supports // This ensures that all type decoders work correctly in composite contexts (Array, Nullable, Tuple, Map, Variant) var baseTypesToTest = new List diff --git a/ClickHouse.Driver/ADO/Feature.cs b/ClickHouse.Driver/ADO/Feature.cs index 514bb94..d557285 100644 --- a/ClickHouse.Driver/ADO/Feature.cs +++ b/ClickHouse.Driver/ADO/Feature.cs @@ -48,5 +48,8 @@ public enum Feature [SinceVersion("25.6")] Time = 1 << 12, + [SinceVersion("25.10")] + QBit = 1 << 13, + All = ~None, // Special value } diff --git a/ClickHouse.Driver/Formats/HttpParameterFormatter.cs b/ClickHouse.Driver/Formats/HttpParameterFormatter.cs index 69fc858..274583c 100644 --- a/ClickHouse.Driver/Formats/HttpParameterFormatter.cs +++ b/ClickHouse.Driver/Formats/HttpParameterFormatter.cs @@ -87,6 +87,9 @@ internal static string Format(ClickHouseType type, object value, bool quote) case ArrayType arrayType when value is IEnumerable enumerable: return $"[{string.Join(",", enumerable.Cast().Select(obj => Format(arrayType.UnderlyingType, obj, true)))}]"; + case QBitType qbitType when value is IEnumerable enumerable: + return $"[{string.Join(",", enumerable.Cast().Select(obj => Format(qbitType.ElementType, obj, true)))}]"; + case NestedType nestedType when value is IEnumerable enumerable: var values = enumerable.Cast().Select(x => Format(nestedType, x, false)); return $"[{string.Join(",", values)}]"; diff --git a/ClickHouse.Driver/Types/BinaryTypeDecoder.cs b/ClickHouse.Driver/Types/BinaryTypeDecoder.cs index 5ae0707..8115fc0 100644 --- 
a/ClickHouse.Driver/Types/BinaryTypeDecoder.cs
+++ b/ClickHouse.Driver/Types/BinaryTypeDecoder.cs
@@ -157,6 +157,9 @@ internal static ClickHouseType FromByteCode(ExtendedBinaryReader reader, TypeSet
                     Scale = reader.Read7BitEncodedInt(),
                 };
 
+            case BinaryTypeIndex.QBit:
+                return DecodeQBit(reader, typeSettings);
+
             default:
                 break;
         }
@@ -302,6 +305,16 @@ private static ClickHouseType DecodeCustomType(ExtendedBinaryReader reader)
         }
     }
 
+    /// <summary>
+    /// Decodes a QBit type descriptor: the element type's binary encoding followed by the dimension as a 7-bit encoded int.
+    /// </summary>
+    private static QBitType DecodeQBit(ExtendedBinaryReader reader, TypeSettings typeSettings)
+    {
+        var elementType = FromByteCode(reader, typeSettings);
+        var dimension = reader.Read7BitEncodedInt();
+        return new QBitType { ElementType = elementType, Dimension = dimension };
+    }
+
     private static JsonType DecodeJson(ExtendedBinaryReader reader, TypeSettings typeSettings)
     {
         var serializationVersion = reader.ReadByte();
diff --git a/ClickHouse.Driver/Types/QBitType.cs b/ClickHouse.Driver/Types/QBitType.cs
new file mode 100644
index 0000000..2ed5318
--- /dev/null
+++ b/ClickHouse.Driver/Types/QBitType.cs
@@ -0,0 +1,93 @@
+using System;
+using System.Globalization;
+using ClickHouse.Driver.Formats;
+using ClickHouse.Driver.Types.Grammar;
+
+namespace ClickHouse.Driver.Types;
+
+/// <summary>
+/// Represents the ClickHouse QBit type: a quantized vector type for efficient storage.
+/// On the wire, QBit is simply an Array of the underlying element type (Float32/Float64/BFloat16).
+/// The bit-transpose optimization happens server-side for storage, not in the wire protocol.
+/// </summary>
+internal class QBitType : ParameterizedType
+{
+    // Cached wire-format delegate; rebuilt lazily after ElementType changes so Write does not
+    // allocate a new ArrayType for every value. A concurrent first use only costs a duplicate allocation.
+    private ArrayType underlyingArrayType;
+    private ClickHouseType elementType;
+
+    /// <summary>
+    /// Type of each vector component. Assigning it discards the cached wire-format array type.
+    /// </summary>
+    public ClickHouseType ElementType
+    {
+        get => elementType;
+        set
+        {
+            elementType = value;
+            underlyingArrayType = null;
+        }
+    }
+
+    /// <summary>
+    /// Number of components in the vector; Read always returns an array of exactly this length.
+    /// </summary>
+    public int Dimension { get; set; }
+
+    /// <summary>
+    /// Array type whose elements are the framework type of the element type.
+    /// </summary>
+    public override Type FrameworkType => ElementType.FrameworkType.MakeArrayType();
+
+    public override string Name => "QBit";
+
+    // Delegate to ArrayType for wire format
+    private ArrayType UnderlyingArrayType => underlyingArrayType ??= new ArrayType { UnderlyingType = ElementType };
+
+    /// <summary>
+    /// Builds a QBitType from a parsed type node: the first child is the element type, the second the dimension.
+    /// </summary>
+    public override ParameterizedType Parse(SyntaxTreeNode node, Func<SyntaxTreeNode, ClickHouseType> parseClickHouseTypeFunc, TypeSettings settings)
+    {
+        return new QBitType
+        {
+            ElementType = parseClickHouseTypeFunc(node.ChildNodes[0]),
+            Dimension = int.Parse(node.ChildNodes[1].Value, CultureInfo.InvariantCulture),
+        };
+    }
+
+    public override string ToString() => $"{Name}({ElementType},{Dimension})";
+
+    /// <summary>
+    /// Reads one vector: a 7-bit encoded element count followed by that many elements.
+    /// </summary>
+    /// <returns>An array of Dimension elements of the element type's framework type.</returns>
+    public override object Read(ExtendedBinaryReader reader)
+    {
+        // QBit wire format is Array(UnderlyingType), but the length is padded to the nearest 8
+        var length = reader.Read7BitEncodedInt();
+        var data = Array.CreateInstance(ElementType.FrameworkType, Dimension); // Could use a pool here
+        for (var i = 0; i < length; i++)
+        {
+            // Every transmitted element is read so the stream stays in sync,
+            // but only the first Dimension values are kept.
+            var value = ElementType.Read(reader);
+            if (i < Dimension)
+            {
+                data.SetValue(ClearDBNull(value), i);
+            }
+        }
+
+        return data;
+    }
+
+    /// <summary>
+    /// Writes the vector using the Array(ElementType) encoding.
+    /// </summary>
+    public override void Write(ExtendedBinaryWriter writer, object value)
+    {
+        // QBit wire format is just Array(ElementType)
+        UnderlyingArrayType.Write(writer, value);
+    }
+}
diff --git a/examples/Vector_001_QBitSimilaritySearch.cs
b/examples/Vector_001_QBitSimilaritySearch.cs
new file mode 100644
index 0000000..50cd61a
--- /dev/null
+++ b/examples/Vector_001_QBitSimilaritySearch.cs
@@ -0,0 +1,108 @@
+using System.Globalization;
+using ClickHouse.Driver.ADO;
+using ClickHouse.Driver.Utility;
+
+namespace ClickHouse.Driver.Examples;
+
+/// <summary>
+/// Demonstrates using QBit vectors for similarity search in ClickHouse.
+/// QBit is a quantized vector type that provides efficient storage with configurable precision.
+/// This example shows semantic similarity search with different precision levels using L2DistanceTransposed.
+/// </summary>
+public static class QBitSimilaritySearch
+{
+    /// <summary>
+    /// Creates a small QBit table, searches it at two precision levels, reads the vectors back and drops the table.
+    /// </summary>
+    public static async Task Run()
+    {
+        using var connection = new ClickHouseConnection("Host=localhost");
+        await connection.OpenAsync();
+
+        Console.WriteLine("=== QBit Similarity Search with Different Precision Levels ===\n");
+
+        var tableName = "example_qbit_similarity";
+
+        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
+        await connection.ExecuteStatementAsync($@"
+            CREATE TABLE {tableName}
+            (
+                word String,
+                vec QBit(Float32, 5)
+            )
+            ENGINE = MergeTree
+            ORDER BY word
+        ");
+
+        // Insert sample word embeddings (simplified 5-dimensional vectors)
+        // In practice, these would come from an embedding model
+        await connection.ExecuteStatementAsync($@"
+            INSERT INTO {tableName} VALUES
+            ('apple', [0.9, 0.1, 0.8, 0.2, 0.7]),
+            ('banana', [0.85, 0.15, 0.75, 0.25, 0.65]),
+            ('orange', [0.88, 0.12, 0.78, 0.22, 0.68]),
+            ('dog', [0.1, 0.9, 0.2, 0.8, 0.3]),
+            ('horse', [0.15, 0.85, 0.25, 0.75, 0.35]),
+            ('cat', [0.12, 0.88, 0.22, 0.78, 0.32])
+        ");
+
+        Console.WriteLine("Inserted 6 words with 5-dimensional QBit(Float32, 5) embeddings\n");
+
+        // Query vector: looking for words similar to "apple"
+        var queryVector = "[0.9, 0.1, 0.8, 0.2, 0.7]";
+
+        // Example 1: High precision search (32 bits per component)
+        Console.WriteLine("=== High Precision Search (32 bits) ===");
+        await PrintNearestWordsAsync(connection, tableName, queryVector, 32);
+
+        // Example 2: Low precision search (12 bits per component) - faster but less accurate
+        Console.WriteLine("\n=== Low Precision Search (12 bits) ===");
+        await PrintNearestWordsAsync(connection, tableName, queryVector, 12);
+
+        // Read vector data back as float[]
+        Console.WriteLine("\n=== Reading QBit Data ===\n");
+        using (var reader = await connection.ExecuteReaderAsync($"SELECT word, vec FROM {tableName} LIMIT 3"))
+        {
+            while (reader.Read())
+            {
+                var word = reader.GetString(0);
+                var vec = (float[])reader.GetValue(1);
+                Console.WriteLine($"{word}: [{string.Join(", ", vec.Select(v => v.ToString("F2")))}]");
+            }
+        }
+
+        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
+        Console.WriteLine($"\nCleaned up table '{tableName}'");
+    }
+
+    /// <summary>
+    /// Runs the L2DistanceTransposed search at the given precision and prints every word with its distance, nearest first.
+    /// </summary>
+    /// <param name="connection">Open connection used to run the query.</param>
+    /// <param name="tableName">Table holding the word embeddings.</param>
+    /// <param name="queryVector">Array literal of the vector each row is compared against.</param>
+    /// <param name="precision">Precision argument passed to L2DistanceTransposed (bits per component).</param>
+    private static async Task PrintNearestWordsAsync(ClickHouseConnection connection, string tableName, string queryVector, int precision)
+    {
+        Console.WriteLine($"Using L2DistanceTransposed with precision={precision}\n");
+
+        using var reader = await connection.ExecuteReaderAsync($@"
+            SELECT
+                word,
+                L2DistanceTransposed(vec, {queryVector}, {precision}) AS distance
+            FROM {tableName}
+            ORDER BY distance
+        ");
+
+        Console.WriteLine("Word\t\tDistance");
+        Console.WriteLine("----\t\t--------");
+
+        while (reader.Read())
+        {
+            var word = reader.GetString(0);
+            // Convert rather than unbox so the value is read correctly whether the server returns Float32 or Float64.
+            var distance = Convert.ToDouble(reader.GetValue(1), CultureInfo.InvariantCulture);
+            Console.WriteLine($"{word,-12}\t{distance:F6}");
+        }
+    }
+}