Skip to content

Commit cf17550

Browse files
Add QBit support
1 parent 3093a68 commit cf17550

File tree

9 files changed

+291
-2
lines changed

9 files changed

+291
-2
lines changed

ClickHouse.Driver.Tests/BulkCopy/BulkCopyTests.cs

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -681,5 +681,88 @@ public void WriteToServerAsync_WithNullDestinationTableName_ThrowsInvalidOperati
681681

682682
Assert.That(ex.Message, Does.Contain("Destination table not set"));
683683
}
684+
685+
[Test]
public async Task ShouldInsertQBitFloat32()
{
    // Round-trips one 9-element float vector through bulk copy into a QBit(Float32, 9) column.
    var expectedVector = new[] { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f, 9.0f };
    var tableName = $"test.{SanitizeTableName("bulk_qbit_float32")}";

    // Start from a clean table so earlier runs cannot affect the row count.
    await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
    await connection.ExecuteStatementAsync($"CREATE TABLE IF NOT EXISTS {tableName} (vec QBit(Float32, 9)) ENGINE Memory");

    using var bulkCopy = new ClickHouseBulkCopy(connection) { DestinationTableName = tableName };
    await bulkCopy.InitAsync();

    // One row with one column; the cast keeps the array as a single cell value.
    await bulkCopy.WriteToServerAsync([[(object)expectedVector]]);
    Assert.That(bulkCopy.RowsWritten, Is.EqualTo(1));

    using var reader = await connection.ExecuteReaderAsync($"SELECT vec FROM {tableName}");
    Assert.That(reader.Read(), Is.True);

    var storedVector = (float[])reader.GetValue(0);
    Assert.That(storedVector, Is.EqualTo(expectedVector));
}
710+
711+
[Test]
public async Task ShouldInsertQBitFloat64()
{
    // Round-trips one 8-element double vector through bulk copy into a QBit(Float64, 8) column.
    var expectedVector = new[] { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0 };
    var tableName = $"test.{SanitizeTableName("bulk_qbit_float64")}";

    // Start from a clean table so earlier runs cannot affect the row count.
    await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
    await connection.ExecuteStatementAsync($"CREATE TABLE IF NOT EXISTS {tableName} (vec QBit(Float64, 8)) ENGINE Memory");

    using var bulkCopy = new ClickHouseBulkCopy(connection) { DestinationTableName = tableName };
    await bulkCopy.InitAsync();

    // One row with one column; the cast keeps the array as a single cell value.
    await bulkCopy.WriteToServerAsync([[(object)expectedVector]]);
    Assert.That(bulkCopy.RowsWritten, Is.EqualTo(1));

    using var reader = await connection.ExecuteReaderAsync($"SELECT vec FROM {tableName}");
    Assert.That(reader.Read(), Is.True);

    var storedVector = (double[])reader.GetValue(0);
    Assert.That(storedVector, Is.EqualTo(expectedVector));
}
736+
737+
[Test]
public async Task ShouldInsertQBitBFloat16()
{
    // Round-trips one 6-element vector through bulk copy into a QBit(BFloat16, 6) column.
    // Values are supplied and read back as float[].
    var expectedVector = new[] { 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f };
    var tableName = $"test.{SanitizeTableName("bulk_qbit_bfloat16")}";

    // Start from a clean table so earlier runs cannot affect the row count.
    await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
    await connection.ExecuteStatementAsync($"CREATE TABLE IF NOT EXISTS {tableName} (vec QBit(BFloat16, 6)) ENGINE Memory");

    using var bulkCopy = new ClickHouseBulkCopy(connection) { DestinationTableName = tableName };
    await bulkCopy.InitAsync();

    // One row with one column; the cast keeps the array as a single cell value.
    await bulkCopy.WriteToServerAsync([[(object)expectedVector]]);
    Assert.That(bulkCopy.RowsWritten, Is.EqualTo(1));

    using var reader = await connection.ExecuteReaderAsync($"SELECT vec FROM {tableName}");
    Assert.That(reader.Read(), Is.True);

    var storedVector = (float[])reader.GetValue(0);
    Assert.That(storedVector.Length, Is.EqualTo(expectedVector.Length));

    // BFloat16 has reduced precision, so each component is compared with a tolerance.
    for (var index = 0; index < storedVector.Length; index++)
    {
        Assert.That(storedVector[index], Is.EqualTo(expectedVector[index]).Within(0.01f));
    }
}
684767
}
685768

ClickHouse.Driver.Tests/SQL/SqlSimpleSelectTests.cs

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -244,9 +244,17 @@ public async Task ShouldGetValueDecimal()
244244
/// <summary>
/// Runs a <c>generateRandom</c> SELECT for the given ClickHouse type and checks that the
/// result exposes exactly one field. Types that generateRandom cannot produce are ignored.
/// </summary>
/// <param name="type">ClickHouse type name supplied by <c>SimpleSelectTypes</c>.</param>
[TestCaseSource(typeof(SqlSimpleSelectTests), nameof(SimpleSelectTypes))]
public async Task ShouldExecuteRandomDataSelectQuery(string type)
{
    // Type names are plain identifiers, so prefixes are matched ordinally instead of
    // with the current culture (CA1310).
    if (type.StartsWith("Nested", System.StringComparison.Ordinal) ||
        type == "Nothing" ||
        type.StartsWith("Variant", System.StringComparison.Ordinal) ||
        type.StartsWith("Json", System.StringComparison.Ordinal) ||
        type.Contains("BFloat16") ||
        type.StartsWith("Time", System.StringComparison.Ordinal) ||
        type.StartsWith("QBit", System.StringComparison.Ordinal))
    {
        Assert.Ignore($"Type {type} not supported by generateRandom");
    }

    // The type name is embedded in a single-quoted SQL string literal, so escape single quotes.
    var escapedType = type.Replace("'", "\\'");

    using var reader = await connection.ExecuteReaderAsync($"SELECT * FROM generateRandom('value {escapedType}', 10, 10, 10) LIMIT 100");
    reader.AssertHasFieldCount(1);
}

ClickHouse.Driver.Tests/Utilities/TestUtilities.cs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,10 @@ public static ClickHouseConnection GetTestClickHouseConnection(bool compression
9898
{
9999
builder["set_enable_time_time64_type"] = 1;
100100
}
101+
if (SupportedFeatures.HasFlag(Feature.QBit))
102+
{
103+
builder["set_allow_experimental_qbit_type"] = 1;
104+
}
101105

102106
var settings = new ClickHouseClientSettings(builder)
103107
{
@@ -400,6 +404,13 @@ public static IEnumerable<DataTypeSample> GetDataTypeSamples()
400404
yield return new DataTypeSample("Time64(6)", typeof(TimeSpan), "'-5:25:05.123456'::Time64(6)", (new TimeSpan(5, 25, 5).Add(TimeSpan.FromMilliseconds(123.456)).Negate()));
401405
}
402406

407+
if (SupportedFeatures.HasFlag(Feature.QBit))
408+
{
409+
yield return new DataTypeSample("QBit(Float32, 4)", typeof(float[]), "[1.0, 2.0, 3.0, 4.0]::QBit(Float32, 4)", new float[] { 1f, 2f, 3f, 4f });
410+
yield return new DataTypeSample("QBit(Float64, 5)", typeof(double[]), "[1.0, 2.0, 3.0, 4.0, 5.0]::QBit(Float64, 5)", new double[] { 1.0, 2.0, 3.0, 4.0, 5.0 });
411+
yield return new DataTypeSample("QBit(BFloat16, 6)", typeof(float[]), "[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]::QBit(BFloat16, 6)", new float[] { 1f, 2f, 3f, 4f, 5f, 6f });
412+
}
413+
403414
// Generate composite type tests for ALL base types that FromByteCode supports
404415
// This ensures that all type decoders work correctly in composite contexts (Array, Nullable, Tuple, Map, Variant)
405416
var baseTypesToTest = new List<DataTypeSample>

ClickHouse.Driver/ADO/Feature.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,5 +48,8 @@ public enum Feature
4848
[SinceVersion("25.6")]
4949
Time = 1 << 12,
5050

51+
[SinceVersion("25.10")]
52+
QBit = 1 << 13,
53+
5154
All = ~None, // Special value
5255
}

ClickHouse.Driver/Formats/HttpParameterFormatter.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -87,6 +87,9 @@ internal static string Format(ClickHouseType type, object value, bool quote)
8787
case ArrayType arrayType when value is IEnumerable enumerable:
8888
return $"[{string.Join(",", enumerable.Cast<object>().Select(obj => Format(arrayType.UnderlyingType, obj, true)))}]";
8989

90+
case QBitType qbitType when value is IEnumerable enumerable:
91+
return $"[{string.Join(",", enumerable.Cast<object>().Select(obj => Format(qbitType.ElementType, obj, true)))}]";
92+
9093
case NestedType nestedType when value is IEnumerable enumerable:
9194
var values = enumerable.Cast<object>().Select(x => Format(nestedType, x, false));
9295
return $"[{string.Join(",", values)}]";

ClickHouse.Driver/Types/BinaryTypeDecoder.cs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,9 @@ internal static ClickHouseType FromByteCode(ExtendedBinaryReader reader, TypeSet
157157
Scale = reader.Read7BitEncodedInt(),
158158
};
159159

160+
case BinaryTypeIndex.QBit:
161+
return DecodeQBit(reader, typeSettings);
162+
160163
default:
161164
break;
162165
}
@@ -302,6 +305,13 @@ private static ClickHouseType DecodeCustomType(ExtendedBinaryReader reader)
302305
}
303306
}
304307

308+
/// <summary>
/// Decodes a QBit type descriptor: the element type is read first via
/// <see cref="FromByteCode"/>, then the dimension as a 7-bit encoded integer.
/// </summary>
/// <param name="reader">Reader positioned just after the QBit type index.</param>
/// <param name="typeSettings">Settings forwarded to the nested element type decoder.</param>
/// <returns>A <see cref="QBitType"/> carrying the decoded element type and dimension.</returns>
private static QBitType DecodeQBit(ExtendedBinaryReader reader, TypeSettings typeSettings) =>
    new QBitType
    {
        // Member initializers run in textual order, so the element type is consumed before the dimension.
        ElementType = FromByteCode(reader, typeSettings),
        Dimension = reader.Read7BitEncodedInt(),
    };
314+
305315
private static JsonType DecodeJson(ExtendedBinaryReader reader, TypeSettings typeSettings)
306316
{
307317
var serializationVersion = reader.ReadByte();
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
using System;
2+
using System.Globalization;
3+
using ClickHouse.Driver.Formats;
4+
using ClickHouse.Driver.Types.Grammar;
5+
6+
namespace ClickHouse.Driver.Types;
7+
8+
/// <summary>
/// ClickHouse <c>QBit(ElementType, Dimension)</c>: a quantized vector type for efficient storage.
/// Values are exposed as a CLR array of the element's framework type.
/// On the wire a QBit value is handled like an Array of the element type (Float32/Float64/BFloat16);
/// the bit-transpose optimization is a server-side storage detail, not part of the wire protocol.
/// </summary>
internal class QBitType : ParameterizedType
{
    public override string Name => "QBit";

    /// <summary>Type of each vector component.</summary>
    public ClickHouseType ElementType { get; set; }

    /// <summary>Number of components in the vector as declared in the type.</summary>
    public int Dimension { get; set; }

    public override Type FrameworkType => ElementType.FrameworkType.MakeArrayType();

    public override string ToString() => $"{Name}({ElementType},{Dimension})";

    /// <summary>
    /// Builds a <see cref="QBitType"/> from a parsed type node whose first child is the
    /// element type and whose second child is the dimension.
    /// </summary>
    public override ParameterizedType Parse(SyntaxTreeNode node, Func<SyntaxTreeNode, ClickHouseType> parseClickHouseTypeFunc, TypeSettings settings)
    {
        var elementType = parseClickHouseTypeFunc(node.ChildNodes[0]);
        var dimension = int.Parse(node.ChildNodes[1].Value, CultureInfo.InvariantCulture);

        return new QBitType { ElementType = elementType, Dimension = dimension };
    }

    /// <summary>
    /// Reads a length-prefixed sequence of elements and returns an array of exactly
    /// <see cref="Dimension"/> items; elements beyond the dimension are read and discarded.
    /// </summary>
    public override object Read(ExtendedBinaryReader reader)
    {
        // The wire length can exceed Dimension (per the original author's note it is padded
        // to a multiple of 8), so every element must be consumed even if it is not kept.
        var wireLength = reader.Read7BitEncodedInt();
        var vector = Array.CreateInstance(ElementType.FrameworkType, Dimension); // Could use a pool here
        var keptCount = Math.Min(wireLength, Dimension);

        for (var index = 0; index < keptCount; index++)
        {
            vector.SetValue(ClearDBNull(ElementType.Read(reader)), index);
        }

        // Drain the surplus elements so the reader ends up past this value.
        for (var index = keptCount; index < wireLength; index++)
        {
            _ = ElementType.Read(reader);
        }

        return vector;
    }

    /// <summary>
    /// Writes the value using the Array(ElementType) encoding.
    /// </summary>
    public override void Write(ExtendedBinaryWriter writer, object value)
    {
        // Delegate to ArrayType for the wire format.
        var arrayType = new ArrayType { UnderlyingType = ElementType };
        arrayType.Write(writer, value);
    }
}

ClickHouse.Driver/Types/TypeConverter.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,8 @@ static TypeConverter()
177177

178178
RegisterParameterizedType<AggregateFunctionType>();
179179

180+
RegisterParameterizedType<QBitType>();
181+
180182
// Mapping fixups
181183
ReverseMapping.Add(typeof(ClickHouseDecimal), new Decimal128Type());
182184
ReverseMapping.Add(typeof(decimal), new Decimal128Type());
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
using ClickHouse.Driver.ADO;
2+
using ClickHouse.Driver.Utility;
3+
4+
namespace ClickHouse.Driver.Examples;
5+
6+
/// <summary>
/// Example: similarity search over QBit vectors in ClickHouse.
/// Creates a small table of word embeddings stored as QBit(Float32, 5), ranks the words by
/// L2DistanceTransposed at two precision levels, and reads the stored vectors back as float[].
/// </summary>
public static class QBitSimilaritySearch
{
    public static async Task Run()
    {
        using var connection = new ClickHouseConnection("Host=localhost");
        await connection.OpenAsync();

        Console.WriteLine("=== QBit Similarity Search with Different Precision Levels ===\n");

        var tableName = "example_qbit_similarity";

        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
        await connection.ExecuteStatementAsync($@"
CREATE TABLE {tableName}
(
word String,
vec QBit(Float32, 5)
)
ENGINE = MergeTree
ORDER BY word
");

        // Sample word embeddings, simplified to 5 dimensions.
        // Real embeddings would be produced by an embedding model.
        await connection.ExecuteStatementAsync($@"
INSERT INTO {tableName} VALUES
('apple', [0.9, 0.1, 0.8, 0.2, 0.7]),
('banana', [0.85, 0.15, 0.75, 0.25, 0.65]),
('orange', [0.88, 0.12, 0.78, 0.22, 0.68]),
('dog', [0.1, 0.9, 0.2, 0.8, 0.3]),
('horse', [0.15, 0.85, 0.25, 0.75, 0.35]),
('cat', [0.12, 0.88, 0.22, 0.78, 0.32])
");

        Console.WriteLine("Inserted 6 words with 5-dimensional QBit(Float32, 5) embeddings\n");

        // Query vector: the same embedding as "apple", so similar words rank first.
        var queryVector = "[0.9, 0.1, 0.8, 0.2, 0.7]";

        // Example 1: high precision search (32 bits per component).
        await PrintWordsByDistance(connection, tableName, queryVector, "=== High Precision Search (32 bits) ===", 32);

        // Example 2: low precision search (12 bits per component) - faster but less accurate.
        await PrintWordsByDistance(connection, tableName, queryVector, "\n=== Low Precision Search (12 bits) ===", 12);

        // Read vector data back as float[].
        Console.WriteLine("\n=== Reading QBit Data ===\n");
        await PrintStoredVectors(connection, tableName);

        await connection.ExecuteStatementAsync($"DROP TABLE IF EXISTS {tableName}");
        Console.WriteLine($"\nCleaned up table '{tableName}'");
    }

    // Prints a heading, then every word ordered by its L2DistanceTransposed to the query vector at the given precision.
    private static async Task PrintWordsByDistance(ClickHouseConnection connection, string tableName, string queryVector, string heading, int precision)
    {
        Console.WriteLine(heading);
        Console.WriteLine($"Using L2DistanceTransposed with precision={precision}\n");

        using var reader = await connection.ExecuteReaderAsync($@"
SELECT
word,
L2DistanceTransposed(vec, {queryVector}, {precision}) AS distance
FROM {tableName}
ORDER BY distance
");

        Console.WriteLine("Word\t\tDistance");
        Console.WriteLine("----\t\t--------");

        while (reader.Read())
        {
            var word = reader.GetString(0);
            var distance = reader.GetFloat(1);
            Console.WriteLine($"{word,-12}\t{distance:F6}");
        }
    }

    // Prints the first three rows, showing each stored vector as a float[] formatted to two decimals.
    private static async Task PrintStoredVectors(ClickHouseConnection connection, string tableName)
    {
        using var reader = await connection.ExecuteReaderAsync($"SELECT word, vec FROM {tableName} LIMIT 3");

        while (reader.Read())
        {
            var word = reader.GetString(0);
            var components = (float[])reader.GetValue(1);
            var formatted = string.Join(", ", components.Select(v => v.ToString("F2")));
            Console.WriteLine($"{word}: [{formatted}]");
        }
    }
}

0 commit comments

Comments
 (0)