Skip to content

Commit a6ae744

Browse files
committed
Resize vector for writing list data
1 parent ec9db9f commit a6ae744

File tree

10 files changed

+130
-31
lines changed

10 files changed

+130
-31
lines changed

DuckDB.NET.Bindings/NativeMethods/NativeMethods.Vectors.cs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@ public static class Vectors
3333
[DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_list_vector_get_size")]
3434
public static extern long DuckDBListVectorGetSize(IntPtr vector);
3535

36+
[DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_list_vector_reserve")]
37+
public static extern DuckDBState DuckDBListVectorReserve(IntPtr vector, ulong requiredCapacity);
38+
3639
[DllImport(DuckDbLibrary, CallingConvention = CallingConvention.Cdecl, EntryPoint = "duckdb_struct_vector_get_child")]
3740
public static extern IntPtr DuckDBStructVectorGetChild(IntPtr vector, long index);
3841

DuckDB.NET.Data/DuckDBAppender.cs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using DuckDB.NET.Data.Internal.Writer;
1+
using DuckDB.NET.Data.Internal;
2+
using DuckDB.NET.Data.Internal.Writer;
23
using DuckDB.NET.Native;
34
using System;
45
using System.Diagnostics;
@@ -8,7 +9,7 @@ namespace DuckDB.NET.Data;
89

910
public class DuckDBAppender : IDisposable
1011
{
11-
private static readonly ulong DuckDBVectorSize = NativeMethods.Helpers.DuckDBVectorSize();
12+
private static readonly ulong DuckDBVectorSize = DuckDBGlobalData.VectorSize;
1213

1314
private bool closed;
1415
private readonly Native.DuckDBAppender nativeAppender;

DuckDB.NET.Data/DuckDBAppenderRow.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ public DuckDBAppenderRow AppendValue(TimeSpan? value)
104104

105105
#region Composite Types
106106

107-
public DuckDBAppenderRow AppendValue<T>(IReadOnlyCollection<T>? value) => AppendValueInternal(value);
107+
public DuckDBAppenderRow AppendValue<T>(IEnumerable<T>? value) => AppendValueInternal(value);
108108

109109
#endregion
110110

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
using DuckDB.NET.Native;
2+
3+
namespace DuckDB.NET.Data.Internal;
4+
5+
/// <summary>
/// Process-wide values that are read from the native DuckDB library once and then reused.
/// </summary>
public static class DuckDBGlobalData
{
    // Populated a single time by the static field initializer.
    private static readonly ulong vectorSize = NativeMethods.Helpers.DuckDBVectorSize();

    /// <summary>
    /// The value returned by <c>NativeMethods.Helpers.DuckDBVectorSize()</c>, cached for the lifetime of the process.
    /// </summary>
    public static ulong VectorSize => vectorSize;
}

DuckDB.NET.Data/Internal/IsExternalInit.cs

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
using System.Collections.Generic;
1+
using System.Collections;
2+
using System.Collections.Generic;
23

34
namespace System.Runtime.CompilerServices
45
{
@@ -15,6 +16,21 @@ public static void Deconstruct<TKey, TValue>(this KeyValuePair<TKey, TValue> key
1516
}
1617
}
1718

19+
/// <summary>
/// Helpers for working with sequences whose size may or may not be known up front.
/// </summary>
static class IEnumerableExtensions
{
    /// <summary>
    /// Tries to determine the number of elements in <paramref name="target"/> without enumerating it.
    /// </summary>
    /// <typeparam name="T">Element type used to probe the generic collection interfaces.</typeparam>
    /// <param name="target">The sequence to inspect.</param>
    /// <param name="count">
    /// The number of elements when the method returns <c>true</c>; otherwise 0.
    /// </param>
    /// <returns>
    /// <c>true</c> when <paramref name="target"/> exposes its size through <see cref="ICollection"/>,
    /// <see cref="ICollection{T}"/> or <see cref="IReadOnlyCollection{T}"/>; otherwise <c>false</c>.
    /// </returns>
    public static bool TryGetNonEnumeratedCount<T>(this IEnumerable target, out int count)
    {
        switch (target)
        {
            case ICollection collection:
                count = collection.Count;
                return true;

            // Types such as HashSet<T> implement only the generic interfaces, so the
            // non-generic check above is not enough to detect them.
            case ICollection<T> genericCollection:
                count = genericCollection.Count;
                return true;

            case IReadOnlyCollection<T> readOnlyCollection:
                count = readOnlyCollection.Count;
                return true;

            default:
                count = 0;
                return false;
        }
    }
}
33+
1834
namespace System.Diagnostics.CodeAnalysis
1935
{
2036
[AttributeUsage(AttributeTargets.Method)]

DuckDB.NET.Data/Internal/Writer/ListVectorDataWriter.cs

Lines changed: 54 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,35 @@
1-
using System;
1+
using DuckDB.NET.Native;
2+
using System;
23
using System.Collections;
34
using System.Collections.Generic;
45
using System.Numerics;
5-
using DuckDB.NET.Native;
66

77
namespace DuckDB.NET.Data.Internal.Writer;
88

99
internal sealed unsafe class ListVectorDataWriter : VectorDataWriterBase
1010
{
1111
private ulong offset = 0;
12+
private readonly ulong arraySize;
1213
private readonly VectorDataWriterBase listItemWriter;
1314

15+
public bool IsList => ColumnType == DuckDBType.List;
16+
private ulong vectorReservedSize = DuckDBGlobalData.VectorSize;
17+
1418
/// <summary>
/// Creates a writer for a List or Array column and the nested writer used for its items.
/// </summary>
/// <param name="vector">Native handle of the column vector.</param>
/// <param name="vectorData">Pointer to the data of <paramref name="vector"/>; forwarded to the base writer.</param>
/// <param name="columnType">Column type; <c>DuckDBType.List</c> selects the list code path, anything else the array path.</param>
/// <param name="logicalType">Logical type of the column, used to look up the child type and, for arrays, the array size.</param>
public ListVectorDataWriter(IntPtr vector, void* vectorData, DuckDBType columnType, DuckDBLogicalType logicalType) : base(vector, vectorData, columnType)
{
    var writingList = IsList;

    // The child type is only needed while the item writer is created, so it is disposed when the constructor exits.
    using var itemType = writingList
        ? NativeMethods.LogicalType.DuckDBListTypeChildType(logicalType)
        : NativeMethods.LogicalType.DuckDBArrayTypeChildType(logicalType);

    var itemVector = writingList
        ? NativeMethods.Vectors.DuckDBListVectorGetChild(vector)
        : NativeMethods.Vectors.DuckDBArrayVectorGetChild(vector);

    // Lists keep arraySize at its default of 0; only arrays have a size declared on the logical type.
    if (!writingList)
    {
        arraySize = (ulong)NativeMethods.LogicalType.DuckDBArrayVectorGetSize(logicalType);
    }

    listItemWriter = VectorDataWriterFactory.CreateWriter(itemVector, itemType);
}
2026

21-
internal override bool AppendCollection(IList value, int rowIndex)
27+
internal override bool AppendCollection(ICollection value, int rowIndex)
2228
{
29+
var count = (ulong)value.Count;
30+
31+
ResizeVector(rowIndex, count);
32+
2333
_ = value switch
2434
{
2535
IEnumerable<bool> items => WriteItems(items),
@@ -32,10 +42,10 @@ internal override bool AppendCollection(IList value, int rowIndex)
3242
IEnumerable<ushort> items => WriteItems(items),
3343
IEnumerable<uint> items => WriteItems(items),
3444
IEnumerable<ulong> items => WriteItems(items),
35-
45+
3646
IEnumerable<decimal> items => WriteItems(items),
3747
IEnumerable<BigInteger> items => WriteItems(items),
38-
48+
3949
IEnumerable<string> items => WriteItems(items),
4050
IEnumerable<Guid> items => WriteItems(items),
4151
IEnumerable<DateTime> items => WriteItems(items),
@@ -47,18 +57,24 @@ internal override bool AppendCollection(IList value, int rowIndex)
4757
IEnumerable<TimeOnly> items => WriteItems(items),
4858
#endif
4959
IEnumerable<DateTimeOffset> items => WriteItems(items),
50-
60+
5161
_ => WriteItems<object>((IEnumerable<object>)value)
5262
};
5363

54-
var result = AppendValueInternal(new DuckDBListEntry(offset, (ulong)value.Count), rowIndex);
64+
var duckDBListEntry = new DuckDBListEntry(offset, count);
65+
var result = !IsList || AppendValueInternal(duckDBListEntry, rowIndex);
5566

56-
offset += (ulong)value.Count;
67+
offset += count;
5768

5869
return result;
5970

6071
int WriteItems<T>(IEnumerable<T> items)
6172
{
73+
if (IsList == false && count != arraySize)
74+
{
75+
throw new InvalidOperationException($"Column has Array size of {arraySize} but the specified value has size of {count}");
76+
};
77+
6278
var index = 0;
6379

6480
foreach (var item in items)
@@ -69,4 +85,32 @@ int WriteItems<T>(IEnumerable<T> items)
6985
return 0;
7086
}
7187
}
88+
89+
/// <summary>
/// Makes sure the child vector of a list column has room for <paramref name="count"/> more items.
/// </summary>
/// <param name="rowIndex">Index of the row being written; used to pick how aggressively to over-allocate.</param>
/// <param name="count">Number of items about to be appended to the child vector.</param>
/// <exception cref="InvalidOperationException">The native reserve call reported an error.</exception>
private void ResizeVector(int rowIndex, ulong count)
{
    var requiredSize = offset + count;

    //If writing to a list column we need to make sure that enough space is allocated. Not needed for Arrays as DuckDB does it for us.
    if (!IsList || requiredSize <= vectorReservedSize)
    {
        return;
    }

    // The further into the chunk we are, the fewer rows remain, so grow less aggressively.
    // The bands are contiguous: a rowIndex exactly on a boundary belongs to the later band
    // instead of falling back to the largest factor.
    double factor;

    if (rowIndex >= DuckDBGlobalData.VectorSize * 0.75)
    {
        factor = 1.25;
    }
    else if (rowIndex >= DuckDBGlobalData.VectorSize * 0.5)
    {
        factor = 1.5;
    }
    else if (rowIndex >= DuckDBGlobalData.VectorSize * 0.25)
    {
        factor = 1.75;
    }
    else
    {
        factor = 2d;
    }

    // Never reserve less than what this append needs, even if the scaled size is smaller.
    var newReservedSize = (ulong)Math.Max(vectorReservedSize * factor, requiredSize);
    var state = NativeMethods.Vectors.DuckDBListVectorReserve(Vector, newReservedSize);

    if (state == DuckDBState.Error)
    {
        throw new InvalidOperationException($"Failed to reserve {newReservedSize} items for the list vector");
    }

    // Only record the new capacity once the native call has succeeded.
    vectorReservedSize = newReservedSize;

    // Re-read the child data pointer after the reserve call.
    // NOTE(review): presumably the reserve can move the child buffer - confirm against the DuckDB C API.
    listItemWriter.FetchDataPointer();
}
72116
}

DuckDB.NET.Data/Internal/Writer/VectorDataWriterBase.cs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@ public unsafe void AppendValue<T>(T value, int rowIndex)
6161
TimeOnly val => AppendTimeOnly(val, rowIndex),
6262
#endif
6363
DateTimeOffset val => AppendDateTimeOffset(val, rowIndex),
64-
IList val => AppendCollection(val, rowIndex),
64+
ICollection val => AppendCollection(val, rowIndex),
6565
_ => ThrowException<T>()
6666
};
6767
}
@@ -96,7 +96,7 @@ public unsafe void AppendValue<T>(T value, int rowIndex)
9696

9797
internal virtual bool AppendBigInteger(BigInteger value, int rowIndex) => ThrowException<BigInteger>();
9898

99-
internal virtual bool AppendCollection(IList value, int rowIndex) => ThrowException<bool>();
99+
internal virtual bool AppendCollection(ICollection value, int rowIndex) => ThrowException<bool>();
100100

101101
private bool ThrowException<T>()
102102
{
@@ -108,4 +108,9 @@ internal unsafe bool AppendValueInternal<T>(T value, int rowIndex) where T : unm
108108
((T*)vectorData)[rowIndex] = value;
109109
return true;
110110
}
111+
112+
/// <summary>
/// Replaces the cached data pointer with the value currently returned by
/// <c>NativeMethods.Vectors.DuckDBVectorGetData</c> for this writer's vector.
/// </summary>
internal void FetchDataPointer() => vectorData = NativeMethods.Vectors.DuckDBVectorGetData(Vector);
111116
}

DuckDB.NET.Data/Internal/Writer/VectorDataWriterFactory.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ public static unsafe VectorDataWriterBase CreateWriter(IntPtr vector, DuckDBLogi
2323

2424
DuckDBType.Map => throw new NotImplementedException($"Writing {columnType} to data chunk is not yet supported"),
2525
DuckDBType.List => new ListVectorDataWriter(vector, dataPointer, columnType, logicalType),
26-
DuckDBType.Array => throw new NotImplementedException($"Writing {columnType} to data chunk is not yet supported"),
26+
DuckDBType.Array => new ListVectorDataWriter(vector, dataPointer, columnType, logicalType),
2727
DuckDBType.Blob => new StringVectorDataWriter(vector, dataPointer, columnType),
2828
DuckDBType.Varchar => new StringVectorDataWriter(vector, dataPointer, columnType),
2929
DuckDBType.Bit => throw new NotImplementedException($"Writing {columnType} to data chunk is not yet supported"),

DuckDB.NET.Test/DuckDBDatabaseFixture.cs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
using System;
2-
using System.Data;
1+
using Bogus;
32
using DuckDB.NET.Data;
3+
using System;
4+
using System.Collections.Generic;
5+
using System.Data;
6+
using System.Linq;
47
using Xunit;
58

69
namespace DuckDB.NET.Test;
@@ -24,7 +27,14 @@ public void Dispose()
2427
public class DuckDBTestBase : IDisposable, IClassFixture<DuckDBDatabaseFixture>
2528
{
2629
protected DuckDBCommand Command { get; }
27-
internal DuckDBConnection Connection { get; }
30+
protected DuckDBConnection Connection { get; }
31+
32+
protected Faker Faker { get; init; } = new Faker();
33+
34+
/// <summary>
/// Builds a list of generated values for use as test data.
/// </summary>
/// <typeparam name="T">Type of the generated items.</typeparam>
/// <param name="generator">Callback that produces one item from the shared <c>Faker</c> instance.</param>
/// <param name="count">
/// Number of items to generate (20 by default). Pass <c>null</c> to let the faker pick a size via <c>Random.Int(0, 50)</c>.
/// </param>
/// <returns>A new list holding the generated items in generation order.</returns>
protected List<T> GetRandomList<T>(Func<Faker, T> generator, int? count = 20)
{
    var size = count.HasValue ? count.Value : Faker.Random.Int(0, 50);
    var items = new List<T>();

    foreach (var _ in Enumerable.Range(0, size))
    {
        items.Add(generator(Faker));
    }

    return items;
}
2838

2939
public DuckDBTestBase(DuckDBDatabaseFixture db)
3040
{

DuckDB.NET.Test/ManagedAppenderTests.cs

Lines changed: 23 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -197,7 +197,7 @@ public void GuidValues()
197197
Command.CommandText = "CREATE TABLE managedAppenderGuids(a UUID);";
198198
Command.ExecuteNonQuery();
199199

200-
var guids = Enumerable.Range(0, 20).Select(i => (Guid?)Guid.NewGuid()).ToList();
200+
var guids = GetRandomList<Guid?>(faker => faker.Random.Guid());
201201
guids.Add(null);
202202

203203
using (var appender = Connection.CreateAppender("managedAppenderGuids"))
@@ -263,28 +263,36 @@ public void TemporalValues()
263263
result.Select(tuple => tuple.Item4).Should().BeEquivalentTo(dates);
264264
result.Select(tuple => tuple.Item5).Should().BeEquivalentTo(dates);
265265
result.Select(tuple => tuple.Item6).Should().BeEquivalentTo(dates);
266-
result.Select(tuple => tuple.Item7).Should().BeEquivalentTo(dates.Select(time => time.ToDateTimeOffset(TimeSpan.FromHours(1))),
266+
result.Select(tuple => tuple.Item7).Should().BeEquivalentTo(dates.Select(time => time.ToDateTimeOffset(TimeSpan.FromHours(1))),
267267
options => options.ComparingByMembers<DateTimeOffset>().Including(offset => offset.Offset).Including(offset => offset.TimeOfDay));
268268
result.Select(tuple => tuple.Item8).Should().BeEquivalentTo(dates.Select(TimeOnly.FromDateTime));
269269
}
270270

271271
[Fact]
272272
public void ListValues()
273273
{
274-
Command.CommandText = "CREATE TABLE managedAppenderLists(a INTEGER, b INTEGER[]" +
275-
", c INTEGER[][]" +
276-
");";
274+
Command.CommandText = "CREATE TABLE managedAppenderLists(a INTEGER, b INTEGER[], c INTEGER[][]);";
277275
Command.ExecuteNonQuery();
278276

279-
var rows = 2;
277+
var rows = 1000;
278+
279+
var lists = new List<List<int>>();
280+
281+
lists.Add(GetRandomList(faker => faker.Random.Int(), 2050 * 2));
282+
283+
for (int i = 0; i < rows; i++)
284+
{
285+
lists.Add(GetRandomList(faker => faker.Random.Int(), Random.Shared.Next(0, 200)));
286+
}
287+
280288
using (var appender = Connection.CreateAppender("managedAppenderLists"))
281289
{
282290
for (int i = 0; i < rows; i++)
283291
{
284292
appender.CreateRow()
285293
.AppendValue(i)
286-
.AppendValue(Enumerable.Range(0, 2).ToList())
287-
.AppendValue(new List<List<int>> { Enumerable.Range(0, 5).ToList(), Enumerable.Range(i + 2, 4).ToList() })
294+
.AppendValue(lists[i])
295+
.AppendValue(new List<List<int>> { Enumerable.Range(0, i % 10 + 1).ToList(), Enumerable.Range(i + 2, 4).ToList() })
288296
.EndRow();
289297
}
290298
}
@@ -295,9 +303,13 @@ public void ListValues()
295303
int index = 0;
296304
while (reader.Read())
297305
{
298-
var ints = reader.GetFieldValue<List<int>>(1);
299-
ints.Should().BeEquivalentTo(Enumerable.Range(0, 2).ToList());
300-
var fieldValue = reader.GetFieldValue<List<List<int>>>(2);
306+
var list = reader.GetFieldValue<List<int>>(1);
307+
list.Should().BeEquivalentTo(lists[index]);
308+
309+
var nestedList = reader.GetFieldValue<List<List<int>>>(2);
310+
nestedList.Should().BeEquivalentTo(new List<List<int>> { Enumerable.Range(0, index % 10 + 1).ToList(), Enumerable.Range(index + 2, 4).ToList() });
311+
312+
index++;
301313
}
302314
}
303315

0 commit comments

Comments
 (0)