Skip to content
Open
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions src/Microsoft.Data.Sqlite.Core/SqliteConnection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
using System.IO;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Text;
using Microsoft.Data.Sqlite.Properties;
using SQLitePCL;
using static SQLitePCL.raw;
Expand All @@ -31,6 +32,7 @@ public partial class SqliteConnection : DbConnection
private readonly List<WeakReference<SqliteCommand>> _commands = [];

private Dictionary<string, (object? state, strdelegate_collation? collation)>? _collations;
private Dictionary<string, (object? state, delegate_collation? collation)>? _collationsSpan;

private Dictionary<(string name, int arity), (int flags, object? state, delegate_function_scalar? func)>? _functions;

Expand Down Expand Up @@ -278,6 +280,15 @@ public override void Open()
}
}

if (_collationsSpan != null)
{
foreach (var item in _collationsSpan)
{
rc = sqlite3__create_collation_utf8(Handle, item.Key, item.Value.state, item.Value.collation);
SqliteException.ThrowExceptionForRC(rc, Handle);
}
}

if (_functions != null)
{
foreach (var item in _functions)
Expand Down Expand Up @@ -372,6 +383,15 @@ internal void Deactivate()
}
}

if (_collationsSpan != null)
{
foreach (var item in _collationsSpan.Keys)
{
rc = sqlite3__create_collation_utf8(Handle, item, null, null);
SqliteException.ThrowExceptionForRC(rc, Handle);
}
}

if (_functions != null)
{
foreach (var (name, arity) in _functions.Keys)
Expand Down Expand Up @@ -493,6 +513,62 @@ public virtual void CreateCollation<T>(string name, T state, Func<T, string, str
_collations[name] = (state, collation);
}


/// <summary>
/// Create custom collation.
/// </summary>
/// <typeparam name="T">The type of the state object.</typeparam>
/// <param name="name">Name of the collation.</param>
/// <param name="state">State object passed to each invocation of the collation.</param>
/// <param name="comparison">Method that compares two char spans, using additional state.</param>
/// <seealso href="https://docs.microsoft.com/dotnet/standard/data/sqlite/collation">Collation</seealso>
public virtual void CreateSpanCollation<T>(string name, T state, SpanDelegateCollation? comparison)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to have the special new name CreateSpanCollation rather than just an overload of CreateCollation?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That would make this a breaking change because now something like con.CreateCollation("name", null, (t, s1, s2) => s1.Length - s2.Length) is ambiguous now. I'm not sure the policy on those kinds of breaking changes so that's why I didn't use the same name.

{
if (string.IsNullOrEmpty(name))
{
throw new ArgumentNullException(nameof(name));
}
#if NET5_0_OR_GREATER
delegate_collation? collation = comparison != null ? (v, s1, s2) =>
{
Span<char> s1Span = stackalloc char[Encoding.UTF8.GetCharCount(s1)];
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks unsafe: stackalloc should never be used with an unbounded size. Unless I'm missing something, a huge string in the database could cause an arbitrary stack allocation and potentially cause a stack overflow, no?

Regardless, this copies/decodes the SQLite-provided ReadOnlySpan<byte> to ReadOnlySpan<char>; the point of this PR is to provide better performance, so that's not great. You should be able to simply reinterpret-cast using MemoryMarshal.Cast<byte, char>(), but need to check that the memory is aligned first. I'm assuming that SQLite-provided memory will generally be aligned (this needs to be checked); if that's the case, we can fall back to inefficient copying for unaligned memory (if that's a rare case).

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right about the size issue. I'm not sure if there's an existing pattern but it seems like we should avoid a stack allocation if the size is over some limit? Not sure what that would be though.

As for copying, that's required as a char is utf16 and a byte is only 8 bits, so we can't just reinterpret a byte as a char because they have different sizes.

That said there's an argument for this whole method to use ReadOnlySpan<byte> and not ReadOnlySpan<char>, or we could make both? I'm not sure, it's not very easy to handle utf8 strings in dotnet and if someone is going down to that level then they can just call sqlite3_create_collation themselves. Though it would be pretty easy for CreateCollation char to be implemented on top of the utf8 version.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you're right about the size issue. I'm not sure if there's an existing pattern but it seems like we should avoid a stack allocation if the size is over some limit? Not sure what that would be though.

I think that would be

Span<char> s2Span = stackalloc char[Encoding.UTF8.GetCharCount(s2)];
Encoding.UTF8.GetChars(s1, s1Span);
Encoding.UTF8.GetChars(s2, s2Span);
return comparison((T)v, s1Span, s2Span);
}
: null;
#else
delegate_collation? collation = comparison != null ? (v, s1, s2) =>
{
return comparison((T)v, Encoding.UTF8.GetChars(s1.ToArray()), Encoding.UTF8.GetChars(s2.ToArray()));
}
: null;
#endif
if (State == ConnectionState.Open)
{
var rc = sqlite3__create_collation_utf8(Handle, name, state, collation);
SqliteException.ThrowExceptionForRC(rc, Handle);
}

_collationsSpan ??= new Dictionary<string, (object?, delegate_collation?)>(StringComparer.OrdinalIgnoreCase);
_collationsSpan[name] = (state, collation);
}

/// <summary>
/// Represents a method signature for a custom collation delegate that compares two read-only spans of characters.
/// </summary>
/// <param name="state">An optional user-defined state object to be passed to the comparison function.</param>
/// <param name="s1">The first read-only span of characters to compare.</param>
/// <param name="s2">The second read-only span of characters to compare.</param>
/// <returns>
/// A signed integer indicating the relative order of the strings being compared:
/// Less than zero if <paramref name="s1"/> precedes <paramref name="s2"/>;
/// Zero if <paramref name="s1"/> is equal to <paramref name="s2"/>;
/// Greater than zero if <paramref name="s1"/> follows <paramref name="s2"/>.
/// </returns>
public delegate int SpanDelegateCollation(in object? state, in ReadOnlySpan<char> s1, in ReadOnlySpan<char> s2);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should state be generic (T) instead of object?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why have in modifiers here? ReadOnlySpan is very small, so passing it in by reference doesn't seem to make sense, and in fact will make things slower, no?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure, I was copying Func, but looking at methods in CompareInfo none of them use the in modifier either so I'll drop it here.


/// <summary>
/// Begins a transaction on the connection.
/// </summary>
Expand Down
Loading