From 5a077b4b494b1100789bd4c81b562c01260d51f7 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Tue, 27 May 2025 13:43:15 -0400 Subject: [PATCH 01/16] Lay foundation for workspace symbols again --- src/DocumentStore.zig | 33 +++ src/Server.zig | 64 +++++- src/TrigramStore.zig | 502 ++++++++++++++++++++++++++++++++++++++++++ src/zls.zig | 1 + 4 files changed, 599 insertions(+), 1 deletion(-) create mode 100644 src/TrigramStore.zig diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 5830e4cb2..31198fdfa 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -14,6 +14,7 @@ const tracy = @import("tracy"); const translate_c = @import("translate_c.zig"); const DocumentScope = @import("DocumentScope.zig"); const DiagnosticsCollection = @import("DiagnosticsCollection.zig"); +const TrigramStore = @import("TrigramStore.zig"); const DocumentStore = @This(); @@ -25,6 +26,7 @@ thread_pool: *std.Thread.Pool, handles: std.StringArrayHashMapUnmanaged(*Handle) = .empty, build_files: if (supports_build_system) std.StringArrayHashMapUnmanaged(*BuildFile) else void = if (supports_build_system) .empty else {}, cimports: if (supports_build_system) std.AutoArrayHashMapUnmanaged(Hash, translate_c.Result) else void = if (supports_build_system) .empty else {}, +trigram_stores: std.StringArrayHashMapUnmanaged(TrigramStore) = .empty, diagnostics_collection: *DiagnosticsCollection, builds_in_progress: std.atomic.Value(i32) = .init(0), transport: ?*lsp.Transport = null, @@ -603,6 +605,12 @@ pub fn deinit(self: *DocumentStore) void { } self.handles.deinit(self.allocator); + for (self.trigram_stores.keys(), self.trigram_stores.values()) |uri, *trigram_store| { + self.allocator.free(uri); + trigram_store.deinit(self.allocator); + } + self.trigram_stores.deinit(self.allocator); + if (supports_build_system) { for (self.build_files.values()) |build_file| { build_file.deinit(self.allocator); @@ -691,6 +699,31 @@ pub fn 
getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle {
     };
 }
 
+/// Builds the trigram index for the file at `uri` and caches it in
+/// `store.trigram_stores`. Does nothing if `uri` is already indexed.
+///
+/// The map owns a private copy of `uri`; the caller keeps ownership of the
+/// slice it passed in. A file that cannot be read is cached with an empty
+/// index so it is not read again on the next call.
+pub fn trigramIndexUri(
+    store: *DocumentStore,
+    uri: Uri,
+    encoding: offsets.Encoding,
+) error{OutOfMemory}!void {
+    const gop = try store.trigram_stores.getOrPut(store.allocator, uri);
+    if (gop.found_existing) return;
+
+    // The fresh slot's key still aliases the caller's `uri` at this point, so
+    // a failed dupe must only drop the slot and never free the key.
+    const owned_uri = store.allocator.dupe(u8, uri) catch |err| {
+        store.trigram_stores.swapRemoveAt(gop.index);
+        return err;
+    };
+    gop.key_ptr.* = owned_uri;
+    gop.value_ptr.* = .empty;
+
+    errdefer {
+        // Release whatever `fill` allocated before failing, then drop the
+        // slot and finally the key it owned.
+        gop.value_ptr.deinit(store.allocator);
+        store.trigram_stores.swapRemoveAt(gop.index);
+        store.allocator.free(owned_uri);
+    }
+
+    const file_contents = store.readFile(uri) orelse return;
+    defer store.allocator.free(file_contents);
+
+    try gop.value_ptr.fill(store.allocator, file_contents, encoding);
+}
+
 /// **Thread safe** takes a shared lock
 /// This function does not protect against data races from modifying the BuildFile
 pub fn getBuildFile(self: *DocumentStore, uri: Uri) ?*BuildFile {
diff --git a/src/Server.zig b/src/Server.zig
index 497ccb90c..3e39eb242 100644
--- a/src/Server.zig
+++ b/src/Server.zig
@@ -34,6 +34,7 @@ const goto = @import("features/goto.zig");
 const hover_handler = @import("features/hover.zig");
 const selection_range = @import("features/selection_range.zig");
 const diagnostics_gen = @import("features/diagnostics.zig");
+const TrigramStore = @import("TrigramStore.zig");
 
 const BuildOnSave = diagnostics_gen.BuildOnSave;
 const BuildOnSaveSupport = build_runner_shared.BuildOnSaveSupport;
@@ -558,7 +559,7 @@ fn initializeHandler(server: *Server, arena: std.mem.Allocator, request: types.I
             .documentRangeFormattingProvider = .{ .bool = false },
             .foldingRangeProvider = .{ .bool = true },
             .selectionRangeProvider = .{ .bool = true },
-            .workspaceSymbolProvider = .{ .bool = false },
+            .workspaceSymbolProvider = .{ .bool = true },
             .workspace = .{
                 .workspaceFolders = .{
                     .supported = true,
@@ -1506,6 +1507,64 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ
     return try selection_range.generateSelectionRanges(arena, handle, request.positions, server.offset_encoding);
}
 
+/// Handles `workspace/symbol`.
+///
+/// Indexes every `.zig` file found below each workspace folder (files that
+/// are already indexed are skipped by `trigramIndexUri`) and then returns the
+/// declarations whose names contain every trigram of `request.query`.
+/// Queries shorter than three bytes cannot form a trigram and yield `null`.
+/// All results are allocated in `arena`.
+fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) Error!lsp.ResultType("workspace/symbol") {
+    if (request.query.len < 3) return null;
+
+    for (server.workspaces.items) |workspace| {
+        const path = Uri.parse(arena, workspace.uri) catch return error.InternalError;
+        var dir = std.fs.cwd().openDir(path, .{ .iterate = true }) catch return error.InternalError;
+        defer dir.close();
+
+        var walker = try dir.walk(arena);
+        defer walker.deinit();
+
+        while (walker.next() catch return error.InternalError) |entry| {
+            if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue;
+
+            // Allocation failures propagate as OutOfMemory instead of being
+            // reported as an internal error.
+            const file_path = try std.fs.path.join(arena, &.{ path, entry.path });
+            const uri = Uri.fromPath(arena, file_path) catch return error.InternalError;
+
+            try server.document_store.trigramIndexUri(uri, server.offset_encoding);
+        }
+    }
+
+    var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty;
+    var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty;
+
+    for (
+        server.document_store.trigram_stores.keys(),
+        server.document_store.trigram_stores.values(),
+    ) |uri, trigram_store| {
+        // The buffer is shared between files. Reset it so that a store which
+        // rejects the query early cannot leave the previous file's matches
+        // behind to be reported under this file's URI.
+        declaration_buffer.clearRetainingCapacity();
+        try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer);
+
+        const slice = trigram_store.declarations.slice();
+        const names = slice.items(.name);
+        const ranges = slice.items(.range);
+
+        for (declaration_buffer.items) |declaration| {
+            const name = names[@intFromEnum(declaration)];
+            const range = ranges[@intFromEnum(declaration)];
+            try symbols.append(arena, .{
+                .name = trigram_store.names.items[name.start..name.end],
+                .kind = .Variable,
+                .location = .{
+                    .Location = .{
+                        .uri = uri,
+                        .range = range,
+                    },
+                },
+            });
+        }
+    }
+
+    return .{ .array_of_WorkspaceSymbol = symbols.items };
+}
+
 const HandledRequestParams = union(enum) {
     initialize: types.InitializeParams,
     shutdown,
@@ -1529,6 +1588,7 @@ const HandledRequestParams =
union(enum) { @"textDocument/codeAction": types.CodeActionParams, @"textDocument/foldingRange": types.FoldingRangeParams, @"textDocument/selectionRange": types.SelectionRangeParams, + @"workspace/symbol": types.WorkspaceSymbolParams, other: lsp.MethodWithParams, }; @@ -1573,6 +1633,7 @@ fn isBlockingMessage(msg: Message) bool { .@"textDocument/codeAction", .@"textDocument/foldingRange", .@"textDocument/selectionRange", + .@"workspace/symbol", => return false, .other => return false, }, @@ -1752,6 +1813,7 @@ pub fn sendRequestSync(server: *Server, arena: std.mem.Allocator, comptime metho .@"textDocument/codeAction" => try server.codeActionHandler(arena, params), .@"textDocument/foldingRange" => try server.foldingRangeHandler(arena, params), .@"textDocument/selectionRange" => try server.selectionRangeHandler(arena, params), + .@"workspace/symbol" => try server.workspaceSymbolHandler(arena, params), .other => return null, }; } diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig new file mode 100644 index 000000000..2fa5128df --- /dev/null +++ b/src/TrigramStore.zig @@ -0,0 +1,502 @@ +//! Per-file trigram store. 
+ +const std = @import("std"); +const ast = @import("ast.zig"); +const Ast = std.zig.Ast; +const builtin = @import("builtin"); +const assert = std.debug.assert; +const offsets = @import("offsets.zig"); +const URI = @import("uri.zig"); +const log = std.log.scoped(.store); + +pub const TrigramStore = @This(); + +pub const Trigram = [3]u8; + +pub const NameSlice = struct { start: u32, end: u32 }; + +pub const Declaration = struct { + pub const Index = enum(u32) { _ }; + + name: NameSlice, + range: offsets.Range, +}; + +pub const empty: TrigramStore = .{ + .has_filter = false, + .filter_buckets = .empty, + .trigram_to_declarations = .empty, + .declarations = .empty, + .names = .empty, +}; + +has_filter: bool, +filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), +trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), +declarations: std.MultiArrayList(Declaration), +names: std.ArrayListUnmanaged(u8), + +pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { + store.filter_buckets.deinit(allocator); + for (store.trigram_to_declarations.values()) |*list| { + list.deinit(allocator); + } + store.trigram_to_declarations.deinit(allocator); + store.declarations.deinit(allocator); + store.names.deinit(allocator); + store.* = undefined; +} + +fn clearRetainingCapacity(store: *TrigramStore) void { + store.filter_buckets.clearRetainingCapacity(); + store.has_filter = false; + for (store.trigram_to_declarations.values()) |*list| { + list.clearRetainingCapacity(); + } + store.declarations.clearRetainingCapacity(); + store.names.clearRetainingCapacity(); +} + +pub fn fill( + store: *TrigramStore, + allocator: std.mem.Allocator, + source: [:0]const u8, + encoding: offsets.Encoding, +) error{OutOfMemory}!void { + store.clearRetainingCapacity(); + + var tree = try Ast.parse(allocator, source, .zig); + defer tree.deinit(allocator); + + const Context = struct { + allocator: std.mem.Allocator, + store: 
*TrigramStore, + in_function: bool, + encoding: offsets.Encoding, + + const Error = error{OutOfMemory}; + fn callback(context: *@This(), cb_tree: Ast, node: Ast.Node.Index) Error!void { + const old_in_function = context.in_function; + defer context.in_function = old_in_function; + + switch (cb_tree.nodeTag(node)) { + .fn_decl => { + if (!context.in_function) {} + + context.in_function = true; + }, + .root => unreachable, + .container_decl, + .container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + => context.in_function = false, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => { + if (!context.in_function) { + const token = cb_tree.fullVarDecl(node).?.ast.mut_token + 1; + const name = cb_tree.tokenSlice(token); + + if (name.len >= 3) { + try context.store.appendDeclaration( + context.allocator, + name, + offsets.tokenToRange(cb_tree, token, context.encoding), + ); + } + } + }, + + else => {}, + } + + try ast.iterateChildren(cb_tree, node, context, Error, callback); + } + }; + + var context = Context{ + .allocator = allocator, + .store = store, + .in_function = false, + .encoding = encoding, + }; + try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback); + + try store.finalize(allocator); +} + +/// Caller must not submit name.len < 3. 
+/// Records a declaration called `name` located at `range` and registers it
+/// under every trigram of `name`.
+///
+/// Declarations receive increasing indices and each per-trigram list is
+/// appended to in that order without duplicates, so every list stays strictly
+/// ascending, which `mergeIntersection` relies on.
+fn appendDeclaration(
+    store: *TrigramStore,
+    allocator: std.mem.Allocator,
+    name: []const u8,
+    range: offsets.Range,
+) error{OutOfMemory}!void {
+    assert(name.len >= 3);
+
+    const name_slice: NameSlice = blk: {
+        const start = store.names.items.len;
+        try store.names.appendSlice(allocator, name);
+        break :blk .{
+            .start = @intCast(start),
+            .end = @intCast(store.names.items.len),
+        };
+    };
+
+    try store.declarations.append(allocator, .{
+        .name = name_slice,
+        .range = range,
+    });
+    const declaration: Declaration.Index = @enumFromInt(store.declarations.len - 1);
+
+    for (0..name.len - 2) |index| {
+        const trigram = name[index..][0..3].*;
+        const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty);
+
+        // A name such as `aaaa` contains the same trigram more than once.
+        // Register the declaration only once per trigram, otherwise it would
+        // show up several times in query results.
+        const existing = gop.value_ptr.items;
+        if (existing.len > 0 and existing[existing.len - 1] == declaration) continue;
+
+        try gop.value_ptr.append(allocator, declaration);
+    }
+}
+
+/// Must be called before any queries are executed.
+///
+/// Drops trigrams that no longer reference any declaration and (re)builds the
+/// cuckoo filter over the remaining trigrams. If an insertion into the filter
+/// fails, the filter is disabled and queries fall back to map lookups only.
+fn finalize(store: *TrigramStore, allocator: std.mem.Allocator) error{OutOfMemory}!void {
+    {
+        // `swapRemoveAt` shrinks the map and moves its last entry into the
+        // freed slot, so the bound and the element are re-read on every
+        // iteration instead of walking a slice captured before the loop.
+        var index: usize = 0;
+        while (index < store.trigram_to_declarations.count()) {
+            const list = &store.trigram_to_declarations.values()[index];
+            if (list.items.len == 0) {
+                list.deinit(allocator);
+                store.trigram_to_declarations.swapRemoveAt(index);
+            } else {
+                index += 1;
+            }
+        }
+    }
+
+    const trigrams = store.trigram_to_declarations.keys();
+
+    if (trigrams.len > 0) {
+        var prng = std.Random.DefaultPrng.init(0);
+
+        const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable;
+        try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity);
+        store.filter_buckets.items.len = filter_capacity;
+
+        const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items };
+        filter.reset();
+        store.has_filter = true;
+
+        for (trigrams) |trigram| {
+            filter.append(prng.random(), trigram) catch |err| switch (err) {
+                error.EvictionFailed => {
+                    // NOTE: This should generally be quite rare.
+                    store.has_filter = false;
+                    break;
+                },
+            };
+        }
+    }
+}
+
+/// Collects the declarations whose name contains every trigram of `query`.
+///
+/// `declaration_buffer` is always overwritten: on return it holds exactly the
+/// matching declaration indices in ascending order, or is empty when nothing
+/// matches. Its capacity is reused between calls.
+///
+/// Asserts `query.len >= 3`.
+pub fn declarationsForQuery(
+    store: *const TrigramStore,
+    allocator: std.mem.Allocator,
+    query: []const u8,
+    declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index),
+) error{OutOfMemory}!void {
+    assert(query.len >= 3);
+
+    // Clear up front so that every early return below reports "no matches"
+    // rather than whatever a previous call left in the buffer.
+    declaration_buffer.clearRetainingCapacity();
+
+    if (store.has_filter) {
+        const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items };
+        for (0..query.len - 2) |index| {
+            const trigram = query[index..][0..3].*;
+            if (!filter.contains(trigram)) return;
+        }
+    }
+
+    const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items;
+
+    // The buffer is used as two halves: the front holds the running
+    // intersection, the back is scratch space for the next merge.
+    try declaration_buffer.ensureTotalCapacity(allocator, first.len * 2);
+    declaration_buffer.items.len = first.len * 2;
+
+    var len: usize = first.len;
+    @memcpy(declaration_buffer.items[0..len], first);
+
+    // The first trigram seeded the buffer, so intersect with the rest only.
+    for (1..query.len - 2) |index| {
+        const trigram = query[index..][0..3].*;
+        const candidates = (store.trigram_to_declarations.get(trigram) orelse {
+            declaration_buffer.clearRetainingCapacity();
+            return;
+        }).items;
+
+        const scratch = declaration_buffer.items[len..];
+        const new_len: usize = mergeIntersection(candidates, declaration_buffer.items[0..len], scratch);
+
+        // `new_len <= len`, so the source and destination never overlap.
+        @memcpy(declaration_buffer.items[0..new_len], scratch[0..new_len]);
+        len = new_len;
+    }
+
+    declaration_buffer.items.len = len;
+}
+
+/// Asserts `@min(a.len, b.len) <= out.len`.
+/// Writes the elements present in both `a` and `b` to the front of `out` and
+/// returns how many were written. Both inputs are walked in lock-step, which
+/// only finds every common element when they are sorted in ascending order.
+fn mergeIntersection(
+    a: []const Declaration.Index,
+    b: []const Declaration.Index,
+    out: []Declaration.Index,
+) u32 {
+    std.debug.assert(@min(a.len, b.len) <= out.len);
+
+    var written: u32 = 0;
+    var lhs: u32 = 0;
+    var rhs: u32 = 0;
+
+    while (lhs < a.len and rhs < b.len) {
+        const left = a[lhs];
+        const right = b[rhs];
+
+        switch (std.math.order(@intFromEnum(left), @intFromEnum(right))) {
+            // Present on both sides: emit it and advance both cursors.
+            .eq => {
+                out[written] = left;
+                written += 1;
+                lhs += 1;
+                rhs += 1;
+            },
+            // Otherwise advance whichever side is behind.
+            .lt => lhs += 1,
+            .gt => rhs += 1,
+        }
+    }
+
+    return written;
+}
+
+// TODO: The pow2 requirement is quite inefficient: explore ideas posted in
+// https://databasearchitects.blogspot.com/2019/07/cuckoo-filters-with-arbitrarily-sized.html
+// (rocksdb even-odd scheme from comments looks interesting).
+pub const CuckooFilter = struct {
+    /// len must be a power of 2.
+    ///
+    /// ### Pathological case with buckets.len power of 2
+    ///
+    /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_2`
+    /// - `BucketIndex(alias_1)` -> `bucket_1`, `BucketIndex(alias_1).alternate()` -> `bucket_2`
+    ///
+    /// Our alternate mappings hold and `contains()` will not return false negatives.
+    ///
+    /// ### Pathological case with buckets.len NOT power of 2:
+    ///
+    /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_3`
+    /// - `BucketIndex(alias_1)` -> `bucket_2`, `BucketIndex(alias_1).alternate()` -> `bucket_4`
+    ///
+    /// Our alternate mappings do not hold and `contains()` can return false negatives. This is not
+    /// acceptable as the entire point of an AMQ datastructure is the presence of false positives
+    /// but not false negatives.
+ buckets: []Bucket, + + pub const Fingerprint = enum(u8) { + none = std.math.maxInt(u8), + _, + + pub fn hash(fingerprint: Fingerprint) u32 { + return @truncate(std.hash.Murmur2_64.hash(&.{@intFromEnum(fingerprint)})); + } + }; + pub const Bucket = [4]Fingerprint; + pub const BucketIndex = enum(u32) { + _, + + pub fn alternate(index: BucketIndex, fingerprint: Fingerprint) BucketIndex { + assert(fingerprint != .none); + return @enumFromInt(@intFromEnum(index) ^ fingerprint.hash()); + } + }; + + pub const Triplet = struct { + fingerprint: Fingerprint, + index_1: BucketIndex, + index_2: BucketIndex, + + pub fn initFromTrigram(trigram: Trigram) Triplet { + const split: packed struct { + fingerprint: Fingerprint, + padding: u24, + index_1: BucketIndex, + } = @bitCast(std.hash.Murmur2_64.hash(&trigram)); + + const fingerprint: Fingerprint = if (split.fingerprint == .none) + @enumFromInt(0) + else + split.fingerprint; + + const triplet: Triplet = .{ + .fingerprint = fingerprint, + .index_1 = split.index_1, + .index_2 = split.index_1.alternate(fingerprint), + }; + assert(triplet.index_2.alternate(fingerprint) == triplet.index_1); + + return triplet; + } + }; + + pub fn reset(filter: CuckooFilter) void { + @memset(filter.buckets, [1]Fingerprint{.none} ** 4); + } + + pub fn capacityForCount(count: usize) error{Overflow}!usize { + const fill_rate = 0.95; + return try std.math.ceilPowerOfTwo(usize, @intFromFloat(@ceil(@as(f32, @floatFromInt(count)) / fill_rate))); + } + + // Use a hash (fnv) for randomness. 
+ pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { + const triplet: Triplet = .initFromTrigram(trigram); + + if (filter.appendToBucket(triplet.index_1, triplet.fingerprint) or + filter.appendToBucket(triplet.index_2, triplet.fingerprint)) + { + return; + } + + var fingerprint = triplet.fingerprint; + var index = if (random.boolean()) triplet.index_1 else triplet.index_2; + for (0..500) |_| { + fingerprint = filter.swapFromBucket(random, index, fingerprint); + index = index.alternate(fingerprint); + + if (filter.appendToBucket(index, fingerprint)) { + return; + } + } + + return error.EvictionFailed; + } + + fn bucketAt(filter: CuckooFilter, index: BucketIndex) *Bucket { + assert(std.math.isPowerOfTwo(filter.buckets.len)); + return &filter.buckets[@intFromEnum(index) & (filter.buckets.len - 1)]; + } + + fn appendToBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == .none) { + slot.* = fingerprint; + return true; + } + } + + return false; + } + + fn swapFromBucket( + filter: CuckooFilter, + random: std.Random, + index: BucketIndex, + fingerprint: Fingerprint, + ) Fingerprint { + assert(fingerprint != .none); + + const target = &filter.bucketAt(index)[random.int(u2)]; + + const old_fingerprint = target.*; + assert(old_fingerprint != .none); + + target.* = fingerprint; + + return old_fingerprint; + } + + pub fn contains(filter: CuckooFilter, trigram: Trigram) bool { + const triplet: Triplet = .initFromTrigram(trigram); + + return filter.containsInBucket(triplet.index_1, triplet.fingerprint) or + filter.containsInBucket(triplet.index_2, triplet.fingerprint); + } + + fn containsInBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == 
fingerprint) { + return true; + } + } + + return false; + } +}; + +// TODO: More extensive (different capacities) testing. +test CuckooFilter { + const allocator = std.testing.allocator; + + const element_count = 486; + const filter_size = comptime CuckooFilter.capacityForCount(element_count) catch unreachable; + try std.testing.expectEqual(512, filter_size); + + var entries: std.AutoArrayHashMapUnmanaged(Trigram, void) = .empty; + defer entries.deinit(allocator); + try entries.ensureTotalCapacity(allocator, element_count); + + var buckets: [filter_size]CuckooFilter.Bucket = undefined; + var filter = CuckooFilter{ .buckets = &buckets }; + var filter_prng = std.Random.DefaultPrng.init(42); + + for (0..2_500) |gen_prng_seed| { + entries.clearRetainingCapacity(); + filter.reset(); + + var gen_prng = std.Random.DefaultPrng.init(gen_prng_seed); + for (0..element_count) |_| { + const trigram: Trigram = @bitCast(gen_prng.random().int(u24)); + try entries.put(allocator, trigram, {}); + try filter.append(filter_prng.random(), trigram); + } + + // No false negatives + for (entries.keys()) |trigram| { + try std.testing.expect(filter.contains(trigram)); + } + + // Reasonable false positive rate + const fpr_count = 2_500; + var false_positives: usize = 0; + var negative_prng = std.Random.DefaultPrng.init(~gen_prng_seed); + for (0..fpr_count) |_| { + var trigram: Trigram = @bitCast(negative_prng.random().int(u24)); + while (entries.contains(trigram)) { + trigram = @bitCast(negative_prng.random().int(u24)); + } + + false_positives += @intFromBool(filter.contains(trigram)); + } + + const fpr = @as(f32, @floatFromInt(false_positives)) / fpr_count; + std.testing.expect(fpr < 0.035) catch |err| { + std.log.err("fpr: {d}%", .{fpr * 100}); + return err; + }; + } +} diff --git a/src/zls.zig b/src/zls.zig index 4ecc96665..650ff66b8 100644 --- a/src/zls.zig +++ b/src/zls.zig @@ -18,6 +18,7 @@ pub const Server = @import("Server.zig"); pub const snippets = @import("snippets.zig"); pub const 
testing = @import("testing.zig"); pub const translate_c = @import("translate_c.zig"); +pub const TrigramStore = @import("TrigramStore.zig"); pub const URI = @import("uri.zig"); pub const code_actions = @import("features/code_actions.zig"); From a55254260754febc7038be339092f24b618c3380 Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Sat, 7 Jun 2025 19:38:58 -0400 Subject: [PATCH 02/16] Open imported documents preemptively Co-authored-by: Techatrix --- src/DocumentStore.zig | 49 +++++++++++++++++++++++++++++++++---------- 1 file changed, 38 insertions(+), 11 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 31198fdfa..3dbac4a49 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -35,6 +35,8 @@ lsp_capabilities: struct { supports_semantic_tokens_refresh: bool = false, supports_inlay_hints_refresh: bool = false, } = .{}, +currently_loading_uris: std.StringArrayHashMapUnmanaged(void) = .empty, +wait_for_currently_loading_uri: std.Thread.Condition = .{}, pub const Uri = []const u8; @@ -611,6 +613,9 @@ pub fn deinit(self: *DocumentStore) void { } self.trigram_stores.deinit(self.allocator); + std.debug.assert(self.currently_loading_uris.count() == 0); + self.currently_loading_uris.deinit(self.allocator); + if (supports_build_system) { for (self.build_files.values()) |build_file| { build_file.deinit(self.allocator); @@ -691,7 +696,34 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); - if (self.getHandle(uri)) |handle| return handle; + { + self.lock.lock(); + defer self.lock.unlock(); + + while (true) { + if (self.handles.get(uri)) |handle| return handle; + + const gop = self.currently_loading_uris.getOrPutValue( + self.allocator, + uri, + {}, + ) catch return null; + + if (!gop.found_existing) { + break; + } + + var mutex: std.Thread.Mutex = .{}; + + mutex.lock(); + defer mutex.unlock(); + + 
self.lock.unlock(); + self.wait_for_currently_loading_uri.wait(&mutex); + self.lock.lock(); + } + } + const file_contents = self.readFile(uri) orelse return null; return self.createAndStoreDocument(uri, file_contents, false) catch |err| { log.err("failed to store document '{s}': {}", .{ uri, err }); @@ -1378,17 +1410,12 @@ fn createAndStoreDocument( errdefer if (!gop.found_existing) std.debug.assert(self.handles.swapRemove(uri)); if (gop.found_existing) { - if (lsp_synced) { - new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; - gop.value_ptr.*.impl.associated_build_file = .init; + new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; + gop.value_ptr.*.impl.associated_build_file = .init; - new_handle.uri = gop.key_ptr.*; - gop.value_ptr.*.deinit(); - gop.value_ptr.*.* = new_handle; - } else { - // TODO prevent concurrent `createAndStoreDocument` invocations from racing each other - new_handle.deinit(); - } + new_handle.uri = gop.key_ptr.*; + gop.value_ptr.*.deinit(); + gop.value_ptr.*.* = new_handle; } else { gop.key_ptr.* = try self.allocator.dupe(u8, uri); errdefer self.allocator.free(gop.key_ptr.*); From 216c822c542bfc622234c876c066646a6e84db7e Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Thu, 12 Jun 2025 19:15:16 -0400 Subject: [PATCH 03/16] Load all documents in workspace on start Co-authored-by: Techatrix --- src/DocumentStore.zig | 72 +++++++++++++++++++++++++++++++++++++++++++ src/Server.zig | 8 +++++ 2 files changed, 80 insertions(+) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 3dbac4a49..fe0df1756 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -724,6 +724,11 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { } } + defer { + std.debug.assert(self.currently_loading_uris.swapRemove(uri)); + self.wait_for_currently_loading_uri.broadcast(); + } + const file_contents = self.readFile(uri) 
orelse return null;
     return self.createAndStoreDocument(uri, file_contents, false) catch |err| {
         log.err("failed to store document '{s}': {}", .{ uri, err });
@@ -1430,6 +1435,73 @@ fn createAndStoreDocument(
     return gop.value_ptr.*;
 }
 
+/// Loads every `.zig` file below `directory_uri` into the store, skipping
+/// anything inside a `.zig-cache` directory.
+///
+/// Files that are already loaded, or are currently being loaded, are not
+/// loaded again. The remaining files are loaded on the store's thread pool
+/// and this function returns once all of them have been processed.
+///
+/// Returns the number of `.zig` files found, including the skipped ones.
+pub fn loadDirectoryRecursive(store: *DocumentStore, directory_uri: Uri) !usize {
+    const tracy_zone = tracy.trace(@src());
+    defer tracy_zone.end();
+
+    const workspace_path = try URI.toFsPath(store.allocator, directory_uri);
+    defer store.allocator.free(workspace_path);
+
+    var workspace_dir = try std.fs.openDirAbsolute(workspace_path, .{ .iterate = true });
+    defer workspace_dir.close();
+
+    var walker = try workspace_dir.walk(store.allocator);
+    defer walker.deinit();
+
+    // URIs that still have to be loaded. Each entry is owned by this list
+    // until it is handed to a worker below.
+    var not_currently_loading_uris: std.ArrayListUnmanaged(Uri) = .empty;
+    defer {
+        for (not_currently_loading_uris.items) |uri| store.allocator.free(uri);
+        not_currently_loading_uris.deinit(store.allocator);
+    }
+
+    var file_count: usize = 0;
+
+    {
+        while (try walker.next()) |entry| {
+            if (entry.kind == .directory) continue;
+            if (std.mem.indexOf(u8, entry.path, std.fs.path.sep_str ++ ".zig-cache" ++ std.fs.path.sep_str) != null) continue;
+            if (std.mem.startsWith(u8, entry.path, ".zig-cache" ++ std.fs.path.sep_str)) continue;
+            if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue;
+
+            file_count += 1;
+
+            const path = try std.fs.path.join(store.allocator, &.{ workspace_path, entry.path });
+            defer store.allocator.free(path);
+
+            // Reserve the slot first so nothing can fail once `uri` exists.
+            try not_currently_loading_uris.ensureUnusedCapacity(store.allocator, 1);
+
+            const uri = try URI.fromPath(store.allocator, path);
+            errdefer comptime unreachable;
+
+            store.lock.lockShared();
+            defer store.lock.unlockShared();
+
+            if (!store.handles.contains(uri) and
+                !store.currently_loading_uris.contains(uri))
+            {
+                not_currently_loading_uris.appendAssumeCapacity(uri);
+            } else {
+                // Already loaded or being loaded elsewhere: nobody takes
+                // ownership of this URI, so release it here.
+                store.allocator.free(uri);
+            }
+        }
+    }
+
+    errdefer comptime unreachable;
+
+    const S = struct {
+        /// Thread pool entry point; takes ownership of `uri`.
+        fn getOrLoadHandleVoid(s: *DocumentStore, uri: Uri) void {
+            _ = s.getOrLoadHandle(uri);
+            s.allocator.free(uri);
+        }
+    };
+
+    var wait_group: std.Thread.WaitGroup = .{};
+    while (not_currently_loading_uris.pop()) |uri| {
+        store.thread_pool.spawnWg(&wait_group, S.getOrLoadHandleVoid, .{ store, uri });
+    }
+    store.thread_pool.waitAndWork(&wait_group);
+
+    return file_count;
+}
+
 pub const CImportHandle = struct {
     /// the `@cImport` node
     node: Ast.Node.Index,
diff --git a/src/Server.zig b/src/Server.zig
index 3e39eb242..6dc08fb64 100644
--- a/src/Server.zig
+++ b/src/Server.zig
@@ -864,6 +864,14 @@ fn addWorkspace(server: *Server, uri: types.URI) error{OutOfMemory}!void {
             .restart = false,
         });
     }
+
+    server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) {
+        error.UnsupportedScheme => return,
+        else => {
+            log.err("failed to load files in workspace '{s}': {}", .{ uri, err });
+            return;
+        },
+    };
 }
 
 fn removeWorkspace(server: *Server, uri: types.URI) void {
From f59dfd55260d6c0e8c275cedbe65b9d68cdaeb56 Mon Sep 17 00:00:00 2001
From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com>
Date: Thu, 12 Jun 2025 20:09:34 -0400
Subject: [PATCH 04/16] Trigram indexing works and it's fast

Co-authored-by: Techatrix
---
 src/DocumentStore.zig |  32 ++-------
 src/Server.zig        |  33 ++-------
 src/TrigramStore.zig  | 157 ++++++++++++++++++------------------------
 3 files changed, 81 insertions(+), 141 deletions(-)

diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig
index fe0df1756..965802ce7 100644
--- a/src/DocumentStore.zig
+++ b/src/DocumentStore.zig
@@ -182,6 +182,7 @@ pub const BuildFile = struct {
 pub const Handle = struct {
     uri: Uri,
     tree: Ast,
+    trigram_store: TrigramStore,
 
     /// Contains one entry for every cimport in the document
     cimports: std.MultiArrayList(CImportHandle),
@@ -263,10 +264,14 @@ pub const Handle = struct {
         var cimports = try collectCIncludes(allocator, tree);
         errdefer cimports.deinit(allocator);
 
+        var trigram_store: TrigramStore = try .init(allocator, tree, .@"utf-16");
+        errdefer
trigram_store.deinit(); + return .{ .uri = uri, .tree = tree, .cimports = cimports, + .trigram_store = trigram_store, .impl = .{ .status = .init(@bitCast(Status{ .lsp_synced = lsp_synced, @@ -301,6 +306,8 @@ pub const Handle = struct { for (self.cimports.items(.source)) |source| allocator.free(source); self.cimports.deinit(allocator); + self.trigram_store.deinit(allocator); + switch (self.impl.associated_build_file) { .init, .none, .resolved => {}, .unresolved => |*payload| payload.deinit(allocator), @@ -736,31 +743,6 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { }; } -pub fn trigramIndexUri( - store: *DocumentStore, - uri: Uri, - encoding: offsets.Encoding, -) error{OutOfMemory}!void { - const gop = try store.trigram_stores.getOrPut(store.allocator, uri); - - if (gop.found_existing) { - return; - } - - errdefer { - store.allocator.free(gop.key_ptr.*); - store.trigram_stores.swapRemoveAt(gop.index); - } - - gop.key_ptr.* = try store.allocator.dupe(u8, uri); - gop.value_ptr.* = .empty; - - const file_contents = store.readFile(uri) orelse return; - defer store.allocator.free(file_contents); - - try gop.value_ptr.fill(store.allocator, file_contents, encoding); -} - /// **Thread safe** takes a shared lock /// This function does not protect against data races from modifying the BuildFile pub fn getBuildFile(self: *DocumentStore, uri: Uri) ?*BuildFile { diff --git a/src/Server.zig b/src/Server.zig index 6dc08fb64..b1f226cf8 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -1518,36 +1518,17 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) Error!lsp.ResultType("workspace/symbol") { if (request.query.len < 3) return null; - for (server.workspaces.items) |workspace| { - const path = Uri.parse(arena, workspace.uri) catch return error.InternalError; - var dir = std.fs.cwd().openDir(path, .{ .iterate = true }) 
catch return error.InternalError; - defer dir.close(); - - var walker = try dir.walk(arena); - defer walker.deinit(); - - while (walker.next() catch return error.InternalError) |entry| { - if (std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) { - const uri = Uri.fromPath( - arena, - std.fs.path.join(arena, &.{ path, entry.path }) catch return error.InternalError, - ) catch return error.InternalError; - - server.document_store.trigramIndexUri( - uri, - server.offset_encoding, - ) catch return error.InternalError; - } - } - } + // TODO: take this and get copy of handle ptrs + server.document_store.lock.lock(); + defer server.document_store.lock.unlock(); var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty; var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; - for ( - server.document_store.trigram_stores.keys(), - server.document_store.trigram_stores.values(), - ) |uri, trigram_store| { + for (server.document_store.handles.keys(), server.document_store.handles.values()) |uri, handle| { + const trigram_store = &handle.trigram_store; + + declaration_buffer.clearRetainingCapacity(); try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); const slice = trigram_store.declarations.slice(); diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 2fa5128df..e233d2ca6 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -22,51 +22,25 @@ pub const Declaration = struct { range: offsets.Range, }; -pub const empty: TrigramStore = .{ - .has_filter = false, - .filter_buckets = .empty, - .trigram_to_declarations = .empty, - .declarations = .empty, - .names = .empty, -}; - has_filter: bool, filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), declarations: std.MultiArrayList(Declaration), names: std.ArrayListUnmanaged(u8), -pub fn deinit(store: *TrigramStore, 
allocator: std.mem.Allocator) void { - store.filter_buckets.deinit(allocator); - for (store.trigram_to_declarations.values()) |*list| { - list.deinit(allocator); - } - store.trigram_to_declarations.deinit(allocator); - store.declarations.deinit(allocator); - store.names.deinit(allocator); - store.* = undefined; -} - -fn clearRetainingCapacity(store: *TrigramStore) void { - store.filter_buckets.clearRetainingCapacity(); - store.has_filter = false; - for (store.trigram_to_declarations.values()) |*list| { - list.clearRetainingCapacity(); - } - store.declarations.clearRetainingCapacity(); - store.names.clearRetainingCapacity(); -} - -pub fn fill( - store: *TrigramStore, +pub fn init( allocator: std.mem.Allocator, - source: [:0]const u8, + tree: Ast, encoding: offsets.Encoding, -) error{OutOfMemory}!void { - store.clearRetainingCapacity(); - - var tree = try Ast.parse(allocator, source, .zig); - defer tree.deinit(allocator); +) error{OutOfMemory}!TrigramStore { + var store: TrigramStore = .{ + .has_filter = false, + .filter_buckets = .empty, + .trigram_to_declarations = .empty, + .declarations = .empty, + .names = .empty, + }; + errdefer store.deinit(allocator); const Context = struct { allocator: std.mem.Allocator, @@ -126,15 +100,61 @@ pub fn fill( } }; - var context = Context{ + var context: Context = .{ .allocator = allocator, - .store = store, + .store = &store, .in_function = false, .encoding = encoding, }; try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback); - try store.finalize(allocator); + const lists = store.trigram_to_declarations.values(); + var index: usize = 0; + while (index < lists.len) { + if (lists[index].items.len == 0) { + lists[index].deinit(allocator); + store.trigram_to_declarations.swapRemoveAt(index); + } else { + index += 1; + } + } + + const trigrams = store.trigram_to_declarations.keys(); + + if (trigrams.len > 0) { + var prng = std.Random.DefaultPrng.init(0); + + const filter_capacity = 
CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; + try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); + store.filter_buckets.items.len = filter_capacity; + + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + filter.reset(); + store.has_filter = true; + + for (trigrams) |trigram| { + filter.append(prng.random(), trigram) catch |err| switch (err) { + error.EvictionFailed => { + // NOTE: This should generally be quite rare. + store.has_filter = false; + break; + }, + }; + } + } + + return store; +} + +pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { + store.filter_buckets.deinit(allocator); + for (store.trigram_to_declarations.values()) |*list| { + list.deinit(allocator); + } + store.trigram_to_declarations.deinit(allocator); + store.declarations.deinit(allocator); + store.names.deinit(allocator); + store.* = undefined; } /// Caller must not submit name.len < 3. @@ -167,46 +187,7 @@ fn appendDeclaration( } } -/// Must be called before any queries are executed. 
-fn finalize(store: *TrigramStore, allocator: std.mem.Allocator) error{OutOfMemory}!void { - { - const lists = store.trigram_to_declarations.values(); - var index: usize = 0; - while (index < lists.len) { - if (lists[index].items.len == 0) { - lists[index].deinit(allocator); - store.trigram_to_declarations.swapRemoveAt(index); - } else { - index += 1; - } - } - } - - const trigrams = store.trigram_to_declarations.keys(); - - if (trigrams.len > 0) { - var prng = std.Random.DefaultPrng.init(0); - - const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; - try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); - store.filter_buckets.items.len = filter_capacity; - - const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; - filter.reset(); - store.has_filter = true; - - for (trigrams) |trigram| { - filter.append(prng.random(), trigram) catch |err| switch (err) { - error.EvictionFailed => { - // NOTE: This should generally be quite rare. - store.has_filter = false; - break; - }, - }; - } - } -} - +/// Asserts query.len >= 3. Asserts declaration_buffer.items.len == 0. 
pub fn declarationsForQuery( store: *const TrigramStore, allocator: std.mem.Allocator, @@ -214,6 +195,7 @@ pub fn declarationsForQuery( declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index), ) error{OutOfMemory}!void { assert(query.len >= 3); + assert(declaration_buffer.items.len == 0); const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; @@ -226,14 +208,9 @@ pub fn declarationsForQuery( } } - const first = (store.trigram_to_declarations.get(query[0..3].*) orelse { - declaration_buffer.clearRetainingCapacity(); - return; - }).items; + const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items; - declaration_buffer.clearRetainingCapacity(); - try declaration_buffer.ensureTotalCapacity(allocator, first.len * 2); - declaration_buffer.items.len = first.len * 2; + try declaration_buffer.resize(allocator, first.len * 2); var len = first.len; @memcpy(declaration_buffer.items[0..len], first); @@ -242,7 +219,7 @@ pub fn declarationsForQuery( const trigram = query[index..][0..3].*; const old_len = len; len = mergeIntersection( - (store.trigram_to_declarations.get(trigram[0..3].*) orelse return { + (store.trigram_to_declarations.get(trigram[0..3].*) orelse { declaration_buffer.clearRetainingCapacity(); return; }).items, @@ -250,10 +227,10 @@ pub fn declarationsForQuery( declaration_buffer.items[len..], ); @memcpy(declaration_buffer.items[0..len], declaration_buffer.items[old_len..][0..len]); - declaration_buffer.items.len = len * 2; + declaration_buffer.shrinkRetainingCapacity(len * 2); } - declaration_buffer.items.len = declaration_buffer.items.len / 2; + declaration_buffer.shrinkRetainingCapacity(declaration_buffer.items.len / 2); } /// Asserts `@min(a.len, b.len) <= out.len`. 
From 6fe46a9d40982c6e4f8da2e4e6357931cc79b3f8 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Thu, 26 Jun 2025 23:17:17 +0200 Subject: [PATCH 05/16] lazy and parallel trigram store creation --- src/DocumentStore.zig | 85 +++++++++++++++++++++++++++++++++++++++---- src/Server.zig | 14 +++---- 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 965802ce7..8c3305a11 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -182,7 +182,6 @@ pub const BuildFile = struct { pub const Handle = struct { uri: Uri, tree: Ast, - trigram_store: TrigramStore, /// Contains one entry for every cimport in the document cimports: std.MultiArrayList(CImportHandle), @@ -198,6 +197,7 @@ pub const Handle = struct { lazy_condition: std.Thread.Condition = .{}, import_uris: ?[]Uri = null, + trigram_store: TrigramStore = undefined, document_scope: DocumentScope = undefined, zzoiir: ZirOrZoir = undefined, @@ -236,6 +236,11 @@ pub const Handle = struct { /// `false` indicates the document only exists because it is a dependency of another document /// or has been closed with `textDocument/didClose`. lsp_synced: bool = false, + /// true if a thread has acquired the permission to compute the `TrigramStore` + /// all other threads will wait until the given thread has computed the `TrigramStore` before reading it. + has_trigram_store_lock: bool = false, + /// true if `handle.impl.trigram_store` has been set + has_trigram_store: bool = false, /// true if a thread has acquired the permission to compute the `DocumentScope` /// all other threads will wait until the given thread has computed the `DocumentScope` before reading it. has_document_scope_lock: bool = false, @@ -246,7 +251,7 @@ pub const Handle = struct { /// all other threads will wait until the given thread has computed the `std.zig.Zir` or `std.zig.Zoir` before reading it. 
/// true if `handle.impl.zir` has been set has_zzoiir: bool = false, - _: u27 = 0, + _: u25 = 0, }; /// Takes ownership of `text` on success. @@ -264,14 +269,10 @@ pub const Handle = struct { var cimports = try collectCIncludes(allocator, tree); errdefer cimports.deinit(allocator); - var trigram_store: TrigramStore = try .init(allocator, tree, .@"utf-16"); - errdefer trigram_store.deinit(); - return .{ .uri = uri, .tree = tree, .cimports = cimports, - .trigram_store = trigram_store, .impl = .{ .status = .init(@bitCast(Status{ .lsp_synced = lsp_synced, @@ -295,6 +296,7 @@ pub const Handle = struct { .zon => self.impl.zzoiir.zon.deinit(allocator), }; if (status.has_document_scope) self.impl.document_scope.deinit(allocator); + if (status.has_trigram_store) self.impl.trigram_store.deinit(allocator); allocator.free(self.tree.source); self.tree.deinit(allocator); @@ -306,8 +308,6 @@ pub const Handle = struct { for (self.cimports.items(.source)) |source| allocator.free(source); self.cimports.deinit(allocator); - self.trigram_store.deinit(allocator); - switch (self.impl.associated_build_file) { .init, .none, .resolved => {}, .unresolved => |*payload| payload.deinit(allocator), @@ -377,6 +377,23 @@ pub const Handle = struct { return self.impl.document_scope; } + pub fn getTrigramStore(self: *Handle) error{OutOfMemory}!TrigramStore { + if (self.getStatus().has_trigram_store) return self.impl.trigram_store; + return try self.getLazy(TrigramStore, "trigram_store", struct { + fn create(handle: *Handle, allocator: std.mem.Allocator) error{OutOfMemory}!TrigramStore { + return try .init(allocator, handle.tree, .@"utf-16"); // TODO + } + }); + } + + /// Asserts that `getTrigramStore` has been previously called on `handle`. 
+ pub fn getTrigramStoreCached(self: *Handle) TrigramStore { + if (builtin.mode == .Debug) { + std.debug.assert(self.getStatus().has_trigram_store); + } + return self.impl.trigram_store; + } + pub fn getZir(self: *Handle) error{OutOfMemory}!std.zig.Zir { std.debug.assert(self.tree.mode == .zig); const zir_or_zoir = try self.getZirOrZoir(); @@ -977,6 +994,9 @@ fn notifyBuildEnd(self: *DocumentStore, status: EndStatus) void { } fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + { build_file.impl.mutex.lock(); defer build_file.impl.mutex.unlock(); @@ -1057,6 +1077,52 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void } } +pub fn loadTrigramStores(store: *DocumentStore) error{OutOfMemory}![]*DocumentStore.Handle { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + var handles: std.ArrayListUnmanaged(*DocumentStore.Handle) = try .initCapacity(store.allocator, store.handles.count()); + errdefer handles.deinit(store.allocator); + + for (store.handles.values()) |handle| { + // TODO check if the handle is in a workspace folder instead + if (isInStd(handle.uri)) continue; + handles.appendAssumeCapacity(handle); + } + + if (builtin.single_threaded) { + for (handles.items) |handle| { + _ = try handle.getTrigramStore(); + } + return try handles.toOwnedSlice(store.allocator); + } + + const loadTrigramStore = struct { + fn loadTrigramStore( + handle: *Handle, + did_out_of_memory: *std.atomic.Value(bool), + ) void { + _ = handle.getTrigramStore() catch { + did_out_of_memory.store(true, .release); + }; + } + }.loadTrigramStore; + + var wait_group: std.Thread.WaitGroup = .{}; + var did_out_of_memory: std.atomic.Value(bool) = .init(false); + + for (handles.items) |handle| { + const status = handle.getStatus(); + if (status.has_trigram_store) continue; + store.thread_pool.spawnWg(&wait_group, loadTrigramStore, .{ handle, 
&did_out_of_memory }); + } + store.thread_pool.waitAndWork(&wait_group); + + if (did_out_of_memory.load(.acquire)) return error.OutOfMemory; + + return try handles.toOwnedSlice(store.allocator); +} + pub fn isBuildFile(uri: Uri) bool { return std.mem.endsWith(u8, uri, "/build.zig"); } @@ -1239,6 +1305,9 @@ fn buildDotZigExists(dir_path: []const u8) bool { /// See `Handle.getAssociatedBuildFileUri`. /// Caller owns returned memory. fn collectPotentialBuildFiles(self: *DocumentStore, uri: Uri) ![]*BuildFile { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + if (isInStd(uri)) return &.{}; var potential_build_files: std.ArrayList(*BuildFile) = .empty; diff --git a/src/Server.zig b/src/Server.zig index b1f226cf8..754890db1 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -1518,15 +1518,14 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) Error!lsp.ResultType("workspace/symbol") { if (request.query.len < 3) return null; - // TODO: take this and get copy of handle ptrs - server.document_store.lock.lock(); - defer server.document_store.lock.unlock(); + const handles = try server.document_store.loadTrigramStores(); + defer server.document_store.allocator.free(handles); var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty; var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; - for (server.document_store.handles.keys(), server.document_store.handles.values()) |uri, handle| { - const trigram_store = &handle.trigram_store; + for (handles) |handle| { + const trigram_store = handle.getTrigramStoreCached(); declaration_buffer.clearRetainingCapacity(); try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); @@ -1535,15 +1534,16 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty const names = slice.items(.name); 
const ranges = slice.items(.range); + try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); for (declaration_buffer.items) |declaration| { const name = names[@intFromEnum(declaration)]; const range = ranges[@intFromEnum(declaration)]; - try symbols.append(arena, .{ + symbols.appendAssumeCapacity(.{ .name = trigram_store.names.items[name.start..name.end], .kind = .Variable, .location = .{ .Location = .{ - .uri = uri, + .uri = handle.uri, .range = range, }, }, From b71353a3174ba2cdb0ffc862f284e6194aa7324d Mon Sep 17 00:00:00 2001 From: Techatrix Date: Thu, 26 Jun 2025 23:40:42 +0200 Subject: [PATCH 06/16] move Loc to Position conversion out of TrigramStore --- src/DocumentStore.zig | 2 +- src/Server.zig | 21 ++++++++++++++++++--- src/TrigramStore.zig | 14 +++++++------- 3 files changed, 26 insertions(+), 11 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 8c3305a11..20c09846f 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -381,7 +381,7 @@ pub const Handle = struct { if (self.getStatus().has_trigram_store) return self.impl.trigram_store; return try self.getLazy(TrigramStore, "trigram_store", struct { fn create(handle: *Handle, allocator: std.mem.Allocator) error{OutOfMemory}!TrigramStore { - return try .init(allocator, handle.tree, .@"utf-16"); // TODO + return try .init(allocator, handle.tree); } }); } diff --git a/src/Server.zig b/src/Server.zig index 754890db1..0131e04d9 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -1523,6 +1523,8 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty; var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + var loc_buffer: std.ArrayListUnmanaged(offsets.Loc) = .empty; + var range_buffer: std.ArrayListUnmanaged(offsets.Range) = .empty; for (handles) |handle| { const trigram_store = handle.getTrigramStoreCached(); @@ -1532,12 
+1534,25 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty const slice = trigram_store.declarations.slice(); const names = slice.items(.name); - const ranges = slice.items(.range); + const locs = slice.items(.loc); + + { + // Convert `offsets.Loc` to `offsets.Range` + + try loc_buffer.resize(arena, declaration_buffer.items.len); + try range_buffer.resize(arena, declaration_buffer.items.len); + + for (declaration_buffer.items, loc_buffer.items) |declaration, *loc| { + const small_loc = locs[@intFromEnum(declaration)]; + loc.* = .{ .start = small_loc.start, .end = small_loc.end }; + } + + try offsets.multiple.locToRange(arena, handle.tree.source, loc_buffer.items, range_buffer.items, server.offset_encoding); + } try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); - for (declaration_buffer.items) |declaration| { + for (declaration_buffer.items, range_buffer.items) |declaration, range| { const name = names[@intFromEnum(declaration)]; - const range = ranges[@intFromEnum(declaration)]; symbols.appendAssumeCapacity(.{ .name = trigram_store.names.items[name.start..name.end], .kind = .Variable, diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index e233d2ca6..a70cb895c 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -14,12 +14,13 @@ pub const TrigramStore = @This(); pub const Trigram = [3]u8; pub const NameSlice = struct { start: u32, end: u32 }; +pub const Loc = struct { start: u32, end: u32 }; pub const Declaration = struct { pub const Index = enum(u32) { _ }; name: NameSlice, - range: offsets.Range, + loc: Loc, }; has_filter: bool, @@ -31,7 +32,6 @@ names: std.ArrayListUnmanaged(u8), pub fn init( allocator: std.mem.Allocator, tree: Ast, - encoding: offsets.Encoding, ) error{OutOfMemory}!TrigramStore { var store: TrigramStore = .{ .has_filter = false, @@ -46,7 +46,6 @@ pub fn init( allocator: std.mem.Allocator, store: *TrigramStore, in_function: bool, - encoding: offsets.Encoding, const Error = 
error{OutOfMemory}; fn callback(context: *@This(), cb_tree: Ast, node: Ast.Node.Index) Error!void { @@ -84,10 +83,12 @@ pub fn init( const name = cb_tree.tokenSlice(token); if (name.len >= 3) { + const loc = offsets.tokenToLoc(cb_tree, token); + try context.store.appendDeclaration( context.allocator, name, - offsets.tokenToRange(cb_tree, token, context.encoding), + .{ .start = @intCast(loc.start), .end = @intCast(loc.end) }, ); } } @@ -104,7 +105,6 @@ pub fn init( .allocator = allocator, .store = &store, .in_function = false, - .encoding = encoding, }; try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback); @@ -162,7 +162,7 @@ fn appendDeclaration( store: *TrigramStore, allocator: std.mem.Allocator, name: []const u8, - range: offsets.Range, + loc: Loc, ) error{OutOfMemory}!void { assert(name.len >= 3); @@ -177,7 +177,7 @@ fn appendDeclaration( try store.declarations.append(allocator, .{ .name = name_slice, - .range = range, + .loc = loc, }); for (0..name.len - 2) |index| { From 6ccab783c3992922553ecaf511a540f128c9036f Mon Sep 17 00:00:00 2001 From: Techatrix Date: Sun, 29 Jun 2025 22:55:16 +0200 Subject: [PATCH 07/16] some document store refactoring No more log entry per opened document. --- src/Server.zig | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/Server.zig b/src/Server.zig index 0131e04d9..79d94ff87 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -851,7 +851,6 @@ const Workspace = struct { fn addWorkspace(server: *Server, uri: types.URI) error{OutOfMemory}!void { try server.workspaces.ensureUnusedCapacity(server.allocator, 1); server.workspaces.appendAssumeCapacity(try Workspace.init(server, uri)); - log.info("added Workspace Folder: {s}", .{uri}); if (BuildOnSaveSupport.isSupportedComptime() and // Don't initialize build on save until initialization finished. 
@@ -865,13 +864,15 @@ fn addWorkspace(server: *Server, uri: types.URI) error{OutOfMemory}!void { }); } - server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) { + const file_count = server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) { error.UnsupportedScheme => return, else => { log.err("failed to load files in workspace '{s}': {}", .{ uri, err }); return; }, }; + + log.info("added Workspace Folder: {s} ({d} files)", .{ uri, file_count }); } fn removeWorkspace(server: *Server, uri: types.URI) void { From 73169abf3e08fd777b41c4104cf1026003db24a0 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Wed, 2 Jul 2025 20:24:05 +0200 Subject: [PATCH 08/16] update tests --- src/TrigramStore.zig | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index a70cb895c..1e5524dc6 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -3,11 +3,8 @@ const std = @import("std"); const ast = @import("ast.zig"); const Ast = std.zig.Ast; -const builtin = @import("builtin"); const assert = std.debug.assert; const offsets = @import("offsets.zig"); -const URI = @import("uri.zig"); -const log = std.log.scoped(.store); pub const TrigramStore = @This(); @@ -239,7 +236,7 @@ fn mergeIntersection( b: []const Declaration.Index, out: []Declaration.Index, ) u32 { - std.debug.assert(@min(a.len, b.len) <= out.len); + assert(@min(a.len, b.len) <= out.len); var out_idx: u32 = 0; @@ -438,17 +435,17 @@ test CuckooFilter { try entries.ensureTotalCapacity(allocator, element_count); var buckets: [filter_size]CuckooFilter.Bucket = undefined; - var filter = CuckooFilter{ .buckets = &buckets }; - var filter_prng = std.Random.DefaultPrng.init(42); + var filter: CuckooFilter = .{ .buckets = &buckets }; + var filter_prng: std.Random.DefaultPrng = .init(42); for (0..2_500) |gen_prng_seed| { entries.clearRetainingCapacity(); filter.reset(); - var gen_prng = 
std.Random.DefaultPrng.init(gen_prng_seed); + var gen_prng: std.Random.DefaultPrng = .init(gen_prng_seed); for (0..element_count) |_| { const trigram: Trigram = @bitCast(gen_prng.random().int(u24)); - try entries.put(allocator, trigram, {}); + entries.putAssumeCapacity(trigram, {}); try filter.append(filter_prng.random(), trigram); } @@ -460,7 +457,7 @@ test CuckooFilter { // Reasonable false positive rate const fpr_count = 2_500; var false_positives: usize = 0; - var negative_prng = std.Random.DefaultPrng.init(~gen_prng_seed); + var negative_prng: std.Random.DefaultPrng = .init(~gen_prng_seed); for (0..fpr_count) |_| { var trigram: Trigram = @bitCast(negative_prng.random().int(u24)); while (entries.contains(trigram)) { @@ -471,9 +468,8 @@ test CuckooFilter { } const fpr = @as(f32, @floatFromInt(false_positives)) / fpr_count; - std.testing.expect(fpr < 0.035) catch |err| { - std.log.err("fpr: {d}%", .{fpr * 100}); - return err; - }; + + errdefer std.log.err("fpr: {d}%", .{fpr * 100}); + try std.testing.expect(fpr < 0.035); } } From c62ef53271a7cdf8943752af012f5b0ca354b07f Mon Sep 17 00:00:00 2001 From: Techatrix Date: Wed, 2 Jul 2025 19:48:30 +0200 Subject: [PATCH 09/16] add function declarations to workspace symbols --- src/TrigramStore.zig | 48 ++++++++++++++++++++++++++------------------ 1 file changed, 28 insertions(+), 20 deletions(-) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 1e5524dc6..46c9d668d 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -49,11 +49,20 @@ pub fn init( const old_in_function = context.in_function; defer context.in_function = old_in_function; + var name_token_maybe: ?Ast.TokenIndex = null; switch (cb_tree.nodeTag(node)) { - .fn_decl => { - if (!context.in_function) {} - - context.in_function = true; + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + .fn_decl, + => |tag| skip: { + context.in_function = tag == .fn_decl; + + const fn_token = cb_tree.nodeMainToken(node); + if 
(cb_tree.tokenTag(fn_token + 1) != .identifier) break :skip; + + name_token_maybe = fn_token + 1; }, .root => unreachable, .container_decl, @@ -74,26 +83,25 @@ pub fn init( .local_var_decl, .simple_var_decl, .aligned_var_decl, - => { - if (!context.in_function) { - const token = cb_tree.fullVarDecl(node).?.ast.mut_token + 1; - const name = cb_tree.tokenSlice(token); - - if (name.len >= 3) { - const loc = offsets.tokenToLoc(cb_tree, token); - - try context.store.appendDeclaration( - context.allocator, - name, - .{ .start = @intCast(loc.start), .end = @intCast(loc.end) }, - ); - } - } + => skip: { + if (context.in_function) break :skip; + name_token_maybe = cb_tree.nodeMainToken(node) + 1; }, - else => {}, } + if (name_token_maybe) |name_token| skip: { + const loc = offsets.tokenToLoc(cb_tree, name_token); + const name = offsets.locToSlice(cb_tree.source, loc); + if (name.len < 3) break :skip; + + try context.store.appendDeclaration( + context.allocator, + name, + .{ .start = @intCast(loc.start), .end = @intCast(loc.end) }, + ); + } + try ast.iterateChildren(cb_tree, node, context, Error, callback); } }; From 44fc2d1e8a1fe67680e14b3998a5054775f89e9c Mon Sep 17 00:00:00 2001 From: Techatrix Date: Wed, 30 Jul 2025 22:05:43 +0200 Subject: [PATCH 10/16] reduce size of TrigramStore.Declaration --- src/Server.zig | 49 +++++++++++++++++++++++++++----------------- src/TrigramStore.zig | 37 +++++++++------------------------ 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/src/Server.zig b/src/Server.zig index 79d94ff87..c3762b10b 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -1524,8 +1524,6 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty; var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; - var loc_buffer: std.ArrayListUnmanaged(offsets.Loc) = .empty; - var range_buffer: std.ArrayListUnmanaged(offsets.Range) = 
.empty; for (handles) |handle| { const trigram_store = handle.getTrigramStoreCached(); @@ -1533,34 +1531,47 @@ fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: ty declaration_buffer.clearRetainingCapacity(); try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); + const SortContext = struct { + names: []const std.zig.Ast.TokenIndex, + fn lessThan(ctx: @This(), lhs: TrigramStore.Declaration.Index, rhs: TrigramStore.Declaration.Index) bool { + return ctx.names[@intFromEnum(lhs)] < ctx.names[@intFromEnum(rhs)]; + } + }; + + std.mem.sortUnstable( + TrigramStore.Declaration.Index, + declaration_buffer.items, + SortContext{ .names = trigram_store.declarations.items(.name) }, + SortContext.lessThan, + ); + const slice = trigram_store.declarations.slice(); const names = slice.items(.name); - const locs = slice.items(.loc); - { - // Convert `offsets.Loc` to `offsets.Range` + var last_index: usize = 0; + var last_position: offsets.Position = .{ .line = 0, .character = 0 }; - try loc_buffer.resize(arena, declaration_buffer.items.len); - try range_buffer.resize(arena, declaration_buffer.items.len); + try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); + for (declaration_buffer.items) |declaration| { + const name_token = names[@intFromEnum(declaration)]; + const loc = offsets.identifierTokenToNameLoc(handle.tree, name_token); + const name = offsets.identifierTokenToNameSlice(handle.tree, name_token); - for (declaration_buffer.items, loc_buffer.items) |declaration, *loc| { - const small_loc = locs[@intFromEnum(declaration)]; - loc.* = .{ .start = small_loc.start, .end = small_loc.end }; - } + const start_position = offsets.advancePosition(handle.tree.source, last_position, last_index, loc.start, server.offset_encoding); + const end_position = offsets.advancePosition(handle.tree.source, start_position, loc.start, loc.end, server.offset_encoding); + last_index = loc.end; + last_position = end_position; - 
try offsets.multiple.locToRange(arena, handle.tree.source, loc_buffer.items, range_buffer.items, server.offset_encoding); - } - - try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); - for (declaration_buffer.items, range_buffer.items) |declaration, range| { - const name = names[@intFromEnum(declaration)]; symbols.appendAssumeCapacity(.{ - .name = trigram_store.names.items[name.start..name.end], + .name = name, .kind = .Variable, .location = .{ .Location = .{ .uri = handle.uri, - .range = range, + .range = .{ + .start = start_position, + .end = end_position, + }, }, }, }); diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 46c9d668d..76e93c67f 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -10,14 +10,10 @@ pub const TrigramStore = @This(); pub const Trigram = [3]u8; -pub const NameSlice = struct { start: u32, end: u32 }; -pub const Loc = struct { start: u32, end: u32 }; - pub const Declaration = struct { pub const Index = enum(u32) { _ }; - name: NameSlice, - loc: Loc, + name: Ast.TokenIndex, }; has_filter: bool, @@ -55,7 +51,6 @@ pub fn init( .fn_proto_multi, .fn_proto_one, .fn_proto_simple, - .fn_decl, => |tag| skip: { context.in_function = tag == .fn_decl; @@ -90,15 +85,11 @@ pub fn init( else => {}, } - if (name_token_maybe) |name_token| skip: { - const loc = offsets.tokenToLoc(cb_tree, name_token); - const name = offsets.locToSlice(cb_tree.source, loc); - if (name.len < 3) break :skip; - + if (name_token_maybe) |name_token| { try context.store.appendDeclaration( context.allocator, - name, - .{ .start = @intCast(loc.start), .end = @intCast(loc.end) }, + cb_tree, + name_token, ); } @@ -166,23 +157,15 @@ pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { fn appendDeclaration( store: *TrigramStore, allocator: std.mem.Allocator, - name: []const u8, - loc: Loc, + tree: Ast, + name_token: Ast.TokenIndex, ) error{OutOfMemory}!void { - assert(name.len >= 3); - - const name_slice: NameSlice = blk: { - 
const start = store.names.items.len; - try store.names.appendSlice(allocator, name); - break :blk .{ - .start = @intCast(start), - .end = @intCast(store.names.items.len), - }; - }; + const loc = offsets.identifierTokenToNameLoc(tree, name_token); + const name = offsets.locToSlice(tree.source, loc); + if (name.len < 3) return; try store.declarations.append(allocator, .{ - .name = name_slice, - .loc = loc, + .name = name_token, }); for (0..name.len - 2) |index| { From cb8d0b9be5ed64fc1c1b98a6e806d1036470fbc6 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Mon, 8 Sep 2025 14:34:53 +0200 Subject: [PATCH 11/16] move workspace symbols implementation into separate file --- src/Server.zig | 64 +------------------------ src/features/workspace_symbols.zig | 76 ++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 63 deletions(-) create mode 100644 src/features/workspace_symbols.zig diff --git a/src/Server.zig b/src/Server.zig index c3762b10b..7744cdbf9 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -34,7 +34,6 @@ const goto = @import("features/goto.zig"); const hover_handler = @import("features/hover.zig"); const selection_range = @import("features/selection_range.zig"); const diagnostics_gen = @import("features/diagnostics.zig"); -const TrigramStore = @import("TrigramStore.zig"); const BuildOnSave = diagnostics_gen.BuildOnSave; const BuildOnSaveSupport = build_runner_shared.BuildOnSaveSupport; @@ -1517,68 +1516,7 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ } fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) Error!lsp.ResultType("workspace/symbol") { - if (request.query.len < 3) return null; - - const handles = try server.document_store.loadTrigramStores(); - defer server.document_store.allocator.free(handles); - - var symbols: std.ArrayListUnmanaged(types.WorkspaceSymbol) = .empty; - var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = 
.empty; - - for (handles) |handle| { - const trigram_store = handle.getTrigramStoreCached(); - - declaration_buffer.clearRetainingCapacity(); - try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); - - const SortContext = struct { - names: []const std.zig.Ast.TokenIndex, - fn lessThan(ctx: @This(), lhs: TrigramStore.Declaration.Index, rhs: TrigramStore.Declaration.Index) bool { - return ctx.names[@intFromEnum(lhs)] < ctx.names[@intFromEnum(rhs)]; - } - }; - - std.mem.sortUnstable( - TrigramStore.Declaration.Index, - declaration_buffer.items, - SortContext{ .names = trigram_store.declarations.items(.name) }, - SortContext.lessThan, - ); - - const slice = trigram_store.declarations.slice(); - const names = slice.items(.name); - - var last_index: usize = 0; - var last_position: offsets.Position = .{ .line = 0, .character = 0 }; - - try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); - for (declaration_buffer.items) |declaration| { - const name_token = names[@intFromEnum(declaration)]; - const loc = offsets.identifierTokenToNameLoc(handle.tree, name_token); - const name = offsets.identifierTokenToNameSlice(handle.tree, name_token); - - const start_position = offsets.advancePosition(handle.tree.source, last_position, last_index, loc.start, server.offset_encoding); - const end_position = offsets.advancePosition(handle.tree.source, start_position, loc.start, loc.end, server.offset_encoding); - last_index = loc.end; - last_position = end_position; - - symbols.appendAssumeCapacity(.{ - .name = name, - .kind = .Variable, - .location = .{ - .Location = .{ - .uri = handle.uri, - .range = .{ - .start = start_position, - .end = end_position, - }, - }, - }, - }); - } - } - - return .{ .array_of_WorkspaceSymbol = symbols.items }; + return try @import("features/workspace_symbols.zig").handler(server, arena, request); } const HandledRequestParams = union(enum) { diff --git a/src/features/workspace_symbols.zig 
b/src/features/workspace_symbols.zig new file mode 100644 index 000000000..a0ba89149 --- /dev/null +++ b/src/features/workspace_symbols.zig @@ -0,0 +1,76 @@ +//! Implementation of [`workspace/symbol`](https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#workspace_symbol) + +const std = @import("std"); + +const lsp = @import("lsp"); +const types = lsp.types; + +const DocumentStore = @import("../DocumentStore.zig"); +const offsets = @import("../offsets.zig"); +const Server = @import("../Server.zig"); +const TrigramStore = @import("../TrigramStore.zig"); + +pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { + if (request.query.len < 3) return null; + + const handles = try server.document_store.loadTrigramStores(); + defer server.document_store.allocator.free(handles); + + var symbols: std.ArrayListUnmanaged(lsp.types.WorkspaceSymbol) = .empty; + var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + + for (handles) |handle| { + const trigram_store = handle.getTrigramStoreCached(); + + declaration_buffer.clearRetainingCapacity(); + try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); + + const SortContext = struct { + names: []const std.zig.Ast.TokenIndex, + fn lessThan(ctx: @This(), lhs: TrigramStore.Declaration.Index, rhs: TrigramStore.Declaration.Index) bool { + return ctx.names[@intFromEnum(lhs)] < ctx.names[@intFromEnum(rhs)]; + } + }; + + std.mem.sortUnstable( + TrigramStore.Declaration.Index, + declaration_buffer.items, + SortContext{ .names = trigram_store.declarations.items(.name) }, + SortContext.lessThan, + ); + + const slice = trigram_store.declarations.slice(); + const names = slice.items(.name); + + var last_index: usize = 0; + var last_position: offsets.Position = .{ .line = 0, .character = 0 }; + + try symbols.ensureUnusedCapacity(arena, 
declaration_buffer.items.len); + for (declaration_buffer.items) |declaration| { + const name_token = names[@intFromEnum(declaration)]; + const loc = offsets.identifierTokenToNameLoc(handle.tree, name_token); + const name = offsets.identifierTokenToNameSlice(handle.tree, name_token); + + const start_position = offsets.advancePosition(handle.tree.source, last_position, last_index, loc.start, server.offset_encoding); + const end_position = offsets.advancePosition(handle.tree.source, start_position, loc.start, loc.end, server.offset_encoding); + last_index = loc.end; + last_position = end_position; + + symbols.appendAssumeCapacity(.{ + .name = name, + .kind = .Variable, + .location = .{ + .Location = .{ + .uri = handle.uri, + .range = .{ + .start = start_position, + .end = end_position, + }, + }, + }, + }); + } + } + + return .{ .array_of_WorkspaceSymbol = symbols.items }; +} From 778c5c48534cc0c0394b692508db7702c8a79b71 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Mon, 8 Sep 2025 19:18:31 +0200 Subject: [PATCH 12/16] only report workspace symbols on files inside a workspace folder --- src/DocumentStore.zig | 18 +++++++++++++++--- src/features/workspace_symbols.zig | 13 ++++++++++++- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 20c09846f..6a1fd77d2 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -1077,7 +1077,10 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void } } -pub fn loadTrigramStores(store: *DocumentStore) error{OutOfMemory}![]*DocumentStore.Handle { +pub fn loadTrigramStores( + store: *DocumentStore, + filter_paths: []const []const u8, +) error{OutOfMemory}![]*DocumentStore.Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); @@ -1085,8 +1088,17 @@ pub fn loadTrigramStores(store: *DocumentStore) error{OutOfMemory}![]*DocumentSt errdefer handles.deinit(store.allocator); for (store.handles.values()) |handle| { - // TODO 
check if the handle is in a workspace folder instead - if (isInStd(handle.uri)) continue; + if (URI.toFsPath(store.allocator, handle.uri)) |path| { + defer store.allocator.free(path); + for (filter_paths) |filter_path| { + if (std.mem.startsWith(u8, path, filter_path)) break; + } else break; + } else |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + // The URI is either invalid or not a `file` scheme. Either way, we should include it. + }, + } handles.appendAssumeCapacity(handle); } diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig index a0ba89149..79767d6d7 100644 --- a/src/features/workspace_symbols.zig +++ b/src/features/workspace_symbols.zig @@ -9,11 +9,22 @@ const DocumentStore = @import("../DocumentStore.zig"); const offsets = @import("../offsets.zig"); const Server = @import("../Server.zig"); const TrigramStore = @import("../TrigramStore.zig"); +const URI = @import("../uri.zig"); pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { if (request.query.len < 3) return null; - const handles = try server.document_store.loadTrigramStores(); + var workspace_paths: std.ArrayList([]const u8) = try .initCapacity(arena, server.workspaces.items.len); + for (server.workspaces.items) |workspace| { + const path = URI.toFsPath(arena, workspace.uri) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.UnsupportedScheme => continue, + else => continue, + }; + workspace_paths.appendAssumeCapacity(path); + } + + const handles = try server.document_store.loadTrigramStores(workspace_paths.items); defer server.document_store.allocator.free(handles); var symbols: std.ArrayListUnmanaged(lsp.types.WorkspaceSymbol) = .empty; From bd207fc220a3985e83ac63fbc40cf5a927d24145 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Tue, 9 Sep 2025 00:12:54 +0200 Subject: [PATCH 13/16] improve trigramstore 
declarations - resolve symbol kind - include test declarations --- src/TrigramStore.zig | 62 +++++++++++++++++++++--------- src/features/workspace_symbols.zig | 10 ++++- 2 files changed, 53 insertions(+), 19 deletions(-) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 76e93c67f..647eb3636 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -13,14 +13,22 @@ pub const Trigram = [3]u8; pub const Declaration = struct { pub const Index = enum(u32) { _ }; + pub const Kind = enum { + variable, + constant, + function, + test_function, + }; + + /// Either `.identifier` or `.string_literal`. name: Ast.TokenIndex, + kind: Kind, }; has_filter: bool, filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), declarations: std.MultiArrayList(Declaration), -names: std.ArrayListUnmanaged(u8), pub fn init( allocator: std.mem.Allocator, @@ -31,7 +39,6 @@ pub fn init( .filter_buckets = .empty, .trigram_to_declarations = .empty, .declarations = .empty, - .names = .empty, }; errdefer store.deinit(allocator); @@ -45,7 +52,6 @@ pub fn init( const old_in_function = context.in_function; defer context.in_function = old_in_function; - var name_token_maybe: ?Ast.TokenIndex = null; switch (cb_tree.nodeTag(node)) { .fn_proto, .fn_proto_multi, @@ -57,7 +63,12 @@ pub fn init( const fn_token = cb_tree.nodeMainToken(node); if (cb_tree.tokenTag(fn_token + 1) != .identifier) break :skip; - name_token_maybe = fn_token + 1; + try context.store.appendDeclaration( + context.allocator, + offsets.identifierTokenToNameSlice(cb_tree, fn_token + 1), + fn_token + 1, + .function, + ); }, .root => unreachable, .container_decl, @@ -80,17 +91,34 @@ pub fn init( .aligned_var_decl, => skip: { if (context.in_function) break :skip; - name_token_maybe = cb_tree.nodeMainToken(node) + 1; + + const main_token = cb_tree.nodeMainToken(node); + + const kind: Declaration.Kind = switch 
(cb_tree.tokenTag(main_token)) { + .keyword_var => .variable, + .keyword_const => .constant, + else => unreachable, + }; + + try context.store.appendDeclaration( + context.allocator, + offsets.identifierTokenToNameSlice(cb_tree, main_token + 1), + main_token + 1, + kind, + ); }, - else => {}, - } - if (name_token_maybe) |name_token| { - try context.store.appendDeclaration( - context.allocator, - cb_tree, - name_token, - ); + .test_decl => skip: { + const test_name_token, const test_name = ast.testDeclNameAndToken(cb_tree, node) orelse break :skip; + + try context.store.appendDeclaration( + context.allocator, + test_name, + test_name_token, + .test_function, + ); + }, + else => {}, } try ast.iterateChildren(cb_tree, node, context, Error, callback); @@ -149,23 +177,21 @@ pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { } store.trigram_to_declarations.deinit(allocator); store.declarations.deinit(allocator); - store.names.deinit(allocator); store.* = undefined; } -/// Caller must not submit name.len < 3. 
fn appendDeclaration( store: *TrigramStore, allocator: std.mem.Allocator, - tree: Ast, + name: []const u8, name_token: Ast.TokenIndex, + kind: Declaration.Kind, ) error{OutOfMemory}!void { - const loc = offsets.identifierTokenToNameLoc(tree, name_token); - const name = offsets.locToSlice(tree.source, loc); if (name.len < 3) return; try store.declarations.append(allocator, .{ .name = name_token, + .kind = kind, }); for (0..name.len - 2) |index| { diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig index 79767d6d7..797ce2727 100644 --- a/src/features/workspace_symbols.zig +++ b/src/features/workspace_symbols.zig @@ -52,6 +52,7 @@ pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.Workspa const slice = trigram_store.declarations.slice(); const names = slice.items(.name); + const kinds = slice.items(.kind); var last_index: usize = 0; var last_position: offsets.Position = .{ .line = 0, .character = 0 }; @@ -59,6 +60,8 @@ pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.Workspa try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); for (declaration_buffer.items) |declaration| { const name_token = names[@intFromEnum(declaration)]; + const kind = kinds[@intFromEnum(declaration)]; + const loc = offsets.identifierTokenToNameLoc(handle.tree, name_token); const name = offsets.identifierTokenToNameSlice(handle.tree, name_token); @@ -69,7 +72,12 @@ pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.Workspa symbols.appendAssumeCapacity(.{ .name = name, - .kind = .Variable, + .kind = switch (kind) { + .variable => .Variable, + .constant => .Constant, + .function => .Function, + .test_function => .Method, // there is no SymbolKind that represents a tests, + }, .location = .{ .Location = .{ .uri = handle.uri, From 6b9acfe6a0259bc4b74591d49fa5c59325f42677 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Tue, 16 Sep 2025 22:15:28 +0200 Subject: [PATCH 14/16] fix race 
condition in getOrLoadHandle --- src/DocumentStore.zig | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 6a1fd77d2..5d29f45e1 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -749,6 +749,8 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { } defer { + self.lock.lock(); + defer self.lock.unlock(); std.debug.assert(self.currently_loading_uris.swapRemove(uri)); self.wait_for_currently_loading_uri.broadcast(); } From 39fd9863c13ea667a0177ad7491f01ec7fa6fa7c Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Thu, 23 Oct 2025 17:38:09 -0400 Subject: [PATCH 15/16] Even/odd cuckoo --- src/TrigramStore.zig | 101 ++++++++++++++++++++++++------------------- 1 file changed, 57 insertions(+), 44 deletions(-) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 647eb3636..26fe1c709 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -148,7 +148,7 @@ pub fn init( if (trigrams.len > 0) { var prng = std.Random.DefaultPrng.init(0); - const filter_capacity = CuckooFilter.capacityForCount(store.trigram_to_declarations.count()) catch unreachable; + const filter_capacity = CuckooFilter.capacityForCount(@intCast(store.trigram_to_declarations.count())) catch unreachable; try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); store.filter_buckets.items.len = filter_capacity; @@ -279,44 +279,53 @@ fn mergeIntersection( return out_idx; } -// TODO: The pow2 requirement is quite inefficient: explore ideas posted in -// https://databasearchitects.blogspot.com/2019/07/cuckoo-filters-with-arbitrarily-sized.html -// (rocksdb even-odd scheme from comments looks interesting). +fn parity(integer: anytype) enum(u1) { even, odd } { + return @enumFromInt(integer & 1); +} + pub const CuckooFilter = struct { - /// len must be a power of 2. 
- /// - /// ### Pathological case with buckets.len power of 2 - /// - /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_2` - /// - `BucketIndex(alias_1)` -> `bucket_1`, `BucketIndex(alias_1).alternate()` -> `bucket_2` - /// - /// Our alternate mappings hold and `contains()` will not return false negatives. - /// - /// ### Pathological case with buckets.len NOT power of 2: - /// - /// - `BucketIndex(alias_0)` -> `bucket_1`, `BucketIndex(alias_0).alternate()` -> `bucket_3` - /// - `BucketIndex(alias_1)` -> `bucket_2`, `BucketIndex(alias_1).alternate()` -> `bucket_4` - /// - /// Our alternate mappings do not hold and `contains()` can return false negatives. This is not - /// acceptable as the entire point of an AMQ datastructure is the presence of false positives - /// but not false negatives. buckets: []Bucket, pub const Fingerprint = enum(u8) { none = std.math.maxInt(u8), _, - pub fn hash(fingerprint: Fingerprint) u32 { - return @truncate(std.hash.Murmur2_64.hash(&.{@intFromEnum(fingerprint)})); + const precomputed_odd_hashes = blk: { + var table: [255]u32 = undefined; + + for (&table, 0..) 
|*h, index| { + h.* = @truncate(std.hash.Murmur2_64.hash(&.{index}) | 1); + } + + break :blk table; + }; + + pub fn oddHash(fingerprint: Fingerprint) u32 { + assert(fingerprint != .none); + return precomputed_odd_hashes[@intFromEnum(fingerprint)]; } }; + pub const Bucket = [4]Fingerprint; pub const BucketIndex = enum(u32) { _, - pub fn alternate(index: BucketIndex, fingerprint: Fingerprint) BucketIndex { + pub fn alternate(index: BucketIndex, fingerprint: Fingerprint, len: u32) BucketIndex { + assert(@intFromEnum(index) < len); assert(fingerprint != .none); - return @enumFromInt(@intFromEnum(index) ^ fingerprint.hash()); + + const signed_index: i64 = @intFromEnum(index); + const odd_hash: i64 = fingerprint.oddHash(); + + const unbounded = switch (parity(signed_index)) { + .even => signed_index + odd_hash, + .odd => signed_index - odd_hash, + }; + const bounded: u32 = @intCast(@mod(unbounded, len)); + + assert(parity(signed_index) != parity(bounded)); + + return @enumFromInt(bounded); } }; @@ -325,41 +334,46 @@ pub const CuckooFilter = struct { index_1: BucketIndex, index_2: BucketIndex, - pub fn initFromTrigram(trigram: Trigram) Triplet { + pub fn initFromTrigram(trigram: Trigram, len: u32) Triplet { const split: packed struct { fingerprint: Fingerprint, padding: u24, - index_1: BucketIndex, + index_1: u32, } = @bitCast(std.hash.Murmur2_64.hash(&trigram)); + const index_1: BucketIndex = @enumFromInt(split.index_1 % len); + const fingerprint: Fingerprint = if (split.fingerprint == .none) - @enumFromInt(0) + @enumFromInt(1) else split.fingerprint; const triplet: Triplet = .{ .fingerprint = fingerprint, - .index_1 = split.index_1, - .index_2 = split.index_1.alternate(fingerprint), + .index_1 = index_1, + .index_2 = index_1.alternate(fingerprint, len), }; - assert(triplet.index_2.alternate(fingerprint) == triplet.index_1); + assert(triplet.index_2.alternate(fingerprint, len) == index_1); return triplet; } }; + pub fn init(buckets: []Bucket) CuckooFilter { + 
assert(parity(buckets.len) == .even); + return .{ .buckets = buckets }; + } + pub fn reset(filter: CuckooFilter) void { - @memset(filter.buckets, [1]Fingerprint{.none} ** 4); + @memset(filter.buckets, [1]Fingerprint{.none} ** @typeInfo(Bucket).array.len); } - pub fn capacityForCount(count: usize) error{Overflow}!usize { - const fill_rate = 0.95; - return try std.math.ceilPowerOfTwo(usize, @intFromFloat(@ceil(@as(f32, @floatFromInt(count)) / fill_rate))); + pub fn capacityForCount(count: u32) error{Overflow}!u32 { + return count + (count & 1); } - // Use a hash (fnv) for randomness. pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { - const triplet: Triplet = .initFromTrigram(trigram); + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); if (filter.appendToBucket(triplet.index_1, triplet.fingerprint) or filter.appendToBucket(triplet.index_2, triplet.fingerprint)) @@ -371,7 +385,7 @@ pub const CuckooFilter = struct { var index = if (random.boolean()) triplet.index_1 else triplet.index_2; for (0..500) |_| { fingerprint = filter.swapFromBucket(random, index, fingerprint); - index = index.alternate(fingerprint); + index = index.alternate(fingerprint, @intCast(filter.buckets.len)); if (filter.appendToBucket(index, fingerprint)) { return; @@ -382,8 +396,7 @@ pub const CuckooFilter = struct { } fn bucketAt(filter: CuckooFilter, index: BucketIndex) *Bucket { - assert(std.math.isPowerOfTwo(filter.buckets.len)); - return &filter.buckets[@intFromEnum(index) & (filter.buckets.len - 1)]; + return &filter.buckets[@intFromEnum(index)]; } fn appendToBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { @@ -408,6 +421,7 @@ pub const CuckooFilter = struct { ) Fingerprint { assert(fingerprint != .none); + comptime assert(@typeInfo(Bucket).array.len == 4); const target = &filter.bucketAt(index)[random.int(u2)]; const old_fingerprint = target.*; @@ -419,7 +433,7 @@ pub const 
CuckooFilter = struct { } pub fn contains(filter: CuckooFilter, trigram: Trigram) bool { - const triplet: Triplet = .initFromTrigram(trigram); + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); return filter.containsInBucket(triplet.index_1, triplet.fingerprint) or filter.containsInBucket(triplet.index_2, triplet.fingerprint); @@ -443,16 +457,15 @@ pub const CuckooFilter = struct { test CuckooFilter { const allocator = std.testing.allocator; - const element_count = 486; + const element_count = 499; const filter_size = comptime CuckooFilter.capacityForCount(element_count) catch unreachable; - try std.testing.expectEqual(512, filter_size); var entries: std.AutoArrayHashMapUnmanaged(Trigram, void) = .empty; defer entries.deinit(allocator); try entries.ensureTotalCapacity(allocator, element_count); var buckets: [filter_size]CuckooFilter.Bucket = undefined; - var filter: CuckooFilter = .{ .buckets = &buckets }; + var filter: CuckooFilter = .init(&buckets); var filter_prng: std.Random.DefaultPrng = .init(42); for (0..2_500) |gen_prng_seed| { From b5f2f42a4c494ea6cd04493262dc970ff527f6be Mon Sep 17 00:00:00 2001 From: SuperAuguste <19855629+SuperAuguste@users.noreply.github.com> Date: Tue, 28 Oct 2025 23:22:22 -0400 Subject: [PATCH 16/16] Case-sensitive trigram iterator Co-Authored-By: Techatrix --- src/TrigramStore.zig | 125 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig index 26fe1c709..d308f5dc1 100644 --- a/src/TrigramStore.zig +++ b/src/TrigramStore.zig @@ -30,6 +30,131 @@ filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), declarations: std.MultiArrayList(Declaration), +pub const TrigramIterator = struct { + buffer: []const u8, + index: usize, + boundary: Boundary, + + pub fn init(buffer: []const u8) TrigramIterator { + 
assert(buffer.len != 0); + return .{ .buffer = buffer, .index = 0, .boundary = .calculate(buffer, 0) }; + } + + pub const Boundary = struct { + end: usize, + next_start: ?usize, + + pub fn calculate(buffer: []const u8, index: usize) Boundary { + assert(buffer[index..].len > 0); + + if (std.ascii.isLower(buffer[index])) { + // First character lowercase + for (buffer[index + 1 ..], index + 1..) |c, i| { + if (!std.ascii.isLower(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } else { + if (index + 1 >= buffer.len) { + return .{ + .end = buffer.len, + .next_start = null, + }; + } + + if (std.ascii.isLower(buffer[index + 1])) { + // First char is uppercase, second char is lowercase + for (buffer[index + 2 ..], index + 2..) |c, i| { + if (!std.ascii.isLower(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } else { + // First and second chars are uppercase + for (buffer[index + 2 ..], index + 2..) |c, i| { + if (!std.ascii.isUpper(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } + } + + return .{ + .end = buffer.len, + .next_start = null, + }; + } + }; + + pub fn next(ti: *TrigramIterator) ?Trigram { + if (ti.index == ti.buffer.len) return null; + assert(ti.index < ti.boundary.end); + + var trigram: [3]u8 = @splat(0); + const unpadded = ti.buffer[ti.index..@min(ti.index + 3, ti.boundary.end)]; + _ = std.ascii.lowerString(&trigram, unpadded); + + if (unpadded.len < 3 or ti.index + 3 >= ti.boundary.end) { + ti.index = ti.boundary.next_start orelse { + ti.index = ti.buffer.len; + return trigram; + }; + ti.boundary = .calculate(ti.buffer, ti.index); + } else { + ti.index += 1; + } + + return trigram; + } +}; + +test "TrigramIterator.Boundary.calculate" { + var boundary: TrigramIterator.Boundary = .calculate("helloWORLD", 0); + try std.testing.expectEqual(5, boundary.end); + try std.testing.expectEqual(5, boundary.next_start.?); + + boundary = .calculate("helloWORLD", 5); + try std.testing.expectEqual(10, boundary.end); + try 
std.testing.expectEqual(null, boundary.next_start); +} + +test TrigramIterator { + const allocator = std.testing.allocator; + + const matrix: []const struct { []const u8, []const Trigram } = &.{ + .{ "a", &.{"a\x00\x00".*} }, + .{ "ab", &.{"ab\x00".*} }, + .{ "helloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + .{ "HelloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + .{ "HelloWorld", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + }; + + var actual: std.ArrayList(Trigram) = .empty; + defer actual.deinit(allocator); + + for (matrix) |entry| { + const input, const expected = entry; + + actual.clearRetainingCapacity(); + + var it: TrigramIterator = .init(input); + while (it.next()) |trigram| { + try actual.append(allocator, trigram); + } + + try @import("testing.zig").expectEqual(expected, actual.items); + } +} + pub fn init( allocator: std.mem.Allocator, tree: Ast,