diff --git a/src/DocumentStore.zig b/src/DocumentStore.zig index 5830e4cb2..5d29f45e1 100644 --- a/src/DocumentStore.zig +++ b/src/DocumentStore.zig @@ -14,6 +14,7 @@ const tracy = @import("tracy"); const translate_c = @import("translate_c.zig"); const DocumentScope = @import("DocumentScope.zig"); const DiagnosticsCollection = @import("DiagnosticsCollection.zig"); +const TrigramStore = @import("TrigramStore.zig"); const DocumentStore = @This(); @@ -25,6 +26,7 @@ thread_pool: *std.Thread.Pool, handles: std.StringArrayHashMapUnmanaged(*Handle) = .empty, build_files: if (supports_build_system) std.StringArrayHashMapUnmanaged(*BuildFile) else void = if (supports_build_system) .empty else {}, cimports: if (supports_build_system) std.AutoArrayHashMapUnmanaged(Hash, translate_c.Result) else void = if (supports_build_system) .empty else {}, +trigram_stores: std.StringArrayHashMapUnmanaged(TrigramStore) = .empty, diagnostics_collection: *DiagnosticsCollection, builds_in_progress: std.atomic.Value(i32) = .init(0), transport: ?*lsp.Transport = null, @@ -33,6 +35,8 @@ lsp_capabilities: struct { supports_semantic_tokens_refresh: bool = false, supports_inlay_hints_refresh: bool = false, } = .{}, +currently_loading_uris: std.StringArrayHashMapUnmanaged(void) = .empty, +wait_for_currently_loading_uri: std.Thread.Condition = .{}, pub const Uri = []const u8; @@ -193,6 +197,7 @@ pub const Handle = struct { lazy_condition: std.Thread.Condition = .{}, import_uris: ?[]Uri = null, + trigram_store: TrigramStore = undefined, document_scope: DocumentScope = undefined, zzoiir: ZirOrZoir = undefined, @@ -231,6 +236,11 @@ pub const Handle = struct { /// `false` indicates the document only exists because it is a dependency of another document /// or has been closed with `textDocument/didClose`. lsp_synced: bool = false, + /// true if a thread has acquired the permission to compute the `TrigramStore` + /// all other threads will wait until the given thread has computed the `TrigramStore` before reading it. + has_trigram_store_lock: bool = false, + /// true if `handle.impl.trigram_store` has been set + has_trigram_store: bool = false, /// true if a thread has acquired the permission to compute the `DocumentScope` /// all other threads will wait until the given thread has computed the `DocumentScope` before reading it. has_document_scope_lock: bool = false, @@ -241,7 +251,7 @@ pub const Handle = struct { /// all other threads will wait until the given thread has computed the `std.zig.Zir` or `std.zig.Zoir` before reading it. /// true if `handle.impl.zir` has been set has_zzoiir: bool = false, - _: u27 = 0, + _: u25 = 0, }; /// Takes ownership of `text` on success. @@ -286,6 +296,7 @@ pub const Handle = struct { .zon => self.impl.zzoiir.zon.deinit(allocator), }; if (status.has_document_scope) self.impl.document_scope.deinit(allocator); + if (status.has_trigram_store) self.impl.trigram_store.deinit(allocator); allocator.free(self.tree.source); self.tree.deinit(allocator); @@ -366,6 +377,23 @@ pub const Handle = struct { return self.impl.document_scope; } + pub fn getTrigramStore(self: *Handle) error{OutOfMemory}!TrigramStore { + if (self.getStatus().has_trigram_store) return self.impl.trigram_store; + return try self.getLazy(TrigramStore, "trigram_store", struct { + fn create(handle: *Handle, allocator: std.mem.Allocator) error{OutOfMemory}!TrigramStore { + return try .init(allocator, handle.tree); + } + }); + } + + /// Asserts that `getTrigramStore` has been previously called on `handle`. + pub fn getTrigramStoreCached(self: *Handle) TrigramStore { + if (builtin.mode == .Debug) { + std.debug.assert(self.getStatus().has_trigram_store); + } + return self.impl.trigram_store; + } + pub fn getZir(self: *Handle) error{OutOfMemory}!std.zig.Zir { std.debug.assert(self.tree.mode == .zig); const zir_or_zoir = try self.getZirOrZoir(); @@ -603,6 +631,15 @@ pub fn deinit(self: *DocumentStore) void { } self.handles.deinit(self.allocator); + for (self.trigram_stores.keys(), self.trigram_stores.values()) |uri, *trigram_store| { + self.allocator.free(uri); + trigram_store.deinit(self.allocator); + } + self.trigram_stores.deinit(self.allocator); + + std.debug.assert(self.currently_loading_uris.count() == 0); + self.currently_loading_uris.deinit(self.allocator); + if (supports_build_system) { for (self.build_files.values()) |build_file| { build_file.deinit(self.allocator); @@ -683,7 +720,41 @@ pub fn getOrLoadHandle(self: *DocumentStore, uri: Uri) ?*Handle { const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); - if (self.getHandle(uri)) |handle| return handle; + { + self.lock.lock(); + defer self.lock.unlock(); + + while (true) { + if (self.handles.get(uri)) |handle| return handle; + + const gop = self.currently_loading_uris.getOrPutValue( + self.allocator, + uri, + {}, + ) catch return null; + + if (!gop.found_existing) { + break; + } + + var mutex: std.Thread.Mutex = .{}; + + mutex.lock(); + defer mutex.unlock(); + + self.lock.unlock(); + self.wait_for_currently_loading_uri.wait(&mutex); + self.lock.lock(); + } + } + + defer { + self.lock.lock(); + defer self.lock.unlock(); + std.debug.assert(self.currently_loading_uris.swapRemove(uri)); + self.wait_for_currently_loading_uri.broadcast(); + } + const file_contents = self.readFile(uri) orelse return null; return self.createAndStoreDocument(uri, file_contents, false) catch |err| { log.err("failed to store document '{s}': {}", .{ uri, err }); @@ -925,6 +996,9 @@ fn notifyBuildEnd(self: *DocumentStore, status: EndStatus) void { } fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + { build_file.impl.mutex.lock(); defer build_file.impl.mutex.unlock(); @@ -1005,6 +1079,64 @@ fn invalidateBuildFileWorker(self: *DocumentStore, build_file: *BuildFile) void } } +pub fn loadTrigramStores( + store: *DocumentStore, + filter_paths: []const []const u8, +) error{OutOfMemory}![]*DocumentStore.Handle { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + var handles: std.ArrayListUnmanaged(*DocumentStore.Handle) = try .initCapacity(store.allocator, store.handles.count()); + errdefer handles.deinit(store.allocator); + + for (store.handles.values()) |handle| { + if (URI.toFsPath(store.allocator, handle.uri)) |path| { + defer store.allocator.free(path); + for (filter_paths) |filter_path| { + if (std.mem.startsWith(u8, path, filter_path)) break; + } else break; + } else |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + else => { + // The URI is either invalid or not a `file` scheme. Either way, we should include it. + }, + } + handles.appendAssumeCapacity(handle); + } + + if (builtin.single_threaded) { + for (handles.items) |handle| { + _ = try handle.getTrigramStore(); + } + return try handles.toOwnedSlice(store.allocator); + } + + const loadTrigramStore = struct { + fn loadTrigramStore( + handle: *Handle, + did_out_of_memory: *std.atomic.Value(bool), + ) void { + _ = handle.getTrigramStore() catch { + did_out_of_memory.store(true, .release); + }; + } + }.loadTrigramStore; + + var wait_group: std.Thread.WaitGroup = .{}; + var did_out_of_memory: std.atomic.Value(bool) = .init(false); + + for (handles.items) |handle| { + const status = handle.getStatus(); + if (status.has_trigram_store) continue; + store.thread_pool.spawnWg(&wait_group, loadTrigramStore, .{ handle, &did_out_of_memory }); + } + store.thread_pool.waitAndWork(&wait_group); + + if (did_out_of_memory.load(.acquire)) return error.OutOfMemory; + + return try handles.toOwnedSlice(store.allocator); +} + pub fn isBuildFile(uri: Uri) bool { return std.mem.endsWith(u8, uri, "/build.zig"); } @@ -1187,6 +1319,9 @@ fn buildDotZigExists(dir_path: []const u8) bool { /// See `Handle.getAssociatedBuildFileUri`. /// Caller owns returned memory. fn collectPotentialBuildFiles(self: *DocumentStore, uri: Uri) ![]*BuildFile { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + if (isInStd(uri)) return &.{}; var potential_build_files: std.ArrayList(*BuildFile) = .empty; @@ -1345,17 +1480,12 @@ fn createAndStoreDocument( errdefer if (!gop.found_existing) std.debug.assert(self.handles.swapRemove(uri)); if (gop.found_existing) { - if (lsp_synced) { - new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; - gop.value_ptr.*.impl.associated_build_file = .init; + new_handle.impl.associated_build_file = gop.value_ptr.*.impl.associated_build_file; + gop.value_ptr.*.impl.associated_build_file = .init; - new_handle.uri = gop.key_ptr.*; - gop.value_ptr.*.deinit(); - gop.value_ptr.*.* = new_handle; - } else { - // TODO prevent concurrent `createAndStoreDocument` invocations from racing each other - new_handle.deinit(); - } + new_handle.uri = gop.key_ptr.*; + gop.value_ptr.*.deinit(); + gop.value_ptr.*.* = new_handle; } else { gop.key_ptr.* = try self.allocator.dupe(u8, uri); errdefer self.allocator.free(gop.key_ptr.*); @@ -1370,6 +1500,73 @@ fn createAndStoreDocument( return gop.value_ptr.*; } +pub fn loadDirectoryRecursive(store: *DocumentStore, directory_uri: Uri) !usize { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + + const workspace_path = try URI.toFsPath(store.allocator, directory_uri); + defer store.allocator.free(workspace_path); + + var workspace_dir = try std.fs.openDirAbsolute(workspace_path, .{ .iterate = true }); + defer workspace_dir.close(); + + var walker = try workspace_dir.walk(store.allocator); + defer walker.deinit(); + + var not_currently_loading_uris: std.ArrayListUnmanaged(Uri) = .empty; + defer { + for (not_currently_loading_uris.items) |uri| store.allocator.free(uri); + not_currently_loading_uris.deinit(store.allocator); + } + + var file_count: usize = 0; + + { + while (try walker.next()) |entry| { + if (entry.kind == .directory) continue; + if (std.mem.indexOf(u8, entry.path, std.fs.path.sep_str ++ ".zig-cache" ++ std.fs.path.sep_str) != null) continue; + if (std.mem.startsWith(u8, entry.path, ".zig-cache" ++ std.fs.path.sep_str)) continue; + if (!std.mem.eql(u8, std.fs.path.extension(entry.basename), ".zig")) continue; + + file_count += 1; + + const path = try std.fs.path.join(store.allocator, &.{ workspace_path, entry.path }); + defer store.allocator.free(path); + + try not_currently_loading_uris.ensureUnusedCapacity(store.allocator, 1); + + const uri = try URI.fromPath(store.allocator, path); + errdefer comptime unreachable; + + store.lock.lockShared(); + defer store.lock.unlockShared(); + + if (!store.handles.contains(uri) and + !store.currently_loading_uris.contains(uri)) + { + not_currently_loading_uris.appendAssumeCapacity(uri); + } + } + } + + errdefer comptime unreachable; + + const S = struct { + fn getOrLoadHandleVoid(s: *DocumentStore, uri: Uri) void { + _ = s.getOrLoadHandle(uri); + s.allocator.free(uri); + } + }; + + var wait_group: std.Thread.WaitGroup = .{}; + while (not_currently_loading_uris.pop()) |uri| { + store.thread_pool.spawnWg(&wait_group, S.getOrLoadHandleVoid, .{ store, uri }); + } + store.thread_pool.waitAndWork(&wait_group); + + return file_count; +} + pub const CImportHandle = struct { /// the `@cImport` node node: Ast.Node.Index, diff --git a/src/Server.zig b/src/Server.zig index 497ccb90c..7744cdbf9 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -558,7 +558,7 @@ fn initializeHandler(server: *Server, arena: std.mem.Allocator, request: types.I .documentRangeFormattingProvider = .{ .bool = false }, .foldingRangeProvider = .{ .bool = true }, .selectionRangeProvider = .{ .bool = true }, - .workspaceSymbolProvider = .{ .bool = false }, + .workspaceSymbolProvider = .{ .bool = true }, .workspace = .{ .workspaceFolders = .{ .supported = true, @@ -850,7 +850,6 @@ const Workspace = struct { fn addWorkspace(server: *Server, uri: types.URI) error{OutOfMemory}!void { try server.workspaces.ensureUnusedCapacity(server.allocator, 1); server.workspaces.appendAssumeCapacity(try Workspace.init(server, uri)); - log.info("added Workspace Folder: {s}", .{uri}); if (BuildOnSaveSupport.isSupportedComptime() and // Don't initialize build on save until initialization finished. @@ -863,6 +862,16 @@ fn addWorkspace(server: *Server, uri: types.URI) error{OutOfMemory}!void { .restart = false, }); } + + const file_count = server.document_store.loadDirectoryRecursive(uri) catch |err| switch (err) { + error.UnsupportedScheme => return, + else => { + log.err("failed to load files in workspace '{s}': {}", .{ uri, err }); + return; + }, + }; + + log.info("added Workspace Folder: {s} ({d} files)", .{ uri, file_count }); } fn removeWorkspace(server: *Server, uri: types.URI) void { @@ -1506,6 +1515,10 @@ fn selectionRangeHandler(server: *Server, arena: std.mem.Allocator, request: typ return try selection_range.generateSelectionRanges(arena, handle, request.positions, server.offset_encoding); } +fn workspaceSymbolHandler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) Error!lsp.ResultType("workspace/symbol") { + return try @import("features/workspace_symbols.zig").handler(server, arena, request); +} + const HandledRequestParams = union(enum) { initialize: types.InitializeParams, shutdown, @@ -1529,6 +1542,7 @@ const HandledRequestParams = union(enum) { @"textDocument/codeAction": types.CodeActionParams, @"textDocument/foldingRange": types.FoldingRangeParams, @"textDocument/selectionRange": types.SelectionRangeParams, + @"workspace/symbol": types.WorkspaceSymbolParams, other: lsp.MethodWithParams, }; @@ -1573,6 +1587,7 @@ fn isBlockingMessage(msg: Message) bool { .@"textDocument/codeAction", .@"textDocument/foldingRange", .@"textDocument/selectionRange", + .@"workspace/symbol", => return false, .other => return false, }, @@ -1752,6 +1767,7 @@ pub fn sendRequestSync(server: *Server, arena: std.mem.Allocator, comptime metho .@"textDocument/codeAction" => try server.codeActionHandler(arena, params), .@"textDocument/foldingRange" => try server.foldingRangeHandler(arena, params), .@"textDocument/selectionRange" => try server.selectionRangeHandler(arena, params), + .@"workspace/symbol" => try server.workspaceSymbolHandler(arena, params), .other => return null, }; } diff --git a/src/TrigramStore.zig b/src/TrigramStore.zig new file mode 100644 index 000000000..d308f5dc1 --- /dev/null +++ b/src/TrigramStore.zig @@ -0,0 +1,630 @@ +//! Per-file trigram store. + +const std = @import("std"); +const ast = @import("ast.zig"); +const Ast = std.zig.Ast; +const assert = std.debug.assert; +const offsets = @import("offsets.zig"); + +pub const TrigramStore = @This(); + +pub const Trigram = [3]u8; + +pub const Declaration = struct { + pub const Index = enum(u32) { _ }; + + pub const Kind = enum { + variable, + constant, + function, + test_function, + }; + + /// Either `.identifier` or `.string_literal`. + name: Ast.TokenIndex, + kind: Kind, +}; + +has_filter: bool, +filter_buckets: std.ArrayListUnmanaged(CuckooFilter.Bucket), +trigram_to_declarations: std.AutoArrayHashMapUnmanaged(Trigram, std.ArrayListUnmanaged(Declaration.Index)), +declarations: std.MultiArrayList(Declaration), + +pub const TrigramIterator = struct { + buffer: []const u8, + index: usize, + boundary: Boundary, + + pub fn init(buffer: []const u8) TrigramIterator { + assert(buffer.len != 0); + return .{ .buffer = buffer, .index = 0, .boundary = .calculate(buffer, 0) }; + } + + pub const Boundary = struct { + end: usize, + next_start: ?usize, + + pub fn calculate(buffer: []const u8, index: usize) Boundary { + assert(buffer[index..].len > 0); + + if (std.ascii.isLower(buffer[index])) { + // First character lowercase + for (buffer[index + 1 ..], index + 1..) |c, i| { + if (!std.ascii.isLower(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } else { + if (index + 1 >= buffer.len) { + return .{ + .end = buffer.len, + .next_start = null, + }; + } + + if (std.ascii.isLower(buffer[index + 1])) { + // First char is uppercase, second char is lowercase + for (buffer[index + 2 ..], index + 2..) |c, i| { + if (!std.ascii.isLower(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } else { + // First and second chars are uppercase + for (buffer[index + 2 ..], index + 2..) |c, i| { + if (!std.ascii.isUpper(c)) { + return .{ + .end = i, + .next_start = i, + }; + } + } + } + } + + return .{ + .end = buffer.len, + .next_start = null, + }; + } + }; + + pub fn next(ti: *TrigramIterator) ?Trigram { + if (ti.index == ti.buffer.len) return null; + assert(ti.index < ti.boundary.end); + + var trigram: [3]u8 = @splat(0); + const unpadded = ti.buffer[ti.index..@min(ti.index + 3, ti.boundary.end)]; + _ = std.ascii.lowerString(&trigram, unpadded); + + if (unpadded.len < 3 or ti.index + 3 >= ti.boundary.end) { + ti.index = ti.boundary.next_start orelse { + ti.index = ti.buffer.len; + return trigram; + }; + ti.boundary = .calculate(ti.buffer, ti.index); + } else { + ti.index += 1; + } + + return trigram; + } +}; + +test "TrigramIterator.Boundary.calculate" { + var boundary: TrigramIterator.Boundary = .calculate("helloWORLD", 0); + try std.testing.expectEqual(5, boundary.end); + try std.testing.expectEqual(5, boundary.next_start.?); + + boundary = .calculate("helloWORLD", 5); + try std.testing.expectEqual(10, boundary.end); + try std.testing.expectEqual(null, boundary.next_start); +} + +test TrigramIterator { + const allocator = std.testing.allocator; + + const matrix: []const struct { []const u8, []const Trigram } = &.{ + .{ "a", &.{"a\x00\x00".*} }, + .{ "ab", &.{"ab\x00".*} }, + .{ "helloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + .{ "HelloWORLD", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + .{ "HelloWorld", &.{ "hel".*, "ell".*, "llo".*, "wor".*, "orl".*, "rld".* } }, + }; + + var actual: std.ArrayList(Trigram) = .empty; + defer actual.deinit(allocator); + + for (matrix) |entry| { + const input, const expected = entry; + + actual.clearRetainingCapacity(); + + var it: TrigramIterator = .init(input); + while (it.next()) |trigram| { + try actual.append(allocator, trigram); + } + + try @import("testing.zig").expectEqual(expected, actual.items); + } +} + +pub fn init( + allocator: std.mem.Allocator, + tree: Ast, +) error{OutOfMemory}!TrigramStore { + var store: TrigramStore = .{ + .has_filter = false, + .filter_buckets = .empty, + .trigram_to_declarations = .empty, + .declarations = .empty, + }; + errdefer store.deinit(allocator); + + const Context = struct { + allocator: std.mem.Allocator, + store: *TrigramStore, + in_function: bool, + + const Error = error{OutOfMemory}; + fn callback(context: *@This(), cb_tree: Ast, node: Ast.Node.Index) Error!void { + const old_in_function = context.in_function; + defer context.in_function = old_in_function; + + switch (cb_tree.nodeTag(node)) { + .fn_proto, + .fn_proto_multi, + .fn_proto_one, + .fn_proto_simple, + => |tag| skip: { + context.in_function = tag == .fn_decl; + + const fn_token = cb_tree.nodeMainToken(node); + if (cb_tree.tokenTag(fn_token + 1) != .identifier) break :skip; + + try context.store.appendDeclaration( + context.allocator, + offsets.identifierTokenToNameSlice(cb_tree, fn_token + 1), + fn_token + 1, + .function, + ); + }, + .root => unreachable, + .container_decl, + .container_decl_trailing, + .container_decl_arg, + .container_decl_arg_trailing, + .container_decl_two, + .container_decl_two_trailing, + .tagged_union, + .tagged_union_trailing, + .tagged_union_enum_tag, + .tagged_union_enum_tag_trailing, + .tagged_union_two, + .tagged_union_two_trailing, + => context.in_function = false, + + .global_var_decl, + .local_var_decl, + .simple_var_decl, + .aligned_var_decl, + => skip: { + if (context.in_function) break :skip; + + const main_token = cb_tree.nodeMainToken(node); + + const kind: Declaration.Kind = switch (cb_tree.tokenTag(main_token)) { + .keyword_var => .variable, + .keyword_const => .constant, + else => unreachable, + }; + + try context.store.appendDeclaration( + context.allocator, + offsets.identifierTokenToNameSlice(cb_tree, main_token + 1), + main_token + 1, + kind, + ); + }, + + .test_decl => skip: { + const test_name_token, const test_name = ast.testDeclNameAndToken(cb_tree, node) orelse break :skip; + + try context.store.appendDeclaration( + context.allocator, + test_name, + test_name_token, + .test_function, + ); + }, + else => {}, + } + + try ast.iterateChildren(cb_tree, node, context, Error, callback); + } + }; + + var context: Context = .{ + .allocator = allocator, + .store = &store, + .in_function = false, + }; + try ast.iterateChildren(tree, .root, &context, Context.Error, Context.callback); + + const lists = store.trigram_to_declarations.values(); + var index: usize = 0; + while (index < lists.len) { + if (lists[index].items.len == 0) { + lists[index].deinit(allocator); + store.trigram_to_declarations.swapRemoveAt(index); + } else { + index += 1; + } + } + + const trigrams = store.trigram_to_declarations.keys(); + + if (trigrams.len > 0) { + var prng = std.Random.DefaultPrng.init(0); + + const filter_capacity = CuckooFilter.capacityForCount(@intCast(store.trigram_to_declarations.count())) catch unreachable; + try store.filter_buckets.ensureTotalCapacityPrecise(allocator, filter_capacity); + store.filter_buckets.items.len = filter_capacity; + + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + filter.reset(); + store.has_filter = true; + + for (trigrams) |trigram| { + filter.append(prng.random(), trigram) catch |err| switch (err) { + error.EvictionFailed => { + // NOTE: This should generally be quite rare. + store.has_filter = false; + break; + }, + }; + } + } + + return store; +} + +pub fn deinit(store: *TrigramStore, allocator: std.mem.Allocator) void { + store.filter_buckets.deinit(allocator); + for (store.trigram_to_declarations.values()) |*list| { + list.deinit(allocator); + } + store.trigram_to_declarations.deinit(allocator); + store.declarations.deinit(allocator); + store.* = undefined; +} + +fn appendDeclaration( + store: *TrigramStore, + allocator: std.mem.Allocator, + name: []const u8, + name_token: Ast.TokenIndex, + kind: Declaration.Kind, +) error{OutOfMemory}!void { + if (name.len < 3) return; + + try store.declarations.append(allocator, .{ + .name = name_token, + .kind = kind, + }); + + for (0..name.len - 2) |index| { + const trigram = name[index..][0..3].*; + const gop = try store.trigram_to_declarations.getOrPutValue(allocator, trigram, .empty); + try gop.value_ptr.append(allocator, @enumFromInt(store.declarations.len - 1)); + } +} + +/// Asserts query.len >= 3. Asserts declaration_buffer.items.len == 0. +pub fn declarationsForQuery( + store: *const TrigramStore, + allocator: std.mem.Allocator, + query: []const u8, + declaration_buffer: *std.ArrayListUnmanaged(Declaration.Index), +) error{OutOfMemory}!void { + assert(query.len >= 3); + assert(declaration_buffer.items.len == 0); + + const filter: CuckooFilter = .{ .buckets = store.filter_buckets.items }; + + if (store.has_filter) { + for (0..query.len - 2) |index| { + const trigram = query[index..][0..3].*; + if (!filter.contains(trigram)) { + return; + } + } + } + + const first = (store.trigram_to_declarations.get(query[0..3].*) orelse return).items; + + try declaration_buffer.resize(allocator, first.len * 2); + + var len = first.len; + @memcpy(declaration_buffer.items[0..len], first); + + for (0..query.len - 2) |index| { + const trigram = query[index..][0..3].*; + const old_len = len; + len = mergeIntersection( + (store.trigram_to_declarations.get(trigram[0..3].*) orelse { + declaration_buffer.clearRetainingCapacity(); + return; + }).items, + declaration_buffer.items[0..len], + declaration_buffer.items[len..], + ); + @memcpy(declaration_buffer.items[0..len], declaration_buffer.items[old_len..][0..len]); + declaration_buffer.shrinkRetainingCapacity(len * 2); + } + + declaration_buffer.shrinkRetainingCapacity(declaration_buffer.items.len / 2); +} + +/// Asserts `@min(a.len, b.len) <= out.len`. +fn mergeIntersection( + a: []const Declaration.Index, + b: []const Declaration.Index, + out: []Declaration.Index, +) u32 { + assert(@min(a.len, b.len) <= out.len); + + var out_idx: u32 = 0; + + var a_idx: u32 = 0; + var b_idx: u32 = 0; + + while (a_idx < a.len and b_idx < b.len) { + const a_val = a[a_idx]; + const b_val = b[b_idx]; + + if (a_val == b_val) { + out[out_idx] = a_val; + out_idx += 1; + a_idx += 1; + b_idx += 1; + } else if (@intFromEnum(a_val) < @intFromEnum(b_val)) { + a_idx += 1; + } else { + b_idx += 1; + } + } + + return out_idx; +} + +fn parity(integer: anytype) enum(u1) { even, odd } { + return @enumFromInt(integer & 1); +} + +pub const CuckooFilter = struct { + buckets: []Bucket, + + pub const Fingerprint = enum(u8) { + none = std.math.maxInt(u8), + _, + + const precomputed_odd_hashes = blk: { + var table: [255]u32 = undefined; + + for (&table, 0..) |*h, index| { + h.* = @truncate(std.hash.Murmur2_64.hash(&.{index}) | 1); + } + + break :blk table; + }; + + pub fn oddHash(fingerprint: Fingerprint) u32 { + assert(fingerprint != .none); + return precomputed_odd_hashes[@intFromEnum(fingerprint)]; + } + }; + + pub const Bucket = [4]Fingerprint; + pub const BucketIndex = enum(u32) { + _, + + pub fn alternate(index: BucketIndex, fingerprint: Fingerprint, len: u32) BucketIndex { + assert(@intFromEnum(index) < len); + assert(fingerprint != .none); + + const signed_index: i64 = @intFromEnum(index); + const odd_hash: i64 = fingerprint.oddHash(); + + const unbounded = switch (parity(signed_index)) { + .even => signed_index + odd_hash, + .odd => signed_index - odd_hash, + }; + const bounded: u32 = @intCast(@mod(unbounded, len)); + + assert(parity(signed_index) != parity(bounded)); + + return @enumFromInt(bounded); + } + }; + + pub const Triplet = struct { + fingerprint: Fingerprint, + index_1: BucketIndex, + index_2: BucketIndex, + + pub fn initFromTrigram(trigram: Trigram, len: u32) Triplet { + const split: packed struct { + fingerprint: Fingerprint, + padding: u24, + index_1: u32, + } = @bitCast(std.hash.Murmur2_64.hash(&trigram)); + + const index_1: BucketIndex = @enumFromInt(split.index_1 % len); + + const fingerprint: Fingerprint = if (split.fingerprint == .none) + @enumFromInt(1) + else + split.fingerprint; + + const triplet: Triplet = .{ + .fingerprint = fingerprint, + .index_1 = index_1, + .index_2 = index_1.alternate(fingerprint, len), + }; + assert(triplet.index_2.alternate(fingerprint, len) == index_1); + + return triplet; + } + }; + + pub fn init(buckets: []Bucket) CuckooFilter { + assert(parity(buckets.len) == .even); + return .{ .buckets = buckets }; + } + + pub fn reset(filter: CuckooFilter) void { + @memset(filter.buckets, [1]Fingerprint{.none} ** @typeInfo(Bucket).array.len); + } + + pub fn capacityForCount(count: u32) error{Overflow}!u32 { + return count + (count & 1); + } + + pub fn append(filter: CuckooFilter, random: std.Random, trigram: Trigram) error{EvictionFailed}!void { + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); + + if (filter.appendToBucket(triplet.index_1, triplet.fingerprint) or + filter.appendToBucket(triplet.index_2, triplet.fingerprint)) + { + return; + } + + var fingerprint = triplet.fingerprint; + var index = if (random.boolean()) triplet.index_1 else triplet.index_2; + for (0..500) |_| { + fingerprint = filter.swapFromBucket(random, index, fingerprint); + index = index.alternate(fingerprint, @intCast(filter.buckets.len)); + + if (filter.appendToBucket(index, fingerprint)) { + return; + } + } + + return error.EvictionFailed; + } + + fn bucketAt(filter: CuckooFilter, index: BucketIndex) *Bucket { + return &filter.buckets[@intFromEnum(index)]; + } + + fn appendToBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == .none) { + slot.* = fingerprint; + return true; + } + } + + return false; + } + + fn swapFromBucket( + filter: CuckooFilter, + random: std.Random, + index: BucketIndex, + fingerprint: Fingerprint, + ) Fingerprint { + assert(fingerprint != .none); + + comptime assert(@typeInfo(Bucket).array.len == 4); + const target = &filter.bucketAt(index)[random.int(u2)]; + + const old_fingerprint = target.*; + assert(old_fingerprint != .none); + + target.* = fingerprint; + + return old_fingerprint; + } + + pub fn contains(filter: CuckooFilter, trigram: Trigram) bool { + const triplet: Triplet = .initFromTrigram(trigram, @intCast(filter.buckets.len)); + + return filter.containsInBucket(triplet.index_1, triplet.fingerprint) or + filter.containsInBucket(triplet.index_2, triplet.fingerprint); + } + + fn containsInBucket(filter: CuckooFilter, index: BucketIndex, fingerprint: Fingerprint) bool { + assert(fingerprint != .none); + + const bucket = filter.bucketAt(index); + for (bucket) |*slot| { + if (slot.* == fingerprint) { + return true; + } + } + + return false; + } +}; + +// TODO: More extensive (different capacities) testing. +test CuckooFilter { + const allocator = std.testing.allocator; + + const element_count = 499; + const filter_size = comptime CuckooFilter.capacityForCount(element_count) catch unreachable; + + var entries: std.AutoArrayHashMapUnmanaged(Trigram, void) = .empty; + defer entries.deinit(allocator); + try entries.ensureTotalCapacity(allocator, element_count); + + var buckets: [filter_size]CuckooFilter.Bucket = undefined; + var filter: CuckooFilter = .init(&buckets); + var filter_prng: std.Random.DefaultPrng = .init(42); + + for (0..2_500) |gen_prng_seed| { + entries.clearRetainingCapacity(); + filter.reset(); + + var gen_prng: std.Random.DefaultPrng = .init(gen_prng_seed); + for (0..element_count) |_| { + const trigram: Trigram = @bitCast(gen_prng.random().int(u24)); + entries.putAssumeCapacity(trigram, {}); + try filter.append(filter_prng.random(), trigram); + } + + // No false negatives + for (entries.keys()) |trigram| { + try std.testing.expect(filter.contains(trigram)); + } + + // Reasonable false positive rate + const fpr_count = 2_500; + var false_positives: usize = 0; + var negative_prng: std.Random.DefaultPrng = .init(~gen_prng_seed); + for (0..fpr_count) |_| { + var trigram: Trigram = @bitCast(negative_prng.random().int(u24)); + while (entries.contains(trigram)) { + trigram = @bitCast(negative_prng.random().int(u24)); + } + + false_positives += @intFromBool(filter.contains(trigram)); + } + + const fpr = @as(f32, @floatFromInt(false_positives)) / fpr_count; + + errdefer std.log.err("fpr: {d}%", .{fpr * 100}); + try std.testing.expect(fpr < 0.035); + } +} diff --git a/src/features/workspace_symbols.zig b/src/features/workspace_symbols.zig new file mode 100644 index 000000000..797ce2727 --- /dev/null +++ b/src/features/workspace_symbols.zig @@ -0,0 +1,95 @@ +//! Implementation of [`workspace/symbol`](https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#workspace_symbol) + +const std = @import("std"); + +const lsp = @import("lsp"); +const types = lsp.types; + +const DocumentStore = @import("../DocumentStore.zig"); +const offsets = @import("../offsets.zig"); +const Server = @import("../Server.zig"); +const TrigramStore = @import("../TrigramStore.zig"); +const URI = @import("../uri.zig"); + +pub fn handler(server: *Server, arena: std.mem.Allocator, request: types.WorkspaceSymbolParams) error{OutOfMemory}!lsp.ResultType("workspace/symbol") { + if (request.query.len < 3) return null; + + var workspace_paths: std.ArrayList([]const u8) = try .initCapacity(arena, server.workspaces.items.len); + for (server.workspaces.items) |workspace| { + const path = URI.toFsPath(arena, workspace.uri) catch |err| switch (err) { + error.OutOfMemory => return error.OutOfMemory, + error.UnsupportedScheme => continue, + else => continue, + }; + workspace_paths.appendAssumeCapacity(path); + } + + const handles = try server.document_store.loadTrigramStores(workspace_paths.items); + defer server.document_store.allocator.free(handles); + + var symbols: std.ArrayListUnmanaged(lsp.types.WorkspaceSymbol) = .empty; + var declaration_buffer: std.ArrayListUnmanaged(TrigramStore.Declaration.Index) = .empty; + + for (handles) |handle| { + const trigram_store = handle.getTrigramStoreCached(); + + declaration_buffer.clearRetainingCapacity(); + try trigram_store.declarationsForQuery(arena, request.query, &declaration_buffer); + + const SortContext = struct { + names: []const std.zig.Ast.TokenIndex, + fn lessThan(ctx: @This(), lhs: TrigramStore.Declaration.Index, rhs: TrigramStore.Declaration.Index) bool { + return ctx.names[@intFromEnum(lhs)] < ctx.names[@intFromEnum(rhs)]; + } + }; + + std.mem.sortUnstable( + TrigramStore.Declaration.Index, + declaration_buffer.items, + SortContext{ .names = trigram_store.declarations.items(.name) }, + SortContext.lessThan, + ); + + const slice = trigram_store.declarations.slice(); + const names = slice.items(.name); + const kinds = slice.items(.kind); + + var last_index: usize = 0; + var last_position: offsets.Position = .{ .line = 0, .character = 0 }; + + try symbols.ensureUnusedCapacity(arena, declaration_buffer.items.len); + for (declaration_buffer.items) |declaration| { + const name_token = names[@intFromEnum(declaration)]; + const kind = kinds[@intFromEnum(declaration)]; + + const loc = offsets.identifierTokenToNameLoc(handle.tree, name_token); + const name = offsets.identifierTokenToNameSlice(handle.tree, name_token); + + const start_position = offsets.advancePosition(handle.tree.source, last_position, last_index, loc.start, server.offset_encoding); + const end_position = offsets.advancePosition(handle.tree.source, start_position, loc.start, loc.end, server.offset_encoding); + last_index = loc.end; + last_position = end_position; + + symbols.appendAssumeCapacity(.{ + .name = name, + .kind = switch (kind) { + .variable => .Variable, + .constant => .Constant, + .function => .Function, + .test_function => .Method, // there is no SymbolKind that represents a tests, + }, + .location = .{ + .Location = .{ + .uri = handle.uri, + .range = .{ + .start = start_position, + .end = end_position, + }, + }, + }, + }); + } + } + + return .{ .array_of_WorkspaceSymbol = symbols.items }; +} diff --git a/src/zls.zig b/src/zls.zig index 4ecc96665..650ff66b8 100644 --- a/src/zls.zig +++ b/src/zls.zig @@ -18,6 +18,7 @@ pub const Server = @import("Server.zig"); pub const snippets = @import("snippets.zig"); pub const testing = @import("testing.zig"); pub const translate_c = @import("translate_c.zig"); +pub const TrigramStore = @import("TrigramStore.zig"); pub const URI = @import("uri.zig"); pub const code_actions = @import("features/code_actions.zig");