From 1c94ceedfa52cb2db09661f53f84bf88857c3a3f Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Sat, 30 Nov 2024 09:59:40 +0100 Subject: [PATCH 01/10] String literal to multiline literal code action --- src/Server.zig | 1 + src/features/code_actions.zig | 63 +++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/Server.zig b/src/Server.zig index cd50bdf7c..1056df3cf 100644 --- a/src/Server.zig +++ b/src/Server.zig @@ -1660,6 +1660,7 @@ fn codeActionHandler(server: *Server, arena: std.mem.Allocator, request: types.C var actions: std.ArrayListUnmanaged(types.CodeAction) = .{}; try builder.generateCodeAction(error_bundle, &actions); + try builder.generateCodeActionsInRange(request.range, &actions); const Result = lsp.types.getRequestMetadata("textDocument/codeAction").?.Result; const result = try arena.alloc(std.meta.Child(std.meta.Child(Result)), actions.items.len); diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index 890d43102..f16517f63 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -75,6 +75,29 @@ pub const Builder = struct { return only_kinds.contains(kind); } + pub fn generateCodeActionsInRange( + builder: *Builder, + range: types.Range, + actions: *std.ArrayListUnmanaged(types.CodeAction), + ) error{OutOfMemory}!void { + const tree = builder.handle.tree; + const source_index = offsets.positionToIndex(tree.source, range.start, builder.offset_encoding); + + const token_idx = offsets.sourceIndexToTokenIndex(tree, source_index); + const token_tags = tree.tokens.items(.tag); + const position_token = token_tags[token_idx]; + + const ctx = try Analyser.getPositionContext(builder.arena, builder.handle.tree, source_index, true); + + switch (ctx) { + .string_literal => switch (position_token) { + .string_literal => try generateStringLiteralCodeActions(builder, token_idx, actions), + else => {}, + }, + else => {}, + } + } + pub fn createTextEditLoc(self: *Builder, loc: offsets.Loc, new_text: []const u8) types.TextEdit { const range = offsets.locToRange(self.handle.tree.source, loc, self.offset_encoding); return types.TextEdit{ .range = range, .newText = new_text }; @@ -93,6 +116,46 @@ pub const Builder = struct { } }; +pub fn generateStringLiteralCodeActions( + builder: *Builder, + token: Ast.TokenIndex, + actions: *std.ArrayListUnmanaged(types.CodeAction), +) !void { + const tags = builder.handle.tree.tokens.items(.tag); + switch (tags[token -| 1]) { + // Not covered by position context + .keyword_test, .keyword_extern => return, + else => {}, + } + + const token_text = offsets.tokenToSlice(builder.handle.tree, token); // Includes quotes + const parsed = std.zig.string_literal.parseAlloc(builder.arena, token_text) catch |err| switch (err) { + error.InvalidLiteral => return, + else => |other| return other, + }; + // Check for disallowed characters + for (parsed) |c| { + switch (c) { + 0x01...0x09, 0x0b...0x0c, 0x0e...0x1f, 0x7f => return, + else => {}, + } + } + const with_slashes = try std.mem.replaceOwned(u8, builder.arena, parsed, "\n", "\n \\\\"); // Hardcoded 4 spaces + + var result = try std.ArrayListUnmanaged(u8).initCapacity(builder.arena, with_slashes.len + 3); + result.appendSliceAssumeCapacity("\\\\"); + result.appendSliceAssumeCapacity(with_slashes); + result.appendAssumeCapacity('\n'); + + const loc = offsets.tokenToLoc(builder.handle.tree, token); + try actions.append(builder.arena, .{ + .title = "convert to a multiline string literal", + .kind = .{ .custom_value = "refactor.convertStringLiteral" }, + .isPreferred = false, + .edit = try builder.createWorkspaceEdit(&.{builder.createTextEditLoc(loc, result.items)}), + }); +} + /// To report server capabilities pub const supported_code_actions: []const types.CodeActionKind = &.{ .quickfix, From 413785951b0f2fdd978ff8d4efc2a9d90590f3f8 Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Sat, 30 Nov 2024 10:00:06 +0100 Subject: [PATCH 02/10] Multiline string literal to string literal code action --- src/features/code_actions.zig | 66 +++++++++++++++++++++++++++++++++-- 1 file changed, 63 insertions(+), 3 deletions(-) diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index f16517f63..10877a11b 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -2,6 +2,7 @@ const std = @import("std"); const Ast = std.zig.Ast; +const Token = std.zig.Token; const DocumentStore = @import("../DocumentStore.zig"); const DocumentScope = @import("../DocumentScope.zig"); @@ -91,6 +92,7 @@ pub const Builder = struct { switch (ctx) { .string_literal => switch (position_token) { + .multiline_string_literal_line => try generateMultilineStringCodeActions(builder, token_idx, actions), .string_literal => try generateStringLiteralCodeActions(builder, token_idx, actions), else => {}, }, @@ -156,6 +158,64 @@ pub fn generateStringLiteralCodeActions( }); } +pub fn generateMultilineStringCodeActions( + builder: *Builder, + token: Ast.TokenIndex, + actions: *std.ArrayListUnmanaged(types.CodeAction), +) !void { + const token_tags = builder.handle.tree.tokens.items(.tag); + std.debug.assert(.multiline_string_literal_line == token_tags[token]); + // Collect (exclusive) token range of the literal (one token per literal line) + const start = if (std.mem.lastIndexOfNone(Token.Tag, token_tags[0..(token + 1)], &.{.multiline_string_literal_line})) |i| i + 1 else 0; + const end = std.mem.indexOfNonePos(Token.Tag, token_tags, token, &.{.multiline_string_literal_line}) orelse token_tags.len; + + // collect the text in the literal + var str_escaped = std.ArrayListUnmanaged(u8){}; + try str_escaped.append(builder.arena, '"'); + for (start..end) |i| { + std.debug.assert(token_tags[i] == .multiline_string_literal_line); + const string_part = offsets.tokenToSlice(builder.handle.tree, @intCast(i)); + // Iterate without the leading \\ + for (string_part[2..]) |c| { + const chunk = switch (c) { + '\\' => "\\\\", + '"' => "\\\"", + '\n' => "\\n", + 0x01...0x09, 0x0b...0x0c, 0x0e...0x1f, 0x7f => unreachable, + else => &.{c}, + }; + try str_escaped.appendSlice(builder.arena, chunk); + } + if (i != end - 1) { + try str_escaped.appendSlice(builder.arena, "\\n"); + } + } + try str_escaped.append(builder.arena, '"'); + + // Get Loc of the whole literal to delete it + // Multiline string literal ends before the \n or \r, but it must be deleted too + const first_token_start = builder.handle.tree.tokens.items(.start)[start]; + const last_token_end = blk: { + var i = offsets.tokenToLoc(builder.handle.tree, @intCast(end - 1)).end + 1; + const source = builder.handle.tree.source; + while (i < source.len) : (i += 1) { + switch (source[i]) { + '\n', '\r' => {}, + else => break, + } + } + break :blk i; + }; + const remove_loc = offsets.Loc{ .start = first_token_start, .end = last_token_end }; + + try actions.append(builder.arena, .{ + .title = "convert to a string literal", + .kind = .{ .custom_value = "refactor.convertStringLiteral" }, + .isPreferred = false, + .edit = try builder.createWorkspaceEdit(&.{builder.createTextEditLoc(remove_loc, str_escaped.items)}), + }); +} + /// To report server capabilities pub const supported_code_actions: []const types.CodeActionKind = &.{ .quickfix, @@ -183,7 +243,7 @@ pub fn collectAutoDiscardDiagnostics( var i: usize = 0; while (i < tree.tokens.len) { const first_token: Ast.TokenIndex = @intCast(std.mem.indexOfPos( - std.zig.Token.Tag, + Token.Tag, token_tags, i, &.{ .identifier, .equal, .identifier, .semicolon }, @@ -397,7 +457,7 @@ fn handleUnusedCapture( const identifier_name = offsets.locToSlice(source, loc); - const capture_end: Ast.TokenIndex = @intCast(std.mem.indexOfScalarPos(std.zig.Token.Tag, token_tags, identifier_token, .pipe) orelse return); + const capture_end: Ast.TokenIndex = @intCast(std.mem.indexOfScalarPos(Token.Tag, token_tags, identifier_token, .pipe) orelse return); var lbrace_token = capture_end + 1; @@ -527,7 +587,7 @@ fn handleUnorganizedImport(builder: *Builder, actions: *std.ArrayListUnmanaged(t try writer.writeByte('\n'); const tokens = tree.tokens.items(.tag); - const first_token = std.mem.indexOfNone(std.zig.Token.Tag, tokens, &.{.container_doc_comment}) orelse tokens.len; + const first_token = std.mem.indexOfNone(Token.Tag, tokens, &.{.container_doc_comment}) orelse tokens.len; const insert_pos = offsets.tokenToPosition(tree, @intCast(first_token), builder.offset_encoding); try edits.append(builder.arena, .{ From 214fdb0daf35c93e35a60ec5c2ffbc0759655d99 Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Sat, 30 Nov 2024 10:00:27 +0100 Subject: [PATCH 03/10] Tests of string literal code action --- tests/lsp_features/code_actions.zig | 181 +++++++++++++++++++++++++++- 1 file changed, 175 insertions(+), 6 deletions(-) diff --git a/tests/lsp_features/code_actions.zig b/tests/lsp_features/code_actions.zig index 84ccd1178..428e1cc9b 100644 --- a/tests/lsp_features/code_actions.zig +++ b/tests/lsp_features/code_actions.zig @@ -2,6 +2,7 @@ const std = @import("std"); const zls = @import("zls"); const Context = @import("../context.zig").Context; +const helper = @import("../helper.zig"); const types = zls.types; const offsets = zls.offsets; @@ -643,6 +644,157 @@ test "organize imports - edge cases" { ); } +test "convert multiline string literal" { + try testConvertString( + \\const foo = \\Hello + \\ \\World + \\; + , + \\const foo = "Hello\nWorld"; + ); + // Empty + try testConvertString( + \\const foo = \\ + \\; + , + \\const foo = ""; + ); + // Multi-byte characters + try testConvertString( + \\const foo = \\He😂llo + \\ \\Wo🤓rld + \\; + , + \\const foo = "He😂llo\nWo🤓rld"; + ); + // Quotes + try testConvertString( + \\const foo = \\The "cure" + \\; + , + \\const foo = "The \"cure\""; + ); + try testConvertString( + \\const foo = \\\x49 \u{0033} + \\ \\\n' + \\ \\ + \\; + , + \\const foo = "\\x49 \\u{0033}\n\\n'\n"; + ); + // The control characters TAB and CR are rejected by the grammar inside multi-line string literals, + // except if CR is directly before NL. + try testConvertString( // (force format) + "const foo = \\\\Hello\r\n;", + \\const foo = "Hello"; + ); +} + +test "convert string literal to multiline" { + try testConvertString( + \\const foo = "Hello\nWorld"; + , + \\const foo = \\Hello + \\ \\World + \\; + ); + // Empty + try testConvertString( + \\const foo = ""; + , + \\const foo = \\ + \\; + ); + // In function + try testConvertString( + \\const x = foo("bar\nbaz"); + , + \\const x = foo(\\bar + \\ \\baz + \\); + ); +} + +test "convert string literal to multiline - escapes" { + // Hex escapes + try testConvertString( + \\const foo = "\x41\x42\x43"; + , + \\const foo = \\ABC + \\; + ); + // Hex escapes that form a unicode character in utf-8 + try testConvertString( + \\const foo = "\xE2\x9C\x85"; + , + \\const foo = \\✅ + \\; + ); + // Newlines + try testConvertString( + \\const foo = "\nhello\n\n"; + , + \\const foo = \\ + \\ \\hello + \\ \\ + \\ \\ + \\; + ); + // Quotes and slashes + try testConvertString( + \\const foo = "A slash: \'\\\'"; + , + \\const foo = \\A slash: '\' + \\; + ); + // Unicode + try testConvertString( + \\const foo = "Smile: \u{1F913}"; + , + \\const foo = \\Smile: 🤓 + \\; + ); +} + +test "convert string literal to multiline - invalid" { + // Invalid unicode + try testConvertString( + \\const foo = "Smile: \u{1F9131}"; + , + \\const foo = "Smile: \u{1F9131}"; + ); + // Hex escaped unprintable character + try testConvertString( + \\const foo = "\x7f"; + , + \\const foo = "\x7f"; + ); + // Tabs are invalid too + try testConvertString( + \\const foo = "\tWe use tabs"; + , + \\const foo = "\tWe use tabs"; + ); + // Not in @import + try testConvertString( + \\const std = @import("std"); + , + \\const std = @import("std"); + ); + // Not in test + try testConvertString( + \\test "addition" { } + , + \\test "addition" { } + ); + // Not in extern + try testConvertString( + \\pub extern "c" fn printf(format: [*:0]const u8) c_int; + , + \\pub extern "c" fn printf(format: [*:0]const u8) c_int; + ); +} + fn testAutofix(before: []const u8, after: []const u8) !void { try testDiagnostic(before, after, .{ .filter_kind = .@"source.fixAll", .want_zir = true }); // diagnostics come from our AstGen fork try testDiagnostic(before, after, .{ .filter_kind = .@"source.fixAll", .want_zir = false }); // diagnostics come from calling zig ast-check @@ -652,6 +804,10 @@ fn testOrganizeImports(before: []const u8, after: []const u8) !void { try testDiagnostic(before, after, .{ .filter_kind = .@"source.organizeImports" }); } +fn testConvertString(before: []const u8, after: []const u8) !void { + try testDiagnostic(before, after, .{ .filter_kind = types.CodeActionKind{ .custom_value = "refactor.convertStringLiteral" } }); +} + fn testDiagnostic( before: []const u8, after: []const u8, @@ -665,7 +821,23 @@ fn testDiagnostic( defer ctx.deinit(); ctx.server.config.prefer_ast_check_as_child_process = !options.want_zir; - const uri = try ctx.addDocument(.{ .source = before }); + const placeholders = try helper.collectPlaceholderLocs(allocator, before); + defer allocator.free(placeholders); + const range: types.Range = switch (placeholders.len) { + 0 => .{ + .start = .{ .line = 0, .character = 0 }, + .end = offsets.indexToPosition(before, before.len, ctx.server.offset_encoding), + }, + 1 => blk: { + const point = offsets.indexToPosition(before, placeholders[0].start, ctx.server.offset_encoding); + break :blk .{ .start = point, .end = point }; + }, + else => unreachable, + }; + const source = try helper.clearPlaceholders(allocator, before); + defer allocator.free(source); + + const uri = try ctx.addDocument(.{ .source = source }); const handle = ctx.server.document_store.getHandle(uri).?; var error_bundle = try zls.diagnostics.getAstCheckDiagnostics(ctx.server, handle); @@ -681,10 +853,7 @@ fn testDiagnostic( const params: types.CodeActionParams = .{ .textDocument = .{ .uri = uri }, - .range = .{ - .start = .{ .line = 0, .character = 0 }, - .end = offsets.indexToPosition(before, before.len, ctx.server.offset_encoding), - }, + .range = range, .context = .{ .diagnostics = diagnostics, .only = if (options.filter_kind) |kind| &.{kind} else null, @@ -719,7 +888,7 @@ fn testDiagnostic( try text_edits.appendSlice(allocator, changes.get(uri).?); } - const actual = try zls.diff.applyTextEdits(allocator, before, text_edits.items, ctx.server.offset_encoding); + const actual = try zls.diff.applyTextEdits(allocator, source, text_edits.items, ctx.server.offset_encoding); defer allocator.free(actual); try ctx.server.document_store.refreshDocument(uri, try allocator.dupeZ(u8, actual)); From c129bd341f7babe5e51676ea2a4e733700731894 Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Mon, 2 Dec 2024 21:41:11 +0100 Subject: [PATCH 04/10] Position context of strings expanded to the markup See the tests for changes. Tested on VS Code and Neovim. There are two tests that I would like to see passing, but I failed to make them --- src/analysis.zig | 52 +++++++++++++++--------- src/features/completions.zig | 2 +- tests/utility/position_context.zig | 65 ++++++++++++++++++------------ 3 files changed, 73 insertions(+), 46 deletions(-) diff --git a/src/analysis.zig b/src/analysis.zig index 889468283..f18822c49 100644 --- a/src/analysis.zig +++ b/src/analysis.zig @@ -3404,6 +3404,33 @@ pub const PositionContext = union(enum) { => return null, }; } + + /// Expects that `self` is one of the following: + /// - `.import_string_literal` + /// - `.cinclude_string_literal` + /// - `.embedfile_string_literal` + /// - `.string_literal` + pub fn content_loc(self: PositionContext, source: []const u8) ?offsets.Loc { + var location = switch (self) { + .import_string_literal, + .cinclude_string_literal, + .embedfile_string_literal, + .string_literal, + => |l| l, + else => return null, + }; + + const string_literal_slice = offsets.locToSlice(source, location); + if (std.mem.startsWith(u8, string_literal_slice, "\"")) { + location.start += 1; + if (std.mem.endsWith(u8, string_literal_slice[1..], "\"")) { + location.end -= 1; + } + } else if (std.mem.startsWith(u8, string_literal_slice, "\\")) { + location.start += 2; + } + return location; + } }; const StackState = struct { @@ -3488,10 +3515,7 @@ pub fn getPositionContext( // `tok` is the latter of the two. if (!should_do_lookahead) break; switch (tok.tag) { - .identifier, - .builtin, - .number_literal, - => should_do_lookahead = false, + .identifier, .builtin, .number_literal, .string_literal, .multiline_string_literal_line => should_do_lookahead = false, else => break, } } @@ -3518,19 +3542,11 @@ pub fn getPositionContext( var curr_ctx = try peek(allocator, &stack); switch (tok.tag) { .string_literal, .multiline_string_literal_line => string_lit_block: { - const string_literal_slice = offsets.locToSlice(tree.source, tok.loc); - var string_literal_loc = tok.loc; - - if (std.mem.startsWith(u8, string_literal_slice, "\"")) { - string_literal_loc.start += 1; - if (std.mem.endsWith(u8, string_literal_slice[1..], "\"")) { - string_literal_loc.end -= 1; - } - } else if (std.mem.startsWith(u8, string_literal_slice, "\\")) { - string_literal_loc.start += 2; - } + const string_literal_loc = tok.loc; - if (!(string_literal_loc.start <= source_index and source_index <= string_literal_loc.end)) break :string_lit_block; + if (string_literal_loc.start > source_index or source_index > string_literal_loc.end) break :string_lit_block; + if (tok.tag != .multiline_string_literal_line and lookahead and source_index == string_literal_loc.end) break :string_lit_block; + curr_ctx.ctx = .{ .string_literal = string_literal_loc }; if (curr_ctx.stack_id == .Paren and stack.items.len >= 2) { const perhaps_builtin = stack.items[stack.items.len - 2]; @@ -3540,19 +3556,15 @@ pub fn getPositionContext( const builtin_name = tree.source[loc.start..loc.end]; if (std.mem.eql(u8, builtin_name, "@import")) { curr_ctx.ctx = .{ .import_string_literal = string_literal_loc }; - break :string_lit_block; } else if (std.mem.eql(u8, builtin_name, "@cInclude")) { curr_ctx.ctx = .{ .cinclude_string_literal = string_literal_loc }; - break :string_lit_block; } else if (std.mem.eql(u8, builtin_name, "@embedFile")) { curr_ctx.ctx = .{ .embedfile_string_literal = string_literal_loc }; - break :string_lit_block; } }, else => {}, } } - curr_ctx.ctx = .{ .string_literal = string_literal_loc }; }, .identifier => switch (curr_ctx.ctx) { .enum_literal => curr_ctx.ctx = .{ .enum_literal = tokenLocAppend(curr_ctx.ctx.loc().?, tok) }, diff --git a/src/features/completions.zig b/src/features/completions.zig index a68e40271..dd7cb7fc0 100644 --- a/src/features/completions.zig +++ b/src/features/completions.zig @@ -697,7 +697,7 @@ fn completeFileSystemStringLiteral(builder: *Builder, pos_context: Analyser.Posi const store = &builder.server.document_store; const source = builder.orig_handle.tree.source; - var string_content_loc = pos_context.loc().?; + var string_content_loc = pos_context.content_loc(source).?; // the position context is without lookahead so we have to do it ourself while (string_content_loc.end < source.len) : (string_content_loc.end += 1) { diff --git a/tests/utility/position_context.zig b/tests/utility/position_context.zig index 61b59c8b9..62fdb2c23 100644 --- a/tests/utility/position_context.zig +++ b/tests/utility/position_context.zig @@ -281,76 +281,91 @@ test "comment" { test "import/embedfile string literal" { try testContext( - \\const std = @import("st"); + \\const std = @import("st"); , .import_string_literal, .{ .lookahead = false }); try testContext( - \\const std = @import("st"); + \\const std = @import("st"); , .import_string_literal, .{ .lookahead = false }); try testContext( - \\const std = @embedFile("file."); + \\const std = @embedFile("file."); , .embedfile_string_literal, .{ .lookahead = false }); try testContext( - \\const std = @embedFile("file."); + \\const std = @embedFile("file."); , .embedfile_string_literal, .{ .lookahead = false }); try testContext( \\const std = @import("std"); - , .empty, .{}); + , .empty, .{ .lookahead = true }); + // TODO + // try testContext( + // \\const std = @import("std"); + // , .empty, .{ .lookahead = false }); try testContext( - \\const std = @import("std"); - , .empty, .{}); + \\const std = @import("std"); + , .import_string_literal, .{ .lookahead = true }); } test "string literal" { try testContext( - \\var foo = "hello world!"; - , .empty, .{}); + \\var foo = "hello world!"; + , .string_literal, .{ .lookahead = true }); + try testContext( + \\var foo = "hello world!"; + , .string_literal, .{ .lookahead = true }); + try testContext( + \\var foo = "hello world!"; + , .string_literal, .{ .lookahead = false }); + // TODO + // try testContext( + // \\var foo = "hello world!"; + // , .empty, .{ .lookahead = false }); try testContext( \\var foo = "hello world!"; - , .empty, .{}); + , .empty, .{ .lookahead = true }); try testContext( - \\var foo = ""; - , .string_literal, .{}); + \\var foo = ""; + , .string_literal, .{ .lookahead = false }); + try testContext( + \\var foo = ""; + , .string_literal, .{ .lookahead = true }); // TODO // try testContext( // \\var foo = "\""; // , .string_literal, .{}); try testContext( - \\var foo = "hello world!"; + \\var foo = "hello world!"; , .string_literal, .{ .lookahead = false }); try testContext( - \\var foo = "hello world!"; + \\var foo = "hello world!"; , .string_literal, .{ .lookahead = true }); - - // TODO - // try testContext( - // \\var foo = "hello world!"; - // , .empty, .{}); } test "multi-line string literal" { try testContext( \\var foo = \\hello; - , .empty, .{}); + , .empty, .{ .lookahead = false }); + try testContext( + \\var foo = \\hello; + , .string_literal, .{ .lookahead = true }); try testContext( - \\var foo = \\ + \\var foo = \\ , .string_literal, .{}); try testContext( - \\var foo = \\\" + \\var foo = \\\" , .string_literal, .{}); try testContext( - \\var foo = \\hello world! + \\var foo = \\hello world! , .string_literal, .{ .lookahead = false }); try testContext( - \\var foo = \\hello world! + \\var foo = \\hello world! , .string_literal, .{ .lookahead = true }); try testContext( - \\var foo = \\hello; + \\var foo = \\hello; , .string_literal, .{}); } From 6ac39a15410ee42e4a0bc497354a7a8bc35504e3 Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Mon, 2 Dec 2024 21:50:33 +0100 Subject: [PATCH 05/10] Check for validity of resulting utf-8 string --- src/features/code_actions.zig | 9 ++++----- tests/lsp_features/code_actions.zig | 6 ++++++ 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index 10877a11b..6b85445bd 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -135,13 +135,12 @@ pub fn generateStringLiteralCodeActions( error.InvalidLiteral => return, else => |other| return other, }; - // Check for disallowed characters + // Check for disallowed characters and utf-8 validity for (parsed) |c| { - switch (c) { - 0x01...0x09, 0x0b...0x0c, 0x0e...0x1f, 0x7f => return, - else => {}, - } + if (c == '\n' or c == '\r') continue; + if (std.ascii.isControl(c)) return; } + if (!std.unicode.utf8ValidateSlice(parsed)) return; const with_slashes = try std.mem.replaceOwned(u8, builder.arena, parsed, "\n", "\n \\\\"); // Hardcoded 4 spaces var result = try std.ArrayListUnmanaged(u8).initCapacity(builder.arena, with_slashes.len + 3); diff --git a/tests/lsp_features/code_actions.zig b/tests/lsp_features/code_actions.zig index 428e1cc9b..e7958aaeb 100644 --- a/tests/lsp_features/code_actions.zig +++ b/tests/lsp_features/code_actions.zig @@ -763,6 +763,12 @@ test "convert string literal to multiline - invalid" { , \\const foo = "Smile: \u{1F9131}"; ); + // Invalid utf-8 + try testConvertString( + \\const foo = "\xaa"; + , + \\const foo = "\xaa"; + ); // Hex escaped unprintable character try testConvertString( \\const foo = "\x7f"; From 95ecd80b64045c0be01060792dd7ebe91e42fa62 Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Mon, 2 Dec 2024 22:04:45 +0100 Subject: [PATCH 06/10] Small refactorings in string conversion code action --- src/features/code_actions.zig | 37 ++++++++++++++++------------- tests/lsp_features/code_actions.zig | 9 +++---- 2 files changed, 26 insertions(+), 20 deletions(-) diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index 6b85445bd..a0aad2298 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -81,6 +81,9 @@ pub const Builder = struct { range: types.Range, actions: *std.ArrayListUnmanaged(types.CodeAction), ) error{OutOfMemory}!void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + const tree = builder.handle.tree; const source_index = offsets.positionToIndex(tree.source, range.start, builder.offset_encoding); @@ -123,6 +126,9 @@ pub fn generateStringLiteralCodeActions( token: Ast.TokenIndex, actions: *std.ArrayListUnmanaged(types.CodeAction), ) !void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + const tags = builder.handle.tree.tokens.items(.tag); switch (tags[token -| 1]) { // Not covered by position context @@ -162,6 +168,9 @@ pub fn generateMultilineStringCodeActions( token: Ast.TokenIndex, actions: *std.ArrayListUnmanaged(types.CodeAction), ) !void { + const tracy_zone = tracy.trace(@src()); + defer tracy_zone.end(); + const token_tags = builder.handle.tree.tokens.items(.tag); std.debug.assert(.multiline_string_literal_line == token_tags[token]); // Collect (exclusive) token range of the literal (one token per literal line) @@ -169,8 +178,9 @@ pub fn generateMultilineStringCodeActions( const end = std.mem.indexOfNonePos(Token.Tag, token_tags, token, &.{.multiline_string_literal_line}) orelse token_tags.len; // collect the text in the literal - var str_escaped = std.ArrayListUnmanaged(u8){}; - try str_escaped.append(builder.arena, '"'); + const loc = offsets.tokensToLoc(builder.handle.tree, @intCast(start), @intCast(end)); + var str_escaped = try std.ArrayListUnmanaged(u8).initCapacity(builder.arena, 2 * (loc.end - loc.start)); + str_escaped.appendAssumeCapacity('"'); for (start..end) |i| { std.debug.assert(token_tags[i] == .multiline_string_literal_line); const string_part = offsets.tokenToSlice(builder.handle.tree, @intCast(i)); @@ -183,28 +193,23 @@ pub fn generateMultilineStringCodeActions( 0x01...0x09, 0x0b...0x0c, 0x0e...0x1f, 0x7f => unreachable, else => &.{c}, }; - try str_escaped.appendSlice(builder.arena, chunk); + str_escaped.appendSliceAssumeCapacity(chunk); } if (i != end - 1) { - try str_escaped.appendSlice(builder.arena, "\\n"); + str_escaped.appendSliceAssumeCapacity("\\n"); } } - try str_escaped.append(builder.arena, '"'); + str_escaped.appendAssumeCapacity('"'); // Get Loc of the whole literal to delete it // Multiline string literal ends before the \n or \r, but it must be deleted too const first_token_start = builder.handle.tree.tokens.items(.start)[start]; - const last_token_end = blk: { - var i = offsets.tokenToLoc(builder.handle.tree, @intCast(end - 1)).end + 1; - const source = builder.handle.tree.source; - while (i < source.len) : (i += 1) { - switch (source[i]) { - '\n', '\r' => {}, - else => break, - } - } - break :blk i; - }; + const last_token_end = std.mem.indexOfNonePos( + u8, + builder.handle.tree.source, + offsets.tokenToLoc(builder.handle.tree, @intCast(end - 1)).end + 1, + "\n\r", + ) orelse builder.handle.tree.source.len; const remove_loc = offsets.Loc{ .start = first_token_start, .end = last_token_end }; try actions.append(builder.arena, .{ diff --git a/tests/lsp_features/code_actions.zig b/tests/lsp_features/code_actions.zig index e7958aaeb..9c261fd8b 100644 --- a/tests/lsp_features/code_actions.zig +++ b/tests/lsp_features/code_actions.zig @@ -827,8 +827,11 @@ fn testDiagnostic( defer ctx.deinit(); ctx.server.config.prefer_ast_check_as_child_process = !options.want_zir; - const placeholders = try helper.collectPlaceholderLocs(allocator, before); - defer allocator.free(placeholders); + var phr = try helper.collectClearPlaceholders(allocator, before); + defer phr.deinit(allocator); + const placeholders = phr.locations.items(.new); + const source = phr.new_source; + const range: types.Range = switch (placeholders.len) { 0 => .{ .start = .{ .line = 0, .character = 0 }, @@ -840,8 +843,6 @@ fn testDiagnostic( }, else => unreachable, }; - const source = try helper.clearPlaceholders(allocator, before); - defer allocator.free(source); const uri = try ctx.addDocument(.{ .source = source }); const handle = ctx.server.document_store.getHandle(uri).?; From f1a422f1cb26579758938a659734c89397b2925d Mon Sep 17 00:00:00 2001 From: Sekky61 Date: Mon, 2 Dec 2024 22:10:39 +0100 Subject: [PATCH 07/10] Rename code action from `"refactor.convertStringLiteral"` to `.refactor` --- src/features/code_actions.zig | 8 ++++++-- tests/lsp_features/code_actions.zig | 2 +- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index a0aad2298..145949385 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -129,6 +129,8 @@ pub fn generateStringLiteralCodeActions( const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); + if (!builder.wantKind(.refactor)) return; + const tags = builder.handle.tree.tokens.items(.tag); switch (tags[token -| 1]) { // Not covered by position context @@ -157,7 +159,7 @@ pub fn generateStringLiteralCodeActions( const loc = offsets.tokenToLoc(builder.handle.tree, token); try actions.append(builder.arena, .{ .title = "convert to a multiline string literal", - .kind = .{ .custom_value = "refactor.convertStringLiteral" }, + .kind = .refactor, .isPreferred = false, .edit = try builder.createWorkspaceEdit(&.{builder.createTextEditLoc(loc, result.items)}), }); @@ -171,6 +173,8 @@ pub fn generateMultilineStringCodeActions( const tracy_zone = tracy.trace(@src()); defer tracy_zone.end(); + if (!builder.wantKind(.refactor)) return; + const token_tags = builder.handle.tree.tokens.items(.tag); std.debug.assert(.multiline_string_literal_line == token_tags[token]); // Collect (exclusive) token range of the literal (one token per literal line) @@ -214,7 +218,7 @@ pub fn generateMultilineStringCodeActions( try actions.append(builder.arena, .{ .title = "convert to a string literal", - .kind = .{ .custom_value = "refactor.convertStringLiteral" }, + .kind = .refactor, .isPreferred = false, .edit = try builder.createWorkspaceEdit(&.{builder.createTextEditLoc(remove_loc, str_escaped.items)}), }); diff --git a/tests/lsp_features/code_actions.zig b/tests/lsp_features/code_actions.zig index 9c261fd8b..be05b3c1b 100644 --- a/tests/lsp_features/code_actions.zig +++ b/tests/lsp_features/code_actions.zig @@ -811,7 +811,7 @@ fn testOrganizeImports(before: []const u8, after: []const u8) !void { } fn testConvertString(before: []const u8, after: []const u8) !void { - try testDiagnostic(before, after, .{ .filter_kind = types.CodeActionKind{ .custom_value = "refactor.convertStringLiteral" } }); + try testDiagnostic(before, after, .{ .filter_kind = types.CodeActionKind.refactor }); } fn testDiagnostic( From 08e46b46eb72b757b3135b4243f00491485f5d7f Mon Sep 17 00:00:00 2001 From: Techatrix Date: Tue, 3 Dec 2024 15:22:22 +0100 Subject: [PATCH 08/10] further adjust position context for string literals --- src/analysis.zig | 40 +++++++++++++++++++++--------- src/features/code_actions.zig | 24 ++++++++++-------- src/features/completions.zig | 15 +++++------ tests/utility/position_context.zig | 36 ++++++++++----------------- 4 files changed, 61 insertions(+), 54 deletions(-) diff --git a/src/analysis.zig b/src/analysis.zig index f18822c49..c083343be 100644 --- a/src/analysis.zig +++ b/src/analysis.zig @@ -3405,19 +3405,19 @@ pub const PositionContext = union(enum) { }; } - /// Expects that `self` is one of the following: + /// Asserts that `self` is one of the following: /// - `.import_string_literal` /// - `.cinclude_string_literal` /// - `.embedfile_string_literal` /// - `.string_literal` - pub fn content_loc(self: PositionContext, source: []const u8) ?offsets.Loc { + pub fn stringLiteralContentLoc(self: PositionContext, source: []const u8) offsets.Loc { var location = switch (self) { .import_string_literal, .cinclude_string_literal, .embedfile_string_literal, .string_literal, => |l| l, - else => return null, + else => unreachable, }; const string_literal_slice = offsets.locToSlice(source, location); @@ -3515,7 +3515,12 @@ pub fn getPositionContext( // `tok` is the latter of the two. if (!should_do_lookahead) break; switch (tok.tag) { - .identifier, .builtin, .number_literal, .string_literal, .multiline_string_literal_line => should_do_lookahead = false, + .identifier, + .builtin, + .number_literal, + .string_literal, + .multiline_string_literal_line, + => should_do_lookahead = false, else => break, } } @@ -3542,24 +3547,35 @@ pub fn getPositionContext( var curr_ctx = try peek(allocator, &stack); switch (tok.tag) { .string_literal, .multiline_string_literal_line => string_lit_block: { - const string_literal_loc = tok.loc; + curr_ctx.ctx = .{ .string_literal = tok.loc }; + if (tok.tag != .string_literal) break :string_lit_block; + + const string_literal_slice = offsets.locToSlice(tree.source, tok.loc); + var content_loc = tok.loc; + + if (std.mem.startsWith(u8, string_literal_slice, "\"")) { + content_loc.start += 1; + if (std.mem.endsWith(u8, string_literal_slice[1..], "\"")) { + content_loc.end -= 1; + } + } - if (string_literal_loc.start > source_index or source_index > string_literal_loc.end) break :string_lit_block; - if (tok.tag != .multiline_string_literal_line and lookahead and source_index == string_literal_loc.end) break :string_lit_block; - curr_ctx.ctx = .{ .string_literal = string_literal_loc }; + if (source_index < content_loc.start or content_loc.end < source_index) break :string_lit_block; - if (curr_ctx.stack_id == .Paren and stack.items.len >= 2) { + if (curr_ctx.stack_id == .Paren and + stack.items.len >= 2) + { const perhaps_builtin = stack.items[stack.items.len - 2]; switch (perhaps_builtin.ctx) { .builtin => |loc| { const builtin_name = tree.source[loc.start..loc.end]; if (std.mem.eql(u8, builtin_name, "@import")) { - curr_ctx.ctx = .{ .import_string_literal = string_literal_loc }; + curr_ctx.ctx = .{ .import_string_literal = tok.loc }; } else if (std.mem.eql(u8, builtin_name, "@cInclude")) { - curr_ctx.ctx = .{ .cinclude_string_literal = string_literal_loc }; + curr_ctx.ctx = .{ .cinclude_string_literal = tok.loc }; } else if (std.mem.eql(u8, builtin_name, "@embedFile")) { - curr_ctx.ctx = .{ .embedfile_string_literal = string_literal_loc }; + curr_ctx.ctx = .{ .embedfile_string_literal = tok.loc }; } }, else => {}, diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index 145949385..e0252724f 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -85,20 +85,24 @@ pub const Builder = struct { defer tracy_zone.end(); const tree = builder.handle.tree; - const source_index = offsets.positionToIndex(tree.source, range.start, builder.offset_encoding); - - const token_idx = offsets.sourceIndexToTokenIndex(tree, source_index); const token_tags = tree.tokens.items(.tag); - const position_token = token_tags[token_idx]; + + const source_index = offsets.positionToIndex(tree.source, range.start, builder.offset_encoding); const ctx = try Analyser.getPositionContext(builder.arena, builder.handle.tree, source_index, true); + if (ctx != .string_literal) return; - switch (ctx) { - .string_literal => switch (position_token) { - .multiline_string_literal_line => try generateMultilineStringCodeActions(builder, token_idx, actions), - .string_literal => try generateStringLiteralCodeActions(builder, token_idx, actions), - else => {}, - }, + var token_idx = offsets.sourceIndexToTokenIndex(tree, source_index); + + // if `offsets.sourceIndexToTokenIndex` is called with a source index between two tokens, it will be the token to the right. + switch (token_tags[token_idx]) { + .string_literal, .multiline_string_literal_line => {}, + else => token_idx -|= 1, + } + + switch (token_tags[token_idx]) { + .multiline_string_literal_line => try generateMultilineStringCodeActions(builder, token_idx, actions), + .string_literal => try generateStringLiteralCodeActions(builder, token_idx, actions), else => {}, } } diff --git a/src/features/completions.zig b/src/features/completions.zig index dd7cb7fc0..662a23d9d 100644 --- a/src/features/completions.zig +++ b/src/features/completions.zig @@ -687,7 +687,7 @@ fn completeDot(builder: *Builder, loc: offsets.Loc) error{OutOfMemory}!void { try globalSetCompletions(builder, .enum_set); } -/// Expects that `pos_context` is one of the following: +/// Asserts that `pos_context` is one of the following: /// - `.import_string_literal` /// - `.cinclude_string_literal` /// - `.embedfile_string_literal` @@ -697,17 +697,14 @@ fn completeFileSystemStringLiteral(builder: *Builder, pos_context: Analyser.Posi const store = &builder.server.document_store; const source = builder.orig_handle.tree.source; - var string_content_loc = pos_context.content_loc(source).?; + if (pos_context == .string_literal and !DocumentStore.isBuildFile(builder.orig_handle.uri)) return; + + var string_content_loc = pos_context.stringLiteralContentLoc(source); // the position context is without lookahead so we have to do it ourself - while (string_content_loc.end < source.len) : (string_content_loc.end += 1) { - switch (source[string_content_loc.end]) { - 0, '\n', '\r', '\"' => break, - else => continue, - } - } + string_content_loc.end = std.mem.indexOfAnyPos(u8, source, string_content_loc.end, &.{ 0, '\n', '\r', '\"' }) orelse source.len; - if (pos_context == .string_literal and !DocumentStore.isBuildFile(builder.orig_handle.uri)) return; + if (builder.source_index < string_content_loc.start or string_content_loc.end < builder.source_index) return; const previous_separator_index: ?usize = blk: { var index: usize = builder.source_index; diff --git a/tests/utility/position_context.zig b/tests/utility/position_context.zig index 62fdb2c23..cec07a679 100644 --- a/tests/utility/position_context.zig +++ b/tests/utility/position_context.zig @@ -294,34 +294,24 @@ test "import/embedfile string literal" { , .embedfile_string_literal, .{ .lookahead = false }); try testContext( - \\const std = @import("std"); - , .empty, .{ .lookahead = true }); - // TODO - // try testContext( - // \\const std = @import("std"); - // , .empty, .{ .lookahead = false }); + \\const std = @import("std"); + , .string_literal, .{}); try testContext( \\const std = @import("std"); - , .import_string_literal, .{ .lookahead = true }); + , .string_literal, .{ .lookahead = true }); } test "string literal" { try testContext( - \\var foo = "hello world!"; - , .string_literal, .{ .lookahead = true }); + \\var foo = "hello world!"; + , .empty, .{ .lookahead = false }); try testContext( - \\var foo = "hello world!"; + \\var foo = "hello world!"; , .string_literal, .{ .lookahead = true }); + try testContext( - \\var foo = "hello world!"; - , .string_literal, .{ .lookahead = false }); - // TODO - // try testContext( - // \\var foo = "hello world!"; - // , .empty, .{ .lookahead = false }); - try testContext( - \\var foo = "hello world!"; - , .empty, .{ .lookahead = true }); + \\var foo = "hello world!"; + , .string_literal, .{}); try testContext( \\var foo = ""; @@ -331,8 +321,8 @@ test "string literal" { , .string_literal, .{ .lookahead = true }); // TODO // try testContext( - // \\var foo = "\""; - // , .string_literal, .{}); + // \\var foo = "\""; + // , .string_literal, .{ .lookahead = true }); try testContext( \\var foo = "hello world!"; @@ -344,10 +334,10 @@ test "string literal" { test "multi-line string literal" { try testContext( - \\var foo = \\hello; + \\var foo = \\hello , .empty, .{ .lookahead = false }); try testContext( - \\var foo = \\hello; + \\var foo = \\hello , .string_literal, .{ .lookahead = true }); try testContext( From d31a391e6fad082e45a6c12849181670878925d7 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Tue, 3 Dec 2024 15:22:47 +0100 Subject: [PATCH 09/10] add more test cases for string literal conversion code actions --- tests/lsp_features/code_actions.zig | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/tests/lsp_features/code_actions.zig b/tests/lsp_features/code_actions.zig index be05b3c1b..c78c3a7c2 100644 --- a/tests/lsp_features/code_actions.zig +++ b/tests/lsp_features/code_actions.zig @@ -715,6 +715,32 @@ test "convert string literal to multiline" { ); } +test "convert string literal to multiline - cursor outside of string literal" { + try testConvertString( + \\const foo = "hello"; + , + \\const foo = "hello"; + ); + try testConvertString( + \\const foo = "hello"; + , + \\const foo = \\hello + \\; + ); + try testConvertString( + \\const foo = "hello"; + , + \\const foo = \\hello + \\; + ); + // TODO + // try testConvertString( + // \\const foo = "hello" ; + // , + // \\const foo = "hello" ; + // ); +} + test "convert string literal to multiline - escapes" { // Hex escapes try testConvertString( From 9316eaafc3797e4a448e49bb261dca18453f0267 Mon Sep 17 00:00:00 2001 From: Techatrix Date: Tue, 3 Dec 2024 15:28:42 +0100 Subject: [PATCH 10/10] no string conversion code actions for strings with carriage returns --- src/features/code_actions.zig | 2 +- tests/lsp_features/code_actions.zig | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/features/code_actions.zig b/src/features/code_actions.zig index e0252724f..888695fa7 100644 --- a/src/features/code_actions.zig +++ b/src/features/code_actions.zig @@ -149,7 +149,7 @@ pub fn generateStringLiteralCodeActions( }; // Check for disallowed characters and utf-8 validity for (parsed) |c| { - if (c == '\n' or c == '\r') continue; + if (c == '\n') continue; if (std.ascii.isControl(c)) return; } if (!std.unicode.utf8ValidateSlice(parsed)) return; diff --git a/tests/lsp_features/code_actions.zig b/tests/lsp_features/code_actions.zig index c78c3a7c2..c4587101f 100644 --- a/tests/lsp_features/code_actions.zig +++ b/tests/lsp_features/code_actions.zig @@ -807,6 +807,12 @@ test "convert string literal to multiline - invalid" { , \\const foo = "\tWe use tabs"; ); + // A Multi-Line String Literals can't contain carriage returns + try testConvertString( + \\const foo = "\r"; + , + \\const foo = "\r"; + ); // Not in @import try testConvertString( \\const std = @import("std");