diff --git a/benchmark.log b/benchmark.log new file mode 100644 index 000000000..a776e2970 --- /dev/null +++ b/benchmark.log @@ -0,0 +1,14 @@ +Benchmarked in Buk main repository having ~25K ruby files. + + user system total real +old-glob 1.019865 0.292442 1.312307 ( 1.312549) +new-index 0.000049 0.000007 0.000056 ( 0.000056) + +Beign N = number of files +Beign K = number of files matching the subject/test +And should be very low (Almost constant) + +Space Complexity O(N) +Time complexity is O(K) + +The feature become almost instant diff --git a/lib/ruby_indexer/lib/ruby_indexer/file_index.rb b/lib/ruby_indexer/lib/ruby_indexer/file_index.rb new file mode 100644 index 000000000..06cc306de --- /dev/null +++ b/lib/ruby_indexer/lib/ruby_indexer/file_index.rb @@ -0,0 +1,89 @@ +# typed: false +# frozen_string_literal: true + +# Index: +# { +# "c": ["a/b/c.rb", "a/b/c_test.rb", "a/b/c_integration_test.rb", "a/b/test_c.rb", "a/b/sepc_c.rb", "a/b/integration_test_c.rb"] +# } + +# API: +# index.search("c") -> ["a/b/c.rb", "a/b/c_test.rb", "a/b/c_integration_test.rb", "a/b/test_c.rb", "a/b/sepc_c.rb", "a/b/integration_test_c.rb"] +# index.insert("a/b/d.rb") +# index.insert("a/b/d_test.rb") +# index.search("d") -> ["a/b/d.rb", "a/b/d_test.rb"] +# index.delete("a/b/d.rb") +# index.search("d") -> ["a/b/d_test.rb"] +# index.delete("a/b/d_test.rb") +# index.search("d") -> [] + +module RubyIndexer + # The FileIndex class maintains an in-memory index of files for fast lookup. + # It allows searching for files based on their base name, handling various test file patterns. + class FileIndex # TODO: Provide better naming for just subject/test file index. + TEST_REGEX = /^(test_|spec_|integration_test_)|((_test|_spec|_integration_test)$)/ + def initialize + @index = {} + end + + # Extract the base name from a file path, removing test prefixes and suffixes + def extract_base_name(path) + basename = File.basename(path, ".*") + + # Handle patterns like test_hello, spec_hello, integration_test_hello + basename = basename.sub(/^(test_|spec_|integration_test_)/, "") + + # Handle patterns like hello_test, hello_spec, hello_integration_test + basename = basename.sub(/((_test|_spec|_integration_test)$)/, "") + + basename + end + + # Insert a file path into the index + def insert(path) + base_name = extract_base_name(path) + @index[base_name] ||= [] + @index[base_name] << path + end + + # Delete a file path from the index + def delete(path) + base_name = extract_base_name(path) + if @index.key?(base_name) + @index[base_name].delete(path) + @index.delete(base_name) if @index[base_name].empty? + end + end + + # Search for files that match the given base name + def search(base_name) + @index[base_name] || [] + end + + # Search for files that can be tests of the given base_name + def search_test(base_name) + return [] if @index[base_name].nil? + + search(base_name).select do |file| + File.basename(file, ".*").match?(TEST_REGEX) + end + end + + def search_subject(base_name) + return [] if @index[base_name].nil? + + search(base_name).reject do |file| + File.basename(file, ".*").match?(TEST_REGEX) + end + end + + # Bulk load multiple file paths into the index + def bulk_load(paths) + paths.each { |path| insert(path) } + end + + # Clear the entire index + def clear + @index.clear + end + end +end diff --git a/lib/ruby_indexer/lib/ruby_indexer/index.rb b/lib/ruby_indexer/lib/ruby_indexer/index.rb index ec4ecc45b..916929f62 100644 --- a/lib/ruby_indexer/lib/ruby_indexer/index.rb +++ b/lib/ruby_indexer/lib/ruby_indexer/index.rb @@ -1,6 +1,8 @@ # typed: strict # frozen_string_literal: true +require_relative "file_index" + module RubyIndexer class Index class UnresolvableAliasError < StandardError; end @@ -13,6 +15,9 @@ class IndexNotEmptyError < StandardError; end #: Configuration attr_reader :configuration + #: RubyIndexer::FileIndex + attr_reader :file_index + #: bool attr_reader :initial_indexing_completed @@ -80,6 +85,9 @@ def initialize @configuration = RubyIndexer::Configuration.new #: Configuration @initial_indexing_completed = false #: bool + + # File index for tracking files and their relationships + @file_index = FileIndex.new #: FileIndex end # Register an included `hook` that will be executed when `module_name` is included into any namespace @@ -93,6 +101,7 @@ def delete(uri, skip_require_paths_tree: false) key = uri.to_s # For each constant discovered in `path`, delete the associated entry from the index. If there are no entries # left, delete the constant from the index. + # TODO: Adjust file index on delete. @uris_to_entries[key]&.each do |entry| name = entry.name entries = @entries[name] @@ -348,6 +357,9 @@ def index_all(uris: @configuration.indexable_uris, &block) "The index is not empty. To prevent invalid entries, `index_all` can only be called once." end + # Clear the file index at the start of a full indexing + @file_index.clear + RBSIndexer.new(self).index_ruby_core # Calculate how many paths are worth 1% of progress progress_step = (uris.length / 100.0).ceil @@ -361,6 +373,9 @@ def index_all(uris: @configuration.indexable_uris, &block) index_file(uri, collect_comments: false) end + # Bulk load all file paths into the file index + @file_index.bulk_load(Dir.glob("**/*.rb")) + @initial_indexing_completed = true end @@ -377,6 +392,8 @@ def index_single(uri, source, collect_comments: true) indexing_errors = listener.indexing_errors.uniq indexing_errors.each { |error| $stderr.puts(error) } if indexing_errors.any? + + # TODO: Add file to file index rescue SystemStackError => e if e.backtrace&.first&.include?("prism") $stderr.puts "Prism error indexing #{uri}: #{e.message}" diff --git a/lib/ruby_lsp/internal.rb b/lib/ruby_lsp/internal.rb index 420ca665e..c98e813a6 100644 --- a/lib/ruby_lsp/internal.rb +++ b/lib/ruby_lsp/internal.rb @@ -86,6 +86,7 @@ require "ruby_lsp/requests/folding_ranges" require "ruby_lsp/requests/formatting" require "ruby_lsp/requests/go_to_relevant_file" +require "ruby_lsp/requests/go_to_relevant_file_v2" require "ruby_lsp/requests/hover" require "ruby_lsp/requests/inlay_hints" require "ruby_lsp/requests/on_type_formatting" diff --git a/lib/ruby_lsp/requests/go_to_relevant_file_v2.rb b/lib/ruby_lsp/requests/go_to_relevant_file_v2.rb new file mode 100644 index 000000000..7bba497ec --- /dev/null +++ b/lib/ruby_lsp/requests/go_to_relevant_file_v2.rb @@ -0,0 +1,115 @@ +# typed: strict +# frozen_string_literal: true + +module RubyLsp + module Requests + # GoTo Relevant File is a custom [LSP + # request](https://microsoft.github.io/language-server-protocol/specification#requestMessage) + # that navigates to the relevant file for the current document. + # Currently, it supports source code file <> test file navigation. + class GoToRelevantFileV2 < Request + TEST_KEYWORDS = ["test", "spec", "integration_test"] + + TEST_PREFIX_PATTERN = /^(#{TEST_KEYWORDS.join("_|")}_)/ + TEST_SUFFIX_PATTERN = /(_#{TEST_KEYWORDS.join("|_")})$/ + TEST_PATTERN = /#{TEST_PREFIX_PATTERN}|#{TEST_SUFFIX_PATTERN}/ + + TEST_PREFIX_GLOB = "#{TEST_KEYWORDS.join("_,")}_" #: String + TEST_SUFFIX_GLOB = "_#{TEST_KEYWORDS.join(",_")}" #: String + + TEST_REGEX = /^(test_|spec_|integration_test_)|((_test|_spec|_integration_test)$)/ + + #: (String path, String workspace_path, RubyIndexer::FileIndex file_index) -> void + def initialize(path, workspace_path, file_index) + super() + + @file_index = file_index + @workspace_path = workspace_path + @path = path.delete_prefix(workspace_path) #: String + end + + # @override + #: -> Array[String] + def perform + find_relevant_paths + end + + private + + #: -> Array[String] + def find_relevant_paths + # Extract the base name from the current file (without the extension) + input_basename = File.basename(@path, File.extname(@path)) + + base_name = if input_basename.match?(TEST_PATTERN) + # If it's a test file, remove the test prefix/suffix + input_basename.gsub(TEST_PATTERN, "") + else + # If it's a source file, use its base name + input_basename + end + + # Use the FileIndex to find relevant files via the index + candidate_paths = if input_basename.match?(TEST_REGEX) + @file_index.search_subject(base_name) + else + @file_index.search_test(base_name) + end + + return [] if candidate_paths.empty? + + # Filter out the current file from the candidates + full_path = File.join(@workspace_path, @path) + candidates = candidate_paths.reject do |path| + path == full_path + end + + # Filter to include only files with matching extensions + extension = File.extname(@path) + candidates = candidates.select do |path| + File.extname(path) == extension + end + + candidates = candidates.map { |path| File.join(@workspace_path, path) } + + return [] if candidates.empty? + + # Find the most similar paths using Jaccard similarity + find_most_similar_with_jaccard(candidates) + end + + # Using the Jaccard algorithm to determine the similarity between the + # input path and the candidate relevant file paths. + # Ref: https://en.wikipedia.org/wiki/Jaccard_index + # The main idea of this algorithm is to take the size of interaction and divide + # it by the size of union between two sets (in our case the elements in each set + # would be the parts of the path separated by path divider.) + #: (Array[String] candidates) -> Array[String] + def find_most_similar_with_jaccard(candidates) + dirs = get_dir_parts(@path) + + _, results = candidates + .group_by do |other_path| + # For consistency with the current path, convert absolute paths to relative if needed + relative_path = if other_path.start_with?(@workspace_path) + other_path.delete_prefix(@workspace_path) + else + other_path + end + + other_dirs = get_dir_parts(relative_path) + # Similarity score between the two directories + (dirs & other_dirs).size.to_f / (dirs | other_dirs).size + end + .max_by(&:first) + + results || [] + end + + #: (String path) -> Set[String] + def get_dir_parts(path) + Set.new(File.dirname(path).split(File::SEPARATOR)) + end + end + end +end diff --git a/lib/ruby_lsp/server.rb b/lib/ruby_lsp/server.rb index 14f178d13..431060edf 100644 --- a/lib/ruby_lsp/server.rb +++ b/lib/ruby_lsp/server.rb @@ -1,6 +1,8 @@ # typed: strict # frozen_string_literal: true +require "benchmark" + module RubyLsp class Server < BaseServer # Only for testing @@ -1181,8 +1183,28 @@ def experimental_go_to_relevant_file(message) return end + $stdout = File.new("benchmark.log", "w") + $stdout.sync = true + + Benchmark.bm(15) do |x| + x.report("old-glob") do + Requests::GoToRelevantFile.new(path, @global_state.workspace_path).perform + end + x.report("new-index") do + Requests::GoToRelevantFileV2.new( + path, + @global_state.workspace_path, + @global_state.index.file_index, + ).perform + end + end + response = { - locations: Requests::GoToRelevantFile.new(path, @global_state.workspace_path).perform, + locations: Requests::GoToRelevantFileV2.new( + path, + @global_state.workspace_path, + @global_state.index.file_index, + ).perform, } send_message(Result.new(id: message[:id], response: response)) end diff --git a/test/requests/go_to_relevant_file_test.rb b/test/requests/go_to_relevant_file_test.rb index a8c93f86d..c93a598e3 100644 --- a/test/requests/go_to_relevant_file_test.rb +++ b/test/requests/go_to_relevant_file_test.rb @@ -10,7 +10,11 @@ def test_when_input_is_test_file_returns_array_of_implementation_file_locations test_file_path = "/workspace/test/requests/go_to_relevant_file_test.rb" expected = ["/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb"] - result = RubyLsp::Requests::GoToRelevantFile.new(test_file_path, "/workspace").perform + file_index = RubyIndexer::FileIndex.new + file_index.insert("/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb") + file_index.insert("/workspace/test/requests/go_to_relevant_file_test.rb") + + result = RubyLsp::Requests::GoToRelevantFile.new(test_file_path, "/workspace", file_index).perform assert_equal(expected, result) end @@ -22,7 +26,11 @@ def test_when_input_is_implementation_file_returns_array_of_test_file_locations impl_path = "/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb" expected = ["/workspace/test/requests/go_to_relevant_file_test.rb"] - result = RubyLsp::Requests::GoToRelevantFile.new(impl_path, "/workspace").perform + file_index = RubyIndexer::FileIndex.new + file_index.insert("/workspace/lib/ruby_lsp/requests/go_to_relevant_file.rb") + file_index.insert("/workspace/test/requests/go_to_relevant_file_test.rb") + + result = RubyLsp::Requests::GoToRelevantFile.new(impl_path, "/workspace", file_index).perform assert_equal(expected, result) end