Skip to content

Commit af304ad

Browse files
martinemdehsbt
authored andcommitted
[rubygems/rubygems] Revert to splitting parser due to performance regression
* The string search parser was more memory efficient but in some cases, much slower. Reverting until a better solution is found. * Handle the situation where the line might be blank (Artifactory bug) rubygems/rubygems@222d38737d
1 parent 0e1182f commit af304ad

File tree

2 files changed

+101
-150
lines changed

2 files changed

+101
-150
lines changed

lib/bundler/compact_index_client/parser.rb

Lines changed: 23 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ def initialize(compact_index)
1111
@versions_by_name = nil
1212
@available = nil
1313
@gem_parser = nil
14-
@versions_data = nil
1514
end
1615

1716
def names
@@ -40,71 +39,45 @@ def versions
4039
end
4140

4241
def info(name)
43-
data = @compact_index.info(name, info_checksum(name))
42+
data = @compact_index.info(name, info_checksums[name])
4443
lines(data).map {|line| gem_parser.parse(line).unshift(name) }
4544
end
4645

47-
# parse the last, most recently updated line of the versions file to determine availability
4846
def available?
4947
return @available unless @available.nil?
50-
return @available = false unless versions_data&.size&.nonzero?
51-
52-
line_end = versions_data.size - 1
53-
return @available = false if versions_data[line_end] != "\n"
54-
55-
line_start = versions_data.rindex("\n", line_end - 1)
56-
line_start ||= -1 # allow a single line versions file
57-
58-
@available = !split_last_word(versions_data, line_start + 1, line_end).nil?
48+
@available = !info_checksums.empty?
5949
end
6050

6151
private
6252

63-
# Search for a line starting with gem name, then return last space-separated word (the checksum)
64-
def info_checksum(name)
65-
return unless versions_data
66-
return unless (line_start = rindex_of_gem(name))
67-
return unless (line_end = versions_data.index("\n", line_start))
68-
split_last_word(versions_data, line_start, line_end)
69-
end
70-
71-
def gem_parser
72-
@gem_parser ||= GemParser.new
73-
end
74-
75-
def versions_data
76-
@versions_data ||= begin
77-
data = @compact_index.versions
78-
strip_header!(data) if data
79-
data.freeze
53+
def info_checksums
54+
@info_checksums ||= lines(@compact_index.versions).each_with_object({}) do |line, checksums|
55+
parse_version_checksum(line, checksums)
8056
end
8157
end
8258

83-
def rindex_of_gem(name)
84-
if (pos = versions_data.rindex("\n#{name} "))
85-
pos + 1
86-
elsif versions_data.start_with?("#{name} ")
87-
0
88-
end
59+
def lines(data)
60+
return [] if data.nil? || data.empty?
61+
lines = data.split("\n")
62+
header = lines.index("---")
63+
header ? lines[header + 1..-1] : lines
8964
end
9065

91-
# This is similar to `string.split(" ").last` but it avoids allocating extra objects.
92-
def split_last_word(string, line_start, line_end)
93-
return unless line_start < line_end && line_start >= 0
94-
word_start = string.rindex(" ", line_end).to_i + 1
95-
return if word_start < line_start
96-
string[word_start, line_end - word_start]
97-
end
98-
99-
def lines(string)
100-
return [] if string.nil? || string.empty?
101-
strip_header!(string)
102-
string.split("\n")
66+
def gem_parser
67+
@gem_parser ||= GemParser.new
10368
end
10469

105-
def strip_header!(string)
106-
header_end = string.index("---\n")
107-
string.slice!(0, header_end + 4) if header_end
70+
# This is mostly the same as `split(" ", 3)` but it avoids allocating extra objects.
71+
# This method gets called at least once for every gem when parsing versions.
72+
def parse_version_checksum(line, checksums)
73+
return unless (name_end = line.index(" ")) # Artifactory bug causes blank lines in artifactor index files
74+
return unless (checksum_start = line.index(" ", name_end + 1) + 1)
75+
checksum_end = line.size - checksum_start
76+
77+
line.freeze # allows slicing into the string to not allocate a copy of the line
78+
name = line[0, name_end]
79+
checksum = line[checksum_start, checksum_end]
80+
checksums[name.freeze] = checksum # freeze name since it is used as a hash key
10881
end
10982
end
11083
end

spec/bundler/bundler/compact_index_client/parser_spec.rb

Lines changed: 78 additions & 100 deletions
Original file line numberDiff line numberDiff line change
@@ -89,11 +89,6 @@ def set_info_data(name, value)
8989
compact_index.versions = +""
9090
expect(parser).not_to be_available
9191
end
92-
93-
it "returns false when versions ends improperly without a newline" do
94-
compact_index.versions = "a 1.0.0 aaa1"
95-
expect(parser).not_to be_available
96-
end
9792
end
9893

9994
describe "#names" do
@@ -143,108 +138,78 @@ def set_info_data(name, value)
143138
end
144139

145140
describe "#info" do
146-
it "returns the info for example gem 'a' which has no deps" do
147-
expect(parser.info("a")).to eq(
141+
let(:a_result) do
142+
[
148143
[
149-
[
150-
"a",
151-
"1.0.0",
152-
nil,
153-
[],
154-
[
155-
["checksum", ["aaa1"]],
156-
["ruby", [">= 3.0.0"]],
157-
["rubygems", [">= 3.2.3"]],
158-
],
159-
],
160-
[
161-
"a",
162-
"1.0.1",
163-
nil,
164-
[],
165-
[
166-
["checksum", ["aaa2"]],
167-
["ruby", [">= 3.0.0"]],
168-
["rubygems", [">= 3.2.3"]],
169-
],
170-
],
171-
[
172-
"a",
173-
"1.1.0",
174-
nil,
175-
[],
176-
[
177-
["checksum", ["aaa3"]],
178-
["ruby", [">= 3.0.0"]],
179-
["rubygems", [">= 3.2.3"]],
180-
],
181-
],
182-
]
183-
)
144+
"a",
145+
"1.0.0",
146+
nil,
147+
[],
148+
[["checksum", ["aaa1"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
149+
],
150+
[
151+
"a",
152+
"1.0.1",
153+
nil,
154+
[],
155+
[["checksum", ["aaa2"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
156+
],
157+
[
158+
"a",
159+
"1.1.0",
160+
nil,
161+
[],
162+
[["checksum", ["aaa3"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
163+
],
164+
]
165+
end
166+
let(:b_result) do
167+
[
168+
[
169+
"b",
170+
"2.0.0",
171+
nil,
172+
[["a", ["~> 1.0", "<= 3.0"]]],
173+
[["checksum", ["bbb1"]]],
174+
],
175+
[
176+
"b",
177+
"2.0.0",
178+
"java",
179+
[["a", ["~> 1.0", "<= 3.0"]]],
180+
[["checksum", ["bbb2"]]],
181+
],
182+
]
183+
end
184+
let(:c_result) do
185+
[
186+
[
187+
"c",
188+
"3.0.0",
189+
nil,
190+
[["a", ["= 1.0.0"]], ["b", ["~> 2.0"]]],
191+
[["checksum", ["ccc1"]], ["ruby", [">= 2.7.0"]], ["rubygems", [">= 3.0.0"]]],
192+
],
193+
[
194+
"c",
195+
"3.3.3",
196+
nil,
197+
[["a", [">= 1.1.0"]], ["b", ["~> 2.0"]]],
198+
[["checksum", ["ccc3"]], ["ruby", [">= 3.0.0"]], ["rubygems", [">= 3.2.3"]]],
199+
],
200+
]
201+
end
202+
203+
it "returns the info for example gem 'a' which has no deps" do
204+
expect(parser.info("a")).to eq(a_result)
184205
end
185206

186207
it "returns the info for example gem 'b' which has platform and compound deps" do
187-
expect(parser.info("b")).to eq(
188-
[
189-
[
190-
"b",
191-
"2.0.0",
192-
nil,
193-
[
194-
["a", ["~> 1.0", "<= 3.0"]],
195-
],
196-
[
197-
["checksum", ["bbb1"]],
198-
],
199-
],
200-
[
201-
"b",
202-
"2.0.0",
203-
"java",
204-
[
205-
["a", ["~> 1.0", "<= 3.0"]],
206-
],
207-
[
208-
["checksum", ["bbb2"]],
209-
],
210-
],
211-
]
212-
)
208+
expect(parser.info("b")).to eq(b_result)
213209
end
214210

215211
it "returns the info for example gem 'c' which has deps and yanked version (requires use of correct info checksum)" do
216-
expect(parser.info("c")).to eq(
217-
[
218-
[
219-
"c",
220-
"3.0.0",
221-
nil,
222-
[
223-
["a", ["= 1.0.0"]],
224-
["b", ["~> 2.0"]],
225-
],
226-
[
227-
["checksum", ["ccc1"]],
228-
["ruby", [">= 2.7.0"]],
229-
["rubygems", [">= 3.0.0"]],
230-
],
231-
],
232-
[
233-
"c",
234-
"3.3.3",
235-
nil,
236-
[
237-
["a", [">= 1.1.0"]],
238-
["b", ["~> 2.0"]],
239-
],
240-
[
241-
["checksum", ["ccc3"]],
242-
["ruby", [">= 3.0.0"]],
243-
["rubygems", [">= 3.2.3"]],
244-
],
245-
],
246-
]
247-
)
212+
expect(parser.info("c")).to eq(c_result)
248213
end
249214

250215
it "returns an empty array when the info is empty" do
@@ -255,5 +220,18 @@ def set_info_data(name, value)
255220
it "returns an empty array when the info is not readable" do
256221
expect(parser.info("d")).to eq([])
257222
end
223+
224+
it "handles empty lines in the versions file (Artifactory bug that they have yet to fix)" do
225+
compact_index.versions = +<<~VERSIONS
226+
created_at: 2024-05-01T00:00:04Z
227+
---
228+
a 1.0.0,1.0.1,1.1.0 aaa111
229+
b 2.0.0,2.0.0-java bbb222
230+
231+
c 3.0.0,3.0.3,3.3.3 ccc333
232+
c -3.0.3 ccc333yanked
233+
VERSIONS
234+
expect(parser.info("a")).to eq(a_result)
235+
end
258236
end
259237
end

0 commit comments

Comments
 (0)