Skip to content

Commit 284f409

Browse files
authored
Make get_tree_size robust against git bombs (#7766)
* get_tree_size: add tree count limit to prevent DoS

  For pathological repositories like git bombs with deeply nested tree structures, the previous implementation could hang indefinitely even with a file limit, because it had to visit every tree object to discover the blobs. This change adds a separate tree count that also triggers the limit, ensuring the method terminates promptly regardless of repository structure. The return value is still the blob count for normal repos, preserving existing behavior.

* Add tests

* Improve test (and make it run much faster)

  The previous test created 1000 git trees, taking 11s. This creates 32 in <0.5s. This now creates an actual git bomb: if you cloned the repo, you'd get 2^32-1 directories.

* Simplify get_tree_size
1 parent 6fc1cc6 commit 284f409

File tree

2 files changed

+73
-1
lines changed

2 files changed

+73
-1
lines changed

lib/linguist/source/rugged.rb

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,22 @@ def initialize(rugged)
4444
end
4545

4646
# Counts the blobs reachable from the tree of +commit_id+, giving up as soon
# as the work done reaches +limit+.
#
# Blobs and trees are counted separately and either count can trigger the
# limit. Counting trees matters for pathological repositories ("git bombs")
# that contain few or no blobs but an enormous number of nested trees: a
# blob-only limit would never fire and the walk would effectively never end.
#
# commit_id - identifier passed through to get_tree to locate the tree.
# limit     - Integer threshold for both the blob count and the tree count.
#
# Returns +limit+ when either the number of blobs or the number of trees
# seen reaches +limit+; otherwise returns the number of blobs found.
def get_tree_size(commit_id, limit)
  blob_count = 0
  tree_count = 0

  get_tree(commit_id).walk(:preorder) do |_root, entry|
    case entry[:type]
    when :blob
      blob_count += 1
      return limit if blob_count >= limit
    when :tree
      tree_count += 1
      return limit if tree_count >= limit
    end

    # NOTE(review): Rugged's Tree#walk is documented to skip an entry (and its
    # subtree) when the block returns a falsy value, so always answer true to
    # keep descending — confirm against the Rugged version in use.
    true
  end

  blob_count
end
4964

5065
def set_attribute_source(commit_id)

test/test_repository.rb

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -230,3 +230,60 @@ def diff(old_commit, new_commit)
230230
Diff.new
231231
end
232232
end
233+
234+
################################################################################
235+
236+
# Tests for get_tree_size: one against this project's own git repository and
# one against a synthetic "git bomb" whose tree count dwarfs the limit.
class TestGetTreeSize < Minitest::Test
  # On an ordinary repository the result is the blob count when the limit is
  # generous, and the limit itself when the limit is small.
  def test_get_tree_size_normal_repo
    rugged = Rugged::Repository.new(File.expand_path("../../.git", __FILE__))
    source = Linguist::Repository.new(rugged, rugged.head.target_id)

    # With a high limit, should return the actual blob count
    size = source.repository.get_tree_size(rugged.head.target_id, 100_000)
    assert size > 0
    assert size < 100_000

    # With a low limit, should return the limit
    assert_equal 10, source.repository.get_tree_size(rugged.head.target_id, 10)
  end

  # On a git bomb (no blobs, exponentially many trees) the tree count must
  # trip the limit so the call returns promptly.
  def test_get_tree_size_pathological_repo
    # Create a minimal git bomb in a temp directory
    Dir.mktmpdir("git-bomb-test") do |dir|
      # Initialize repo; everything below depends on it, so fail loudly here.
      assert system("git", "-C", dir, "init", "-q", out: File::NULL, err: File::NULL), "git init failed"
      system("git", "-C", dir, "config", "user.email", "test@test.com")
      system("git", "-C", dir, "config", "user.name", "Test")

      # Create git bomb, 2^32-1 directories, no files.
      # The first iteration writes an empty tree; every later iteration writes
      # a tree with two entries that both point at the previous tree, so only
      # 32 objects are stored while the expanded tree doubles at each level.
      current_sha = nil
      mode = "40000" # tree mode
      32.times do
        current_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "tree", "-w", "--stdin"], "r+b") do |io|
          if current_sha
            # Tree entry format: "<mode> <name>\0<20-byte SHA>"
            sha = [current_sha].pack('H*')
            entry0 = "#{mode} entry0\0#{sha}"
            entry1 = "#{mode} entry1\0#{sha}"
            io.write(entry0 + entry1)
          end
          io.close_write
          io.read.strip
        end
      end

      # Create commit
      commit_content = "tree #{current_sha}\nauthor Test <test@test.com> 0 +0000\ncommitter Test <test@test.com> 0 +0000\n\ntest"
      commit_sha = IO.popen(["git", "-C", dir, "hash-object", "-t", "commit", "-w", "--stdin"], "r+") do |io|
        io.write(commit_content)
        io.close_write
        io.read.strip
      end

      # Should hit tree limit quickly (roughly 2^32 trees, far more than 500)
      rugged = Rugged::Repository.new(dir)
      source = Linguist::Repository.new(rugged, commit_sha)
      assert_equal 500, source.repository.get_tree_size(commit_sha, 500)
    end
  end
end

0 commit comments

Comments
 (0)