Skip to content

Commit a49166a

Browse files
author
Thomas Boyt
committed
Improve get_paths performance by not descending into ignored directories
Signed-off-by: Thomas Boyt <[email protected]>
1 parent 28864df commit a49166a

File tree

3 files changed

+62
-27
lines changed

3 files changed

+62
-27
lines changed

docker/utils/utils.py

Lines changed: 51 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -107,38 +107,68 @@ def exclude_paths(root, patterns, dockerfile=None):
107107

108108
exclude_patterns = list(set(patterns) - set(exceptions))
109109

110-
all_paths = get_paths(root)
111-
112-
# Remove all paths that are matched by any exclusion pattern
113-
paths = [
114-
p for p in all_paths
115-
if not any(match_path(p, pattern) for pattern in exclude_patterns)
116-
]
117-
118-
# Add back the set of paths that are matched by any inclusion pattern.
119-
# Include parent dirs - if we add back 'foo/bar', add 'foo' as well
120-
for p in all_paths:
121-
if any(match_path(p, pattern) for pattern in include_patterns):
122-
components = p.split('/')
123-
paths += [
124-
'/'.join(components[:end])
125-
for end in range(1, len(components) + 1)
126-
]
110+
paths = get_paths(root, exclude_patterns, include_patterns,
111+
has_exceptions=len(exceptions) > 0)
127112

128113
return set(paths)
129114

130115

131-
def get_paths(root):
116+
def should_include(path, exclude_patterns, include_patterns):
117+
"""
118+
Given a path, a list of exclude patterns, and a list of inclusion patterns:
119+
120+
1. Returns True if the path doesn't match any exclusion pattern
121+
2. Returns False if the path matches an exclusion pattern and doesn't match
122+
an inclusion pattern
123+
3. Returns true if the path matches an exclusion pattern and matches an
124+
inclusion pattern
125+
"""
126+
for pattern in exclude_patterns:
127+
if match_path(path, pattern):
128+
for pattern in include_patterns:
129+
if match_path(path, pattern):
130+
return True
131+
return False
132+
return True
133+
134+
135+
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
132136
paths = []
133137

134-
for parent, dirs, files in os.walk(root, followlinks=False):
138+
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
135139
parent = os.path.relpath(parent, root)
136140
if parent == '.':
137141
parent = ''
142+
143+
# If exception rules exist, we can't skip recursing into ignored
144+
# directories, as we need to look for exceptions in them.
145+
#
146+
# It may be possible to optimize this further for exception patterns
147+
# that *couldn't* match within ignored directores.
148+
#
149+
# This matches the current docker logic (as of 2015-11-24):
150+
# https://github.com/docker/docker/blob/37ba67bf636b34dc5c0c0265d62a089d0492088f/pkg/archive/archive.go#L555-L557
151+
152+
if not has_exceptions:
153+
154+
# Remove excluded patterns from the list of directories to traverse
155+
# by mutating the dirs we're iterating over.
156+
# This looks strange, but is considered the correct way to skip
157+
# traversal. See https://docs.python.org/2/library/os.html#os.walk
158+
159+
dirs[:] = [d for d in dirs if
160+
should_include(os.path.join(parent, d),
161+
exclude_patterns, include_patterns)]
162+
138163
for path in dirs:
139-
paths.append(os.path.join(parent, path))
164+
if should_include(os.path.join(parent, path),
165+
exclude_patterns, include_patterns):
166+
paths.append(os.path.join(parent, path))
167+
140168
for path in files:
141-
paths.append(os.path.join(parent, path))
169+
if should_include(os.path.join(parent, path),
170+
exclude_patterns, include_patterns):
171+
paths.append(os.path.join(parent, path))
142172

143173
return paths
144174

tests/integration/build_test.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ def test_build_with_dockerignore(self):
6565
'ignored',
6666
'Dockerfile',
6767
'.dockerignore',
68+
'!ignored/subdir/excepted-file',
6869
'', # empty line
6970
]))
7071

@@ -76,6 +77,9 @@ def test_build_with_dockerignore(self):
7677
with open(os.path.join(subdir, 'file'), 'w') as f:
7778
f.write("this file should be ignored")
7879

80+
with open(os.path.join(subdir, 'excepted-file'), 'w') as f:
81+
f.write("this file should not be ignored")
82+
7983
tag = 'docker-py-test-build-with-dockerignore'
8084
stream = self.client.build(
8185
path=base_dir,
@@ -84,7 +88,7 @@ def test_build_with_dockerignore(self):
8488
for chunk in stream:
8589
pass
8690

87-
c = self.client.create_container(tag, ['ls', '-1A', '/test'])
91+
c = self.client.create_container(tag, ['find', '/test', '-type', 'f'])
8892
self.client.start(c)
8993
self.client.wait(c)
9094
logs = self.client.logs(c)
@@ -93,8 +97,9 @@ def test_build_with_dockerignore(self):
9397
logs = logs.decode('utf-8')
9498

9599
self.assertEqual(
96-
list(filter(None, logs.split('\n'))),
97-
['not-ignored'],
100+
sorted(list(filter(None, logs.split('\n')))),
101+
sorted(['/test/ignored/subdir/excepted-file',
102+
'/test/not-ignored']),
98103
)
99104

100105
@requires_api_version('1.21')

tests/unit/utils_test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -671,17 +671,17 @@ def test_directory_with_trailing_slash(self):
671671

672672
def test_directory_with_single_exception(self):
673673
assert self.exclude(['foo', '!foo/bar/a.py']) == self.all_paths - set([
674-
'foo/a.py', 'foo/b.py',
674+
'foo/a.py', 'foo/b.py', 'foo', 'foo/bar'
675675
])
676676

677677
def test_directory_with_subdir_exception(self):
678678
assert self.exclude(['foo', '!foo/bar']) == self.all_paths - set([
679-
'foo/a.py', 'foo/b.py',
679+
'foo/a.py', 'foo/b.py', 'foo'
680680
])
681681

682682
def test_directory_with_wildcard_exception(self):
683683
assert self.exclude(['foo', '!foo/*.py']) == self.all_paths - set([
684-
'foo/bar', 'foo/bar/a.py',
684+
'foo/bar', 'foo/bar/a.py', 'foo'
685685
])
686686

687687
def test_subdirectory(self):

0 commit comments

Comments
 (0)