Skip to content

Commit 5017de4

Browse files
committed
Improved .dockerignore pattern processing to better match Docker CLI behavior
Signed-off-by: Joffrey F <[email protected]>
1 parent 180a0fd commit 5017de4

File tree

3 files changed

+123
-89
lines changed

3 files changed

+123
-89
lines changed

docker/utils/build.py

Lines changed: 111 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,7 @@
66
import tempfile
77

88
from ..constants import IS_WINDOWS_PLATFORM
9-
from fnmatch import fnmatch
10-
from itertools import chain
9+
from .fnmatch import fnmatch
1110

1211

1312
_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
@@ -44,92 +43,9 @@ def exclude_paths(root, patterns, dockerfile=None):
4443
if dockerfile is None:
4544
dockerfile = 'Dockerfile'
4645

47-
def split_path(p):
48-
return [pt for pt in re.split(_SEP, p) if pt and pt != '.']
49-
50-
def normalize(p):
51-
# Leading and trailing slashes are not relevant. Yes,
52-
# "foo.py/" must exclude the "foo.py" regular file. "."
53-
# components are not relevant either, even if the whole
54-
# pattern is only ".", as the Docker reference states: "For
55-
# historical reasons, the pattern . is ignored."
56-
# ".." component must be cleared with the potential previous
57-
# component, regardless of whether it exists: "A preprocessing
58-
# step [...] eliminates . and .. elements using Go's
59-
# filepath.".
60-
i = 0
61-
split = split_path(p)
62-
while i < len(split):
63-
if split[i] == '..':
64-
del split[i]
65-
if i > 0:
66-
del split[i - 1]
67-
i -= 1
68-
else:
69-
i += 1
70-
return split
71-
72-
patterns = (
73-
(True, normalize(p[1:]))
74-
if p.startswith('!') else
75-
(False, normalize(p))
76-
for p in patterns)
77-
patterns = list(reversed(list(chain(
78-
# Exclude empty patterns such as "." or the empty string.
79-
filter(lambda p: p[1], patterns),
80-
# Always include the Dockerfile and .dockerignore
81-
[(True, split_path(dockerfile)), (True, ['.dockerignore'])]))))
82-
return set(walk(root, patterns))
83-
84-
85-
def walk(root, patterns, default=True):
86-
"""
87-
A collection of files lying below root that should be included according to
88-
patterns.
89-
"""
90-
91-
def match(p):
92-
if p[1][0] == '**':
93-
rec = (p[0], p[1][1:])
94-
return [p] + (match(rec) if rec[1] else [rec])
95-
elif fnmatch(f, p[1][0]):
96-
return [(p[0], p[1][1:])]
97-
else:
98-
return []
99-
100-
for f in os.listdir(root):
101-
cur = os.path.join(root, f)
102-
# The patterns if recursing in that directory.
103-
sub = list(chain(*(match(p) for p in patterns)))
104-
# Whether this file is explicitly included / excluded.
105-
hit = next((p[0] for p in sub if not p[1]), None)
106-
# Whether this file is implicitly included / excluded.
107-
matched = default if hit is None else hit
108-
sub = list(filter(lambda p: p[1], sub))
109-
if os.path.isdir(cur) and not os.path.islink(cur):
110-
# Entirely skip directories if there is no chance any subfile will
111-
# be included.
112-
if all(not p[0] for p in sub) and not matched:
113-
continue
114-
# I think this would greatly speed up dockerignore handling by not
115-
# recursing into directories we are sure would be entirely
116-
# included, and only yielding the directory itself, which will be
117-
# recursively archived anyway. However the current unit test expect
118-
# the full list of subfiles and I'm not 100% sure it would make no
119-
# difference yet.
120-
# if all(p[0] for p in sub) and matched:
121-
# yield f
122-
# continue
123-
children = False
124-
for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
125-
yield r
126-
children = True
127-
# The current unit tests expect directories only under those
128-
# conditions. It might be simplifiable though.
129-
if (not sub or not children) and hit or hit is None and default:
130-
yield f
131-
elif matched:
132-
yield f
46+
patterns.append('!' + dockerfile)
47+
pm = PatternMatcher(patterns)
48+
return set(pm.walk(root))
13349

13450

13551
def build_file_list(root):
@@ -217,3 +133,110 @@ def mkbuildcontext(dockerfile):
217133
t.close()
218134
f.seek(0)
219135
return f
136+
137+
138+
def split_path(p):
    """Split the path *p* into its components.

    The path is cut on the module-level ``_SEP`` regular expression; empty
    components (produced by leading, trailing or doubled separators) and
    ``'.'`` components are dropped.
    """
    components = re.split(_SEP, p)
    return [part for part in components if part not in ('', '.')]
140+
141+
142+
# Heavily based on
# https://github.com/moby/moby/blob/master/pkg/fileutils/fileutils.go
class PatternMatcher(object):
    """Evaluate a list of .dockerignore-style patterns against paths.

    Patterns are tried in order and the last one that matches a path
    decides its fate: a plain pattern excludes the path, a pattern that
    was written with a leading ``!`` re-includes it.
    """

    def __init__(self, patterns):
        """Build the matcher from an iterable of pattern strings.

        Patterns whose normalized form is empty (``Pattern.dirs`` is an
        empty list) are discarded. ``!.dockerignore`` is always appended
        as the final pattern.
        """
        compiled = (Pattern(p) for p in patterns)
        self.patterns = [p for p in compiled if p.dirs]
        self.patterns.append(Pattern('!.dockerignore'))

    def matches(self, filepath):
        """Return True when *filepath* ends up excluded by the patterns.

        A pattern applies to *filepath* either when it matches the path
        itself or when it matches the leading ``len(pattern.dirs)``
        components of the path's parent directory.
        """
        # Pattern.cleaned_pattern is always joined with '/', so the path
        # has to use '/' as well or nothing below the top level can match
        # on platforms whose separator differs. No-op where sep is '/'.
        filepath = filepath.replace(os.path.sep, '/')

        matched = False
        parent_path = os.path.dirname(filepath)
        parent_path_dirs = split_path(parent_path)

        for pattern in self.patterns:
            match = pattern.match(filepath)
            if not match and parent_path != '':
                depth = len(pattern.dirs)
                if depth <= len(parent_path_dirs):
                    # Try the pattern against an ancestor directory of the
                    # same depth as the pattern.
                    match = pattern.match('/'.join(parent_path_dirs[:depth]))

            if match:
                # Later patterns override earlier ones.
                matched = not pattern.exclusion

        return matched

    def walk(self, root):
        """Return a generator over the non-excluded paths below *root*.

        Yielded paths are relative to *root*. Directories that are
        symbolic links are never descended into. An excluded directory is
        descended into only when at least one ``!`` pattern starts with
        that directory's path, since such a pattern may re-include
        something inside it.
        """
        def rec_walk(current_dir):
            for f in os.listdir(current_dir):
                fpath = os.path.join(
                    os.path.relpath(current_dir, root), f
                )
                # Entries directly under root come back as "./name".
                if fpath.startswith('.' + os.path.sep):
                    fpath = fpath[2:]
                match = self.matches(fpath)
                if not match:
                    yield fpath

                cur = os.path.join(root, fpath)
                if not os.path.isdir(cur) or os.path.islink(cur):
                    continue

                if match:
                    # If we want to skip this file and it's a directory
                    # then we should first check to see if there's an
                    # excludes pattern (e.g. !dir/file) that starts with
                    # this dir. If so then we can't skip this dir.
                    #
                    # cleaned_pattern is '/'-joined, so compare it with a
                    # '/'-joined form of the directory path.
                    prefix = fpath.replace(os.path.sep, '/')
                    keep = any(
                        pat.exclusion and
                        pat.cleaned_pattern.startswith(prefix)
                        for pat in self.patterns
                    )
                    if not keep:
                        continue
                for sub in rec_walk(cur):
                    yield sub

        return rec_walk(root)
205+
206+
207+
class Pattern(object):
    """A single .dockerignore pattern.

    After construction:
      * ``exclusion`` is True when the pattern string began with ``!``;
      * ``dirs`` is the list of normalized path components;
      * ``cleaned_pattern`` is ``dirs`` joined with ``/``.
    """

    def __init__(self, pattern_str):
        """Parse *pattern_str*, stripping one leading ``!`` if present."""
        self.exclusion = pattern_str.startswith('!')
        if self.exclusion:
            pattern_str = pattern_str[1:]

        self.dirs = self.normalize(pattern_str)
        self.cleaned_pattern = '/'.join(self.dirs)

    @classmethod
    def normalize(cls, p):
        """Return the list of meaningful path components of pattern *p*."""
        # Leading and trailing slashes are not relevant. Yes,
        # "foo.py/" must exclude the "foo.py" regular file. "."
        # components are not relevant either, even if the whole
        # pattern is only ".", as the Docker reference states: "For
        # historical reasons, the pattern . is ignored."
        # ".." component must be cleared with the potential previous
        # component, regardless of whether it exists: "A preprocessing
        # step [...] eliminates . and .. elements using Go's
        # filepath.".
        i = 0
        split = split_path(p)
        while i < len(split):
            if split[i] == '..':
                # Drop the ".." itself ...
                del split[i]
                if i > 0:
                    # ... and the component it cancels, if there is one.
                    del split[i - 1]
                    i -= 1
            else:
                i += 1
        return split

    def match(self, filepath):
        """Return whether *filepath* matches this pattern.

        ``cleaned_pattern`` is always joined with ``/``, so *filepath* is
        converted to ``/`` separators before matching; this is a no-op on
        platforms where ``os.path.sep`` is already ``/``.
        """
        return fnmatch(
            filepath.replace(os.path.sep, '/'), self.cleaned_pattern
        )

docker/utils/fnmatch.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -111,4 +111,5 @@ def translate(pat):
111111
res = '%s[%s]' % (res, stuff)
112112
else:
113113
res = res + re.escape(c)
114+
114115
return res + '$'

tests/unit/utils_test.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -887,12 +887,22 @@ def test_trailing_double_wildcard(self):
887887
)
888888
)
889889

890+
def test_double_wildcard_with_exception(self):
    # "**" excludes everything; the two "!" patterns bring "bar" and
    # "foo/bar" (and their contents) back, next to the Dockerfile and
    # .dockerignore.
    patterns = ['**', '!bar', '!foo/bar']
    expected = set([
        'foo/bar', 'foo/bar/a.py', 'bar', 'bar/a.py', 'Dockerfile',
        '.dockerignore',
    ])
    assert self.exclude(patterns) == convert_paths(expected)
897+
890898
def test_include_wildcard(self):
899+
# This may be surprising but it matches the CLI's behavior
900+
# (tested with 18.05.0-ce on linux)
891901
base = make_tree(['a'], ['a/b.py'])
892902
assert exclude_paths(
893903
base,
894904
['*', '!*/b.py']
895-
) == convert_paths(['a/b.py'])
905+
) == set()
896906

897907
def test_last_line_precedence(self):
898908
base = make_tree(

0 commit comments

Comments
 (0)