Skip to content

Commit bb3ad64

Browse files
committed
Fix .dockerignore: accept wildcard in inclusion pattern, honor last line precedence.
Signed-off-by: mefyl <[email protected]>
1 parent c8f5a5a commit bb3ad64

File tree

2 files changed

+94
-178
lines changed

2 files changed

+94
-178
lines changed

docker/utils/build.py

Lines changed: 74 additions & 114 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import os
2+
import re
23

34
from ..constants import IS_WINDOWS_PLATFORM
4-
from .fnmatch import fnmatch
5+
from fnmatch import fnmatch
6+
from itertools import chain
57
from .utils import create_archive
68

79

810
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
911
root = os.path.abspath(path)
1012
exclude = exclude or []
11-
1213
return create_archive(
1314
files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
1415
root=root, fileobj=fileobj, gzip=gzip
1516
)
1617

1718

19+
_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
20+
21+
1822
def exclude_paths(root, patterns, dockerfile=None):
1923
"""
2024
Given a root directory path and a list of .dockerignore patterns, return
@@ -23,121 +27,77 @@ def exclude_paths(root, patterns, dockerfile=None):
2327
2428
All paths returned are relative to the root.
2529
"""
30+
2631
if dockerfile is None:
2732
dockerfile = 'Dockerfile'
2833

29-
exceptions = [p for p in patterns if p.startswith('!')]
30-
31-
include_patterns = [p[1:].lstrip('/') for p in exceptions]
32-
include_patterns += [dockerfile, '.dockerignore']
33-
34-
exclude_patterns = [
35-
p.lstrip('/') for p in list(set(patterns) - set(exceptions))]
36-
37-
paths = get_paths(root, exclude_patterns, include_patterns,
38-
has_exceptions=len(exceptions) > 0)
39-
40-
return set(paths).union(
41-
# If the Dockerfile is in a subdirectory that is excluded, get_paths
42-
# will not descend into it and the file will be skipped. This ensures
43-
# it doesn't happen.
44-
set([dockerfile.replace('/', os.path.sep)])
45-
if os.path.exists(os.path.join(root, dockerfile)) else set()
46-
)
47-
48-
49-
def should_include(path, exclude_patterns, include_patterns):
50-
"""
51-
Given a path, a list of exclude patterns, and a list of inclusion patterns:
52-
53-
1. Returns True if the path doesn't match any exclusion pattern
54-
2. Returns False if the path matches an exclusion pattern and doesn't match
55-
an inclusion pattern
56-
3. Returns true if the path matches an exclusion pattern and matches an
57-
inclusion pattern
34+
def normalize(p):
35+
# Leading and trailing slashes are not relevant. Yes,
36+
# "foo.py/" must exclude the "foo.py" regular file. "."
37+
# components are not relevant either, even if the whole
38+
# pattern is only ".", as the Docker reference states: "For
39+
# historical reasons, the pattern . is ignored."
40+
split = [pt for pt in re.split(_SEP, p) if pt and pt != '.']
41+
# ".." component must be cleared with the potential previous
42+
# component, regardless of whether it exists: "A preprocessing
43+
# step [...] eliminates . and .. elements using Go's
44+
# filepath.".
45+
i = 0
46+
while i < len(split):
47+
if split[i] == '..':
48+
del split[i]
49+
if i > 0:
50+
del split[i - 1]
51+
i -= 1
52+
else:
53+
i += 1
54+
return split
55+
56+
patterns = (
57+
(True, normalize(p[1:]))
58+
if p.startswith('!') else
59+
(False, normalize(p))
60+
for p in patterns)
61+
patterns = list(reversed(list(chain(
62+
# Exclude empty patterns such as "." or the empty string.
63+
filter(lambda p: p[1], patterns),
64+
# Always include the Dockerfile and .dockerignore
65+
[(True, dockerfile.split('/')), (True, ['.dockerignore'])]))))
66+
return set(walk(root, patterns))
67+
68+
69+
def walk(root, patterns, default=True):
5870
"""
59-
for pattern in exclude_patterns:
60-
if match_path(path, pattern):
61-
for pattern in include_patterns:
62-
if match_path(path, pattern):
63-
return True
64-
return False
65-
return True
66-
67-
68-
def should_check_directory(directory_path, exclude_patterns, include_patterns):
71+
A collection of file lying below root that should be included according to
72+
patterns.
6973
"""
70-
Given a directory path, a list of exclude patterns, and a list of inclusion
71-
patterns:
72-
73-
1. Returns True if the directory path should be included according to
74-
should_include.
75-
2. Returns True if the directory path is the prefix for an inclusion
76-
pattern
77-
3. Returns False otherwise
78-
"""
79-
80-
# To account for exception rules, check directories if their path is a
81-
# a prefix to an inclusion pattern. This logic conforms with the current
82-
# docker logic (2016-10-27):
83-
# https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
84-
85-
def normalize_path(path):
86-
return path.replace(os.path.sep, '/')
87-
88-
path_with_slash = normalize_path(directory_path) + '/'
89-
possible_child_patterns = [
90-
pattern for pattern in map(normalize_path, include_patterns)
91-
if (pattern + '/').startswith(path_with_slash)
92-
]
93-
directory_included = should_include(
94-
directory_path, exclude_patterns, include_patterns
95-
)
96-
return directory_included or len(possible_child_patterns) > 0
97-
98-
99-
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
100-
paths = []
101-
102-
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
103-
parent = os.path.relpath(parent, root)
104-
if parent == '.':
105-
parent = ''
106-
107-
# Remove excluded patterns from the list of directories to traverse
108-
# by mutating the dirs we're iterating over.
109-
# This looks strange, but is considered the correct way to skip
110-
# traversal. See https://docs.python.org/2/library/os.html#os.walk
111-
dirs[:] = [
112-
d for d in dirs if should_check_directory(
113-
os.path.join(parent, d), exclude_patterns, include_patterns
114-
)
115-
]
116-
117-
for path in dirs:
118-
if should_include(os.path.join(parent, path),
119-
exclude_patterns, include_patterns):
120-
paths.append(os.path.join(parent, path))
121-
122-
for path in files:
123-
if should_include(os.path.join(parent, path),
124-
exclude_patterns, include_patterns):
125-
paths.append(os.path.join(parent, path))
126-
127-
return paths
128-
129-
130-
def match_path(path, pattern):
131-
pattern = pattern.rstrip('/' + os.path.sep)
132-
if pattern:
133-
pattern = os.path.relpath(pattern)
134-
135-
pattern_components = pattern.split(os.path.sep)
136-
if len(pattern_components) == 1 and IS_WINDOWS_PLATFORM:
137-
pattern_components = pattern.split('/')
13874

139-
if '**' not in pattern:
140-
path_components = path.split(os.path.sep)[:len(pattern_components)]
141-
else:
142-
path_components = path.split(os.path.sep)
143-
return fnmatch('/'.join(path_components), '/'.join(pattern_components))
75+
def match(p):
76+
if p[1][0] == '**':
77+
rec = (p[0], p[1][1:])
78+
return [p] + (match(rec) if rec[1] else [rec])
79+
elif fnmatch(f, p[1][0]):
80+
return [(p[0], p[1][1:])]
81+
else:
82+
return []
83+
84+
for f in os.listdir(root):
85+
cur = os.path.join(root, f)
86+
# The patterns if recursing in that directory.
87+
sub = list(chain(*(match(p) for p in patterns)))
88+
# Whether this file is explicitely included / excluded.
89+
hit = next((p[0] for p in sub if not p[1]), None)
90+
# Whether this file is implicitely included / excluded.
91+
matched = default if hit is None else hit
92+
sub = list(filter(lambda p: p[1], sub))
93+
if os.path.isdir(cur):
94+
children = False
95+
for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
96+
yield r
97+
children = True
98+
# The current unit tests expect directories only under those
99+
# conditions. It might be simplifiable though.
100+
if (not sub or not children) and hit or hit is None and default:
101+
yield f
102+
elif matched:
103+
yield f

tests/unit/utils_test.py

Lines changed: 20 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
decode_json_header, tar, split_command, parse_devices, update_headers,
2424
)
2525

26-
from docker.utils.build import should_check_directory
2726
from docker.utils.ports import build_port_bindings, split_port
2827
from docker.utils.utils import format_environment
2928

@@ -883,6 +882,26 @@ def test_trailing_double_wildcard(self):
883882
)
884883
)
885884

885+
def test_include_wildcard(self):
886+
base = make_tree(['a'], ['a/b.py'])
887+
assert exclude_paths(
888+
base,
889+
['*', '!*/b.py']
890+
) == convert_paths(['a/b.py'])
891+
892+
def test_last_line_precedence(self):
893+
base = make_tree(
894+
[],
895+
['garbage.md',
896+
'thrash.md',
897+
'README.md',
898+
'README-bis.md',
899+
'README-secret.md'])
900+
assert exclude_paths(
901+
base,
902+
['*.md', '!README*.md', 'README-secret.md']
903+
) == set(['README.md', 'README-bis.md'])
904+
886905

887906
class TarTest(unittest.TestCase):
888907
def test_tar_with_excludes(self):
@@ -1019,69 +1038,6 @@ def tar_test_negative_mtime_bug(self):
10191038
assert tar_data.getmember('th.txt').mtime == -3600
10201039

10211040

1022-
class ShouldCheckDirectoryTest(unittest.TestCase):
1023-
exclude_patterns = [
1024-
'exclude_rather_large_directory',
1025-
'dir/with/subdir_excluded',
1026-
'dir/with/exceptions'
1027-
]
1028-
1029-
include_patterns = [
1030-
'dir/with/exceptions/like_this_one',
1031-
'dir/with/exceptions/in/descendents'
1032-
]
1033-
1034-
def test_should_check_directory_not_excluded(self):
1035-
assert should_check_directory(
1036-
'not_excluded', self.exclude_patterns, self.include_patterns
1037-
)
1038-
assert should_check_directory(
1039-
convert_path('dir/with'), self.exclude_patterns,
1040-
self.include_patterns
1041-
)
1042-
1043-
def test_shoud_check_parent_directories_of_excluded(self):
1044-
assert should_check_directory(
1045-
'dir', self.exclude_patterns, self.include_patterns
1046-
)
1047-
assert should_check_directory(
1048-
convert_path('dir/with'), self.exclude_patterns,
1049-
self.include_patterns
1050-
)
1051-
1052-
def test_should_not_check_excluded_directories_with_no_exceptions(self):
1053-
assert not should_check_directory(
1054-
'exclude_rather_large_directory', self.exclude_patterns,
1055-
self.include_patterns
1056-
)
1057-
assert not should_check_directory(
1058-
convert_path('dir/with/subdir_excluded'), self.exclude_patterns,
1059-
self.include_patterns
1060-
)
1061-
1062-
def test_should_check_excluded_directory_with_exceptions(self):
1063-
assert should_check_directory(
1064-
convert_path('dir/with/exceptions'), self.exclude_patterns,
1065-
self.include_patterns
1066-
)
1067-
assert should_check_directory(
1068-
convert_path('dir/with/exceptions/in'), self.exclude_patterns,
1069-
self.include_patterns
1070-
)
1071-
1072-
def test_should_not_check_siblings_of_exceptions(self):
1073-
assert not should_check_directory(
1074-
convert_path('dir/with/exceptions/but_not_here'),
1075-
self.exclude_patterns, self.include_patterns
1076-
)
1077-
1078-
def test_should_check_subdirectories_of_exceptions(self):
1079-
assert should_check_directory(
1080-
convert_path('dir/with/exceptions/like_this_one/subdir'),
1081-
self.exclude_patterns, self.include_patterns
1082-
)
1083-
1084-
10851041
class FormatEnvironmentTest(unittest.TestCase):
10861042
def test_format_env_binary_unicode_value(self):
10871043
env_dict = {

0 commit comments

Comments
 (0)