Skip to content

Commit cc6e1b1

Browse files
authored
Merge pull request #1914 from mefyl/master
Improve .dockerignore compliance
2 parents ad5f49b + 0c948c7 commit cc6e1b1

File tree

2 files changed

+111
-188
lines changed

2 files changed

+111
-188
lines changed

docker/utils/build.py

Lines changed: 87 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import os
2+
import re
23

34
from ..constants import IS_WINDOWS_PLATFORM
4-
from .fnmatch import fnmatch
5+
from fnmatch import fnmatch
6+
from itertools import chain
57
from .utils import create_archive
68

79

810
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
911
root = os.path.abspath(path)
1012
exclude = exclude or []
11-
1213
return create_archive(
1314
files=sorted(exclude_paths(root, exclude, dockerfile=dockerfile)),
1415
root=root, fileobj=fileobj, gzip=gzip
1516
)
1617

1718

19+
_SEP = re.compile('/|\\\\') if IS_WINDOWS_PLATFORM else re.compile('/')
20+
21+
1822
def exclude_paths(root, patterns, dockerfile=None):
1923
"""
2024
Given a root directory path and a list of .dockerignore patterns, return
@@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
2327
2428
All paths returned are relative to the root.
2529
"""
30+
2631
if dockerfile is None:
2732
dockerfile = 'Dockerfile'
2833

29-
patterns = [p.lstrip('/') for p in patterns]
30-
exceptions = [p for p in patterns if p.startswith('!')]
31-
32-
include_patterns = [p[1:] for p in exceptions]
33-
include_patterns += [dockerfile, '.dockerignore']
34-
35-
exclude_patterns = list(set(patterns) - set(exceptions))
36-
37-
paths = get_paths(root, exclude_patterns, include_patterns,
38-
has_exceptions=len(exceptions) > 0)
39-
40-
return set(paths).union(
41-
# If the Dockerfile is in a subdirectory that is excluded, get_paths
42-
# will not descend into it and the file will be skipped. This ensures
43-
# it doesn't happen.
44-
set([dockerfile.replace('/', os.path.sep)])
45-
if os.path.exists(os.path.join(root, dockerfile)) else set()
46-
)
47-
48-
49-
def should_include(path, exclude_patterns, include_patterns, root):
50-
"""
51-
Given a path, a list of exclude patterns, and a list of inclusion patterns:
52-
53-
1. Returns True if the path doesn't match any exclusion pattern
54-
2. Returns False if the path matches an exclusion pattern and doesn't match
55-
an inclusion pattern
56-
3. Returns true if the path matches an exclusion pattern and matches an
57-
inclusion pattern
58-
"""
59-
for pattern in exclude_patterns:
60-
if match_path(path, pattern):
61-
for pattern in include_patterns:
62-
if match_path(path, pattern):
63-
return True
64-
if os.path.isabs(pattern) and match_path(
65-
os.path.join(root, path), pattern):
66-
return True
67-
return False
68-
return True
69-
70-
71-
def should_check_directory(directory_path, exclude_patterns, include_patterns,
72-
root):
34+
def normalize(p):
35+
# Leading and trailing slashes are not relevant. Yes,
36+
# "foo.py/" must exclude the "foo.py" regular file. "."
37+
# components are not relevant either, even if the whole
38+
# pattern is only ".", as the Docker reference states: "For
39+
# historical reasons, the pattern . is ignored."
40+
split = [pt for pt in re.split(_SEP, p) if pt and pt != '.']
41+
# ".." component must be cleared with the potential previous
42+
# component, regardless of whether it exists: "A preprocessing
43+
# step [...] eliminates . and .. elements using Go's
44+
# filepath.".
45+
i = 0
46+
while i < len(split):
47+
if split[i] == '..':
48+
del split[i]
49+
if i > 0:
50+
del split[i - 1]
51+
i -= 1
52+
else:
53+
i += 1
54+
return split
55+
56+
patterns = (
57+
(True, normalize(p[1:]))
58+
if p.startswith('!') else
59+
(False, normalize(p))
60+
for p in patterns)
61+
patterns = list(reversed(list(chain(
62+
# Exclude empty patterns such as "." or the empty string.
63+
filter(lambda p: p[1], patterns),
64+
# Always include the Dockerfile and .dockerignore
65+
[(True, dockerfile.split('/')), (True, ['.dockerignore'])]))))
66+
return set(walk(root, patterns))
67+
68+
69+
def walk(root, patterns, default=True):
7370
"""
74-
Given a directory path, a list of exclude patterns, and a list of inclusion
75-
patterns:
76-
77-
1. Returns True if the directory path should be included according to
78-
should_include.
79-
2. Returns True if the directory path is the prefix for an inclusion
80-
pattern
81-
3. Returns False otherwise
71+
A collection of file lying below root that should be included according to
72+
patterns.
8273
"""
8374

84-
# To account for exception rules, check directories if their path is a
85-
# a prefix to an inclusion pattern. This logic conforms with the current
86-
# docker logic (2016-10-27):
87-
# https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
88-
89-
def normalize_path(path):
90-
return path.replace(os.path.sep, '/')
91-
92-
path_with_slash = normalize_path(directory_path) + '/'
93-
possible_child_patterns = [
94-
pattern for pattern in map(normalize_path, include_patterns)
95-
if (pattern + '/').startswith(path_with_slash)
96-
]
97-
directory_included = should_include(
98-
directory_path, exclude_patterns, include_patterns, root
99-
)
100-
return directory_included or len(possible_child_patterns) > 0
101-
102-
103-
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
104-
paths = []
105-
106-
for parent, dirs, files in os.walk(root, topdown=True, followlinks=False):
107-
parent = os.path.relpath(parent, root)
108-
if parent == '.':
109-
parent = ''
110-
111-
# Remove excluded patterns from the list of directories to traverse
112-
# by mutating the dirs we're iterating over.
113-
# This looks strange, but is considered the correct way to skip
114-
# traversal. See https://docs.python.org/2/library/os.html#os.walk
115-
dirs[:] = [
116-
d for d in dirs if should_check_directory(
117-
os.path.join(parent, d), exclude_patterns, include_patterns,
118-
root
119-
)
120-
]
121-
122-
for path in dirs:
123-
if should_include(os.path.join(parent, path),
124-
exclude_patterns, include_patterns, root):
125-
paths.append(os.path.join(parent, path))
126-
127-
for path in files:
128-
if should_include(os.path.join(parent, path),
129-
exclude_patterns, include_patterns, root):
130-
paths.append(os.path.join(parent, path))
131-
132-
return paths
133-
134-
135-
def match_path(path, pattern):
136-
137-
pattern = pattern.rstrip('/' + os.path.sep)
138-
if pattern and not os.path.isabs(pattern):
139-
pattern = os.path.relpath(pattern)
140-
141-
pattern_components = pattern.split(os.path.sep)
142-
if len(pattern_components) == 1 and IS_WINDOWS_PLATFORM:
143-
pattern_components = pattern.split('/')
144-
145-
if '**' not in pattern:
146-
path_components = path.split(os.path.sep)[:len(pattern_components)]
147-
else:
148-
path_components = path.split(os.path.sep)
149-
return fnmatch('/'.join(path_components), '/'.join(pattern_components))
75+
def match(p):
76+
if p[1][0] == '**':
77+
rec = (p[0], p[1][1:])
78+
return [p] + (match(rec) if rec[1] else [rec])
79+
elif fnmatch(f, p[1][0]):
80+
return [(p[0], p[1][1:])]
81+
else:
82+
return []
83+
84+
for f in os.listdir(root):
85+
cur = os.path.join(root, f)
86+
# The patterns if recursing in that directory.
87+
sub = list(chain(*(match(p) for p in patterns)))
88+
# Whether this file is explicitely included / excluded.
89+
hit = next((p[0] for p in sub if not p[1]), None)
90+
# Whether this file is implicitely included / excluded.
91+
matched = default if hit is None else hit
92+
sub = list(filter(lambda p: p[1], sub))
93+
if os.path.isdir(cur):
94+
# Entirely skip directories if there are no chance any subfile will
95+
# be included.
96+
if all(not p[0] for p in sub) and not matched:
97+
continue
98+
# I think this would greatly speed up dockerignore handling by not
99+
# recursing into directories we are sure would be entirely
100+
# included, and only yielding the directory itself, which will be
101+
# recursively archived anyway. However the current unit test expect
102+
# the full list of subfiles and I'm not 100% sure it would make no
103+
# difference yet.
104+
# if all(p[0] for p in sub) and matched:
105+
# yield f
106+
# continue
107+
children = False
108+
for r in (os.path.join(f, p) for p in walk(cur, sub, matched)):
109+
yield r
110+
children = True
111+
# The current unit tests expect directories only under those
112+
# conditions. It might be simplifiable though.
113+
if (not sub or not children) and hit or hit is None and default:
114+
yield f
115+
elif matched:
116+
yield f

tests/unit/utils_test.py

Lines changed: 24 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,6 @@
2323
decode_json_header, tar, split_command, parse_devices, update_headers,
2424
)
2525

26-
from docker.utils.build import should_check_directory
2726
from docker.utils.ports import build_port_bindings, split_port
2827
from docker.utils.utils import format_environment
2928

@@ -758,6 +757,13 @@ def test_single_subdir_single_filename_leading_slash(self):
758757
self.all_paths - set(['foo/a.py'])
759758
)
760759

760+
def test_exclude_include_absolute_path(self):
761+
base = make_tree([], ['a.py', 'b.py'])
762+
assert exclude_paths(
763+
base,
764+
['/*', '!/*.py']
765+
) == set(['a.py', 'b.py'])
766+
761767
def test_single_subdir_with_path_traversal(self):
762768
assert self.exclude(['foo/whoops/../a.py']) == convert_paths(
763769
self.all_paths - set(['foo/a.py'])
@@ -876,12 +882,25 @@ def test_trailing_double_wildcard(self):
876882
)
877883
)
878884

879-
def test_exclude_include_absolute_path(self):
880-
base = make_tree([], ['a.py', 'b.py'])
885+
def test_include_wildcard(self):
886+
base = make_tree(['a'], ['a/b.py'])
881887
assert exclude_paths(
882888
base,
883-
['/*', '!' + os.path.join(base, '*.py')]
884-
) == set(['a.py', 'b.py'])
889+
['*', '!*/b.py']
890+
) == convert_paths(['a/b.py'])
891+
892+
def test_last_line_precedence(self):
893+
base = make_tree(
894+
[],
895+
['garbage.md',
896+
'thrash.md',
897+
'README.md',
898+
'README-bis.md',
899+
'README-secret.md'])
900+
assert exclude_paths(
901+
base,
902+
['*.md', '!README*.md', 'README-secret.md']
903+
) == set(['README.md', 'README-bis.md'])
885904

886905

887906
class TarTest(unittest.TestCase):
@@ -1019,69 +1038,6 @@ def tar_test_negative_mtime_bug(self):
10191038
assert tar_data.getmember('th.txt').mtime == -3600
10201039

10211040

1022-
class ShouldCheckDirectoryTest(unittest.TestCase):
1023-
exclude_patterns = [
1024-
'exclude_rather_large_directory',
1025-
'dir/with/subdir_excluded',
1026-
'dir/with/exceptions'
1027-
]
1028-
1029-
include_patterns = [
1030-
'dir/with/exceptions/like_this_one',
1031-
'dir/with/exceptions/in/descendents'
1032-
]
1033-
1034-
def test_should_check_directory_not_excluded(self):
1035-
assert should_check_directory(
1036-
'not_excluded', self.exclude_patterns, self.include_patterns, '.'
1037-
)
1038-
assert should_check_directory(
1039-
convert_path('dir/with'), self.exclude_patterns,
1040-
self.include_patterns, '.'
1041-
)
1042-
1043-
def test_shoud_check_parent_directories_of_excluded(self):
1044-
assert should_check_directory(
1045-
'dir', self.exclude_patterns, self.include_patterns, '.'
1046-
)
1047-
assert should_check_directory(
1048-
convert_path('dir/with'), self.exclude_patterns,
1049-
self.include_patterns, '.'
1050-
)
1051-
1052-
def test_should_not_check_excluded_directories_with_no_exceptions(self):
1053-
assert not should_check_directory(
1054-
'exclude_rather_large_directory', self.exclude_patterns,
1055-
self.include_patterns, '.'
1056-
)
1057-
assert not should_check_directory(
1058-
convert_path('dir/with/subdir_excluded'), self.exclude_patterns,
1059-
self.include_patterns, '.'
1060-
)
1061-
1062-
def test_should_check_excluded_directory_with_exceptions(self):
1063-
assert should_check_directory(
1064-
convert_path('dir/with/exceptions'), self.exclude_patterns,
1065-
self.include_patterns, '.'
1066-
)
1067-
assert should_check_directory(
1068-
convert_path('dir/with/exceptions/in'), self.exclude_patterns,
1069-
self.include_patterns, '.'
1070-
)
1071-
1072-
def test_should_not_check_siblings_of_exceptions(self):
1073-
assert not should_check_directory(
1074-
convert_path('dir/with/exceptions/but_not_here'),
1075-
self.exclude_patterns, self.include_patterns, '.'
1076-
)
1077-
1078-
def test_should_check_subdirectories_of_exceptions(self):
1079-
assert should_check_directory(
1080-
convert_path('dir/with/exceptions/like_this_one/subdir'),
1081-
self.exclude_patterns, self.include_patterns, '.'
1082-
)
1083-
1084-
10851041
class FormatEnvironmentTest(unittest.TestCase):
10861042
def test_format_env_binary_unicode_value(self):
10871043
env_dict = {

0 commit comments

Comments
 (0)