1
1
import os
2
+ import re
2
3
3
4
from ..constants import IS_WINDOWS_PLATFORM
4
- from .fnmatch import fnmatch
5
+ from fnmatch import fnmatch
6
+ from itertools import chain
5
7
from .utils import create_archive
6
8
7
9
8
10
def tar (path , exclude = None , dockerfile = None , fileobj = None , gzip = False ):
9
11
root = os .path .abspath (path )
10
12
exclude = exclude or []
11
-
12
13
return create_archive (
13
14
files = sorted (exclude_paths (root , exclude , dockerfile = dockerfile )),
14
15
root = root , fileobj = fileobj , gzip = gzip
15
16
)
16
17
17
18
19
+ _SEP = re .compile ('/|\\ \\ ' ) if IS_WINDOWS_PLATFORM else re .compile ('/' )
20
+
21
+
18
22
def exclude_paths (root , patterns , dockerfile = None ):
19
23
"""
20
24
Given a root directory path and a list of .dockerignore patterns, return
@@ -23,127 +27,90 @@ def exclude_paths(root, patterns, dockerfile=None):
23
27
24
28
All paths returned are relative to the root.
25
29
"""
30
+
26
31
if dockerfile is None :
27
32
dockerfile = 'Dockerfile'
28
33
29
- patterns = [p .lstrip ('/' ) for p in patterns ]
30
- exceptions = [p for p in patterns if p .startswith ('!' )]
31
-
32
- include_patterns = [p [1 :] for p in exceptions ]
33
- include_patterns += [dockerfile , '.dockerignore' ]
34
-
35
- exclude_patterns = list (set (patterns ) - set (exceptions ))
36
-
37
- paths = get_paths (root , exclude_patterns , include_patterns ,
38
- has_exceptions = len (exceptions ) > 0 )
39
-
40
- return set (paths ).union (
41
- # If the Dockerfile is in a subdirectory that is excluded, get_paths
42
- # will not descend into it and the file will be skipped. This ensures
43
- # it doesn't happen.
44
- set ([dockerfile .replace ('/' , os .path .sep )])
45
- if os .path .exists (os .path .join (root , dockerfile )) else set ()
46
- )
47
-
48
-
49
- def should_include (path , exclude_patterns , include_patterns , root ):
50
- """
51
- Given a path, a list of exclude patterns, and a list of inclusion patterns:
52
-
53
- 1. Returns True if the path doesn't match any exclusion pattern
54
- 2. Returns False if the path matches an exclusion pattern and doesn't match
55
- an inclusion pattern
56
- 3. Returns true if the path matches an exclusion pattern and matches an
57
- inclusion pattern
58
- """
59
- for pattern in exclude_patterns :
60
- if match_path (path , pattern ):
61
- for pattern in include_patterns :
62
- if match_path (path , pattern ):
63
- return True
64
- if os .path .isabs (pattern ) and match_path (
65
- os .path .join (root , path ), pattern ):
66
- return True
67
- return False
68
- return True
69
-
70
-
71
- def should_check_directory (directory_path , exclude_patterns , include_patterns ,
72
- root ):
34
+ def normalize (p ):
35
+ # Leading and trailing slashes are not relevant. Yes,
36
+ # "foo.py/" must exclude the "foo.py" regular file. "."
37
+ # components are not relevant either, even if the whole
38
+ # pattern is only ".", as the Docker reference states: "For
39
+ # historical reasons, the pattern . is ignored."
40
+ split = [pt for pt in re .split (_SEP , p ) if pt and pt != '.' ]
41
+ # ".." component must be cleared with the potential previous
42
+ # component, regardless of whether it exists: "A preprocessing
43
+ # step [...] eliminates . and .. elements using Go's
44
+ # filepath.".
45
+ i = 0
46
+ while i < len (split ):
47
+ if split [i ] == '..' :
48
+ del split [i ]
49
+ if i > 0 :
50
+ del split [i - 1 ]
51
+ i -= 1
52
+ else :
53
+ i += 1
54
+ return split
55
+
56
+ patterns = (
57
+ (True , normalize (p [1 :]))
58
+ if p .startswith ('!' ) else
59
+ (False , normalize (p ))
60
+ for p in patterns )
61
+ patterns = list (reversed (list (chain (
62
+ # Exclude empty patterns such as "." or the empty string.
63
+ filter (lambda p : p [1 ], patterns ),
64
+ # Always include the Dockerfile and .dockerignore
65
+ [(True , dockerfile .split ('/' )), (True , ['.dockerignore' ])]))))
66
+ return set (walk (root , patterns ))
67
+
68
+
69
+ def walk (root , patterns , default = True ):
73
70
"""
74
- Given a directory path, a list of exclude patterns, and a list of inclusion
75
- patterns:
76
-
77
- 1. Returns True if the directory path should be included according to
78
- should_include.
79
- 2. Returns True if the directory path is the prefix for an inclusion
80
- pattern
81
- 3. Returns False otherwise
71
+ A collection of file lying below root that should be included according to
72
+ patterns.
82
73
"""
83
74
84
- # To account for exception rules, check directories if their path is a
85
- # a prefix to an inclusion pattern. This logic conforms with the current
86
- # docker logic (2016-10-27):
87
- # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671
88
-
89
- def normalize_path (path ):
90
- return path .replace (os .path .sep , '/' )
91
-
92
- path_with_slash = normalize_path (directory_path ) + '/'
93
- possible_child_patterns = [
94
- pattern for pattern in map (normalize_path , include_patterns )
95
- if (pattern + '/' ).startswith (path_with_slash )
96
- ]
97
- directory_included = should_include (
98
- directory_path , exclude_patterns , include_patterns , root
99
- )
100
- return directory_included or len (possible_child_patterns ) > 0
101
-
102
-
103
- def get_paths (root , exclude_patterns , include_patterns , has_exceptions = False ):
104
- paths = []
105
-
106
- for parent , dirs , files in os .walk (root , topdown = True , followlinks = False ):
107
- parent = os .path .relpath (parent , root )
108
- if parent == '.' :
109
- parent = ''
110
-
111
- # Remove excluded patterns from the list of directories to traverse
112
- # by mutating the dirs we're iterating over.
113
- # This looks strange, but is considered the correct way to skip
114
- # traversal. See https://docs.python.org/2/library/os.html#os.walk
115
- dirs [:] = [
116
- d for d in dirs if should_check_directory (
117
- os .path .join (parent , d ), exclude_patterns , include_patterns ,
118
- root
119
- )
120
- ]
121
-
122
- for path in dirs :
123
- if should_include (os .path .join (parent , path ),
124
- exclude_patterns , include_patterns , root ):
125
- paths .append (os .path .join (parent , path ))
126
-
127
- for path in files :
128
- if should_include (os .path .join (parent , path ),
129
- exclude_patterns , include_patterns , root ):
130
- paths .append (os .path .join (parent , path ))
131
-
132
- return paths
133
-
134
-
135
- def match_path (path , pattern ):
136
-
137
- pattern = pattern .rstrip ('/' + os .path .sep )
138
- if pattern and not os .path .isabs (pattern ):
139
- pattern = os .path .relpath (pattern )
140
-
141
- pattern_components = pattern .split (os .path .sep )
142
- if len (pattern_components ) == 1 and IS_WINDOWS_PLATFORM :
143
- pattern_components = pattern .split ('/' )
144
-
145
- if '**' not in pattern :
146
- path_components = path .split (os .path .sep )[:len (pattern_components )]
147
- else :
148
- path_components = path .split (os .path .sep )
149
- return fnmatch ('/' .join (path_components ), '/' .join (pattern_components ))
75
+ def match (p ):
76
+ if p [1 ][0 ] == '**' :
77
+ rec = (p [0 ], p [1 ][1 :])
78
+ return [p ] + (match (rec ) if rec [1 ] else [rec ])
79
+ elif fnmatch (f , p [1 ][0 ]):
80
+ return [(p [0 ], p [1 ][1 :])]
81
+ else :
82
+ return []
83
+
84
+ for f in os .listdir (root ):
85
+ cur = os .path .join (root , f )
86
+ # The patterns if recursing in that directory.
87
+ sub = list (chain (* (match (p ) for p in patterns )))
88
+ # Whether this file is explicitely included / excluded.
89
+ hit = next ((p [0 ] for p in sub if not p [1 ]), None )
90
+ # Whether this file is implicitely included / excluded.
91
+ matched = default if hit is None else hit
92
+ sub = list (filter (lambda p : p [1 ], sub ))
93
+ if os .path .isdir (cur ):
94
+ # Entirely skip directories if there are no chance any subfile will
95
+ # be included.
96
+ if all (not p [0 ] for p in sub ) and not matched :
97
+ continue
98
+ # I think this would greatly speed up dockerignore handling by not
99
+ # recursing into directories we are sure would be entirely
100
+ # included, and only yielding the directory itself, which will be
101
+ # recursively archived anyway. However the current unit test expect
102
+ # the full list of subfiles and I'm not 100% sure it would make no
103
+ # difference yet.
104
+ # if all(p[0] for p in sub) and matched:
105
+ # yield f
106
+ # continue
107
+ children = False
108
+ for r in (os .path .join (f , p ) for p in walk (cur , sub , matched )):
109
+ yield r
110
+ children = True
111
+ # The current unit tests expect directories only under those
112
+ # conditions. It might be simplifiable though.
113
+ if (not sub or not children ) and hit or hit is None and default :
114
+ yield f
115
+ elif matched :
116
+ yield f
0 commit comments