1
1
import os
2
+ import re
2
3
3
4
from ..constants import IS_WINDOWS_PLATFORM
4
- from .fnmatch import fnmatch
5
+ from fnmatch import fnmatch
6
+ from itertools import chain
5
7
from .utils import create_archive
6
8
7
9
8
10
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
    """
    Build an archive of the directory at ``path``.

    The files to archive are selected by ``exclude_paths`` from the absolute
    form of ``path``, using ``exclude`` as the list of patterns (a falsy
    value is treated as an empty list) and ``dockerfile`` as the Dockerfile
    location. The sorted selection is handed to ``create_archive`` together
    with ``fileobj`` and ``gzip``; whatever ``create_archive`` returns is
    returned unchanged.
    """
    root = os.path.abspath(path)
    if not exclude:
        exclude = []
    included = exclude_paths(root, exclude, dockerfile=dockerfile)
    return create_archive(
        files=sorted(included), root=root, fileobj=fileobj, gzip=gzip
    )
16
17
17
18
19
# Separator used to split patterns into path components: "/" on every
# platform, and additionally "\" on Windows. A raw-string character class
# keeps the backslash escaping unambiguous.
_SEP = re.compile(r'[/\\]') if IS_WINDOWS_PLATFORM else re.compile('/')
20
+
21
+
18
22
def exclude_paths(root, patterns, dockerfile=None):
    """
    Given a root directory path and a list of .dockerignore patterns, return
    the set of paths below the root that are selected by ``walk`` once the
    patterns have been applied.

    Patterns starting with ``!`` are treated as inclusion rules, all others
    as exclusion rules. Later patterns take precedence over earlier ones.
    Inclusion rules for the Dockerfile (``dockerfile``, defaulting to
    ``'Dockerfile'``) and for ``.dockerignore`` are appended last, so they
    take precedence over every user-supplied pattern.

    All paths returned are relative to the root.
    """
    if dockerfile is None:
        dockerfile = 'Dockerfile'

    def normalize(p):
        # Leading and trailing separators are not relevant. Yes,
        # "foo.py/" must exclude the "foo.py" regular file. "."
        # components are not relevant either, even if the whole
        # pattern is only ".", as the Docker reference states: "For
        # historical reasons, the pattern . is ignored."
        parts = []
        for part in re.split(_SEP, p):
            if not part or part == '.':
                continue
            if part == '..':
                # A ".." component cancels the previous component, if any,
                # regardless of whether that component exists on disk.
                if parts:
                    parts.pop()
                continue
            parts.append(part)
        return parts

    rules = [
        (True, normalize(p[1:])) if p.startswith('!') else (False, normalize(p))
        for p in patterns
    ]
    # Always include the Dockerfile and .dockerignore. The Dockerfile path
    # goes through the same normalization as the patterns so that forms such
    # as "./Dockerfile", or backslash-separated paths on Windows, still match.
    rules.append((True, normalize(dockerfile)))
    rules.append((True, ['.dockerignore']))
    # walk() gives precedence to the earliest rule, hence the reversal.
    # Rules that normalized to nothing (such as "." or the empty string)
    # are dropped because walk() requires at least one component per rule.
    rules = [rule for rule in reversed(rules) if rule[1]]
    return set(walk(root, rules))
67
+
68
+
69
def walk(root, patterns, default=True):
    """
    Yield the paths lying below ``root`` that should be included according
    to ``patterns``.

    :param root: directory whose entries are listed with ``os.listdir``.
    :param patterns: list of ``(include, components)`` pairs. ``include`` is
        a boolean and ``components`` is a non-empty list of ``fnmatch``-style
        path components; a ``'**'`` component matches any number of
        directory levels, including none. When several patterns fully match
        an entry, the earliest one in the list wins.
    :param default: whether an entry that no pattern fully matches is
        included.
    :return: a generator of paths relative to ``root``.

    Symbolic links to directories are reported like regular files and are
    never descended into, so link cycles cannot cause runaway recursion.
    """

    def match(name, pattern):
        # Return the patterns that remain to be matched once ``name`` has
        # consumed the first component of ``pattern``. A pattern whose
        # component list becomes empty has fully matched ``name``.
        include, components = pattern
        head, rest = components[0], components[1:]
        if head == '**':
            remainder = (include, rest)
            # Keep the "**" pattern alive for deeper levels, and also try
            # the remainder against the current entry (zero levels).
            return [pattern] + (match(name, remainder) if rest else [remainder])
        if fnmatch(name, head):
            return [(include, rest)]
        return []

    for name in os.listdir(root):
        cur = os.path.join(root, name)
        candidates = list(
            chain.from_iterable(match(name, p) for p in patterns))
        # Whether this entry is explicitly included / excluded.
        hit = next((inc for inc, comps in candidates if not comps), None)
        # Whether this entry is included once the default is applied.
        matched = default if hit is None else hit
        # The patterns to use if recursing into that directory.
        sub = [p for p in candidates if p[1]]
        if os.path.isdir(cur) and not os.path.islink(cur):
            children = False
            for child in walk(cur, sub, matched):
                yield os.path.join(name, child)
                children = True
            # The current unit tests expect directories only under those
            # conditions. It might be simplifiable though.
            if ((not sub or not children) and hit) or (hit is None and default):
                yield name
        elif matched:
            yield name
0 commit comments