|
| 1 | +import os |
| 2 | + |
| 3 | +from .fnmatch import fnmatch |
| 4 | +from .utils import create_archive |
| 5 | + |
| 6 | + |
def tar(path, exclude=None, dockerfile=None, fileobj=None, gzip=False):
    """
    Archive the directory at ``path``, leaving out entries matched by the
    ``exclude`` patterns.

    ``path`` is made absolute, the surviving root-relative paths are computed
    by ``exclude_paths`` and sorted, and the result is handed to
    ``create_archive`` together with ``fileobj`` and ``gzip``. Whatever
    ``create_archive`` returns is returned unchanged.
    """
    base_dir = os.path.abspath(path)
    ignore_patterns = exclude if exclude else []

    archive_members = list(
        exclude_paths(base_dir, ignore_patterns, dockerfile=dockerfile)
    )
    # Sort so the archive members are listed in a deterministic order.
    archive_members.sort()

    return create_archive(
        files=archive_members, root=base_dir, fileobj=fileobj, gzip=gzip
    )
| 15 | + |
| 16 | + |
def exclude_paths(root, patterns, dockerfile=None):
    """
    Compute which paths under ``root`` survive a list of .dockerignore
    patterns.

    Patterns starting with ``!`` are treated as exceptions (re-inclusions);
    every other pattern is an exclusion. The Dockerfile (``'Dockerfile'``
    when ``dockerfile`` is None) and ``.dockerignore`` are always added to
    the inclusion patterns.

    Returns a set of paths (regular files and directories), all relative to
    ``root``.
    """
    dockerfile_name = 'Dockerfile' if dockerfile is None else dockerfile

    negated = [entry for entry in patterns if entry.startswith('!')]

    # Strip the leading '!' to obtain the plain inclusion patterns.
    keep_patterns = [entry[1:] for entry in negated]
    keep_patterns.extend([dockerfile_name, '.dockerignore'])

    drop_patterns = list(set(patterns) - set(negated))

    surviving = set(
        get_paths(root, drop_patterns, keep_patterns,
                  has_exceptions=len(negated) > 0)
    )

    # If the Dockerfile lives in an excluded subdirectory, get_paths never
    # descends into it and the file would be skipped; add it back explicitly
    # whenever it exists on disk.
    if os.path.exists(os.path.join(root, dockerfile_name)):
        surviving.add(dockerfile_name)

    return surviving
| 45 | + |
| 46 | + |
def should_include(path, exclude_patterns, include_patterns):
    """
    Decide whether ``path`` is kept, given exclusion and inclusion patterns.

    * No exclusion pattern matches: returns True.
    * An exclusion pattern matches and some inclusion pattern matches too:
      returns True.
    * An exclusion pattern matches and no inclusion pattern does: returns
      False.
    """
    is_excluded = any(
        match_path(path, candidate) for candidate in exclude_patterns
    )
    if not is_excluded:
        return True

    # Excluded: only an explicit inclusion pattern can bring the path back.
    return any(
        match_path(path, candidate) for candidate in include_patterns
    )
| 64 | + |
| 65 | + |
def should_check_directory(directory_path, exclude_patterns, include_patterns):
    """
    Decide whether a directory has to be traversed.

    Returns True when either

    * ``should_include`` accepts the directory itself, or
    * the directory path is a prefix of at least one inclusion pattern,

    and False otherwise.
    """

    # To account for exception rules, directories are still checked when
    # their path is a prefix of an inclusion pattern. This logic conforms
    # with the docker logic as of 2016-10-27:
    # https://github.com/docker/docker/blob/bc52939b0455116ab8e0da67869ec81c1a1c3e2c/pkg/archive/archive.go#L640-L671

    def to_forward_slashes(raw):
        return raw.replace(os.path.sep, '/')

    dir_prefix = to_forward_slashes(directory_path) + '/'
    normalized_includes = [
        to_forward_slashes(entry) for entry in include_patterns
    ]
    # The trailing '/' on both sides makes the prefix test respect path
    # component boundaries ('foo/' is not a prefix of 'foobar/').
    has_child_pattern = any(
        (entry + '/').startswith(dir_prefix) for entry in normalized_includes
    )

    if should_include(directory_path, exclude_patterns, include_patterns):
        return True
    return has_child_pattern
| 95 | + |
| 96 | + |
def get_paths(root, exclude_patterns, include_patterns, has_exceptions=False):
    """
    Walk ``root`` top-down (without following symlinks) and collect every
    directory and file path that ``should_include`` accepts.

    Returned paths are relative to ``root``. Directories rejected by
    ``should_check_directory`` are pruned and never descended into.
    ``has_exceptions`` is accepted but not used by this function.
    """
    collected = []

    walker = os.walk(root, topdown=True, followlinks=False)
    for current, subdirs, filenames in walker:
        rel_parent = os.path.relpath(current, root)
        if rel_parent == '.':
            rel_parent = ''

        # Prune the traversal by mutating the directory list in place; this
        # is the documented way to make os.walk skip subtrees. See
        # https://docs.python.org/2/library/os.html#os.walk
        kept_dirs = []
        for name in subdirs:
            candidate = os.path.join(rel_parent, name)
            if should_check_directory(candidate, exclude_patterns,
                                      include_patterns):
                kept_dirs.append(name)
        subdirs[:] = kept_dirs

        # Remaining directories first, then files, as separate entries.
        for name in subdirs + filenames:
            candidate = os.path.join(rel_parent, name)
            if should_include(candidate, exclude_patterns, include_patterns):
                collected.append(candidate)

    return collected
| 126 | + |
| 127 | + |
def match_path(path, pattern):
    """
    Return the result of matching ``path`` against a single ignore
    ``pattern``.

    Trailing ``/`` and ``os.path.sep`` characters are stripped from the
    pattern, and a non-empty pattern is normalized with ``os.path.relpath``.

    * Without ``**`` in the pattern, only the leading components of ``path``
      (as many as the pattern has) are compared, so a pattern naming a
      directory also matches everything below it.
    * With ``**`` in the pattern, the whole path is compared.

    Both sides are handed to ``fnmatch`` with ``/`` as the separator.
    NOTE(review): ``fnmatch`` is the package-local matcher imported from
    ``.fnmatch``; its exact glob semantics are not visible here.
    """
    pattern = pattern.rstrip('/' + os.path.sep)
    if pattern:
        pattern = os.path.relpath(pattern)

    # os.path.relpath emits os.path.sep between components. Split on it so
    # the pattern can be re-joined with '/' below, exactly like the path;
    # otherwise multi-component patterns can never match on platforms where
    # os.path.sep is not '/'.
    pattern_components = pattern.split(os.path.sep)
    path_components = path.split(os.path.sep)

    if '**' not in pattern:
        path_components = path_components[:len(pattern_components)]

    return fnmatch('/'.join(path_components), '/'.join(pattern_components))
0 commit comments