|
1 | 1 | #!/usr/bin/env python3
|
2 | 2 | # SPDX-License-Identifier: Apache-2.0
|
3 | 3 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
4 |
| -import os |
5 | 4 | import sys
|
6 | 5 |
|
7 | 6 | import regex as re
|
8 | 7 |
|
9 |
| -try: |
10 |
| - import pathspec |
11 |
| -except ImportError: |
12 |
| - print( |
13 |
| - "ERROR: The 'pathspec' library is required. " |
14 |
| - "Install it with 'pip install pathspec'.", |
15 |
| - file=sys.stderr) |
16 |
| - sys.exit(2) |
17 |
| - |
18 | 8 | # List of files (relative to repo root) that are allowed to import pickle or
|
19 | 9 | # cloudpickle
|
20 | 10 | #
|
|
25 | 15 | # Before adding new uses of pickle/cloudpickle, please consider safer
|
26 | 16 | # alternatives like msgpack or pydantic that are already in use in vLLM. Only
|
27 | 17 | # add to this list if absolutely necessary and after careful security review.
|
28 |
| -ALLOWED_FILES = set([ |
| 18 | +ALLOWED_FILES = { |
29 | 19 | # pickle
|
30 | 20 | 'vllm/v1/serial_utils.py',
|
31 | 21 | 'vllm/v1/executor/multiproc_executor.py',
|
|
36 | 26 | 'tests/tokenization/test_cached_tokenizer.py',
|
37 | 27 | 'vllm/distributed/utils.py',
|
38 | 28 | 'vllm/distributed/parallel_state.py',
|
39 |
| - 'vllm/engine/multiprocessing/client.py', |
40 | 29 | 'vllm/distributed/device_communicators/all_reduce_utils.py',
|
41 | 30 | 'vllm/distributed/device_communicators/shm_broadcast.py',
|
42 | 31 | 'vllm/distributed/device_communicators/shm_object_storage.py',
|
43 |
| - 'vllm/engine/multiprocessing/engine.py', |
44 | 32 | 'benchmarks/kernels/graph_machete_bench.py',
|
45 | 33 | 'benchmarks/kernels/benchmark_lora.py',
|
46 | 34 | 'benchmarks/kernels/benchmark_machete.py',
|
|
55 | 43 | 'tests/utils.py',
|
56 | 44 | # pickle and cloudpickle
|
57 | 45 | 'vllm/utils/__init__.py',
|
58 |
| - 'vllm/v1/serial_utils.py', |
59 |
| - 'vllm/v1/executor/multiproc_executor.py', |
60 |
| - 'vllm/transformers_utils/config.py', |
61 |
| - 'vllm/model_executor/models/registry.py', |
62 |
| - 'vllm/engine/multiprocessing/client.py', |
63 |
| - 'vllm/engine/multiprocessing/engine.py', |
64 |
| -]) |
| 46 | +} |
65 | 47 |
|
66 | 48 | PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)"
|
67 | 49 | r"|from\s+(pickle|cloudpickle)\s+import\b)")
|
68 | 50 |
|
69 | 51 |
|
70 |
| -def is_python_file(path): |
71 |
| - return path.endswith('.py') |
72 |
| - |
73 |
| - |
74 |
| -def scan_file(path): |
| 52 | +def scan_file(path: str) -> int: |
75 | 53 | with open(path, encoding='utf-8') as f:
|
76 |
| - for line in f: |
| 54 | + for i, line in enumerate(f, 1): |
77 | 55 | if PICKLE_RE.match(line):
|
78 |
| - return True |
79 |
| - return False |
80 |
| - |
81 |
| - |
82 |
| -def load_gitignore(repo_root): |
83 |
| - gitignore_path = os.path.join(repo_root, '.gitignore') |
84 |
| - patterns = [] |
85 |
| - if os.path.exists(gitignore_path): |
86 |
| - with open(gitignore_path, encoding='utf-8') as f: |
87 |
| - patterns = f.read().splitlines() |
88 |
| - # Always ignore .git directory |
89 |
| - patterns.append('.git/') |
90 |
| - return pathspec.PathSpec.from_lines('gitwildmatch', patterns) |
| 56 | + print(f"{path}:{i}: " |
| 57 | + "\033[91merror:\033[0m " # red color |
| 58 | + "Found pickle/cloudpickle import") |
| 59 | + return 1 |
| 60 | + return 0 |
91 | 61 |
|
92 | 62 |
|
93 | 63 | def main():
|
94 |
| - repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
95 |
| - spec = load_gitignore(repo_root) |
96 |
| - bad_files = [] |
97 |
| - for dirpath, _, filenames in os.walk(repo_root): |
98 |
| - for filename in filenames: |
99 |
| - if not is_python_file(filename): |
100 |
| - continue |
101 |
| - abs_path = os.path.join(dirpath, filename) |
102 |
| - rel_path = os.path.relpath(abs_path, repo_root) |
103 |
| - # Skip ignored files |
104 |
| - if spec.match_file(rel_path): |
105 |
| - continue |
106 |
| - if scan_file(abs_path) and rel_path not in ALLOWED_FILES: |
107 |
| - bad_files.append(rel_path) |
108 |
| - if bad_files: |
109 |
| - print("\nERROR: The following files import 'pickle' or 'cloudpickle' " |
110 |
| - "but are not in the allowed list:") |
111 |
| - for f in bad_files: |
112 |
| - print(f" {f}") |
113 |
| - print("\nIf this is intentional, update the allowed list in " |
114 |
| - "tools/check_pickle_imports.py.") |
115 |
| - sys.exit(1) |
116 |
| - sys.exit(0) |
| 64 | + returncode = 0 |
| 65 | + for filename in sys.argv[1:]: |
| 66 | + if filename in ALLOWED_FILES: |
| 67 | + continue |
| 68 | + returncode |= scan_file(filename) |
| 69 | + return returncode |
117 | 70 |
|
118 | 71 |
|
119 | 72 | def test_regex():
|
@@ -149,4 +102,4 @@ def test_regex():
|
149 | 102 | if '--test-regex' in sys.argv:
|
150 | 103 | test_regex()
|
151 | 104 | else:
|
152 |
| - main() |
| 105 | + sys.exit(main()) |
0 commit comments