|
1 | 1 | #!/usr/bin/env python3 |
2 | 2 | # SPDX-License-Identifier: Apache-2.0 |
3 | 3 | # SPDX-FileCopyrightText: Copyright contributors to the vLLM project |
4 | | -import os |
5 | 4 | import sys |
6 | 5 |
|
7 | 6 | import regex as re |
8 | 7 |
|
9 | | -try: |
10 | | - import pathspec |
11 | | -except ImportError: |
12 | | - print( |
13 | | - "ERROR: The 'pathspec' library is required. " |
14 | | - "Install it with 'pip install pathspec'.", |
15 | | - file=sys.stderr) |
16 | | - sys.exit(2) |
17 | | - |
18 | 8 | # List of files (relative to repo root) that are allowed to import pickle or |
19 | 9 | # cloudpickle |
20 | 10 | # |
|
25 | 15 | # Before adding new uses of pickle/cloudpickle, please consider safer |
26 | 16 | # alternatives like msgpack or pydantic that are already in use in vLLM. Only |
27 | 17 | # add to this list if absolutely necessary and after careful security review. |
28 | | -ALLOWED_FILES = set([ |
| 18 | +ALLOWED_FILES = { |
29 | 19 | # pickle |
30 | 20 | 'vllm/v1/serial_utils.py', |
31 | 21 | 'vllm/v1/executor/multiproc_executor.py', |
|
36 | 26 | 'tests/tokenization/test_cached_tokenizer.py', |
37 | 27 | 'vllm/distributed/utils.py', |
38 | 28 | 'vllm/distributed/parallel_state.py', |
39 | | - 'vllm/engine/multiprocessing/client.py', |
40 | 29 | 'vllm/distributed/device_communicators/all_reduce_utils.py', |
41 | 30 | 'vllm/distributed/device_communicators/shm_broadcast.py', |
42 | 31 | 'vllm/distributed/device_communicators/shm_object_storage.py', |
43 | | - 'vllm/engine/multiprocessing/engine.py', |
44 | 32 | 'benchmarks/kernels/graph_machete_bench.py', |
45 | 33 | 'benchmarks/kernels/benchmark_lora.py', |
46 | 34 | 'benchmarks/kernels/benchmark_machete.py', |
|
55 | 43 | 'tests/utils.py', |
56 | 44 | # pickle and cloudpickle |
57 | 45 | 'vllm/utils/__init__.py', |
58 | | - 'vllm/v1/serial_utils.py', |
59 | | - 'vllm/v1/executor/multiproc_executor.py', |
60 | | - 'vllm/transformers_utils/config.py', |
61 | | - 'vllm/model_executor/models/registry.py', |
62 | | - 'vllm/engine/multiprocessing/client.py', |
63 | | - 'vllm/engine/multiprocessing/engine.py', |
64 | | -]) |
| 46 | +} |
65 | 47 |
|
66 | 48 | PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)" |
67 | 49 | r"|from\s+(pickle|cloudpickle)\s+import\b)") |
68 | 50 |
|
69 | 51 |
|
70 | | -def is_python_file(path): |
71 | | - return path.endswith('.py') |
72 | | - |
73 | | - |
74 | | -def scan_file(path): |
| 52 | +def scan_file(path: str) -> int: |
75 | 53 | with open(path, encoding='utf-8') as f: |
76 | | - for line in f: |
| 54 | + for i, line in enumerate(f, 1): |
77 | 55 | if PICKLE_RE.match(line): |
78 | | - return True |
79 | | - return False |
80 | | - |
81 | | - |
82 | | -def load_gitignore(repo_root): |
83 | | - gitignore_path = os.path.join(repo_root, '.gitignore') |
84 | | - patterns = [] |
85 | | - if os.path.exists(gitignore_path): |
86 | | - with open(gitignore_path, encoding='utf-8') as f: |
87 | | - patterns = f.read().splitlines() |
88 | | - # Always ignore .git directory |
89 | | - patterns.append('.git/') |
90 | | - return pathspec.PathSpec.from_lines('gitwildmatch', patterns) |
| 56 | + print(f"{path}:{i}: " |
| 57 | + "\033[91merror:\033[0m " # red color |
| 58 | + "Found pickle/cloudpickle import") |
| 59 | + return 1 |
| 60 | + return 0 |
91 | 61 |
|
92 | 62 |
|
93 | 63 | def main(): |
94 | | - repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) |
95 | | - spec = load_gitignore(repo_root) |
96 | | - bad_files = [] |
97 | | - for dirpath, _, filenames in os.walk(repo_root): |
98 | | - for filename in filenames: |
99 | | - if not is_python_file(filename): |
100 | | - continue |
101 | | - abs_path = os.path.join(dirpath, filename) |
102 | | - rel_path = os.path.relpath(abs_path, repo_root) |
103 | | - # Skip ignored files |
104 | | - if spec.match_file(rel_path): |
105 | | - continue |
106 | | - if scan_file(abs_path) and rel_path not in ALLOWED_FILES: |
107 | | - bad_files.append(rel_path) |
108 | | - if bad_files: |
109 | | - print("\nERROR: The following files import 'pickle' or 'cloudpickle' " |
110 | | - "but are not in the allowed list:") |
111 | | - for f in bad_files: |
112 | | - print(f" {f}") |
113 | | - print("\nIf this is intentional, update the allowed list in " |
114 | | - "tools/check_pickle_imports.py.") |
115 | | - sys.exit(1) |
116 | | - sys.exit(0) |
| 64 | + returncode = 0 |
| 65 | + for filename in sys.argv[1:]: |
| 66 | + if filename in ALLOWED_FILES: |
| 67 | + continue |
| 68 | + returncode |= scan_file(filename) |
| 69 | + return returncode |
117 | 70 |
|
118 | 71 |
|
119 | 72 | def test_regex(): |
@@ -149,4 +102,4 @@ def test_regex(): |
149 | 102 | if '--test-regex' in sys.argv: |
150 | 103 | test_regex() |
151 | 104 | else: |
152 | | - main() |
| 105 | + sys.exit(main()) |
0 commit comments