Skip to content

Commit 64c824c

Browse files
authored
Make pickle import check fast (#25379)
Signed-off-by: Harry Mellor <[email protected]>
1 parent 417a164 commit 64c824c

File tree

2 files changed

+18
-66
lines changed

2 files changed

+18
-66
lines changed

.pre-commit-config.yaml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -155,11 +155,10 @@ repos:
155155
additional_dependencies: [regex]
156156
- id: check-pickle-imports
157157
name: Prevent new pickle/cloudpickle imports
158-
entry: python tools/check_pickle_imports.py
158+
entry: python tools/pre_commit/check_pickle_imports.py
159159
language: python
160160
types: [python]
161-
pass_filenames: false
162-
additional_dependencies: [pathspec, regex]
161+
additional_dependencies: [regex]
163162
- id: validate-config
164163
name: Validate configuration has default values and that each field has a docstring
165164
entry: python tools/validate_config.py
Lines changed: 16 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,10 @@
11
#!/usr/bin/env python3
22
# SPDX-License-Identifier: Apache-2.0
33
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
4-
import os
54
import sys
65

76
import regex as re
87

9-
try:
10-
import pathspec
11-
except ImportError:
12-
print(
13-
"ERROR: The 'pathspec' library is required. "
14-
"Install it with 'pip install pathspec'.",
15-
file=sys.stderr)
16-
sys.exit(2)
17-
188
# List of files (relative to repo root) that are allowed to import pickle or
199
# cloudpickle
2010
#
@@ -25,7 +15,7 @@
2515
# Before adding new uses of pickle/cloudpickle, please consider safer
2616
# alternatives like msgpack or pydantic that are already in use in vLLM. Only
2717
# add to this list if absolutely necessary and after careful security review.
28-
ALLOWED_FILES = set([
18+
ALLOWED_FILES = {
2919
# pickle
3020
'vllm/v1/serial_utils.py',
3121
'vllm/v1/executor/multiproc_executor.py',
@@ -36,11 +26,9 @@
3626
'tests/tokenization/test_cached_tokenizer.py',
3727
'vllm/distributed/utils.py',
3828
'vllm/distributed/parallel_state.py',
39-
'vllm/engine/multiprocessing/client.py',
4029
'vllm/distributed/device_communicators/all_reduce_utils.py',
4130
'vllm/distributed/device_communicators/shm_broadcast.py',
4231
'vllm/distributed/device_communicators/shm_object_storage.py',
43-
'vllm/engine/multiprocessing/engine.py',
4432
'benchmarks/kernels/graph_machete_bench.py',
4533
'benchmarks/kernels/benchmark_lora.py',
4634
'benchmarks/kernels/benchmark_machete.py',
@@ -55,65 +43,30 @@
5543
'tests/utils.py',
5644
# pickle and cloudpickle
5745
'vllm/utils/__init__.py',
58-
'vllm/v1/serial_utils.py',
59-
'vllm/v1/executor/multiproc_executor.py',
60-
'vllm/transformers_utils/config.py',
61-
'vllm/model_executor/models/registry.py',
62-
'vllm/engine/multiprocessing/client.py',
63-
'vllm/engine/multiprocessing/engine.py',
64-
])
46+
}
6547

6648
PICKLE_RE = re.compile(r"^\s*(import\s+(pickle|cloudpickle)(\s|$|\sas)"
6749
r"|from\s+(pickle|cloudpickle)\s+import\b)")
6850

6951

70-
def is_python_file(path):
71-
return path.endswith('.py')
72-
73-
74-
def scan_file(path):
52+
def scan_file(path: str) -> int:
7553
with open(path, encoding='utf-8') as f:
76-
for line in f:
54+
for i, line in enumerate(f, 1):
7755
if PICKLE_RE.match(line):
78-
return True
79-
return False
80-
81-
82-
def load_gitignore(repo_root):
83-
gitignore_path = os.path.join(repo_root, '.gitignore')
84-
patterns = []
85-
if os.path.exists(gitignore_path):
86-
with open(gitignore_path, encoding='utf-8') as f:
87-
patterns = f.read().splitlines()
88-
# Always ignore .git directory
89-
patterns.append('.git/')
90-
return pathspec.PathSpec.from_lines('gitwildmatch', patterns)
56+
print(f"{path}:{i}: "
57+
"\033[91merror:\033[0m " # red color
58+
"Found pickle/cloudpickle import")
59+
return 1
60+
return 0
9161

9262

9363
def main():
94-
repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
95-
spec = load_gitignore(repo_root)
96-
bad_files = []
97-
for dirpath, _, filenames in os.walk(repo_root):
98-
for filename in filenames:
99-
if not is_python_file(filename):
100-
continue
101-
abs_path = os.path.join(dirpath, filename)
102-
rel_path = os.path.relpath(abs_path, repo_root)
103-
# Skip ignored files
104-
if spec.match_file(rel_path):
105-
continue
106-
if scan_file(abs_path) and rel_path not in ALLOWED_FILES:
107-
bad_files.append(rel_path)
108-
if bad_files:
109-
print("\nERROR: The following files import 'pickle' or 'cloudpickle' "
110-
"but are not in the allowed list:")
111-
for f in bad_files:
112-
print(f" {f}")
113-
print("\nIf this is intentional, update the allowed list in "
114-
"tools/check_pickle_imports.py.")
115-
sys.exit(1)
116-
sys.exit(0)
64+
returncode = 0
65+
for filename in sys.argv[1:]:
66+
if filename in ALLOWED_FILES:
67+
continue
68+
returncode |= scan_file(filename)
69+
return returncode
11770

11871

11972
def test_regex():
@@ -149,4 +102,4 @@ def test_regex():
149102
if '--test-regex' in sys.argv:
150103
test_regex()
151104
else:
152-
main()
105+
sys.exit(main())

0 commit comments

Comments
 (0)