Skip to content

Commit de79bbd

Browse files
dguido and claude
authored and committed
Add HuggingFace Hub direct scanning support
- Add --huggingface REPO_ID argument to scan models directly from HuggingFace Hub
- Add --hf-revision and --hf-token arguments for specific revisions and private repos
- Add huggingface optional dependency (huggingface_hub >= 0.20.0)
- Automatically filter for pickle-based files and skip safe formats like safetensors

Usage: fickling --huggingface bert-base-uncased --print-results

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent b6a983a commit de79bbd

File tree

2 files changed

+151
-1
lines changed

2 files changed

+151
-1
lines changed

fickling/cli.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,127 @@
33
import sys
44
from argparse import ArgumentParser
55
from ast import unparse
6+
from pathlib import PurePosixPath
67

78
from . import __version__, fickle, tracing
89
from .analysis import Severity, check_safety
910
from .constants import EXIT_CLEAN, EXIT_ERROR, EXIT_UNSAFE
1011

1112
# Fallback path for JSON results; _scan_huggingface uses it when its
# json_output_path argument is None or empty.
DEFAULT_JSON_OUTPUT_FILE = "safety_results.json"
1213

14+
# Lower-case file suffixes (dot included) that _scan_huggingface compares against
# the lower-cased suffix of each repository file name to pick the files it scans.
HF_PICKLE_EXTENSIONS = frozenset({".bin", ".pt", ".pth", ".pkl", ".pickle"})
15+
16+
17+
def _scan_huggingface(
    repo_id: str,
    revision: str | None = None,
    token: str | None = None,
    json_output_path: str | None = None,
    print_results: bool = False,
) -> int:
    """Download and safety-check the pickle-like files of a HuggingFace Hub repo.

    The repository's file listing is fetched, narrowed to names whose suffix is
    in ``HF_PICKLE_EXTENSIONS``, and each remaining file is downloaded, parsed
    as a stack of pickles and passed through ``check_safety``.

    Args:
        repo_id: HuggingFace repository ID (e.g., 'bert-base-uncased').
        revision: Branch, tag, or commit to scan; ``None`` lets the Hub decide.
        token: HuggingFace API token, needed for private repositories.
        json_output_path: Destination for JSON results; when falsy,
            ``DEFAULT_JSON_OUTPUT_FILE`` is used instead.
        print_results: When true, progress and per-file results are printed.

    Returns:
        ``EXIT_ERROR`` if ``huggingface_hub`` is missing or the repository
        cannot be queried; ``EXIT_CLEAN`` if there is nothing to scan or
        nothing scanned exceeded ``Severity.LIKELY_SAFE``; ``EXIT_UNSAFE`` if
        any pickle exceeded that severity or any file failed to scan.
    """
    # Imported lazily so the dependency stays optional for the rest of the CLI.
    try:
        from huggingface_hub import HfApi, hf_hub_download
    except ImportError:
        sys.stderr.write(
            "Error: huggingface_hub is required for --huggingface scanning.\n"
            "Install with: pip install fickling[huggingface]\n"
        )
        return EXIT_ERROR

    def _say(message: str) -> None:
        # Console output is opt-in; stay silent unless the caller asked for it.
        if print_results:
            print(message)

    hub = HfApi(token=token)

    try:
        info = hub.repo_info(repo_id=repo_id, revision=revision, token=token)
    except Exception as exc:  # noqa: BLE001 -- HF Hub may raise unpredictable HTTP errors
        sys.stderr.write(f"Error accessing HuggingFace repository '{repo_id}': {exc!s}\n")
        return EXIT_ERROR

    if info is None or not info.siblings:
        _say(f"No files found in {repo_id}")
        return EXIT_CLEAN

    # Keep only the files whose (case-insensitive) suffix marks them as pickle-based.
    targets = []
    for sibling in info.siblings:
        suffix = PurePosixPath(sibling.rfilename).suffix.lower()
        if suffix in HF_PICKLE_EXTENSIONS:
            targets.append(sibling.rfilename)

    if not targets:
        _say(f"No pickle files found in {repo_id}")
        return EXIT_CLEAN

    _say(f"Scanning {len(targets)} file(s) in {repo_id}...")

    results_path = json_output_path or DEFAULT_JSON_OUTPUT_FILE
    all_clean = True
    failure_count = 0

    for filename in targets:
        _say(f"\n Scanning: {filename}")

        try:
            downloaded = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                revision=revision,
                token=token,
            )

            with open(downloaded, "rb") as handle:
                pickles = fickle.StackedPickle.load(handle, fail_on_decode_error=False)

                for pickled in pickles:
                    verdict = check_safety(pickled, json_output_path=results_path)

                    if print_results:
                        rendered = verdict.to_string()
                        if rendered:
                            print(f" {rendered}")

                    if verdict.severity > Severity.LIKELY_SAFE:
                        all_clean = False
                        if print_results:
                            sys.stderr.write(
                                f" WARNING: {filename} may contain unsafe content!\n"
                            )

        except fickle.PickleDecodeError as exc:
            # An unparsable pickle is treated as unsafe, not merely as a failure.
            sys.stderr.write(f"Error parsing {filename}: {exc!s}\n")
            sys.stderr.write(
                "Parsing errors may indicate a maliciously crafted pickle file. "
                "DO NOT TRUST this file without further analysis!\n"
            )
            all_clean = False
            failure_count += 1
        except Exception as exc:  # noqa: BLE001 -- HF Hub may raise unpredictable errors
            # Anything that could not be scanned cannot be vouched for either.
            sys.stderr.write(f"Error scanning {filename}: {exc!s}\n")
            all_clean = False
            failure_count += 1

    if print_results:
        if failure_count > 0:
            sys.stderr.write(
                f"\nWARNING: {failure_count}/{len(targets)} file(s) failed to scan\n"
            )
        if all_clean:
            print(f"\n{repo_id}: No obvious safety issues detected")
        else:
            print(f"\n{repo_id}: Potentially unsafe content detected!")

    if all_clean:
        return EXIT_CLEAN
    return EXIT_UNSAFE
126+
13127

14128
def main(argv: list[str] | None = None) -> int:
15129
if argv is None:
@@ -97,6 +211,29 @@ def main(argv: list[str] | None = None) -> int:
97211
help="print a runtime trace while interpreting the input pickle file",
98212
)
99213
parser.add_argument("--version", "-v", action="store_true", help="print the version and exit")
214+
options.add_argument(
215+
"--huggingface",
216+
"--hf",
217+
type=str,
218+
default=None,
219+
metavar="REPO_ID",
220+
help="scan a model from HuggingFace Hub by repository ID (e.g., 'bert-base-uncased'). "
221+
"Requires huggingface_hub: pip install fickling[huggingface]",
222+
)
223+
parser.add_argument(
224+
"--hf-revision",
225+
type=str,
226+
default=None,
227+
help="specific revision (branch, tag, or commit) to scan from HuggingFace Hub",
228+
)
229+
parser.add_argument(
230+
"--hf-token",
231+
type=str,
232+
default=None,
233+
help="HuggingFace API token for private repositories. "
234+
"Prefer setting the HF_TOKEN environment variable to avoid token exposure in "
235+
"process listings",
236+
)
100237

101238
args = parser.parse_args(argv[1:])
102239

@@ -107,6 +244,16 @@ def main(argv: list[str] | None = None) -> int:
107244
print(__version__)
108245
return EXIT_CLEAN
109246

247+
# HuggingFace scanning mode
248+
if args.huggingface is not None:
249+
return _scan_huggingface(
250+
repo_id=args.huggingface,
251+
revision=args.hf_revision,
252+
token=args.hf_token,
253+
json_output_path=args.json_output,
254+
print_results=args.print_results,
255+
)
256+
110257
if args.create is None:
111258
if args.PICKLE_FILE == "-":
112259
if hasattr(sys.stdin, "buffer") and sys.stdin.buffer is not None:

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ test = [
4949
archive = [
5050
"py7zr >= 1.1.0, != 1.1.2", # For 7z archive support (1.1.2 yanked)
5151
]
52+
huggingface = [
53+
"huggingface_hub >= 0.20.0",
54+
]
5255
dev = [
53-
"fickling[lint,test,torch,archive]",
56+
"fickling[lint,test,torch,archive,huggingface]",
5457
# For git hooks: install prek (brew install prek / pipx install prek), then run: prek install
5558
]
5659
examples = ["numpy", "pytorchfi"]

0 commit comments

Comments
 (0)