Skip to content

Commit 381f325

Browse files
dguido and claude
authored and committed
Add HuggingFace Hub direct scanning support
- Add --huggingface REPO_ID argument to scan models directly from HuggingFace Hub
- Add --hf-revision and --hf-token arguments for specific revisions and private repos
- Add huggingface optional dependency (huggingface_hub >= 0.20.0)
- Automatically filter for pickle-based files and skip safe formats like safetensors

Usage: fickling --huggingface bert-base-uncased --print-results

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent e5e34bb commit 381f325

File tree

2 files changed

+151
-1
lines changed

2 files changed

+151
-1
lines changed

fickling/cli.py

Lines changed: 147 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
import sys
44
from argparse import ArgumentParser
55
from ast import unparse
6+
from pathlib import PurePosixPath
67

78
from . import __version__, fickle, tracing
89
from .analysis import Severity, check_safety
@@ -11,6 +12,119 @@
1112

1213
# Fallback path for JSON safety results when the caller does not supply one.
DEFAULT_JSON_OUTPUT_FILE = "safety_results.json"
1314

15+
# File suffixes (lowercase, including the leading dot) that mark a repository
# file as pickle-based. Files whose lowercased suffix is not in this set are
# skipped by the HuggingFace scan.
HF_PICKLE_EXTENSIONS = frozenset({".bin", ".pt", ".pth", ".pkl", ".pickle"})
16+
17+
18+
def _scan_huggingface(
19+
repo_id: str,
20+
revision: str | None = None,
21+
token: str | None = None,
22+
json_output_path: str | None = None,
23+
print_results: bool = False,
24+
) -> int:
25+
"""Scan a HuggingFace Hub repository for potentially malicious pickle files.
26+
27+
Args:
28+
repo_id: HuggingFace repository ID (e.g., 'bert-base-uncased')
29+
revision: Specific revision (branch, tag, or commit) to scan
30+
token: HuggingFace API token for private repositories
31+
json_output_path: Path to write JSON results
32+
print_results: Whether to print results to console
33+
34+
Returns:
35+
EXIT_CLEAN (0), EXIT_UNSAFE (1), or EXIT_ERROR (2)
36+
"""
37+
try:
38+
from huggingface_hub import HfApi, hf_hub_download
39+
except ImportError:
40+
sys.stderr.write(
41+
"Error: huggingface_hub is required for --huggingface scanning.\n"
42+
"Install with: pip install fickling[huggingface]\n"
43+
)
44+
return EXIT_ERROR
45+
46+
api = HfApi(token=token)
47+
48+
try:
49+
repo_info = api.repo_info(repo_id=repo_id, revision=revision, token=token)
50+
except Exception as e: # noqa: BLE001 -- HF Hub may raise unpredictable HTTP errors
51+
sys.stderr.write(f"Error accessing HuggingFace repository '{repo_id}': {e!s}\n")
52+
return EXIT_ERROR
53+
54+
if repo_info is None or not repo_info.siblings:
55+
if print_results:
56+
print(f"No files found in {repo_id}")
57+
return EXIT_CLEAN
58+
59+
pickle_files = [
60+
f.rfilename
61+
for f in repo_info.siblings
62+
if PurePosixPath(f.rfilename).suffix.lower() in HF_PICKLE_EXTENSIONS
63+
]
64+
65+
if not pickle_files:
66+
if print_results:
67+
print(f"No pickle files found in {repo_id}")
68+
return EXIT_CLEAN
69+
70+
if print_results:
71+
print(f"Scanning {len(pickle_files)} file(s) in {repo_id}...")
72+
73+
overall_safe = True
74+
failed = 0
75+
json_output = json_output_path or DEFAULT_JSON_OUTPUT_FILE
76+
77+
for filename in pickle_files:
78+
if print_results:
79+
print(f"\n Scanning: {filename}")
80+
81+
try:
82+
local_path = hf_hub_download(
83+
repo_id=repo_id,
84+
filename=filename,
85+
revision=revision,
86+
token=token,
87+
)
88+
89+
with open(local_path, "rb") as f:
90+
stacked_pickled = fickle.StackedPickle.load(f, fail_on_decode_error=False)
91+
92+
for pickled in stacked_pickled:
93+
safety_results = check_safety(pickled, json_output_path=json_output)
94+
95+
if print_results:
96+
result_str = safety_results.to_string()
97+
if result_str:
98+
print(f" {result_str}")
99+
100+
if safety_results.severity > Severity.LIKELY_SAFE:
101+
overall_safe = False
102+
if print_results:
103+
sys.stderr.write(f" WARNING: {filename} may contain unsafe content!\n")
104+
105+
except fickle.PickleDecodeError as e:
106+
sys.stderr.write(f"Error parsing {filename}: {e!s}\n")
107+
sys.stderr.write(
108+
"Parsing errors may indicate a maliciously crafted pickle file. "
109+
"DO NOT TRUST this file without further analysis!\n"
110+
)
111+
overall_safe = False
112+
failed += 1
113+
except Exception as e: # noqa: BLE001 -- HF Hub may raise unpredictable errors
114+
sys.stderr.write(f"Error scanning {filename}: {e!s}\n")
115+
overall_safe = False
116+
failed += 1
117+
118+
if print_results:
119+
if failed > 0:
120+
sys.stderr.write(f"\nWARNING: {failed}/{len(pickle_files)} file(s) failed to scan\n")
121+
if overall_safe:
122+
print(f"\n{repo_id}: No obvious safety issues detected")
123+
else:
124+
print(f"\n{repo_id}: Potentially unsafe content detected!")
125+
126+
return EXIT_CLEAN if overall_safe else EXIT_UNSAFE
127+
14128

15129
def main(argv: list[str] | None = None) -> int:
16130
if argv is None:
@@ -98,6 +212,29 @@ def main(argv: list[str] | None = None) -> int:
98212
help="print a runtime trace while interpreting the input pickle file",
99213
)
100214
parser.add_argument("--version", "-v", action="store_true", help="print the version and exit")
215+
options.add_argument(
216+
"--huggingface",
217+
"--hf",
218+
type=str,
219+
default=None,
220+
metavar="REPO_ID",
221+
help="scan a model from HuggingFace Hub by repository ID (e.g., 'bert-base-uncased'). "
222+
"Requires huggingface_hub: pip install fickling[huggingface]",
223+
)
224+
parser.add_argument(
225+
"--hf-revision",
226+
type=str,
227+
default=None,
228+
help="specific revision (branch, tag, or commit) to scan from HuggingFace Hub",
229+
)
230+
parser.add_argument(
231+
"--hf-token",
232+
type=str,
233+
default=None,
234+
help="HuggingFace API token for private repositories. "
235+
"Prefer setting the HF_TOKEN environment variable to avoid token exposure in "
236+
"process listings",
237+
)
101238

102239
args = parser.parse_args(argv[1:])
103240

@@ -108,6 +245,16 @@ def main(argv: list[str] | None = None) -> int:
108245
print(__version__)
109246
return EXIT_CLEAN
110247

248+
# HuggingFace scanning mode
249+
if args.huggingface is not None:
250+
return _scan_huggingface(
251+
repo_id=args.huggingface,
252+
revision=args.hf_revision,
253+
token=args.hf_token,
254+
json_output_path=args.json_output,
255+
print_results=args.print_results,
256+
)
257+
111258
if args.create is None:
112259
if args.PICKLE_FILE == "-":
113260
if hasattr(sys.stdin, "buffer") and sys.stdin.buffer is not None:

pyproject.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,11 @@ test = [
4949
archive = [
5050
"py7zr >= 1.1.0, != 1.1.2", # For 7z archive support (1.1.2 yanked)
5151
]
52+
huggingface = [
53+
"huggingface_hub >= 0.20.0",
54+
]
5255
dev = [
53-
"fickling[lint,test,torch,archive]",
56+
"fickling[lint,test,torch,archive,huggingface]",
5457
# For git hooks: install prek (brew install prek / pipx install prek), then run: prek install
5558
]
5659
examples = ["numpy", "pytorchfi"]

0 commit comments

Comments
 (0)