33import sys
44from argparse import ArgumentParser
55from ast import unparse
6+ from pathlib import PurePosixPath
67
78from . import __version__ , fickle , tracing
89from .analysis import Severity , check_safety
1112
# File that receives JSON safety results when no explicit output path is given.
DEFAULT_JSON_OUTPUT_FILE = "safety_results.json"

# Lower-cased file suffixes that the HuggingFace scanner treats as pickle files.
HF_PICKLE_EXTENSIONS = frozenset(
    (
        ".bin",
        ".pt",
        ".pth",
        ".pkl",
        ".pickle",
    )
)
16+
17+
def _scan_huggingface(
    repo_id: str,
    revision: str | None = None,
    token: str | None = None,
    json_output_path: str | None = None,
    print_results: bool = False,
) -> int:
    """Scan a HuggingFace Hub repository for potentially malicious pickle files.

    Every file in the repository whose suffix (case-insensitive) is listed in
    ``HF_PICKLE_EXTENSIONS`` is fetched with ``hf_hub_download``, loaded as a
    stacked pickle, and each contained pickle is passed to ``check_safety``.

    Args:
        repo_id: HuggingFace repository ID (e.g., 'bert-base-uncased').
        revision: Specific revision (branch, tag, or commit) to scan.
        token: HuggingFace API token for private repositories.
        json_output_path: Path handed to ``check_safety`` for JSON results;
            ``DEFAULT_JSON_OUTPUT_FILE`` is used when this is falsy.
        print_results: When true, progress, per-pickle results, unsafe-file
            warnings and the final summary are emitted. Per-file error
            messages are written to stderr regardless of this flag.

    Returns:
        ``EXIT_ERROR`` if ``huggingface_hub`` cannot be imported or the
        repository lookup raises.
        ``EXIT_CLEAN`` if the repository lists no files, lists no pickle-like
        files, or nothing scanned was rated above ``Severity.LIKELY_SAFE``.
        ``EXIT_UNSAFE`` if any pickle is rated above ``Severity.LIKELY_SAFE``
        or any file fails to download, parse, or scan.
    """
    # Imported here so the dependency is only needed when this mode is used.
    try:
        from huggingface_hub import HfApi, hf_hub_download
    except ImportError:
        sys.stderr.write(
            "Error: huggingface_hub is required for --huggingface scanning.\n"
            "Install with: pip install fickling[huggingface]\n"
        )
        return EXIT_ERROR

    api = HfApi(token=token)

    try:
        repo_info = api.repo_info(repo_id=repo_id, revision=revision, token=token)
    except Exception as e:  # noqa: BLE001 -- HF Hub may raise unpredictable HTTP errors
        sys.stderr.write(f"Error accessing HuggingFace repository '{repo_id}': {e!s}\n")
        return EXIT_ERROR

    if repo_info is None or not repo_info.siblings:
        if print_results:
            print(f"No files found in {repo_id}")
        return EXIT_CLEAN

    # Select candidates purely by file extension; repository paths are POSIX-style.
    pickle_files = [
        f.rfilename
        for f in repo_info.siblings
        if PurePosixPath(f.rfilename).suffix.lower() in HF_PICKLE_EXTENSIONS
    ]

    if not pickle_files:
        if print_results:
            print(f"No pickle files found in {repo_id}")
        return EXIT_CLEAN

    if print_results:
        print(f"Scanning {len(pickle_files)} file(s) in {repo_id}...")

    overall_safe = True
    failed = 0
    json_output = json_output_path or DEFAULT_JSON_OUTPUT_FILE

    for filename in pickle_files:
        if print_results:
            print(f"\nScanning: {filename}")

        try:
            local_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                revision=revision,
                token=token,
            )

            with open(local_path, "rb") as f:
                stacked_pickled = fickle.StackedPickle.load(f, fail_on_decode_error=False)

                for pickled in stacked_pickled:
                    safety_results = check_safety(pickled, json_output_path=json_output)

                    if print_results:
                        result_str = safety_results.to_string()
                        if result_str:
                            print(f" {result_str}")

                    if safety_results.severity > Severity.LIKELY_SAFE:
                        overall_safe = False
                        if print_results:
                            sys.stderr.write(
                                f" WARNING: {filename} may contain unsafe content!\n"
                            )

        except fickle.PickleDecodeError as e:
            # A file that cannot be parsed is treated as unsafe, not skipped.
            sys.stderr.write(f"Error parsing {filename}: {e!s}\n")
            sys.stderr.write(
                "Parsing errors may indicate a maliciously crafted pickle file. "
                "DO NOT TRUST this file without further analysis!\n"
            )
            overall_safe = False
            failed += 1
        except Exception as e:  # noqa: BLE001 -- HF Hub may raise unpredictable errors
            # Any other failure (e.g. during download) also fails closed.
            sys.stderr.write(f"Error scanning {filename}: {e!s}\n")
            overall_safe = False
            failed += 1

    if print_results:
        if failed > 0:
            sys.stderr.write(
                f"\nWARNING: {failed}/{len(pickle_files)} file(s) failed to scan\n"
            )
        if overall_safe:
            print(f"\n{repo_id}: No obvious safety issues detected")
        else:
            print(f"\n{repo_id}: Potentially unsafe content detected!")

    return EXIT_CLEAN if overall_safe else EXIT_UNSAFE
127+
14128
15129def main (argv : list [str ] | None = None ) -> int :
16130 if argv is None :
@@ -98,6 +212,29 @@ def main(argv: list[str] | None = None) -> int:
98212 help = "print a runtime trace while interpreting the input pickle file" ,
99213 )
100214 parser .add_argument ("--version" , "-v" , action = "store_true" , help = "print the version and exit" )
215+ options .add_argument (
216+ "--huggingface" ,
217+ "--hf" ,
218+ type = str ,
219+ default = None ,
220+ metavar = "REPO_ID" ,
221+ help = "scan a model from HuggingFace Hub by repository ID (e.g., 'bert-base-uncased'). "
222+ "Requires huggingface_hub: pip install fickling[huggingface]" ,
223+ )
224+ parser .add_argument (
225+ "--hf-revision" ,
226+ type = str ,
227+ default = None ,
228+ help = "specific revision (branch, tag, or commit) to scan from HuggingFace Hub" ,
229+ )
230+ parser .add_argument (
231+ "--hf-token" ,
232+ type = str ,
233+ default = None ,
234+ help = "HuggingFace API token for private repositories. "
235+ "Prefer setting the HF_TOKEN environment variable to avoid token exposure in "
236+ "process listings" ,
237+ )
101238
102239 args = parser .parse_args (argv [1 :])
103240
@@ -108,6 +245,16 @@ def main(argv: list[str] | None = None) -> int:
108245 print (__version__ )
109246 return EXIT_CLEAN
110247
248+ # HuggingFace scanning mode
249+ if args .huggingface is not None :
250+ return _scan_huggingface (
251+ repo_id = args .huggingface ,
252+ revision = args .hf_revision ,
253+ token = args .hf_token ,
254+ json_output_path = args .json_output ,
255+ print_results = args .print_results ,
256+ )
257+
111258 if args .create is None :
112259 if args .PICKLE_FILE == "-" :
113260 if hasattr (sys .stdin , "buffer" ) and sys .stdin .buffer is not None :
0 commit comments