33import sys
44from argparse import ArgumentParser
55from ast import unparse
6+ from pathlib import PurePosixPath
67
78from . import __version__ , fickle , tracing
89from .analysis import Severity , check_safety
910from .constants import EXIT_CLEAN , EXIT_ERROR , EXIT_UNSAFE
1011
# Fallback path for JSON results when the caller does not supply one.
DEFAULT_JSON_OUTPUT_FILE = "safety_results.json"

# Lower-case file suffixes that mark a HuggingFace repository file as a
# candidate for pickle scanning; suffixes are lower-cased before lookup.
HF_PICKLE_EXTENSIONS = frozenset({".bin", ".pt", ".pth", ".pkl", ".pickle"})
15+
16+
def _scan_huggingface(
    repo_id: str,
    revision: str | None = None,
    token: str | None = None,
    json_output_path: str | None = None,
    print_results: bool = False,
) -> int:
    """Scan a HuggingFace Hub repository for potentially malicious pickle files.

    Repository files whose suffix (compared case-insensitively) is listed in
    ``HF_PICKLE_EXTENSIONS`` are fetched one at a time with
    ``hf_hub_download``; every pickle stacked inside a file is passed to
    ``check_safety``. Any download, parse or analysis failure marks the whole
    repository as unsafe rather than being ignored.

    Args:
        repo_id: HuggingFace repository ID (e.g., 'bert-base-uncased')
        revision: Specific revision (branch, tag, or commit) to scan
        token: HuggingFace API token for private repositories
        json_output_path: Path to write JSON results; falls back to
            ``DEFAULT_JSON_OUTPUT_FILE`` when empty or None
        print_results: Whether to print progress, per-file results and the
            final summary to the console (errors always go to stderr)

    Returns:
        EXIT_ERROR if ``huggingface_hub`` is not importable or the repository
        metadata cannot be retrieved; EXIT_CLEAN if there is nothing to scan
        or no scanned pickle is rated above ``Severity.LIKELY_SAFE``;
        EXIT_UNSAFE otherwise, including when any file failed to scan.
    """
    try:
        # Imported lazily: the dependency is optional and only needed here.
        from huggingface_hub import HfApi, hf_hub_download
    except ImportError:
        sys.stderr.write(
            "Error: huggingface_hub is required for --huggingface scanning.\n"
            "Install with: pip install fickling[huggingface]\n"
        )
        return EXIT_ERROR

    api = HfApi(token=token)

    try:
        repo_info = api.repo_info(repo_id=repo_id, revision=revision, token=token)
    except Exception as e:  # noqa: BLE001 -- HF Hub may raise unpredictable HTTP errors
        sys.stderr.write(f"Error accessing HuggingFace repository '{repo_id}': {e!s}\n")
        return EXIT_ERROR

    if repo_info is None or not repo_info.siblings:
        if print_results:
            print(f"No files found in {repo_id}")
        return EXIT_CLEAN

    # Repository paths always use forward slashes, hence PurePosixPath.
    pickle_files = [
        f.rfilename
        for f in repo_info.siblings
        if PurePosixPath(f.rfilename).suffix.lower() in HF_PICKLE_EXTENSIONS
    ]

    if not pickle_files:
        if print_results:
            print(f"No pickle files found in {repo_id}")
        return EXIT_CLEAN

    if print_results:
        print(f"Scanning {len(pickle_files)} file(s) in {repo_id}...")

    overall_safe = True
    failed = 0
    # The same JSON path is handed to every check_safety call below.
    # NOTE(review): confirm whether check_safety appends to or overwrites it.
    json_output = json_output_path or DEFAULT_JSON_OUTPUT_FILE

    for filename in pickle_files:
        if print_results:
            print(f"\nScanning: {filename}")

        try:
            local_path = hf_hub_download(
                repo_id=repo_id,
                filename=filename,
                revision=revision,
                token=token,
            )

            with open(local_path, "rb") as f:
                stacked_pickled = fickle.StackedPickle.load(f, fail_on_decode_error=False)

            for pickled in stacked_pickled:
                safety_results = check_safety(pickled, json_output_path=json_output)

                if print_results:
                    result_str = safety_results.to_string()
                    if result_str:
                        print(f" {result_str}")

                if safety_results.severity > Severity.LIKELY_SAFE:
                    overall_safe = False
                    if print_results:
                        sys.stderr.write(f" WARNING: {filename} may contain unsafe content!\n")

        except fickle.PickleDecodeError as e:
            sys.stderr.write(f"Error parsing {filename}: {e!s}\n")
            sys.stderr.write(
                "Parsing errors may indicate a maliciously crafted pickle file. "
                "DO NOT TRUST this file without further analysis!\n"
            )
            # Fail closed: an unparseable file is treated as unsafe.
            overall_safe = False
            failed += 1
        except Exception as e:  # noqa: BLE001 -- HF Hub may raise unpredictable errors
            sys.stderr.write(f"Error scanning {filename}: {e!s}\n")
            # Fail closed: a file that could not be scanned is not assumed clean.
            overall_safe = False
            failed += 1

    if print_results:
        if failed > 0:
            sys.stderr.write(f"\nWARNING: {failed}/{len(pickle_files)} file(s) failed to scan\n")
        if overall_safe:
            print(f"\n{repo_id}: No obvious safety issues detected")
        else:
            print(f"\n{repo_id}: Potentially unsafe content detected!")

    return EXIT_CLEAN if overall_safe else EXIT_UNSAFE
126+
13127
14128def main (argv : list [str ] | None = None ) -> int :
15129 if argv is None :
@@ -97,6 +211,29 @@ def main(argv: list[str] | None = None) -> int:
97211 help = "print a runtime trace while interpreting the input pickle file" ,
98212 )
99213 parser .add_argument ("--version" , "-v" , action = "store_true" , help = "print the version and exit" )
214+ options .add_argument (
215+ "--huggingface" ,
216+ "--hf" ,
217+ type = str ,
218+ default = None ,
219+ metavar = "REPO_ID" ,
220+ help = "scan a model from HuggingFace Hub by repository ID (e.g., 'bert-base-uncased'). "
221+ "Requires huggingface_hub: pip install fickling[huggingface]" ,
222+ )
223+ parser .add_argument (
224+ "--hf-revision" ,
225+ type = str ,
226+ default = None ,
227+ help = "specific revision (branch, tag, or commit) to scan from HuggingFace Hub" ,
228+ )
229+ parser .add_argument (
230+ "--hf-token" ,
231+ type = str ,
232+ default = None ,
233+ help = "HuggingFace API token for private repositories. "
234+ "Prefer setting the HF_TOKEN environment variable to avoid token exposure in "
235+ "process listings" ,
236+ )
100237
101238 args = parser .parse_args (argv [1 :])
102239
@@ -107,6 +244,16 @@ def main(argv: list[str] | None = None) -> int:
107244 print (__version__ )
108245 return EXIT_CLEAN
109246
247+ # HuggingFace scanning mode
248+ if args .huggingface is not None :
249+ return _scan_huggingface (
250+ repo_id = args .huggingface ,
251+ revision = args .hf_revision ,
252+ token = args .hf_token ,
253+ json_output_path = args .json_output ,
254+ print_results = args .print_results ,
255+ )
256+
110257 if args .create is None :
111258 if args .PICKLE_FILE == "-" :
112259 if hasattr (sys .stdin , "buffer" ) and sys .stdin .buffer is not None :
0 commit comments