@@ -118,21 +118,44 @@ def _load_defaults(self, resources_path: str) -> tuple[str, str | None, set[str]
118118 logger .debug (error_msg )
119119 raise ConfigurationError (error_msg )
120120
121- semgrep_commands : list [str ] = ["semgrep" , "scan" , "--validate" , "--oss-only" , "--config" , custom_rule_path ]
121+ # Extra argument explanation:
122+ # --metrics off is used to disable metric collection. Metric collection makes a network connection,
123+ # which can impact running Semgrep offline.
124+ # --disable-version-check is used to disable the network connection made to semgrep.dev to check
125+ # if this is the latest version. This network connection can also impact running Semgrep offline.
126+ # --oss-only is used to ensure only the open-source offering of Semgrep is run.
127+ # Note, validation with --validate still currently makes a network connection to download linting
128+ # rules, which cannot be turned off.
129+ semgrep_commands : list [str ] = [
130+ "semgrep" ,
131+ "scan" ,
132+ "--metrics" ,
133+ "off" ,
134+ "--disable-version-check" ,
135+ "--validate" ,
136+ "--oss-only" ,
137+ "--config" ,
138+ custom_rule_path ,
139+ ]
122140 try :
123141 process = subprocess .run (semgrep_commands , check = True , capture_output = True ) # nosec B603
142+ if process .returncode != 0 :
143+ # Only a warning is used here, so that if running offline, the analysis can continue. Erroneous Semgrep files
144+ # will be picked up at analysis time in this case.
145+ warning_msg = (
146+ f"Running semgrep validation on { custom_rule_path } with argument(s)"
147+ f" { process .args } "
148+ f" was not successful: { process .returncode } ."
149+ " These custom rule(s) may not run successfully."
150+ )
151+ logger .warning (warning_msg )
152+
124153 except (subprocess .CalledProcessError , subprocess .TimeoutExpired ) as semgrep_error :
125- error_msg = (
126- f"Unable to run semgrep validation on { custom_rule_path } with arguments "
154+ warning_msg = (
155+ f"Unable to run semgrep validation on { custom_rule_path } with argument(s) "
127156 f"{ semgrep_commands } : { semgrep_error } ."
128157 )
129- logger .debug (error_msg )
130- raise ConfigurationError (error_msg ) from semgrep_error
131-
132- if process .returncode != 0 :
133- error_msg = f"Error running semgrep validation on { custom_rule_path } with arguments" f" { process .args } ."
134- logger .debug (error_msg )
135- raise ConfigurationError (error_msg )
158+ logger .warning (warning_msg )
136159
137160 logger .debug ("Including custom ruleset from %s." , custom_rule_path )
138161
@@ -245,10 +268,24 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
245268 analysis_result : dict = {}
246269 # Since we have to run them anyway, return disabled rule findings for debug information.
247270 disabled_results : dict = {}
248- # Here, we disable 'nosemgrep' ignoring so that this is not an evasion method of our scan (i.e. malware includes
249- # 'nosemgrep' comments to prevent our scan detecting those code lines). Read more about the 'nosemgrep' feature
250- # here: https://semgrep.dev/docs/ignoring-files-folders-code
251- semgrep_commands : list [str ] = ["semgrep" , "scan" , "--oss-only" , "--disable-nosem" ]
271+ # Extra argument explanation:
272+ # --metrics off is used to disable metric collection. Metric collection makes a network connection,
273+ # which can impact running Semgrep offline.
274+ # --disable-version-check is used to disable the network connection made to semgrep.dev to check
275+ # if this is the latest version. This network connection can also impact running Semgrep offline.
276+ # --oss-only is used to ensure only the open-source offering of Semgrep is run.
277+ # --disable-nosem is used to disable 'nosemgrep' ignoring so that this is not an evasion method of
278+ # our scan (i.e. malware includes 'nosemgrep' comments to prevent our scan detecting those code lines).
279+ # Read more about the 'nosemgrep' feature here: https://semgrep.dev/docs/ignoring-files-folders-code
280+ semgrep_commands : list [str ] = [
281+ "semgrep" ,
282+ "scan" ,
283+ "--metrics" ,
284+ "off" ,
285+ "--disable-version-check" ,
286+ "--oss-only" ,
287+ "--disable-nosem" ,
288+ ]
252289 result : HeuristicResult = HeuristicResult .PASS
253290
254291 source_code_path = pypi_package_json .package_sourcecode_path
@@ -269,13 +306,13 @@ def analyze(self, pypi_package_json: PyPIPackageJsonAsset) -> tuple[HeuristicRes
269306 process = subprocess .run (semgrep_commands , check = True , capture_output = True ) # nosec B603
270307 except (subprocess .CalledProcessError , subprocess .TimeoutExpired ) as semgrep_error :
271308 error_msg = (
272- f"Unable to run semgrep on { source_code_path } with arguments { semgrep_commands } : { semgrep_error } "
309+ f"Unable to run semgrep on { source_code_path } with argument(s) { semgrep_commands } : { semgrep_error } "
273310 )
274311 logger .debug (error_msg )
275312 raise HeuristicAnalyzerValueError (error_msg ) from semgrep_error
276313
277314 if process .returncode != 0 :
278- error_msg = f"Error running semgrep on { source_code_path } with arguments " f" { process .args } "
315+ error_msg = f"Error running semgrep on { source_code_path } with argument(s) " f" { process .args } "
279316 logger .debug (error_msg )
280317 raise HeuristicAnalyzerValueError (error_msg )
281318
0 commit comments