New Semgrep codemod: sandbox process creation (#831)

drdavella · web-flow · commit a95f3e7e0e70 · 2024-09-12T14:29:51.000Z
diff --git a/src/codemodder/semgrep.py b/src/codemodder/semgrep.py
@@ -28,15 +28,18 @@ class SemgrepLocation(SarifLocation):
     def from_sarif(cls, sarif_location) -> Self:
         artifact_location = sarif_location["physicalLocation"]["artifactLocation"]
         file = Path(artifact_location["uri"])
+        snippet = (  # optional: None when the region has no "snippet" entry or the entry has no "text"
+            sarif_location["physicalLocation"]["region"].get("snippet", {}).get("text")
+        )
         start = LineInfo(
             line=sarif_location["physicalLocation"]["region"]["startLine"],
             column=sarif_location["physicalLocation"]["region"]["startColumn"],
-            snippet=sarif_location["physicalLocation"]["region"]["snippet"]["text"],
+            snippet=snippet,  # the same snippet value is passed to both start and end
         )
         end = LineInfo(
             line=sarif_location["physicalLocation"]["region"]["endLine"],
             column=sarif_location["physicalLocation"]["region"]["endColumn"],
-            snippet=sarif_location["physicalLocation"]["region"]["snippet"]["text"],
+            snippet=snippet,
         )
         return cls(file=file, start=start, end=end)
 
diff --git a/src/core_codemods/__init__.py b/src/core_codemods/__init__.py
@@ -62,6 +62,7 @@
 from .semgrep.semgrep_nan_injection import SemgrepNanInjection
 from .semgrep.semgrep_no_csrf_exempt import SemgrepNoCsrfExempt
 from .semgrep.semgrep_rsa_key_size import SemgrepRsaKeySize
+from .semgrep.semgrep_sandbox_process_creation import SemgrepSandboxProcessCreation
 from .semgrep.semgrep_sql_parameterization import SemgrepSQLParameterization
 from .semgrep.semgrep_subprocess_shell_false import SemgrepSubprocessShellFalse
 from .semgrep.semgrep_url_sandbox import SemgrepUrlSandbox
@@ -222,6 +223,7 @@
         SemgrepNoCsrfExempt,
         SemgrepJwtDecodeVerify,
         SemgrepUseDefusedXml,
+        SemgrepSandboxProcessCreation,
         SemgrepSubprocessShellFalse,
         SemgrepDjangoSecureSetCookie,
         SemgrepHardenPyyaml,
diff --git a/src/core_codemods/semgrep/semgrep_sandbox_process_creation.py b/src/core_codemods/semgrep/semgrep_sandbox_process_creation.py
@@ -0,0 +1,16 @@
+from core_codemods.process_creation_sandbox import ProcessSandbox
+from core_codemods.semgrep.api import SemgrepCodemod, ToolRule, semgrep_url_from_id
+
+SemgrepSandboxProcessCreation = SemgrepCodemod.from_core_codemod(  # built from a ProcessSandbox instance plus the Semgrep rule listed below
+    name="sandbox-process-creation",
+    other=ProcessSandbox(),
+    rules=[
+        ToolRule(
+            id=(
+                rule_id := "python.lang.security.dangerous-system-call.dangerous-system-call"  # bound with := so the same id is reused for the url argument
+            ),
+            name="dangerous-system-call",
+            url=semgrep_url_from_id(rule_id),
+        ),
+    ],
+)
diff --git a/tests/codemods/semgrep/test_semgrep_sandbox_process_creation.py b/tests/codemods/semgrep/test_semgrep_sandbox_process_creation.py
@@ -0,0 +1,106 @@
+import mock
+
+from codemodder.codemods.test import BaseSASTCodemodTest
+from codemodder.dependency import Security
+from core_codemods.semgrep.semgrep_sandbox_process_creation import (
+    SemgrepSandboxProcessCreation,
+)
+
+
+class TestSonarSandboxProcessCreation(BaseSASTCodemodTest):  # NOTE(review): name says "Sonar" but codemod/tool below are Semgrep; likely copy-paste, consider renaming
+    codemod = SemgrepSandboxProcessCreation
+    tool = "semgrep"
+
+    def test_name(self):
+        assert self.codemod.name == "sandbox-process-creation"
+
+    @mock.patch("codemodder.codemods.api.FileContext.add_dependency")  # replaced with a mock so the dependency call can be asserted
+    def test_simple(self, adds_dependency, tmpdir):
+        input_code = """
+        import os
+        from flask import render_template, request
+
+        @app.route('/vuln', methods=['GET', 'POST'])
+        def vuln():
+            output = ""
+            if request.method == 'POST':
+                command = request.form.get('command')
+                output = os.popen(command).read()
+            return render_template('vuln.html', output=output)
+        """.lstrip(  # drop the leading newline left by the opening triple quote
+            "\n"
+        )
+        expected = """
+        import os
+        from flask import render_template, request
+        from security import safe_command
+
+        @app.route('/vuln', methods=['GET', 'POST'])
+        def vuln():
+            output = ""
+            if request.method == 'POST':
+                command = request.form.get('command')
+                output = safe_command.run(os.popen, command).read()
+            return render_template('vuln.html', output=output)
+        """.lstrip(  # drop the leading newline left by the opening triple quote
+            "\n"
+        )
+        self.run_and_assert(tmpdir, input_code, expected, results=SARIF)  # SARIF is the module-level results string in this file
+        adds_dependency.assert_called_once_with(Security)  # add_dependency must be called exactly once, with Security
+
+
+SARIF = """
+{
+  "runs": [
+    {
+      "automationDetails": {
+        "id": ".github/workflows/semgrep.yml:semgrep_scan/"
+      },
+      "conversion": {
+        "tool": {
+          "driver": {
+            "name": "GitHub Code Scanning"
+          }
+        }
+      },
+      "results": [
+        {
+          "correlationGuid": "a90240a2-8d09-47eb-a1c5-0af9d5b225c9",
+          "level": "error",
+          "locations": [
+            {
+              "physicalLocation": {
+                "artifactLocation": {
+                  "index": 1,
+                  "uri": "code.py"
+                },
+                "region": {
+                  "endColumn": 35,
+                  "endLine": 9,
+                  "startColumn": 18,
+                  "startLine": 9
+                }
+              }
+            }
+          ],
+          "message": {
+            "text": "Found user-controlled data used in a system call. This could allow a malicious actor to execute commands. Use the 'subprocess' module instead, which is easier to use without accidentally exposing a command injection vulnerability."
+          },
+          "partialFingerprints": {
+            "primaryLocationLineHash": "b897622e8906ac69:1"
+          },
+          "properties": {
+            "github/alertNumber": 2,
+            "github/alertUrl": "https://api.github.com/repos/nahsra/vulnerable-app-sample/code-scanning/alerts/2"
+          },
+          "rule": {
+            "id": "python.lang.security.dangerous-system-call.dangerous-system-call",
+            "index": 723
+          },
+          "ruleId": "python.lang.security.dangerous-system-call.dangerous-system-call"
+        }
+      ]
+    }
+  ]
+}
+"""  # one SARIF result for the dangerous-system-call rule at code.py line 9; its region has no "snippet" key