diff --git a/integration_tests/sonar/test_sonar_fix_missing_self_or_cls.py b/integration_tests/sonar/test_sonar_fix_missing_self_or_cls.py index 51f40080..8a799fa8 100644 --- a/integration_tests/sonar/test_sonar_fix_missing_self_or_cls.py +++ b/integration_tests/sonar/test_sonar_fix_missing_self_or_cls.py @@ -1,38 +1,17 @@ -from codemodder.codemods.test import SonarIntegrationTest +from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest from core_codemods.fix_missing_self_or_cls import FixMissingSelfOrClsTransformer from core_codemods.sonar.sonar_fix_missing_self_or_cls import SonarFixMissingSelfOrCls -class TestSonarFixMissingSelfOrCls(SonarIntegrationTest): +class TestSonarFixMissingSelfOrCls(SonarRemediationIntegrationTest): codemod = SonarFixMissingSelfOrCls code_path = "tests/samples/fix_missing_self_or_cls.py" - replacement_lines = [ - ( - 2, - """ def instance_method(self):\n""", - ), - ( - 6, - """ def class_method(cls):\n""", - ), + + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,5 +1,5 @@\n class MyClass:\n- def instance_method():\n+ def instance_method(self):\n print("instance_method")\n \n @classmethod\n', + '--- \n+++ \n@@ -3,5 +3,5 @@\n print("instance_method")\n \n @classmethod\n- def class_method():\n+ def class_method(cls):\n print("class_method")\n', ] - # fmt: off - expected_diff = ( - """--- \n""" - """+++ \n""" - """@@ -1,7 +1,7 @@\n""" - """ class MyClass:\n""" - """- def instance_method():\n""" - """+ def instance_method(self):\n""" - """ print("instance_method")\n""" - """ \n""" - """ @classmethod\n""" - """- def class_method():\n""" - """+ def class_method(cls):\n""" - """ print("class_method")\n""" - ) - # fmt: on - expected_line_change = "2" + expected_lines_changed = [2, 6] change_description = FixMissingSelfOrClsTransformer.change_description num_changes = 2 diff --git a/integration_tests/sonar/test_sonar_jinja2_autoescape.py b/integration_tests/sonar/test_sonar_jinja2_autoescape.py index 
b96cbeec..41ae9458 100644 --- a/integration_tests/sonar/test_sonar_jinja2_autoescape.py +++ b/integration_tests/sonar/test_sonar_jinja2_autoescape.py @@ -1,18 +1,18 @@ -from codemodder.codemods.test import SonarIntegrationTest +from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest from core_codemods.enable_jinja2_autoescape import EnableJinja2AutoescapeTransformer from core_codemods.sonar.sonar_enable_jinja2_autoescape import ( SonarEnableJinja2Autoescape, ) -class TestSonarEnableJinja2Autoescape(SonarIntegrationTest): +class TestSonarEnableJinja2Autoescape(SonarRemediationIntegrationTest): codemod = SonarEnableJinja2Autoescape code_path = "tests/samples/jinja2_autoescape.py" - replacement_lines = [ - (3, "env = Environment(autoescape=True)\n"), - (4, "env = Environment(autoescape=True)\n"), + expected_diff_per_change = [ + "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n-env = Environment()\n+env = Environment(autoescape=True)\n env = Environment(autoescape=False)\n", + "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n env = Environment()\n-env = Environment(autoescape=False)\n+env = Environment(autoescape=True)\n", ] - expected_diff = "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n-env = Environment()\n-env = Environment(autoescape=False)\n+env = Environment(autoescape=True)\n+env = Environment(autoescape=True)\n" - expected_line_change = "3" + + expected_lines_changed = [3, 4] num_changes = 2 change_description = EnableJinja2AutoescapeTransformer.change_description diff --git a/integration_tests/sonar/test_sonar_jwt_decode_verify.py b/integration_tests/sonar/test_sonar_jwt_decode_verify.py index f8c17f16..97be4f79 100644 --- a/integration_tests/sonar/test_sonar_jwt_decode_verify.py +++ b/integration_tests/sonar/test_sonar_jwt_decode_verify.py @@ -1,25 +1,19 @@ -from codemodder.codemods.test import SonarIntegrationTest +from codemodder.codemods.test.integration_utils import 
SonarRemediationIntegrationTest from core_codemods.sonar.sonar_jwt_decode_verify import ( JwtDecodeVerifySASTTransformer, SonarJwtDecodeVerify, ) -class TestJwtDecodeVerify(SonarIntegrationTest): +class TestJwtDecodeVerify(SonarRemediationIntegrationTest): codemod = SonarJwtDecodeVerify code_path = "tests/samples/jwt_decode_verify.py" - replacement_lines = [ - ( - 11, - """decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n""", - ), - ( - 12, - """decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n""", - ), + + expected_diff_per_change = [ + '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n \n var = "something"\n', + '--- \n+++ \n@@ -9,6 +9,6 @@\n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n', ] - expected_diff = '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n+decoded_payload = jwt.decode(encoded_jwt, 
SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n' - expected_line_change = "11" + expected_lines_changed = [11, 12] num_changes = 2 change_description = JwtDecodeVerifySASTTransformer.change_description diff --git a/integration_tests/sonar/test_sonar_secure_cookie.py b/integration_tests/sonar/test_sonar_secure_cookie.py index 409592ae..b882b566 100644 --- a/integration_tests/sonar/test_sonar_secure_cookie.py +++ b/integration_tests/sonar/test_sonar_secure_cookie.py @@ -1,19 +1,18 @@ -from codemodder.codemods.test import SonarIntegrationTest +from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest from core_codemods.sonar.sonar_secure_cookie import ( SonarSecureCookie, SonarSecureCookieTransformer, ) -class TestSonarSecureCookie(SonarIntegrationTest): +class TestSonarSecureCookie(SonarRemediationIntegrationTest): codemod = SonarSecureCookie code_path = "tests/samples/secure_cookie.py" - replacement_lines = [ - ( - 8, - """ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n""", - ), + expected_diff_per_change = [ + "--- \n+++ \n@@ -5,5 +5,5 @@\n @app.route('/')\n def index():\n resp = make_response('Custom Cookie Set')\n- resp.set_cookie('custom_cookie', 'value')\n+ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n return resp\n", + "--- \n+++ \n@@ -5,5 +5,5 @@\n @app.route('/')\n def index():\n resp = make_response('Custom Cookie Set')\n- resp.set_cookie('custom_cookie', 'value')\n+ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n return resp\n", ] - expected_diff = "--- \n+++ \n@@ -5,5 +5,5 @@\n @app.route('/')\n def index():\n resp = make_response('Custom Cookie Set')\n- resp.set_cookie('custom_cookie', 'value')\n+ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n return resp\n" - expected_line_change = "8" + + expected_lines_changed = [8, 
8] + num_changes = 2 change_description = SonarSecureCookieTransformer.change_description diff --git a/integration_tests/sonar/test_sonar_secure_random.py b/integration_tests/sonar/test_sonar_secure_random.py index fe165587..b0116b29 100644 --- a/integration_tests/sonar/test_sonar_secure_random.py +++ b/integration_tests/sonar/test_sonar_secure_random.py @@ -1,29 +1,16 @@ -from codemodder.codemods.test import SonarIntegrationTest +from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest from core_codemods.secure_random import SecureRandomTransformer from core_codemods.sonar.sonar_secure_random import SonarSecureRandom -class TestSonarDjangoJsonResponseType(SonarIntegrationTest): +class TestSonarSecureRandom(SonarRemediationIntegrationTest): codemod = SonarSecureRandom code_path = "tests/samples/secure_random.py" - replacement_lines = [ - (1, """import secrets\n"""), - (3, """secrets.SystemRandom().random()\n"""), - (4, """secrets.SystemRandom().getrandbits(1)\n"""), + expected_diff_per_change = [ + "--- \n+++ \n@@ -1,4 +1,5 @@\n import random\n+import secrets\n \n-random.random()\n+secrets.SystemRandom().random()\n random.getrandbits(1)\n", + "--- \n+++ \n@@ -1,4 +1,5 @@\n import random\n+import secrets\n \n random.random()\n-random.getrandbits(1)\n+secrets.SystemRandom().getrandbits(1)\n", ] - # fmt: off - expected_diff = ( - """--- \n""" - """+++ \n""" - """@@ -1,4 +1,4 @@\n""" - """-import random\n""" - """+import secrets\n""" - """ \n""" - """-random.random()\n""" - """-random.getrandbits(1)\n""" - """+secrets.SystemRandom().random()\n""" - """+secrets.SystemRandom().getrandbits(1)\n""") - # fmt: on - expected_line_change = "3" + + expected_lines_changed = [3, 4] change_description = SecureRandomTransformer.change_description num_changes = 2 diff --git a/integration_tests/test_add_requests_timeout.py b/integration_tests/test_add_requests_timeout.py index 34eafbc5..70537292 100644 --- a/integration_tests/test_add_requests_timeout.py 
+++ b/integration_tests/test_add_requests_timeout.py @@ -1,13 +1,13 @@ from requests.exceptions import ConnectionError -from codemodder.codemods.test import BaseIntegrationTest +from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.add_requests_timeouts import ( AddRequestsTimeouts, TransformAddRequestsTimeouts, ) -class TestAddRequestsTimeouts(BaseIntegrationTest): +class TestAddRequestsTimeouts(BaseRemediationIntegrationTest): codemod = AddRequestsTimeouts original_code = """ import requests @@ -18,27 +18,13 @@ class TestAddRequestsTimeouts(BaseIntegrationTest): requests.post("https://example.com", verify=False) """ - replacement_lines = [ - (3, 'requests.get("https://example.com", timeout=60)\n'), - (6, 'requests.post("https://example.com", verify=False, timeout=60)\n'), + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,6 +1,6 @@\n import requests\n \n-requests.get("https://example.com")\n+requests.get("https://example.com", timeout=60)\n requests.get("https://example.com", timeout=1)\n requests.get("https://example.com", timeout=(1, 10), verify=False)\n requests.post("https://example.com", verify=False)', + '--- \n+++ \n@@ -3,4 +3,4 @@\n requests.get("https://example.com")\n requests.get("https://example.com", timeout=1)\n requests.get("https://example.com", timeout=(1, 10), verify=False)\n-requests.post("https://example.com", verify=False)\n+requests.post("https://example.com", verify=False, timeout=60)', ] - expected_diff = """\ ---- -+++ -@@ -1,6 +1,6 @@ - import requests - --requests.get("https://example.com") -+requests.get("https://example.com", timeout=60) - requests.get("https://example.com", timeout=1) - requests.get("https://example.com", timeout=(1, 10), verify=False) --requests.post("https://example.com", verify=False) -+requests.post("https://example.com", verify=False, timeout=60) -""" - num_changes = 2 - expected_line_change = "3" + expected_lines_changed = [3, 6] change_description = 
TransformAddRequestsTimeouts.change_description # expected because requests are made allowed_exceptions = (ConnectionError,) diff --git a/integration_tests/test_harden_ruamel.py b/integration_tests/test_harden_ruamel.py index c9cd9d0f..a7aa6ab2 100644 --- a/integration_tests/test_harden_ruamel.py +++ b/integration_tests/test_harden_ruamel.py @@ -1,8 +1,8 @@ -from codemodder.codemods.test import BaseIntegrationTest +from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.harden_ruamel import HardenRuamel -class TestHardenRuamel(BaseIntegrationTest): +class TestHardenRuamel(BaseRemediationIntegrationTest): codemod = HardenRuamel original_code = """ from ruamel.yaml import YAML @@ -10,11 +10,11 @@ class TestHardenRuamel(BaseIntegrationTest): serializer = YAML(typ="unsafe") serializer = YAML(typ="base") """ - replacement_lines = [ - (3, 'serializer = YAML(typ="safe")\n'), - (4, 'serializer = YAML(typ="safe")\n'), + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,4 +1,4 @@\n from ruamel.yaml import YAML\n \n-serializer = YAML(typ="unsafe")\n+serializer = YAML(typ="safe")\n serializer = YAML(typ="base")', + '--- \n+++ \n@@ -1,4 +1,4 @@\n from ruamel.yaml import YAML\n \n serializer = YAML(typ="unsafe")\n-serializer = YAML(typ="base")\n+serializer = YAML(typ="safe")', ] - expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n from ruamel.yaml import YAML\n \n-serializer = YAML(typ="unsafe")\n-serializer = YAML(typ="base")\n+serializer = YAML(typ="safe")\n+serializer = YAML(typ="safe")\n' - expected_line_change = "3" + + expected_lines_changed = [3, 4] num_changes = 2 change_description = HardenRuamel.change_description diff --git a/integration_tests/test_jinja2_autoescape.py b/integration_tests/test_jinja2_autoescape.py index 1ec23386..48d1aafd 100644 --- a/integration_tests/test_jinja2_autoescape.py +++ b/integration_tests/test_jinja2_autoescape.py @@ -1,11 +1,11 @@ -from codemodder.codemods.test import BaseIntegrationTest +from 
codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.enable_jinja2_autoescape import ( EnableJinja2Autoescape, EnableJinja2AutoescapeTransformer, ) -class TestEnableJinja2Autoescape(BaseIntegrationTest): +class TestEnableJinja2Autoescape(BaseRemediationIntegrationTest): codemod = EnableJinja2Autoescape original_code = """ from jinja2 import Environment @@ -13,11 +13,12 @@ class TestEnableJinja2Autoescape(BaseIntegrationTest): env = Environment() env = Environment(autoescape=False) """ - replacement_lines = [ - (3, "env = Environment(autoescape=True)\n"), - (4, "env = Environment(autoescape=True)\n"), + + expected_diff_per_change = [ + "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n-env = Environment()\n+env = Environment(autoescape=True)\n env = Environment(autoescape=False)", + "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n env = Environment()\n-env = Environment(autoescape=False)\n+env = Environment(autoescape=True)", ] - expected_diff = "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n-env = Environment()\n-env = Environment(autoescape=False)\n+env = Environment(autoescape=True)\n+env = Environment(autoescape=True)\n" - expected_line_change = "3" + + expected_lines_changed = [3, 4] num_changes = 2 change_description = EnableJinja2AutoescapeTransformer.change_description diff --git a/integration_tests/test_jwt_decode_verify.py b/integration_tests/test_jwt_decode_verify.py index edbb4b39..3846324c 100644 --- a/integration_tests/test_jwt_decode_verify.py +++ b/integration_tests/test_jwt_decode_verify.py @@ -1,8 +1,8 @@ -from codemodder.codemods.test import BaseIntegrationTest +from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.jwt_decode_verify import JwtDecodeVerify, JwtDecodeVerifyTransformer -class TestJwtDecodeVerify(BaseIntegrationTest): +class TestJwtDecodeVerify(BaseRemediationIntegrationTest): codemod = 
JwtDecodeVerify original_code = """ import jwt @@ -20,17 +20,11 @@ class TestJwtDecodeVerify(BaseIntegrationTest): var = "something" """ - replacement_lines = [ - ( - 11, - """decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n""", - ), - ( - 12, - """decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n""", - ), + expected_diff_per_change = [ + '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n \n var = "something"', + '--- \n+++ \n@@ -9,6 +9,6 @@\n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"', ] - expected_diff = '--- \n+++ \n@@ -8,7 +8,7 @@\n \n encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n \n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n \n var = "something"\n' - expected_line_change = "11" + + expected_lines_changed = [11, 12] num_changes = 2 change_description = 
JwtDecodeVerifyTransformer.change_description diff --git a/integration_tests/test_lazy_logging.py b/integration_tests/test_lazy_logging.py index ee2975a3..a20a44ac 100644 --- a/integration_tests/test_lazy_logging.py +++ b/integration_tests/test_lazy_logging.py @@ -1,8 +1,8 @@ -from codemodder.codemods.test import BaseIntegrationTest +from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.lazy_logging import LazyLogging -class TestLazyLogging(BaseIntegrationTest): +class TestLazyLogging(BaseRemediationIntegrationTest): codemod = LazyLogging original_code = """ import logging @@ -10,23 +10,11 @@ class TestLazyLogging(BaseIntegrationTest): logging.error("Error occurred: %s" % e) logging.error("Error occurred: " + e) """ - replacement_lines = [ - (3, """logging.error("Error occurred: %s", e)\n"""), - (4, """logging.error("Error occurred: %s", e)\n"""), + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,4 +1,4 @@\n import logging\n e = "Some error"\n-logging.error("Error occurred: %s" % e)\n+logging.error("Error occurred: %s", e)\n logging.error("Error occurred: " + e)', + '--- \n+++ \n@@ -1,4 +1,4 @@\n import logging\n e = "Some error"\n logging.error("Error occurred: %s" % e)\n-logging.error("Error occurred: " + e)\n+logging.error("Error occurred: %s", e)', ] - # fmt: off - expected_diff = ( - """--- \n""" - """+++ \n""" - """@@ -1,4 +1,4 @@\n""" - """ import logging\n""" - """ e = "Some error"\n""" - """-logging.error("Error occurred: %s" % e)\n""" - """-logging.error("Error occurred: " + e)\n""" - """+logging.error("Error occurred: %s", e)\n""" - """+logging.error("Error occurred: %s", e)\n""") - # fmt: on - expected_line_change = "3" + expected_lines_changed = [3, 4] change_description = LazyLogging.change_description num_changes = 2 diff --git a/integration_tests/test_lxml_safe_parsing.py b/integration_tests/test_lxml_safe_parsing.py index 0e851b1f..da5dd708 100644 --- a/integration_tests/test_lxml_safe_parsing.py 
+++ b/integration_tests/test_lxml_safe_parsing.py @@ -1,26 +1,19 @@ -from codemodder.codemods.test import BaseIntegrationTest +from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.lxml_safe_parsing import LxmlSafeParsing -class TestLxmlSafeParsing(BaseIntegrationTest): +class TestLxmlSafeParsing(BaseRemediationIntegrationTest): codemod = LxmlSafeParsing original_code = """ import lxml.etree lxml.etree.parse("path_to_file") lxml.etree.fromstring("xml_str") """ - replacement_lines = [ - ( - 2, - 'lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False))\n', - ), - ( - 3, - 'lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False))\n', - ), + expected_lines_changed = [2, 3] + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,3 +1,3 @@\n import lxml.etree\n-lxml.etree.parse("path_to_file")\n+lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False))\n lxml.etree.fromstring("xml_str")', + '--- \n+++ \n@@ -1,3 +1,3 @@\n import lxml.etree\n lxml.etree.parse("path_to_file")\n-lxml.etree.fromstring("xml_str")\n+lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False))', ] - expected_diff = '--- \n+++ \n@@ -1,3 +1,3 @@\n import lxml.etree\n-lxml.etree.parse("path_to_file")\n-lxml.etree.fromstring("xml_str")\n+lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False))\n+lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False))\n' - expected_line_change = "2" num_changes = 2 change_description = LxmlSafeParsing.change_description allowed_exceptions = (OSError,) diff --git a/integration_tests/test_process_sandbox.py b/integration_tests/test_process_sandbox.py index 37976653..97bca572 100644 --- a/integration_tests/test_process_sandbox.py +++ b/integration_tests/test_process_sandbox.py @@ -1,9 +1,9 @@ -from codemodder.codemods.test import BaseIntegrationTest +from 
codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from codemodder.dependency import Security from core_codemods.process_creation_sandbox import ProcessSandbox -class TestProcessSandbox(BaseIntegrationTest): +class TestProcessSandbox(BaseRemediationIntegrationTest): codemod = ProcessSandbox original_code = """ import subprocess @@ -22,17 +22,15 @@ class TestProcessSandbox(BaseIntegrationTest): var = "hello" """ - replacement_lines = [ - (2, """from security import safe_command\n\n"""), - (5, """safe_command.run(subprocess.run, cmd, shell=True)\n"""), - (6, """safe_command.run(subprocess.run, [cmd, "-l"])\n"""), - (8, """safe_command.run(subprocess.call, cmd, shell=True)\n"""), - (9, """safe_command.run(subprocess.call, [cmd, "-l"])\n"""), + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,8 +1,9 @@\n import subprocess\n+from security import safe_command\n \n cmd = " ".join(["ls"])\n \n-subprocess.run(cmd, shell=True)\n+safe_command.run(subprocess.run, cmd, shell=True)\n subprocess.run([cmd, "-l"])\n \n subprocess.call(cmd, shell=True)\n', + '--- \n+++ \n@@ -1,9 +1,10 @@\n import subprocess\n+from security import safe_command\n \n cmd = " ".join(["ls"])\n \n subprocess.run(cmd, shell=True)\n-subprocess.run([cmd, "-l"])\n+safe_command.run(subprocess.run, [cmd, "-l"])\n \n subprocess.call(cmd, shell=True)\n subprocess.call([cmd, "-l"])\n', + '--- \n+++ \n@@ -1,11 +1,12 @@\n import subprocess\n+from security import safe_command\n \n cmd = " ".join(["ls"])\n \n subprocess.run(cmd, shell=True)\n subprocess.run([cmd, "-l"])\n \n-subprocess.call(cmd, shell=True)\n+safe_command.run(subprocess.call, cmd, shell=True)\n subprocess.call([cmd, "-l"])\n \n subprocess.check_output([cmd, "-l"])\n', + '--- \n+++ \n@@ -1,4 +1,5 @@\n import subprocess\n+from security import safe_command\n \n cmd = " ".join(["ls"])\n \n@@ -6,7 +7,7 @@\n subprocess.run([cmd, "-l"])\n \n subprocess.call(cmd, shell=True)\n-subprocess.call([cmd, 
"-l"])\n+safe_command.run(subprocess.call, [cmd, "-l"])\n \n subprocess.check_output([cmd, "-l"])\n \n', ] - expected_diff = '--- \n+++ \n@@ -1,12 +1,13 @@\n import subprocess\n+from security import safe_command\n \n cmd = " ".join(["ls"])\n \n-subprocess.run(cmd, shell=True)\n-subprocess.run([cmd, "-l"])\n+safe_command.run(subprocess.run, cmd, shell=True)\n+safe_command.run(subprocess.run, [cmd, "-l"])\n \n-subprocess.call(cmd, shell=True)\n-subprocess.call([cmd, "-l"])\n+safe_command.run(subprocess.call, cmd, shell=True)\n+safe_command.run(subprocess.call, [cmd, "-l"])\n \n subprocess.check_output([cmd, "-l"])\n \n' - expected_line_change = "5" + + expected_lines_changed = [5, 6, 8, 9] num_changes = 4 - num_changed_files = 2 change_description = ProcessSandbox.change_description requirements_file_name = "requirements.txt" diff --git a/integration_tests/test_request_verify.py b/integration_tests/test_request_verify.py index c03a09d2..1a4fbfd6 100644 --- a/integration_tests/test_request_verify.py +++ b/integration_tests/test_request_verify.py @@ -1,10 +1,10 @@ from requests import exceptions -from codemodder.codemods.test import BaseIntegrationTest +from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest from core_codemods.requests_verify import RequestsVerify -class TestRequestsVerify(BaseIntegrationTest): +class TestRequestsVerify(BaseRemediationIntegrationTest): codemod = RequestsVerify original_code = """ import requests @@ -14,15 +14,12 @@ class TestRequestsVerify(BaseIntegrationTest): var = "hello" """ - replacement_lines = [ - (3, """requests.get("https://www.google.com", verify=True)\n"""), - ( - 4, - """requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=True)\n""", - ), + expected_diff_per_change = [ + '--- \n+++ \n@@ -1,5 +1,5 @@\n import requests\n \n-requests.get("https://www.google.com", verify=False)\n+requests.get("https://www.google.com", verify=True)\n requests.post("https://some-api/", 
json={"id": 1234, "price": 18}, verify=False)\n var = "hello"', + '--- \n+++ \n@@ -1,5 +1,5 @@\n import requests\n \n requests.get("https://www.google.com", verify=False)\n-requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=False)\n+requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=True)\n var = "hello"', ] - expected_diff = '--- \n+++ \n@@ -1,5 +1,5 @@\n import requests\n \n-requests.get("https://www.google.com", verify=False)\n-requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=False)\n+requests.get("https://www.google.com", verify=True)\n+requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=True)\n var = "hello"\n' - expected_line_change = "3" + + expected_lines_changed = [3, 4] num_changes = 2 change_description = RequestsVerify.change_description # expected because when executing the output code it will make a request which fails, which is OK. diff --git a/pyproject.toml b/pyproject.toml index 091b5d98..e6db227b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ dependencies = [ "isort>=5.12,<6.1", "libcst>=1.7,<1.8", "packaging>=23.2,<25.0", - "pydantic~=2.11.1", + "pydantic~=2.10.6", "pylint>=3.3,<3.4", "python-json-logger~=3.3.0", "PyYAML~=6.0.0", @@ -25,7 +25,7 @@ dependencies = [ "tomlkit~=0.13.0", "wrapt~=1.17.0", "chardet~=5.2.0", - "sarif-pydantic~=0.5.0", + "sarif-pydantic~=0.5.1", "setuptools~=78.1", ] keywords = ["codemod", "codemods", "security", "fix", "fixes"] @@ -46,6 +46,7 @@ Repository = "https://github.com/pixee/codemodder-python" [project.scripts] codemodder = "codemodder.codemodder:main" +codemodder-remediation = "codemodder.codemodder:remediate" generate-docs = 'codemodder.scripts.generate_docs:main' get-hashes = 'codemodder.scripts.get_hashes:main' @@ -81,6 +82,7 @@ test = [ "flask_wtf~=1.2.0", "fickling~=0.1.0,>=0.1.3", "graphql-server~=3.0.0b7", + "unidiff>=0.7.5", ] complexity = [ "radon==6.0.*", @@ -131,7 +133,7 @@ extend-exclude 
= ''' ''' [coverage-threshold] -line_coverage_min = 93 +line_coverage_min = 92 [coverage-threshold.modules."src/core_codemods/"] # Detect if a codemod is missing unit or integration tests file_line_coverage_min = 50 diff --git a/src/codemodder/codemodder.py b/src/codemodder/codemodder.py index 821c6bef..54ae063b 100644 --- a/src/codemodder/codemodder.py +++ b/src/codemodder/codemodder.py @@ -73,6 +73,7 @@ def log_report(context, output, elapsed_ms, files_to_analyze, token_usage): def apply_codemods( context: CodemodExecutionContext, codemods_to_run: Sequence[BaseCodemod], + remediation: bool, ) -> TokenUsage: log_section("scanning") token_usage = TokenUsage() @@ -89,7 +90,7 @@ def apply_codemods( for codemod in codemods_to_run: # NOTE: this may be used as a progress indicator by upstream tools logger.info("running codemod %s", codemod.id) - if codemod_token_usage := codemod.apply(context): + if codemod_token_usage := codemod.apply(context, remediation): log_token_usage(f"Codemod {codemod.id}", codemod_token_usage) token_usage += codemod_token_usage @@ -135,6 +136,7 @@ def run( sast_only: bool = False, ai_client: bool = True, log_matched_files: bool = False, + remediation: bool = False, ) -> tuple[CodeTF | None, int, TokenUsage]: start = datetime.datetime.now() @@ -206,7 +208,7 @@ def run( context.find_and_fix_paths, ) - token_usage = apply_codemods(context, codemods_to_run) + token_usage = apply_codemods(context, codemods_to_run, remediation) elapsed = datetime.datetime.now() - start elapsed_ms = int(elapsed.total_seconds() * 1000) @@ -231,7 +233,7 @@ def run( return codetf, 0, token_usage -def _run_cli(original_args) -> int: +def _run_cli(original_args, remediation=False) -> int: codemod_registry = registry.load_registered_codemods() argv = parse_args(original_args, codemod_registry) if not os.path.exists(argv.directory): @@ -270,7 +272,8 @@ def _run_cli(original_args) -> int: _, status, _ = run( argv.directory, - argv.dry_run, + # Force dry-run if remediation + 
True if remediation else argv.dry_run, argv.output, argv.output_format, argv.verbose, @@ -284,10 +287,22 @@ def _run_cli(original_args) -> int: codemod_registry=codemod_registry, sast_only=argv.sonar_issues_json or argv.sarif, log_matched_files=True, + remediation=remediation, ) return status def main(): + """ + Hardens a project. The application will write all the fixes into the files. + """ sys_argv = sys.argv[1:] sys.exit(_run_cli(sys_argv)) + + +def remediate(): + """ + Remediates a project. The application will suggest a fix for each separate issue found. No files will be written. + """ + sys_argv = sys.argv[1:] + sys.exit(_run_cli(sys_argv, True)) diff --git a/src/codemodder/codemods/base_codemod.py b/src/codemodder/codemods/base_codemod.py index 5b568113..bd36afb4 100644 --- a/src/codemodder/codemods/base_codemod.py +++ b/src/codemodder/codemods/base_codemod.py @@ -185,38 +185,73 @@ def get_files_to_analyze( """ ... - def _apply( + def _apply_remediation( self, context: CodemodExecutionContext, rules: list[str], ) -> None | TokenUsage: - if self.provider and ( - not (provider := context.providers.get_provider(self.provider)) - or not provider.is_available - ): - logger.warning( - "provider %s is not available, skipping codemod", self.provider - ) + """ + Applies remediation behavior to a codemod, that is, each changeset will only be associated with a single finding and no files will be written. 
+ """ + if self._should_skip(context): return None + results: ResultSet | None = self._apply_detector(context) - if isinstance(self.detector, SemgrepRuleDetector): - if ( - context.semgrep_prefilter_results - and self._internal_name - not in context.semgrep_prefilter_results.all_rule_ids() - ): - logger.debug( - "no results from semgrep for %s, skipping analysis", - self.id, + if results is not None and not results: + logger.debug("No results for %s", self.id) + return None + + if not (files_to_analyze := self.get_files_to_analyze(context, results)): + logger.debug("No files matched for %s", self.id) + return None + + # Process each result independently and output the diffs + # gather positional arguments for the map + resultset_arguments: list[ResultSet | None] = [] + path_arguments = [] + if results: + for result in results.results_for_rules(rules): + # this needs to be the same type of ResultSet as results + singleton = results.from_single_result(result) + result_locations = self.get_files_to_analyze(context, singleton) + # We do an execution for each location in the result + # So we duplicate the resultset argument for each location + for loc in result_locations: + resultset_arguments.append(singleton) + path_arguments.append(loc) + # An exception for find-and-fix codemods + else: + resultset_arguments = [None] + path_arguments = files_to_analyze + + contexts: list = [] + with ThreadPoolExecutor() as executor: + logger.debug("using executor with %s workers", context.max_workers) + contexts.extend( + executor.map( + lambda path, resultset: self._process_file( + path, context, resultset, rules + ), + path_arguments, + resultset_arguments or [None], ) - return None + ) + executor.shutdown(wait=True) - results: ResultSet | None = ( - # It seems like semgrep doesn't like our fully-specified id format so pass in short name instead. 
- self.detector.apply(self._internal_name, context) - if self.detector - else None - ) + context.process_results(self.id, contexts) + return None + + def _apply_hardening( + self, + context: CodemodExecutionContext, + rules: list[str], + ) -> None | TokenUsage: + """ + Applies hardening behavior to a codemod with the goal of integrating all fixes for each finding into the files. + """ + if self._should_skip(context): + return None + results: ResultSet | None = self._apply_detector(context) if results is not None and not results: logger.debug("No results for %s", self.id) @@ -226,6 +261,7 @@ def _apply( logger.debug("No files matched for %s", self.id) return None + # Hardens all findings per file at once and writes the fixed code into the file process_file = functools.partial( self._process_file, context=context, results=results, rules=rules ) @@ -243,7 +279,43 @@ def _apply( context.process_results(self.id, contexts) return None - def apply(self, context: CodemodExecutionContext) -> None | TokenUsage: + def _should_skip(self, context: CodemodExecutionContext): + if self.provider and ( + not (provider := context.providers.get_provider(self.provider)) + or not provider.is_available + ): + logger.warning( + "provider %s is not available, skipping codemod", self.provider + ) + return True + + if isinstance(self.detector, SemgrepRuleDetector): + if ( + context.semgrep_prefilter_results + and self._internal_name + not in context.semgrep_prefilter_results.all_rule_ids() + ): + logger.debug( + "no results from semgrep for %s, skipping analysis", + self.id, + ) + return True + return False + + def _apply_detector(self, context: CodemodExecutionContext) -> ResultSet | None: + + results: ResultSet | None = ( + # It seems like semgrep doesn't like our fully-specified id format so pass in short name instead. 
+ self.detector.apply(self._internal_name, context) + if self.detector + else None + ) + + return results + + def apply( + self, context: CodemodExecutionContext, remediation: bool = False + ) -> None | TokenUsage: """ Apply the codemod with the given codemod execution context @@ -259,7 +331,9 @@ def apply(self, context: CodemodExecutionContext) -> None | TokenUsage: :param context: The codemod execution context """ - return self._apply(context, [self._internal_name]) + if remediation: + return self._apply_remediation(context, [self._internal_name]) + return self._apply_hardening(context, [self._internal_name]) def _process_file( self, @@ -357,8 +431,12 @@ def __init__( if requested_rules: self.requested_rules.extend(requested_rules) - def apply(self, context: CodemodExecutionContext) -> None | TokenUsage: - return self._apply(context, self.requested_rules) + def apply( + self, context: CodemodExecutionContext, remediation: bool = False + ) -> None | TokenUsage: + if remediation: + return self._apply_remediation(context, self.requested_rules) + return self._apply_hardening(context, self.requested_rules) def get_files_to_analyze( self, diff --git a/src/codemodder/codemods/base_visitor.py b/src/codemodder/codemods/base_visitor.py index 2159f026..cd05c7b9 100644 --- a/src/codemodder/codemods/base_visitor.py +++ b/src/codemodder/codemods/base_visitor.py @@ -58,6 +58,21 @@ def node_is_selected(self, node) -> bool: pos_to_match ) + def node_is_selected_by_line_only(self, node) -> bool: + pos_to_match = self.node_position(node) + return self.filter_by_result_line_only( + pos_to_match + ) and self.filter_by_path_includes_or_excludes(pos_to_match) + + def filter_by_result_line_only(self, pos_to_match) -> bool: + # Codemods with detectors will only run their transformations if there are results. 
+ return self.results is None or any( + pos_to_match.start.line >= location.start.line + and pos_to_match.end.line <= location.end.line + for r in self.results + for location in r.locations + ) + def node_position(self, node): # See https://github.com/Instagram/LibCST/blob/main/libcst/_metadata_dependent.py#L112 match node: diff --git a/src/codemodder/codemods/test/integration_utils.py b/src/codemodder/codemods/test/integration_utils.py index 40da6299..917b2d0f 100644 --- a/src/codemodder/codemods/test/integration_utils.py +++ b/src/codemodder/codemods/test/integration_utils.py @@ -9,6 +9,7 @@ from types import ModuleType import jsonschema +import unidiff from codemodder import __version__ from core_codemods.sonar.api import process_sonar_findings @@ -35,16 +36,7 @@ def check_dependencies_after(self): assert new_requirements_txt == self.expected_requirements -class BaseIntegrationTest(DependencyTestMixin): - codemod = NotImplementedError - original_code = NotImplementedError - replacement_lines = NotImplementedError - num_changes = 1 - _lines: list = [] - num_changed_files = 1 - allowed_exceptions = () - sonar_issues_json: str | None = None - sonar_hotspots_json: str | None = None +class BaseIntegrationTestMixin: @classmethod def setup_class(cls): @@ -69,6 +61,271 @@ def setup_class(cls): manage_py_path = parent_dir / "manage.py" manage_py_path.touch() + def _assert_sonar_fields(self, result): + del result + + def _assert_codetf_output(self, codetf_schema): + with open(self.output_path, "r", encoding="utf-8") as f: + codetf = json.load(f) + + jsonschema.validate(codetf, codetf_schema) + + assert sorted(codetf.keys()) == ["results", "run"] + run = codetf["run"] + self._assert_run_fields(run, self.output_path) + results = codetf["results"] + # CodeTf2 spec requires relative paths + self._assert_results_fields(results, self.code_filename) + + def write_original_code(self): + with open(self.code_path, "w", encoding="utf-8") as f: + f.write(self.original_code) + + def 
_assert_results_fields(self, results, output_path): + assert len(results) == 1 + result = results[0] + assert result["codemod"] == self.codemod_instance.id + assert result["references"] == [ + ref.model_dump(exclude_none=True) + for ref in self.codemod_instance.references + ] + + assert ("detectionTool" in result) == bool(self.sonar_issues_json) or ( + "detectionTool" in result + ) == bool(self.sonar_hotspots_json) + + # TODO: if/when we add description for each url + for reference in result["references"][ + # Last references for Sonar has a different description + : ( + -len(self.codemod.requested_rules) + if self.sonar_issues_json or self.sonar_hotspots_json + else None + ) + ]: + assert reference["url"] == reference["description"] + + self._assert_sonar_fields(result) + + def _assert_command_line(self, run, output_path): + pass + + def _assert_run_fields(self, run, output_path): + self._assert_command_line(run, output_path) + assert run["vendor"] == "pixee" + assert run["tool"] == "codemodder-python" + assert run["version"] == __version__ + assert run["elapsed"] != "" + assert run["directory"] == os.path.abspath(self.code_dir) + assert run["sarifs"] == [] + + +class BaseRemediationIntegrationTest(BaseIntegrationTestMixin): + codemod = NotImplementedError + original_code = NotImplementedError + expected_diff_per_change = NotImplementedError + num_changes = 1 + num_changed_files = 1 + allowed_exceptions = () + sonar_issues_json: str | None = None + sonar_hotspots_json: str | None = None + + @classmethod + def setup_class(cls): + super().setup_class() + + if cls.original_code is not NotImplementedError: + # Some tests are easier to understand with the expected new code provided + # instead of calculated + cls.original_code = dedent(cls.original_code).strip("\n") + else: + with open(cls.code_path, "r", encoding="utf-8") as f: # type: ignore + cls.original_code = f.read() + + def _assert_command_line(self, run, output_path): + assert run[ + "commandLine" + ] == 
f'codemodder-remediation {self.code_dir} --output {output_path} --codemod-include={self.codemod_instance.id} --path-include={self.code_filename} --path-exclude=""' + ( + f" --sonar-issues-json={self.sonar_issues_json}" + if self.sonar_issues_json + else "" + ) + ( + f" --sonar-hotspots-json={self.sonar_hotspots_json}" + if self.sonar_hotspots_json + else "" + ) + + def _assert_results_fields(self, results, output_path): + super()._assert_results_fields(results, output_path) + result = results[0] + assert len(result["changeset"]) == self.num_changes + # gather all the change files and test against the expected number + assert len({c["path"] for c in result["changeset"]}) == self.num_changed_files + + # A codemod may change multiple files. For now we will + # assert the resulting data for one file only. + changes = [ + result for result in result["changeset"] if result["path"] == output_path + ] + assert {c["path"] for c in changes} == {output_path} + + changes_diff = [c["diff"] for c in changes] + assert changes_diff == self.expected_diff_per_change + + assert len(changes) == self.num_changes + lines_changed = [c["changes"][0]["lineNumber"] for c in changes] + assert lines_changed == self.expected_lines_changed + assert {c["changes"][0]["description"] for c in changes} == { + self.change_description + } + + def test_codetf_output(self, codetf_schema): + """ + Tests correct codetf output. 
+ """ + + command = [ + "codemodder-remediation", + self.code_dir, + "--output", + self.output_path, + f"--codemod-include={self.codemod_instance.id}", + f"--path-include={self.code_filename}", + '--path-exclude=""', + ] + + if self.sonar_issues_json: + command.append(f"--sonar-issues-json={self.sonar_issues_json}") + if self.sonar_hotspots_json: + command.append(f"--sonar-hotspots-json={self.sonar_hotspots_json}") + + self.write_original_code() + + completed_process = subprocess.run( + command, + check=False, + shell=False, + ) + assert completed_process.returncode == 0 + + self._assert_codetf_output(codetf_schema) + patched_codes = self._get_patched_code_for_each_change() + self._check_code_after(patched_codes) + + # check that the original file is not rewritten + with open(self.code_path, "r", encoding="utf-8") as f: + original_file_code = f.read() + assert original_file_code == self.original_code + + def apply_hunk_to_lines(self, lines, hunk): + # The hunk target line numbers are 1-indexed. + start_index = hunk.target_start - 1 + new_lines = lines[:start_index] + orig_index = start_index + + for hunk_line in hunk: + if hunk_line.is_context: + # For a context line, check that content matches. + if orig_index >= len(lines): + raise ValueError( + "Context line beyond available lines: " + hunk_line.value + ) + if lines[orig_index].rstrip("\n") != hunk_line.value.rstrip("\n"): + raise ValueError( + "Context line mismatch:\nExpected: " + + lines[orig_index] + + "\nGot: " + + hunk_line.value + ) + new_lines.append(lines[orig_index]) + orig_index += 1 + elif hunk_line.is_removed: + # Expect the original line to match, but then skip it. 
+ if orig_index >= len(lines): + raise ValueError( + "Removal line beyond available lines: " + hunk_line.value + ) + if lines[orig_index].rstrip("\n") != hunk_line.value.rstrip("\n"): + raise ValueError( + "Removal line mismatch:\nExpected: " + + lines[orig_index] + + "\nGot: " + + hunk_line.value + ) + orig_index += 1 + elif hunk_line.is_added: + # For an added line, insert the new content. + new_lines.append(hunk_line.value) + # Append any remaining lines after the hunk. + new_lines.extend(lines[orig_index:]) + return new_lines + + def apply_diff(self, diff_str, original_str): + # unidiff expect the hunk header to have a filename, append it + diff_lines = diff_str.splitlines() + patched_diff = [] + for line in diff_lines: + if line.startswith("+++") or line.startswith("---"): + line = line + " " + self.code_filename + patched_diff.append(line) + fixed_diff_str = "\n".join(patched_diff) + + patch_set = unidiff.PatchSet(fixed_diff_str) + + # Make a list of lines from the original string. + # Assumes original_str uses newline characters. + patched_lines = original_str.splitlines(keepends=True) + + # For simplicity, assume the diff only contains modifications for one file. + if len(patch_set) != 1: + raise ValueError("Only single-file patches are supported in this example.") + + file_patch = list(patch_set)[0] + # Process each hunk from the patch sequentially. 
+ for hunk in file_patch: + try: + patched_lines = self.apply_hunk_to_lines(patched_lines, hunk) + except ValueError as e: + print("Error applying hunk:", e) + sys.exit(1) + + return "".join(patched_lines) + + def _get_patched_code_for_each_change(self) -> list[str]: + with open(self.output_path, "r", encoding="utf-8") as f: # type: ignore + codetf = json.load(f) + changes = codetf["results"][0]["changeset"] + patched_codes = [] + with open(self.code_path, "r", encoding="utf-8") as f: # type: ignore + original_code = f.read() + for c in changes: + patched_codes.append(self.apply_diff(c["diff"], original_code)) + return patched_codes + + def _check_code_after(self, patched_codes): + """ + Check if each change will produce executable code. + """ + for patched_code in patched_codes: + execute_code( + code=patched_code, allowed_exceptions=self.allowed_exceptions # type: ignore + ) + + +class BaseIntegrationTest(BaseIntegrationTestMixin, DependencyTestMixin): + codemod = NotImplementedError + original_code = NotImplementedError + replacement_lines = NotImplementedError + num_changes = 1 + _lines: list = [] + num_changed_files = 1 + allowed_exceptions = () + sonar_issues_json: str | None = None + sonar_hotspots_json: str | None = None + + @classmethod + def setup_class(cls): + super().setup_class() if hasattr(cls, "expected_new_code"): # Some tests are easier to understand with the expected new code provided # instead of calculated @@ -91,11 +348,7 @@ def teardown_class(cls): if cls.requirements_file_name: pathlib.Path(cls.dependency_path).unlink(missing_ok=True) - def _assert_run_fields(self, run, output_path): - assert run["vendor"] == "pixee" - assert run["tool"] == "codemodder-python" - assert run["version"] == __version__ - assert run["elapsed"] != "" + def _assert_command_line(self, run, output_path): assert run[ "commandLine" ] == f'codemodder {self.code_dir} --output {output_path} --codemod-include={self.codemod_instance.id} --path-include={self.code_filename} 
--path-exclude=""' + ( @@ -107,34 +360,11 @@ def _assert_run_fields(self, run, output_path): if self.sonar_hotspots_json else "" ) - assert run["directory"] == os.path.abspath(self.code_dir) - assert run["sarifs"] == [] def _assert_results_fields(self, results, output_path): - assert len(results) == 1 - result = results[0] - assert result["codemod"] == self.codemod_instance.id - assert result["references"] == [ - ref.model_dump(exclude_none=True) - for ref in self.codemod_instance.references - ] - - assert ("detectionTool" in result) == bool(self.sonar_issues_json) - assert ("detectionTool" in result) == bool(self.sonar_hotspots_json) - - # TODO: if/when we add description for each url - for reference in result["references"][ - # Last references for Sonar has a different description - : ( - -len(self.codemod.requested_rules) - if self.sonar_issues_json or self.sonar_hotspots_json - else None - ) - ]: - assert reference["url"] == reference["description"] - - self._assert_sonar_fields(result) + super()._assert_results_fields(results, output_path) + result = results[0] assert len(result["changeset"]) == self.num_changed_files # A codemod may change multiple files. 
For now we will @@ -150,26 +380,6 @@ def _assert_results_fields(self, results, output_path): assert line_change["lineNumber"] == int(self.expected_line_change) assert line_change["description"] == self.change_description - def _assert_sonar_fields(self, result): - del result - - def _assert_codetf_output(self, codetf_schema): - with open(self.output_path, "r", encoding="utf-8") as f: - codetf = json.load(f) - - jsonschema.validate(codetf, codetf_schema) - - assert sorted(codetf.keys()) == ["results", "run"] - run = codetf["run"] - self._assert_run_fields(run, self.output_path) - results = codetf["results"] - # CodeTf2 spec requires relative paths - self._assert_results_fields(results, self.code_filename) - - def write_original_code(self): - with open(self.code_path, "w", encoding="utf-8") as f: - f.write(self.original_code) - def check_code_after(self) -> ModuleType: with open(self.code_path, "r", encoding="utf-8") as f: # type: ignore new_code = f.read() @@ -238,6 +448,70 @@ def _run_idempotency_check(self, command): sys.path.append(SAMPLES_DIR) +class SonarRemediationIntegrationTest(BaseRemediationIntegrationTest): + """ + Sonar integration tests must use code from a file in tests/samples + because those files are what appears in sonar_issues.json + """ + + code_path = NotImplementedError + sonar_issues_json = "tests/samples/sonar_issues.json" + sonar_hotspots_json = "tests/samples/sonar_hotspots.json" + + @classmethod + def setup_class(cls): + codemod_id = ( + cls.codemod().id if isinstance(cls.codemod, type) else cls.codemod.id + ) + cls.codemod_instance = validate_codemod_registration(codemod_id) + + cls.output_path = tempfile.mkstemp()[1] + cls.code_dir = SAMPLES_DIR + cls.code_filename = os.path.relpath(cls.code_path, SAMPLES_DIR) + + if cls.original_code is not NotImplementedError: + # Some tests are easier to understand with the expected new code provided + # instead of calculated + cls.original_code = dedent(cls.original_code).strip("\n") + else: + with 
open(cls.code_path, "r", encoding="utf-8") as f: # type: ignore + cls.original_code = f.read() + + # TODO: support sonar integration tests that add a dependency to + # `requirements_file_name`. These tests would not be able to run + # in parallel at this time since they would all override the same + # tests/samples/requirements.txt file, unless we change that to + # a temporary file. + cls.check_sonar_issues() + + @classmethod + def check_sonar_issues(cls): + sonar_results = process_sonar_findings( + (cls.sonar_issues_json, cls.sonar_hotspots_json) + ) + + assert any( + x in sonar_results for x in cls.codemod.requested_rules + ), f"Make sure to add a sonar issue/hotspot for {cls.codemod.rule_id} in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}" + results_for_codemod = sonar_results[cls.codemod.requested_rules[-1]] + file_path = pathlib.Path(cls.code_filename) + assert ( + file_path in results_for_codemod + ), f"Make sure to add a sonar issue/hotspot for file `{cls.code_filename}` under one of the rules `{cls.codemod.requested_rules}`in {cls.sonar_issues_json} or {cls.sonar_hotspots_json}" + + def _assert_sonar_fields(self, result): + assert self.codemod_instance._metadata.tool is not None + rules = self.codemod_instance._metadata.tool.rules + for i in range(len(rules)): + assert ( + result["references"][len(result["references"]) - len(rules) + i][ + "description" + ] + == self.codemod_instance._metadata.tool.rules[i].name + ) + assert result["detectionTool"]["name"] == "Sonar" + + class SonarIntegrationTest(BaseIntegrationTest): """ Sonar integration tests must use code from a file in tests/samples diff --git a/src/codemodder/codemods/test/utils.py b/src/codemodder/codemods/test/utils.py index 3ccdbdd7..60f71206 100644 --- a/src/codemodder/codemods/test/utils.py +++ b/src/codemodder/codemods/test/utils.py @@ -7,7 +7,7 @@ from codemodder import registry from codemodder.codemods.api import BaseCodemod -from codemodder.codetf import Change +from 
codemodder.codetf.v2.codetf import ChangeSet from codemodder.context import CodemodExecutionContext from codemodder.diff import create_diff from codemodder.providers import load_providers @@ -58,6 +58,7 @@ def run_and_assert( tmpdir, input_code, expected, + expected_diff_per_change: list[str] = [], num_changes: int = 1, min_num_changes: int | None = None, root: Path | None = None, @@ -74,7 +75,7 @@ def run_and_assert( self.execution_context = CodemodExecutionContext( directory=root, - dry_run=False, + dry_run=True if expected_diff_per_change else False, verbose=False, registry=mock.MagicMock(), providers=load_providers(), @@ -83,7 +84,10 @@ def run_and_assert( path_exclude=path_exclude, ) - self.codemod.apply(self.execution_context) + self.codemod.apply( + self.execution_context, + remediation=True if expected_diff_per_change else False, + ) changes = self.execution_context.get_changesets(self.codemod.id) self.changeset = changes @@ -92,20 +96,29 @@ def run_and_assert( assert not changes return - self.assert_num_changes(changes, num_changes, min_num_changes) + self.assert_num_changes( + changes, num_changes, expected_diff_per_change, min_num_changes + ) self.assert_changes( tmpdir, tmp_file_path, input_code, expected, - changes[0], + expected_diff_per_change, + num_changes, + changes, ) - def assert_num_changes(self, changes, expected_num_changes, min_num_changes): - assert len(changes) == 1 - - actual_num = len(changes[0].changes) + def assert_num_changes( + self, changes, expected_num_changes, expected_diff_per_change, min_num_changes + ): + if expected_diff_per_change: + assert len(changes) == expected_num_changes + actual_num = len(changes) + else: + assert len(changes[0].changes) == expected_num_changes + actual_num = len(changes[0].changes) if min_num_changes is not None: assert ( @@ -116,25 +129,43 @@ def assert_num_changes(self, changes, expected_num_changes, min_num_changes): actual_num == expected_num_changes ), f"Expected {expected_num_changes} changes 
but {actual_num} were created." - def assert_changes(self, root, file_path, input_code, expected, changes): - assert os.path.relpath(file_path, root) == changes.path - assert all(change.description for change in changes.changes) - - expected_diff = create_diff( - dedent(input_code).splitlines(keepends=True), - dedent(expected).splitlines(keepends=True), + def assert_changes( + self, + root, + file_path, + input_code, + expected, + expected_diff_per_change, + num_changes, + changes, + ): + assert all( + os.path.relpath(file_path, root) == change.path for change in changes ) - try: - assert expected_diff == changes.diff - except AssertionError: - raise DiffError(expected_diff, changes.diff) - - output_code = file_path.read_bytes().decode("utf-8") + assert all(c.description for change in changes for c in change.changes) - try: - assert output_code == (format_expected := dedent(expected)) - except AssertionError: - raise DiffError(format_expected, output_code) + # assert each change individually + if expected_diff_per_change and num_changes > 1: + assert num_changes == len(expected_diff_per_change) + for change, diff in zip(changes, expected_diff_per_change): + assert change.diff == diff + else: + # generate diff from expected code + expected_diff = create_diff( + dedent(input_code).splitlines(keepends=True), + dedent(expected).splitlines(keepends=True), + ) + try: + assert expected_diff == changes[0].diff + except AssertionError: + raise DiffError(expected_diff, changes[0].diff) + + output_code = file_path.read_bytes().decode("utf-8") + + try: + assert output_code == (format_expected := dedent(expected)) + except AssertionError: + raise DiffError(format_expected, output_code) def run_and_assert_filepath( self, @@ -171,6 +202,7 @@ def run_and_assert( tmpdir, input_code, expected, + expected_diff_per_change: list[str] | None = None, num_changes: int = 1, min_num_changes: int | None = None, root: Path | None = None, @@ -191,7 +223,7 @@ def run_and_assert( 
self.execution_context = CodemodExecutionContext( directory=root, - dry_run=False, + dry_run=True if expected_diff_per_change else False, verbose=False, tool_result_files_map={self.tool: [tmp_results_file_path]}, registry=mock.MagicMock(), @@ -201,28 +233,35 @@ def run_and_assert( path_exclude=path_exclude, ) - self.codemod.apply(self.execution_context) + self.codemod.apply( + self.execution_context, + remediation=True if expected_diff_per_change else False, + ) changes = self.execution_context.get_changesets(self.codemod.id) if input_code == expected: assert not changes return - self.assert_num_changes(changes, num_changes, min_num_changes) + self.assert_num_changes( + changes, num_changes, expected_diff_per_change, min_num_changes + ) - self.assert_findings(changes[0].changes) + self.assert_findings(changes) self.assert_changes( tmpdir, tmp_file_path, input_code, expected, - changes[0], + expected_diff_per_change, + num_changes, + changes, ) return changes - def assert_findings(self, changes: list[Change]): + def assert_findings(self, changes: list[ChangeSet]): assert all( - x.fixedFindings for x in changes + c.fixedFindings for a in changes for c in a.changes ), f"Expected all changes to have findings: {changes}" diff --git a/src/codemodder/result.py b/src/codemodder/result.py index e6b473de..f38a4af5 100644 --- a/src/codemodder/result.py +++ b/src/codemodder/result.py @@ -284,6 +284,15 @@ def files_for_rule(self, rule_id: str) -> list[Path]: def all_rule_ids(self) -> list[str]: return list(self.keys()) + @classmethod + def from_single_result(cls, result: ResultType) -> Self: + """ + Creates a new ResultSet of the same type with a give result. 
+ """ + new = cls() + new.add_result(result) + return new + def __or__(self, other): result = self.__class__() for k in self.keys() | other.keys(): diff --git a/src/core_codemods/defectdojo/api.py b/src/core_codemods/defectdojo/api.py index abc7bbdd..318993cc 100644 --- a/src/core_codemods/defectdojo/api.py +++ b/src/core_codemods/defectdojo/api.py @@ -8,6 +8,7 @@ from codemodder.codemods.api import Metadata, Reference, ToolMetadata, ToolRule from codemodder.codemods.base_detector import BaseDetector from codemodder.context import CodemodExecutionContext +from codemodder.llm import TokenUsage from codemodder.result import ResultSet from core_codemods.api import CoreCodemod, SASTCodemod @@ -76,8 +77,15 @@ def from_core_codemod( def apply( self, context: CodemodExecutionContext, - ) -> None: - self._apply( + remediation: bool = False, + ) -> None | TokenUsage: + if remediation: + return self._apply_remediation( + context, + # We know this has a tool because we created it with `from_core_codemod` + cast(ToolMetadata, self._metadata.tool).rule_ids, + ) + return self._apply_hardening( context, # We know this has a tool because we created it with `from_core_codemod` cast(ToolMetadata, self._metadata.tool).rule_ids, diff --git a/src/core_codemods/semgrep/semgrep_no_csrf_exempt.py b/src/core_codemods/semgrep/semgrep_no_csrf_exempt.py index 5e03891c..cd494c06 100644 --- a/src/core_codemods/semgrep/semgrep_no_csrf_exempt.py +++ b/src/core_codemods/semgrep/semgrep_no_csrf_exempt.py @@ -26,14 +26,14 @@ def leave_Decorator( self.node_position(original_node) ): return updated_node - - if ( + # Due to semgrep's odd way of reporting the position for this (decorators + functiondef), we match by line only + if self.node_is_selected_by_line_only(original_node) and ( self.find_base_name(original_node.decorator) == "django.views.decorators.csrf.csrf_exempt" ): self.report_change(original_node) return cst.RemovalSentinel.REMOVE - return original_node + return updated_node 
SemgrepNoCsrfExempt = SemgrepCodemod( diff --git a/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py b/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py index 76cd9944..3542d95c 100644 --- a/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py +++ b/tests/codemods/defectdojo/semgrep/test_avoid_insecure_deserialization.py @@ -62,7 +62,6 @@ def test_pickle_load(self, adds_dependency, tmpdir): result = fickling.load("data") """ - findings = { "results": [ { @@ -100,6 +99,31 @@ def test_pickle_and_yaml(self, adds_dependency, tmpdir): result = fickling.load("data") result = yaml.load("data", Loader=yaml.SafeLoader) """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,6 +1,6 @@ + +-import pickle + import yaml ++import fickling + +-result = pickle.load("data") ++result = fickling.load("data") + result = yaml.load("data") +""", + """\ +--- ++++ +@@ -3,4 +3,4 @@ + import yaml + + result = pickle.load("data") +-result = yaml.load("data") ++result = yaml.load("data", Loader=yaml.SafeLoader) +""", + ] findings = { "results": [ @@ -122,6 +146,7 @@ def test_pickle_and_yaml(self, adds_dependency, tmpdir): tmpdir, input_code, expected, + expected_diff_per_change, results=json.dumps(findings), num_changes=2, ) @@ -129,11 +154,11 @@ def test_pickle_and_yaml(self, adds_dependency, tmpdir): assert changes is not None assert changes[0].changes[0].fixedFindings is not None - assert changes[0].changes[0].fixedFindings[0].id == "4" + assert changes[0].changes[0].fixedFindings[0].id == "3" assert changes[0].changes[0].fixedFindings[0].rule.id == RULE_ID - assert changes[0].changes[1].fixedFindings is not None - assert changes[0].changes[1].fixedFindings[0].id == "3" - assert changes[0].changes[1].fixedFindings[0].rule.id == RULE_ID + assert changes[1].changes[0].fixedFindings is not None + assert changes[1].changes[0].fixedFindings[0].id == "4" + assert changes[1].changes[0].fixedFindings[0].rule.id == RULE_ID 
@mock.patch("codemodder.codemods.api.FileContext.add_dependency") def test_pickle_loads(self, adds_dependency, tmpdir): diff --git a/tests/codemods/semgrep/test_semgrep_nan_injection.py b/tests/codemods/semgrep/test_semgrep_nan_injection.py index 6a4d7b49..1ab02f28 100644 --- a/tests/codemods/semgrep/test_semgrep_nan_injection.py +++ b/tests/codemods/semgrep/test_semgrep_nan_injection.py @@ -66,8 +66,8 @@ def home(request): tmpdir, input_code, expected_output, - results=json.dumps(results), num_changes=4, + results=json.dumps(results), ) def test_multiple(self, tmpdir): @@ -109,6 +109,68 @@ def view(request): else: return [1, 2, float(tid), 3] """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -2,7 +2,10 @@ + tid = request.POST.get("tid") + some_list = [1, 2, 3, float('nan')] + +- float(tid) in some_list ++ if tid.lower() == "nan": ++ raise ValueError ++ else: ++ float(tid) in some_list + + z = [1, 2, complex(tid), 3] + +""", + """\ +--- ++++ +@@ -4,7 +4,10 @@ + + float(tid) in some_list + +- z = [1, 2, complex(tid), 3] ++ if tid.lower() == "nan": ++ raise ValueError ++ else: ++ z = [1, 2, complex(tid), 3] + + x = [float(tid), 1.0, 2.0] + +""", + """\ +--- ++++ +@@ -6,6 +6,9 @@ + + z = [1, 2, complex(tid), 3] + +- x = [float(tid), 1.0, 2.0] ++ if tid.lower() == "nan": ++ raise ValueError ++ else: ++ x = [float(tid), 1.0, 2.0] + + return [1, 2, float(tid), 3] +""", + """\ +--- ++++ +@@ -8,4 +8,7 @@ + + x = [float(tid), 1.0, 2.0] + +- return [1, 2, float(tid), 3] ++ if tid.lower() == "nan": ++ raise ValueError ++ else: ++ return [1, 2, float(tid), 3] +""", + ] results = { "runs": [ @@ -227,8 +289,9 @@ def view(request): tmpdir, input_code, expected_output, + expected_diff_per_change, + num_changes=4, results=json.dumps(results), - num_changes=16, ) def test_once_nested(self, tmpdir): @@ -286,8 +349,8 @@ def view(request): tmpdir, input_code, expected_output, - results=json.dumps(results), num_changes=4, + results=json.dumps(results), ) def 
test_twice_nested(self, tmpdir): @@ -344,8 +407,8 @@ def view(request): tmpdir, input_code, expected_output, - results=json.dumps(results), num_changes=4, + results=json.dumps(results), ) def test_direct_source(self, tmpdir): @@ -400,8 +463,8 @@ def view(request): tmpdir, input_code, expected_output, - results=json.dumps(results), num_changes=4, + results=json.dumps(results), ) def test_binop(self, tmpdir): @@ -458,8 +521,8 @@ def view(request): tmpdir, input_code, expected_output, - results=json.dumps(results), num_changes=4, + results=json.dumps(results), ) diff --git a/tests/codemods/semgrep/test_semgrep_no_csrf_exempt.py b/tests/codemods/semgrep/test_semgrep_no_csrf_exempt.py index 80e7e287..26684eac 100644 --- a/tests/codemods/semgrep/test_semgrep_no_csrf_exempt.py +++ b/tests/codemods/semgrep/test_semgrep_no_csrf_exempt.py @@ -48,7 +48,31 @@ def ssrf_code_checker(request): def foo(): pass """ - + expected_diff_per_change = [ + """\ +--- ++++ +@@ -3,7 +3,6 @@ + from django.dispatch import receiver + from django.core.signals import request_finished + +-@csrf_exempt + def ssrf_code_checker(request): + if request.user.is_authenticated: + if request.method == 'POST': +""", + """\ +--- ++++ +@@ -12,6 +12,5 @@ + + + @receiver(request_finished) +-@csrf_exempt + def foo(): + pass +""", + ] results = { "runs": [ { @@ -114,6 +138,7 @@ def foo(): tmpdir, input_code, expected_output, + expected_diff_per_change, results=json.dumps(results), num_changes=2, ) diff --git a/tests/codemods/semgrep/test_semgrep_sql_parametrization.py b/tests/codemods/semgrep/test_semgrep_sql_parametrization.py index 11dbd1a2..d4520953 100644 --- a/tests/codemods/semgrep/test_semgrep_sql_parametrization.py +++ b/tests/codemods/semgrep/test_semgrep_sql_parametrization.py @@ -46,6 +46,37 @@ def f(): conn = sqlite3.connect("example") conn.cursor().execute(sql, ((user), )) ''' + expected_diff_per_change = [ + '''\ +--- ++++ +@@ -8,7 +8,7 @@ + @app.route("/example") + def f(): + user = 
request.args["user"] +- sql = """SELECT user FROM users WHERE user = '%s'""" ++ sql = """SELECT user FROM users WHERE user = ?""" + + conn = sqlite3.connect("example") +- conn.cursor().execute(sql % (user)) ++ conn.cursor().execute(sql, ((user), )) +''', + '''\ +--- ++++ +@@ -8,7 +8,7 @@ + @app.route("/example") + def f(): + user = request.args["user"] +- sql = """SELECT user FROM users WHERE user = '%s'""" ++ sql = """SELECT user FROM users WHERE user = ?""" + + conn = sqlite3.connect("example") +- conn.cursor().execute(sql % (user)) ++ conn.cursor().execute(sql, ((user), )) +''', + ] + results = { "runs": [ { @@ -190,12 +221,11 @@ def f(): } ] } - changes = self.run_and_assert( + self.run_and_assert( tmpdir, input_code, expexted_output, + expected_diff_per_change, results=json.dumps(results), - ) - assert len(changes[0].changes[0].fixedFindings) == len( - results["runs"][0]["results"] + num_changes=2, ) diff --git a/tests/codemods/sonar/test_sonar_django_receiver_on_top.py b/tests/codemods/sonar/test_sonar_django_receiver_on_top.py index 77d2c013..0ec3513a 100644 --- a/tests/codemods/sonar/test_sonar_django_receiver_on_top.py +++ b/tests/codemods/sonar/test_sonar_django_receiver_on_top.py @@ -13,8 +13,8 @@ def test_name(self): def assert_findings(self, changes): # For now we can only link the finding to the line with the receiver decorator - assert changes[0].fixedFindings - assert not changes[1].fixedFindings + assert changes[0].changes[0].fixedFindings + assert not changes[0].changes[1].fixedFindings def test_simple(self, tmpdir): input_code = """ diff --git a/tests/codemods/sonar/test_sonar_enable_jinja2_autoescape.py b/tests/codemods/sonar/test_sonar_enable_jinja2_autoescape.py index ea3feaf8..3e8bd941 100644 --- a/tests/codemods/sonar/test_sonar_enable_jinja2_autoescape.py +++ b/tests/codemods/sonar/test_sonar_enable_jinja2_autoescape.py @@ -24,6 +24,29 @@ def test_simple(self, tmpdir): env = Environment(autoescape=True) env = Environment(autoescape=True) 
""" + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,4 +1,4 @@ + + from jinja2 import Environment +-env = Environment() ++env = Environment(autoescape=True) + env = Environment(autoescape=False) +""", + """\ +--- ++++ +@@ -1,4 +1,4 @@ + + from jinja2 import Environment + env = Environment() +-env = Environment(autoescape=False) ++env = Environment(autoescape=True) +""", + ] + hotspots = { "hotspots": [ { @@ -54,6 +77,7 @@ def test_simple(self, tmpdir): tmpdir, input_code, expected_output, + expected_diff_per_change, results=json.dumps(hotspots), num_changes=2, ) diff --git a/tests/codemods/sonar/test_sonar_fix_assert_tuple.py b/tests/codemods/sonar/test_sonar_fix_assert_tuple.py index 573634cc..a31903bd 100644 --- a/tests/codemods/sonar/test_sonar_fix_assert_tuple.py +++ b/tests/codemods/sonar/test_sonar_fix_assert_tuple.py @@ -13,9 +13,9 @@ def test_name(self): def assert_findings(self, changes): # For now we can only link the finding to the first line changed - assert changes[0].fixedFindings - assert not changes[1].fixedFindings - assert not changes[2].fixedFindings + assert changes[0].changes[0].fixedFindings + assert not changes[0].changes[1].fixedFindings + assert not changes[0].changes[2].fixedFindings def test_simple(self, tmpdir): input_code = """ diff --git a/tests/codemods/sonar/test_sonar_fix_missing_self_or_cls.py b/tests/codemods/sonar/test_sonar_fix_missing_self_or_cls.py index b9362dcc..d1247a07 100644 --- a/tests/codemods/sonar/test_sonar_fix_missing_self_or_cls.py +++ b/tests/codemods/sonar/test_sonar_fix_missing_self_or_cls.py @@ -30,6 +30,32 @@ def instance_method(self): def class_method(cls): pass """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,6 +1,6 @@ + + class A: +- def instance_method(): ++ def instance_method(self): + pass + + @classmethod +""", + """\ +--- ++++ +@@ -4,5 +4,5 @@ + pass + + @classmethod +- def class_method(): ++ def class_method(cls): + pass +""", + ] + issues = { "issues": [ { @@ -60,6 +86,7 @@ def 
class_method(cls): tmpdir, input_code, expected_output, + expected_diff_per_change, results=json.dumps(issues), num_changes=2, ) diff --git a/tests/codemods/sonar/test_sonar_invert_boolean_check.py b/tests/codemods/sonar/test_sonar_invert_boolean_check.py index c483096a..41267656 100644 --- a/tests/codemods/sonar/test_sonar_invert_boolean_check.py +++ b/tests/codemods/sonar/test_sonar_invert_boolean_check.py @@ -20,6 +20,27 @@ def test_simple(self, tmpdir): if a != 2: b = i >= 10 """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,3 +1,3 @@ + +-if not a == 2: ++if a != 2: + b = not i < 10 +""", + """\ +--- ++++ +@@ -1,3 +1,3 @@ + + if not a == 2: +- b = not i < 10 ++ b = i >= 10 +""", + ] + issues = { "issues": [ { @@ -50,6 +71,7 @@ def test_simple(self, tmpdir): tmpdir, input_code, expected_output, + expected_diff_per_change, results=json.dumps(issues), num_changes=2, ) diff --git a/tests/codemods/sonar/test_sonar_jwt_decode_verify.py b/tests/codemods/sonar/test_sonar_jwt_decode_verify.py index fa30f3a2..c17c99ae 100644 --- a/tests/codemods/sonar/test_sonar_jwt_decode_verify.py +++ b/tests/codemods/sonar/test_sonar_jwt_decode_verify.py @@ -38,6 +38,31 @@ def test_simple(self, tmpdir): decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True) decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True}) """ + + expected_diff_per_change = [ + """\ +--- ++++ +@@ -8,5 +8,5 @@ + } + + encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256") +-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False) ++decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True) + decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False}) +""", + """\ +--- ++++ +@@ -9,4 +9,4 @@ + + encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256") + decoded_payload = 
jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False) +-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False}) ++decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True}) +""", + ] + issues = { "issues": [ { @@ -65,5 +90,10 @@ def test_simple(self, tmpdir): ] } self.run_and_assert( - tmpdir, input_code, expected, results=json.dumps(issues), num_changes=2 + tmpdir, + input_code, + expected, + expected_diff_per_change, + results=json.dumps(issues), + num_changes=2, ) diff --git a/tests/codemods/sonar/test_sonar_secure_cookie.py b/tests/codemods/sonar/test_sonar_secure_cookie.py index 922fbc00..b9d56ac7 100644 --- a/tests/codemods/sonar/test_sonar_secure_cookie.py +++ b/tests/codemods/sonar/test_sonar_secure_cookie.py @@ -34,6 +34,55 @@ def test_simple(self, tmpdir): var = "hello" response2.set_cookie("name", "value", secure=True, httponly=True, samesite='Lax') """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -3,7 +3,7 @@ + + response = flask.make_response() + var = "hello" +-response.set_cookie("name", "value") ++response.set_cookie("name", "value", secure=True, httponly=True, samesite='Lax') + + response2 = flask.Response() + var = "hello" +""", + """\ +--- ++++ +@@ -7,4 +7,4 @@ + + response2 = flask.Response() + var = "hello" +-response2.set_cookie("name", "value") ++response2.set_cookie("name", "value", secure=True, httponly=True, samesite='Lax') +""", + """\ +--- ++++ +@@ -3,7 +3,7 @@ + + response = flask.make_response() + var = "hello" +-response.set_cookie("name", "value") ++response.set_cookie("name", "value", secure=True, httponly=True, samesite='Lax') + + response2 = flask.Response() + var = "hello" +""", + """\ +--- ++++ +@@ -7,4 +7,4 @@ + + response2 = flask.Response() + var = "hello" +-response2.set_cookie("name", "value") ++response2.set_cookie("name", "value", secure=True, httponly=True, samesite='Lax') +""", + ] + 
issues = { "hotspots": [ { @@ -83,5 +132,10 @@ def test_simple(self, tmpdir): ], } self.run_and_assert( - tmpdir, input_code, expected, results=json.dumps(issues), num_changes=2 + tmpdir, + input_code, + expected, + expected_diff_per_change, + results=json.dumps(issues), + num_changes=4, ) diff --git a/tests/codemods/sonar/test_sonar_secure_random.py b/tests/codemods/sonar/test_sonar_secure_random.py index 383c9e3a..d4499c39 100644 --- a/tests/codemods/sonar/test_sonar_secure_random.py +++ b/tests/codemods/sonar/test_sonar_secure_random.py @@ -26,6 +26,48 @@ def test_simple(self, tmpdir): secrets.SystemRandom().randint(0, 9) secrets.SystemRandom().random() """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + +-random.getrandbits(1) ++secrets.SystemRandom().getrandbits(1) + random.randint(0, 9) + random.random() +""", + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + + random.getrandbits(1) +-random.randint(0, 9) ++secrets.SystemRandom().randint(0, 9) + random.random() +""", + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + + random.getrandbits(1) + random.randint(0, 9) +-random.random() ++secrets.SystemRandom().random() +""", + ] + hotspots = { "hotspots": [ { @@ -67,6 +109,7 @@ def test_simple(self, tmpdir): tmpdir, input_code, expected_output, + expected_diff_per_change, results=json.dumps(hotspots), num_changes=3, ) diff --git a/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py b/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py index f93e4ed7..8c679e88 100644 --- a/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py +++ b/tests/codemods/sonar/test_sonar_timezone_aware_datetime.py @@ -4,7 +4,7 @@ from core_codemods.sonar.sonar_timezone_aware_datetime import SonarTimezoneAwareDatetime -class TestSonarSQLParameterization(BaseSASTCodemodTest): +class TestSonarTimezoneAwareDatetime(BaseSASTCodemodTest): codemod = SonarTimezoneAwareDatetime tool = 
"sonar" @@ -26,6 +26,30 @@ def test_simple(self, tmpdir): timestamp = 1571595618.0 datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc) """ + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,5 +1,5 @@ + import datetime + +-datetime.datetime.utcnow() ++datetime.datetime.now(tz=datetime.timezone.utc) + timestamp = 1571595618.0 + datetime.datetime.utcfromtimestamp(timestamp) +""", + """\ +--- ++++ +@@ -2,4 +2,4 @@ + + datetime.datetime.utcnow() + timestamp = 1571595618.0 +-datetime.datetime.utcfromtimestamp(timestamp) ++datetime.datetime.fromtimestamp(timestamp, tz=datetime.timezone.utc) +""", + ] + issues = { "issues": [ { @@ -60,5 +84,10 @@ def test_simple(self, tmpdir): ] } self.run_and_assert( - tmpdir, input_code, expected, results=json.dumps(issues), num_changes=2 + tmpdir, + input_code, + expected, + expected_diff_per_change, + results=json.dumps(issues), + num_changes=2, ) diff --git a/tests/codemods/test_combine_isinstance_issubclass.py b/tests/codemods/test_combine_isinstance_issubclass.py index 3a158a9c..75f48b41 100644 --- a/tests/codemods/test_combine_isinstance_issubclass.py +++ b/tests/codemods/test_combine_isinstance_issubclass.py @@ -60,7 +60,7 @@ def _format_func_run_test(self, tmpdir, func, input_code, expected, num_changes= tmpdir, input_code.replace("{func}", func), expected.replace("{func}", func), - num_changes, + num_changes=num_changes, ) @each_func diff --git a/tests/codemods/test_combine_startswith_endswith.py b/tests/codemods/test_combine_startswith_endswith.py index f3ed2808..444c37b8 100644 --- a/tests/codemods/test_combine_startswith_endswith.py +++ b/tests/codemods/test_combine_startswith_endswith.py @@ -62,7 +62,7 @@ def _format_func_run_test(self, tmpdir, func, input_code, expected, num_changes= tmpdir, input_code.replace("{func}", func), expected.replace("{func}", func), - num_changes, + num_changes=num_changes, ) @each_func diff --git a/tests/codemods/test_fix_hasattr_call.py 
b/tests/codemods/test_fix_hasattr_call.py index 5feb01a5..ff1c2161 100644 --- a/tests/codemods/test_fix_hasattr_call.py +++ b/tests/codemods/test_fix_hasattr_call.py @@ -37,7 +37,75 @@ class Test: if callable(obj): print(1) """ - self.run_and_assert(tmpdir, input_code, expected, num_changes=5) + expected_diff_per_change = [ + """\ +--- ++++ +@@ -2,7 +2,7 @@ + class Test: + pass + +-hasattr(Test(), "__call__") ++callable(Test()) + hasattr("hi", '__call__') + + assert hasattr(1, '__call__') +""", + """\ +--- ++++ +@@ -3,7 +3,7 @@ + pass + + hasattr(Test(), "__call__") +-hasattr("hi", '__call__') ++callable("hi") + + assert hasattr(1, '__call__') + obj = Test() +""", + """\ +--- ++++ +@@ -5,7 +5,7 @@ + hasattr(Test(), "__call__") + hasattr("hi", '__call__') + +-assert hasattr(1, '__call__') ++assert callable(1) + obj = Test() + var = hasattr(obj, "__call__") + +""", + """\ +--- ++++ +@@ -7,7 +7,7 @@ + + assert hasattr(1, '__call__') + obj = Test() +-var = hasattr(obj, "__call__") ++var = callable(obj) + + if hasattr(obj, "__call__"): + print(1) +""", + """\ +--- ++++ +@@ -9,5 +9,5 @@ + obj = Test() + var = hasattr(obj, "__call__") + +-if hasattr(obj, "__call__"): ++if callable(obj): + print(1) +""", + ] + + self.run_and_assert( + tmpdir, input_code, expected, expected_diff_per_change, num_changes=5 + ) def test_other_hasattr(self, tmpdir): code = """ diff --git a/tests/codemods/test_secure_random.py b/tests/codemods/test_secure_random.py index 89bde2b8..37717eec 100644 --- a/tests/codemods/test_secure_random.py +++ b/tests/codemods/test_secure_random.py @@ -25,8 +25,38 @@ def test_import_random(self, tmpdir): secrets.SystemRandom().getrandbits(1) var = "hello" """ - - self.run_and_assert(tmpdir, input_code, expected_output, num_changes=2) + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + +-random.random() ++secrets.SystemRandom().random() + random.getrandbits(1) + var = "hello" +""", + """\ +--- ++++ +@@ -1,6 +1,7 @@ 
+ + import random ++import secrets + + random.random() +-random.getrandbits(1) ++secrets.SystemRandom().getrandbits(1) + var = "hello" +""", + ] + + self.run_and_assert( + tmpdir, input_code, expected_output, expected_diff_per_change, num_changes=2 + ) def test_from_random(self, tmpdir): input_code = """ @@ -109,7 +139,38 @@ def test_multiple_calls(self, tmpdir): secrets.SystemRandom().randint() var = "hello" """ - self.run_and_assert(tmpdir, input_code, expected_output, num_changes=2) + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + +-random.random() ++secrets.SystemRandom().random() + random.randint() + var = "hello" +""", + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + + random.random() +-random.randint() ++secrets.SystemRandom().randint() + var = "hello" +""", + ] + + self.run_and_assert( + tmpdir, input_code, expected_output, expected_diff_per_change, num_changes=2 + ) @pytest.mark.parametrize( "input_code,expected_output", @@ -217,8 +278,51 @@ def test_sampling(self, tmpdir): secrets.choice(["a", "b"]) secrets.SystemRandom().choices(["a", "b"]) """ - - self.run_and_assert(tmpdir, input_code, expected_output, num_changes=3) + expected_diff_per_change = [ + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + +-random.sample(["a", "b"], 1) ++secrets.SystemRandom().sample(["a", "b"], 1) + random.choice(["a", "b"]) + random.choices(["a", "b"]) +""", + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + + random.sample(["a", "b"], 1) +-random.choice(["a", "b"]) ++secrets.choice(["a", "b"]) + random.choices(["a", "b"]) +""", + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import random ++import secrets + + random.sample(["a", "b"], 1) + random.choice(["a", "b"]) +-random.choices(["a", "b"]) ++secrets.SystemRandom().choices(["a", "b"]) +""", + ] + + self.run_and_assert( + tmpdir, input_code, expected_output, expected_diff_per_change, num_changes=3 + ) def 
test_from_import_choice(self, tmpdir): input_code = """ diff --git a/tests/codemods/test_tempfile_mktemp.py b/tests/codemods/test_tempfile_mktemp.py index 77917845..42ea295b 100644 --- a/tests/codemods/test_tempfile_mktemp.py +++ b/tests/codemods/test_tempfile_mktemp.py @@ -79,7 +79,7 @@ def test_import_with_arg(self, tmpdir): filename = tf.name var = "hello" """ - self.run_and_assert(tmpdir, input_code, expected_output, 5) + self.run_and_assert(tmpdir, input_code, expected_output, num_changes=5) def test_from_import(self, tmpdir): input_code = """ diff --git a/tests/codemods/test_with_threading_lock.py b/tests/codemods/test_with_threading_lock.py index 1701b23b..5f00c23e 100644 --- a/tests/codemods/test_with_threading_lock.py +++ b/tests/codemods/test_with_threading_lock.py @@ -93,7 +93,7 @@ class TestThreadingNameResolution(BaseCodemodTest): codemod = WithThreadingLock @pytest.mark.parametrize( - "input_code,expected_code,num_changes", + "input_code,expected_code,expected_diff_per_change,num_changes", [ ( """ @@ -111,6 +111,7 @@ class TestThreadingNameResolution(BaseCodemodTest): with lock_1: ... """, + [], 1, ), ( @@ -127,6 +128,7 @@ class TestThreadingNameResolution(BaseCodemodTest): with lock_1: ... 
""", + [], 1, ), ( @@ -147,6 +149,7 @@ def f(l): with lock_2: return [lock_1 for lock_1 in l] """, + [], 1, ), ( @@ -173,25 +176,49 @@ def f(l): with lock_2: print() """, + [ + """\ +--- ++++ +@@ -1,6 +1,7 @@ + + import threading +-with threading.Lock(): ++lock = threading.Lock() ++with lock: + int("1") + with threading.Lock(): + print() +""", + """\ +--- ++++ +@@ -2,7 +2,8 @@ + import threading + with threading.Lock(): + int("1") +-with threading.Lock(): ++lock = threading.Lock() ++with lock: + print() + var = 1 + with threading.Lock(): +""", + """\ +--- ++++ +@@ -5,5 +5,6 @@ + with threading.Lock(): + print() + var = 1 +-with threading.Lock(): ++lock = threading.Lock() ++with lock: + print() +""", + ], 3, ), - ( - """ - import threading - with threading.Lock(): - with threading.Lock(): - print() - """, - """ - import threading - lock_1 = threading.Lock() - with lock_1: - lock = threading.Lock() - with lock: - print() - """, - 2, - ), ( """ import threading @@ -210,9 +237,18 @@ def my_func(): with lock_1: foo() """, + [], 1, ), ], ) - def test_name_resolution(self, tmpdir, input_code, expected_code, num_changes): - self.run_and_assert(tmpdir, input_code, expected_code, num_changes=num_changes) + def test_name_resolution( + self, tmpdir, input_code, expected_code, expected_diff_per_change, num_changes + ): + self.run_and_assert( + tmpdir, + input_code, + expected_code, + expected_diff_per_change, + num_changes=num_changes, + )