From d161b95bcea911902f2a6e6c9336249baf228e66 Mon Sep 17 00:00:00 2001 From: NucleonGodX Date: Thu, 12 Jun 2025 23:07:23 +0530 Subject: [PATCH 1/5] initial addition of license clarity logic Signed-off-by: NucleonGodX --- scanpipe/pipes/license_clarity_compliance.py | 161 +++++++++++++++++ scanpipe/tests/pipes/test_license_clarity.py | 175 +++++++++++++++++++ 2 files changed, 336 insertions(+) create mode 100644 scanpipe/pipes/license_clarity_compliance.py create mode 100644 scanpipe/tests/pipes/test_license_clarity.py diff --git a/scanpipe/pipes/license_clarity_compliance.py b/scanpipe/pipes/license_clarity_compliance.py new file mode 100644 index 0000000000..cd846af3be --- /dev/null +++ b/scanpipe/pipes/license_clarity_compliance.py @@ -0,0 +1,161 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +""" +License Clarity Thresholds Management + +This module provides an independent mechanism to read, validate, and evaluate +license clarity score thresholds from policy files. Unlike license policies +which are applied during scan processing, clarity thresholds are evaluated +post-scan during summary generation. + +The clarity thresholds system uses a simple key-value mapping where: +- Keys are integer threshold values (minimum scores) +- Values are compliance alert levels ('ok', 'warning', 'error') + +Example policies.yml structure: + +license_clarity_thresholds: +80: ok # Scores >= 80 get 'ok' alert +50: warning # Scores 50-79 get 'warning' alert + +""" + +from pathlib import Path + +from django.core.exceptions import ValidationError + +import saneyaml + + +class ClarityThresholdsPolicy: + """ + Manages clarity score thresholds and compliance evaulation. + + This class reads clarity thresholds from a dictionary, validates them + against threshold configurations and determines compliance alets based on + clarity scores. + """ + + def __init__(self, threshold_dict): + """Initialize with validated threshold dictionary.""" + self.thresholds = self.validate_thresholds(threshold_dict) + + @staticmethod + def validate_thresholds(threshold_dict): + if not isinstance(threshold_dict, dict): + raise ValidationError( + "The `license_clarity_thresholds` must be a dictionary" + ) + validated = {} + seen = set() + for key, value in threshold_dict.items(): + try: + threshold = int(key) + except (ValueError, TypeError): + raise ValidationError(f"Threshold keys must be integers, got: {key}") + if threshold in seen: + raise ValidationError(f"Duplicate threshold key: {threshold}") + seen.add(threshold) + if value not in ["ok", "warning", "error"]: + raise ValidationError( + f"Compliance alert must be one of 'ok', 'warning', 'error', " + f"got: {value}" + ) + validated[threshold] = value + sorted_keys = sorted(validated.keys(), reverse=True) + if list(validated.keys()) != sorted_keys: + raise ValidationError("Thresholds must be strictly descending") + return validated + + def get_alert_for_score(self, score): + """ + Determine compliance alert level for a given clarity score + + Returns: + str: Compliance alert level ('ok', 'warning', 'error') + + """ + if score is None: + return "error" + + # Find the highest threshold that the score meets or exceeds + applicable_thresholds = [t for t in self.thresholds if score >= t] + if not applicable_thresholds: + return "error" + + max_threshold = max(applicable_thresholds) + return self.thresholds[max_threshold] + + def get_thresholds_summary(self): + """ + Get a summary of configured thresholds for reporting + + Returns: + dict: Summary of thresholds and their alert levels + + """ + return dict(sorted(self.thresholds.items()), reverse=True) + + +def load_clarity_thresholds_from_yaml(yaml_content): + """ + Load clarity thresholds from YAML content. + + Returns: + ClarityThresholdsPolicy: Configured policy object + + """ + try: + data = saneyaml.load(yaml_content) + except saneyaml.YAMLError as e: + raise ValidationError(f"YAML format error: {e}") + + if not isinstance(data, dict): + raise ValidationError("YAML content must be a dictionary.") + + if "license_clarity_thresholds" not in data: + raise ValidationError( + "Missing 'license_clarity_thresholds' key in policies file." + ) + + return ClarityThresholdsPolicy(data["license_clarity_thresholds"]) + + +def load_clarity_thresholds_from_file(file_path): + """ + Load clarity thresholds from a YAML file. + + Returns: + ClarityThresholdsPolicy: Configured policy object or None if file not found + + """ + file_path = Path(file_path) + + if not file_path.exists(): + return None + + try: + yaml_content = file_path.read_text(encoding="utf-8") + return load_clarity_thresholds_from_yaml(yaml_content) + except (OSError, UnicodeDecodeError) as e: + raise ValidationError(f"Error reading file {file_path}: {e}") diff --git a/scanpipe/tests/pipes/test_license_clarity.py b/scanpipe/tests/pipes/test_license_clarity.py new file mode 100644 index 0000000000..af1aa50261 --- /dev/null +++ b/scanpipe/tests/pipes/test_license_clarity.py @@ -0,0 +1,175 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +import tempfile +from pathlib import Path + +from django.core.exceptions import ValidationError +from django.test import TestCase + +from scanpipe.pipes.license_clarity_compliance import ClarityThresholdsPolicy +from scanpipe.pipes.license_clarity_compliance import load_clarity_thresholds_from_file +from scanpipe.pipes.license_clarity_compliance import load_clarity_thresholds_from_yaml + + +class ClarityThresholdsPolicyTest(TestCase): + """Test ClarityThresholdsPolicy class functionality.""" + + def test_valid_thresholds_initialization(self): + thresholds = {80: "ok", 50: "warning", 20: "error"} + policy = ClarityThresholdsPolicy(thresholds) + self.assertEqual(policy.thresholds, thresholds) + + def test_string_keys_converted_to_integers(self): + thresholds = {"80": "ok", "50": "warning"} + policy = ClarityThresholdsPolicy(thresholds) + expected = {80: "ok", 50: "warning"} + self.assertEqual(policy.thresholds, expected) + + def test_invalid_threshold_key_raises_error(self): + with self.assertRaises(ValidationError) as cm: + ClarityThresholdsPolicy({"invalid": "ok"}) + self.assertIn("must be integers", str(cm.exception)) + + def test_invalid_alert_value_raises_error(self): + with self.assertRaises(ValidationError) as cm: + ClarityThresholdsPolicy({80: "invalid"}) + self.assertIn("must be one of 'ok', 'warning', 'error'", str(cm.exception)) + + def test_non_dict_input_raises_error(self): + with self.assertRaises(ValidationError) as cm: + ClarityThresholdsPolicy([80, 50]) + self.assertIn("must be a dictionary", str(cm.exception)) + + def test_duplicate_threshold_keys_raise_error(self): + with self.assertRaises(ValidationError) as cm: + ClarityThresholdsPolicy({80: "ok", "80": "warning"}) + self.assertIn("Duplicate threshold key", str(cm.exception)) + + def test_overlapping_thresholds_wrong_order(self): + with self.assertRaises(ValidationError) as cm: + ClarityThresholdsPolicy({70: "ok", 80: "warning"}) + self.assertIn("Thresholds must be strictly descending", str(cm.exception)) + + def test_float_threshold_keys(self): + thresholds = {80.5: "ok", 50.9: "warning"} + policy = ClarityThresholdsPolicy(thresholds) + expected = {80: "ok", 50: "warning"} + self.assertEqual(policy.thresholds, expected) + + def test_negative_threshold_values(self): + thresholds = {50: "ok", 0: "warning", -10: "error"} + policy = ClarityThresholdsPolicy(thresholds) + self.assertEqual(policy.get_alert_for_score(60), "ok") + self.assertEqual(policy.get_alert_for_score(25), "warning") + self.assertEqual(policy.get_alert_for_score(-5), "error") + self.assertEqual(policy.get_alert_for_score(-20), "error") + + def test_empty_thresholds_dict(self): + policy = ClarityThresholdsPolicy({}) + self.assertEqual(policy.get_alert_for_score(100), "error") + self.assertEqual(policy.get_alert_for_score(50), "error") + self.assertEqual(policy.get_alert_for_score(0), "error") + self.assertEqual(policy.get_alert_for_score(None), "error") + + def test_very_high_threshold_values(self): + thresholds = {150: "ok", 100: "warning"} + policy = ClarityThresholdsPolicy(thresholds) + self.assertEqual(policy.get_alert_for_score(100), "warning") + self.assertEqual(policy.get_alert_for_score(90), "error") + self.assertEqual(policy.get_alert_for_score(50), "error") + self.assertEqual(policy.get_alert_for_score(99), "error") + + # Policy logic via YAML string (mock policies.yml content) + def test_yaml_string_ok_and_warning(self): + yaml_content = """ +license_clarity_thresholds: + 90: ok + 30: warning +""" + policy = load_clarity_thresholds_from_yaml(yaml_content) + self.assertEqual(policy.get_alert_for_score(95), "ok") + self.assertEqual(policy.get_alert_for_score(60), "warning") + self.assertEqual(policy.get_alert_for_score(20), "error") + + def test_yaml_string_single_threshold(self): + yaml_content = """ +license_clarity_thresholds: + 80: ok +""" + policy = load_clarity_thresholds_from_yaml(yaml_content) + self.assertEqual(policy.get_alert_for_score(90), "ok") + self.assertEqual(policy.get_alert_for_score(79), "error") + + def test_yaml_string_invalid_alert(self): + yaml_content = """ +license_clarity_thresholds: + 80: great +""" + with self.assertRaises(ValidationError): + load_clarity_thresholds_from_yaml(yaml_content) + + def test_yaml_string_invalid_key(self): + yaml_content = """ +license_clarity_thresholds: + eighty: ok +""" + with self.assertRaises(ValidationError): + load_clarity_thresholds_from_yaml(yaml_content) + + def test_yaml_string_missing_key(self): + yaml_content = """ +license_policies: + - license_key: mit +""" + with self.assertRaises(ValidationError): + load_clarity_thresholds_from_yaml(yaml_content) + + def test_yaml_string_invalid_yaml(self): + yaml_content = "license_clarity_thresholds: [80, 50" + with self.assertRaises(ValidationError): + load_clarity_thresholds_from_yaml(yaml_content) + + +class ClarityThresholdsFileLoadingTest(TestCase): + """Test file loading functionality.""" + + def test_load_from_existing_file(self): + yaml_content = """ +license_clarity_thresholds: + 90: ok + 70: warning +""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + f.write(yaml_content) + temp_path = f.name + + try: + policy = load_clarity_thresholds_from_file(temp_path) + self.assertIsNotNone(policy) + self.assertEqual(policy.get_alert_for_score(95), "ok") + finally: + Path(temp_path).unlink() + + def test_load_from_nonexistent_file(self): + policy = load_clarity_thresholds_from_file("/nonexistent/file.yml") + self.assertIsNone(policy) From 54e5db24ea2033db2db9c2b60b12c012f0eb2b32 Mon Sep 17 00:00:00 2001 From: NucleonGodX Date: Fri, 20 Jun 2025 23:42:29 +0530 Subject: [PATCH 2/5] integrate clarity compliance Signed-off-by: NucleonGodX --- scanpipe/apps.py | 4 +- scanpipe/forms.py | 4 +- scanpipe/{policies.py => license_policies.py} | 13 +- scanpipe/models.py | 12 +- scanpipe/pipes/license_clarity.py | 166 ++++++++++++++++++ scanpipe/tests/pipes/test_license_clarity.py | 32 +--- ...t_policies.py => test_license_policies.py} | 8 +- 7 files changed, 193 insertions(+), 46 deletions(-) rename scanpipe/{policies.py => license_policies.py} (91%) create mode 100644 scanpipe/pipes/license_clarity.py rename scanpipe/tests/{test_policies.py => test_license_policies.py} (96%) diff --git a/scanpipe/apps.py b/scanpipe/apps.py index b679fe0d9d..104c37e250 100644 --- a/scanpipe/apps.py +++ b/scanpipe/apps.py @@ -37,8 +37,8 @@ from licensedcode.models import load_licenses -from scanpipe.policies import load_policies_file -from scanpipe.policies import make_license_policy_index +from scanpipe.license_policies import load_policies_file +from scanpipe.license_policies import make_license_policy_index try: from importlib import metadata as importlib_metadata diff --git a/scanpipe/forms.py b/scanpipe/forms.py index 30f472c20c..67a239546f 100644 --- a/scanpipe/forms.py +++ b/scanpipe/forms.py @@ -29,13 +29,13 @@ from taggit.forms import TagField from taggit.forms import TagWidget +from scanpipe.license_policies import load_policies_yaml +from scanpipe.license_policies import validate_policies from scanpipe.models import Project from scanpipe.models import Run from scanpipe.models import WebhookSubscription from scanpipe.pipelines import convert_markdown_to_html from scanpipe.pipes import fetch -from scanpipe.policies import load_policies_yaml -from scanpipe.policies import validate_policies scanpipe_app = apps.get_app_config("scanpipe") diff --git a/scanpipe/policies.py b/scanpipe/license_policies.py similarity index 91% rename from scanpipe/policies.py rename to scanpipe/license_policies.py index d0ea94e5c3..3967ad0ab6 100644 --- a/scanpipe/policies.py +++ b/scanpipe/license_policies.py @@ -25,14 +25,21 @@ import saneyaml -def load_policies_yaml(policies_yaml): - """Load provided ``policies_yaml``.""" +def load_yaml_content(yaml_content): + """Load and parse YAML content into a Python dictionary.""" try: - return saneyaml.load(policies_yaml) + return saneyaml.load(yaml_content) except saneyaml.YAMLError as e: raise ValidationError(f"Policies file format error: {e}") +def load_policies_yaml(policies_yaml): + """Load provided ``policies_yaml``.""" + data = load_yaml_content(policies_yaml) + validate_policies(data) + return data + + def load_policies_file(policies_file, validate=True): """ Load provided ``policies_file`` into a Python dictionary. diff --git a/scanpipe/models.py b/scanpipe/models.py index 93e31448f0..258d063ef3 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -94,7 +94,7 @@ import scancodeio from scanpipe import humanize_time -from scanpipe import policies +from scanpipe import license_policies from scanpipe import tasks logger = logging.getLogger(__name__) @@ -1508,12 +1508,14 @@ def get_policy_index(self): if policies_from_settings := self.get_env("policies"): policies_dict = policies_from_settings if isinstance(policies_from_settings, str): - policies_dict = policies.load_policies_yaml(policies_from_settings) - return policies.make_license_policy_index(policies_dict) + policies_dict = license_policies.load_policies_yaml( + policies_from_settings + ) + return license_policies.make_license_policy_index(policies_dict) elif policies_file := self.get_input_policies_file(): - policies_dict = policies.load_policies_file(policies_file) - return policies.make_license_policy_index(policies_dict) + policies_dict = license_policies.load_policies_file(policies_file) + return license_policies.make_license_policy_index(policies_dict) else: return scanpipe_app.license_policies_index diff --git a/scanpipe/pipes/license_clarity.py b/scanpipe/pipes/license_clarity.py new file mode 100644 index 0000000000..44e70b1310 --- /dev/null +++ b/scanpipe/pipes/license_clarity.py @@ -0,0 +1,166 @@ +# SPDX-License-Identifier: Apache-2.0 +# +# http://nexb.com and https://github.com/nexB/scancode.io +# The ScanCode.io software is licensed under the Apache License version 2.0. +# Data generated with ScanCode.io is provided as-is without warranties. +# ScanCode is a trademark of nexB Inc. +# +# You may not use this software except in compliance with the License. +# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 +# Unless required by applicable law or agreed to in writing, software distributed +# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +# CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# +# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES +# OR CONDITIONS OF ANY KIND, either express or implied. No content created from +# ScanCode.io should be considered or used as legal advice. Consult an Attorney +# for any legal advice. +# +# ScanCode.io is a free software code scanning tool from nexB Inc. and others. +# Visit https://github.com/nexB/scancode.io for support and download. + +# clarity_thresholds.py (updated) +""" +License Clarity Thresholds Management + +This module provides an independent mechanism to read, validate, and evaluate +license clarity score thresholds from policy files. Unlike license policies +which are applied during scan processing, clarity thresholds are evaluated +post-scan during summary generation. + +The clarity thresholds system uses a simple key-value mapping where: +- Keys are integer threshold values (minimum scores) +- Values are compliance alert levels ('ok', 'warning', 'error') + +Example policies.yml structure: + +license_clarity_thresholds: + 80: ok # Scores >= 80 get 'ok' alert + 50: warning # Scores 50-79 get 'warning' alert +""" + +from django.core.exceptions import ValidationError + +import saneyaml + + +def load_yaml_content(yaml_content): + """Load and parse YAML content into a Python dictionary.""" + try: + return saneyaml.load(yaml_content) + except saneyaml.YAMLError as e: + raise ValidationError(f"Policies file format error: {e}") + + +class ClarityThresholdsPolicy: + """ + Manages clarity score thresholds and compliance evaluation. + + This class reads clarity thresholds from a dictionary, validates them + against threshold configurations and determines compliance alerts based on + clarity scores. + """ + + def __init__(self, threshold_dict): + """Initialize with validated threshold dictionary.""" + self.thresholds = self.validate_thresholds(threshold_dict) + + @staticmethod + def validate_thresholds(threshold_dict): + if not isinstance(threshold_dict, dict): + raise ValidationError( + "The `license_clarity_thresholds` must be a dictionary" + ) + validated = {} + seen = set() + for key, value in threshold_dict.items(): + try: + threshold = int(key) + except (ValueError, TypeError): + raise ValidationError(f"Threshold keys must be integers, got: {key}") + if threshold in seen: + raise ValidationError(f"Duplicate threshold key: {threshold}") + seen.add(threshold) + if value not in ["ok", "warning", "error"]: + raise ValidationError( + f"Compliance alert must be one of 'ok', 'warning', 'error', " + f"got: {value}" + ) + validated[threshold] = value + sorted_keys = sorted(validated.keys(), reverse=True) + if list(validated.keys()) != sorted_keys: + raise ValidationError("Thresholds must be strictly descending") + return validated + + def get_alert_for_score(self, score): + """ + Determine compliance alert level for a given clarity score + + Returns: + str: Compliance alert level ('ok', 'warning', 'error') + + """ + if score is None: + return "error" + + # Find the highest threshold that the score meets or exceeds + applicable_thresholds = [t for t in self.thresholds if score >= t] + if not applicable_thresholds: + return "error" + + max_threshold = max(applicable_thresholds) + return self.thresholds[max_threshold] + + def get_thresholds_summary(self): + """ + Get a summary of configured thresholds for reporting + + Returns: + dict: Summary of thresholds and their alert levels + + """ + return dict(sorted(self.thresholds.items(), reverse=True)) + + +def load_clarity_thresholds_from_yaml(yaml_content): + """ + Load clarity thresholds from YAML content. + + Returns: + ClarityThresholdsPolicy: Configured policy object + + """ + data = load_yaml_content(yaml_content) + + if not isinstance(data, dict): + raise ValidationError("YAML content must be a dictionary.") + + if "license_clarity_thresholds" not in data: + raise ValidationError( + "Missing 'license_clarity_thresholds' key in policies file." + ) + + return ClarityThresholdsPolicy(data["license_clarity_thresholds"]) + + +def load_clarity_thresholds_from_file(file_path): + """ + Load clarity thresholds from a YAML file. + + Returns: + ClarityThresholdsPolicy: Configured policy object or None if file not found + + """ + from pathlib import Path + + file_path = Path(file_path) + + if not file_path.exists(): + return None + + try: + yaml_content = file_path.read_text(encoding="utf-8") + return load_clarity_thresholds_from_yaml(yaml_content) + except (OSError, UnicodeDecodeError) as e: + raise ValidationError(f"Error reading file {file_path}: {e}") diff --git a/scanpipe/tests/pipes/test_license_clarity.py b/scanpipe/tests/pipes/test_license_clarity.py index af1aa50261..506b7e72ce 100644 --- a/scanpipe/tests/pipes/test_license_clarity.py +++ b/scanpipe/tests/pipes/test_license_clarity.py @@ -20,15 +20,12 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -import tempfile -from pathlib import Path from django.core.exceptions import ValidationError from django.test import TestCase -from scanpipe.pipes.license_clarity_compliance import ClarityThresholdsPolicy -from scanpipe.pipes.license_clarity_compliance import load_clarity_thresholds_from_file -from scanpipe.pipes.license_clarity_compliance import load_clarity_thresholds_from_yaml +from scanpipe.pipes.license_clarity import ClarityThresholdsPolicy +from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml class ClarityThresholdsPolicyTest(TestCase): @@ -148,28 +145,3 @@ def test_yaml_string_invalid_yaml(self): yaml_content = "license_clarity_thresholds: [80, 50" with self.assertRaises(ValidationError): load_clarity_thresholds_from_yaml(yaml_content) - - -class ClarityThresholdsFileLoadingTest(TestCase): - """Test file loading functionality.""" - - def test_load_from_existing_file(self): - yaml_content = """ -license_clarity_thresholds: - 90: ok - 70: warning -""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: - f.write(yaml_content) - temp_path = f.name - - try: - policy = load_clarity_thresholds_from_file(temp_path) - self.assertIsNotNone(policy) - self.assertEqual(policy.get_alert_for_score(95), "ok") - finally: - Path(temp_path).unlink() - - def test_load_from_nonexistent_file(self): - policy = load_clarity_thresholds_from_file("/nonexistent/file.yml") - self.assertIsNone(policy) diff --git a/scanpipe/tests/test_policies.py b/scanpipe/tests/test_license_policies.py similarity index 96% rename from scanpipe/tests/test_policies.py rename to scanpipe/tests/test_license_policies.py index 9dfed0d688..fe7c1fbd6e 100644 --- a/scanpipe/tests/test_policies.py +++ b/scanpipe/tests/test_license_policies.py @@ -27,11 +27,11 @@ from django.core.exceptions import ValidationError from django.test import TestCase +from scanpipe.license_policies import load_policies_file +from scanpipe.license_policies import load_policies_yaml +from scanpipe.license_policies import make_license_policy_index +from scanpipe.license_policies import validate_policies from scanpipe.pipes.input import copy_input -from scanpipe.policies import load_policies_file -from scanpipe.policies import load_policies_yaml -from scanpipe.policies import make_license_policy_index -from scanpipe.policies import validate_policies from scanpipe.tests import global_policies from scanpipe.tests import license_policies_index from scanpipe.tests import make_project From 11a91167af78601acf299d4c17d0f5dbfa4e65cc Mon Sep 17 00:00:00 2001 From: NucleonGodX Date: Sat, 21 Jun 2025 00:25:30 +0530 Subject: [PATCH 3/5] testing extend for clarity-score Signed-off-by: NucleonGodX --- scanpipe/pipes/license_clarity_compliance.py | 161 ------------------- scanpipe/tests/pipes/test_license_clarity.py | 25 ++- 2 files changed, 24 insertions(+), 162 deletions(-) delete mode 100644 scanpipe/pipes/license_clarity_compliance.py diff --git a/scanpipe/pipes/license_clarity_compliance.py b/scanpipe/pipes/license_clarity_compliance.py deleted file mode 100644 index cd846af3be..0000000000 --- a/scanpipe/pipes/license_clarity_compliance.py +++ /dev/null @@ -1,161 +0,0 @@ -# SPDX-License-Identifier: Apache-2.0 -# -# http://nexb.com and https://github.com/nexB/scancode.io -# The ScanCode.io software is licensed under the Apache License version 2.0. -# Data generated with ScanCode.io is provided as-is without warranties. -# ScanCode is a trademark of nexB Inc. -# -# You may not use this software except in compliance with the License. -# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0 -# Unless required by applicable law or agreed to in writing, software distributed -# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. -# -# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES -# OR CONDITIONS OF ANY KIND, either express or implied. No content created from -# ScanCode.io should be considered or used as legal advice. Consult an Attorney -# for any legal advice. -# -# ScanCode.io is a free software code scanning tool from nexB Inc. and others. -# Visit https://github.com/nexB/scancode.io for support and download. - -""" -License Clarity Thresholds Management - -This module provides an independent mechanism to read, validate, and evaluate -license clarity score thresholds from policy files. Unlike license policies -which are applied during scan processing, clarity thresholds are evaluated -post-scan during summary generation. - -The clarity thresholds system uses a simple key-value mapping where: -- Keys are integer threshold values (minimum scores) -- Values are compliance alert levels ('ok', 'warning', 'error') - -Example policies.yml structure: - -license_clarity_thresholds: -80: ok # Scores >= 80 get 'ok' alert -50: warning # Scores 50-79 get 'warning' alert - -""" - -from pathlib import Path - -from django.core.exceptions import ValidationError - -import saneyaml - - -class ClarityThresholdsPolicy: - """ - Manages clarity score thresholds and compliance evaulation. - - This class reads clarity thresholds from a dictionary, validates them - against threshold configurations and determines compliance alets based on - clarity scores. - """ - - def __init__(self, threshold_dict): - """Initialize with validated threshold dictionary.""" - self.thresholds = self.validate_thresholds(threshold_dict) - - @staticmethod - def validate_thresholds(threshold_dict): - if not isinstance(threshold_dict, dict): - raise ValidationError( - "The `license_clarity_thresholds` must be a dictionary" - ) - validated = {} - seen = set() - for key, value in threshold_dict.items(): - try: - threshold = int(key) - except (ValueError, TypeError): - raise ValidationError(f"Threshold keys must be integers, got: {key}") - if threshold in seen: - raise ValidationError(f"Duplicate threshold key: {threshold}") - seen.add(threshold) - if value not in ["ok", "warning", "error"]: - raise ValidationError( - f"Compliance alert must be one of 'ok', 'warning', 'error', " - f"got: {value}" - ) - validated[threshold] = value - sorted_keys = sorted(validated.keys(), reverse=True) - if list(validated.keys()) != sorted_keys: - raise ValidationError("Thresholds must be strictly descending") - return validated - - def get_alert_for_score(self, score): - """ - Determine compliance alert level for a given clarity score - - Returns: - str: Compliance alert level ('ok', 'warning', 'error') - - """ - if score is None: - return "error" - - # Find the highest threshold that the score meets or exceeds - applicable_thresholds = [t for t in self.thresholds if score >= t] - if not applicable_thresholds: - return "error" - - max_threshold = max(applicable_thresholds) - return self.thresholds[max_threshold] - - def get_thresholds_summary(self): - """ - Get a summary of configured thresholds for reporting - - Returns: - dict: Summary of thresholds and their alert levels - - """ - return dict(sorted(self.thresholds.items()), reverse=True) - - -def load_clarity_thresholds_from_yaml(yaml_content): - """ - Load clarity thresholds from YAML content. - - Returns: - ClarityThresholdsPolicy: Configured policy object - - """ - try: - data = saneyaml.load(yaml_content) - except saneyaml.YAMLError as e: - raise ValidationError(f"YAML format error: {e}") - - if not isinstance(data, dict): - raise ValidationError("YAML content must be a dictionary.") - - if "license_clarity_thresholds" not in data: - raise ValidationError( - "Missing 'license_clarity_thresholds' key in policies file." - ) - - return ClarityThresholdsPolicy(data["license_clarity_thresholds"]) - - -def load_clarity_thresholds_from_file(file_path): - """ - Load clarity thresholds from a YAML file. - - Returns: - ClarityThresholdsPolicy: Configured policy object or None if file not found - - """ - file_path = Path(file_path) - - if not file_path.exists(): - return None - - try: - yaml_content = file_path.read_text(encoding="utf-8") - return load_clarity_thresholds_from_yaml(yaml_content) - except (OSError, UnicodeDecodeError) as e: - raise ValidationError(f"Error reading file {file_path}: {e}") diff --git a/scanpipe/tests/pipes/test_license_clarity.py b/scanpipe/tests/pipes/test_license_clarity.py index 506b7e72ce..cfc0f38fce 100644 --- a/scanpipe/tests/pipes/test_license_clarity.py +++ b/scanpipe/tests/pipes/test_license_clarity.py @@ -20,13 +20,15 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. +import tempfile +from pathlib import Path from django.core.exceptions import ValidationError from django.test import TestCase from scanpipe.pipes.license_clarity import ClarityThresholdsPolicy from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml - +from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_file class ClarityThresholdsPolicyTest(TestCase): """Test ClarityThresholdsPolicy class functionality.""" @@ -145,3 +147,24 @@ def test_yaml_string_invalid_yaml(self): yaml_content = "license_clarity_thresholds: [80, 50" with self.assertRaises(ValidationError): load_clarity_thresholds_from_yaml(yaml_content) + + def test_load_from_existing_file(self): + yaml_content = """ +license_clarity_thresholds: + 90: ok + 70: warning +""" + with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: + f.write(yaml_content) + temp_path = f.name + + try: + policy = load_clarity_thresholds_from_file(temp_path) + self.assertIsNotNone(policy) + self.assertEqual(policy.get_alert_for_score(95), "ok") + finally: + Path(temp_path).unlink() + + def test_load_from_nonexistent_file(self): + policy = load_clarity_thresholds_from_file("/nonexistent/file.yml") + self.assertIsNone(policy) \ No newline at end of file From 5494cb09c2bdeedee7820263319e2e5734c36b16 Mon Sep 17 00:00:00 2001 From: NucleonGodX Date: Mon, 23 Jun 2025 19:17:47 +0530 Subject: [PATCH 4/5] test file added in data and suggestions applied Signed-off-by: NucleonGodX --- scanpipe/apps.py | 4 +-- scanpipe/forms.py | 4 +-- scanpipe/models.py | 12 ++++---- scanpipe/pipes/license_clarity.py | 5 ++-- scanpipe/{license_policies.py => policies.py} | 13 ++------- .../license_clarity/sample_thresholds.yml | 4 +++ scanpipe/tests/pipes/test_license_clarity.py | 28 +++++++------------ scanpipe/tests/test_license_policies.py | 8 +++--- 8 files changed, 32 insertions(+), 46 deletions(-) rename scanpipe/{license_policies.py => policies.py} (91%) create mode 100644 scanpipe/tests/data/license_clarity/sample_thresholds.yml diff --git a/scanpipe/apps.py b/scanpipe/apps.py index 104c37e250..b679fe0d9d 100644 --- a/scanpipe/apps.py +++ b/scanpipe/apps.py @@ -37,8 +37,8 @@ from licensedcode.models import load_licenses -from scanpipe.license_policies import load_policies_file -from scanpipe.license_policies import make_license_policy_index +from scanpipe.policies import load_policies_file +from scanpipe.policies import make_license_policy_index try: from importlib import metadata as importlib_metadata diff --git a/scanpipe/forms.py b/scanpipe/forms.py index 67a239546f..30f472c20c 100644 --- a/scanpipe/forms.py +++ b/scanpipe/forms.py @@ -29,13 +29,13 @@ from taggit.forms import TagField from taggit.forms import TagWidget -from scanpipe.license_policies import load_policies_yaml -from scanpipe.license_policies import validate_policies from scanpipe.models import Project from scanpipe.models import Run from scanpipe.models import WebhookSubscription from scanpipe.pipelines import convert_markdown_to_html from scanpipe.pipes import fetch +from scanpipe.policies import load_policies_yaml +from scanpipe.policies import validate_policies scanpipe_app = apps.get_app_config("scanpipe") diff --git a/scanpipe/models.py b/scanpipe/models.py index 258d063ef3..93e31448f0 100644 --- a/scanpipe/models.py +++ b/scanpipe/models.py @@ -94,7 +94,7 @@ import scancodeio from scanpipe import humanize_time -from scanpipe import license_policies +from scanpipe import policies from scanpipe import tasks logger = logging.getLogger(__name__) @@ -1508,14 +1508,12 @@ def get_policy_index(self): if policies_from_settings := self.get_env("policies"): policies_dict = policies_from_settings if isinstance(policies_from_settings, str): - policies_dict = license_policies.load_policies_yaml( - policies_from_settings - ) - return license_policies.make_license_policy_index(policies_dict) + policies_dict = policies.load_policies_yaml(policies_from_settings) + return policies.make_license_policy_index(policies_dict) elif policies_file := self.get_input_policies_file(): - policies_dict = license_policies.load_policies_file(policies_file) - return license_policies.make_license_policy_index(policies_dict) + policies_dict = policies.load_policies_file(policies_file) + return policies.make_license_policy_index(policies_dict) else: return scanpipe_app.license_policies_index diff --git a/scanpipe/pipes/license_clarity.py b/scanpipe/pipes/license_clarity.py index 44e70b1310..0f230976dc 100644 --- a/scanpipe/pipes/license_clarity.py +++ b/scanpipe/pipes/license_clarity.py @@ -20,7 +20,6 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -# clarity_thresholds.py (updated) """ License Clarity Thresholds Management @@ -40,6 +39,8 @@ 50: warning # Scores 50-79 get 'warning' alert """ +from pathlib import Path + from django.core.exceptions import ValidationError import saneyaml @@ -152,8 +153,6 @@ def load_clarity_thresholds_from_file(file_path): ClarityThresholdsPolicy: Configured policy object or None if file not found """ - from pathlib import Path - file_path = Path(file_path) if not file_path.exists(): diff --git a/scanpipe/license_policies.py b/scanpipe/policies.py similarity index 91% rename from scanpipe/license_policies.py rename to scanpipe/policies.py index 3967ad0ab6..d0ea94e5c3 100644 --- a/scanpipe/license_policies.py +++ b/scanpipe/policies.py @@ -25,21 +25,14 @@ import saneyaml -def load_yaml_content(yaml_content): - """Load and parse YAML content into a Python dictionary.""" +def load_policies_yaml(policies_yaml): + """Load provided ``policies_yaml``.""" try: - return saneyaml.load(yaml_content) + return saneyaml.load(policies_yaml) except saneyaml.YAMLError as e: raise ValidationError(f"Policies file format error: {e}") -def load_policies_yaml(policies_yaml): - """Load provided ``policies_yaml``.""" - data = load_yaml_content(policies_yaml) - validate_policies(data) - return data - - def load_policies_file(policies_file, validate=True): """ Load provided ``policies_file`` into a Python dictionary. diff --git a/scanpipe/tests/data/license_clarity/sample_thresholds.yml b/scanpipe/tests/data/license_clarity/sample_thresholds.yml new file mode 100644 index 0000000000..9754ade331 --- /dev/null +++ b/scanpipe/tests/data/license_clarity/sample_thresholds.yml @@ -0,0 +1,4 @@ +license_clarity_thresholds: + 90: ok + 70: warning + 40: error diff --git a/scanpipe/tests/pipes/test_license_clarity.py b/scanpipe/tests/pipes/test_license_clarity.py index cfc0f38fce..f00723007b 100644 --- a/scanpipe/tests/pipes/test_license_clarity.py +++ b/scanpipe/tests/pipes/test_license_clarity.py @@ -20,17 +20,18 @@ # ScanCode.io is a free software code scanning tool from nexB Inc. and others. # Visit https://github.com/nexB/scancode.io for support and download. -import tempfile from pathlib import Path from django.core.exceptions import ValidationError from django.test import TestCase from scanpipe.pipes.license_clarity import ClarityThresholdsPolicy -from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_file +from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml + class ClarityThresholdsPolicyTest(TestCase): + data = Path(__file__).parent.parent / "data" """Test ClarityThresholdsPolicy class functionality.""" def test_valid_thresholds_initialization(self): @@ -149,22 +150,13 @@ def test_yaml_string_invalid_yaml(self): load_clarity_thresholds_from_yaml(yaml_content) def test_load_from_existing_file(self): - yaml_content = """ -license_clarity_thresholds: - 90: ok - 70: warning -""" - with tempfile.NamedTemporaryFile(mode="w", suffix=".yml", delete=False) as f: - f.write(yaml_content) - temp_path = f.name - - try: - policy = load_clarity_thresholds_from_file(temp_path) - self.assertIsNotNone(policy) - self.assertEqual(policy.get_alert_for_score(95), "ok") - finally: - Path(temp_path).unlink() + test_file = self.data / "license_clarity" / "sample_thresholds.yml" + policy = load_clarity_thresholds_from_file(test_file) + self.assertIsNotNone(policy) + self.assertEqual(policy.get_alert_for_score(95), "ok") + self.assertEqual(policy.get_alert_for_score(75), "warning") + self.assertEqual(policy.get_alert_for_score(50), "error") def test_load_from_nonexistent_file(self): policy = load_clarity_thresholds_from_file("/nonexistent/file.yml") - self.assertIsNone(policy) \ No newline at end of file + self.assertIsNone(policy) diff --git a/scanpipe/tests/test_license_policies.py b/scanpipe/tests/test_license_policies.py index fe7c1fbd6e..9dfed0d688 100644 --- a/scanpipe/tests/test_license_policies.py +++ b/scanpipe/tests/test_license_policies.py @@ -27,11 +27,11 @@ from django.core.exceptions import ValidationError from django.test import TestCase -from scanpipe.license_policies import load_policies_file -from scanpipe.license_policies import load_policies_yaml -from scanpipe.license_policies import make_license_policy_index -from scanpipe.license_policies import validate_policies from scanpipe.pipes.input import copy_input +from scanpipe.policies import load_policies_file +from scanpipe.policies import load_policies_yaml +from scanpipe.policies import make_license_policy_index +from scanpipe.policies import validate_policies from scanpipe.tests import global_policies from scanpipe.tests import license_policies_index from scanpipe.tests import make_project From 9202edb46947e92ca9ac2105eecbc5296b9eeaa2 Mon Sep 17 00:00:00 2001 From: NucleonGodX Date: Mon, 23 Jun 2025 19:22:55 +0530 Subject: [PATCH 5/5] rename changes reverted Signed-off-by: NucleonGodX --- scanpipe/tests/{test_license_policies.py => test_policies.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename scanpipe/tests/{test_license_policies.py => test_policies.py} (100%) diff --git a/scanpipe/tests/test_license_policies.py b/scanpipe/tests/test_policies.py similarity index 100% rename from scanpipe/tests/test_license_policies.py rename to scanpipe/tests/test_policies.py