aboutcode-org · tdruez · Jun 30, 2025 · Jun 12, 2025 · Jun 20, 2025 · Jun 20, 2025
diff --git a/scanpipe/apps.py b/scanpipe/apps.py
@@ -37,8 +37,8 @@
 
 from licensedcode.models import load_licenses
 
-from scanpipe.policies import load_policies_file
-from scanpipe.policies import make_license_policy_index
+from scanpipe.license_policies import load_policies_file
+from scanpipe.license_policies import make_license_policy_index
 
 try:
     from importlib import metadata as importlib_metadata

diff --git a/scanpipe/forms.py b/scanpipe/forms.py
@@ -29,13 +29,13 @@
 from taggit.forms import TagField
 from taggit.forms import TagWidget
 
+from scanpipe.license_policies import load_policies_yaml
+from scanpipe.license_policies import validate_policies
 from scanpipe.models import Project
 from scanpipe.models import Run
 from scanpipe.models import WebhookSubscription
 from scanpipe.pipelines import convert_markdown_to_html
 from scanpipe.pipes import fetch
-from scanpipe.policies import load_policies_yaml
-from scanpipe.policies import validate_policies
 
 scanpipe_app = apps.get_app_config("scanpipe")
 

diff --git a/scanpipe/policies.py → scanpipe/license_policies.py b/scanpipe/policies.py → scanpipe/license_policies.py
@@ -25,14 +25,21 @@
 import saneyaml
 
 
-def load_policies_yaml(policies_yaml):
-    """Load provided ``policies_yaml``."""
+def load_yaml_content(yaml_content):
+    """Load ``yaml_content`` with saneyaml, raising ValidationError on YAML errors."""
     try:
-        return saneyaml.load(policies_yaml)
+        return saneyaml.load(yaml_content)
     except saneyaml.YAMLError as e:
         raise ValidationError(f"Policies file format error: {e}")
 
 
+def load_policies_yaml(policies_yaml):
+    """Parse ``policies_yaml``, validate the loaded policies, and return them."""
+    policies = load_yaml_content(policies_yaml)
+    validate_policies(policies)
+    return policies
+
+
 def load_policies_file(policies_file, validate=True):
     """
     Load provided ``policies_file`` into a Python dictionary.

diff --git a/scanpipe/models.py b/scanpipe/models.py
@@ -94,7 +94,7 @@
 
 import scancodeio
 from scanpipe import humanize_time
-from scanpipe import policies
+from scanpipe import license_policies
 from scanpipe import tasks
 
 logger = logging.getLogger(__name__)
@@ -1508,12 +1508,14 @@ def get_policy_index(self):
         if policies_from_settings := self.get_env("policies"):
             policies_dict = policies_from_settings
             if isinstance(policies_from_settings, str):
-                policies_dict = policies.load_policies_yaml(policies_from_settings)
-            return policies.make_license_policy_index(policies_dict)
+                policies_dict = license_policies.load_policies_yaml(
+                    policies_from_settings
+                )
+            return license_policies.make_license_policy_index(policies_dict)
 
         elif policies_file := self.get_input_policies_file():
-            policies_dict = policies.load_policies_file(policies_file)
-            return policies.make_license_policy_index(policies_dict)
+            policies_dict = license_policies.load_policies_file(policies_file)
+            return license_policies.make_license_policy_index(policies_dict)
 
         else:
             return scanpipe_app.license_policies_index

diff --git a/scanpipe/pipes/license_clarity.py b/scanpipe/pipes/license_clarity.py
@@ -0,0 +1,166 @@
+# SPDX-License-Identifier: Apache-2.0
+#
+# http://nexb.com and https://github.com/nexB/scancode.io
+# The ScanCode.io software is licensed under the Apache License version 2.0.
+# Data generated with ScanCode.io is provided as-is without warranties.
+# ScanCode is a trademark of nexB Inc.
+#
+# You may not use this software except in compliance with the License.
+# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing, software distributed
+# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
+# CONDITIONS OF ANY KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations under the License.
+#
+# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
+# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
+# ScanCode.io should be considered or used as legal advice. Consult an Attorney
+# for any legal advice.
+#
+# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
+# Visit https://github.com/nexB/scancode.io for support and download.
+
+# license_clarity.py
+"""
+License Clarity Thresholds Management
+
+This module provides an independent mechanism to read, validate, and evaluate
+license clarity score thresholds from policy files. Unlike license policies,
+which are applied during scan processing, clarity thresholds are evaluated
+post-scan during summary generation.
+
+The clarity thresholds system uses a simple key-value mapping where:
+- Keys are integer threshold values (minimum scores), listed in descending order
+- Values are compliance alert levels ('ok', 'warning', 'error')
+
+Example policies.yml structure:
+
+license_clarity_thresholds:
+  80: ok # Scores >= 80 get 'ok' alert
+  50: warning # Scores 50-79 get 'warning' alert; scores below 50 get 'error'
+"""
+
+from django.core.exceptions import ValidationError
+
+import saneyaml
+
+
+def load_yaml_content(yaml_content):
+    """Load ``yaml_content`` with saneyaml, raising ValidationError on YAML errors."""
+    try:
+        return saneyaml.load(yaml_content)
+    except saneyaml.YAMLError as e:
+        raise ValidationError(f"Policies file format error: {e}") from e
+
+
+class ClarityThresholdsPolicy:
+    """
+    Manages clarity score thresholds and compliance evaluation.
+
+    This class validates a mapping of minimum clarity scores to compliance
+    alerts ('ok', 'warning', 'error') and determines the alert that applies
+    to a given clarity score.
+    """
+
+    def __init__(self, threshold_dict):
+        """Initialize with validated threshold dictionary."""
+        self.thresholds = self.validate_thresholds(threshold_dict)
+
+    @staticmethod
+    def validate_thresholds(threshold_dict):
+        """
+        Return a validated copy of ``threshold_dict`` with integer keys.
+
+        Raise ValidationError on a non-dict input, a non-integer or duplicate
+        key, an unknown alert level, or keys that are not in descending order.
+        """
+        if not isinstance(threshold_dict, dict):
+            raise ValidationError(
+                "The `license_clarity_thresholds` must be a dictionary"
+            )
+        validated = {}
+        for key, value in threshold_dict.items():
+            # int() would silently turn True into 1 and truncate 80.5 to 80.
+            lossy = isinstance(key, float) and not key.is_integer()
+            try:
+                threshold = None if isinstance(key, bool) or lossy else int(key)
+            except (ValueError, TypeError):
+                threshold = None
+            if threshold is None:
+                raise ValidationError(f"Threshold keys must be integers, got: {key}")
+            if threshold in validated:
+                raise ValidationError(f"Duplicate threshold key: {threshold}")
+            if value not in ("ok", "warning", "error"):
+                raise ValidationError(
+                    f"Compliance alert must be one of 'ok', 'warning', 'error', "
+                    f"got: {value}"
+                )
+            validated[threshold] = value
+        # Dicts keep insertion order, so this checks the order given in the input.
+        if list(validated) != sorted(validated, reverse=True):
+            raise ValidationError("Thresholds must be strictly descending")
+        return validated
+
+    def get_alert_for_score(self, score):
+        """
+        Return the alert of the highest threshold that ``score`` reaches.
+
+        Return 'error' when ``score`` is None or below every threshold.
+        """
+        if score is None:
+            return "error"
+        reached = [threshold for threshold in self.thresholds if score >= threshold]
+        return self.thresholds[max(reached)] if reached else "error"
+
+    def get_thresholds_summary(self):
+        """
+        Get a summary of configured thresholds for reporting.
+
+        Returns:
+            dict: Thresholds mapped to alert levels, highest threshold first
+
+        """
+        return dict(sorted(self.thresholds.items(), reverse=True))
+
+
+def load_clarity_thresholds_from_yaml(yaml_content):
+    """
+    Build a ``ClarityThresholdsPolicy`` from the YAML text ``yaml_content``.
+
+    The policy is created from the ``license_clarity_thresholds`` entry.
+    Raise ValidationError when the content is not a dictionary or lacks
+    that entry.
+    """
+    section = "license_clarity_thresholds"
+    policies = load_yaml_content(yaml_content)
+
+    if not isinstance(policies, dict):
+        raise ValidationError("YAML content must be a dictionary.")
+    if section not in policies:
+        raise ValidationError(
+            "Missing 'license_clarity_thresholds' key in policies file."
+        )
+
+    return ClarityThresholdsPolicy(policies[section])
+
+
+def load_clarity_thresholds_from_file(file_path):
+    """
+    Load clarity thresholds from the YAML file at ``file_path``.
+
+    Return a ``ClarityThresholdsPolicy``, or None when the file does not exist.
+    Raise ValidationError when the file cannot be read or decoded as UTF-8, or
+    when its content is not a valid thresholds policy.
+    """
+    from pathlib import Path
+
+    file_path = Path(file_path)
+    if not file_path.exists():
+        return None
+
+    # Only the read is guarded; parsing errors already raise ValidationError.
+    try:
+        yaml_content = file_path.read_text(encoding="utf-8")
+    except (OSError, UnicodeDecodeError) as e:
+        raise ValidationError(f"Error reading file {file_path}: {e}") from e
+    return load_clarity_thresholds_from_yaml(yaml_content)