Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
165 changes: 165 additions & 0 deletions scanpipe/pipes/license_clarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

"""
License Clarity Thresholds Management

This module provides an independent mechanism to read, validate, and evaluate
license clarity score thresholds from policy files. Unlike license policies
which are applied during scan processing, clarity thresholds are evaluated
post-scan during summary generation.

The clarity thresholds system uses a simple key-value mapping where:
- Keys are integer threshold values (minimum scores)
- Values are compliance alert levels ('ok', 'warning', 'error')

Example policies.yml structure:

license_clarity_thresholds:
  80: ok       # Scores >= 80 get 'ok' alert
  50: warning  # Scores 50-79 get 'warning' alert
               # Scores below 50 match no threshold and get 'error' alert
"""

from pathlib import Path

from django.core.exceptions import ValidationError

import saneyaml


def load_yaml_content(yaml_content):
    """
    Load and parse YAML content into Python data.

    The parsed value is returned as-is; callers are responsible for checking
    that it has the expected type.

    Raises:
        ValidationError: If ``saneyaml`` reports a YAML error. The parser
            error is kept as the cause of the raised exception.

    """
    try:
        return saneyaml.load(yaml_content)
    except saneyaml.YAMLError as e:
        # Chain explicitly so the parser error stays attached as the cause.
        raise ValidationError(f"Policies file format error: {e}") from e


class ClarityThresholdsPolicy:
    """
    Manages clarity score thresholds and compliance evaluation.

    The policy is built from a mapping of minimum clarity scores to compliance
    alert levels. The mapping is validated on construction and stored, with
    integer keys, on the ``thresholds`` attribute.
    """

    def __init__(self, threshold_dict):
        """Validate ``threshold_dict`` and store the result as ``thresholds``."""
        self.thresholds = self.validate_thresholds(threshold_dict)

    @staticmethod
    def validate_thresholds(threshold_dict):
        """
        Return a validated copy of ``threshold_dict`` keyed by integers.

        Keys are converted with ``int()``, so numeric strings are accepted and
        floats are truncated. Values must be one of 'ok', 'warning' or 'error'.
        Keys must be unique after conversion and listed from the highest to
        the lowest.

        Returns:
            dict: Mapping of integer threshold to compliance alert level

        Raises:
            ValidationError: If the input is not a dictionary or breaks any of
                the rules above.

        """
        if not isinstance(threshold_dict, dict):
            raise ValidationError(
                "The `license_clarity_thresholds` must be a dictionary"
            )

        validated = {}
        for key, value in threshold_dict.items():
            try:
                threshold = int(key)
            except (ValueError, TypeError, OverflowError) as e:
                # OverflowError is what int() raises for an infinite float; it
                # is reported as an invalid key like any other bad value.
                raise ValidationError(
                    f"Threshold keys must be integers, got: {key}"
                ) from e
            # Distinct raw keys (e.g. 80 and "80") can collapse to the same int.
            if threshold in validated:
                raise ValidationError(f"Duplicate threshold key: {threshold}")
            if value not in ("ok", "warning", "error"):
                raise ValidationError(
                    f"Compliance alert must be one of 'ok', 'warning', 'error', "
                    f"got: {value}"
                )
            validated[threshold] = value

        # Keys are unique at this point, so matching the reverse-sorted order
        # means the thresholds were declared in strictly descending order.
        declared_order = list(validated)
        if declared_order != sorted(declared_order, reverse=True):
            raise ValidationError("Thresholds must be strictly descending")
        return validated

    def get_alert_for_score(self, score):
        """
        Determine compliance alert level for a given clarity score.

        The alert of the highest threshold that ``score`` meets or exceeds is
        returned. 'error' is returned when ``score`` is None or is lower than
        every configured threshold.

        Returns:
            str: Compliance alert level ('ok', 'warning', 'error')

        """
        if score is None:
            return "error"

        applicable_thresholds = [t for t in self.thresholds if score >= t]
        if not applicable_thresholds:
            return "error"

        return self.thresholds[max(applicable_thresholds)]

    def get_thresholds_summary(self):
        """
        Get a summary of configured thresholds for reporting.

        Returns:
            dict: Thresholds and their alert levels, highest threshold first

        """
        return dict(sorted(self.thresholds.items(), reverse=True))


def load_clarity_thresholds_from_yaml(yaml_content):
    """
    Build a clarity thresholds policy from YAML content.

    The content is parsed, checked to be a mapping that holds the
    ``license_clarity_thresholds`` key, and the value of that key is handed to
    ``ClarityThresholdsPolicy`` for validation.

    Returns:
        ClarityThresholdsPolicy: Configured policy object

    """
    parsed = load_yaml_content(yaml_content)

    if isinstance(parsed, dict):
        if "license_clarity_thresholds" in parsed:
            thresholds = parsed["license_clarity_thresholds"]
            return ClarityThresholdsPolicy(thresholds)

        raise ValidationError(
            "Missing 'license_clarity_thresholds' key in policies file."
        )

    raise ValidationError("YAML content must be a dictionary.")


def load_clarity_thresholds_from_file(file_path):
    """
    Load clarity thresholds from a YAML file.

    Returns:
        ClarityThresholdsPolicy: Configured policy object or None if file not found

    Raises:
        ValidationError: If the file exists but cannot be read or decoded as
            UTF-8, or if its content fails parsing or validation.

    """
    file_path = Path(file_path)

    if not file_path.exists():
        return None

    # Only the read is guarded here; ValidationError raised while parsing and
    # validating the content propagates unchanged.
    try:
        yaml_content = file_path.read_text(encoding="utf-8")
    except FileNotFoundError:
        # The file was removed between the existence check and the read.
        return None
    except (OSError, UnicodeDecodeError) as e:
        raise ValidationError(f"Error reading file {file_path}: {e}") from e

    return load_clarity_thresholds_from_yaml(yaml_content)
4 changes: 4 additions & 0 deletions scanpipe/tests/data/license_clarity/sample_thresholds.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Sample clarity thresholds: each key is a minimum clarity score and each
# value is the compliance alert for scores at or above that key.
license_clarity_thresholds:
  90: ok
  70: warning
  40: error
162 changes: 162 additions & 0 deletions scanpipe/tests/pipes/test_license_clarity.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
# SPDX-License-Identifier: Apache-2.0
#
# http://nexb.com and https://github.com/nexB/scancode.io
# The ScanCode.io software is licensed under the Apache License version 2.0.
# Data generated with ScanCode.io is provided as-is without warranties.
# ScanCode is a trademark of nexB Inc.
#
# You may not use this software except in compliance with the License.
# You may obtain a copy of the License at: http://apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software distributed
# under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
# CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
# Data Generated with ScanCode.io is provided on an "AS IS" BASIS, WITHOUT WARRANTIES
# OR CONDITIONS OF ANY KIND, either express or implied. No content created from
# ScanCode.io should be considered or used as legal advice. Consult an Attorney
# for any legal advice.
#
# ScanCode.io is a free software code scanning tool from nexB Inc. and others.
# Visit https://github.com/nexB/scancode.io for support and download.

from pathlib import Path

from django.core.exceptions import ValidationError
from django.test import TestCase

from scanpipe.pipes.license_clarity import ClarityThresholdsPolicy
from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_file
from scanpipe.pipes.license_clarity import load_clarity_thresholds_from_yaml


class ClarityThresholdsPolicyTest(TestCase):
    """Test ClarityThresholdsPolicy class functionality."""

    # Root of the test data directory, shared by the file-based tests.
    data = Path(__file__).parent.parent / "data"

    def test_valid_thresholds_initialization(self):
        thresholds = {80: "ok", 50: "warning", 20: "error"}
        policy = ClarityThresholdsPolicy(thresholds)
        self.assertEqual(policy.thresholds, thresholds)

    def test_string_keys_converted_to_integers(self):
        thresholds = {"80": "ok", "50": "warning"}
        policy = ClarityThresholdsPolicy(thresholds)
        expected = {80: "ok", 50: "warning"}
        self.assertEqual(policy.thresholds, expected)

    def test_invalid_threshold_key_raises_error(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({"invalid": "ok"})
        self.assertIn("must be integers", str(cm.exception))

    def test_invalid_alert_value_raises_error(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({80: "invalid"})
        self.assertIn("must be one of 'ok', 'warning', 'error'", str(cm.exception))

    def test_non_dict_input_raises_error(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy([80, 50])
        self.assertIn("must be a dictionary", str(cm.exception))

    def test_duplicate_threshold_keys_raise_error(self):
        # 80 and "80" are distinct dict keys that convert to the same integer.
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({80: "ok", "80": "warning"})
        self.assertIn("Duplicate threshold key", str(cm.exception))

    def test_overlapping_thresholds_wrong_order(self):
        with self.assertRaises(ValidationError) as cm:
            ClarityThresholdsPolicy({70: "ok", 80: "warning"})
        self.assertIn("Thresholds must be strictly descending", str(cm.exception))

    def test_float_threshold_keys(self):
        # Float keys are truncated by the int() conversion.
        thresholds = {80.5: "ok", 50.9: "warning"}
        policy = ClarityThresholdsPolicy(thresholds)
        expected = {80: "ok", 50: "warning"}
        self.assertEqual(policy.thresholds, expected)

    def test_negative_threshold_values(self):
        thresholds = {50: "ok", 0: "warning", -10: "error"}
        policy = ClarityThresholdsPolicy(thresholds)
        self.assertEqual(policy.get_alert_for_score(60), "ok")
        self.assertEqual(policy.get_alert_for_score(25), "warning")
        self.assertEqual(policy.get_alert_for_score(-5), "error")
        self.assertEqual(policy.get_alert_for_score(-20), "error")

    def test_empty_thresholds_dict(self):
        # Without any threshold, every score falls back to "error".
        policy = ClarityThresholdsPolicy({})
        self.assertEqual(policy.get_alert_for_score(100), "error")
        self.assertEqual(policy.get_alert_for_score(50), "error")
        self.assertEqual(policy.get_alert_for_score(0), "error")
        self.assertEqual(policy.get_alert_for_score(None), "error")

    def test_very_high_threshold_values(self):
        thresholds = {150: "ok", 100: "warning"}
        policy = ClarityThresholdsPolicy(thresholds)
        self.assertEqual(policy.get_alert_for_score(100), "warning")
        self.assertEqual(policy.get_alert_for_score(90), "error")
        self.assertEqual(policy.get_alert_for_score(50), "error")
        self.assertEqual(policy.get_alert_for_score(99), "error")

    # Policy logic via YAML string (mock policies.yml content)
    def test_yaml_string_ok_and_warning(self):
        yaml_content = """
license_clarity_thresholds:
  90: ok
  30: warning
"""
        policy = load_clarity_thresholds_from_yaml(yaml_content)
        self.assertEqual(policy.get_alert_for_score(95), "ok")
        self.assertEqual(policy.get_alert_for_score(60), "warning")
        self.assertEqual(policy.get_alert_for_score(20), "error")

    def test_yaml_string_single_threshold(self):
        yaml_content = """
license_clarity_thresholds:
  80: ok
"""
        policy = load_clarity_thresholds_from_yaml(yaml_content)
        self.assertEqual(policy.get_alert_for_score(90), "ok")
        self.assertEqual(policy.get_alert_for_score(79), "error")

    def test_yaml_string_invalid_alert(self):
        yaml_content = """
license_clarity_thresholds:
  80: great
"""
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_yaml_string_invalid_key(self):
        yaml_content = """
license_clarity_thresholds:
  eighty: ok
"""
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_yaml_string_missing_key(self):
        yaml_content = """
license_policies:
  - license_key: mit
"""
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_yaml_string_invalid_yaml(self):
        # Unclosed flow sequence: the YAML parser must reject this content.
        yaml_content = "license_clarity_thresholds: [80, 50"
        with self.assertRaises(ValidationError):
            load_clarity_thresholds_from_yaml(yaml_content)

    def test_load_from_existing_file(self):
        test_file = self.data / "license_clarity" / "sample_thresholds.yml"
        policy = load_clarity_thresholds_from_file(test_file)
        self.assertIsNotNone(policy)
        self.assertEqual(policy.get_alert_for_score(95), "ok")
        self.assertEqual(policy.get_alert_for_score(75), "warning")
        self.assertEqual(policy.get_alert_for_score(50), "error")

    def test_load_from_nonexistent_file(self):
        policy = load_clarity_thresholds_from_file("/nonexistent/file.yml")
        self.assertIsNone(policy)