Skip to content

Commit d9da93d

Browse files
committed
feat: safe ValidationError message access via .get_squeezed_message()
ValidationError.data and .message are provided by third-party libraries, which can produce messages of any length. E.g. jsonschema embeds its input in all of its messages, and that input can be arbitrarily big. To be able to show these errors to the user, some pre-processing is needed. The new method squeezes these messages in the least disruptive way, and has special knowledge of how to shorten jsonschema messages. It is definitely still a workaround; ideally the libraries should not yield unbounded messages. Signed-off-by: Krisztian Fekete <[email protected]>
1 parent c7351a6 commit d9da93d

File tree

5 files changed

+211
-4
lines changed

5 files changed

+211
-4
lines changed

cyclonedx/validation/__init__.py

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,35 @@
2727
from .xml import XmlValidator
2828

2929

30+
def squeeze(text: str, size: int, replacement: str = ' ... ') -> str:
    """Replace the middle of ``text`` with ``replacement`` so it fits into ``size``.

    The head and the tail of ``text`` are kept. When the space left next to
    ``replacement`` cannot be split evenly, the tail gets the extra character.

    :param text: the text to shorten.
    :param size: the maximum length of the output, ``-1`` to disable squeezing.
    :param replacement: marker that is put in place of the dropped middle part.
    :return: ``text`` unchanged if ``size`` is ``-1`` (for easy pass-through)
             or if ``text`` is not longer than ``size``; otherwise a string of
             exactly ``size`` characters, made of the head of ``text``,
             ``replacement`` and the tail of ``text``.
    :raises ValueError: if ``size`` is not ``-1`` and ``replacement`` is longer
             than ``size``. This is checked eagerly: the error is raised even
             when ``text`` is short enough that no replacement would happen,
             so an unusable ``size``/``replacement`` pair fails for every input
             and not only for long ones.
    """
    if size == -1:
        return text

    # validate before looking at `text`, so that the outcome of a bad
    # configuration does not depend on the length of the input
    if size < len(replacement):
        raise ValueError(f'squeeze: {size = } < {len(replacement) = }')

    if len(text) <= size:
        return text

    # characters of `text` that may be kept: the tail gets the odd one
    left_size = (size - len(replacement)) // 2
    right_size = size - len(replacement) - left_size
    # a positive offset is used on purpose: `text[-0:]` would be the whole text
    right_offset = len(text) - right_size

    return f'{text[:left_size]}{replacement}{text[right_offset:]}'
57+
58+
3059
class ValidationError:
3160
"""Validation failed with this specific error.
3261
@@ -51,6 +80,23 @@ def path(self) -> str:
5180
# only subclasses know how to extract this info
5281
return str(getattr(self.data, 'path', ''))
5382

83+
def get_squeezed_message(self, *, context_limit: int = -1, max_size: int = -1, replacement: str = ' ... ') -> str:
    """Return the error message, shortened to a presentable size.

    Underlying libraries may embed context in their messages (the input,
    or the violated rule), so messages can get very big.

    This base implementation only applies the generic shortening: the
    middle of the message is replaced when it is longer than ``max_size``.
    ``context_limit`` is accepted, but not used here.

    :param context_limit: limit of tolerated context length; ignored by
                          this implementation.
    :param max_size: squeeze the message to this size, ``-1`` for no limit.
    :param replacement: marks the place of the dropped text.
    :return: the message, potentially shortened.

    With the defaults, no squeezing happens.
    """
    # subclasses may know how to do it better
    full_message = self.message
    return squeeze(full_message, max_size, replacement)
99+
54100
def __init__(self, data: Any) -> None:
55101
self.data = data
56102

cyclonedx/validation/json.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@
2929

3030
from ..exception import MissingOptionalDependencyException
3131
from ..schema._res import BOM_JSON as _S_BOM, BOM_JSON_STRICT as _S_BOM_STRICT, JSF as _S_JSF, SPDX_JSON as _S_SPDX
32-
from . import BaseSchemabasedValidator, SchemabasedValidator, ValidationError
32+
from . import BaseSchemabasedValidator, SchemabasedValidator, ValidationError, squeeze
3333

3434
_missing_deps_error: Optional[tuple[MissingOptionalDependencyException, ImportError]] = None
3535
try:
@@ -47,7 +47,42 @@
4747
), err
4848

4949

50+
def _get_message_with_squeezed_context(error: 'JsonSchemaValidationError', context_limit: int, replacement: str) -> str:
51+
# The below code depends on jsonschema internals, that messages are created
52+
# like `yield ValidationError(f"{instance!r} has non-unique elements")`
53+
# and tries to replace `{instance!r}` with a shortened version, if needed
54+
message: str = error.message
55+
if context_limit <= 0 or len(message) <= context_limit:
56+
return message
57+
58+
repr_context = repr(error.instance)
59+
if len(repr_context) <= context_limit:
60+
return message
61+
62+
return message.replace(repr_context, squeeze(repr_context, context_limit, replacement))
63+
64+
5065
class _JsonValidationError(ValidationError):
66+
def get_squeezed_message(self, *, context_limit: int = -1, max_size: int = -1, replacement: str = ' ... ') -> str:
    """Return the error message, shortened to a presentable size.

    Underlying libraries may embed context in their messages, so messages
    can get very big. Two steps are applied here, in this order:

    1. library specific: the instance repr embedded in the message is
       shortened to ``context_limit``,
    2. generic: the resulting message is squeezed to ``max_size``.

    :param context_limit: jsonschema messages most of the time include the
                          instance repr as context, which can be very big
                          (in the megabytes range), so an attempt is made to
                          shorten the context to this size.
    :param max_size: squeeze the message to this size.
    :param replacement: marks the place of the dropped text.
    :return: the message, potentially shortened.

    With the defaults, no squeezing happens.
    """
    return squeeze(
        _get_message_with_squeezed_context(self.data, context_limit, replacement),
        max_size,
        replacement,
    )
85+
5186
@property
5287
def path(self) -> str:
5388
"""Path to the location of the problem in the document.

tests/test_validation.py

Lines changed: 121 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,13 @@
1616
# Copyright (c) OWASP Foundation. All Rights Reserved.
1717

1818

19+
import unittest
1920
from itertools import product
20-
from unittest import TestCase
2121

2222
from ddt import data, ddt, named_data, unpack
2323

2424
from cyclonedx.schema import OutputFormat, SchemaVersion
25-
from cyclonedx.validation import make_schemabased_validator
25+
from cyclonedx.validation import make_schemabased_validator, squeeze
2626

2727
UNDEFINED_FORMAT_VERSION = {
2828
(OutputFormat.JSON, SchemaVersion.V1_1),
@@ -31,7 +31,7 @@
3131

3232

3333
@ddt
34-
class TestGetSchemabasedValidator(TestCase):
34+
class TestGetSchemabasedValidator(unittest.TestCase):
3535

3636
@named_data(*([f'{f.name} {v.name}', f, v]
3737
for f, v
@@ -51,3 +51,121 @@ def test_as_expected(self, of: OutputFormat, sv: SchemaVersion) -> None:
5151
def test_fails_on_wrong_args(self, of: OutputFormat, sv: SchemaVersion, raises_regex: tuple) -> None:
5252
with self.assertRaisesRegex(*raises_regex):
5353
make_schemabased_validator(of, sv)
54+
55+
56+
class TestSqueeze(unittest.TestCase):
    """Tests of ``squeeze()``: pass-through, head/tail distribution, and errors."""

    def test_squeeze_size_minus_one_returns_original_text(self) -> None:
        """Test that size=-1 returns original text unchanged."""
        self.assertEqual(squeeze('hello world', -1), 'hello world')
        self.assertEqual(squeeze('', -1), '')
        self.assertEqual(squeeze('a', -1), 'a')
        self.assertEqual(squeeze('very long text that would normally be squeezed', -1),
                         'very long text that would normally be squeezed')

    def test_squeeze_size_zero_returns_empty_text(self) -> None:
        """Test that size=0 with an empty replacement returns an empty text."""
        self.assertEqual(squeeze('hello world', 0, ''), '')
        self.assertEqual(squeeze('', 0, ''), '')

    def test_squeeze_text_shorter_than_or_equal_size_returns_original(self) -> None:
        """Test that text shorter than or equal to size returns original text."""
        self.assertEqual(squeeze('hello', 10), 'hello')
        self.assertEqual(squeeze('hello', 5), 'hello')
        self.assertEqual(squeeze('', 5), '')
        self.assertEqual(squeeze('a', 5), 'a')
        self.assertEqual(squeeze('ab', 10), 'ab')

    def test_squeeze_with_default_replacement(self) -> None:
        """Test squeezing with default ' ... ' replacement."""
        self.assertEqual(squeeze('hello world', 8), 'h ... ld')
        self.assertEqual(squeeze('hello world', 7), 'h ... d')
        self.assertEqual(squeeze('hello world', 9), 'he ... ld')
        self.assertEqual(squeeze('hello world', 10), 'he ... rld')
        self.assertEqual(squeeze('hello world', 11), 'hello world')

    def test_squeeze_with_custom_replacement(self) -> None:
        """Test squeezing with custom replacement strings."""
        self.assertEqual(squeeze('hello world', 8, '..'), 'hel..rld')
        self.assertEqual(squeeze('hello world', 7, '..'), 'he..rld')
        self.assertEqual(squeeze('hello world', 9, '---'), 'hel---rld')
        self.assertEqual(squeeze('hello world', 10, 'XX'), 'hellXXorld')

    def test_squeeze_with_single_character_replacement(self) -> None:
        """Test squeezing with single character replacement."""
        self.assertEqual(squeeze('hello world', 5, '*'), 'he*ld')
        self.assertEqual(squeeze('hello world', 6, '*'), 'he*rld')
        self.assertEqual(squeeze('hello world', 7, '*'), 'hel*rld')

    def test_squeeze_with_empty_replacement(self) -> None:
        """Test squeezing with empty replacement string."""
        self.assertEqual(squeeze('hello world', 5, ''), 'herld')
        self.assertEqual(squeeze('hello world', 6, ''), 'helrld')
        self.assertEqual(squeeze('hello world', 7, ''), 'helorld')

    def test_squeeze_replacement_equals_target_size(self) -> None:
        """Test when replacement string equals the target size."""
        self.assertEqual(squeeze('hello world', 4, '....'), '....')
        self.assertEqual(squeeze('hello world', 3, '***'), '***')

    def test_squeeze_very_short_target_sizes(self) -> None:
        """Test edge cases with very short target sizes."""
        self.assertEqual(squeeze('hello world', 5, '.'), 'he.ld')
        self.assertEqual(squeeze('hello world', 6, '.'), 'he.rld')
        self.assertEqual(squeeze('hello world', 1, 'X'), 'X')

    def test_squeeze_with_long_text(self) -> None:
        """Test squeezing with very long text."""
        long_text = 'a' * 100
        result = squeeze(long_text, 10, '...')
        self.assertEqual(len(result), 10)
        self.assertEqual(result, 'aaa...aaaa')

        # Test with different replacement
        result2 = squeeze(long_text, 8, '--')
        self.assertEqual(len(result2), 8)
        self.assertEqual(result2, 'aaa--aaa')

    def test_squeeze_size_distribution_even(self) -> None:
        """Test size distribution when remaining space is even."""
        # size=8, replacement="--" (len=2), remaining=6, left=3, right=3
        self.assertEqual(squeeze('abcdefghijk', 8, '--'), 'abc--ijk')
        # size=10, replacement="...." (len=4), remaining=6, left=3, right=3
        self.assertEqual(squeeze('abcdefghijk', 10, '....'), 'abc....ijk')

    def test_squeeze_size_distribution_odd(self) -> None:
        """Test size distribution when remaining space is odd."""
        # size=9, replacement="--" (len=2), remaining=7, left=3, right=4
        self.assertEqual(squeeze('abcdefghijk', 9, '--'), 'abc--hijk')
        # size=11 equals len(text), so nothing is dropped: text is returned as is
        self.assertEqual(squeeze('abcdefghijk', 11, '...'), 'abcdefghijk')

    def test_squeeze_raises_error_when_replacement_too_long(self) -> None:
        """Test that ValueError is raised when replacement is longer than target size."""
        with self.assertRaises(ValueError) as context:
            squeeze('hello world', 3, ' ... ')
        self.assertIn('size = 3 < len(replacement) = 5', str(context.exception))

        with self.assertRaises(ValueError) as context:
            squeeze('hello world', 2, 'abc')
        self.assertIn('size = 2 < len(replacement) = 3', str(context.exception))

        with self.assertRaises(ValueError) as context:
            squeeze('hello world', 1, 'ab')
        self.assertIn('size = 1 < len(replacement) = 2', str(context.exception))

    def test_squeeze_error_when_replacement_long_but_no_squeeze_needed(self) -> None:
        """Test that ValueError is raised even when the text doesn't need squeezing."""
        # Text is shorter than size, so no squeezing would occur,
        # yet, the replacement is longer than the requested size, so error is raised
        with self.assertRaises(ValueError) as context:
            squeeze('abc', 10, 'very long replacement')
        self.assertIn('size = 10 < len(replacement) = 21', str(context.exception))

        with self.assertRaises(ValueError) as context:
            squeeze('', 3, 'abcd')
        self.assertIn('size = 3 < len(replacement) = 4', str(context.exception))
168+
169+
170+
if __name__ == '__main__':
171+
unittest.main()

tests/test_validation_json.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,4 +133,8 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_
133133
self.assertIsNotNone(validation_error)
134134
self.assertIsNotNone(validation_error.data)
135135
self.assertTrue(bool(validation_error.message))
136+
self.assertTrue(bool(validation_error.get_squeezed_message(context_limit=22)))
136137
self.assertTrue(bool(validation_error.path))
138+
139+
squeezed_message = validation_error.get_squeezed_message(max_size=100)
140+
self.assertLessEqual(len(squeezed_message), 100, squeezed_message)

tests/test_validation_xml.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,4 +93,8 @@ def test_validate_expected_error(self, schema_version: SchemaVersion, test_data_
9393
self.assertIsNotNone(validation_error)
9494
self.assertIsNotNone(validation_error.data)
9595
self.assertTrue(bool(validation_error.message))
96+
self.assertTrue(bool(validation_error.get_squeezed_message()))
9697
self.assertTrue(bool(validation_error.path))
98+
99+
squeezed_message = validation_error.get_squeezed_message(max_size=100)
100+
self.assertLessEqual(len(squeezed_message), 100, squeezed_message)

0 commit comments

Comments
 (0)