diff --git a/CHANGELOG.md b/CHANGELOG.md index 3fbc5144..15592e43 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- `jsonschema::value_format` helper lets `ValidationOptions::with_format` register value-aware checkers that inspect non-string instances. + ## [0.37.1] - 2025-11-19 ### Fixed diff --git a/crates/jsonschema-py/CHANGELOG.md b/crates/jsonschema-py/CHANGELOG.md index 5b2da55f..4613e0af 100644 --- a/crates/jsonschema-py/CHANGELOG.md +++ b/crates/jsonschema-py/CHANGELOG.md @@ -2,6 +2,10 @@ ## [Unreleased] +### Added + +- `jsonschema_rs.value_format()` decorator allows for registering value-aware checkers that inspect non-string instances. + ## [0.37.1] - 2025-11-19 ### Fixed diff --git a/crates/jsonschema-py/Cargo.toml b/crates/jsonschema-py/Cargo.toml index f1b868b9..f8eb99d2 100644 --- a/crates/jsonschema-py/Cargo.toml +++ b/crates/jsonschema-py/Cargo.toml @@ -19,7 +19,6 @@ crate-type = ["cdylib"] jsonschema = { path = "../jsonschema/", features = ["arbitrary-precision"] } pyo3 = { version = "0.27", features = ["extension-module", "abi3-py310"] } pyo3-built = "0.6" -pythonize = "0.27" serde.workspace = true serde_json.workspace = true diff --git a/crates/jsonschema-py/README.md b/crates/jsonschema-py/README.md index 93e1427d..f0c86a48 100644 --- a/crates/jsonschema-py/README.md +++ b/crates/jsonschema-py/README.md @@ -119,6 +119,27 @@ validator.is_valid("USD") # True validator.is_valid("invalid") # False ``` +If you need to inspect instances that are not strings (for example integers or entire objects), +wrap your callable with `jsonschema_rs.value_format` or use it as a decorator: + +```python +import jsonschema_rs + +@jsonschema_rs.value_format +def is_answer(instance): + return not isinstance(instance, int) or instance == 42 + + +validator = jsonschema_rs.validator_for( + {"format": "knows-answer"}, + formats={"knows-answer": is_answer}, + validate_formats=True, +) +validator.is_valid(42) # True 
+validator.is_valid(41) # False +validator.is_valid("still a string") # True +``` + Additional configuration options are available for fine-tuning the validation process: - `validate_formats`: Override the draft-specific default behavior for format validation. diff --git a/crates/jsonschema-py/python/jsonschema_rs/__init__.py b/crates/jsonschema-py/python/jsonschema_rs/__init__.py index 7e83f7fc..b08a1c4a 100644 --- a/crates/jsonschema-py/python/jsonschema_rs/__init__.py +++ b/crates/jsonschema-py/python/jsonschema_rs/__init__.py @@ -20,8 +20,10 @@ is_valid, iter_errors, meta, + value_format, validate, validator_for, + ValueFormat, ) Validator: TypeAlias = ( @@ -109,6 +111,7 @@ def __hash__(self) -> int: "iter_errors", "evaluate", "validator_for", + "value_format", "Draft4", "Draft6", "Draft7", @@ -124,4 +127,5 @@ def __hash__(self) -> int: "FancyRegexOptions", "RegexOptions", "meta", + "ValueFormat", ] diff --git a/crates/jsonschema-py/python/jsonschema_rs/__init__.pyi b/crates/jsonschema-py/python/jsonschema_rs/__init__.pyi index c1970236..e4ff3316 100644 --- a/crates/jsonschema-py/python/jsonschema_rs/__init__.pyi +++ b/crates/jsonschema-py/python/jsonschema_rs/__init__.pyi @@ -3,10 +3,14 @@ from decimal import Decimal from typing import Any, Callable, List, Protocol, TypeAlias, TypeVar, TypedDict, Union _SchemaT = TypeVar("_SchemaT", bool, dict[str, Any]) -_FormatFunc = TypeVar("_FormatFunc", bound=Callable[[str], bool]) JSONType: TypeAlias = dict[str, Any] | list | str | int | float | Decimal | bool | None JSONPrimitive: TypeAlias = str | int | float | Decimal | bool | None +class ValueFormat: + def __call__(self, instance: JSONType) -> bool: ... 
+ +FormatChecker: TypeAlias = Callable[[str], bool] | ValueFormat + class EvaluationAnnotation(TypedDict): schemaLocation: str absoluteKeywordLocation: str | None @@ -66,12 +70,14 @@ PatternOptionsType = Union[FancyRegexOptions, RegexOptions] class RetrieverProtocol(Protocol): def __call__(self, uri: str) -> JSONType: ... +def value_format(func: Callable[[JSONType], bool]) -> ValueFormat: ... + def is_valid( schema: _SchemaT, instance: Any, draft: int | None = None, with_meta_schemas: bool | None = None, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -91,7 +97,7 @@ def validate( instance: Any, draft: int | None = None, with_meta_schemas: bool | None = None, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -111,7 +117,7 @@ def iter_errors( instance: Any, draft: int | None = None, with_meta_schemas: bool | None = None, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -130,7 +136,7 @@ def evaluate( schema: _SchemaT, instance: Any, draft: int | None = None, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -268,7 +274,7 @@ class Draft4Validator: def __init__( self, schema: _SchemaT | str, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol 
| None = None, @@ -287,7 +293,7 @@ class Draft6Validator: def __init__( self, schema: _SchemaT | str, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -306,7 +312,7 @@ class Draft7Validator: def __init__( self, schema: _SchemaT | str, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -325,7 +331,7 @@ class Draft201909Validator: def __init__( self, schema: _SchemaT | str, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -344,7 +350,7 @@ class Draft202012Validator: def __init__( self, schema: _SchemaT | str, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, @@ -363,7 +369,7 @@ Validator: TypeAlias = Draft4Validator | Draft6Validator | Draft7Validator | Dra def validator_for( schema: _SchemaT, - formats: dict[str, _FormatFunc] | None = None, + formats: dict[str, FormatChecker] | None = None, validate_formats: bool | None = None, ignore_unknown_formats: bool = True, retriever: RetrieverProtocol | None = None, diff --git a/crates/jsonschema-py/src/lib.rs b/crates/jsonschema-py/src/lib.rs index a0c03486..4f077e3b 100644 --- a/crates/jsonschema-py/src/lib.rs +++ b/crates/jsonschema-py/src/lib.rs @@ -17,8 +17,8 @@ use pyo3::{ exceptions::{self, PyValueError}, ffi::PyUnicode_AsUTF8AndSize, prelude::*, - types::{PyAny, PyDict, PyList, PyString, PyType}, - wrap_pyfunction, + types::{PyAny, PyDict, 
PyList, PyString, PyTuple, PyType}, + wrap_pyfunction, PyRef, }; use regex::{FancyRegexOptions, RegexOptions}; use retriever::{into_retriever, Retriever}; @@ -104,6 +104,51 @@ fn value_to_python(py: Python<'_>, value: &serde_json::Value) -> PyResult, +} + +impl PyValueFormat { + fn new(callback: Py) -> Self { + PyValueFormat { callback } + } +} + +#[pymethods] +impl PyValueFormat { + #[pyo3(name = "__call__")] + fn call( + &self, + py: Python<'_>, + args: &Bound<'_, PyTuple>, + kwargs: Option<&Bound<'_, PyDict>>, + ) -> PyResult> { + self.callback + .bind(py) + .call(args, kwargs) + .map(pyo3::Bound::unbind) + } +} + +#[pyfunction] +fn value_format(py: Python<'_>, callback: &Bound<'_, PyAny>) -> PyResult> { + if !callback.is_callable() { + return Err(exceptions::PyValueError::new_err( + "value_format decorator requires a callable", + )); + } + let wrapper = Py::new(py, PyValueFormat::new(callback.clone().unbind()))?; + wrapper.bind(py).setattr("__wrapped__", callback)?; + if let Ok(update_wrapper) = py + .import("functools") + .and_then(|module| module.getattr("update_wrapper")) + { + let _ = update_wrapper.call1((wrapper.bind(py), callback)); + } + Ok(wrapper) +} + fn evaluation_output_to_python(py: Python<'_>, output: &T) -> PyResult> where T: Serialize + ?Sized, @@ -619,6 +664,33 @@ fn make_options( } if let Some(formats) = formats { for (name, callback) in formats.iter() { + if let Ok(wrapper) = callback.extract::>() { + let py = wrapper.py(); + let callback = wrapper.callback.clone_ref(py); + let call_py_callback = move |value: &serde_json::Value| { + Python::attach(|py| { + let value = value_to_python(py, value)?; + callback.bind(py).call((value.bind(py),), None)?.is_truthy() + }) + }; + options = options.with_format( + name.to_string(), + jsonschema::value_format( + move |value: &serde_json::Value| match call_py_callback(value) { + Ok(r) => r, + Err(e) => { + LAST_FORMAT_ERROR.with(|last| { + *last.borrow_mut() = Some(e); + }); + 
std::panic::set_hook(Box::new(|_| {})); + panic!("Format checker failed") + } + }, + ), + ); + continue; + } + if !callback.is_callable() { return Err(exceptions::PyValueError::new_err(format!( "Format checker for '{name}' must be a callable", @@ -1617,6 +1689,7 @@ fn jsonschema_rs(py: Python<'_>, module: &Bound<'_, PyModule>) -> PyResult<()> { module.add_wrapped(wrap_pyfunction!(iter_errors))?; module.add_wrapped(wrap_pyfunction!(evaluate))?; module.add_wrapped(wrap_pyfunction!(validator_for))?; + module.add_wrapped(wrap_pyfunction!(value_format))?; module.add_class::()?; module.add_class::()?; module.add_class::()?; @@ -1626,6 +1699,7 @@ fn jsonschema_rs(py: Python<'_>, module: &Bound<'_, PyModule>) -> PyResult<()> { module.add_class::()?; module.add_class::()?; module.add_class::()?; + module.add_class::()?; module.add("ValidationErrorKind", py.get_type::())?; module.add("Draft4", DRAFT4)?; module.add("Draft6", DRAFT6)?; diff --git a/crates/jsonschema-py/tests-py/test_jsonschema.py b/crates/jsonschema-py/tests-py/test_jsonschema.py index 5405303b..4f498c4d 100644 --- a/crates/jsonschema-py/tests-py/test_jsonschema.py +++ b/crates/jsonschema-py/tests-py/test_jsonschema.py @@ -23,6 +23,7 @@ iter_errors, validate, validator_for, + value_format, ) json = st.recursive( @@ -484,6 +485,30 @@ def is_currency(_): pass +def test_value_format_allows_non_strings(): + formats = { + "magic-number": value_format(lambda instance: not isinstance(instance, int) or instance == 42) + } + validator = validator_for({"format": "magic-number"}, formats=formats, validate_formats=True) + + assert validator.is_valid(42) + assert not validator.is_valid(41) + assert validator.is_valid("still a string") + + +def test_value_format_decorator_keeps_callable(): + @value_format + def is_answer(instance): + return isinstance(instance, dict) and instance.get("answer") == 42 + + schema = {"format": "knows-answer"} + validator = validator_for(schema, formats={"knows-answer": is_answer}, 
validate_formats=True) + + assert validator.is_valid({"answer": 42}) + assert not validator.is_valid({"answer": 41}) + assert is_answer({"answer": 42}) is True + + @pytest.mark.parametrize( "cls,validate_formats,input,expected", [ diff --git a/crates/jsonschema/src/keywords/format.rs b/crates/jsonschema/src/keywords/format.rs index 8d5cce2f..2b67517b 100644 --- a/crates/jsonschema/src/keywords/format.rs +++ b/crates/jsonschema/src/keywords/format.rs @@ -814,28 +814,93 @@ impl Validate for CustomFormatValidator { } fn is_valid(&self, instance: &Value) -> bool { - if let Value::String(item) = instance { - self.check.is_valid(item) + self.check.is_valid(instance) + } +} + +#[doc(hidden)] +pub trait Format: ThreadBound + 'static { + fn is_valid(&self, value: &Value) -> bool; +} + +mod private { + pub trait Sealed {} +} + +#[doc(hidden)] +pub trait IntoFormat: private::Sealed + ThreadBound + 'static { + fn into_format(self) -> Arc; +} + +#[doc(hidden)] +pub struct ValueFormat { + func: F, +} + +/// Wrap a custom format validator that should receive the entire [`serde_json::Value`]. 
+pub fn value_format(func: F) -> ValueFormat +where + F: Fn(&Value) -> bool + ThreadBound + 'static, +{ + ValueFormat { func } +} + +struct StringFormatAdapter { + func: F, +} + +impl Format for StringFormatAdapter +where + F: Fn(&str) -> bool + ThreadBound + 'static, +{ + #[inline] + fn is_valid(&self, value: &Value) -> bool { + if let Value::String(value) = value { + (self.func)(value) } else { true } } } -pub(crate) trait Format: ThreadBound + 'static { - fn is_valid(&self, value: &str) -> bool; +struct ValueFormatAdapter { + func: F, } -impl Format for F +impl Format for ValueFormatAdapter +where + F: Fn(&Value) -> bool + ThreadBound + 'static, +{ + #[inline] + fn is_valid(&self, value: &Value) -> bool { + (self.func)(value) + } +} + +impl IntoFormat for F where F: Fn(&str) -> bool + ThreadBound + 'static, { #[inline] - fn is_valid(&self, value: &str) -> bool { - self(value) + fn into_format(self) -> Arc { + Arc::new(StringFormatAdapter { func: self }) } } +impl private::Sealed for F where F: Fn(&str) -> bool + ThreadBound + 'static {} + +impl IntoFormat for ValueFormat +where + F: Fn(&Value) -> bool + ThreadBound + 'static, +{ + #[inline] + fn into_format(self) -> Arc { + Arc::new(ValueFormatAdapter { func: self.func }) + } +} + +impl private::Sealed for ValueFormat where F: Fn(&Value) -> bool + ThreadBound + 'static {} + #[inline] pub(crate) fn compile<'a>( ctx: &compiler::Context, @@ -903,7 +968,7 @@ pub(crate) fn compile<'a>( #[cfg(test)] mod tests { use referencing::Draft; - use serde_json::json; + use serde_json::{json, Value}; use test_case::test_case; use crate::tests_util; @@ -939,6 +1004,26 @@ mod tests { assert!(without_validation.is_valid(¬_email_instance)); } + #[test] + fn custom_format_can_inspect_full_value() { + let schema = json!({"format": "magic-number"}); + let validator = crate::options() + .should_validate_formats(true) + .with_format( + "magic-number", + crate::value_format(|instance: &Value| match instance { + Value::Number(number) => 
number.as_i64() == Some(42), + _ => true, + }), + ) + .build(&schema) + .unwrap(); + + assert!(validator.is_valid(&json!(42))); + assert!(!validator.is_valid(&json!(41))); + assert!(validator.is_valid(&json!("still string"))); + } + #[test] fn ecma_regex() { // See GH-230 diff --git a/crates/jsonschema/src/lib.rs b/crates/jsonschema/src/lib.rs index c27bee74..7139af99 100644 --- a/crates/jsonschema/src/lib.rs +++ b/crates/jsonschema/src/lib.rs @@ -765,11 +765,12 @@ //! //! JSON Schema allows for format validation through the `format` keyword. While `jsonschema` //! provides built-in validators for standard formats, you can also define custom format validators -//! for domain-specific string formats. +//! for domain-specific formats. //! //! To implement a custom format validator: //! -//! 1. Define a function or a closure that takes a `&str` and returns a `bool`. +//! 1. Define a function or a closure that takes either a `&str` (string-only) or a `&serde_json::Value` +//! (any instance type) and returns a `bool`. Use [`jsonschema::value_format`] to wrap the latter. //! 2. Register the function with `jsonschema::options().with_format()`. //! //! ```rust @@ -791,6 +792,10 @@ //! let validator = jsonschema::options() //! .with_format("ends-with-42", ends_with_42) //! .with_format("ends-with-43", |s| s.ends_with("43!")) +//! .with_format( +//! "is-answer", +//! jsonschema::value_format(|value| value == &json!(42)), +//! ) //! .should_validate_formats(true) //! .build(&schema)?; //! @@ -804,7 +809,9 @@ //! //! ### Notes on Custom Format Validators //! -//! - Custom format validators are only called for string instances. +//! - Validators that take a `&str` are only called for string instances. To validate other types, +//! wrap your function or closure with [`jsonschema::value_format`] so it can inspect the entire +//! `serde_json::Value`. //! - In newer drafts, `format` is purely an annotation and won’t do any checking unless you //! 
opt in by calling `.should_validate_formats(true)` on your options builder. If you omit //! it, all `format` keywords are ignored at validation time. @@ -913,7 +920,7 @@ pub use error::{ErrorIterator, MaskedValidationError, ValidationError, Validatio pub use evaluation::{ AnnotationEntry, ErrorEntry, Evaluation, FlagOutput, HierarchicalOutput, ListOutput, }; -pub use keywords::custom::Keyword; +pub use keywords::{custom::Keyword, format::value_format}; pub use options::{FancyRegex, PatternOptions, Regex, ValidationOptions}; pub use referencing::{ Draft, Error as ReferencingError, Registry, RegistryOptions, Resource, Retrieve, Uri, diff --git a/crates/jsonschema/src/options.rs b/crates/jsonschema/src/options.rs index e83f9102..24b028f6 100644 --- a/crates/jsonschema/src/options.rs +++ b/crates/jsonschema/src/options.rs @@ -5,7 +5,10 @@ use crate::{ DEFAULT_CONTENT_ENCODING_CHECKS_AND_CONVERTERS, }, content_media_type::{ContentMediaTypeCheckType, DEFAULT_CONTENT_MEDIA_TYPE_CHECKS}, - keywords::{custom::KeywordFactory, format::Format}, + keywords::{ + custom::KeywordFactory, + format::{Format, IntoFormat}, + }, paths::Location, retriever::DefaultRetriever, thread::ThreadBound, @@ -372,29 +375,40 @@ impl ValidationOptions { /// # Example /// /// ```rust - /// # use serde_json::json; + /// # use serde_json::{json, Value}; /// fn my_format(s: &str) -> bool { /// // Your awesome format check! 
/// s.ends_with("42!") /// } + /// fn my_value_format(value: &Value) -> bool { + /// value == &json!(1337) + /// } /// # fn foo() { - /// let schema = json!({"type": "string", "format": "custom"}); + /// let string_schema = json!({"type": "string", "format": "custom"}); + /// let number_schema = json!({"format": "leet-number"}); /// let validator = jsonschema::options() /// .with_format("custom", my_format) - /// .build(&schema) + /// .with_format("leet-number", jsonschema::value_format(my_value_format)) + /// .should_validate_formats(true) + /// .build(&json!({"anyOf": [string_schema, number_schema]})) /// .expect("Valid schema"); /// /// assert!(!validator.is_valid(&json!("foo"))); /// assert!(validator.is_valid(&json!("foo42!"))); + /// assert!(!validator.is_valid(&json!(42))); + /// assert!(validator.is_valid(&json!(1337))); /// # } /// ``` + /// + /// Use [`jsonschema::value_format`] to wrap functions or closures that operate on the entire + /// [`serde_json::Value`]; string-based closures continue to work without any extra adapters. #[must_use] pub fn with_format(mut self, name: N, format: F) -> Self where N: Into, - F: Fn(&str) -> bool + ThreadBound + 'static, + F: IntoFormat, { - self.formats.insert(name.into(), Arc::new(format)); + self.formats.insert(name.into(), format.into_format()); self } pub(crate) fn get_format(&self, format: &str) -> Option<(&String, &Arc)> {