Skip to content

Commit e09095b

Browse files
authored
Fix validity checks for char (#3853)
Ensure that value validity checks for `char` in Kani match the Rust documentation: > A char value must not be a surrogate (i.e., must not be in the range 0xD800..=0xDFFF) and must be equal to or less than char::MAX. The existing code was relying on the compiler ABI information, which can only express one contiguous value range for value validity. But `char` is a special case, and the Rust compiler understands the gap in valid `char` values. This change makes the Kani compiler aware of this gap too. Resolves #3241 By submitting this pull request, I confirm that my contribution is made under the terms of the Apache 2.0 and MIT licenses.
1 parent 3e93311 commit e09095b

File tree

3 files changed

+255
-24
lines changed

3 files changed

+255
-24
lines changed

kani-compiler/src/kani_middle/transform/check_values.rs

Lines changed: 106 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ use stable_mir::mir::{
3232
Statement, StatementKind, Terminator, TerminatorKind,
3333
};
3434
use stable_mir::target::{MachineInfo, MachineSize};
35-
use stable_mir::ty::{AdtKind, IndexedVal, MirConst, RigidTy, Ty, TyKind, UintTy};
35+
use stable_mir::ty::{AdtKind, IndexedVal, MirConst, RigidTy, Span, Ty, TyKind, UintTy};
3636
use std::fmt::Debug;
3737
use strum_macros::AsRefStr;
3838
use tracing::{debug, trace};
@@ -164,7 +164,18 @@ pub struct ValidValueReq {
164164
/// Size of this requirement.
165165
size: MachineSize,
166166
/// The range restriction is represented by a Scalar.
167-
valid_range: WrappingRange,
167+
valid_range: ValidityRange,
168+
}
169+
170+
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
171+
enum ValidityRange {
172+
/// The value validity fits in a single value range.
173+
/// This includes cases where the full range is covered.
174+
Single(WrappingRange),
175+
/// The validity includes more than one value range.
176+
/// Currently, this is only the case for `char`, which has two ranges.
177+
/// If more cases come up, we could turn this into a vector instead.
178+
Multiple([WrappingRange; 2]),
168179
}
169180

170181
// TODO: Optimize checks by merging requirements whenever possible.
@@ -180,44 +191,88 @@ impl ValidValueReq {
180191
/// It's not possible to define a `rustc_layout_scalar_valid_range_*` to any other structure.
181192
/// Note that this annotation only applies to the first scalar in the layout.
182193
pub fn try_from_ty(machine_info: &MachineInfo, ty: Ty) -> Option<ValidValueReq> {
183-
let shape = ty.layout().unwrap().shape();
184-
match shape.abi {
185-
ValueAbi::Scalar(Scalar::Initialized { value, valid_range })
186-
| ValueAbi::ScalarPair(Scalar::Initialized { value, valid_range }, _) => {
187-
Some(ValidValueReq { offset: 0, size: value.size(machine_info), valid_range })
194+
if ty.kind().is_char() {
195+
Some(ValidValueReq {
196+
offset: 0,
197+
size: MachineSize::from_bits(size_of::<char>() * 8),
198+
valid_range: ValidityRange::Multiple([
199+
WrappingRange { start: 0, end: 0xD7FF },
200+
WrappingRange { start: 0xE000, end: char::MAX.into() },
201+
]),
202+
})
203+
} else {
204+
let shape = ty.layout().unwrap().shape();
205+
match shape.abi {
206+
ValueAbi::Scalar(Scalar::Initialized { value, valid_range })
207+
| ValueAbi::ScalarPair(Scalar::Initialized { value, valid_range }, _) => {
208+
Some(ValidValueReq {
209+
offset: 0,
210+
size: value.size(machine_info),
211+
valid_range: ValidityRange::Single(valid_range),
212+
})
213+
}
214+
ValueAbi::Scalar(_)
215+
| ValueAbi::ScalarPair(_, _)
216+
| ValueAbi::Uninhabited
217+
| ValueAbi::Vector { .. }
218+
| ValueAbi::Aggregate { .. } => None,
188219
}
189-
ValueAbi::Scalar(_)
190-
| ValueAbi::ScalarPair(_, _)
191-
| ValueAbi::Uninhabited
192-
| ValueAbi::Vector { .. }
193-
| ValueAbi::Aggregate { .. } => None,
194220
}
195221
}
196222

197223
/// Check if range is full.
198224
pub fn is_full(&self) -> bool {
199-
self.valid_range.is_full(self.size).unwrap()
225+
if let ValidityRange::Single(valid_range) = self.valid_range {
226+
valid_range.is_full(self.size).unwrap()
227+
} else {
228+
false
229+
}
200230
}
201231

202232
/// Check if this range contains `other` range.
203233
///
204234
/// I.e., `scalar_2` ⊆ `scalar_1`
205235
pub fn contains(&self, other: &ValidValueReq) -> bool {
206236
assert_eq!(self.size, other.size);
207-
match (self.valid_range.wraps_around(), other.valid_range.wraps_around()) {
208-
(true, true) | (false, false) => {
209-
self.valid_range.start <= other.valid_range.start
210-
&& self.valid_range.end >= other.valid_range.end
237+
match (&self.valid_range, &other.valid_range) {
238+
(ValidityRange::Single(this_range), ValidityRange::Single(other_range)) => {
239+
range_contains(this_range, other_range, self.size)
240+
}
241+
(ValidityRange::Multiple(this_ranges), ValidityRange::Single(other_range)) => {
242+
range_contains(&this_ranges[0], other_range, self.size)
243+
|| range_contains(&this_ranges[1], other_range, self.size)
244+
}
245+
(ValidityRange::Single(this_range), ValidityRange::Multiple(other_ranges)) => {
246+
range_contains(this_range, &other_ranges[0], self.size)
247+
&& range_contains(this_range, &other_ranges[1], self.size)
211248
}
212-
(true, false) => {
213-
self.valid_range.start <= other.valid_range.start
214-
|| self.valid_range.end >= other.valid_range.end
249+
(ValidityRange::Multiple(this_ranges), ValidityRange::Multiple(other_ranges)) => {
250+
let contains = (range_contains(&this_ranges[0], &other_ranges[0], self.size)
251+
|| range_contains(&this_ranges[1], &other_ranges[0], self.size))
252+
&& (range_contains(&this_ranges[0], &other_ranges[1], self.size)
253+
|| range_contains(&this_ranges[1], &other_ranges[1], self.size));
254+
// `Multiple` today only covers the `char` case.
255+
debug_assert!(
256+
contains,
257+
"Expected validity of `char` for Multiple ranges. Found: {self:?}, {other:?}"
258+
);
259+
contains
215260
}
216-
(false, true) => self.is_full(),
217261
}
218262
}
219263
}
220264

265+
/// Check if range `r1` contains range `r2`.
266+
///
267+
/// I.e., `r2` ⊆ `r1`
268+
fn range_contains(r1: &WrappingRange, r2: &WrappingRange, sz: MachineSize) -> bool {
269+
match (r1.wraps_around(), r2.wraps_around()) {
270+
(true, true) | (false, false) => r1.start <= r2.start && r1.end >= r2.end,
271+
(true, false) => r1.start <= r2.start || r1.end >= r2.end,
272+
(false, true) => r1.is_full(sz).unwrap(),
273+
}
274+
}
275+
221276
#[derive(AsRefStr, Clone, Debug)]
222277
enum SourceOp {
223278
/// Validity checks are done on a byte level when the Rvalue can generate invalid value.
@@ -763,8 +818,6 @@ pub fn build_limits(
763818
let span = source.span(body.blocks());
764819
debug!(?req, ?rvalue_ptr, ?span, "build_limits");
765820
let primitive_ty = uint_ty(req.size.bytes());
766-
let start_const = body.new_uint_operand(req.valid_range.start, primitive_ty, span);
767-
let end_const = body.new_uint_operand(req.valid_range.end, primitive_ty, span);
768821
let orig_ptr = if req.offset != 0 {
769822
let start_ptr =
770823
move_local(body.insert_assignment(rvalue_ptr, source, InsertPosition::Before));
@@ -799,6 +852,35 @@ pub fn build_limits(
799852
InsertPosition::Before,
800853
);
801854
let value = Operand::Copy(Place { local: value_ptr, projection: vec![ProjectionElem::Deref] });
855+
match &req.valid_range {
856+
ValidityRange::Single(range) => {
857+
build_single_limit(body, range, source, span, primitive_ty, value)
858+
}
859+
ValidityRange::Multiple([range1, range2]) => {
860+
// Build `let valid = range1.contains(value) || range2.contains(value);`
861+
let cond1 = build_single_limit(body, range1, source, span, primitive_ty, value.clone());
862+
let cond2 = build_single_limit(body, range2, source, span, primitive_ty, value);
863+
body.insert_binary_op(
864+
BinOp::BitOr,
865+
move_local(cond1),
866+
move_local(cond2),
867+
source,
868+
InsertPosition::Before,
869+
)
870+
}
871+
}
872+
}
873+
874+
fn build_single_limit(
875+
body: &mut MutableBody,
876+
range: &WrappingRange,
877+
source: &mut SourceInstruction,
878+
span: Span,
879+
primitive_ty: UintTy,
880+
value: Operand,
881+
) -> Local {
882+
let start_const = body.new_uint_operand(range.start, primitive_ty, span);
883+
let end_const = body.new_uint_operand(range.end, primitive_ty, span);
802884
let start_result = body.insert_binary_op(
803885
BinOp::Ge,
804886
value.clone(),
@@ -808,7 +890,7 @@ pub fn build_limits(
808890
);
809891
let end_result =
810892
body.insert_binary_op(BinOp::Le, value, end_const, source, InsertPosition::Before);
811-
if req.valid_range.wraps_around() {
893+
if range.wraps_around() {
812894
// valid >= start || valid <= end
813895
body.insert_binary_op(
814896
BinOp::BitOr,
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
Checking harness check_invalid_char_unit_wrapper_should_fail...
2+
- Status: UNREACHABLE\
3+
- Description: ""Unreachable code: Expected invalid char wrapper detection""
4+
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<char, ()>`
5+
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<(), char>`
6+
VERIFICATION:- FAILED
7+
8+
Checking harness check_invalid_char_nonzero_wrapper_should_fail...
9+
- Status: UNREACHABLE\
10+
- Description: ""Unreachable code: Expected invalid char / NonZero detection""
11+
Failed Checks: Undefined Behavior: Invalid value of type `TwoFields<char, std::num::NonZero<u32>>`
12+
VERIFICATION:- FAILED
13+
14+
Checking harness check_invalid_char_should_fail...
15+
- Status: UNREACHABLE\
16+
- Description: ""Unreachable code: Expected invalid char detection""
17+
Failed Checks: Undefined Behavior: Invalid value of type `char`
18+
VERIFICATION:- FAILED
19+
20+
21+
Checking harness check_valid_mixed_wrapper...
22+
VERIFICATION:- SUCCESSFUL
23+
24+
Checking harness check_valid_char_wrappers...
25+
VERIFICATION:- SUCCESSFUL
26+
27+
Checking harness cannot_dereference_invalid_char...
28+
VERIFICATION:- SUCCESSFUL
29+
30+
Checking harness check_char_ok...
31+
VERIFICATION:- SUCCESSFUL
32+
33+
Summary:
34+
Verification failed for - check_invalid_char_unit_wrapper_should_fail
35+
Verification failed for - check_invalid_char_nonzero_wrapper_should_fail
36+
Verification failed for - check_invalid_char_should_fail
37+
Complete - 4 successfully verified harnesses, 3 failures, 7 total.
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
// Copyright Kani Contributors
2+
// SPDX-License-Identifier: Apache-2.0 OR MIT
3+
// kani-flags: -Z valid-value-checks -Z mem-predicates
4+
//! Check that Kani can correctly identify value validity of `char` and structures with `char`.
5+
//! Note that we use the `black_box` hint to ensure the logic doesn't get removed as dead code.
6+
7+
use std::num::NonZeroU32;
8+
9+
#[repr(C)]
10+
#[derive(Copy, Clone, kani::Arbitrary)]
11+
struct OneField<T>(T);
12+
13+
#[repr(C)]
14+
#[derive(Copy, Clone, kani::Arbitrary)]
15+
struct TwoFields<T, U>(T, U);
16+
17+
#[repr(C)]
18+
#[derive(Copy, Clone, kani::Arbitrary)]
19+
struct ThreeFields<T, U, V>(T, U, V);
20+
21+
/// Check that valid u32's are all identified as valid.
22+
#[kani::proof]
23+
fn check_char_ok() {
24+
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
25+
assert!(kani::mem::can_dereference(&val as *const _ as *const char));
26+
let c1: char = unsafe { std::mem::transmute(val) };
27+
let c2 = unsafe { char::from_u32_unchecked(val) };
28+
let c3 = char::from_u32(val).unwrap();
29+
assert_eq!(c1, c2);
30+
assert_eq!(c2, c3);
31+
}
32+
33+
/// Check that all invalid u32's are identified as invalid.
34+
#[kani::proof]
35+
fn cannot_dereference_invalid_char() {
36+
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
37+
assert!(!kani::mem::can_dereference(&val as *const _ as *const char));
38+
}
39+
40+
/// Check that transmuting from invalid u32's triggers a UB check.
41+
#[kani::proof]
42+
fn check_invalid_char_should_fail() {
43+
let val = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
44+
let _ = if kani::any() {
45+
unsafe { char::from_u32_unchecked(val) }
46+
} else {
47+
unsafe { std::mem::transmute(val) }
48+
};
49+
assert!(false, "Unreachable code: Expected invalid char detection");
50+
}
51+
52+
#[kani::proof]
53+
fn check_valid_char_wrappers() {
54+
let v1 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
55+
let v2 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
56+
let v3 = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
57+
assert!(kani::mem::can_dereference(&OneField(v1) as *const _ as *const OneField<char>));
58+
assert!(kani::mem::can_dereference(
59+
&TwoFields(v1, v2) as *const _ as *const TwoFields<char, char>
60+
));
61+
assert!(kani::mem::can_dereference(
62+
&ThreeFields(v1, v2, v3) as *const _ as *const ThreeFields<char, char, char>
63+
));
64+
}
65+
66+
/// Ensure that we correctly identify validity of a structure with fields with different
67+
/// requirements.
68+
#[kani::proof]
69+
fn check_valid_mixed_wrapper() {
70+
let unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
71+
let non_zero = kani::any_where(|v: &u32| *v != 0);
72+
assert!(kani::mem::can_dereference(
73+
&TwoFields(unicode, non_zero) as *const _ as *const TwoFields<char, NonZeroU32>
74+
));
75+
assert!(kani::mem::can_dereference(
76+
&TwoFields(non_zero, unicode) as *const _ as *const TwoFields<NonZeroU32, char>
77+
));
78+
assert!(kani::mem::can_dereference(
79+
&TwoFields((), unicode) as *const _ as *const TwoFields<(), char>
80+
));
81+
}
82+
83+
/// Check that transmuting from invalid wrappers triggers a UB check failure.
84+
#[kani::proof]
85+
fn check_invalid_char_nonzero_wrapper_should_fail() {
86+
let unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_some());
87+
let non_unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
88+
let non_zero = kani::any_where(|v: &u32| *v != 0);
89+
let var: TwoFields<char, NonZeroU32> = if kani::any() {
90+
unsafe { std::mem::transmute(TwoFields(non_unicode, non_zero)) }
91+
} else {
92+
unsafe { std::mem::transmute(TwoFields(unicode, 0)) }
93+
};
94+
// Ensure the condition above does not get pruned.
95+
std::hint::black_box(var);
96+
assert!(false, "Unreachable code: Expected invalid char / NonZero detection");
97+
}
98+
99+
/// Check that transmuting from invalid wrappers triggers a UB check failure independent
100+
/// of the position of the unit field.
101+
#[kani::proof]
102+
fn check_invalid_char_unit_wrapper_should_fail() {
103+
let non_unicode = kani::any_where(|v: &u32| char::from_u32(*v).is_none());
104+
if kani::any() {
105+
let var: TwoFields<char, ()> = unsafe { std::mem::transmute(TwoFields(non_unicode, ())) };
106+
std::hint::black_box(var);
107+
} else {
108+
let var: TwoFields<(), char> = unsafe { std::mem::transmute(TwoFields((), non_unicode)) };
109+
std::hint::black_box(var);
110+
}
111+
assert!(false, "Unreachable code: Expected invalid char wrapper detection");
112+
}

0 commit comments

Comments
 (0)