Skip to content

Commit c1de3f7

Browse files
bors[bot] and lassade authored
Merge #161
161: expose the GodotString char slice via chars_checked and chars_unchecked r=Bromeon a=lassade Closes #152 Co-authored-by: Felipe Jorge <[email protected]>
2 parents 7a0415f + 4c2723a commit c1de3f7

File tree

3 files changed

+212
-1
lines changed

3 files changed

+212
-1
lines changed

godot-core/src/builtin/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ mod packed_array;
9393
mod projection;
9494
mod quaternion;
9595
mod string;
96+
mod string_chars;
9697
mod string_name;
9798
mod transform2d;
9899
mod transform3d;

godot-core/src/builtin/string.rs

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,10 @@ use godot_ffi as sys;
1010
use sys::types::OpaqueString;
1111
use sys::{ffi_methods, interface_fn, GodotFfi};
1212

13-
use super::{FromVariant, ToVariant, Variant, VariantConversionError};
13+
use super::{
14+
string_chars::validate_unicode_scalar_sequence, FromVariant, ToVariant, Variant,
15+
VariantConversionError,
16+
};
1417

1518
#[repr(C, align(8))]
1619
pub struct GodotString {
@@ -34,6 +37,35 @@ impl GodotString {
3437
fn string_sys = sys;
3538
fn write_string_sys = write_sys;
3639
}
40+
41+
/// Gets the internal chars slice from a [`GodotString`].
42+
///
43+
/// Note: This operation is *O*(*n*). Consider using [`chars_unchecked`]
44+
/// if you can make sure the string is a valid UTF-32.
45+
pub fn chars_checked(&self) -> &[char] {
46+
unsafe {
47+
let s = self.string_sys();
48+
let len = interface_fn!(string_to_utf32_chars)(s, std::ptr::null_mut(), 0);
49+
let ptr = interface_fn!(string_operator_index_const)(s, 0);
50+
51+
validate_unicode_scalar_sequence(std::slice::from_raw_parts(ptr, len as usize))
52+
.expect("GodotString::chars_checked: string contains invalid unicode scalar values")
53+
}
54+
}
55+
56+
/// Gets the internal chars slice from a [`GodotString`].
57+
///
58+
/// # Safety
59+
///
60+
/// Make sure the string only contains valid unicode scalar values, currently
61+
/// Godot allows for unpaired surrogates and out of range code points to be appended
62+
/// into the string.
63+
pub unsafe fn chars_unchecked(&self) -> &[char] {
64+
let s = self.string_sys();
65+
let len = interface_fn!(string_to_utf32_chars)(s, std::ptr::null_mut(), 0);
66+
let ptr = interface_fn!(string_operator_index_const)(s, 0);
67+
std::slice::from_raw_parts(ptr as *const char, len as usize)
68+
}
3769
}
3870

3971
impl GodotFfi for GodotString {
Lines changed: 178 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,178 @@
1+
/*
2+
* This Source Code Form is subject to the terms of the Mozilla Public
3+
* License, v. 2.0. If a copy of the MPL was not distributed with this
4+
* file, You can obtain one at https://mozilla.org/MPL/2.0/.
5+
*/
6+
7+
#[cfg(target_arch = "aarch64")]
8+
use std::arch::aarch64::*;
9+
#[cfg(target_arch = "x86")]
10+
use std::arch::x86::*;
11+
#[cfg(target_arch = "x86_64")]
12+
use std::arch::x86_64::*;
13+
14+
/// Validates that a [`u32`] slice contains only valid
/// [unicode scalar values](https://www.unicode.org/glossary/#unicode_scalar_value).
///
/// Returns the same memory viewed as a [`char`] slice when every element is a scalar value
/// (at most `char::MAX` and outside the surrogate range `0xD800..=0xDFFF`), or `None` if any
/// element is not. An empty slice is valid.
pub fn validate_unicode_scalar_sequence(seq: &[u32]) -> Option<&[char]> {
    /// Scalar check, used for the tail and on targets without the SSE2 fast path.
    fn all_scalar_values(codes: &[u32]) -> bool {
        codes.iter().all(|&code| char::from_u32(code).is_some())
    }

    // Fast path: validate four code points per iteration. Only SSE2 intrinsics are used and
    // the path is compiled only when SSE2 is statically enabled, so no runtime feature
    // detection is needed. `chunks_exact` keeps every access inside the slice.
    #[cfg(all(
        any(target_arch = "x86_64", target_arch = "x86"),
        target_feature = "sse2"
    ))]
    let tail = {
        let blocks = seq.chunks_exact(4);
        let tail = blocks.remainder();

        for block in blocks {
            // SAFETY: `block` holds exactly four `u32`s (16 readable bytes), the load does
            // not require alignment, and SSE2 is enabled by the surrounding `cfg`.
            let all_valid = unsafe {
                let lanes = _mm_loadu_si128(block.as_ptr() as *const __m128i);

                // SSE2 only offers signed comparisons: values with the top bit set look
                // negative and are rejected by the `> -1` test.
                let in_range = _mm_and_si128(
                    _mm_cmpgt_epi32(lanes, _mm_set1_epi32(-1)),
                    _mm_cmplt_epi32(lanes, _mm_set1_epi32(char::MAX as i32 + 1)),
                );

                // High- and low-surrogate code points.
                let surrogate = _mm_and_si128(
                    _mm_cmpgt_epi32(lanes, _mm_set1_epi32(0xD7FF)),
                    _mm_cmplt_epi32(lanes, _mm_set1_epi32(0xE000)),
                );

                // A lane is valid when it is in range and not a surrogate.
                _mm_movemask_epi8(_mm_andnot_si128(surrogate, in_range)) == 0xFFFF
            };

            if !all_valid {
                return None;
            }
        }

        tail
    };

    #[cfg(not(all(
        any(target_arch = "x86_64", target_arch = "x86"),
        target_feature = "sse2"
    )))]
    let tail = seq;

    if !all_scalar_values(tail) {
        return None;
    }

    // SAFETY: `char` has the same size and alignment as `u32`, and every element was just
    // checked to be a valid unicode scalar value; pointer and length come from `seq`.
    Some(unsafe { std::slice::from_raw_parts(seq.as_ptr() as *const char, seq.len()) })
}
94+
95+
#[cfg(test)]
mod tests {
    use super::validate_unicode_scalar_sequence;

    /// Simple pseudorandom number generator using the linear congruential method.
    struct Rand {
        state: u64,
    }

    impl Rand {
        const A: u64 = 6364136223846793005;
        const C: u64 = 1442695040888963407;

        fn new(seed: u64) -> Self {
            Self { state: seed }
        }

        fn next(&mut self) -> u32 {
            self.state = Self::A.wrapping_mul(self.state).wrapping_add(Self::C);
            self.state as u32
        }

        /// Returns a pseudorandom index in `0..bound`; `bound` must be non-zero.
        fn index_below(&mut self, bound: usize) -> usize {
            self.next() as usize % bound
        }

        /// Builds up to 80 pseudorandom valid unicode scalar values (possibly none).
        fn scalar_values(&mut self) -> Vec<u32> {
            let len = (self.next() % 128).min(80);
            (0..len)
                .map(|_| self.next() % (char::MAX as u32))
                .filter(|&code| char::from_u32(code).is_some())
                .collect()
        }
    }

    /// Inserts four values produced by `make_invalid` at random positions of a valid
    /// sequence and checks that validation rejects the result.
    fn assert_rejected_after_inserting(mut make_invalid: impl FnMut(&mut Rand) -> u32) {
        let mut rand = Rand::new(0xA102FE1);
        for _ in 0..16 {
            let mut chars = rand.scalar_values();

            for _ in 0..4 {
                let invalid = make_invalid(&mut rand);
                assert!(char::from_u32(invalid).is_none());

                // `len + 1` allows insertion at the end and avoids `% 0` on an empty vector.
                let position = rand.index_below(chars.len() + 1);
                chars.insert(position, invalid);
            }

            assert!(validate_unicode_scalar_sequence(chars.as_slice()).is_none());
        }
    }

    #[test]
    fn check_valid_unicode() {
        let mut rand = Rand::new(0xA102FE1);
        for _ in 0..16 {
            let chars = rand.scalar_values();

            assert!(validate_unicode_scalar_sequence(chars.as_slice()).is_some());
        }
    }

    #[test]
    fn check_unpaired_surrogate_unicode() {
        assert_rejected_after_inserting(|rand| rand.next() % (0xE000 - 0xD800) + 0xD800);
    }

    #[test]
    fn check_out_of_range_unicode() {
        // `char::MAX` itself is a valid scalar value, so the range starts one above it.
        assert_rejected_after_inserting(|rand| {
            rand.next() % (u32::MAX - char::MAX as u32) + char::MAX as u32 + 1
        });
    }

    #[test]
    fn check_boundaries_and_short_inputs() {
        let empty: &[u32] = &[];
        assert!(validate_unicode_scalar_sequence(empty).is_some());

        let valid = [0u32, 0xD7FF, 0xE000, char::MAX as u32, 0x61];
        assert!(validate_unicode_scalar_sequence(&valid).is_some());

        // Exercise every position for lengths around the four-element block size.
        let invalid = [0xD800u32, 0xDFFF, char::MAX as u32 + 1, u32::MAX];
        for len in 1..=9usize {
            for position in 0..len {
                for &bad in invalid.iter() {
                    let mut seq = vec![0x61u32; len];
                    seq[position] = bad;
                    assert!(validate_unicode_scalar_sequence(&seq).is_none());
                }
            }
        }
    }
}

0 commit comments

Comments
 (0)