Skip to content

Commit 718073e

Browse files
committed
non-serde conversion
1 parent 95774f9 commit 718073e

File tree

12 files changed

+192
-48
lines changed

12 files changed

+192
-48
lines changed

src/bson.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ use std::{
3131
use serde_json::{json, Value};
3232

3333
pub use crate::document::Document;
34-
use crate::{base64, oid, spec::ElementType, Binary, Decimal128};
34+
use crate::{base64, oid, raw::CString, spec::ElementType, Binary, Decimal128};
3535

3636
/// Possible BSON value types.
3737
#[derive(Clone, Default, PartialEq)]
@@ -480,14 +480,14 @@ impl Bson {
480480
Bson::Boolean(v) => json!(v),
481481
Bson::Null => Value::Null,
482482
Bson::RegularExpression(Regex { pattern, options }) => {
483-
let mut chars: Vec<_> = options.chars().collect();
483+
let mut chars: Vec<_> = options.as_str().chars().collect();
484484
chars.sort_unstable();
485485

486486
let options: String = chars.into_iter().collect();
487487

488488
json!({
489489
"$regularExpression": {
490-
"pattern": pattern,
490+
"pattern": pattern.into_string(),
491491
"options": options,
492492
}
493493
})
@@ -1147,7 +1147,7 @@ impl Timestamp {
11471147
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
11481148
pub struct Regex {
11491149
/// The regex pattern to match.
1150-
pub pattern: String,
1150+
pub pattern: CString,
11511151

11521152
/// The options for the regex.
11531153
///
@@ -1156,18 +1156,22 @@ pub struct Regex {
11561156
/// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
11571157
/// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
11581158
/// unicode.
1159-
pub options: String,
1159+
pub options: CString,
11601160
}
11611161

11621162
impl Regex {
1163-
pub(crate) fn new(pattern: impl AsRef<str>, options: impl AsRef<str>) -> Self {
1163+
#[cfg(test)]
1164+
pub(crate) fn new(
1165+
pattern: impl AsRef<str>,
1166+
options: impl AsRef<str>,
1167+
) -> crate::error::Result<Self> {
11641168
let mut chars: Vec<_> = options.as_ref().chars().collect();
11651169
chars.sort_unstable();
11661170
let options: String = chars.into_iter().collect();
1167-
Self {
1168-
pattern: pattern.as_ref().to_string(),
1169-
options,
1170-
}
1171+
Ok(Self {
1172+
pattern: pattern.as_ref().to_string().try_into()?,
1173+
options: options.try_into()?,
1174+
})
11711175
}
11721176
}
11731177

src/raw.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ mod array;
116116
mod array_buf;
117117
mod bson;
118118
mod bson_ref;
119+
mod cstr;
119120
mod document;
120121
mod document_buf;
121122
mod iter;
@@ -142,6 +143,7 @@ pub use self::{
142143
RawJavaScriptCodeWithScopeRef,
143144
RawRegexRef,
144145
},
146+
cstr::{assert_valid_cstr, cstr, validate_cstr, CStr, CString, IsValidCStr},
145147
document::RawDocument,
146148
document_buf::{BindRawBsonRef, BindValue, RawDocumentBuf},
147149
iter::{RawElement, RawIter},

src/raw/bson.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -204,8 +204,8 @@ impl RawBson {
204204
pub fn as_regex(&self) -> Option<RawRegexRef<'_>> {
205205
match self {
206206
RawBson::RegularExpression(v) => Some(RawRegexRef {
207-
pattern: v.pattern.as_str(),
208-
options: v.options.as_str(),
207+
pattern: v.pattern.as_ref(),
208+
options: v.options.as_ref(),
209209
}),
210210
_ => None,
211211
}
@@ -289,8 +289,8 @@ impl RawBson {
289289
RawBson::Boolean(b) => RawBsonRef::Boolean(*b),
290290
RawBson::Null => RawBsonRef::Null,
291291
RawBson::RegularExpression(re) => RawBsonRef::RegularExpression(RawRegexRef {
292-
options: re.options.as_str(),
293-
pattern: re.pattern.as_str(),
292+
options: re.options.as_ref(),
293+
pattern: re.pattern.as_ref(),
294294
}),
295295
RawBson::JavaScriptCode(c) => RawBsonRef::JavaScriptCode(c.as_str()),
296296
RawBson::JavaScriptCodeWithScope(code_w_scope) => {

src/raw/bson_ref.rs

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ use std::convert::{TryFrom, TryInto};
33
use super::{bson::RawBson, Error, RawArray, RawDocument, Result};
44
use crate::{
55
oid::{self, ObjectId},
6-
raw::{write_cstring, write_string, RawJavaScriptCodeWithScope},
6+
raw::{write_string, CStr, RawJavaScriptCodeWithScope},
77
spec::{BinarySubtype, ElementType},
88
Binary,
99
Bson,
@@ -255,9 +255,10 @@ impl<'a> RawBsonRef<'a> {
255255
RawBsonRef::Document(d) => RawBson::Document(d.to_owned()),
256256
RawBsonRef::Boolean(b) => RawBson::Boolean(b),
257257
RawBsonRef::Null => RawBson::Null,
258-
RawBsonRef::RegularExpression(re) => {
259-
RawBson::RegularExpression(Regex::new(re.pattern, re.options))
260-
}
258+
RawBsonRef::RegularExpression(re) => RawBson::RegularExpression(Regex {
259+
pattern: re.pattern.into(),
260+
options: re.options.into(),
261+
}),
261262
RawBsonRef::JavaScriptCode(c) => RawBson::JavaScriptCode(c.to_owned()),
262263
RawBsonRef::JavaScriptCodeWithScope(c_w_s) => {
263264
RawBson::JavaScriptCodeWithScope(RawJavaScriptCodeWithScope {
@@ -306,8 +307,8 @@ impl<'a> RawBsonRef<'a> {
306307
Self::Document(raw_document) => dest.extend(raw_document.as_bytes()),
307308
Self::Boolean(b) => dest.push(b as u8),
308309
Self::RegularExpression(re) => {
309-
write_cstring(dest, re.pattern)?;
310-
write_cstring(dest, re.options)?;
310+
re.pattern.append_to(dest);
311+
re.options.append_to(dest);
311312
}
312313
Self::JavaScriptCode(js) => write_string(dest, js),
313314
Self::JavaScriptCodeWithScope(code_w_scope) => {
@@ -592,7 +593,7 @@ impl<'a> From<&'a Binary> for RawBsonRef<'a> {
592593
#[derive(Clone, Copy, Debug, PartialEq)]
593594
pub struct RawRegexRef<'a> {
594595
/// The regex pattern to match.
595-
pub pattern: &'a str,
596+
pub pattern: &'a CStr,
596597

597598
/// The options for the regex.
598599
///
@@ -601,7 +602,7 @@ pub struct RawRegexRef<'a> {
601602
/// multiline matching, 'x' for verbose mode, 'l' to make \w, \W, etc. locale dependent,
602603
/// 's' for dotall mode ('.' matches everything), and 'u' to make \w, \W, etc. match
603604
/// unicode.
604-
pub options: &'a str,
605+
pub options: &'a CStr,
605606
}
606607

607608
#[cfg(feature = "serde")]

src/raw/cstr.rs

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
use core::str;
2+
3+
use crate::error::{Error, Result};
4+
5+
// A BSON-spec cstring: Zero or more UTF-8 encoded characters, excluding the null byte.
6+
#[derive(Debug)]
7+
#[repr(transparent)]
8+
pub struct CStr {
9+
data: [u8],
10+
}
11+
12+
impl<'a> TryFrom<&'a str> for &'a CStr {
13+
type Error = Error;
14+
15+
fn try_from(value: &str) -> Result<&CStr> {
16+
match validate_cstr(value) {
17+
Some(cs) => Ok(cs),
18+
None => Err(Error::malformed_bytes(format!(
19+
"cstring with interior null: {:?}",
20+
value,
21+
))),
22+
}
23+
}
24+
}
25+
26+
impl CStr {
27+
const fn from_str_unchecked(value: &str) -> &Self {
28+
// Safety: the conversion is safe because CStr is repr(transparent), and the deref is safe
29+
// because the pointer came from a safe reference.
30+
unsafe { &*(value.as_bytes() as *const [u8] as *const CStr) }
31+
}
32+
33+
pub fn as_str(&self) -> &str {
34+
// Safety: the only way to construct a CStr is from a valid &str.
35+
unsafe { str::from_utf8_unchecked(&self.data) }
36+
}
37+
38+
pub fn len(&self) -> usize {
39+
self.as_str().len()
40+
}
41+
42+
pub(crate) fn append_to(&self, buf: &mut Vec<u8>) {
43+
buf.extend(&self.data);
44+
buf.push(0);
45+
}
46+
}
47+
48+
impl<'a, 'b> PartialEq<&'b CStr> for &'a CStr {
49+
fn eq(&self, other: &&CStr) -> bool {
50+
self.as_str() == other.as_str()
51+
}
52+
}
53+
54+
#[diagnostic::on_unimplemented(message = "the string literal contains a zero byte")]
55+
pub trait ValidCStr {}
56+
pub struct IsValidCStr<const VALID: bool>;
57+
impl ValidCStr for IsValidCStr<true> {}
58+
59+
#[derive(Clone, Eq, PartialEq, Hash)]
60+
#[repr(transparent)]
61+
pub struct CString {
62+
data: String,
63+
}
64+
65+
pub const fn validate_cstr(text: &str) -> Option<&CStr> {
66+
let bytes = text.as_bytes();
67+
let mut i = 0;
68+
while i < bytes.len() {
69+
if bytes[i] == 0 {
70+
return None;
71+
}
72+
i += 1;
73+
}
74+
Some(CStr::from_str_unchecked(text))
75+
}
76+
pub const fn assert_valid_cstr<T: ValidCStr>() {}
77+
78+
#[macro_export]
79+
macro_rules! cstr {
80+
($text:expr) => {{
81+
const VALIDATED: Option<&$crate::raw::CStr> = $crate::raw::validate_cstr($text);
82+
const VALID: bool = VALIDATED.is_some();
83+
$crate::raw::assert_valid_cstr::<$crate::raw::IsValidCStr<VALID>>();
84+
VALIDATED.unwrap()
85+
}};
86+
}
87+
pub use cstr;
88+
89+
impl TryFrom<String> for CString {
90+
type Error = Error;
91+
92+
fn try_from(data: String) -> Result<Self> {
93+
let _: &CStr = data.as_str().try_into()?;
94+
Ok(Self { data })
95+
}
96+
}
97+
98+
impl CString {
99+
pub fn into_string(self) -> String {
100+
self.data
101+
}
102+
103+
pub fn as_str(&self) -> &str {
104+
self.as_ref().as_str()
105+
}
106+
}
107+
108+
impl From<&CStr> for CString {
109+
fn from(value: &CStr) -> Self {
110+
Self {
111+
data: value.as_str().into(),
112+
}
113+
}
114+
}
115+
116+
impl AsRef<CStr> for CString {
117+
fn as_ref(&self) -> &CStr {
118+
CStr::from_str_unchecked(self.data.as_str())
119+
}
120+
}
121+
122+
impl std::fmt::Debug for CString {
123+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
124+
self.data.fmt(f)
125+
}
126+
}
127+
128+
impl std::fmt::Display for CString {
129+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
130+
self.data.fmt(f)
131+
}
132+
}

src/raw/document.rs

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use std::{
55

66
use crate::{
77
error::{Error, Result},
8+
raw::CStr,
89
Bson,
910
DateTime,
1011
JavaScriptCodeWithScope,
@@ -505,9 +506,10 @@ impl RawDocument {
505506
}
506507
}
507508

508-
pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&str> {
509+
pub(crate) fn read_cstring_at(&self, start_at: usize) -> RawResult<&CStr> {
509510
let bytes = self.cstring_bytes_at(start_at)?;
510-
try_to_str(bytes)
511+
let s = try_to_str(bytes)?;
512+
s.try_into()
511513
}
512514

513515
/// Copy this into a [`Document`], returning an error if invalid BSON is encountered.

src/raw/iter.rs

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@ use std::convert::TryInto;
22

33
use crate::{
44
oid::ObjectId,
5-
raw::{Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
5+
raw::{CStr, Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
66
spec::{BinarySubtype, ElementType},
77
Bson,
88
DateTime,
@@ -50,7 +50,7 @@ impl<'a> Iterator for Iter<'a> {
5050
match self.inner.next() {
5151
Some(Ok(elem)) => match elem.value() {
5252
Err(e) => Some(Err(e)),
53-
Ok(value) => Some(Ok((elem.key, value))),
53+
Ok(value) => Some(Ok((elem.key.as_str(), value))),
5454
},
5555
Some(Err(e)) => Some(Err(e)),
5656
None => None,
@@ -111,7 +111,7 @@ impl<'a> RawIter<'a> {
111111

112112
#[derive(Clone)]
113113
pub struct RawElement<'a> {
114-
key: &'a str,
114+
key: &'a CStr,
115115
kind: ElementType,
116116
doc: &'a RawDocument,
117117
start_at: usize,
@@ -160,7 +160,7 @@ impl<'a> RawElement<'a> {
160160
}
161161

162162
pub fn key(&self) -> &'a str {
163-
self.key
163+
self.key.as_str()
164164
}
165165

166166
pub fn element_type(&self) -> ElementType {
@@ -305,19 +305,20 @@ impl<'a> RawElement<'a> {
305305
String::from_utf8_lossy(self.doc.cstring_bytes_at(self.start_at)?).into_owned();
306306
let pattern_len = pattern.len();
307307
Utf8LossyBson::RegularExpression(crate::Regex {
308-
pattern,
308+
pattern: pattern.try_into()?,
309309
options: String::from_utf8_lossy(
310310
self.doc.cstring_bytes_at(self.start_at + pattern_len + 1)?,
311311
)
312-
.into_owned(),
312+
.into_owned()
313+
.try_into()?,
313314
})
314315
}
315316
_ => return Ok(None),
316317
}))
317318
}
318319

319320
fn malformed_error(&self, e: impl ToString) -> Error {
320-
Error::malformed_bytes(e).with_key(self.key)
321+
Error::malformed_bytes(e).with_key(self.key.as_str())
321322
}
322323

323324
pub(crate) fn slice(&self) -> &'a [u8] {
@@ -344,7 +345,7 @@ impl<'a> RawElement<'a> {
344345
Ok(ObjectId::from_bytes(
345346
self.doc.as_bytes()[start_at..(start_at + 12)]
346347
.try_into()
347-
.map_err(|e| Error::malformed_bytes(e).with_key(self.key))?,
348+
.map_err(|e| Error::malformed_bytes(e).with_key(self.key.as_str()))?,
348349
))
349350
}
350351
}
@@ -443,7 +444,7 @@ impl<'a> Iterator for RawIter<'a> {
443444
}),
444445
Err(error) => {
445446
self.valid = false;
446-
Err(error.with_key(key))
447+
Err(error.with_key(key.as_str()))
447448
}
448449
})
449450
}

0 commit comments

Comments
 (0)