diff --git a/.rustfmt.toml b/.rustfmt.toml new file mode 100644 index 0000000..250124b --- /dev/null +++ b/.rustfmt.toml @@ -0,0 +1,5 @@ +unstable_features = true + +use_small_heuristics = "max" +imports_granularity = "Module" +group_imports = "StdExternalCrate" diff --git a/Cargo.toml b/Cargo.toml index 6d2995d..2450588 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,11 +11,10 @@ readme = "README.md" default-run = "ojc" [dependencies] -packed_simd = { version = "0.3.3", optional = true } [dev-dependencies] snap = "1.0.0" # for the lib.rs example [features] default = [] -nightly = ["packed_simd"] +nightly = [] diff --git a/README.md b/README.md index 3781848..17e1fb3 100644 --- a/README.md +++ b/README.md @@ -42,7 +42,7 @@ sys 0m12.628s ### `ojc` with SIMD -How many times does it takes to `ojc` already? 56s, that can't be true, we are in 2020... +How many times does it takes to `ojc` already? 56s, that can't be true, we are in 2022... What about enabling some SIMD optimizations? Compile the binary with the `nightly` feature and here we go! ```bash diff --git a/src/bin/ojc.rs b/src/bin/ojc.rs index b306cad..052d85f 100644 --- a/src/bin/ojc.rs +++ b/src/bin/ojc.rs @@ -1,4 +1,5 @@ use std::io; + use oxidized_json_checker::JsonChecker; fn fmain() -> io::Result<()> { diff --git a/src/internals.rs b/src/internals.rs index 9636bf3..75f7862 100644 --- a/src/internals.rs +++ b/src/internals.rs @@ -20,7 +20,7 @@ pub enum Class { CPlus, // + CMinus, // - CPoint, // . - CZero , // 0 + CZero, // 0 CDigit, // 123456789 CLowA, // a CLowB, // b @@ -43,6 +43,7 @@ pub enum Class { /// This array maps the 128 ASCII characters into character classes. /// The remaining Unicode characters should be mapped to C_ETC. /// Non-whitespace control characters are errors. +#[rustfmt::skip] pub const ASCII_CLASS: [Class; 128] = [ ___, ___, ___, ___, ___, ___, ___, ___, ___, CWhite, CWhite, ___, ___, CWhite, ___, ___, @@ -111,6 +112,7 @@ pub enum State { } // Number of states by number of classes +#[rustfmt::skip] pub const STATE_TRANSITION_TABLE: [[State; 31]; 31] = [ /* The state transition table takes the current state and the current symbol, diff --git a/src/lib.rs b/src/lib.rs index 0008da2..cdcff12 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -76,13 +76,15 @@ //! ``` //! +#![cfg_attr(feature = "nightly", feature(portable_simd))] + use std::{fmt, io}; -use crate::internals::{State, Class, Mode}; -use crate::internals::{STATE_TRANSITION_TABLE, ASCII_CLASS}; +use crate::internals::{Class, Mode, State, ASCII_CLASS, STATE_TRANSITION_TABLE}; + +mod internals; #[cfg(test)] mod tests; -mod internals; /// The error type returned by the `JsonChecker` type. #[derive(Copy, Clone, Debug)] @@ -234,15 +236,15 @@ impl JsonChecker { #[inline] #[cfg(feature = "nightly")] fn next_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> { - use packed_simd::u8x8; + use core::simd::u8x8; // TODO use chunks_exact instead? // By using u8x8 instead of u8x16 we lost 2s on 16s but // we are less prone to find state change requirements. - for chunk in bytes.chunks(u8x8::lanes()) { - if chunk.len() == u8x8::lanes() && self.state == State::St { + for chunk in bytes.chunks(u8x8::LANES) { + if chunk.len() == u8x8::LANES && self.state == State::St { // Load the bytes into a SIMD type - let bytes = u8x8::from_slice_unaligned(chunk); + let bytes = u8x8::from_slice(chunk); // According to the state STATE_TRANSITION_TABLE we are in the `St` state // and *none of those bytes* are in the `CWhite`, `CQuote` or `CBacks` ascci class @@ -258,18 +260,17 @@ impl JsonChecker { // We first compare with quotes because this is the most // common character we can encounter in valid JSON strings // and this way we are able to skip other comparisons faster - if bytes.eq(cquotes).any() || - bytes.eq(cbacks).any() || - bytes.eq(cwhites1).any() || - bytes.eq(cwhites2).any() || - bytes.eq(cwhites3).any() + if bytes.lanes_eq(cquotes).any() + || bytes.lanes_eq(cbacks).any() + || bytes.lanes_eq(cwhites1).any() + || bytes.lanes_eq(cwhites2).any() + || bytes.lanes_eq(cwhites3).any() { chunk.iter().try_for_each(|b| self.next_byte(*b))?; } // Now that we checked that these bytes will not change // the state we can continue to the next chunk and ignore them - } else { chunk.iter().try_for_each(|b| self.next_byte(*b))?; } @@ -293,11 +294,8 @@ impl JsonChecker { // We can potentially use try_blocks in the future. fn internal_next_byte(jc: &mut JsonChecker, next_byte: u8) -> Result<(), Error> { // Determine the character's class. - let next_class = if next_byte >= 128 { - Class::CEtc - } else { - ASCII_CLASS[next_byte as usize] - }; + let next_class = + if next_byte >= 128 { Class::CEtc } else { ASCII_CLASS[next_byte as usize] }; if next_class == Class::Invalid { return Err(Error::InvalidCharacter); @@ -321,55 +319,61 @@ impl JsonChecker { } match next_state { - State::Wec => { // Empty } + State::Wec => { + // Empty } if !jc.pop(Mode::Key) { return Err(Error::EmptyCurlyBraces); } jc.state = State::Ok; - }, - State::Wcu => { // } + } + State::Wcu => { + // } if !jc.pop(Mode::Object) { return Err(Error::OrphanCurlyBrace); } jc.state = State::Ok; - }, - State::Ws => { // ] + } + State::Ws => { + // ] if !jc.pop(Mode::Array) { return Err(Error::OrphanSquareBrace); } jc.state = State::Ok; - }, - State::Woc => { // { + } + State::Woc => { + // { if !jc.push(Mode::Key) { return Err(Error::MaxDepthReached); } jc.state = State::Ob; - }, - State::Wos => { // [ + } + State::Wos => { + // [ if !jc.push(Mode::Array) { return Err(Error::MaxDepthReached); } jc.state = State::Ar; } - State::Wq => { // " + State::Wq => { + // " match jc.stack.last() { Some(Mode::Done) => { if !jc.push(Mode::String) { return Err(Error::MaxDepthReached); } jc.state = State::St; - }, + } Some(Mode::String) => { jc.pop(Mode::String); jc.state = State::Ok; - }, + } Some(Mode::Key) => jc.state = State::Co, - Some(Mode::Array) | - Some(Mode::Object) => jc.state = State::Ok, + Some(Mode::Array) | Some(Mode::Object) => jc.state = State::Ok, _ => return Err(Error::InvalidQuote), } - }, - State::Wcm => { // , + } + State::Wcm => { + // , match jc.stack.last() { Some(Mode::Object) => { // A comma causes a flip from object mode to key mode. @@ -381,17 +385,16 @@ impl JsonChecker { Some(Mode::Array) => jc.state = State::Va, _ => return Err(Error::InvalidComma), } - }, - State::Wcl => { // : + } + State::Wcl => { + // : // A colon causes a flip from key mode to object mode. if !jc.pop(Mode::Key) || !jc.push(Mode::Object) { return Err(Error::InvalidColon); } jc.state = State::Va; - }, - State::Invalid => { - return Err(Error::InvalidState) - }, + } + State::Invalid => return Err(Error::InvalidState), // Or change the state. state => jc.state = state, @@ -429,7 +432,7 @@ impl JsonChecker { if is_state_valid && self.pop(Mode::Done) { let outer_type = self.outer_type.expect("BUG: the outer type must have been guessed"); - return Ok((self.reader, outer_type)) + return Ok((self.reader, outer_type)); } // We do not need to catch this error to *fuse* the checker because this method @@ -467,7 +470,7 @@ impl io::Read for JsonChecker { // type instead we use the IncompleteElement error. self.error = Some(Error::IncompleteElement); return Err(error); - }, + } Ok(len) => len, }; diff --git a/src/tests.rs b/src/tests.rs index eb1cf45..c2fb5fd 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -1,4 +1,5 @@ use std::io::Read; + use crate::*; fn parse(text: &str) -> io::Result { @@ -246,7 +247,8 @@ fn pass_single_null() { #[test] fn pass_1() { - let outer_type = parse(r##" + let outer_type = parse( + r##" [ "JSON Test Pattern pass1", @@ -307,7 +309,9 @@ fn pass_1() { 1e00,2e+00,2e-00 ,"rosebud"] - "##).unwrap(); + "##, + ) + .unwrap(); assert_eq!(outer_type, JsonType::Array); } @@ -321,7 +325,8 @@ fn pass_2() { #[test] fn pass_3() { - let outer_type = parse(r#" + let outer_type = parse( + r#" { "JSON Test Pattern pass3": { @@ -330,7 +335,9 @@ fn pass_3() { } } - "#).unwrap(); + "#, + ) + .unwrap(); assert_eq!(outer_type, JsonType::Object); }