From d2c02ea8fa25cb1920b1f55e99bf9e974131c2b3 Mon Sep 17 00:00:00 2001 From: Ben Lubas Date: Sat, 16 Nov 2024 19:40:52 -0500 Subject: [PATCH 1/4] feat: working metadata parser --- Cargo.lock | 159 ++++++++++++++++++ Cargo.toml | 2 + src/error.rs | 7 + src/lib.rs | 11 +- src/metadata/mod.rs | 114 +++++++++++++ .../rust_norg__metadata__tests__arrays.snap | 20 +++ ...org__metadata__tests__common_metadata.snap | 45 +++++ .../rust_norg__metadata__tests__keys.snap | 25 +++ src/metadata/stage_1.rs | 136 +++++++++++++++ src/stage_2.rs | 2 +- test.txt | 19 +++ 11 files changed, 534 insertions(+), 6 deletions(-) create mode 100644 src/metadata/mod.rs create mode 100644 src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap create mode 100644 src/metadata/snapshots/rust_norg__metadata__tests__common_metadata.snap create mode 100644 src/metadata/snapshots/rust_norg__metadata__tests__keys.snap create mode 100644 src/metadata/stage_1.rs create mode 100644 test.txt diff --git a/Cargo.lock b/Cargo.lock index e664f2a..6edf3c8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,6 +20,21 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "autocfg" version = "1.3.0" @@ -47,6 +62,12 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" +[[package]] +name = "bumpalo" +version = "3.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" + [[package]] name = "cc" version = "1.0.97" @@ -59,6 +80,21 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-targets", +] + [[package]] name = "chumsky" version = "0.9.3" @@ -81,6 +117,12 @@ dependencies = [ "windows-sys", ] +[[package]] +name = "core-foundation-sys" +version = "0.8.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" + [[package]] name = "either" version = "1.11.0" @@ -136,6 +178,29 @@ dependencies = [ "allocator-api2", ] +[[package]] +name = "iana-time-zone" +version = "0.1.61" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "insta" version = "1.39.0" @@ -158,6 +223,15 @@ dependencies = [ "either", ] +[[package]] +name = "js-sys" +version = "0.3.72" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" +dependencies = [ + "wasm-bindgen", +] + [[package]] name = "lazy_static" version = "1.4.0" @@ -188,6 +262,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "log" +version = "0.4.22" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" + [[package]] name = "num-traits" version = "0.2.19" @@ -204,6 +284,17 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "ordered-float" +version = "4.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" +dependencies = [ + "num-traits", + "rand", + "serde", +] + [[package]] name = "ppv-lite86" version = "0.2.17" @@ -272,6 +363,7 @@ dependencies = [ "libc", "rand_chacha", "rand_core", + "serde", ] [[package]] @@ -291,6 +383,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", + "serde", ] [[package]] @@ -312,9 +405,11 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" name = "rust-norg" version = "0.1.0" dependencies = [ + "chrono", "chumsky", "insta", "itertools", + "ordered-float", "proptest", "serde", "unicode_categories", @@ -446,6 +541,61 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +[[package]] +name = "wasm-bindgen" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" +dependencies = [ + "cfg-if", + "once_cell", + "wasm-bindgen-macro", +] + +[[package]] +name = "wasm-bindgen-backend" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" +dependencies = [ + "bumpalo", + "log", + "once_cell", + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-macro" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" +dependencies = [ + "quote", + "wasm-bindgen-macro-support", +] + +[[package]] +name = "wasm-bindgen-macro-support" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "wasm-bindgen-backend", + "wasm-bindgen-shared", +] + +[[package]] +name = "wasm-bindgen-shared" +version = "0.2.95" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" + [[package]] name = "winapi" version = "0.3.9" @@ -468,6 +618,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index b0c334e..95463ce 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,8 +4,10 @@ version = "0.1.0" edition = "2021" [dependencies] +chrono = { version = "0.4.38", features = ["serde"] } chumsky = "0.9.3" itertools = "0.13.0" +ordered-float = { version = "4.5.0", features = ["serde"] } serde = { version = "1.0.203", features = ["derive"] } unicode_categories = "0.1.1" diff --git a/src/error.rs b/src/error.rs index 7418348..5ae8c81 100644 --- a/src/error.rs +++ b/src/error.rs @@ -9,6 +9,7 @@ pub enum NorgParseError { Stage2(Vec>), Stage3(Vec>), Stage4(Vec>), + Meta(Simple), } impl From>> for NorgParseError { @@ -34,3 +35,9 @@ impl From>> for NorgParseError { NorgParseError::Stage4(error) } } + +impl From> for NorgParseError { + fn from(error: Simple) -> Self { + NorgParseError::Meta(error) + } +} diff --git a/src/lib.rs b/src/lib.rs index 72b9e7a..218b4f9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -10,6 +10,7 @@ pub use crate::stage_3::*; pub use crate::stage_4::NorgAST; mod error; +pub mod metadata; mod stage_1; mod stage_2; mod stage_3; @@ -101,11 +102,11 @@ mod tests { * Back to regular heading ", ] - .into_iter() - .map(|example| example.to_string() + "\n") - .map(|str| parse_tree(&str)) - .try_collect() - .unwrap(); + .into_iter() + .map(|example| example.to_string() + "\n") + .map(|str| parse_tree(&str)) + .try_collect() + .unwrap(); assert_yaml_snapshot!(headings_tree_examples); } diff --git a/src/metadata/mod.rs b/src/metadata/mod.rs new file mode 100644 index 0000000..c5d9448 --- /dev/null +++ b/src/metadata/mod.rs @@ -0,0 +1,114 @@ +use chumsky::Parser; +pub use stage_1::NorgMeta; + +use crate::error::NorgParseError; + +pub mod stage_1; + +/// Parses the given input string to produce an AST for the metadata +pub fn parse_metadata(input: &str) -> Result { + // don't ask me why this fixes it. I don't even care + let processed = input.replace("]", " ]"); + let processed = format!("{{\n{}\n}}\n", processed.trim()); + Ok(stage_1::meta_parser().parse(processed)?) +} + +#[cfg(test)] +mod tests { + use insta::assert_yaml_snapshot; + use itertools::Itertools; + + use crate::metadata::parse_metadata; + + #[test] + fn common_metadata() { + let examples: Vec<_> = [ + " + title: Sunday November 17, 2024 + description: We Cooked + authors: benlubas + categories: journal + created: 2024-11-18 + updated: 2024-11-18T17:58:21-0500 + version: 1.1.1 + ", + " + title: Neorg Extras + description: Extra lua code to configure Neorg + authors: benlubas + categories: [ + neorg + nvim + config + ] + tangle: { + languages: { + lua: ~/github/.dotfiles/nvim/lua/benlubas/neorg/extras.lua + } + delimiter: heading + } + created: 2024-05-03T13:36:42-0500 + updated: 2024-10-27T11:12:32-0500 + version: 1.1.1 + ", + ] + .into_iter() + .map(|example| example.to_string() + "\n") + .map(|str| parse_metadata(&str)) + .try_collect() + .unwrap(); + + assert_yaml_snapshot!(examples); + } + + #[test] + fn arrays() { + let examples: Vec<_> = [ + "empty_arr: []", + " + categories: [ + one + two + 45 + ]", + " + arr: [ + arrays can contain a ton of shit + 5 + -5 + 6.02e27 + ]", + ] + .into_iter() + .map(|example| example.to_string() + "\n") + .map(|str| parse_metadata(&str)) + .try_collect() + .unwrap(); + + assert_yaml_snapshot!(examples); + } + + #[test] + fn keys() { + let examples: Vec<_> = [ + "key: value", + "x:y", + "x:5", + "x:-4", + "x:-4b", + " + arr: [ + is this okay? + ] + huh: string + ", + ] + .into_iter() + .map(|example| example.to_string() + "\n") + .map(|str| parse_metadata(&str)) + .try_collect() + .unwrap(); + + assert_yaml_snapshot!(examples); + } +} diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap new file mode 100644 index 0000000..7dcdbaa --- /dev/null +++ b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap @@ -0,0 +1,20 @@ +--- +source: src/metadata/mod.rs +expression: examples +--- +- Object: + empty_arr: + Array: [] +- Object: + categories: + Array: + - Str: one + - Str: two + - Num: 45 +- Object: + arr: + Array: + - Str: arrays can contain a ton of shit + - Num: 5 + - Num: -5 + - Num: 6020000000000000000000000000 diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__common_metadata.snap b/src/metadata/snapshots/rust_norg__metadata__tests__common_metadata.snap new file mode 100644 index 0000000..a932c75 --- /dev/null +++ b/src/metadata/snapshots/rust_norg__metadata__tests__common_metadata.snap @@ -0,0 +1,45 @@ +--- +source: src/metadata/mod.rs +expression: examples +--- +- Object: + authors: + Str: benlubas + categories: + Str: journal + created: + Str: 2024-11-18 + description: + Str: We Cooked + title: + Str: "Sunday November 17, 2024" + updated: + Str: "2024-11-18T17:58:21-0500" + version: + Str: 1.1.1 +- Object: + authors: + Str: benlubas + categories: + Array: + - Str: neorg + - Str: nvim + - Str: config + created: + Str: "2024-05-03T13:36:42-0500" + description: + Str: Extra lua code to configure Neorg + tangle: + Object: + delimiter: + Str: heading + languages: + Object: + lua: + Str: ~/github/.dotfiles/nvim/lua/benlubas/neorg/extras.lua + title: + Str: Neorg Extras + updated: + Str: "2024-10-27T11:12:32-0500" + version: + Str: 1.1.1 diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap b/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap new file mode 100644 index 0000000..102649f --- /dev/null +++ b/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap @@ -0,0 +1,25 @@ +--- +source: src/metadata/mod.rs +expression: examples +--- +- Object: + key: + Str: value +- Object: + x: + Str: y +- Object: + x: + Num: 5 +- Object: + x: + Num: -4 +- Object: + x: + Str: "-4b" +- Object: + arr: + Array: + - Str: is this okay? + huh: + Str: string diff --git a/src/metadata/stage_1.rs b/src/metadata/stage_1.rs new file mode 100644 index 0000000..58b935a --- /dev/null +++ b/src/metadata/stage_1.rs @@ -0,0 +1,136 @@ +use chumsky::prelude::*; +use serde::Serialize; +use std::collections::BTreeMap; +use text::TextParser; + +#[derive(Clone, Debug, Serialize)] +pub enum NorgMeta { + Invalid, + Nil, + Bool(bool), + Str(String), + EmptyKey(String), + Num(f64), + Array(Vec), + Object(BTreeMap), +} + +const SPECIAL: &str = "{}[]:\n"; + +pub fn meta_parser() -> impl Parser> { + recursive(|value| { + let frac = just('.').chain(text::digits(10)); + + let exp = just('e') + .or(just('E')) + .chain(just('+').or(just('-')).or_not()) + .chain::(text::digits(10)); + + let number = just(' ') + .repeated() + .ignore_then(just('-').or_not()) + .chain::(text::int(10)) + .chain::(frac.or_not().flatten()) + .chain::(exp.or_not().flatten()) + .then_ignore(just('\n').rewind()) + .collect::() + .from_str() + .unwrapped() + .labelled("number"); + + let escape = just('\\').ignore_then( + just('\\') + .or(just('/')) + .or(one_of(SPECIAL)) + .or(just('b').to('\x08')) + .or(just('f').to('\x0C')) + .or(just('n').to('\n')) + .or(just('r').to('\r')) + .or(just('t').to('\t')) + .or(just('u').ignore_then( + filter(|c: &char| c.is_ascii_hexdigit()) + .repeated() + .exactly(4) + .collect::() + .validate(|digits, span, emit| { + char::from_u32(u32::from_str_radix(&digits, 16).unwrap()) + .unwrap_or_else(|| { + emit(Simple::custom(span, "invalid unicode character")); + '\u{FFFD}' // unicode replacement character + }) + }), + )), + ); + + let string = none_of("{}[]\n") + .or(escape.clone()) + .repeated() + .at_least(1) + .try_map(|x, span| { + let binding = x.clone().into_iter().collect::(); + let s = binding.trim(); + if s.is_empty() { + Err(Simple::custom( + span, + format!("strings can't be all whitespace, got {x:?}"), + )) + } else { + Ok(s.to_string()) + } + }) + .labelled("string"); + + let key = none_of(SPECIAL) + .repeated() + .at_least(1) + .then_ignore(just(':').padded()) + .collect::() + .map(|s| s.trim().to_string()) + .labelled("key"); + + let array = value + .clone() + .separated_by(just('\n')) + .allow_trailing() + .padded() + .delimited_by(just('[').padded(), just(']').ignored()) + .map(NorgMeta::Array) + .labelled("array"); + + let property = key + .clone() + .then_ignore(one_of(" \n\t").or_not()) + .then(value.or(empty().to(NorgMeta::Nil))) + .then_ignore(just('\n').or_not()) + .labelled("property"); + + let object = property + .clone() + .then_ignore(just('\n').or_not()) + .repeated() + .padded() + .collect() + .delimited_by(just('{').padded(), just('}').ignored()) + .map(NorgMeta::Object) + .labelled("object"); + + choice(( + just("nil").to(NorgMeta::Nil).labelled("nil"), + just("true").to(NorgMeta::Bool(true)).labelled("true"), + just("false").to(NorgMeta::Bool(false)).labelled("false"), + number.map(NorgMeta::Num), + key.then_ignore(just('\n')).map(NorgMeta::EmptyKey), + array, + object, + string.map(NorgMeta::Str), + )) + .recover_with(nested_delimiters('{', '}', [('[', ']')], |_| { + NorgMeta::Invalid + })) + .recover_with(nested_delimiters('[', ']', [('{', '}')], |_| { + NorgMeta::Invalid + })) + .recover_with(skip_then_retry_until(['}', ']'])) + }) + .then_ignore(end().padded().recover_with(skip_then_retry_until([]))) +} diff --git a/src/stage_2.rs b/src/stage_2.rs index 09b68ef..05fc41f 100644 --- a/src/stage_2.rs +++ b/src/stage_2.rs @@ -57,7 +57,7 @@ fn tokens_to_paragraph_segment(tokens: Vec) -> ParagraphTokenList { Some(ParagraphSegmentToken::Text(result)) } None => None, - x => { + _x => { unreachable!(); } }) diff --git a/test.txt b/test.txt new file mode 100644 index 0000000..22c4da6 --- /dev/null +++ b/test.txt @@ -0,0 +1,19 @@ +key: value +obj: { + x: y + a: [ + b: c + ] +} +arr: [ + x + { + k: v + } +] +h: i +a: 1 +b: 7 +x: 2024-10-15:1000:15:10.000 +y: +z: From 91fe20eb2681cd6b58cf97aee748942331a613bf Mon Sep 17 00:00:00 2001 From: Ben Lubas Date: Tue, 19 Nov 2024 17:30:00 -0500 Subject: [PATCH 2/4] tests, also the parser is broken for empty arrays --- src/metadata/mod.rs | 12 +++++++++ .../rust_norg__metadata__tests__arrays.snap | 7 +++++ src/metadata/stage_1.rs | 18 +++++++++---- test.txt | 26 ++++++------------- 4 files changed, 40 insertions(+), 23 deletions(-) diff --git a/src/metadata/mod.rs b/src/metadata/mod.rs index c5d9448..26a11fa 100644 --- a/src/metadata/mod.rs +++ b/src/metadata/mod.rs @@ -77,6 +77,18 @@ mod tests { 5 -5 6.02e27 + nil + { + x: y + a: [ + b + ] + } + [] + [ + hi + hi + ] ]", ] .into_iter() diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap index 7dcdbaa..8cee8b5 100644 --- a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap +++ b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap @@ -18,3 +18,10 @@ expression: examples - Num: 5 - Num: -5 - Num: 6020000000000000000000000000 + - Nil + - Object: + a: + Array: + - Str: b + x: + Str: y diff --git a/src/metadata/stage_1.rs b/src/metadata/stage_1.rs index 58b935a..911b525 100644 --- a/src/metadata/stage_1.rs +++ b/src/metadata/stage_1.rs @@ -78,7 +78,12 @@ pub fn meta_parser() -> impl Parser> { Ok(s.to_string()) } }) - .labelled("string"); + .map(|s| match &s[..] { + "true" => NorgMeta::Bool(true), + "false" => NorgMeta::Bool(false), + "nil" => NorgMeta::Nil, + _ => NorgMeta::Str(s), + }); let key = none_of(SPECIAL) .repeated() @@ -97,6 +102,11 @@ pub fn meta_parser() -> impl Parser> { .map(NorgMeta::Array) .labelled("array"); + let empty_array = empty() + .padded() + .delimited_by(just('['), just(']')) + .to(NorgMeta::Array(vec![])); + let property = key .clone() .then_ignore(one_of(" \n\t").or_not()) @@ -115,14 +125,12 @@ pub fn meta_parser() -> impl Parser> { .labelled("object"); choice(( - just("nil").to(NorgMeta::Nil).labelled("nil"), - just("true").to(NorgMeta::Bool(true)).labelled("true"), - just("false").to(NorgMeta::Bool(false)).labelled("false"), number.map(NorgMeta::Num), key.then_ignore(just('\n')).map(NorgMeta::EmptyKey), + empty_array, array, object, - string.map(NorgMeta::Str), + string, )) .recover_with(nested_delimiters('{', '}', [('[', ']')], |_| { NorgMeta::Invalid diff --git a/test.txt b/test.txt index 22c4da6..321929a 100644 --- a/test.txt +++ b/test.txt @@ -1,19 +1,9 @@ -key: value -obj: { - x: y - a: [ - b: c - ] +x: { + + v: hi + + x: + hi: there } -arr: [ - x - { - k: v - } -] -h: i -a: 1 -b: 7 -x: 2024-10-15:1000:15:10.000 -y: -z: +a: [] +hi: there From 76ecf9cd2d276fe77b8543018be236df8019d6c3 Mon Sep 17 00:00:00 2001 From: Ben Lubas Date: Tue, 19 Nov 2024 19:55:28 -0500 Subject: [PATCH 3/4] more tests --- Cargo.lock | 159 ------------------ Cargo.toml | 2 - src/metadata/mod.rs | 26 ++- .../rust_norg__metadata__tests__arrays.snap | 8 +- .../rust_norg__metadata__tests__keys.snap | 25 ++- ...org__metadata__tests__keys_and_values.snap | 41 +++++ src/metadata/stage_1.rs | 8 +- test.txt | 9 - 8 files changed, 88 insertions(+), 190 deletions(-) create mode 100644 src/metadata/snapshots/rust_norg__metadata__tests__keys_and_values.snap delete mode 100644 test.txt diff --git a/Cargo.lock b/Cargo.lock index 6edf3c8..e664f2a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -20,21 +20,6 @@ version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5c6cb57a04249c6480766f7f7cef5467412af1490f8d1e243141daddada3264f" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - [[package]] name = "autocfg" version = "1.3.0" @@ -62,12 +47,6 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" -[[package]] -name = "bumpalo" -version = "3.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" - [[package]] name = "cc" version = "1.0.97" @@ -80,21 +59,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", - "serde", - "wasm-bindgen", - "windows-targets", -] - [[package]] name = "chumsky" version = "0.9.3" @@ -117,12 +81,6 @@ dependencies = [ "windows-sys", ] -[[package]] -name = "core-foundation-sys" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" - [[package]] name = "either" version = "1.11.0" @@ -178,29 +136,6 @@ dependencies = [ "allocator-api2", ] -[[package]] -name = "iana-time-zone" -version = "0.1.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "235e081f3925a06703c2d0117ea8b91f042756fd6e7a6e5d901e8ca1a996b220" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "insta" version = "1.39.0" @@ -223,15 +158,6 @@ dependencies = [ "either", ] -[[package]] -name = "js-sys" -version = "0.3.72" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a88f1bda2bd75b0452a14784937d796722fdebfe50df998aeb3f0b7603019a9" -dependencies = [ - "wasm-bindgen", -] - [[package]] name = "lazy_static" version = "1.4.0" @@ -262,12 +188,6 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "log" -version = "0.4.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" - [[package]] name = "num-traits" version = "0.2.19" @@ -284,17 +204,6 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" -[[package]] -name = "ordered-float" -version = "4.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c65ee1f9701bf938026630b455d5315f490640234259037edb259798b3bcf85e" -dependencies = [ - "num-traits", - "rand", - "serde", -] - [[package]] name = "ppv-lite86" version = "0.2.17" @@ -363,7 +272,6 @@ dependencies = [ "libc", "rand_chacha", "rand_core", - "serde", ] [[package]] @@ -383,7 +291,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom", - "serde", ] [[package]] @@ -405,11 +312,9 @@ checksum = "7a66a03ae7c801facd77a29370b4faec201768915ac14a721ba36f20bc9c209b" name = "rust-norg" version = "0.1.0" dependencies = [ - "chrono", "chumsky", "insta", "itertools", - "ordered-float", "proptest", "serde", "unicode_categories", @@ -541,61 +446,6 @@ version = "0.11.0+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" -[[package]] -name = "wasm-bindgen" -version = "0.2.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "128d1e363af62632b8eb57219c8fd7877144af57558fb2ef0368d0087bddeb2e" -dependencies = [ - "cfg-if", - "once_cell", - "wasm-bindgen-macro", -] - -[[package]] -name = "wasm-bindgen-backend" -version = "0.2.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb6dd4d3ca0ddffd1dd1c9c04f94b868c37ff5fac97c30b97cff2d74fce3a358" -dependencies = [ - "bumpalo", - "log", - "once_cell", - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-macro" -version = "0.2.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e79384be7f8f5a9dd5d7167216f022090cf1f9ec128e6e6a482a2cb5c5422c56" -dependencies = [ - "quote", - "wasm-bindgen-macro-support", -] - -[[package]] -name = "wasm-bindgen-macro-support" -version = "0.2.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26c6ab57572f7a24a4985830b120de1594465e5d500f24afe89e16b4e833ef68" -dependencies = [ - "proc-macro2", - "quote", - "syn", - "wasm-bindgen-backend", - "wasm-bindgen-shared", -] - -[[package]] -name = "wasm-bindgen-shared" -version = "0.2.95" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65fc09f10666a9f147042251e0dda9c18f166ff7de300607007e96bdebc1068d" - [[package]] name = "winapi" version = "0.3.9" @@ -618,15 +468,6 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets", -] - [[package]] name = "windows-sys" version = "0.52.0" diff --git a/Cargo.toml b/Cargo.toml index 95463ce..b0c334e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,10 +4,8 @@ version = "0.1.0" edition = "2021" [dependencies] -chrono = { version = "0.4.38", features = ["serde"] } chumsky = "0.9.3" itertools = "0.13.0" -ordered-float = { version = "4.5.0", features = ["serde"] } serde = { version = "1.0.203", features = ["derive"] } unicode_categories = "0.1.1" diff --git a/src/metadata/mod.rs b/src/metadata/mod.rs index 26a11fa..47b1ad8 100644 --- a/src/metadata/mod.rs +++ b/src/metadata/mod.rs @@ -64,7 +64,10 @@ mod tests { #[test] fn arrays() { let examples: Vec<_> = [ - "empty_arr: []", + "empty_arr: [] + arr: [ + + ]", " categories: [ one @@ -73,7 +76,7 @@ mod tests { ]", " arr: [ - arrays can contain a ton of shit + arrays can contain everything 5 -5 6.02e27 @@ -101,19 +104,26 @@ mod tests { } #[test] - fn keys() { + fn keys_and_values() { let examples: Vec<_> = [ "key: value", "x:y", + "x :y", "x:5", "x:-4", - "x:-4b", + "str:-4b", + "nil:nil", + "nil:", + "still_nil: + x: y", " - arr: [ - is this okay? - ] - huh: string + key: value with : in it + key_2: value with: in it ", + "keys: { + in: + objects: [] + }" ] .into_iter() .map(|example| example.to_string() + "\n") diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap index 8cee8b5..83b3acf 100644 --- a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap +++ b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap @@ -3,6 +3,8 @@ source: src/metadata/mod.rs expression: examples --- - Object: + arr: + Array: [] empty_arr: Array: [] - Object: @@ -14,7 +16,7 @@ expression: examples - Object: arr: Array: - - Str: arrays can contain a ton of shit + - Str: arrays can contain everything - Num: 5 - Num: -5 - Num: 6020000000000000000000000000 @@ -25,3 +27,7 @@ expression: examples - Str: b x: Str: y + - Array: [] + - Array: + - Str: hi + - Str: hi diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap b/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap index 102649f..7f6390d 100644 --- a/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap +++ b/src/metadata/snapshots/rust_norg__metadata__tests__keys.snap @@ -15,11 +15,24 @@ expression: examples x: Num: -4 - Object: - x: + str: Str: "-4b" - Object: - arr: - Array: - - Str: is this okay? - huh: - Str: string + nil: Nil +- Object: + nil: Nil +- Object: + still_nil: Nil + x: + Str: y +- Object: + key: + Str: "value with : in it" + key_2: + Str: "value with: in it" +- Object: + keys: + Object: + in: Nil + objects: + Array: [] diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__keys_and_values.snap b/src/metadata/snapshots/rust_norg__metadata__tests__keys_and_values.snap new file mode 100644 index 0000000..eed039b --- /dev/null +++ b/src/metadata/snapshots/rust_norg__metadata__tests__keys_and_values.snap @@ -0,0 +1,41 @@ +--- +source: src/metadata/mod.rs +expression: examples +--- +- Object: + key: + Str: value +- Object: + x: + Str: y +- Object: + x: + Str: y +- Object: + x: + Num: 5 +- Object: + x: + Num: -4 +- Object: + str: + Str: "-4b" +- Object: + nil: Nil +- Object: + nil: Nil +- Object: + still_nil: Nil + x: + Str: y +- Object: + key: + Str: "value with : in it" + key_2: + Str: "value with: in it" +- Object: + keys: + Object: + in: Nil + objects: + Array: [] diff --git a/src/metadata/stage_1.rs b/src/metadata/stage_1.rs index 911b525..122e7ff 100644 --- a/src/metadata/stage_1.rs +++ b/src/metadata/stage_1.rs @@ -88,7 +88,7 @@ pub fn meta_parser() -> impl Parser> { let key = none_of(SPECIAL) .repeated() .at_least(1) - .then_ignore(just(':').padded()) + .then_ignore(just(':').then(one_of(" \t").repeated())) .collect::() .map(|s| s.trim().to_string()) .labelled("key"); @@ -104,12 +104,11 @@ pub fn meta_parser() -> impl Parser> { let empty_array = empty() .padded() - .delimited_by(just('['), just(']')) + .delimited_by(just('[').padded(), just(']')) .to(NorgMeta::Array(vec![])); let property = key - .clone() - .then_ignore(one_of(" \n\t").or_not()) + .then_ignore(one_of(" \t").repeated()) .then(value.or(empty().to(NorgMeta::Nil))) .then_ignore(just('\n').or_not()) .labelled("property"); @@ -126,7 +125,6 @@ pub fn meta_parser() -> impl Parser> { choice(( number.map(NorgMeta::Num), - key.then_ignore(just('\n')).map(NorgMeta::EmptyKey), empty_array, array, object, diff --git a/test.txt b/test.txt deleted file mode 100644 index 321929a..0000000 --- a/test.txt +++ /dev/null @@ -1,9 +0,0 @@ -x: { - - v: hi - - x: - hi: there -} -a: [] -hi: there From f0022375c1c1deedf1778625e4608fdfbf56a03b Mon Sep 17 00:00:00 2001 From: Ben Lubas Date: Sun, 24 Nov 2024 05:52:55 -0500 Subject: [PATCH 4/4] fix: that extra space is no longer needed --- src/metadata/mod.rs | 5 ++--- .../snapshots/rust_norg__metadata__tests__arrays.snap | 7 +++++++ 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/src/metadata/mod.rs b/src/metadata/mod.rs index 47b1ad8..a238481 100644 --- a/src/metadata/mod.rs +++ b/src/metadata/mod.rs @@ -7,9 +7,7 @@ pub mod stage_1; /// Parses the given input string to produce an AST for the metadata pub fn parse_metadata(input: &str) -> Result { - // don't ask me why this fixes it. I don't even care - let processed = input.replace("]", " ]"); - let processed = format!("{{\n{}\n}}\n", processed.trim()); + let processed = format!("{{\n{}\n}}\n", input.trim()); Ok(stage_1::meta_parser().parse(processed)?) } @@ -93,6 +91,7 @@ mod tests { hi ] ]", + "arr:[]\na2:[\n]x: y", ] .into_iter() .map(|example| example.to_string() + "\n") diff --git a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap index 83b3acf..3591774 100644 --- a/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap +++ b/src/metadata/snapshots/rust_norg__metadata__tests__arrays.snap @@ -31,3 +31,10 @@ expression: examples - Array: - Str: hi - Str: hi +- Object: + a2: + Array: [] + arr: + Array: [] + x: + Str: y