From 20b14280ecd48e97ae4a9accf477e170cabc1e6c Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Wed, 22 Oct 2025 06:46:49 -0500 Subject: [PATCH 1/4] perf: increase parsing speed by 2x by being careful around parsing tree-sitter log events --- .../src/utils/tree_sitter_log_observer.rs | 104 ++++++++---------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs b/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs index 4a2bbc3..e1ae909 100644 --- a/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs +++ b/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs @@ -3,7 +3,7 @@ * Copyright (c) 2025 Posit, PBC */ -use std::collections::HashMap; +use std::collections::HashMap; // Still needed for TreeSitterParseLog::processes #[derive(Debug, PartialEq, Eq, Clone)] pub enum TreeSitterLogState { @@ -82,24 +82,39 @@ impl TreeSitterLogObserver { } pub fn log(&mut self, _log_type: tree_sitter::LogType, message: &str) { // Implement your logging logic here - let str = message.to_string(); - - let words: Vec<&str> = str.split_whitespace().collect(); - if words.len() == 0 { + let words: Vec<&str> = message.split_whitespace().collect(); + if words.is_empty() { eprintln!("Empty log message from tree-sitter"); return; } - let params: HashMap = words[1..] - .iter() - .filter_map(|pair| { - let pair = pair.trim_suffix(","); - let mut split = pair.splitn(2, ':'); - if let (Some(key), Some(value_str)) = (split.next(), split.next()) { - return Some((key.to_string(), value_str.to_string())); + + // Extract parameters directly into typed variables instead of creating a HashMap. + // This eliminates expensive allocations: each HashMap creation costs ~608 bytes + // (HashMap allocation + String allocations for keys and values + hash computations). + // With only 6 possible parameters that are always parsed to the same types, + // we can use simple Option variables on the stack (48 bytes total). + let mut version: Option = None; + let mut state: Option = None; + let mut row: Option = None; + let mut col: Option = None; + let mut sym: Option<&str> = None; + let mut size: Option = None; + + // Single pass through parameters + for pair in &words[1..] { + let pair = pair.trim_end_matches(','); + if let Some((key, value)) = pair.split_once(':') { + match key { + "version" => version = value.parse().ok(), + "state" => state = value.parse().ok(), + "row" => row = value.parse().ok(), + "col" => col = value.parse().ok(), + "sym" => sym = Some(value), + "size" => size = value.parse().ok(), + _ => {} // Ignore unknown parameters } - None - }) - .collect(); + } + } match words[0] { "new_parse" => { if self.state != TreeSitterLogState::Idle { @@ -121,11 +136,7 @@ impl TreeSitterLogObserver { self.state = TreeSitterLogState::Idle; } "resume" => { - let version = params - .get("version") - .expect("Missing 'version' in process log") - .parse::() - .expect("Failed to parse 'version' as usize"); + let version = version.expect("Missing 'version' in process log"); let current_parse = self .parses .last_mut() @@ -133,26 +144,10 @@ impl TreeSitterLogObserver { current_parse.current_process = Some(version); } "process" => { - let version = params - .get("version") - .expect("Missing 'version' in process log") - .parse::() - .expect("Failed to parse 'version' as usize"); - let state = params - .get("state") - .expect("Missing 'state' in process log") - .parse::() - .expect("Failed to parse 'state' as usize"); - let row = params - .get("row") - .expect("Missing 'row' in process log") - .parse::() - .expect("Failed to parse 'row' as usize"); - let column = params - .get("col") - .expect("Missing 'col' in process log") - .parse::() - .expect("Failed to parse 'col' as usize"); + let version = version.expect("Missing 'version' in process log"); + let state = state.expect("Missing 'state' in process log"); + let row = row.expect("Missing 'row' in process log"); + let column = col.expect("Missing 'col' in process log"); let current_parse = self .parses @@ -196,6 +191,9 @@ impl TreeSitterLogObserver { .push(current_process_message.clone()); } "lexed_lookahead" => { + let sym_str = sym.expect("Missing 'sym' in lexed_lookahead log"); + let size_val = size.expect("Missing 'size' in lexed_lookahead log"); + let current_parse = self .parses .last_mut() @@ -208,15 +206,13 @@ impl TreeSitterLogObserver { .current_message .as_mut() .expect("No current process message"); - current_parse.current_lookahead = Some(( - params.get("sym").unwrap().to_string(), - params.get("size").unwrap().parse::().unwrap(), - )); - current_process_message.sym = params.get("sym").unwrap().to_string(); - current_process_message.size = - params.get("size").unwrap().parse::().unwrap(); + current_parse.current_lookahead = Some((sym_str.to_string(), size_val)); + current_process_message.sym = sym_str.to_string(); + current_process_message.size = size_val; } "shift" => { + let state_val = state.expect("Missing 'state' in shift log"); + let current_parse = self .parses .last_mut() @@ -229,18 +225,13 @@ impl TreeSitterLogObserver { .current_message .as_mut() .expect("No current process message"); - let state = params - .get("state") - .expect("Missing 'state' in shift log") - .parse::() - .expect("Failed to parse 'state' as usize"); let size = current_parse .current_lookahead .as_ref() .map(|(_, s)| *s) .unwrap_or(0); current_parse.consumed_tokens.push(ConsumedToken { - lr_state: state, + lr_state: state_val, row: current_process_message.row, column: current_process_message.column, size, @@ -278,11 +269,8 @@ impl TreeSitterLogObserver { } } } - match self.parses.last_mut() { - Some(current_parse) => { - current_parse.messages.push(str); - } - _ => {} + if let Some(current_parse) = self.parses.last_mut() { + current_parse.messages.push(message.to_string()); } } } From 26420e4c9e1d5d793e65a4cc4399477d82d07bd5 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Wed, 22 Oct 2025 06:59:22 -0500 Subject: [PATCH 2/4] merge work --- Cargo.lock | 259 ++++++++------- crates/qmd-syntax-helper/Cargo.toml | 1 + .../src/conversions/definition_lists.rs | 14 + .../src/conversions/div_whitespace.rs | 89 ++---- .../src/diagnostics/parse_check.rs | 8 - crates/qmd-syntax-helper/src/main.rs | 63 +++- .../src/utils/glob_expand.rs | 54 +++- crates/quarto-error-reporting/src/builder.rs | 59 ++++ .../quarto-error-reporting/src/diagnostic.rs | 296 ++++++++++++++---- .../fuzz/fuzz_targets/hello_fuzz.rs | 2 - crates/quarto-markdown-pandoc/src/main.rs | 45 +-- .../quarto-markdown-pandoc/src/pandoc/meta.rs | 264 ++++++++++++---- .../quarto-markdown-pandoc/src/readers/qmd.rs | 129 ++++---- .../src/readers/qmd_error_messages.rs | 167 ++++++++++ .../src/wasm_entry_points/mod.rs | 18 +- .../tests/claude-examples/meta-error.qmd | 7 + .../tests/claude-examples/meta-warning.qmd | 7 + crates/quarto-markdown-pandoc/tests/test.rs | 55 +--- .../tests/test_inline_locations.rs | 2 +- .../tests/test_json_errors.rs | 177 ++--------- .../tests/test_json_roundtrip.rs | 1 - .../tests/test_metadata_source_tracking.rs | 34 +- .../tests/test_nested_yaml_serialization.rs | 52 +-- .../tests/test_ordered_list_formatting.rs | 32 +- .../tests/test_warnings.rs | 23 +- .../tests/test_yaml_tag_regression.rs | 34 +- crates/quarto-source-map/src/context.rs | 35 ++- .../claude-notes/implementation-plan.md | 11 +- 28 files changed, 1185 insertions(+), 753 deletions(-) create mode 100644 crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd create mode 100644 crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd diff --git a/Cargo.lock b/Cargo.lock index e6207fb..e9f19e9 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.20" +version = "0.6.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae563653d1938f79b1ab1b5e668c87c76a9930414574a6583a7b7e11a8e6192" +checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" dependencies = [ "anstyle", "anstyle-parse", @@ -28,9 +28,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.11" +version = "1.0.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" +checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" [[package]] name = "anstyle-parse" @@ -97,10 +97,11 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "cc" -version = "1.2.34" +version = "1.2.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42bc4aea80032b7bf409b0bc7ccad88853858911b7713a8062fdc0623867bedc" +checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" dependencies = [ + "find-msvc-tools", "jobserver", "libc", "shlex", @@ -108,15 +109,15 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fd1289c04a9ea8cb22300a459a72a385d7c73d3259e2ed7dcb2af674838cfa9" +checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801" [[package]] name = "clap" -version = "4.5.46" +version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c5e4fcf9c21d2e544ca1ee9d8552de13019a42aa7dbf32747fa7aaf1df76e57" +checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" dependencies = [ "clap_builder", "clap_derive", @@ -124,9 +125,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.46" +version = "4.5.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fecb53a0e6fcfb055f686001bc2e2592fa527efaf38dbe81a6a9563562e57d41" +checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" dependencies = [ "anstream", "anstyle", @@ -136,9 +137,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.45" +version = "4.5.49" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14cb31bb0a7d536caef2639baa7fad459e15c3144efefa6dbd1c84562c4739f6" +checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" dependencies = [ "heck", "proc-macro2", @@ -148,9 +149,9 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.5" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" +checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" [[package]] name = "colorchoice" @@ -204,6 +205,12 @@ dependencies = [ "syn", ] +[[package]] +name = "find-msvc-tools" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" + [[package]] name = "foldhash" version = "0.1.5" @@ -212,14 +219,14 @@ checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" [[package]] name = "getrandom" -version = "0.3.3" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26145e563e54f2cadc477553f1ec5ee650b00862f0a58bcd12cbdc5f0ea2d2f4" +checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd" dependencies = [ "cfg-if", "libc", "r-efi", - "wasi", + "wasip2", ] [[package]] @@ -237,13 +244,19 @@ dependencies = [ "foldhash", ] +[[package]] +name = "hashbrown" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" + [[package]] name = "hashlink" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7382cf6263419f2d8df38c55d7da83da5c18aef87fc7a7fc1fb1e344edfe14c1" dependencies = [ - "hashbrown", + "hashbrown 0.15.5", "serde", ] @@ -274,19 +287,19 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2481980430f9f78649238835720ddccc57e52df14ffce1c6f37391d61b563e9" +checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.16.0", ] [[package]] name = "is_terminal_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" [[package]] name = "itoa" @@ -306,9 +319,9 @@ dependencies = [ [[package]] name = "js-sys" -version = "0.3.77" +version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" +checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" dependencies = [ "once_cell", "wasm-bindgen", @@ -322,9 +335,9 @@ checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" [[package]] name = "libc" -version = "0.2.175" +version = "0.2.177" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a82ae493e598baaea5209805c49bbf2ea7de956d50d7da0da1164f9c6d28543" +checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" [[package]] name = "libfuzzer-sys" @@ -338,15 +351,15 @@ dependencies = [ [[package]] name = "log" -version = "0.4.27" +version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" +checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" [[package]] name = "memchr" -version = "2.7.5" +version = "2.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" +checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" [[package]] name = "minicov" @@ -366,9 +379,9 @@ checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" [[package]] name = "once_cell_polyfill" -version = "1.70.1" +version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "paste" @@ -394,6 +407,7 @@ dependencies = [ "colored", "glob", "include_dir", + "quarto-error-reporting", "quarto-markdown-pandoc", "regex", "serde", @@ -464,9 +478,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.40" +version = "1.0.41" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1885c039570dc00dcb4ff087a89e185fd56bae234ddc7f056a945bf36467248d" +checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" dependencies = [ "proc-macro2", ] @@ -479,9 +493,9 @@ checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "regex" -version = "1.11.2" +version = "1.12.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23d7fd106d8c02486a8d64e778353d1cffe08ce79ac2e82f540c86d0facf6912" +checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" dependencies = [ "aho-corasick", "memchr", @@ -491,9 +505,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.10" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b9458fa0bfeeac22b5ca447c63aaf45f28439a709ccd244698632f9aa6394d6" +checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" dependencies = [ "aho-corasick", "memchr", @@ -502,9 +516,9 @@ dependencies = [ [[package]] name = "regex-syntax" -version = "0.8.6" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "caf4aa5b0f434c91fe5c7f1ecb6a5ece2130b02ad2a590589dda5146df959001" +checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "rustversion" @@ -529,18 +543,28 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f0e2c6ed6606019b4e29e69dbaba95b11854410e5347d525002456dbbb786b6" +checksum = "9a8e94ea7f378bd32cbbd37198a4a91436180c5bb472411e48b5ec2e2124ae9e" +dependencies = [ + "serde_core", + "serde_derive", +] + +[[package]] +name = "serde_core" +version = "1.0.228" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41d385c7d4ca58e59fc732af25c3983b67ac852c1a25000afe1175de458b67ad" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.219" +version = "1.0.228" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" +checksum = "d540f220d3187173da220f885ab66608367b6574e925011a9353e4badda91d79" dependencies = [ "proc-macro2", "quote", @@ -549,15 +573,16 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.143" +version = "1.0.145" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" dependencies = [ "indexmap", "itoa", "memchr", "ryu", "serde", + "serde_core", ] [[package]] @@ -580,9 +605,9 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "syn" -version = "2.0.106" +version = "2.0.107" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ede7c438028d4436d71104916910f5bb611972c5cfd7f89b8300a8186e6fada6" +checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" dependencies = [ "proc-macro2", "quote", @@ -611,9 +636,9 @@ dependencies = [ [[package]] name = "tree-sitter" -version = "0.25.8" +version = "0.25.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d7b8994f367f16e6fa14b5aebbcb350de5d7cbea82dc5b00ae997dd71680dd2" +checksum = "78f873475d258561b06f1c595d93308a7ed124d9977cb26b148c2084a4a3cc87" dependencies = [ "cc", "regex", @@ -640,9 +665,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.18" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +checksum = "462eeb75aeb73aea900253ce739c8e18a67423fadf006037cd3ff27e82748a06" [[package]] name = "unicode-width" @@ -667,31 +692,32 @@ dependencies = [ ] [[package]] -name = "wasi" -version = "0.14.3+wasi-0.2.4" +name = "wasip2" +version = "1.0.1+wasi-0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a51ae83037bdd272a9e28ce236db8c07016dd0d50c27038b3f407533c030c95" +checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" dependencies = [ "wit-bindgen", ] [[package]] name = "wasm-bindgen" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1edc8929d7499fc4e8f0be2262a241556cfc54a0bea223790e71446f2aab1ef5" +checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" dependencies = [ "cfg-if", "once_cell", "rustversion", "wasm-bindgen-macro", + "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-backend" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f0a0651a5c2bc21487bde11ee802ccaf4c51935d0d3d42a6101f98161700bc6" +checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" dependencies = [ "bumpalo", "log", @@ -703,9 +729,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-futures" -version = "0.4.50" +version = "0.4.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "555d470ec0bc3bb57890405e5d4322cc9ea83cebb085523ced7be4144dac1e61" +checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" dependencies = [ "cfg-if", "js-sys", @@ -716,9 +742,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7fe63fc6d09ed3792bd0897b314f53de8e16568c2b3f7982f468c0bf9bd0b407" +checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -726,9 +752,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" +checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" dependencies = [ "proc-macro2", "quote", @@ -739,18 +765,18 @@ dependencies = [ [[package]] name = "wasm-bindgen-shared" -version = "0.2.100" +version = "0.2.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a05d73b933a847d6cccdda8f838a22ff101ad9bf93e33684f39c1f5f0eece3d" +checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" dependencies = [ "unicode-ident", ] [[package]] name = "wasm-bindgen-test" -version = "0.3.50" +version = "0.3.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "66c8d5e33ca3b6d9fa3b4676d774c5778031d27a578c2b007f905acf816152c3" +checksum = "4e381134e148c1062f965a42ed1f5ee933eef2927c3f70d1812158f711d39865" dependencies = [ "js-sys", "minicov", @@ -761,9 +787,9 @@ dependencies = [ [[package]] name = "wasm-bindgen-test-macro" -version = "0.3.50" +version = "0.3.54" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17d5042cc5fa009658f9a7333ef24291b1291a25b6382dd68862a7f3b969f69b" +checksum = "b673bca3298fe582aeef8352330ecbad91849f85090805582400850f8270a2e8" dependencies = [ "proc-macro2", "quote", @@ -782,9 +808,9 @@ dependencies = [ [[package]] name = "web-sys" -version = "0.3.77" +version = "0.3.81" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33b6dd2ef9186f1f2072e409e99cd22a975331a6b3591b12c764e0e55c60d5d2" +checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" dependencies = [ "js-sys", "wasm-bindgen", @@ -792,18 +818,18 @@ dependencies = [ [[package]] name = "winapi-util" -version = "0.1.10" +version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0978bf7171b3d90bac376700cb56d606feb40f251a475a5d6634613564460b22" +checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "windows-link" -version = "0.1.3" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-sys" @@ -820,7 +846,16 @@ version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets 0.53.3", + "windows-targets 0.53.5", +] + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", ] [[package]] @@ -841,19 +876,19 @@ dependencies = [ [[package]] name = "windows-targets" -version = "0.53.3" +version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d5fe6031c4041849d7c496a8ded650796e7b6ecc19df1a431c1a363342e5dc91" +checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ "windows-link", - "windows_aarch64_gnullvm 0.53.0", - "windows_aarch64_msvc 0.53.0", - "windows_i686_gnu 0.53.0", - "windows_i686_gnullvm 0.53.0", - "windows_i686_msvc 0.53.0", - "windows_x86_64_gnu 0.53.0", - "windows_x86_64_gnullvm 0.53.0", - "windows_x86_64_msvc 0.53.0", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] [[package]] @@ -864,9 +899,9 @@ checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" [[package]] name = "windows_aarch64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "86b8d5f90ddd19cb4a147a5fa63ca848db3df085e25fee3cc10b39b6eebae764" +checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" [[package]] name = "windows_aarch64_msvc" @@ -876,9 +911,9 @@ checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" [[package]] name = "windows_aarch64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7651a1f62a11b8cbd5e0d42526e55f2c99886c77e007179efff86c2b137e66c" +checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" [[package]] name = "windows_i686_gnu" @@ -888,9 +923,9 @@ checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" [[package]] name = "windows_i686_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1dc67659d35f387f5f6c479dc4e28f1d4bb90ddd1a5d3da2e5d97b42d6272c3" +checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" [[package]] name = "windows_i686_gnullvm" @@ -900,9 +935,9 @@ checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" [[package]] name = "windows_i686_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ce6ccbdedbf6d6354471319e781c0dfef054c81fbc7cf83f338a4296c0cae11" +checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" [[package]] name = "windows_i686_msvc" @@ -912,9 +947,9 @@ checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" [[package]] name = "windows_i686_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "581fee95406bb13382d2f65cd4a908ca7b1e4c2f1917f143ba16efe98a589b5d" +checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" [[package]] name = "windows_x86_64_gnu" @@ -924,9 +959,9 @@ checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" [[package]] name = "windows_x86_64_gnu" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e55b5ac9ea33f2fc1716d1742db15574fd6fc8dadc51caab1c16a3d3b4190ba" +checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" [[package]] name = "windows_x86_64_gnullvm" @@ -936,9 +971,9 @@ checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" [[package]] name = "windows_x86_64_gnullvm" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a6e035dd0599267ce1ee132e51c27dd29437f63325753051e71dd9e42406c57" +checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" [[package]] name = "windows_x86_64_msvc" @@ -948,21 +983,21 @@ checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" [[package]] name = "windows_x86_64_msvc" -version = "0.53.0" +version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" +checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "wit-bindgen" -version = "0.45.0" +version = "0.46.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "052283831dbae3d879dc7f51f3d92703a316ca49f91540417d38591826127814" +checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" [[package]] name = "yaml-rust2" -version = "0.10.3" +version = "0.10.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ce2a4ff45552406d02501cea6c18d8a7e50228e7736a872951fe2fe75c91be7" +checksum = "2462ea039c445496d8793d052e13787f2b90e750b833afee748e601c17621ed9" dependencies = [ "arraydeque", "encoding_rs", diff --git a/crates/qmd-syntax-helper/Cargo.toml b/crates/qmd-syntax-helper/Cargo.toml index 9b4f98c..da5b9ae 100644 --- a/crates/qmd-syntax-helper/Cargo.toml +++ b/crates/qmd-syntax-helper/Cargo.toml @@ -23,6 +23,7 @@ anyhow = "1.0" regex = "1.10" colored = "2.1" quarto-markdown-pandoc.workspace = true +quarto-error-reporting.workspace = true include_dir = "0.7" serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" diff --git a/crates/qmd-syntax-helper/src/conversions/definition_lists.rs b/crates/qmd-syntax-helper/src/conversions/definition_lists.rs index 9124fd6..be74972 100644 --- a/crates/qmd-syntax-helper/src/conversions/definition_lists.rs +++ b/crates/qmd-syntax-helper/src/conversions/definition_lists.rs @@ -56,6 +56,20 @@ impl DefinitionListConverter { start_idx -= 1; } + // Check if the "term" is actually a table row or grid table border + // Table rows contain multiple pipe characters (e.g., | cell | cell |) + // Grid table borders start with + and contain multiple + characters + // This helps distinguish table captions from definition lists + let has_pipes = lines[start_idx].matches('|').count() >= 2; + let is_grid_border = + lines[start_idx].starts_with('+') && lines[start_idx].matches('+').count() >= 2; + + if has_pipes || is_grid_border { + // This is likely a table caption, not a definition list + i += 1; + continue; + } + // Now scan forward to collect all terms and definitions in this list let mut end_idx = i; i += 1; diff --git a/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs b/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs index 0b1f33f..c8119a4 100644 --- a/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs +++ b/crates/qmd-syntax-helper/src/conversions/div_whitespace.rs @@ -1,28 +1,11 @@ use anyhow::{Context, Result}; use colored::Colorize; -use serde::{Deserialize, Serialize}; use std::fs; use std::path::Path; use crate::rule::{CheckResult, ConvertResult, Rule}; use crate::utils::file_io::{read_file, write_file}; -#[derive(Debug, Serialize, Deserialize)] -struct ErrorLocation { - row: usize, - column: usize, - byte_offset: usize, - size: usize, -} - -#[derive(Debug, Serialize, Deserialize)] -struct ParseError { - filename: String, - title: String, - message: String, - location: ErrorLocation, -} - pub struct DivWhitespaceConverter {} impl DivWhitespaceConverter { @@ -30,8 +13,11 @@ impl DivWhitespaceConverter { Ok(Self {}) } - /// Parse a file and get error locations as JSON - fn get_parse_errors(&self, file_path: &Path) -> Result> { + /// Parse a file and get diagnostic messages + fn get_parse_errors( + &self, + file_path: &Path, + ) -> Result> { let content = fs::read_to_string(file_path) .with_context(|| format!("Failed to read file: {}", file_path.display()))?; @@ -44,46 +30,34 @@ impl DivWhitespaceConverter { false, // not loose mode &filename, &mut sink, - Some( - quarto_markdown_pandoc::readers::qmd_error_messages::produce_json_error_messages - as fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - ), ); match result { Ok(_) => Ok(Vec::new()), // No errors - Err(error_messages) => { - // Parse the JSON error output - // The error messages come as a single JSON array string - if error_messages.is_empty() { - return Ok(Vec::new()); - } - - let json_str = error_messages.join(""); - - // Try to parse as JSON array - match serde_json::from_str::>(&json_str) { - Ok(errors) => Ok(errors), - Err(_) => { - // If parsing fails, the messages are likely plain text warnings/debug messages - // rather than actual syntax errors. These don't indicate div whitespace issues, - // so we can safely ignore them for this specific rule. - Ok(Vec::new()) - } - } + Err(diagnostics) => { + // Return diagnostic messages directly + Ok(diagnostics) } } } /// Find div fence errors that need whitespace fixes - fn find_div_whitespace_errors(&self, content: &str, errors: &[ParseError]) -> Vec { + fn find_div_whitespace_errors( + &self, + content: &str, + errors: &[quarto_error_reporting::DiagnosticMessage], + ) -> Vec { let mut fix_positions = Vec::new(); let lines: Vec<&str> = content.lines().collect(); + // Pre-compute line start offsets for O(1) lookup instead of O(N) per error + let mut line_starts = Vec::with_capacity(lines.len()); + let mut offset = 0; + for line in &lines { + line_starts.push(offset); + offset += line.len() + 1; // +1 for newline + } + for error in errors { // Skip errors that are not about div fences // We're looking for "Missing Space After Div Fence" or errors on lines with ::: @@ -93,12 +67,20 @@ impl DivWhitespaceConverter { continue; } + // Extract row from location (if available) + // SourceInfo uses 0-indexed rows, div_whitespace uses them too + let error_row = error + .location + .as_ref() + .map(|loc| loc.range.start.row) + .unwrap_or(0); + // The error might be on the line itself or the line before (for div fences) // Check both the current line and the previous line - let lines_to_check = if error.location.row > 0 { - vec![error.location.row - 1, error.location.row] + let lines_to_check = if error_row > 0 { + vec![error_row - 1, error_row] } else { - vec![error.location.row] + vec![error_row] }; for &line_idx in &lines_to_check { @@ -114,11 +96,8 @@ impl DivWhitespaceConverter { if after_colon.starts_with('{') { // Calculate the position right after ::: // We need byte offset, not char offset - let line_start = content - .lines() - .take(line_idx) - .map(|l| l.len() + 1) // +1 for newline - .sum::(); + // Use pre-computed offset for O(1) lookup + let line_start = line_starts[line_idx]; let indent_bytes = line.len() - trimmed.len(); let fix_pos = line_start + indent_bytes + 3; // +3 for ":::" diff --git a/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs b/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs index 9dc25a8..406497e 100644 --- a/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs +++ b/crates/qmd-syntax-helper/src/diagnostics/parse_check.rs @@ -24,14 +24,6 @@ impl ParseChecker { false, &filename, &mut sink, - Some( - quarto_markdown_pandoc::readers::qmd_error_messages::produce_json_error_messages - as fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - ), ); Ok(result.is_ok()) diff --git a/crates/qmd-syntax-helper/src/main.rs b/crates/qmd-syntax-helper/src/main.rs index dbfceb1..ae7be58 100644 --- a/crates/qmd-syntax-helper/src/main.rs +++ b/crates/qmd-syntax-helper/src/main.rs @@ -85,41 +85,76 @@ fn main() -> Result<()> { output, } => { let file_paths = expand_globs(&files)?; + let total_files_checked = file_paths.len(); let rules = resolve_rules(®istry, &rule_names)?; let mut all_results = Vec::new(); for file_path in file_paths { + // Print filename first in verbose mode if verbose && !json { println!("Checking: {}", file_path.display()); } + // Collect results for this file + let mut file_results = Vec::new(); + for rule in &rules { match rule.check(&file_path, verbose && !json) { Ok(results) => { - for result in results { - all_results.push(result.clone()); - if !json && result.has_issue { + file_results.extend(results); + } + Err(e) => { + if !json { + // For errors, print filename first if not verbose + if !verbose { + println!("{}", file_path.display()); + } + eprintln!(" {} Error checking {}: {}", "✗".red(), rule.name(), e); + } + } + } + } + + // Print results based on mode + if !json { + if verbose { + // Verbose: filename already printed, just print issues + for result in &file_results { + if result.has_issue { + println!( + " {} {}", + "✗".red(), + result.message.as_ref().unwrap_or(&String::new()) + ); + } + } + } else { + // Non-verbose: only print filename if there are issues + let has_issues = file_results.iter().any(|r| r.has_issue); + if has_issues { + println!("{}", file_path.display()); + for result in &file_results { + if result.has_issue { println!( " {} {}", "✗".red(), - result.message.unwrap_or_default() + result.message.as_ref().unwrap_or(&String::new()) ); } } - } - Err(e) => { - if !json { - eprintln!(" {} Error checking {}: {}", "✗".red(), rule.name(), e); - } + println!(); // Blank line between files } } } + + // Add to overall results + all_results.extend(file_results); } // Print summary if not in JSON mode - if !json && !all_results.is_empty() { - print_check_summary(&all_results); + if !json { + print_check_summary(&all_results, total_files_checked); } // Output handling @@ -214,13 +249,9 @@ fn resolve_rules( } } -fn print_check_summary(results: &[rule::CheckResult]) { +fn print_check_summary(results: &[rule::CheckResult], total_files: usize) { use std::collections::{HashMap, HashSet}; - // Get unique files checked - let unique_files: HashSet<&str> = results.iter().map(|r| r.file_path.as_str()).collect(); - let total_files = unique_files.len(); - // Count files with issues (at least one result with has_issue=true) let mut files_with_issues = HashSet::new(); let mut total_issues = 0; diff --git a/crates/qmd-syntax-helper/src/utils/glob_expand.rs b/crates/qmd-syntax-helper/src/utils/glob_expand.rs index 09762f4..7c18c31 100644 --- a/crates/qmd-syntax-helper/src/utils/glob_expand.rs +++ b/crates/qmd-syntax-helper/src/utils/glob_expand.rs @@ -15,14 +15,25 @@ pub fn expand_globs(patterns: &[String]) -> Result> { let paths = glob::glob(pattern) .with_context(|| format!("Invalid glob pattern: {}", pattern))?; + let mut match_count = 0; for path in paths { let path = path.with_context(|| format!("Failed to read glob match for: {}", pattern))?; files.push(path); + match_count += 1; + } + + // Warn if glob matched nothing + if match_count == 0 { + eprintln!("Warning: No files matched pattern: {}", pattern); } } else { - // It's a literal path - use as-is - files.push(PathBuf::from(pattern)); + // It's a literal path - verify it exists + let path = PathBuf::from(pattern); + if !path.exists() { + anyhow::bail!("File not found: {}", pattern); + } + files.push(path); } } @@ -32,19 +43,52 @@ pub fn expand_globs(patterns: &[String]) -> Result> { #[cfg(test)] mod tests { use super::*; + use std::fs::File; + use std::io::Write; #[test] fn test_literal_path() { - let patterns = vec!["test.qmd".to_string()]; + // Create a temporary test file + let test_file = "test-glob-expand.qmd"; + File::create(test_file).unwrap().write_all(b"test").unwrap(); + + let patterns = vec![test_file.to_string()]; let result = expand_globs(&patterns).unwrap(); assert_eq!(result.len(), 1); - assert_eq!(result[0], PathBuf::from("test.qmd")); + assert_eq!(result[0], PathBuf::from(test_file)); + + // Clean up + std::fs::remove_file(test_file).unwrap(); } #[test] fn test_multiple_literals() { - let patterns = vec!["a.qmd".to_string(), "b.qmd".to_string()]; + // Create temporary test files + let test_file_a = "test-a-glob-expand.qmd"; + let test_file_b = "test-b-glob-expand.qmd"; + File::create(test_file_a) + .unwrap() + .write_all(b"test") + .unwrap(); + File::create(test_file_b) + .unwrap() + .write_all(b"test") + .unwrap(); + + let patterns = vec![test_file_a.to_string(), test_file_b.to_string()]; let result = expand_globs(&patterns).unwrap(); assert_eq!(result.len(), 2); + + // Clean up + std::fs::remove_file(test_file_a).unwrap(); + std::fs::remove_file(test_file_b).unwrap(); + } + + #[test] + fn test_nonexistent_file_errors() { + let patterns = vec!["file-that-does-not-exist.qmd".to_string()]; + let result = expand_globs(&patterns); + assert!(result.is_err()); + assert!(result.unwrap_err().to_string().contains("File not found")); } } diff --git a/crates/quarto-error-reporting/src/builder.rs b/crates/quarto-error-reporting/src/builder.rs index 59d7240..b76888d 100644 --- a/crates/quarto-error-reporting/src/builder.rs +++ b/crates/quarto-error-reporting/src/builder.rs @@ -256,6 +256,35 @@ impl DiagnosticMessageBuilder { self.details.push(DetailItem { kind: DetailKind::Error, content: detail.into(), + location: None, + }); + self + } + + /// Add an error detail with a source location. + /// + /// This allows adding contextual information that points to specific locations + /// in the source code, creating rich multi-location error messages. + /// + /// # Example + /// + /// ```ignore + /// use quarto_error_reporting::DiagnosticMessageBuilder; + /// + /// let error = DiagnosticMessageBuilder::error("Mismatched brackets") + /// .add_detail_at("Opening bracket here", opening_location) + /// .add_detail_at("But no closing bracket found", end_location) + /// .build(); + /// ``` + pub fn add_detail_at( + mut self, + detail: impl Into, + location: quarto_source_map::SourceInfo, + ) -> Self { + self.details.push(DetailItem { + kind: DetailKind::Error, + content: detail.into(), + location: Some(location), }); self } @@ -278,6 +307,21 @@ impl DiagnosticMessageBuilder { self.details.push(DetailItem { kind: DetailKind::Info, content: info.into(), + location: None, + }); + self + } + + /// Add an info detail with a source location. + pub fn add_info_at( + mut self, + info: impl Into, + location: quarto_source_map::SourceInfo, + ) -> Self { + self.details.push(DetailItem { + kind: DetailKind::Info, + content: info.into(), + location: Some(location), }); self } @@ -297,6 +341,21 @@ impl DiagnosticMessageBuilder { self.details.push(DetailItem { kind: DetailKind::Note, content: note.into(), + location: None, + }); + self + } + + /// Add a note detail with a source location. + pub fn add_note_at( + mut self, + note: impl Into, + location: quarto_source_map::SourceInfo, + ) -> Self { + self.details.push(DetailItem { + kind: DetailKind::Note, + content: note.into(), + location: Some(location), }); self } diff --git a/crates/quarto-error-reporting/src/diagnostic.rs b/crates/quarto-error-reporting/src/diagnostic.rs index 08cf24f..5608f71 100644 --- a/crates/quarto-error-reporting/src/diagnostic.rs +++ b/crates/quarto-error-reporting/src/diagnostic.rs @@ -83,14 +83,18 @@ impl From<&str> for MessageContent { /// /// Following tidyverse guidelines, details provide specific information about /// the error (what went wrong, where, with what values). -#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] +#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)] pub struct DetailItem { /// The kind of detail (error, info, note) pub kind: DetailKind, /// The content of the detail pub content: MessageContent, - // Future: Optional source span for details that point to specific code locations - // pub span: Option, + /// Optional source location for this detail + /// + /// When present, this identifies where in the source code this detail applies. + /// This allows error messages to highlight multiple related locations. + #[serde(skip_serializing_if = "Option::is_none")] + pub location: Option, } /// A diagnostic message following tidyverse-style structure. @@ -290,66 +294,117 @@ impl DiagnosticMessage { let mut result = String::new(); - // Title line with kind - let kind_str = match self.kind { - DiagnosticKind::Error => "Error", - DiagnosticKind::Warning => "Warning", - DiagnosticKind::Info => "Info", - DiagnosticKind::Note => "Note", + // Check if we have any location info that could be displayed with ariadne + // This includes the main diagnostic location OR any detail with a location + let has_any_location = self.location.is_some() + || self.details.iter().any(|d| d.location.is_some()); + + // If we have location info and source context, render ariadne source display + let has_ariadne = if has_any_location && ctx.is_some() { + // Use main location if available, otherwise use first detail location + let location = self.location.as_ref() + .or_else(|| self.details.iter().find_map(|d| d.location.as_ref())); + + if let Some(loc) = location { + if let Some(ariadne_output) = self.render_ariadne_source_context(loc, ctx.unwrap()) { + result.push_str(&ariadne_output); + true + } else { + false + } + } else { + false + } + } else { + false }; - if let Some(code) = &self.code { - write!(result, "{} [{}]: {}", kind_str, code, self.title).unwrap(); - } else { - write!(result, "{}: {}", kind_str, self.title).unwrap(); - } + // If we don't have ariadne output, show full tidyverse-style content + // If we do have ariadne, only show details without locations and hints + // (ariadne already shows: title, code, problem, and details with locations) + if !has_ariadne { + // No ariadne - show everything in tidyverse style + + // Title with kind prefix and error code (e.g., "Error [Q-1-1]: Invalid input") + let kind_str = match self.kind { + DiagnosticKind::Error => "Error", + DiagnosticKind::Warning => "Warning", + DiagnosticKind::Info => "Info", + DiagnosticKind::Note => "Note", + }; + if let Some(code) = &self.code { + write!(result, "{} [{}]: {}\n", kind_str, code, self.title).unwrap(); + } else { + write!(result, "{}: {}\n", kind_str, self.title).unwrap(); + } - // Add location if present - if let Some(loc) = &self.location { - if let Some(ctx) = ctx { - // Try to map to original source - if let Some(mapped) = loc.map_offset(loc.range.start.offset, ctx) { - if let Some(file) = ctx.get_file(mapped.file_id) { - write!( - result, - " at {}:{}:{}", - file.path, - mapped.location.row + 1, // Display as 1-based - mapped.location.column + 1 - ) - .unwrap(); + // Show location info if available (but no ariadne rendering) + if let Some(loc) = &self.location { + // Try to map with context if available + if let Some(ctx) = ctx { + if let Some(mapped) = loc.map_offset(loc.range.start.offset, ctx) { + if let Some(file) = ctx.get_file(mapped.file_id) { + write!( + result, + " at {}:{}:{}\n", + file.path, + mapped.location.row + 1, + mapped.location.column + 1 + ) + .unwrap(); + } } + } else { + // No context: show immediate location (1-indexed for display) + write!( + result, + " at {}:{}\n", + loc.range.start.row + 1, + loc.range.start.column + 1 + ) + .unwrap(); } - } else { - // No context, show immediate location - write!( - result, - " at {}:{}", - loc.range.start.row + 1, - loc.range.start.column + 1 - ) - .unwrap(); } - } - // Problem statement - if let Some(problem) = &self.problem { - write!(result, "\n{}", problem.as_str()).unwrap(); - } + // Problem statement (optional additional context) + if let Some(problem) = &self.problem { + write!(result, "{}\n", problem.as_str()).unwrap(); + } - // Details with appropriate bullets - for detail in &self.details { - let bullet = match detail.kind { - DetailKind::Error => "✖", - DetailKind::Info => "ℹ", - DetailKind::Note => "•", - }; - write!(result, "\n{} {}", bullet, detail.content.as_str()).unwrap(); - } + // All details with appropriate bullets + for detail in &self.details { + let bullet = match detail.kind { + DetailKind::Error => "✖", + DetailKind::Info => "ℹ", + DetailKind::Note => "•", + }; + write!(result, "{} {}\n", bullet, detail.content.as_str()).unwrap(); + } + + // All hints + for hint in &self.hints { + write!(result, "? {}\n", hint.as_str()).unwrap(); + } + } else { + // Have ariadne - only show details without locations and hints + // (ariadne shows title, code, problem, and located details) + + // Details without locations (ariadne can't show these) + for detail in &self.details { + if detail.location.is_none() { + let bullet = match detail.kind { + DetailKind::Error => "✖", + DetailKind::Info => "ℹ", + DetailKind::Note => "•", + }; + write!(result, "{} {}\n", bullet, detail.content.as_str()).unwrap(); + } + } - // Hints - for hint in &self.hints { - write!(result, "\n? {}", hint.as_str()).unwrap(); + // All hints (ariadne doesn't show hints) + for hint in &self.hints { + write!(result, "? {}\n", hint.as_str()).unwrap(); + } } result @@ -413,10 +468,14 @@ impl DiagnosticMessage { DetailKind::Info => "info", DetailKind::Note => "note", }; - json!({ + let mut detail_obj = json!({ "kind": detail_kind, "content": d.content.to_json() - }) + }); + if let Some(location) = &d.location { + detail_obj["location"] = json!(location); + } + detail_obj }) .collect(); obj["details"] = json!(details); @@ -433,6 +492,127 @@ impl DiagnosticMessage { obj } + + /// Extract the original file_id from a SourceInfo by traversing the mapping chain + fn extract_file_id(source_info: &quarto_source_map::SourceInfo) -> Option { + match &source_info.mapping { + quarto_source_map::SourceMapping::Original { file_id } => Some(*file_id), + quarto_source_map::SourceMapping::Substring { parent, .. } => Self::extract_file_id(parent), + quarto_source_map::SourceMapping::Transformed { parent, .. } => Self::extract_file_id(parent), + quarto_source_map::SourceMapping::Concat { pieces } => { + // For concatenated sources, use the first piece's file_id + pieces.first().and_then(|p| Self::extract_file_id(&p.source_info)) + } + } + } + + /// Render source context using ariadne (private helper for to_text). + /// + /// This produces the visual source code snippet with highlighting. + /// The tidyverse-style problem/details/hints are added separately by to_text(). + fn render_ariadne_source_context( + &self, + main_location: &quarto_source_map::SourceInfo, + ctx: &quarto_source_map::SourceContext, + ) -> Option { + use ariadne::{Color, Label, Report, ReportKind, Source}; + + // Extract file_id from the source mapping by traversing the chain + let file_id = Self::extract_file_id(main_location)?; + + let file = ctx.get_file(file_id)?; + + // Get file content: use stored content for ephemeral files, or read from disk + let content = match &file.content { + Some(c) => c.clone(), // Ephemeral file: use stored content + None => { + // Disk-backed file: read from disk + std::fs::read_to_string(&file.path) + .unwrap_or_else(|e| panic!("Failed to read file '{}': {}", file.path, e)) + } + }; + + // Map the location offsets back to original file positions + let start_mapped = main_location.map_offset(main_location.range.start.offset, ctx)?; + let end_mapped = main_location.map_offset(main_location.range.end.offset, ctx)?; + + // Determine report kind and color + let (report_kind, main_color) = match self.kind { + DiagnosticKind::Error => (ReportKind::Error, Color::Red), + DiagnosticKind::Warning => (ReportKind::Warning, Color::Yellow), + DiagnosticKind::Info => (ReportKind::Advice, Color::Cyan), + DiagnosticKind::Note => (ReportKind::Advice, Color::Blue), + }; + + // Build the report using the mapped offset for proper line:column display + let mut report = Report::build( + report_kind, + file.path.clone(), + start_mapped.location.offset, + ); + + // Add title with error code + if let Some(code) = &self.code { + report = report.with_message(format!("[{}] {}", code, self.title)); + } else { + report = report.with_message(&self.title); + } + + // Add main location label using mapped offsets + let main_span = start_mapped.location.offset..end_mapped.location.offset; + let main_message = if let Some(problem) = &self.problem { + problem.as_str() + } else { + &self.title + }; + + report = report.with_label( + Label::new((file.path.clone(), main_span)) + .with_message(main_message) + .with_color(main_color), + ); + + // Add detail locations as additional labels (only those with locations) + for detail in &self.details { + if let Some(detail_loc) = &detail.location { + // Extract file_id from detail location + let detail_file_id = match Self::extract_file_id(detail_loc) { + Some(fid) => fid, + None => continue, // Skip if we can't extract file_id + }; + + if detail_file_id == file_id { + // Map detail offsets to original file positions + if let (Some(detail_start), Some(detail_end)) = ( + detail_loc.map_offset(detail_loc.range.start.offset, ctx), + detail_loc.map_offset(detail_loc.range.end.offset, ctx), + ) { + let detail_span = detail_start.location.offset..detail_end.location.offset; + let detail_color = match detail.kind { + DetailKind::Error => Color::Red, + DetailKind::Info => Color::Cyan, + DetailKind::Note => Color::Blue, + }; + + report = report.with_label( + Label::new((file.path.clone(), detail_span)) + .with_message(detail.content.as_str()) + .with_color(detail_color), + ); + } + } + } + } + + // Render to string + let report = report.finish(); + let mut output = Vec::new(); + report + .write((file.path.clone(), Source::from(content.as_str())), &mut output) + .ok()?; + + String::from_utf8(output).ok() + } } #[cfg(test)] @@ -503,13 +683,13 @@ mod tests { #[test] fn test_to_text_simple_error() { let msg = DiagnosticMessage::error("Something went wrong"); - assert_eq!(msg.to_text(None), "Error: Something went wrong"); + assert_eq!(msg.to_text(None), "Error: Something went wrong\n"); } #[test] fn test_to_text_with_code() { let msg = DiagnosticMessage::error("Something went wrong").with_code("Q-1-1"); - assert_eq!(msg.to_text(None), "Error [Q-1-1]: Something went wrong"); + assert_eq!(msg.to_text(None), "Error [Q-1-1]: Something went wrong\n"); } #[test] diff --git a/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs b/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs index 76250e8..134639f 100644 --- a/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs +++ b/crates/quarto-markdown-pandoc/fuzz/fuzz_targets/hello_fuzz.rs @@ -7,7 +7,6 @@ #[macro_use] extern crate libfuzzer_sys; use quarto_markdown_pandoc::readers; -use quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver; fuzz_target!(|data: &[u8]| { if let Ok(s) = std::str::from_utf8(data) { @@ -16,7 +15,6 @@ fuzz_target!(|data: &[u8]| { false, "", &mut std::io::sink(), - None:: Vec>, ); } }); diff --git a/crates/quarto-markdown-pandoc/src/main.rs b/crates/quarto-markdown-pandoc/src/main.rs index 9bf4f60..f60e9c7 100644 --- a/crates/quarto-markdown-pandoc/src/main.rs +++ b/crates/quarto-markdown-pandoc/src/main.rs @@ -113,35 +113,42 @@ fn main() { let (pandoc, context) = match args.from.as_str() { "markdown" | "qmd" => { - let error_formatter = if args.json_errors { - Some( - readers::qmd_error_messages::produce_json_error_messages - as fn( - &[u8], - &utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - ) - } else { - None - }; - let result = readers::qmd::read( input.as_bytes(), args.loose, input_filename, &mut output_stream, - error_formatter, ); match result { - Ok(p) => p, - Err(error_messages) => { + Ok((pandoc, context, warnings)) => { + // Output warnings to stderr + if args.json_errors { + // JSON format + for warning in warnings { + eprintln!("{}", warning.to_json()); + } + } else { + // Text format (default) - pass source_context for Ariadne rendering + for warning in warnings { + eprintln!("{}", warning.to_text(Some(&context.source_context))); + } + } + (pandoc, context) + } + Err(diagnostics) => { + // Output errors if args.json_errors { // For JSON errors, print to stdout as a JSON array - println!("{}", error_messages.join("")); + for diagnostic in diagnostics { + println!("{}", diagnostic.to_json()); + } } else { - for msg in error_messages { - eprintln!("{}", msg); + // Build a minimal source context for Ariadne rendering + let mut source_context = quarto_source_map::SourceContext::new(); + source_context.add_file(input_filename.to_string(), Some(input.clone())); + + for diagnostic in diagnostics { + eprintln!("{}", diagnostic.to_text(Some(&source_context))); } } std::process::exit(1); diff --git a/crates/quarto-markdown-pandoc/src/pandoc/meta.rs b/crates/quarto-markdown-pandoc/src/pandoc/meta.rs index 68f8d59..1c2a94c 100644 --- a/crates/quarto-markdown-pandoc/src/pandoc/meta.rs +++ b/crates/quarto-markdown-pandoc/src/pandoc/meta.rs @@ -104,6 +104,30 @@ impl MetaValueWithSourceInfo { } } + /// Check if this MetaValue represents a string with a specific value + /// + /// This handles both: + /// - MetaString { value, .. } where value == expected + /// - MetaInlines { content, .. } where content is a single Str with text == expected + /// + /// This is needed because after k-90/k-95, YAML strings are parsed as markdown + /// and become MetaInlines containing a single Str node. + pub fn is_string_value(&self, expected: &str) -> bool { + match self { + MetaValueWithSourceInfo::MetaString { value, .. } => value == expected, + MetaValueWithSourceInfo::MetaInlines { content, .. } => { + // Check if it's a single Str inline with the expected text + if content.len() == 1 { + if let crate::pandoc::Inline::Str(str_node) = &content[0] { + return str_node.text == expected; + } + } + false + } + _ => false, + } + } + /// Convert to old Meta format (loses source info) pub fn to_meta_value(&self) -> MetaValue { match self { @@ -203,6 +227,113 @@ pub fn meta_value_from_legacy(value: MetaValue) -> MetaValueWithSourceInfo { } } +/// Parse a YAML string value as markdown +/// +/// - If tag_source_info is Some: This is a !md tagged value, ERROR on parse failure +/// - If tag_source_info is None: This is an untagged value, WARN on parse failure +/// +/// On success: Returns MetaInlines or MetaBlocks +/// On failure with !md: Returns error (will need to panic or collect diagnostic) +/// On failure untagged: Returns MetaInlines with yaml-markdown-syntax-error Span + warning +fn parse_yaml_string_as_markdown( + value: &str, + source_info: &quarto_source_map::SourceInfo, + _context: &crate::pandoc::ast_context::ASTContext, + tag_source_info: Option, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, +) -> MetaValueWithSourceInfo { + use quarto_error_reporting::DiagnosticMessageBuilder; + + let mut output_stream = VerboseOutput::Sink(io::sink()); + let result = readers::qmd::read(value.as_bytes(), false, "", &mut output_stream); + + match result { + Ok((mut pandoc, _, warnings)) => { + // Propagate warnings from recursive parse + for warning in warnings { + diagnostics.add(warning); + } + // Parse succeeded - return as MetaInlines or MetaBlocks + if pandoc.blocks.len() == 1 { + if let crate::pandoc::Block::Paragraph(p) = &mut pandoc.blocks[0] { + return MetaValueWithSourceInfo::MetaInlines { + content: mem::take(&mut p.content), + source_info: source_info.clone(), + }; + } + } + MetaValueWithSourceInfo::MetaBlocks { + content: pandoc.blocks, + source_info: source_info.clone(), + } + } + Err(_parse_errors) => { + if let Some(_tag_loc) = tag_source_info { + // !md tag: ERROR on parse failure + let diagnostic = + DiagnosticMessageBuilder::error("Failed to parse !md tagged value") + .with_code("Q-1-100") + .with_location(source_info.clone()) + .problem("The `!md` tag requires valid markdown syntax") + .add_detail(format!("Could not parse: {}", value)) + .add_hint("Remove the `!md` tag or fix the markdown syntax") + .build(); + + // Collect diagnostic instead of printing + diagnostics.add(diagnostic); + + // For now, also return the error span so we can continue + // In the future, we might want to actually fail the parse + let span = Span { + attr: ( + String::new(), + vec!["yaml-markdown-syntax-error".to_string()], + HashMap::new(), + ), + content: vec![Inline::Str(Str { + text: value.to_string(), + source_info: quarto_source_map::SourceInfo::default(), + })], + source_info: quarto_source_map::SourceInfo::default(), + }; + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Span(span)], + source_info: source_info.clone(), + } + } else { + // Untagged: WARN on parse failure + let diagnostic = DiagnosticMessageBuilder::warning("Failed to parse metadata value as markdown") + .with_code("Q-1-101") + .with_location(source_info.clone()) + .problem(format!("Could not parse '{}' as markdown", value)) + .add_hint("Add the `!str` tag to treat this as a plain string, or fix the markdown syntax") + .build(); + + // Collect diagnostic instead of printing + diagnostics.add(diagnostic); + + // Return span with yaml-markdown-syntax-error class + let span = Span { + attr: ( + String::new(), + vec!["yaml-markdown-syntax-error".to_string()], + HashMap::new(), + ), + content: vec![Inline::Str(Str { + text: value.to_string(), + source_info: quarto_source_map::SourceInfo::default(), + })], + source_info: quarto_source_map::SourceInfo::default(), + }; + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Span(span)], + source_info: source_info.clone(), + } + } + } + } +} + /// Transform YamlWithSourceInfo to MetaValueWithSourceInfo /// /// This is the core transformation that: @@ -215,6 +346,7 @@ pub fn meta_value_from_legacy(value: MetaValue) -> MetaValueWithSourceInfo { pub fn yaml_to_meta_with_source_info( yaml: quarto_yaml::YamlWithSourceInfo, _context: &crate::pandoc::ast_context::ASTContext, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, ) -> MetaValueWithSourceInfo { use yaml_rust2::Yaml; @@ -224,7 +356,7 @@ pub fn yaml_to_meta_with_source_info( let (items, source_info) = yaml.into_array().unwrap(); let meta_items = items .into_iter() - .map(|item| yaml_to_meta_with_source_info(item, _context)) + .map(|item| yaml_to_meta_with_source_info(item, _context, diagnostics)) .collect(); return MetaValueWithSourceInfo::MetaList { @@ -242,7 +374,7 @@ pub fn yaml_to_meta_with_source_info( entry.key.yaml.as_str().map(|key_str| MetaMapEntry { key: key_str.to_string(), key_source: entry.key_span, - value: yaml_to_meta_with_source_info(entry.value, _context), + value: yaml_to_meta_with_source_info(entry.value, _context, diagnostics), }) }) .collect(); @@ -263,35 +395,57 @@ pub fn yaml_to_meta_with_source_info( match yaml_value { Yaml::String(s) => { - // Check for YAML tags (e.g., !path, !glob, !str) - if let Some((tag_suffix, _tag_source_info)) = tag { - // Tagged string - bypass markdown parsing - // Wrap in Span with class "yaml-tagged-string" and tag attribute - let mut attributes = HashMap::new(); - attributes.insert("tag".to_string(), tag_suffix.clone()); - - let span = Span { - attr: ( - String::new(), - vec!["yaml-tagged-string".to_string()], - attributes, - ), - content: vec![Inline::Str(Str { - text: s.clone(), - source_info: source_info.clone(), - })], - source_info: quarto_source_map::SourceInfo::default(), - }; - MetaValueWithSourceInfo::MetaInlines { - content: vec![Inline::Span(span)], - source_info, // Overall node source + // Check for YAML tags (e.g., !path, !glob, !str, !md) + if let Some((tag_suffix, tag_source_info)) = tag { + match tag_suffix.as_str() { + "str" | "path" => { + // !str and !path: Emit plain Str without markdown parsing + // No wrapper span, just a plain Str node + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Str(Str { + text: s.clone(), + source_info: source_info.clone(), + })], + source_info, + } + } + "md" => { + // !md: Parse as markdown immediately, ERROR if fails + parse_yaml_string_as_markdown( + &s, + &source_info, + _context, + Some(tag_source_info), + diagnostics, + ) + } + _ => { + // Other tags (!glob, !expr, etc.): Keep current behavior + // Wrap in Span with class "yaml-tagged-string" and tag attribute + let mut attributes = HashMap::new(); + attributes.insert("tag".to_string(), tag_suffix.clone()); + + let span = Span { + attr: ( + String::new(), + vec!["yaml-tagged-string".to_string()], + attributes, + ), + content: vec![Inline::Str(Str { + text: s.clone(), + source_info: source_info.clone(), + })], + source_info: quarto_source_map::SourceInfo::default(), + }; + MetaValueWithSourceInfo::MetaInlines { + content: vec![Inline::Span(span)], + source_info, // Overall node source + } + } } } else { - // Untagged string - return as MetaString for later markdown parsing - MetaValueWithSourceInfo::MetaString { - value: s, - source_info, - } + // Untagged string: Parse as markdown immediately, WARN if fails + parse_yaml_string_as_markdown(&s, &source_info, _context, None, diagnostics) } } @@ -468,6 +622,7 @@ impl MarkedEventReceiver for YamlEventHandler { pub fn rawblock_to_meta_with_source_info( block: &RawBlock, context: &crate::pandoc::ast_context::ASTContext, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, ) -> MetaValueWithSourceInfo { if block.format != "quarto_minus_metadata" { panic!( @@ -501,7 +656,7 @@ pub fn rawblock_to_meta_with_source_info( // Transform YamlWithSourceInfo to MetaValueWithSourceInfo // Pass by value since yaml is no longer needed - yaml_to_meta_with_source_info(yaml, context) + yaml_to_meta_with_source_info(yaml, context, diagnostics) } /// Legacy version: Convert RawBlock to Meta (old implementation) @@ -532,25 +687,19 @@ pub fn rawblock_to_meta(block: RawBlock) -> Meta { pub fn parse_metadata_strings_with_source_info( meta: MetaValueWithSourceInfo, outer_metadata: &mut Vec, + diagnostics: &mut crate::utils::diagnostic_collector::DiagnosticCollector, ) -> MetaValueWithSourceInfo { match meta { MetaValueWithSourceInfo::MetaString { value, source_info } => { let mut output_stream = VerboseOutput::Sink(io::sink()); - let result = readers::qmd::read( - value.as_bytes(), - false, - "", - &mut output_stream, - None::< - fn( - &[u8], - &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ); + let result = + readers::qmd::read(value.as_bytes(), false, "", &mut output_stream); match result { - Ok((mut pandoc, _context)) => { + Ok((mut pandoc, _context, warnings)) => { + // Propagate warnings from recursive parse + for warning in warnings { + diagnostics.add(warning); + } // Merge parsed metadata, preserving full MetaMapEntry with key_source if let MetaValueWithSourceInfo::MetaMap { entries, .. } = pandoc.meta { for entry in entries { @@ -595,7 +744,9 @@ pub fn parse_metadata_strings_with_source_info( MetaValueWithSourceInfo::MetaList { items, source_info } => { let parsed_items = items .into_iter() - .map(|item| parse_metadata_strings_with_source_info(item, outer_metadata)) + .map(|item| { + parse_metadata_strings_with_source_info(item, outer_metadata, diagnostics) + }) .collect(); MetaValueWithSourceInfo::MetaList { items: parsed_items, @@ -611,7 +762,11 @@ pub fn parse_metadata_strings_with_source_info( .map(|entry| MetaMapEntry { key: entry.key, key_source: entry.key_source, - value: parse_metadata_strings_with_source_info(entry.value, outer_metadata), + value: parse_metadata_strings_with_source_info( + entry.value, + outer_metadata, + diagnostics, + ), }) .collect(); MetaValueWithSourceInfo::MetaMap { @@ -627,21 +782,10 @@ pub fn parse_metadata_strings(meta: MetaValue, outer_metadata: &mut Meta) -> Met match meta { MetaValue::MetaString(s) => { let mut output_stream = VerboseOutput::Sink(io::sink()); - let result = readers::qmd::read( - s.as_bytes(), - false, - "", - &mut output_stream, - None::< - fn( - &[u8], - &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ); + let result = readers::qmd::read(s.as_bytes(), false, "", &mut output_stream); match result { - Ok((mut pandoc, _context)) => { + Ok((mut pandoc, _context, _warnings)) => { + // TODO: Handle warnings from recursive parse // pandoc.meta is now MetaValueWithSourceInfo, convert it to Meta if let MetaValueWithSourceInfo::MetaMap { entries, .. } = pandoc.meta { for entry in entries { diff --git a/crates/quarto-markdown-pandoc/src/readers/qmd.rs b/crates/quarto-markdown-pandoc/src/readers/qmd.rs index 42ab1fb..c86ab18 100644 --- a/crates/quarto-markdown-pandoc/src/readers/qmd.rs +++ b/crates/quarto-markdown-pandoc/src/readers/qmd.rs @@ -13,7 +13,7 @@ use crate::pandoc::block::MetaBlock; use crate::pandoc::meta::parse_metadata_strings_with_source_info; use crate::pandoc::rawblock_to_meta_with_source_info; use crate::pandoc::{self, Block, MetaValueWithSourceInfo}; -use crate::readers::qmd_error_messages::{produce_error_message, produce_error_message_json}; +use crate::readers::qmd_error_messages::{produce_diagnostic_messages, produce_error_message_json}; use crate::traversals; use crate::utils::diagnostic_collector::DiagnosticCollector; use std::io::Write; @@ -50,22 +50,20 @@ pub fn read_bad_qmd_for_error_message(input_bytes: &[u8]) -> Vec { return produce_error_message_json(&log_observer); } -pub fn read( +pub fn read( input_bytes: &[u8], _loose: bool, filename: &str, mut output_stream: &mut T, - error_formatter: Option, -) -> Result<(pandoc::Pandoc, ASTContext), Vec> -where - F: Fn( - &[u8], - &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, -{ +) -> Result< + ( + pandoc::Pandoc, + ASTContext, + Vec, + ), + Vec, +> { let mut parser = MarkdownParser::default(); - let mut error_messages: Vec = Vec::new(); let mut log_observer = crate::utils::tree_sitter_log_observer::TreeSitterLogObserver::default(); parser @@ -82,18 +80,22 @@ where let mut input_bytes_with_newline = Vec::with_capacity(input_bytes.len() + 1); input_bytes_with_newline.extend_from_slice(input_bytes); input_bytes_with_newline.push(b'\n'); - return read( - &input_bytes_with_newline, - _loose, - filename, - output_stream, - error_formatter, - ); + return read(&input_bytes_with_newline, _loose, filename, output_stream); } let tree = parser .parse(&input_bytes, None) .expect("Failed to parse input"); + + // Create ASTContext early so we can use it for error diagnostics + let mut context = ASTContext::with_filename(filename.to_string()); + // Add the input content to the SourceContext for proper error rendering + let input_str = String::from_utf8_lossy(input_bytes).to_string(); + context.source_context = quarto_source_map::SourceContext::new(); + context + .source_context + .add_file(filename.to_string(), Some(input_str)); + log_observer.parses.iter().for_each(|parse| { writeln!(output_stream, "tree-sitter parse:").unwrap(); parse @@ -103,41 +105,40 @@ where writeln!(output_stream, "---").unwrap(); }); if log_observer.had_errors() { - if let Some(formatter) = error_formatter { - // Use the provided error formatter - return Err(formatter(input_bytes, &log_observer, filename)); - } else { - // Use the default ariadne formatter - return Err(produce_error_message(input_bytes, &log_observer, filename)); - } + // Produce structured DiagnosticMessage objects with proper source locations + let diagnostics = produce_diagnostic_messages( + input_bytes, + &log_observer, + filename, + &context.source_context, + ); + return Err(diagnostics); } let depth = crate::utils::concrete_tree_depth::concrete_tree_depth(&tree); // this is here mostly to prevent our fuzzer from blowing the stack // with a deeply nested document if depth > 100 { - error_messages.push(format!( + let diagnostic = quarto_error_reporting::generic_error!(format!( "The input document is too deeply nested (max depth: {} > 100).", depth )); - return Err(error_messages); + return Err(vec![diagnostic]); } let errors = parse_is_good(&tree); print_whole_tree(&mut tree.walk(), &mut output_stream); if !errors.is_empty() { let mut cursor = tree.walk(); + let mut diagnostics = Vec::new(); for error in errors { cursor.goto_id(error); - error_messages.push(errors::error_message(&mut cursor, &input_bytes)); + let error_msg = errors::error_message(&mut cursor, &input_bytes); + diagnostics.push(quarto_error_reporting::generic_error!(error_msg)); } - } - if !error_messages.is_empty() { - return Err(error_messages); + return Err(diagnostics); } - let context = ASTContext::with_filename(filename.to_string()); - // Create diagnostic collector and convert to Pandoc AST let mut error_collector = DiagnosticCollector::new(); let mut result = match pandoc::treesitter_to_pandoc( @@ -149,34 +150,14 @@ where ) { Ok(pandoc) => pandoc, Err(diagnostics) => { - // Convert diagnostics to strings based on format - if error_formatter.is_some() { - return Err(diagnostics - .iter() - .map(|d| d.to_json().to_string()) - .collect()); - } else { - return Err(diagnostics.iter().map(|d| d.to_text(None)).collect()); - } + // Return diagnostics directly + return Err(diagnostics); } }; - - // Output warnings to stderr in appropriate format - if error_formatter.is_some() { - // JSON format - let warnings = error_collector.to_json(); - for warning in warnings { - eprintln!("{}", warning); - } - } else { - // Text format (default) - let warnings = error_collector.to_text(); - for warning in warnings { - eprintln!("{}", warning); - } - } // Store complete MetaMapEntry objects to preserve key_source information let mut meta_from_parses: Vec = Vec::new(); + // Create a separate diagnostic collector for metadata parsing warnings + let mut meta_diagnostics = DiagnosticCollector::new(); result = { let mut filter = Filter::new().with_raw_block(|rb| { @@ -184,18 +165,15 @@ where return Unchanged(rb); } // Use new rawblock_to_meta_with_source_info - preserves source info! - let meta_with_source = rawblock_to_meta_with_source_info(&rb, &context); + let meta_with_source = + rawblock_to_meta_with_source_info(&rb, &context, &mut meta_diagnostics); // Check if this is lexical metadata let is_lexical = if let MetaValueWithSourceInfo::MetaMap { ref entries, .. } = meta_with_source { - entries.iter().any(|e| { - e.key == "_scope" - && matches!( - &e.value, - MetaValueWithSourceInfo::MetaString { value, .. } if value == "lexical" - ) - }) + entries + .iter() + .any(|e| e.key == "_scope" && e.value.is_string_value("lexical")) } else { false }; @@ -206,6 +184,7 @@ where let parsed_meta = parse_metadata_strings_with_source_info( meta_with_source, &mut inner_meta_from_parses, + &mut meta_diagnostics, ); // Merge inner metadata if needed @@ -236,8 +215,11 @@ where } else { // Document-level metadata - parse strings and merge into meta_from_parses let mut inner_meta = Vec::new(); - let parsed_meta = - parse_metadata_strings_with_source_info(meta_with_source, &mut inner_meta); + let parsed_meta = parse_metadata_strings_with_source_info( + meta_with_source, + &mut inner_meta, + &mut meta_diagnostics, + ); // Extract MetaMapEntry objects (preserving key_source) and store them if let MetaValueWithSourceInfo::MetaMap { entries, .. } = parsed_meta { @@ -263,5 +245,14 @@ where entries.push(entry); } } - Ok((result, context)) + + // Merge metadata diagnostics into main error_collector + for diagnostic in meta_diagnostics.into_diagnostics() { + error_collector.add(diagnostic); + } + + // Collect all warnings + let warnings = error_collector.into_diagnostics(); + + Ok((result, context, warnings)) } diff --git a/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs b/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs index f615abf..0f3cb6b 100644 --- a/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs +++ b/crates/quarto-markdown-pandoc/src/readers/qmd_error_messages.rs @@ -45,6 +45,43 @@ pub fn produce_error_message( return result; } +/// Produce structured DiagnosticMessage objects from parse errors +/// Uses the SourceContext to properly calculate source locations +pub fn produce_diagnostic_messages( + input_bytes: &[u8], + tree_sitter_log: &crate::utils::tree_sitter_log_observer::TreeSitterLogObserver, + filename: &str, + source_context: &quarto_source_map::SourceContext, +) -> Vec { + assert!(tree_sitter_log.had_errors()); + assert!(tree_sitter_log.parses.len() > 0); + + let mut result: Vec = vec![]; + let mut seen_errors: std::collections::HashSet<(usize, usize)> = + std::collections::HashSet::new(); + + for parse in &tree_sitter_log.parses { + for (_, process_log) in &parse.processes { + for state in process_log.error_states.iter() { + if seen_errors.contains(&(state.row, state.column)) { + continue; + } + seen_errors.insert((state.row, state.column)); + let diagnostic = error_diagnostic_from_parse_state( + input_bytes, + state, + &parse.consumed_tokens, + filename, + source_context, + ); + result.push(diagnostic); + } + } + } + + return result; +} + fn error_message_from_parse_state( input_bytes: &[u8], parse_state: &crate::utils::tree_sitter_log_observer::ProcessMessage, @@ -327,6 +364,136 @@ fn find_matching_token<'a>( .find(|token| token.lr_state == capture.lr_state && token.sym == capture.sym) } +/// Convert a parse state error into a structured DiagnosticMessage +fn error_diagnostic_from_parse_state( + input_bytes: &[u8], + parse_state: &crate::utils::tree_sitter_log_observer::ProcessMessage, + consumed_tokens: &[ConsumedToken], + _filename: &str, + _source_context: &quarto_source_map::SourceContext, +) -> quarto_error_reporting::DiagnosticMessage { + use quarto_error_reporting::DiagnosticMessageBuilder; + + // Look up the error entry from the table + let error_entry = crate::readers::qmd_error_message_table::lookup_error_entry(parse_state); + + // Convert input to string for offset calculation + let input_str = String::from_utf8_lossy(input_bytes); + + // Calculate byte offset and create proper locations using quarto-source-map utilities + let byte_offset = calculate_byte_offset(&input_str, parse_state.row, parse_state.column); + let span_end = byte_offset + parse_state.size.max(1); + + // Use quarto_source_map::utils::offset_to_location to properly calculate locations + let start_location = quarto_source_map::utils::offset_to_location(&input_str, byte_offset) + .unwrap_or(quarto_source_map::Location { + offset: byte_offset, + row: parse_state.row, + column: parse_state.column, + }); + let end_location = quarto_source_map::utils::offset_to_location(&input_str, span_end) + .unwrap_or(quarto_source_map::Location { + offset: span_end, + row: parse_state.row, + column: parse_state.column + parse_state.size.max(1), + }); + + // Create SourceInfo for the error location + let range = quarto_source_map::Range { + start: start_location, + end: end_location, + }; + let source_info = quarto_source_map::SourceInfo::original( + quarto_source_map::FileId(0), // File ID 0 (set up in ASTContext) + range, + ); + + if let Some(entry) = error_entry { + // Build diagnostic from error table entry + let mut builder = DiagnosticMessageBuilder::error(entry.error_info.title) + .with_location(source_info.clone()) + .problem(entry.error_info.message); + + // Add notes with their corresponding source locations + for note in entry.error_info.notes { + match note.note_type { + "simple" => { + // Find the capture that this note refers to + if let Some(capture) = + entry.error_info.captures.iter().find(|c| match note.label { + None => false, + Some(l) => c.label == l, + }) + { + // Find the consumed token that matches this capture + if let Some(token) = find_matching_token(consumed_tokens, capture) { + // Calculate the byte offset for this token + let token_byte_offset = + calculate_byte_offset(&input_str, token.row, token.column); + let token_span_end = token_byte_offset + token.size.max(1); + + // Use SourceInfo::substring to create a SourceInfo for this token + // This properly uses the quarto-source-map infrastructure + let token_source_info = quarto_source_map::SourceInfo::substring( + source_info.clone(), + token_byte_offset, + token_span_end, + ); + + // Add as info detail with location (will show as blue label in Ariadne) + builder = builder.add_info_at(note.message, token_source_info); + } + } + } + "label-range" => { + // Find the begin and end captures + let begin_capture = note.label_begin.and_then(|label| { + entry.error_info.captures.iter().find(|c| c.label == label) + }); + let end_capture = note.label_end.and_then(|label| { + entry.error_info.captures.iter().find(|c| c.label == label) + }); + + if let (Some(begin_cap), Some(end_cap)) = (begin_capture, end_capture) { + // Find the consumed tokens that match these captures + let begin_token = find_matching_token(consumed_tokens, begin_cap); + let end_token = find_matching_token(consumed_tokens, end_cap); + + if let (Some(begin_tok), Some(end_tok)) = (begin_token, end_token) { + // Calculate the span from the beginning of begin_token to the end of end_token + let begin_byte_offset = + calculate_byte_offset(&input_str, begin_tok.row, begin_tok.column); + let end_byte_offset = + calculate_byte_offset(&input_str, end_tok.row, end_tok.column); + let range_span_end = end_byte_offset + end_tok.size.max(1); + + // Use SourceInfo::substring to create a SourceInfo for this range + // This properly uses the quarto-source-map infrastructure + let range_source_info = quarto_source_map::SourceInfo::substring( + source_info.clone(), + begin_byte_offset, + range_span_end, + ); + + // Add as info detail with location + builder = builder.add_info_at(note.message, range_source_info); + } + } + } + _ => {} + } + } + + builder.build() + } else { + // Fallback for errors not in the table + DiagnosticMessageBuilder::error("Parse error") + .with_location(source_info) + .problem("unexpected character or token here") + .build() + } +} + fn calculate_byte_offset(input: &str, row: usize, column: usize) -> usize { let mut current_row = 0; let mut current_col = 0; diff --git a/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs b/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs index eca9d4b..e6dd1e1 100644 --- a/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs +++ b/crates/quarto-markdown-pandoc/src/wasm_entry_points/mod.rs @@ -5,7 +5,6 @@ use crate::readers; use crate::utils::output::VerboseOutput; -use crate::utils::tree_sitter_log_observer::TreeSitterLogObserver; use std::io; fn pandoc_to_json( @@ -38,13 +37,16 @@ pub fn qmd_to_pandoc( Vec, > { let mut output = VerboseOutput::Sink(io::sink()); - readers::qmd::read( - input, - false, - "", - &mut output, - None:: Vec>, - ) + match readers::qmd::read(input, false, "", &mut output) { + Ok((pandoc, context, _warnings)) => { + // TODO: Decide how to handle warnings in WASM context + Ok((pandoc, context)) + } + Err(diagnostics) => { + // Convert diagnostics to strings for backward compatibility + Err(diagnostics.iter().map(|d| d.to_text(None)).collect()) + } + } } pub fn parse_qmd(input: &[u8]) -> String { diff --git a/crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd new file mode 100644 index 0000000..46418ec --- /dev/null +++ b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-error.qmd @@ -0,0 +1,7 @@ +--- +title: hello +resources: + - !md images/*.png +# - !str "images/*.png" +# - !md images/*.png +--- \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd new file mode 100644 index 0000000..54c8208 --- /dev/null +++ b/crates/quarto-markdown-pandoc/tests/claude-examples/meta-warning.qmd @@ -0,0 +1,7 @@ +--- +title: hello +resources: + - images/*.png +# - !str "images/*.png" +# - !md images/*.png +--- \ No newline at end of file diff --git a/crates/quarto-markdown-pandoc/tests/test.rs b/crates/quarto-markdown-pandoc/tests/test.rs index 52f63fe..68ce4c0 100644 --- a/crates/quarto-markdown-pandoc/tests/test.rs +++ b/crates/quarto-markdown-pandoc/tests/test.rs @@ -95,20 +95,8 @@ fn matches_pandoc_markdown_reader(input: &str) -> bool { let mut buf1 = Vec::new(); let mut buf2 = Vec::new(); - let (doc, context) = readers::qmd::read( - input.as_bytes(), - false, - "", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .unwrap(); + let (doc, context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "", &mut std::io::sink()).unwrap(); writers::native::write(&doc, &mut buf1).unwrap(); let native_output = String::from_utf8(buf1).expect("Invalid UTF-8 in output"); writers::json::write(&doc, &context, &mut buf2).unwrap(); @@ -286,13 +274,13 @@ where input.push('\n'); // ensure the input ends with a newline } let mut output_stream = VerboseOutput::Sink(io::sink()); - let (pandoc, context) = readers::qmd::read( + let (pandoc, context, _warnings) = readers::qmd::read( input.as_bytes(), false, &path.to_string_lossy(), &mut output_stream, - None:: Vec>, - ).unwrap(); + ) + .unwrap(); writer(&pandoc, &context, &mut buffer).unwrap(); let output = String::from_utf8(buffer).expect("Invalid UTF-8 in output"); @@ -613,11 +601,10 @@ fn test_markdown_writer_smoke() { false, path.to_str().unwrap(), &mut std::io::sink(), - None:: Vec> ); match doc_result { - Ok((doc, _context)) => { + Ok((doc, _context, _warnings)) => { // Write it back out using the markdown writer let mut buf = Vec::new(); writers::qmd::write(&doc, &mut buf).expect("Failed to write markdown"); @@ -657,13 +644,13 @@ fn test_qmd_roundtrip_consistency() { let original_qmd = std::fs::read_to_string(&path).expect("Failed to read file"); // Step 1: QMD -> JSON - let (doc1, context1) = readers::qmd::read( + let (doc1, context1, _warnings) = readers::qmd::read( original_qmd.as_bytes(), false, path.to_str().unwrap(), &mut std::io::sink(), - None:: Vec> - ).expect("Failed to parse original QMD"); + ) + .expect("Failed to parse original QMD"); let mut json_buf = Vec::new(); writers::json::write(&doc1, &context1, &mut json_buf) @@ -679,13 +666,13 @@ fn test_qmd_roundtrip_consistency() { let regenerated_qmd = String::from_utf8(qmd_buf).expect("Invalid UTF-8 in QMD"); // Step 3: QMD -> JSON again - let (doc3, context3) = readers::qmd::read( + let (doc3, context3, _warnings) = readers::qmd::read( regenerated_qmd.as_bytes(), false, "", &mut std::io::sink(), - None:: Vec> - ).expect("Failed to parse regenerated QMD"); + ) + .expect("Failed to parse regenerated QMD"); // Compare JSON representations (without location fields) let mut json1_buf = Vec::new(); @@ -737,18 +724,11 @@ fn test_empty_blockquote_roundtrip() { let original_qmd = std::fs::read_to_string(test_file).expect("Failed to read file"); // Step 1: QMD -> JSON - let (doc1, context1) = readers::qmd::read( + let (doc1, context1, _warnings) = readers::qmd::read( original_qmd.as_bytes(), false, test_file, &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse original QMD"); @@ -765,18 +745,11 @@ fn test_empty_blockquote_roundtrip() { let regenerated_qmd = String::from_utf8(qmd_buf).expect("Invalid UTF-8 in QMD"); // Step 3: QMD -> JSON again - let (doc3, context3) = readers::qmd::read( + let (doc3, context3, _warnings) = readers::qmd::read( regenerated_qmd.as_bytes(), false, "", &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse regenerated QMD"); diff --git a/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs b/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs index 51acb18..73aed64 100644 --- a/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs +++ b/crates/quarto-markdown-pandoc/tests/test_inline_locations.rs @@ -78,7 +78,7 @@ fn test_inline_source_locations() { let hello_str = &inlines[0]; assert_eq!(hello_str["t"], "Str"); assert_eq!(hello_str["c"], "hello"); - let (start_off, start_row, start_col, end_off, end_row, end_col, _type) = + let (start_off, _start_row, start_col, end_off, _end_row, end_col, _type) = resolve_source_ref(&hello_str["s"], pool); assert_eq!(start_col, 0); assert_eq!(start_off, 0); diff --git a/crates/quarto-markdown-pandoc/tests/test_json_errors.rs b/crates/quarto-markdown-pandoc/tests/test_json_errors.rs index 80fd0d8..71b8f77 100644 --- a/crates/quarto-markdown-pandoc/tests/test_json_errors.rs +++ b/crates/quarto-markdown-pandoc/tests/test_json_errors.rs @@ -1,46 +1,23 @@ use quarto_markdown_pandoc::readers; -use quarto_markdown_pandoc::utils; #[test] fn test_json_error_format() { // Create input with a malformed code block to trigger an error let input = "```{python\n"; - // Test with JSON errors enabled using the formatter closure - let json_formatter = readers::qmd_error_messages::produce_json_error_messages; - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - Some(json_formatter), - ); + // Test with new API + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); assert!(result.is_err()); - let error_messages = result.unwrap_err(); - assert_eq!(error_messages.len(), 1); + let diagnostics = result.unwrap_err(); + assert!(diagnostics.len() > 0, "Should have at least one diagnostic"); - // Verify the error is valid JSON - let json_str = &error_messages[0]; - let parsed: serde_json::Value = serde_json::from_str(json_str).expect("Should be valid JSON"); + // Verify the first diagnostic can be serialized to JSON + let json_value = diagnostics[0].to_json(); - // Verify it's an array - assert!(parsed.is_array()); - let errors = parsed.as_array().unwrap(); - assert!(errors.len() > 0); - - // Verify the structure of the first error - let first_error = &errors[0]; - assert!(first_error.get("filename").is_some()); - assert!(first_error.get("title").is_some()); - assert!(first_error.get("message").is_some()); - assert!(first_error.get("location").is_some()); - - let location = first_error.get("location").unwrap(); - assert!(location.get("row").is_some()); - assert!(location.get("column").is_some()); - assert!(location.get("byte_offset").is_some()); - assert!(location.get("size").is_some()); + // Verify the structure - DiagnosticMessage has a different structure than the old format + assert!(json_value.get("kind").is_some()); + assert!(json_value.get("title").is_some()); } #[test] @@ -48,134 +25,30 @@ fn test_regular_error_format() { // Create input with a malformed code block to trigger an error let input = "```{python\n"; - // Test with JSON errors disabled (None for formatter) - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - None::< - fn(&[u8], &utils::tree_sitter_log_observer::TreeSitterLogObserver, &str) -> Vec, - >, - ); + // Test with new API + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); assert!(result.is_err()); - let error_messages = result.unwrap_err(); + let diagnostics = result.unwrap_err(); - // Regular errors should be plain strings, not JSON - for msg in &error_messages { - // Verify it's NOT valid JSON (should be a formatted error message) - if msg.starts_with("[") || msg.starts_with("{") { - let parse_result: Result = serde_json::from_str(msg); - assert!( - parse_result.is_err(), - "Regular error messages should not be JSON" - ); - } + // Diagnostics can be formatted as text + for diag in &diagnostics { + let text = diag.to_text(None); + // Verify it's a non-empty formatted error message + assert!(!text.is_empty()); } } #[test] -fn test_label_range_note_type() { - // Create input that triggers a label-range error (error 003 from corpus) - let input = "[foo]{#id key=value .class}"; - - // Test with JSON errors enabled using the formatter closure - let json_formatter = readers::qmd_error_messages::produce_json_error_messages; - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - Some(json_formatter), - ); - - assert!(result.is_err()); - let error_messages = result.unwrap_err(); - assert_eq!(error_messages.len(), 1); - - // Verify the error is valid JSON - let json_str = &error_messages[0]; - let parsed: serde_json::Value = serde_json::from_str(json_str).expect("Should be valid JSON"); - - // Verify it's an array - assert!(parsed.is_array()); - let errors = parsed.as_array().unwrap(); - assert!(errors.len() > 0); - - // Find the error with label-range note type - let mut found_label_range = false; - for error in errors { - if let Some(notes) = error.get("notes") { - if let Some(notes_array) = notes.as_array() { - for note in notes_array { - if let Some(note_type) = note.get("noteType") { - if note_type.as_str() == Some("label-range") { - found_label_range = true; - - // Verify the label-range note has a "range" field instead of "location" - assert!( - note.get("range").is_some(), - "label-range note should have a 'range' field" - ); - assert!( - note.get("location").is_none(), - "label-range note should not have a 'location' field" - ); - - let range = note.get("range").unwrap(); - assert!( - range.get("start").is_some(), - "range should have a 'start' field" - ); - assert!( - range.get("end").is_some(), - "range should have an 'end' field" - ); - - let start = range.get("start").unwrap(); - let end = range.get("end").unwrap(); - - // Verify start and end have required fields - assert!(start.get("row").is_some()); - assert!(start.get("column").is_some()); - assert!(start.get("byte_offset").is_some()); - - assert!(end.get("row").is_some()); - assert!(end.get("column").is_some()); - assert!(end.get("byte_offset").is_some()); - - break; - } - } - } - } - } - } - - assert!( - found_label_range, - "Should find at least one label-range note in the error" - ); -} - -#[test] -fn test_missing_newline_warning_json_format() { - // This test verifies that the missing newline warning is formatted as JSON - // when --json-errors is used. Currently this test will fail because the - // warning is always output as plain text. - - // Create input without trailing newline +fn test_newline_warning() { + // Test file without trailing newline let input = "# Hello World"; - // We can't easily test the binary's stderr output from here, but we can - // document the expected behavior: when --json-errors is used, the warning - // should be output as: - // {"title":"Warning","message":"Adding missing newline to end of input"} - // - // Currently it outputs: - // (Warning) Adding missing newline to end of input. + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); + + // Should succeed (the newline is added automatically) + assert!(result.is_ok()); - // This test just documents the issue. The actual fix will be in main.rs - // where the warning is emitted. + // The newline warning is currently emitted in main.rs, not in the library + // This test just verifies that the parse succeeds } diff --git a/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs b/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs index be55280..8ace709 100644 --- a/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs +++ b/crates/quarto-markdown-pandoc/tests/test_json_roundtrip.rs @@ -3,7 +3,6 @@ * Copyright (c) 2025 Posit, PBC */ -use hashlink::LinkedHashMap; use quarto_markdown_pandoc::pandoc::ast_context::ASTContext; use quarto_markdown_pandoc::pandoc::{Block, Inline, Pandoc, Paragraph, Str}; use quarto_markdown_pandoc::readers; diff --git a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs index dcf680b..f63b3a0 100644 --- a/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs +++ b/crates/quarto-markdown-pandoc/tests/test_metadata_source_tracking.rs @@ -61,20 +61,9 @@ fn test_metadata_source_tracking_002_qmd() { // Step 1: Read QMD to PandocAST let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( - content.as_bytes(), - false, - test_file, - &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse QMD"); + let (pandoc, context, _warnings) = + readers::qmd::read(content.as_bytes(), false, test_file, &mut output_stream) + .expect("Failed to parse QMD"); // Verify document-level metadata: title: metadata1 if let MetaValueWithSourceInfo::MetaMap { ref entries, .. } = pandoc.meta { @@ -166,20 +155,9 @@ title: Simple title description: This is a description ---"#; - let (pandoc, _context) = readers::qmd::read( - input.as_bytes(), - false, - "test.qmd", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse"); + let (pandoc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "test.qmd", &mut std::io::sink()) + .expect("Failed to parse"); // Extract metadata let MetaValueWithSourceInfo::MetaMap { entries, .. } = pandoc.meta else { diff --git a/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs b/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs index 8834e94..58f31d8 100644 --- a/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs +++ b/crates/quarto-markdown-pandoc/tests/test_nested_yaml_serialization.rs @@ -38,18 +38,11 @@ fn test_yaml_serialization_size_scaling() { // Parse QMD to PandocAST let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( + let (pandoc, context, _warnings) = readers::qmd::read( qmd_content.as_bytes(), false, "test.qmd", &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse QMD"); @@ -101,20 +94,9 @@ fn test_yaml_serialization_with_siblings() { // Parse and serialize let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( - yaml.as_bytes(), - false, - "test.qmd", - &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse QMD"); + let (pandoc, context, _warnings) = + readers::qmd::read(yaml.as_bytes(), false, "test.qmd", &mut output_stream) + .expect("Failed to parse QMD"); let mut json_output = Vec::new(); writers::json::write(&pandoc, &context, &mut json_output).expect("Failed to write JSON"); @@ -148,20 +130,9 @@ Some content. let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( - yaml.as_bytes(), - false, - "test.qmd", - &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .expect("Failed to parse QMD"); + let (pandoc, context, _warnings) = + readers::qmd::read(yaml.as_bytes(), false, "test.qmd", &mut output_stream) + .expect("Failed to parse QMD"); let mut json_output = Vec::new(); writers::json::write(&pandoc, &context, &mut json_output).expect("Failed to write JSON"); @@ -236,18 +207,11 @@ fn test_binary_tree_serialization() { // Parse QMD to PandocAST let mut output_stream = quarto_markdown_pandoc::utils::output::VerboseOutput::Sink(std::io::sink()); - let (pandoc, context) = readers::qmd::read( + let (pandoc, context, _warnings) = readers::qmd::read( qmd_content.as_bytes(), false, "test.qmd", &mut output_stream, - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, ) .expect("Failed to parse QMD"); diff --git a/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs b/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs index eeaf5e4..b4564df 100644 --- a/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs +++ b/crates/quarto-markdown-pandoc/tests/test_ordered_list_formatting.rs @@ -21,20 +21,8 @@ fn test_ordered_list_10plus_formatting() { 11. Eleventh item"#; // Parse the markdown - let (doc, _context) = readers::qmd::read( - input.as_bytes(), - false, - "", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .unwrap(); + let (doc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "", &mut std::io::sink()).unwrap(); // Write it back out let mut buf = Vec::new(); @@ -87,20 +75,8 @@ fn test_ordered_list_continuation_indentation() { with continuation"#; // Parse the markdown - let (doc, _context) = readers::qmd::read( - input.as_bytes(), - false, - "", - &mut std::io::sink(), - None::< - fn( - &[u8], - &quarto_markdown_pandoc::utils::tree_sitter_log_observer::TreeSitterLogObserver, - &str, - ) -> Vec, - >, - ) - .unwrap(); + let (doc, _context, _warnings) = + readers::qmd::read(input.as_bytes(), false, "", &mut std::io::sink()).unwrap(); // Write it back out let mut buf = Vec::new(); diff --git a/crates/quarto-markdown-pandoc/tests/test_warnings.rs b/crates/quarto-markdown-pandoc/tests/test_warnings.rs index 3f22d0d..4455175 100644 --- a/crates/quarto-markdown-pandoc/tests/test_warnings.rs +++ b/crates/quarto-markdown-pandoc/tests/test_warnings.rs @@ -1,5 +1,4 @@ use quarto_markdown_pandoc::readers; -use quarto_markdown_pandoc::utils; #[test] fn test_caption_without_table_warning() { @@ -13,15 +12,7 @@ Some content "#; // Parse the document - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - None::< - fn(&[u8], &utils::tree_sitter_log_observer::TreeSitterLogObserver, &str) -> Vec, - >, - ); + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); // Parsing should succeed (warnings are not errors) assert!( @@ -48,15 +39,7 @@ fn test_caption_with_table_no_warning() { "#; // Parse the document - let result = readers::qmd::read( - input.as_bytes(), - false, - "test.md", - &mut std::io::sink(), - None::< - fn(&[u8], &utils::tree_sitter_log_observer::TreeSitterLogObserver, &str) -> Vec, - >, - ); + let result = readers::qmd::read(input.as_bytes(), false, "test.md", &mut std::io::sink()); // Parsing should succeed and no warnings should be emitted assert!( @@ -64,7 +47,7 @@ fn test_caption_with_table_no_warning() { "Document with valid table caption should parse successfully" ); - let (pandoc, _context) = result.unwrap(); + let (pandoc, _context, _warnings) = result.unwrap(); // Verify we have a table in the output assert!( diff --git a/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs b/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs index 06d501a..787142c 100644 --- a/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs +++ b/crates/quarto-markdown-pandoc/tests/test_yaml_tag_regression.rs @@ -12,6 +12,7 @@ use quarto_markdown_pandoc::pandoc::meta::{ rawblock_to_meta_with_source_info, }; use quarto_markdown_pandoc::pandoc::{Inline, RawBlock}; +use quarto_markdown_pandoc::utils::diagnostic_collector::DiagnosticCollector; #[test] fn test_yaml_tags_preserved_in_new_api() { @@ -42,10 +43,12 @@ regular: This has *emphasis* }; let context = ASTContext::default(); - let meta = rawblock_to_meta_with_source_info(&block, &context); + let mut diagnostics = DiagnosticCollector::new(); + let meta = rawblock_to_meta_with_source_info(&block, &context, &mut diagnostics); let mut outer_meta = Vec::new(); - let parsed_meta = parse_metadata_strings_with_source_info(meta, &mut outer_meta); + let parsed_meta = + parse_metadata_strings_with_source_info(meta, &mut outer_meta, &mut diagnostics); // Extract entries let entries = if let MetaValueWithSourceInfo::MetaMap { entries, .. } = parsed_meta { @@ -65,27 +68,14 @@ regular: This has *emphasis* } = &tagged_path_entry.value { assert_eq!(inlines.len(), 1, "Expected exactly one inline"); - if let Inline::Span(span) = &inlines[0] { - // Should have yaml-tagged-string class - assert!( - span.attr.1.contains(&"yaml-tagged-string".to_string()), - "Expected yaml-tagged-string class, found: {:?}", - span.attr.1 - ); - // Should have tag attribute - assert_eq!( - span.attr.2.get("tag"), - Some(&"path".to_string()), - "Expected tag=path attribute" - ); - // Extract the string content - if let Inline::Str(s) = &span.content[0] { - assert_eq!(s.text, "images/*.png"); - } else { - panic!("Expected Str inline inside Span"); - } + // !path tag should produce plain Str (no Span wrapper) + if let Inline::Str(s) = &inlines[0] { + assert_eq!(s.text, "images/*.png"); } else { - panic!("Expected Span inline, got: {:?}", inlines[0]); + panic!( + "Expected plain Str inline for !path tag, got: {:?}", + inlines[0] + ); } } else { panic!( diff --git a/crates/quarto-source-map/src/context.rs b/crates/quarto-source-map/src/context.rs index 5d0ded6..3c4100d 100644 --- a/crates/quarto-source-map/src/context.rs +++ b/crates/quarto-source-map/src/context.rs @@ -15,6 +15,11 @@ pub struct SourceContext { pub struct SourceFile { /// File path or identifier pub path: String, + /// File content (for ephemeral/in-memory files) + /// When Some, content is stored in memory (e.g., for or test files) + /// When None, content should be read from disk using the path + #[serde(skip_serializing_if = "Option::is_none")] + pub content: Option, /// File information for efficient location lookups (optional for serialization) #[serde(skip_serializing_if = "Option::is_none")] pub file_info: Option, @@ -36,11 +41,32 @@ impl SourceContext { } /// Add a file to the context and return its ID + /// + /// - If content is Some: Creates an ephemeral (in-memory) file. Content is stored and used for ariadne rendering. + /// - If content is None: Creates a disk-backed file. Content will be read from disk when needed (path must exist). + /// + /// For ephemeral files, FileInformation is created immediately from the provided content. + /// For disk-backed files, FileInformation is created by reading from disk if the path exists. pub fn add_file(&mut self, path: String, content: Option) -> FileId { let id = FileId(self.files.len()); - let file_info = content.as_ref().map(|c| FileInformation::new(c)); + + // For ephemeral files (content provided), store it and create FileInformation + // For disk-backed files (no content), try to read from disk for FileInformation only + let (stored_content, content_for_info) = match content { + Some(c) => { + // Ephemeral file: store content and use it for FileInformation + (Some(c.clone()), Some(c)) + } + None => { + // Disk-backed file: don't store content, but try to read for FileInformation + (None, std::fs::read_to_string(&path).ok()) + } + }; + + let file_info = content_for_info.as_ref().map(|c| FileInformation::new(c)); self.files.push(SourceFile { path, + content: stored_content, file_info, metadata: FileMetadata { file_type: None }, }); @@ -52,7 +78,11 @@ impl SourceContext { self.files.get(id.0) } - /// Create a copy without file information (for serialization) + /// Create a copy without FileInformation (for serialization) + /// + /// Note: This preserves the content field for ephemeral files, as they need + /// content to be serialized for proper deserialization. Only FileInformation + /// is removed since it can be reconstructed from content. pub fn without_content(&self) -> Self { SourceContext { files: self @@ -60,6 +90,7 @@ impl SourceContext { .iter() .map(|f| SourceFile { path: f.path.clone(), + content: f.content.clone(), // Preserve content for ephemeral files file_info: None, metadata: f.metadata.clone(), }) diff --git a/crates/quarto-yaml/claude-notes/implementation-plan.md b/crates/quarto-yaml/claude-notes/implementation-plan.md index 2350bdc..984ba83 100644 --- a/crates/quarto-yaml/claude-notes/implementation-plan.md +++ b/crates/quarto-yaml/claude-notes/implementation-plan.md @@ -2,7 +2,7 @@ ## Overview -This crate implements `YamlWithSourceInfo`, a data structure that wraps `yaml-rust2::Yaml` with source location tracking. +This crate implements `YamlWithSourceInfo`, a data structure that wraps `yaml-rust2::Yaml` with source location tracking. This uses the **owned data approach** as decided in the design discussion (see `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md`). ## Architecture Decision: Owned Data @@ -157,4 +157,11 @@ impl MarkedEventReceiver for YamlBuilder { 1. **Config merging** - Merge multiple YamlWithSourceInfo objects 2. **Validation** - Schema validation with source positions 3. **Unified SourceInfo** - Replace with project-wide SourceInfo type -4. **Multi-document** - Support YAML streams +4. **YAML tags** - Support for !expr and custom tags +5. **Multi-document** - Support YAML streams + +## References + +- Design document: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/yaml-with-source-info-design.md` +- Session log: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/session-logs/2025-10-13-yaml-lifetime-vs-owned-discussion.md` +- rust-analyzer patterns: `/Users/cscheid/repos/github/cscheid/kyoto/claude-notes/rust-analyzer-owned-data-patterns.md` From a688633c0f4515f377326a848abd0d11e63feeb0 Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Wed, 22 Oct 2025 06:46:49 -0500 Subject: [PATCH 3/4] perf: increase parsing speed by 2x by being careful around parsing tree-sitter log events --- .../src/utils/tree_sitter_log_observer.rs | 104 ++++++++---------- 1 file changed, 46 insertions(+), 58 deletions(-) diff --git a/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs b/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs index 4a2bbc3..e1ae909 100644 --- a/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs +++ b/crates/quarto-markdown-pandoc/src/utils/tree_sitter_log_observer.rs @@ -3,7 +3,7 @@ * Copyright (c) 2025 Posit, PBC */ -use std::collections::HashMap; +use std::collections::HashMap; // Still needed for TreeSitterParseLog::processes #[derive(Debug, PartialEq, Eq, Clone)] pub enum TreeSitterLogState { @@ -82,24 +82,39 @@ impl TreeSitterLogObserver { } pub fn log(&mut self, _log_type: tree_sitter::LogType, message: &str) { // Implement your logging logic here - let str = message.to_string(); - - let words: Vec<&str> = str.split_whitespace().collect(); - if words.len() == 0 { + let words: Vec<&str> = message.split_whitespace().collect(); + if words.is_empty() { eprintln!("Empty log message from tree-sitter"); return; } - let params: HashMap = words[1..] - .iter() - .filter_map(|pair| { - let pair = pair.trim_suffix(","); - let mut split = pair.splitn(2, ':'); - if let (Some(key), Some(value_str)) = (split.next(), split.next()) { - return Some((key.to_string(), value_str.to_string())); + + // Extract parameters directly into typed variables instead of creating a HashMap. + // This eliminates expensive allocations: each HashMap creation costs ~608 bytes + // (HashMap allocation + String allocations for keys and values + hash computations). + // With only 6 possible parameters that are always parsed to the same types, + // we can use simple Option variables on the stack (48 bytes total). + let mut version: Option = None; + let mut state: Option = None; + let mut row: Option = None; + let mut col: Option = None; + let mut sym: Option<&str> = None; + let mut size: Option = None; + + // Single pass through parameters + for pair in &words[1..] { + let pair = pair.trim_end_matches(','); + if let Some((key, value)) = pair.split_once(':') { + match key { + "version" => version = value.parse().ok(), + "state" => state = value.parse().ok(), + "row" => row = value.parse().ok(), + "col" => col = value.parse().ok(), + "sym" => sym = Some(value), + "size" => size = value.parse().ok(), + _ => {} // Ignore unknown parameters } - None - }) - .collect(); + } + } match words[0] { "new_parse" => { if self.state != TreeSitterLogState::Idle { @@ -121,11 +136,7 @@ impl TreeSitterLogObserver { self.state = TreeSitterLogState::Idle; } "resume" => { - let version = params - .get("version") - .expect("Missing 'version' in process log") - .parse::() - .expect("Failed to parse 'version' as usize"); + let version = version.expect("Missing 'version' in process log"); let current_parse = self .parses .last_mut() @@ -133,26 +144,10 @@ impl TreeSitterLogObserver { current_parse.current_process = Some(version); } "process" => { - let version = params - .get("version") - .expect("Missing 'version' in process log") - .parse::() - .expect("Failed to parse 'version' as usize"); - let state = params - .get("state") - .expect("Missing 'state' in process log") - .parse::() - .expect("Failed to parse 'state' as usize"); - let row = params - .get("row") - .expect("Missing 'row' in process log") - .parse::() - .expect("Failed to parse 'row' as usize"); - let column = params - .get("col") - .expect("Missing 'col' in process log") - .parse::() - .expect("Failed to parse 'col' as usize"); + let version = version.expect("Missing 'version' in process log"); + let state = state.expect("Missing 'state' in process log"); + let row = row.expect("Missing 'row' in process log"); + let column = col.expect("Missing 'col' in process log"); let current_parse = self .parses @@ -196,6 +191,9 @@ impl TreeSitterLogObserver { .push(current_process_message.clone()); } "lexed_lookahead" => { + let sym_str = sym.expect("Missing 'sym' in lexed_lookahead log"); + let size_val = size.expect("Missing 'size' in lexed_lookahead log"); + let current_parse = self .parses .last_mut() @@ -208,15 +206,13 @@ impl TreeSitterLogObserver { .current_message .as_mut() .expect("No current process message"); - current_parse.current_lookahead = Some(( - params.get("sym").unwrap().to_string(), - params.get("size").unwrap().parse::().unwrap(), - )); - current_process_message.sym = params.get("sym").unwrap().to_string(); - current_process_message.size = - params.get("size").unwrap().parse::().unwrap(); + current_parse.current_lookahead = Some((sym_str.to_string(), size_val)); + current_process_message.sym = sym_str.to_string(); + current_process_message.size = size_val; } "shift" => { + let state_val = state.expect("Missing 'state' in shift log"); + let current_parse = self .parses .last_mut() @@ -229,18 +225,13 @@ impl TreeSitterLogObserver { .current_message .as_mut() .expect("No current process message"); - let state = params - .get("state") - .expect("Missing 'state' in shift log") - .parse::() - .expect("Failed to parse 'state' as usize"); let size = current_parse .current_lookahead .as_ref() .map(|(_, s)| *s) .unwrap_or(0); current_parse.consumed_tokens.push(ConsumedToken { - lr_state: state, + lr_state: state_val, row: current_process_message.row, column: current_process_message.column, size, @@ -278,11 +269,8 @@ impl TreeSitterLogObserver { } } } - match self.parses.last_mut() { - Some(current_parse) => { - current_parse.messages.push(str); - } - _ => {} + if let Some(current_parse) = self.parses.last_mut() { + current_parse.messages.push(message.to_string()); } } } From a153a7be89c7e9d740b95833640505c2d26cf85a Mon Sep 17 00:00:00 2001 From: Carlos Scheidegger Date: Wed, 22 Oct 2025 07:02:43 -0500 Subject: [PATCH 4/4] cargo fmt --- .../quarto-error-reporting/src/diagnostic.rs | 39 ++++++++++++------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/crates/quarto-error-reporting/src/diagnostic.rs b/crates/quarto-error-reporting/src/diagnostic.rs index 5608f71..ab2b1fd 100644 --- a/crates/quarto-error-reporting/src/diagnostic.rs +++ b/crates/quarto-error-reporting/src/diagnostic.rs @@ -296,17 +296,20 @@ impl DiagnosticMessage { // Check if we have any location info that could be displayed with ariadne // This includes the main diagnostic location OR any detail with a location - let has_any_location = self.location.is_some() - || self.details.iter().any(|d| d.location.is_some()); + let has_any_location = + self.location.is_some() || self.details.iter().any(|d| d.location.is_some()); // If we have location info and source context, render ariadne source display let has_ariadne = if has_any_location && ctx.is_some() { // Use main location if available, otherwise use first detail location - let location = self.location.as_ref() + let location = self + .location + .as_ref() .or_else(|| self.details.iter().find_map(|d| d.location.as_ref())); if let Some(loc) = location { - if let Some(ariadne_output) = self.render_ariadne_source_context(loc, ctx.unwrap()) { + if let Some(ariadne_output) = self.render_ariadne_source_context(loc, ctx.unwrap()) + { result.push_str(&ariadne_output); true } else { @@ -494,14 +497,22 @@ impl DiagnosticMessage { } /// Extract the original file_id from a SourceInfo by traversing the mapping chain - fn extract_file_id(source_info: &quarto_source_map::SourceInfo) -> Option { + fn extract_file_id( + source_info: &quarto_source_map::SourceInfo, + ) -> Option { match &source_info.mapping { quarto_source_map::SourceMapping::Original { file_id } => Some(*file_id), - quarto_source_map::SourceMapping::Substring { parent, .. } => Self::extract_file_id(parent), - quarto_source_map::SourceMapping::Transformed { parent, .. } => Self::extract_file_id(parent), + quarto_source_map::SourceMapping::Substring { parent, .. } => { + Self::extract_file_id(parent) + } + quarto_source_map::SourceMapping::Transformed { parent, .. } => { + Self::extract_file_id(parent) + } quarto_source_map::SourceMapping::Concat { pieces } => { // For concatenated sources, use the first piece's file_id - pieces.first().and_then(|p| Self::extract_file_id(&p.source_info)) + pieces + .first() + .and_then(|p| Self::extract_file_id(&p.source_info)) } } } @@ -545,11 +556,8 @@ impl DiagnosticMessage { }; // Build the report using the mapped offset for proper line:column display - let mut report = Report::build( - report_kind, - file.path.clone(), - start_mapped.location.offset, - ); + let mut report = + Report::build(report_kind, file.path.clone(), start_mapped.location.offset); // Add title with error code if let Some(code) = &self.code { @@ -608,7 +616,10 @@ impl DiagnosticMessage { let report = report.finish(); let mut output = Vec::new(); report - .write((file.path.clone(), Source::from(content.as_str())), &mut output) + .write( + (file.path.clone(), Source::from(content.as_str())), + &mut output, + ) .ok()?; String::from_utf8(output).ok()