Perf: ASCII fast path for Range offset computation

johanrd · johanrd · commit c71c45bc9e8e · 2026-03-16T16:04:02.000+01:00
diff --git a/Cargo.toml b/Cargo.toml
@@ -47,6 +47,10 @@ features = [
     "fast-rng"  # Use a faster (but still sufficiently random) RNG
 ]
 
+[[bench]]
+name = "parse_bench"
+harness = false
+
 [dev-dependencies]
 difference = "2"
 regex = "1.11.1"
diff --git a/benches/parse_bench.rs b/benches/parse_bench.rs
@@ -0,0 +1,157 @@
+use content_tag::{Options, Preprocessor};
+use std::time::Instant;
+
+/// Times `Preprocessor::parse` on `src` and returns the best per-parse time in µs.
+///
+/// Performs 100 untimed warmup parses, then three timed rounds of `iterations`
+/// parses each; the fastest round is printed (labelled `name`) and returned.
+fn bench_parse(name: &str, src: &str, iterations: u32) -> f64 {
+    // `black_box` keeps the optimizer from specializing on the input or
+    // discarding the otherwise-unused parse result.
+    let parse_once = || {
+        let p = Preprocessor::new();
+        let _ = std::hint::black_box(p.parse(std::hint::black_box(src), Options::default()));
+    };
+
+    // Warmup (untimed).
+    (0..100).for_each(|_| parse_once());
+
+    // Three timed rounds; keep the fastest per-parse average.
+    let mut best = f64::MAX;
+    for _ in 0..3 {
+        let start = Instant::now();
+        (0..iterations).for_each(|_| parse_once());
+        let per_iter = start.elapsed().as_nanos() as f64 / f64::from(iterations);
+        best = best.min(per_iter);
+    }
+
+    // Count chars rather than bytes so the "chars" label holds for non-ASCII input.
+    let best_micros = best / 1000.0;
+    let chars = src.chars().count();
+    println!("{:<55} {:>8.1}µs per parse  ({} chars)", name, best_micros, chars);
+    best_micros
+}
+
+fn main() {
+    // Global warmup: 500 untimed parses of a tiny component, intended to warm
+    // CPU caches before any measured benchmark runs.
+    {
+        let w = "import Component from '@glimmer/component';\nclass C extends Component { <template>hi</template> }";
+        for _ in 0..500 {
+            let p = Preprocessor::new();
+            let _ = p.parse(w, Options::default());
+        }
+    }
+
+    // Baseline component (one <template>) reused by every test below.
+    let base_component = r#"
+import Component from '@glimmer/component';
+class Comp extends Component {
+  <template>
+    <div class="container">
+      <h1>{{this.title}}</h1>
+      <p>{{this.description}}</p>
+    </div>
+  </template>
+}
+"#;
+
+    // =========================================================
+    // Test 1: Scaling by number of templates
+    //   Baseline component concatenated N times (N = 1, 2, 5, 10, 20).
+    // =========================================================
+    println!("=== Scaling by template count ===\n");
+
+    for repeats in [1, 2, 5, 10, 20] {
+        let src = base_component.repeat(repeats);
+        bench_parse(
+            &format!("{} templates ({} chars)", repeats, src.len()),
+            &src,
+            3000,
+        );
+    }
+
+    // =========================================================
+    // Test 2: Scaling by template content size
+    //   Baseline with 10/50/200 extra <div> rows inserted after the <p> row.
+    // =========================================================
+    println!("\n=== Scaling by template content size ===\n");
+
+    let extra_row = "      <div class=\"item\">{{this.value}}</div>\n";
+
+    // Reference point: the unmodified component (0 extra rows).
+    bench_parse(
+        &format!("0 extra rows ({} chars)", base_component.len()),
+        base_component,
+        3000,
+    );
+
+    for num_rows in [10, 50, 200] {
+        let extra_content = extra_row.repeat(num_rows);
+        let src = base_component.replace(
+            "      <p>{{this.description}}</p>",
+            &format!("      <p>{{{{this.description}}}}</p>\n{}", extra_content),
+        );
+        bench_parse(
+            &format!("{} extra rows inside template ({} chars)", num_rows, src.len()),
+            &src,
+            3000,
+        );
+    }
+
+    // =========================================================
+    // Test 3: Scaling by JS code before the template
+    //   Baseline preceded by 10/50/200 padding lines (larger template byte offset).
+    // =========================================================
+    println!("\n=== Scaling by JS code before template ===\n");
+
+    let extra_line = "const x = 'some padding code to increase byte offset';\n";
+
+    // Reference point: the unmodified component (0 extra lines).
+    bench_parse(
+        &format!("0 extra lines ({} chars)", base_component.len()),
+        base_component,
+        3000,
+    );
+
+    for num_lines in [10, 50, 200] {
+        let prefix = extra_line.repeat(num_lines);
+        let src = format!("{}{}", prefix, base_component);
+        bench_parse(
+            &format!(
+                "{} extra JS lines before template ({} chars)",
+                num_lines,
+                src.len()
+            ),
+            &src,
+            3000,
+        );
+    }
+
+    // =========================================================
+    // Test 4: Typical files (ASCII-only, like every other input in this benchmark)
+    // =========================================================
+    println!("\n=== Typical files ===\n");
+
+    let no_template = r#"
+import { tracked } from '@glimmer/tracking';
+import { action } from '@ember/object';
+import Service, { service } from '@ember/service';
+
+export default class AuthService extends Service {
+  @service declare session: any;
+  @tracked count = 0;
+
+  @action
+  increment() { this.count++; }
+
+  get doubled() { return this.count * 2; }
+}"#;
+
+    bench_parse(
+        &format!("base component (1 template, {} chars)", base_component.len()),
+        base_component,
+        5000,
+    );
+    bench_parse("utility file (no template)", no_template, 5000);
+}
diff --git a/src/lib.rs b/src/lib.rs
@@ -88,6 +88,7 @@ impl Preprocessor {
 
             let mut visitor = locate::LocateContentTagVisitor {
                 occurrences: Default::default(),
+                is_ascii: src.is_ascii(),
                 src: src.to_string(),
             };
 
diff --git a/src/locate.rs b/src/locate.rs
@@ -10,6 +10,7 @@ use swc_ecma_visit::{Visit, VisitWith};
 pub struct LocateContentTagVisitor {
     pub occurrences: Vec<Occurrence>,
     pub src: String,
+    pub is_ascii: bool, // set from `src.is_ascii()`; forwarded to every `Range::new` call
 }
 
 #[derive(Eq, PartialEq, Debug, Serialize)]
@@ -32,10 +33,10 @@ impl LocateContentTagVisitor {
             kind,
             tag_name: "template".to_owned(),
             contents: contents.value.to_string(),
-            range: Range::new(&self.src, span),
-            start_range: Range::new(&self.src, &opening.span),
-            content_range: Range::new(&self.src, &contents.span),
-            end_range: Range::new(&self.src, &closing.span),
+            range: Range::new(&self.src, span, self.is_ascii),
+            start_range: Range::new(&self.src, &opening.span, self.is_ascii),
+            content_range: Range::new(&self.src, &contents.span, self.is_ascii),
+            end_range: Range::new(&self.src, &closing.span, self.is_ascii),
         };
 
         self.occurrences.push(occurrence);
@@ -108,14 +109,28 @@ pub struct Range {
     end_utf16_codepoint: usize,
 }
 impl Range {
-    pub fn new(src: &str, span: &Span) -> Range {
-        Range {
-            start_byte: span.lo.0 as usize - 1,
-            end_byte: span.hi.0 as usize - 1,
-            start_char: src[..span.lo.0 as usize - 1].chars().count(),
-            end_char: src[..span.hi.0 as usize - 1].chars().count(),
-            start_utf16_codepoint: src[..span.lo.0 as usize - 1].encode_utf16().count(),
-            end_utf16_codepoint: src[..span.hi.0 as usize - 1].encode_utf16().count(),
+    /// Builds byte, char and UTF-16 offsets for `span`; `is_ascii` must only be
+    /// `true` when `src` is pure ASCII (all three offsets then coincide).
+    pub fn new(src: &str, span: &Span, is_ascii: bool) -> Range {
+        // (chars, UTF-16 units) before `byte`, counted in one pass over the prefix.
+        let units_before = |byte: usize| {
+            if is_ascii {
+                (byte, byte)
+            } else {
+                src[..byte].chars().fold((0, 0), |(n, u), c| (n + 1, u + c.len_utf16()))
+            }
+        };
+        let start_byte = span.lo.0 as usize - 1;
+        let end_byte = span.hi.0 as usize - 1;
+        let (start_char, start_utf16_codepoint) = units_before(start_byte);
+        let (end_char, end_utf16_codepoint) = units_before(end_byte);
+        Range {
+            start_byte,
+            end_byte,
+            start_char,
+            end_char,
+            start_utf16_codepoint,
+            end_utf16_codepoint,
         }
     }
 }