Skip to content

Commit f21f4e7

Browse files
committed
WIP: attempt to reduce string allocations for ClippyAnnotation
1 parent 1737326 commit f21f4e7

File tree

4 files changed

+159
-73
lines changed

4 files changed

+159
-73
lines changed

.github/actions/clippy-annotation-reporter/src/analyzer.rs

Lines changed: 105 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -7,24 +7,26 @@
77
//! including getting changed files, comparing branches, and producing analysis results.
88
99
use anyhow::{Context as _, Result};
10+
use octocrab::params::pulls::MediaType::Raw;
1011
use octocrab::Octocrab;
1112
use regex::Regex;
1213
use std::collections::{HashMap, HashSet};
1314
use std::process::Command;
15+
use std::rc::Rc;
1416

1517
/// Represents a clippy annotation in code
1618
#[derive(Debug, Clone)]
1719
pub struct ClippyAnnotation {
18-
pub file: String,
19-
pub rule: String,
20+
pub file: Rc<String>,
21+
pub rule: Rc<String>,
2022
}
2123

2224
/// Result of annotation analysis
2325
pub struct AnalysisResult {
2426
pub base_annotations: Vec<ClippyAnnotation>,
2527
pub head_annotations: Vec<ClippyAnnotation>,
26-
pub base_counts: HashMap<String, usize>,
27-
pub head_counts: HashMap<String, usize>,
28+
pub base_counts: HashMap<Rc<String>, usize>,
29+
pub head_counts: HashMap<Rc<String>, usize>,
2830
pub changed_files: HashSet<String>,
2931
pub base_crate_counts: HashMap<String, usize>,
3032
pub head_crate_counts: HashMap<String, usize>,
@@ -78,9 +80,12 @@ fn analyze_annotations(
7880
))
7981
.context("Failed to compile annotation regex")?;
8082

81-
let mut base_annotations = Vec::new();
82-
let mut head_annotations = Vec::new();
83-
let mut changed_files = HashSet::new();
83+
let mut base_annotations = Vec::with_capacity(files.len() * 3);
84+
let mut head_annotations = Vec::with_capacity(files.len() * 3);
85+
let mut changed_files = HashSet::with_capacity(files.len());
86+
87+
// Cache for rule Rc instances to avoid duplicates
88+
let mut rule_cache = HashMap::new();
8489

8590
// Process each file
8691
for file in files {
@@ -116,6 +121,7 @@ fn analyze_annotations(
116121
file,
117122
&base_content,
118123
&annotation_regex,
124+
&mut rule_cache,
119125
);
120126

121127
// Find annotations in head branch
@@ -124,6 +130,7 @@ fn analyze_annotations(
124130
file,
125131
&head_content,
126132
&annotation_regex,
133+
&mut rule_cache,
127134
);
128135
}
129136

@@ -206,32 +213,48 @@ fn find_annotations(
206213
file: &str,
207214
content: &str,
208215
regex: &Regex,
216+
rule_cache: &mut HashMap<String, Rc<String>>,
209217
) {
218+
// Use Rc for file path
219+
let file_rc = Rc::new(file.to_string());
220+
210221
for line in content.lines() {
211222
if let Some(captures) = regex.captures(line) {
212223
if let Some(rule_match) = captures.get(1) {
213-
let rule = rule_match.as_str().to_owned();
224+
let rule_str = rule_match.as_str().to_string();
225+
226+
// Get or create Rc for this rule
227+
let rule_rc = match rule_cache.get(&rule_str) {
228+
Some(cached) => Rc::clone(cached),
229+
None => {
230+
let rc = Rc::new(rule_str.clone());
231+
rule_cache.insert(rule_str, Rc::clone(&rc));
232+
rc
233+
}
234+
};
235+
214236
annotations.push(ClippyAnnotation {
215-
file: file.to_owned(),
216-
rule,
237+
file: Rc::clone(&file_rc),
238+
rule: rule_rc,
217239
});
218240
}
219241
}
220242
}
221243
}
222244

223245
/// Count annotations by rule
224-
fn count_annotations_by_rule(annotations: &[ClippyAnnotation]) -> HashMap<String, usize> {
225-
let mut counts = HashMap::new();
246+
fn count_annotations_by_rule(annotations: &[ClippyAnnotation]) -> HashMap<Rc<String>, usize> {
247+
let mut counts = HashMap::with_capacity(annotations.len().min(20));
226248

227249
for annotation in annotations {
228-
*counts.entry(annotation.rule.clone()).or_insert(0) += 1;
250+
*counts.entry(Rc::clone(&annotation.rule)).or_insert(0) += 1;
229251
}
230252

231253
counts
232254
}
233255

234256
/// Get crate information for a given file path
257+
// TODO: EK - is this the right way to determine crate names?
235258
fn get_crate_for_file(file_path: &str) -> String {
236259
// Simple heuristic: use the first directory as the crate name
237260
// For files in src/ directory, use the parent directory
@@ -267,10 +290,23 @@ fn get_crate_for_file(file_path: &str) -> String {
267290

268291
/// Count annotations by crate
269292
fn count_annotations_by_crate(annotations: &[ClippyAnnotation]) -> HashMap<String, usize> {
270-
let mut counts = HashMap::new();
293+
let mut counts = HashMap::with_capacity(annotations.len().min(20));
294+
// TODO: EK - does this make sense?
295+
let mut crate_cache: HashMap<String, String> = HashMap::new();
271296

272297
for annotation in annotations {
273-
let crate_name = get_crate_for_file(&annotation.file);
298+
let file_path = annotation.file.as_str();
299+
300+
// Use cached crate name if we've seen this file before
301+
let crate_name = match crate_cache.get(file_path) {
302+
Some(name) => name.clone(),
303+
None => {
304+
let name = get_crate_for_file(file_path).to_owned();
305+
crate_cache.insert(file_path.to_string(), name.clone());
306+
name
307+
}
308+
};
309+
274310
*counts.entry(crate_name).or_insert(0) += 1;
275311
}
276312

@@ -323,7 +359,8 @@ fn analyze_all_files_for_crates(
323359

324360
let mut base_annotations = Vec::new();
325361
let mut head_annotations = Vec::new();
326-
362+
// TODO: EK - Should this be static?
363+
let mut rule_cache = HashMap::new();
327364
// Process each file
328365
for file in files {
329366
// Get file content from base branch
@@ -362,6 +399,7 @@ fn analyze_all_files_for_crates(
362399
file,
363400
&base_content,
364401
&annotation_regex,
402+
&mut rule_cache,
365403
);
366404

367405
// Find annotations in head branch
@@ -370,6 +408,7 @@ fn analyze_all_files_for_crates(
370408
file,
371409
&head_content,
372410
&annotation_regex,
411+
&mut rule_cache,
373412
);
374413
}
375414

@@ -382,8 +421,8 @@ fn analyze_all_files_for_crates(
382421
#[cfg(test)]
383422
mod tests {
384423
use super::*;
424+
use std::rc::Rc;
385425

386-
// Test for find_annotations
387426
#[test]
388427
fn test_find_annotations() {
389428
let mut annotations = Vec::new();
@@ -392,77 +431,81 @@ mod tests {
392431
fn test() {
393432
#[allow(clippy::unwrap_used)]
394433
let x = Some(5).unwrap();
395-
434+
396435
#[allow(clippy::expect_used)]
397436
let y = Some(10).expect("This should exist");
398437
}
399438
"#;
400439

401-
let regex = Regex::new(r"#\s*\[\s*allow\s*\(\s*clippy\s*::\s*(unwrap_used|expect_used)\s*\)\s*]").unwrap();
440+
let regex =
441+
Regex::new(r"#\s*\[\s*allow\s*\(\s*clippy\s*::\s*(unwrap_used|expect_used)\s*\)\s*\]")
442+
.unwrap();
443+
let mut rule_cache = HashMap::new();
402444

403-
find_annotations(&mut annotations, file, content, &regex);
445+
find_annotations(&mut annotations, file, content, &regex, &mut rule_cache);
404446

405447
assert_eq!(annotations.len(), 2);
406-
assert_eq!(annotations[0].rule, "unwrap_used");
407-
assert_eq!(annotations[0].file, file);
408-
assert_eq!(annotations[1].rule, "expect_used");
448+
assert_eq!(*annotations[0].rule, "unwrap_used");
449+
assert_eq!(*annotations[0].file, "src/test.rs");
450+
assert_eq!(*annotations[1].rule, "expect_used");
409451
}
410452

411-
// Test for count_annotations_by_rule
412453
#[test]
413454
fn test_count_annotations_by_rule() {
455+
let rule1 = Rc::new("unwrap_used".to_string());
456+
let rule2 = Rc::new("expect_used".to_string());
457+
let file1 = Rc::new("src/test1.rs".to_string());
458+
let file2 = Rc::new("src/test2.rs".to_string());
459+
414460
let annotations = vec![
415461
ClippyAnnotation {
416-
file: "src/test1.rs".to_owned(),
417-
rule: "unwrap_used".to_owned(),
462+
file: Rc::clone(&file1),
463+
rule: Rc::clone(&rule1),
418464
},
419465
ClippyAnnotation {
420-
file: "src/test1.rs".to_owned(),
421-
rule: "expect_used".to_owned(),
466+
file: Rc::clone(&file1),
467+
rule: Rc::clone(&rule2),
422468
},
423469
ClippyAnnotation {
424-
file: "src/test2.rs".to_owned(),
425-
rule: "unwrap_used".to_owned(),
470+
file: Rc::clone(&file2),
471+
rule: Rc::clone(&rule1),
426472
},
427473
];
428474

429475
let counts = count_annotations_by_rule(&annotations);
430476

431477
assert_eq!(counts.len(), 2);
432-
assert_eq!(*counts.get("unwrap_used").unwrap(), 2);
433-
assert_eq!(*counts.get("expect_used").unwrap(), 1);
478+
assert_eq!(*counts.get(&rule1).unwrap(), 2);
479+
assert_eq!(*counts.get(&rule2).unwrap(), 1);
434480
}
435481

436-
// Test for get_crate_for_file
437-
#[test]
438-
fn test_get_crate_for_file() {
439-
assert_eq!(get_crate_for_file("src/main.rs"), "root");
440-
assert_eq!(get_crate_for_file("tests/test_utils.rs"), "root");
441-
assert_eq!(get_crate_for_file("crates/foo/src/lib.rs"), "foo");
442-
assert_eq!(get_crate_for_file("foo/src/lib.rs"), "foo");
443-
assert_eq!(get_crate_for_file("bar/tests/test.rs"), "bar");
444-
assert_eq!(get_crate_for_file("standalone.rs"), "standalone");
445-
}
446-
447-
// Test for count_annotations_by_crate
448482
#[test]
449483
fn test_count_annotations_by_crate() {
484+
let rule1 = Rc::new("unwrap_used".to_string());
485+
let rule2 = Rc::new("expect_used".to_string());
486+
let rule3 = Rc::new("panic".to_string());
487+
488+
let file1 = Rc::new("src/main.rs".to_string());
489+
let file2 = Rc::new("crates/foo/src/lib.rs".to_string());
490+
let file3 = Rc::new("crates/foo/src/utils.rs".to_string());
491+
let file4 = Rc::new("bar/src/lib.rs".to_string());
492+
450493
let annotations = vec![
451494
ClippyAnnotation {
452-
file: "src/main.rs".to_owned(),
453-
rule: "unwrap_used".to_owned(),
495+
file: Rc::clone(&file1),
496+
rule: Rc::clone(&rule1),
454497
},
455498
ClippyAnnotation {
456-
file: "crates/foo/src/lib.rs".to_owned(),
457-
rule: "expect_used".to_owned(),
499+
file: Rc::clone(&file2),
500+
rule: Rc::clone(&rule2),
458501
},
459502
ClippyAnnotation {
460-
file: "crates/foo/src/utils.rs".to_owned(),
461-
rule: "unwrap_used".to_owned(),
503+
file: Rc::clone(&file3),
504+
rule: Rc::clone(&rule1),
462505
},
463506
ClippyAnnotation {
464-
file: "bar/src/lib.rs".to_owned(),
465-
rule: "panic".to_owned(),
507+
file: Rc::clone(&file4),
508+
rule: Rc::clone(&rule3),
466509
},
467510
];
468511

@@ -473,4 +516,14 @@ mod tests {
473516
assert_eq!(*counts.get("foo").unwrap(), 2);
474517
assert_eq!(*counts.get("bar").unwrap(), 1);
475518
}
519+
520+
#[test]
521+
fn test_get_crate_for_file() {
522+
assert_eq!(get_crate_for_file("src/main.rs"), "root");
523+
assert_eq!(get_crate_for_file("tests/test_utils.rs"), "root");
524+
assert_eq!(get_crate_for_file("crates/foo/src/lib.rs"), "foo");
525+
assert_eq!(get_crate_for_file("foo/src/lib.rs"), "foo");
526+
assert_eq!(get_crate_for_file("bar/tests/test.rs"), "bar");
527+
assert_eq!(get_crate_for_file("standalone.rs"), "standalone.rs");
528+
}
476529
}

0 commit comments

Comments
 (0)