|
| 1 | +use criterion::{black_box, criterion_group, criterion_main, Criterion}; |
| 2 | +use regex::Replacer; |
| 3 | +use regex::{Captures, Regex}; |
| 4 | + |
/// Rename rules as `(regex pattern, replacement)` pairs.
///
/// Order matters: the sequential strategy applies the rules one after another,
/// and the compound regex lists them as alternatives in this same order.
const RULES: &[(&str, &str)] = &[
    // Remove a span that opens with `[` or `【`, contains one of the listed keywords,
    // and closes with `】` or `]`. The first `.*` is greedy, the second `.*?` is lazy.
    (r"[\u{5b}\u{3010}].*(电影|高清|原盘|蓝光|发布).*?[\u{3011}\u{5d}]", ""),
    // Replace every literal dot with a space.
    (r"\.", " "),
];
| 12 | + |
| 13 | +fn compound_replacement(text: &str, compound_re: &Regex, replacements: &[&str]) -> String { |
| 14 | + struct CompoundSwapper<'a> { |
| 15 | + replacements: &'a [&'a str], |
| 16 | + group_names: Vec<String>, |
| 17 | + } |
| 18 | + |
| 19 | + impl Replacer for CompoundSwapper<'_> { |
| 20 | + fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) { |
| 21 | + for (i, group_name) in self.group_names.iter().enumerate() { |
| 22 | + if caps.name(group_name).is_some() { |
| 23 | + dst.push_str(self.replacements[i]); |
| 24 | + return; |
| 25 | + } |
| 26 | + } |
| 27 | + } |
| 28 | + } |
| 29 | + |
| 30 | + let group_names = (0..replacements.len()) |
| 31 | + .map(|i| format!("group{}", i)) |
| 32 | + .collect(); |
| 33 | + |
| 34 | + compound_re |
| 35 | + .replace_all( |
| 36 | + text, |
| 37 | + CompoundSwapper { |
| 38 | + replacements, |
| 39 | + group_names, |
| 40 | + }, |
| 41 | + ) |
| 42 | + .into_owned() |
| 43 | +} |
| 44 | + |
| 45 | +fn build_compound_regex(rules: &[(&str, &str)]) -> Regex { |
| 46 | + let pattern = rules |
| 47 | + .iter() |
| 48 | + .enumerate() |
| 49 | + .map(|(i, (pat, _))| format!(r"(?P<group{}>{})", i, pat)) |
| 50 | + .collect::<Vec<_>>() |
| 51 | + .join("|"); |
| 52 | + Regex::new(&pattern).unwrap() |
| 53 | +} |
| 54 | + |
| 55 | +fn compile_rules<'a>(rules: &[(&'a str, &'a str)]) -> Vec<(Regex, &'a str)> { |
| 56 | + rules |
| 57 | + .iter() |
| 58 | + .map(|(pat, repl)| (Regex::new(pat).unwrap(), *repl)) |
| 59 | + .collect() |
| 60 | +} |
| 61 | + |
| 62 | +fn apply_rename_rules(name: &str, compiled_rules: &[(Regex, &str)]) -> String { |
| 63 | + let mut new_name = name.to_string(); |
| 64 | + for (re, replacement) in compiled_rules { |
| 65 | + new_name = re.replace_all(&new_name, *replacement).into_owned(); |
| 66 | + } |
| 67 | + new_name.trim().to_string() |
| 68 | +} |
| 69 | + |
| 70 | +fn criterion_benchmark(c: &mut Criterion) { |
| 71 | + let text = "【高清影视之家发布 www.WHATMV.com】小丑2:双重妄想[HDR+杜比视界双版本][中文字幕].2024.2160p.UHD.BluRay.Remux.DV.HEVC.TrueHD7.1-ParkHD"; |
| 72 | + |
| 73 | + // Benchmark compound replacement |
| 74 | + let compound_re = build_compound_regex(RULES); |
| 75 | + let replacements: Vec<_> = RULES.iter().map(|(_, repl)| *repl).collect(); |
| 76 | + assert_eq!( |
| 77 | + compound_replacement(text, &compound_re, &replacements), |
| 78 | + "小丑2:双重妄想[HDR+杜比视界双版本][中文字幕] 2024 2160p UHD BluRay Remux DV HEVC TrueHD7 1-ParkHD" |
| 79 | + ); |
| 80 | + c.bench_function("compound_replacement", |b| { |
| 81 | + b.iter(|| { |
| 82 | + compound_replacement( |
| 83 | + black_box(text), |
| 84 | + black_box(&compound_re), |
| 85 | + black_box(&replacements), |
| 86 | + ) |
| 87 | + }) |
| 88 | + }); |
| 89 | + |
| 90 | + assert_eq!( |
| 91 | + apply_rename_rules(text, &compile_rules(RULES)), |
| 92 | + "小丑2:双重妄想[HDR+杜比视界双版本][中文字幕] 2024 2160p UHD BluRay Remux DV HEVC TrueHD7 1-ParkHD" |
| 93 | + ); |
| 94 | + // Benchmark sequential replacement |
| 95 | + let compiled_rules = compile_rules(RULES); |
| 96 | + c.bench_function("apply_rename_rules", |b| { |
| 97 | + b.iter(|| apply_rename_rules(black_box(text), black_box(&compiled_rules))) |
| 98 | + }); |
| 99 | +} |
| 100 | + |
| 101 | +criterion_group!(benches, criterion_benchmark); |
| 102 | +criterion_main!(benches); |
0 commit comments