Skip to content

Commit 1ece6c8

Browse files
committed
优化批量正则替换
1 parent 2706d41 commit 1ece6c8

File tree

7 files changed

+214
-31
lines changed

7 files changed

+214
-31
lines changed

Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,5 +27,9 @@ regex-automata = { version = "0.4", features = ["std", "dfa-search"] }
2727
criterion = "0.5"
2828

2929
[[bench]]
30-
name = "my_benchmark"
30+
name = "split"
31+
harness = false
32+
33+
[[bench]]
34+
name = "replace"
3135
harness = false

benches/replace.rs

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
use criterion::{black_box, criterion_group, criterion_main, Criterion};
2+
use regex::Replacer;
3+
use regex::{Captures, Regex};
4+
5+
/// Rename rules exercised by the benchmark, as `(regex pattern, replacement)` pairs.
const RULES: &[(&str, &str)] = &[
    // A bracketed tag (ASCII `[...]` or full-width `【...】`) containing one of
    // the listed keywords is removed entirely.
    (
        r"[\u{5b}\u{3010}].*(电影|高清|原盘|蓝光|发布).*?[\u{3011}\u{5d}]",
        "",
    ),
    // Every literal dot becomes a space.
    (r"\.", " "),
];
12+
13+
fn compound_replacement(text: &str, compound_re: &Regex, replacements: &[&str]) -> String {
14+
struct CompoundSwapper<'a> {
15+
replacements: &'a [&'a str],
16+
group_names: Vec<String>,
17+
}
18+
19+
impl Replacer for CompoundSwapper<'_> {
20+
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
21+
for (i, group_name) in self.group_names.iter().enumerate() {
22+
if caps.name(group_name).is_some() {
23+
dst.push_str(self.replacements[i]);
24+
return;
25+
}
26+
}
27+
}
28+
}
29+
30+
let group_names = (0..replacements.len())
31+
.map(|i| format!("group{}", i))
32+
.collect();
33+
34+
compound_re
35+
.replace_all(
36+
text,
37+
CompoundSwapper {
38+
replacements,
39+
group_names,
40+
},
41+
)
42+
.into_owned()
43+
}
44+
45+
fn build_compound_regex(rules: &[(&str, &str)]) -> Regex {
46+
let pattern = rules
47+
.iter()
48+
.enumerate()
49+
.map(|(i, (pat, _))| format!(r"(?P<group{}>{})", i, pat))
50+
.collect::<Vec<_>>()
51+
.join("|");
52+
Regex::new(&pattern).unwrap()
53+
}
54+
55+
fn compile_rules<'a>(rules: &[(&'a str, &'a str)]) -> Vec<(Regex, &'a str)> {
56+
rules
57+
.iter()
58+
.map(|(pat, repl)| (Regex::new(pat).unwrap(), *repl))
59+
.collect()
60+
}
61+
62+
fn apply_rename_rules(name: &str, compiled_rules: &[(Regex, &str)]) -> String {
63+
let mut new_name = name.to_string();
64+
for (re, replacement) in compiled_rules {
65+
new_name = re.replace_all(&new_name, *replacement).into_owned();
66+
}
67+
new_name.trim().to_string()
68+
}
69+
70+
fn criterion_benchmark(c: &mut Criterion) {
71+
let text = "【高清影视之家发布 www.WHATMV.com】小丑2:双重妄想[HDR+杜比视界双版本][中文字幕].2024.2160p.UHD.BluRay.Remux.DV.HEVC.TrueHD7.1-ParkHD";
72+
73+
// Benchmark compound replacement
74+
let compound_re = build_compound_regex(RULES);
75+
let replacements: Vec<_> = RULES.iter().map(|(_, repl)| *repl).collect();
76+
assert_eq!(
77+
compound_replacement(text, &compound_re, &replacements),
78+
"小丑2:双重妄想[HDR+杜比视界双版本][中文字幕] 2024 2160p UHD BluRay Remux DV HEVC TrueHD7 1-ParkHD"
79+
);
80+
c.bench_function("compound_replacement", |b| {
81+
b.iter(|| {
82+
compound_replacement(
83+
black_box(text),
84+
black_box(&compound_re),
85+
black_box(&replacements),
86+
)
87+
})
88+
});
89+
90+
assert_eq!(
91+
apply_rename_rules(text, &compile_rules(RULES)),
92+
"小丑2:双重妄想[HDR+杜比视界双版本][中文字幕] 2024 2160p UHD BluRay Remux DV HEVC TrueHD7 1-ParkHD"
93+
);
94+
// Benchmark sequential replacement
95+
let compiled_rules = compile_rules(RULES);
96+
c.bench_function("apply_rename_rules", |b| {
97+
b.iter(|| apply_rename_rules(black_box(text), black_box(&compiled_rules)))
98+
});
99+
}
100+
101+
criterion_group!(benches, criterion_benchmark);
102+
criterion_main!(benches);

src/main.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ mod q_bit;
33
mod re;
44

55
use crate::logger::LogUnwrap;
6+
use crate::re::CompoundReplacer;
67
use clap::Parser;
7-
use regex::Regex;
88
use reqwest::Client;
99

1010
#[derive(Parser, Debug)]
@@ -40,19 +40,18 @@ async fn main() {
4040
let torrent_hash = args.torrent_hash;
4141

4242
// 提取参数 重命名规则,提前编译正则表达式
43-
let rename_rules: Vec<(Regex, &str)> = args.rename_rules
43+
let compound_replacer = CompoundReplacer::new(args.rename_rules
4444
.iter()
4545
.map(|rule| {
4646
let index = rule.rfind('=').log_unwrap("Invalid rename rule: missing '='");
4747
let (pattern_part, replacement_with_eq) = rule.split_at(index);
4848
let replacement = &replacement_with_eq[1..];
4949
(
50-
Regex::new(pattern_part).log_unwrap("Invalid regex pattern"),
50+
pattern_part,
5151
replacement,
5252
)
53-
})
54-
.collect();
55-
53+
}));
54+
5655
let mut builder = Client::builder().cookie_store(true);
5756

5857
if !args.vpn {
@@ -71,7 +70,7 @@ async fn main() {
7170
}
7271
// 并行执行重命名种子和重命名文件
7372
let (_, _) = tokio::join!(
74-
q_bit::rename_torrent(&client, &webui_url, &torrent_hash, &rename_rules),
75-
q_bit::rename_files(&client, &webui_url, &torrent_hash, &rename_rules)
73+
q_bit::rename_torrent(&client, &webui_url, &torrent_hash, &compound_replacer),
74+
q_bit::rename_files(&client, &webui_url, &torrent_hash, &compound_replacer)
7675
);
7776
}

src/q_bit.rs

Lines changed: 8 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
use crate::{log, re};
22

33
use crate::logger::LogUnwrap;
4-
use regex::Regex;
4+
use crate::re::CompoundReplacer;
55
use reqwest::Client;
66
use serde::{Deserialize, Serialize};
77
use tokio::task;
@@ -94,13 +94,13 @@ pub async fn rename_torrent(
9494
client: &Client,
9595
webui_url: &str,
9696
torrent_hash: &str,
97-
rename_rules: &Vec<(Regex, &str)>,
97+
compound_replacer: &CompoundReplacer,
9898
) -> Result<(), String> {
9999
let torrent = get_torrent_info(client, webui_url, torrent_hash)
100100
.await
101101
.log_unwrap("Failed to get torrent info");
102102

103-
let new_name = apply_rename_rules(&torrent.name, rename_rules);
103+
let new_name = compound_replacer.replace(&torrent.name);
104104

105105
if torrent.name != new_name {
106106
let rename_url = format!("{}/api/v2/torrents/rename", webui_url);
@@ -132,7 +132,7 @@ pub async fn rename_files(
132132
client: &Client,
133133
webui_url: &str,
134134
torrent_hash: &str,
135-
rename_rules: &Vec<(Regex, &str)>,
135+
compound_replacer: &CompoundReplacer,
136136
) -> Result<(), String> {
137137
// https://github.com/qbittorrent/qBittorrent/wiki/WebUI-API-(qBittorrent-4.1)#get-torrent-contents
138138
let files_url = format!("{}/api/v2/torrents/files?hash={}", webui_url, torrent_hash);
@@ -157,7 +157,7 @@ pub async fn rename_files(
157157
let mut tasks = Vec::new();
158158

159159
for file in torrent_files {
160-
let new_path = apply_rename_rules_to_file(&file.name, rename_rules);
160+
let new_path = apply_rename_rules_to_file(&file.name, compound_replacer);
161161

162162
if file.name != new_path {
163163
let rename_file_url = format!("{}/api/v2/torrents/renameFile", webui_url);
@@ -204,26 +204,12 @@ pub async fn rename_files(
204204
Ok(())
205205
}
206206

207-
fn apply_rename_rules(name: &str, compiled_rules: &Vec<(Regex, &str)>) -> String {
208-
let mut new_name = name.to_string();
209-
210-
for (re, replacement) in compiled_rules {
211-
new_name = re.replace_all(&new_name, *replacement).into_owned();
212-
}
213-
214-
new_name.trim().to_string()
215-
}
216-
217207
/// 将文件名应用重命名规则,不改变文件扩展名
218-
fn apply_rename_rules_to_file(name: &str, compiled_rules: &Vec<(Regex, &str)>) -> String {
219-
let (mut stem, ext) = re::split_filename(name);
208+
fn apply_rename_rules_to_file(name: &str, compound_replacer: &CompoundReplacer) -> String {
209+
let (stem, ext) = re::split_filename(name);
220210

221211
// 仅对主名部分应用替换规则
222-
for (re, replacement) in compiled_rules {
223-
stem = re.replace_all(&stem, *replacement).into_owned();
224-
}
225-
226-
let stem = stem.trim();
212+
let stem = compound_replacer.replace(stem.as_str());
227213

228214
// 重新组合主名和扩展名
229215
if ext.is_empty() {

src/re/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
use regex_automata::{dfa::Automaton, Anchored, Input};
22

33
mod file_extension_split;
4+
mod replace;
5+
pub use crate::re::replace::CompoundReplacer as CompoundReplacer;
46

57
/// 将文件名拆分为主名和扩展名 FILE_EXTENSION_SPLIT
68
/// 使用 regex_cli 对正则表达式进行预编译,运行时通过读取字节反序列化,减少90%的运行时开销

src/re/replace.rs

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
use std::borrow::Borrow;
2+
use std::sync::Arc;
3+
use regex::{Captures, Regex};
4+
use regex::Replacer;
5+
6+
#[derive(Debug, Clone)]
7+
pub struct CompoundReplacer {
8+
compound_re: Regex,
9+
group_names: Arc<Vec<String>>,
10+
replacements: Arc<Vec<String>>,
11+
}
12+
13+
impl Replacer for CompoundReplacer {
14+
fn replace_append(&mut self, caps: &Captures<'_>, dst: &mut String) {
15+
for (group_name, replacement) in self.group_names.iter().zip(self.replacements.iter()) {
16+
if caps.name(group_name).is_some() {
17+
dst.push_str(replacement);
18+
return;
19+
}
20+
}
21+
}
22+
}
23+
24+
impl CompoundReplacer {
25+
pub fn new<I, T, K, V>(pairs: I) -> Self
26+
where
27+
I: IntoIterator<Item = T>,
28+
T: Borrow<(K, V)>,
29+
K: AsRef<str>,
30+
V: AsRef<str>,
31+
{
32+
let (patterns, replacements): (Vec<String>, Vec<String>) = pairs
33+
.into_iter()
34+
.map(|t| {
35+
let (k, v) = t.borrow();
36+
(k.as_ref().to_string(), v.as_ref().to_string())
37+
})
38+
.unzip();
39+
40+
let group_names: Vec<String> = (0..patterns.len())
41+
.map(|i| format!("_group{}", i))
42+
.collect();
43+
let regex_str = patterns
44+
.iter()
45+
.enumerate()
46+
.map(|(i, pat)| format!("(?P<{}>{})", group_names[i], pat))
47+
.collect::<Vec<_>>()
48+
.join("|");
49+
50+
Self {
51+
compound_re: Regex::new(&regex_str).unwrap(),
52+
group_names: Arc::new(group_names),
53+
replacements: Arc::new(replacements),
54+
}
55+
}
56+
57+
58+
pub fn replace(&self, text: &str) -> String {
59+
self.compound_re.replace_all(text, self.clone()).into_owned()
60+
}
61+
}
62+
63+
64+
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks single-character rules and a realistic torrent-name clean-up.
    #[test]
    fn test_2025_02_18_11_33_08() {
        // Each case is (input text, rules, expected output).
        let cases: [(&str, &[(&str, &str)], &str); 2] = [
            (
                "a b c a b c c b a b b a a c a b e f g",
                &[("a", "1"), ("b", "2"), ("c", "3")],
                "1 2 3 1 2 3 3 2 1 2 2 1 1 3 1 2 e f g",
            ),
            (
                "【高清影视之家发布 www.WHATMV.com】小丑2:双重妄想[HDR+杜比视界双版本][中文字幕].2024.2160p.UHD.BluRay.Remux.DV.HEVC.TrueHD7.1-ParkHD",
                &[(r"[\[【].*(电影|高清|原盘|蓝光|发布).*?[】\]]", ""), (r"\.", " ")],
                "小丑2:双重妄想[HDR+杜比视界双版本][中文字幕] 2024 2160p UHD BluRay Remux DV HEVC TrueHD7 1-ParkHD",
            ),
        ];

        for (input, rules, expected) in cases.iter() {
            let replacer = CompoundReplacer::new(*rules);
            assert_eq!(replacer.replace(input), *expected);
        }
    }
}

0 commit comments

Comments
 (0)