muyuanjin
diff --git a/‎Cargo.lock‎
Lines changed: 246 additions & 55 deletions b/‎Cargo.lock‎
Lines changed: 246 additions & 55 deletions
diff --git a/‎Cargo.toml‎
Lines changed: 15 additions & 8 deletions b/‎Cargo.toml‎
Lines changed: 15 additions & 8 deletions
diff --git a/‎benches/file_extension_split.bigendian.dfa‎
77.3 KB b/‎benches/file_extension_split.bigendian.dfa‎
77.3 KB
diff --git a/‎benches/file_extension_split.littleendian.dfa‎
77.3 KB b/‎benches/file_extension_split.littleendian.dfa‎
77.3 KB
diff --git a/‎benches/my_benchmark.rs‎
Lines changed: 77 additions & 0 deletions b/‎benches/my_benchmark.rs‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎src/main.rs‎
Lines changed: 1 addition & 1 deletion b/‎src/main.rs‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/q_bit.rs‎
Lines changed: 2 additions & 36 deletions b/‎src/q_bit.rs‎
Lines changed: 2 additions & 36 deletions
diff --git a/‎src/re/file_extension_split.bigendian.dfa‎
77.3 KB b/‎src/re/file_extension_split.bigendian.dfa‎
77.3 KB
diff --git a/‎src/re/file_extension_split.littleendian.dfa‎
77.3 KB b/‎src/re/file_extension_split.littleendian.dfa‎
77.3 KB
diff --git a/‎src/re/file_extension_split.rs‎
Lines changed: 22 additions & 0 deletions b/‎src/re/file_extension_split.rs‎
Lines changed: 22 additions & 0 deletions
@@ -3,14 +3,6 @@ name = "torrent-tidy"
 version = "0.1.0"
 edition = "2021"
 
-[dependencies]
-reqwest = { version = "0.12", features = ["json", "cookies"] }
-serde = { version = "1.0", features = ["derive"] }
-serde_json = "1.0"
-tokio = { version = "1", features = ["full"] }
-clap = { version = "4.5", features = ["derive"] }
-regex = { version = "1.11" }
-
 [workspace]
 # 指定解析器版本
 resolver = "2"
@@ -22,3 +14,18 @@ codegen-units = 1   # Reduce number of codegen units to increase optimizations
 panic = 'abort'     # Abort on panic
 strip = true        # Strip symbols from binary*, strip = true is equivalent to strip = "symbols"
 debug = false       # Disable debug info
+
+[dependencies]
+reqwest = { version = "0.12", features = ["json", "cookies"] }
+serde = { version = "1.0", features = ["derive"] }
+tokio = { version = "1", features = ["rt-multi-thread", "macros"] }
+clap = { version = "4.5", features = ["derive"] }
+regex = { version = "1.11" }
+regex-automata = { version = "0.4", features = ["std", "dfa-search"] }
+
+[dev-dependencies]
+criterion = "0.5"
+
+[[bench]]
+name = "my_benchmark"
+harness = false
@@ -0,0 +1,77 @@
+use std::sync::LazyLock;
+use criterion::{black_box, criterion_group, criterion_main, Criterion};
+use regex::Regex;
+use regex_automata::{dfa::Automaton, Anchored, Input};
+
+fn split_filename(filename: &str) -> (String, String) { // Regex-based splitter: returns (stem, extension) as owned strings.
+    static RE: LazyLock<Regex> = LazyLock::new(|| { // Built on first use and shared by all later calls.
+        Regex::new(r"^(.*?)\.(tar\.gz|tar\.xz|tar\.bz2|cpio\.gz|cpio\.bz2|(?:7z|rar|zip)\.\d{3}|[^.]+)$").unwrap()
+    });
+
+    if let Some(caps) = RE.captures(filename) {
+        (caps.get(1).unwrap().as_str().to_string(), caps.get(2).unwrap().as_str().to_string()) // Group 1 = stem, group 2 = extension without its leading dot.
+    } else {
+        (filename.to_string(), String::new()) // No extension: whole name as stem, empty extension.
+    }
+}
+
+fn split_filename_old(name: &str) -> (String, String) { // Splits at the last '.' only, so "a.tar.gz" yields extension "gz".
+    if let Some(dot_pos) = name.rfind('.') {
+        if dot_pos == 0 || dot_pos == name.len() - 1 { // Last dot is the first or the final byte: report no extension.
+            (name.to_string(), String::new())
+        } else {
+            let (stem, ext_with_dot) = name.split_at(dot_pos);
+            let ext = &ext_with_dot[1..]; // Skip the one-byte '.' at the start of the second half.
+            (stem.to_string(), ext.to_string())
+        }
+    } else {
+        (name.to_string(), String::new()) // No dot anywhere in the name.
+    }
+}
+
+use regex_automata::{
+    dfa::dense::DFA,
+    util::{lazy::Lazy, wire::AlignAs},
+};
+
+pub static FILE_EXTENSION_SPLIT: Lazy<DFA<&'static [u32]>> = Lazy::new(|| { // Dense DFA deserialized lazily from bytes embedded at compile time.
+    static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { // NOTE(review): presumably gives the bytes the u32 alignment from_bytes expects — confirm in regex-automata docs.
+        _align: [],
+        #[cfg(target_endian = "big")]
+        bytes: *include_bytes!("file_extension_split.bigendian.dfa"),
+        #[cfg(target_endian = "little")]
+        bytes: *include_bytes!("file_extension_split.littleendian.dfa"),
+    };
+    let (dfa, _) = regex_automata::dfa::dense::DFA::from_bytes(&ALIGNED.bytes).expect("serialized DFA should be valid"); // Panics on first access if the embedded bytes are rejected.
+    dfa
+});
+
+/// Splits a file name into (stem, extension) using the prebuilt FILE_EXTENSION_SPLIT DFA.
+fn split_filename_new(filename: &str) -> (String, String) {
+    let input = Input::new(filename).anchored(Anchored::Yes);
+    match FILE_EXTENSION_SPLIT.try_search_rev(&input) { // Reverse search over the whole name.
+        Ok(Some(index)) => {
+            let (main, ext) = filename.split_at(index.offset()); // The reported offset becomes the split point.
+            // Drop the dot located at the split index.
+            (main.into(), ext[1..].into())
+        }
+        Ok(None) | Err(_) => { // No match or a search error: treat the name as having no extension.
+            (filename.to_string(), String::new())
+        },
+    }
+}
+
+
+fn criterion_benchmark(c: &mut Criterion) { // Registers six benchmarks: three splitters, two sample names each.
+    c.bench_function("file.with.dots.txt", |b| b.iter(|| split_filename(black_box("file.with.dots.txt")))); // Regex-based splitter.
+    c.bench_function("a.b.c.d.f", |b| b.iter(|| split_filename(black_box("a.b.c.d.f"))));
+
+    c.bench_function("file.with.dots.txt.old", |b| b.iter(|| split_filename_old(black_box("file.with.dots.txt")))); // rfind-based splitter.
+    c.bench_function("a.b.c.d.f.old", |b| b.iter(|| split_filename_old(black_box("a.b.c.d.f"))));
+
+    c.bench_function("file.with.dots.txt.new", |b| b.iter(|| split_filename_new(black_box("file.with.dots.txt")))); // Serialized-DFA splitter.
+    c.bench_function("a.b.c.d.f.new", |b| b.iter(|| split_filename_new(black_box("a.b.c.d.f"))));
+}
+
+criterion_group!(benches, criterion_benchmark); // Groups the function above under the name `benches`.
+criterion_main!(benches); // Supplies the entry point, since the manifest sets `harness = false` for this bench.
@@ -1,5 +1,6 @@
 mod logger;
 mod q_bit;
+mod re;
 
 use crate::logger::LogUnwrap;
 use clap::Parser;
@@ -52,7 +53,6 @@ async fn main() {
         })
         .collect();
 
-
     let mut builder = Client::builder().cookie_store(true);
 
     if !args.vpn {
 
@@ -1,5 +1,4 @@
-use std::sync::LazyLock;
-use crate::log;
+use crate::{log, re};
 
 use crate::logger::LogUnwrap;
 use regex::Regex;
@@ -217,7 +216,7 @@ fn apply_rename_rules(name: &str, compiled_rules: &Vec<(Regex, &str)>) -> String
 
 /// 将文件名应用重命名规则，不改变文件扩展名
 fn apply_rename_rules_to_file(name: &str, compiled_rules: &Vec<(Regex, &str)>) -> String {
-    let (mut stem, ext) = split_filename(name);
+    let (mut stem, ext) = re::split_filename(name);
 
     // 仅对主名部分应用替换规则
     for (re, replacement) in compiled_rules {
@@ -232,37 +231,4 @@ fn apply_rename_rules_to_file(name: &str, compiled_rules: &Vec<(Regex, &str)>) -
     } else {
         format!("{}.{}", stem, ext)
     }
-}
-
-
-
-/// 将文件名拆分为主名和扩展名 FILE_EXTENSION_SPLIT 
-fn split_filename(filename: &str) -> (String, String) {
-    static RE: LazyLock<Regex> = LazyLock::new(|| {
-        Regex::new(r"^(.*?)\.(tar\.(?:gz|xz|bz2)|cpio\.(?:gz|bz2)|(?:7z|rar|zip)\.\d{3}|[^.]+)$").unwrap()
-    });
-
-    RE.captures(filename)
-        .map(|caps| (caps[1].to_string(), caps[2].to_string()))
-        .unwrap_or_else(|| (filename.to_string(), String::new()))
-}
-
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn test_2025_02_17_16_36_27() {
-        assert_eq!(split_filename(""), ("".into(), "".into()));
-        assert_eq!(split_filename("."), (".".into(), "".into()));
-        assert_eq!(split_filename("f"), ("f".into(), "".into()));
-        assert_eq!(split_filename(".f"), ("".into(), "f".into()));
-        assert_eq!(split_filename("f."), ("f.".into(), "".into()));
-        assert_eq!(split_filename("a.b.c.d.f"), ("a.b.c.d".into(), "f".into()));
-        assert_eq!(split_filename("abc.tar.gz"), ("abc".into(), "tar.gz".into()));
-        assert_eq!(split_filename("abc.7z.001"), ("abc".into(), "7z.001".into()));
-        assert_eq!(split_filename("file.with.dots.txt"), ("file.with.dots".into(), "txt".into()));
-        assert_eq!(split_filename("no_extension"), ("no_extension".into(), "".into()));
-    }
 }
@@ -0,0 +1,22 @@
+// DO NOT EDIT THIS FILE. IT WAS AUTOMATICALLY GENERATED BY:
+//
+//     C:\Users\muyuanjin\.cargo\bin\regex-cli.exe generate serialize dense dfa --minimize --shrink --start-kind anchored --rustfmt --safe --reverse --captures none FILE_EXTENSION_SPLIT ./src/re/ \.(tar\.(?:gz|xz|bz2)|cpio\.(?:gz|bz2)|(?:7z|rar|zip)\.\d{3}|[^.]+)
+//
+// regex-cli 0.2.1 is available on crates.io.
+
+use regex_automata::{
+    dfa::dense::DFA,
+    util::{lazy::Lazy, wire::AlignAs},
+};
+
+pub static FILE_EXTENSION_SPLIT: Lazy<DFA<&'static [u32]>> = Lazy::new(|| { // Lazily deserializes the DFA embedded below on first access.
+    static ALIGNED: &AlignAs<[u8], u32> = &AlignAs { // NOTE(review): u32-typed alignment wrapper; presumably required by from_bytes — confirm.
+        _align: [],
+        #[cfg(target_endian = "big")]
+        bytes: *include_bytes!("file_extension_split.bigendian.dfa"),
+        #[cfg(target_endian = "little")]
+        bytes: *include_bytes!("file_extension_split.littleendian.dfa"),
+    };
+    let (dfa, _) = DFA::from_bytes(&ALIGNED.bytes).expect("serialized DFA should be valid"); // Panics if the embedded bytes are rejected.
+    dfa
+});