@@ -47,7 +47,10 @@ pub fn parse(input: &str) -> Shared {
47
47
// Use as many cores as possible to parallelize the remaining search.
48
48
thread:: scope ( |scope| {
49
49
for _ in 0 ..thread:: available_parallelism ( ) . unwrap ( ) . get ( ) {
50
+ #[ cfg( not( feature = "simd" ) ) ]
50
51
scope. spawn ( || worker ( & shared) ) ;
52
+ #[ cfg( feature = "simd" ) ]
53
+ scope. spawn ( || simd:: worker ( & shared) ) ;
51
54
}
52
55
} ) ;
53
56
@@ -83,6 +86,7 @@ fn check_hash(buffer: &mut [u8], size: usize, n: u32, shared: &Shared) {
83
86
}
84
87
}
85
88
89
+ #[ cfg( not( feature = "simd" ) ) ]
86
90
fn worker ( shared : & Shared ) {
87
91
while !shared. done . load ( Ordering :: Relaxed ) {
88
92
let offset = shared. counter . fetch_add ( 1000 , Ordering :: Relaxed ) ;
@@ -98,3 +102,54 @@ fn worker(shared: &Shared) {
98
102
}
99
103
}
100
104
}
105
+
106
#[cfg(feature = "simd")]
mod simd {
    //! Variant of the search worker that hashes several candidate numbers per call to
    //! `crate::util::md5::simd::hash`. Only compiled when the `simd` feature is enabled.
    use super::*;
    use crate::util::md5::simd::hash;
    use std::simd::{LaneCount, SupportedLaneCount};

    /// Hashes `N` consecutive candidates in one call and records any matches in `shared`.
    ///
    /// * `buffers` - at least `N` message buffers. The three bytes ending at `size` of each of
    ///   the first `N` buffers are overwritten with the decimal digits of `offset + lane`.
    /// * `size` - index one past the last digit byte; also passed through to `hash`.
    /// * `start` - first number of the batch that the calling worker claimed.
    /// * `offset` - position inside the batch of lane 0. `offset + N` must not exceed 1000,
    ///   otherwise the hundreds digit no longer fits a single ASCII digit.
    /// * `shared` - receives the results: `first` and `second` are lowered with `fetch_min`
    ///   and `done` is raised once a value passes the stricter test.
    ///
    /// The number reported for a lane is `start + offset + lane`.
    // `result` is indexed per lane and its concrete type is decided by `hash`, so a plain
    // index loop is kept instead of the iterator form clippy would suggest.
    #[allow(clippy::needless_range_loop)]
    fn check_hash_simd<const N: usize>(
        buffers: &mut [[u8; 64]],
        size: usize,
        start: u32,
        offset: u32,
        shared: &Shared,
    ) where
        LaneCount<N>: SupportedLaneCount,
    {
        // Format macro is very slow, so update digits directly.
        for (buffer, n) in buffers[..N].iter_mut().zip(offset..) {
            buffer[size - 3] = b'0' + (n / 100) as u8;
            buffer[size - 2] = b'0' + ((n / 10) % 10) as u8;
            buffer[size - 1] = b'0' + (n % 10) as u8;
        }

        // Only the first element of the returned tuple is needed; it holds one word per lane.
        let (result, ..) = hash::<N>(buffers, size);

        for i in 0..N {
            let word = result[i];

            // Top 20 bits clear is the weaker test. Almost every candidate fails it, so
            // rejecting here keeps the common path to a single comparison.
            if word & 0xfffff000 != 0 {
                continue;
            }

            let n = start + offset + i as u32;

            // A word whose top 24 bits are clear also has its top 20 bits clear, so every
            // match is offered to `first`, not only those that fail the stricter test.
            // NOTE(review): assumes `first` means "lowest number passing the weaker test";
            // confirm that the scalar `check_hash` is meant to agree.
            shared.first.fetch_min(n, Ordering::Relaxed);

            if word & 0xffffff00 == 0 {
                shared.second.fetch_min(n, Ordering::Relaxed);
                shared.done.store(true, Ordering::Relaxed);
            }
        }
    }

    /// Claims batches of 1000 numbers from `shared.counter` and checks each of them until
    /// some worker raises `shared.done`.
    ///
    /// The flag is only examined between batches, so a batch that has been claimed is always
    /// checked completely.
    pub(super) fn worker(shared: &Shared) {
        while !shared.done.load(Ordering::Relaxed) {
            let start = shared.counter.fetch_add(1000, Ordering::Relaxed);
            let (prefix, size) = format_string(&shared.prefix, start);
            // One copy of the formatted message per lane; only the last three digits differ.
            let mut buffers = [prefix; 32];

            // 31 calls of 32 lanes cover offsets 0..992 of the batch...
            for offset in (0..992).step_by(32) {
                check_hash_simd::<32>(&mut buffers, size, start, offset, shared);
            }

            // ...and one call of 8 lanes covers the remaining offsets 992..1000.
            check_hash_simd::<8>(&mut buffers, size, start, 992, shared);
        }
    }
}
0 commit comments