1
1
use crate :: parser:: Extractor ;
2
+ use fxhash:: FxHashSet ;
2
3
use rayon:: prelude:: * ;
3
4
use std:: path:: PathBuf ;
4
5
use tracing:: event;
@@ -11,14 +12,7 @@ pub mod parser;
11
12
pub mod utility;
12
13
pub mod variant;
13
14
14
- #[ derive( Debug , Clone ) ]
15
- pub struct ChangedContent {
16
- pub file : Option < PathBuf > ,
17
- pub content : Option < String > ,
18
- pub extension : String ,
19
- }
20
-
21
- pub fn parse_candidate_strings_from_files ( changed_content : Vec < ChangedContent > ) -> Vec < String > {
15
+ fn init_tracing ( ) {
22
16
if matches ! ( std:: env:: var( "DEBUG" ) , Ok ( value) if value. eq( "*" ) || value. eq( "1" ) || value. eq( "true" ) || value. contains( "tailwind" ) )
23
17
{
24
18
tracing_subscriber:: fmt ( )
@@ -27,10 +21,63 @@ pub fn parse_candidate_strings_from_files(changed_content: Vec<ChangedContent>)
27
21
. compact ( )
28
22
. init ( ) ;
29
23
}
24
+ }
25
+
26
/// A single unit of input to scan for candidate strings.
///
/// The readers in this file treat an entry as "read from disk" when only
/// `file` is set, as "use this in-memory text" when only `content` is set,
/// and as an empty blob in every other combination.
#[derive(Debug, Clone)]
pub struct ChangedContent {
    /// Path of the file to read, when the content lives on disk.
    pub file: Option<PathBuf>,
    /// Raw content supplied directly by the caller.
    pub content: Option<String>,
    /// Extension associated with the content.
    // NOTE(review): not read by any code visible in this chunk — confirm usage.
    pub extension: String,
}
32
+
33
/// Strategy used for reading input files.
///
/// The discriminants occupy the two lowest bits of the `options` bitmask
/// (mask `0b0011`), so an `IO` flag can be OR-ed together with a `Parsing`
/// flag into a single `u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IO {
    /// Read the inputs one after another.
    Sequential = 0b0001,
    /// Read the inputs in parallel.
    Parallel = 0b0010,
}
30
38
39
+ impl From < u8 > for IO {
40
+ fn from ( item : u8 ) -> Self {
41
+ match item & 0b0011 {
42
+ 0b0001 => IO :: Sequential ,
43
+ 0b0010 => IO :: Parallel ,
44
+ _ => unimplemented ! ( "Unknown 'IO' strategy" ) ,
45
+ }
46
+ }
47
+ }
48
+
49
/// Strategy used for parsing the blobs that were read.
///
/// The discriminants occupy bits 2 and 3 of the `options` bitmask
/// (mask `0b1100`), so a `Parsing` flag can be OR-ed together with an `IO`
/// flag into a single `u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Parsing {
    /// Parse the blobs one after another.
    Sequential = 0b0100,
    /// Parse the blobs in parallel.
    Parallel = 0b1000,
}
54
+
55
+ impl From < u8 > for Parsing {
56
+ fn from ( item : u8 ) -> Self {
57
+ match item & 0b1100 {
58
+ 0b0100 => Parsing :: Sequential ,
59
+ 0b1000 => Parsing :: Parallel ,
60
+ _ => unimplemented ! ( "Unknown 'Parsing' strategy" ) ,
61
+ }
62
+ }
63
+ }
64
+
65
+ pub fn parse_candidate_strings_from_files ( changed_content : Vec < ChangedContent > ) -> Vec < String > {
66
+ init_tracing ( ) ;
31
67
parse_all_blobs ( read_all_files ( changed_content) )
32
68
}
33
69
70
+ pub fn parse_candidate_strings ( input : Vec < ChangedContent > , options : u8 ) -> Vec < String > {
71
+ init_tracing ( ) ;
72
+
73
+ match ( IO :: from ( options) , Parsing :: from ( options) ) {
74
+ ( IO :: Sequential , Parsing :: Sequential ) => parse_all_blobs_sync ( read_all_files_sync ( input) ) ,
75
+ ( IO :: Sequential , Parsing :: Parallel ) => parse_all_blobs_sync ( read_all_files ( input) ) ,
76
+ ( IO :: Parallel , Parsing :: Sequential ) => parse_all_blobs ( read_all_files_sync ( input) ) ,
77
+ ( IO :: Parallel , Parsing :: Parallel ) => parse_all_blobs ( read_all_files ( input) ) ,
78
+ }
79
+ }
80
+
34
81
#[ tracing:: instrument( skip( changed_content) ) ]
35
82
fn read_all_files ( changed_content : Vec < ChangedContent > ) -> Vec < Vec < u8 > > {
36
83
event ! (
@@ -49,6 +96,24 @@ fn read_all_files(changed_content: Vec<ChangedContent>) -> Vec<Vec<u8>> {
49
96
. collect ( )
50
97
}
51
98
99
+ #[ tracing:: instrument( skip( changed_content) ) ]
100
+ fn read_all_files_sync ( changed_content : Vec < ChangedContent > ) -> Vec < Vec < u8 > > {
101
+ event ! (
102
+ tracing:: Level :: INFO ,
103
+ "Reading {:?} file(s)" ,
104
+ changed_content. len( )
105
+ ) ;
106
+
107
+ changed_content
108
+ . into_iter ( )
109
+ . map ( |c| match ( c. file , c. content ) {
110
+ ( Some ( file) , None ) => std:: fs:: read ( file) . unwrap ( ) ,
111
+ ( None , Some ( content) ) => content. into_bytes ( ) ,
112
+ _ => Default :: default ( ) ,
113
+ } )
114
+ . collect ( )
115
+ }
116
+
52
117
#[ tracing:: instrument( skip( blobs) ) ]
53
118
fn parse_all_blobs ( blobs : Vec < Vec < u8 > > ) -> Vec < String > {
54
119
let input: Vec < _ > = blobs. iter ( ) . map ( |blob| & blob[ ..] ) . collect ( ) ;
@@ -72,3 +137,27 @@ fn parse_all_blobs(blobs: Vec<Vec<u8>>) -> Vec<String> {
72
137
result. sort ( ) ;
73
138
result
74
139
}
140
+
141
+ #[ tracing:: instrument( skip( blobs) ) ]
142
+ fn parse_all_blobs_sync ( blobs : Vec < Vec < u8 > > ) -> Vec < String > {
143
+ let input: Vec < _ > = blobs. iter ( ) . map ( |blob| & blob[ ..] ) . collect ( ) ;
144
+ let input = & input[ ..] ;
145
+
146
+ let mut result: Vec < String > = input
147
+ . iter ( )
148
+ . map ( |input| Extractor :: unique ( input, Default :: default ( ) ) )
149
+ . fold ( FxHashSet :: default ( ) , |mut a, b| {
150
+ a. extend ( b) ;
151
+ a
152
+ } )
153
+ . into_iter ( )
154
+ . map ( |s| {
155
+ // SAFETY: When we parsed the candidates, we already guaranteed that the byte slices
156
+ // are valid, therefore we don't have to re-check here when we want to convert it back
157
+ // to a string.
158
+ unsafe { String :: from_utf8_unchecked ( s. to_vec ( ) ) }
159
+ } )
160
+ . collect ( ) ;
161
+ result. sort ( ) ;
162
+ result
163
+ }
0 commit comments