1
- #[ macro_use]
2
- extern crate lazy_static;
3
-
4
- use clap:: arg;
1
+ use clap:: Args ;
2
+ use lazy_static:: lazy_static;
5
3
use rayon:: prelude:: * ;
6
4
use std:: borrow:: Cow ;
7
5
use std:: fs;
@@ -11,23 +9,22 @@ use tree_sitter::{Language, Parser, Range};
11
9
12
10
use codeql_extractor:: { diagnostics, extractor, file_paths, node_types, trap} ;
13
11
14
- lazy_static ! {
15
- static ref CP_NUMBER : regex:: Regex = regex:: Regex :: new( "cp([0-9]+)" ) . unwrap( ) ;
16
- }
12
+ #[ derive( Args ) ]
13
+ pub struct Options {
14
+ /// Sets a custom source archive folder
15
+ #[ arg( long) ]
16
+ source_archive_dir : String ,
17
17
18
- /// Returns the `encoding::Encoding` corresponding to the given encoding name, if one exists.
19
- fn encoding_from_name ( encoding_name : & str ) -> Option < & ( dyn encoding:: Encoding + Send + Sync ) > {
20
- match encoding:: label:: encoding_from_whatwg_label ( encoding_name) {
21
- s @ Some ( _) => s,
22
- None => CP_NUMBER . captures ( encoding_name) . and_then ( |cap| {
23
- encoding:: label:: encoding_from_windows_code_page (
24
- str:: parse ( cap. get ( 1 ) . unwrap ( ) . as_str ( ) ) . unwrap ( ) ,
25
- )
26
- } ) ,
27
- }
18
+ /// Sets a custom trap folder
19
+ #[ arg( long) ]
20
+ output_dir : String ,
21
+
22
+ /// A text file containing the paths of the files to extract
23
+ #[ arg( long) ]
24
+ file_list : String ,
28
25
}
29
26
30
- fn main ( ) -> std:: io:: Result < ( ) > {
27
+ pub fn run ( options : Options ) -> std:: io:: Result < ( ) > {
31
28
tracing_subscriber:: fmt ( )
32
29
. with_target ( false )
33
30
. without_time ( )
@@ -82,29 +79,11 @@ fn main() -> std::io::Result<()> {
82
79
. build_global ( )
83
80
. unwrap ( ) ;
84
81
85
- let matches = clap:: Command :: new ( "Ruby extractor" )
86
- . version ( "1.0" )
87
- . author ( "GitHub" )
88
- . about ( "CodeQL Ruby extractor" )
89
- . arg ( arg ! ( --"source-archive-dir" <DIR > "Sets a custom source archive folder" ) )
90
- . arg ( arg ! ( --"output-dir" <DIR > "Sets a custom trap folder" ) )
91
- . arg ( arg ! ( --"file-list" <FILE_LIST > "A text file containing the paths of the files to extract" ) )
92
- . get_matches ( ) ;
82
+ let src_archive_dir = file_paths:: path_from_string ( & options. source_archive_dir ) ;
93
83
94
- let src_archive_dir = matches
95
- . get_one :: < String > ( "source-archive-dir" )
96
- . expect ( "missing --source-archive-dir" ) ;
97
- let src_archive_dir = file_paths:: path_from_string ( src_archive_dir) ;
84
+ let trap_dir = file_paths:: path_from_string ( & options. output_dir ) ;
98
85
99
- let trap_dir = matches
100
- . get_one :: < String > ( "output-dir" )
101
- . expect ( "missing --output-dir" ) ;
102
- let trap_dir = file_paths:: path_from_string ( & trap_dir) ;
103
-
104
- let file_list = matches
105
- . get_one :: < String > ( "file-list" )
106
- . expect ( "missing --file-list" ) ;
107
- let file_list = fs:: File :: open ( file_paths:: path_from_string ( & file_list) ) ?;
86
+ let file_list = fs:: File :: open ( file_paths:: path_from_string ( & options. file_list ) ) ?;
108
87
109
88
let language = tree_sitter_ruby:: language ( ) ;
110
89
let erb = tree_sitter_embedded_template:: language ( ) ;
@@ -242,6 +221,22 @@ fn main() -> std::io::Result<()> {
242
221
write_trap ( & trap_dir, path, & trap_writer, trap_compression)
243
222
}
244
223
224
+ lazy_static ! {
225
+ static ref CP_NUMBER : regex:: Regex = regex:: Regex :: new( "cp([0-9]+)" ) . unwrap( ) ;
226
+ }
227
+
228
+ /// Returns the `encoding::Encoding` corresponding to the given encoding name, if one exists.
229
+ fn encoding_from_name ( encoding_name : & str ) -> Option < & ( dyn encoding:: Encoding + Send + Sync ) > {
230
+ match encoding:: label:: encoding_from_whatwg_label ( encoding_name) {
231
+ s @ Some ( _) => s,
232
+ None => CP_NUMBER . captures ( encoding_name) . and_then ( |cap| {
233
+ encoding:: label:: encoding_from_windows_code_page (
234
+ str:: parse ( cap. get ( 1 ) . unwrap ( ) . as_str ( ) ) . unwrap ( ) ,
235
+ )
236
+ } ) ,
237
+ }
238
+ }
239
+
245
240
fn write_trap (
246
241
trap_dir : & Path ,
247
242
path : PathBuf ,
@@ -373,67 +368,3 @@ fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
373
368
}
374
369
None
375
370
}
376
-
377
- #[ test]
378
- fn test_scan_coding_comment ( ) {
379
- let text = "# encoding: utf-8" ;
380
- let result = scan_coding_comment ( text. as_bytes ( ) ) ;
381
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
382
-
383
- let text = "#coding:utf-8" ;
384
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
385
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
386
-
387
- let text = "# foo\n # encoding: utf-8" ;
388
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
389
- assert_eq ! ( result, None ) ;
390
-
391
- let text = "# encoding: latin1 encoding: utf-8" ;
392
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
393
- assert_eq ! ( result, Some ( "latin1" . into( ) ) ) ;
394
-
395
- let text = "# encoding: nonsense" ;
396
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
397
- assert_eq ! ( result, Some ( "nonsense" . into( ) ) ) ;
398
-
399
- let text = "# coding = utf-8" ;
400
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
401
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
402
-
403
- let text = "# CODING = utf-8" ;
404
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
405
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
406
-
407
- let text = "# CoDiNg = utf-8" ;
408
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
409
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
410
-
411
- let text = "# blah blahblahcoding = utf-8" ;
412
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
413
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
414
-
415
- // unicode BOM is ignored
416
- let text = "\u{FEFF} # encoding: utf-8" ;
417
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
418
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
419
-
420
- let text = "\u{FEFF} # encoding: utf-8" ;
421
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
422
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
423
-
424
- let text = "#! /usr/bin/env ruby\n # encoding: utf-8" ;
425
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
426
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
427
-
428
- let text = "\u{FEFF} #! /usr/bin/env ruby\n # encoding: utf-8" ;
429
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
430
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
431
-
432
- // A #! must be the first thing on a line, otherwise it's a normal comment
433
- let text = " #! /usr/bin/env ruby encoding = utf-8" ;
434
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
435
- assert_eq ! ( result, Some ( "utf-8" . into( ) ) ) ;
436
- let text = " #! /usr/bin/env ruby \n # encoding = utf-8" ;
437
- let result = scan_coding_comment ( & text. as_bytes ( ) ) ;
438
- assert_eq ! ( result, None ) ;
439
- }
0 commit comments