Skip to content

Commit 79089b4

Browse files
committed
Ruby: Merge extractor binaries into one
There is now one binary, codeql-ruby-extractor, which takes a positional argument specifying whether to extract, generate or autobuild.
1 parent 5a8a6f2 commit 79089b4

File tree

5 files changed

+100
-147
lines changed

5 files changed

+100
-147
lines changed

ruby/extractor/src/bin/autobuilder.rs renamed to ruby/extractor/src/autobuilder.rs

Lines changed: 6 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,8 +1,13 @@
1+
use clap::Args;
12
use std::env;
23
use std::path::PathBuf;
34
use std::process::Command;
45

5-
fn main() -> std::io::Result<()> {
6+
#[derive(Args)]
7+
// The autobuilder takes no command-line options, but this may change in the future.
8+
pub struct Options {}
9+
10+
pub fn run(_: Options) -> std::io::Result<()> {
611
let dist = env::var("CODEQL_DIST").expect("CODEQL_DIST not set");
712
let db = env::var("CODEQL_EXTRACTOR_RUBY_WIP_DATABASE")
813
.expect("CODEQL_EXTRACTOR_RUBY_WIP_DATABASE not set");

ruby/extractor/src/bin/generator.rs

Lines changed: 0 additions & 43 deletions
This file was deleted.

ruby/extractor/src/bin/extractor.rs renamed to ruby/extractor/src/extractor.rs

Lines changed: 34 additions & 103 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,5 @@
1-
#[macro_use]
2-
extern crate lazy_static;
3-
4-
use clap::arg;
1+
use clap::Args;
2+
use lazy_static::lazy_static;
53
use rayon::prelude::*;
64
use std::borrow::Cow;
75
use std::fs;
@@ -11,23 +9,22 @@ use tree_sitter::{Language, Parser, Range};
119

1210
use codeql_extractor::{diagnostics, extractor, file_paths, node_types, trap};
1311

14-
lazy_static! {
15-
static ref CP_NUMBER: regex::Regex = regex::Regex::new("cp([0-9]+)").unwrap();
16-
}
12+
#[derive(Args)]
13+
pub struct Options {
14+
/// Sets a custom source archive folder
15+
#[arg(long)]
16+
source_archive_dir: String,
1717

18-
/// Returns the `encoding::Encoding` corresponding to the given encoding name, if one exists.
19-
fn encoding_from_name(encoding_name: &str) -> Option<&(dyn encoding::Encoding + Send + Sync)> {
20-
match encoding::label::encoding_from_whatwg_label(encoding_name) {
21-
s @ Some(_) => s,
22-
None => CP_NUMBER.captures(encoding_name).and_then(|cap| {
23-
encoding::label::encoding_from_windows_code_page(
24-
str::parse(cap.get(1).unwrap().as_str()).unwrap(),
25-
)
26-
}),
27-
}
18+
/// Sets a custom trap folder
19+
#[arg(long)]
20+
output_dir: String,
21+
22+
/// A text file containing the paths of the files to extract
23+
#[arg(long)]
24+
file_list: String,
2825
}
2926

30-
fn main() -> std::io::Result<()> {
27+
pub fn run(options: Options) -> std::io::Result<()> {
3128
tracing_subscriber::fmt()
3229
.with_target(false)
3330
.without_time()
@@ -82,29 +79,11 @@ fn main() -> std::io::Result<()> {
8279
.build_global()
8380
.unwrap();
8481

85-
let matches = clap::Command::new("Ruby extractor")
86-
.version("1.0")
87-
.author("GitHub")
88-
.about("CodeQL Ruby extractor")
89-
.arg(arg!(--"source-archive-dir" <DIR> "Sets a custom source archive folder"))
90-
.arg(arg!(--"output-dir" <DIR> "Sets a custom trap folder"))
91-
.arg(arg!(--"file-list" <FILE_LIST> "A text file containing the paths of the files to extract"))
92-
.get_matches();
82+
let src_archive_dir = file_paths::path_from_string(&options.source_archive_dir);
9383

94-
let src_archive_dir = matches
95-
.get_one::<String>("source-archive-dir")
96-
.expect("missing --source-archive-dir");
97-
let src_archive_dir = file_paths::path_from_string(src_archive_dir);
84+
let trap_dir = file_paths::path_from_string(&options.output_dir);
9885

99-
let trap_dir = matches
100-
.get_one::<String>("output-dir")
101-
.expect("missing --output-dir");
102-
let trap_dir = file_paths::path_from_string(&trap_dir);
103-
104-
let file_list = matches
105-
.get_one::<String>("file-list")
106-
.expect("missing --file-list");
107-
let file_list = fs::File::open(file_paths::path_from_string(&file_list))?;
86+
let file_list = fs::File::open(file_paths::path_from_string(&options.file_list))?;
10887

10988
let language = tree_sitter_ruby::language();
11089
let erb = tree_sitter_embedded_template::language();
@@ -242,6 +221,22 @@ fn main() -> std::io::Result<()> {
242221
write_trap(&trap_dir, path, &trap_writer, trap_compression)
243222
}
244223

224+
lazy_static! {
225+
static ref CP_NUMBER: regex::Regex = regex::Regex::new("cp([0-9]+)").unwrap();
226+
}
227+
228+
/// Returns the `encoding::Encoding` corresponding to the given encoding name, if one exists.
229+
fn encoding_from_name(encoding_name: &str) -> Option<&(dyn encoding::Encoding + Send + Sync)> {
230+
match encoding::label::encoding_from_whatwg_label(encoding_name) {
231+
s @ Some(_) => s,
232+
None => CP_NUMBER.captures(encoding_name).and_then(|cap| {
233+
encoding::label::encoding_from_windows_code_page(
234+
str::parse(cap.get(1).unwrap().as_str()).unwrap(),
235+
)
236+
}),
237+
}
238+
}
239+
245240
fn write_trap(
246241
trap_dir: &Path,
247242
path: PathBuf,
@@ -373,67 +368,3 @@ fn scan_coding_comment(content: &[u8]) -> std::option::Option<Cow<str>> {
373368
}
374369
None
375370
}
376-
377-
#[test]
378-
fn test_scan_coding_comment() {
379-
let text = "# encoding: utf-8";
380-
let result = scan_coding_comment(text.as_bytes());
381-
assert_eq!(result, Some("utf-8".into()));
382-
383-
let text = "#coding:utf-8";
384-
let result = scan_coding_comment(&text.as_bytes());
385-
assert_eq!(result, Some("utf-8".into()));
386-
387-
let text = "# foo\n# encoding: utf-8";
388-
let result = scan_coding_comment(&text.as_bytes());
389-
assert_eq!(result, None);
390-
391-
let text = "# encoding: latin1 encoding: utf-8";
392-
let result = scan_coding_comment(&text.as_bytes());
393-
assert_eq!(result, Some("latin1".into()));
394-
395-
let text = "# encoding: nonsense";
396-
let result = scan_coding_comment(&text.as_bytes());
397-
assert_eq!(result, Some("nonsense".into()));
398-
399-
let text = "# coding = utf-8";
400-
let result = scan_coding_comment(&text.as_bytes());
401-
assert_eq!(result, Some("utf-8".into()));
402-
403-
let text = "# CODING = utf-8";
404-
let result = scan_coding_comment(&text.as_bytes());
405-
assert_eq!(result, Some("utf-8".into()));
406-
407-
let text = "# CoDiNg = utf-8";
408-
let result = scan_coding_comment(&text.as_bytes());
409-
assert_eq!(result, Some("utf-8".into()));
410-
411-
let text = "# blah blahblahcoding = utf-8";
412-
let result = scan_coding_comment(&text.as_bytes());
413-
assert_eq!(result, Some("utf-8".into()));
414-
415-
// unicode BOM is ignored
416-
let text = "\u{FEFF}# encoding: utf-8";
417-
let result = scan_coding_comment(&text.as_bytes());
418-
assert_eq!(result, Some("utf-8".into()));
419-
420-
let text = "\u{FEFF} # encoding: utf-8";
421-
let result = scan_coding_comment(&text.as_bytes());
422-
assert_eq!(result, Some("utf-8".into()));
423-
424-
let text = "#! /usr/bin/env ruby\n # encoding: utf-8";
425-
let result = scan_coding_comment(&text.as_bytes());
426-
assert_eq!(result, Some("utf-8".into()));
427-
428-
let text = "\u{FEFF}#! /usr/bin/env ruby\n # encoding: utf-8";
429-
let result = scan_coding_comment(&text.as_bytes());
430-
assert_eq!(result, Some("utf-8".into()));
431-
432-
// A #! must be the first thing on a line, otherwise it's a normal comment
433-
let text = " #! /usr/bin/env ruby encoding = utf-8";
434-
let result = scan_coding_comment(&text.as_bytes());
435-
assert_eq!(result, Some("utf-8".into()));
436-
let text = " #! /usr/bin/env ruby \n # encoding = utf-8";
437-
let result = scan_coding_comment(&text.as_bytes());
438-
assert_eq!(result, None);
439-
}

ruby/extractor/src/generator.rs

Lines changed: 37 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,37 @@
1+
use clap::Args;
2+
use std::path::PathBuf;
3+
4+
use codeql_extractor::generator::{generate, language::Language};
5+
6+
#[derive(Args)]
7+
pub struct Options {
8+
/// Path of the generated dbscheme file
9+
#[arg(long)]
10+
dbscheme: PathBuf,
11+
12+
/// Path of the generated QLL file
13+
#[arg(long)]
14+
library: PathBuf,
15+
}
16+
17+
pub fn run(options: Options) -> std::io::Result<()> {
18+
tracing_subscriber::fmt()
19+
.with_target(false)
20+
.without_time()
21+
.with_level(true)
22+
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
23+
.init();
24+
25+
let languages = vec![
26+
Language {
27+
name: "Ruby".to_owned(),
28+
node_types: tree_sitter_ruby::NODE_TYPES,
29+
},
30+
Language {
31+
name: "Erb".to_owned(),
32+
node_types: tree_sitter_embedded_template::NODE_TYPES,
33+
},
34+
];
35+
36+
generate(languages, options.dbscheme, options.library)
37+
}

ruby/extractor/src/main.rs

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
use clap::Parser;
2+
3+
mod autobuilder;
4+
mod extractor;
5+
mod generator;
6+
7+
#[derive(Parser)]
8+
#[command(author, version, about, long_about = None)]
9+
enum Cli {
10+
Extract(extractor::Options),
11+
Generate(generator::Options),
12+
Autobuild(autobuilder::Options),
13+
}
14+
15+
fn main() -> std::io::Result<()> {
16+
let cli = Cli::parse();
17+
18+
match cli {
19+
Cli::Extract(options) => extractor::run(options),
20+
Cli::Generate(options) => generator::run(options),
21+
Cli::Autobuild(options) => autobuilder::run(options),
22+
}
23+
}

0 commit comments

Comments
 (0)