Skip to content

Commit da9a49d

Browse files
committed
QL: Use high level extractor API
1 parent c4d7658 commit da9a49d

File tree

1 file changed

+40
-216
lines changed

1 file changed

+40
-216
lines changed

ql/extractor/src/extractor.rs

Lines changed: 40 additions & 216 deletions
Original file line number | Diff line number | Diff line change
@@ -1,10 +1,7 @@
11
use clap::Args;
2-
use rayon::prelude::*;
3-
use std::fs;
4-
use std::io::BufRead;
5-
use std::path::{Path, PathBuf};
2+
use std::path::PathBuf;
63

7-
use codeql_extractor::{diagnostics, extractor, node_types, trap};
4+
use codeql_extractor::extractor::simple;
85

96
#[derive(Args)]
107
pub struct Options {
@@ -29,217 +26,44 @@ pub fn run(options: Options) -> std::io::Result<()> {
2926
.with_env_filter(tracing_subscriber::EnvFilter::from_default_env())
3027
.init();
3128

32-
let diagnostics = diagnostics::DiagnosticLoggers::new("ql");
33-
let mut main_thread_logger = diagnostics.logger();
34-
let num_threads = match codeql_extractor::options::num_threads() {
35-
Ok(num) => num,
36-
Err(e) => {
37-
main_thread_logger.write(
38-
main_thread_logger
39-
.new_entry("configuration-error", "Configuration error")
40-
.message(
41-
"{}; defaulting to 1 thread.",
42-
&[diagnostics::MessageArg::Code(&e)],
43-
)
44-
.severity(diagnostics::Severity::Warning),
45-
);
46-
1
47-
}
48-
};
49-
tracing::info!(
50-
"Using {} {}",
51-
num_threads,
52-
if num_threads == 1 {
53-
"thread"
54-
} else {
55-
"threads"
56-
}
57-
);
58-
let trap_compression = match trap::Compression::from_env("CODEQL_QL_TRAP_COMPRESSION") {
59-
Ok(x) => x,
60-
Err(e) => {
61-
main_thread_logger.write(
62-
main_thread_logger
63-
.new_entry("configuration-error", "Configuration error")
64-
.message("{}; using gzip.", &[diagnostics::MessageArg::Code(&e)])
65-
.severity(diagnostics::Severity::Warning),
66-
);
67-
trap::Compression::Gzip
68-
}
29+
let extractor = simple::Extractor {
30+
prefix: "ql".to_string(),
31+
languages: vec![
32+
simple::LanguageSpec {
33+
prefix: "ql",
34+
ts_language: tree_sitter_ql::language(),
35+
node_types: tree_sitter_ql::NODE_TYPES,
36+
file_extensions: vec!["ql".into(), "qll".into()],
37+
},
38+
simple::LanguageSpec {
39+
prefix: "dbscheme",
40+
ts_language: tree_sitter_ql_dbscheme::language(),
41+
node_types: tree_sitter_ql_dbscheme::NODE_TYPES,
42+
file_extensions: vec!["dbscheme".into()],
43+
},
44+
simple::LanguageSpec {
45+
prefix: "yaml",
46+
ts_language: tree_sitter_ql_yaml::language(),
47+
node_types: tree_sitter_ql_yaml::NODE_TYPES,
48+
file_extensions: vec!["yml".into()],
49+
},
50+
simple::LanguageSpec {
51+
prefix: "json",
52+
ts_language: tree_sitter_json::language(),
53+
node_types: tree_sitter_json::NODE_TYPES,
54+
file_extensions: vec!["json".into(), "jsonl".into(), "jsonc".into()],
55+
},
56+
simple::LanguageSpec {
57+
prefix: "blame",
58+
ts_language: tree_sitter_blame::language(),
59+
node_types: tree_sitter_blame::NODE_TYPES,
60+
file_extensions: vec!["blame".into()],
61+
},
62+
],
63+
trap_dir: options.output_dir,
64+
source_archive_dir: options.source_archive_dir,
65+
file_list: options.file_list,
6966
};
70-
drop(main_thread_logger);
71-
72-
rayon::ThreadPoolBuilder::new()
73-
.num_threads(num_threads)
74-
.build_global()
75-
.unwrap();
76-
77-
let trap_dir = options.output_dir;
78-
let file_list = fs::File::open(options.file_list)?;
79-
let source_archive_dir = options.source_archive_dir;
8067

81-
let language = tree_sitter_ql::language();
82-
let dbscheme = tree_sitter_ql_dbscheme::language();
83-
let yaml = tree_sitter_ql_yaml::language();
84-
let blame = tree_sitter_blame::language();
85-
let json = tree_sitter_json::language();
86-
let schema = node_types::read_node_types_str("ql", tree_sitter_ql::NODE_TYPES)?;
87-
let dbscheme_schema =
88-
node_types::read_node_types_str("dbscheme", tree_sitter_ql_dbscheme::NODE_TYPES)?;
89-
let yaml_schema = node_types::read_node_types_str("yaml", tree_sitter_ql_yaml::NODE_TYPES)?;
90-
let blame_schema = node_types::read_node_types_str("blame", tree_sitter_blame::NODE_TYPES)?;
91-
let json_schema = node_types::read_node_types_str("json", tree_sitter_json::NODE_TYPES)?;
92-
93-
let lines: std::io::Result<Vec<String>> = std::io::BufReader::new(file_list).lines().collect();
94-
let lines = lines?;
95-
lines
96-
.par_iter()
97-
.try_for_each(|line| {
98-
// only consider files that end with .ql/.qll/.dbscheme/qlpack.yml
99-
// TODO: This is a bad fix, wait for the post-merge discussion in https://github.com/github/codeql/pull/7444 to be resolved
100-
if !line.ends_with(".ql")
101-
&& !line.ends_with(".qll")
102-
&& !line.ends_with(".dbscheme")
103-
&& !line.ends_with("qlpack.yml")
104-
&& !line.ends_with(".blame")
105-
&& !line.ends_with(".json")
106-
&& !line.ends_with(".jsonl")
107-
&& !line.ends_with(".jsonc")
108-
{
109-
return Ok(());
110-
}
111-
let path = PathBuf::from(line).canonicalize()?;
112-
let src_archive_file = path_for(&source_archive_dir, &path, "");
113-
let source = std::fs::read(&path)?;
114-
let code_ranges = vec![];
115-
let mut trap_writer = trap::Writer::new();
116-
let mut diagnostics_writer = diagnostics.logger();
117-
if line.ends_with(".dbscheme") {
118-
extractor::extract(
119-
dbscheme,
120-
"dbscheme",
121-
&dbscheme_schema,
122-
&mut diagnostics_writer,
123-
&mut trap_writer,
124-
&path,
125-
&source,
126-
&code_ranges,
127-
)
128-
} else if line.ends_with("qlpack.yml") {
129-
extractor::extract(
130-
yaml,
131-
"yaml",
132-
&yaml_schema,
133-
&mut diagnostics_writer,
134-
&mut trap_writer,
135-
&path,
136-
&source,
137-
&code_ranges,
138-
)
139-
} else if line.ends_with(".json")
140-
|| line.ends_with(".jsonl")
141-
|| line.ends_with(".jsonc")
142-
{
143-
extractor::extract(
144-
json,
145-
"json",
146-
&json_schema,
147-
&mut diagnostics_writer,
148-
&mut trap_writer,
149-
&path,
150-
&source,
151-
&code_ranges,
152-
)
153-
} else if line.ends_with(".blame") {
154-
extractor::extract(
155-
blame,
156-
"blame",
157-
&blame_schema,
158-
&mut diagnostics_writer,
159-
&mut trap_writer,
160-
&path,
161-
&source,
162-
&code_ranges,
163-
)
164-
} else {
165-
extractor::extract(
166-
language,
167-
"ql",
168-
&schema,
169-
&mut diagnostics_writer,
170-
&mut trap_writer,
171-
&path,
172-
&source,
173-
&code_ranges,
174-
)
175-
}
176-
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
177-
std::fs::copy(&path, &src_archive_file)?;
178-
write_trap(&trap_dir, path, &trap_writer, trap_compression)
179-
})
180-
.expect("failed to extract files");
181-
182-
let path = PathBuf::from("extras");
183-
let mut trap_writer = trap::Writer::new();
184-
extractor::populate_empty_location(&mut trap_writer);
185-
write_trap(&trap_dir, path, &trap_writer, trap_compression)
186-
}
187-
188-
fn write_trap(
189-
trap_dir: &Path,
190-
path: PathBuf,
191-
trap_writer: &trap::Writer,
192-
trap_compression: trap::Compression,
193-
) -> std::io::Result<()> {
194-
let trap_file = path_for(trap_dir, &path, trap_compression.extension());
195-
std::fs::create_dir_all(trap_file.parent().unwrap())?;
196-
trap_writer.write_to_file(&trap_file, trap_compression)
197-
}
198-
199-
fn path_for(dir: &Path, path: &Path, ext: &str) -> PathBuf {
200-
let mut result = PathBuf::from(dir);
201-
for component in path.components() {
202-
match component {
203-
std::path::Component::Prefix(prefix) => match prefix.kind() {
204-
std::path::Prefix::Disk(letter) | std::path::Prefix::VerbatimDisk(letter) => {
205-
result.push(format!("{}_", letter as char))
206-
}
207-
std::path::Prefix::Verbatim(x) | std::path::Prefix::DeviceNS(x) => {
208-
result.push(x);
209-
}
210-
std::path::Prefix::UNC(server, share)
211-
| std::path::Prefix::VerbatimUNC(server, share) => {
212-
result.push("unc");
213-
result.push(server);
214-
result.push(share);
215-
}
216-
},
217-
std::path::Component::RootDir => {
218-
// skip
219-
}
220-
std::path::Component::Normal(_) => {
221-
result.push(component);
222-
}
223-
std::path::Component::CurDir => {
224-
// skip
225-
}
226-
std::path::Component::ParentDir => {
227-
result.pop();
228-
}
229-
}
230-
}
231-
if !ext.is_empty() {
232-
match result.extension() {
233-
Some(x) => {
234-
let mut new_ext = x.to_os_string();
235-
new_ext.push(".");
236-
new_ext.push(ext);
237-
result.set_extension(new_ext);
238-
}
239-
None => {
240-
result.set_extension(ext);
241-
}
242-
}
243-
}
244-
result
68+
extractor.run()
24569
}

0 commit comments

Comments
 (0)