|
| 1 | +use crate::trap; |
| 2 | +use rayon::prelude::*; |
| 3 | +use std::collections::HashMap; |
| 4 | +use std::ffi::{OsStr, OsString}; |
| 5 | +use std::fs::File; |
| 6 | +use std::io::BufRead; |
| 7 | +use std::path::{Path, PathBuf}; |
| 8 | + |
| 9 | +use crate::diagnostics; |
| 10 | +use crate::node_types; |
| 11 | + |
| 12 | +pub struct LanguageSpec { |
| 13 | + pub prefix: &'static str, |
| 14 | + pub ts_language: tree_sitter::Language, |
| 15 | + pub node_types: &'static str, |
| 16 | + pub file_extensions: Vec<OsString>, |
| 17 | +} |
| 18 | + |
| 19 | +pub struct Extractor { |
| 20 | + pub prefix: String, |
| 21 | + pub languages: Vec<LanguageSpec>, |
| 22 | + pub trap_dir: PathBuf, |
| 23 | + pub source_archive_dir: PathBuf, |
| 24 | + pub file_list: PathBuf, |
| 25 | +} |
| 26 | + |
| 27 | +impl Extractor { |
| 28 | + pub fn run(&self) -> std::io::Result<()> { |
| 29 | + let diagnostics = diagnostics::DiagnosticLoggers::new(&self.prefix); |
| 30 | + let mut main_thread_logger = diagnostics.logger(); |
| 31 | + let num_threads = match crate::options::num_threads() { |
| 32 | + Ok(num) => num, |
| 33 | + Err(e) => { |
| 34 | + main_thread_logger.write( |
| 35 | + main_thread_logger |
| 36 | + .new_entry("configuration-error", "Configuration error") |
| 37 | + .message( |
| 38 | + "{}; defaulting to 1 thread.", |
| 39 | + &[diagnostics::MessageArg::Code(&e)], |
| 40 | + ) |
| 41 | + .severity(diagnostics::Severity::Warning), |
| 42 | + ); |
| 43 | + 1 |
| 44 | + } |
| 45 | + }; |
| 46 | + tracing::info!( |
| 47 | + "Using {} {}", |
| 48 | + num_threads, |
| 49 | + if num_threads == 1 { |
| 50 | + "thread" |
| 51 | + } else { |
| 52 | + "threads" |
| 53 | + } |
| 54 | + ); |
| 55 | + let trap_compression = match trap::Compression::from_env("CODEQL_QL_TRAP_COMPRESSION") { |
| 56 | + Ok(x) => x, |
| 57 | + Err(e) => { |
| 58 | + main_thread_logger.write( |
| 59 | + main_thread_logger |
| 60 | + .new_entry("configuration-error", "Configuration error") |
| 61 | + .message("{}; using gzip.", &[diagnostics::MessageArg::Code(&e)]) |
| 62 | + .severity(diagnostics::Severity::Warning), |
| 63 | + ); |
| 64 | + trap::Compression::Gzip |
| 65 | + } |
| 66 | + }; |
| 67 | + drop(main_thread_logger); |
| 68 | + |
| 69 | + rayon::ThreadPoolBuilder::new() |
| 70 | + .num_threads(num_threads) |
| 71 | + .build_global() |
| 72 | + .unwrap(); |
| 73 | + |
| 74 | + let file_list = File::open(&self.file_list)?; |
| 75 | + |
| 76 | + let mut schemas = vec![]; |
| 77 | + for lang in &self.languages { |
| 78 | + let schema = node_types::read_node_types_str(lang.prefix, lang.node_types)?; |
| 79 | + schemas.push(schema); |
| 80 | + } |
| 81 | + |
| 82 | + // Construct a map from file extension -> LanguageSpec |
| 83 | + let mut file_extension_language_mapping: HashMap<&OsStr, Vec<usize>> = HashMap::new(); |
| 84 | + for (i, lang) in self.languages.iter().enumerate() { |
| 85 | + for (j, _ext) in lang.file_extensions.iter().enumerate() { |
| 86 | + let indexes = file_extension_language_mapping |
| 87 | + .entry(&lang.file_extensions[j]) |
| 88 | + .or_default(); |
| 89 | + indexes.push(i); |
| 90 | + } |
| 91 | + } |
| 92 | + |
| 93 | + let lines: std::io::Result<Vec<String>> = |
| 94 | + std::io::BufReader::new(file_list).lines().collect(); |
| 95 | + let lines = lines?; |
| 96 | + |
| 97 | + lines |
| 98 | + .par_iter() |
| 99 | + .try_for_each(|line| { |
| 100 | + let mut diagnostics_writer = diagnostics.logger(); |
| 101 | + let path = PathBuf::from(line).canonicalize()?; |
| 102 | + let src_archive_file = |
| 103 | + crate::file_paths::path_for(&self.source_archive_dir, &path, ""); |
| 104 | + let source = std::fs::read(&path)?; |
| 105 | + let mut trap_writer = trap::Writer::new(); |
| 106 | + |
| 107 | + match path.extension() { |
| 108 | + None => { |
| 109 | + tracing::error!(?path, "No extension found, skipping file."); |
| 110 | + } |
| 111 | + Some(ext) => { |
| 112 | + if let Some(indexes) = file_extension_language_mapping.get(ext) { |
| 113 | + for i in indexes { |
| 114 | + let lang = &self.languages[*i]; |
| 115 | + crate::extractor::extract( |
| 116 | + lang.ts_language, |
| 117 | + "ruby", |
| 118 | + &schemas[*i], |
| 119 | + &mut diagnostics_writer, |
| 120 | + &mut trap_writer, |
| 121 | + &path, |
| 122 | + &source, |
| 123 | + &[], |
| 124 | + ); |
| 125 | + std::fs::create_dir_all(src_archive_file.parent().unwrap())?; |
| 126 | + std::fs::copy(&path, &src_archive_file)?; |
| 127 | + write_trap(&self.trap_dir, &path, &trap_writer, trap_compression)?; |
| 128 | + } |
| 129 | + } else { |
| 130 | + tracing::warn!(?path, "No language matches path, skipping file."); |
| 131 | + } |
| 132 | + } |
| 133 | + }; |
| 134 | + Ok(()) as std::io::Result<()> |
| 135 | + }) |
| 136 | + .expect("failed to extract files"); |
| 137 | + |
| 138 | + let path = PathBuf::from("extras"); |
| 139 | + let mut trap_writer = trap::Writer::new(); |
| 140 | + crate::extractor::populate_empty_location(&mut trap_writer); |
| 141 | + |
| 142 | + write_trap(&self.trap_dir, &path, &trap_writer, trap_compression) |
| 143 | + } |
| 144 | +} |
| 145 | + |
| 146 | +fn write_trap( |
| 147 | + trap_dir: &Path, |
| 148 | + path: &Path, |
| 149 | + trap_writer: &trap::Writer, |
| 150 | + trap_compression: trap::Compression, |
| 151 | +) -> std::io::Result<()> { |
| 152 | + let trap_file = crate::file_paths::path_for(trap_dir, path, trap_compression.extension()); |
| 153 | + std::fs::create_dir_all(trap_file.parent().unwrap())?; |
| 154 | + trap_writer.write_to_file(&trap_file, trap_compression) |
| 155 | +} |
0 commit comments