Skip to content

Commit 3680613

Browse files
committed
Shared: Restrict extractor file globs to filenames
1 parent cc7ef5d commit 3680613

File tree

1 file changed

+38
-28
lines changed
  • shared/tree-sitter-extractor/src/extractor

1 file changed

+38
-28
lines changed

shared/tree-sitter-extractor/src/extractor/simple.rs

Lines changed: 38 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
use crate::trap;
2-
use globset::{Glob, GlobSetBuilder};
2+
use globset::{GlobBuilder, GlobSetBuilder};
33
use rayon::prelude::*;
44
use std::fs::File;
55
use std::io::BufRead;
@@ -89,7 +89,10 @@ impl Extractor {
8989
let mut glob_lang_mapping = vec![];
9090
for (i, lang) in self.languages.iter().enumerate() {
9191
for glob_str in &lang.file_globs {
92-
let glob = Glob::new(glob_str).expect("invalid glob");
92+
let glob = GlobBuilder::new(glob_str)
93+
.literal_separator(true)
94+
.build()
95+
.expect("invalid glob");
9396
builder.add(glob);
9497
glob_lang_mapping.push(i);
9598
}
@@ -114,33 +117,40 @@ impl Extractor {
114117
let source = std::fs::read(&path)?;
115118
let mut trap_writer = trap::Writer::new();
116119

117-
let matches = globset.matches(&path);
118-
if matches.is_empty() {
119-
tracing::error!(?path, "No matching language found, skipping file.");
120-
} else {
121-
let mut languages_processed = vec![false; self.languages.len()];
122-
123-
for m in matches {
124-
let i = glob_language_mapping[m];
125-
if languages_processed[i] {
126-
continue;
120+
match path.file_name() {
121+
None => {
122+
tracing::error!(?path, "No file name found, skipping file.");
123+
}
124+
Some(filename) => {
125+
let matches = globset.matches(&filename);
126+
if matches.is_empty() {
127+
tracing::error!(?path, "No matching language found, skipping file.");
128+
} else {
129+
let mut languages_processed = vec![false; self.languages.len()];
130+
131+
for m in matches {
132+
let i = glob_language_mapping[m];
133+
if languages_processed[i] {
134+
continue;
135+
}
136+
languages_processed[i] = true;
137+
let lang = &self.languages[i];
138+
139+
crate::extractor::extract(
140+
lang.ts_language,
141+
lang.prefix,
142+
&schemas[i],
143+
&mut diagnostics_writer,
144+
&mut trap_writer,
145+
&path,
146+
&source,
147+
&[],
148+
);
149+
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
150+
std::fs::copy(&path, &src_archive_file)?;
151+
write_trap(&self.trap_dir, &path, &trap_writer, trap_compression)?;
152+
}
127153
}
128-
languages_processed[i] = true;
129-
let lang = &self.languages[i];
130-
131-
crate::extractor::extract(
132-
lang.ts_language,
133-
lang.prefix,
134-
&schemas[i],
135-
&mut diagnostics_writer,
136-
&mut trap_writer,
137-
&path,
138-
&source,
139-
&[],
140-
);
141-
std::fs::create_dir_all(src_archive_file.parent().unwrap())?;
142-
std::fs::copy(&path, &src_archive_file)?;
143-
write_trap(&self.trap_dir, &path, &trap_writer, trap_compression)?;
144154
}
145155
}
146156
Ok(()) as std::io::Result<()>

0 commit comments

Comments
 (0)