Skip to content

Commit a6e55b2

Browse files
authored
Support included/excluded patterns in LocalFile (#133)
1 parent 91e08f7 commit a6e55b2

File tree

3 files changed

+43
-9
lines changed

3 files changed

+43
-9
lines changed

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,4 @@ tree-sitter-python = "0.23.6"
5656
tree-sitter-javascript = "0.23.1"
5757
tree-sitter-typescript = "0.23.2"
5858
tree-sitter-md = "0.3.2"
59+
globset = "0.4.16"

python/cocoindex/sources.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,11 @@ class LocalFile(op.SourceSpec):
88

99
path: str
1010
binary: bool = False
11+
12+
# If provided, only files whose path (relative to `path`) matches at least one of these patterns will be included.
13+
# See https://docs.rs/globset/latest/globset/index.html for the syntax of the patterns.
14+
included_patterns: list[str] | None = None
15+
16+
# If provided, files and directories whose path (relative to `path`) matches any of these patterns will be excluded; excluded directories are not traversed.
17+
# See https://docs.rs/globset/latest/globset/index.html for the syntax of the patterns.
18+
excluded_patterns: list[str] | None = None

src/ops/sources/local_file.rs

Lines changed: 34 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
use globset::{Glob, GlobSet, GlobSetBuilder};
12
use log::warn;
23
use std::{path::PathBuf, sync::Arc};
34

@@ -7,29 +8,43 @@ use crate::{fields_value, ops::sdk::*};
78
pub struct Spec {
89
path: String,
910
binary: bool,
11+
included_patterns: Option<Vec<String>>,
12+
excluded_patterns: Option<Vec<String>>,
1013
}
1114

1215
struct Executor {
1316
root_path_str: String,
1417
root_path: PathBuf,
1518
binary: bool,
19+
included_glob_set: Option<GlobSet>,
20+
excluded_glob_set: Option<GlobSet>,
1621
}
1722

1823
impl Executor {
1924
async fn traverse_dir(&self, dir_path: &PathBuf, result: &mut Vec<KeyValue>) -> Result<()> {
2025
for entry in std::fs::read_dir(dir_path)? {
2126
let entry = entry?;
2227
let path = entry.path();
23-
if path.is_dir() {
24-
Box::pin(self.traverse_dir(&path, result)).await?;
25-
} else {
26-
if let Some(file_name) = path.to_str() {
27-
result.push(KeyValue::Str(Arc::from(
28-
&file_name[self.root_path_str.len() + 1..],
29-
)));
30-
} else {
31-
warn!("Skipped ill-formed file path: {}", path.display());
28+
if let Some(file_name) = path.to_str() {
29+
let relative_path = &file_name[self.root_path_str.len() + 1..];
30+
if self
31+
.excluded_glob_set
32+
.as_ref()
33+
.map_or(false, |glob_set| glob_set.is_match(relative_path))
34+
{
35+
continue;
3236
}
37+
if path.is_dir() {
38+
Box::pin(self.traverse_dir(&path, result)).await?;
39+
} else if self
40+
.included_glob_set
41+
.as_ref()
42+
.map_or(true, |glob_set| glob_set.is_match(relative_path))
43+
{
44+
result.push(KeyValue::Str(Arc::from(relative_path)));
45+
}
46+
} else {
47+
warn!("Skipped ill-formed file path: {}", path.display());
3348
}
3449
}
3550
Ok(())
@@ -102,6 +117,16 @@ impl SourceFactoryBase for Factory {
102117
root_path_str: spec.path.clone(),
103118
root_path: PathBuf::from(spec.path),
104119
binary: spec.binary,
120+
included_glob_set: spec.included_patterns.map(build_glob_set).transpose()?,
121+
excluded_glob_set: spec.excluded_patterns.map(build_glob_set).transpose()?,
105122
}))
106123
}
107124
}
125+
126+
fn build_glob_set(patterns: Vec<String>) -> Result<GlobSet> {
127+
let mut builder = GlobSetBuilder::new();
128+
for pattern in patterns {
129+
builder.add(Glob::new(pattern.as_str())?);
130+
}
131+
Ok(builder.build()?)
132+
}

0 commit comments

Comments
 (0)