Skip to content
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- Add `--quiet-empty` (`-E`) flag to suppress output when input is empty. Closes #1936, see #3563 (@NORMAL-EX)
- Improve native man pages and command help syntax highlighting by stripping overstriking, see #3517 (@akirk)
- Add `--fallback-syntax`/`--fallback-language` to apply syntax highlighting only when auto-detection fails, see #1341 (@Xavrir)
- Map files named exactly `BUILD` (matched case-sensitively) to Python (Starlark) for Bazel, see #3576 (@vorburger)

## Bugfixes
- Fix `BAT_CONFIG_DIR` pointing at system config directory causing duplicate flag errors. Closes #3589, see #3620 (@Xavrir)
Expand Down
40 changes: 35 additions & 5 deletions build/syntax_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,13 @@ impl ToTokens for MappingTarget {
/// A single matcher.
///
/// Codegen converts this into a `Lazy<Option<GlobMatcher>>`.
struct Matcher(Vec<MatcherSegment>);
struct Matcher {
/// The parsed pieces of the glob pattern, in order.
///
/// The `FromStr` implementation rejects inputs that yield no segments, so
/// this is never empty (codegen treats an empty list as unreachable).
segments: Vec<MatcherSegment>,
/// Whether the glob pattern should be matched case-insensitively.
///
/// Defaults to `true` (case-insensitive) for backwards compatibility.
/// The value is forwarded verbatim to the generated `build_matcher_*` call.
case_insensitive: bool,
}
/// Parse a matcher.
///
/// Note that this implementation is rather strict: it will greedily interpret
Expand Down Expand Up @@ -116,18 +122,24 @@ impl FromStr for Matcher {
bail!(r#"Invalid matcher: "{s}""#);
}

Ok(Self(non_empty_segments))
Ok(Self {
segments: non_empty_segments,
case_insensitive: true,
})
}
}
impl ToTokens for Matcher {
fn to_tokens(&self, tokens: &mut TokenStream) {
let t = match self.0.as_slice() {
let case_insensitive = self.case_insensitive;
let t = match self.segments.as_slice() {
[] => unreachable!("0-length matcher should never be created"),
[MatcherSegment::Text(text)] => {
quote! { Lazy::new(|| Some(build_matcher_fixed(#text))) }
quote! { Lazy::new(|| Some(build_matcher_fixed(#text, #case_insensitive))) }
}
// parser logic ensures that this case can only happen when there are dynamic segments
segs @ [_, ..] => quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ])) },
segs @ [_, ..] => {
quote! { Lazy::new(|| build_matcher_dynamic(&[ #(#segs),* ], #case_insensitive)) }
}
};
tokens.append_all(t);
}
Expand Down Expand Up @@ -175,7 +187,12 @@ impl MatcherSegment {
/// A struct that models a single .toml file in /src/syntax_mapping/builtins/.
///
/// Both sections carry `#[serde(default)]`, so a file may define either one
/// or both; a missing section deserializes to an empty map.
#[derive(Clone, Debug, Deserialize)]
struct MappingDefModel {
/// Regular mappings from a target to its list of matchers. These matchers
/// keep their parsed default of case-insensitive matching.
#[serde(default)]
mappings: IndexMap<MappingTarget, Vec<Matcher>>,
/// Case-sensitive mappings. Unlike `mappings`, these glob patterns are
/// matched case-sensitively.
#[serde(default)]
case_sensitive_mappings: IndexMap<MappingTarget, Vec<Matcher>>,
}
impl MappingDefModel {
fn into_mapping_list(self) -> MappingList {
Expand All @@ -188,6 +205,19 @@ impl MappingDefModel {
.map(|matcher| (matcher, target.clone()))
.collect::<Vec<_>>()
})
.chain(
self.case_sensitive_mappings
.into_iter()
.flat_map(|(target, matchers)| {
matchers
.into_iter()
.map(|mut matcher| {
matcher.case_insensitive = false;
(matcher, target.clone())
})
.collect::<Vec<_>>()
}),
)
.collect();
MappingList(list)
}
Expand Down
50 changes: 47 additions & 3 deletions src/syntax_mapping.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,9 @@ use ignored_suffixes::IgnoredSuffixes;
mod builtin;
pub mod ignored_suffixes;

fn make_glob_matcher(from: &str) -> Result<GlobMatcher> {
fn make_glob_matcher(from: &str, case_insensitive: bool) -> Result<GlobMatcher> {
let matcher = GlobBuilder::new(from)
.case_insensitive(true)
.case_insensitive(case_insensitive)
.literal_separator(true)
.build()?
.compile_matcher();
Expand Down Expand Up @@ -97,7 +97,14 @@ impl<'a> SyntaxMapping<'a> {
}

pub fn insert(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
let matcher = make_glob_matcher(from)?;
let matcher = make_glob_matcher(from, true)?;
self.custom_mappings.push((matcher, to));
Ok(())
}

/// Register a custom mapping whose glob pattern is matched case-sensitively.
///
/// This is the case-sensitive counterpart of [`Self::insert`]: the pattern
/// `from` is compiled with case-insensitivity turned off and the resulting
/// matcher is appended to the custom mappings together with `to`.
///
/// # Errors
///
/// Returns an error if `from` cannot be compiled into a glob matcher; in that
/// case no mapping is added.
pub fn insert_case_sensitive(&mut self, from: &str, to: MappingTarget<'a>) -> Result<()> {
    make_glob_matcher(from, false).map(|compiled| self.custom_mappings.push((compiled, to)))
}
Expand Down Expand Up @@ -261,4 +268,41 @@ mod tests {
Some(MappingTarget::MapTo("alpha"))
);
}

#[test]
fn case_sensitive_custom_mappings_work() {
    // A mapping added through `insert_case_sensitive` must only match file
    // names whose casing is identical to the pattern.
    let mut map = SyntaxMapping::new();
    // Fail loudly if the pattern cannot be inserted; silently discarding the
    // result would surface later as a confusing assertion failure instead.
    map.insert_case_sensitive("MY_SPECIAL_FILE", MappingTarget::MapTo("Python"))
        .expect("a literal glob pattern should compile and be inserted");

    // Exact case matches
    assert_eq!(
        map.get_syntax_for("/path/to/MY_SPECIAL_FILE"),
        Some(MappingTarget::MapTo("Python"))
    );
    // Different case should NOT match the case-sensitive rule
    assert_eq!(map.get_syntax_for("/path/to/my_special_file"), None);
    assert_eq!(map.get_syntax_for("/path/to/My_Special_File"), None);
}

#[test]
fn builtin_mappings_build_is_case_sensitive() {
    // Only the exact upper-case file name `BUILD` is claimed by the
    // case-sensitive builtin Python rule; every other casing must keep
    // resolving to `MapToUnknown`.
    let builtin_only = SyntaxMapping::new();

    let expectations = [
        // "BUILD" (uppercase) should map to Python via case-sensitive builtin
        ("/path/to/BUILD", Some(MappingTarget::MapTo("Python"))),
        // "build" (lowercase) should still map to MapToUnknown
        ("/path/to/build", Some(MappingTarget::MapToUnknown)),
        // Mixed case should NOT match the Python rule
        ("/path/to/Build", Some(MappingTarget::MapToUnknown)),
    ];

    for (path, expected) in expectations {
        assert_eq!(builtin_only.get_syntax_for(path), expected);
    }
}
}
9 changes: 5 additions & 4 deletions src/syntax_mapping/builtin.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,9 @@ include!(concat!(
/// A failure to compile is a fatal error.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_fixed(from: &str) -> GlobMatcher {
make_glob_matcher(from).expect("A builtin fixed glob matcher failed to compile")
fn build_matcher_fixed(from: &str, case_insensitive: bool) -> GlobMatcher {
make_glob_matcher(from, case_insensitive)
.expect("A builtin fixed glob matcher failed to compile")
}

/// Join a list of matcher segments to create a glob string, replacing all
Expand All @@ -64,7 +65,7 @@ fn build_matcher_fixed(from: &str) -> GlobMatcher {
/// to compile.
///
/// Used internally by `Lazy<Option<GlobMatcher>>`'s lazy evaluation closure.
fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
fn build_matcher_dynamic(segs: &[MatcherSegment], case_insensitive: bool) -> Option<GlobMatcher> {
// join segments
let mut buf = String::new();
for seg in segs {
Expand All @@ -77,7 +78,7 @@ fn build_matcher_dynamic(segs: &[MatcherSegment]) -> Option<GlobMatcher> {
}
}
// compile glob matcher
let matcher = make_glob_matcher(&buf).ok()?;
let matcher = make_glob_matcher(&buf, case_insensitive).ok()?;
Some(matcher)
}

Expand Down
6 changes: 3 additions & 3 deletions src/syntax_mapping/builtins/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,9 @@ syntax mappings defined by all TOML files, and embed them into the binary.

## File syntax

Each TOML file should contain a single section named `mappings`, with each of
its keys being a language identifier (first column of `bat -L`; also referred to
as "target").
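Each TOML file should contain a section named `mappings`, a section named
`case_sensitive_mappings`, or both. Each key in these sections is a language
identifier (first column of `bat -L`; also referred to as "target"). Glob
matchers listed under `mappings` are matched case-insensitively, whereas those
listed under `case_sensitive_mappings` are matched case-sensitively.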

The value of each key should be an array of strings, with each item being a glob
matcher. We will call each of these items a "rule".
Expand Down
2 changes: 2 additions & 0 deletions src/syntax_mapping/builtins/common/50-bazel.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[case_sensitive_mappings]
"Python" = ["BUILD"]
Copy link

Copilot AI Feb 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This change will break the existing test 'syntax_detection_first_line' in src/assets.rs:540-541. The test expects files named "build" (lowercase) with a bash shebang to use first-line fallback detection and be identified as "Bourne Again Shell (bash)". However, with this mapping, "build" will match the case-insensitive "BUILD" pattern and be mapped to Python, preventing first-line fallback and causing the test to fail.

Suggested change
"Python" = ["BUILD"]
# Intentionally no mapping for bare "BUILD" to avoid conflicting with
# first-line fallback detection for lowercase "build" files.

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no longer relevant after case sensitive matching was added in 56fe0fa

Copy link

Copilot AI Feb 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No test coverage has been added for the new BUILD file mapping. Consider adding a test case to src/assets.rs in the syntax_detection_basic test function, similar to the existing tests for other filename mappings like PKGBUILD and Makefile. For example: assert_eq!(test.syntax_for_file("BUILD"), "Python");

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

added test in 56fe0fa

Copy link

Copilot AI Feb 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The pattern "BUILD" will match files named "build" (lowercase) due to case-insensitive glob matching (see src/syntax_mapping.rs:22). This overrides the existing mapping in 99-unset-ambiguous-filenames.toml that intentionally maps "build" to MapToUnknown to prevent incorrect syntax detection for NAnt Build Files. This breaks existing behavior.

Since Bazel BUILD files are typically uppercase, consider either:

  1. Finding a way to make this mapping case-sensitive (would require code changes to the make_glob_matcher function)
  2. Documenting this as a known limitation and accepting that lowercase "build" files will now be treated as Python
  3. Not adding this mapping and relying on users to configure it manually via --map-syntax if needed

The issue noted in 99-unset-ambiguous-filenames.toml is that "NAnt Build File" should only match *.build files (with extension), not files named "build" (no extension). This PR's case-insensitive "BUILD" pattern will cause lowercase "build" files to be mapped to Python instead of remaining as MapToUnknown.

Suggested change
"Python" = ["BUILD"]

Copilot uses AI. Check for mistakes.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

fixed in 56fe0fa