Skip to content

Commit c859605

Browse files
Merge branch 'main' into fix/bridge-direct-vs-transitive-47
2 parents 9b23566 + db0e1b7 commit c859605

2 files changed

Lines changed: 235 additions & 13 deletions

File tree

src/abi/PatternCompleteness.idr

Lines changed: 16 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,12 @@ module PanicAttack.ABI.PatternCompleteness
2323

2424
||| All 49 programming languages supported by the scanner.
2525
||| This MUST stay in sync with src/types.rs Language enum.
26+
||| Note: Cpp is the C/C++ unified analyzer (analyze_c_cpp dispatches
27+
||| on .c / .cpp / .h / .hpp / .cc / .hh — there is no separate C
28+
||| constructor in the Rust enum).
2629
public export
2730
data Lang
28-
= Rust | C | Cpp | Go | Java | Python | JavaScript | Ruby
31+
= Rust | Cpp | Go | Java | Python | JavaScript | Ruby
2932
-- BEAM family
3033
| Elixir | Erlang | Gleam
3134
-- ML family
@@ -44,6 +47,8 @@ data Lang
4447
| Nickel | Nix
4548
-- Scripting / data
4649
| Shell | Julia | Lua
50+
-- HPC / parallel
51+
| Chapel
4752
-- Nextgen custom DSLs
4853
| WokeLang | Eclexia | MyLang | JuliaTheViper | Oblibeny
4954
| Anvomidav | AffineScript | Ephapax | BetLang | ErrorLang
@@ -70,7 +75,6 @@ data HasAnalyzer : Lang -> Type where
7075
public export
7176
analyzerFor : (lang : Lang) -> HasAnalyzer lang
7277
analyzerFor Rust = SpecificAnalyzer
73-
analyzerFor C = SpecificAnalyzer
7478
analyzerFor Cpp = SpecificAnalyzer
7579
analyzerFor Go = SpecificAnalyzer
7680
analyzerFor Java = SpecificAnalyzer
@@ -106,6 +110,7 @@ analyzerFor Nix = SpecificAnalyzer
106110
analyzerFor Shell = SpecificAnalyzer
107111
analyzerFor Julia = SpecificAnalyzer
108112
analyzerFor Lua = SpecificAnalyzer
113+
analyzerFor Chapel = SpecificAnalyzer
109114
analyzerFor WokeLang = SpecificAnalyzer
110115
analyzerFor Eclexia = SpecificAnalyzer
111116
analyzerFor MyLang = SpecificAnalyzer
@@ -206,24 +211,24 @@ data HasDetector : WPCategory -> Type where
206211
||| matching code in analyzer.rs.
207212
public export
208213
detectorsFor : (cat : WPCategory) -> HasDetector cat
209-
detectorsFor UncheckedAllocation = DetectedBy [C, Cpp]
210-
detectorsFor UnboundedLoop = DetectedBy [Rust, C, Cpp, Go, Python, JavaScript]
214+
detectorsFor UncheckedAllocation = DetectedBy [Cpp]
215+
detectorsFor UnboundedLoop = DetectedBy [Rust, Cpp, Go, Python, JavaScript]
211216
detectorsFor BlockingIO = DetectedBy [Rust, Go, Python, JavaScript]
212-
detectorsFor UnsafeCode = DetectedBy [Rust, C, Cpp, Zig, Nim, DLang]
217+
detectorsFor UnsafeCode = DetectedBy [Rust, Cpp, Zig, Nim, DLang]
213218
detectorsFor PanicPath = DetectedBy [Rust, Go, Haskell]
214-
detectorsFor RaceCondition = DetectedBy [Rust, C, Cpp, Go]
215-
detectorsFor DeadlockPotential = DetectedBy [Rust, C, Cpp, Go]
216-
detectorsFor ResourceLeak = DetectedBy [Rust, C, Cpp]
219+
detectorsFor RaceCondition = DetectedBy [Rust, Cpp, Go]
220+
detectorsFor DeadlockPotential = DetectedBy [Rust, Cpp, Go]
221+
detectorsFor ResourceLeak = DetectedBy [Rust, Cpp]
217222
detectorsFor CommandInjection = DetectedBy [Python, Ruby, JavaScript, Shell, Elixir, Erlang]
218223
detectorsFor UnsafeDeserialization = DetectedBy [Python, Ruby, JavaScript, Java]
219224
detectorsFor DynamicCodeExecution = DetectedBy [Python, JavaScript, Ruby, Elixir, Shell]
220225
detectorsFor UnsafeFFI = DetectedBy [Elixir, Erlang, Haskell, Pony, OCaml, Zig]
221226
detectorsFor AtomExhaustion = DetectedBy [Elixir, Erlang]
222-
detectorsFor InsecureProtocol = DetectedBy [Rust, C, Cpp, Go, Python, JavaScript, Ruby]
227+
detectorsFor InsecureProtocol = DetectedBy [Rust, Cpp, Go, Python, JavaScript, Ruby]
223228
detectorsFor ExcessivePermissions = DetectedBy [Shell]
224229
detectorsFor PathTraversal = DetectedBy [Python, JavaScript, Ruby, Go, Java]
225-
detectorsFor HardcodedSecret = DetectedBy [Rust, C, Cpp, Go, Python, JavaScript, Ruby]
226-
detectorsFor UncheckedError = DetectedBy [Go, Rust, C, Cpp]
230+
detectorsFor HardcodedSecret = DetectedBy [Rust, Cpp, Go, Python, JavaScript, Ruby]
231+
detectorsFor UncheckedError = DetectedBy [Go, Rust, Cpp]
227232
detectorsFor InfiniteRecursion = DetectedBy [Haskell, PureScript, Scheme, Racket]
228233
detectorsFor UnsafeTypeCoercion = DetectedBy [OCaml, Haskell, DLang, Nim]
229234
detectorsFor ProofDrift = DetectedBy [Idris, Lean, Agda, Isabelle, Coq, Julia]

src/assail/analyzer.rs

Lines changed: 219 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3581,8 +3581,13 @@ impl Analyzer {
35813581
weak_points: &mut Vec<WeakPoint>,
35823582
file_path: &str,
35833583
) -> Result<()> {
3584-
// Isabelle uses '(* *)' block comments only — no line comments.
3585-
let code = strip_proof_comments(content, "", Some(("(*", "*)")));
3584+
// Isabelle uses '(* *)' block comments AND `text \<open>...\<close>`
3585+
// prose declarations AND `@{text ...}` antiquotations. All three must
3586+
// be stripped before tactic-keyword counting: docstrings discussing
3587+
// "zero `sorry`" or "all `oops` placeholders" are valid prose, not
3588+
// proof-escape hatches. See #43.
3589+
let without_blocks = strip_proof_comments(content, "", Some(("(*", "*)")));
3590+
let code = strip_isabelle_prose(&without_blocks);
35863591
// sorry — Isabelle's admitted-proof escape hatch (banned estate-wide)
35873592
let sorry_count = code.matches("sorry").count();
35883593
if sorry_count > 0 {
@@ -5526,6 +5531,133 @@ fn strip_proof_comments(content: &str, line_marker: &str, block: Option<(&str, &
55265531
out
55275532
}
55285533

5534+
/// Strip Isabelle prose constructs that are documentation, not proof code:
5535+
///
5536+
/// 1. `@{text ...}` antiquotations — used inside prose blocks AND inside
5537+
/// `(* ... *)` comments to refer to proof tactics by name. Brace-balanced.
5538+
/// 2. Prose-block cartouches: `chapter`, `section`, `subsection`,
5539+
/// `subsubsection`, `paragraph`, and `text` followed by a
5540+
/// `\<open>...\<close>` cartouche. Cartouches nest, so depth-count.
5541+
///
5542+
/// `(* ... *)` comments must be stripped separately via `strip_proof_comments`.
5543+
/// The order is significant: strip `(* *)` first, then this function, because
5544+
/// a `\<open>...\<close>` cartouche inside a `(* *)` comment is already gone.
5545+
///
5546+
/// Conservative: only strips cartouches that follow a prose-block keyword.
5547+
/// Cartouches used as string literals inside tactics (rare) are kept.
5548+
fn strip_isabelle_prose(content: &str) -> String {
5549+
const OPEN: &str = "\\<open>";
5550+
const CLOSE: &str = "\\<close>";
5551+
const PROSE_KEYWORDS: [&str; 6] = [
5552+
"chapter",
5553+
"subsubsection",
5554+
"subsection",
5555+
"section",
5556+
"paragraph",
5557+
"text",
5558+
];
5559+
5560+
let mut out = String::with_capacity(content.len());
5561+
let mut rest = content;
5562+
5563+
loop {
5564+
let aq_pos = rest.find("@{text");
5565+
let mut prose_pos: Option<(usize, usize)> = None;
5566+
for kw in &PROSE_KEYWORDS {
5567+
if let Some(found) = find_prose_block(rest, kw, OPEN) {
5568+
prose_pos = match prose_pos {
5569+
Some((existing, _)) if existing <= found.0 => prose_pos,
5570+
_ => Some(found),
5571+
};
5572+
}
5573+
}
5574+
5575+
let next = match (aq_pos, prose_pos) {
5576+
(None, None) => {
5577+
out.push_str(rest);
5578+
return out;
5579+
}
5580+
(Some(a), None) => (a, skip_antiquotation_end(rest, a)),
5581+
(None, Some((p_start, p_after_open))) => {
5582+
(p_start, skip_cartouche_end(rest, p_after_open, OPEN, CLOSE))
5583+
}
5584+
(Some(a), Some((p_start, p_after_open))) => {
5585+
if a <= p_start {
5586+
(a, skip_antiquotation_end(rest, a))
5587+
} else {
5588+
(p_start, skip_cartouche_end(rest, p_after_open, OPEN, CLOSE))
5589+
}
5590+
}
5591+
};
5592+
5593+
out.push_str(&rest[..next.0]);
5594+
out.push(' ');
5595+
rest = &rest[next.1..];
5596+
}
5597+
}
5598+
5599+
/// Find `keyword` (preceded by start-of-string or whitespace) followed by
5600+
/// optional whitespace + `open_marker`. Returns `(keyword_start, after_open)`.
5601+
fn find_prose_block(haystack: &str, keyword: &str, open_marker: &str) -> Option<(usize, usize)> {
5602+
let mut search_from = 0;
5603+
while let Some(rel) = haystack[search_from..].find(keyword) {
5604+
let abs = search_from + rel;
5605+
let preceded_ok = abs == 0
5606+
|| haystack[..abs]
5607+
.chars()
5608+
.last()
5609+
.is_some_and(|c| c.is_whitespace());
5610+
let after_kw = abs + keyword.len();
5611+
let after_ws: usize = haystack[after_kw..]
5612+
.chars()
5613+
.take_while(|c| c.is_whitespace())
5614+
.map(char::len_utf8)
5615+
.sum();
5616+
let open_at = after_kw + after_ws;
5617+
if preceded_ok && haystack[open_at..].starts_with(open_marker) {
5618+
return Some((abs, open_at + open_marker.len()));
5619+
}
5620+
search_from = abs + keyword.len();
5621+
}
5622+
None
5623+
}
5624+
5625+
/// Given an `@{text` start at `start`, find the byte just past the matching `}`.
5626+
fn skip_antiquotation_end(haystack: &str, start: usize) -> usize {
5627+
let after = start + "@{text".len();
5628+
let bytes = haystack.as_bytes();
5629+
let mut depth: i32 = 1;
5630+
let mut j = after;
5631+
while j < bytes.len() && depth > 0 {
5632+
match bytes[j] {
5633+
b'{' => depth += 1,
5634+
b'}' => depth -= 1,
5635+
_ => {}
5636+
}
5637+
j += 1;
5638+
}
5639+
j
5640+
}
5641+
5642+
/// Given a position just past the first `\<open>` (depth = 1), scan forward and
5643+
/// return the byte index just past the matching `\<close>`. Nesting-aware.
5644+
fn skip_cartouche_end(haystack: &str, after_open: usize, open: &str, close: &str) -> usize {
5645+
let mut depth: i32 = 1;
5646+
let mut j = after_open;
5647+
while j < haystack.len() && depth > 0 {
5648+
if haystack[j..].starts_with(open) {
5649+
depth += 1;
5650+
j += open.len();
5651+
} else if haystack[j..].starts_with(close) {
5652+
depth -= 1;
5653+
j += close.len();
5654+
} else {
5655+
j += 1;
5656+
}
5657+
}
5658+
j
5659+
}
5660+
55295661
/// Count Rocq `Axiom`/`Parameter` declarations that represent genuine
55305662
/// unverified postulates, excluding the two legitimate scaffold shapes:
55315663
///
@@ -5737,6 +5869,91 @@ mod tests {
57375869
use std::fs;
57385870
use tempfile::TempDir;
57395871

5872+
// ---------------------------------------------------------------
5873+
// 0. Isabelle prose-stripping (regression for #43 false positives)
5874+
// ---------------------------------------------------------------
5875+
5876+
#[test]
5877+
fn isabelle_strip_text_antiquotation_inside_block_comment() {
5878+
// Tropical_Kleene.thy:663 — sorry appears only in `@{text sorry}` antiquotation
5879+
let stripped = strip_isabelle_prose("(* All proofs are complete — zero @{text sorry}. *)");
5880+
// Outer (* *) is stripped separately; this helper handles the antiquotation
5881+
assert!(
5882+
!stripped.contains("sorry"),
5883+
"@{{text sorry}} should be elided, got: {stripped:?}"
5884+
);
5885+
}
5886+
5887+
#[test]
5888+
fn isabelle_strip_cartouche_after_text_keyword() {
5889+
// Tropical_CNO.thy:29 — text \<open>All sorry placeholders have been closed.\<close>
5890+
let src = "text \\<open>All sorry placeholders have been closed.\\<close>\nlemma foo: True by simp";
5891+
let stripped = strip_isabelle_prose(src);
5892+
assert!(!stripped.contains("sorry"), "got: {stripped:?}");
5893+
assert!(stripped.contains("lemma foo"));
5894+
}
5895+
5896+
#[test]
5897+
fn isabelle_strip_section_cartouche() {
5898+
let src = "section \\<open>oops as a tactic\\<close>\nlemma bar: True by simp";
5899+
let stripped = strip_isabelle_prose(src);
5900+
assert!(!stripped.contains("oops"));
5901+
assert!(stripped.contains("lemma bar"));
5902+
}
5903+
5904+
#[test]
5905+
fn isabelle_strip_subsection_subsubsection_paragraph_chapter() {
5906+
for kw in ["chapter", "subsection", "subsubsection", "paragraph"] {
5907+
let src = format!("{kw} \\<open>discuss sorry\\<close>\nreal_code");
5908+
let stripped = strip_isabelle_prose(&src);
5909+
assert!(
5910+
!stripped.contains("sorry"),
5911+
"{kw} cartouche not stripped: {stripped:?}"
5912+
);
5913+
}
5914+
}
5915+
5916+
#[test]
5917+
fn isabelle_preserve_real_sorry_outside_prose() {
5918+
// A genuine `sorry` proof tactic must still be visible.
5919+
let src = "lemma broken: False\n sorry";
5920+
let stripped = strip_isabelle_prose(src);
5921+
assert!(
5922+
stripped.contains("sorry"),
5923+
"genuine sorry was wrongly stripped: {stripped:?}"
5924+
);
5925+
}
5926+
5927+
#[test]
5928+
fn isabelle_nested_cartouches() {
5929+
// \<open> can nest. A cartouche containing another cartouche
5930+
// should be fully consumed.
5931+
let src = "text \\<open>outer \\<open>inner sorry\\<close> tail\\<close>\ncode";
5932+
let stripped = strip_isabelle_prose(src);
5933+
assert!(!stripped.contains("sorry"));
5934+
assert!(stripped.contains("code"));
5935+
}
5936+
5937+
#[test]
5938+
fn isabelle_antiquotation_with_nested_braces() {
5939+
let src = "text \\<open>see @{text \"sorry { x = 1 }\"} below\\<close>\ncode";
5940+
let stripped = strip_isabelle_prose(src);
5941+
assert!(!stripped.contains("sorry"));
5942+
assert!(stripped.contains("code"));
5943+
}
5944+
5945+
#[test]
5946+
fn isabelle_keyword_not_at_word_boundary() {
5947+
// `subsection` should match, but a function called `mysection`
5948+
// (no leading whitespace before "section") should NOT.
5949+
let src = "mysection_lemma: True by sorry -- real tactic\n";
5950+
let stripped = strip_isabelle_prose(src);
5951+
assert!(
5952+
stripped.contains("sorry"),
5953+
"false positive on identifier ending in 'section': {stripped:?}"
5954+
);
5955+
}
5956+
57405957
// ---------------------------------------------------------------
57415958
// 1. Language::detect for different file extensions
57425959
// ---------------------------------------------------------------

0 commit comments

Comments
 (0)