Skip to content

Commit 35df4a6

Browse files
yudongusa and claude committed
feat: differential testing CI against LLVM 19 (closes #81)
- Add `.github/workflows/differential.yml`: CI on ubuntu-24.04 installs llvm-19/clang-19 and runs all differential tests with REQUIRE_LLVM=1 (zero skips enforced)
- Add 51 fixture `.ll` files covering all InstrKind variants plus a SHA-256 regression hash database (known_hashes.json)
- Expand `differential.rs`: roundtrip_and_validate for each fixture, check_regression_hashes test, objdump_text helper, semantic exit-code tests with LLVM and our x86 backend
- Fix printer: `load volatile` / `store volatile` (keyword after opcode, not before)
- Fix parser: `extractelement` result type was vector type; now correctly extracts element type from TypeData::Vector
- Re-bootstrap regression hashes after the two bug fixes

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent b8e410d commit 35df4a6

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

59 files changed

+1663
-34
lines changed

.github/workflows/differential.yml

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
name: Differential Tests (LLVM 19)
2+
3+
on:
4+
push:
5+
branches: [main, "feat/**"]
6+
pull_request:
7+
branches: [main]
8+
9+
jobs:
10+
differential:
11+
name: Differential tests against LLVM 19
12+
runs-on: ubuntu-24.04
13+
14+
steps:
15+
- uses: actions/checkout@v4
16+
17+
- name: Install Rust nightly toolchain
18+
uses: dtolnay/rust-toolchain@nightly
19+
20+
- name: Cache cargo registry and build artifacts
21+
uses: actions/cache@v4
22+
with:
23+
path: |
24+
~/.cargo/registry
25+
~/.cargo/git
26+
target
27+
key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
28+
restore-keys: ${{ runner.os }}-cargo-
29+
30+
- name: Install LLVM 19 and Clang 19
31+
run: |
32+
sudo apt-get update -y
33+
sudo apt-get install -y llvm-19 clang-19 binutils
34+
# Create unversioned symlinks so find_llvm_bin() resolves them
35+
sudo update-alternatives --install /usr/bin/llvm-as llvm-as \
36+
/usr/lib/llvm-19/bin/llvm-as 100
37+
sudo update-alternatives --install /usr/bin/llvm-objdump llvm-objdump \
38+
/usr/lib/llvm-19/bin/llvm-objdump 100
39+
sudo update-alternatives --install /usr/bin/clang clang \
40+
/usr/bin/clang-19 100
41+
# Verify installation
42+
llvm-as --version
43+
llvm-objdump --version
44+
clang --version
45+
46+
- name: Run differential tests (ZERO skips enforced)
47+
env:
48+
REQUIRE_LLVM: "1"
49+
run: cargo test -p llvm-ir-parser differential --no-fail-fast -- --nocapture
50+
51+
- name: Run regression hash check
52+
env:
53+
REQUIRE_LLVM: "1"
54+
run: cargo test -p llvm-ir-parser check_regression_hashes -- --nocapture

Cargo.lock

Lines changed: 187 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/llvm-ir-parser/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,6 @@ llvm-ir = { path = "../llvm-ir" }
1212
llvm-codegen = { path = "../llvm-codegen" }
1313
llvm-target-x86 = { path = "../llvm-target-x86" }
1414
llvm-transforms = { path = "../llvm-transforms" }
15+
sha2 = "0.10"
16+
hex = "0.4"
17+
serde_json = "1"

src/llvm-ir-parser/src/parser.rs

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -833,18 +833,34 @@ impl<'src> Parser<'src> {
833833
Token::Kw(Keyword::Alloca) => {
834834
self.lex.next()?;
835835
let alloc_ty = self.parse_type()?;
836-
let num_elements = if self.lex.eat(&Token::Comma) {
837-
match self.lex.peek()? {
838-
Token::Kw(Keyword::Align) => None,
839-
_ => {
840-
let (ne, _) = self.parse_typed_value()?;
841-
Some(ne)
836+
// Parse optional `, <num_elements>` and/or `, align N`.
837+
// When we eat a comma and see `align` directly (no
838+
// num_elements), the comma is already consumed so we must NOT
839+
// go through parse_optional_align (which expects its own
840+
// leading comma).
841+
let (num_elements, comma_before_align_consumed) =
842+
if self.lex.eat(&Token::Comma) {
843+
match self.lex.peek()? {
844+
Token::Kw(Keyword::Align) => (None, true),
845+
_ => {
846+
let (ne, _) = self.parse_typed_value()?;
847+
(Some(ne), false)
848+
}
842849
}
850+
} else {
851+
(None, false)
852+
};
853+
let align = if comma_before_align_consumed {
854+
// Comma was already consumed; parse `align N` directly.
855+
if self.lex.eat_kw(Keyword::Align) {
856+
let a = self.lex.expect_uint_lit()? as u32;
857+
Some(a)
858+
} else {
859+
None
843860
}
844861
} else {
845-
None
862+
self.parse_optional_align()?
846863
};
847-
let align = self.parse_optional_align()?;
848864
let ptr_ty = self.ctx.ptr_ty;
849865
Ok((
850866
InstrKind::Alloca {
@@ -1092,8 +1108,12 @@ impl<'src> Parser<'src> {
10921108
let (vec, vec_ty) = self.parse_typed_value()?;
10931109
self.lex.expect(&Token::Comma)?;
10941110
let (idx, _) = self.parse_typed_value()?;
1095-
// Result type is element type — approximate.
1096-
Ok((InstrKind::ExtractElement { vec, idx }, vec_ty))
1111+
// Result type is the element type of the vector.
1112+
let elem_ty = match self.ctx.get_type(vec_ty) {
1113+
llvm_ir::types::TypeData::Vector { element, .. } => *element,
1114+
_ => vec_ty,
1115+
};
1116+
Ok((InstrKind::ExtractElement { vec, idx }, elem_ty))
10971117
}
10981118
Token::Kw(Keyword::Insertelement) => {
10991119
self.lex.next()?;
@@ -1513,14 +1533,31 @@ impl<'src> Parser<'src> {
15131533
}
15141534

15151535
fn parse_shuffle_mask(&mut self) -> Result<Vec<i32>, ParseError> {
1516-
// Could be `<i32 0, i32 1>` or `undef`.
1536+
// Mask is either `undef` or a typed constant vector.
1537+
// LLVM IR requires the type annotation: `<N x i32> <i32 0, i32 1, ...>`.
1538+
// Older (pre-typed-pointer) IR sometimes omits the outer type, so we
1539+
// accept both forms.
15171540
if self.lex.eat_kw(Keyword::Undef) {
15181541
return Ok(vec![]);
15191542
}
1543+
// Consume optional outer type annotation `<N x i32>`.
1544+
if matches!(self.lex.peek()?, Token::LAngle) {
1545+
// We don't know yet whether this is the type prefix or the inner
1546+
// constant itself. Speculatively parse it as a type; if the next
1547+
// token after `>` is `<` we consumed the type prefix and the inner
1548+
// constant follows. Either way we discard the type — we care only
1549+
// about the integer values.
1550+
let _outer_ty = self.parse_type()?;
1551+
// If the next token is NOT `<`, we've already consumed the whole
1552+
// mask (old short form without type prefix) — but that can't happen
1553+
// here because `parse_type` would have parsed `<i32 0,...>` as a
1554+
// vector type, not as a constant. So after consuming the outer type
1555+
// the next token must be `<` starting the actual constant.
1556+
}
15201557
self.lex.expect(&Token::LAngle)?;
15211558
let mut mask = Vec::new();
15221559
loop {
1523-
// Skip type.
1560+
// Each element: `i32 <int_literal>`.
15241561
let _ = self.parse_type()?;
15251562
let n = self.lex.expect_int_lit()? as i32;
15261563
mask.push(n);

0 commit comments

Comments
 (0)