Skip to content

Commit 7b19f4f

Browse files
committed
[AArch64] Add assembly/disassembly of atomic ld/st
This patch adds assembly/disassembly for the following instructions: - ldfadd{a,al,l,}, ldbfadd{a,al,l,} - ldfmax{a,al,l,}, ldbfmax{a,al,l,} - ldfmaxnm{a,al,l,}, ldbfmaxnm{a,al,l,} - ldfmin{a,al,l,}, ldbfmin{a,al,l,} - ldfminnm{a,al,l,}, ldbfminnm{a,al,l,} - stfadd{l,}, stbfadd{l,} - stfmax{l,}, stbfmax{l,} - stfmaxnm{l,}, stbfmaxnm{l,} - stfmin{l,}, stbfmin{l,} - stfminnm{l,}, stbfminnm{l,}
1 parent 5c37316 commit 7b19f4f

28 files changed

+3468
-0
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12626,3 +12626,64 @@ def : TokenAlias<".H", ".h">;
1262612626
def : TokenAlias<".S", ".s">;
1262712627
def : TokenAlias<".D", ".d">;
1262812628
def : TokenAlias<".Q", ".q">;
12629+
12630+
//----------------------------------------------------------------------------
12631+
// 2024 Armv9.6 Extensions
12632+
//----------------------------------------------------------------------------
12633+
12634+
// Instruction format shared by the floating-point atomic load-op records
// (see the AtomicFPLoad multiclass and the BFloat16 defs that use this class
// directly). The instruction takes $Rs and the base address $Rn as inputs and
// produces $Rt; the assembly syntax is "<asm>\t$Rs, $Rt, [$Rn]".
// It is flagged as both mayLoad and mayStore. No selection pattern (empty
// pattern list) and no scheduling information (Sched<[]>) are attached.
//
// Parameters:
//   regtype - register class used for both $Rs and $Rt.
//   sz      - encoded in Inst{31-30}.
//   AR      - encoded in Inst{23-22}; the instantiations pass 0b10 for the
//             "a" mnemonic suffix, 0b11 for "al", 0b01 for "l" and 0b00 for
//             the unsuffixed form.
//   op0     - encoded in Inst{14-12}; selects the operation.
//   asm     - mnemonic string.
let mayLoad = 1, mayStore = 1 in
12635+
class BaseAtomicFPLoad<RegisterClass regtype, bits<2> sz, bits<2> AR,
12636+
bits<3> op0, string asm>
12637+
: I<(outs regtype:$Rt),
12638+
(ins regtype:$Rs, GPR64sp:$Rn),
12639+
asm, "\t$Rs, $Rt, [$Rn]","", []>,
12640+
Sched<[]> {
12641+
bits<5> Rt;
12642+
bits<5> Rs;
12643+
bits<5> Rn;
12644+
let Inst{31-30} = sz;
12645+
let Inst{29-24} = 0b111100;
12646+
let Inst{23-22} = AR;
12647+
let Inst{21} = 0b1;
12648+
let Inst{20-16} = Rs;
12649+
let Inst{15} = 0b0;
12650+
let Inst{14-12} = op0;
12651+
let Inst{11-10} = 0b00;
12652+
let Inst{9-5} = Rn;
12653+
let Inst{4-0} = Rt;
12654+
}
12655+
12656+
// Expands one atomic FP load-op mnemonic into three records suffixed D, S and
// H, using FPR64 / FPR32 / FPR16 with size field 0b11 / 0b10 / 0b01
// respectively. AR, op0 and asm are forwarded unchanged to BaseAtomicFPLoad.
multiclass AtomicFPLoad<bits<2> AR, bits<3> op0, string asm> {
12657+
def D : BaseAtomicFPLoad<FPR64, 0b11, AR, op0, asm>;
12658+
def S : BaseAtomicFPLoad<FPR32, 0b10, AR, op0, asm>;
12659+
def H : BaseAtomicFPLoad<FPR16, 0b01, AR, op0, asm>;
12660+
}
12661+
12662+
// Instruction format shared by the floating-point atomic store-op records
// (see the AtomicFPStore multiclass and the BFloat16 defs that use this class
// directly). The instruction has no outputs; it takes $Rs and the base
// address $Rn, and the assembly syntax is "<asm>\t$Rs, [$Rn]".
// It is flagged as both mayLoad and mayStore. No selection pattern (empty
// pattern list) and no scheduling information (Sched<[]>) are attached.
//
// Parameters:
//   regtype - register class used for $Rs.
//   sz      - encoded in Inst{31-30}.
//   R       - encoded in Inst{22}; the instantiations pass 1 for the
//             "l"-suffixed mnemonics and 0 for the unsuffixed ones.
//   op0     - encoded in Inst{14-12}; selects the operation.
//   asm     - mnemonic string.
//
// Inst{4-0} is hard-wired to 0b11111 rather than taken from an operand.
// NOTE(review): the `Rt` field below is declared but is neither an operand in
// the (outs)/(ins) lists nor referenced by any Inst bit assignment; it looks
// removable - confirm before deleting.
let mayLoad = 1, mayStore = 1 in
12663+
class BaseAtomicFPStore<RegisterClass regtype, bits<2> sz, bit R,
12664+
bits<3> op0, string asm>
12665+
: I<(outs),
12666+
(ins regtype:$Rs, GPR64sp:$Rn),
12667+
asm, "\t$Rs, [$Rn]",
12668+
"", []>,
12669+
Sched<[]> {
12670+
bits<5> Rt;
12671+
bits<5> Rs;
12672+
bits<5> Rn;
12673+
let Inst{31-30} = sz;
12674+
let Inst{29-23} = 0b1111000;
12675+
let Inst{22} = R;
12676+
let Inst{21} = 0b1;
12677+
let Inst{20-16} = Rs;
12678+
let Inst{15} = 0b1;
12679+
let Inst{14-12} = op0;
12680+
let Inst{11-10} = 0b00;
12681+
let Inst{9-5} = Rn;
12682+
let Inst{4-0} = 0b11111;
12683+
}
12684+
12685+
// Expands one atomic FP store-op mnemonic into three records suffixed D, S
// and H, using FPR64 / FPR32 / FPR16 with size field 0b11 / 0b10 / 0b01
// respectively. R, op0 and asm are forwarded unchanged to BaseAtomicFPStore.
multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
12686+
def D : BaseAtomicFPStore<FPR64, 0b11, R, op0, asm>;
12687+
def S : BaseAtomicFPStore<FPR32, 0b10, R, op0, asm>;
12688+
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
12689+
}

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10378,6 +10378,78 @@ defm : PromoteBinaryv8f16Tov4f32<any_fdiv, FDIVv4f32>;
1037810378
defm : PromoteBinaryv8f16Tov4f32<any_fmul, FMULv4f32>;
1037910379
defm : PromoteBinaryv8f16Tov4f32<any_fsub, FSUBv4f32>;
1038010380

10381+
//===----------------------------===//
10382+
// 2024 Architecture Extensions:
10383+
//===----------------------------===//
10384+
10385+
let Predicates = [HasLSFE] in {
10386+
// Floating-point Atomic Load
10387+
defm LDFADDA : AtomicFPLoad<0b10, 0b000, "ldfadda">;
10388+
defm LDFADDAL : AtomicFPLoad<0b11, 0b000, "ldfaddal">;
10389+
defm LDFADD : AtomicFPLoad<0b00, 0b000, "ldfadd">;
10390+
defm LDFADDL : AtomicFPLoad<0b01, 0b000, "ldfaddl">;
10391+
defm LDFMAXA : AtomicFPLoad<0b10, 0b100, "ldfmaxa">;
10392+
defm LDFMAXAL : AtomicFPLoad<0b11, 0b100, "ldfmaxal">;
10393+
defm LDFMAX : AtomicFPLoad<0b00, 0b100, "ldfmax">;
10394+
defm LDFMAXL : AtomicFPLoad<0b01, 0b100, "ldfmaxl">;
10395+
defm LDFMINA : AtomicFPLoad<0b10, 0b101, "ldfmina">;
10396+
defm LDFMINAL : AtomicFPLoad<0b11, 0b101, "ldfminal">;
10397+
defm LDFMIN : AtomicFPLoad<0b00, 0b101, "ldfmin">;
10398+
defm LDFMINL : AtomicFPLoad<0b01, 0b101, "ldfminl">;
10399+
defm LDFMAXNMA : AtomicFPLoad<0b10, 0b110, "ldfmaxnma">;
10400+
defm LDFMAXNMAL : AtomicFPLoad<0b11, 0b110, "ldfmaxnmal">;
10401+
defm LDFMAXNM : AtomicFPLoad<0b00, 0b110, "ldfmaxnm">;
10402+
defm LDFMAXNML : AtomicFPLoad<0b01, 0b110, "ldfmaxnml">;
10403+
defm LDFMINNMA : AtomicFPLoad<0b10, 0b111, "ldfminnma">;
10404+
defm LDFMINNMAL : AtomicFPLoad<0b11, 0b111, "ldfminnmal">;
10405+
defm LDFMINNM : AtomicFPLoad<0b00, 0b111, "ldfminnm">;
10406+
defm LDFMINNML : AtomicFPLoad<0b01, 0b111, "ldfminnml">;
10407+
// BFloat16
10408+
def LDBFADDA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b000, "ldbfadda">;
10409+
def LDBFADDAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b000, "ldbfaddal">;
10410+
def LDBFADD : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b000, "ldbfadd">;
10411+
def LDBFADDL : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b000, "ldbfaddl">;
10412+
def LDBFMAXA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b100, "ldbfmaxa">;
10413+
def LDBFMAXAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b100, "ldbfmaxal">;
10414+
def LDBFMAX : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b100, "ldbfmax">;
10415+
def LDBFMAXL : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b100, "ldbfmaxl">;
10416+
def LDBFMINA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b101, "ldbfmina">;
10417+
def LDBFMINAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b101, "ldbfminal">;
10418+
def LDBFMIN : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b101, "ldbfmin">;
10419+
def LDBFMINL : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b101, "ldbfminl">;
10420+
def LDBFMAXNMA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b110, "ldbfmaxnma">;
10421+
def LDBFMAXNMAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b110, "ldbfmaxnmal">;
10422+
def LDBFMAXNM : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b110, "ldbfmaxnm">;
10423+
def LDBFMAXNML : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b110, "ldbfmaxnml">;
10424+
def LDBFMINNMA : BaseAtomicFPLoad<FPR16, 0b00, 0b10, 0b111, "ldbfminnma">;
10425+
def LDBFMINNMAL : BaseAtomicFPLoad<FPR16, 0b00, 0b11, 0b111, "ldbfminnmal">;
10426+
def LDBFMINNM : BaseAtomicFPLoad<FPR16, 0b00, 0b00, 0b111, "ldbfminnm">;
10427+
def LDBFMINNML : BaseAtomicFPLoad<FPR16, 0b00, 0b01, 0b111, "ldbfminnml">;
10428+
10429+
// Floating-point Atomic Store
10430+
defm STFADD : AtomicFPStore<0b0, 0b000, "stfadd">;
10431+
defm STFADDL : AtomicFPStore<0b1, 0b000, "stfaddl">;
10432+
defm STFMAX : AtomicFPStore<0b0, 0b100, "stfmax">;
10433+
defm STFMAXL : AtomicFPStore<0b1, 0b100, "stfmaxl">;
10434+
defm STFMIN : AtomicFPStore<0b0, 0b101, "stfmin">;
10435+
defm STFMINL : AtomicFPStore<0b1, 0b101, "stfminl">;
10436+
defm STFMAXNM : AtomicFPStore<0b0, 0b110, "stfmaxnm">;
10437+
defm STFMAXNML : AtomicFPStore<0b1, 0b110, "stfmaxnml">;
10438+
defm STFMINNM : AtomicFPStore<0b0, 0b111, "stfminnm">;
10439+
defm STFMINNML : AtomicFPStore<0b1, 0b111, "stfminnml">;
10440+
// BFloat16
10441+
def STBFADD : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b000, "stbfadd">;
10442+
def STBFADDL : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b000, "stbfaddl">;
10443+
def STBFMAX : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b100, "stbfmax">;
10444+
def STBFMAXL : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b100, "stbfmaxl">;
10445+
def STBFMIN : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b101, "stbfmin">;
10446+
def STBFMINL : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b101, "stbfminl">;
10447+
def STBFMAXNM : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b110, "stbfmaxnm">;
10448+
def STBFMAXNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b110, "stbfmaxnml">;
10449+
def STBFMINNM : BaseAtomicFPStore<FPR16, 0b00, 0b0, 0b111, "stbfminnm">;
10450+
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
10451+
}
10452+
1038110453
include "AArch64InstrAtomics.td"
1038210454
include "AArch64SVEInstrInfo.td"
1038310455
include "AArch64SMEInstrInfo.td"
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch armv9.6-a+lsfe
4+
.arch armv9.6-a+nolsfe
5+
ldfadd h0, h1, [x2]
6+
// CHECK: error: instruction requires: lsfe
7+
// CHECK: ldfadd h0, h1, [x2]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -o - %s 2>&1 | FileCheck %s
2+
3+
.arch armv9.6-a+lsfe
4+
ldfadd h0, h1, [x2]
5+
// CHECK: ldfadd h0, h1, [x2]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch_extension lsfe
4+
.arch_extension nolsfe
5+
ldfadd h0, h1, [x2]
6+
// CHECK: ldfadd h0, h1, [x2]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.arch_extension lsfe
4+
ldfadd h0, h1, [x2]
5+
// CHECK: ldfadd h0, h1, [x2]
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
// RUN: not llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.cpu generic+lsfe
4+
.cpu generic+nolsfe
5+
ldfadd h0, h1, [x2]
6+
// CHECK: error: instruction requires: lsfe
7+
// CHECK-NEXT: ldfadd h0, h1, [x2]
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
// RUN: llvm-mc -triple aarch64 -filetype asm -o - %s 2>&1 | FileCheck %s
2+
3+
.cpu generic+lsfe
4+
ldfadd h0, h1, [x2]
5+
// CHECK: ldfadd h0, h1, [x2]

0 commit comments

Comments
 (0)