Skip to content

Commit 1062595

Browse files
[AArch64] Add convert and multiply-add SIMD&FP assembly/disassembly instructions (#113296)
This patch adds the following instructions:
Conversion between floating-point and integer: FCVT{AS, AU, MS, MU, NS, NU, PS, PU, ZS, ZU}
{S,U}CVTF
Advanced SIMD three-register extension: FMMLA
According to https://developer.arm.com/documentation/ddi0602
Co-authored-by: Marian Lukac [email protected]
Co-authored-by: Spencer Abson [email protected]
1 parent 7a71011 commit 1062595

12 files changed

+637
-62
lines changed

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 90 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -5234,6 +5234,32 @@ multiclass FPToIntegerUnscaled<bits<2> rmode, bits<3> opcode, string asm,
52345234
}
52355235
}
52365236

5237+
// Floating-point to integer conversions where both the source and the
// destination are SIMD&FP registers (FPR) of different widths. Each record
// carries an empty selection pattern list, so these definitions only describe
// the assembly/encoding. The record suffix is <dst><src>: S = 32-bit FPR,
// D = 64-bit FPR, H = 16-bit FPR.
// NOTE(review): the first argument to BaseFPToIntegerUnscaled is presumably
// the source FP type field (0b00 single, 0b01 double, 0b11 half) -- confirm
// against the base class.
multiclass FPToIntegerSIMDScalar<bits<2> rmode, bits<3> opcode, string asm> {
5238+
// double-precision to 32-bit SIMD/FPR
5239+
def SDr : BaseFPToIntegerUnscaled<0b01, rmode, opcode, FPR64, FPR32, asm,
5240+
[]> {
5241+
let Inst{31} = 0; // 32-bit destination FPR flag
5242+
}
5243+
5244+
// half-precision to 32-bit SIMD/FPR
5245+
def SHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR32, asm,
5246+
[]> {
5247+
let Inst{31} = 0; // 32-bit destination FPR flag
5248+
}
5249+
5250+
// half-precision to 64-bit SIMD/FPR
5251+
def DHr : BaseFPToIntegerUnscaled<0b11, rmode, opcode, FPR16, FPR64, asm,
5252+
[]> {
5253+
let Inst{31} = 1; // 64-bit destination FPR flag
5254+
}
5255+
5256+
// single-precision to 64-bit SIMD/FPR
5257+
def DSr : BaseFPToIntegerUnscaled<0b00, rmode, opcode, FPR32, FPR64, asm,
5258+
[]> {
5259+
let Inst{31} = 1; // 64-bit destination FPR flag
5260+
}
5261+
}
5262+
52375263
multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
52385264
SDPatternOperator OpN> {
52395265
// Scaled half-precision to 32-bit
@@ -5295,7 +5321,7 @@ multiclass FPToIntegerScaled<bits<2> rmode, bits<3> opcode, string asm,
52955321
//---
52965322

52975323
let mayStore = 0, mayLoad = 0, hasSideEffects = 0, mayRaiseFPException = 1, Uses = [FPCR] in
5298-
class BaseIntegerToFP<bit isUnsigned,
5324+
class BaseIntegerToFP<bits<2> rmode, bits<3> opcode,
52995325
RegisterClass srcType, RegisterClass dstType,
53005326
Operand immType, string asm, list<dag> pattern>
53015327
: I<(outs dstType:$Rd), (ins srcType:$Rn, immType:$scale),
@@ -5305,15 +5331,16 @@ class BaseIntegerToFP<bit isUnsigned,
53055331
bits<5> Rn;
53065332
bits<6> scale;
53075333
let Inst{30-24} = 0b0011110;
5308-
let Inst{21-17} = 0b00001;
5309-
let Inst{16} = isUnsigned;
5334+
let Inst{21} = 0b0;
5335+
let Inst{20-19} = rmode;
5336+
let Inst{18-16} = opcode;
53105337
let Inst{15-10} = scale;
53115338
let Inst{9-5} = Rn;
53125339
let Inst{4-0} = Rd;
53135340
}
53145341

53155342
let mayRaiseFPException = 1, Uses = [FPCR] in
5316-
class BaseIntegerToFPUnscaled<bit isUnsigned,
5343+
class BaseIntegerToFPUnscaled<bits<2> rmode, bits<3> opcode,
53175344
RegisterClass srcType, RegisterClass dstType,
53185345
ValueType dvt, string asm, SDPatternOperator node>
53195346
: I<(outs dstType:$Rd), (ins srcType:$Rn),
@@ -5323,49 +5350,50 @@ class BaseIntegerToFPUnscaled<bit isUnsigned,
53235350
bits<5> Rn;
53245351
bits<6> scale;
53255352
let Inst{30-24} = 0b0011110;
5326-
let Inst{21-17} = 0b10001;
5327-
let Inst{16} = isUnsigned;
5353+
let Inst{21} = 0b1;
5354+
let Inst{20-19} = rmode;
5355+
let Inst{18-16} = opcode;
53285356
let Inst{15-10} = 0b000000;
53295357
let Inst{9-5} = Rn;
53305358
let Inst{4-0} = Rd;
53315359
}
53325360

5333-
multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
5361+
multiclass IntegerToFP<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node> {
53345362
// Unscaled
5335-
def UWHri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR16, f16, asm, node> {
5363+
def UWHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR16, f16, asm, node> {
53365364
let Inst{31} = 0; // 32-bit GPR flag
53375365
let Inst{23-22} = 0b11; // 16-bit FPR flag
53385366
let Predicates = [HasFullFP16];
53395367
}
53405368

5341-
def UWSri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR32, f32, asm, node> {
5369+
def UWSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR32, f32, asm, node> {
53425370
let Inst{31} = 0; // 32-bit GPR flag
53435371
let Inst{23-22} = 0b00; // 32-bit FPR flag
53445372
}
53455373

5346-
def UWDri: BaseIntegerToFPUnscaled<isUnsigned, GPR32, FPR64, f64, asm, node> {
5374+
def UWDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR32, FPR64, f64, asm, node> {
53475375
let Inst{31} = 0; // 32-bit GPR flag
53485376
let Inst{23-22} = 0b01; // 64-bit FPR flag
53495377
}
53505378

5351-
def UXHri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR16, f16, asm, node> {
5379+
def UXHri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR16, f16, asm, node> {
53525380
let Inst{31} = 1; // 64-bit GPR flag
53535381
let Inst{23-22} = 0b11; // 16-bit FPR flag
53545382
let Predicates = [HasFullFP16];
53555383
}
53565384

5357-
def UXSri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR32, f32, asm, node> {
5385+
def UXSri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR32, f32, asm, node> {
53585386
let Inst{31} = 1; // 64-bit GPR flag
53595387
let Inst{23-22} = 0b00; // 32-bit FPR flag
53605388
}
53615389

5362-
def UXDri: BaseIntegerToFPUnscaled<isUnsigned, GPR64, FPR64, f64, asm, node> {
5390+
def UXDri: BaseIntegerToFPUnscaled<rmode, opcode, GPR64, FPR64, f64, asm, node> {
53635391
let Inst{31} = 1; // 64-bit GPR flag
53645392
let Inst{23-22} = 0b01; // 64-bit FPR flag
53655393
}
53665394

53675395
// Scaled
5368-
def SWHri: BaseIntegerToFP<isUnsigned, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
5396+
def SWHri: BaseIntegerToFP<rmode, opcode, GPR32, FPR16, fixedpoint_recip_f16_i32, asm,
53695397
[(set (f16 FPR16:$Rd),
53705398
(fmul (node GPR32:$Rn),
53715399
fixedpoint_recip_f16_i32:$scale))]> {
@@ -5375,7 +5403,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
53755403
let Predicates = [HasFullFP16];
53765404
}
53775405

5378-
def SWSri: BaseIntegerToFP<isUnsigned, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
5406+
def SWSri: BaseIntegerToFP<rmode, opcode, GPR32, FPR32, fixedpoint_recip_f32_i32, asm,
53795407
[(set FPR32:$Rd,
53805408
(fmul (node GPR32:$Rn),
53815409
fixedpoint_recip_f32_i32:$scale))]> {
@@ -5384,7 +5412,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
53845412
let scale{5} = 1;
53855413
}
53865414

5387-
def SWDri: BaseIntegerToFP<isUnsigned, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
5415+
def SWDri: BaseIntegerToFP<rmode, opcode, GPR32, FPR64, fixedpoint_recip_f64_i32, asm,
53885416
[(set FPR64:$Rd,
53895417
(fmul (node GPR32:$Rn),
53905418
fixedpoint_recip_f64_i32:$scale))]> {
@@ -5393,7 +5421,7 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
53935421
let scale{5} = 1;
53945422
}
53955423

5396-
def SXHri: BaseIntegerToFP<isUnsigned, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
5424+
def SXHri: BaseIntegerToFP<rmode, opcode, GPR64, FPR16, fixedpoint_recip_f16_i64, asm,
53975425
[(set (f16 FPR16:$Rd),
53985426
(fmul (node GPR64:$Rn),
53995427
fixedpoint_recip_f16_i64:$scale))]> {
@@ -5402,15 +5430,15 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
54025430
let Predicates = [HasFullFP16];
54035431
}
54045432

5405-
def SXSri: BaseIntegerToFP<isUnsigned, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
5433+
def SXSri: BaseIntegerToFP<rmode, opcode, GPR64, FPR32, fixedpoint_recip_f32_i64, asm,
54065434
[(set FPR32:$Rd,
54075435
(fmul (node GPR64:$Rn),
54085436
fixedpoint_recip_f32_i64:$scale))]> {
54095437
let Inst{31} = 1; // 64-bit GPR flag
54105438
let Inst{23-22} = 0b00; // 32-bit FPR flag
54115439
}
54125440

5413-
def SXDri: BaseIntegerToFP<isUnsigned, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
5441+
def SXDri: BaseIntegerToFP<rmode, opcode, GPR64, FPR64, fixedpoint_recip_f64_i64, asm,
54145442
[(set FPR64:$Rd,
54155443
(fmul (node GPR64:$Rn),
54165444
fixedpoint_recip_f64_i64:$scale))]> {
@@ -5419,6 +5447,32 @@ multiclass IntegerToFP<bit isUnsigned, string asm, SDPatternOperator node> {
54195447
}
54205448
}
54215449

5450+
// Integer to floating-point conversions where the integer source lives in a
// SIMD&FP register (FPR) rather than a GPR, and the source and destination
// widths differ. The record suffix is <dst><src>: H = 16-bit, S = 32-bit,
// D = 64-bit FPR. `node` defaults to null_frag; the defm users visible in this
// change do not pass one.
// In every record Inst{31} encodes the width of the integer source register
// and Inst{23-22} encodes the floating-point type of the destination.
multiclass IntegerToFPSIMDScalar<bits<2> rmode, bits<3> opcode, string asm, SDPatternOperator node = null_frag> {
5451+
// 32-bit to half-precision
5452+
def HSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR16, f16, asm, node> {
5453+
let Inst{31} = 0; // 32-bit source FPR flag
5454+
let Inst{23-22} = 0b11; // 16-bit destination FPR flag
5455+
}
5456+
5457+
// 32-bit to double-precision
5458+
def DSr: BaseIntegerToFPUnscaled<rmode, opcode, FPR32, FPR64, f64, asm, node> {
5459+
let Inst{31} = 0; // 32-bit source FPR flag
5460+
let Inst{23-22} = 0b01; // 64-bit destination FPR flag
5461+
}
5462+
5463+
// 64-bit to half-precision
5464+
def HDr: BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR16, f16, asm, node> {
5465+
let Inst{31} = 1; // 64-bit source FPR flag
5466+
let Inst{23-22} = 0b11; // 16-bit destination FPR flag
5467+
}
5468+
5469+
// 64-bit to single-precision
5470+
def SDr: BaseIntegerToFPUnscaled<rmode, opcode, FPR64, FPR32, f32, asm, node> {
5471+
let Inst{31} = 1; // 64-bit source FPR flag
5472+
let Inst{23-22} = 0b00; // 32-bit destination FPR flag
5473+
}
5474+
}
5475+
54225476
//---
54235477
// Unscaled integer <-> floating point conversion (i.e. FMOV)
54245478
//---
@@ -13126,3 +13180,20 @@ multiclass AtomicFPStore<bit R, bits<3> op0, string asm> {
1312613180
def S : BaseAtomicFPStore<FPR32, 0b10, R, op0, asm>;
1312713181
def H : BaseAtomicFPStore<FPR16, 0b01, R, op0, asm>;
1312813182
}
13183+
13184+
// Base class for the FP8 matrix-multiply three-register instructions.
// Builds on BaseSIMDThreeSameVectorTied with V128 operands and no selection
// patterns. `size` is extended with a trailing 0 bit to form the 3-bit field
// passed to the base class. `kind` is the arrangement suffix printed on the
// destination register only.
class BaseSIMDThreeSameVectorFP8MatrixMul<string asm, bits<2> size, string kind>
13185+
: BaseSIMDThreeSameVectorTied<1, 1, {size, 0}, 0b11101,
13186+
V128, asm, ".16b", []> {
13187+
// Override the assembly string: $Rd uses `kind`, while both sources are
// always printed with the ".16b" arrangement.
let AsmString = !strconcat(asm, "{\t$Rd", kind, ", $Rn", ".16b",
13188+
", $Rm", ".16b", "}");
13189+
}
13190+
13191+
// Instantiates the two destination arrangements of an FP8 matrix-multiply
// instruction. Each variant is gated on NEON plus its own feature predicate.
multiclass SIMDThreeSameVectorFP8MatrixMul<string asm>{
13192+
// ".8h" destination, size = 0b00; requires HasF8F16MM.
def v8f16: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b00, ".8h">{
13193+
let Predicates = [HasNEON, HasF8F16MM];
13194+
}
// ".4s" destination, size = 0b10; requires HasF8F32MM.
13195+
def v4f32: BaseSIMDThreeSameVectorFP8MatrixMul<asm, 0b10, ".4s">{
13196+
let Predicates = [HasNEON, HasF8F32MM];
13197+
}
13198+
}
13199+

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 23 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4838,6 +4838,19 @@ defm FCVTZU : FPToIntegerUnscaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
48384838
defm FCVTZS : FPToIntegerScaled<0b11, 0b000, "fcvtzs", any_fp_to_sint>;
48394839
defm FCVTZU : FPToIntegerScaled<0b11, 0b001, "fcvtzu", any_fp_to_uint>;
48404840

4841+
// FPR-to-FPR forms of the FP-to-integer conversions, instantiated through
// FPToIntegerSIMDScalar as <rmode, opcode, mnemonic>. All of them require
// both NEON and FPRCVT.
let Predicates = [HasNEON, HasFPRCVT] in{
4842+
defm FCVTAS : FPToIntegerSIMDScalar<0b11, 0b010, "fcvtas">;
4843+
defm FCVTAU : FPToIntegerSIMDScalar<0b11, 0b011, "fcvtau">;
4844+
defm FCVTMS : FPToIntegerSIMDScalar<0b10, 0b100, "fcvtms">;
4845+
defm FCVTMU : FPToIntegerSIMDScalar<0b10, 0b101, "fcvtmu">;
4846+
defm FCVTNS : FPToIntegerSIMDScalar<0b01, 0b010, "fcvtns">;
4847+
defm FCVTNU : FPToIntegerSIMDScalar<0b01, 0b011, "fcvtnu">;
4848+
defm FCVTPS : FPToIntegerSIMDScalar<0b10, 0b010, "fcvtps">;
4849+
defm FCVTPU : FPToIntegerSIMDScalar<0b10, 0b011, "fcvtpu">;
4850+
defm FCVTZS : FPToIntegerSIMDScalar<0b10, 0b110, "fcvtzs">;
4851+
defm FCVTZU : FPToIntegerSIMDScalar<0b10, 0b111, "fcvtzu">;
4852+
}
4853+
48414854
// AArch64's FCVT instructions saturate when out of range.
48424855
multiclass FPToIntegerSatPats<SDNode to_int_sat, SDNode to_int_sat_gi, string INST> {
48434856
let Predicates = [HasFullFP16] in {
@@ -4996,8 +5009,13 @@ def : Pat<(i64 (any_llround f64:$Rn)),
49965009
// Scaled integer to floating point conversion instructions.
49975010
//===----------------------------------------------------------------------===//
49985011

4999-
defm SCVTF : IntegerToFP<0, "scvtf", any_sint_to_fp>;
5000-
defm UCVTF : IntegerToFP<1, "ucvtf", any_uint_to_fp>;
5012+
defm SCVTF : IntegerToFP<0b00, 0b010, "scvtf", any_sint_to_fp>;
5013+
defm UCVTF : IntegerToFP<0b00, 0b011, "ucvtf", any_uint_to_fp>;
5014+
5015+
// FPR-source forms of SCVTF/UCVTF, instantiated through IntegerToFPSIMDScalar
// as <rmode, opcode, mnemonic>. No selection node is passed, so the
// multiclass default (null_frag) applies. Both require NEON and FPRCVT.
let Predicates = [HasNEON, HasFPRCVT] in {
5016+
defm SCVTF : IntegerToFPSIMDScalar<0b11, 0b100, "scvtf">;
5017+
defm UCVTF : IntegerToFPSIMDScalar<0b11, 0b101, "ucvtf">;
5018+
}
50015019

50025020
def : Pat<(f16 (fdiv (f16 (any_sint_to_fp (i32 GPR32:$Rn))), fixedpoint_f16_i32:$scale)),
50035021
(SCVTFSWHri GPR32:$Rn, fixedpoint_f16_i32:$scale)>;
@@ -10547,6 +10565,9 @@ let Predicates = [HasLSFE] in {
1054710565
def STBFMINNML : BaseAtomicFPStore<FPR16, 0b00, 0b1, 0b111, "stbfminnml">;
1054810566
}
1054910567

10568+
// FP8 matrix multiply-accumulate (fmmla); both variants are marked as reading
// the FPMR and FPCR registers.
let Uses = [FPMR, FPCR] in
10569+
defm FMMLA : SIMDThreeSameVectorFP8MatrixMul<"fmmla">;
10570+
1055010571
include "AArch64InstrAtomics.td"
1055110572
include "AArch64SVEInstrInfo.td"
1055210573
include "AArch64SMEInstrInfo.td"
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// RUN: not llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm 2>&1 < %s| FileCheck %s
2+
3+
fmmla v0.4h, v1.16b, v2.16b
4+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
5+
// CHECK-NEXT: fmmla v0.4h, v1.16b, v2.16b
6+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
7+
8+
fmmla v0.8s, v1.16b, v2.16b
9+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid vector kind qualifier
10+
// CHECK-NEXT: fmmla v0.8s, v1.16b, v2.16b
11+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
12+
13+
fmmla v0.4s, v1.4s, v2.4s
14+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
15+
// CHECK-NEXT: fmmla v0.4s, v1.4s, v2.4s
16+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
17+
18+
fmmla v0.8h, v1.8h, v2.8h
19+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
20+
// CHECK-NEXT: fmmla v0.8h, v1.8h, v2.8h
21+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
22+
23+
fmmla v0.16b, v1.16b, v2.16b
24+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
25+
// CHECK-NEXT: fmmla v0.16b, v1.16b, v2.16b
26+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
27+
28+
fmmla v0.d, v1.16b, v2.16b
29+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
30+
// CHECK-NEXT: fmmla v0.d, v1.16b, v2.16b
31+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
32+
33+
fmmla v0.2d, v1.16b, v2.16b
34+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
35+
// CHECK-NEXT: fmmla v0.2d, v1.16b, v2.16b
36+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
37+
38+
fmmla v0.8h, v1.8b, v2.8b
39+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
40+
// CHECK-NEXT: fmmla v0.8h, v1.8b, v2.8b
41+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:
42+
43+
fmmla v0.4s, v1.8b, v2.8b
44+
// CHECK: [[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction
45+
// CHECK-NEXT: fmmla v0.4s, v1.8b, v2.8b
46+
// CHECK-NOT: [[@LINE-1]]:{{[0-9]+}}:

llvm/test/MC/AArch64/FP8/fmmla.s

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \
2+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
3+
// RUN: not llvm-mc -triple=aarch64 -show-encoding < %s 2>&1 \
4+
// RUN: | FileCheck %s --check-prefix=CHECK-ERROR
5+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \
6+
// RUN: | llvm-objdump -d --mattr=+f8f16mm,+f8f32mm - | FileCheck %s --check-prefix=CHECK-INST
7+
// RUN: llvm-mc -triple=aarch64 -filetype=obj -mattr=+f8f16mm,+f8f32mm < %s \
8+
// RUN: | llvm-objdump -d --no-print-imm-hex --mattr=-f8f16mm,-f8f32mm - | FileCheck %s --check-prefix=CHECK-UNKNOWN
9+
// Disassemble encoding and check the re-encoding (-show-encoding) matches.
10+
// RUN: llvm-mc -triple=aarch64 -show-encoding -mattr=+f8f16mm,+f8f32mm < %s \
11+
// RUN: | sed '/.text/d' | sed 's/.*encoding: //g' \
12+
// RUN: | llvm-mc -triple=aarch64 -mattr=+f8f16mm,+f8f32mm -disassemble -show-encoding \
13+
// RUN: | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
14+
15+
fmmla v0.8h, v1.16b, v2.16b
16+
// CHECK-INST: fmmla v0.8h, v1.16b, v2.16b
17+
// CHECK-ENCODING: [0x20,0xec,0x02,0x6e]
18+
// CHECK-ERROR: instruction requires: f8f16mm
19+
// CHECK-UNKNOWN: 6e02ec20 <unknown>
20+
21+
fmmla v0.4s, v1.16b, v2.16b
22+
// CHECK-INST: fmmla v0.4s, v1.16b, v2.16b
23+
// CHECK-ENCODING: [0x20,0xec,0x82,0x6e]
24+
// CHECK-ERROR: instruction requires: f8f32mm
25+
// CHECK-UNKNOWN: 6e82ec20 <unknown>

0 commit comments

Comments
 (0)