-
Notifications
You must be signed in to change notification settings - Fork 15.4k
Adding support in llvm-exegesis for Aarch64 for handling FPR64/128, PPR16 and ZPR128 reg class. #127564
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Adding support in llvm-exegesis for Aarch64 for handling FPR64/128, PPR16 and ZPR128 reg class. #127564
Changes from 16 commits
624a7ee
4c4d605
f8ce0cc
d34cb6d
566081a
803b9e9
3589838
53b9f0b
caebb7b
230aade
aab854b
f1e561c
25b02b6
b83b52d
433b62e
482a0a3
951e05e
ea8b28b
b5853a9
2c16af6
c21ee8b
5cda550
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| REQUIRES: aarch64-registered-target | ||
|
|
||
| ## PPR Register Class Initialization Testcase | ||
| ## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support PTRUE_{B/H/S/D}. | ||
| RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=PPR | ||
| RUN: llvm-objdump -d %d > %t.s | ||
| RUN: FileCheck %s --check-prefix=PPR_ASM < %t.s | ||
| PPR-NOT: setRegTo is not implemented, results will be unreliable | ||
| PPR: assembled_snippet: {{.*}}C0035FD6 | ||
| PPR_ASM: {{<foo>:}} | ||
| PPR_ASM: ptrue p{{[0-9]+}}.b | ||
| PPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1 | ||
| PPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}} | ||
|
|
||
| ## ZPR Register Class Initialization Testcase | ||
| ## Ideally, we should use PTRUE_{B/H/S/D} instead of FADDV_VPZ_D for an isolated test case; however, Exegesis does not yet support PTRUE_{B/H/S/D}. | ||
| RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=FADDV_VPZ_D 2>&1 | FileCheck %s --check-prefix=ZPR | ||
| RUN: llvm-objdump -d %d > %t.s | ||
| RUN: FileCheck %s --check-prefix=ZPR_ASM < %t.s | ||
| ZPR-NOT: setRegTo is not implemented, results will be unreliable | ||
| ZPR: assembled_snippet: {{.*}}C0035FD6 | ||
| ZPR_ASM: {{<foo>:}} | ||
| ZPR_ASM: ptrue p{{[0-9]+}}.b | ||
| ZPR_ASM-NEXT: dupm z{{[0-9]+}}.s, #0x1 | ||
| ZPR_ASM-NEXT: faddv d{{[0-9]+}}, p{{[0-9]+}}, z{{[0-9]+}} | ||
|
|
||
| ## FPR128 Register Class Initialization Testcase | ||
| RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv16i8v 2>&1 | FileCheck %s --check-prefix=FPR128 | ||
| RUN: llvm-objdump -d %d > %t.s | ||
| RUN: FileCheck %s --check-prefix=FPR128-ASM < %t.s | ||
| FPR128-NOT: setRegTo is not implemented, results will be unreliable | ||
| FPR128: assembled_snippet: {{.*}}C0035FD6 | ||
| FPR128-ASM: {{<foo>:}} | ||
| FPR128-ASM: movi v{{[0-9]+}}.2d, #0000000000000000 | ||
| FPR128-ASM-NEXT: addv b{{[0-9]+}}, v{{[0-9]+}}.16b | ||
|
|
||
| ## FPR64 Register Class Initialization Testcase | ||
| RUN: llvm-exegesis -mcpu=neoverse-v2 -mode=latency --dump-object-to-disk=%d --opcode-name=ADDVv4i16v 2>&1 | FileCheck %s --check-prefix=FPR64 | ||
| RUN: llvm-objdump -d %d > %t.s | ||
| RUN: FileCheck %s --check-prefix=FPR64-ASM < %t.s | ||
| FPR64-NOT: setRegTo is not implemented, results will be unreliable | ||
| FPR64: assembled_snippet: {{.*}}C0035FD6 | ||
| FPR64-ASM: {{<foo>:}} | ||
| FPR64-ASM: fmov d{{[0-9]+}}, {{#2.0+|#2\.000000000000000000e\+00}} | ||
| FPR64-ASM-NEXT: addv h{{[0-9]+}}, v{{[0-9]+}}.4h | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -28,13 +28,66 @@ static unsigned getLoadImmediateOpcode(unsigned RegBitWidth) { | |
| // Generates instruction to load an immediate value into a register. | ||
| // The opcode is chosen by getLoadImmediateOpcode(RegBitWidth); the operands | ||
| // are Reg followed by the zero-extended Value as an immediate. | ||
| // Value wider than RegBitWidth hits llvm_unreachable; Value >= 2**16 trips | ||
| // the assert below. | ||
| static MCInst loadImmediate(MCRegister Reg, unsigned RegBitWidth, | ||
| const APInt &Value) { | ||
| if (Value.getBitWidth() > RegBitWidth) | ||
| llvm_unreachable("Value must fit in the Register"); | ||
| // 0 <= Value.getZExtValue() < 2**16 | ||
| assert(Value.getZExtValue() < (1 << 16) && | ||
| "Value must be in the range of the immediate opcode"); | ||
|
||
| return MCInstBuilder(getLoadImmediateOpcode(RegBitWidth)) | ||
| .addReg(Reg) | ||
| .addImm(Value.getZExtValue()); | ||
| } | ||
|
|
||
| // Generates a DUPM_ZI instruction that writes an immediate into Reg. | ||
| // RegBitWidth is accepted for signature parity with the other loaders but is | ||
| // not read here. | ||
| // NOTE(review): the operand is presumably DUPM's encoded bitmask immediate | ||
| // rather than the literal value being broadcast - confirm. | ||
| static MCInst loadZPRImmediate(MCRegister Reg, unsigned RegBitWidth, | ||
| const APInt &Value) { | ||
| // 0 <= Value.getZExtValue() < 2**13 | ||
| assert(Value.getZExtValue() < (1 << 13) && | ||
| "Value must be in the range of the immediate opcode"); | ||
| // For ZPR, we typically use DUPM instruction to load immediate values | ||
| return MCInstBuilder(AArch64::DUPM_ZI) | ||
|
||
| .addReg(Reg) | ||
| .addImm(Value.getZExtValue()); | ||
| } | ||
|
|
||
| // Generates a PTRUE_B instruction targeting Reg with a fixed immediate of 31. | ||
| // RegBitWidth and Value are not read: the same instruction is produced no | ||
| // matter which value the caller asked for. | ||
| static MCInst loadPPRImmediate(MCRegister Reg, unsigned RegBitWidth, | ||
| const APInt &Value) { | ||
| // For PPR, we typically use PTRUE instruction to set predicate registers | ||
| return MCInstBuilder(AArch64::PTRUE_B) | ||
| .addReg(Reg) | ||
| .addImm(31); // All lanes true for 16 bits | ||
| } | ||
|
|
||
| // Returns the opcode used to load an FP immediate into a register of the | ||
| // given bit width: FMOVDi for 64, MOVIv2d_ns for 128. Any other width falls | ||
| // through to llvm_unreachable. | ||
| static unsigned getLoadFPImmediateOpcode(unsigned RegBitWidth) { | ||
| switch (RegBitWidth) { | ||
| case 64: | ||
davemgreen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| return AArch64::FMOVDi; | ||
| case 128: | ||
| return AArch64::MOVIv2d_ns; | ||
| } | ||
| llvm_unreachable("Invalid Value Width"); | ||
| } | ||
|
|
||
| // Generates instruction to load an FP immediate value into a register. | ||
| // The opcode comes from getLoadFPImmediateOpcode(RegBitWidth); the operands | ||
| // are Reg followed by the zero-extended Value as an immediate. | ||
| // NOTE(review): the immediate is presumably the instruction's encoded FP | ||
| // constant rather than a literal number - confirm. | ||
| static MCInst loadFPImmediate(MCRegister Reg, unsigned RegBitWidth, | ||
| const APInt &Value) { | ||
| // Value is read zero-extended, so only the upper bound is checked: | ||
| // Value.getZExtValue() <= 31 | ||
| assert(Value.getZExtValue() <= 31 && | ||
|
||
| "Value must be in the range of the immediate opcode"); | ||
| return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth)) | ||
| .addReg(Reg) | ||
| .addImm(Value.getZExtValue()); | ||
| } | ||
|
|
||
| // Generates instruction to load an FP128 immediate value into a register. | ||
| // The opcode comes from getLoadFPImmediateOpcode(RegBitWidth), which yields | ||
| // MOVIv2d_ns for a width of 128; the operands are Reg followed by the | ||
| // zero-extended Value as an immediate. | ||
| static MCInst loadFP128Immediate(MCRegister Reg, unsigned RegBitWidth, | ||
| const APInt &Value) { | ||
| // 0 <= Value.getZExtValue() < 2**8 | ||
lakshayk-nv marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| assert(Value.getZExtValue() < (1 << 8) && | ||
lakshayk-nv marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| "Value must be in the range of the immediate opcode"); | ||
| return MCInstBuilder(getLoadFPImmediateOpcode(RegBitWidth)) | ||
davemgreen marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| .addReg(Reg) | ||
| .addImm(Value.getZExtValue()); | ||
| } | ||
|
|
||
| #include "AArch64GenExegesis.inc" | ||
|
|
||
| namespace { | ||
|
|
@@ -51,6 +104,15 @@ class ExegesisAArch64Target : public ExegesisTarget { | |
| return {loadImmediate(Reg, 32, Value)}; | ||
| if (AArch64::GPR64RegClass.contains(Reg)) | ||
| return {loadImmediate(Reg, 64, Value)}; | ||
| if (AArch64::PPRRegClass.contains(Reg)) | ||
| return {loadPPRImmediate(Reg, 16, Value)}; | ||
| if (AArch64::FPR64RegClass.contains(Reg)) | ||
| return {loadFPImmediate(Reg, 64, Value)}; | ||
| if (AArch64::FPR128RegClass.contains(Reg)) | ||
| return {loadFP128Immediate(Reg, 128, Value)}; | ||
| if (AArch64::ZPRRegClass.contains(Reg)) | ||
| return {loadZPRImmediate(Reg, 128, Value)}; | ||
|
|
||
| errs() << "setRegTo is not implemented, results will be unreliable\n"; | ||
| return {}; | ||
| } | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a nitpick, but I think we are testing the same things with:
and
So I think we can just test one thing, and the best thing to check is the assembly.
I also don't know what this means or is testing:
Here's my suggestion for this test just to clean this up, and with a bit of extra formatting and spacing to make it slightly more easy to read (in my opinion):
Can you give the other tests below a similar treatment?
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Cleaned up the formatting in the test file; it now contains only the required assembly checks.
Not needed; removed.
Just for reference, this checks that the assembled snippet ends with a return instruction (
C0035FD6 is the return instruction for AArch64), i.e. "Check that we add ret instr to snippet", as one of the existing exegesis test cases does; I took inspiration from there.
and
They check the same things using different commands. Thus removed.
Thanks!