Skip to content

Commit 8e7767f

Browse files
vladimirradosavljevic and akiramenai
authored and committed
[EVM] Add support for CLZ instruction
CLZ will be introduced in the Fusaka update: https://eips.ethereum.org/EIPS/eip-7939

Besides adding support for the CLZ instruction, this patch adds support for EVM versioning, so that the CLZ instruction is generated for Fusaka and later versions.

isCheapToSpeculateCttz and isCheapToSpeculateCtlz are implemented so that CGP (CodeGenPrepare) doesn't generate a check for zero to avoid calling the intrinsic in despeculateCountZeros.

CTZ is generated via CLZ, and LLVM currently generates: `256 - clz(~x & (x-1))`. If x != 0 is proven, this can be optimized to: `255 - clz(x & -x)` (#918)

Signed-off-by: Vladimir Radosavljevic <[email protected]>
1 parent 38bed48 commit 8e7767f

File tree

12 files changed

+284
-189
lines changed

12 files changed

+284
-189
lines changed

llvm/lib/Target/EVM/Disassembler/EVMDisassembler.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
#include "MCTargetDesc/EVMMCExpr.h"
17+
#include "MCTargetDesc/EVMMCTargetDesc.h"
1718
#include "TargetInfo/EVMTargetInfo.h"
1819
#include "llvm/ADT/StringExtras.h"
1920
#include "llvm/MC/MCContext.h"

llvm/lib/Target/EVM/EVM.td

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,13 @@
1616

1717
include "llvm/Target/Target.td"
1818

19+
//===----------------------------------------------------------------------===//
20+
// EVM Versions
21+
//===----------------------------------------------------------------------===//
22+
23+
// Subtarget feature named "fusaka", described as "Fusaka release". It is
// declared against the subtarget's `Version` field with the value
// EVMVersion::Fusaka.
def FeatureFusaka : SubtargetFeature<"fusaka", "Version", "EVMVersion::Fusaka",
24+
"Fusaka release">;
25+
// Predicate whose condition string is `Subtarget->hasCLZ()`; used to guard
// definitions that require the CLZ instruction.
def HasCLZ : Predicate<"Subtarget->hasCLZ()">;
1926

2027
//===----------------------------------------------------------------------===//
2128
// Register File Description

llvm/lib/Target/EVM/EVMISelDAGToDAG.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,17 @@ using namespace llvm;
2626

2727
namespace {
2828
class EVMDAGToDAGISel final : public SelectionDAGISel {
29+
const EVMSubtarget *Subtarget = nullptr;
30+
2931
public:
3032
EVMDAGToDAGISel(EVMTargetMachine &TM, CodeGenOptLevel OptLevel)
3133
: SelectionDAGISel(TM, OptLevel) {}
3234

35+
// Stores a pointer to the EVMSubtarget of MF in the `Subtarget` member, then
// forwards to SelectionDAGISel::runOnMachineFunction and returns its result.
// NOTE(review): the cached pointer is presumably what predicate strings such
// as "Subtarget->hasCLZ()" refer to in the generated matcher - confirm.
bool runOnMachineFunction(MachineFunction &MF) override {
36+
Subtarget = &MF.getSubtarget<EVMSubtarget>();
37+
return SelectionDAGISel::runOnMachineFunction(MF);
38+
}
39+
3340
private:
3441

3542
// Include the pieces autogenerated from the target description.

llvm/lib/Target/EVM/EVMISelLowering.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,9 @@ EVMTargetLowering::EVMTargetLowering(const TargetMachine &TM,
5454
ISD::FrameIndex},
5555
MVT::i256, Legal);
5656

57+
if (Subtarget->hasCLZ())
58+
setOperationAction(ISD::CTLZ, MVT::i256, Legal);
59+
5760
for (auto CC : {ISD::SETULT, ISD::SETUGT, ISD::SETLT, ISD::SETGT, ISD::SETGE,
5861
ISD::SETUGE, ISD::SETLE, ISD::SETULE, ISD::SETEQ, ISD::SETNE})
5962
setCondCodeAction(CC, MVT::i256, Legal);
@@ -129,6 +132,14 @@ bool EVMTargetLowering::isLegalAddressingMode(const DataLayout &DL,
129132
return true;
130133
}
131134

135+
// Reports ctlz as cheap to speculate exactly when the subtarget has the CLZ
// instruction. The type argument is not consulted.
bool EVMTargetLowering::isCheapToSpeculateCtlz(Type *Ty) const {
136+
return Subtarget->hasCLZ();
137+
}
138+
139+
// Reports cttz as cheap to speculate exactly when the subtarget has the CLZ
// instruction. The type argument is not consulted.
bool EVMTargetLowering::isCheapToSpeculateCttz(Type *Ty) const {
140+
return Subtarget->hasCLZ();
141+
}
142+
132143
//===----------------------------------------------------------------------===//
133144
// EVM Lowering private implementation.
134145
//===----------------------------------------------------------------------===//

llvm/lib/Target/EVM/EVMISelLowering.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,10 @@ class EVMTargetLowering final : public TargetLowering {
4949
return MVT::i256;
5050
}
5151

52+
/// Override of the TargetLowering hook that says whether speculating a cttz
/// of type \p Ty is cheap on this target.
bool isCheapToSpeculateCttz(Type *Ty) const override;
53+
54+
/// Override of the TargetLowering hook that says whether speculating a ctlz
/// of type \p Ty is cheap on this target.
bool isCheapToSpeculateCtlz(Type *Ty) const override;
55+
5256
/// Return true if it is profitable to move this shift by a constant amount
5357
/// through its operand, adjusting any immediate operands as necessary to
5458
/// preserve semantics. This transformation may not be desirable if it

llvm/lib/Target/EVM/EVMInstrInfo.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,11 @@ defm NOT
357357
: I<(outs GPR:$dst), (ins GPR:$src), [(set GPR:$dst, (not GPR:$src))],
358358
"NOT", "$dst, $src", 0x19, 3>;
359359

360+
// CLZ: one GPR input, one GPR output, matched from the `ctlz` node.
// The definition is only available when the HasCLZ predicate holds.
// NOTE(review): 0x1e and 5 are presumably the opcode byte and the gas cost
// (as with 0x19 and 3 on NOT) - confirm against the `I` multiclass.
let Predicates = [HasCLZ] in
361+
defm CLZ
362+
: I<(outs GPR:$dst), (ins GPR:$src), [(set GPR:$dst, (ctlz GPR:$src))],
363+
"CLZ", "$dst, $src", 0x1e, 5>;
364+
360365
let mayLoad = 1 in
361366
defm KECCAK256
362367
: I<(outs GPR:$dst), (ins GPR:$offset, GPR:$size),

llvm/lib/Target/EVM/EVMSubtarget.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,10 +18,18 @@ using namespace llvm;
1818

1919
#define GET_SUBTARGETINFO_TARGET_DESC
2020
#define GET_SUBTARGETINFO_CTOR
21+
#define GET_SUBTARGETINFO_ENUM
2122
#include "EVMGenSubtargetInfo.inc"
2223

24+
// Parses the subtarget features for the given CPU and feature string (the CPU
// name is also passed as the tune CPU) and returns a reference to this
// subtarget, so the call can be used directly as an argument expression.
EVMSubtarget &EVMSubtarget::initializeSubtargetDependencies(StringRef CPU,
25+
StringRef FS) {
26+
ParseSubtargetFeatures(CPU, /*TuneCPU*/ CPU, FS);
27+
return *this;
28+
}
29+
2330
EVMSubtarget::EVMSubtarget(const Triple &TT, const std::string &CPU,
2431
const std::string &FS, const TargetMachine &TM)
25-
: EVMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS), TLInfo(TM, *this) {}
32+
: EVMGenSubtargetInfo(TT, CPU, /*TuneCPU*/ CPU, FS),
33+
TLInfo(TM, initializeSubtargetDependencies(CPU, FS)) {}
2634

2735
// Unconditionally returns true for the EVM subtarget.
bool EVMSubtarget::useAA() const { return true; }

llvm/lib/Target/EVM/EVMSubtarget.h

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
#include "EVMRegisterInfo.h"
2020
#include "llvm/CodeGen/TargetSubtargetInfo.h"
2121

22-
#define GET_SUBTARGETINFO_ENUM
2322
#define GET_SUBTARGETINFO_HEADER
2423
#include "EVMGenSubtargetInfo.inc"
2524

@@ -28,10 +27,18 @@ class StringRef;
2827

2928
class EVMSubtarget final : public EVMGenSubtargetInfo {
3029
private:
30+
/// EVM versions distinguished by this subtarget. Enumerators are listed in
/// ascending order, so versions can be compared with relational operators
/// (see hasCLZ()).
enum class EVMVersion : uint8_t {
31+
Generic = 0, // Older versions of the EVM.
32+
Fusaka,
33+
};
34+
/// Selected EVM version; Generic unless changed after construction.
EVMVersion Version = EVMVersion::Generic;
35+
3136
EVMFrameLowering FrameLowering;
3237
EVMInstrInfo InstrInfo;
3338
EVMTargetLowering TLInfo;
3439

40+
/// Takes the CPU name \p CPU and feature string \p FS and returns a reference
/// to an EVMSubtarget.
/// NOTE(review): presumably parses the features and returns *this - confirm
/// against the definition in EVMSubtarget.cpp.
EVMSubtarget &initializeSubtargetDependencies(StringRef CPU, StringRef FS);
41+
3542
public:
3643
// This constructor initializes the data members to match that
3744
// of the specified triple.
@@ -57,6 +64,8 @@ class EVMSubtarget final : public EVMGenSubtargetInfo {
5764
bool useAA() const override;
5865

5966
/// Returns the stack depth limit, a fixed value of 16.
/// NOTE(review): presumably the deepest stack slot reachable by DUP/SWAP -
/// confirm against the users of this function.
unsigned stackDepthLimit() const { return 16; }
67+
68+
/// Returns true if the selected EVM version is Fusaka or any later version.
bool hasCLZ() const { return Version >= EVMVersion::Fusaka; }
6069
};
6170
} // namespace llvm
6271

llvm/lib/Target/EVM/MCTargetDesc/EVMMCTargetDesc.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,4 +65,7 @@ std::string getImmutableId(StringRef Name);
6565
#define GET_INSTRINFO_MC_HELPER_DECLS
6666
#include "EVMGenInstrInfo.inc"
6767

68+
#define GET_SUBTARGETINFO_ENUM
69+
#include "EVMGenSubtargetInfo.inc"
70+
6871
#endif // LLVM_LIB_TARGET_EVM_MCTARGETDESC_EVMMCTARGETDESC_H

llvm/test/CodeGen/EVM/bitmanipulation-intrinsics.ll

Lines changed: 0 additions & 187 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ target triple = "evm"
77
declare i256 @llvm.bitreverse.i256(i256)
88
declare i256 @llvm.bswap.i256(i256)
99
declare i256 @llvm.ctpop.i256(i256)
10-
declare i256 @llvm.ctlz.i256(i256, i1)
11-
declare i256 @llvm.cttz.i256(i256, i1)
1210

1311
define i256 @bitreversetest(i256 %v) {
1412
; CHECK-LABEL: bitreversetest:
@@ -583,188 +581,3 @@ define i256 @ctpoptest(i256 %v) {
583581
%res = call i256 @llvm.ctpop.i256(i256 %v)
584582
ret i256 %res
585583
}
586-
587-
define i256 @ctlztest(i256 %v) {
588-
; CHECK-LABEL: ctlztest:
589-
; CHECK: ; %bb.0:
590-
; CHECK-NEXT: JUMPDEST
591-
; CHECK-NEXT: PUSH32 0x101010101010101010101010101010101010101010101010101010101010101
592-
; CHECK-NEXT: PUSH16 0xF0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
593-
; CHECK-NEXT: DUP2
594-
; CHECK-NEXT: DUP2
595-
; CHECK-NEXT: PUSH32 0x3333333333333333333333333333333333333333333333333333333333333333
596-
; CHECK-NEXT: DUP6
597-
; CHECK-NEXT: PUSH16 0x55555555555555555555555555555555
598-
; CHECK-NEXT: SWAP7
599-
; CHECK-NEXT: PUSH1 0x1
600-
; CHECK-NEXT: SHR
601-
; CHECK-NEXT: OR
602-
; CHECK-NEXT: DUP1
603-
; CHECK-NEXT: PUSH1 0x2
604-
; CHECK-NEXT: SHR
605-
; CHECK-NEXT: OR
606-
; CHECK-NEXT: DUP1
607-
; CHECK-NEXT: PUSH1 0x4
608-
; CHECK-NEXT: SHR
609-
; CHECK-NEXT: OR
610-
; CHECK-NEXT: DUP1
611-
; CHECK-NEXT: PUSH1 0x8
612-
; CHECK-NEXT: SHR
613-
; CHECK-NEXT: OR
614-
; CHECK-NEXT: DUP1
615-
; CHECK-NEXT: PUSH1 0x10
616-
; CHECK-NEXT: SHR
617-
; CHECK-NEXT: OR
618-
; CHECK-NEXT: DUP1
619-
; CHECK-NEXT: PUSH1 0x20
620-
; CHECK-NEXT: SHR
621-
; CHECK-NEXT: OR
622-
; CHECK-NEXT: DUP1
623-
; CHECK-NEXT: PUSH1 0x40
624-
; CHECK-NEXT: SHR
625-
; CHECK-NEXT: OR
626-
; CHECK-NEXT: DUP1
627-
; CHECK-NEXT: PUSH1 0x80
628-
; CHECK-NEXT: SHR
629-
; CHECK-NEXT: OR
630-
; CHECK-NEXT: NOT
631-
; CHECK-NEXT: PUSH16 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
632-
; CHECK-NEXT: DUP8
633-
; CHECK-NEXT: DUP3
634-
; CHECK-NEXT: PUSH1 0x81
635-
; CHECK-NEXT: SHR
636-
; CHECK-NEXT: AND
637-
; CHECK-NEXT: DUP3
638-
; CHECK-NEXT: PUSH1 0x80
639-
; CHECK-NEXT: SHR
640-
; CHECK-NEXT: SUB
641-
; CHECK-NEXT: DUP4
642-
; CHECK-NEXT: DUP1
643-
; CHECK-NEXT: DUP3
644-
; CHECK-NEXT: PUSH1 0x2
645-
; CHECK-NEXT: SHR
646-
; CHECK-NEXT: AND
647-
; CHECK-NEXT: SWAP2
648-
; CHECK-NEXT: AND
649-
; CHECK-NEXT: ADD
650-
; CHECK-NEXT: SWAP8
651-
; CHECK-NEXT: DUP3
652-
; CHECK-NEXT: PUSH1 0x1
653-
; CHECK-NEXT: SHR
654-
; CHECK-NEXT: AND
655-
; CHECK-NEXT: SWAP2
656-
; CHECK-NEXT: AND
657-
; CHECK-NEXT: SUB
658-
; CHECK-NEXT: SWAP1
659-
; CHECK-NEXT: DUP1
660-
; CHECK-NEXT: DUP3
661-
; CHECK-NEXT: PUSH1 0x2
662-
; CHECK-NEXT: SHR
663-
; CHECK-NEXT: AND
664-
; CHECK-NEXT: SWAP2
665-
; CHECK-NEXT: AND
666-
; CHECK-NEXT: ADD
667-
; CHECK-NEXT: SWAP5
668-
; CHECK-NEXT: DUP1
669-
; CHECK-NEXT: PUSH1 0x4
670-
; CHECK-NEXT: SHR
671-
; CHECK-NEXT: ADD
672-
; CHECK-NEXT: AND
673-
; CHECK-NEXT: MUL
674-
; CHECK-NEXT: PUSH1 0x10
675-
; CHECK-NEXT: BYTE
676-
; CHECK-NEXT: SWAP3
677-
; CHECK-NEXT: DUP1
678-
; CHECK-NEXT: PUSH1 0x4
679-
; CHECK-NEXT: SHR
680-
; CHECK-NEXT: ADD
681-
; CHECK-NEXT: AND
682-
; CHECK-NEXT: MUL
683-
; CHECK-NEXT: PUSH1 0x10
684-
; CHECK-NEXT: BYTE
685-
; CHECK-NEXT: ADD
686-
; CHECK-NEXT: SWAP1
687-
; CHECK-NEXT: JUMP
688-
689-
%res = call i256 @llvm.ctlz.i256(i256 %v, i1 false)
690-
ret i256 %res
691-
}
692-
693-
define i256 @cttztest(i256 %v) {
694-
; CHECK-LABEL: cttztest:
695-
; CHECK: ; %bb.0:
696-
; CHECK-NEXT: JUMPDEST
697-
; CHECK-NEXT: PUSH32 0x101010101010101010101010101010101010101010101010101010101010101
698-
; CHECK-NEXT: PUSH16 0xF0F0F0F0F0F0F0F0F0F0F0F0F0F0F0F
699-
; CHECK-NEXT: DUP2
700-
; CHECK-NEXT: DUP2
701-
; CHECK-NEXT: PUSH32 0x3333333333333333333333333333333333333333333333333333333333333333
702-
; CHECK-NEXT: DUP6
703-
; CHECK-NEXT: PUSH1 0x1
704-
; CHECK-NEXT: PUSH16 0x55555555555555555555555555555555
705-
; CHECK-NEXT: SWAP8
706-
; CHECK-NEXT: SUB
707-
; CHECK-NEXT: SWAP1
708-
; CHECK-NEXT: NOT
709-
; CHECK-NEXT: AND
710-
; CHECK-NEXT: PUSH16 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF
711-
; CHECK-NEXT: DUP8
712-
; CHECK-NEXT: DUP3
713-
; CHECK-NEXT: PUSH1 0x81
714-
; CHECK-NEXT: SHR
715-
; CHECK-NEXT: AND
716-
; CHECK-NEXT: DUP3
717-
; CHECK-NEXT: PUSH1 0x80
718-
; CHECK-NEXT: SHR
719-
; CHECK-NEXT: SUB
720-
; CHECK-NEXT: DUP4
721-
; CHECK-NEXT: DUP1
722-
; CHECK-NEXT: DUP3
723-
; CHECK-NEXT: PUSH1 0x2
724-
; CHECK-NEXT: SHR
725-
; CHECK-NEXT: AND
726-
; CHECK-NEXT: SWAP2
727-
; CHECK-NEXT: AND
728-
; CHECK-NEXT: ADD
729-
; CHECK-NEXT: SWAP8
730-
; CHECK-NEXT: DUP3
731-
; CHECK-NEXT: PUSH1 0x1
732-
; CHECK-NEXT: SHR
733-
; CHECK-NEXT: AND
734-
; CHECK-NEXT: SWAP2
735-
; CHECK-NEXT: AND
736-
; CHECK-NEXT: SUB
737-
; CHECK-NEXT: SWAP1
738-
; CHECK-NEXT: DUP1
739-
; CHECK-NEXT: DUP3
740-
; CHECK-NEXT: PUSH1 0x2
741-
; CHECK-NEXT: SHR
742-
; CHECK-NEXT: AND
743-
; CHECK-NEXT: SWAP2
744-
; CHECK-NEXT: AND
745-
; CHECK-NEXT: ADD
746-
; CHECK-NEXT: SWAP5
747-
; CHECK-NEXT: DUP1
748-
; CHECK-NEXT: PUSH1 0x4
749-
; CHECK-NEXT: SHR
750-
; CHECK-NEXT: ADD
751-
; CHECK-NEXT: AND
752-
; CHECK-NEXT: MUL
753-
; CHECK-NEXT: PUSH1 0x10
754-
; CHECK-NEXT: BYTE
755-
; CHECK-NEXT: SWAP3
756-
; CHECK-NEXT: DUP1
757-
; CHECK-NEXT: PUSH1 0x4
758-
; CHECK-NEXT: SHR
759-
; CHECK-NEXT: ADD
760-
; CHECK-NEXT: AND
761-
; CHECK-NEXT: MUL
762-
; CHECK-NEXT: PUSH1 0x10
763-
; CHECK-NEXT: BYTE
764-
; CHECK-NEXT: ADD
765-
; CHECK-NEXT: SWAP1
766-
; CHECK-NEXT: JUMP
767-
768-
%res = call i256 @llvm.cttz.i256(i256 %v, i1 false)
769-
ret i256 %res
770-
}

0 commit comments

Comments
 (0)