Skip to content
Open
48 changes: 48 additions & 0 deletions llvm/docs/LangRef.rst
Original file line number Diff line number Diff line change
Expand Up @@ -18155,6 +18155,54 @@ Example:
%r = call i8 @llvm.fshr.i8(i8 15, i8 15, i8 11) ; %r = i8: 225 (0b11100001)
%r = call i8 @llvm.fshr.i8(i8 0, i8 255, i8 8) ; %r = i8: 255 (0b11111111)

.. _int_clmul:

'``llvm.clmul.*``' Intrinsic
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Syntax:
"""""""

This is an overloaded intrinsic. You can use ``llvm.clmul``
on any integer bit width or vectors of integers.

::

declare i16 @llvm.clmul.i16(i16 %a, i16 %b)
declare i32 @llvm.clmul.i32(i32 %a, i32 %b)
declare i64 @llvm.clmul.i64(i64 %a, i64 %b)
declare <4 x i32> @llvm.clmul.v4i32(<4 x i32> %a, <4 x i32> %b)

Overview:
"""""""""

The '``llvm.clmul``' family of intrinsic functions performs carryless
multiplication (also known as XOR multiplication) of its two arguments.

Arguments:
""""""""""

The arguments (``%a`` and ``%b``) and the result may be integers of any bit
width or vectors of such integers, but all three must have the same type.
``%a`` and ``%b`` are the two values that will undergo carryless multiplication.

Semantics:
""""""""""

The '``llvm.clmul``' intrinsic computes the carryless product of ``%a`` and
``%b``: the result of applying the standard shift-and-add multiplication
algorithm with every addition replaced by an exclusive or. As with ordinary
multiplication, only the low bits of the product that fit in the result type
are returned.
The vector intrinsics, such as ``llvm.clmul.v4i32``, operate on a per-element
basis and the element order is not affected.

Examples:
"""""""""

.. code-block:: llvm

%res = call i4 @llvm.clmul.i4(i4 1, i4 2) ; %res = 2
%res = call i4 @llvm.clmul.i4(i4 5, i4 6) ; %res = 14
%res = call i4 @llvm.clmul.i4(i4 -4, i4 2) ; %res = -8
%res = call i4 @llvm.clmul.i4(i4 -4, i4 -5) ; %res = 4

Arithmetic with Overflow Intrinsics
-----------------------------------

Expand Down
3 changes: 3 additions & 0 deletions llvm/include/llvm/CodeGen/ISDOpcodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -760,6 +760,9 @@ enum NodeType {
ROTR,
FSHL,
FSHR,

/// Carryless multiplication operator
CLMUL,

/// Byte Swap and Counting operators.
BSWAP,
Expand Down
5 changes: 5 additions & 0 deletions llvm/include/llvm/CodeGen/TargetLowering.h
Original file line number Diff line number Diff line change
Expand Up @@ -5400,6 +5400,11 @@ class LLVM_ABI TargetLowering : public TargetLoweringBase {
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandFunnelShift(SDNode *N, SelectionDAG &DAG) const;

/// Expand carryless multiply.
/// \param N Node to expand
/// \returns The expansion if successful, SDValue() otherwise
SDValue expandCLMUL(SDNode *N, SelectionDAG &DAG) const;

/// Expand rotations.
/// \param N Node to expand
/// \param AllowVectorOps expand vector rotate, this should only be performed
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/IR/Intrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -1427,6 +1427,8 @@ let IntrProperties = [IntrNoMem, IntrSpeculatable] in {
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_fshr : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>]>;
def int_clmul : DefaultAttrsIntrinsic<[llvm_anyint_ty],
[LLVMMatchType<0>, LLVMMatchType<0>]>;
}

let IntrProperties = [IntrNoMem, IntrSpeculatable,
Expand Down
2 changes: 2 additions & 0 deletions llvm/include/llvm/Target/TargetSelectionDAG.td
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,8 @@ def sra_parts : SDNode<"ISD::SRA_PARTS" , SDTIntShiftPairOp>;
def srl_parts : SDNode<"ISD::SRL_PARTS" , SDTIntShiftPairOp>;
def fshl : SDNode<"ISD::FSHL" , SDTIntShiftDOp>;
def fshr : SDNode<"ISD::FSHR" , SDTIntShiftDOp>;
def clmul : SDNode<"ISD::CLMUL" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def and : SDNode<"ISD::AND" , SDTIntBinOp,
[SDNPCommutative, SDNPAssociative]>;
def or : SDNode<"ISD::OR" , SDTIntBinOp,
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4017,6 +4017,9 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) {
if (SDValue Expanded = TLI.expandFunnelShift(Node, DAG))
Results.push_back(Expanded);
break;
case ISD::CLMUL:
Results.push_back(TLI.expandCLMUL(Node, DAG));
break;
case ISD::ROTL:
case ISD::ROTR:
if (SDValue Expanded = TLI.expandROT(Node, true /*AllowVectorOps*/, DAG))
Expand Down
38 changes: 37 additions & 1 deletion llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -209,7 +209,8 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::VP_XOR:
case ISD::VP_ADD:
case ISD::VP_SUB:
case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
case ISD::VP_MUL:
case ISD::CLMUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;

case ISD::ABDS:
case ISD::AVGCEILS:
Expand Down Expand Up @@ -3140,6 +3141,10 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
ExpandIntRes_FunnelShift(N, Lo, Hi);
break;

case ISD::CLMUL:
ExpandIntRes_CLMUL(N, Lo, Hi);
break;

case ISD::VSCALE:
ExpandIntRes_VSCALE(N, Lo, Hi);
break;
Expand Down Expand Up @@ -5476,6 +5481,37 @@ void DAGTypeLegalizer::ExpandIntRes_FunnelShift(SDNode *N, SDValue &Lo,
Hi = DAG.getNode(Opc, DL, HalfVT, Select3, Select2, NewShAmt);
}

/// Expand a CLMUL whose integer result type has to be split into two
/// half-width pieces.
///
/// \param N  The ISD::CLMUL node being expanded.
/// \param Lo Receives the low half of the result.
/// \param Hi Receives the high half of the result.
void DAGTypeLegalizer::ExpandIntRes_CLMUL(SDNode *N, SDValue &Lo,
                                          SDValue &Hi) {
  // Fetch the already-expanded halves of both operands.
  SDValue LHSLo, LHSHi, RHSLo, RHSHi;
  GetExpandedInteger(N->getOperand(0), LHSLo, LHSHi);
  GetExpandedInteger(N->getOperand(1), RHSLo, RHSHi);
  EVT HalfVT = LHSLo.getValueType();
  SDLoc DL(N);

  // Small builders for the two half-width node kinds used repeatedly below.
  auto BuildCLMul = [&](SDValue A, SDValue B) {
    return DAG.getNode(ISD::CLMUL, DL, HalfVT, A, B);
  };
  auto BuildBitRev = [&](SDValue V) {
    return DAG.getNode(ISD::BITREVERSE, DL, HalfVT, V);
  };

  // The low half of the result depends only on the low halves of the inputs.
  Lo = BuildCLMul(LHSLo, RHSLo);

  // Because no carries propagate, the bits of CLMUL(A, B) that do not fit in
  // the half-width result can be recovered as
  //   BITREVERSE(CLMUL(BITREVERSE(A), BITREVERSE(B))) >> 1
  // The high half is therefore the XOR of:
  //   * the overflow of the low x low product (computed with that identity),
  //   * the low x high cross product, and
  //   * the high x low cross product.
  // TODO: if the target supports a widening CLMUL or a CLMULH we should
  // probably use that.
  SDValue RevLHSLo = BuildBitRev(LHSLo);
  SDValue RevRHSLo = BuildBitRev(RHSLo);
  SDValue RevProduct = BuildCLMul(RevLHSLo, RevRHSLo);
  SDValue Unreversed = BuildBitRev(RevProduct);
  SDValue ShiftByOne = DAG.getShiftAmountConstant(1, HalfVT, DL);
  SDValue HiAcc = DAG.getNode(ISD::SRL, DL, HalfVT, Unreversed, ShiftByOne);

  SDValue CrossLoHi = BuildCLMul(LHSLo, RHSHi);
  HiAcc = DAG.getNode(ISD::XOR, DL, HalfVT, HiAcc, CrossLoHi);
  SDValue CrossHiLo = BuildCLMul(LHSHi, RHSLo);
  HiAcc = DAG.getNode(ISD::XOR, DL, HalfVT, HiAcc, CrossHiLo);

  Hi = HiAcc;
}

void DAGTypeLegalizer::ExpandIntRes_VSCALE(SDNode *N, SDValue &Lo,
SDValue &Hi) {
EVT VT = N->getValueType(0);
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {

void ExpandIntRes_Rotate (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_FunnelShift (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_CLMUL (SDNode *N, SDValue &Lo, SDValue &Hi);

void ExpandIntRes_VSCALE (SDNode *N, SDValue &Lo, SDValue &Hi);
void ExpandIntRes_READ_REGISTER(SDNode *N, SDValue &Lo, SDValue &Hi);
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -166,6 +166,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX:
case ISD::UMIN:
case ISD::UMAX:
case ISD::CLMUL:

case ISD::SADDSAT:
case ISD::UADDSAT:
Expand Down Expand Up @@ -1330,6 +1331,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SMAX: case ISD::VP_SMAX:
case ISD::UMIN: case ISD::VP_UMIN:
case ISD::UMAX: case ISD::VP_UMAX:
case ISD::CLMUL:
case ISD::SADDSAT: case ISD::VP_SADDSAT:
case ISD::UADDSAT: case ISD::VP_UADDSAT:
case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
Expand Down Expand Up @@ -4764,6 +4766,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::SSUBSAT: case ISD::VP_SSUBSAT:
case ISD::SSHLSAT:
case ISD::USHLSAT:
case ISD::CLMUL:
case ISD::ROTL:
case ISD::ROTR:
case ISD::AVGFLOORS:
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7521,6 +7521,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
case ISD::SSUBSAT:
case ISD::UADDSAT:
case ISD::USUBSAT:
case ISD::CLMUL:
assert(VT.isInteger() && "This operator does not apply to FP types!");
assert(N1.getValueType() == N2.getValueType() &&
N1.getValueType() == VT && "Binary operator types must match!");
Expand Down
6 changes: 6 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7228,6 +7228,12 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
}
return;
}
case Intrinsic::clmul: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
setValue(&I, DAG.getNode(ISD::CLMUL, sdl, Op1.getValueType(), Op1, Op2));
return;
}
case Intrinsic::sadd_sat: {
SDValue Op1 = getValue(I.getArgOperand(0));
SDValue Op2 = getValue(I.getArgOperand(1));
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,6 +299,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
case ISD::ROTR: return "rotr";
case ISD::FSHL: return "fshl";
case ISD::FSHR: return "fshr";
case ISD::CLMUL: return "clmul";
case ISD::FADD: return "fadd";
case ISD::STRICT_FADD: return "strict_fadd";
case ISD::FSUB: return "fsub";
Expand Down
38 changes: 38 additions & 0 deletions llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8277,6 +8277,44 @@ SDValue TargetLowering::expandFunnelShift(SDNode *Node,
return DAG.getNode(ISD::OR, DL, VT, ShX, ShY);
}

/// Generic expansion of ISD::CLMUL (carryless multiply) into a
/// shift-and-xor sequence.
///
/// For each bit position of the first operand, the (progressively shifted)
/// second operand is XORed into the accumulator when that bit is set. This is
/// schoolbook multiplication with the additions replaced by XOR.
///
/// \param Node The ISD::CLMUL node to expand.
/// \param DAG  The SelectionDAG the replacement nodes are created in.
/// \returns The expanded value. Vector nodes whose element width is not a
/// power of two, or whose type lacks the required vector operations, are
/// unrolled via SelectionDAG::UnrollVectorOp instead.
SDValue TargetLowering::expandCLMUL(SDNode *Node,
                                    SelectionDAG &DAG) const {
  SDLoc DL(Node);
  EVT VT = Node->getValueType(0);
  SDValue V1 = Node->getOperand(0);
  SDValue V2 = Node->getOperand(1);
  unsigned NumBitsPerElt = VT.getScalarSizeInBits();

  // Only expand vector types if we have the appropriate vector bit operations.
  // The per-lane selection below is emitted as a VSELECT for vector types, so
  // that is the select opcode whose legality matters here.
  // FIXME: Should really try to split the vector in case it's legal on a
  // subvector.
  if (VT.isVector() && (!isPowerOf2_32(NumBitsPerElt) ||
                        !isOperationLegalOrCustom(ISD::SRL, VT) ||
                        !isOperationLegalOrCustom(ISD::SHL, VT) ||
                        !isOperationLegalOrCustom(ISD::XOR, VT) ||
                        !isOperationLegalOrCustom(ISD::AND, VT) ||
                        !isOperationLegalOrCustom(ISD::VSELECT, VT)))
    return DAG.UnrollVectorOp(Node);

  EVT SetCCType =
      getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue OneForShift = DAG.getShiftAmountConstant(1, VT, DL);
  SDValue Res = Zero;
  for (unsigned I = 0; I < NumBitsPerElt; ++I) {
    // Test the current lowest bit of V1.
    SDValue LowBit = DAG.getNode(ISD::AND, DL, VT, V1, One);
    SDValue LowBool = DAG.getSetCC(DL, SetCCType, LowBit, Zero, ISD::SETNE);
    // getSelect picks SELECT for a scalar condition and VSELECT for a vector
    // condition; a plain ISD::SELECT must not be given a vector condition.
    SDValue Pred = DAG.getSelect(DL, VT, LowBool, V2, Zero);
    Res = DAG.getNode(ISD::XOR, DL, VT, Res, Pred);
    // Advance to the next bit; the shifts are not needed after the last one.
    if (I != NumBitsPerElt - 1) {
      V1 = DAG.getNode(ISD::SRL, DL, VT, V1, OneForShift);
      V2 = DAG.getNode(ISD::SHL, DL, VT, V2, OneForShift);
    }
  }
  return Res;
}

// TODO: Merge with expandFunnelShift.
SDValue TargetLowering::expandROT(SDNode *Node, bool AllowVectorOps,
SelectionDAG &DAG) const {
Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/CodeGen/TargetLoweringBase.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -844,6 +844,9 @@ void TargetLoweringBase::initActions() {
// Absolute difference
setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Expand);

// Carryless multiply
setOperationAction(ISD::CLMUL, VT, Expand);

// Saturated trunc
setOperationAction(ISD::TRUNCATE_SSAT_S, VT, Expand);
setOperationAction(ISD::TRUNCATE_SSAT_U, VT, Expand);
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Target/RISCV/RISCVISelLowering.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,6 +402,10 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
Legal);
}

if (Subtarget.hasStdExtZbc() || Subtarget.hasStdExtZbkc()) {
setOperationAction(ISD::CLMUL, XLenVT, Legal);
}

if (Subtarget.hasStdExtZbb() ||
(Subtarget.hasVendorXCVbitmanip() && !Subtarget.is64Bit())) {
if (Subtarget.is64Bit())
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Target/RISCV/RISCVInstrInfoZb.td
Original file line number Diff line number Diff line change
Expand Up @@ -799,6 +799,7 @@ def : Sh3AddPat<SH3ADD>;
} // Predicates = [HasStdExtZba, IsRV64]

let Predicates = [HasStdExtZbcOrZbkc] in {
def : PatGprGpr<clmul, CLMUL>;
def : PatGprGpr<riscv_clmul, CLMUL>;
def : PatGprGpr<riscv_clmulh, CLMULH>;
} // Predicates = [HasStdExtZbcOrZbkc]
Expand Down
Loading