Skip to content

Commit abf18ac

Browse files
committed
address comments
1 parent e4e6b69 commit abf18ac

File tree

6 files changed

+128
-177
lines changed

6 files changed

+128
-177
lines changed

llvm/docs/NVPTXUsage.rst

Lines changed: 15 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -568,98 +568,35 @@ to left-shift the found bit into the most-significant bit position, otherwise
568568
the result is the shift amount needed to right-shift the found bit into the
569569
least-significant bit position. 0xffffffff is returned if no 1 bit is found.
570570

571-
'``llvm.nvvm.zext.inreg.clamp``' Intrinsic
572-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
573-
574-
Syntax:
575-
"""""""
576-
577-
.. code-block:: llvm
578-
579-
declare i32 @llvm.nvvm.zext.inreg.clamp(i32 %a, i32 %b)
580-
581-
Overview:
582-
"""""""""
583-
584-
The '``llvm.nvvm.zext.inreg.clamp``' intrinsic extracts the low bits of the
585-
input value, and zero-extends them back to the original width.
586-
587-
Semantics:
588-
""""""""""
589-
590-
The '``llvm.nvvm.zext.inreg.clamp``' returns the zero-extension of N lowest bits
591-
of operand %a. N is the value of operand %b clamped to the range [0, 32]. If N
592-
is 0, the result is 0.
593-
594-
'``llvm.nvvm.zext.inreg.wrap``' Intrinsic
595-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
596-
597-
Syntax:
598-
"""""""
599-
600-
.. code-block:: llvm
601-
602-
declare i32 @llvm.nvvm.zext.inreg.wrap(i32 %a, i32 %b)
603-
604-
Overview:
605-
"""""""""
606-
607-
The '``llvm.nvvm.zext.inreg.wrap``' intrinsic extracts the low bits of the
608-
input value, and zero-extends them back to the original width.
609-
610-
Semantics:
611-
""""""""""
612-
613-
The '``llvm.nvvm.zext.inreg.wrap``' returns the zero-extension of N lowest bits
614-
of operand %a. N is the value of operand %b modulo 32. If N is 0, the result
615-
is 0.
616-
617-
'``llvm.nvvm.sext.inreg.clamp``' Intrinsic
618-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
619-
620-
Syntax:
621-
"""""""
622-
623-
.. code-block:: llvm
624-
625-
declare i32 @llvm.nvvm.sext.inreg.clamp(i32 %a, i32 %b)
626-
627-
Overview:
628-
"""""""""
629-
630-
The '``llvm.nvvm.sext.inreg.clamp``' intrinsic extracts the low bits of the
631-
input value, and sign-extends them back to the original width.
632-
633-
Semantics:
634-
""""""""""
635-
636-
The '``llvm.nvvm.sext.inreg.clamp``' returns the sign-extension of N lowest bits
637-
of operand %a. N is the value of operand %b clamped to the range [0, 32]. If N
638-
is 0, the result is 0.
639-
640-
641-
'``llvm.nvvm.sext.inreg.wrap``' Intrinsic
642-
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
571+
'``llvm.nvvm.{zext,sext}.{wrap,clamp}``' Intrinsics
572+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
643573

644574
Syntax:
645575
"""""""
646576

647577
.. code-block:: llvm
648578
649-
declare i32 @llvm.nvvm.sext.inreg.wrap(i32 %a, i32 %b)
579+
declare i32 @llvm.nvvm.zext.wrap(i32 %a, i32 %b)
580+
declare i32 @llvm.nvvm.zext.clamp(i32 %a, i32 %b)
581+
declare i32 @llvm.nvvm.sext.wrap(i32 %a, i32 %b)
582+
declare i32 @llvm.nvvm.sext.clamp(i32 %a, i32 %b)
650583
651584
Overview:
652585
"""""""""
653586

654-
The '``llvm.nvvm.sext.inreg.wrap``' intrinsic extracts the low bits of the
655-
input value, and sign-extends them back to the original width.
587+
The '``llvm.nvvm.{zext,sext}.{wrap,clamp}``' family of intrinsics extracts the
588+
low bits of the input value, and zero- or sign-extends them back to the original
589+
width.
656590

657591
Semantics:
658592
""""""""""
659593

660-
The '``llvm.nvvm.sext.inreg.wrap``' returns the sign-extension of N lowest bits
661-
of operand %a. N is the value of operand %b modulo 32. If N is 0, the result
662-
is 0.
594+
The '``llvm.nvvm.{zext,sext}.{wrap,clamp}``' family of intrinsics returns the
594+
extension of the N lowest bits of operand %a. For the '``wrap``' variants, N is
595+
the value of operand %b modulo 32. For the '``clamp``' variants, N is the value
596+
of operand %b clamped to the range [0, 32]. The N lowest bits are then
597+
zero-extended in the case of the '``zext``' variants, or sign-extended in the
598+
case of the '``sext``' variants. If N is 0, the result is 0.
663600

664601
TMA family of Intrinsics
665602
------------------------

llvm/include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1362,7 +1362,7 @@ let TargetPrefix = "nvvm" in {
13621362
//
13631363
foreach ext = ["sext", "zext"] in
13641364
foreach mode = ["wrap", "clamp"] in
1365-
def int_nvvm_ # ext # _inreg_ # mode :
1365+
def int_nvvm_ # ext # _ # mode :
13661366
DefaultAttrsIntrinsic<[llvm_i32_ty],
13671367
[llvm_i32_ty, llvm_i32_ty],
13681368
[IntrNoMem, IntrSpeculatable]>;

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 20 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -227,6 +227,7 @@ class RegTyInfo<ValueType ty, NVPTXRegClass rc, Operand imm, SDNode imm_node,
227227
int Size = ty.Size;
228228
}
229229

230+
def I1RT : RegTyInfo<i1, Int1Regs, i1imm, imm>;
230231
def I16RT : RegTyInfo<i16, Int16Regs, i16imm, imm>;
231232
def I32RT : RegTyInfo<i32, Int32Regs, i32imm, imm>;
232233
def I64RT : RegTyInfo<i64, Int64Regs, i64imm, imm>;
@@ -252,13 +253,13 @@ multiclass I3Inst<string op_str, SDPatternOperator op_node, RegTyInfo t,
252253
def ri :
253254
NVPTXInst<(outs t.RC:$dst), (ins t.RC:$a, t.Imm:$b),
254255
asmstr,
255-
[(set t.Ty:$dst, (op_node t.RC:$a, imm:$b))]>,
256+
[(set t.Ty:$dst, (op_node t.Ty:$a, (t.Ty imm:$b)))]>,
256257
Requires<requires>;
257258
if !not(commutative) then
258259
def ir :
259260
NVPTXInst<(outs t.RC:$dst), (ins t.Imm:$a, t.RC:$b),
260261
asmstr,
261-
[(set t.Ty:$dst, (op_node imm:$a, t.RC:$b))]>,
262+
[(set t.Ty:$dst, (op_node (t.Ty imm:$a), t.Ty:$b))]>,
262263
Requires<requires>;
263264
}
264265

@@ -833,8 +834,8 @@ defm SUB_i1 : ADD_SUB_i1<sub>;
833834

834835
// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we
835836
// also use these for unsigned arithmetic.
836-
defm ADD : I3<"add.s", add, /*commutative=*/ true>;
837-
defm SUB : I3<"sub.s", sub, /*commutative=*/ false>;
837+
defm ADD : I3<"add.s", add, commutative = true>;
838+
defm SUB : I3<"sub.s", sub, commutative = false>;
838839

839840
def ADD16x2 : I16x2<"add.s", add>;
840841

@@ -846,18 +847,18 @@ defm SUBCC : ADD_SUB_INT_CARRY<"sub.cc", subc, commutative = false>;
846847
defm ADDCCC : ADD_SUB_INT_CARRY<"addc.cc", adde, commutative = true>;
847848
defm SUBCCC : ADD_SUB_INT_CARRY<"subc.cc", sube, commutative = false>;
848849

849-
defm MULT : I3<"mul.lo.s", mul, /*commutative=*/ true>;
850+
defm MULT : I3<"mul.lo.s", mul, commutative = true>;
850851

851-
defm MULTHS : I3<"mul.hi.s", mulhs, /*commutative=*/ true>;
852-
defm MULTHU : I3<"mul.hi.u", mulhu, /*commutative=*/ true>;
852+
defm MULTHS : I3<"mul.hi.s", mulhs, commutative = true>;
853+
defm MULTHU : I3<"mul.hi.u", mulhu, commutative = true>;
853854

854-
defm SDIV : I3<"div.s", sdiv, /*commutative=*/ false>;
855-
defm UDIV : I3<"div.u", udiv, /*commutative=*/ false>;
855+
defm SDIV : I3<"div.s", sdiv, commutative = false>;
856+
defm UDIV : I3<"div.u", udiv, commutative = false>;
856857

857858
// The ri versions of rem.s and rem.u won't be selected; DAGCombiner::visitSREM
858859
// will lower it.
859-
defm SREM : I3<"rem.s", srem, /*commutative=*/ false>;
860-
defm UREM : I3<"rem.u", urem, /*commutative=*/ false>;
860+
defm SREM : I3<"rem.s", srem, commutative = false>;
861+
defm UREM : I3<"rem.u", urem, commutative = false>;
861862

862863
// Integer absolute value. NumBits should be one minus the bit width of RC.
863864
// This idiom implements the algorithm at
@@ -872,10 +873,10 @@ defm ABS_32 : ABS<i32, Int32Regs, ".s32">;
872873
defm ABS_64 : ABS<i64, Int64Regs, ".s64">;
873874

874875
// Integer min/max.
875-
defm SMAX : I3<"max.s", smax, /*commutative=*/ true>;
876-
defm UMAX : I3<"max.u", umax, /*commutative=*/ true>;
877-
defm SMIN : I3<"min.s", smin, /*commutative=*/ true>;
878-
defm UMIN : I3<"min.u", umin, /*commutative=*/ true>;
876+
defm SMAX : I3<"max.s", smax, commutative = true>;
877+
defm UMAX : I3<"max.u", umax, commutative = true>;
878+
defm SMIN : I3<"min.s", smin, commutative = true>;
879+
defm UMIN : I3<"min.u", umin, commutative = true>;
879880

880881
def SMAX16x2 : I16x2<"max.s", smax>;
881882
def UMAX16x2 : I16x2<"max.u", umax>;
@@ -1385,38 +1386,10 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
13851386
// Template for three-arg bitwise operations. Takes three args and creates .b16,
13861387
// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr.
13871388
multiclass BITWISE<string OpcStr, SDNode OpNode> {
1388-
def b1rr :
1389-
NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
1390-
!strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
1391-
[(set i1:$dst, (OpNode i1:$a, i1:$b))]>;
1392-
def b1ri :
1393-
NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
1394-
!strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
1395-
[(set i1:$dst, (OpNode i1:$a, imm:$b))]>;
1396-
def b16rr :
1397-
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
1398-
!strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
1399-
[(set i16:$dst, (OpNode i16:$a, i16:$b))]>;
1400-
def b16ri :
1401-
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
1402-
!strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
1403-
[(set i16:$dst, (OpNode i16:$a, imm:$b))]>;
1404-
def b32rr :
1405-
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
1406-
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
1407-
[(set i32:$dst, (OpNode i32:$a, i32:$b))]>;
1408-
def b32ri :
1409-
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
1410-
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
1411-
[(set i32:$dst, (OpNode i32:$a, imm:$b))]>;
1412-
def b64rr :
1413-
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
1414-
!strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1415-
[(set i64:$dst, (OpNode i64:$a, i64:$b))]>;
1416-
def b64ri :
1417-
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
1418-
!strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
1419-
[(set i64:$dst, (OpNode i64:$a, imm:$b))]>;
1389+
defm b1 : I3Inst<OpcStr # ".pred", OpNode, I1RT, commutative = true>;
1390+
defm b16 : I3Inst<OpcStr # ".b16", OpNode, I16RT, commutative = true>;
1391+
defm b32 : I3Inst<OpcStr # ".b32", OpNode, I32RT, commutative = true>;
1392+
defm b64 : I3Inst<OpcStr # ".b64", OpNode, I64RT, commutative = true>;
14201393
}
14211394

14221395
defm OR : BITWISE<"or", or>;

llvm/lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1685,7 +1685,7 @@ foreach t = [I32RT, I64RT] in {
16851685
foreach sign = ["s", "u"] in {
16861686
foreach mode = ["wrap", "clamp"] in {
16871687
defvar ext = !if(!eq(sign, "s"), "sext", "zext");
1688-
defvar intrin = !cast<Intrinsic>("int_nvvm_" # ext # "_inreg_" # mode);
1688+
defvar intrin = !cast<Intrinsic>("int_nvvm_" # ext # "_" # mode);
16891689
defm SZEXT_ # sign # _ # mode
16901690
: I3Inst<"szext." # mode # "." # sign # "32",
16911691
intrin, I32RT, commutative = false,

llvm/test/CodeGen/NVPTX/i128.ll

Lines changed: 46 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -13,37 +13,37 @@ define i128 @srem_i128(i128 %lhs, i128 %rhs) {
1313
; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [srem_i128_param_0];
1414
; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [srem_i128_param_1];
1515
; CHECK-NEXT: shr.s64 %rd2, %rd46, 63;
16-
; CHECK-NEXT: mov.b64 %rd117, 0;
17-
; CHECK-NEXT: sub.cc.s64 %rd52, %rd117, %rd45;
18-
; CHECK-NEXT: subc.cc.s64 %rd53, %rd117, %rd46;
16+
; CHECK-NEXT: sub.cc.s64 %rd51, 0, %rd45;
17+
; CHECK-NEXT: subc.cc.s64 %rd52, 0, %rd46;
1918
; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0;
20-
; CHECK-NEXT: selp.b64 %rd4, %rd53, %rd46, %p1;
21-
; CHECK-NEXT: selp.b64 %rd3, %rd52, %rd45, %p1;
22-
; CHECK-NEXT: sub.cc.s64 %rd54, %rd117, %rd49;
23-
; CHECK-NEXT: subc.cc.s64 %rd55, %rd117, %rd50;
19+
; CHECK-NEXT: selp.b64 %rd4, %rd52, %rd46, %p1;
20+
; CHECK-NEXT: selp.b64 %rd3, %rd51, %rd45, %p1;
21+
; CHECK-NEXT: sub.cc.s64 %rd53, 0, %rd49;
22+
; CHECK-NEXT: subc.cc.s64 %rd54, 0, %rd50;
2423
; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0;
25-
; CHECK-NEXT: selp.b64 %rd6, %rd55, %rd50, %p2;
26-
; CHECK-NEXT: selp.b64 %rd5, %rd54, %rd49, %p2;
27-
; CHECK-NEXT: or.b64 %rd56, %rd5, %rd6;
28-
; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0;
29-
; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4;
30-
; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0;
24+
; CHECK-NEXT: selp.b64 %rd6, %rd54, %rd50, %p2;
25+
; CHECK-NEXT: selp.b64 %rd5, %rd53, %rd49, %p2;
26+
; CHECK-NEXT: or.b64 %rd55, %rd5, %rd6;
27+
; CHECK-NEXT: setp.eq.s64 %p3, %rd55, 0;
28+
; CHECK-NEXT: or.b64 %rd56, %rd3, %rd4;
29+
; CHECK-NEXT: setp.eq.s64 %p4, %rd56, 0;
3130
; CHECK-NEXT: or.pred %p5, %p3, %p4;
3231
; CHECK-NEXT: setp.ne.s64 %p6, %rd6, 0;
3332
; CHECK-NEXT: clz.b64 %r1, %rd6;
34-
; CHECK-NEXT: cvt.u64.u32 %rd58, %r1;
33+
; CHECK-NEXT: cvt.u64.u32 %rd57, %r1;
3534
; CHECK-NEXT: clz.b64 %r2, %rd5;
36-
; CHECK-NEXT: cvt.u64.u32 %rd59, %r2;
37-
; CHECK-NEXT: add.s64 %rd60, %rd59, 64;
38-
; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6;
35+
; CHECK-NEXT: cvt.u64.u32 %rd58, %r2;
36+
; CHECK-NEXT: add.s64 %rd59, %rd58, 64;
37+
; CHECK-NEXT: selp.b64 %rd60, %rd57, %rd59, %p6;
3938
; CHECK-NEXT: setp.ne.s64 %p7, %rd4, 0;
4039
; CHECK-NEXT: clz.b64 %r3, %rd4;
41-
; CHECK-NEXT: cvt.u64.u32 %rd62, %r3;
40+
; CHECK-NEXT: cvt.u64.u32 %rd61, %r3;
4241
; CHECK-NEXT: clz.b64 %r4, %rd3;
43-
; CHECK-NEXT: cvt.u64.u32 %rd63, %r4;
44-
; CHECK-NEXT: add.s64 %rd64, %rd63, 64;
45-
; CHECK-NEXT: selp.b64 %rd65, %rd62, %rd64, %p7;
46-
; CHECK-NEXT: sub.cc.s64 %rd66, %rd61, %rd65;
42+
; CHECK-NEXT: cvt.u64.u32 %rd62, %r4;
43+
; CHECK-NEXT: add.s64 %rd63, %rd62, 64;
44+
; CHECK-NEXT: selp.b64 %rd64, %rd61, %rd63, %p7;
45+
; CHECK-NEXT: mov.b64 %rd117, 0;
46+
; CHECK-NEXT: sub.cc.s64 %rd66, %rd60, %rd64;
4747
; CHECK-NEXT: subc.cc.s64 %rd67, %rd117, 0;
4848
; CHECK-NEXT: setp.gt.u64 %p8, %rd66, 127;
4949
; CHECK-NEXT: setp.eq.s64 %p9, %rd67, 0;
@@ -314,39 +314,39 @@ define i128 @sdiv_i128(i128 %lhs, i128 %rhs) {
314314
; CHECK-NEXT: // %bb.0: // %_udiv-special-cases
315315
; CHECK-NEXT: ld.param.v2.u64 {%rd45, %rd46}, [sdiv_i128_param_0];
316316
; CHECK-NEXT: ld.param.v2.u64 {%rd49, %rd50}, [sdiv_i128_param_1];
317-
; CHECK-NEXT: mov.b64 %rd112, 0;
318-
; CHECK-NEXT: sub.cc.s64 %rd52, %rd112, %rd45;
319-
; CHECK-NEXT: subc.cc.s64 %rd53, %rd112, %rd46;
317+
; CHECK-NEXT: sub.cc.s64 %rd51, 0, %rd45;
318+
; CHECK-NEXT: subc.cc.s64 %rd52, 0, %rd46;
320319
; CHECK-NEXT: setp.lt.s64 %p1, %rd46, 0;
321-
; CHECK-NEXT: selp.b64 %rd2, %rd53, %rd46, %p1;
322-
; CHECK-NEXT: selp.b64 %rd1, %rd52, %rd45, %p1;
323-
; CHECK-NEXT: sub.cc.s64 %rd54, %rd112, %rd49;
324-
; CHECK-NEXT: subc.cc.s64 %rd55, %rd112, %rd50;
320+
; CHECK-NEXT: selp.b64 %rd2, %rd52, %rd46, %p1;
321+
; CHECK-NEXT: selp.b64 %rd1, %rd51, %rd45, %p1;
322+
; CHECK-NEXT: sub.cc.s64 %rd53, 0, %rd49;
323+
; CHECK-NEXT: subc.cc.s64 %rd54, 0, %rd50;
325324
; CHECK-NEXT: setp.lt.s64 %p2, %rd50, 0;
326-
; CHECK-NEXT: selp.b64 %rd4, %rd55, %rd50, %p2;
327-
; CHECK-NEXT: selp.b64 %rd3, %rd54, %rd49, %p2;
328-
; CHECK-NEXT: xor.b64 %rd56, %rd50, %rd46;
329-
; CHECK-NEXT: shr.s64 %rd5, %rd56, 63;
330-
; CHECK-NEXT: or.b64 %rd57, %rd3, %rd4;
331-
; CHECK-NEXT: setp.eq.s64 %p3, %rd57, 0;
332-
; CHECK-NEXT: or.b64 %rd58, %rd1, %rd2;
333-
; CHECK-NEXT: setp.eq.s64 %p4, %rd58, 0;
325+
; CHECK-NEXT: selp.b64 %rd4, %rd54, %rd50, %p2;
326+
; CHECK-NEXT: selp.b64 %rd3, %rd53, %rd49, %p2;
327+
; CHECK-NEXT: xor.b64 %rd55, %rd50, %rd46;
328+
; CHECK-NEXT: shr.s64 %rd5, %rd55, 63;
329+
; CHECK-NEXT: or.b64 %rd56, %rd3, %rd4;
330+
; CHECK-NEXT: setp.eq.s64 %p3, %rd56, 0;
331+
; CHECK-NEXT: or.b64 %rd57, %rd1, %rd2;
332+
; CHECK-NEXT: setp.eq.s64 %p4, %rd57, 0;
334333
; CHECK-NEXT: or.pred %p5, %p3, %p4;
335334
; CHECK-NEXT: setp.ne.s64 %p6, %rd4, 0;
336335
; CHECK-NEXT: clz.b64 %r1, %rd4;
337-
; CHECK-NEXT: cvt.u64.u32 %rd59, %r1;
336+
; CHECK-NEXT: cvt.u64.u32 %rd58, %r1;
338337
; CHECK-NEXT: clz.b64 %r2, %rd3;
339-
; CHECK-NEXT: cvt.u64.u32 %rd60, %r2;
340-
; CHECK-NEXT: add.s64 %rd61, %rd60, 64;
341-
; CHECK-NEXT: selp.b64 %rd62, %rd59, %rd61, %p6;
338+
; CHECK-NEXT: cvt.u64.u32 %rd59, %r2;
339+
; CHECK-NEXT: add.s64 %rd60, %rd59, 64;
340+
; CHECK-NEXT: selp.b64 %rd61, %rd58, %rd60, %p6;
342341
; CHECK-NEXT: setp.ne.s64 %p7, %rd2, 0;
343342
; CHECK-NEXT: clz.b64 %r3, %rd2;
344-
; CHECK-NEXT: cvt.u64.u32 %rd63, %r3;
343+
; CHECK-NEXT: cvt.u64.u32 %rd62, %r3;
345344
; CHECK-NEXT: clz.b64 %r4, %rd1;
346-
; CHECK-NEXT: cvt.u64.u32 %rd64, %r4;
347-
; CHECK-NEXT: add.s64 %rd65, %rd64, 64;
348-
; CHECK-NEXT: selp.b64 %rd66, %rd63, %rd65, %p7;
349-
; CHECK-NEXT: sub.cc.s64 %rd67, %rd62, %rd66;
345+
; CHECK-NEXT: cvt.u64.u32 %rd63, %r4;
346+
; CHECK-NEXT: add.s64 %rd64, %rd63, 64;
347+
; CHECK-NEXT: selp.b64 %rd65, %rd62, %rd64, %p7;
348+
; CHECK-NEXT: mov.b64 %rd112, 0;
349+
; CHECK-NEXT: sub.cc.s64 %rd67, %rd61, %rd65;
350350
; CHECK-NEXT: subc.cc.s64 %rd68, %rd112, 0;
351351
; CHECK-NEXT: setp.gt.u64 %p8, %rd67, 127;
352352
; CHECK-NEXT: setp.eq.s64 %p9, %rd68, 0;

0 commit comments

Comments
 (0)