Skip to content

Commit c261a02

Browse files
author
Hugh Delaney
committed
Update tests after rebase
1 parent 771a428 commit c261a02

File tree

3 files changed

+131
-169
lines changed

3 files changed

+131
-169
lines changed

llvm/test/CodeGen/NVPTX/fma-relu-contract.ll

Lines changed: 59 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,18 @@ define half @fma_f16_expanded_no_nans(half %a, half %b, half %c) #0 {
3636
;
3737
; CHECK-SM70-LABEL: fma_f16_expanded_no_nans(
3838
; CHECK-SM70: {
39-
; CHECK-SM70-NEXT: .reg .b16 %rs<6>;
40-
; CHECK-SM70-NEXT: .reg .f32 %f<3>;
39+
; CHECK-SM70-NEXT: .reg .pred %p<2>;
40+
; CHECK-SM70-NEXT: .reg .b16 %rs<7>;
4141
; CHECK-SM70-EMPTY:
4242
; CHECK-SM70-NEXT: // %bb.0:
4343
; CHECK-SM70-NEXT: ld.param.b16 %rs1, [fma_f16_expanded_no_nans_param_0];
4444
; CHECK-SM70-NEXT: ld.param.b16 %rs2, [fma_f16_expanded_no_nans_param_1];
4545
; CHECK-SM70-NEXT: ld.param.b16 %rs3, [fma_f16_expanded_no_nans_param_2];
4646
; CHECK-SM70-NEXT: fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
47-
; CHECK-SM70-NEXT: cvt.f32.f16 %f1, %rs4;
48-
; CHECK-SM70-NEXT: max.f32 %f2, %f1, 0f00000000;
49-
; CHECK-SM70-NEXT: cvt.rn.f16.f32 %rs5, %f2;
50-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs5;
47+
; CHECK-SM70-NEXT: mov.b16 %rs5, 0x0000;
48+
; CHECK-SM70-NEXT: setp.gt.f16 %p1, %rs4, %rs5;
49+
; CHECK-SM70-NEXT: selp.b16 %rs6, %rs4, 0x0000, %p1;
50+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs6;
5151
; CHECK-SM70-NEXT: ret;
5252
%1 = fmul half %a, %b
5353
%2 = fadd half %1, %c
@@ -94,21 +94,21 @@ define half @fma_f16_expanded_no_nans_multiple_uses_of_fma(half %a, half %b, hal
9494
;
9595
; CHECK-SM70-LABEL: fma_f16_expanded_no_nans_multiple_uses_of_fma(
9696
; CHECK-SM70: {
97-
; CHECK-SM70-NEXT: .reg .b16 %rs<9>;
98-
; CHECK-SM70-NEXT: .reg .f32 %f<3>;
97+
; CHECK-SM70-NEXT: .reg .pred %p<2>;
98+
; CHECK-SM70-NEXT: .reg .b16 %rs<10>;
9999
; CHECK-SM70-EMPTY:
100100
; CHECK-SM70-NEXT: // %bb.0:
101101
; CHECK-SM70-NEXT: ld.param.b16 %rs1, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_0];
102102
; CHECK-SM70-NEXT: ld.param.b16 %rs2, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_1];
103103
; CHECK-SM70-NEXT: ld.param.b16 %rs3, [fma_f16_expanded_no_nans_multiple_uses_of_fma_param_2];
104104
; CHECK-SM70-NEXT: fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
105-
; CHECK-SM70-NEXT: cvt.f32.f16 %f1, %rs4;
106-
; CHECK-SM70-NEXT: max.f32 %f2, %f1, 0f00000000;
107-
; CHECK-SM70-NEXT: cvt.rn.f16.f32 %rs5, %f2;
108-
; CHECK-SM70-NEXT: mov.b16 %rs6, 0x4700;
109-
; CHECK-SM70-NEXT: add.f16 %rs7, %rs4, %rs6;
110-
; CHECK-SM70-NEXT: add.f16 %rs8, %rs5, %rs7;
111-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs8;
105+
; CHECK-SM70-NEXT: mov.b16 %rs5, 0x0000;
106+
; CHECK-SM70-NEXT: setp.gt.f16 %p1, %rs4, %rs5;
107+
; CHECK-SM70-NEXT: selp.b16 %rs6, %rs4, 0x0000, %p1;
108+
; CHECK-SM70-NEXT: mov.b16 %rs7, 0x4700;
109+
; CHECK-SM70-NEXT: add.f16 %rs8, %rs4, %rs7;
110+
; CHECK-SM70-NEXT: add.f16 %rs9, %rs6, %rs8;
111+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs9;
112112
; CHECK-SM70-NEXT: ret;
113113
%1 = fmul half %a, %b
114114
%2 = fadd half %1, %c
@@ -150,18 +150,18 @@ define half @fma_f16_expanded_unsafe_with_nans(half %a, half %b, half %c) #1 {
150150
;
151151
; CHECK-SM70-LABEL: fma_f16_expanded_unsafe_with_nans(
152152
; CHECK-SM70: {
153-
; CHECK-SM70-NEXT: .reg .b16 %rs<6>;
154-
; CHECK-SM70-NEXT: .reg .f32 %f<3>;
153+
; CHECK-SM70-NEXT: .reg .pred %p<2>;
154+
; CHECK-SM70-NEXT: .reg .b16 %rs<7>;
155155
; CHECK-SM70-EMPTY:
156156
; CHECK-SM70-NEXT: // %bb.0:
157157
; CHECK-SM70-NEXT: ld.param.b16 %rs1, [fma_f16_expanded_unsafe_with_nans_param_0];
158158
; CHECK-SM70-NEXT: ld.param.b16 %rs2, [fma_f16_expanded_unsafe_with_nans_param_1];
159159
; CHECK-SM70-NEXT: ld.param.b16 %rs3, [fma_f16_expanded_unsafe_with_nans_param_2];
160160
; CHECK-SM70-NEXT: fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
161-
; CHECK-SM70-NEXT: cvt.f32.f16 %f1, %rs4;
162-
; CHECK-SM70-NEXT: max.f32 %f2, %f1, 0f00000000;
163-
; CHECK-SM70-NEXT: cvt.rn.f16.f32 %rs5, %f2;
164-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs5;
161+
; CHECK-SM70-NEXT: mov.b16 %rs5, 0x0000;
162+
; CHECK-SM70-NEXT: setp.gt.f16 %p1, %rs4, %rs5;
163+
; CHECK-SM70-NEXT: selp.b16 %rs6, %rs4, 0x0000, %p1;
164+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs6;
165165
; CHECK-SM70-NEXT: ret;
166166
%1 = fmul half %a, %b
167167
%2 = fadd half %1, %c
@@ -248,9 +248,9 @@ define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %
248248
; CHECK-SM70-LABEL: fma_bf16_expanded_unsafe_with_nans(
249249
; CHECK-SM70: {
250250
; CHECK-SM70-NEXT: .reg .pred %p<3>;
251-
; CHECK-SM70-NEXT: .reg .b16 %rs<3>;
252-
; CHECK-SM70-NEXT: .reg .b32 %r<20>;
253-
; CHECK-SM70-NEXT: .reg .f32 %f<7>;
251+
; CHECK-SM70-NEXT: .reg .b16 %rs<4>;
252+
; CHECK-SM70-NEXT: .reg .b32 %r<14>;
253+
; CHECK-SM70-NEXT: .reg .f32 %f<6>;
254254
; CHECK-SM70-EMPTY:
255255
; CHECK-SM70-NEXT: // %bb.0:
256256
; CHECK-SM70-NEXT: ld.param.u16 %r1, [fma_bf16_expanded_unsafe_with_nans_param_2];
@@ -270,18 +270,12 @@ define bfloat @fma_bf16_expanded_unsafe_with_nans(bfloat %a, bfloat %b, bfloat %
270270
; CHECK-SM70-NEXT: setp.nan.f32 %p1, %f4, %f4;
271271
; CHECK-SM70-NEXT: or.b32 %r11, %r7, 4194304;
272272
; CHECK-SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
273+
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
273274
; CHECK-SM70-NEXT: and.b32 %r13, %r12, -65536;
274275
; CHECK-SM70-NEXT: mov.b32 %f5, %r13;
275-
; CHECK-SM70-NEXT: max.f32 %f6, %f5, 0f00000000;
276-
; CHECK-SM70-NEXT: mov.b32 %r14, %f6;
277-
; CHECK-SM70-NEXT: bfe.u32 %r15, %r14, 16, 1;
278-
; CHECK-SM70-NEXT: add.s32 %r16, %r15, %r14;
279-
; CHECK-SM70-NEXT: add.s32 %r17, %r16, 32767;
280-
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
281-
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
282-
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
283-
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
284-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
276+
; CHECK-SM70-NEXT: setp.gt.f32 %p2, %f5, 0f00000000;
277+
; CHECK-SM70-NEXT: selp.b16 %rs3, %rs1, 0x0000, %p2;
278+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3;
285279
; CHECK-SM70-NEXT: ret;
286280
%1 = fmul bfloat %a, %b
287281
%2 = fadd bfloat %1, %c
@@ -318,9 +312,9 @@ define bfloat @fma_bf16_expanded_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
318312
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans(
319313
; CHECK-SM70: {
320314
; CHECK-SM70-NEXT: .reg .pred %p<3>;
321-
; CHECK-SM70-NEXT: .reg .b16 %rs<3>;
322-
; CHECK-SM70-NEXT: .reg .b32 %r<20>;
323-
; CHECK-SM70-NEXT: .reg .f32 %f<7>;
315+
; CHECK-SM70-NEXT: .reg .b16 %rs<4>;
316+
; CHECK-SM70-NEXT: .reg .b32 %r<14>;
317+
; CHECK-SM70-NEXT: .reg .f32 %f<6>;
324318
; CHECK-SM70-EMPTY:
325319
; CHECK-SM70-NEXT: // %bb.0:
326320
; CHECK-SM70-NEXT: ld.param.u16 %r1, [fma_bf16_expanded_no_nans_param_2];
@@ -340,18 +334,12 @@ define bfloat @fma_bf16_expanded_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
340334
; CHECK-SM70-NEXT: setp.nan.f32 %p1, %f4, %f4;
341335
; CHECK-SM70-NEXT: or.b32 %r11, %r7, 4194304;
342336
; CHECK-SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
337+
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
343338
; CHECK-SM70-NEXT: and.b32 %r13, %r12, -65536;
344339
; CHECK-SM70-NEXT: mov.b32 %f5, %r13;
345-
; CHECK-SM70-NEXT: max.f32 %f6, %f5, 0f00000000;
346-
; CHECK-SM70-NEXT: mov.b32 %r14, %f6;
347-
; CHECK-SM70-NEXT: bfe.u32 %r15, %r14, 16, 1;
348-
; CHECK-SM70-NEXT: add.s32 %r16, %r15, %r14;
349-
; CHECK-SM70-NEXT: add.s32 %r17, %r16, 32767;
350-
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
351-
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
352-
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
353-
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
354-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
340+
; CHECK-SM70-NEXT: setp.gt.f32 %p2, %f5, 0f00000000;
341+
; CHECK-SM70-NEXT: selp.b16 %rs3, %rs1, 0x0000, %p2;
342+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3;
355343
; CHECK-SM70-NEXT: ret;
356344
%1 = fmul bfloat %a, %b
357345
%2 = fadd bfloat %1, %c
@@ -423,9 +411,9 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
423411
; CHECK-SM70-LABEL: fma_bf16_expanded_no_nans_multiple_uses_of_fma(
424412
; CHECK-SM70: {
425413
; CHECK-SM70-NEXT: .reg .pred %p<5>;
426-
; CHECK-SM70-NEXT: .reg .b16 %rs<3>;
427-
; CHECK-SM70-NEXT: .reg .b32 %r<34>;
428-
; CHECK-SM70-NEXT: .reg .f32 %f<11>;
414+
; CHECK-SM70-NEXT: .reg .b16 %rs<7>;
415+
; CHECK-SM70-NEXT: .reg .b32 %r<29>;
416+
; CHECK-SM70-NEXT: .reg .f32 %f<10>;
429417
; CHECK-SM70-EMPTY:
430418
; CHECK-SM70-NEXT: // %bb.0:
431419
; CHECK-SM70-NEXT: ld.param.u16 %r1, [fma_bf16_expanded_no_nans_multiple_uses_of_fma_param_2];
@@ -445,38 +433,34 @@ define bfloat @fma_bf16_expanded_no_nans_multiple_uses_of_fma(bfloat %a, bfloat
445433
; CHECK-SM70-NEXT: setp.nan.f32 %p1, %f4, %f4;
446434
; CHECK-SM70-NEXT: or.b32 %r11, %r7, 4194304;
447435
; CHECK-SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
436+
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
448437
; CHECK-SM70-NEXT: and.b32 %r13, %r12, -65536;
449438
; CHECK-SM70-NEXT: mov.b32 %f5, %r13;
450-
; CHECK-SM70-NEXT: max.f32 %f6, %f5, 0f00000000;
439+
; CHECK-SM70-NEXT: setp.gt.f32 %p2, %f5, 0f00000000;
440+
; CHECK-SM70-NEXT: selp.b16 %rs3, %rs1, 0x0000, %p2;
441+
; CHECK-SM70-NEXT: add.f32 %f6, %f5, 0f40E00000;
451442
; CHECK-SM70-NEXT: mov.b32 %r14, %f6;
452443
; CHECK-SM70-NEXT: bfe.u32 %r15, %r14, 16, 1;
453444
; CHECK-SM70-NEXT: add.s32 %r16, %r15, %r14;
454445
; CHECK-SM70-NEXT: add.s32 %r17, %r16, 32767;
455-
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
446+
; CHECK-SM70-NEXT: setp.nan.f32 %p3, %f6, %f6;
456447
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
457-
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
458-
; CHECK-SM70-NEXT: add.f32 %f7, %f5, 0f40E00000;
459-
; CHECK-SM70-NEXT: mov.b32 %r20, %f7;
460-
; CHECK-SM70-NEXT: bfe.u32 %r21, %r20, 16, 1;
461-
; CHECK-SM70-NEXT: add.s32 %r22, %r21, %r20;
462-
; CHECK-SM70-NEXT: add.s32 %r23, %r22, 32767;
463-
; CHECK-SM70-NEXT: setp.nan.f32 %p3, %f7, %f7;
464-
; CHECK-SM70-NEXT: or.b32 %r24, %r20, 4194304;
465-
; CHECK-SM70-NEXT: selp.b32 %r25, %r24, %r23, %p3;
466-
; CHECK-SM70-NEXT: and.b32 %r26, %r25, -65536;
467-
; CHECK-SM70-NEXT: mov.b32 %f8, %r26;
468-
; CHECK-SM70-NEXT: and.b32 %r27, %r19, -65536;
469-
; CHECK-SM70-NEXT: mov.b32 %f9, %r27;
470-
; CHECK-SM70-NEXT: add.f32 %f10, %f9, %f8;
471-
; CHECK-SM70-NEXT: mov.b32 %r28, %f10;
472-
; CHECK-SM70-NEXT: bfe.u32 %r29, %r28, 16, 1;
473-
; CHECK-SM70-NEXT: add.s32 %r30, %r29, %r28;
474-
; CHECK-SM70-NEXT: add.s32 %r31, %r30, 32767;
475-
; CHECK-SM70-NEXT: setp.nan.f32 %p4, %f10, %f10;
476-
; CHECK-SM70-NEXT: or.b32 %r32, %r28, 4194304;
477-
; CHECK-SM70-NEXT: selp.b32 %r33, %r32, %r31, %p4;
478-
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r33; }
479-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
448+
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p3;
449+
; CHECK-SM70-NEXT: cvt.u32.u16 %r20, %rs3;
450+
; CHECK-SM70-NEXT: shl.b32 %r21, %r20, 16;
451+
; CHECK-SM70-NEXT: mov.b32 %f7, %r21;
452+
; CHECK-SM70-NEXT: and.b32 %r22, %r19, -65536;
453+
; CHECK-SM70-NEXT: mov.b32 %f8, %r22;
454+
; CHECK-SM70-NEXT: add.f32 %f9, %f7, %f8;
455+
; CHECK-SM70-NEXT: mov.b32 %r23, %f9;
456+
; CHECK-SM70-NEXT: bfe.u32 %r24, %r23, 16, 1;
457+
; CHECK-SM70-NEXT: add.s32 %r25, %r24, %r23;
458+
; CHECK-SM70-NEXT: add.s32 %r26, %r25, 32767;
459+
; CHECK-SM70-NEXT: setp.nan.f32 %p4, %f9, %f9;
460+
; CHECK-SM70-NEXT: or.b32 %r27, %r23, 4194304;
461+
; CHECK-SM70-NEXT: selp.b32 %r28, %r27, %r26, %p4;
462+
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs5}, %r28; }
463+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs5;
480464
; CHECK-SM70-NEXT: ret;
481465
%1 = fmul bfloat %a, %b
482466
%2 = fadd bfloat %1, %c

llvm/test/CodeGen/NVPTX/fma-relu-fma-intrinsic.ll

Lines changed: 13 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -36,18 +36,18 @@ define half @fma_f16_no_nans(half %a, half %b, half %c) #0 {
3636
;
3737
; CHECK-SM70-LABEL: fma_f16_no_nans(
3838
; CHECK-SM70: {
39-
; CHECK-SM70-NEXT: .reg .b16 %rs<6>;
40-
; CHECK-SM70-NEXT: .reg .f32 %f<3>;
39+
; CHECK-SM70-NEXT: .reg .pred %p<2>;
40+
; CHECK-SM70-NEXT: .reg .b16 %rs<7>;
4141
; CHECK-SM70-EMPTY:
4242
; CHECK-SM70-NEXT: // %bb.0:
4343
; CHECK-SM70-NEXT: ld.param.b16 %rs1, [fma_f16_no_nans_param_0];
4444
; CHECK-SM70-NEXT: ld.param.b16 %rs2, [fma_f16_no_nans_param_1];
4545
; CHECK-SM70-NEXT: ld.param.b16 %rs3, [fma_f16_no_nans_param_2];
4646
; CHECK-SM70-NEXT: fma.rn.f16 %rs4, %rs1, %rs2, %rs3;
47-
; CHECK-SM70-NEXT: cvt.f32.f16 %f1, %rs4;
48-
; CHECK-SM70-NEXT: max.f32 %f2, %f1, 0f00000000;
49-
; CHECK-SM70-NEXT: cvt.rn.f16.f32 %rs5, %f2;
50-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs5;
47+
; CHECK-SM70-NEXT: mov.b16 %rs5, 0x0000;
48+
; CHECK-SM70-NEXT: setp.gt.f16 %p1, %rs4, %rs5;
49+
; CHECK-SM70-NEXT: selp.b16 %rs6, %rs4, 0x0000, %p1;
50+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs6;
5151
; CHECK-SM70-NEXT: ret;
5252
%1 = call half @llvm.fma.f16(half %a, half %b, half %c)
5353
%2 = fcmp ogt half %1, 0.0
@@ -182,9 +182,9 @@ define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
182182
; CHECK-SM70-LABEL: fma_bf16_no_nans(
183183
; CHECK-SM70: {
184184
; CHECK-SM70-NEXT: .reg .pred %p<3>;
185-
; CHECK-SM70-NEXT: .reg .b16 %rs<3>;
186-
; CHECK-SM70-NEXT: .reg .b32 %r<20>;
187-
; CHECK-SM70-NEXT: .reg .f32 %f<7>;
185+
; CHECK-SM70-NEXT: .reg .b16 %rs<4>;
186+
; CHECK-SM70-NEXT: .reg .b32 %r<14>;
187+
; CHECK-SM70-NEXT: .reg .f32 %f<6>;
188188
; CHECK-SM70-EMPTY:
189189
; CHECK-SM70-NEXT: // %bb.0:
190190
; CHECK-SM70-NEXT: ld.param.u16 %r1, [fma_bf16_no_nans_param_2];
@@ -204,18 +204,12 @@ define bfloat @fma_bf16_no_nans(bfloat %a, bfloat %b, bfloat %c) #0 {
204204
; CHECK-SM70-NEXT: setp.nan.f32 %p1, %f4, %f4;
205205
; CHECK-SM70-NEXT: or.b32 %r11, %r7, 4194304;
206206
; CHECK-SM70-NEXT: selp.b32 %r12, %r11, %r10, %p1;
207+
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r12; }
207208
; CHECK-SM70-NEXT: and.b32 %r13, %r12, -65536;
208209
; CHECK-SM70-NEXT: mov.b32 %f5, %r13;
209-
; CHECK-SM70-NEXT: max.f32 %f6, %f5, 0f00000000;
210-
; CHECK-SM70-NEXT: mov.b32 %r14, %f6;
211-
; CHECK-SM70-NEXT: bfe.u32 %r15, %r14, 16, 1;
212-
; CHECK-SM70-NEXT: add.s32 %r16, %r15, %r14;
213-
; CHECK-SM70-NEXT: add.s32 %r17, %r16, 32767;
214-
; CHECK-SM70-NEXT: setp.nan.f32 %p2, %f6, %f6;
215-
; CHECK-SM70-NEXT: or.b32 %r18, %r14, 4194304;
216-
; CHECK-SM70-NEXT: selp.b32 %r19, %r18, %r17, %p2;
217-
; CHECK-SM70-NEXT: { .reg .b16 tmp; mov.b32 {tmp, %rs1}, %r19; }
218-
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs1;
210+
; CHECK-SM70-NEXT: setp.gt.f32 %p2, %f5, 0f00000000;
211+
; CHECK-SM70-NEXT: selp.b16 %rs3, %rs1, 0x0000, %p2;
212+
; CHECK-SM70-NEXT: st.param.b16 [func_retval0], %rs3;
219213
; CHECK-SM70-NEXT: ret;
220214
%1 = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
221215
%2 = fcmp ogt bfloat %1, 0.0

0 commit comments

Comments
 (0)