@@ -310,29 +310,43 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
310310define <4 x bfloat> @sitofp_i64 (<4 x i64 > %a ) #0 {
311311; CHECK-CVT-LABEL: sitofp_i64:
312312; CHECK-CVT: // %bb.0:
313- ; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
314- ; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
315- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
316- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
317- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
318- ; CHECK-CVT-NEXT: movi v1.4s, #1
319- ; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
320- ; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
321- ; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
322- ; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
323- ; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
324- ; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
325- ; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
313+ ; CHECK-CVT-NEXT: mov x8, v0.d[1]
314+ ; CHECK-CVT-NEXT: fmov x9, d0
315+ ; CHECK-CVT-NEXT: scvtf s2, x9
316+ ; CHECK-CVT-NEXT: mov x9, v1.d[1]
317+ ; CHECK-CVT-NEXT: scvtf s0, x8
318+ ; CHECK-CVT-NEXT: fmov x8, d1
319+ ; CHECK-CVT-NEXT: scvtf s1, x8
320+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
321+ ; CHECK-CVT-NEXT: scvtf s0, x9
322+ ; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
323+ ; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
324+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
325+ ; CHECK-CVT-NEXT: movi v0.4s, #1
326+ ; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
327+ ; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
328+ ; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
329+ ; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
330+ ; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
331+ ; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
332+ ; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
326333; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
327334; CHECK-CVT-NEXT: ret
328335;
329336; CHECK-BF16-LABEL: sitofp_i64:
330337; CHECK-BF16: // %bb.0:
331- ; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
332- ; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
333- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
334- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
335- ; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
338+ ; CHECK-BF16-NEXT: mov x8, v0.d[1]
339+ ; CHECK-BF16-NEXT: fmov x9, d0
340+ ; CHECK-BF16-NEXT: scvtf s2, x9
341+ ; CHECK-BF16-NEXT: mov x9, v1.d[1]
342+ ; CHECK-BF16-NEXT: scvtf s0, x8
343+ ; CHECK-BF16-NEXT: fmov x8, d1
344+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
345+ ; CHECK-BF16-NEXT: scvtf s0, x8
346+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
347+ ; CHECK-BF16-NEXT: scvtf s0, x9
348+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
349+ ; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
336350; CHECK-BF16-NEXT: ret
337351 %1 = sitofp <4 x i64 > %a to <4 x bfloat>
338352 ret <4 x bfloat> %1
@@ -413,12 +427,39 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
413427define <4 x bfloat> @uitofp_i64 (<4 x i64 > %a ) #0 {
414428; CHECK-CVT-LABEL: uitofp_i64:
415429; CHECK-CVT: // %bb.0:
416- ; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
417- ; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
418- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
419- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
420- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
430+ ; CHECK-CVT-NEXT: movi v2.2d, #0x000000ffffffff
431+ ; CHECK-CVT-NEXT: ushr v3.2d, v0.2d, #32
432+ ; CHECK-CVT-NEXT: ushr v4.2d, v1.2d, #32
433+ ; CHECK-CVT-NEXT: mov x8, v3.d[1]
434+ ; CHECK-CVT-NEXT: fmov x10, d3
435+ ; CHECK-CVT-NEXT: and v0.16b, v0.16b, v2.16b
436+ ; CHECK-CVT-NEXT: and v1.16b, v1.16b, v2.16b
437+ ; CHECK-CVT-NEXT: scvtf s3, x10
438+ ; CHECK-CVT-NEXT: scvtf s5, x8
439+ ; CHECK-CVT-NEXT: fmov x8, d0
440+ ; CHECK-CVT-NEXT: mov x9, v0.d[1]
441+ ; CHECK-CVT-NEXT: scvtf s2, x8
442+ ; CHECK-CVT-NEXT: fmov x8, d4
443+ ; CHECK-CVT-NEXT: scvtf s0, x9
444+ ; CHECK-CVT-NEXT: mov x9, v4.d[1]
445+ ; CHECK-CVT-NEXT: mov v3.s[1], v5.s[0]
446+ ; CHECK-CVT-NEXT: scvtf s4, x8
447+ ; CHECK-CVT-NEXT: fmov x8, d1
448+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
449+ ; CHECK-CVT-NEXT: scvtf s0, x8
450+ ; CHECK-CVT-NEXT: mov x8, v1.d[1]
451+ ; CHECK-CVT-NEXT: scvtf s1, x9
452+ ; CHECK-CVT-NEXT: mov v3.s[2], v4.s[0]
453+ ; CHECK-CVT-NEXT: mov v2.s[2], v0.s[0]
454+ ; CHECK-CVT-NEXT: scvtf s0, x8
455+ ; CHECK-CVT-NEXT: mov w8, #1333788672 // =0x4f800000
456+ ; CHECK-CVT-NEXT: mov v3.s[3], v1.s[0]
457+ ; CHECK-CVT-NEXT: dup v1.4s, w8
458+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
459+ ; CHECK-CVT-NEXT: fmul v0.4s, v3.4s, v1.4s
421460; CHECK-CVT-NEXT: movi v1.4s, #1
461+ ; CHECK-CVT-NEXT: fadd v0.4s, v0.4s, v2.4s
462+ ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
422463; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
423464; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
424465; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
@@ -431,10 +472,37 @@ define <4 x bfloat> @uitofp_i64(<4 x i64> %a) #0 {
431472;
432473; CHECK-BF16-LABEL: uitofp_i64:
433474; CHECK-BF16: // %bb.0:
434- ; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
435- ; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
436- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
437- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
475+ ; CHECK-BF16-NEXT: movi v2.2d, #0x000000ffffffff
476+ ; CHECK-BF16-NEXT: ushr v3.2d, v0.2d, #32
477+ ; CHECK-BF16-NEXT: ushr v4.2d, v1.2d, #32
478+ ; CHECK-BF16-NEXT: mov x8, v3.d[1]
479+ ; CHECK-BF16-NEXT: fmov x10, d3
480+ ; CHECK-BF16-NEXT: and v0.16b, v0.16b, v2.16b
481+ ; CHECK-BF16-NEXT: and v1.16b, v1.16b, v2.16b
482+ ; CHECK-BF16-NEXT: scvtf s3, x10
483+ ; CHECK-BF16-NEXT: scvtf s5, x8
484+ ; CHECK-BF16-NEXT: fmov x8, d0
485+ ; CHECK-BF16-NEXT: mov x9, v0.d[1]
486+ ; CHECK-BF16-NEXT: scvtf s2, x8
487+ ; CHECK-BF16-NEXT: fmov x8, d4
488+ ; CHECK-BF16-NEXT: scvtf s0, x9
489+ ; CHECK-BF16-NEXT: mov x9, v4.d[1]
490+ ; CHECK-BF16-NEXT: mov v3.s[1], v5.s[0]
491+ ; CHECK-BF16-NEXT: scvtf s4, x8
492+ ; CHECK-BF16-NEXT: fmov x8, d1
493+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
494+ ; CHECK-BF16-NEXT: scvtf s0, x8
495+ ; CHECK-BF16-NEXT: mov x8, v1.d[1]
496+ ; CHECK-BF16-NEXT: scvtf s1, x9
497+ ; CHECK-BF16-NEXT: mov v3.s[2], v4.s[0]
498+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
499+ ; CHECK-BF16-NEXT: scvtf s0, x8
500+ ; CHECK-BF16-NEXT: mov w8, #1333788672 // =0x4f800000
501+ ; CHECK-BF16-NEXT: mov v3.s[3], v1.s[0]
502+ ; CHECK-BF16-NEXT: dup v1.4s, w8
503+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
504+ ; CHECK-BF16-NEXT: fmul v0.4s, v3.4s, v1.4s
505+ ; CHECK-BF16-NEXT: fadd v0.4s, v0.4s, v2.4s
438506; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
439507; CHECK-BF16-NEXT: ret
440508 %1 = uitofp <4 x i64 > %a to <4 x bfloat>
0 commit comments