@@ -310,29 +310,43 @@ define <4 x bfloat> @sitofp_i32(<4 x i32> %a) #0 {
310310define <4 x bfloat> @sitofp_i64 (<4 x i64 > %a ) #0 {
311311; CHECK-CVT-LABEL: sitofp_i64:
312312; CHECK-CVT: // %bb.0:
313- ; CHECK-CVT-NEXT: scvtf v0.2d, v0.2d
314- ; CHECK-CVT-NEXT: scvtf v1.2d, v1.2d
315- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
316- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
317- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
318- ; CHECK-CVT-NEXT: movi v1.4s, #1
319- ; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
320- ; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
321- ; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
322- ; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
323- ; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
324- ; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
325- ; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
313+ ; CHECK-CVT-NEXT: mov x8, v0.d[1]
314+ ; CHECK-CVT-NEXT: fmov x9, d0
315+ ; CHECK-CVT-NEXT: scvtf s2, x9
316+ ; CHECK-CVT-NEXT: mov x9, v1.d[1]
317+ ; CHECK-CVT-NEXT: scvtf s0, x8
318+ ; CHECK-CVT-NEXT: fmov x8, d1
319+ ; CHECK-CVT-NEXT: scvtf s1, x8
320+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
321+ ; CHECK-CVT-NEXT: scvtf s0, x9
322+ ; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
323+ ; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
324+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
325+ ; CHECK-CVT-NEXT: movi v0.4s, #1
326+ ; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
327+ ; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
328+ ; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
329+ ; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
330+ ; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
331+ ; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
332+ ; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
326333; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
327334; CHECK-CVT-NEXT: ret
328335;
329336; CHECK-BF16-LABEL: sitofp_i64:
330337; CHECK-BF16: // %bb.0:
331- ; CHECK-BF16-NEXT: scvtf v0.2d, v0.2d
332- ; CHECK-BF16-NEXT: scvtf v1.2d, v1.2d
333- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
334- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
335- ; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
338+ ; CHECK-BF16-NEXT: mov x8, v0.d[1]
339+ ; CHECK-BF16-NEXT: fmov x9, d0
340+ ; CHECK-BF16-NEXT: scvtf s2, x9
341+ ; CHECK-BF16-NEXT: mov x9, v1.d[1]
342+ ; CHECK-BF16-NEXT: scvtf s0, x8
343+ ; CHECK-BF16-NEXT: fmov x8, d1
344+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
345+ ; CHECK-BF16-NEXT: scvtf s0, x8
346+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
347+ ; CHECK-BF16-NEXT: scvtf s0, x9
348+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
349+ ; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
336350; CHECK-BF16-NEXT: ret
337351 %1 = sitofp <4 x i64 > %a to <4 x bfloat>
338352 ret <4 x bfloat> %1
@@ -413,29 +427,43 @@ define <4 x bfloat> @uitofp_i32(<4 x i32> %a) #0 {
413427define <4 x bfloat> @uitofp_i64 (<4 x i64 > %a ) #0 {
414428; CHECK-CVT-LABEL: uitofp_i64:
415429; CHECK-CVT: // %bb.0:
416- ; CHECK-CVT-NEXT: ucvtf v0.2d, v0.2d
417- ; CHECK-CVT-NEXT: ucvtf v1.2d, v1.2d
418- ; CHECK-CVT-NEXT: movi v2.4s, #127, msl #8
419- ; CHECK-CVT-NEXT: fcvtn v0.2s, v0.2d
420- ; CHECK-CVT-NEXT: fcvtn2 v0.4s, v1.2d
421- ; CHECK-CVT-NEXT: movi v1.4s, #1
422- ; CHECK-CVT-NEXT: ushr v3.4s, v0.4s, #16
423- ; CHECK-CVT-NEXT: add v2.4s, v0.4s, v2.4s
424- ; CHECK-CVT-NEXT: and v1.16b, v3.16b, v1.16b
425- ; CHECK-CVT-NEXT: fcmeq v3.4s, v0.4s, v0.4s
426- ; CHECK-CVT-NEXT: orr v0.4s, #64, lsl #16
427- ; CHECK-CVT-NEXT: add v1.4s, v1.4s, v2.4s
428- ; CHECK-CVT-NEXT: bit v0.16b, v1.16b, v3.16b
430+ ; CHECK-CVT-NEXT: mov x8, v0.d[1]
431+ ; CHECK-CVT-NEXT: fmov x9, d0
432+ ; CHECK-CVT-NEXT: ucvtf s2, x9
433+ ; CHECK-CVT-NEXT: mov x9, v1.d[1]
434+ ; CHECK-CVT-NEXT: ucvtf s0, x8
435+ ; CHECK-CVT-NEXT: fmov x8, d1
436+ ; CHECK-CVT-NEXT: ucvtf s1, x8
437+ ; CHECK-CVT-NEXT: mov v2.s[1], v0.s[0]
438+ ; CHECK-CVT-NEXT: ucvtf s0, x9
439+ ; CHECK-CVT-NEXT: mov v2.s[2], v1.s[0]
440+ ; CHECK-CVT-NEXT: movi v1.4s, #127, msl #8
441+ ; CHECK-CVT-NEXT: mov v2.s[3], v0.s[0]
442+ ; CHECK-CVT-NEXT: movi v0.4s, #1
443+ ; CHECK-CVT-NEXT: ushr v3.4s, v2.4s, #16
444+ ; CHECK-CVT-NEXT: add v1.4s, v2.4s, v1.4s
445+ ; CHECK-CVT-NEXT: and v0.16b, v3.16b, v0.16b
446+ ; CHECK-CVT-NEXT: fcmeq v3.4s, v2.4s, v2.4s
447+ ; CHECK-CVT-NEXT: orr v2.4s, #64, lsl #16
448+ ; CHECK-CVT-NEXT: add v0.4s, v0.4s, v1.4s
449+ ; CHECK-CVT-NEXT: bif v0.16b, v2.16b, v3.16b
429450; CHECK-CVT-NEXT: shrn v0.4h, v0.4s, #16
430451; CHECK-CVT-NEXT: ret
431452;
432453; CHECK-BF16-LABEL: uitofp_i64:
433454; CHECK-BF16: // %bb.0:
434- ; CHECK-BF16-NEXT: ucvtf v0.2d, v0.2d
435- ; CHECK-BF16-NEXT: ucvtf v1.2d, v1.2d
436- ; CHECK-BF16-NEXT: fcvtn v0.2s, v0.2d
437- ; CHECK-BF16-NEXT: fcvtn2 v0.4s, v1.2d
438- ; CHECK-BF16-NEXT: bfcvtn v0.4h, v0.4s
455+ ; CHECK-BF16-NEXT: mov x8, v0.d[1]
456+ ; CHECK-BF16-NEXT: fmov x9, d0
457+ ; CHECK-BF16-NEXT: ucvtf s2, x9
458+ ; CHECK-BF16-NEXT: mov x9, v1.d[1]
459+ ; CHECK-BF16-NEXT: ucvtf s0, x8
460+ ; CHECK-BF16-NEXT: fmov x8, d1
461+ ; CHECK-BF16-NEXT: mov v2.s[1], v0.s[0]
462+ ; CHECK-BF16-NEXT: ucvtf s0, x8
463+ ; CHECK-BF16-NEXT: mov v2.s[2], v0.s[0]
464+ ; CHECK-BF16-NEXT: ucvtf s0, x9
465+ ; CHECK-BF16-NEXT: mov v2.s[3], v0.s[0]
466+ ; CHECK-BF16-NEXT: bfcvtn v0.4h, v2.4s
439467; CHECK-BF16-NEXT: ret
440468 %1 = uitofp <4 x i64 > %a to <4 x bfloat>
441469 ret <4 x bfloat> %1
0 commit comments