; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64 --enable-no-signed-zeros-fp-math < %s | FileCheck %s --check-prefix=NO-SIGNED-ZEROS

; Test folding of float->int->float roundtrips into float-only operations.
; The optimization converts patterns like:
;   sitofp(fptosi(x)) -> ftrunc(x)
;   sitofp(smin(fptosi(x), C)) -> fminnum(ftrunc(x), (float)C)
; This is relevant for AArch64 as it avoids GPR bouncing and keeps computation
; in SIMD/FP registers.
; Plain signed roundtrip: by default it stays as fcvtzs+scvtf; only under
; --enable-no-signed-zeros-fp-math does it fold to a single frintz (the fold
; presumably differs on -0.0 — confirm against the transform's guard).
define float @test_signed_basic(float %x) {
; CHECK-LABEL: test_signed_basic:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcvtzs s0, s0
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_basic:
; NO-SIGNED-ZEROS:       // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
; NO-SIGNED-ZEROS-NEXT:    ret
entry:
  %i = fptosi float %x to i32
  %f = sitofp i32 %i to float
  ret float %f
}
| 27 | + |
; Unsigned roundtrip: fcvtzu+ucvtf by default; folds to frintz under
; --enable-no-signed-zeros-fp-math, same as the signed case.
define float @test_unsigned_basic(float %x) {
; CHECK-LABEL: test_unsigned_basic:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcvtzu s0, s0
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_basic:
; NO-SIGNED-ZEROS:       // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
; NO-SIGNED-ZEROS-NEXT:    ret
entry:
  %i = fptoui float %x to i32
  %f = uitofp i32 %i to float
  ret float %f
}
| 44 | + |
; Signed clamp to [-512, 1023]: both bounds are exactly representable in f32,
; so the no-signed-zeros run folds the whole chain to frintz+fmaxnm+fminnm
; and never leaves the FP/SIMD register file; the default run bounces through
; GPRs (fcvtzs/csel/scvtf).
define float @test_signed_min_max(float %x) {
; CHECK-LABEL: test_signed_min_max:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcvtzs w9, s0
; CHECK-NEXT:    mov w8, #-512 // =0xfffffe00
; CHECK-NEXT:    cmn w9, #512
; CHECK-NEXT:    csel w8, w9, w8, gt
; CHECK-NEXT:    mov w9, #1023 // =0x3ff
; CHECK-NEXT:    cmp w8, #1023
; CHECK-NEXT:    csel w8, w8, w9, lt
; CHECK-NEXT:    scvtf s0, w8
; CHECK-NEXT:    ret
;
; NO-SIGNED-ZEROS-LABEL: test_signed_min_max:
; NO-SIGNED-ZEROS:       // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT:    movi v1.2s, #196, lsl #24
; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
; NO-SIGNED-ZEROS-NEXT:    mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT:    movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT:    fmaxnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT:    fmov s1, w8
; NO-SIGNED-ZEROS-NEXT:    fminnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT:    ret
entry:
  %i = fptosi float %x to i32
  %lower = call i32 @llvm.smax.i32(i32 %i, i32 -512)
  %clamped = call i32 @llvm.smin.i32(i32 %lower, i32 1023)
  %f = sitofp i32 %clamped to float
  ret float %f
}
| 75 | + |
; Unsigned clamp to [512, 1023]: as in the signed case, the no-signed-zeros
; run folds to frintz+fmaxnm+fminnm with FP immediates; the default run uses
; fcvtzu/csel/ucvtf through GPRs.
define float @test_unsigned_min_max(float %x) {
; CHECK-LABEL: test_unsigned_min_max:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcvtzu w9, s0
; CHECK-NEXT:    mov w8, #512 // =0x200
; CHECK-NEXT:    cmp w9, #512
; CHECK-NEXT:    csel w8, w9, w8, hi
; CHECK-NEXT:    mov w9, #1023 // =0x3ff
; CHECK-NEXT:    cmp w8, #1023
; CHECK-NEXT:    csel w8, w8, w9, lo
; CHECK-NEXT:    ucvtf s0, w8
; CHECK-NEXT:    ret
;
; NO-SIGNED-ZEROS-LABEL: test_unsigned_min_max:
; NO-SIGNED-ZEROS:       // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT:    movi v1.2s, #68, lsl #24
; NO-SIGNED-ZEROS-NEXT:    frintz s0, s0
; NO-SIGNED-ZEROS-NEXT:    mov w8, #49152 // =0xc000
; NO-SIGNED-ZEROS-NEXT:    movk w8, #17535, lsl #16
; NO-SIGNED-ZEROS-NEXT:    fmaxnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT:    fmov s1, w8
; NO-SIGNED-ZEROS-NEXT:    fminnm s0, s0, s1
; NO-SIGNED-ZEROS-NEXT:    ret
entry:
  %i = fptoui float %x to i32
  %lower = call i32 @llvm.umax.i32(i32 %i, i32 512)
  %clamped = call i32 @llvm.umin.i32(i32 %lower, i32 1023)
  %f = uitofp i32 %clamped to float
  ret float %f
}
| 106 | + |
; Negative test: 16777217 (2^24 + 1) is NOT exactly representable in f32, so
; the smin bound cannot become an fminnum constant — both runs keep the
; scalar fcvtzs/csel/scvtf sequence and no frintz fold happens.
define float @test_inexact_16777217(float %x) {
; CHECK-LABEL: test_inexact_16777217:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fcvtzs w8, s0
; CHECK-NEXT:    mov w9, #16777216 // =0x1000000
; CHECK-NEXT:    cmp w8, w9
; CHECK-NEXT:    mov w9, #1 // =0x1
; CHECK-NEXT:    movk w9, #256, lsl #16
; CHECK-NEXT:    csel w8, w8, w9, le
; CHECK-NEXT:    scvtf s0, w8
; CHECK-NEXT:    ret
;
; NO-SIGNED-ZEROS-LABEL: test_inexact_16777217:
; NO-SIGNED-ZEROS:       // %bb.0: // %entry
; NO-SIGNED-ZEROS-NEXT:    fcvtzs w8, s0
; NO-SIGNED-ZEROS-NEXT:    mov w9, #16777216 // =0x1000000
; NO-SIGNED-ZEROS-NEXT:    cmp w8, w9
; NO-SIGNED-ZEROS-NEXT:    mov w9, #1 // =0x1
; NO-SIGNED-ZEROS-NEXT:    movk w9, #256, lsl #16
; NO-SIGNED-ZEROS-NEXT:    csel w8, w8, w9, le
; NO-SIGNED-ZEROS-NEXT:    scvtf s0, w8
; NO-SIGNED-ZEROS-NEXT:    ret
entry:
  %i = fptosi float %x to i32
  %clamped = call i32 @llvm.smin.i32(i32 %i, i32 16777217)
  %f = sitofp i32 %clamped to float
  ret float %f
}
| 136 | + |
; Declarations for the clamp intrinsics used above.
declare i32 @llvm.smin.i32(i32, i32)
declare i32 @llvm.smax.i32(i32, i32)
declare i32 @llvm.umin.i32(i32, i32)
declare i32 @llvm.umax.i32(i32, i32)