|
| 1 | +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 |
| 2 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -verify-machineinstrs | FileCheck %s --check-prefixes=SSE2 |
| 3 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -verify-machineinstrs | FileCheck %s --check-prefixes=AVX |
| 4 | +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f -verify-machineinstrs | FileCheck %s --check-prefixes=AVX512 |
| 5 | + |
; Checks x86-64 codegen for a half value that stays live across an inline asm
; statement clobbering xmm0-xmm15: %f is truncated to half, extended back to
; float, the asm runs, then the float is stored to %p and the half is returned.
; Per the expected output below, the SSE2 and AVX runs convert through the
; __truncsfhf2 / __extendhfsf2 libcalls and use 4-byte spill slots, while the
; AVX512 run converts inline (vcvtps2ph / vcvtph2ps) and uses 16-byte slots.
| 6 | +define half @test(float %f, ptr %p) nounwind { |
| 7 | +; SSE2-LABEL: test: |
| 8 | +; SSE2: # %bb.0: |
| 9 | +; SSE2-NEXT: pushq %rbx |
| 10 | +; SSE2-NEXT: subq $16, %rsp |
| 11 | +; SSE2-NEXT: movq %rdi, %rbx |
| 12 | +; SSE2-NEXT: callq __truncsfhf2@PLT |
| 13 | +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| 14 | +; SSE2-NEXT: callq __extendhfsf2@PLT |
| 15 | +; SSE2-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| 16 | +; SSE2-NEXT: #APP |
| 17 | +; SSE2-NEXT: #NO_APP |
| 18 | +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload |
| 19 | +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero |
| 20 | +; SSE2-NEXT: movss %xmm0, (%rbx) |
| 21 | +; SSE2-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload |
| 22 | +; SSE2-NEXT: # xmm0 = mem[0],zero,zero,zero |
| 23 | +; SSE2-NEXT: addq $16, %rsp |
| 24 | +; SSE2-NEXT: popq %rbx |
| 25 | +; SSE2-NEXT: retq |
| 26 | +; |
| 27 | +; AVX-LABEL: test: |
| 28 | +; AVX: # %bb.0: |
| 29 | +; AVX-NEXT: pushq %rbx |
| 30 | +; AVX-NEXT: subq $16, %rsp |
| 31 | +; AVX-NEXT: movq %rdi, %rbx |
| 32 | +; AVX-NEXT: callq __truncsfhf2@PLT |
| 33 | +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| 34 | +; AVX-NEXT: callq __extendhfsf2@PLT |
| 35 | +; AVX-NEXT: vmovss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill |
| 36 | +; AVX-NEXT: #APP |
| 37 | +; AVX-NEXT: #NO_APP |
| 38 | +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload |
| 39 | +; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero |
| 40 | +; AVX-NEXT: vmovss %xmm0, (%rbx) |
| 41 | +; AVX-NEXT: vmovss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload |
| 42 | +; AVX-NEXT: # xmm0 = mem[0],zero,zero,zero |
| 43 | +; AVX-NEXT: addq $16, %rsp |
| 44 | +; AVX-NEXT: popq %rbx |
| 45 | +; AVX-NEXT: retq |
| 46 | +; |
| 47 | +; AVX512-LABEL: test: |
| 48 | +; AVX512: # %bb.0: |
| 49 | +; AVX512-NEXT: vcvtps2ph $4, %xmm0, %xmm0 |
| 50 | +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| 51 | +; AVX512-NEXT: vcvtph2ps %xmm0, %xmm0 |
| 52 | +; AVX512-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill |
| 53 | +; AVX512-NEXT: #APP |
| 54 | +; AVX512-NEXT: #NO_APP |
| 55 | +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload |
| 56 | +; AVX512-NEXT: vmovss %xmm0, (%rdi) |
| 57 | +; AVX512-NEXT: vmovaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload |
| 58 | +; AVX512-NEXT: retq |
; Round-trip float -> half -> float; both %t and %t2 are used after the asm.
| 59 | + %t = fptrunc float %f to half |
| 60 | + %t2 = fpext half %t to float |
; Empty side-effecting asm whose clobber list names xmm0 through xmm15 (plus
; dirflag, fpsr and flags); the expected output spills both values around it.
| 61 | + tail call void asm sideeffect "", "~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"() |
| 62 | + store float %t2, ptr %p |
| 63 | + ret half %t |
| 64 | +} |
0 commit comments