; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86-X87
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86-SSE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; Ideally this would compile to 5 multiplies.
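; For reference (not the exact DAG expansion), one 5-multiply addition chain for a^15 is:
;   t2 = a*a, t3 = t2*a, t6 = t3*t3, t12 = t6*t6, a^15 = t12*t3
; The expansions checked below currently use 6 multiplies.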

define double @pow_wrapper(double %a) nounwind readonly ssp noredzone {
; X86-X87-LABEL: pow_wrapper:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fld %st(0)
; X86-X87-NEXT:    fmul %st(1), %st
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmul %st, %st(1)
; X86-X87-NEXT:    fmul %st, %st(0)
; X86-X87-NEXT:    fmulp %st, %st(1)
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-8, %esp
; X86-SSE-NEXT:    subl $8, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movapd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm1, %xmm0
; X86-SSE-NEXT:    mulsd %xmm1, %xmm1
; X86-SSE-NEXT:    mulsd %xmm0, %xmm1
; X86-SSE-NEXT:    movsd %xmm1, (%esp)
; X86-SSE-NEXT:    fldl (%esp)
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper:
; X64:       # %bb.0:
; X64-NEXT:    movapd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm0, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    mulsd %xmm1, %xmm1
; X64-NEXT:    mulsd %xmm1, %xmm0
; X64-NEXT:    retq
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

define double @pow_wrapper_optsize(double %a) nounwind optsize {
; X86-X87-LABEL: pow_wrapper_optsize:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_optsize:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_optsize:
; X64:       # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

define double @pow_wrapper_pgso(double %a) nounwind !prof !14 {
; X86-X87-LABEL: pow_wrapper_pgso:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_pgso:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_pgso:
; X64:       # %bb.0:
; X64-NEXT:    movl $15, %edi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

define double @pow_wrapper_minsize(double %a) nounwind minsize {
; X86-X87-LABEL: pow_wrapper_minsize:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    subl $12, %esp
; X86-X87-NEXT:    fldl {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstpl (%esp)
; X86-X87-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    calll __powidf2
; X86-X87-NEXT:    addl $12, %esp
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: pow_wrapper_minsize:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    subl $12, %esp
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movsd %xmm0, (%esp)
; X86-SSE-NEXT:    movl $15, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powidf2
; X86-SSE-NEXT:    addl $12, %esp
; X86-SSE-NEXT:    retl
;
; X64-LABEL: pow_wrapper_minsize:
; X64:       # %bb.0:
; X64-NEXT:    pushq $15
; X64-NEXT:    popq %rdi
; X64-NEXT:    jmp __powidf2@PLT # TAILCALL
  %ret = tail call double @llvm.powi.f64.i32(double %a, i32 15) nounwind ; <double> [#uses=1]
  ret double %ret
}

define <2 x float> @powi_v2f32(<2 x float> %a) nounwind minsize {
; X86-X87-LABEL: powi_v2f32:
; X86-X87:       # %bb.0:
; X86-X87-NEXT:    pushl %esi
; X86-X87-NEXT:    subl $16, %esp
; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT:    flds {{[0-9]+}}(%esp)
; X86-X87-NEXT:    pushl $15
; X86-X87-NEXT:    popl %esi
; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    fstps (%esp)
; X86-X87-NEXT:    calll __powisf2
; X86-X87-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
; X86-X87-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT:    fstps (%esp)
; X86-X87-NEXT:    calll __powisf2
; X86-X87-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Reload
; X86-X87-NEXT:    fxch %st(1)
; X86-X87-NEXT:    addl $16, %esp
; X86-X87-NEXT:    popl %esi
; X86-X87-NEXT:    retl
;
; X86-SSE-LABEL: powi_v2f32:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %esi
; X86-SSE-NEXT:    subl $32, %esp
; X86-SSE-NEXT:    movups %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; X86-SSE-NEXT:    pushl $15
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movss %xmm0, (%esp)
; X86-SSE-NEXT:    calll __powisf2
; X86-SSE-NEXT:    movl %esi, {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movups {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; X86-SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X86-SSE-NEXT:    movss %xmm0, (%esp)
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    calll __powisf2
; X86-SSE-NEXT:    fstps {{[0-9]+}}(%esp)
; X86-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE-NEXT:    addl $32, %esp
; X86-SSE-NEXT:    popl %esi
; X86-SSE-NEXT:    retl
;
; X64-LABEL: powi_v2f32:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rbx
; X64-NEXT:    subq $32, %rsp
; X64-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
; X64-NEXT:    pushq $15
; X64-NEXT:    popq %rbx
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq __powisf2@PLT
; X64-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; X64-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
; X64-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; X64-NEXT:    movl %ebx, %edi
; X64-NEXT:    callq __powisf2@PLT
; X64-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
; X64-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X64-NEXT:    movaps %xmm1, %xmm0
; X64-NEXT:    addq $32, %rsp
; X64-NEXT:    popq %rbx
; X64-NEXT:    retq
  %ret = tail call <2 x float> @llvm.powi.v2f32.i32(<2 x float> %a, i32 15) nounwind
  ret <2 x float> %ret
}

declare double @llvm.powi.f64.i32(double, i32) nounwind readonly
declare <2 x float> @llvm.powi.v2f32.i32(<2 x float>, i32) nounwind readonly

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}