-
Notifications
You must be signed in to change notification settings - Fork 15.2k
Open
Labels
Description
Description:
bash> cat test.cpp
extern "C" {
#include "altivec.h"
/*
 * vsexp: element-wise fused multiply-add over an array of 4-float vectors.
 *
 * For every vector index j in [0, *n), loads four floats from
 * var1321In_a + 4*j, computes (x * var1323) + var1325 with vec_madd, and
 * stores the four results to __output_a + 4*j.
 *
 * __output_a   destination buffer; 4 * (*n) floats are written.
 * var1321In_a  source buffer; 4 * (*n) floats are read.
 * n            pointer to the number of 4-float vectors to process (not the
 *              number of scalar floats). Nothing is done when *n <= 0.
 *
 * This is a compiler reproducer: both constants are deliberately declared
 * inside the loop body and the loop is marked nounroll, so the generated
 * loop shows whether the loop-invariant addend is hoisted.
 */
void vsexp (float* __restrict __output_a,float* __restrict var1321In_a,int* __restrict n)
{
    int j;
    /* Ask the compiler not to unroll, keeping the loop body easy to inspect. */
# pragma nounroll
    /* Start at 0: the previous initializer `j=j*4` read j while it was still
       uninitialized, which is undefined behavior. */
    for(j=0;j<*n;j++) {
        vector float var1321;
        vector unsigned char var1326;
        /* Multiplier: the 4-byte pattern 63,184,170,59 repeated in each word. */
        vector unsigned char var1323=(vector unsigned char) {63,184,170,59,63,184,170,59,63,184,170,59,63,184,170,59};
        /* Addend: the 4-byte pattern 69,195,244,0 repeated in each word. */
        vector unsigned char var1325=(vector unsigned char) {69,195,244,0,69,195,244,0,69,195,244,0,69,195,244,0};
        /* Load the j-th group of four input floats. */
        var1321=vec_xl(0,var1321In_a+4*(j));
        /* The byte vectors are reinterpreted as vector float for the FMA. */
        var1326=(vector unsigned char) vec_madd((vector float)(var1321), (vector float)(var1323), (vector float)(var1325));
        /* Store the j-th group of four result floats. */
        vec_xst((vector float)var1326,0,__output_a+4*(j));
    }
    return;
}
} //extern "C"
When compiled with -O3 -mllvm -disable-ppc-vsx-fma-mutation=false -mllvm -schedule-ppc-vsx-fma-mutation-early -mllvm -ppc-asm-full-reg-names -mcpu=pwr10 -m64,
the compiler generates the following asm (the loop has 6 instructions):
.vsexp:
# %bb.0: # %entry
lwz r5, 0(r5)
cmpwi r5, 1
bltlr cr0
# %bb.1: # %for.body.preheader
xxspltiw vs0, 1069066811
mtctr r5
li r5, 0
.align 5
L..BB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
lxvx vs1, r4, r5
xxspltiw vs2, 1170469888
xvmaddasp vs2, vs1, vs0
stxvx vs2, r3, r5
addi r5, r5, 16
bdnz L..BB0_2
# %bb.3: # %for.end
blr
However, more efficient code is possible: hoist `xxspltiw vs2, 1170469888` out of the loop and
change `xvmaddasp` to `xvmaddmsp`. The resulting asm is as follows (the loop has only 5 instructions):
.vsexp:
# %bb.0: # %entry
lwz r5, 0(r5)
cmpwi r5, 1
bltlr cr0
# %bb.1: # %for.body.preheader
xxspltiw vs0, 1069066811
xxspltiw vs1, 1170469888
mtctr r5
li r5, 0
.align 5
L..BB0_2: # %for.body
# =>This Inner Loop Header: Depth=1
lxvx vs2, r4, r5
xvmaddmsp vs2, vs0, vs1
stxvx vs2, r3, r5
addi r5, r5, 16
bdnz L..BB0_2
# %bb.3: # %for.end
blr