Skip to content

[PowerPC] VSX FMA mutation optimization is missed in some cases with option -schedule-ppc-vsx-fma-mutation-early #111906

@diggerlin

Description

@diggerlin

Description:

bash> cat test.cpp

   // Reproducer for the missed VSX FMA mutation described in this report.
   // vsexp loops while j < *n; each iteration loads one vector float from
   // var1321In_a + 4*j, fused-multiply-adds it with two constant vectors, and
   // stores the result to __output_a + 4*j.
   extern "C" {
#include "altivec.h"

void vsexp (float* __restrict __output_a,float* __restrict var1321In_a,int* __restrict n)
{
  int j;
  // The pragma below asks the compiler not to unroll the loop that follows.
  // NOTE(review): the loop initializer reads j (j*4) before j has been
  // assigned a value; the asm listings in this report start the index at 0.
# pragma nounroll
  for(j=j*4;j<*n;j++) {
    vector float var1321;
    vector unsigned char var1326;
    // Multiplier constant: bytes 63,184,170,59 read as one big-endian 32-bit
    // word are 0x3FB8AA3B = 1069066811, the immediate splatted into vs0 in the
    // asm listings.
    vector unsigned char var1323=(vector unsigned char) {63,184,170,59,63,184,170,59,63,184,170,59,63,184,170,59};
    // Addend constant: bytes 69,195,244,0 read the same way are
    // 0x45C3F400 = 1170469888, the other xxspltiw immediate in the asm listings.
    vector unsigned char var1325=(vector unsigned char) {69,195,244,0,69,195,244,0,69,195,244,0,69,195,244,0};
    // Load one vector float (4 floats) from the input at element offset 4*j.
    var1321=vec_xl(0,var1321In_a+4*(j));
    // Multiply-add: var1321 * var1323 + var1325, with the byte vectors
    // reinterpreted as vector float.
    var1326=(vector unsigned char) vec_madd((vector float)(var1321), (vector float)(var1323), (vector float)(var1325));
    // Store the result vector to the output at the same element offset.
     vec_xst((vector float)var1326,0,__output_a+4*(j));
}
  return;
}
} //extern "C"

When compiled with -O3 -mllvm -disable-ppc-vsx-fma-mutation=false -mllvm -schedule-ppc-vsx-fma-mutation-early -mllvm -ppc-asm-full-reg-names -mcpu=pwr10 -m64

it generates the following asm (the loop has 6 instructions):

 .vsexp:
# %bb.0:                                # %entry
        lwz r5, 0(r5)
        cmpwi   r5, 1
        bltlr   cr0
# %bb.1:                                # %for.body.preheader
        xxspltiw vs0, 1069066811
        mtctr r5
        li r5, 0
        .align  5
L..BB0_2:                               # %for.body
                                        # =>This Inner Loop Header: Depth=1
        lxvx vs1, r4, r5
        xxspltiw vs2, 1170469888
        xvmaddasp vs2, vs1, vs0
        stxvx vs2, r3, r5
        addi r5, r5, 16
        bdnz L..BB0_2
# %bb.3:                                # %for.end
        blr

Obviously, more efficient code is possible: move `xxspltiw vs2, 1170469888` out of the loop and
change `xvmaddasp` to `xvmaddmsp`. The resulting asm is as follows (the loop has only 5 instructions):

.vsexp:
# %bb.0:                                # %entry
        lwz r5, 0(r5)
        cmpwi   r5, 1
        bltlr   cr0
# %bb.1:                                # %for.body.preheader
        xxspltiw vs0, 1069066811
        xxspltiw vs1, 1170469888
        mtctr r5
        li r5, 0
        .align  5
L..BB0_2:                               # %for.body
                                        # =>This Inner Loop Header: Depth=1
        lxvx vs2, r4, r5
        xvmaddmsp vs2, vs0, vs1
        stxvx vs2, r3, r5
        addi r5, r5, 16
        bdnz L..BB0_2
# %bb.3:                                # %for.end
        blr

Metadata

Metadata

Assignees

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions