Skip to content

Commit 21a0c8b

Browse files
committed
[GR-60516] Implement Vector API rearrange operation
PullRequest: graal/19664
2 parents c64d730 + c68f5c0 commit 21a0c8b

File tree

3 files changed

+323
-0
lines changed

3 files changed

+323
-0
lines changed

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/asm/amd64/AMD64Assembler.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2327,7 +2327,11 @@ public static class VexRVMOp extends VexOp {
23272327
public static final VexRVMOp VSQRTSD = new VexRVMOp("VSQRTSD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x51, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
23282328
public static final VexRVMOp VSQRTSS = new VexRVMOp("VSQRTSS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x51, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0);
23292329

2330+
// Permute opcodes (VEX-encodable forms). All four use the 66 prefix and the 0F38 opcode map.
// VPERMILPS/VPERMILPD are declared with the AVX1_AVX512F_VL assertion, VPERMD/VPERMPS with
// AVX2_AVX512F_VL_256_512.
// NOTE(review): VPERMILPD passes W0 as the third argument but W1 as the last one; presumably
// these are the VEX.W and EVEX.W settings respectively - confirm against the VexRVMOp constructor.
public static final VexRVMOp VPERMILPS = new VexRVMOp("VPERMILPS", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x0C, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, VEXPrefixConfig.W0);
23302331
public static final VexRVMOp VPERMD = new VexRVMOp("VPERMD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x36, VEXOpAssertion.AVX2_AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W0);
2332+
public static final VexRVMOp VPERMPS = new VexRVMOp("VPERMPS", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x16, VEXOpAssertion.AVX2_AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W0);
2333+
public static final VexRVMOp VPERMILPD = new VexRVMOp("VPERMILPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x0D, VEXOpAssertion.AVX1_AVX512F_VL, EVEXTuple.FVM, VEXPrefixConfig.W1);
2334+
23312335
public static final VexRVMOp VMOVSS = new VexRVMOp("VMOVSS", VEXPrefixConfig.P_F3, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x10, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_32BIT, VEXPrefixConfig.W0);
23322336
public static final VexRVMOp VMOVSD = new VexRVMOp("VMOVSD", VEXPrefixConfig.P_F2, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x10, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
23332337
public static final VexRVMOp VMOVHPD = new VexRVMOp("VMOVHPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F, VEXPrefixConfig.WIG, 0x16, VEXOpAssertion.AVX1_AVX512F_128, EVEXTuple.T1S_64BIT, VEXPrefixConfig.W1);
@@ -2431,8 +2435,14 @@ public static class VexRVMOp extends VexOp {
24312435
public static final VexRVMOp EVSQRTSD = new VexRVMOp("EVSQRTSD", VSQRTSD);
24322436
public static final VexRVMOp EVSQRTSS = new VexRVMOp("EVSQRTSS", VSQRTSS);
24332437

2438+
// EVEX permute opcodes. EVPERMB, EVPERMW, EVPERMQ and EVPERMPD are declared with a full
// encoding and a trailing `true` argument; EVPERMILPS, EVPERMD, EVPERMPS and EVPERMILPD are
// built from the like-named VEX constants through the two-argument constructor.
// EVPERMB and EVPERMW share opcode 0x8D and differ in W0/W1 and in the feature assertion
// (AVX512_VBMI_VL vs. AVX512BW_VL). EVPERMQ and EVPERMPD use opcodes 0x36 and 0x16 with W1,
// i.e. the same opcode bytes as VPERMD and VPERMPS, which use W0.
// NOTE(review): the meaning of the trailing `true` is not visible in this hunk; presumably it
// marks the op as EVEX-only - confirm against the VexRVMOp constructor.
public static final VexRVMOp EVPERMB = new VexRVMOp("EVPERMB", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x8D, VEXOpAssertion.AVX512_VBMI_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true);
24342439
public static final VexRVMOp EVPERMW = new VexRVMOp("EVPERMW", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x8D, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
2440+
public static final VexRVMOp EVPERMILPS = new VexRVMOp("EVPERMILPS", VPERMILPS);
24352441
public static final VexRVMOp EVPERMD = new VexRVMOp("EVPERMD", VPERMD);
2442+
public static final VexRVMOp EVPERMPS = new VexRVMOp("EVPERMPS", VPERMPS);
2443+
public static final VexRVMOp EVPERMILPD = new VexRVMOp("EVPERMILPD", VPERMILPD);
2444+
public static final VexRVMOp EVPERMQ = new VexRVMOp("EVPERMQ", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x36, VEXOpAssertion.AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
2445+
public static final VexRVMOp EVPERMPD = new VexRVMOp("EVPERMPD", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x16, VEXOpAssertion.AVX512F_VL_256_512, EVEXTuple.FVM, VEXPrefixConfig.W1, true);
24362446

24372447
public static final VexRVMOp EVPBLENDMB = new VexRVMOp("EVPBLENDMB", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W0, 0x66, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W0, true);
24382448
public static final VexRVMOp EVPBLENDMW = new VexRVMOp("EVPBLENDMW", VEXPrefixConfig.P_66, VEXPrefixConfig.M_0F38, VEXPrefixConfig.W1, 0x66, VEXOpAssertion.AVX512BW_VL, EVEXTuple.FVM, VEXPrefixConfig.W1, true);

compiler/src/jdk.graal.compiler/src/jdk/graal/compiler/lir/aarch64/AArch64PermuteOp.java

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,13 +30,17 @@
3030
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize;
3131
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize;
3232
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
33+
import jdk.graal.compiler.core.common.LIRKind;
3334
import jdk.graal.compiler.debug.GraalError;
3435
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
3536
import jdk.graal.compiler.lir.LIRInstructionClass;
3637
import jdk.graal.compiler.lir.Opcode;
3738

39+
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
40+
import jdk.vm.ci.aarch64.AArch64Kind;
3841
import jdk.vm.ci.code.Register;
3942
import jdk.vm.ci.meta.AllocatableValue;
43+
import jdk.vm.ci.meta.Value;
4044

4145
/**
4246
* This enum encapsulates AArch64 instructions which perform permutations.
@@ -102,4 +106,61 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
102106

103107
}
104108
}
109+
110+
/**
 * LIR instruction that permutes the elements of {@code source} according to {@code indices}
 * and writes the outcome to {@code result}. The permutation itself is a single NEON
 * {@code tbl} emitted at the end of {@link #emitCode}; for element kinds wider than a byte the
 * indices are first rewritten, step by step, into byte indices using two temporaries.
 */
public static class ASIMDPermuteOp extends AArch64LIRInstruction {
111+
private static final LIRInstructionClass<ASIMDPermuteOp> TYPE = LIRInstructionClass.create(ASIMDPermuteOp.class);
112+
113+
// Destination of the final tbl.
@Def protected AllocatableValue result;
114+
// Table operand of the final tbl; it is only read by that last instruction.
// NOTE(review): @Alive presumably prevents source from sharing a register with result or the
// temporaries, which are written before source is read - confirm against the LIR annotations.
@Alive protected AllocatableValue source;
115+
// Element indices; read directly by tbl for byte elements, otherwise read by the first shift.
@Use protected AllocatableValue indices;
116+
// Scratch vectors for the index rewriting; Value.ILLEGAL when the element kind is BYTE.
@Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp1;
117+
@Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp2;
118+
119+
/**
 * Creates the permute instruction and allocates its temporaries.
 *
 * @param tool generator used to create the two temporary variables when they are needed
 * @param result destination value; the scalar kind of its platform kind decides whether
 *            temporaries are allocated
 * @param source vector whose elements are selected
 * @param indices vector of element indices
 */
public ASIMDPermuteOp(LIRGeneratorTool tool, AllocatableValue result, AllocatableValue source, AllocatableValue indices) {
120+
super(TYPE);
121+
this.result = result;
122+
this.source = source;
123+
this.indices = indices;
124+
AArch64Kind eKind = ((AArch64Kind) result.getPlatformKind()).getScalar();
125+
// Byte elements need no index rewriting, so no temporaries are requested for them. The
// temporaries are always typed V128_BYTE, independent of the kind of result.
this.xtmp1 = eKind == AArch64Kind.BYTE ? Value.ILLEGAL : tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE));
126+
this.xtmp2 = eKind == AArch64Kind.BYTE ? Value.ILLEGAL : tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE));
127+
}
128+
129+
/**
 * Emits the index rewriting (if any) followed by the {@code tbl} lookup. The three
 * {@code if} blocks form a cascade: each one halves the index width and then updates
 * {@code eKind} so that the next block also runs, until the indices are byte indices.
 */
@Override
130+
public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
131+
AArch64Kind vKind = (AArch64Kind) result.getPlatformKind();
132+
AArch64Kind eKind = vKind.getScalar();
133+
ASIMDSize vSize = ASIMDSize.fromVectorKind(vKind);
134+
// Temporaries are ILLEGAL for byte elements (see constructor); map them to Register.None.
// In that case none of the size checks below is expected to match, so they stay unused.
Register xtmp1Reg = xtmp1.equals(Value.ILLEGAL) ? Register.None : asRegister(xtmp1);
135+
Register xtmp2Reg = xtmp2.equals(Value.ILLEGAL) ? Register.None : asRegister(xtmp2);
136+
// Register holding the current form of the indices; starts as the input and becomes xtmp1
// after the first rewriting step.
Register currentIdxReg = asRegister(indices);
137+
// Since NEON only supports byte look up, we repeatedly convert a 2W-bit look up into
138+
// W-bit look up by transforming a 2W-bit index with value v into a pair of W-bit
139+
// indices v * 2, v * 2 + 1 until we reach the element width equal to Byte.SIZE
140+
// The checks compare sizes in bytes rather than kinds, so presumably any scalar kind of the
// same width (e.g. a floating point kind) takes the same path - confirm.
// Step 1: 64-bit indices -> pairs of 32-bit indices.
if (eKind.getSizeInBytes() == AArch64Kind.QWORD.getSizeInBytes()) {
141+
// The per-step comments assume shlVVI/orrVI act on each element of the given ElementSize.
// xtmp1 = v * 2 in every 64-bit element.
masm.neon.shlVVI(vSize, ElementSize.DoubleWord, xtmp1Reg, currentIdxReg, 1);
142+
// xtmp2 = v * 2 moved into the upper 32 bits of every 64-bit element.
masm.neon.shlVVI(vSize, ElementSize.DoubleWord, xtmp2Reg, xtmp1Reg, Integer.SIZE);
143+
// Both 32-bit halves of xtmp1 now hold v * 2.
masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg);
144+
// Set the lowest bit of the upper half, turning it into v * 2 + 1.
masm.neon.orrVI(vSize, ElementSize.DoubleWord, xtmp1Reg, 1L << Integer.SIZE);
145+
currentIdxReg = xtmp1Reg;
146+
// Continue with the 32-bit step below.
eKind = AArch64Kind.DWORD;
147+
}
148+
// Step 2: 32-bit indices -> pairs of 16-bit indices (same scheme with a Short.SIZE shift).
if (eKind.getSizeInBytes() == AArch64Kind.DWORD.getSizeInBytes()) {
149+
masm.neon.shlVVI(vSize, ElementSize.Word, xtmp1Reg, currentIdxReg, 1);
150+
masm.neon.shlVVI(vSize, ElementSize.Word, xtmp2Reg, xtmp1Reg, Short.SIZE);
151+
masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg);
152+
masm.neon.orrVI(vSize, ElementSize.Word, xtmp1Reg, 1 << Short.SIZE);
153+
currentIdxReg = xtmp1Reg;
154+
// Continue with the 16-bit step below.
eKind = AArch64Kind.WORD;
155+
}
156+
// Step 3: 16-bit indices -> pairs of byte indices (same scheme with a Byte.SIZE shift).
// eKind is not updated here because nothing reads it afterwards.
if (eKind.getSizeInBytes() == AArch64Kind.WORD.getSizeInBytes()) {
157+
masm.neon.shlVVI(vSize, ElementSize.HalfWord, xtmp1Reg, currentIdxReg, 1);
158+
masm.neon.shlVVI(vSize, ElementSize.HalfWord, xtmp2Reg, xtmp1Reg, Byte.SIZE);
159+
masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg);
160+
masm.neon.orrVI(vSize, ElementSize.HalfWord, xtmp1Reg, 1 << Byte.SIZE);
161+
currentIdxReg = xtmp1Reg;
162+
}
163+
// Byte-wise table lookup of source using the (possibly rewritten) indices.
masm.neon.tblVVV(vSize, asRegister(result), asRegister(source), currentIdxReg);
164+
}
165+
}
105166
}

0 commit comments

Comments
 (0)