|
30 | 30 | import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ASIMDSize;
|
31 | 31 | import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler.ElementSize;
|
32 | 32 | import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
|
| 33 | +import jdk.graal.compiler.core.common.LIRKind; |
33 | 34 | import jdk.graal.compiler.debug.GraalError;
|
34 | 35 | import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
|
35 | 36 | import jdk.graal.compiler.lir.LIRInstructionClass;
|
36 | 37 | import jdk.graal.compiler.lir.Opcode;
|
37 | 38 |
|
| 39 | +import jdk.graal.compiler.lir.gen.LIRGeneratorTool; |
| 40 | +import jdk.vm.ci.aarch64.AArch64Kind; |
38 | 41 | import jdk.vm.ci.code.Register;
|
39 | 42 | import jdk.vm.ci.meta.AllocatableValue;
|
| 43 | +import jdk.vm.ci.meta.Value; |
40 | 44 |
|
41 | 45 | /**
|
42 | 46 | * This enum encapsulates AArch64 instructions which perform permutations.
|
@@ -102,4 +106,61 @@ public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
|
102 | 106 |
|
103 | 107 | }
|
104 | 108 | }
|
| 109 | + |
| 110 | + public static class ASIMDPermuteOp extends AArch64LIRInstruction { |
| 111 | + private static final LIRInstructionClass<ASIMDPermuteOp> TYPE = LIRInstructionClass.create(ASIMDPermuteOp.class); |
| 112 | + |
| 113 | + @Def protected AllocatableValue result; |
| 114 | + @Alive protected AllocatableValue source; |
| 115 | + @Use protected AllocatableValue indices; |
| 116 | + @Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp1; |
| 117 | + @Temp({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue xtmp2; |
| 118 | + |
| 119 | + public ASIMDPermuteOp(LIRGeneratorTool tool, AllocatableValue result, AllocatableValue source, AllocatableValue indices) { |
| 120 | + super(TYPE); |
| 121 | + this.result = result; |
| 122 | + this.source = source; |
| 123 | + this.indices = indices; |
| 124 | + AArch64Kind eKind = ((AArch64Kind) result.getPlatformKind()).getScalar(); |
| 125 | + this.xtmp1 = eKind == AArch64Kind.BYTE ? Value.ILLEGAL : tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE)); |
| 126 | + this.xtmp2 = eKind == AArch64Kind.BYTE ? Value.ILLEGAL : tool.newVariable(LIRKind.value(AArch64Kind.V128_BYTE)); |
| 127 | + } |
| 128 | + |
| 129 | + @Override |
| 130 | + public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) { |
| 131 | + AArch64Kind vKind = (AArch64Kind) result.getPlatformKind(); |
| 132 | + AArch64Kind eKind = vKind.getScalar(); |
| 133 | + ASIMDSize vSize = ASIMDSize.fromVectorKind(vKind); |
| 134 | + Register xtmp1Reg = xtmp1.equals(Value.ILLEGAL) ? Register.None : asRegister(xtmp1); |
| 135 | + Register xtmp2Reg = xtmp2.equals(Value.ILLEGAL) ? Register.None : asRegister(xtmp2); |
| 136 | + Register currentIdxReg = asRegister(indices); |
| 137 | + // Since NEON only supports byte look up, we repeatedly convert a 2W-bit look up into |
| 138 | + // W-bit look up by transforming a 2W-bit index with value v into a pair of W-bit |
| 139 | + // indices v * 2, v * 2 + 1 until we reach the element width equal to Byte.SIZE |
| 140 | + if (eKind.getSizeInBytes() == AArch64Kind.QWORD.getSizeInBytes()) { |
| 141 | + masm.neon.shlVVI(vSize, ElementSize.DoubleWord, xtmp1Reg, currentIdxReg, 1); |
| 142 | + masm.neon.shlVVI(vSize, ElementSize.DoubleWord, xtmp2Reg, xtmp1Reg, Integer.SIZE); |
| 143 | + masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg); |
| 144 | + masm.neon.orrVI(vSize, ElementSize.DoubleWord, xtmp1Reg, 1L << Integer.SIZE); |
| 145 | + currentIdxReg = xtmp1Reg; |
| 146 | + eKind = AArch64Kind.DWORD; |
| 147 | + } |
| 148 | + if (eKind.getSizeInBytes() == AArch64Kind.DWORD.getSizeInBytes()) { |
| 149 | + masm.neon.shlVVI(vSize, ElementSize.Word, xtmp1Reg, currentIdxReg, 1); |
| 150 | + masm.neon.shlVVI(vSize, ElementSize.Word, xtmp2Reg, xtmp1Reg, Short.SIZE); |
| 151 | + masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg); |
| 152 | + masm.neon.orrVI(vSize, ElementSize.Word, xtmp1Reg, 1 << Short.SIZE); |
| 153 | + currentIdxReg = xtmp1Reg; |
| 154 | + eKind = AArch64Kind.WORD; |
| 155 | + } |
| 156 | + if (eKind.getSizeInBytes() == AArch64Kind.WORD.getSizeInBytes()) { |
| 157 | + masm.neon.shlVVI(vSize, ElementSize.HalfWord, xtmp1Reg, currentIdxReg, 1); |
| 158 | + masm.neon.shlVVI(vSize, ElementSize.HalfWord, xtmp2Reg, xtmp1Reg, Byte.SIZE); |
| 159 | + masm.neon.orrVVV(vSize, xtmp1Reg, xtmp1Reg, xtmp2Reg); |
| 160 | + masm.neon.orrVI(vSize, ElementSize.HalfWord, xtmp1Reg, 1 << Byte.SIZE); |
| 161 | + currentIdxReg = xtmp1Reg; |
| 162 | + } |
| 163 | + masm.neon.tblVVV(vSize, asRegister(result), asRegister(source), currentIdxReg); |
| 164 | + } |
| 165 | + } |
105 | 166 | }
|
0 commit comments