Skip to content
This repository was archived by the owner on Jan 23, 2023. It is now read-only.

Commit 0ebc097

Browse files
authored
Merge pull request #16955 from fiigii/moreavx
Implement more AVX/AVX2 intrinsics
2 parents 103fe23 + dd96fc5 commit 0ebc097

File tree

69 files changed

+12926
-5
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+12926
-5
lines changed

src/jit/emitxarch.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,12 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
223223
case INS_unpcklpd:
224224
case INS_vinsertf128:
225225
case INS_vinserti128:
226+
case INS_vmaskmovps:
227+
case INS_vmaskmovpd:
226228
case INS_vperm2i128:
229+
case INS_vperm2f128:
230+
case INS_vpermilpsvar:
231+
case INS_vpermilpdvar:
227232
case INS_vpsrlvd:
228233
case INS_vpsrlvq:
229234
case INS_vpsravd:

src/jit/hwintrinsiccodegenxarch.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,14 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
134134
}
135135
else if (category == HW_Category_MemoryLoad)
136136
{
137-
emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
137+
if (intrinsicID == NI_AVX_MaskLoad)
138+
{
139+
emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op2Reg, op1Reg);
140+
}
141+
else
142+
{
143+
emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2Reg);
144+
}
138145
}
139146
else if (Compiler::isImmHWIntrinsic(intrinsicID, op2))
140147
{
@@ -1331,7 +1338,6 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
13311338

13321339
if (op1Reg != targetReg)
13331340
{
1334-
instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
13351341
emit->emitIns_R_R(ins, emitTypeSize(TYP_SIMD32), targetReg, op1Reg);
13361342
}
13371343
break;

src/jit/hwintrinsiclistxarch.h

Lines changed: 16 additions & 0 deletions
Large diffs are not rendered by default.

src/jit/instrsxarch.h

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -474,8 +474,8 @@ INST3( vpbroadcastb, "pbroadcastb" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
474474
INST3( vpbroadcastw, "pbroadcastw" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x79)) // Broadcast int16 value from reg/memory to entire ymm register
475475
INST3( vpbroadcastd, "pbroadcastd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x58)) // Broadcast int32 value from reg/memory to entire ymm register
476476
INST3( vpbroadcastq, "pbroadcastq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x59)) // Broadcast int64 value from reg/memory to entire ymm register
477-
INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, BAD_CODE) // Extract 128-bit packed floating point values
478-
INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, BAD_CODE) // Extract 128-bit packed integer values
477+
INST3( vextractf128, "extractf128" , 0, IUM_WR, 0, 0, SSE3A(0x19), BAD_CODE, SSE3A(0x19)) // Extract 128-bit packed floating point values
478+
INST3( vextracti128, "extracti128" , 0, IUM_WR, 0, 0, SSE3A(0x39), BAD_CODE, SSE3A(0x39)) // Extract 128-bit packed integer values
479479
INST3( vinsertf128, "insertf128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x18)) // Insert 128-bit packed floating point values
480480
INST3( vinserti128, "inserti128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x38)) // Insert 128-bit packed integer values
481481
INST3( vzeroupper, "zeroupper" , 0, IUM_WR, 0, 0, 0xC577F8, BAD_CODE, BAD_CODE) // Zero upper 128-bits of all YMM regs (includes 2-byte fixed VEX prefix)
@@ -493,7 +493,13 @@ INST3( vpsllvd, "psllvd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SS
493493
INST3( vpsllvq, "psllvq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x47)) // Variable Bit Shift Left Logical
494494
INST3( vpermilps, "permilps" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x04)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
495495
INST3( vpermilpd, "permilpd" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x05)) // Permute In-Lane of Pairs of Double-Precision Floating-Point Values
496-
496+
INST3( vpermilpsvar, "permilpsvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0C)) // Permute In-Lane of Quadruples of Single-Precision Floating-Point Values
497+
INST3( vpermilpdvar, "permilpdvar" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x0D)) // Permute In-Lane of Pairs of Double-Precision Floating-Point Values
498+
INST3( vperm2f128, "perm2f128" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE3A(0x06)) // Permute Floating-Point Values
499+
INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x1A)) // Broadcast packed float values read from memory to entire ymm register
500+
INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x5A)) // Broadcast packed integer values read from memory to entire ymm register
501+
INST3(vmaskmovps, "maskmovps" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2C)) // Conditional SIMD Packed Loads Float
502+
INST3(vmaskmovpd, "maskmovpd" ,0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, SSE38(0x2D)) // Conditional SIMD Packed Loads Double
497503
INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION", 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
498504

499505
// Scalar instructions in SSE4.2

tests/src/JIT/HardwareIntrinsics/X86/Avx/Avx_r.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@
130130
<Compile Include="Permute.Double.1.cs" />
131131
<Compile Include="Permute.Single.2.cs" />
132132
<Compile Include="Permute.Double.2.cs" />
133+
<Compile Include="PermuteVar.Single.cs" />
134+
<Compile Include="PermuteVar.Double.cs" />
133135
<Compile Include="RoundCurrentDirection.Double.cs" />
134136
<Compile Include="RoundCurrentDirection.Single.cs" />
135137
<Compile Include="RoundToNearestInteger.Double.cs" />

tests/src/JIT/HardwareIntrinsics/X86/Avx/Avx_ro.csproj

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,8 @@
130130
<Compile Include="Permute.Double.1.cs" />
131131
<Compile Include="Permute.Single.2.cs" />
132132
<Compile Include="Permute.Double.2.cs" />
133+
<Compile Include="PermuteVar.Single.cs" />
134+
<Compile Include="PermuteVar.Double.cs" />
133135
<Compile Include="RoundCurrentDirection.Double.cs" />
134136
<Compile Include="RoundCurrentDirection.Single.cs" />
135137
<Compile Include="RoundToNearestInteger.Double.cs" />
Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
// Licensed to the .NET Foundation under one or more agreements.
2+
// The .NET Foundation licenses this file to you under the MIT license.
3+
// See the LICENSE file in the project root for more information.
4+
//
5+
6+
using System;
7+
using System.Runtime.CompilerServices;
8+
using System.Runtime.InteropServices;
9+
using System.Runtime.Intrinsics.X86;
10+
using System.Runtime.Intrinsics;
11+
12+
namespace IntelHardwareIntrinsicTest
13+
{
14+
class Program
15+
{
16+
const int Pass = 100;
17+
const int Fail = 0;
18+
19+
// Test entry point. Returns Pass (100) when every check succeeds, or when
// Avx.IsSupported is false and no check runs; returns Fail (0) if any check fails.
static unsafe int Main(string[] args)
20+
{
21+
int testResult = Pass;
22+
23+
// Without AVX support the body is skipped and testResult keeps its initial Pass value.
if (Avx.IsSupported)
24+
{
25+
// Float case: input and output both hold 8 elements. CheckResult only consults
// the first 4 input elements (outArray.Length/2), so the trailing 1, 2, 3, 4 are never compared.
using (TestTable<float> floatTable = new TestTable<float>(new float[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new float[8]))
26+
{
27+
// Hand the pinned input address to the intrinsic, then store the returned
// vector at the pinned output address.
var vf = Avx.BroadcastVector128ToVector256((float*)(floatTable.inArrayPtr));
28+
Unsafe.Write(floatTable.outArrayPtr, vf);
29+
30+
// Elements are compared by raw bit pattern rather than by floating-point equality.
if (!floatTable.CheckResult((x, y) => BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y)))
31+
{
32+
// On mismatch: dump the whole output array and record the failure.
// Execution continues, so the double case below still runs.
Console.WriteLine("AVX BroadcastVector128ToVector256 failed on float:");
33+
foreach (var item in floatTable.outArray)
34+
{
35+
Console.Write(item + ", ");
36+
}
37+
Console.WriteLine();
38+
testResult = Fail;
39+
}
40+
}
41+
42+
// Double case: input and output both hold 4 elements. CheckResult only consults
// the first 2 input elements (outArray.Length/2).
using (TestTable<double> doubleTable = new TestTable<double>(new double[4] { 1, -5, 100, 0}, new double[4]))
43+
{
44+
var vf = Avx.BroadcastVector128ToVector256((double*)(doubleTable.inArrayPtr));
45+
Unsafe.Write(doubleTable.outArrayPtr, vf);
46+
47+
// Same bit-pattern comparison as the float case, using the 64-bit conversion.
if (!doubleTable.CheckResult((x, y) => BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y)))
48+
{
49+
Console.WriteLine("AVX BroadcastVector128ToVector256 failed on double:");
50+
foreach (var item in doubleTable.outArray)
51+
{
52+
Console.Write(item + ", ");
53+
}
54+
Console.WriteLine();
55+
testResult = Fail;
56+
}
57+
}
58+
}
59+
return testResult;
60+
}
61+
62+
// Pairs an input array with an output array and keeps both pinned (via GCHandle)
// from construction until Dispose, so that raw pointers to their data can be handed out.
public unsafe struct TestTable<T> : IDisposable where T : struct
63+
{
64+
// Source values; the constructor stores the caller's array reference (no copy).
public T[] inArray;
65+
// Destination buffer that the code under test writes into through outArrayPtr.
public T[] outArray;
66+
67+
// Raw addresses of the pinned arrays, re-read from the handles on every access.
public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer();
68+
public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer();
69+
70+
// Pinned handles for inArray and outArray; released in Dispose.
GCHandle inHandle;
71+
GCHandle outHandle;
72+
// Stores the two arrays and pins each one.
//   a: input array, b: output array.
public TestTable(T[] a, T[] b)
73+
{
74+
this.inArray = a;
75+
this.outArray = b;
76+
77+
inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned);
78+
outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
79+
}
80+
// Returns true when check(expected, actual) holds for every element of outArray,
// and false at the first mismatch. The expected value for outArray[i] is inArray[i]
// in the first half of outArray and inArray[i - outArray.Length/2] in the second half,
// i.e. the first outArray.Length/2 input elements are expected twice in a row.
public bool CheckResult(Func<T, T, bool> check)
81+
{
82+
// First half of the output: compared element-for-element with the start of the input.
for (int i = 0; i < outArray.Length/2; i++)
83+
{
84+
if (!check(inArray[i], outArray[i]))
85+
{
86+
return false;
87+
}
88+
}
89+
// Second half of the output: compared against the same leading input elements again.
for (int i = outArray.Length/2; i < outArray.Length; i++)
90+
{
91+
if (!check(inArray[i - outArray.Length/2], outArray[i]))
92+
{
93+
return false;
94+
}
95+
}
96+
return true;
97+
}
98+
99+
// Releases both pinned handles.
public void Dispose()
100+
{
101+
inHandle.Free();
102+
outHandle.Free();
103+
}
104+
}
105+
106+
}
107+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
4+
<PropertyGroup>
5+
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
6+
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
7+
<SchemaVersion>2.0</SchemaVersion>
8+
<ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
9+
<OutputType>Exe</OutputType>
10+
<ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
11+
<SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
12+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
13+
</PropertyGroup>
14+
<!-- Default configurations to help VS understand the configurations -->
15+
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
16+
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
17+
<ItemGroup>
18+
<CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
19+
<Visible>False</Visible>
20+
</CodeAnalysisDependentAssemblyPaths>
21+
</ItemGroup>
22+
<PropertyGroup>
23+
<DebugType>None</DebugType>
24+
<Optimize></Optimize>
25+
</PropertyGroup>
26+
<ItemGroup>
27+
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
28+
</ItemGroup>
29+
<ItemGroup>
30+
<Compile Include="BroadcastVector128ToVector256.cs" />
31+
</ItemGroup>
32+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
33+
<PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
34+
</Project>
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
<?xml version="1.0" encoding="utf-8"?>
2+
<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
3+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />
4+
<PropertyGroup>
5+
<Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>
6+
<Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>
7+
<SchemaVersion>2.0</SchemaVersion>
8+
<ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>
9+
<OutputType>Exe</OutputType>
10+
<ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids>
11+
<SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>
12+
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
13+
</PropertyGroup>
14+
<!-- Default configurations to help VS understand the configurations -->
15+
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>
16+
<PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />
17+
<ItemGroup>
18+
<CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">
19+
<Visible>False</Visible>
20+
</CodeAnalysisDependentAssemblyPaths>
21+
</ItemGroup>
22+
<PropertyGroup>
23+
<DebugType>None</DebugType>
24+
<Optimize>True</Optimize>
25+
</PropertyGroup>
26+
<ItemGroup>
27+
<Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
28+
</ItemGroup>
29+
<ItemGroup>
30+
<Compile Include="BroadcastVector128ToVector256.cs" />
31+
</ItemGroup>
32+
<Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" />
33+
<PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>
34+
</Project>

0 commit comments

Comments
 (0)