|
1 | 1 | // Copyright (c) Six Labors.
|
2 | 2 | // Licensed under the Six Labors Split License.
|
3 | 3 |
|
4 |
| -using System.Numerics; |
5 | 4 | using System.Runtime.CompilerServices;
|
6 | 5 | using System.Runtime.InteropServices;
|
7 | 6 | using System.Runtime.Intrinsics;
|
@@ -60,109 +59,76 @@ public void NormalizeColorsAndRoundInPlaceVector256(float maximum)
|
60 | 59 | }
|
61 | 60 |
|
/// <summary>
/// Loads values from <paramref name="source"/> using <see cref="Vector256{T}"/> intrinsics.
/// </summary>
/// <remarks>
/// Performs eight load/widen/convert steps. Each step reads one <see cref="Vector128{T}"/> of
/// <see cref="short"/> values, widens it to a <see cref="Vector256{T}"/> of <see cref="int"/>
/// and stores the result converted to <see cref="float"/> into this block.
/// </remarks>
/// <param name="source">The source <see cref="Block8x8"/></param>
public void LoadFromInt16ExtendedVector256(ref Block8x8 source)
{
    DebugGuard.IsTrue(
        Vector256.IsHardwareAccelerated,
        "LoadFromInt16ExtendedVector256 only works on Vector256 compatible architecture!");

    ref short input = ref Unsafe.As<Block8x8, short>(ref source);
    ref Vector256<float> output = ref Unsafe.As<Block8x8F, Vector256<float>>(ref this);

    // Element offset between consecutive loads: one Vector128<short> load reads
    // exactly as many elements as a single Vector256<int> holds.
    nuint stride = (nuint)Vector256<int>.Count;

    // Kept fully unrolled: one widened vector per step, eight steps in total.
    // NOTE(review): Vector256_.Widen presumably sign-extends short to int - confirm against the helper.
    output = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input)));
    Unsafe.Add(ref output, 1) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride)));
    Unsafe.Add(ref output, 2) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride * 2)));
    Unsafe.Add(ref output, 3) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride * 3)));
    Unsafe.Add(ref output, 4) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride * 4)));
    Unsafe.Add(ref output, 5) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride * 5)));
    Unsafe.Add(ref output, 6) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride * 6)));
    Unsafe.Add(ref output, 7) = Vector256.ConvertToSingle(Vector256_.Widen(Vector128.LoadUnsafe(ref input, stride * 7)));
}
|
97 | 96 |
|
/// <summary>
/// Adds <paramref name="off"/> to <paramref name="value"/>, passes the sum through
/// <c>Vector256_.Clamp</c> with <see cref="Vector256{T}.Zero"/> and <paramref name="max"/> as bounds,
/// and rounds the clamped vector with <c>Vector256_.RoundToNearestInteger</c>.
/// </summary>
/// <param name="value">The vector to normalize.</param>
/// <param name="off">The offset added to every element before clamping.</param>
/// <param name="max">The upper bound handed to the clamp.</param>
/// <returns>The offset, clamped and rounded vector.</returns>
[MethodImpl(InliningOptions.ShortMethod)]
private static Vector256<float> NormalizeAndRoundVector256(Vector256<float> value, Vector256<float> off, Vector256<float> max)
{
    Vector256<float> shifted = value + off;
    Vector256<float> clamped = Vector256_.Clamp(shifted, Vector256<float>.Zero, max);
    return Vector256_.RoundToNearestInteger(clamped);
}
|
101 | 100 |
|
/// <summary>
/// Multiplies <paramref name="a"/> and <paramref name="b"/> element-wise, converts the products
/// to 32-bit integers and packs them as 16-bit values into <paramref name="dest"/>.
/// </summary>
/// <param name="a">The first factor block.</param>
/// <param name="b">The second factor block.</param>
/// <param name="dest">The block receiving the packed 16-bit products.</param>
private static unsafe void MultiplyIntoInt16Vector256(ref Block8x8F a, ref Block8x8F b, ref Block8x8 dest)
{
    DebugGuard.IsTrue(Vector256.IsHardwareAccelerated, "Vector256 support is required to run this operation!");

    ref Vector256<float> lhsRows = ref a.V256_0;
    ref Vector256<float> rhsRows = ref b.V256_0;
    ref Vector256<short> packedRows = ref dest.V01;

    // Every iteration consumes two float vectors from each input and
    // produces a single packed Vector256<short> in the destination.
    for (nuint pair = 0; pair < 4; pair++)
    {
        nuint even = pair * 2;
        nuint odd = even + 1;

        Vector256<float> evenProduct = Unsafe.Add(ref lhsRows, even) * Unsafe.Add(ref rhsRows, even);
        Vector256<float> oddProduct = Unsafe.Add(ref lhsRows, odd) * Unsafe.Add(ref rhsRows, odd);

        Vector256<int> evenInts = Vector256_.ConvertToInt32RoundToEven(evenProduct);
        Vector256<int> oddInts = Vector256_.ConvertToInt32RoundToEven(oddProduct);

        Vector256<int> packed = Vector256_.PackSignedSaturate(evenInts, oddInts).AsInt32();

        // NOTE(review): the pack helper presumably interleaves per 128-bit lane, with this shuffle
        // restoring element order - confirm against Vector256_. The index vector is created inline
        // at the call site, presumably so the JIT sees a constant; verify before hoisting it.
        Unsafe.Add(ref packedRows, pair) = Vector256.Shuffle(packed, Vector256.Create(0, 1, 4, 5, 2, 3, 6, 7)).AsInt16();
    }
}
|
123 | 120 |
|
124 |
| - private void TransposeInPlace_Avx() |
| 121 | + private void TransposeInPlaceVector256() |
125 | 122 | {
|
126 | 123 | // https://stackoverflow.com/questions/25622745/transpose-an-8x8-float-using-avx-avx2/25627536#25627536
|
127 |
| - Vector256<float> r0 = Avx.InsertVector128( |
128 |
| - this.V256_0, |
129 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V4L), |
130 |
| - 1); |
131 |
| - |
132 |
| - Vector256<float> r1 = Avx.InsertVector128( |
133 |
| - this.V256_1, |
134 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V5L), |
135 |
| - 1); |
136 |
| - |
137 |
| - Vector256<float> r2 = Avx.InsertVector128( |
138 |
| - this.V256_2, |
139 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V6L), |
140 |
| - 1); |
141 |
| - |
142 |
| - Vector256<float> r3 = Avx.InsertVector128( |
143 |
| - this.V256_3, |
144 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V7L), |
145 |
| - 1); |
146 |
| - |
147 |
| - Vector256<float> r4 = Avx.InsertVector128( |
148 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V0R).ToVector256(), |
149 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V4R), |
150 |
| - 1); |
151 |
| - |
152 |
| - Vector256<float> r5 = Avx.InsertVector128( |
153 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V1R).ToVector256(), |
154 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V5R), |
155 |
| - 1); |
156 |
| - |
157 |
| - Vector256<float> r6 = Avx.InsertVector128( |
158 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V2R).ToVector256(), |
159 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V6R), |
160 |
| - 1); |
161 |
| - |
162 |
| - Vector256<float> r7 = Avx.InsertVector128( |
163 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V3R).ToVector256(), |
164 |
| - Unsafe.As<Vector4, Vector128<float>>(ref this.V7R), |
165 |
| - 1); |
| 124 | + Vector256<float> r0 = this.V256_0.WithUpper(this.V4L.AsVector128()); |
| 125 | + Vector256<float> r1 = this.V256_1.WithUpper(this.V5L.AsVector128()); |
| 126 | + Vector256<float> r2 = this.V256_2.WithUpper(this.V6L.AsVector128()); |
| 127 | + Vector256<float> r3 = this.V256_3.WithUpper(this.V7L.AsVector128()); |
| 128 | + Vector256<float> r4 = this.V0R.AsVector128().ToVector256().WithUpper(this.V4R.AsVector128()); |
| 129 | + Vector256<float> r5 = this.V1R.AsVector128().ToVector256().WithUpper(this.V5R.AsVector128()); |
| 130 | + Vector256<float> r6 = this.V2R.AsVector128().ToVector256().WithUpper(this.V6R.AsVector128()); |
| 131 | + Vector256<float> r7 = this.V3R.AsVector128().ToVector256().WithUpper(this.V7R.AsVector128()); |
166 | 132 |
|
167 | 133 | Vector256<float> t0 = Avx.UnpackLow(r0, r1);
|
168 | 134 | Vector256<float> t2 = Avx.UnpackLow(r2, r3);
|
|
0 commit comments