|
27 | 27 | import java.util.Arrays; |
28 | 28 | import java.util.Set; |
29 | 29 |
|
30 | | -import jdk.incubator.vector.DoubleVector; |
31 | | -import jdk.incubator.vector.VectorSpecies; |
32 | 30 | import org.apache.commons.lang3.NotImplementedException; |
33 | 31 | import org.apache.sysds.runtime.compress.DMLCompressionException; |
34 | 32 | import org.apache.sysds.runtime.compress.colgroup.indexes.ArrayIndex; |
@@ -67,8 +65,6 @@ public class MatrixBlockDictionary extends ADictionary { |
67 | 65 |
|
68 | 66 | final private MatrixBlock _data; |
69 | 67 |
|
70 | | - static final VectorSpecies<Double> SPECIES = DoubleVector.SPECIES_PREFERRED; |
71 | | - |
72 | 68 | /** |
73 | 69 | * Unsafe private constructor that does not check the data validity. USE WITH CAUTION. |
74 | 70 | * |
@@ -2085,102 +2081,7 @@ private void preaggValuesFromDenseDictDenseAggArray(final int numVals, final ICo |
2085 | 2081 |
|
2086 | 2082 | private void preaggValuesFromDenseDictDenseAggRange(final int numVals, final IColIndex colIndexes, final int s, |
2087 | 2083 | final int e, final double[] b, final int cut, final double[] ret) { |
2088 | | - if(colIndexes instanceof RangeIndex) { |
2089 | | - RangeIndex ri = (RangeIndex) colIndexes; |
2090 | | - preaggValuesFromDenseDictDenseAggRangeRange(numVals, ri.get(0), ri.get(0) + ri.size(), s, e, b, cut, ret); |
2091 | | - } |
2092 | | - else |
2093 | | - preaggValuesFromDenseDictDenseAggRangeGeneric(numVals, colIndexes, s, e, b, cut, ret); |
2094 | | - } |
2095 | | - |
2096 | | - private void preaggValuesFromDenseDictDenseAggRangeRange(final int numVals, final int ls, final int le, final int rs, |
2097 | | - final int re, final double[] b, final int cut, final double[] ret) { |
2098 | | - final int cz = le - ls; |
2099 | | - final int az = re - rs; |
2100 | | - // final int nCells = numVals * cz; |
2101 | | - final double[] values = _data.getDenseBlockValues(); |
2102 | | - // Correctly named ikj matrix multiplication . |
2103 | | - |
2104 | | - final int blkzI = 32; |
2105 | | - final int blkzK = 24; |
2106 | | - final int blkzJ = 1024; |
2107 | | - for(int bi = 0; bi < numVals; bi += blkzI) { |
2108 | | - final int bie = Math.min(numVals, bi + blkzI); |
2109 | | - for(int bk = 0; bk < cz; bk += blkzK) { |
2110 | | - final int bke = Math.min(cz, bk + blkzK); |
2111 | | - for(int bj = 0; bj < az; bj += blkzJ) { |
2112 | | - final int bje = Math.min(az, bj + blkzJ); |
2113 | | - final int sOffT = rs + bj; |
2114 | | - final int eOffT = rs + bje; |
2115 | | - preaggValuesFromDenseDictBlockedIKJ(values, b, ret, bi, bk, bj, bie, bke, cz, az, ls, cut, sOffT, eOffT); |
2116 | | - // preaggValuesFromDenseDictBlockedIJK(values, b, ret, bi, bk, bj, bie, bke, bje, cz, az, ls, cut, sOffT, eOffT); |
2117 | | - } |
2118 | | - } |
2119 | | - } |
2120 | | - } |
2121 | | - |
2122 | | - // private static void preaggValuesFromDenseDictBlockedIJK(double[] a, double[] b, double[] ret, int bi, int bk, int bj, |
2123 | | - // int bie, int bke, int bje, int cz, int az, int ls, int cut, int sOffT, int eOffT) { |
2124 | | - // final int vLen = SPECIES.length(); |
2125 | | - // final DoubleVector vVec = DoubleVector.zero(SPECIES); |
2126 | | - // for(int i = bi; i < bie; i++) { |
2127 | | - // final int offI = i * cz; |
2128 | | - // final int offOutT = i * az + bj; |
2129 | | - // int offOut = offOutT; |
2130 | | - // final int end = (bje - bj) % vLen; |
2131 | | - // for(int j = bj + sOffT; j < end + sOffT; j += vLen, offOut += vLen) { |
2132 | | - // final DoubleVector res = DoubleVector.fromArray(SPECIES, ret, offOut); |
2133 | | - // for(int k = bk; k < bke; k++) { |
2134 | | - // final int idb = (k + ls) * cut; |
2135 | | - // final double v = a[offI + k]; |
2136 | | - // vVec.broadcast(v); |
2137 | | - // DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, idb + j); |
2138 | | - // vVec.fma(bVec, res); |
2139 | | - // } |
2140 | | - // res.intoArray(ret, offOut); |
2141 | | - // } |
2142 | | - // for(int j = end + sOffT; j < bje + sOffT; j++, offOut++) { |
2143 | | - // for(int k = bk; k < bke; k++) { |
2144 | | - // final int idb = (k + ls) * cut; |
2145 | | - // final double v = a[offI + k]; |
2146 | | - // ret[offOut] += v * b[idb + j]; |
2147 | | - // } |
2148 | | - // } |
2149 | | - // } |
2150 | | - // } |
2151 | | - |
2152 | | - private static void preaggValuesFromDenseDictBlockedIKJ(double[] a, double[] b, double[] ret, int bi, int bk, int bj, |
2153 | | - int bie, int bke, int cz, int az, int ls, int cut, int sOffT, int eOffT) { |
2154 | | - final int vLen = SPECIES.length(); |
2155 | | - final DoubleVector vVec = DoubleVector.zero(SPECIES); |
2156 | | - final int leftover = sOffT - eOffT % vLen; // leftover not vectorized |
2157 | | - for(int i = bi; i < bie; i++) { |
2158 | | - final int offI = i * cz; |
2159 | | - final int offOutT = i * az + bj; |
2160 | | - for(int k = bk; k < bke; k++) { |
2161 | | - final int idb = (k + ls) * cut; |
2162 | | - final int sOff = sOffT + idb; |
2163 | | - final int eOff = eOffT + idb; |
2164 | | - final double v = a[offI + k]; |
2165 | | - vecInnerLoop(v, b, ret, offOutT, eOff, sOff, leftover, vLen, vVec); |
2166 | | - } |
2167 | | - } |
2168 | | - } |
2169 | | - |
2170 | | - private static void vecInnerLoop(final double v, final double[] b, final double[] ret, final int offOutT, |
2171 | | - final int eOff, final int sOff, final int leftover, final int vLen, DoubleVector vVec) { |
2172 | | - int offOut = offOutT; |
2173 | | - vVec = vVec.broadcast(v); |
2174 | | - final int end = eOff - leftover; |
2175 | | - for(int j = sOff; j < end; j += vLen, offOut += vLen) { |
2176 | | - DoubleVector res = DoubleVector.fromArray(SPECIES, ret, offOut); |
2177 | | - DoubleVector bVec = DoubleVector.fromArray(SPECIES, b, j); |
2178 | | - vVec.fma(bVec, res).intoArray(ret, offOut); |
2179 | | - } |
2180 | | - for(int j = end; j < eOff; j++, offOut++) { |
2181 | | - ret[offOut] += v * b[j]; |
2182 | | - } |
2183 | | - |
| 2084 | + preaggValuesFromDenseDictDenseAggRangeGeneric(numVals, colIndexes, s, e, b, cut, ret); |
2184 | 2085 | } |
2185 | 2086 |
|
2186 | 2087 | private void preaggValuesFromDenseDictDenseAggRangeGeneric(final int numVals, final IColIndex colIndexes, |
|
0 commit comments