Skip to content

Commit 24bdc7f

Browse files
committed
Fix
1 parent ff17330 commit 24bdc7f

File tree

18 files changed

+304
-65
lines changed

18 files changed

+304
-65
lines changed

src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroupValue.java

Lines changed: 0 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,8 @@
2525
import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
2626
import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
2727
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
28-
import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
29-
import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
3028
import org.apache.sysds.runtime.functionobjects.Builtin;
3129
import org.apache.sysds.runtime.instructions.cp.CM_COV_Object;
32-
import org.apache.sysds.runtime.matrix.data.Pair;
3330
import org.apache.sysds.runtime.matrix.operators.CMOperator;
3431

3532
public abstract class AColGroupValue extends ADictBasedColGroup {
@@ -62,8 +59,6 @@ public int getNumValues() {
6259
* produce an overhead in cases where the count is calculated, but the overhead will be limited to number of distinct
6360
* tuples in the dictionary.
6461
*
65-
* The returned counts always contains the number of zero tuples as well if there are some contained, even if they
66-
* are not materialized.
6762
*
6863
* @return The count of each value in the MatrixBlock.
6964
*/
@@ -215,35 +210,6 @@ public void clear() {
215210
counts = null;
216211
}
217212

218-
@Override
219-
public AColGroup sort(){
220-
// TODO restore support for run length encoding.
221-
222-
int[] counts = getCounts();
223-
224-
Pair<IDictionary, int[]> r = _dict.sort();
225-
226-
int[] newCounts = r.getValue();
227-
int nRows = 0;
228-
for(int i = 0; i < counts.length; i++){
229-
// set the new counts to the sorted indexes.
230-
newCounts[i] = counts[newCounts[i]];
231-
nRows += newCounts[i];
232-
}
233-
234-
// TODO restore support for run length encoding.
235-
// This here allocates a ddc array instead.
236-
AMapToData m = MapToFactory.create(nRows, counts.length);
237-
int off = 0;
238-
for(int i = 0; i < counts.length; i++){
239-
for(int j = 0; j < newCounts[i]; j++){
240-
m.set(off++, j);
241-
}
242-
}
243-
244-
return ColGroupDDC.create(_colIndexes, r.getKey(), m, newCounts);
245-
246-
}
247213

248214
@Override
249215
public String toString() {

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,6 @@
2626
import java.util.List;
2727
import java.util.concurrent.ExecutorService;
2828

29-
import jdk.incubator.vector.DoubleVector;
30-
import jdk.incubator.vector.VectorSpecies;
3129
import org.apache.commons.lang3.NotImplementedException;
3230
import org.apache.sysds.runtime.DMLRuntimeException;
3331
import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
@@ -67,6 +65,9 @@
6765
import org.apache.sysds.runtime.matrix.operators.UnaryOperator;
6866
import org.jboss.netty.handler.codec.compression.CompressionException;
6967

68+
import jdk.incubator.vector.DoubleVector;
69+
import jdk.incubator.vector.VectorSpecies;
70+
7071
/**
7172
* Class to encapsulate information about a column group that is encoded with dense dictionary encoding (DDC).
7273
*/
@@ -1091,6 +1092,27 @@ public AColGroup[] splitReshapePushDown(int multiplier, int nRow, int nColOrg, E
10911092
return res;
10921093
}
10931094

1095+
/**
 * Create a DDC column group whose row mapping is laid out in the order returned by the dictionary sort.
 *
 * The dictionary and the counts array are passed on unchanged; only a new mapping is built.
 *
 * @return A new DDC column group created from the reordered mapping.
 */
@Override
1096+
public AColGroup sort() {
1097+
// TODO restore support for run length encoding to exploit the runs
1098+
1099+
int[] counts = getCounts();
1100+
// Sort index over the dictionary entries.
// NOTE(review): presumably r[i] is the dictionary index of the i-th tuple in sorted order - confirm against IDictionary.sort().
1101+
int[] r = _dict.sort();
1102+
1103+
AMapToData m = MapToFactory.create(_data.size(), counts.length);
1104+
int off = 0;
1105+
// Walk the dictionary entries in sort order and emit each entry index once per occurrence.
for(int i = 0; i < counts.length; i++) {
1106+
for(int j = 0; j < counts[r[i]]; j++) {
1107+
m.set(off++, r[i]);
1108+
}
1109+
}
1110+
1111+
return ColGroupDDC.create(_colIndexes, _dict, m, counts);
1112+
1113+
}
1114+
1115+
10941116
@Override
10951117
public String toString() {
10961118
StringBuilder sb = new StringBuilder();
@@ -1105,4 +1127,6 @@ protected boolean allowShallowIdentityRightMult() {
11051127
return true;
11061128
}
11071129

1130+
1131+
11081132
}

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDCFOR.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -546,6 +546,27 @@ protected boolean allowShallowIdentityRightMult() {
546546
return false;
547547
}
548548

549+
550+
/**
 * Create a DDCFOR column group whose row mapping is laid out in the order returned by the dictionary sort.
 *
 * The dictionary, the counts array and the reference are passed on unchanged; only a new mapping is built.
 *
 * @return A new DDCFOR column group created from the reordered mapping.
 */
@Override
551+
public AColGroup sort() {
552+
// TODO restore support for run length encoding.
553+
554+
int[] counts = getCounts();
555+
// Sort index over the dictionary entries.
// NOTE(review): presumably r[i] is the dictionary index of the i-th tuple in sorted order - confirm against IDictionary.sort().
556+
int[] r = _dict.sort();
557+
558+
AMapToData m = MapToFactory.create(_data.size(), counts.length);
559+
int off = 0;
560+
// Walk the dictionary entries in sort order and emit each entry index once per occurrence.
for(int i = 0; i < counts.length; i++) {
561+
for(int j = 0; j < counts[r[i]]; j++) {
562+
m.set(off++, r[i]);
563+
}
564+
}
565+
566+
return ColGroupDDCFOR.create(_colIndexes, _dict, m, counts, _reference);
567+
568+
}
569+
549570
@Override
550571
public String toString() {
551572
StringBuilder sb = new StringBuilder();

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
import org.apache.commons.lang3.NotImplementedException;
2727
import org.apache.sysds.runtime.compress.CompressionSettings;
2828
import org.apache.sysds.runtime.compress.bitmap.ABitmap;
29-
import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
3029
import org.apache.sysds.runtime.compress.colgroup.ColGroupUtils.P;
3130
import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary;
3231
import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory;
32+
import org.apache.sysds.runtime.compress.colgroup.dictionary.IDictionary;
3333
import org.apache.sysds.runtime.compress.colgroup.indexes.ColIndexFactory;
3434
import org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
3535
import org.apache.sysds.runtime.compress.colgroup.scheme.ICLAScheme;
@@ -731,5 +731,9 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
731731
throw new NotImplementedException("Unimplemented method 'splitReshape'");
732732
}
733733

734+
/**
 * Sorting is not supported for OLE column groups.
 *
 * @return Never returns normally.
 * @throws NotImplementedException always
 */
@Override
735+
public AColGroup sort() {
736+
throw new NotImplementedException("Unimplemented method 'sort'");
737+
}
734738

735739
}

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,4 +1190,8 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
11901190
throw new NotImplementedException("Unimplemented method 'splitReshape'");
11911191
}
11921192

1193+
/**
 * Sorting is not supported for RLE column groups.
 *
 * @return Never returns normally.
 * @throws NotImplementedException always
 */
@Override
1194+
public AColGroup sort() {
1195+
throw new NotImplementedException("Unimplemented method 'sort'");
1196+
}
11931197
}

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -508,10 +508,10 @@ protected static AColGroup rexpandCols(int max, boolean ignore, boolean cast, in
508508
AOffset indexes, AMapToData data, int[] counts, int def, int nVal) {
509509

510510
if(d == null) {
511-
if(def <= 0){
511+
if(def <= 0) {
512512
if(max > 0)
513513
return ColGroupEmpty.create(max);
514-
else
514+
else
515515
return null;
516516
}
517517
else if(def > max && max > 0)
@@ -873,6 +873,52 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
873873
return res;
874874
}
875875

876+
/**
 * Create an SDC column group laid out in sorted order for a single column.
 *
 * Non-default rows whose dictionary value is smaller than the default value are placed at the first row positions,
 * the default value implicitly fills the following block of rows, and the remaining non-default rows are placed
 * after that block. The dictionary, default tuple and counts are passed on unchanged.
 *
 * @return A new SDC column group with rebuilt offsets and mapping.
 * @throws NotImplementedException if the group contains more than one column
 */
@Override
877+
public AColGroup sort() {
878+
if(getNumCols() > 1)
879+
throw new NotImplementedException();
880+
// TODO restore support for run length encoding.
881+
882+
final int[] counts = getCounts();
883+
// Sort index over the dictionary entries.
// NOTE(review): presumably r[i] is the dictionary index of the i-th value in ascending order - confirm against IDictionary.sort().
884+
final int[] r = _dict.sort();
885+
886+
// find default value position.
887+
// TODO use binary search for minor improvements.
888+
final double def = _defaultTuple[0];
889+
// r.length means "no dictionary value >= default found": every value then sorts before the default block.
int defIdx = r.length;
890+
int nondefault = 0;
891+
for(int i = 0; i < r.length; i++) {
892+
if(defIdx == r.length && _dict.getValue(r[i], 0, 1) >= def) {
893+
defIdx = i;
894+
}
895+
nondefault += counts[i];
896+
}
897+
898+
// Number of rows holding the default value; they sit between the two non-default parts.
int defaultLength = _numRows - nondefault;
899+
AMapToData m = MapToFactory.create(_data.size(), counts.length);
900+
int[] offsets = new int[nondefault];
901+
902+
int off = 0;
903+
for(int i = 0; i < counts.length; i++) {
904+
if(i < defIdx) {
905+
// Values before the default position keep their running row index.
for(int j = 0; j < counts[r[i]]; j++) {
906+
offsets[off] = off;
907+
m.set(off++, r[i]);
908+
}
909+
}
910+
else {// if( i >= defIdx){
911+
// Values at or after the default position are shifted past the block of default rows.
for(int j = 0; j < counts[r[i]]; j++) {
912+
offsets[off] = off + defaultLength;
913+
m.set(off++, r[i]);
914+
}
915+
}
916+
}
917+
918+
AOffset o = OffsetFactory.createOffset(offsets);
919+
return ColGroupSDC.create(_colIndexes, _numRows, _dict, _defaultTuple, o, m, counts);
920+
}
921+
876922
@Override
877923
public String toString() {
878924
StringBuilder sb = new StringBuilder();

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCFOR.java

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -620,6 +620,51 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
620620
return res;
621621
}
622622

623+
/**
 * Create an SDCFOR column group laid out in sorted order for a single column.
 *
 * Non-default rows whose dictionary value is negative are placed at the first row positions, the default rows
 * implicitly fill the following block, and the remaining non-default rows are placed after that block. The
 * dictionary, counts and reference are passed on unchanged.
 *
 * @return A new SDCFOR column group with rebuilt offsets and mapping.
 * @throws NotImplementedException if the group contains more than one column
 */
@Override
624+
public AColGroup sort() {
625+
if(getNumCols() > 1)
626+
throw new NotImplementedException();
627+
// TODO restore support for run length encoding.
628+
629+
final int[] counts = getCounts();
630+
// Sort index over the dictionary entries.
// NOTE(review): presumably r[i] is the dictionary index of the i-th value in ascending order - confirm against IDictionary.sort().
631+
final int[] r = _dict.sort();
632+
633+
// find default value position.
634+
// TODO use binary search for minor improvements.
635+
// r.length means "no dictionary value >= 0 found": every value then sorts before the default block.
int defIdx = r.length;
636+
int nondefault = 0;
637+
for(int i = 0; i < r.length; i++) {
638+
if(defIdx == r.length && _dict.getValue(r[i], 0, 1) >= 0) {
639+
defIdx = i;
640+
}
641+
nondefault += counts[i];
642+
}
643+
644+
// Number of rows holding the default value; they sit between the two non-default parts.
int defaultLength = _numRows - nondefault;
645+
AMapToData m = MapToFactory.create(_data.size(), counts.length);
646+
int[] offsets = new int[nondefault];
647+
648+
int off = 0;
649+
for(int i = 0; i < counts.length; i++) {
650+
if(i < defIdx) {
651+
// Values before the default position keep their running row index.
for(int j = 0; j < counts[r[i]]; j++) {
652+
offsets[off] = off;
653+
m.set(off++, r[i]);
654+
}
655+
}
656+
else {// if( i >= defIdx){
657+
// Values at or after the default position are shifted past the block of default rows.
for(int j = 0; j < counts[r[i]]; j++) {
658+
offsets[off] = off + defaultLength;
659+
m.set(off++, r[i]);
660+
}
661+
}
662+
}
663+
664+
AOffset o = OffsetFactory.createOffset(offsets);
665+
return ColGroupSDCFOR.create(_colIndexes, _numRows, _dict, o, m, counts, _reference);
666+
}
667+
623668
@Override
624669
public String toString() {
625670
StringBuilder sb = new StringBuilder();

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,50 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
717717
}
718718
return res;
719719
}
720+
721+
722+
/**
 * Create an SDCSingle column group laid out in sorted order for a single column.
 *
 * Non-default rows whose dictionary value is smaller than the default value are placed at the first row positions,
 * the default value implicitly fills the following block of rows, and the remaining non-default rows are placed
 * after that block. The dictionary, default tuple and counts are passed on unchanged.
 *
 * @return A new SDCSingle column group with rebuilt offsets.
 * @throws NotImplementedException if the group contains more than one column
 */
@Override
723+
public AColGroup sort() {
724+
if(getNumCols() > 1)
725+
throw new NotImplementedException();
726+
// TODO restore support for run length encoding.
727+
728+
final int[] counts = getCounts();
729+
// Sort index over the dictionary entries.
// NOTE(review): presumably r[i] is the dictionary index of the i-th value in ascending order - confirm against IDictionary.sort().
730+
final int[] r = _dict.sort();
731+
732+
// find default value position.
733+
// TODO use binary search for minor improvements.
734+
final double def = _defaultTuple[0];
735+
// r.length means "no dictionary value >= default found": every value then sorts before the default block.
int defIdx = r.length;
736+
int nondefault = 0;
737+
for(int i = 0; i < r.length; i++) {
738+
if(defIdx == r.length && _dict.getValue(r[i], 0, 1) >= def) {
739+
defIdx = i;
740+
}
741+
nondefault += counts[i];
742+
}
743+
744+
// Number of rows holding the default value; they sit between the two non-default parts.
int defaultLength = _numRows - nondefault;
745+
int[] offsets = new int[nondefault];
746+
747+
int off = 0;
748+
for(int i = 0; i < counts.length; i++) {
749+
if(i < defIdx) {
750+
// Advance off together with j so every non-default row gets its own slot in offsets.
for(int j = 0; j < counts[r[i]]; j++, off++) {
751+
offsets[off] = off;
752+
}
753+
}
754+
else {// if( i >= defIdx){
755+
// Values at or after the default position are shifted past the block of default rows.
for(int j = 0; j < counts[r[i]]; j++, off++) {
756+
offsets[off] = off + defaultLength;
757+
}
758+
}
759+
}
760+
761+
AOffset o = OffsetFactory.createOffset(offsets);
762+
return ColGroupSDCSingle.create(_colIndexes, _numRows, _dict, _defaultTuple, o, counts);
763+
}
720764

721765
@Override
722766
public String toString() {

src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1049,6 +1049,49 @@ public AColGroup[] splitReshape(int multiplier, int nRow, int nColOrg) {
10491049
return res;
10501050
}
10511051

1052+
1053+
/**
 * Create an SDCSingleZeros column group laid out in sorted order for a single column.
 *
 * Non-default rows whose dictionary value is negative are placed at the first row positions, the default rows
 * implicitly fill the following block, and the remaining non-default rows are placed after that block. The
 * dictionary and counts are passed on unchanged.
 *
 * @return A new SDCSingleZeros column group with rebuilt offsets.
 * @throws NotImplementedException if the group contains more than one column
 */
@Override
1054+
public AColGroup sort() {
1055+
if(getNumCols() > 1)
1056+
throw new NotImplementedException();
1057+
// TODO restore support for run length encoding.
1058+
1059+
final int[] counts = getCounts();
1060+
// Sort index over the dictionary entries.
// NOTE(review): presumably r[i] is the dictionary index of the i-th value in ascending order - confirm against IDictionary.sort().
1061+
final int[] r = _dict.sort();
1062+
1063+
// find default value position.
1064+
// TODO use binary search for minor improvements.
1065+
// r.length means "no dictionary value >= 0 found": every value then sorts before the default block.
int defIdx = r.length;
1066+
int nondefault = 0;
1067+
for(int i = 0; i < r.length; i++) {
1068+
if(defIdx == r.length && _dict.getValue(r[i], 0, 1) >= 0) {
1069+
defIdx = i;
1070+
}
1071+
nondefault += counts[i];
1072+
}
1073+
1074+
// Number of rows holding the default value; they sit between the two non-default parts.
int defaultLength = _numRows - nondefault;
1075+
int[] offsets = new int[nondefault];
1076+
1077+
int off = 0;
1078+
for(int i = 0; i < counts.length; i++) {
1079+
if(i < defIdx) {
1080+
// Advance off together with j so every non-default row gets its own slot in offsets.
for(int j = 0; j < counts[r[i]]; j++, off++) {
1081+
offsets[off] = off;
1082+
}
1083+
}
1084+
else {// if( i >= defIdx){
1085+
// Values at or after the default position are shifted past the block of default rows.
for(int j = 0; j < counts[r[i]]; j++, off++) {
1086+
offsets[off] = off + defaultLength;
1087+
}
1088+
}
1089+
}
1090+
1091+
AOffset o = OffsetFactory.createOffset(offsets);
1092+
return ColGroupSDCSingleZeros.create(_colIndexes, _numRows, _dict, o, counts);
1093+
}
1094+
10521095
@Override
10531096
public String toString() {
10541097
StringBuilder sb = new StringBuilder();

0 commit comments

Comments
 (0)