Skip to content

Commit ae2ad07

Browse files
committed
[SYSTEMDS-3787] New rewrite for transformencode w/o metadata output
This patch adds a simple statement-block rewrite that checks whether the metadata frame output of transformencode is used at all and, if it is not, sets a flag on the transformencode operation to avoid allocating and serializing this metadata. The rewrite applies in about half of all existing 'org.apache.sysds.test.functions.transform' tests.
1 parent 12d8cd7 commit ae2ad07

File tree

4 files changed

+103
-15
lines changed

4 files changed

+103
-15
lines changed

src/main/java/org/apache/sysds/hops/rewrite/ProgramRewriter.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,7 @@ public ProgramRewriter(boolean staticRewrites, boolean dynamicRewrites)
115115
_sbRuleSet.add( new RewriteMarkLoopVariablesUpdateInPlace() );
116116
if( LineageCacheConfig.getCompAssRW() )
117117
_sbRuleSet.add( new MarkForLineageReuse() );
118+
_sbRuleSet.add( new RewriteRemoveTransformEncodeMeta() );
118119
}
119120

120121
// DYNAMIC REWRITES (which do require size information)
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.sysds.hops.rewrite;
21+
22+
import java.util.Arrays;
23+
import java.util.List;
24+
25+
import org.apache.sysds.hops.FunctionOp;
26+
import org.apache.sysds.hops.Hop;
27+
import org.apache.sysds.hops.LiteralOp;
28+
import org.apache.sysds.parser.StatementBlock;
29+
30+
/**
31+
* Rule: If transformencode procudes a meta data frame which is never
32+
* used, flag transformencode to never allocate an serialize this frame.
33+
*/
34+
public class RewriteRemoveTransformEncodeMeta extends StatementBlockRewriteRule
35+
{
36+
private final static String TF_OPCODE = "TRANSFORMENCODE";
37+
38+
@Override
39+
public List<StatementBlock> rewriteStatementBlock(StatementBlock sb, ProgramRewriteStatus state)
40+
{
41+
if( sb.getHops() == null || sb.getHops().isEmpty() )
42+
return Arrays.asList(sb);
43+
44+
//Transformencode is a multi-return FunctionOp and always appears as root
45+
//of the DAG. We then check that the meta data object is never used,
46+
//that is, the meta data is not in the live-out variables of the statementblock
47+
Hop root = sb.getHops().get(0);
48+
if( root instanceof FunctionOp
49+
&& TF_OPCODE.equals(((FunctionOp)root).getFunctionName()) )
50+
{
51+
FunctionOp func = (FunctionOp)root;
52+
if( !sb.liveOut().containsVariable(func.getOutputVariableNames()[1])
53+
&& func.getInput().size() == 2) { //not added yet
54+
func.getInput().add(new LiteralOp(false));
55+
LOG.debug("Applied removeTransformEncodeMeta (line "+ func.getBeginLine() +").");
56+
}
57+
}
58+
59+
return Arrays.asList(sb);
60+
}
61+
62+
@Override
63+
public List<StatementBlock> rewriteStatementBlocks(List<StatementBlock> sbs, ProgramRewriteStatus sate) {
64+
return sbs;
65+
}
66+
67+
@Override
68+
public boolean createsSplitDag() {
69+
return false;
70+
}
71+
}

src/main/java/org/apache/sysds/runtime/instructions/cp/MultiReturnParameterizedBuiltinCPInstruction.java

Lines changed: 15 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -39,11 +39,13 @@
3939

4040
public class MultiReturnParameterizedBuiltinCPInstruction extends ComputationCPInstruction {
4141
protected final ArrayList<CPOperand> _outputs;
42-
42+
protected final boolean _metaReturn;
43+
4344
private MultiReturnParameterizedBuiltinCPInstruction(Operator op, CPOperand input1, CPOperand input2,
44-
ArrayList<CPOperand> outputs, String opcode, String istr) {
45+
boolean metaReturn, ArrayList<CPOperand> outputs, String opcode, String istr) {
4546
super(CPType.MultiReturnBuiltin, op, input1, input2, outputs.get(0), opcode, istr);
4647
_outputs = outputs;
48+
_metaReturn = metaReturn;
4749
}
4850

4951
public CPOperand getOutput(int i) {
@@ -67,9 +69,14 @@ public static MultiReturnParameterizedBuiltinCPInstruction parseInstruction(Stri
6769
// one input and two outputs
6870
CPOperand in1 = new CPOperand(parts[1]);
6971
CPOperand in2 = new CPOperand(parts[2]);
70-
outputs.add(new CPOperand(parts[3], ValueType.FP64, DataType.MATRIX));
71-
outputs.add(new CPOperand(parts[4], ValueType.STRING, DataType.FRAME));
72-
return new MultiReturnParameterizedBuiltinCPInstruction(null, in1, in2, outputs, opcode, str);
72+
int pos = 3;
73+
boolean metaReturn = true;
74+
if( parts.length == 7 ) //no need for meta data
75+
metaReturn = new CPOperand(parts[pos++]).getLiteral().getBooleanValue();
76+
outputs.add(new CPOperand(parts[pos], ValueType.FP64, DataType.MATRIX));
77+
outputs.add(new CPOperand(parts[pos+1], ValueType.STRING, DataType.FRAME));
78+
return new MultiReturnParameterizedBuiltinCPInstruction(
79+
null, in1, in2, metaReturn, outputs, opcode, str);
7380
}
7481
else {
7582
throw new DMLRuntimeException("Invalid opcode in MultiReturnBuiltin instruction: " + opcode);
@@ -87,9 +94,10 @@ public void processInstruction(ExecutionContext ec) {
8794
// execute block transform encode
8895
MultiColumnEncoder encoder = EncoderFactory.createEncoder(spec, colnames, fin.getNumColumns(), null);
8996
// TODO: Assign #threads in compiler and pass via the instruction string
97+
int k = OptimizerUtils.getTransformNumThreads();
9098
MatrixBlock data = encoder.encode(fin, OptimizerUtils.getTransformNumThreads()); // build and apply
91-
FrameBlock meta = encoder.getMetaData(new FrameBlock(fin.getNumColumns(), ValueType.STRING),
92-
OptimizerUtils.getTransformNumThreads());
99+
FrameBlock meta = !_metaReturn ? new FrameBlock() :
100+
encoder.getMetaData(new FrameBlock(fin.getNumColumns(), ValueType.STRING), k);
93101
meta.setColumnNames(colnames);
94102

95103
// release input and outputs

src/main/java/org/apache/sysds/runtime/instructions/spark/MultiReturnParameterizedBuiltinSPInstruction.java

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -77,10 +77,12 @@
7777

7878
public class MultiReturnParameterizedBuiltinSPInstruction extends ComputationSPInstruction {
7979
protected ArrayList<CPOperand> _outputs;
80+
protected final boolean _metaReturn;
8081

8182
private MultiReturnParameterizedBuiltinSPInstruction(Operator op, CPOperand input1, CPOperand input2,
82-
ArrayList<CPOperand> outputs, String opcode, String istr) {
83+
boolean metaReturn, ArrayList<CPOperand> outputs, String opcode, String istr) {
8384
super(SPType.MultiReturnBuiltin, op, input1, input2, outputs.get(0), opcode, istr);
85+
_metaReturn = metaReturn;
8486
_outputs = outputs;
8587
}
8688

@@ -93,14 +95,17 @@ public static MultiReturnParameterizedBuiltinSPInstruction parseInstruction(Stri
9395
// one input and two outputs
9496
CPOperand in1 = new CPOperand(parts[1]);
9597
CPOperand in2 = new CPOperand(parts[2]);
96-
outputs.add(new CPOperand(parts[3], ValueType.FP64, DataType.MATRIX));
97-
outputs.add(new CPOperand(parts[4], ValueType.STRING, DataType.FRAME));
98-
return new MultiReturnParameterizedBuiltinSPInstruction(null, in1, in2, outputs, opcode, str);
98+
int pos = 3;
99+
boolean metaReturn = true;
100+
if( parts.length == 6 ) //no need for meta data
101+
metaReturn = new CPOperand(parts[pos++]).getLiteral().getBooleanValue();
102+
outputs.add(new CPOperand(parts[pos], ValueType.FP64, DataType.MATRIX));
103+
outputs.add(new CPOperand(parts[pos+1], ValueType.STRING, DataType.FRAME));
104+
return new MultiReturnParameterizedBuiltinSPInstruction(null, in1, in2, metaReturn, outputs, opcode, str);
99105
}
100106
else {
101107
throw new DMLRuntimeException("Invalid opcode in MultiReturnBuiltin instruction: " + opcode);
102108
}
103-
104109
}
105110

106111
@Override
@@ -112,8 +117,8 @@ public void processInstruction(ExecutionContext ec) {
112117
// get input RDD and meta data
113118
FrameObject fo = sec.getFrameObject(input1.getName());
114119
FrameObject fometa = sec.getFrameObject(_outputs.get(1).getName());
115-
JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>) sec.getRDDHandleForFrameObject(fo,
116-
FileFormat.BINARY);
120+
JavaPairRDD<Long, FrameBlock> in = (JavaPairRDD<Long, FrameBlock>)
121+
sec.getRDDHandleForFrameObject(fo, FileFormat.BINARY);
117122
String spec = ec.getScalarInput(input2).getStringValue();
118123
DataCharacteristics mcIn = sec.getDataCharacteristics(input1.getName());
119124
DataCharacteristics mcOut = sec.getDataCharacteristics(output.getName());
@@ -163,7 +168,10 @@ public void processInstruction(ExecutionContext ec) {
163168
// set output and maintain lineage/output characteristics
164169
sec.setRDDHandleForVariable(_outputs.get(0).getName(), out);
165170
sec.addLineageRDD(_outputs.get(0).getName(), input1.getName());
166-
sec.setFrameOutput(_outputs.get(1).getName(), meta);
171+
if( _metaReturn )
172+
sec.setFrameOutput(_outputs.get(1).getName(), meta);
173+
else
174+
sec.setFrameOutput(_outputs.get(1).getName(), new FrameBlock());
167175
}
168176
catch(IOException ex) {
169177
throw new RuntimeException(ex);

0 commit comments

Comments
 (0)