Skip to content

Commit bf0c344

Browse files
committed
[MINOR] Improved code coverage I/O framework (readers/writers)
This patch adds a test that systematically applies the single- and multi-threaded writers/readers for matrices and frames, across all formats, for both dense and sparse data. These tests also revealed bugs in the HDF5 readers/writers, where incorrect data is read in the single-threaded sparse case as well as in the multi-threaded dense and sparse cases.
1 parent d726705 commit bf0c344

File tree

1 file changed

+258
-0
lines changed

1 file changed

+258
-0
lines changed
Lines changed: 258 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,258 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing,
13+
* software distributed under the License is distributed on an
14+
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15+
* KIND, either express or implied. See the License for the
16+
* specific language governing permissions and limitations
17+
* under the License.
18+
*/
19+
20+
package org.apache.sysds.test.functions.io;
21+
22+
import java.util.Arrays;
23+
import java.util.Collection;
24+
25+
import org.junit.Assert;
26+
import org.junit.Test;
27+
import org.junit.runner.RunWith;
28+
import org.junit.runners.Parameterized;
29+
import org.junit.runners.Parameterized.Parameters;
30+
import org.apache.sysds.common.Types.FileFormat;
31+
import org.apache.sysds.common.Types.ValueType;
32+
import org.apache.sysds.runtime.frame.data.FrameBlock;
33+
import org.apache.sysds.runtime.io.FileFormatPropertiesCSV;
34+
import org.apache.sysds.runtime.io.FileFormatPropertiesHDF5;
35+
import org.apache.sysds.runtime.io.FileFormatPropertiesLIBSVM;
36+
import org.apache.sysds.runtime.io.FrameReader;
37+
import org.apache.sysds.runtime.io.FrameReaderBinaryBlock;
38+
import org.apache.sysds.runtime.io.FrameReaderBinaryBlockParallel;
39+
import org.apache.sysds.runtime.io.FrameReaderTextCSV;
40+
import org.apache.sysds.runtime.io.FrameReaderTextCSVParallel;
41+
import org.apache.sysds.runtime.io.FrameReaderTextCell;
42+
import org.apache.sysds.runtime.io.FrameReaderTextCellParallel;
43+
import org.apache.sysds.runtime.io.FrameWriter;
44+
import org.apache.sysds.runtime.io.FrameWriterBinaryBlock;
45+
import org.apache.sysds.runtime.io.FrameWriterBinaryBlockParallel;
46+
import org.apache.sysds.runtime.io.FrameWriterTextCSV;
47+
import org.apache.sysds.runtime.io.FrameWriterTextCSVParallel;
48+
import org.apache.sysds.runtime.io.FrameWriterTextCell;
49+
import org.apache.sysds.runtime.io.FrameWriterTextCellParallel;
50+
import org.apache.sysds.runtime.io.MatrixReader;
51+
import org.apache.sysds.runtime.io.MatrixWriter;
52+
import org.apache.sysds.runtime.io.ReaderBinaryBlock;
53+
import org.apache.sysds.runtime.io.ReaderBinaryBlockParallel;
54+
import org.apache.sysds.runtime.io.ReaderHDF5;
55+
import org.apache.sysds.runtime.io.ReaderHDF5Parallel;
56+
import org.apache.sysds.runtime.io.ReaderTextCSV;
57+
import org.apache.sysds.runtime.io.ReaderTextCSVParallel;
58+
import org.apache.sysds.runtime.io.ReaderTextCell;
59+
import org.apache.sysds.runtime.io.ReaderTextCellParallel;
60+
import org.apache.sysds.runtime.io.ReaderTextLIBSVM;
61+
import org.apache.sysds.runtime.io.ReaderTextLIBSVMParallel;
62+
import org.apache.sysds.runtime.io.WriterBinaryBlock;
63+
import org.apache.sysds.runtime.io.WriterBinaryBlockParallel;
64+
import org.apache.sysds.runtime.io.WriterHDF5;
65+
import org.apache.sysds.runtime.io.WriterHDF5Parallel;
66+
import org.apache.sysds.runtime.io.WriterMatrixMarket;
67+
import org.apache.sysds.runtime.io.WriterMatrixMarketParallel;
68+
import org.apache.sysds.runtime.io.WriterTextCSV;
69+
import org.apache.sysds.runtime.io.WriterTextCSVParallel;
70+
import org.apache.sysds.runtime.io.WriterTextCell;
71+
import org.apache.sysds.runtime.io.WriterTextCellParallel;
72+
import org.apache.sysds.runtime.io.WriterTextLIBSVM;
73+
import org.apache.sysds.runtime.io.WriterTextLIBSVMParallel;
74+
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
75+
import org.apache.sysds.runtime.util.DataConverter;
76+
import org.apache.sysds.runtime.util.UtilFunctions;
77+
import org.apache.sysds.test.AutomatedTestBase;
78+
import org.apache.sysds.test.TestConfiguration;
79+
import org.apache.sysds.test.TestUtils;
80+
81+
@RunWith(value = Parameterized.class)
82+
@net.jcip.annotations.NotThreadSafe
83+
public class SeqParReadTest2 extends AutomatedTestBase {
84+
85+
private final static String TEST_NAME = "SeqParReadTest";
86+
private final static String TEST_DIR = "functions/io/";
87+
private final static String TEST_CLASS_DIR = TEST_DIR + SeqParReadTest2.class.getSimpleName() + "/";
88+
89+
private final static int rows = 1200;
90+
private final static int cols = 300;
91+
private final static ValueType[] schema = UtilFunctions.nCopies(cols, ValueType.FP64);
92+
private final static double eps = 1e-9;
93+
private final boolean _matrix;
94+
private final String _format;
95+
private final boolean _par;
96+
private final double _sparsity;
97+
98+
public SeqParReadTest2(boolean matrix, String format, boolean par, double sparsity) {
99+
_matrix = matrix;
100+
_format = format;
101+
_par = par;
102+
_sparsity = sparsity;
103+
}
104+
105+
@Override
106+
public void setUp() {
107+
TestUtils.clearAssertionInformation();
108+
addTestConfiguration(TEST_NAME,
109+
new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "Rout" }) );
110+
}
111+
112+
@Parameters
113+
public static Collection<Object[]> data() {
114+
Object[][] data = new Object[][] {
115+
//matrix/frame, format, par, sparsity
116+
{true, "text", false, 0.7},
117+
{true, "text", false, 0.1},
118+
{true, "text", true, 0.7},
119+
{true, "text", true, 0.1},
120+
{false, "text", false, 0.7},
121+
{false, "text", false, 0.1},
122+
{false, "text", true, 0.7},
123+
{false, "text", true, 0.1},
124+
{true, "mm", false, 0.7},
125+
{true, "mm", false, 0.1},
126+
{true, "mm", true, 0.7},
127+
{true, "mm", true, 0.1},
128+
{false, "mm", false, 0.7},
129+
{false, "mm", false, 0.1},
130+
{false, "mm", true, 0.7},
131+
{false, "mm", true, 0.1},
132+
{true, "csv", false, 0.7},
133+
{true, "csv", false, 0.1},
134+
{true, "csv", true, 0.7},
135+
{true, "csv", true, 0.1},
136+
{false, "csv", false, 0.7},
137+
{false, "csv", false, 0.1},
138+
{false, "csv", true, 0.7},
139+
{false, "csv", true, 0.1},
140+
{true, "binary", false, 0.7},
141+
{true, "binary", false, 0.1},
142+
{true, "binary", true, 0.7},
143+
{true, "binary", true, 0.1},
144+
{false, "binary", false, 0.7},
145+
{false, "binary", false, 0.1},
146+
{false, "binary", true, 0.7},
147+
{false, "binary", true, 0.1},
148+
{true, "hdf5", false, 0.7},
149+
//{true, "hdf5", false, 0.1}, //FIXME
150+
//{true, "hdf5", true, 0.7},
151+
//{true, "hdf5", true, 0.1},
152+
{true, "libsvm", false, 0.7},
153+
{true, "libsvm", false, 0.1},
154+
{true, "libsvm", true, 0.7},
155+
{true, "libsvm", true, 0.1},
156+
};
157+
return Arrays.asList(data);
158+
}
159+
160+
@Test
161+
public void textWriteRead() {
162+
getAndLoadTestConfiguration(TEST_NAME);
163+
setOutputBuffering(false);
164+
String fname = output("Rout");
165+
166+
MatrixBlock data = MatrixBlock.randOperations(rows, cols, _sparsity, 0, 1, "uniform", 7);
167+
MatrixBlock data2 = null;
168+
169+
try {
170+
if( _matrix ) {
171+
MatrixWriter writer = createMatrixWriter(FileFormat.safeValueOf(_format), _par);
172+
writer.writeMatrixToHDFS(data, fname, rows, cols, 1000, data.getNonZeros());
173+
MatrixReader reader = createMatrixReader(FileFormat.safeValueOf(_format), _par);
174+
data2 = reader.readMatrixFromHDFS(fname, rows, cols, 1000, data.getNonZeros());
175+
}
176+
else {
177+
FrameBlock fdata = DataConverter.convertToFrameBlock(data);
178+
FrameWriter writer = createFrameWriter(FileFormat.safeValueOf(_format), _par);
179+
writer.writeFrameToHDFS(fdata, fname, rows, cols);
180+
FrameReader reader = createFrameReader(FileFormat.safeValueOf(_format), _par);
181+
FrameBlock fdata2 = reader.readFrameFromHDFS(fname, schema, rows, cols);
182+
data2 = DataConverter.convertToMatrixBlock(fdata2);
183+
}
184+
}
185+
catch(Exception ex) {
186+
ex.printStackTrace();
187+
Assert.fail();
188+
}
189+
190+
//compare read content is equivalent to original
191+
if( data2 != null )
192+
TestUtils.compareMatrices(data, data2, eps);
193+
}
194+
195+
@SuppressWarnings("incomplete-switch")
196+
public static MatrixWriter createMatrixWriter(FileFormat fmt, boolean par) {
197+
switch(fmt) {
198+
case TEXT: return par? new WriterTextCellParallel() : new WriterTextCell();
199+
case MM: return par? new WriterMatrixMarketParallel() : new WriterMatrixMarket();
200+
case CSV: return par ?
201+
new WriterTextCSVParallel(new FileFormatPropertiesCSV()) :
202+
new WriterTextCSV(new FileFormatPropertiesCSV());
203+
case LIBSVM: return par ?
204+
new WriterTextLIBSVMParallel(new FileFormatPropertiesLIBSVM()) :
205+
new WriterTextLIBSVM(new FileFormatPropertiesLIBSVM());
206+
case BINARY: return par ? new WriterBinaryBlockParallel(3) : new WriterBinaryBlock(3);
207+
case HDF5: return par ?
208+
new WriterHDF5Parallel(new FileFormatPropertiesHDF5()) :
209+
new WriterHDF5(new FileFormatPropertiesHDF5());
210+
}
211+
return null;
212+
}
213+
214+
@SuppressWarnings("incomplete-switch")
215+
public static MatrixReader createMatrixReader(FileFormat fmt, boolean par) {
216+
switch(fmt) {
217+
case TEXT: return par? new ReaderTextCellParallel(fmt) : new ReaderTextCell(fmt);
218+
case MM: return par? new ReaderTextCell(fmt) : new ReaderTextCell(fmt);
219+
case CSV: return par ?
220+
new ReaderTextCSVParallel(new FileFormatPropertiesCSV()) :
221+
new ReaderTextCSV(new FileFormatPropertiesCSV());
222+
case LIBSVM: return par ?
223+
new ReaderTextLIBSVMParallel(new FileFormatPropertiesLIBSVM()) :
224+
new ReaderTextLIBSVM(new FileFormatPropertiesLIBSVM());
225+
case BINARY: return par ? new ReaderBinaryBlockParallel(false) : new ReaderBinaryBlock(false);
226+
case HDF5: return par ?
227+
new ReaderHDF5Parallel(new FileFormatPropertiesHDF5()) :
228+
new ReaderHDF5(new FileFormatPropertiesHDF5());
229+
}
230+
return null;
231+
}
232+
233+
@SuppressWarnings("incomplete-switch")
234+
public static FrameWriter createFrameWriter(FileFormat fmt, boolean par) {
235+
switch(fmt) {
236+
case TEXT: return par? new FrameWriterTextCellParallel() : new FrameWriterTextCell();
237+
case MM: return par? new FrameWriterTextCellParallel() : new FrameWriterTextCell();
238+
case CSV: return par ?
239+
new FrameWriterTextCSVParallel(new FileFormatPropertiesCSV()) :
240+
new FrameWriterTextCSV(new FileFormatPropertiesCSV());
241+
case BINARY: return par ? new FrameWriterBinaryBlockParallel() : new FrameWriterBinaryBlock();
242+
}
243+
return null;
244+
}
245+
246+
@SuppressWarnings("incomplete-switch")
247+
public static FrameReader createFrameReader(FileFormat fmt, boolean par) {
248+
switch(fmt) {
249+
case TEXT: return par? new FrameReaderTextCellParallel() : new FrameReaderTextCell();
250+
case MM: return par? new FrameReaderTextCell() : new FrameReaderTextCell();
251+
case CSV: return par ?
252+
new FrameReaderTextCSVParallel(new FileFormatPropertiesCSV()) :
253+
new FrameReaderTextCSV(new FileFormatPropertiesCSV());
254+
case BINARY: return par ? new FrameReaderBinaryBlockParallel() : new FrameReaderBinaryBlock();
255+
}
256+
return null;
257+
}
258+
}

0 commit comments

Comments
 (0)