3838import org .apache .sysds .runtime .io .hdf5 .H5Constants ;
3939import org .apache .sysds .runtime .matrix .data .MatrixBlock ;
4040import org .apache .sysds .runtime .util .CommonThreadPool ;
41+ import org .apache .sysds .runtime .util .HDFSTool ;
4142
4243public class ReaderHDF5Parallel extends ReaderHDF5 {
4344
@@ -46,7 +47,7 @@ public class ReaderHDF5Parallel extends ReaderHDF5 {
4647
// Constructor: forwards props to the superclass constructor and then sets _numThreads.
// Diff note: the removed ('-') and added ('+') lines differ only in the OptimizerUtils
// getter used; getParallelTextReadParallelism() is replaced by
// getParallelBinaryReadParallelism().
4748 public ReaderHDF5Parallel (FileFormatPropertiesHDF5 props ) {
4849 super (props );
49- _numThreads = OptimizerUtils .getParallelTextReadParallelism ();
50+ _numThreads = OptimizerUtils .getParallelBinaryReadParallelism ();
5051 }
5152
5253 @ Override
@@ -69,26 +70,31 @@ public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int bl
6970 // allocate output matrix block
7071 ArrayList <Path > files = new ArrayList <>();
7172 files .add (path );
72- MatrixBlock src = computeHDF5Size (files , fs , _props .getDatasetName ());
73-
73+ MatrixBlock src = computeHDF5Size (files , fs , _props .getDatasetName (), estnnz );
74+ int numParts = Math .min (files .size (), _numThreads );
75+
7476 //create and execute tasks
7577 ExecutorService pool = CommonThreadPool .get (_numThreads );
7678 try {
7779 int bufferSize = (src .getNumColumns () * src .getNumRows ()) * 8 + H5Constants .STATIC_HEADER_SIZE ;
7880 ArrayList <ReadHDF5Task > tasks = new ArrayList <>();
7981 rlen = src .getNumRows ();
80- int blklen = (int ) Math .ceil ((double ) rlen / _numThreads );
82+ int blklen = (int ) Math .ceil ((double ) rlen / numParts );
8183 for (int i = 0 ; i < _numThreads & i * blklen < rlen ; i ++) {
8284 int rl = i * blklen ;
8385 int ru = (int ) Math .min ((i + 1 ) * blklen , rlen );
84- BufferedInputStream bis = new BufferedInputStream (fs .open (path ), bufferSize );
86+ Path newPath = HDFSTool .isDirectory (fs , path ) ?
87+ new Path (path , IOUtilFunctions .getPartFileName (i )) : path ;
88+ BufferedInputStream bis = new BufferedInputStream (fs .open (newPath ), bufferSize );
8589
8690 //BufferedInputStream bis, String datasetName, MatrixBlock src, MutableInt rl, int ru
87- tasks .add (new ReadHDF5Task (bis , _props .getDatasetName (), src , rl , ru ));
91+ tasks .add (new ReadHDF5Task (bis , _props .getDatasetName (), src , rl , ru , clen , blklen ));
8892 }
8993
90- for (Future <Object > task : pool .invokeAll (tasks ))
91- task .get ();
94+ long nnz = 0 ;
95+ for (Future <Long > task : pool .invokeAll (tasks ))
96+ nnz += task .get ();
97+ src .setNonZeros (nnz );
9298
9399 return src ;
94100 }
@@ -102,31 +108,36 @@ public MatrixBlock readMatrixFromHDFS(String fname, long rlen, long clen, int bl
102108
// Reads a matrix from an input stream by delegating to a freshly constructed
// ReaderHDF5 built from _props; all five arguments are passed through unchanged
// and the delegate's result is returned as-is.
// Diff note: this hunk only moves the opening brace onto its own line; the
// delegation statement itself is an unchanged context line.
103109 @ Override
104110 public MatrixBlock readMatrixFromInputStream (InputStream is , long rlen , long clen , int blen , long estnnz )
105- throws IOException , DMLRuntimeException {
106-
111+ throws IOException , DMLRuntimeException
112+ {
107113 return new ReaderHDF5 (_props ).readMatrixFromInputStream (is , rlen , clen , blen , estnnz );
108114 }
109115
// Callable unit of work submitted to the thread pool by readMatrixFromHDFS.
// It stores its constructor arguments and, when run, forwards them to
// readMatrixFromHDF5.
// Diff note: the task type changes from Callable<Object> to Callable<Long>, so the
// value returned by readMatrixFromHDF5 now reaches the caller. The caller visible in
// this diff sums the task results and passes the total to src.setNonZeros(...).
110- private static class ReadHDF5Task implements Callable <Object > {
116+ private static class ReadHDF5Task implements Callable <Long > {
111117
// Stream opened by the caller for this task. It is not closed anywhere in this class.
// NOTE(review): confirm whether readMatrixFromHDF5 closes it.
112118 private final BufferedInputStream _bis ;
113119 private final String _datasetName ;
// Output block; the caller passes the same MatrixBlock instance to every task it creates.
114120 private final MatrixBlock _src ;
// Row bounds computed by the caller as i*blklen and min((i+1)*blklen, rlen).
// Presumably a half-open range [rl, ru); verify against readMatrixFromHDF5.
115121 private final int _rl ;
116122 private final int _ru ;
// New in this diff: forwarded to readMatrixFromHDF5 in place of the former literal 0, 0.
// The caller passes its clen argument and its per-task row count (blklen) here.
123+ private final long _clen ;
124+ private final int _blen ;
117125
// Diff note: the constructor gains the clen and blen parameters; the old
// five-argument signature is the removed ('-') line.
118- public ReadHDF5Task (BufferedInputStream bis , String datasetName , MatrixBlock src , int rl , int ru ) {
126+ public ReadHDF5Task (BufferedInputStream bis , String datasetName , MatrixBlock src ,
127+ int rl , int ru , long clen , int blen )
128+ {
119129 _bis = bis ;
120130 _datasetName = datasetName ;
121131 _src = src ;
122132 _rl = rl ;
123133 _ru = ru ;
134+ _clen = clen ;
135+ _blen = blen ;
124136 }
125137
// Runs the read and returns whatever readMatrixFromHDF5 returns.
// Diff note: the old version passed 0, 0 as the last two arguments, discarded the
// result and returned null.
126138 @ Override
127- public Object call () throws IOException {
128- readMatrixFromHDF5 (_bis , _datasetName , _src , _rl , _ru , 0 , 0 );
129- return null ;
139+ public Long call () throws IOException {
140+ return readMatrixFromHDF5 (_bis , _datasetName , _src , _rl , _ru , _clen , _blen );
130141 }
131142 }
132143}
0 commit comments