5757import org .apache .sysds .runtime .meta .MatrixCharacteristics ;
5858import org .apache .sysds .runtime .util .CommonThreadPool ;
5959import org .apache .sysds .utils .stats .InfrastructureAnalyzer ;
60+ import org .apache .sysds .runtime .util .HDFSTool ;
6061
6162public final class WriterCompressed extends MatrixWriter {
6263
@@ -146,7 +147,7 @@ private void write(MatrixBlock src, final String fname, final int blen) throws I
146147 }
147148
148149 fs = IOUtilFunctions .getFileSystem (new Path (fname ), job );
149-
150+
150151 int k = OptimizerUtils .getParallelBinaryWriteParallelism ();
151152
152153 k = Math .min (k , (int )(src .getInMemorySize () / InfrastructureAnalyzer .getBlockSize (fs )));
@@ -213,8 +214,6 @@ private void writeMultiBlockCompressedSingleThread(MatrixBlock mb, final int rle
213214 throws IOException {
214215 try {
215216 final CompressedMatrixBlock cmb = (CompressedMatrixBlock ) mb ;
216-
217- setupWrite ();
218217 final Path path = new Path (fname );
219218 Writer w = generateWriter (job , path , fs );
220219 for (int bc = 0 ; bc * blen < clen ; bc ++) {// column blocks
@@ -244,7 +243,6 @@ private void writeMultiBlockCompressedSingleThread(MatrixBlock mb, final int rle
244243 private void writeMultiBlockCompressedParallel (MatrixBlock b , final int rlen , final int clen , final int blen , int k )
245244 throws IOException {
246245
247- setupWrite ();
248246 final ExecutorService pool = CommonThreadPool .get (k );
249247 try {
250248 final ArrayList <Callable <Object >> tasks = new ArrayList <>();
@@ -265,7 +263,8 @@ private void writeMultiBlockCompressedParallel(MatrixBlock b, final int rlen, fi
265263 final int colBlocks = (int ) Math .ceil ((double ) clen / blen );
266264 final int nBlocks = (int ) Math .ceil ((double ) rlen / blen );
267265 final int blocksPerThread = Math .max (1 , nBlocks * colBlocks / k );
268-
266+ HDFSTool .deleteFileIfExistOnHDFS (new Path (fname + ".dict" ), job );
267+
269268 int i = 0 ;
270269 for (int bc = 0 ; bc * blen < clen ; bc ++) {// column blocks
271270 final int sC = bc * blen ;
@@ -307,13 +306,6 @@ private void writeMultiBlockCompressedParallel(MatrixBlock b, final int rlen, fi
307306 }
308307 }
309308
/**
 * Currently a no-op: every statement in the body is commented out, so calling this method has no effect.
 * The commented-out lines sketch deleting an existing file at fname and creating the target directory.
 *
 * @throws IOException declared in the signature; nothing in the present body can raise it
 */
310- private void setupWrite () throws IOException {
311- // final Path path = new Path(fname);
312- // final JobConf job = ConfigurationManager.getCachedJobConf();
313- // HDFSTool.deleteFileIfExistOnHDFS(path, job);
314- // HDFSTool.createDirIfNotExistOnHDFS(path, DMLConfig.DEFAULT_SHARED_DIR_PERMISSION);
315- }
316-
/**
 * Builds the Path of a single part file for this writer.
 *
 * @param id value passed to IOUtilFunctions.getPartFileName to obtain the part-file name
 * @return a new Path built from fname (first argument) and the part-file name (second argument);
 *         presumably Hadoop's Path(parent, child) form, i.e. the part file placed under fname. TODO confirm
 */
317309 private Path getPath (int id ) {
318310 return new Path (fname , IOUtilFunctions .getPartFileName (id ));
319311 }
@@ -397,6 +389,7 @@ protected DictWriteTask(String fname, List<IDictionary> dicts, int id) {
397389 public Object call () throws Exception {
398390
399391 Path p = new Path (fname + ".dict" , IOUtilFunctions .getPartFileName (id ));
392+ HDFSTool .deleteFileIfExistOnHDFS (p , job );
400393 try (Writer w = SequenceFile .createWriter (job , Writer .file (p ), //
401394 Writer .bufferSize (4096 ), //
402395 Writer .keyClass (DictWritable .K .class ), //
0 commit comments