@@ -33,40 +33,32 @@ class SQLHadoopMapReduceCommitProtocol(jobId: String, path: String, isAppend: Bo
   extends HadoopMapReduceCommitProtocol(jobId, path) with Serializable with Logging {

   override protected def setupCommitter(context: TaskAttemptContext): OutputCommitter = {
-    var committer = context.getOutputFormatClass.newInstance().getOutputCommitter(context)
+    val clazz = context.getConfiguration
+      .getClass(SQLConf.OUTPUT_COMMITTER_CLASS.key, null, classOf[OutputCommitter])

-    if (!isAppend) {
-      // If we are appending data to an existing dir, we will only use the output committer
-      // associated with the file output format since it is not safe to use a custom
-      // committer for appending. For example, in S3, direct parquet output committer may
-      // leave partial data in the destination dir when the appending job fails.
-      // See SPARK-8578 for more details.
-      val configuration = context.getConfiguration
-      val clazz =
-        configuration.getClass(SQLConf.OUTPUT_COMMITTER_CLASS.key, null, classOf[OutputCommitter])
+    if (clazz != null) {
+      logInfo(s"Using user defined output committer class ${clazz.getCanonicalName}")

-      if (clazz != null) {
-        logInfo(s"Using user defined output committer class ${clazz.getCanonicalName}")
-
-        // Every output format based on org.apache.hadoop.mapreduce.lib.output.OutputFormat
-        // has an associated output committer. To override this output committer,
-        // we will first try to use the output committer set in SQLConf.OUTPUT_COMMITTER_CLASS.
-        // If a data source needs to override the output committer, it needs to set the
-        // output committer in prepareForWrite method.
-        if (classOf[FileOutputCommitter].isAssignableFrom(clazz)) {
-          // The specified output committer is a FileOutputCommitter.
-          // So, we will use the FileOutputCommitter-specified constructor.
-          val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
-          committer = ctor.newInstance(new Path(path), context)
-        } else {
-          // The specified output committer is just an OutputCommitter.
-          // So, we will use the no-argument constructor.
-          val ctor = clazz.getDeclaredConstructor()
-          committer = ctor.newInstance()
-        }
+      // Every output format based on org.apache.hadoop.mapreduce.lib.output.OutputFormat
+      // has an associated output committer. To override this output committer,
+      // we will first try to use the output committer set in SQLConf.OUTPUT_COMMITTER_CLASS.
+      // If a data source needs to override the output committer, it needs to set the
+      // output committer in prepareForWrite method.
+      if (classOf[FileOutputCommitter].isAssignableFrom(clazz)) {
+        // The specified output committer is a FileOutputCommitter.
+        // So, we will use the FileOutputCommitter-specified constructor.
+        val ctor = clazz.getDeclaredConstructor(classOf[Path], classOf[TaskAttemptContext])
+        ctor.newInstance(new Path(path), context)
+      } else {
+        // The specified output committer is just an OutputCommitter.
+        // So, we will use the no-argument constructor.
+        val ctor = clazz.getDeclaredConstructor()
+        ctor.newInstance()
       }
+    } else {
+      val committer = context.getOutputFormatClass.newInstance().getOutputCommitter(context)
+      logInfo(s"Using output committer class ${committer.getClass.getCanonicalName}")
+      committer
     }
-    logInfo(s"Using output committer class ${committer.getClass.getCanonicalName}")
-    committer
   }
 }
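
For context, here is a minimal sketch of the committer-override path this change preserves. It assumes a running `SparkSession` named `spark` and that the string key behind `SQLConf.OUTPUT_COMMITTER_CLASS` is `spark.sql.sources.outputCommitterClass`; the `AuditingFileOutputCommitter` class is hypothetical and only illustrates why `setupCommitter` picks the `(Path, TaskAttemptContext)` constructor for `FileOutputCommitter` subclasses:

```scala
import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{JobContext, TaskAttemptContext}
import org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter

// Hypothetical committer for illustration. Because it extends
// FileOutputCommitter, setupCommitter instantiates it through the
// (Path, TaskAttemptContext) constructor rather than the no-arg one.
class AuditingFileOutputCommitter(outputPath: Path, context: TaskAttemptContext)
  extends FileOutputCommitter(outputPath, context) {

  override def commitJob(jobContext: JobContext): Unit = {
    super.commitJob(jobContext)
    // e.g. record the successful commit for auditing here
  }
}

// Point Spark SQL writes at the custom committer. The key is
// SQLConf.OUTPUT_COMMITTER_CLASS.key, assumed here to be
// "spark.sql.sources.outputCommitterClass".
spark.conf.set(
  "spark.sql.sources.outputCommitterClass",
  classOf[AuditingFileOutputCommitter].getName)
```

A committer that does not extend `FileOutputCommitter` would instead need a public no-argument constructor, matching the `else` branch in `setupCommitter` above.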