Skip to content

Commit 90bf407

Browse files
authored
HIVE-29362: New configuration to display EXPLAIN FORMATTED in human-readable format (#6230)
1. Add new property to control indentation of EXPLAIN FORMATTED result 2. Create the appropriate JsonParser in ExplainTask based on explain configurations 3. Drop the now-unused and redundant JsonParserFactory 4. Extract the logic for augmenting RS outputs into a separate method dedicated to this purpose
1 parent 844df7e commit 90bf407

28 files changed

+21050
-92
lines changed

common/src/java/org/apache/hadoop/hive/common/jsonexplain/JsonParserFactory.java

Lines changed: 0 additions & 40 deletions
This file was deleted.

common/src/java/org/apache/hadoop/hive/conf/HiveConf.java

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3765,6 +3765,10 @@ public static enum ConfVars {
37653765
+ "prints) the same node multiple times. The number of visits can become exponential and make the server "
37663766
+ "crash or become unresponsive so this limit acts as a safety net to fail-fast the problematic query and "
37673767
+ "avoid bringing down the entire server."),
3768+
@InterfaceAudience.Private
3769+
HIVE_EXPLAIN_FORMATTED_INDENT("hive.explain.formatted.indent", false,
3770+
"Whether to indent the JSON output of EXPLAIN FORMATTED for better readability. " +
3771+
"The property is private to be used in tests only."),
37683772
// prefix used to auto generated column aliases (this should be started with '_')
37693773
HIVE_AUTOGEN_COLUMNALIAS_PREFIX_LABEL("hive.autogen.columnalias.prefix.label", "_c",
37703774
"String used as a prefix when auto generating column alias.\n" +

data/conf/iceberg/tez/hive-site.xml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -320,4 +320,8 @@
320320
<name>hive.lock.sleep.between.retries</name>
321321
<value>2</value>
322322
</property>
323+
<property>
324+
<name>hive.explain.formatted.indent</name>
325+
<value>true</value>
326+
</property>
323327
</configuration>

data/conf/llap/hive-site.xml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,7 +294,10 @@
294294
<name>hive.explain.user</name>
295295
<value>false</value>
296296
</property>
297-
297+
<property>
298+
<name>hive.explain.formatted.indent</name>
299+
<value>true</value>
300+
</property>
298301
<property>
299302
<name>hive.join.inner.residual</name>
300303
<value>true</value>

iceberg/iceberg-handler/src/test/results/positive/iceberg_explain_formatted.q.out

Lines changed: 144 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,147 @@ POSTHOOK: query: explain formatted select * from test
1414
POSTHOOK: type: QUERY
1515
POSTHOOK: Input: default@test
1616
POSTHOOK: Output: hdfs://### HDFS PATH ###
17-
{"CBOPlan":{"rels":[{"id":"0","relOp":"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan","table":["default","test"],"table:alias":"test","inputs":[],"rowCount":1,"avgRowSize":8,"rowType":{"fields":[{"type":"INTEGER","nullable":true,"name":"a"},{"type":"INTEGER","nullable":true,"name":"b"},{"type":"INTEGER","nullable":true,"name":"PARTITION__SPEC__ID"},{"type":"BIGINT","nullable":true,"name":"PARTITION__HASH"},{"type":"VARCHAR","nullable":true,"precision":2147483647,"name":"FILE__PATH"},{"type":"BIGINT","nullable":true,"name":"ROW__POSITION"},{"type":"VARCHAR","nullable":true,"precision":2147483647,"name":"PARTITION__PROJECTION"},{"type":"BIGINT","nullable":true,"name":"SNAPSHOT__ID"}],"nullable":false},"colStats":[{"name":"a","ndv":1,"minValue":-2147483648,"maxValue":2147483647},{"name":"b","ndv":1,"minValue":-2147483648,"maxValue":2147483647}]},{"id":"1","relOp":"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject","fields":["test.a","test.b"],"exprs":[{"input":0,"name":"$0"},{"input":1,"name":"$1"}],"rowCount":1}]},"optimizedSQL":"SELECT `a` AS `test.a`, `b` AS `test.b`\nFROM `default`.`test`","cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-0":{"ROOT STAGE":"TRUE"}},"STAGE PLANS":{"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"TableScan":{"alias:":"test","columns:":["a","b"],"database:":"default","table:":"test","isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"a (type: int), b (type: int)","columnExprMap:":{"_col0":"a","_col1":"b"},"outputColumnNames:":["_col0","_col1"],"OperatorId:":"SEL_1","children":{"ListSink":{"OperatorId:":"LIST_SINK_3"}}}}}}}}}}
17+
{
18+
"CBOPlan": {
19+
"rels": [
20+
{
21+
"id": "0",
22+
"relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan",
23+
"table": [
24+
"default",
25+
"test"
26+
],
27+
"table:alias": "test",
28+
"inputs": [],
29+
"rowCount": 1,
30+
"avgRowSize": 8,
31+
"rowType": {
32+
"fields": [
33+
{
34+
"type": "INTEGER",
35+
"nullable": true,
36+
"name": "a"
37+
},
38+
{
39+
"type": "INTEGER",
40+
"nullable": true,
41+
"name": "b"
42+
},
43+
{
44+
"type": "INTEGER",
45+
"nullable": true,
46+
"name": "PARTITION__SPEC__ID"
47+
},
48+
{
49+
"type": "BIGINT",
50+
"nullable": true,
51+
"name": "PARTITION__HASH"
52+
},
53+
{
54+
"type": "VARCHAR",
55+
"nullable": true,
56+
"precision": 2147483647,
57+
"name": "FILE__PATH"
58+
},
59+
{
60+
"type": "BIGINT",
61+
"nullable": true,
62+
"name": "ROW__POSITION"
63+
},
64+
{
65+
"type": "VARCHAR",
66+
"nullable": true,
67+
"precision": 2147483647,
68+
"name": "PARTITION__PROJECTION"
69+
},
70+
{
71+
"type": "BIGINT",
72+
"nullable": true,
73+
"name": "SNAPSHOT__ID"
74+
}
75+
],
76+
"nullable": false
77+
},
78+
"colStats": [
79+
{
80+
"name": "a",
81+
"ndv": 1,
82+
"minValue": -2147483648,
83+
"maxValue": 2147483647
84+
},
85+
{
86+
"name": "b",
87+
"ndv": 1,
88+
"minValue": -2147483648,
89+
"maxValue": 2147483647
90+
}
91+
]
92+
},
93+
{
94+
"id": "1",
95+
"relOp": "org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject",
96+
"fields": [
97+
"test.a",
98+
"test.b"
99+
],
100+
"exprs": [
101+
{
102+
"input": 0,
103+
"name": "$0"
104+
},
105+
{
106+
"input": 1,
107+
"name": "$1"
108+
}
109+
],
110+
"rowCount": 1
111+
}
112+
]
113+
},
114+
"optimizedSQL": "SELECT `a` AS `test.a`, `b` AS `test.b`\nFROM `default`.`test`",
115+
"cboInfo": "Plan optimized by CBO.",
116+
"STAGE DEPENDENCIES": {
117+
"Stage-0": {
118+
"ROOT STAGE": "TRUE"
119+
}
120+
},
121+
"STAGE PLANS": {
122+
"Stage-0": {
123+
"Fetch Operator": {
124+
"limit:": "-1",
125+
"Processor Tree:": {
126+
"TableScan": {
127+
"alias:": "test",
128+
"columns:": [
129+
"a",
130+
"b"
131+
],
132+
"database:": "default",
133+
"table:": "test",
134+
"isTempTable:": "false",
135+
"OperatorId:": "TS_0",
136+
"children": {
137+
"Select Operator": {
138+
"expressions:": "a (type: int), b (type: int)",
139+
"columnExprMap:": {
140+
"_col0": "a",
141+
"_col1": "b"
142+
},
143+
"outputColumnNames:": [
144+
"_col0",
145+
"_col1"
146+
],
147+
"OperatorId:": "SEL_1",
148+
"children": {
149+
"ListSink": {
150+
"OperatorId:": "LIST_SINK_3"
151+
}
152+
}
153+
}
154+
}
155+
}
156+
}
157+
}
158+
}
159+
}
160+
}

ql/src/java/org/apache/hadoop/hive/ql/exec/ExplainTask.java

Lines changed: 45 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import org.apache.commons.lang3.tuple.ImmutablePair;
4747
import org.apache.hadoop.fs.Path;
4848
import org.apache.hadoop.hive.common.jsonexplain.JsonParser;
49-
import org.apache.hadoop.hive.common.jsonexplain.JsonParserFactory;
49+
import org.apache.hadoop.hive.common.jsonexplain.tez.TezJsonParser;
5050
import org.apache.hadoop.hive.conf.HiveConf;
5151
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
5252
import org.apache.hadoop.hive.metastore.api.FieldSchema;
@@ -549,31 +549,32 @@ public int execute() {
549549
Path resFile = work.getResFile();
550550
OutputStream outS = resFile.getFileSystem(conf).create(resFile);
551551
out = new PrintStream(outS, false, StandardCharsets.UTF_8.name());
552+
JsonParser jsonParser = createParser();
552553

553554
if(work.isDDL()){
554555
getDDLPlan(out);
555556
} else if (work.isCbo()) {
556557
JSONObject jsonCBOPlan = getJSONCBOPlan(out, work);
557558
if (work.isFormatted()) {
558-
out.print(jsonCBOPlan);
559+
jsonParser.print(jsonCBOPlan, out);
559560
}
560561
} else if (work.isLogical()) {
561562
JSONObject jsonLogicalPlan = getJSONLogicalPlan(out, work);
562563
if (work.isFormatted()) {
563-
out.print(jsonLogicalPlan);
564+
jsonParser.print(jsonLogicalPlan, out);
564565
}
565566
} else if (work.isAuthorize()) {
566567
JSONObject jsonAuth = collectAuthRelatedEntities(out, work);
567568
if (work.isFormatted()) {
568-
out.print(jsonAuth);
569+
jsonParser.print(jsonAuth, out);
569570
}
570571
} else if (work.getDependency()) {
571572
JSONObject jsonDependencies = getJSONDependencies(work);
572-
out.print(jsonDependencies);
573+
jsonParser.print(jsonDependencies, out);
573574
} else if (work.isLocks()) {
574575
JSONObject jsonLocks = getLocks(out, work);
575576
if(work.isFormatted()) {
576-
out.print(jsonLocks);
577+
jsonParser.print(jsonLocks, out);
577578
}
578579
} else if (work.isAst()) {
579580
// Print out the parse AST
@@ -582,9 +583,6 @@ public int execute() {
582583
}
583584
} else {
584585
if (work.isUserLevelExplain()) {
585-
// Because of the implementation of the JsonParserFactory, we are sure
586-
// that we can get a TezJsonParser.
587-
JsonParser jsonParser = JsonParserFactory.getParser(conf);
588586
work.getConfig().setFormatted(true);
589587
JSONObject jsonPlan = getJSONPlan(out, work);
590588
if (work.getCboInfo() != null) {
@@ -603,13 +601,8 @@ public int execute() {
603601
} else {
604602
JSONObject jsonPlan = getJSONPlan(out, work);
605603
if (work.isFormatted()) {
606-
// use the parser to get the output operators of RS
607-
JsonParser jsonParser = JsonParserFactory.getParser(conf);
608-
if (jsonParser != null) {
609-
jsonParser.print(jsonPlan, null);
610-
LOG.info("JsonPlan is augmented to {}", jsonPlan);
611-
}
612-
out.print(jsonPlan);
604+
augmentJSONWithRSOutputs(jsonPlan);
605+
jsonParser.print(jsonPlan, out);
613606
}
614607
}
615608
}
@@ -628,6 +621,42 @@ public int execute() {
628621
}
629622
}
630623

624+
/**
625+
* Augments the JSON plan with the outputs names of ReduceSink operators.
626+
* @param jsonPlan the JSON plan object that is augmented
627+
*/
628+
private void augmentJSONWithRSOutputs(JSONObject jsonPlan) throws Exception {
629+
if ("tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
630+
// The JSON object is augmented via side effect of TezJsonParser.print()
631+
new TezJsonParser().print(jsonPlan, null);
632+
}
633+
}
634+
635+
/**
636+
* Creates the appropriate JsonParser based on the ExplainWork configuration.
637+
* @return a JsonParser
638+
*/
639+
private JsonParser createParser() {
640+
final JsonParser parser;
641+
if (HiveConf.getBoolVar(conf, ConfVars.HIVE_EXPLAIN_FORMATTED_INDENT)) {
642+
parser = (json, out) -> out.print(json.toString(2));
643+
} else {
644+
parser = (json, out) -> out.print(json.toString());
645+
}
646+
if (work.isCbo() || work.isLogical() || work.isAuthorize() || work.getDependency() || work.isLocks()) {
647+
return parser;
648+
} else if (work.isUserLevelExplain()) {
649+
if ("tez".equals(HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE))) {
650+
return new TezJsonParser();
651+
} else {
652+
LOG.error("Running explain user level is only supported for Tez engine.");
653+
return (obj, out) -> {
654+
};
655+
}
656+
}
657+
return parser;
658+
}
659+
631660
@VisibleForTesting
632661
JSONObject collectAuthRelatedEntities(PrintStream out, ExplainWork work)
633662
throws Exception {
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
CREATE TABLE person (id INT, fname STRING);
2+
set hive.explain.formatted.indent=true;
3+
EXPLAIN FORMATTED SELECT fname FROM person WHERE id > 100;
4+
set hive.explain.formatted.indent=false;
5+
EXPLAIN FORMATTED SELECT fname FROM person WHERE id > 100;

ql/src/test/results/clientpositive/llap/authorization_explain.q.out

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,21 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
4949
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
5050
POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
5151
#### A masked pattern was here ####
52+
{
53+
"INPUTS": [
54+
"default@src",
55+
"default@srcpart",
56+
"default@srcpart@ds=2008-04-08/hr=11",
57+
"default@srcpart@ds=2008-04-08/hr=12",
58+
"default@srcpart@ds=2008-04-09/hr=11",
59+
"default@srcpart@ds=2008-04-09/hr=12"
60+
],
61+
"OUTPUTS": [
62+
#### A masked pattern was here ####
63+
],
64+
"CURRENT_USER": "hive_test_user",
65+
"OPERATION": "QUERY"
66+
}
5267
PREHOOK: query: explain authorization use default
5368
PREHOOK: type: SWITCHDATABASE
5469
PREHOOK: Input: database:default
@@ -68,4 +83,11 @@ PREHOOK: Input: database:default
6883
POSTHOOK: query: explain formatted authorization use default
6984
POSTHOOK: type: SWITCHDATABASE
7085
POSTHOOK: Input: database:default
71-
{"INPUTS":["database:default"],"OUTPUTS":[],"CURRENT_USER":"hive_test_user","OPERATION":"SWITCHDATABASE"}
86+
{
87+
"INPUTS": [
88+
"database:default"
89+
],
90+
"OUTPUTS": [],
91+
"CURRENT_USER": "hive_test_user",
92+
"OPERATION": "SWITCHDATABASE"
93+
}

0 commit comments

Comments
 (0)