Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -2306,6 +2306,7 @@ public List<Partition> getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta
}

Set<Partition> partitions = Sets.newHashSet();
String defaultPartitionName = HiveConf.getVar(conf, ConfVars.DEFAULT_PARTITION_NAME);

try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
FluentIterable.from(tasks)
Expand All @@ -2316,8 +2317,8 @@ public List<Partition> getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta
PartitionData partitionData = IcebergTableUtil.toPartitionData(task.partition(), spec.partitionType());
String partName = spec.partitionToPath(partitionData);

Map<String, String> partSpecMap = Maps.newLinkedHashMap();
Warehouse.makeSpecFromName(partSpecMap, new Path(partName), null);
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are more calls to Warehouse#makeSpecFromName in IcebergTableUtil#convertNameToMetastorePartition, IcebergQueryCompactor, and potentially other places. Do we need to update them as well?

Copy link
Copy Markdown
Member Author

@deniskuzZ deniskuzZ Jun 1, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The other call sites (e.g. IcebergTableUtil#convertNameToMetastorePartition) don't have access to partitionData — they only receive the partition path string. Without the underlying partition data, there's no way to disambiguate whether "null" in the path originated from a true NULL value or from a literal string "null". The fix here works precisely because we have partitionData available to check the actual value. If those other paths turn out to be affected, they'd need a different approach to obtain that context.

Note: IcebergQueryCompactor should be fine — it deals with compaction, where NULL filtering semantics aren't in play.

Map<String, String> partSpecMap =
IcebergTableUtil.makeSpecFromName(partName, spec, partitionData, defaultPartitionName);

DummyPartition partition = new DummyPartition(hmsTable, partName, partSpecMap);
partitions.add(partition);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -452,6 +452,24 @@ public static void performMetadataDelete(Table icebergTable, String branchName,
deleteFiles.deleteFromRowFilter(exp).commit();
}

/**
* Parses an Iceberg partition path into a Hive-compatible spec map, representing null partition
* values with the Hive default partition name.
*/
Comment thread
deniskuzZ marked this conversation as resolved.
public static Map<String, String> makeSpecFromName(String partName, PartitionSpec spec, PartitionData data,
    String defaultPartitionName) {
  // Seed the result with whatever the partition path itself yields.
  Map<String, String> specByField = Maps.newLinkedHashMap();
  Warehouse.makeSpecFromName(specByField, new Path(partName), null);

  // The path text alone cannot tell a genuine NULL from the literal string "null",
  // so the partition data decides: every position holding null is mapped to the
  // Hive default partition name under the corresponding partition field's name.
  int position = 0;
  for (PartitionField field : spec.fields()) {
    if (data.get(position) == null) {
      specByField.put(field.name(), defaultPartitionName);
    }
    position++;
  }
  return specByField;
}

public static PartitionData toPartitionData(StructLike key, Types.StructType keyType) {
PartitionData keyTemplate = new PartitionData(keyType);
return keyTemplate.copyFor(key);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- Checks that partition pruning on an Iceberg table distinguishes a genuine NULL
-- partition value from the literal string 'null'.
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.fetch.task.conversion=none;
set hive.explain.user=false;

drop table if exists ice_01;
create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg;

-- One row per case: an ordinary value (A), the literal string 'null' (B), a real NULL (C).
insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08';
insert into ice_01 partition (ds) select 'B', 'V2', 'null';
insert into ice_01 partition (ds) select 'C', 'V3', null;

-- Only row C (real NULL) is expected.
explain select key, value, ds from ice_01 where ds is null;
select key, value, ds from ice_01 where ds is null;

-- Rows A and B are expected; the literal 'null' string is a non-null value.
explain select key, value, ds from ice_01 where ds is not null;
select key, value, ds from ice_01 where ds is not null order by key;

-- Only row B (literal string 'null') is expected.
select key, value, ds from ice_01 where ds = 'null';
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
PREHOOK: query: drop table if exists ice_01
PREHOOK: type: DROPTABLE
PREHOOK: Output: database:default
POSTHOOK: query: drop table if exists ice_01
POSTHOOK: type: DROPTABLE
POSTHOOK: Output: database:default
PREHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg
PREHOOK: type: CREATETABLE
PREHOOK: Output: database:default
PREHOOK: Output: default@ice_01
POSTHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg
POSTHOOK: type: CREATETABLE
POSTHOOK: Output: database:default
POSTHOOK: Output: default@ice_01
PREHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_01
POSTHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_01
PREHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null'
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_01
POSTHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null'
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_01
PREHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null
PREHOOK: type: QUERY
PREHOOK: Input: _dummy_database@_dummy_table
PREHOOK: Output: default@ice_01
POSTHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null
POSTHOOK: type: QUERY
POSTHOOK: Input: _dummy_database@_dummy_table
POSTHOOK: Output: default@ice_01
PREHOOK: query: explain select key, value, ds from ice_01 where ds is null
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: explain select key, value, ds from ice_01 where ds is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ice_01
filterExpr: ds is null (type: boolean)
Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), null (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: select key, value, ds from ice_01 where ds is null
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select key, value, ds from ice_01 where ds is null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
C V3 NULL
PREHOOK: query: explain select key, value, ds from ice_01 where ds is not null
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: explain select key, value, ds from ice_01 where ds is not null
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
STAGE DEPENDENCIES:
Stage-1 is a root stage
Stage-0 depends on stages: Stage-1

STAGE PLANS:
Stage: Stage-1
Tez
#### A masked pattern was here ####
Vertices:
Map 1
Map Operator Tree:
TableScan
alias: ice_01
filterExpr: ds is not null (type: boolean)
Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
Select Operator
expressions: key (type: string), value (type: string), ds (type: string)
outputColumnNames: _col0, _col1, _col2
Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
Execution mode: vectorized

Stage: Stage-0
Fetch Operator
limit: -1
Processor Tree:
ListSink

PREHOOK: query: select key, value, ds from ice_01 where ds is not null order by key
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select key, value, ds from ice_01 where ds is not null order by key
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
A V1 2000-04-08
B V2 null
PREHOOK: query: select key, value, ds from ice_01 where ds = 'null'
PREHOOK: type: QUERY
PREHOOK: Input: default@ice_01
PREHOOK: Output: hdfs://### HDFS PATH ###
POSTHOOK: query: select key, value, ds from ice_01 where ds = 'null'
POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_01
POSTHOOK: Output: hdfs://### HDFS PATH ###
B V2 null
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
import java.util.Properties;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.ddl.DDLUtils;
Expand All @@ -33,6 +34,7 @@
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Partition;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
Expand Down Expand Up @@ -73,6 +75,9 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv
throw new HiveException("Internal error : Partition Spec size, " + partSpec.size() +
" doesn't match partition key definition size, " + partKeyTypes.length);
}
String defaultPartitionName = HiveConf.getVar(SessionState.getSessionConf(),
HiveConf.ConfVars.DEFAULT_PARTITION_NAME);

// Create the row object
List<String> partNames = new ArrayList<>();
List<Object> partValues = new ArrayList<>();
Expand All @@ -82,9 +87,15 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv
partNames.add(entry.getKey());
ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector
(TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i++]));
partValues.add(ObjectInspectorConverters.getConverter(
PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
.convert(entry.getValue()));

String partitionValue = entry.getValue();
if (partitionValue.equals(defaultPartitionName)) {
partValues.add(null); // Null for default partition.
} else {
partValues.add(ObjectInspectorConverters.getConverter(
PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
.convert(partitionValue));
}
partObjectInspectors.add(oi);
}
StructObjectInspector partObjectInspector = ObjectInspectorFactory
Expand All @@ -104,7 +115,7 @@ public static Pair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
ExprNodeDesc expr, List<String> partColumnNames,
List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
// Create the row object
List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
List<ObjectInspector> partObjectInspectors = new ArrayList<>();
for (int i = 0; i < partColumnNames.size(); i++) {
partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
partColumnTypeInfos.get(i)));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -563,7 +563,7 @@ public static boolean prunePartitionNames(List<String> partColumnNames,
Warehouse.makeValsFromName(partName, values);

List<Object> convertedValues = new ArrayList<>(values.size());
for(int i=0; i<values.size(); i++) {
for (int i = 0; i < values.size(); i++) {
String partitionValue = values.get(i);
PrimitiveTypeInfo typeInfo = partColumnTypeInfos.get(i);

Expand Down
17 changes: 17 additions & 0 deletions ql/src/test/queries/clientpositive/pcr_null_partition.q
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
-- Exercises partition pruning on a table whose string partition column receives
-- an ordinary value, the literal string 'null', and a real NULL.
-- NOTE(review): expected results below are inferred from the inserted rows; confirm against the .q.out.
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.fetch.task.conversion=none;

drop table if exists pcr_t1;
create table pcr_t1 (key string, value string) partitioned by (ds string);

-- One row per case: an ordinary value (A), the literal string 'null' (B), a real NULL (C).
insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08';
insert into pcr_t1 partition (ds) select 'B', 'V2', 'null';
insert into pcr_t1 partition (ds) select 'C', 'V3', null;

-- Presumably only row C (real NULL) should match.
explain select key, value, ds from pcr_t1 where ds is null;
select key, value, ds from pcr_t1 where ds is null;

-- Presumably rows A and B should match.
explain select key, value, ds from pcr_t1 where ds is not null;
select key, value, ds from pcr_t1 where ds is not null order by key;

-- Presumably only row B (literal string 'null') should match.
select key, value, ds from pcr_t1 where ds = 'null';
40 changes: 24 additions & 16 deletions ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out
Original file line number Diff line number Diff line change
Expand Up @@ -137,10 +137,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
Filter Operator
predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink

PREHOOK: query: explain select * from loc_orc_n4
PREHOOK: type: QUERY
Expand Down Expand Up @@ -228,10 +230,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
Filter Operator
predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink

PREHOOK: query: explain select * from loc_orc_n4
PREHOOK: type: QUERY
Expand Down Expand Up @@ -283,10 +287,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink
Filter Operator
predicate: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
outputColumnNames: _col0, _col1, _col2, _col3
ListSink

PREHOOK: query: explain select * from loc_orc_n4 where year='2001' and year='__HIVE_DEFAULT_PARTITION__'
PREHOOK: type: QUERY
Expand Down Expand Up @@ -475,10 +481,12 @@ STAGE PLANS:
TableScan
alias: loc_orc_n4
filterExpr: (year <> '2001') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
ListSink
Filter Operator
predicate: (year <> '2001') (type: boolean)
Select Operator
expressions: state (type: string), locid (type: int)
outputColumnNames: _col0, _col1
ListSink

PREHOOK: query: explain select * from loc_orc_n4
PREHOOK: type: QUERY
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,11 @@ STAGE PLANS:
alias: dynamic_part_table
filterExpr: ((partcol1 = '1') and (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
GatherStats: false
Select Operator
expressions: intcol (type: string)
outputColumnNames: _col0
ListSink
Filter Operator
isSamplingPred: false
predicate: (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
Select Operator
expressions: intcol (type: string)
outputColumnNames: _col0
ListSink

Loading
Loading