apache · deniskuzZ · May 29, 2026 · Jun 2, 2026 · Jun 2, 2026 · zabetak
diff --git a/...g/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java b/...g/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/HiveIcebergStorageHandler.java
@@ -2306,6 +2306,7 @@ public List<Partition> getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta
     }
 
     Set<Partition> partitions = Sets.newHashSet();
+    String defaultPartitionName = HiveConf.getVar(conf, ConfVars.DEFAULT_PARTITION_NAME);
 
     try (CloseableIterable<FileScanTask> tasks = scan.planFiles()) {
       FluentIterable.from(tasks)
@@ -2316,8 +2317,8 @@ public List<Partition> getPartitionsByExpr(org.apache.hadoop.hive.ql.metadata.Ta
             PartitionData partitionData = IcebergTableUtil.toPartitionData(task.partition(), spec.partitionType());
             String partName = spec.partitionToPath(partitionData);
 
-            Map<String, String> partSpecMap = Maps.newLinkedHashMap();
-            Warehouse.makeSpecFromName(partSpecMap, new Path(partName), null);
+            Map<String, String> partSpecMap =
+                IcebergTableUtil.makeSpecFromName(partName, spec, partitionData, defaultPartitionName);
 
             DummyPartition partition = new DummyPartition(hmsTable, partName, partSpecMap);
             partitions.add(partition);

diff --git a/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java b/iceberg/iceberg-handler/src/main/java/org/apache/iceberg/mr/hive/IcebergTableUtil.java
@@ -452,6 +452,24 @@ public static void performMetadataDelete(Table icebergTable, String branchName,
     deleteFiles.deleteFromRowFilter(exp).commit();
   }
 
+  /**
+   * Builds a Hive partition spec map from an Iceberg partition path: the path is parsed first, then the
+   * entry of every partition field whose value in {@code data} is null is set to {@code defaultPartitionName}.
+   */
+  public static Map<String, String> makeSpecFromName(String partName, PartitionSpec spec, PartitionData data,
+      String defaultPartitionName) {
+    Map<String, String> hiveSpec = Maps.newLinkedHashMap();
+    Warehouse.makeSpecFromName(hiveSpec, new Path(partName), null);
+
+    int pos = 0; // position of the current field inside data; advances in step with spec.fields()
+    for (PartitionField field : spec.fields()) {
+      if (data.get(pos++) == null) {
+        hiveSpec.put(field.name(), defaultPartitionName);
+      }
+    }
+    return hiveSpec;
+  }
+
   public static PartitionData toPartitionData(StructLike key, Types.StructType keyType) {
     PartitionData keyTemplate = new PartitionData(keyType);
     return keyTemplate.copyFor(key);

diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_pcr_null_partition.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_pcr_null_partition.q
@@ -0,0 +1,18 @@
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.fetch.task.conversion=none;
+set hive.explain.user=false;
+
+drop table if exists ice_01;
+create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg;
+
+insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08';
+insert into ice_01 partition (ds) select 'B', 'V2', 'null';
+insert into ice_01 partition (ds) select 'C', 'V3', null;
+
+explain select key, value, ds from ice_01 where ds is null;
+select key, value, ds from ice_01 where ds is null;
+
+explain select key, value, ds from ice_01 where ds is not null;
+select key, value, ds from ice_01 where ds is not null order by key;
+
+select key, value, ds from ice_01 where ds = 'null';
diff --git a/iceberg/iceberg-handler/src/test/results/positive/iceberg_pcr_null_partition.q.out b/iceberg/iceberg-handler/src/test/results/positive/iceberg_pcr_null_partition.q.out
@@ -0,0 +1,150 @@
+PREHOOK: query: drop table if exists ice_01
+PREHOOK: type: DROPTABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: drop table if exists ice_01
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: database:default
+PREHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_01
+POSTHOOK: query: create external table ice_01 (key string, value string) partitioned by (ds string) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_01
+PREHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_01
+POSTHOOK: query: insert into ice_01 partition (ds) select 'A', 'V1', '2000-04-08'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_01
+PREHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null'
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_01
+POSTHOOK: query: insert into ice_01 partition (ds) select 'B', 'V2', 'null'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_01
+PREHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_01
+POSTHOOK: query: insert into ice_01 partition (ds) select 'C', 'V3', null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_01
+PREHOOK: query: explain select key, value, ds from ice_01 where ds is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_01
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select key, value, ds from ice_01 where ds is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_01
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: ice_01
+                  filterExpr: ds is null (type: boolean)
+                  Statistics: Num rows: 1 Data size: 171 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), null (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 1 Data size: 255 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from ice_01 where ds is null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_01
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select key, value, ds from ice_01 where ds is null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_01
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+C	V3	NULL
+PREHOOK: query: explain select key, value, ds from ice_01 where ds is not null
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_01
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select key, value, ds from ice_01 where ds is not null
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_01
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1 
+            Map Operator Tree:
+                TableScan
+                  alias: ice_01
+                  filterExpr: ds is not null (type: boolean)
+                  Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: key (type: string), value (type: string), ds (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 2 Data size: 530 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select key, value, ds from ice_01 where ds is not null order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_01
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select key, value, ds from ice_01 where ds is not null order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_01
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+A	V1	2000-04-08
+B	V2	null
+PREHOOK: query: select key, value, ds from ice_01 where ds = 'null'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_01
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select key, value, ds from ice_01 where ds = 'null'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_01
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+B	V2	null
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartExprEvalUtils.java
@@ -25,6 +25,7 @@
 import java.util.Properties;
 
 import org.apache.commons.lang3.tuple.Pair;
+import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.metastore.api.FieldSchema;
 import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
 import org.apache.hadoop.hive.ql.ddl.DDLUtils;
@@ -33,6 +34,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.metadata.Partition;
 import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
@@ -73,6 +75,9 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv
       throw new HiveException("Internal error : Partition Spec size, " + partSpec.size() +
           " doesn't match partition key definition size, " + partKeyTypes.length);
     }
+    String defaultPartitionName = HiveConf.getVar(SessionState.getSessionConf(),
+        HiveConf.ConfVars.DEFAULT_PARTITION_NAME);
+
     // Create the row object
     List<String> partNames = new ArrayList<>();
     List<Object> partValues = new ArrayList<>();
@@ -82,9 +87,15 @@ static public Object evalExprWithPart(ExprNodeDesc expr, Partition p) throws Hiv
       partNames.add(entry.getKey());
       ObjectInspector oi = PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector
           (TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i++]));
-      partValues.add(ObjectInspectorConverters.getConverter(
-          PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
-          .convert(entry.getValue()));
+
+      String partitionValue = entry.getValue();
+      if (partitionValue.equals(defaultPartitionName)) {
+        partValues.add(null); // Null for default partition.
+      } else {
+        partValues.add(ObjectInspectorConverters.getConverter(
+            PrimitiveObjectInspectorFactory.javaStringObjectInspector, oi)
+            .convert(partitionValue));
+      }
       partObjectInspectors.add(oi);
     }
     StructObjectInspector partObjectInspector = ObjectInspectorFactory
@@ -104,7 +115,7 @@ public static Pair<PrimitiveObjectInspector, ExprNodeEvaluator> prepareExpr(
       ExprNodeDesc expr, List<String> partColumnNames,
       List<PrimitiveTypeInfo> partColumnTypeInfos) throws HiveException {
     // Create the row object
-    List<ObjectInspector> partObjectInspectors = new ArrayList<ObjectInspector>();
+    List<ObjectInspector> partObjectInspectors = new ArrayList<>();
     for (int i = 0; i < partColumnNames.size(); i++) {
       partObjectInspectors.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(
         partColumnTypeInfos.get(i)));

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -563,7 +563,7 @@ public static boolean prunePartitionNames(List<String> partColumnNames,
       Warehouse.makeValsFromName(partName, values);
 
       List<Object> convertedValues = new ArrayList<>(values.size());
-      for(int i=0; i<values.size(); i++) {
+      for (int i = 0; i < values.size(); i++) {
         String partitionValue = values.get(i);
         PrimitiveTypeInfo typeInfo = partColumnTypeInfos.get(i);
 

diff --git a/ql/src/test/queries/clientpositive/pcr_null_partition.q b/ql/src/test/queries/clientpositive/pcr_null_partition.q
@@ -0,0 +1,17 @@
+set hive.exec.dynamic.partition.mode=nonstrict;
+set hive.fetch.task.conversion=none;
+
+drop table if exists pcr_t1;
+create table pcr_t1 (key string, value string) partitioned by (ds string);
+
+insert into pcr_t1 partition (ds) select 'A', 'V1', '2000-04-08';
+insert into pcr_t1 partition (ds) select 'B', 'V2', 'null';
+insert into pcr_t1 partition (ds) select 'C', 'V3', null;
+
+explain select key, value, ds from pcr_t1 where ds is null;
+select key, value, ds from pcr_t1 where ds is null;
+
+explain select key, value, ds from pcr_t1 where ds is not null;
+select key, value, ds from pcr_t1 where ds is not null order by key;
+
+select key, value, ds from pcr_t1 where ds = 'null';
diff --git a/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out b/ql/src/test/results/clientpositive/llap/annotate_stats_part.q.out
@@ -137,10 +137,12 @@ STAGE PLANS:
         TableScan
           alias: loc_orc_n4
           filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            ListSink
+          Filter Operator
+            predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
+            Select Operator
+              expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              ListSink
 
 PREHOOK: query: explain select * from loc_orc_n4
 PREHOOK: type: QUERY
@@ -228,10 +230,12 @@ STAGE PLANS:
         TableScan
           alias: loc_orc_n4
           filterExpr: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            ListSink
+          Filter Operator
+            predicate: (year = '__HIVE_DEFAULT_PARTITION__') (type: boolean)
+            Select Operator
+              expressions: state (type: string), locid (type: int), zip (type: bigint), '__HIVE_DEFAULT_PARTITION__' (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              ListSink
 
 PREHOOK: query: explain select * from loc_orc_n4
 PREHOOK: type: QUERY
@@ -283,10 +287,12 @@ STAGE PLANS:
         TableScan
           alias: loc_orc_n4
           filterExpr: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
-          Select Operator
-            expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
-            outputColumnNames: _col0, _col1, _col2, _col3
-            ListSink
+          Filter Operator
+            predicate: (year) IN ('2001', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
+            Select Operator
+              expressions: state (type: string), locid (type: int), zip (type: bigint), year (type: string)
+              outputColumnNames: _col0, _col1, _col2, _col3
+              ListSink
 
 PREHOOK: query: explain select * from loc_orc_n4 where year='2001' and year='__HIVE_DEFAULT_PARTITION__'
 PREHOOK: type: QUERY
@@ -475,10 +481,12 @@ STAGE PLANS:
         TableScan
           alias: loc_orc_n4
           filterExpr: (year <> '2001') (type: boolean)
-          Select Operator
-            expressions: state (type: string), locid (type: int)
-            outputColumnNames: _col0, _col1
-            ListSink
+          Filter Operator
+            predicate: (year <> '2001') (type: boolean)
+            Select Operator
+              expressions: state (type: string), locid (type: int)
+              outputColumnNames: _col0, _col1
+              ListSink
 
 PREHOOK: query: explain select * from loc_orc_n4
 PREHOOK: type: QUERY

diff --git a/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out b/ql/src/test/results/clientpositive/llap/dynamic_partition_skip_default.q.out
@@ -276,8 +276,11 @@ STAGE PLANS:
           alias: dynamic_part_table
           filterExpr: ((partcol1 = '1') and (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__')) (type: boolean)
           GatherStats: false
-          Select Operator
-            expressions: intcol (type: string)
-            outputColumnNames: _col0
-            ListSink
+          Filter Operator
+            isSamplingPred: false
+            predicate: (partcol2) IN ('1', '__HIVE_DEFAULT_PARTITION__') (type: boolean)
+            Select Operator
+              expressions: intcol (type: string)
+              outputColumnNames: _col0
+              ListSink