Skip to content

Commit 059241b

Browse files
hsiang-c authored and Steve Vaughan Jr
committed
fix: order query result deterministically (apache#2208)
1 parent 02f5021 commit 059241b

File tree

1 file changed

+82
-1
lines changed

1 file changed

+82
-1
lines changed

dev/diffs/iceberg/1.8.1.diff

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1518,7 +1518,7 @@ index 182b1ef..ffceac5 100644
15181518
}
15191519

15201520
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataWrite.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataWrite.java
1521-
index fb2b312..58911fc 100644
1521+
index fb2b312..c3f4e14 100644
15221522
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataWrite.java
15231523
+++ b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkDataWrite.java
15241524
@@ -96,7 +96,18 @@ public class TestSparkDataWrite {
@@ -1541,6 +1541,87 @@ index fb2b312..58911fc 100644
15411541
}
15421542

15431543
@AfterEach
1544+
@@ -140,7 +151,7 @@ public class TestSparkDataWrite {
1545+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1546+
1547+
List<SimpleRecord> actual =
1548+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1549+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1550+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1551+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1552+
for (ManifestFile manifest :
1553+
@@ -210,7 +221,7 @@ public class TestSparkDataWrite {
1554+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1555+
1556+
List<SimpleRecord> actual =
1557+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1558+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1559+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1560+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1561+
}
1562+
@@ -256,7 +267,7 @@ public class TestSparkDataWrite {
1563+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1564+
1565+
List<SimpleRecord> actual =
1566+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1567+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1568+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1569+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1570+
}
1571+
@@ -309,7 +320,7 @@ public class TestSparkDataWrite {
1572+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1573+
1574+
List<SimpleRecord> actual =
1575+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1576+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1577+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1578+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1579+
}
1580+
@@ -352,7 +363,7 @@ public class TestSparkDataWrite {
1581+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1582+
1583+
List<SimpleRecord> actual =
1584+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1585+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1586+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1587+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1588+
}
1589+
@@ -392,7 +403,7 @@ public class TestSparkDataWrite {
1590+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1591+
1592+
List<SimpleRecord> actual =
1593+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1594+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1595+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1596+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1597+
1598+
@@ -458,7 +469,7 @@ public class TestSparkDataWrite {
1599+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1600+
1601+
List<SimpleRecord> actual =
1602+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1603+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1604+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1605+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1606+
}
1607+
@@ -622,7 +633,7 @@ public class TestSparkDataWrite {
1608+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1609+
1610+
List<SimpleRecord> actual =
1611+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1612+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1613+
assertThat(actual).as("Number of rows should match").hasSameSizeAs(expected);
1614+
assertThat(actual).as("Result rows should match").isEqualTo(expected);
1615+
1616+
@@ -708,7 +719,7 @@ public class TestSparkDataWrite {
1617+
// Since write and commit succeeded, the rows should be readable
1618+
Dataset<Row> result = spark.read().format("iceberg").load(targetLocation);
1619+
List<SimpleRecord> actual =
1620+
- result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1621+
+ result.orderBy("id", "data").as(Encoders.bean(SimpleRecord.class)).collectAsList();
1622+
assertThat(actual).as("Number of rows should match").hasSize(records.size() + records2.size());
1623+
assertThat(actual)
1624+
.describedAs("Result rows should match")
15441625
diff --git a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java b/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java
15451626
index becf6a0..b98c2f6 100644
15461627
--- a/spark/v3.5/spark/src/test/java/org/apache/iceberg/spark/source/TestSparkReadProjection.java

0 commit comments

Comments (0)