Skip to content

Commit 8e7cc5f

Browse files
authored
Support SeparateLines output mode for assertSmallDatasetEquality (#213)
1 parent 33747f0 commit 8e7cc5f

File tree

7 files changed

+520
-295
lines changed

7 files changed

+520
-295
lines changed

core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala

Lines changed: 13 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ trait DataFrameComparer extends DatasetComparer {
2222
* \- don't compare column metadata when matching schemas
2323
* @param truncate
2424
* \- truncate column if length more than specified number
25+
* @param outputFormat
26+
* \- format of the dataframe diff output, either SideBySide or SeparateLines
2527
*/
2628
def assertSmallDataFrameEquality(
2729
actualDF: DataFrame,
@@ -34,51 +36,17 @@ trait DataFrameComparer extends DatasetComparer {
3436
truncate: Int = 500,
3537
outputFormat: DataframeDiffOutputFormat = DataframeDiffOutputFormat.SideBySide
3638
): Unit = {
37-
outputFormat match {
38-
case DataframeDiffOutputFormat.SideBySide =>
39-
assertSmallDatasetEquality(
40-
actualDF,
41-
expectedDF,
42-
ignoreNullable,
43-
ignoreColumnNames,
44-
orderedComparison,
45-
ignoreColumnOrder,
46-
ignoreMetadata,
47-
truncate
48-
)
49-
case DataframeDiffOutputFormat.SeparateLines =>
50-
SchemaComparer.assertSchemaEqual(
51-
actualDF.schema,
52-
expectedDF.schema,
53-
ignoreNullable,
54-
ignoreColumnNames,
55-
ignoreColumnOrder,
56-
ignoreMetadata
57-
)
58-
val actual = if (ignoreColumnOrder) orderColumns(actualDF, expectedDF) else actualDF
59-
if (orderedComparison)
60-
assertSmallDataFrameEquality(actual, expectedDF, truncate)
61-
else
62-
assertSmallDataFrameEquality(
63-
defaultSortDataset(actual),
64-
defaultSortDataset(expectedDF),
65-
truncate
66-
)
67-
}
68-
69-
}
70-
71-
private def assertSmallDataFrameEquality(
72-
actualDF: DataFrame,
73-
expectedDF: DataFrame,
74-
truncate: Int
75-
): Unit = {
76-
val a = actualDF.collect()
77-
val e = expectedDF.collect()
78-
if (!a.toSeq.approximateSameElements(e, (o1: Row, o2: Row) => o1.equals(o2))) {
79-
val msg = "Difference\n" ++ DataframeUtil.showDataframeDiff(a, e, actualDF.schema.fieldNames, truncate)
80-
throw DatasetContentMismatch(msg)
81-
}
39+
assertSmallDatasetEquality(
40+
actualDF,
41+
expectedDF,
42+
ignoreNullable,
43+
ignoreColumnNames,
44+
orderedComparison,
45+
ignoreColumnOrder,
46+
ignoreMetadata,
47+
truncate,
48+
outputFormat = outputFormat
49+
)
8250
}
8351

8452
/**

core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala

Lines changed: 0 additions & 143 deletions
This file was deleted.

core/src/main/scala/com/github/mrpowers/spark/fast/tests/DatasetComparer.scala

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package com.github.mrpowers.spark.fast.tests
22

3+
import com.github.mrpowers.spark.fast.tests.DataframeDiffOutputFormat.DataframeDiffOutputFormat
34
import com.github.mrpowers.spark.fast.tests.DatasetComparer.maxUnequalRowsToShow
45
import com.github.mrpowers.spark.fast.tests.SeqLikesExtensions.SeqExtensions
56
import org.apache.spark.rdd.RDD
@@ -47,32 +48,39 @@ Expected DataFrame Row Count: '$expectedCount'
4748
ignoreColumnOrder: Boolean = false,
4849
ignoreMetadata: Boolean = true,
4950
truncate: Int = 500,
50-
equals: (T, T) => Boolean = (o1: T, o2: T) => o1.equals(o2)
51+
equals: (T, T) => Boolean = (o1: T, o2: T) => o1.equals(o2),
52+
outputFormat: DataframeDiffOutputFormat = DataframeDiffOutputFormat.SideBySide
5153
): Unit = {
5254
SchemaComparer.assertDatasetSchemaEqual(actualDS, expectedDS, ignoreNullable, ignoreColumnNames, ignoreColumnOrder, ignoreMetadata)
5355
val actual = if (ignoreColumnOrder) orderColumns(actualDS, expectedDS) else actualDS
54-
assertSmallDatasetContentEquality(actual, expectedDS, orderedComparison, truncate, equals)
56+
assertSmallDatasetContentEquality(actual, expectedDS, orderedComparison, truncate, equals, outputFormat)
5557
}
5658

5759
def assertSmallDatasetContentEquality[T: ClassTag](
5860
actualDS: Dataset[T],
5961
expectedDS: Dataset[T],
6062
orderedComparison: Boolean,
6163
truncate: Int,
62-
equals: (T, T) => Boolean
64+
equals: (T, T) => Boolean,
65+
outputFormat: DataframeDiffOutputFormat
6366
): Unit = {
6467
if (orderedComparison)
65-
assertSmallDatasetContentEquality(actualDS, expectedDS, truncate, equals)
68+
assertSmallDatasetContentEquality(actualDS, expectedDS, truncate, equals, outputFormat)
6669
else
67-
assertSmallDatasetContentEquality(defaultSortDataset(actualDS), defaultSortDataset(expectedDS), truncate, equals)
70+
assertSmallDatasetContentEquality(defaultSortDataset(actualDS), defaultSortDataset(expectedDS), truncate, equals, outputFormat)
6871
}
6972

70-
def assertSmallDatasetContentEquality[T: ClassTag](actualDS: Dataset[T], expectedDS: Dataset[T], truncate: Int, equals: (T, T) => Boolean): Unit = {
73+
def assertSmallDatasetContentEquality[T: ClassTag](
74+
actualDS: Dataset[T],
75+
expectedDS: Dataset[T],
76+
truncate: Int,
77+
equals: (T, T) => Boolean,
78+
outputFormat: DataframeDiffOutputFormat
79+
): Unit = {
7180
val a = actualDS.collect().toSeq
7281
val e = expectedDS.collect().toSeq
7382
if (!a.approximateSameElements(e, equals)) {
74-
val arr = ("Actual Content", "Expected Content")
75-
val msg = "Diffs\n" ++ ProductUtil.showProductDiff(arr, a, e, truncate)
83+
val msg = "Diffs\n" ++ ProductUtil.showProductDiff(expectedDS.columns, a, e, truncate, outputFormat = outputFormat)
7684
throw DatasetContentMismatch(msg)
7785
}
7886
}

0 commit comments

Comments
 (0)