Skip to content

Commit 17c955b

Browse files
committed
WIP
1 parent 3938227 commit 17c955b

File tree

3 files changed

+25
-45
lines changed

3 files changed

+25
-45
lines changed

core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparer.scala

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -57,26 +57,24 @@ trait DataFrameComparer extends DatasetComparer {
5757
)
5858
val actual = if (ignoreColumnOrder) orderColumns(actualDF, expectedDF) else actualDF
5959
if (orderedComparison)
60-
assertSmallDataFrameEquality(actual, expectedDF, truncate)
60+
assertSmallDataFrameEquality(actual, expectedDF)
6161
else
6262
assertSmallDataFrameEquality(
6363
defaultSortDataset(actual),
64-
defaultSortDataset(expectedDF),
65-
truncate
64+
defaultSortDataset(expectedDF)
6665
)
6766
}
6867

6968
}
7069

7170
private def assertSmallDataFrameEquality(
7271
actualDF: DataFrame,
73-
expectedDF: DataFrame,
74-
truncate: Int
72+
expectedDF: DataFrame
7573
): Unit = {
7674
val a = actualDF.collect()
7775
val e = expectedDF.collect()
7876
if (!a.toSeq.approximateSameElements(e, (o1: Row, o2: Row) => o1.equals(o2))) {
79-
val msg = "Difference\n" ++ DataframeUtil.showDataframeDiff(a, e, actualDF.schema.fieldNames, truncate)
77+
val msg = "Difference\n" ++ DataframeUtil.showDataframeDiff(a, e, actualDF.schema.fieldNames)
8078
throw DatasetContentMismatch(msg)
8179
}
8280
}

core/src/main/scala/com/github/mrpowers/spark/fast/tests/DataframeUtil.scala

Lines changed: 15 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,6 @@ object DataframeUtil {
1212
actual: Array[Row],
1313
expected: Array[Row],
1414
fieldNames: Array[String],
15-
truncate: Int = 20,
1615
minColWidth: Int = 3
1716
): String = {
1817
val sb = new StringBuilder
@@ -31,8 +30,8 @@ object DataframeUtil {
3130
.zipAll(expected, Row.empty, Row.empty)
3231
.map { case (actualRow, expectedRow) => (actualRow, expectedRow, actualRow.equals(expectedRow)) }
3332
val diff = fullJoinWithEquals.map { case (actualRow, expectedRow, rowsAreEqual) =>
34-
val paddedActualRow = pad(actualRow, truncate, rowWidths)
35-
val paddedExpectedRow = pad(expectedRow, truncate, rowWidths)
33+
val paddedActualRow = pad(actualRow.toSeq, rowWidths)
34+
val paddedExpectedRow = pad(expectedRow.toSeq, rowWidths)
3635
if (rowsAreEqual) {
3736
List(DarkGray(paddedActualRow.mkString("|")), DarkGray(paddedActualRow.mkString("|")))
3837
} else {
@@ -61,12 +60,12 @@ object DataframeUtil {
6160
val coloredDiff = withEquals.zipWithIndex
6261
.map {
6362
case ((actualRowField, expectedRowField, true), i) =>
64-
val paddedActualRow = padAny(actualRowField, truncate, rowWidths(i))
65-
val paddedExpected = padAny(expectedRowField, truncate, rowWidths(i))
63+
val paddedActualRow = padAny(actualRowField, rowWidths(i))
64+
val paddedExpected = padAny(expectedRowField, rowWidths(i))
6665
(DarkGray(paddedActualRow), DarkGray(paddedExpected))
6766
case ((actualRowField, expectedRowField, false), i) =>
68-
val paddedActualRow = padAny(actualRowField, truncate, rowWidths(i))
69-
val paddedExpected = padAny(expectedRowField, truncate, rowWidths(i))
67+
val paddedActualRow = padAny(actualRowField, rowWidths(i))
68+
val paddedExpected = padAny(expectedRowField, rowWidths(i))
7069
(Red(paddedActualRow), Green(paddedExpected))
7170
}
7271
val start = DarkGray("")
@@ -81,7 +80,7 @@ object DataframeUtil {
8180
}
8281
}
8382

84-
val headerWithLeftPadding = pad(fieldNames, truncate, rowWidths)
83+
val headerWithLeftPadding = pad(fieldNames, rowWidths)
8584
val headerFields = List(headerWithLeftPadding.mkString("|"))
8685
val colWidths: Array[Int] = getColWidths(minColWidth, diff)
8786

@@ -92,13 +91,10 @@ object DataframeUtil {
9291

9392
sb.append(separatorLine)
9493

95-
headerFields.zipWithIndex
96-
.map { case (cell, i) =>
97-
if (truncate > 0) {
98-
StringUtils.leftPad(cell, colWidths(i))
99-
} else {
100-
StringUtils.rightPad(cell, colWidths(i))
101-
}
94+
headerFields
95+
.zip(colWidths)
96+
.map { case (cell, colWidth) =>
97+
StringUtils.leftPad(cell, colWidth)
10298
}
10399
.addString(sb, StringUtils.leftPad("|", largestIndexOffset), "|", "|\n")
104100
diff.zipWithIndex.map { case (actual :: expected :: Nil, i) =>
@@ -125,34 +121,14 @@ object DataframeUtil {
125121
sb.append(separatorLine).toString()
126122
}
127123

128-
private def pad(row: Row, truncate: Int, colWidths: Array[Int]): Seq[String] =
129-
pad(
130-
row.toSeq.map { v =>
131-
Option(v).map(_.toString).getOrElse("null")
132-
},
133-
truncate,
134-
colWidths
135-
)
124+
private def pad(items: Seq[Any], colWidths: Array[Int]): Seq[String] =
125+
items.zip(colWidths).map { case (v, colWidth) => padAny(v, colWidth) }
136126

137-
private def padAny(s: Any, truncate: Int, width: Int) = {
127+
private def padAny(s: Any, width: Int) = {
138128
val cell = Option(s).map(_.toString).getOrElse("null")
139-
if (truncate > 0) {
140-
StringUtils.leftPad(cell, width)
141-
} else {
142-
StringUtils.rightPad(cell, width)
143-
}
129+
StringUtils.leftPad(cell, width)
144130
}
145131

146-
private def pad(header: Seq[String], truncate: Int, colWidths: Array[Int]) =
147-
header.zipWithIndex
148-
.map { case (cell, i) =>
149-
if (truncate > 0) {
150-
StringUtils.leftPad(cell, colWidths(i))
151-
} else {
152-
StringUtils.rightPad(cell, colWidths(i))
153-
}
154-
}
155-
156132
private def getColWidths(minColWidth: Int, rows: Array[List[Str]]) = {
157133
val numCols = rows.map(_.length).max
158134
// Initialise the width of each column to a minimum value

core/src/test/scala/com/github/mrpowers/spark/fast/tests/DataFrameComparerTest.scala

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -553,6 +553,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
553553
val e = intercept[DatasetContentMismatch] {
554554
assertSmallDataFrameEquality(sourceDF, expectedDF, outputFormat = DataframeDiffOutputFormat.SeparateLines)
555555
}
556+
println(e.getMessage)
556557
assert(
557558
e.getMessage ==
558559
"""Difference
@@ -600,6 +601,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
600601
val e = intercept[DatasetContentMismatch] {
601602
assertSmallDataFrameEquality(sourceDF, expectedDF, outputFormat = DataframeDiffOutputFormat.SeparateLines)
602603
}
604+
println(e.getMessage)
603605
val expected =
604606
"""|Difference
605607
| +------+---+-------+
@@ -647,6 +649,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
647649
val e = intercept[DatasetContentMismatch] {
648650
assertSmallDataFrameEquality(sourceDF, expectedDF, outputFormat = DataframeDiffOutputFormat.SeparateLines)
649651
}
652+
println(e.getMessage)
650653

651654
assert(e.getMessage == """Difference
652655
| +-----+---+-------+
@@ -689,6 +692,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
689692
val e = intercept[DatasetContentMismatch] {
690693
assertSmallDataFrameEquality(sourceDF, expectedDF, outputFormat = DataframeDiffOutputFormat.SeparateLines)
691694
}
695+
println(e.getMessage)
692696

693697
assert(e.getMessage == """Difference
694698
| +------+---+-------+
@@ -726,6 +730,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
726730
val e = intercept[DatasetContentMismatch] {
727731
assertSmallDataFrameEquality(actualDF, expectedDF, outputFormat = DataframeDiffOutputFormat.SeparateLines)
728732
}
733+
println(e.getMessage)
729734

730735
assert(e.getMessage == """Difference
731736
| +------+---+-------+
@@ -764,6 +769,7 @@ class DataFrameComparerTest extends AnyFreeSpec with DataFrameComparer with Spar
764769
val e = intercept[DatasetContentMismatch] {
765770
assertSmallDataFrameEquality(actualDF, expectedDF, outputFormat = DataframeDiffOutputFormat.SeparateLines)
766771
}
772+
println(e.getMessage)
767773

768774
assert(
769775
e.getMessage == """Difference

0 commit comments

Comments
 (0)