Skip to content

Commit 1ef847b

Browse files
committed
add variable substitution for minNumRows, fixes #58
1 parent 2839ed2 commit 1ef847b

File tree

4 files changed

+42
-21
lines changed

4 files changed

+42
-21
lines changed

src/main/scala/com/target/data_validator/validator/ColumnBased.scala

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -35,26 +35,40 @@ abstract class ColumnBased(column: String, condTest: Expression) extends CheapCh
3535
}
3636
}
3737

38-
case class MinNumRows(minNumRows: Long) extends ColumnBased("", ValidatorBase.L0) {
38+
case class MinNumRows(minNumRows: Json) extends ColumnBased("", ValidatorBase.L0) {
3939
override def name: String = "MinNumRows"
4040

41-
override def substituteVariables(dict: VarSubstitution): ValidatorBase = this
41+
override def substituteVariables(dict: VarSubstitution): ValidatorBase = {
42+
val ret = MinNumRows(getVarSubJson(minNumRows, "minNumRows", dict))
43+
getEvents.foreach(ret.addEvent)
44+
ret
45+
}
4246

4347
override def configCheck(df: DataFrame): Boolean = {
44-
if (minNumRows <= 0) {
45-
val msg = s"MinNumRows: $minNumRows <= 0"
48+
49+
def notNaturalNumber(): Unit = {
50+
val msg = "minNumRows must be a natural number"
4651
logger.error(msg)
4752
addEvent(ValidatorError(msg))
48-
failed = true
49-
true
50-
} else {
51-
false
5253
}
54+
55+
minNumRows.asNumber match {
56+
case Some(jsonNumber) => jsonNumber.toLong match {
57+
case Some(x) if x > 0 =>
58+
case _ => notNaturalNumber()
59+
}
60+
case _ => notNaturalNumber()
61+
}
62+
failed
5363
}
5464

5565
override def quickCheck(row: Row, count: Long, idx: Int): Boolean = {
56-
failed = count < minNumRows
57-
val pctError = if (failed) calculatePctError(minNumRows, count) else "0.00%"
66+
// Convert to `JsonNumber` then to `Long`
67+
// safe because already handled in `configCheck`
68+
val minNumRowsLong = minNumRows.asNumber.get.toLong.get
69+
70+
failed = count < minNumRowsLong
71+
val pctError = if (failed) calculatePctError(minNumRowsLong, count) else "0.00%"
5872
addEvent(ValidatorCounter("rowCount", count))
5973
val msg = s"MinNumRowsCheck Expected: $minNumRows Actual: $count Relative Error: $pctError"
6074
val data = ListMap("expected" -> minNumRows.toString, "actual" -> count.toString, "relative_error" -> pctError)
@@ -64,7 +78,7 @@ case class MinNumRows(minNumRows: Long) extends ColumnBased("", ValidatorBase.L0
6478

6579
override def toJson: Json = Json.obj(
6680
("type", Json.fromString("rowCount")),
67-
("minNumRows", Json.fromLong(minNumRows)),
81+
("minNumRows", minNumRows),
6882
("failed", Json.fromBoolean(failed)),
6983
("events", this.getEvents.asJson)
7084
)

src/test/scala/com/target/data_validator/ConfigParserSpec.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ class ConfigParserSpec extends FunSpec with BeforeAndAfterAll {
2828
"bar",
2929
Some(List("one", "two")),
3030
None,
31-
List(MinNumRows(10294), NullCheck("mdse_item_i", None)) // scalastyle:ignore magic.number
31+
List(MinNumRows(Json.fromInt(10294)), NullCheck("mdse_item_i", None)) // scalastyle:ignore magic.number
3232
),
3333
ValidatorOrcFile("LocalFile.orc", None, Some("foo < 10"), List(NullCheck("start_d", None))),
3434
ValidatorParquetFile("LocFile.parquet", None, Some("bar < 10"), List(NullCheck("end_d", None)))

src/test/scala/com/target/data_validator/ConfigVarSubSpec.scala

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,9 +62,9 @@ class ConfigVarSubSpec extends FunSpec with Matchers with TestingSparkSession {
6262

6363
describe("MinNumRows") {
6464

65-
it("MinNumRows doesn't support substitutions so it should be equal, no changes.") {
66-
val sut = MinNumRows(100) // scalastyle:ignore
67-
assert(sut.substituteVariables(dict) == sut)
65+
it("should substitute variables properly") {
66+
val sut = MinNumRows(Json.fromString("$one"))
67+
assert(sut.substituteVariables(dict) == MinNumRows(Json.fromInt(1)))
6868
}
6969

7070
}

src/test/scala/com/target/data_validator/ValidatorBaseSpec.scala

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -171,19 +171,27 @@ class ValidatorBaseSpec extends FunSpec with Matchers with TestingSparkSession {
171171

172172
describe("ValidatorMinNumRows") {
173173

174+
val df = spark.createDataFrame(sc.parallelize(List(Row("Doug", 50), Row("Collin", 32))), schema) //scalastyle:ignore
175+
176+
it("configCheck() should fail for minNumRows as non-numeric") {
177+
val dict = new VarSubstitution
178+
val config = mkConfig(df, List(MinNumRows(Json.fromString("badinput"))))
179+
assert(config.configCheck(spark, dict))
180+
assert(config.failed)
181+
assert(config.tables.head.failed)
182+
}
183+
174184
it("configCheck() should fail for negative minNumRows") {
175185
val dict = new VarSubstitution
176-
val df = spark.createDataFrame(sc.parallelize(List(Row("Doug", 50), Row("Collin", 32))), schema) //scalastyle:ignore
177-
val config = mkConfig(df, List(MinNumRows(-10))) //scalastyle:ignore
186+
val config = mkConfig(df, List(MinNumRows(Json.fromLong(-10)))) // scalastyle:ignore magic.number
178187
assert(config.configCheck(spark, dict))
179188
assert(config.failed)
180189
assert(config.tables.head.failed)
181190
}
182191

183192
it("quickCheck() should fail when rowCount < minNumRows") {
184193
val dict = new VarSubstitution
185-
val df = spark.createDataFrame(sc.parallelize(List(Row("Doug", 50), Row("Collin", 32))), schema) //scalastyle:ignore
186-
val minNumRowsCheck = MinNumRows(10) // scalastyle:ignore magic.number
194+
val minNumRowsCheck = MinNumRows(Json.fromLong(10)) // scalastyle:ignore magic.number
187195
val config = mkConfig(df, List(minNumRowsCheck))
188196
assert(config.quickChecks(spark, dict))
189197
assert(config.failed)
@@ -197,8 +205,7 @@ class ValidatorBaseSpec extends FunSpec with Matchers with TestingSparkSession {
197205

198206
it("quickCheck() should succeed when rowCount > minNumRows") {
199207
val dict = new VarSubstitution
200-
val df = spark.createDataFrame(sc.parallelize(List(Row("Doug", 50), Row("Collin", 32))), schema) //scalastyle:ignore
201-
val minNumRowsCheck = MinNumRows(1)
208+
val minNumRowsCheck = MinNumRows(Json.fromInt(1))
202209
val config = mkConfig(df, List(minNumRowsCheck))
203210
assert(!config.configCheck(spark, dict))
204211
assert(!config.quickChecks(spark, dict))

0 commit comments

Comments
 (0)