1616
1717package com .amazon .deequ .dqdl .translation .rules
1818
19+ import com .amazon .deequ .analyzers .{AnalyzerOptions , FilteredRowOutcome , NullBehavior }
1920import com .amazon .deequ .checks .Check
2021import com .amazon .deequ .checks .CheckLevel
2122import com .amazon .deequ .dqdl .model .DeequMetricMapping
@@ -71,12 +72,17 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
7172 val hasNullOperand = rawOperands.exists(_.isInstanceOf [NullNumericOperand ])
7273 val numericOperands = rawOperands.collect { case a : AtomicNumberOperand => a.getOperand.toDouble }
7374
75+ val opts = analyzerOptionsForWhereClause(rule)
76+ val nullFailOpts : Option [AnalyzerOptions ] =
77+ Some (AnalyzerOptions (NullBehavior .Fail ,
78+ opts.map(_.filteredRow).getOrElse(FilteredRowOutcome .TRUE )))
79+
7480 condition.getOperator match {
7581 case GREATER_THAN =>
7682 val resultCheck = if (isWhereClausePresent(rule)) {
7783 check
78- .hasMin(targetColumn, _ > numericOperands.head).where(rule.getWhereClause)
79- .isComplete(targetColumn).where(rule.getWhereClause)
84+ .hasMin(targetColumn, _ > numericOperands.head, analyzerOptions = opts ).where(rule.getWhereClause)
85+ .isComplete(targetColumn, None , opts ).where(rule.getWhereClause)
8086 } else {
8187 check
8288 .hasMin(targetColumn, _ > numericOperands.head)
@@ -87,8 +93,8 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
8793 case GREATER_THAN_EQUAL_TO =>
8894 val resultCheck = if (isWhereClausePresent(rule)) {
8995 check
90- .hasMin(targetColumn, _ >= numericOperands.head).where(rule.getWhereClause)
91- .isComplete(targetColumn).where(rule.getWhereClause)
96+ .hasMin(targetColumn, _ >= numericOperands.head, analyzerOptions = opts ).where(rule.getWhereClause)
97+ .isComplete(targetColumn, None , opts ).where(rule.getWhereClause)
9298 } else {
9399 check
94100 .hasMin(targetColumn, _ >= numericOperands.head)
@@ -99,8 +105,8 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
99105 case LESS_THAN =>
100106 val resultCheck = if (isWhereClausePresent(rule)) {
101107 check
102- .hasMax(targetColumn, _ < numericOperands.head).where(rule.getWhereClause)
103- .isComplete(targetColumn).where(rule.getWhereClause)
108+ .hasMax(targetColumn, _ < numericOperands.head, analyzerOptions = opts ).where(rule.getWhereClause)
109+ .isComplete(targetColumn, None , opts ).where(rule.getWhereClause)
104110 } else {
105111 check
106112 .hasMax(targetColumn, _ < numericOperands.head)
@@ -111,8 +117,8 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
111117 case LESS_THAN_EQUAL_TO =>
112118 val resultCheck = if (isWhereClausePresent(rule)) {
113119 check
114- .hasMax(targetColumn, _ <= numericOperands.head).where(rule.getWhereClause)
115- .isComplete(targetColumn).where(rule.getWhereClause)
120+ .hasMax(targetColumn, _ <= numericOperands.head, analyzerOptions = opts ).where(rule.getWhereClause)
121+ .isComplete(targetColumn, None , opts ).where(rule.getWhereClause)
116122 } else {
117123 check
118124 .hasMax(targetColumn, _ <= numericOperands.head)
@@ -127,7 +133,7 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
127133 val resultCheck = if (isWhereClausePresent(rule)) {
128134 check.isContainedIn(targetColumn, numericOperands.head, numericOperands.last,
129135 includeLowerBound = false , includeUpperBound = false ).where(rule.getWhereClause)
130- .isComplete(targetColumn).where(rule.getWhereClause)
136+ .isComplete(targetColumn, None , opts ).where(rule.getWhereClause)
131137 } else {
132138 check.isContainedIn(targetColumn, numericOperands.head, numericOperands.last,
133139 includeLowerBound = false , includeUpperBound = false )
@@ -142,7 +148,8 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
142148 val sql = s " $transformedCol IS NOT NULL AND " +
143149 s " ( $transformedCol <= ${numericOperands.head} OR $transformedCol >= ${numericOperands.last}) "
144150 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
145- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
151+ columns = List (transformedCol), analyzerOptions = opts)),
152+ complianceMetric(targetColumn, check.description, rule)))
146153
147154 case IN =>
148155 val nums = numericOperands.mkString(" , " )
@@ -153,7 +160,8 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
153160 case _ => " FALSE"
154161 }
155162 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
156- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
163+ columns = List (transformedCol), analyzerOptions = opts)),
164+ complianceMetric(targetColumn, check.description, rule)))
157165
158166 case NOT_IN =>
159167 val nums = numericOperands.mkString(" , " )
@@ -164,23 +172,28 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
164172 case _ => " TRUE"
165173 }
166174 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
167- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
175+ columns = List (transformedCol), analyzerOptions = opts)),
176+ complianceMetric(targetColumn, check.description, rule)))
168177
169178 case EQUALS =>
170179 if (hasNullOperand) {
171180 val sql = s " $transformedCol IS NULL "
172181 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
173- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
182+ columns = List (transformedCol), analyzerOptions = opts)),
183+ complianceMetric(targetColumn, check.description, rule)))
174184 } else {
175185 val resultCheck = if (isWhereClausePresent(rule)) {
176186 check
177- .hasMin(targetColumn, _ == numericOperands.head).where(rule.getWhereClause)
178- .hasMax(targetColumn, _ == numericOperands.head).where(rule.getWhereClause)
179- .isComplete(targetColumn).where(rule.getWhereClause)
187+ .hasMin(targetColumn, _ == numericOperands.head,
188+ analyzerOptions = nullFailOpts).where(rule.getWhereClause)
189+ .hasMax(targetColumn, _ == numericOperands.head,
190+ analyzerOptions = nullFailOpts).where(rule.getWhereClause)
191+ .isComplete(targetColumn, analyzerOptions = opts)
192+ .where(rule.getWhereClause)
180193 } else {
181194 check
182- .hasMin(targetColumn, _ == numericOperands.head)
183- .hasMax(targetColumn, _ == numericOperands.head)
195+ .hasMin(targetColumn, _ == numericOperands.head, analyzerOptions = nullFailOpts )
196+ .hasMax(targetColumn, _ == numericOperands.head, analyzerOptions = nullFailOpts )
184197 .isComplete(targetColumn)
185198 }
186199 Right ((resultCheck, minMetric(targetColumn, rule) ++ maxMetric(targetColumn, rule)))
@@ -190,11 +203,13 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
190203 if (hasNullOperand) {
191204 val sql = s " $transformedCol IS NOT NULL "
192205 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
193- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
206+ columns = List (transformedCol), analyzerOptions = opts)),
207+ complianceMetric(targetColumn, check.description, rule)))
194208 } else {
195209 val sql = s " $transformedCol IS NULL OR $transformedCol != ${numericOperands.head}"
196210 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
197- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
211+ columns = List (transformedCol), analyzerOptions = opts)),
212+ complianceMetric(targetColumn, check.description, rule)))
198213 }
199214
200215 case _ =>
@@ -209,24 +224,28 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
209224 case StringBasedConditionOperator .MATCHES =>
210225 val pattern = extractPattern(condition)
211226 val fullRegex = s " ^ ${pattern}$$ " .r
212- Right ((addWhereClause(rule, check.hasPattern(targetColumn, fullRegex)),
227+ Right ((addWhereClause(rule, check.hasPattern(targetColumn, fullRegex,
228+ analyzerOptions = analyzerOptionsForWhereClause(rule))),
213229 Seq (DeequMetricMapping (" Column" , targetColumn, " PatternMatch" , " PatternMatch" , None , rule = rule))))
214230
215231 case StringBasedConditionOperator .NOT_MATCHES =>
216232 val pattern = extractPattern(condition)
217233 val fullRegex = s " ^(?! \\ b ${pattern}\\ b).* $$ " .r
218- Right ((addWhereClause(rule, check.hasPattern(targetColumn, fullRegex)),
234+ Right ((addWhereClause(rule, check.hasPattern(targetColumn, fullRegex,
235+ analyzerOptions = analyzerOptionsForWhereClause(rule))),
219236 Seq (DeequMetricMapping (" Column" , targetColumn, " PatternMatch" , " PatternMatch" , None , rule = rule))))
220237
221238 case StringBasedConditionOperator .IN | StringBasedConditionOperator .EQUALS =>
222239 val sql = constructComplianceCondition(transformedCol, condition, isNegated = false )
223240 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
224- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
241+ columns = List (transformedCol), analyzerOptions = analyzerOptionsForWhereClause(rule))),
242+ complianceMetric(targetColumn, check.description, rule)))
225243
226244 case StringBasedConditionOperator .NOT_IN | StringBasedConditionOperator .NOT_EQUALS =>
227245 val sql = constructComplianceCondition(transformedCol, condition, isNegated = true )
228246 Right ((addWhereClause(rule, check.satisfies(sql, check.description, _ == 1.0 ,
229- columns = List (transformedCol))), complianceMetric(targetColumn, check.description, rule)))
247+ columns = List (transformedCol), analyzerOptions = analyzerOptionsForWhereClause(rule))),
248+ complianceMetric(targetColumn, check.description, rule)))
230249
231250 case _ =>
232251 Left (s " Unsupported operator for ColumnValues string condition: ${condition.getOperator}" )
@@ -247,9 +266,9 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
247266
248267 if (isNegated) {
249268 if (hasNull) conditions += s " $targetColumn IS NOT NULL "
250- if (hasEmpty) conditions += s " $targetColumn != '' "
269+ if (hasEmpty) conditions += s " ( $targetColumn IS NULL OR $targetColumn != '') "
251270 if (hasWhitespacesOnly) {
252- conditions += s " (LENGTH(TRIM( $targetColumn)) > 0 OR LENGTH( $targetColumn) = 0) "
271+ conditions += s " ( $targetColumn IS NULL OR LENGTH(TRIM( $targetColumn)) > 0 OR LENGTH( $targetColumn) = 0) "
253272 }
254273 if (quotedStrings.nonEmpty) {
255274 val valueList = quotedStrings.map(s => s " ' ${s.replace(" '" , " ''" )}' " ).mkString(" , " )
@@ -258,9 +277,9 @@ case class ColumnValuesRule() extends DQDLRuleConverter {
258277 if (conditions.isEmpty) " TRUE" else conditions.mkString(" AND " )
259278 } else {
260279 if (hasNull) conditions += s " $targetColumn IS NULL "
261- if (hasEmpty) conditions += s " $targetColumn = '' "
280+ if (hasEmpty) conditions += s " ( $targetColumn IS NOT NULL AND $targetColumn = '') "
262281 if (hasWhitespacesOnly) {
263- conditions += s " (LENGTH(TRIM( $targetColumn)) = 0 AND LENGTH( $targetColumn) > 0) "
282+ conditions += s " ( $targetColumn IS NOT NULL AND LENGTH(TRIM( $targetColumn)) = 0 AND LENGTH( $targetColumn) > 0) "
264283 }
265284 if (quotedStrings.nonEmpty) {
266285 val valueList = quotedStrings.map(s => s " ' ${s.replace(" '" , " ''" )}' " ).mkString(" , " )
0 commit comments