@@ -177,13 +177,21 @@ trait SanityCheckerParams extends DerivedFeatureFilterParams {
/** Stores `value` in the `minRequiredRuleSupport` param and returns this instance. */
def setMinRequiredRuleSupport(value: Double): this.type = {
  set(minRequiredRuleSupport, value)
}
/** Reads the current value of the `minRequiredRuleSupport` param. */
def getMinRequiredRuleSupport: Double = {
  $(minRequiredRuleSupport)
}
179179
180- final val featureLabelCorrOnly = new BooleanParam (
181- parent = this , name = " featureLabelCorrOnly" ,
182- doc = " If true, then only calculate the correlations between the features and the label. Otherwise, calculate " +
183- " the entire correlation matrix, which includes all feature-feature correlations."
/**
 * Controls how feature-feature correlations are handled. Holds the `entryName` of a
 * [[CorrelationLevel]]: Off, Computed or Stored.
 *
 * The registered param name is identical to the field name, so name-based lookups resolve
 * to this param under the same identifier the accessors use.
 */
final val featureFeatureCorrLevel: Param[String] = new Param[String](
  parent = this, name = "featureFeatureCorrLevel",
  doc = "This setting determines feature-feature correlation computations. Levels are: Off, Computed, Stored",
  // Reject unknown level names when the param is set, instead of failing later
  // when the getter resolves the name with CorrelationLevel.withName
  isValid = (level: String) => CorrelationLevel.withNameOption(level).isDefined
)
185- def setFeatureLabelCorrOnly (value : Boolean ): this .type = set(featureLabelCorrOnly, value)
186- def getFeatureLabelCorrOnly : Boolean = $(featureLabelCorrOnly)
/**
 * Sets the feature-feature correlation level. The level is stored in the
 * `featureFeatureCorrLevel` param by its `entryName`.
 */
def setFeatureFeatureCorrLevel(value: CorrelationLevel): this.type = {
  val levelName = value.entryName
  set(featureFeatureCorrLevel, levelName)
}
/**
 * Returns the configured feature-feature correlation level, resolved from the stored
 * name with `CorrelationLevel.withName`.
 */
def getFeatureFeatureCorrLevel: CorrelationLevel = {
  val levelName = $(featureFeatureCorrLevel)
  CorrelationLevel.withName(levelName)
}
186+
/**
 * Legacy boolean switch kept for backward compatibility. It writes to the
 * `featureFeatureCorrLevel` param: `true` selects `CorrelationLevel.Off`,
 * `false` selects `CorrelationLevel.Computed`.
 */
@deprecated("this setting is overridden by featureFeatureCorrLevel", "0.7.0")
def setFeatureLabelCorrOnly(value: Boolean): this.type = {
  // Pick the level first so the param is written in exactly one place
  val level: CorrelationLevel = if (value) CorrelationLevel.Off else CorrelationLevel.Computed
  set(featureFeatureCorrLevel, level.entryName)
}
192+
/**
 * Legacy boolean view of the `featureFeatureCorrLevel` param: true exactly when the
 * stored level name equals that of `CorrelationLevel.Off`.
 */
@deprecated("this setting is overridden by featureFeatureCorrLevel", "0.7.0")
def getFeatureLabelCorrOnly: Boolean = {
  val offLevelName = CorrelationLevel.Off.entryName
  $(featureFeatureCorrLevel) == offLevelName
}
187195
188196 final val correlationExclusion : Param [String ] = new Param [String ](this , " correlationExclusion" ,
189197 " Setting for what categories of feature vector columns to exclude from the correlation calculation" ,
@@ -208,7 +216,7 @@ trait SanityCheckerParams extends DerivedFeatureFilterParams {
208216 correlationType -> SanityChecker .CorrelationTypeDefault .entryName,
209217 maxRuleConfidence -> SanityChecker .MaxRuleConfidence ,
210218 minRequiredRuleSupport -> SanityChecker .MinRequiredRuleSupport ,
211- featureLabelCorrOnly -> SanityChecker .FeatureLabelCorrOnly ,
219+ featureFeatureCorrLevel -> SanityChecker .FeatureFeatureCorrLevel .entryName ,
212220 correlationExclusion -> SanityChecker .CorrelationExclusionDefault .entryName
213221 )
214222}
@@ -453,7 +461,7 @@ class SanityChecker(uid: String = UID[SanityChecker])
453461 else ((0 until featureSize + 1 ).toArray, vectorRows)
454462 val numCorrIndices = corrIndices.length
455463
456- val (corrMatrix, corrsWithLabel) = if ($(featureLabelCorrOnly) ) {
464+ val (corrMatrix, corrsWithLabel) = if ($(featureFeatureCorrLevel) == CorrelationLevel . Off .entryName ) {
457465 None -> OpStatistics .computeCorrelationsWithLabel(vectorRowsForCorr, colStats, count)
458466 }
459467 else {
@@ -513,7 +521,8 @@ class SanityChecker(uid: String = UID[SanityChecker])
513521 colStats = colStats,
514522 names = featureNames :+ in1.name,
515523 correlationType = CorrelationType .withNameInsensitive(corrType),
516- sample = sampleFraction
524+ sample = sampleFraction,
525+ keepFeatureFeature = getFeatureFeatureCorrLevel
517526 )
518527 setMetadata(outputMeta.toMetadata.withSummaryMetadata(summary.toMetadata()))
519528
@@ -565,7 +574,7 @@ object SanityChecker {
565574 // These settings will make the maxRuleConfidence check off by default
566575 val MaxRuleConfidence = 1.0
567576 val MinRequiredRuleSupport = 1.0
568- val FeatureLabelCorrOnly = false
577+ val FeatureFeatureCorrLevel = CorrelationLevel . Computed
569578 val CorrelationExclusionDefault = CorrelationExclusion .NoExclusion
570579
571580 def SampleSeed : Long = util.Random .nextLong() // scalastyle:off method.name
@@ -620,3 +629,28 @@ object CorrelationExclusion extends Enum[CorrelationExclusion] {
620629 */
621630 case object HashedText extends CorrelationExclusion
622631}
632+
633+
/**
 * Levels controlling how feature-feature correlations are handled
 */
sealed trait CorrelationLevel extends EnumEntry with Serializable

/**
 * The available [[CorrelationLevel]] settings
 */
object CorrelationLevel extends Enum[CorrelationLevel] {

  /** Feature-feature correlations are off */
  case object Off extends CorrelationLevel

  /** Feature-feature correlations are computed for feature exclusion */
  case object Computed extends CorrelationLevel

  /** Feature-feature correlations are stored in metadata */
  case object Stored extends CorrelationLevel

  /** All levels, as collected by `findValues` */
  val values: Seq[CorrelationLevel] = findValues
}
0 commit comments