@@ -375,7 +375,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
375
375
// sigmoid to select more samples for smaller scale factors
376
376
val sampleSize = Math .min(count, Math .max(minSampleSize, count / (1 + Math .exp(count * curveFactor)) * 2 ))
377
377
378
- val sampleFraction = sampleSize / count
378
+ val sampleFraction = Math .min( sampleSize / count, 1.0 )
379
379
380
380
log.info(s " Factor people4Hops: using ${sampleSize} samples ( ${sampleFraction * 100 }%) " )
381
381
@@ -402,7 +402,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
402
402
$" Person2.deletionDate" .as(" Person2DeletionDate" )
403
403
)
404
404
405
- val sampleFractionPersonPairs = 10000.0 / personPairs.count()
405
+ val sampleFractionPersonPairs = Math .min( 10000.0 / personPairs.count(), 1.0 )
406
406
personPairs.sample(sampleFractionPersonPairs, 42 )
407
407
},
408
408
" people2Hops" -> Factor (PersonType , PlaceType , PersonKnowsPersonType ) { case Seq (person, place, knows) =>
@@ -423,7 +423,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
423
423
// sigmoid to select more samples for smaller scale factors
424
424
val sampleSize = Math .min(count, Math .max(minSampleSize, count / (1 + Math .exp(count * curveFactor)) * 2 ))
425
425
426
- val sampleFraction = sampleSize / count
426
+ val sampleFraction = Math .min( sampleSize / count, 1.0 )
427
427
428
428
log.info(s " Factor people4Hops: using ${sampleSize} samples ( ${sampleFraction * 100 }%) " )
429
429
@@ -450,7 +450,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
450
450
$" Person2.deletionDate" .as(" Person2DeletionDate" )
451
451
)
452
452
453
- val sampleFractionPersonPairs = 10000.0 / personPairs.count()
453
+ val sampleFractionPersonPairs = Math .min( 10000.0 / personPairs.count(), 1.0 )
454
454
personPairs.sample(sampleFractionPersonPairs, 42 )
455
455
},
456
456
" sameUniversityKnows" -> LargeFactor (PersonKnowsPersonType , PersonStudyAtUniversityType ) { case Seq (personKnowsPerson, studyAt) =>
0 commit comments