Skip to content

Commit 903f544

Browse files
committed
Ensure that sampleFractions are in the interval [0, 1]
1 parent 10aa0c9 commit 903f544

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

src/main/scala/ldbc/snb/datagen/factors/FactorGenerationStage.scala

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -375,7 +375,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
375375
// sigmoid to select more samples for smaller scale factors
376376
val sampleSize = Math.min(count, Math.max(minSampleSize, count / (1 + Math.exp(count * curveFactor)) * 2))
377377

378-
val sampleFraction = sampleSize / count
378+
val sampleFraction = Math.min(sampleSize / count, 1.0)
379379

380380
log.info(s"Factor people4Hops: using ${sampleSize} samples (${sampleFraction * 100}%)")
381381

@@ -402,7 +402,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
402402
$"Person2.deletionDate".as("Person2DeletionDate")
403403
)
404404

405-
val sampleFractionPersonPairs = 10000.0 / personPairs.count()
405+
val sampleFractionPersonPairs = Math.min(10000.0 / personPairs.count(), 1.0)
406406
personPairs.sample(sampleFractionPersonPairs, 42)
407407
},
408408
"people2Hops" -> Factor(PersonType, PlaceType, PersonKnowsPersonType) { case Seq(person, place, knows) =>
@@ -423,7 +423,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
423423
// sigmoid to select more samples for smaller scale factors
424424
val sampleSize = Math.min(count, Math.max(minSampleSize, count / (1 + Math.exp(count * curveFactor)) * 2))
425425

426-
val sampleFraction = sampleSize / count
426+
val sampleFraction = Math.min(sampleSize / count, 1.0)
427427

428428
log.info(s"Factor people4Hops: using ${sampleSize} samples (${sampleFraction * 100}%)")
429429

@@ -450,7 +450,7 @@ object FactorGenerationStage extends DatagenStage with Logging {
450450
$"Person2.deletionDate".as("Person2DeletionDate")
451451
)
452452

453-
val sampleFractionPersonPairs = 10000.0 / personPairs.count()
453+
val sampleFractionPersonPairs = Math.min(10000.0 / personPairs.count(), 1.0)
454454
personPairs.sample(sampleFractionPersonPairs, 42)
455455
},
456456
"sameUniversityKnows" -> LargeFactor(PersonKnowsPersonType, PersonStudyAtUniversityType) { case Seq(personKnowsPerson, studyAt) =>

0 commit comments

Comments
 (0)