Skip to content

Commit 60d9d3e

Browse files
aglinxinyuan authored and MA77HEW820 committed
Fix Split Operator requires a seed (#3235)
Fix the bug where the Split Operator incorrectly requires a seed, even when it's optional. In this PR, we introduce a "Random Shuffle" checkbox to simplify the user experience and prevent confusion regarding the seed value. For advanced users, the ability to input a seed is still maintained.

After the change:

| | |
|-|-|
| ![image](https://github.com/user-attachments/assets/159ccd0f-610a-4431-a462-8bfa6dc53a55) | ![image](https://github.com/user-attachments/assets/3f0d9f7f-8059-4c26-a380-b1b98bfdd567) |
1 parent 661d9a2 commit 60d9d3e

File tree

2 files changed

+26
-7
lines changed

2 files changed

+26
-7
lines changed

core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/split/SplitOpDesc.scala

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,23 +2,42 @@ package edu.uci.ics.amber.operator.split
22

33
import com.fasterxml.jackson.annotation.{JsonProperty, JsonPropertyDescription}
44
import com.google.common.base.Preconditions
5+
import com.kjetland.jackson.jsonSchema.annotations.{
6+
JsonSchemaInject,
7+
JsonSchemaString,
8+
JsonSchemaTitle
9+
}
510
import edu.uci.ics.amber.core.executor.OpExecWithClassName
611
import edu.uci.ics.amber.core.virtualidentity.{ExecutionIdentity, WorkflowIdentity}
712
import edu.uci.ics.amber.core.workflow._
813
import edu.uci.ics.amber.operator.LogicalOp
14+
import edu.uci.ics.amber.operator.metadata.annotations.HideAnnotation
915
import edu.uci.ics.amber.operator.metadata.{OperatorGroupConstants, OperatorInfo}
1016
import edu.uci.ics.amber.util.JSONUtils.objectMapper
1117

12-
import scala.util.Random
1318
class SplitOpDesc extends LogicalOp {
1419

15-
@JsonProperty(value = "split percentage", required = false, defaultValue = "80")
16-
@JsonPropertyDescription("percentage of data going to the upper port (default 80%)")
20+
@JsonSchemaTitle("Split Percentage")
21+
@JsonProperty(defaultValue = "80")
22+
@JsonPropertyDescription("percentage of data going to the upper port")
1723
var k: Int = 80
1824

19-
@JsonProperty(value = "random seed", required = false)
20-
@JsonPropertyDescription("Random seed for split")
21-
var seed: Int = Random.nextInt()
25+
@JsonSchemaTitle("Auto-Generate Seed")
26+
@JsonPropertyDescription("Shuffle the data based on a random seed")
27+
@JsonProperty(defaultValue = "true")
28+
var random: Boolean = true
29+
30+
@JsonSchemaTitle("Seed")
31+
@JsonProperty(defaultValue = "1")
32+
@JsonPropertyDescription("An int for reproducible output across multiple run")
33+
@JsonSchemaInject(
34+
strings = Array(
35+
new JsonSchemaString(path = HideAnnotation.hideTarget, value = "random"),
36+
new JsonSchemaString(path = HideAnnotation.hideType, value = HideAnnotation.Type.equals),
37+
new JsonSchemaString(path = HideAnnotation.hideExpectedValue, value = "true")
38+
)
39+
)
40+
var seed: Int = 1
2241

2342
override def getPhysicalOp(
2443
workflowId: WorkflowIdentity,

core/workflow-operator/src/main/scala/edu/uci/ics/amber/operator/split/SplitOpExec.scala

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ class SplitOpExec(
1111
descString: String
1212
) extends OperatorExecutor {
1313
val desc: SplitOpDesc = objectMapper.readValue(descString, classOf[SplitOpDesc])
14-
lazy val random = new Random(desc.seed)
14+
lazy val random: Random = if (desc.random) new Random() else new Random(desc.seed)
1515

1616
override def processTupleMultiPort(
1717
tuple: Tuple,

0 commit comments

Comments
 (0)