Commit a1bdd3f

tools
1 parent dabf355 commit a1bdd3f

File tree

1 file changed: +13 -13 lines changed


tools/emr/submit_datagen_job.py

Lines changed: 13 additions & 13 deletions
@@ -24,9 +24,9 @@
     'use_spot': True,
     'master_instance_type': 'r6gd.2xlarge',
     'instance_type': 'r6gd.4xlarge',
-    'executors_per_sf': 1e-3,
-    'partitions_per_sf': 1e-1,
-    'az': 'us-east-2c',
+    'sf_per_executors': 3e3,
+    'sf_per_partitions': 1e2,
+    'az': 'us-west-2c',
     'yes': False,
     'ec2_key': None,
     'emr_release': 'emr-6.6.0'
@@ -68,9 +68,9 @@ def submit_datagen_job(name,
                        use_spot,
                        instance_type,
                        executors,
-                       executors_per_sf,
+                       sf_per_executors,
                        partitions,
-                       partitions_per_sf,
+                       sf_per_partitions,
                        master_instance_type,
                        az,
                        emr_release,
@@ -106,10 +106,10 @@ def submit_datagen_job(name,
     }
 
     if executors is None:
-        executors = max(min_num_workers, min(max_num_workers, ceil(sf * executors_per_sf)))
+        executors = max(min_num_workers, min(max_num_workers, ceil(sf / sf_per_executors)))
 
     if partitions is None:
-        partitions = max(min_num_threads, ceil(sf * partitions_per_sf))
+        partitions = max(min_num_threads, ceil(sf / sf_per_partitions))
 
     spark_defaults_config = {
         'spark.serializer': 'org.apache.spark.serializer.KryoSerializer',
@@ -265,20 +265,20 @@ def submit_datagen_job(name,
                                type=int,
                                help=f"Total number of Spark executors."
                                )
-    executor_args.add_argument("--executors-per-sf",
+    executor_args.add_argument("--sf-per-executors",
                                type=float,
-                               default=defaults['executors_per_sf'],
-                               help=f"Number of Spark executors per scale factor. Default: {defaults['executors_per_sf']}"
+                               default=defaults['sf_per_executors'],
+                               help=f"Number of Spark executors per scale factor. Default: {defaults['sf_per_executors']}"
                                )
     partitioning_args = parser.add_mutually_exclusive_group()
     partitioning_args.add_argument("--partitions",
                                    type=int,
                                    help=f"Total number of Spark partitions to use when generating the dataset."
                                    )
-    partitioning_args.add_argument("--partitions-per-sf",
+    partitioning_args.add_argument("--sf-per-partitions",
                                    type=float,
-                                   default=defaults['partitions_per_sf'],
-                                   help=f"Number of Spark partitions per scale factor to use when generating the dataset. Default: {defaults['partitions_per_sf']}"
+                                   default=defaults['sf_per_partitions'],
+                                   help=f"Number of Spark partitions per scale factor to use when generating the dataset. Default: {defaults['sf_per_partitions']}"
                                    )
 
     parser.add_argument('--', nargs='*', help='Arguments passed to LDBC SNB Datagen', dest="arg")
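To make the semantic flip concrete: the old parameters were multipliers (executors_per_sf = 1e-3 meant one executor per 1000 SF, partitions_per_sf = 1e-1 meant one partition per 10 SF), while the renamed ones are divisors (sf_per_executors = 3e3 means one executor per 3000 SF, sf_per_partitions = 1e2 means one partition per 100 SF). Below is a minimal sketch of the new sizing arithmetic, not code from the commit; min_num_workers, max_num_workers, and min_num_threads are set to assumed placeholder values, not the script's real bounds.

from math import ceil

# Assumed placeholder bounds for illustration only; the real limits live
# elsewhere in submit_datagen_job.py.
min_num_workers, max_num_workers = 1, 1000
min_num_threads = 1

# Defaults introduced by this commit.
sf_per_executors = 3e3    # scale factors handled per executor
sf_per_partitions = 1e2   # scale factors written per partition

for sf in (100, 3000, 30000):
    # The scale factor is now divided by the ratio (it used to be multiplied
    # by executors_per_sf / partitions_per_sf), then clamped to the bounds.
    executors = max(min_num_workers, min(max_num_workers, ceil(sf / sf_per_executors)))
    partitions = max(min_num_threads, ceil(sf / sf_per_partitions))
    print(f"sf={sf}: executors={executors}, partitions={partitions}")
# sf=100: executors=1, partitions=1
# sf=3000: executors=1, partitions=30
# sf=30000: executors=10, partitions=300

Besides the rename, the defaults themselves shift: one executor now covers 3000 SF instead of 1000, and one partition covers 100 SF instead of 10, so clusters sized by these flags come out smaller for the same scale factor.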
