24 | 24 | 'use_spot': True,
25 | 25 | 'master_instance_type': 'r6gd.2xlarge',
26 | 26 | 'instance_type': 'r6gd.4xlarge',
27 |    | - 'executors_per_sf': 1e-3,
28 |    | - 'partitions_per_sf': 1e-1,
29 |    | - 'az': 'us-east-2c',
   | 27 | + 'sf_per_executors': 3e3,
   | 28 | + 'sf_per_partitions': 1e2,
   | 29 | + 'az': 'us-west-2c',
30 | 30 | 'yes': False,
31 | 31 | 'ec2_key': None,
32 | 32 | 'emr_release': 'emr-6.6.0'
@@ -68,9 +68,9 @@ def submit_datagen_job(name,
68 | 68 | use_spot,
69 | 69 | instance_type,
70 | 70 | executors,
71 |    | - executors_per_sf,
   | 71 | + sf_per_executors,
72 | 72 | partitions,
73 |    | - partitions_per_sf,
   | 73 | + sf_per_partitions,
74 | 74 | master_instance_type,
75 | 75 | az,
76 | 76 | emr_release,
@@ -106,10 +106,10 @@ def submit_datagen_job(name,
106 | 106 | }
107 | 107 |
108 | 108 | if executors is None:
109 |     | - executors = max(min_num_workers, min(max_num_workers, ceil(sf * executors_per_sf)))
    | 109 | + executors = max(min_num_workers, min(max_num_workers, ceil(sf / sf_per_executors)))
110 | 110 |
111 | 111 | if partitions is None:
112 |     | - partitions = max(min_num_threads, ceil(sf * partitions_per_sf))
    | 112 | + partitions = max(min_num_threads, ceil(sf / sf_per_partitions))
113 | 113 |
114 | 114 | spark_defaults_config = {
115 | 115 | 'spark.serializer': 'org.apache.spark.serializer.KryoSerializer',
@@ -265,20 +265,20 @@ def submit_datagen_job(name,
265 | 265 | type=int,
266 | 266 | help=f"Total number of Spark executors."
267 | 267 | )
268 |     | - executor_args.add_argument("--executors-per-sf",
    | 268 | + executor_args.add_argument("--sf-per-executors",
269 | 269 | type=float,
270 |     | - default=defaults['executors_per_sf'],
271 |     | - help=f"Number of Spark executors per scale factor. Default: {defaults['executors_per_sf']}"
    | 270 | + default=defaults['sf_per_executors'],
    | 271 | + help=f"Number of scale factors per Spark executor. Default: {defaults['sf_per_executors']}"
272 | 272 | )
273 | 273 | partitioning_args = parser.add_mutually_exclusive_group()
274 | 274 | partitioning_args.add_argument("--partitions",
275 | 275 | type=int,
276 | 276 | help=f"Total number of Spark partitions to use when generating the dataset."
277 | 277 | )
278 |     | - partitioning_args.add_argument("--partitions-per-sf",
    | 278 | + partitioning_args.add_argument("--sf-per-partitions",
279 | 279 | type=float,
280 |     | - default=defaults['partitions_per_sf'],
281 |     | - help=f"Number of Spark partitions per scale factor to use when generating the dataset. Default: {defaults['partitions_per_sf']}"
    | 280 | + default=defaults['sf_per_partitions'],
    | 281 | + help=f"Number of scale factors per Spark partition to use when generating the dataset. Default: {defaults['sf_per_partitions']}"
282 | 282 | )
283 | 283 |
284 | 284 | parser.add_argument('--', nargs='*', help='Arguments passed to LDBC SNB Datagen', dest="arg")
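Because --executors and --sf-per-executors live in the same mutually exclusive group (executor_args, presumably created just above the lines shown), and --partitions and --sf-per-partitions in another, a caller picks either an absolute count or a scale-factor ratio, never both. A stripped-down argparse sketch of that behavior; it mirrors the option names and the new defaults from this commit but is otherwise illustrative:

    import argparse

    parser = argparse.ArgumentParser()
    # Either a fixed executor count or a SF-per-executor ratio, not both.
    executor_args = parser.add_mutually_exclusive_group()
    executor_args.add_argument("--executors", type=int)
    executor_args.add_argument("--sf-per-executors", type=float, default=3e3)
    # Same pattern for partitions.
    partitioning_args = parser.add_mutually_exclusive_group()
    partitioning_args.add_argument("--partitions", type=int)
    partitioning_args.add_argument("--sf-per-partitions", type=float, default=1e2)

    print(parser.parse_args(["--sf-per-executors", "3000"]))
    # parse_args(["--executors", "4", "--sf-per-executors", "3000"]) exits with
    # "argument --sf-per-executors: not allowed with argument --executors".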