Skip to content

Commit e7f28fc

Browse files
committed
unify build
1 parent bc0dd4d commit e7f28fc

File tree

8 files changed

+37
-36
lines changed

8 files changed

+37
-36
lines changed

.dockerignore

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,10 @@
44
*.swp
55
*.crc
66
*.log
7-
87
**/*.iml
98
.idea/*
109
**/.gitignore
1110
.git/*
1211
.circleci
13-
12+
**/.bloop
1413
Dockerfile

Dockerfile

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
FROM eclipse-temurin:8 as build-jar
2-
ARG MAVEN_VERSION=3.8.6
3-
COPY pom.xml /build/pom.xml
2+
ARG SBT_VERSION=1.5.2
3+
RUN cd /opt && curl -fSsL https://github.com/sbt/sbt/releases/download/v${SBT_VERSION}/sbt-${SBT_VERSION}.tgz | tar xvz
4+
ENV PATH=/opt/sbt/bin:$PATH
45
WORKDIR build
5-
RUN cd /opt && curl https://dlcdn.apache.org/maven/maven-3/${MAVEN_VERSION}/binaries/apache-maven-${MAVEN_VERSION}-bin.tar.gz | tar xvz
6-
ENV PATH=/opt/apache-maven-${MAVEN_VERSION}/bin:$PATH
7-
RUN mvn install
8-
COPY src /build/src
9-
RUN mvn assembly:assembly -DskipTests
6+
COPY build.sbt build.sbt
7+
COPY project project
8+
RUN sbt update
9+
COPY src src
10+
RUN sbt assembly
1011

1112
FROM scratch as jar
1213
COPY --from=build-jar /build/target/ldbc_snb_datagen_*-jar-with-dependencies.jar /jar

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ Both Java 8 and Java 11 are supported.
6868
Once you have Spark in place and built the JAR file, run the generator as follows:
6969

7070
```bash
71-
export PLATFORM_VERSION=spark3.2_2.12
71+
export PLATFORM_VERSION=$(sbt -batch -error 'print platformVersion')
7272
export DATAGEN_VERSION=$(sbt -batch -error 'print version')
7373
export LDBC_SNB_DATAGEN_JAR=$(sbt -batch -error 'print assembly / assemblyOutputPath')
7474
./tools/run.py <runtime configuration arguments> -- <generator configuration arguments>

build.sbt

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,11 @@ ThisBuild / Test / fork := true
1414

1515
val sparkVersion = settingKey[String]("The version of Spark used for building.")
1616
val sparkCompatVersion = taskKey[String]("The compatibility version of Spark")
17+
val platformVersion = taskKey[String]("The version of the target platform")
1718

1819
sparkVersion := "3.2.1"
1920
sparkCompatVersion := { sparkVersion.value.split("\\.", 3).take(2).mkString(".") }
21+
platformVersion := { scalaBinaryVersion.value + "_spark" + sparkCompatVersion.value }
2022

2123
resolvers += "TUDelft Repository" at "https://simulation.tudelft.nl/maven/"
2224

@@ -27,7 +29,7 @@ libraryDependencies ++= Seq(
2729
"com.github.scopt" %% "scopt" % "3.7.1",
2830
"org.javatuples" % "javatuples" % "1.2",
2931
"ca.umontreal.iro" % "ssj" % "2.5",
30-
"xerces" % "xercesImpl" % "2.12.0" % Runtime,
32+
"xml-apis" % "xml-apis" % "1.4.01",
3133
"org.specs2" %% "specs2-core" % "4.2.0" % Test,
3234
"org.specs2" %% "specs2-junit" % "4.2.0" % Test,
3335
"org.mockito" % "mockito-core" % "3.3.3" % Test,
@@ -99,7 +101,10 @@ assembly / assemblyMergeStrategy := {
99101
case _ => MergeStrategy.first
100102
}
101103

104+
// Override JAR name
102105
assembly / assemblyJarName := {
103-
moduleName.value + "-spark" + sparkCompatVersion.value + "_" +
104-
scalaBinaryVersion.value + "-" + version.value + ".assembly.jar"
105-
}
106+
moduleName.value + "_" + platformVersion.value + "-" + version.value + "-jar-with-dependencies.jar"
107+
}
108+
109+
// Put under target instead of target/<scala-binary-version>
110+
assembly / target := { target.value }

tools/build.sh

Lines changed: 0 additions & 3 deletions
This file was deleted.

tools/datagen/lib.py

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1 @@
1-
platform_version = "2.12_spark3.2"
2-
version = "0.5.0-SNAPSHOT"
31
main_class = 'ldbc.snb.datagen.LdbcDatagen'

tools/emr/README.md

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,14 @@ Install the required libraries as described in the [main README](../../README.md
2828

2929
## Submitting a job
3030

31-
1. Upload the JAR to S3. (We don't version the JARs yet, so you can only make sure that you run the intended code this way :( )
31+
1. Upload the JAR to S3. (We don't version the JARs yet, so uploading the JAR yourself is the only way to make sure that you run the intended code :( )
3232

3333
```bash
34-
PLATFORM_VERSION=2.12_spark3.1
35-
DATAGEN_VERSION=0.5.0-SNAPSHOT
36-
aws s3 cp target/ldbc_snb_datagen_${PLATFORM_VERSION}-${DATAGEN_VERSION}-jar-with-dependencies.jar s3://${BUCKET_NAME}/jars/ldbc_snb_datagen_${PLATFORM_VERSION}-${DATAGEN_VERSION}-jar-with-dependencies.jar
34+
export PLATFORM_VERSION=$(sbt -batch -error 'print platformVersion')
35+
export DATAGEN_VERSION=$(sbt -batch -error 'print version')
36+
export LDBC_SNB_DATAGEN_JAR=$(sbt -batch -error 'print assembly / assemblyOutputPath')
37+
export JAR_NAME=$(basename ${LDBC_SNB_DATAGEN_JAR})
38+
aws s3 cp ${LDBC_SNB_DATAGEN_JAR} s3://${BUCKET_NAME}/jars/$JAR_NAME
3739
```
3840

3941
1. Submit the job. Run with `--help` for customization options.
@@ -43,6 +45,7 @@ JOB_NAME=MyTest
4345
SCALE_FACTOR=10
4446
./tools/emr/submit_datagen_job.py \
4547
--bucket ${BUCKET_NAME} \
48+
--jar ${JAR_NAME} \
4649
${JOB_NAME} \
4750
${SCALE_FACTOR} \
4851
csv \
@@ -59,6 +62,7 @@ To use spot instances, add the `--use-spot` argument:
5962
./tools/emr/submit_datagen_job.py \
6063
--use-spot \
6164
--bucket ${BUCKET_NAME} \
65+
--jar ${JAR_NAME} \
6266
${JOB_NAME} \
6367
${SCALE_FACTOR} \
6468
csv \
@@ -78,6 +82,7 @@ Generate the BI data set with the following configuration:
7882
./tools/emr/submit_datagen_job.py \
7983
--use-spot \
8084
--bucket ${BUCKET_NAME} \
85+
--jar ${JAR_NAME} \
8186
--az us-east-2c \
8287
--copy-all \
8388
${JOB_NAME} \
@@ -99,7 +104,7 @@ Make sure that you have uploaded the right JAR first.
99104
PLATFORM_VERSION=2.12_spark3.1
100105
./tools/emr/submit_datagen_job.py \
101106
--bucket ${BUCKET_NAME} \
102-
--platform-version ${PLATFORM_VERSION} \
107+
--jar ${JAR_NAME} \
103108
--emr-release emr-6.2.0 \
104109
${JOB_NAME} \
105110
${SCALE_FACTOR} \

tools/emr/submit_datagen_job.py

Lines changed: 8 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,9 @@
99
import __main__
1010

1111
from math import ceil
12+
from botocore.credentials import subprocess
1213
from datagen import lib, util
14+
import subprocess
1315

1416
import argparse
1517

@@ -24,8 +26,6 @@
2426
'master_instance_type': 'r6gd.2xlarge',
2527
'instance_type': 'r6gd.4xlarge',
2628
'sf_ratio': 100.0, # ratio of SFs and machines. a ratio of 250.0 for SF1000 yields 4 machines
27-
'platform_version': lib.platform_version,
28-
'version': lib.version,
2929
'az': 'us-west-2c',
3030
'yes': False,
3131
'ec2_key': None,
@@ -73,22 +73,21 @@ def submit_datagen_job(name,
7373
format,
7474
mode,
7575
bucket,
76+
jar,
7677
use_spot,
7778
instance_type,
7879
sf_ratio,
7980
master_instance_type,
8081
az,
8182
emr_release,
82-
platform_version,
83-
version,
8483
yes,
8584
ec2_key,
8685
conf,
8786
copy_filter,
8887
copy_all,
8988
passthrough_args, **kwargs
9089
):
91-
90+
9291
is_interactive = (not yes) and hasattr(__main__, '__file__')
9392

9493
build_dir = '/ldbc_snb_datagen/build'
@@ -107,7 +106,7 @@ def submit_datagen_job(name,
107106
ts = datetime.utcnow()
108107
ts_formatted = ts.strftime('%Y%m%d_%H%M%S')
109108

110-
jar_url = f's3://{bucket}/jars/ldbc_snb_datagen_{platform_version}-{version}-jar-with-dependencies.jar'
109+
jar_url = f's3://{bucket}/jars/{jar}'
111110

112111
results_url = f's3://{bucket}/results/{name}'
113112
run_url = f'{results_url}/runs/{ts_formatted}'
@@ -241,12 +240,9 @@ def submit_datagen_job(name,
241240
parser.add_argument('--ec2-key',
242241
default=defaults['ec2_key'],
243242
help='EC2 key name for SSH connection')
244-
parser.add_argument('--platform-version',
245-
default=defaults['platform_version'],
246-
help='The spark platform the JAR is compiled for formatted like {scala.compat.version}_spark{spark.compat.version}, e.g. 2.12_spark3.2')
247-
parser.add_argument('--version',
248-
default=defaults['version'],
249-
help='LDBC SNB Datagen library version')
243+
parser.add_argument('--jar',
244+
required=True,
245+
help='LDBC SNB Datagen library JAR name')
250246
parser.add_argument('--emr-release',
251247
default=defaults['emr_release'],
252248
help='The EMR release to use. E.g. emr-6.6.0')

0 commit comments

Comments
 (0)