From e813aebe7cf61d01e7014ab00cb4dc9b1f20a847 Mon Sep 17 00:00:00 2001 From: xeniape Date: Tue, 15 Oct 2024 10:35:32 +0200 Subject: [PATCH 1/4] ci: Add ny-tlc-report workflow, update READMEs --- .github/workflows/dev_ny-tlc-report.yaml | 89 ++++++++++++++++++++++++ apps/README.md | 4 -- examples/README-examples.md | 6 +- 3 files changed, 92 insertions(+), 7 deletions(-) create mode 100644 .github/workflows/dev_ny-tlc-report.yaml diff --git a/.github/workflows/dev_ny-tlc-report.yaml b/.github/workflows/dev_ny-tlc-report.yaml new file mode 100644 index 00000000..50fb5b11 --- /dev/null +++ b/.github/workflows/dev_ny-tlc-report.yaml @@ -0,0 +1,89 @@ +--- +name: Build and publish ny-tlc-report + +env: + IMAGE_NAME: ny-tlc-report + IMAGE_VERSION: 0.2.0 + REGISTRY_PATH: stackable + DOCKERFILE_PATH: "apps/docker/Dockerfile" + +on: + workflow_dispatch: + push: + branches: + - main + paths: + - apps/docker/Dockerfile + - apps/ny_tlc_report.py + - .github/workflows/dev_ny-tlc-report.yaml + +jobs: + build: + name: Build/Publish ${{ matrix.runner.arch }} Image + permissions: + id-token: write + runs-on: ${{ matrix.runner.name }} + strategy: + matrix: + runner: + - {name: "ubuntu-latest", arch: "amd64"} + - {name: "ubicloud-standard-8-arm", arch: "arm64"} + steps: + - name: Checkout Repository + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Build image + id: build + uses: stackabletech/actions/build-container-image@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6 + with: + image-name: ${{ env.IMAGE_NAME }} + image-index-manifest-tag: ${{ env.IMAGE_VERSION }} + container-file: ${{ env.DOCKERFILE_PATH }} + + - name: Publish Container Image on docker.stackable.tech + uses: stackabletech/actions/publish-image@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6 + with: + image-registry-uri: docker.stackable.tech + image-registry-username: github + image-registry-password: ${{ secrets.NEXUS_PASSWORD }} + image-repository: stackable/${{ env.IMAGE_NAME }} + image-manifest-tag: ${{ steps.build.outputs.image-manifest-tag }} + source-image-uri: ${{ steps.build.outputs.image-manifest-uri }} + + - name: Publish Container Image on oci.stackable.tech + uses: stackabletech/actions/publish-image@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6 + with: + image-registry-uri: oci.stackable.tech + image-registry-username: robot$stackable+github-action-build + image-registry-password: ${{ secrets.HARBOR_ROBOT_STACKABLE_GITHUB_ACTION_BUILD_SECRET }} + image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }} + image-manifest-tag: ${{ steps.build.outputs.image-manifest-tag }} + source-image-uri: ${{ steps.build.outputs.image-manifest-uri }} + + publish_manifests: + name: Build/Publish Manifest + needs: [build] + permissions: + id-token: write + runs-on: ubuntu-latest + steps: + - name: Checkout Repository + uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7 + + - name: Publish and Sign Image Index Manifest to docker.stackable.tech + uses: stackabletech/actions/publish-index-manifest@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6 + with: + image-registry-uri: docker.stackable.tech + image-registry-username: github + image-registry-password: ${{ secrets.NEXUS_PASSWORD }} + image-repository: stackable/${{ env.IMAGE_NAME }} + image-index-manifest-tag: ${{ env.IMAGE_VERSION }} + + - name: Publish and Sign Image Index Manifest to oci.stackable.tech + uses: stackabletech/actions/publish-index-manifest@013e6482fbc0edf2d38cf9220fc931f6a81336fb # v0.0.6 + with: + image-registry-uri: oci.stackable.tech + image-registry-username: robot$stackable+github-action-build + image-registry-password: ${{ secrets.HARBOR_ROBOT_STACKABLE_GITHUB_ACTION_BUILD_SECRET }} + image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }} + image-index-manifest-tag: ${{ env.IMAGE_VERSION }} \ No newline at end of file diff --git a/apps/README.md b/apps/README.md index 7f423e87..f6994ba3 100644 --- a/apps/README.md +++ b/apps/README.md @@ -1,8 +1,4 @@ -## Build job image - - docker build -t docker.stackable.tech/stackable/ny-tlc-report:0.2.0 -t docker.stackable.tech/stackable/ny-tlc-report:latest -f apps/docker/Dockerfile apps/ - ## Generate report from the public data set spark-submit --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider --packages org.apache.hadoop:hadoop-aws:3.2.0,com.amazonaws:aws-java-sdk-s3:1.12.180,com.amazonaws:aws-java-sdk-core:1.12.180 ny_tlc_report.py --input 's3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv' diff --git a/examples/README-examples.md b/examples/README-examples.md index 4466c7ea..7d739f74 100644 --- a/examples/README-examples.md +++ b/examples/README-examples.md @@ -9,11 +9,11 @@ This note outlines a few things that are needed to run these examples on a local Create a new local cluster (e.g. with [Kind](https://kind.sigs.k8s.io/docs/user/quick-start/) and the [stackablectl tool](https://github.com/stackabletech/stackablectl)). This creates a cluster named `stackable-data-platform`. ````text -kind delete clusters --all -stackablectl operator install spark-k8s commons secret -k +kind create cluster --name stackable-data-platform +stackablectl operator install spark-k8s commons secret ```` -Build the `ny-tlc-report` image from the Dockerfile in this repository (apps/docker/Dockerfile) and then load it to the cluster: +Load the `ny-tlc-report` image to the cluster: ````text kind load docker-image docker.stackable.tech/stackable/ny-tlc-report:0.2.0 --name stackable-data-platform From 54f94162ba100ec7170ea9f09d500d02c0a35bd2 Mon Sep 17 00:00:00 2001 From: xeniape Date: Tue, 15 Oct 2024 11:01:56 +0200 Subject: [PATCH 2/4] add new line --- .github/workflows/dev_ny-tlc-report.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/dev_ny-tlc-report.yaml b/.github/workflows/dev_ny-tlc-report.yaml index 50fb5b11..a7d3c851 100644 --- a/.github/workflows/dev_ny-tlc-report.yaml +++ b/.github/workflows/dev_ny-tlc-report.yaml @@ -86,4 +86,4 @@ jobs: image-registry-username: robot$stackable+github-action-build image-registry-password: ${{ secrets.HARBOR_ROBOT_STACKABLE_GITHUB_ACTION_BUILD_SECRET }} image-repository: ${{ env.REGISTRY_PATH }}/${{ env.IMAGE_NAME }} - image-index-manifest-tag: ${{ env.IMAGE_VERSION }} \ No newline at end of file + image-index-manifest-tag: ${{ env.IMAGE_VERSION }} From ec8c5a05ddad99771201db222dec62d761986902 Mon Sep 17 00:00:00 2001 From: xeniape Date: Tue, 15 Oct 2024 11:13:07 +0200 Subject: [PATCH 3/4] adjust headings in README --- apps/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apps/README.md b/apps/README.md index f6994ba3..1b1bf19a 100644 --- a/apps/README.md +++ b/apps/README.md @@ -1,5 +1,5 @@ -## Generate report from the public data set +# Generate report from the public data set spark-submit --conf spark.hadoop.fs.s3a.aws.credentials.provider=org.apache.hadoop.fs.s3a.AnonymousAWSCredentialsProvider --packages org.apache.hadoop:hadoop-aws:3.2.0,com.amazonaws:aws-java-sdk-s3:1.12.180,com.amazonaws:aws-java-sdk-core:1.12.180 ny_tlc_report.py --input 's3a://nyc-tlc/trip data/yellow_tripdata_2021-07.csv' @@ -10,7 +10,7 @@ Notes: * Only one file is used for reporting in this example `yellow_tripdata_2021-07.csv` * This example works with `spark-3.1.3-bin-hadoop3.2` and Python 3.9.7. Other versions may require different dependency versions or even complete different dependencies altogether. -### Links +## Links [0] TLC trip data set https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page [1] AWS Java SDK https://github.com/aws/aws-sdk-java From 0cd45d4069bbd9d5c98e14f333cbc6597891cc23 Mon Sep 17 00:00:00 2001 From: xeniape Date: Tue, 15 Oct 2024 11:15:56 +0200 Subject: [PATCH 4/4] linter: modify links --- apps/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/README.md b/apps/README.md index 1b1bf19a..29e1167b 100644 --- a/apps/README.md +++ b/apps/README.md @@ -12,6 +12,6 @@ Notes: ## Links -[0] TLC trip data set https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page -[1] AWS Java SDK https://github.com/aws/aws-sdk-java -[2] Hadoop AWS module https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html +[0] [TLC trip data set](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page) +[1] [AWS Java SDK](https://github.com/aws/aws-sdk-java) +[2] [Hadoop AWS module](https://hadoop.apache.org/docs/current/hadoop-aws/tools/hadoop-aws/index.html)