Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions projects/control-service/cicd/.gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,21 @@ control_service_publish_job_builder_image:
changes:
- projects/control-service/projects/job-builder/version.txt

# Builds and publishes the job-builder image located in
# projects/control-service/projects/job-builder-code-commit.
# Runs only for commits on main (never for scheduled pipelines) and only when
# the version.txt of that same directory changes, since that file is the one
# the publish script reads to tag the image.
control_service_publish_job_builder_codecommit_image:
  extends: .images:dind:docker-push-to-vdk-repos
  stage: publish_artifacts
  script:
    - apk add --no-cache bash
    - docker login --username "${VDK_DOCKER_REGISTRY_USERNAME}" --password "${VDK_DOCKER_REGISTRY_PASSWORD}" "${VDK_DOCKER_REGISTRY_URL}"
    - cd projects/control-service/projects/job-builder-code-commit
    - bash -ex ./publish-vdk-job-builder.sh
  retry: !reference [.control_service_retry, retry_options]
  rules:
    - if: '$CI_PIPELINE_SOURCE == "schedule"'
      when: never
    - if: '$CI_COMMIT_BRANCH == "main"'
      changes:
        - projects/control-service/projects/job-builder-code-commit/version.txt

control_service_publish_job_builder_secure_image:
extends: control_service_publish_job_builder_image
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ spec:
value: "{{ .Values.deploymentEcrAwsServiceAccountSecretAccessKey }}"
- name: DATAJOBS_AWS_ROLE_ARN
value: "{{ .Values.deploymentEcrAwsRoleArn }}"
# CodeCommit settings, taken from the chart values.
- name: DATAJOBS_CC_AWS_ASSUME_IAM_ROLE
  value: "{{ .Values.deploymentCodeCommitAwsAssumeIamRole }}"
- name: GIT_GRC_URL
  value: "{{ .Values.deploymentGitRemoteCodeCommitUrl }}"
- name: DATAJOBS_AWS_DEFAULT_SESSION_DURATION_SECONDS
value: "{{ .Values.deploymentEcrAwsDefaultSessionDurationSeconds }}"
- name: DOCKER_REGISTRY_TYPE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,9 @@ deploymentGitPassword: ""
# Credentials with read and write access to the Git repository.
uploadGitReadWriteUsername: ""
uploadGitReadWritePassword: ""

# AWS CodeCommit properties.
# Rendered into the DATAJOBS_CC_AWS_ASSUME_IAM_ROLE environment variable of the deployment.
# NOTE(review): presumably switches Git access to AWS CodeCommit via an assumed IAM role - confirm.
deploymentCodeCommitAwsAssumeIamRole: false
# Rendered into the GIT_GRC_URL environment variable of the deployment.
# NOTE(review): presumably a git-remote-codecommit URL such as
# "codecommit::us-east-1://vdkdata-jobs", needed only when
# deploymentCodeCommitAwsAssumeIamRole is true - confirm.
deploymentGitRemoteCodeCommitUrl: ""
# List of file types that are allowed to be uploaded.
# It is comma separated list with file types. For example "image/png,text/plain"
# Only base type can be specified as well, then all files with that base type are allowed.
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Used to trigger a build for a data job image.
# The image bundles the kaniko executor with git, bash, the AWS CLI and
# git-remote-codecommit, and runs /build_image.sh as its entrypoint.

# Named stage that only serves as the source of the kaniko binaries.
FROM gcr.io/kaniko-project/executor AS kaniko

FROM alpine

COPY --from=kaniko /kaniko /kaniko

# key=value form; the space-separated "ENV key value" form is legacy syntax.
ENV PATH="$PATH:/kaniko"
ENV SSL_CERT_DIR=/kaniko/ssl/certs
ENV DOCKER_CONFIG=/kaniko/.docker/

WORKDIR /workspace

# Dockerfile that kaniko uses to build the data job image itself.
COPY Dockerfile.python.vdk /workspace/Dockerfile
COPY build_image.sh /build_image.sh
RUN chmod +x /build_image.sh


# Setup Python and Git
## Update & Install dependencies
RUN apk add --no-cache --update \
    git \
    bash

# pip is only needed to install the two tools, so it is removed again in the
# same layer; --no-cache-dir keeps pip's download cache out of the image.
RUN apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/v3.10/main python3=3.7.10-r0 py3-pip \
    && pip3 install --no-cache-dir awscli \
    && pip3 install --no-cache-dir git-remote-codecommit \
    && apk --purge -v del py3-pip \
    && rm -rf /var/cache/apk/*

ENTRYPOINT ["/build_image.sh"]
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# https://docs.docker.com/develop/develop-images/dockerfile_best-practices
#
# Builds the image of a single data job: copies the job directory into /job,
# installs its requirements file when present, and runs as a non-root user.

ARG base_image=python:3.9-slim

FROM $base_image

ARG UID=1000
ARG GID=1000

# Set the working directory
WORKDIR /job

# Create necessary users and set home directory to /job
RUN groupadd -r -g $GID group && useradd -u $UID -g $GID -r user && chown -R $UID:$GID /job
ENV HOME=/job

# Copy the actual job that has to be executed
ARG job_name
COPY --chown=$UID:$GID $job_name $job_name/

# TODO: this would trigger for any change in job even if requirements.txt does not change
# but there's no COPY_IF_EXISTS command in docker to try copy it.
ARG requirements_file=requirements.txt
# On a failed install the ">requirements_failed<" marker is printed and the build fails.
RUN if [ -f "$job_name/$requirements_file" ]; then pip3 install --no-cache-dir --disable-pip-version-check -q -r "$job_name/$requirements_file" || ( echo ">requirements_failed<" && exit 1 ) ; fi

ARG job_githash
# key=value form; the space-separated "ENV key value" form is legacy syntax.
ENV JOB_NAME="$job_name"
ENV VDK_JOB_GITHASH="$job_githash"

USER $UID
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
This package provides a way to configure and build your own Data Job images.
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#!/bin/sh
# Copyright 2023-2024 Broadcom
# SPDX-License-Identifier: Apache-2.0

# Copyright 2021-2023 VMware, Inc.
# SPDX-License-Identifier: Apache-2.0

# Clones the data job source at a given commit and builds and pushes the data
# job image with kaniko.
#
# Positional arguments (TODO: replace those as env variables):
#   $1   AWS access key id
#   $2   AWS secret access key
#   $3   AWS region
#   $4   docker registry
#   $5   unused
#   $6   unused
#   $7   git repository to clone the job source from
#   $8   registry type: "ecr"/"ECR" or "generic"/"GENERIC"
#   $9   registry username (generic registry)
#   $10  registry password (generic registry)
#   $11  AWS session token (optional)
#
# Environment variables read: DATA_JOB_NAME, GIT_COMMIT, IMAGE_REGISTRY_PATH,
# JOB_GITHASH, BASE_IMAGE, JOB_NAME, EXTRA_ARGUMENTS and extra_auth.

aws_access_key_id=$1
aws_secret_access_key=$2
aws_region=$3
docker_registry=$4
git_repository=$7
registry_type=$8
registry_username=$9
registry_password=${10}
aws_session_token=${11}

# Within this property docker config should be included to connect to the registry used to pull the image from.
# it should be prefixed with a comma
# example: ,"ghcr.io/versatile-data-kit-dev/dp/versatiledatakit":{"auth":"dmVyc2F0aWxlLWRhdGEta2l0LWRldjo8bXlUb2tlbj4="}
extra_auth=${extra_auth:-""}

# Echo selected data to be logged
echo "AWS_REGION=$aws_region"
echo "DOCKER_REGISTRY=$docker_registry"
echo "GIT_REPOSITORY=$git_repository"
echo "REGISTRY_TYPE=$registry_type"

# We default to generic repo.
# We have special support for ECR because
# even though Kaniko supports building and pushing images to ECR
# it doesn't create repository nor do they think they should support it -
# https://github.com/GoogleContainerTools/kaniko/pull/1537
# And ECR requires for each image to create separate repository
# And ECR will not create new image repository on docker push
# So we need to do it manually.
if [ "$registry_type" = "ecr" ] || [ "$registry_type" = "ECR" ]; then
  # Setup credentials to connect to AWS - same creds will be used by kaniko as well.
  aws configure set aws_access_key_id "$aws_access_key_id"
  aws configure set aws_secret_access_key "$aws_secret_access_key"

  # Check if aws_session_token is set and not empty.
  if [ -n "$aws_session_token" ]; then
    aws configure set aws_session_token "$aws_session_token"
  fi

  # https://stackoverflow.com/questions/1199613/extract-filename-and-path-from-url-in-bash-script
  repository_prefix=${docker_registry#*/}

  # Create docker repository if it does not exist
  aws ecr describe-repositories --region "$aws_region" --repository-names "$repository_prefix/${DATA_JOB_NAME}" ||
    aws ecr create-repository --region "$aws_region" --repository-name "$repository_prefix/${DATA_JOB_NAME}"

  echo '{ "credsStore": "ecr-login" }' > /kaniko/.docker/config.json
elif [ "$registry_type" = "generic" ] || [ "$registry_type" = "GENERIC" ]; then
  # printf instead of "echo -n": echo's -n flag is not portable across /bin/sh
  # implementations, and quoting keeps the credentials from being word-split.
  auth=$(printf '%s' "$registry_username:$registry_password" | base64 -w 0)
  export auth
  cat > /kaniko/.docker/config.json <<EOM
{
  "auths": {
    "$IMAGE_REGISTRY_PATH": {
      "username":"$registry_username",
      "password":"$registry_password",
      "auth": "$auth"
    }
    $extra_auth
  }
}
EOM
  #cat /kaniko/.docker/config.json
fi

# Clone repo into /data-jobs dir to get job's source
git clone "$git_repository" ./data-jobs

# A failed clone surfaces here; report it with the same marker the later
# checks use and stop instead of running git in the wrong directory.
cd ./data-jobs || { echo ">data-job-not-found<"; exit 1; }

# Braces, not parentheses: "exit" inside "( ... )" only leaves the subshell and
# the script would carry on building from the wrong commit.
git reset --hard "$GIT_COMMIT" || { echo ">data-job-not-found<"; exit 1; }

if [ ! -d "${DATA_JOB_NAME}" ]; then
  echo ">data-job-not-found<"
  exit 1
fi
cd ..

# kaniko supports building directly from git repository but as we are using codecommit
# and using aws session credentials, we need to build it beforehand
# EXTRA_ARGUMENTS is left unquoted so that several flags split into separate arguments.
/kaniko/executor \
  --dockerfile=/workspace/Dockerfile \
  --destination="${IMAGE_REGISTRY_PATH}/${DATA_JOB_NAME}:${GIT_COMMIT}" \
  --build-arg=job_githash="$JOB_GITHASH" \
  --build-arg=base_image="$BASE_IMAGE" \
  --build-arg=job_name="$JOB_NAME" \
  --context=./data-jobs $EXTRA_ARGUMENTS
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash

# Copyright 2023-2024 Broadcom
# SPDX-License-Identifier: Apache-2.0

# Fail fast so that a failed build is never followed by a push, even when the
# script is run without "bash -e".
set -euo pipefail

# Directory of this script; version.txt and the Dockerfile are resolved against it.
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
VERSION_TAG=$(cat "$SCRIPT_DIR/version.txt")
VDK_DOCKER_REGISTRY_URL=${VDK_DOCKER_REGISTRY_URL:-"registry.hub.docker.com/versatiledatakit"}

# Builds an image from a Dockerfile in SCRIPT_DIR, tags it with VERSION_TAG and
# "latest", and pushes both tags with docker_push_vdk.sh.
#   $1  image name, appended to VDK_DOCKER_REGISTRY_URL
#   $2  Dockerfile name, relative to SCRIPT_DIR
#   $3  optional extra arguments for "docker build"
function build_and_push_image() {
    local name="$1"
    local docker_file="$2"
    local arguments="${3:-}"

    local image_repo="$VDK_DOCKER_REGISTRY_URL/$name"
    local image_tag="$image_repo:$VERSION_TAG"

    # $arguments stays unquoted so that several options split into separate arguments.
    docker build -t "$image_tag" -t "$image_repo:latest" -f "$SCRIPT_DIR/$docker_file" $arguments "$SCRIPT_DIR"
    docker_push_vdk.sh "$image_tag"
    docker_push_vdk.sh "$image_repo:latest"
}

# NOTE(review): the image is published under the name "job-builder"; confirm this
# is the intended name for this variant of the builder image.
build_and_push_image "job-builder" Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
1.0.0
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,12 @@ configurations {
testImplementation.exclude group: 'com.vaadin.external.google', module: 'android-json'
}

// Import the Spring Cloud BOM; it supplies the versions of the Spring Cloud
// artifacts that are declared below without an explicit version.
dependencyManagement {
    imports {
        mavenBom('org.springframework.cloud:spring-cloud-dependencies:2021.0.9')
    }
}

dependencies { // Implementation dependencies are found on compile classpath of this component and consumers.
implementation project(':base')
implementation 'com.vmware.taurus:model:3.1.+'
Expand All @@ -37,6 +43,7 @@ dependencies { // Implementation dependencies are found on compile classpath of

// for authorization
implementation 'org.springframework.security:spring-security-oauth2-resource-server'
implementation 'org.springframework.cloud:spring-cloud-config-server'

implementation 'org.springframework.boot:spring-boot-starter-web'
implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@
import com.amazonaws.services.securitytoken.AWSSecurityTokenServiceClientBuilder;
import com.amazonaws.services.securitytoken.model.AssumeRoleRequest;
import java.util.UUID;

import lombok.Getter;
import org.springframework.stereotype.Service;

/**
Expand All @@ -26,6 +28,7 @@ public class AWSCredentialsService {
public record AWSCredentialsDTO(
String awsSecretAccessKey, String awsAccessKeyId, String awsSessionToken, String region) {}

@Getter
private STSAssumeRoleSessionCredentialsProvider credentialsProvider;
private AWSCredentialsServiceConfig awsCredentialsServiceConfig;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,12 @@ public class JobImageBuilder {
@Value("${datajobs.git.url}")
private String gitRepo;

@Value("${datajobs.git.cc.grc}")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't you just reuse the git URL above?

Then you wouldn't need the if statement below?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the URL expected by the git-remote-codecommit tool. It follows the format "codecommit::us-east-1://vdkdata-jobs", and git can fetch from AWS CodeCommit repositories only when the URL is in this format.

Source - https://github.com/aws/git-remote-codecommit

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes but can't you just set this through {datajobs.git.url} property?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I tested that, but pushing code through JGit didn't work in that case, so I included both the git and GRC URLs. This is an optional property, required only if datajobs.git.assumeIAMRole is true. Maybe I can add a comment before this field in the properties file to clarify this further.

private String gitCCRepo;

@Value("${datajobs.git.assumeIAMRole}")
boolean assumeCodeCommitIAMRole;

@Value("${datajobs.git.username}")
private String gitUsername;

Expand Down Expand Up @@ -197,6 +203,20 @@ public boolean buildImage(
registryUsername,
registryPassword,
builderAwsSessionToken);
if(assumeCodeCommitIAMRole){
args = Arrays.asList(
builderAwsAccessKeyId,
builderAwsSecretAccessKey,
awsRegion,
dockerRepositoryUrl,
"",
"",
gitCCRepo,
registryType,
registryUsername,
registryPassword,
builderAwsSessionToken);
}
var envs = getBuildParameters(dataJob, desiredDataJobDeployment);
String builderImage =
supportedPythonVersions.getBuilderImage(desiredDataJobDeployment.getPythonVersion());
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
/*
* Copyright 2023-2024 Broadcom
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import com.vmware.taurus.service.credentials.AWSCredentialsService;
import org.eclipse.jgit.transport.CredentialsProvider;
import org.springframework.cloud.config.server.support.AwsCodeCommitCredentialProvider;
import org.springframework.stereotype.Component;

/**
 * {@link VCSCredentialsProvider} that supplies an {@link AwsCodeCommitCredentialProvider} backed
 * by the AWS credentials provider obtained from {@link AWSCredentialsService}.
 */
@Component
public class CodeCommitCredentialProvider implements VCSCredentialsProvider {
  private final AWSCredentialsService awsCredentialsService;

  /**
   * @param awsCredentialsService service whose credentials provider is handed to the CodeCommit
   *     credential provider
   */
  public CodeCommitCredentialProvider(AWSCredentialsService awsCredentialsService) {
    this.awsCredentialsService = awsCredentialsService;
  }

  /**
   * Creates a new CodeCommit credential provider on every call.
   *
   * @return a provider configured with the current AWS credentials provider of the service
   */
  @Override
  public CredentialsProvider getProvider() {
    final AwsCodeCommitCredentialProvider provider = new AwsCodeCommitCredentialProvider();
    provider.setAwsCredentialProvider(awsCredentialsService.getCredentialsProvider());
    return provider;
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
* Copyright 2023-2024 Broadcom
* SPDX-License-Identifier: Apache-2.0
*/

package com.vmware.taurus.service.upload;

import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;

/**
 * Spring configuration that decides which {@link VCSCredentialsProvider} implementation is
 * exposed as the {@code credentialsProvider} bean.
 */
@Configuration
public class CredentialProviderConfig {

  /** Implementation chosen once, at construction time. */
  private final VCSCredentialsProvider credentialsProvider;

  /**
   * @param assumeCodeCommitIAMRole value of {@code datajobs.git.assumeIAMRole}; selects the
   *     CodeCommit provider when true and the Git provider otherwise
   * @param gitCredentialsProvider provider used when the flag is false
   * @param codeCommitProvider provider used when the flag is true
   */
  @Autowired
  public CredentialProviderConfig(
      @Value("${datajobs.git.assumeIAMRole}") boolean assumeCodeCommitIAMRole,
      GitCredentialsProvider gitCredentialsProvider,
      CodeCommitCredentialProvider codeCommitProvider) {
    this.credentialsProvider =
        assumeCodeCommitIAMRole ? codeCommitProvider : gitCredentialsProvider;
  }

  /** @return the provider selected in the constructor */
  @Bean(name = "credentialsProvider")
  public VCSCredentialsProvider credentialsProvider() {
    return credentialsProvider;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@
* Other providers are explained: https://www.codeaffine.com/2014/12/09/jgit-authentication/
*/
@Component
public class GitCredentialsProvider {
public class GitCredentialsProvider implements VCSCredentialsProvider {

@Value("${datajobs.git.read.write.username:}")
private String gitReadWriteUsername;

@Value("${datajobs.git.read.write.password:}")
private String gitReadWritePassword;

@Override
public CredentialsProvider getProvider() {
return new UsernamePasswordCredentialsProvider(gitReadWriteUsername, gitReadWritePassword);
}
Expand Down
Loading