Skip to content

Commit 566cdba

Browse files
jmgtanvgkowski
and authored
feat(processing): User Defined Stages in Spark CI/CD Pipeline (#837)
* Add support for user defined environments in CI/CD pipeline --------- Co-authored-by: vgkowski <[email protected]>
1 parent 65d43b1 commit 566cdba

File tree

10 files changed

+691
-52
lines changed

10 files changed

+691
-52
lines changed

examples/spark-data-lake/README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,35 @@ pip install -r requirements.txt
7070
}
7171
```
7272

73+
Alternatively, if further customization is necessary, the following allows multiple different environments to be created:
74+
75+
**Stage names must be unique**
76+
77+
```json
78+
{
79+
"environments": [
80+
{
81+
"stageName": "<STAGE_NAME_1>",
82+
"account": "<STAGE_ACCOUNT_ID>",
83+
"region": "<REGION>",
84+
"triggerIntegTest": "<OPTIONAL_BOOLEAN_CAN_BE_OMITTED>"
85+
},
86+
{
87+
"stageName": "<STAGE_NAME_2>",
88+
"account": "<STAGE_ACCOUNT_ID>",
89+
"region": "<REGION>",
90+
"triggerIntegTest": "<OPTIONAL_BOOLEAN_CAN_BE_OMITTED>"
91+
},
92+
{
93+
"stageName": "<STAGE_NAME_3>",
94+
"account": "<STAGE_ACCOUNT_ID>",
95+
"region": "<REGION>",
96+
"triggerIntegTest": "<OPTIONAL_BOOLEAN_CAN_BE_OMITTED>"
97+
}
98+
]
99+
}
100+
```
101+
73102
5. Create a connection; this will serve to link your code repository to AWS CodePipeline. You can follow the instructions in the [AWS documentation](https://docs.aws.amazon.com/dtconsole/latest/userguide/connections.html)
74103
to create a connection.
75104

framework/API.md

Lines changed: 84 additions & 23 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

framework/src/processing/README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,6 +206,37 @@ You need to also provide the accounts information in the cdk.json in the form of
206206
}
207207
```
208208

209+
## User Defined Stages
210+
211+
To define multiple stages (which can also be deployed in different AWS accounts by following the bootstrap command in the previous section), configure the `cdk.json` file with the following:
212+
213+
**Stage names must be unique**
214+
215+
```json
216+
{
217+
"environments": [
218+
{
219+
"stageName": "<STAGE_NAME_1>",
220+
"account": "<STAGE_ACCOUNT_ID>",
221+
"region": "<REGION>",
222+
"triggerIntegTest": "<OPTIONAL_BOOLEAN_CAN_BE_OMITTED>"
223+
},
224+
{
225+
"stageName": "<STAGE_NAME_2>",
226+
"account": "<STAGE_ACCOUNT_ID>",
227+
"region": "<REGION>",
228+
"triggerIntegTest": "<OPTIONAL_BOOLEAN_CAN_BE_OMITTED>"
229+
},
230+
{
231+
"stageName": "<STAGE_NAME_3>",
232+
"account": "<STAGE_ACCOUNT_ID>",
233+
"region": "<REGION>",
234+
"triggerIntegTest": "<OPTIONAL_BOOLEAN_CAN_BE_OMITTED>"
235+
}
236+
]
237+
}
238+
```
239+
209240
## Defining a CDK Stack for the Spark application
210241

211242
The `SparkCICDPipeline` construct deploys an application stack, which contains your business logic, into staging and production environments.

framework/src/processing/lib/cicd-pipeline/spark-emr-cicd-pipeline.ts

Lines changed: 86 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,18 @@ import {
1818
} from '../../../utils';
1919
import { DEFAULT_SPARK_IMAGE, SparkImage } from '../emr-releases';
2020

21+
const MISSING_ENVIRONMENTS_ERROR = 'MissingEnvironmentsError';
22+
const DUPLICATE_STAGE_NAME_ERROR = 'DuplicateStageNameError';
23+
24+
/**
25+
* User defined CI/CD environment stages
26+
*/
27+
interface CICDEnvironment {
28+
stageName: string;
29+
account: string;
30+
region: string;
31+
triggerIntegTest?: boolean;
32+
}
2133

2234
/**
2335
* A CICD Pipeline to test and deploy a Spark application on Amazon EMR in cross-account environments using CDK Pipelines.
@@ -211,36 +223,63 @@ export class SparkEmrCICDPipeline extends TrackedConstruct {
211223
},
212224
});
213225

214-
// Create the Staging stage of the CICD
215-
const staging = new ApplicationStage(this, 'Staging', {
216-
env: this.getAccountFromContext('staging'),
226+
try {
227+
const environments = this.getUserDefinedEnvironmentsFromContext();
228+
229+
for (const e of environments) {
230+
this.integrationTestStage = this.attachStageToPipeline(e.stageName.toUpperCase(), {
231+
account: e.account,
232+
region: e.region,
233+
}, e.triggerIntegTest || false, buildStage, props);
234+
}
235+
} catch (e) {
236+
const error = e as Error;
237+
if (error.name === DUPLICATE_STAGE_NAME_ERROR) {
238+
throw e;
239+
}
240+
241+
this.integrationTestStage = this.attachStageToPipeline('Staging', this.getAccountFromContext('staging'), true, buildStage, props);
242+
this.attachStageToPipeline('Prod', this.getAccountFromContext('prod'), false, buildStage, props);
243+
}
244+
}
245+
246+
/**
247+
* Attaches the given stage to the pipeline
248+
* @param stageName
249+
* @param resourceEnvironment
250+
* @param attachIntegTest
251+
* @param buildStage
252+
* @param props
253+
* @returns {CodeBuildStep|undefined} if integration step is configured, this returns the corresponding `CodeBuildStep` for the test
254+
*/
255+
private attachStageToPipeline(stageName: string, resourceEnvironment: ResourceEnvironment
256+
, attachIntegTest: boolean, buildStage: CodeBuildStep
257+
, props: SparkEmrCICDPipelineProps): CodeBuildStep|undefined {
258+
const applicationStage = new ApplicationStage(this, stageName, {
259+
env: resourceEnvironment,
217260
applicationStackFactory: props.applicationStackFactory,
218-
outputsEnv: props.integTestEnv,
219-
stage: CICDStage.STAGING,
261+
outputsEnv: (attachIntegTest && props.integTestScript) ? props.integTestEnv : undefined,
262+
stage: CICDStage.of(stageName.toUpperCase()),
220263
});
221-
const stagingDeployment = this.pipeline.addStage(staging);
264+
const stageDeployment = this.pipeline.addStage(applicationStage);
265+
266+
let integrationTestStage:CodeBuildStep|undefined = undefined;
222267

223-
if (props.integTestScript) {
268+
if (attachIntegTest && props.integTestScript) {
224269
// Extract the path and script name from the integration tests script path
225270
const [integPath, integScript] = SparkEmrCICDPipeline.extractPath(props.integTestScript);
226271

227-
this.integrationTestStage = new CodeBuildStep('IntegrationTests', {
272+
integrationTestStage = new CodeBuildStep(`${stageName}IntegrationTests`, {
228273
input: buildStage.addOutputDirectory(integPath),
229274
commands: [`chmod +x ${integScript} && ./${integScript}`],
230-
envFromCfnOutputs: staging.stackOutputsEnv,
275+
envFromCfnOutputs: applicationStage.stackOutputsEnv,
231276
rolePolicyStatements: props.integTestPermissions,
232277
});
233278
// Add a post step to run the integration tests
234-
stagingDeployment.addPost(this.integrationTestStage);
279+
stageDeployment.addPost(integrationTestStage);
235280
}
236281

237-
// Create the Production stage of the CICD
238-
this.pipeline.addStage(new ApplicationStage(this, 'Production', {
239-
env: this.getAccountFromContext('prod'),
240-
applicationStackFactory: props.applicationStackFactory,
241-
stage: CICDStage.PROD,
242-
}));
243-
282+
return integrationTestStage;
244283
}
245284

246285
/**
@@ -251,4 +290,34 @@ export class SparkEmrCICDPipeline extends TrackedConstruct {
251290
if (!account) throw new Error(`Missing context variable ${name}`);
252291
return account;
253292
}
293+
294+
/**
295+
* Retrieves the list of user defined environments from the context
296+
* @returns {CICDEnvironment[]} list of user defined environments
297+
*/
298+
private getUserDefinedEnvironmentsFromContext(): CICDEnvironment[] {
299+
const environments = this.node.tryGetContext('environments') as CICDEnvironment[];
300+
301+
if (!environments) {
302+
const missingContextError = new Error('Missing context variable environments');
303+
missingContextError.name = MISSING_ENVIRONMENTS_ERROR;
304+
throw missingContextError;
305+
} else {
306+
//check for duplicates
307+
308+
const stageNameTracker = [];
309+
310+
for (let e of environments) {
311+
if (stageNameTracker.indexOf(e.stageName) != -1) {
312+
const duplicateStageError = new Error('Duplicate stage name found');
313+
duplicateStageError.name = DUPLICATE_STAGE_NAME_ERROR;
314+
throw duplicateStageError;
315+
}
316+
317+
stageNameTracker.push(e.stageName);
318+
}
319+
}
320+
321+
return environments;
322+
}
254323
}

framework/src/utils/lib/application-stage.ts

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,31 @@ import { ApplicationStackFactory } from './application-stack-factory';
1010
/**
1111
* The list of CICD Stages used in CICD Pipelines.
1212
*/
13-
export enum CICDStage {
14-
STAGING = 'staging',
15-
PROD = 'prod',
13+
export class CICDStage {
14+
15+
/**
16+
* Prod stage
17+
*/
18+
public static readonly PROD = CICDStage.of('PROD');
19+
20+
/**
21+
* Staging stage
22+
*/
23+
public static readonly STAGING = CICDStage.of('STAGING');
24+
25+
/**
26+
* Custom stage
27+
* @param stage the stage inside the pipeline
28+
* @returns
29+
*/
30+
public static of(stage: string) {
31+
return new CICDStage(stage);
32+
}
33+
34+
/**
35+
* @param stage the stage inside the pipeline
36+
*/
37+
private constructor(public readonly stage: string) {}
1638
}
1739

1840
/**

framework/test/e2e/spark-cicd-pipeline.e2e.test.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
/**
55
* E2E test for SparkCICDPipeline
66
*
7-
* @group e2e/processing/spark-cicd
7+
* @group e2e/processing/default-spark-cicd
88
*/
99

1010
import { RemovalPolicy, CfnOutput, Stack, StackProps, App } from 'aws-cdk-lib';
@@ -18,7 +18,7 @@ jest.setTimeout(9000000);
1818

1919
// GIVEN
2020
const app = new App();
21-
const cicdStack = new Stack(app, 'CICDStack', {
21+
const cicdStack = new Stack(app, 'DefaultCICDStack', {
2222
env: {
2323
region: 'eu-west-1',
2424
},
@@ -27,8 +27,8 @@ const testStack = new TestStack('SparkCICDPipelineTestStack', app, cicdStack);
2727
const { stack } = testStack;
2828

2929
stack.node.setContext('@data-solutions-framework-on-aws/removeDataOnDestroy', true);
30-
stack.node.setContext('staging', { accountId: '123456789012', region: 'eu-west-1' });
31-
stack.node.setContext('prod', { accountId: '123456789012', region: 'eu-west-1' });
30+
stack.node.setContext('staging', { accountId: stack.account, region: stack.region });
31+
stack.node.setContext('prod', { accountId: stack.account, region: stack.region });
3232

3333
interface MyApplicationStackProps extends StackProps {
3434
readonly prodBoolean: Boolean;

0 commit comments

Comments
 (0)