Skip to content

Commit a630978

Browse files
committed
Merge branch 'development' of ssh://git.amazon.com:2222/pkg/DataLakeAsCode into mainline
2 parents aa97149 + 0e1b3bd commit a630978

16 files changed

+852
-182
lines changed

ApplyLakeFormationPermissions.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
python scripts/local.datalake.RemoveIamAllowedPrincipals.py

bin/aws.ts

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2,40 +2,88 @@
22
import 'source-map-support/register';
33
import * as cdk from '@aws-cdk/core';
44
import { BaselineStack } from '../lib/baseline-stack';
5-
import { DatalakeStack } from '../lib/datalake-stack';
5+
import { DataLakeStack } from '../lib/stacks/datalake-stack';
66
import { OpenTargetsStack } from '../lib/opentargets-stack';
77
import { ChemblStack } from '../lib/chembl-25-stack';
88
import { AnalyticsStack } from '../lib/analytics-stack.js';
9+
import iam = require('@aws-cdk/aws-iam');
910
import s3 = require('@aws-cdk/aws-s3');
10-
11+
import { DataLakeEnrollment } from '../lib/constructs/data-lake-enrollment';
1112

1213
const app = new cdk.App();
1314
const baseline = new BaselineStack(app, 'BaselineStack');
1415

1516

16-
const coreDataLake = new DatalakeStack(app, 'CoreDataLake', {
17+
const coreDataLake = new DataLakeStack(app, 'CoreDataLake', {
1718

1819
});
1920

20-
21-
2221
const chemblStack = new ChemblStack(app, 'ChemblStack', {
2322
database: baseline.ChemblDb,
2423
accessSecurityGroup: baseline.chemblDBChemblDbAccessSg,
2524
databaseSecret: baseline.chemblDBSecret,
26-
dataLakeBucket: coreDataLake.DataLakeBucket
25+
DataLake: coreDataLake
2726
});
2827

2928
const openTargetsStack = new OpenTargetsStack(app, 'OpenTargetsStack', {
3029
sourceBucket: baseline.OpenTargetsSourceBucket,
3130
sourceBucketDataPrefix: '/opentargets/sourceExports/19.11/output/',
32-
dataLakeBucket: coreDataLake.DataLakeBucket
31+
DataLake: coreDataLake
3332
});
3433

3534
const analyticsStack = new AnalyticsStack(app, 'AnalyticsStack', {
3635
targetVpc: baseline.Vpc,
3736
});
3837

3938

40-
chemblStack.grantRead(analyticsStack.NotebookRole);
41-
openTargetsStack.grantRead(analyticsStack.NotebookRole);
39+
chemblStack.grantIamRead(analyticsStack.NotebookRole);
40+
openTargetsStack.grantIamRead(analyticsStack.NotebookRole);
41+
42+
43+
44+
45+
46+
47+
48+
49+
const exampleUser = iam.User.fromUserName(coreDataLake, 'exampleGrantee', 'paul1' );
50+
51+
var exampleGrant: DataLakeEnrollment.TablePermissionGrant = {
52+
tables: ["association_data", "evidence_data","target_list","disease_list"],
53+
DatabasePermissions: [DataLakeEnrollment.DatabasePermission.Alter, DataLakeEnrollment.DatabasePermission.CreateTable, DataLakeEnrollment.DatabasePermission.Drop],
54+
GrantableDatabasePermissions: [DataLakeEnrollment.DatabasePermission.Alter, DataLakeEnrollment.DatabasePermission.CreateTable, DataLakeEnrollment.DatabasePermission.Drop],
55+
TablePermissions: [DataLakeEnrollment.TablePermission.Select, DataLakeEnrollment.TablePermission.Insert, DataLakeEnrollment.TablePermission.Delete],
56+
GrantableTablePermissions: [DataLakeEnrollment.TablePermission.Select]
57+
};
58+
59+
openTargetsStack.grantTablePermissions(exampleUser, exampleGrant);
60+
61+
62+
63+
64+
// In the example below, we are using the compound_structures table from ChEMBL. It has the following table definition:
65+
// ['molregno', 'molfile', 'standard_inchi', 'standard_inchi_key', 'canonical_smiles']
66+
// Let's say we want to give a principal ONLY select permissions to everything in the compound_structures table BUT the 'canonical_smiles' column.
67+
68+
var exampleTableWithColumnsGrant: DataLakeEnrollment.TableWithColumnPermissionGrant = {
69+
table: "chembl_25_public_compound_structures",
70+
// Note that we are NOT including 'canonical_smiles'. That effectively prevents this user from querying that column.
71+
columns: ['molregno', 'molfile', 'standard_inchi', 'standard_inchi_key'],
72+
DatabasePermissions: [],
73+
GrantableDatabasePermissions: [],
74+
TableColumnPermissions: [DataLakeEnrollment.TablePermission.Select],
75+
GrantableTableColumnPermissions: []
76+
};
77+
78+
var exampleTableWithColumnsGrant_WithWildCard: DataLakeEnrollment.TableWithColumnPermissionGrant = {
79+
table: "chembl_25_public_compound_structures",
80+
wildCardFilter: DataLakeEnrollment.TableWithColumnFilter.Exclude,
81+
columns: ['canonical_smiles'],
82+
DatabasePermissions: [],
83+
GrantableDatabasePermissions: [],
84+
TableColumnPermissions: [DataLakeEnrollment.TablePermission.Select],
85+
GrantableTableColumnPermissions: []
86+
};
87+
88+
// Note that exampleTableWithColumnsGrant and exampleTableWithColumnsGrant_WithWildCard grant the same effective permissions. One just uses the wildcard.
89+
chemblStack.grantTableWithColumnPermissions(exampleUser, exampleTableWithColumnsGrant);

lib/analytics-stack.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import glue = require('@aws-cdk/aws-glue');
66
import s3 = require('@aws-cdk/aws-s3');
77
import s3assets = require('@aws-cdk/aws-s3-assets');
88
import sagemaker = require('@aws-cdk/aws-sagemaker');
9-
import { DataSetStack, DataSetStackProps} from './dataset-stack';
9+
import { DataSetStack, DataSetStackProps} from './stacks/dataset-stack';
1010

1111
export interface AnalyticsStackProps extends cdk.StackProps{
1212
targetVpc: ec2.Vpc

lib/chembl-25-stack.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@ import rds = require('@aws-cdk/aws-rds');
55
import glue = require('@aws-cdk/aws-glue');
66
import s3 = require('@aws-cdk/aws-s3');
77
import s3assets = require('@aws-cdk/aws-s3-assets');
8-
import { RDSdataSetSetEnrollmentProps, RDSPostgresDataSetEnrollment } from './rds-data-set-enrollment';
9-
import { DataSetStack, DataSetStackProps} from './dataset-stack';
8+
import { RDSdataSetSetEnrollmentProps, RDSPostgresDataSetEnrollment } from './constructs/rds-data-set-enrollment';
9+
import { DataSetStack, DataSetStackProps} from './stacks/dataset-stack';
10+
11+
1012

1113

1214
export interface ChemblStackEnrollmentProps extends DataSetStackProps {
1315
databaseSecret: rds.DatabaseSecret;
1416
database: rds.DatabaseInstance;
1517
accessSecurityGroup: ec2.SecurityGroup;
16-
dataLakeBucket: s3.Bucket;
1718
}
1819

1920
export class ChemblStack extends DataSetStack{
@@ -28,15 +29,15 @@ export class ChemblStack extends DataSetStack{
2829
databaseSecret: props.databaseSecret,
2930
database: props.database,
3031
accessSecurityGroup: props.accessSecurityGroup,
31-
dataLakeBucket: props.dataLakeBucket,
32+
dataLakeBucket: props.DataLake.DataLakeBucket,
3233
DataSetName: dataSetName,
3334
JdbcTargetIncludePaths: ["chembl_25/%"],
3435
GlueScriptPath: "scripts/glue.s3importchembl25.py",
3536
GlueScriptArguments: {
3637
"--job-language": "python",
3738
"--job-bookmark-option": "job-bookmark-disable",
3839
"--enable-metrics": "",
39-
"--DL_BUCKET": props.dataLakeBucket.bucketName,
40+
"--DL_BUCKET": props.DataLake.DataLakeBucket.bucketName,
4041
"--DL_PREFIX": "/"+dataSetName+"/",
4142
"--DL_REGION": cdk.Stack.of(this).region,
4243
"--GLUE_SRC_DATABASE": "chembl_25_src"

0 commit comments

Comments
 (0)