Skip to content
This repository was archived by the owner on Jul 16, 2024. It is now read-only.

Commit 9cf5cac

Browse files
authored
refactor: Extract SQL from source code to its own files. (#171)
* refactor: extract sql files
* refactor: Change Dataset to follow folder structure convention

Co-authored-by: ijemmy
1 parent 7e387e8 commit 9cf5cac

34 files changed

+547 / -449 lines changed

core/.projen/tasks.json

Lines changed: 3 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

core/src/data-generator/data-generator.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ import { LambdaInvoke, AthenaStartQueryExecution } from '@aws-cdk/aws-stepfuncti
1212
import { Construct, Arn, Aws, Stack, Duration, ArnFormat } from '@aws-cdk/core';
1313
import { AwsCustomResource, AwsCustomResourcePolicy, PhysicalResourceId } from '@aws-cdk/custom-resources';
1414
import { PreBundledFunction } from '../common/pre-bundled-function';
15-
import { Dataset } from '../dataset';
15+
import { Dataset } from '../datasets/dataset';
1616
import { SingletonBucket } from '../singleton-bucket';
1717
import { SingletonGlueDatabase } from '../singleton-glue-database';
1818
import { SynchronousAthenaQuery } from '../synchronous-athena-query';
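
core/src/datasets/dataset.ts (file header missing from this capture; path inferred from the import change '../dataset' -> '../datasets/dataset' and the './dataset' re-export in core/src/datasets/index.ts; confirm against the commit)

Lines changed: 8 additions & 8 deletions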
Original file line number | Diff line number | Diff line change
@@ -2,14 +2,14 @@
22
// SPDX-License-Identifier: MIT-0
33

44
import { Location } from '@aws-cdk/aws-s3';
5-
import { retailCustomerCreate, retailCustomerCreateTarget, retailCustomerGenerate } from './datasets/retail-customer';
6-
import { retailCustomerAddressCreate, retailCustomerAddressCreateTarget, retailCustomerAddressGenerate } from './datasets/retail-customer-address';
7-
import { retailItemCreate, retailItemGenerate } from './datasets/retail-item';
8-
import { retailPromoCreate, retailPromoGenerate } from './datasets/retail-promo';
9-
import { retailStoreCreate, retailStoreGenerate } from './datasets/retail-store';
10-
import { retailStoreSaleCreate, retailStoreSaleGenerate } from './datasets/retail-store-sale';
11-
import { retailWarehouseCreate, retailWarehouseGenerate } from './datasets/retail-warehouse';
12-
import { retailWebSaleCreate, retailWebSaleGenerate } from './datasets/retail-web-sale';
5+
import { retailCustomerCreate, retailCustomerCreateTarget, retailCustomerGenerate } from './retail-customer';
6+
import { retailCustomerAddressCreate, retailCustomerAddressCreateTarget, retailCustomerAddressGenerate } from './retail-customer-address';
7+
import { retailItemCreate, retailItemGenerate } from './retail-item';
8+
import { retailPromoCreate, retailPromoGenerate } from './retail-promo';
9+
import { retailStoreCreate, retailStoreGenerate } from './retail-store';
10+
import { retailStoreSaleCreate, retailStoreSaleGenerate } from './retail-store-sale';
11+
import { retailWarehouseCreate, retailWarehouseGenerate } from './retail-warehouse';
12+
import { retailWebSaleCreate, retailWebSaleGenerate } from './retail-web-sale';
1313

1414
export interface DatasetProps {
1515
/**

core/src/datasets/index.ts

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
export { Dataset, DatasetProps } from './dataset';

core/src/datasets/read-sql-file.ts

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
// SPDX-License-Identifier: MIT-0
3+
4+
import { readFileSync } from 'fs';
5+
const ENCODING = 'utf8';
6+
7+
export function readSqlFile(path: string) {
8+
return readFileSync(path, ENCODING);
9+
}
Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,23 @@
1+
CREATE EXTERNAL TABLE IF NOT EXISTS {{DATABASE}}.{{TABLE}}(
2+
address_id string,
3+
city string,
4+
county string,
5+
state string,
6+
zip string,
7+
country string,
8+
gmt_offset string,
9+
location_type string,
10+
street string,
11+
address_datetime string
12+
)
13+
ROW FORMAT DELIMITED
14+
FIELDS TERMINATED BY ','
15+
STORED AS INPUTFORMAT
16+
'org.apache.hadoop.mapred.TextInputFormat'
17+
OUTPUTFORMAT
18+
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
19+
LOCATION
20+
's3://{{BUCKET}}/{{KEY}}/'
21+
TBLPROPERTIES (
22+
'skip.header.line.count'='1'
23+
)
Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,18 @@
1+
CREATE EXTERNAL TABLE IF NOT EXISTS {{DATABASE}}.{{TABLE}}(
2+
address_id string,
3+
city string,
4+
county string,
5+
state string,
6+
zip string,
7+
country string,
8+
gmt_offset string,
9+
location_type string,
10+
street string,
11+
address_datetime string
12+
)
13+
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
14+
LOCATION
15+
's3://{{BUCKET}}/{{KEY}}/'
16+
TBLPROPERTIES (
17+
'skip.header.line.count'='1'
18+
);
Lines changed: 16 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,16 @@
1+
INSERT INTO {{DATABASE}}.{{TARGET_TABLE}} (
2+
SELECT
3+
address_id,
4+
city,
5+
county,
6+
state,
7+
zip,
8+
country,
9+
gmt_offset,
10+
location_type,
11+
street,
12+
to_iso8601(date_add('second', {{OFFSET}}, from_iso8601_timestamp(address_datetime))) as address_datetime
13+
FROM {{DATABASE}}.{{SOURCE_TABLE}}
14+
WHERE address_datetime
15+
BETWEEN '{{MIN}}' AND '{{MAX}}'
16+
)
Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,31 @@
1+
CREATE EXTERNAL TABLE IF NOT EXISTS {{DATABASE}}.{{TABLE}}(
2+
customer_id string,
3+
salutation string,
4+
first_name string,
5+
last_name string,
6+
birth_country string,
7+
email_address string,
8+
birth_date string,
9+
gender string,
10+
marital_status string,
11+
education_status string,
12+
purchase_estimate bigint,
13+
credit_rating string,
14+
buy_potential string,
15+
vehicle_count bigint,
16+
lower_bound bigint,
17+
upper_bound bigint,
18+
address_id string,
19+
customer_datetime string
20+
)
21+
ROW FORMAT DELIMITED
22+
FIELDS TERMINATED BY ','
23+
STORED AS INPUTFORMAT
24+
'org.apache.hadoop.mapred.TextInputFormat'
25+
OUTPUTFORMAT
26+
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
27+
LOCATION
28+
's3://{{BUCKET}}/{{KEY}}/'
29+
TBLPROPERTIES (
30+
'skip.header.line.count'='1'
31+
)
Lines changed: 26 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,26 @@
1+
CREATE EXTERNAL TABLE IF NOT EXISTS {{DATABASE}}.{{TABLE}}(
2+
customer_id string,
3+
salutation string,
4+
first_name string,
5+
last_name string,
6+
birth_country string,
7+
email_address string,
8+
birth_date string,
9+
gender string,
10+
marital_status string,
11+
education_status string,
12+
purchase_estimate bigint,
13+
credit_rating string,
14+
buy_potential string,
15+
vehicle_count bigint,
16+
lower_bound bigint,
17+
upper_bound bigint,
18+
address_id string,
19+
customer_datetime string
20+
)
21+
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
22+
LOCATION
23+
's3://{{BUCKET}}/{{KEY}}/'
24+
TBLPROPERTIES (
25+
'skip.header.line.count'='1'
26+
)

0 commit comments

Comments
 (0)