
Commit 2fff082

delete data migration (#2578)
* delete data migration
* remove the scripts
* delete the python script too
1 parent 9d83513 commit 2fff082

4 files changed: +8 additions, -325 deletions


scripts/jobs/academy_data/load_all_academy_data_into_redshift.py

Lines changed: 0 additions & 196 deletions
This file was deleted.

terraform/core/12-aws-s3-scripts.tf

Lines changed: 0 additions & 9 deletions
@@ -93,12 +93,3 @@ resource "aws_s3_object" "parking_copy_ringgo_sftp_data_to_raw" {
   source      = "../../scripts/jobs/parking/parking_copy_ringgo_sftp_data_to_raw.py"
   source_hash = filemd5("../../scripts/jobs/parking/parking_copy_ringgo_sftp_data_to_raw.py")
 }
-
-resource "aws_s3_object" "load_all_academy_data_into_redshift" {
-  bucket      = module.glue_scripts.bucket_id # this is glue_scripts_data_source in etl folder
-  key         = "scripts/load_all_academy_data_into_redshift.py"
-  acl         = "private"
-  source      = "../../scripts/jobs/academy_data/load_all_academy_data_into_redshift.py"
-  source_hash = filemd5("../../scripts/jobs/academy_data/load_all_academy_data_into_redshift.py")
-}
-
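
Note: the deleted resource follows the same pattern as the surviving parking_copy_ringgo_sftp_data_to_raw object above it — source_hash = filemd5(...) re-hashes the local script at plan time, so any edit to the script changes the hash and forces a re-upload of the S3 object. A minimal sketch of the pattern, with a hypothetical bucket name and script path:

resource "aws_s3_object" "example_glue_script" {
  bucket      = "my-glue-scripts-bucket"            # hypothetical bucket name
  key         = "scripts/example_job.py"
  acl         = "private"
  source      = "../../scripts/jobs/example_job.py" # hypothetical local path
  # filemd5() recomputes the file's MD5 at plan time; when the script
  # changes, source_hash changes and Terraform re-uploads the object.
  source_hash = filemd5("../../scripts/jobs/example_job.py")
}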

terraform/core/45-database-migration-iam.tf

Lines changed: 0 additions & 78 deletions
This file was deleted.

terraform/core/51-load-all-academy-data-into-redshift-serverless.tf

Lines changed: 8 additions & 42 deletions
@@ -26,19 +26,19 @@ locals {
 
 # option 2: tailored for this module
 resource "aws_glue_connection" "database_ingestion_via_jdbc_connection" {
-  count = local.is_live_environment && !local.is_production_environment ? 1 : 0
-  name = "${local.short_identifier_prefix}redshift-serverless-connection-${data.aws_subnet.network[local.instance_subnet_id].availability_zone}"
+  count       = local.is_live_environment && !local.is_production_environment ? 1 : 0
+  name        = "${local.short_identifier_prefix}redshift-serverless-connection-${data.aws_subnet.network[local.instance_subnet_id].availability_zone}"
   description = "JDBC connection for Redshift Serverless"
   connection_properties = {
-  JDBC_CONNECTION_URL = "jdbc:redshift://${local.redshift_serverless_credentials["host"]}:${local.redshift_serverless_credentials["port"]}/${local.redshift_serverless_credentials["database_name"]}"
-  PASSWORD = local.redshift_serverless_credentials["password"]
-  USERNAME = local.redshift_serverless_credentials["username"]
+    JDBC_CONNECTION_URL = "jdbc:redshift://${local.redshift_serverless_credentials["host"]}:${local.redshift_serverless_credentials["port"]}/${local.redshift_serverless_credentials["database_name"]}"
+    PASSWORD            = local.redshift_serverless_credentials["password"]
+    USERNAME            = local.redshift_serverless_credentials["username"]
   }
 
   physical_connection_requirements {
-  availability_zone = data.aws_subnet.network[local.instance_subnet_id].availability_zone
-  security_group_id_list = [aws_security_group.ingestion_database_connection.id]
-  subnet_id = data.aws_subnet.network[local.instance_subnet_id].id
+    availability_zone      = data.aws_subnet.network[local.instance_subnet_id].availability_zone
+    security_group_id_list = [aws_security_group.ingestion_database_connection.id]
+    subnet_id              = data.aws_subnet.network[local.instance_subnet_id].id
   }
 
 }
@@ -69,37 +69,3 @@ resource "aws_security_group_rule" "ingestion_database_connection_allow_tcp_egre
   ipv6_cidr_blocks  = ["::/0"]
   security_group_id = aws_security_group.ingestion_database_connection.id
 }
-
-
-module "load_all_academy_data_into_redshift" {
-  count                          = local.is_live_environment && !local.is_production_environment ? 1 : 0
-  tags                           = module.tags.values
-  source                         = "../modules/aws-glue-job"
-  is_live_environment            = local.is_live_environment
-  is_production_environment      = local.is_production_environment
-  job_name                       = "${local.short_identifier_prefix}load_all_academy_data_into_redshift"
-  script_s3_object_key           = aws_s3_object.load_all_academy_data_into_redshift.key
-  pydeequ_zip_key                = aws_s3_object.pydeequ.key
-  helper_module_key              = aws_s3_object.helpers.key
-  glue_role_arn                  = aws_iam_role.glue_role.arn
-  glue_temp_bucket_id            = module.glue_temp_storage.bucket_id
-  glue_scripts_bucket_id         = module.glue_scripts.bucket_id
-  spark_ui_output_storage_id     = module.spark_ui_output_storage.bucket_id
-  glue_version                   = "4.0"
-  glue_job_worker_type           = "G.1X"
-  number_of_workers_for_glue_job = 2
-  glue_job_timeout               = 220
-  schedule                       = "cron(15 7 ? * MON-FRI *)"
-  # jdbc_connections = [module.database_ingestion_via_jdbc_connection[0].name]
-  jdbc_connections = [aws_glue_connection.database_ingestion_via_jdbc_connection[0].name]
-  job_parameters = {
-    "--additional-python-modules" = "botocore==1.27.59, redshift_connector==2.1.0"
-    "--environment"               = var.environment
-    # This is the ARN of the IAM role used by Redshift Serverless. We have count in redshift-serverless module so index 0 is to get the ARN.
-    "--role_arn"                  = try(module.redshift_serverless[0].redshift_serverless_role_arn, "")
-    "--enable-auto-scaling"       = "false"
-    "--job-bookmark-option"       = "job-bookmark-disable"
-    "--base_s3_url"               = "${module.raw_zone.bucket_url}/revenues/"
-    "--conf"                      = "spark.sql.legacy.timeParserPolicy=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.int96RebaseModeInWrite=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInRead=LEGACY --conf spark.sql.legacy.parquet.datetimeRebaseModeInWrite=LEGACY"
-  }
-}
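
Note: the removed module and the surviving aws_glue_connection resource both gate creation on the same conditional-count idiom — count = local.is_live_environment && !local.is_production_environment ? 1 : 0 creates the object only in live, non-production environments, and consumers must then index it with [0] (or wrap it in try(), as the removed "--role_arn" parameter did) because a counted resource is addressed as a list. A minimal sketch of the idiom, with hypothetical variable and resource names:

variable "is_live_environment" { type = bool }
variable "is_production_environment" { type = bool }

# Created only in live, non-production environments.
resource "aws_security_group" "example_ingestion" {
  count = var.is_live_environment && !var.is_production_environment ? 1 : 0
  name  = "example-ingestion-sg"
}

output "example_ingestion_sg_id" {
  # A counted resource is a list; try() falls back to "" in the
  # environments where count is 0 and example_ingestion[0] does not exist.
  value = try(aws_security_group.example_ingestion[0].id, "")
}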
