@@ -90,3 +90,105 @@ resource "aws_glue_crawler" "parking_spatially_enriched_refined_zone" {
9090 }
9191 })
9292}
93+
# Glue crawler over the Parking department's Google Sheets landing prefix in
# the raw zone bucket. Discovered tables are registered in the Parking raw zone
# catalog database. Only created when local.is_live_environment is true.
resource "aws_glue_crawler" "parking_google_sheet_ingestion_raw_zone" {
  count         = local.is_live_environment ? 1 : 0
  name          = "${local.short_identifier_prefix}${module.department_parking_data_source.identifier}-google-sheet-ingestion-raw-zone"
  role          = data.aws_iam_role.glue_role.arn
  database_name = module.department_parking_data_source.raw_zone_catalog_database_name

  s3_target {
    path = "s3://${module.raw_zone_data_source.bucket_id}/${module.department_parking_data_source.identifier}/google-sheets/"
  }

  configuration = jsonencode({
    Version = 1.0
    Grouping = {
      # NOTE(review): per the AWS Glue crawler docs the bucket counts as level 1,
      # so level 4 should be the folder directly beneath "google-sheets/" — confirm.
      TableLevelConfiguration = 4
    }
    CrawlerOutput = {
      Partitions = { AddOrUpdateBehavior = "InheritFromTable" }
      Tables     = { AddOrUpdateBehavior = "MergeNewColumns" }
    }
  })
}
114+
# Glue crawler over the Housing department's Google Sheets landing prefix in
# the raw zone bucket. Discovered tables are registered in the Housing raw zone
# catalog database. Only created when local.is_live_environment is true.
resource "aws_glue_crawler" "housing_google_sheet_ingestion_raw_zone" {
  count         = local.is_live_environment ? 1 : 0
  name          = "${local.short_identifier_prefix}${module.department_housing_data_source.identifier}-google-sheet-ingestion-raw-zone"
  role          = data.aws_iam_role.glue_role.arn
  database_name = module.department_housing_data_source.raw_zone_catalog_database_name

  s3_target {
    path = "s3://${module.raw_zone_data_source.bucket_id}/${module.department_housing_data_source.identifier}/google-sheets/"
  }

  configuration = jsonencode({
    Version = 1.0
    Grouping = {
      # NOTE(review): per the AWS Glue crawler docs the bucket counts as level 1,
      # so level 4 should be the folder directly beneath "google-sheets/" — confirm.
      TableLevelConfiguration = 4
    }
    CrawlerOutput = {
      Partitions = { AddOrUpdateBehavior = "InheritFromTable" }
      Tables     = { AddOrUpdateBehavior = "MergeNewColumns" }
    }
  })
}
135+
# Glue crawler over the Data and Insight department's Google Sheets landing
# prefix in the raw zone bucket. Discovered tables are registered in that
# department's raw zone catalog database. Only created when
# local.is_live_environment is true.
resource "aws_glue_crawler" "data_and_insight_google_sheet_ingestion_raw_zone" {
  count         = local.is_live_environment ? 1 : 0
  name          = "${local.short_identifier_prefix}${module.department_data_and_insight_data_source.identifier}-google-sheet-ingestion-raw-zone"
  role          = data.aws_iam_role.glue_role.arn
  database_name = module.department_data_and_insight_data_source.raw_zone_catalog_database_name

  s3_target {
    path = "s3://${module.raw_zone_data_source.bucket_id}/${module.department_data_and_insight_data_source.identifier}/google-sheets/"
  }

  configuration = jsonencode({
    Version = 1.0
    Grouping = {
      # NOTE(review): per the AWS Glue crawler docs the bucket counts as level 1,
      # so level 4 should be the folder directly beneath "google-sheets/" — confirm.
      TableLevelConfiguration = 4
    }
    CrawlerOutput = {
      Partitions = { AddOrUpdateBehavior = "InheritFromTable" }
      Tables     = { AddOrUpdateBehavior = "MergeNewColumns" }
    }
  })
}
156+
# The crawler triggers below are temporary and will be removed once the Airflow Google Sheets ingestion DAG is enabled.
# Scheduled trigger that starts the Parking Google Sheets raw zone crawler
# using the cron expression below (07:00 every day; AWS Glue evaluates
# schedules in UTC).
resource "aws_glue_trigger" "parking_google_sheet_ingestion_raw_zone_trigger" {
  # The crawler referenced in `actions` only exists in live environments
  # (its count is 0 otherwise), so the trigger must share the same guard;
  # without it, indexing the crawler with [0] fails outside live environments.
  count             = local.is_live_environment ? 1 : 0
  name              = "${local.short_identifier_prefix}${module.department_parking_data_source.identifier}-google-sheet-ingestion-raw-zone-trigger"
  type              = "SCHEDULED"
  schedule          = "cron(0 7 ? * * *)"
  start_on_creation = true

  actions {
    crawler_name = aws_glue_crawler.parking_google_sheet_ingestion_raw_zone[0].name
  }

  tags = module.tags.values
}
169+
170+
# Scheduled trigger that starts the Housing Google Sheets raw zone crawler
# using the cron expression below (07:00 every day; AWS Glue evaluates
# schedules in UTC).
resource "aws_glue_trigger" "housing_google_sheet_ingestion_raw_zone_trigger" {
  # The crawler referenced in `actions` only exists in live environments
  # (its count is 0 otherwise), so the trigger must share the same guard;
  # without it, indexing the crawler with [0] fails outside live environments.
  count             = local.is_live_environment ? 1 : 0
  name              = "${local.short_identifier_prefix}${module.department_housing_data_source.identifier}-google-sheet-ingestion-raw-zone-trigger"
  type              = "SCHEDULED"
  schedule          = "cron(0 7 ? * * *)"
  start_on_creation = true

  actions {
    crawler_name = aws_glue_crawler.housing_google_sheet_ingestion_raw_zone[0].name
  }

  tags = module.tags.values
}
182+
183+
# Scheduled trigger that starts the Data and Insight Google Sheets raw zone
# crawler using the cron expression below (07:00 every day; AWS Glue evaluates
# schedules in UTC).
resource "aws_glue_trigger" "data_and_insight_google_sheet_ingestion_raw_zone_trigger" {
  # The crawler referenced in `actions` only exists in live environments
  # (its count is 0 otherwise), so the trigger must share the same guard;
  # without it, indexing the crawler with [0] fails outside live environments.
  count             = local.is_live_environment ? 1 : 0
  name              = "${local.short_identifier_prefix}${module.department_data_and_insight_data_source.identifier}-google-sheet-ingestion-raw-zone-trigger"
  type              = "SCHEDULED"
  schedule          = "cron(0 7 ? * * *)"
  start_on_creation = true

  actions {
    crawler_name = aws_glue_crawler.data_and_insight_google_sheet_ingestion_raw_zone[0].name
  }

  tags = module.tags.values
}
0 commit comments