|
| 1 | +const util = require('./util'); |
| 2 | + |
// Names of the AWS Glue Data Catalog database and its source/target tables,
// read once from the environment when the module is loaded.
const {
  SOURCE_TABLE: sourceTable,
  TARGET_TABLE: targetTable,
  DATABASE: database,
} = process.env;
| 7 | + |
/**
 * Inserts into the target table every row of the source table's given
 * year/month/day partition that has no counterpart in the target table.
 *
 * A source row counts as already present when the target table holds a row in
 * the same day partition with the same hour key (year-month-dayThour) and the
 * same `request_id`; the LEFT JOIN + `IS NULL` filter keeps only the rows
 * without such a match.
 *
 * @param {string} database - Database that holds both tables.
 * @param {string} sourceTable - Table the rows are read from (aliased `gz`).
 * @param {string} targetTable - Table the rows are inserted into (aliased `parquet`).
 * @param {number|string} year - Partition year, interpolated as a quoted literal.
 * @param {string} month - Partition month, interpolated as a quoted literal.
 * @param {string} day - Partition day, interpolated as a quoted literal.
 * @returns {Promise<void>} Settles once `util.runQuery` has settled.
 */
async function insertMissingGzData(database, sourceTable, targetTable, year, month, day) {
  // NOTE(review): all values are interpolated into the SQL text verbatim;
  // callers are expected to pass trusted identifiers and partition values.
  const gzTable = `${database}.${sourceTable}`;
  const parquetTable = `${database}.${targetTable}`;
  const dayPartition = `year = '${year}' AND month = '${month}' AND day = '${day}'`;

  const sql = `
    -- Insert missing Gzip Data on ${year}-${month}-${day}
    INSERT INTO ${parquetTable}
    WITH gz AS (
      SELECT *
      FROM ${gzTable}
      WHERE ${dayPartition}
    ), parquet AS (
      SELECT concat(year, '-', month, '-', day, 'T', hour) dth, request_id
      FROM ${parquetTable}
      WHERE ${dayPartition}
    )
    SELECT
      gz.*
    FROM gz LEFT JOIN parquet
      ON concat(gz.year, '-', gz.month, '-', gz.day, 'T', gz.hour) = parquet.dth
      AND gz.request_id = parquet.request_id
    WHERE parquet.request_id IS NULL`;

  await util.runQuery(sql);
}
| 31 | + |
| 32 | +// get the partitions of yesterday or use `dt` in event |
| 33 | +exports.handler = async (event, context, callback) => { |
| 34 | + if ( 'dt' in event ) { |
| 35 | + var yesterday = new Date(`${event.dt}T00:00:00Z`) |
| 36 | + if (isNaN(yesterday)) |
| 37 | + throw new Error('invalid dt') |
| 38 | + } else { |
| 39 | + var yesterday = new Date(); |
| 40 | + yesterday.setDate(yesterday.getDate() - 1); |
| 41 | + } |
| 42 | + |
| 43 | + const year = yesterday.getUTCFullYear(); |
| 44 | + const month = (yesterday.getUTCMonth() + 1).toString().padStart(2, '0'); |
| 45 | + const day = yesterday.getUTCDate().toString().padStart(2, '0'); |
| 46 | + |
| 47 | + console.log('Insert Missing Data in Gzip Files on ', { year, month, day }); |
| 48 | + |
| 49 | + await insertMissingGzData(database, sourceTable, targetTable, year, month, day); |
| 50 | +} |
0 commit comments