diff --git a/docs/best_practices.md b/docs/best_practices.md index a0059df..9fe75c9 100644 --- a/docs/best_practices.md +++ b/docs/best_practices.md @@ -427,3 +427,29 @@ input gives same output). Until we provide more configuration options, feel free long as you stick to a standard that makes sense to you or your organisation. If you need advice, [feel free to join our slack and ask our developers](https://join.slack.com/t/dbtvault/shared_invite/enQtODY5MTY3OTIyMzg2LWJlZDMyNzM4YzAzYjgzYTY0MTMzNTNjN2EyZDRjOTljYjY0NDYyYzEwMTlhODMzNGY3MmU2ODNhYWUxYmM2NjA)! +## Ghost Records + +### What are ghost records? + +Ghost records are 'dummy' records that are implemented in satellite tables. In Data Vault 1.0 the best practice was to +have a separate ghost record for each key. This was changed in Data Vault 2.0 and so now there is a single ghost record +inserted at the top of every satellite table. + +The ghost records enable the PIT tables to have a complete set of historic data. This means if a key was introduced +after the initial creation or the keys are not updated at the same frequency, there will still be data in the table +instead of a missing record. + +### What does a ghost record look like? 
+ +| Column | Datatype | Value | +|----------------------|---------------|------------------------------------------------------------------| +| Primary Key - MD5 | BINARY(16) | 00000000000000000000000000000000 | +| Primary Key - SHA256 | BINARY(32) | 0000000000000000000000000000000000000000000000000000000000000000 | +| Hashdiff - MD5 | BINARY(16) | 00000000000000000000000000000000 | +| Hashdiff - SHA256 | BINARY(32) | 0000000000000000000000000000000000000000000000000000000000000000 | +| Effective From | DATE/DATETIME | 1900-01-01 00:00:00.000000 | +| Load Date | DATE/DATETIME | 1900-01-01 00:00:00.000000 | +| Payload Columns | null | NULL | +| Extra Columns | null | NULL | + + diff --git a/docs/macros/index.md b/docs/macros/index.md index 3a47362..7815f97 100644 --- a/docs/macros/index.md +++ b/docs/macros/index.md @@ -3423,8 +3423,7 @@ Generates SQL to build a Point-In-Time (PIT) table. ```sql WITH as_of_dates AS ( - SELECT * - FROM DBTVAULT.TEST.AS_OF_DATE AS a + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE ), new_rows_as_of_dates AS ( @@ -3440,24 +3439,25 @@ Generates SQL to build a Point-In-Time (PIT) table. 
SELECT a.CUSTOMER_PK, a.AS_OF_DATE, - COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), CAST('0000000000000000' AS BINARY(16))) AS SAT_CUSTOMER_DETAILS_PK, - COALESCE(MAX(sat_customer_details_src.LOAD_DATE), CAST('1900-01-01 00:00:00.000' AS timestamp_ntz)) AS SAT_CUSTOMER_DETAILS_LDTS, - COALESCE(MAX(sat_customer_login_src.CUSTOMER_PK), CAST('0000000000000000' AS BINARY(16))) AS SAT_CUSTOMER_LOGIN_PK, - COALESCE(MAX(sat_customer_login_src.LOAD_DATE), CAST('1900-01-01 00:00:00.000' AS timestamp_ntz)) AS SAT_CUSTOMER_LOGIN_LDTS, - COALESCE(MAX(sat_customer_profile_src.CUSTOMER_PK), CAST('0000000000000000' AS BINARY(16))) AS SAT_CUSTOMER_PROFILE_PK, - COALESCE(MAX(sat_customer_profile_src.LOAD_DATE), CAST('1900-01-01 00:00:00.000' AS timestamp_ntz)) AS SAT_CUSTOMER_PROFILE_LDTS + + COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + CAST('0000000000000000' AS BINARY(16))) + AS SAT_CUSTOMER_DETAILS_PK, + + COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + TO_TIMESTAMP('1900-01-01 00:00:00.000')) + AS SAT_CUSTOMER_DETAILS_LDTS + FROM new_rows_as_of_dates AS a - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src - ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK - AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_LOGIN AS sat_customer_login_src - ON a.CUSTOMER_PK = sat_customer_login_src.CUSTOMER_PK - AND sat_customer_login_src.LOAD_DATE <= a.AS_OF_DATE - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_PROFILE AS sat_customer_profile_src - ON a.CUSTOMER_PK = sat_customer_profile_src.CUSTOMER_PK - AND sat_customer_profile_src.LOAD_DATE <= a.AS_OF_DATE + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = '1900-01-01 00:00:00.000' + GROUP BY - a.CUSTOMER_PK, a.AS_OF_DATE + a.CUSTOMER_PK, + a.AS_OF_DATE ), pit AS ( @@ -3471,23 
+3471,18 @@ Generates SQL to build a Point-In-Time (PIT) table. ```sql WITH as_of_dates AS ( - SELECT * - FROM DBTVAULT.TEST.AS_OF_DATE + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE ), last_safe_load_datetime AS ( - SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME FROM ( - SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_DETAILS - UNION ALL - SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_LOGIN - UNION ALL - SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_PROFILE - ) a + SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_DETAILS + ) AS l ), as_of_grain_old_entries AS ( - SELECT DISTINCT AS_OF_DATE + SELECT DISTINCT AS_OF_DATE FROM DBTVAULT.TEST.PIT_CUSTOMER ), @@ -3508,42 +3503,56 @@ Generates SQL to build a Point-In-Time (PIT) table. ), min_date AS ( - SELECT min(AS_OF_DATE) AS MIN_DATE + SELECT MIN(AS_OF_DATE) AS MIN_DATE FROM as_of_dates ), backfill_as_of AS ( SELECT AS_OF_DATE FROM as_of_dates AS a + + WHERE a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) ), new_rows_pks AS ( - SELECT a.CUSTOMER_PK - FROM DBTVAULT.TEST.HUB_CUSTOMER AS a - WHERE a.LOAD_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + SELECT h.CUSTOMER_PK + FROM DBTVAULT.TEST.HUB_CUSTOMER AS h + + + WHERE h.LOAD_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) ), new_rows_as_of AS ( SELECT AS_OF_DATE FROM as_of_dates AS a + WHERE a.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) UNION - SELECT AS_OF_DATE + SELECT as_of_date FROM as_of_grain_new_entries ), - overlap AS ( + overlap_pks AS ( SELECT a.* FROM DBTVAULT.TEST.PIT_CUSTOMER AS a INNER JOIN DBTVAULT.TEST.HUB_CUSTOMER as b ON a.CUSTOMER_PK = b.CUSTOMER_PK + WHERE a.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) AND a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) AND 
a.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) ), - -- Back-fill any newly arrived hubs, set all historical pit dates to ghost records + overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + + WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + + ), backfill_rows_as_of_dates AS ( SELECT @@ -3551,29 +3560,24 @@ Generates SQL to build a Point-In-Time (PIT) table. b.AS_OF_DATE FROM new_rows_pks AS a INNER JOIN backfill_as_of AS b - ON (1=1 ) + ON (1=1) ), backfill AS ( SELECT a.CUSTOMER_PK, a.AS_OF_DATE, + CAST('0000000000000000' AS BINARY(16)) AS SAT_CUSTOMER_DETAILS_PK, - CAST('1900-01-01 00:00:00.000' AS timestamp_ntz) AS SAT_CUSTOMER_DETAILS_LDTS, - CAST('0000000000000000' AS BINARY(16)) AS SAT_CUSTOMER_LOGIN_PK, - CAST('1900-01-01 00:00:00.000' AS timestamp_ntz) AS SAT_CUSTOMER_LOGIN_LDTS, - CAST('0000000000000000' AS BINARY(16)) AS SAT_CUSTOMER_PROFILE_PK, - CAST('1900-01-01 00:00:00.000' AS timestamp_ntz) AS SAT_CUSTOMER_PROFILE_LDTS + TO_TIMESTAMP('1900-01-01 00:00:00.000') AS SAT_CUSTOMER_DETAILS_LDTS + FROM backfill_rows_as_of_dates AS a - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src - ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK - AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_LOGIN AS sat_customer_login_src - ON a.CUSTOMER_PK = sat_customer_login_src.CUSTOMER_PK - AND sat_customer_login_src.LOAD_DATE <= a.AS_OF_DATE - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_PROFILE AS sat_customer_profile_src - ON a.CUSTOMER_PK = sat_customer_profile_src.CUSTOMER_PK - AND sat_customer_profile_src.LOAD_DATE <= a.AS_OF_DATE + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE 
<= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = '1900-01-01 00:00:00.000' + GROUP BY a.CUSTOMER_PK, a.AS_OF_DATE ), @@ -3591,42 +3595,1038 @@ Generates SQL to build a Point-In-Time (PIT) table. SELECT a.CUSTOMER_PK, a.AS_OF_DATE, - COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), CAST('0000000000000000' AS BINARY(16))) AS SAT_CUSTOMER_DETAILS_PK, - COALESCE(MAX(sat_customer_details_src.LOAD_DATE), CAST('1900-01-01 00:00:00.000' AS timestamp_ntz)) AS SAT_CUSTOMER_DETAILS_LDTS, - COALESCE(MAX(sat_customer_login_src.CUSTOMER_PK), CAST('0000000000000000' AS BINARY(16))) AS SAT_CUSTOMER_LOGIN_PK, - COALESCE(MAX(sat_customer_login_src.LOAD_DATE), CAST('1900-01-01 00:00:00.000' AS timestamp_ntz)) AS SAT_CUSTOMER_LOGIN_LDTS, - COALESCE(MAX(sat_customer_profile_src.CUSTOMER_PK), CAST('0000000000000000' AS BINARY(16))) AS SAT_CUSTOMER_PROFILE_PK, - COALESCE(MAX(sat_customer_profile_src.LOAD_DATE), CAST('1900-01-01 00:00:00.000' AS timestamp_ntz)) AS SAT_CUSTOMER_PROFILE_LDTS + + COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + CAST('0000000000000000' AS BINARY(16))) + AS SAT_CUSTOMER_DETAILS_PK, + + COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + TO_TIMESTAMP('1900-01-01 00:00:00.000')) + AS SAT_CUSTOMER_DETAILS_LDTS + FROM new_rows_as_of_dates AS a - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src - ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK - AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_LOGIN AS sat_customer_login_src - ON a.CUSTOMER_PK = sat_customer_login_src.CUSTOMER_PK - AND sat_customer_login_src.LOAD_DATE <= a.AS_OF_DATE - LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_PROFILE AS sat_customer_profile_src - ON a.CUSTOMER_PK = sat_customer_profile_src.CUSTOMER_PK - AND sat_customer_profile_src.LOAD_DATE <= a.AS_OF_DATE + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND 
sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = '1900-01-01 00:00:00.000' + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + + pit AS ( + SELECT * FROM new_rows + UNION ALL + SELECT * FROM overlap_pks + UNION ALL + SELECT * FROM backfill + + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Base Load with Ghost Records" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER AS a + INNER JOIN as_of_dates AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + MAX(sat_customer_details_src.CUSTOMER_PK) AS SAT_CUSTOMER_DETAILS_PK, + MAX(TO_TIMESTAMP(sat_customer_details_src.LOAD_DATE)) AS SAT_CUSTOMER_DETAILS_LDTS + + FROM new_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = '1900-01-01 00:00:00.000' + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + + pit AS ( + SELECT * FROM new_rows + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Incremental Load with Ghost Records" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + last_safe_load_datetime AS ( + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + FROM ( + SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_DETAILS + ) AS l + ), + + as_of_grain_old_entries AS ( + SELECT DISTINCT AS_OF_DATE + FROM DBTVAULT.TEST.PIT_CUSTOMER + ), + + as_of_grain_lost_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_grain_old_entries AS a + LEFT OUTER JOIN as_of_dates AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + as_of_grain_new_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_dates AS a + LEFT OUTER JOIN as_of_grain_old_entries AS b + ON 
a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + min_date AS ( + SELECT MIN(AS_OF_DATE) AS MIN_DATE + FROM as_of_dates + ), + + backfill_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + + + WHERE a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + ), + + new_rows_pks AS ( + SELECT h.CUSTOMER_PK + FROM DBTVAULT.TEST.HUB_CUSTOMER AS h + + + WHERE h.LOAD_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + ), + + new_rows_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + + WHERE a.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + UNION + SELECT as_of_date + FROM as_of_grain_new_entries + ), + + overlap_pks AS ( + SELECT a.* + FROM DBTVAULT.TEST.PIT_CUSTOMER AS a + INNER JOIN DBTVAULT.TEST.HUB_CUSTOMER as b + ON a.CUSTOMER_PK = b.CUSTOMER_PK + + WHERE a.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND a.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + ), + + overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + + WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + + ), + + backfill_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM new_rows_pks AS a + INNER JOIN backfill_as_of AS b + ON (1=1) + ), + + backfill AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + MIN(sat_customer_details_src.CUSTOMER_PK) AS SAT_CUSTOMER_DETAILS_PK, + MIN(TO_TIMESTAMP(sat_customer_details_src.LOAD_DATE)) AS SAT_CUSTOMER_DETAILS_LDTS + + FROM backfill_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR 
sat_customer_details_src.LOAD_DATE = '1900-01-01 00:00:00.000' + GROUP BY a.CUSTOMER_PK, a.AS_OF_DATE ), + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER AS a + INNER JOIN new_rows_as_of AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + MAX(sat_customer_details_src.CUSTOMER_PK) AS SAT_CUSTOMER_DETAILS_PK, + MAX(TO_TIMESTAMP(sat_customer_details_src.LOAD_DATE)) AS SAT_CUSTOMER_DETAILS_LDTS + + FROM new_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = '1900-01-01 00:00:00.000' + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + pit AS ( SELECT * FROM new_rows UNION ALL - SELECT * FROM overlap + SELECT * FROM overlap_pks UNION ALL SELECT * FROM backfill + ) SELECT DISTINCT * FROM pit ``` === "Google Bigquery" - Coming soon! -=== "MS SQL Server" - Coming soon! 
+ === "Base Load" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER AS a + INNER JOIN as_of_dates AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE,COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + '0x0000000000000000') AS SAT_CUSTOMER_DETAILS_PK, + COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + PARSE_DATETIME('%F %H:%M:%E6S', '1900-01-01 00:00:00.000000')) AS SAT_CUSTOMER_DETAILS_LDTS + FROM new_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ORDER BY (1) + ), + + pit AS ( + SELECT * FROM new_rows + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Incremental Load" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + last_safe_load_datetime AS ( + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + FROM ( + SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_DETAILS + ) AS l + ), + + as_of_grain_old_entries AS ( + SELECT DISTINCT AS_OF_DATE + FROM DBTVAULT.TEST.PIT_CUSTOMER + ), + + as_of_grain_lost_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_grain_old_entries AS a + LEFT OUTER JOIN as_of_dates AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + as_of_grain_new_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_dates AS a + LEFT OUTER JOIN as_of_grain_old_entries AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + min_date AS ( + SELECT MIN(AS_OF_DATE) AS MIN_DATE + FROM as_of_dates + ), + + backfill_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + + INNER JOIN last_safe_load_datetime as l + ON a.AS_OF_DATE < l.LAST_SAFE_LOAD_DATETIME + + ), + + new_rows_pks AS ( + SELECT 
h.CUSTOMER_PK + FROM DBTVAULT.TEST.HUB_CUSTOMER AS h + + INNER JOIN last_safe_load_datetime as l + ON h.LOAD_DATE >= l.LAST_SAFE_LOAD_DATETIME + + ), + + new_rows_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + INNER JOIN last_safe_load_datetime as l + ON a.AS_OF_DATE >= l.LAST_SAFE_LOAD_DATETIME + UNION DISTINCT + + SELECT as_of_date + FROM as_of_grain_new_entries + ), + + overlap_pks AS ( + SELECT a.* + FROM DBTVAULT.TEST.PIT_CUSTOMER AS a + INNER JOIN DBTVAULT.TEST.HUB_CUSTOMER as b + ON a.CUSTOMER_PK = b.CUSTOMER_PK + INNER JOIN min_date + ON 1 = 1 + INNER JOIN last_safe_load_datetime + ON 1 = 1 + LEFT OUTER JOIN as_of_grain_lost_entries + ON a.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE + WHERE a.AS_OF_DATE >= min_date.MIN_DATE + AND a.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME + AND as_of_grain_lost_entries.AS_OF_DATE IS NULL + + ), + + overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + INNER JOIN min_date + ON 1 = 1 + INNER JOIN last_safe_load_datetime + ON 1 = 1 + LEFT OUTER JOIN as_of_grain_lost_entries + ON p.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE + WHERE p.AS_OF_DATE >= min_date.MIN_DATE + AND p.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME + AND as_of_grain_lost_entries.AS_OF_DATE IS NULL + + ), + + backfill_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM new_rows_pks AS a + INNER JOIN backfill_as_of AS b + ON (1=1 ) + ), + + backfill AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE,'0x0000000000000000' AS SAT_CUSTOMER_DETAILS_PK, + PARSE_DATETIME('%F %H:%M:%E6S', '1900-01-01 00:00:00.000000') AS SAT_CUSTOMER_DETAILS_LDTS + FROM backfill_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ORDER BY (1) + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, 
+ b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER AS a + INNER JOIN new_rows_as_of AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + '0x0000000000000000') AS SAT_CUSTOMER_DETAILS_PK, + COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + PARSE_DATETIME('%F %H:%M:%E6S', '1900-01-01 00:00:00.000000')) AS SAT_CUSTOMER_DETAILS_LDTS + FROM new_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ORDER BY (1) + ), + + pit AS ( + SELECT * FROM new_rows + UNION ALL + SELECT * FROM overlap_pks + UNION ALL + SELECT * FROM backfill + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Base Load with Ghost Records" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER AS a + INNER JOIN as_of_dates AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + MAX(sat_customer_details_src.CUSTOMER_PK) AS SAT_CUSTOMER_DETAILS_PK, + DATETIME(MAX(sat_customer_details_src.LOAD_DATE)) AS SAT_CUSTOMER_DETAILS_LDTS + FROM new_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = DATETIME('1900-01-01') + + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ORDER BY (1) + ), + + pit AS ( + SELECT * FROM new_rows + ) + + SELECT DISTINCT * FROM pit + ``` + === "Incremental Load with Ghost Records" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + last_safe_load_datetime AS ( + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + FROM 
( + SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_DETAILS + ) AS l + ), + + as_of_grain_old_entries AS ( + SELECT DISTINCT AS_OF_DATE + FROM DBTVAULT.TEST.PIT_CUSTOMER + ), + + as_of_grain_lost_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_grain_old_entries AS a + LEFT OUTER JOIN as_of_dates AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + as_of_grain_new_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_dates AS a + LEFT OUTER JOIN as_of_grain_old_entries AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + min_date AS ( + SELECT MIN(AS_OF_DATE) AS MIN_DATE + FROM as_of_dates + ), + + backfill_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + INNER JOIN last_safe_load_datetime as l + ON a.AS_OF_DATE < l.LAST_SAFE_LOAD_DATETIME + ), + + new_rows_pks AS ( + SELECT h.CUSTOMER_PK + FROM DBTVAULT.TEST.HUB_CUSTOMER AS h + INNER JOIN last_safe_load_datetime as l + ON h.LOAD_DATE >= l.LAST_SAFE_LOAD_DATETIME + ), + + new_rows_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + INNER JOIN last_safe_load_datetime as l + ON a.AS_OF_DATE >= l.LAST_SAFE_LOAD_DATETIME + UNION DISTINCT + SELECT as_of_date + FROM as_of_grain_new_entries + ), + + overlap_pks AS ( + SELECT a.* + FROM DBTVAULT.TEST.PIT_CUSTOMER AS a + INNER JOIN DBTVAULT.TEST.HUB_CUSTOMER as b + ON a.CUSTOMER_PK = b.CUSTOMER_PK + INNER JOIN min_date + ON 1 = 1 + INNER JOIN last_safe_load_datetime + ON 1 = 1 + LEFT OUTER JOIN as_of_grain_lost_entries + ON a.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE + WHERE a.AS_OF_DATE >= min_date.MIN_DATE + AND a.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME + AND as_of_grain_lost_entries.AS_OF_DATE IS NULL + ), + + overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + INNER JOIN min_date + ON 1 = 1 + INNER JOIN last_safe_load_datetime + ON 1 = 1 + LEFT OUTER JOIN as_of_grain_lost_entries + ON p.AS_OF_DATE = as_of_grain_lost_entries.AS_OF_DATE + WHERE 
p.AS_OF_DATE >= min_date.MIN_DATE + AND p.AS_OF_DATE < last_safe_load_datetime.LAST_SAFE_LOAD_DATETIME + AND as_of_grain_lost_entries.AS_OF_DATE IS NULL + ), + + backfill_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM new_rows_pks AS a + INNER JOIN backfill_as_of AS b + ON (1=1 ) + ), + + backfill AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + MIN(sat_customer_details_src.CUSTOMER_PK) AS SAT_CUSTOMER_DETAILS_PK, + DATETIME(MIN(sat_customer_details_src.LOAD_DATE)) AS SAT_CUSTOMER_DETAILS_LDTS + FROM backfill_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = DATETIME('1900-01-01') + + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ORDER BY (1) + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER AS a + INNER JOIN new_rows_as_of AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + MAX(sat_customer_details_src.CUSTOMER_PK) AS SAT_CUSTOMER_DETAILS_PK, + DATETIME(MAX(sat_customer_details_src.LOAD_DATE)) AS SAT_CUSTOMER_DETAILS_LDTS + FROM new_rows_as_of_dates AS a + + LEFT JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + OR sat_customer_details_src.LOAD_DATE = DATETIME('1900-01-01') + + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ORDER BY (1) + ), + + pit AS ( + SELECT * FROM new_rows + UNION ALL + SELECT * FROM overlap_pks + UNION ALL + SELECT * FROM backfill + ) + + SELECT DISTINCT * FROM pit + ``` + +=== "MS SQL Server" + + === "Base Load" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER_1S AS a +
INNER JOIN as_of_dates AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + + COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + CONVERT(BINARY(16), '0000000000000000', 2)) + AS SAT_CUSTOMER_DETAILS_PK, + + COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + CONVERT(DATETIME2, '1900-01-01 00:00:00.000')) + AS SAT_CUSTOMER_DETAILS_LDTS + + FROM new_rows_as_of_dates AS a + + LEFT OUTER JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + + pit AS ( + SELECT * FROM new_rows + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Incremental Load" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + last_safe_load_datetime AS ( + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + FROM ( + SELECT MIN(LOAD_DATE) AS LOAD_DATETIME FROM DBTVAULT.TEST.STG_CUSTOMER_DETAILS + ) AS l + ), + + as_of_grain_old_entries AS ( + SELECT DISTINCT AS_OF_DATE + FROM DBTVAULT.TEST.PIT_CUSTOMER_1S + ), + + as_of_grain_lost_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_grain_old_entries AS a + LEFT OUTER JOIN as_of_dates AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + as_of_grain_new_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_dates AS a + LEFT OUTER JOIN as_of_grain_old_entries AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + min_date AS ( + SELECT MIN(AS_OF_DATE) AS MIN_DATE + FROM as_of_dates + ), + + backfill_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + WHERE a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + ), + + new_rows_pks AS ( + SELECT h.CUSTOMER_PK + FROM DBTVAULT.TEST.HUB_CUSTOMER_1S AS h + WHERE h.LOAD_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + ), + + new_rows_as_of AS ( + SELECT AS_OF_DATE + FROM 
as_of_dates AS a + WHERE a.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + UNION + SELECT as_of_date + FROM as_of_grain_new_entries + ), + + overlap_pks AS ( + SELECT a.* + FROM DBTVAULT.TEST.PIT_CUSTOMER_1S AS a + INNER JOIN DBTVAULT.TEST.HUB_CUSTOMER_1S as b + ON a.CUSTOMER_PK = b.CUSTOMER_PK + WHERE a.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND a.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + ), + + overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + ), + + backfill_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM new_rows_pks AS a + INNER JOIN backfill_as_of AS b + ON (1=1) + ), + + backfill AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + + COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + CONVERT(BINARY(16), '0000000000000000', 2)) + AS SAT_CUSTOMER_DETAILS_PK, + + COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + CONVERT(DATETIME2, '1900-01-01 00:00:00.000')) + AS SAT_CUSTOMER_DETAILS_LDTS + + FROM backfill_rows_as_of_dates AS a + + LEFT OUTER JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER_1S AS a + INNER JOIN new_rows_as_of AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + + COALESCE(MAX(sat_customer_details_src.CUSTOMER_PK), + CONVERT(BINARY(16), '0000000000000000', 2)) + AS SAT_CUSTOMER_DETAILS_PK, + + 
COALESCE(MAX(sat_customer_details_src.LOAD_DATE), + CONVERT(DATETIME2, '1900-01-01 00:00:00.000')) + AS SAT_CUSTOMER_DETAILS_LDTS + + FROM new_rows_as_of_dates AS a + + LEFT OUTER JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS AS sat_customer_details_src + ON a.CUSTOMER_PK = sat_customer_details_src.CUSTOMER_PK + AND sat_customer_details_src.LOAD_DATE <= a.AS_OF_DATE + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + + pit AS ( + SELECT * FROM new_rows + UNION ALL + SELECT * FROM overlap_pks + UNION ALL + SELECT * FROM backfill + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Base Load with Ghost Records" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER_1S_TS AS a + INNER JOIN as_of_dates AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + COALESCE(MAX(sat_customer_details_ts_src.CUSTOMER_PK), + CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16))) + AS SAT_CUSTOMER_DETAILS_TS_PK, + COALESCE(MAX(sat_customer_details_ts_src.LOAD_DATETIME), + CONVERT(DATETIME2, '1900-01-01 00:00:00')) + AS SAT_CUSTOMER_DETAILS_TS_LDTS + + FROM new_rows_as_of_dates AS a + + LEFT OUTER JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS_TS AS sat_customer_details_ts_src + ON a.CUSTOMER_PK = sat_customer_details_ts_src.CUSTOMER_PK + AND sat_customer_details_ts_src.LOAD_DATETIME <= a.AS_OF_DATE + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + + pit AS ( + SELECT * FROM new_rows + ) + + SELECT DISTINCT * FROM pit + ``` + + === "Incremental Load with Ghost Records" + + ```sql + WITH as_of_dates AS ( + SELECT * FROM DBTVAULT.TEST.AS_OF_DATE + ), + + last_safe_load_datetime AS ( + SELECT MIN(LOAD_DATETIME) AS LAST_SAFE_LOAD_DATETIME + FROM ( + SELECT MIN(LOAD_DATETIME) AS LOAD_DATETIME FROM DBTVAULT.TEST.SAT_CUSTOMER_DETAILS_TS + ) AS l + ), + + as_of_grain_old_entries AS ( + SELECT DISTINCT AS_OF_DATE + FROM 
DBTVAULT.TEST.PIT_CUSTOMER_1S_TS + ), + + as_of_grain_lost_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_grain_old_entries AS a + LEFT OUTER JOIN as_of_dates AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + as_of_grain_new_entries AS ( + SELECT a.AS_OF_DATE + FROM as_of_dates AS a + LEFT OUTER JOIN as_of_grain_old_entries AS b + ON a.AS_OF_DATE = b.AS_OF_DATE + WHERE b.AS_OF_DATE IS NULL + ), + + min_date AS ( + SELECT MIN(AS_OF_DATE) AS MIN_DATE + FROM as_of_dates + ), + + backfill_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + WHERE a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + ), + + new_rows_pks AS ( + SELECT h.CUSTOMER_PK + FROM DBTVAULT.TEST.HUB_CUSTOMER_1S_TS AS h + WHERE h.LOAD_DATETIME >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + ), + + new_rows_as_of AS ( + SELECT AS_OF_DATE + FROM as_of_dates AS a + WHERE a.AS_OF_DATE >= (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + UNION + SELECT as_of_date + FROM as_of_grain_new_entries + ), + + overlap_pks AS ( + SELECT a.* + FROM DBTVAULT.TEST.PIT_CUSTOMER_1S_TS AS a + INNER JOIN DBTVAULT.TEST.HUB_CUSTOMER_1S_TS as b + ON a.CUSTOMER_PK = b.CUSTOMER_PK + WHERE a.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND a.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND a.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + ), + + overlap_as_of AS ( + SELECT p.AS_OF_DATE + FROM as_of_dates AS p + WHERE p.AS_OF_DATE >= (SELECT MIN_DATE FROM min_date) + AND p.AS_OF_DATE < (SELECT LAST_SAFE_LOAD_DATETIME FROM last_safe_load_datetime) + AND p.AS_OF_DATE NOT IN (SELECT AS_OF_DATE FROM as_of_grain_lost_entries) + ), + + backfill_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM new_rows_pks AS a + INNER JOIN backfill_as_of AS b + ON (1=1) + ), + + backfill AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + 
COALESCE(MAX(sat_customer_details_ts_src.CUSTOMER_PK), + CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16))) + AS SAT_CUSTOMER_DETAILS_TS_PK, + COALESCE(MAX(sat_customer_details_ts_src.LOAD_DATETIME), + CONVERT(DATETIME2, '1900-01-01 00:00:00')) + AS SAT_CUSTOMER_DETAILS_TS_LDTS + FROM backfill_rows_as_of_dates AS a + LEFT OUTER JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS_TS AS sat_customer_details_ts_src + ON a.CUSTOMER_PK = sat_customer_details_ts_src.CUSTOMER_PK + AND sat_customer_details_ts_src.LOAD_DATETIME <= a.AS_OF_DATE + GROUP BY + a.CUSTOMER_PK, a.AS_OF_DATE + ), + + new_rows_as_of_dates AS ( + SELECT + a.CUSTOMER_PK, + b.AS_OF_DATE + FROM DBTVAULT.TEST.HUB_CUSTOMER_1S_TS AS a + INNER JOIN new_rows_as_of AS b + ON (1=1) + ), + + new_rows AS ( + SELECT + a.CUSTOMER_PK, + a.AS_OF_DATE, + COALESCE(MAX(sat_customer_details_ts_src.CUSTOMER_PK), + CAST(REPLICATE(CAST(CAST('0' AS tinyint) AS BINARY(16)), 16) AS BINARY(16))) + AS SAT_CUSTOMER_DETAILS_TS_PK, + COALESCE(MAX(sat_customer_details_ts_src.LOAD_DATETIME), + CONVERT(DATETIME2, '1900-01-01 00:00:00')) + AS SAT_CUSTOMER_DETAILS_TS_LDTS + FROM new_rows_as_of_dates AS a + + LEFT OUTER JOIN DBTVAULT.TEST.SAT_CUSTOMER_DETAILS_TS AS sat_customer_details_ts_src + ON a.CUSTOMER_PK = sat_customer_details_ts_src.CUSTOMER_PK + AND sat_customer_details_ts_src.LOAD_DATETIME <= a.AS_OF_DATE + + GROUP BY + a.CUSTOMER_PK, + a.AS_OF_DATE + ), + + pit AS ( + SELECT * FROM new_rows + UNION ALL + SELECT * FROM overlap_pks + UNION ALL + SELECT * FROM backfill + + ) + + SELECT DISTINCT * FROM pit + ``` #### As Of Date Tables