|
| 1 | +-- Snowflake Iceberg V3 Comprehensive Guide |
| 2 | +-- Script 03: Create Iceberg V3 Tables |
| 3 | +-- ==================================== |
| 4 | + |
-- Session context for everything below: unqualified table names in this
-- script resolve to FLEET_ANALYTICS_DB.RAW.
USE ROLE ACCOUNTADMIN;

-- Compute used by the INSERT ... SELECT sample-data loads.
USE WAREHOUSE FLEET_ANALYTICS_WH;

-- Target namespace for the Iceberg tables.
USE DATABASE FLEET_ANALYTICS_DB;
USE SCHEMA RAW;
| 9 | + |
-- --------------------------------------------------------------------
-- Table 1 of 6: VEHICLE_TELEMETRY_STREAM
--   Snowflake-catalog Iceberg table for streamed vehicle telemetry; the
--   event payload is stored as semi-structured VARIANT.
--   Timestamps use precision 6 (microseconds) for Spark compatibility.
--   CREATE OR REPLACE: re-running this statement replaces the table.
-- --------------------------------------------------------------------
CREATE OR REPLACE ICEBERG TABLE VEHICLE_TELEMETRY_STREAM
(
    VEHICLE_ID       STRING            NOT NULL,
    EVENT_TIMESTAMP  TIMESTAMP_NTZ(6)  NOT NULL,
    TELEMETRY_DATA   VARIANT           NOT NULL,
    INGESTED_AT      TIMESTAMP_LTZ(6)
)
EXTERNAL_VOLUME = 'FLEET_ICEBERG_VOL'
CATALOG         = 'SNOWFLAKE'
BASE_LOCATION   = 'FLEET_ANALYTICS_DB/RAW/VEHICLE_TELEMETRY_STREAM'
COMMENT         = 'Real-time vehicle telemetry events streamed via Snowpipe Streaming';
| 24 | + |
-- --------------------------------------------------------------------
-- Table 2 of 6: MAINTENANCE_LOGS
--   Snowflake-catalog Iceberg table for maintenance/diagnostic logs that
--   are batch-loaded from JSON files. LOG_DATA holds the log body as
--   VARIANT; SOURCE_FILE is an optional column for the originating file.
--   Timestamps use precision 6 (microseconds) for Spark compatibility.
--   CREATE OR REPLACE: re-running this statement replaces the table.
-- --------------------------------------------------------------------
CREATE OR REPLACE ICEBERG TABLE MAINTENANCE_LOGS
(
    LOG_ID         STRING            NOT NULL,
    VEHICLE_ID     STRING            NOT NULL,
    LOG_TIMESTAMP  TIMESTAMP_NTZ(6)  NOT NULL,
    LOG_DATA       VARIANT           NOT NULL,
    SOURCE_FILE    STRING,
    INGESTED_AT    TIMESTAMP_LTZ(6)
)
EXTERNAL_VOLUME = 'FLEET_ICEBERG_VOL'
CATALOG         = 'SNOWFLAKE'
BASE_LOCATION   = 'FLEET_ANALYTICS_DB/RAW/MAINTENANCE_LOGS'
COMMENT         = 'Maintenance and diagnostic logs loaded from JSON files';
| 41 | + |
-- --------------------------------------------------------------------
-- Table 3 of 6: SENSOR_READINGS
--   Snowflake-catalog Iceberg table for time-series sensor measurements,
--   one FLOAT column per metric (units are encoded in the column names:
--   _F, _PSI, _GPH, _MILES). All metric columns are nullable.
--   Timestamps use precision 6 (microseconds) for Spark compatibility.
--   CREATE OR REPLACE: re-running this statement replaces the table.
-- --------------------------------------------------------------------
CREATE OR REPLACE ICEBERG TABLE SENSOR_READINGS
(
    READING_ID            STRING            NOT NULL,
    VEHICLE_ID            STRING            NOT NULL,
    READING_TIMESTAMP     TIMESTAMP_NTZ(6)  NOT NULL,  -- microseconds, Spark-compatible
    ENGINE_TEMP_F         FLOAT,
    OIL_PRESSURE_PSI      FLOAT,
    BATTERY_VOLTAGE       FLOAT,
    FUEL_CONSUMPTION_GPH  FLOAT,
    TIRE_PRESSURE_FL      FLOAT,
    TIRE_PRESSURE_FR      FLOAT,
    TIRE_PRESSURE_RL      FLOAT,
    TIRE_PRESSURE_RR      FLOAT,
    ODOMETER_MILES        FLOAT,
    INGESTED_AT           TIMESTAMP_LTZ(6)
)
EXTERNAL_VOLUME = 'FLEET_ICEBERG_VOL'
CATALOG         = 'SNOWFLAKE'
BASE_LOCATION   = 'FLEET_ANALYTICS_DB/RAW/SENSOR_READINGS'
COMMENT         = 'High-precision time-series sensor readings';
| 65 | + |
-- --------------------------------------------------------------------
-- Table 4 of 6: VEHICLE_LOCATIONS
--   Snowflake-catalog Iceberg table for vehicle positions. Each row keeps
--   the position twice: as LATITUDE/LONGITUDE floats (required) and as an
--   optional GEOGRAPHY value in LOCATION_POINT.
--   Timestamps use precision 6 (microseconds) for Spark compatibility.
--   CREATE OR REPLACE: re-running this statement replaces the table.
-- --------------------------------------------------------------------
CREATE OR REPLACE ICEBERG TABLE VEHICLE_LOCATIONS
(
    LOCATION_ID         STRING            NOT NULL,
    VEHICLE_ID          STRING            NOT NULL,
    LOCATION_TIMESTAMP  TIMESTAMP_NTZ(6)  NOT NULL,
    LATITUDE            FLOAT             NOT NULL,
    LONGITUDE           FLOAT             NOT NULL,
    LOCATION_POINT      GEOGRAPHY,
    ALTITUDE_FT         FLOAT,
    HEADING_DEGREES     FLOAT,
    SPEED_MPH           FLOAT,
    FLEET_REGION        STRING,
    INGESTED_AT         TIMESTAMP_LTZ(6)
)
EXTERNAL_VOLUME = 'FLEET_ICEBERG_VOL'
CATALOG         = 'SNOWFLAKE'
BASE_LOCATION   = 'FLEET_ANALYTICS_DB/RAW/VEHICLE_LOCATIONS'
COMMENT         = 'Geospatial vehicle location data with GEOGRAPHY type';
| 87 | + |
-- --------------------------------------------------------------------
-- Table 5 of 6: VEHICLE_REGISTRY
--   Snowflake-catalog Iceberg table with master data for vehicles and
--   their drivers. The DRIVER_* columns hold personal data (the table
--   COMMENT flags it as PII). VEHICLE_STATUS defaults to 'ACTIVE' when
--   an insert omits it.
--   Timestamps use precision 6 (microseconds) for Spark compatibility.
--   CREATE OR REPLACE: re-running this statement replaces the table.
-- --------------------------------------------------------------------
CREATE OR REPLACE ICEBERG TABLE VEHICLE_REGISTRY
(
    -- Vehicle attributes
    VEHICLE_ID         STRING  NOT NULL,
    VIN                STRING,
    MAKE               STRING,
    MODEL              STRING,
    YEAR               INT,
    LICENSE_PLATE      STRING,
    -- Driver attributes (PII)
    DRIVER_ID          STRING,
    DRIVER_NAME        STRING,
    DRIVER_EMAIL       STRING,
    DRIVER_PHONE       STRING,
    -- Fleet assignment and lifecycle
    FLEET_REGION       STRING,
    VEHICLE_STATUS     STRING  DEFAULT 'ACTIVE',
    REGISTRATION_DATE  DATE,
    LAST_SERVICE_DATE  DATE,
    -- Row audit timestamps
    CREATED_AT         TIMESTAMP_LTZ(6),
    UPDATED_AT         TIMESTAMP_LTZ(6)
)
EXTERNAL_VOLUME = 'FLEET_ICEBERG_VOL'
CATALOG         = 'SNOWFLAKE'
BASE_LOCATION   = 'FLEET_ANALYTICS_DB/RAW/VEHICLE_REGISTRY'
COMMENT         = 'Master data for vehicles and drivers (contains PII)';
| 114 | + |
-- --------------------------------------------------------------------
-- Table 6 of 6: API_WEATHER_DATA
--   Snowflake-catalog Iceberg table for weather API responses. Each row
--   is keyed by city name and coordinates, with the response stored as
--   VARIANT in WEATHER_DATA.
--   Timestamps use precision 6 (microseconds) for Spark compatibility.
--   CREATE OR REPLACE: re-running this statement replaces the table.
-- --------------------------------------------------------------------
CREATE OR REPLACE ICEBERG TABLE API_WEATHER_DATA
(
    CITY_NAME     STRING   NOT NULL,
    LATITUDE      FLOAT    NOT NULL,
    LONGITUDE     FLOAT    NOT NULL,
    WEATHER_DATA  VARIANT  NOT NULL,
    INGESTED_AT   TIMESTAMP_LTZ(6)
)
EXTERNAL_VOLUME = 'FLEET_ICEBERG_VOL'
CATALOG         = 'SNOWFLAKE'
BASE_LOCATION   = 'FLEET_ANALYTICS_DB/RAW/API_WEATHER_DATA'
COMMENT         = 'Weather data fetched from Open-Meteo API';
| 130 | + |
-- ============================================
-- Load Sample Data into VEHICLE_REGISTRY
-- ============================================
-- Generates 100 synthetic vehicles, VH-0000 .. VH-0099, one driver each.
-- Make, model and fleet region cycle on n mod 5; driver names cycle on
-- n mod 10; one vehicle in 20 is flagged 'MAINTENANCE'.
--
-- The row index n comes from ROW_NUMBER() rather than raw SEQ4(): SEQ4()
-- is not guaranteed to be gap-free, and the other sample loads in this
-- script reference vehicles as 'VH-' || MOD(SEQ4(), 100), so the registry
-- must contain exactly the IDs 0000-0099.
INSERT INTO VEHICLE_REGISTRY (
    VEHICLE_ID, VIN, MAKE, MODEL, YEAR, LICENSE_PLATE,
    DRIVER_ID, DRIVER_NAME, DRIVER_EMAIL, DRIVER_PHONE,
    FLEET_REGION, VEHICLE_STATUS, REGISTRATION_DATE, LAST_SERVICE_DATE,
    CREATED_AT, UPDATED_AT
)
WITH vehicle_seq AS (
    -- Dense, zero-based row index: 0 .. 99
    SELECT ROW_NUMBER() OVER (ORDER BY SEQ4()) - 1 AS n
    FROM TABLE(GENERATOR(ROWCOUNT => 100))
),
vehicle_base AS (
    -- Values that more than one output column is derived from
    SELECT
        n,
        CASE MOD(n, 10)
            WHEN 0 THEN 'John Smith'
            WHEN 1 THEN 'Sarah Johnson'
            WHEN 2 THEN 'Michael Brown'
            WHEN 3 THEN 'Emily Davis'
            WHEN 4 THEN 'David Wilson'
            WHEN 5 THEN 'Jessica Taylor'
            WHEN 6 THEN 'Christopher Lee'
            WHEN 7 THEN 'Amanda Martinez'
            WHEN 8 THEN 'Daniel Anderson'
            ELSE 'Jennifer Garcia'
        END AS driver_name,
        MOD(n * 17, 1000) AS days_since_registration,
        MOD(n * 7, 90)    AS days_since_service
    FROM vehicle_seq
)
SELECT
    'VH-' || LPAD(n::VARCHAR, 4, '0') AS VEHICLE_ID,
    UPPER(RANDSTR(17, RANDOM())) AS VIN,
    CASE MOD(n, 5)
        WHEN 0 THEN 'Ford'
        WHEN 1 THEN 'Chevrolet'
        WHEN 2 THEN 'Toyota'
        WHEN 3 THEN 'Ram'
        ELSE 'Freightliner'
    END AS MAKE,
    CASE MOD(n, 5)
        WHEN 0 THEN 'Transit'
        WHEN 1 THEN 'Express'
        WHEN 2 THEN 'Tacoma'
        WHEN 3 THEN 'ProMaster'
        ELSE 'Cascadia'
    END AS MODEL,
    2020 + MOD(n, 6) AS YEAR,
    UPPER(RANDSTR(3, RANDOM())) || '-' || LPAD(MOD(n * 7, 9999)::VARCHAR, 4, '0') AS LICENSE_PLATE,
    'DRV-' || LPAD(n::VARCHAR, 4, '0') AS DRIVER_ID,
    driver_name AS DRIVER_NAME,
    -- first.last@fleetco.com, derived from the driver name chosen above
    LOWER(SPLIT_PART(driver_name, ' ', 1)) || '.' || LOWER(SPLIT_PART(driver_name, ' ', 2)) || '@fleetco.com' AS DRIVER_EMAIL,
    '+1-555-' || LPAD(MOD(n * 13, 9999)::VARCHAR, 4, '0') AS DRIVER_PHONE,
    CASE MOD(n, 5)
        WHEN 0 THEN 'Pacific Northwest'
        WHEN 1 THEN 'California'
        WHEN 2 THEN 'Mountain West'
        WHEN 3 THEN 'Midwest'
        ELSE 'Northeast'
    END AS FLEET_REGION,
    CASE WHEN MOD(n, 20) = 0 THEN 'MAINTENANCE' ELSE 'ACTIVE' END AS VEHICLE_STATUS,
    DATEADD('day', -days_since_registration, CURRENT_DATE()) AS REGISTRATION_DATE,
    -- LEAST() keeps the last service from landing before the registration date
    DATEADD('day', -LEAST(days_since_service, days_since_registration), CURRENT_DATE()) AS LAST_SERVICE_DATE,
    -- Audit columns: previously left NULL by this load
    CURRENT_TIMESTAMP()::TIMESTAMP_LTZ(6) AS CREATED_AT,
    CURRENT_TIMESTAMP()::TIMESTAMP_LTZ(6) AS UPDATED_AT
FROM vehicle_base;
| 184 | + |
-- ============================================
-- Load Sample Data into SENSOR_READINGS
-- ============================================
-- Generates 10,000 synthetic readings spread across vehicles
-- VH-0000 .. VH-0099, one reading every 5 minutes going back from now
-- (10,000 x 5 min is roughly 35 days of history).
--
-- Metric values are drawn with UNIFORM(min::FLOAT, max::FLOAT, RANDOM()).
-- RANDOM() returns a signed 64-bit integer, so the previous
-- "base + RANDOM() / POWER(10, 18) * span" form produced a multiplier of
-- about -9.2 .. +9.2 instead of 0 .. 1, giving negative pressures and
-- engine temperatures several hundred degrees off.
INSERT INTO SENSOR_READINGS (
    READING_ID, VEHICLE_ID, READING_TIMESTAMP,
    ENGINE_TEMP_F, OIL_PRESSURE_PSI, BATTERY_VOLTAGE,
    FUEL_CONSUMPTION_GPH, TIRE_PRESSURE_FL, TIRE_PRESSURE_FR,
    TIRE_PRESSURE_RL, TIRE_PRESSURE_RR, ODOMETER_MILES,
    INGESTED_AT
)
SELECT
    UUID_STRING() AS READING_ID,
    'VH-' || LPAD(MOD(SEQ4(), 100)::VARCHAR, 4, '0') AS VEHICLE_ID,
    -- Explicit (6) so the value matches the column's microsecond precision
    TIMESTAMPADD('minute', -SEQ4() * 5, CURRENT_TIMESTAMP())::TIMESTAMP_NTZ(6) AS READING_TIMESTAMP,
    UNIFORM(180::FLOAT, 230::FLOAT, RANDOM())   AS ENGINE_TEMP_F,         -- 180 - 230 F
    UNIFORM(30::FLOAT, 60::FLOAT, RANDOM())     AS OIL_PRESSURE_PSI,      -- 30 - 60 psi
    UNIFORM(11.5::FLOAT, 14.5::FLOAT, RANDOM()) AS BATTERY_VOLTAGE,       -- 11.5 - 14.5 V
    UNIFORM(2::FLOAT, 10::FLOAT, RANDOM())      AS FUEL_CONSUMPTION_GPH,  -- 2 - 10 gal/h
    UNIFORM(32::FLOAT, 38::FLOAT, RANDOM())     AS TIRE_PRESSURE_FL,      -- 32 - 38 psi
    UNIFORM(32::FLOAT, 38::FLOAT, RANDOM())     AS TIRE_PRESSURE_FR,
    UNIFORM(32::FLOAT, 38::FLOAT, RANDOM())     AS TIRE_PRESSURE_RL,
    UNIFORM(32::FLOAT, 38::FLOAT, RANDOM())     AS TIRE_PRESSURE_RR,
    -- NOTE(review): the odometer grows with SEQ4() while the timestamp moves
    -- backwards with it, so older readings carry higher mileage. Kept as in
    -- the original; confirm whether that matters for the demo queries.
    10000 + SEQ4() * 50 + UNIFORM(0::FLOAT, 100::FLOAT, RANDOM()) AS ODOMETER_MILES,
    -- Previously left NULL by this load
    CURRENT_TIMESTAMP()::TIMESTAMP_LTZ(6) AS INGESTED_AT
FROM TABLE(GENERATOR(ROWCOUNT => 10000));
| 209 | + |
-- ============================================
-- Load Sample Data into VEHICLE_LOCATIONS
-- ============================================
-- Generates 5,000 synthetic position fixes, one every 30 seconds going
-- back from now, for vehicles VH-0000 .. VH-0099. Each fix is placed at a
-- random offset of up to 0.5 degrees from one of 20 US city centers.
--
-- The random offset is drawn once per row (CTE "jittered") and reused for
-- LATITUDE, LONGITUDE and LOCATION_POINT, so the GEOGRAPHY point always
-- agrees with the numeric coordinates on the same row.
--
-- NOTE(review): FLEET_REGION here is the region of the assigned city and
-- includes 'Texas' and 'Southeast', which the VEHICLE_REGISTRY sample load
-- never assigns; a vehicle's region in this table therefore need not match
-- its registry region. Confirm whether downstream demo queries join on it.
INSERT INTO VEHICLE_LOCATIONS (
    LOCATION_ID, VEHICLE_ID, LOCATION_TIMESTAMP,
    LATITUDE, LONGITUDE, LOCATION_POINT,
    ALTITUDE_FT, HEADING_DEGREES, SPEED_MPH, FLEET_REGION,
    INGESTED_AT
)
WITH city_centers AS (
    -- City centers per region: (lat, lon, city, region)
    SELECT base_lat, base_lon, city, region
    FROM (VALUES
        (47.6062, -122.3321, 'Seattle', 'Pacific Northwest'),
        (45.5152, -122.6784, 'Portland', 'Pacific Northwest'),
        (34.0522, -118.2437, 'Los Angeles', 'California'),
        (37.7749, -122.4194, 'San Francisco', 'California'),
        (32.7157, -117.1611, 'San Diego', 'California'),
        (39.7392, -104.9903, 'Denver', 'Mountain West'),
        (40.7608, -111.8910, 'Salt Lake City', 'Mountain West'),
        (33.4484, -112.0740, 'Phoenix', 'Mountain West'),
        (41.8781, -87.6298, 'Chicago', 'Midwest'),
        (44.9778, -93.2650, 'Minneapolis', 'Midwest'),
        (39.0997, -94.5786, 'Kansas City', 'Midwest'),
        (40.7128, -74.0060, 'New York', 'Northeast'),
        (42.3601, -71.0589, 'Boston', 'Northeast'),
        (39.9526, -75.1652, 'Philadelphia', 'Northeast'),
        (38.9072, -77.0369, 'Washington DC', 'Northeast'),
        (29.7604, -95.3698, 'Houston', 'Texas'),
        (32.7767, -96.7970, 'Dallas', 'Texas'),
        (30.2672, -97.7431, 'Austin', 'Texas'),
        -- Atlanta latitude corrected: it previously duplicated Phoenix's 33.4484
        (33.7490, -84.3880, 'Atlanta', 'Southeast'),
        (25.7617, -80.1918, 'Miami', 'Southeast')
    ) AS t (base_lat, base_lon, city, region)
),
numbered_cities AS (
    -- Zero-based city index 0 .. 19; base_lon breaks any latitude tie so the
    -- numbering is deterministic
    SELECT
        base_lat,
        base_lon,
        city,
        region,
        ROW_NUMBER() OVER (ORDER BY base_lat, base_lon) - 1 AS city_idx
    FROM city_centers
),
base_data AS (
    SELECT
        SEQ4() AS seq,
        'VH-' || LPAD(MOD(SEQ4(), 100)::VARCHAR, 4, '0') AS VEHICLE_ID,
        -- Explicit (6) so the value matches the column's microsecond precision
        TIMESTAMPADD('second', -SEQ4() * 30, CURRENT_TIMESTAMP())::TIMESTAMP_NTZ(6) AS LOCATION_TIMESTAMP,
        -- 20 = number of rows in city_centers; keep in sync when editing the list
        MOD(SEQ4(), 20) AS city_selector
    FROM TABLE(GENERATOR(ROWCOUNT => 5000))
),
jittered AS (
    -- One random offset per row, within 0.5 degrees (roughly 30 miles) of the
    -- city center, in 0.01-degree steps
    SELECT
        b.VEHICLE_ID,
        b.LOCATION_TIMESTAMP,
        c.base_lat + (UNIFORM(-50, 50, RANDOM()) / 100.0) AS latitude,
        c.base_lon + (UNIFORM(-50, 50, RANDOM()) / 100.0) AS longitude,
        c.region
    FROM base_data AS b
    INNER JOIN numbered_cities AS c
        ON b.city_selector = c.city_idx
)
SELECT
    UUID_STRING() AS LOCATION_ID,
    j.VEHICLE_ID,
    j.LOCATION_TIMESTAMP,
    j.latitude AS LATITUDE,
    j.longitude AS LONGITUDE,
    -- ST_POINT takes (longitude, latitude)
    ST_POINT(j.longitude, j.latitude) AS LOCATION_POINT,
    UNIFORM(100, 5000, RANDOM())::FLOAT AS ALTITUDE_FT,
    UNIFORM(0, 359, RANDOM())::FLOAT AS HEADING_DEGREES,
    UNIFORM(0, 80, RANDOM())::FLOAT AS SPEED_MPH,
    j.region AS FLEET_REGION,
    -- Previously left NULL by this load
    CURRENT_TIMESTAMP()::TIMESTAMP_LTZ(6) AS INGESTED_AT
FROM jittered AS j;
| 273 | + |
-- Post-load checks.
-- 1) List the Iceberg tables that now exist in the RAW schema.
SHOW ICEBERG TABLES IN SCHEMA RAW;

-- 2) Row counts for the three tables that received sample data.
SELECT
    'VEHICLE_REGISTRY' AS TABLE_NAME,
    COUNT(*)           AS ROW_COUNT
FROM VEHICLE_REGISTRY
UNION ALL
SELECT
    'SENSOR_READINGS',
    COUNT(*)
FROM SENSOR_READINGS
UNION ALL
SELECT
    'VEHICLE_LOCATIONS',
    COUNT(*)
FROM VEHICLE_LOCATIONS;

-- 3) Completion marker returned to whoever runs the script.
SELECT 'Iceberg tables created and sample data loaded!' AS STATUS;
0 commit comments