Skip to content

Commit 80e4201

Browse files
Merge pull request #2941 from Kevinsnowflake/master
Create create-declarative-data-pipelines-with-dynamic-tables guide
2 parents 7d66d13 + ba9b691 commit 80e4201

File tree

14 files changed

+674
-20
lines changed

14 files changed

+674
-20
lines changed
93.9 KB
Loading
171 KB
Loading
114 KB
Loading
140 KB
Loading
60.6 KB
Loading
72.4 KB
Loading
80.9 KB
Loading
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
-- Use the ACCOUNTADMIN role for the rest of this setup script
use role accountadmin;

-- Create the virtual warehouse that performs the computations for this workshop
-- (a warehouse supplies compute only; the data itself lives in the databases below)
create warehouse if not exists compute_wh
    with warehouse_size = 'large'
    auto_suspend = 300  -- suspend after 300 seconds of inactivity
    auto_resume = true;

-- Select the warehouse explicitly: if compute_wh already existed, the statement
-- above did nothing, so the session is not guaranteed to have it active
use warehouse compute_wh;

-- Create two databases, one for raw data and one for transformed models
create database if not exists raw_db;
create database if not exists analytics_db;

-- Everything created below lands in raw_db
use database raw_db;
14+
15+
-- Create a Python UDTF that can be used to generate fake customer data
16+
create or replace function gen_cust_info(num_records number)
returns table (custid number(10), cname varchar(100), spendlimit number(10,2))
language python
runtime_version=3.10
handler='CustTab'
packages = ('Faker')
as $$
import random

from faker import Faker

fake = Faker()

# Customer IDs are handed out sequentially starting from this value
FIRST_CUSTOMER_ID = 1001


class CustTab:
    """UDTF handler that emits fake customer records."""

    def process(self, num_records):
        """Yield num_records rows of (custid, cname, spendlimit).

        custid counts up from FIRST_CUSTOMER_ID, cname is a Faker-generated
        name, and spendlimit is a random amount between 1000 and 10000
        rounded to two decimals.
        """
        for custid in range(FIRST_CUSTOMER_ID, FIRST_CUSTOMER_ID + num_records):
            cname = fake.name()
            spendlimit = round(random.uniform(1000, 10000), 2)
            yield (custid, cname, spendlimit)
$$;
41+
42+
-- Create a Python UDTF that can be used to generate a fake inventory of products
43+
create or replace function gen_prod_inv(num_records number)
returns table (pid number(10), pname varchar(100), stock number(10,2), stockdate date)
language python
runtime_version=3.10
handler='ProdTab'
packages = ('Faker')
as $$
from faker import Faker
import random
from datetime import datetime, timedelta

fake = Faker()

# Product IDs are handed out sequentially starting from this value
FIRST_PRODUCT_ID = 101


class ProdTab:
    """UDTF handler that emits fake product inventory records."""

    def process(self, num_records):
        """Yield num_records rows of (pid, pname, stock, stockdate).

        pid counts up from FIRST_PRODUCT_ID, pname is a Faker catch phrase,
        stock is a random whole-number quantity between 500 and 1000, and
        stockdate is a random date within the last 90 days.
        """
        # The date window is the same for every row, so compute it once:
        # from 90 days ago (earliest) up to now (latest).
        current_date = datetime.now()
        min_date = current_date - timedelta(days=90)

        for pid in range(FIRST_PRODUCT_ID, FIRST_PRODUCT_ID + num_records):
            pname = fake.catch_phrase()
            stock = round(random.uniform(500, 1000), 0)
            stockdate = fake.date_between_dates(min_date, current_date)
            yield (pid, pname, stock, stockdate)
$$;
76+
77+
-- Create a Python UDTF that can be used to generate fake customer order data
78+
create or replace function gen_cust_purchase(num_records number,ndays number)
returns table (custid number(10), purchase variant)
language python
runtime_version=3.10
handler='genCustPurchase'
packages = ('Faker')
as $$
from faker import Faker
import random
from datetime import datetime, timedelta

fake = Faker()


class genCustPurchase:
    """UDTF handler that emits fake customer purchase records."""

    def process(self, num_records, ndays):
        """Yield num_records rows of (custid, purchase).

        custid is a random customer ID. purchase is a dict with the keys
        'prodid', 'quantity', 'purchase_amount' and 'purchase_date'; the
        purchase date is a random date within the last ndays days.
        """
        # The date window is the same for every row, so compute it once:
        # from ndays ago (earliest) up to now (latest).
        current_date = datetime.now()
        min_date = current_date - timedelta(days=ndays)

        for _ in range(num_records):
            # NOTE(review): the ID ranges below are hard-coded; presumably they
            # are meant to line up with the IDs produced by gen_cust_info and
            # gen_prod_inv -- confirm against the row counts used to fill
            # the customers and products tables.
            c_id = fake.random_int(min=1001, max=1999)
            pdate = fake.date_between_dates(min_date, current_date)

            purchase = {
                'prodid': fake.random_int(min=101, max=199),
                'quantity': fake.random_int(min=1, max=5),
                'purchase_amount': round(random.uniform(10, 1000), 2),
                'purchase_date': pdate
            }
            yield (c_id, purchase)
$$;
123+
124+
-- Create the customers table using the UDTF for fake customer data
-- (1000 customers, stored in customer-ID order)
create or replace table customers as
select
    custid,
    cname,
    spendlimit
from table(gen_cust_info(1000))
order by custid;

-- Create the products table using the UDTF for fake product data
-- (100 products, stored in product-ID order)
create or replace table products as
select
    pid,
    pname,
    stock,
    stockdate
from table(gen_prod_inv(100))
order by pid;

-- Create an orders table using the UDTF for fake customer order data
-- (10000 orders with purchase dates spread over the last 10 days)
create or replace table orders as
select
    custid,
    purchase
from table(gen_cust_purchase(10000, 10));
130+
131+
-- Sample ten customers; every customer carries a spending limit
select
    custid,
    cname,
    spendlimit
from customers
limit 10;

-- Sample ten products; every product carries a stock level and the date it was stocked
select
    pid,
    pname,
    stock,
    stockdate
from products
limit 10;

-- Sample ten sale orders for products purchased online by various customers
select
    custid,
    purchase
from orders
limit 10;
137+
138+
-- Final statement: report that the setup script ran to completion
select
    'Congratulations! Snowflake Data Engineering workshop setup has completed successfully!' as status;
51.3 KB
Loading
41 KB
Loading

0 commit comments

Comments
 (0)