Skip to content

Commit 8c35f4b

Browse files
committed
fixed shell scripts
1 parent d81ef85 commit 8c35f4b

File tree

3 files changed

+47
-43
lines changed

3 files changed

+47
-43
lines changed

datagen_HAWQ/README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,3 +4,8 @@
44
* install fake-factory (tarball included) and numpy. you can use `pip install <package>` or unzip the package tarballs and run `python setup.py build; python setup.py install` in the package directory.
55
* to test usage, run 'python EXECUTE.py' to generate 1 customer's transactions
66
* to create a dataset trans_fact.csv, run 'sh GENERATE_DATASET.sh N' where N is the desired number of customers
7+
* to run in HAWQ
8+
* move this directory to the same location on each HAWQ host
9+
* run `./process_datagen.sh <# of customers>` to generate data in HAWQ ('trans_fact' table and 'spending' view)
10+
* roughly 2000 customers per 1 GB of data as a default
11+
* note/TODO: by default, the travel values are set too high

datagen_HAWQ/hawq.sql

Lines changed: 40 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -46,20 +46,20 @@ amt float8)
4646
DISTRIBUTED RANDOMLY;
4747

4848
-- Drop every signature this function has had. CREATE OR REPLACE only replaces
-- a function with an identical argument list, so a stale customer(text)
-- overload would otherwise survive a rerun alongside customer(text, text).
DROP FUNCTION IF EXISTS customer(integer, text) CASCADE;
DROP FUNCTION IF EXISTS customer(text) CASCADE;
DROP FUNCTION IF EXISTS customer(text, text) CASCADE;
49-
CREATE OR REPLACE FUNCTION customer(repeat text)
49+
CREATE OR REPLACE FUNCTION customer(repeat text, dir text)
5050
RETURNS SETOF transaction
5151
AS
52-
$$
52+
$$
5353
import cPickle as pickle
5454
import faker
5555
from faker import Faker
5656
import numpy as np
5757
import sys
5858
import os
5959

60-
sys.path.append('/demo/103_datagen_HAWQ/datagen_HAWQ/src')
61-
sys.path.append('/demo/103_datagen_HAWQ/datagen_HAWQ')
62-
os.chdir('/demo/103_datagen_HAWQ/datagen_HAWQ')
60+
sys.path.append(dir + '/src')
61+
sys.path.append(dir)
62+
os.chdir(dir)
6363

6464
# not currently working: repeatable data set
6565
# if repeat.lower() != 'False':
@@ -72,7 +72,7 @@ $$
7272
#for a in alltrans:
7373
# yield(a)
7474
return [a for a in alltrans]
75-
$$
75+
$$
7676
LANGUAGE PLPYTHONU;
7777

7878
-- [pivhdsne:datagen_HAWQ]$ time psql -f hawq.sql -v customers=2
@@ -82,55 +82,55 @@ SELECT generate_series(1, :customers);
8282

8383
-- TODO: look into seed
8484
INSERT INTO trans_fact
85-
SELECT (t).*
86-
FROM
87-
(SELECT (customer('False')) as t
85+
SELECT (t).*
86+
FROM
87+
(SELECT (customer('False', :pwd)) as t
8888
FROM (
89-
SELECT n
90-
FROM numbers
89+
SELECT n
90+
FROM numbers
9191
)q
9292
)q2;
9393

9494
CREATE VIEW spending AS (
95-
SELECT ssn, age, gender, count(*) as num_trans,
96-
sum(food_dining) as food_dining,
97-
sum(utilities) as utilities,
98-
sum(grocery_net) as grocery_net,
99-
sum(home) as home,
100-
sum(pharmacy) as pharmacy,
101-
sum(shopping_pos) as shopping_pos,
102-
sum(kids_pets) as kids_pets,
103-
sum(personal_care) as personal_care,
104-
sum(misc_pos) as misc_pos,
105-
sum(gas_transport) as gas_transport,
106-
sum(misc_net) as misc_net,
107-
sum(health_fitness) as health_fitness,
108-
sum(shopping_net) as shopping_net,
95+
SELECT ssn, age, gender, count(*) as num_trans,
96+
sum(food_dining) as food_dining,
97+
sum(utilities) as utilities,
98+
sum(grocery_net) as grocery_net,
99+
sum(home) as home,
100+
sum(pharmacy) as pharmacy,
101+
sum(shopping_pos) as shopping_pos,
102+
sum(kids_pets) as kids_pets,
103+
sum(personal_care) as personal_care,
104+
sum(misc_pos) as misc_pos,
105+
sum(gas_transport) as gas_transport,
106+
sum(misc_net) as misc_net,
107+
sum(health_fitness) as health_fitness,
108+
sum(shopping_net) as shopping_net,
109109
sum(travel) as travel
110110
FROM(
111111
-- create case statements
112-
SELECT ssn as ssn, extract(years from age(NOW(),dob)) as age,
112+
SELECT ssn as ssn, extract(years from age(NOW(),dob)) as age,
113113
case when gender = 'M' then 0 else 1 end as gender,
114114
case when category = 'food_dining' then (amt) else 0 end as food_dining,
115-
case when category = 'utilities' then (amt) else 0 end as utilities,
116-
case when category = 'grocery_net' then (amt) else 0 end as grocery_net,
117-
case when category = 'home' then (amt) else 0 end as home,
118-
case when category = 'pharmacy' then (amt) else 0 end as pharmacy,
119-
case when category = 'shopping_pos' then (amt) else 0 end as shopping_pos,
120-
case when category = 'kids_pets' then (amt) else 0 end as kids_pets,
121-
case when category = 'personal_care' then (amt) else 0 end as personal_care,
122-
case when category = 'misc_pos' then (amt) else 0 end as misc_pos,
123-
case when category = 'gas_transport' then (amt) else 0 end as gas_transport,
124-
case when category = 'misc_net' then (amt) else 0 end as misc_net,
125-
case when category = 'health_fitness' then (amt) else 0 end as health_fitness,
126-
case when category = 'shopping_net' then (amt) else 0 end as shopping_net,
115+
case when category = 'utilities' then (amt) else 0 end as utilities,
116+
case when category = 'grocery_net' then (amt) else 0 end as grocery_net,
117+
case when category = 'home' then (amt) else 0 end as home,
118+
case when category = 'pharmacy' then (amt) else 0 end as pharmacy,
119+
case when category = 'shopping_pos' then (amt) else 0 end as shopping_pos,
120+
case when category = 'kids_pets' then (amt) else 0 end as kids_pets,
121+
case when category = 'personal_care' then (amt) else 0 end as personal_care,
122+
case when category = 'misc_pos' then (amt) else 0 end as misc_pos,
123+
case when category = 'gas_transport' then (amt) else 0 end as gas_transport,
124+
case when category = 'misc_net' then (amt) else 0 end as misc_net,
125+
case when category = 'health_fitness' then (amt) else 0 end as health_fitness,
126+
case when category = 'shopping_net' then (amt) else 0 end as shopping_net,
127127
case when category = 'travel' then (amt) else 0 end as travel
128128

129129
FROM trans_fact) GROUPED
130130
GROUP BY ssn, age, gender
131131
);
132132

133-
VACUUM ANALYZE trans_fact;
133+
ANALYZE trans_fact;
134134

135135
SELECT nspname || '.' || relname AS "relation",
136136
pg_size_pretty(pg_relation_size(C.oid)) AS "size"
@@ -140,5 +140,4 @@ SELECT nspname || '.' || relname AS "relation",
140140
ORDER BY pg_relation_size(C.oid) DESC
141141
LIMIT 20;
142142

143-
select now();
144-
143+
select now();

datagen_HAWQ/process_datagen.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@ if [ -z $1 ]
44
then
55
echo 'enter number of customers as command line argument'
66
else
7-
psql -U gpadmin -p 10432 -f hawq.sql -v customers=$1
7+
# hawq.sql passes :pwd to customer() as the data-generator directory, so the
# variable must be supplied here. The value is wrapped in single quotes so that
# psql's raw :pwd substitution yields a valid SQL string literal.
psql -U gpadmin -p 5432 -f hawq.sql -v customers="$1" -v pwd="'$PWD'"
88
export PGPASSWORD=gpadmin
9-
psql -p 10432 -U gpadmin -c "VACUUM ANALYZE trans_fact"
9+
psql -p 5432 -U gpadmin -c "VACUUM ANALYZE trans_fact"
1010
fi
1111

0 commit comments

Comments
 (0)