Skip to content

Commit 92b41b8

Browse files
committed
Initialize hardware store case study
1 parent ca59679 commit 92b41b8

File tree

5 files changed

+431
-0
lines changed

5 files changed

+431
-0
lines changed

hardware_store/README.md

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# Hardware Store sample data
2+
3+
This sample dataset represents a chain of hardware stores managing their inventory and rentals.
4+
5+
```mermaid
6+
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
7+
8+
erDiagram
9+
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html
10+
11+
store_locations {
12+
BIGINT id PK
13+
string name
14+
string address
15+
}
16+
17+
customers {
18+
BIGINT id PK
19+
string first_name
20+
string last_name
21+
string email
22+
string phone
23+
string address
24+
}
25+
26+
assets {
27+
BIGINT id PK
28+
string name
29+
string serial_number
30+
NUMERIC rental_price
31+
NUMERIC sale_price
32+
string rental_period
33+
string location
34+
BIGINT store_id FK
35+
}
36+
37+
transactions {
38+
BIGINT id PK
39+
BIGINT asset_id FK
40+
BIGINT customer_id FK
41+
string transaction_type
42+
TIMESTAMP transaction_date
43+
NUMERIC total_charge
44+
string note
45+
}
46+
47+
rentals {
48+
BIGINT id PK
49+
BIGINT transaction_id FK
50+
TIMESTAMP rental_start
51+
TIMESTAMP rental_end
52+
TIMESTAMP time_out
53+
TIMESTAMP time_in
54+
INTERVAL rental_time
55+
}
56+
57+
%% Relationships
58+
%% See: https://mermaid.js.org/syntax/entityRelationshipDiagram.html#relationship-syntax
59+
assets }o--|| store_locations : "store_id"
60+
transactions }o--|| assets : "asset_id"
61+
transactions }o--|| customers : "customer_id"
62+
rentals ||--|| transactions : "transaction_id"
63+
64+
```
65+
66+
67+
## Loading Data
68+
69+
The generated SQL file, `generated_data.sql`, contains all the COPY commands needed to import the data into your database. Both the data and this SQL file are produced by the `generate_data.py` script, which writes the file to the current working directory and can be adjusted and re-run to alter the data if needed.
70+
71+
Load the data into a locally-running Mathesar instance like this:
72+
73+
```shell
74+
# First load the schema and tables
75+
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < schema.sql
76+
# Then the sample data
77+
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < generated_data.sql
78+
```
79+
80+
## Development
81+
82+
The only requirement is to install dependencies with `pip install -r requirements.txt`.

hardware_store/generate_data.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
import os
2+
import random
3+
from faker import Faker
4+
import faker_commerce
5+
from datetime import datetime
6+
7+
fake = Faker()
8+
fake.add_provider(faker_commerce.Provider)
9+
10+
# Number of rows to generate
11+
NUM_STORES = 5
12+
NUM_CUSTOMERS = 20
13+
NUM_ASSETS = 50
14+
NUM_TRANSACTIONS = 60
15+
NUM_RENTALS = 30
16+
17+
# Helper function to clean values for COPY
18+
def clean_value(value):
    r"""Render one field for PostgreSQL ``COPY ... FROM stdin`` text format.

    Args:
        value: The raw cell value. ``None`` stands for SQL NULL.

    Returns:
        ``\N`` for ``None``. For strings, the text with every backslash
        doubled and each tab, newline and carriage return replaced by a
        single space. For any other type, ``str(value)``.
    """
    if value is None:
        return r"\N"  # PostgreSQL NULL
    if isinstance(value, str):
        # Backslash is COPY's escape character, so it must be doubled first;
        # otherwise a literal "\N" would load as NULL and sequences such as
        # "\t" would be reinterpreted. Tabs and line breaks are the column
        # and row delimiters, so they are flattened to spaces.
        return (
            value.replace("\\", "\\\\")
            .replace("\t", " ")
            .replace("\n", " ")
            .replace("\r", " ")
        )
    return str(value)
24+
25+
# Table Data Generation
26+
def generate_store_locations():
    """Yield one ``[id, name, address]`` row per store.

    Ids run from 1 to ``NUM_STORES``; the name and address come from Faker's
    ``company`` and ``address`` providers.
    """
    for store_id in range(1, NUM_STORES + 1):
        store_name = fake.company()
        store_address = fake.address()
        yield [store_id, store_name, store_address]
29+
30+
def generate_customers():
31+
for i in range(1, NUM_CUSTOMERS + 1):
32+
yield [
33+
i,
34+
fake.first_name(),
35+
fake.last_name(),
36+
fake.email(),
37+
fake.phone_number(),
38+
fake.address(),
39+
]
40+
41+
def generate_assets(store_ids):
    """Yield one asset row per id from 1 to ``NUM_ASSETS``.

    Args:
        store_ids: Sequence of store ids; each asset gets one picked at random.

    Yields:
        ``[id, name, serial_number, rental_price, sale_price, rental_period,
        location, store_id]``. ``sale_price`` is ``None`` unless the asset
        was randomly marked as for sale.
    """
    periods = ["daily", "weekly", "monthly"]
    for asset_id in range(1, NUM_ASSETS + 1):
        period = random.choice(periods)
        rent = round(random.uniform(5, 100), 2)

        # With probability 0.2 the asset also gets a sale price, set to a
        # 0.5-0.8 fraction of its rental price.
        if random.random() < 0.2:
            sale = round(rent * random.uniform(0.5, 0.8), 2)
        else:
            sale = None

        name = fake.ecommerce_name()
        serial = fake.unique.ean13()
        aisle = random.randint(1, 20)
        shelf = random.randint(1, 10)
        yield [
            asset_id,
            name,
            serial,
            rent,
            sale,
            period,
            f"Aisle {aisle} - Shelf {shelf}",
            random.choice(store_ids),
        ]
59+
60+
def generate_transactions(asset_ids, customer_ids):
    """Yield one transaction row per id from 1 to ``NUM_TRANSACTIONS``.

    Args:
        asset_ids: Sequence of asset ids to pick from at random.
        customer_ids: Sequence of customer ids to pick from at random.

    Yields:
        ``[id, asset_id, customer_id, transaction_type, transaction_date,
        total_charge, note]``.
    """
    kinds = ("Sale", "Rental", "Return")
    for txn_id in range(1, NUM_TRANSACTIONS + 1):
        asset = random.choice(asset_ids)
        customer = random.choice(customer_ids)
        kind = random.choice(kinds)
        when = fake.date_time_this_year()
        charge = round(random.uniform(10, 500), 2)
        note = fake.sentence()
        yield [txn_id, asset, customer, kind, when, charge, note]
68+
69+
def generate_rentals(transaction_ids):
    """Yield one rental row per id from 1 to ``NUM_RENTALS``.

    Args:
        transaction_ids: Sequence of transaction ids. One is drawn
            independently for every rental, so an id may repeat.

    Yields:
        ``[id, transaction_id, start, end, start, end, end - start]``. The
        start/end pair is emitted twice and the last field is their
        difference.
    """
    for rental_id in range(1, NUM_RENTALS + 1):
        txn_id = random.choice(transaction_ids)
        start = fake.date_time_this_year()
        # The end is drawn from Faker with the start as the lower bound.
        end = fake.date_time_between_dates(datetime_start=start)
        duration = end - start
        yield [rental_id, txn_id, start, end, start, end, duration]
76+
77+
# Generate Data
78+
store_ids = list(range(1, NUM_STORES + 1))
customer_ids = list(range(1, NUM_CUSTOMERS + 1))
asset_ids = list(range(1, NUM_ASSETS + 1))
transaction_ids = list(range(1, NUM_TRANSACTIONS + 1))

# Maps each table name to the generator that yields its rows. The dict's
# insertion order is the order the COPY sections are written, and each table
# appears after the tables its rows reference by id.
tables = {
    "store_locations": generate_store_locations(),
    "customers": generate_customers(),
    "assets": generate_assets(store_ids),
    "transactions": generate_transactions(asset_ids, customer_ids),
    "rentals": generate_rentals(transaction_ids),
}

# Write to SQL file
sql_file = os.path.join(os.getcwd(), "generated_data.sql")

# An explicit encoding keeps the output identical across platforms instead of
# depending on the locale's default text encoding.
with open(sql_file, "w", encoding="utf-8") as f:
    f.write('SET search_path="Hardware Store";\n\n')

    for table_name, generator in tables.items():
        # One COPY section per table: header, tab-separated rows, then the
        # "\." end-of-data marker.
        f.write(f"COPY {table_name} FROM stdin;\n")
        for row in generator:
            cleaned_row = "\t".join(map(clean_value, row))
            f.write(f"{cleaned_row}\n")
        f.write("\\.\n\n")

print(f"SQL file generated: {sql_file}")

0 commit comments

Comments
 (0)