Skip to content

Commit eb94d8f

Browse files
authored
Cleaned up comments
1 parent 425f855 commit eb94d8f

File tree

1 file changed

+18
-52
lines changed

1 file changed

+18
-52
lines changed

examples/notebooks/retail_data_generation.py

Lines changed: 18 additions & 52 deletions
Original file line number | Diff line number | Diff line change
@@ -60,8 +60,8 @@
6060
spark.sql(f"USE CATALOG {CATALOG_NAME}")
6161
spark.sql(f"USE SCHEMA {SCHEMA_NAME}")
6262

63-
print(f"Generating data in: {CATALOG_NAME}.{SCHEMA_NAME}")
64-
print(f"📊 Total records to generate: {NUM_PRODUCTS + NUM_DISTRIBUTION_CENTERS + NUM_STORES + NUM_ORDERS + NUM_INVENTORY_RECORDS + NUM_SHIPMENTS:,}")
63+
print(f"Generating data in: {CATALOG_NAME}.{SCHEMA_NAME}")
64+
print(f"Total records to generate: {NUM_PRODUCTS + NUM_DISTRIBUTION_CENTERS + NUM_STORES + NUM_ORDERS + NUM_INVENTORY_RECORDS + NUM_SHIPMENTS:,}")
6565

6666
# COMMAND ----------
6767

@@ -127,7 +127,6 @@
127127
# Write to table
128128
df_products.write.mode("overwrite").saveAsTable("products")
129129

130-
# print(f"✅ Created products table with {df_products.count():,} records")
131130
display(df_products.limit(10))
132131

133132
# COMMAND ----------
@@ -179,7 +178,7 @@
179178
df_distribution_centers = distribution_center_spec.build()
180179
df_distribution_centers.write.mode("overwrite").saveAsTable("distribution_centers")
181180

182-
print(f"Created distribution_centers table with {df_distribution_centers.count():,} records")
181+
print(f"Created distribution_centers table with {df_distribution_centers.count():,} records")
183182
display(df_distribution_centers.limit(10))
184183

185184
# COMMAND ----------
@@ -227,8 +226,8 @@
227226
df_stores = stores_spec.build()
228227
df_stores.write.mode("overwrite").saveAsTable("stores")
229228

230-
print(f"Created stores table with {df_stores.count():,} records")
231-
print(f"🔗 Each store is linked to a distribution_center via distribution_center_id foreign key")
229+
print(f"Created stores table with {df_stores.count():,} records")
230+
print(f"Each store is linked to a distribution_center via distribution_center_id foreign key")
232231
display(df_stores.limit(10))
233232

234233
# COMMAND ----------
@@ -335,8 +334,8 @@
335334

336335
df_orders.write.mode("overwrite").saveAsTable("orders")
337336

338-
print(f"Created orders table with {df_orders.count():,} records")
339-
print(f"📊 Status distribution:")
337+
print(f"Created orders table with {df_orders.count():,} records")
338+
print(f"Order Status distribution:")
340339
df_orders.groupBy("status").count().orderBy("status").show()
341340

342341
# COMMAND ----------
@@ -434,10 +433,10 @@
434433

435434
df_inventory.write.mode("overwrite").saveAsTable("inventory")
436435

437-
print(f"Created inventory table with {df_inventory.count():,} records")
438-
print(f"📊 Location type distribution:")
436+
print(f"Created inventory table with {df_inventory.count():,} records")
437+
print(f"Location type distribution:")
439438
df_inventory.groupBy("location_type").count().show()
440-
print(f"⚠️ Stockout risk distribution:")
439+
print(f"Stockout risk distribution:")
441440
df_inventory.groupBy("stockout_risk").count().orderBy("stockout_risk").show()
442441

443442
# COMMAND ----------
@@ -570,10 +569,10 @@
570569

571570
df_shipments.write.mode("overwrite").saveAsTable("shipments")
572571

573-
print(f"Created shipments table with {df_shipments.count():,} records")
574-
print(f"🚚 Transport mode distribution:")
572+
print(f"Created shipments table with {df_shipments.count():,} records")
573+
print(f"Transport mode distribution:")
575574
df_shipments.groupBy("transport_mode").count().orderBy(F.desc("count")).show()
576-
print(f"📦 Shipment status distribution:")
575+
print(f"Shipment status distribution:")
577576
df_shipments.groupBy("status").count().orderBy(F.desc("count")).show()
578577
display(df_shipments)
579578

@@ -584,35 +583,30 @@
584583
# MAGIC
585584
# MAGIC This dataset enables the following analytics use cases:
586585
# MAGIC
587-
# MAGIC ### 📦 Inventory Optimization
588-
# MAGIC - Multi-echelon inventory visibility across distribution_centers and stores
586+
# MAGIC ### Inventory Optimization
589587
# MAGIC - Stockout risk identification and prediction
590588
# MAGIC - Days of supply analysis by product/location
591589
# MAGIC - Slow-moving inventory identification
592590
# MAGIC
593-
# MAGIC ### 🚚 Logistics & Transportation
591+
# MAGIC ### Logistics & Transportation
594592
# MAGIC - Carrier performance scorecards (OTD%, cost, speed)
595593
# MAGIC - Route optimization opportunities
596594
# MAGIC - Transport mode analysis (cost vs speed tradeoffs)
597-
# MAGIC - Delay root cause analysis
598595
# MAGIC
599-
# MAGIC ### 🏭 Order Planning
596+
# MAGIC ### Order Planning
600597
# MAGIC - Order schedule optimization
601598
# MAGIC - Line efficiency tracking
602599
# MAGIC - Capacity planning and utilization
603-
# MAGIC - Order-to-inventory flow analysis
604600
# MAGIC
605-
# MAGIC ### 📊 Supply Chain Analytics
601+
# MAGIC ### Supply Chain Analytics
606602
# MAGIC - End-to-end supply chain visibility
607603
# MAGIC - Network optimization (distribution_center placement, capacity)
608604
# MAGIC - Working capital optimization
609-
# MAGIC - Cost-to-serve analysis by region/channel
610605
# MAGIC
611-
# MAGIC ### 🤖 AI/ML Use Cases
606+
# MAGIC ### AI/ML Use Cases
612607
# MAGIC - Demand forecasting
613608
# MAGIC - Predictive maintenance (production efficiency)
614609
# MAGIC - Shipment delay prediction
615-
# MAGIC - Inventory replenishment optimization
616610

617611
# COMMAND ----------
618612

@@ -696,31 +690,3 @@
696690

697691
# COMMAND ----------
698692

699-
# MAGIC %md
700-
# MAGIC ## 🎉 Congratulations!
701-
# MAGIC
702-
# MAGIC You've successfully generated a complete CPG supply chain dataset using dbldatagen!
703-
# MAGIC
704-
# MAGIC ### What You've Learned:
705-
# MAGIC ✅ How to install and import dbldatagen
706-
# MAGIC ✅ Basic column generation with different data types
707-
# MAGIC ✅ Creating foreign key relationships
708-
# MAGIC ✅ Weighted categorical distributions
709-
# MAGIC ✅ Date/timestamp generation
710-
# MAGIC ✅ Post-processing with PySpark
711-
# MAGIC ✅ Safe handling of division and NULL values
712-
# MAGIC
713-
# MAGIC ### Your Dataset Includes:
714-
# MAGIC - 500 Products across 7 categories
715-
# MAGIC - 25 Distribution Centers
716-
# MAGIC - 1,000 Retail Stores
717-
# MAGIC - 10,000 Orders
718-
# MAGIC - 50,000 Inventory Records
719-
# MAGIC - 30,000 Shipments
720-
# MAGIC
721-
# MAGIC **Total: 91,525 records ready for analytics!**
722-
# MAGIC
723-
# MAGIC Now go build some amazing dashboards! 📊✨
724-
725-
# COMMAND ----------
726-

0 commit comments

Comments
 (0)