From 29c8fbc6beee7dcc7828a27c7851100bd9486d87 Mon Sep 17 00:00:00 2001 From: Vandana Malik Date: Wed, 29 Oct 2025 16:39:24 +0530 Subject: [PATCH 1/2] Added Read excel data code --- Data_Ingestion/Read_excel_data/README.md | 74 +++++++++++++++++++ .../Read_excel_data/read_excel.ipynb | 50 +++++++++++++ .../Read_excel_data/requirements.txt | 3 + 3 files changed, 127 insertions(+) create mode 100644 Data_Ingestion/Read_excel_data/README.md create mode 100644 Data_Ingestion/Read_excel_data/read_excel.ipynb create mode 100644 Data_Ingestion/Read_excel_data/requirements.txt diff --git a/Data_Ingestion/Read_excel_data/README.md b/Data_Ingestion/Read_excel_data/README.md new file mode 100644 index 0000000..63ed874 --- /dev/null +++ b/Data_Ingestion/Read_excel_data/README.md @@ -0,0 +1,74 @@ +# Read Excel Files in Spark and Pandas + +This module demonstrates two approaches to read Excel files within Spark environments like **OCI Data Flow**, **Databricks**, or **local Spark clusters**. + +--- + +## 1. Using `com.crealytics.spark.excel` + +This approach uses the **Spark Excel connector** developed by [Crealytics](https://github.com/crealytics/spark-excel). +It supports `.xls` and `.xlsx` files directly within Spark DataFrames. + +### Requirements + +You must add the following JARs to your cluster classpath: + +- poi-4.1.2.jar +- poi-ooxml-4.1.2.jar +- poi-ooxml-schemas-4.1.2.jar +- xmlbeans-3.1.0.jar +- curvesapi-1.06.jar +- commons-collections4-4.4.jar +- commons-compress-1.20.jar +- spark-excel_2.12-0.13.5.jar + +Download them from [Maven Central Repository](https://mvnrepository.com/). 
+ +### Example + +```python +excel_path = "/Volumes/test_data.xlsx" + +df = spark.read.format("com.crealytics.spark.excel") \ + .option("header", "true") \ + .option("inferSchema", "true") \ + .load(excel_path) + +df.show() +``` +# Excel to Spark using Pandas + +This example demonstrates how to **read Excel files using Pandas**, optionally convert them to **CSV**, and then **load them into Spark** for further processing. +It’s ideal for lightweight or pre-processing workflows before ingesting data into Spark. + +--- + +## Requirements + +Install the required dependencies via `requirements.txt`: +- `pandas` +- `openpyxl` +- `xlrd` + +### Example + +```python +import pandas as pd + +# Path to Excel file +excel_path = "/Volumes/test_data.xlsx" + +# Read Excel file using Pandas +df = pd.read_excel(excel_path) + +# Convert to CSV if needed +csv_path = "/Volumes/test_data.csv" +df.to_csv(csv_path, index=False) + +print(df.head()) + +# Load the CSV back into Spark +spark_df = spark.read.csv(csv_path, header=True, inferSchema=True) +spark_df.show() + +``` diff --git a/Data_Ingestion/Read_excel_data/read_excel.ipynb b/Data_Ingestion/Read_excel_data/read_excel.ipynb new file mode 100644 index 0000000..02afe32 --- /dev/null +++ b/Data_Ingestion/Read_excel_data/read_excel.ipynb @@ -0,0 +1,50 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "## Reading by using inbuilt spark library\n", + "excel_path = \"/Volumes/test_data.xlsx\"\n", + "\n", + "## You must add the JARs to your cluster classpath as per README.md\n", + "df = spark.read.format(\"com.crealytics.spark.excel\") \\\n", + " .option(\"header\", \"true\") \\\n", + " .option(\"inferSchema\", \"true\") \\\n", + " .load(excel_path)\n", + "\n", + "df.show()" + ], + "id": "4d1c762a078b6ac2" + }, + { + "metadata": {}, + "cell_type": "code", + "outputs": [], + "execution_count": null, + "source": [ + "## Using pandas to convert excel into csv and then 
read in spark\n", + "import pandas as pd\n", + "\n", + "excel_path = \"/Volumes/test_data.xlsx\"\n", + "df = pd.read_excel(excel_path)\n", + "\n", + "# Convert to CSV if needed\n", + "csv_path = \"/Volumes/test_data.csv\"\n", + "df.to_csv(csv_path, index=False)\n", + "\n", + "print(df.head())\n", + "\n", + "# Load CSV back into Spark\n", + "spark_df = spark.read.csv(csv_path, header=True, inferSchema=True)\n", + "spark_df.show()\n" + ], + "id": "3d929687c9b1c44a" + } + ], + "metadata": {}, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/Data_Ingestion/Read_excel_data/requirements.txt b/Data_Ingestion/Read_excel_data/requirements.txt new file mode 100644 index 0000000..a136b4f --- /dev/null +++ b/Data_Ingestion/Read_excel_data/requirements.txt @@ -0,0 +1,3 @@ +pandas +openpyxl +xlrd From 76801b952eebc940e7fc129f0fe6c763ff466966 Mon Sep 17 00:00:00 2001 From: Vandana Malik Date: Thu, 30 Oct 2025 18:08:44 +0530 Subject: [PATCH 2/2] Added data generator code --- data_generator/README.md | 390 ++++++++++ data_generator/config/config_all_types.yaml | 217 ++++++ data_generator/config/config_ecommerce.yaml | 153 ++++ data_generator/config/config_simple.yaml | 61 ++ .../data_code_generator_Example.ipynb | 175 +++++ data_generator/data_generator.py | 453 +++++++++++ data_generator/output/orders.csv | 101 +++ data_generator/output/orders.json | 702 ++++++++++++++++++ data_generator/output/users.csv | 21 + data_generator/output/users.json | 142 ++++ data_generator/requirements.txt | 2 + 11 files changed, 2417 insertions(+) create mode 100644 data_generator/README.md create mode 100644 data_generator/config/config_all_types.yaml create mode 100644 data_generator/config/config_ecommerce.yaml create mode 100644 data_generator/config/config_simple.yaml create mode 100644 data_generator/data_code_generator_Example.ipynb create mode 100644 data_generator/data_generator.py create mode 100644 data_generator/output/orders.csv create mode 100644 data_generator/output/orders.json 
create mode 100644 data_generator/output/users.csv create mode 100644 data_generator/output/users.json create mode 100644 data_generator/requirements.txt diff --git a/data_generator/README.md b/data_generator/README.md new file mode 100644 index 0000000..245c55e --- /dev/null +++ b/data_generator/README.md @@ -0,0 +1,390 @@ +# Data Generator Project - Summary + +## What You Have + +A complete, production-ready **Multi-Table Data Generator** with: +- ✅ Python module (.py file) +- ✅ Interactive Jupyter notebook (.ipynb file) +- ✅ Configuration files (.yaml files) +- ✅ Complete documentation (README.md) +- ✅ Setup guide (PROJECT_SETUP.md) + +--- + +## All Files Created + +### Core Files (Required) +1. **`data_generator.py`** - Main Python module + - Contains `MultiTableDataGenerator` class + - All generation logic + +2. **`requirements.txt`** - Dependencies + - pyyaml (required) + - pandas (recommended) + +### Tutorial & Examples +3. **`DataGenerator_Tutorial.ipynb`** - Jupyter Notebook + - Interactive tutorial + - Step-by-step examples + - Data analysis examples + - Ready to run + +4. **`config_simple.yaml`** - Simple Example + - 2 tables (users + orders) + - Foreign key relationship + - Easy to understand + +5. **`config_ecommerce.yaml`** - E-commerce Example + - 4 tables (customers, products, orders, reviews) + - Multiple foreign keys + - Realistic scenario + +6. **`config_all_types.yaml`** - Complete Reference + - Shows ALL column types + - Reference documentation + - Copy-paste templates + +### Documentation +7. **`README.md`** - Complete Documentation + - Features overview + - Installation guide + - API reference + - Examples + - Troubleshooting + +8. **`PROJECT_SETUP.md`** - Setup Guide + - Step-by-step setup + - Directory structure + - Testing instructions + - Troubleshooting + +9. 
**`COMPLETE_PROJECT_SUMMARY.md`** - This File + - Quick overview + - Usage instructions + - File descriptions + +--- + +## Quick Start (3 Steps) + +### Step 1: Setup +```bash +# Create directory +mkdir data-generator +cd data-generator + +# Save all 9 files in this directory + +# Create requirements.txt with the following dependencies, and install them on the cluster +pyyaml +pandas +``` + +### Step 2: Test +```python +# Test basic generation +from data_generator import MultiTableDataGenerator + +MultiTableDataGenerator(seed=42).generate_from_config('config_simple.yaml') +``` + +### Step 3: Explore + +Open the `DataGenerator_Tutorial.ipynb` notebook in AIDP and run the cells, changing the paths to match your folder. + + +--- + +## 📋 File Purposes + +| File | What It Does | When to Use | +|------|--------------|-------------| +| `data_generator.py` | Core generator class | Import in your code | +| `DataGenerator_Tutorial.ipynb` | Interactive tutorial | Learning & examples | +| `config_simple.yaml` | Basic 2-table example | Quick testing | +| `config_ecommerce.yaml` | Real-world scenario | Complex relationships | +| `config_all_types.yaml` | All features demo | Reference guide | +| `README.md` | Full documentation | When stuck | +| `requirements.txt` | Dependencies | Installation | + +--- + +## 💡 Usage Examples + +### Example 1: Python Script +```python +from data_generator import MultiTableDataGenerator + +# Simple usage +generator = MultiTableDataGenerator(seed=42) +results = generator.generate_from_config('config_simple.yaml') + +# View sample +generator.print_sample('users', n=5) + +# Get as DataFrame +df = generator.get_dataframe('users') +``` + +### Example 3: Custom Configuration +```python +config = { + 'table_name': 'my_data', + 'rows_count': 100, + 'output_format': 'both', + 'columns': [ + {'name': 'id', 'type': 'integer', 'range': [1, 1000], 'unique': True}, + {'name': 'name', 'type': 'string', 'length': 8}, + {'name': 'email', 'type': 'email', 
'unique': True} + ] +} + +generator = MultiTableDataGenerator(seed=42) +generator.generate_from_config(config) +``` + +### Example 4: From Config File +```python +# Use existing config +generator = MultiTableDataGenerator(seed=42) +results = generator.generate_from_config('config_ecommerce.yaml') + +# Access tables +df_customers = generator.get_dataframe('customers') +df_orders = generator.get_dataframe('orders') +``` + +--- + +## Learning Path + +1. **Start**: Read `README.md` +2. **Learn**: Open `DataGenerator_Tutorial.ipynb` +3. **Practice**: Modify `config_simple.yaml` +4. **Build**: Create your own config + +--- + +## Key Features + +### Multi-Table Support +```yaml +tables: + - table_name: users + rows_count: 10 + - table_name: orders + rows_count: 50 +``` + +### Foreign Keys +```yaml +- name: user_id + type: reference + ref_table: users + ref_column: user_id +``` + +### 11+ Column Types +- integer, float, string +- choice (with weights) +- boolean +- date, datetime +- email, phone, uuid +- reference (foreign key) + +### Automatic Features +- Dependency resolution +- Unique constraints +- Progress indicators +- CSV/JSON export +- Pandas integration + +--- + +## 📊 Output Structure + +After running, you'll get: + +``` +Data_generator/ +├── [All your source files] +│ +└── output/ (or ecommerce_data/, etc.) + ├── users.csv + ├── users.json + ├── orders.csv + └── orders.json +``` + +--- + +## 🎯 Common Use Cases + +### 1. Testing Databases +```python +# Generate test data +gen = MultiTableDataGenerator(seed=42) +gen.generate_from_config('config_ecommerce.yaml') +# Import CSVs into your database +``` + +### 2. Prototyping Applications +```python +# Quick demo data +gen = MultiTableDataGenerator() +gen.generate_from_config('config_simple.yaml') +# Use in your app prototype +``` + +### 3. 
Data Science Practice +```python +# Generate training data +gen = MultiTableDataGenerator(seed=100) +results = gen.generate_from_config('my_ml_config.yaml') +df = gen.get_dataframe('features') +# Use for ML experiments +``` + +### 4. API Testing +```python +# Generate test payloads +gen = MultiTableDataGenerator() +results = gen.generate_from_config('api_test_config.yaml') +# Use in API tests +``` + +--- + +## Configuration Cheat Sheet + +### Basic Structure +```yaml +table_name: my_table +rows_count: 100 +output_format: both +columns: [...] +``` + +### Multi-Table Structure +```yaml +output_path: ./output +output_format: both +tables: + - table_name: table1 + rows_count: 10 + columns: [...] + - table_name: table2 + rows_count: 50 + columns: [...] +``` + +### Column Template +```yaml +- name: column_name + type: column_type + # type-specific options... + unique: false # optional +``` + +--- + +## Commands Reference + +```python + +# Python interactive + +>>> from data_generator import MultiTableDataGenerator +>>> gen = MultiTableDataGenerator(seed=42) +>>> gen.generate_from_config('config_simple.yaml') + +# Check output +! 
ls -la output/ +``` + +--- + +## 📈 Scaling Tips + +| Dataset Size | Recommendation | +|--------------|----------------| +| < 1K rows | Any format, instant | +| 1K - 100K rows | Prefer CSV, seconds | +| 100K - 1M rows | CSV only, minutes | +| 1M+ rows | Batch generation | + +--- + +## Quick Troubleshooting + +| Problem | Solution | +|---------|----------| +| Module not found | `pip install pyyaml pandas` | +| Config not found | Check file path, use `ls` | +| Can't generate unique | Increase range | +| Referenced table error | Parent table must be first | + + +--- + +## Success Checklist + +- [ ] All 9 files saved +- [ ] Dependencies installed (`pip install pyyaml pandas`) +- [ ] Can import: `from data_generator import MultiTableDataGenerator` +- [ ] `config_simple.yaml` generates successfully +- [ ] Output files created in `./output/` +- [ ] Jupyter notebook opens and runs +- [ ] Can create custom configs + +**All checked? You're ready to generate data!** + + +## Notes + +- **Reproducibility**: Use seeds (`seed=42`) for consistent results +- **Performance**: CSV is faster than JSON for large datasets +- **Testing**: Start with small `rows_count` (10-20) for testing +- **Safety**: Generated data is saved automatically +- **Pandas**: Use `get_dataframe()` for easy data analysis + +--- + +## What You Can Build + +With this generator, you can create: +- ✅ E-commerce databases +- ✅ Social media datasets +- ✅ School management systems +- ✅ Hospital records +- ✅ Banking transactions +- ✅ IoT sensor data +- ✅ Any relational database! + +--- + +## 🚀 Get Started Now + +```bash +# 1. Install +pip install pyyaml pandas + +# 2. Test +python -c "from data_generator import MultiTableDataGenerator; \ + MultiTableDataGenerator(seed=42).generate_from_config('config_simple.yaml')" + +``` + +--- + +**You have everything you need to generate professional-quality test data!** + +**Questions?** Check `README.md` for complete documentation. 
+ +**Want examples?** Open `DataGenerator_Tutorial.ipynb` for interactive tutorials. + +**Ready to build?** Start with `config_simple.yaml` and customize it! + +--- + +**Happy Data Generating!** \ No newline at end of file diff --git a/data_generator/config/config_all_types.yaml b/data_generator/config/config_all_types.yaml new file mode 100644 index 0000000..1401582 --- /dev/null +++ b/data_generator/config/config_all_types.yaml @@ -0,0 +1,217 @@ +# Complete Configuration Example +# This file demonstrates ALL available column types and features + +output_path: ./output +output_format: both # csv, json, or both + +tables: + # Demo table showing all column types + - table_name: demo_all_types + rows_count: 20 + columns: + # 1. INTEGER - whole numbers + - name: id + type: integer + range: [1, 1000] + unique: true + + # 2. FLOAT - decimal numbers + - name: price + type: float + range: [10.0, 1000.0] + decimals: 2 + + # 3. STRING - random text + - name: code + type: string + length: 8 + prefix: 'PROD_' + suffix: '_A' + unique: true + + # 4. CHOICE - select from list (simple) + - name: category + type: choice + values: [Electronics, Clothing, Food, Books] + + # 5. CHOICE - with weights (probability) + - name: status + type: choice + values: [active, inactive, pending] + weights: [0.7, 0.2, 0.1] # 70% active, 20% inactive, 10% pending + + # 6. BOOLEAN - true/false + - name: is_available + type: boolean + true_probability: 0.8 # 80% true, 20% false + + # 7. DATE - date only + - name: created_date + type: date + range: ['2023-01-01', '2024-12-31'] + format: '%Y-%m-%d' + + # 8. DATETIME - date and time + - name: last_updated + type: datetime + range: ['2024-01-01 00:00:00', '2024-12-31 23:59:59'] + format: '%Y-%m-%d %H:%M:%S' + + # 9. EMAIL - email addresses + - name: contact_email + type: email + domains: [example.com, test.com, demo.org] + unique: true + + # 10. 
PHONE - phone numbers + - name: phone + type: phone + pattern: '+1-###-###-####' # # will be replaced with digits + + # 11. UUID - unique identifiers + - name: transaction_id + type: uuid + + # Parent table for reference example + - table_name: reference_parent + rows_count: 10 + columns: + - name: parent_id + type: integer + range: [1, 100] + unique: true + + - name: parent_name + type: string + length: 10 + prefix: 'PARENT_' + + # Child table with foreign key reference + - table_name: reference_child + rows_count: 50 + columns: + - name: child_id + type: uuid + + # 12. REFERENCE - foreign key to another table + - name: parent_id + type: reference + ref_table: reference_parent + ref_column: parent_id + + - name: child_value + type: integer + range: [1, 100] + + +# ======================================== +# Additional Configuration Examples +# ======================================== + +# Example: Simple Users Table +# ---------------------------- +# table_name: users +# rows_count: 50 +# output_format: csv +# columns: +# - name: user_id +# type: integer +# range: [1, 1000] +# unique: true +# - name: username +# type: string +# length: 8 +# - name: email +# type: email +# unique: true +# - name: age +# type: integer +# range: [18, 65] + + +# Example: Multi-table with relationships +# ---------------------------------------- +# tables: +# - table_name: customers +# rows_count: 20 +# columns: +# - name: customer_id +# type: integer +# range: [1000, 9999] +# unique: true +# - name: name +# type: choice +# values: [Alice, Bob, Charlie] +# +# - table_name: orders +# rows_count: 100 +# columns: +# - name: order_id +# type: uuid +# - name: customer_id +# type: reference +# ref_table: customers +# ref_column: customer_id +# - name: amount +# type: float +# range: [10.0, 1000.0] +# decimals: 2 + + +# Column Type Reference +# --------------------- +# +# INTEGER: +# type: integer +# range: [min, max] +# unique: true/false +# +# FLOAT: +# type: float +# range: [min, max] +# 
decimals: 2 +# unique: true/false +# +# STRING: +# type: string +# length: 10 +# prefix: 'PRE_' +# suffix: '_SUF' +# chars: 'ABC123' # optional +# unique: true/false +# +# CHOICE: +# type: choice +# values: [opt1, opt2, opt3] +# weights: [0.5, 0.3, 0.2] # optional +# +# BOOLEAN: +# type: boolean +# true_probability: 0.7 # 0.0 to 1.0 +# +# DATE: +# type: date +# range: ['2023-01-01', '2024-12-31'] +# format: '%Y-%m-%d' +# +# DATETIME: +# type: datetime +# range: ['2024-01-01 00:00:00', '2024-12-31 23:59:59'] +# format: '%Y-%m-%d %H:%M:%S' +# +# EMAIL: +# type: email +# domains: [example.com] # optional +# unique: true/false +# +# PHONE: +# type: phone +# pattern: '+1-###-###-####' +# +# UUID: +# type: uuid +# +# REFERENCE (Foreign Key): +# type: reference +# ref_table: other_table +# ref_column: column_name \ No newline at end of file diff --git a/data_generator/config/config_ecommerce.yaml b/data_generator/config/config_ecommerce.yaml new file mode 100644 index 0000000..1855d45 --- /dev/null +++ b/data_generator/config/config_ecommerce.yaml @@ -0,0 +1,153 @@ +# E-commerce Database Configuration +# This configuration generates a complete e-commerce database with foreign key relationships + +output_path: ./ecommerce_data +output_format: both # Generate both CSV and JSON + +tables: + # Table 1: Customers + - table_name: customers + rows_count: 50 + columns: + - name: customer_id + type: integer + range: [10000, 99999] + unique: true + + - name: customer_name + type: choice + values: + - John Doe + - Jane Smith + - Alice Johnson + - Bob Williams + - Charlie Brown + - Diana Martinez + - Edward Wilson + - Fiona Taylor + + - name: email + type: email + domains: [gmail.com, yahoo.com, outlook.com] + unique: true + + - name: phone + type: phone + pattern: '+1-###-###-####' + + - name: registration_date + type: date + range: ['2023-01-01', '2024-12-31'] + format: '%Y-%m-%d' + + - name: is_premium + type: boolean + true_probability: 0.3 + + # Table 2: Products + - table_name: 
products + rows_count: 30 + columns: + - name: product_id + type: integer + range: [1000, 9999] + unique: true + + - name: product_name + type: choice + values: + - Laptop + - Desktop Computer + - Smartphone + - Tablet + - Smartwatch + - Wireless Earbuds + - Headphones + - Monitor + - Keyboard + - Mouse + + - name: category + type: choice + values: [Electronics, Computers, Accessories, Wearables] + weights: [0.3, 0.3, 0.25, 0.15] + + - name: price + type: float + range: [99.99, 1999.99] + decimals: 2 + + - name: stock_quantity + type: integer + range: [0, 100] + + - name: is_available + type: boolean + true_probability: 0.9 + + # Table 3: Orders (References customers and products) + - table_name: orders + rows_count: 200 + columns: + - name: order_id + type: uuid + + - name: customer_id + type: reference + ref_table: customers + ref_column: customer_id + + - name: product_id + type: reference + ref_table: products + ref_column: product_id + + - name: quantity + type: integer + range: [1, 5] + + - name: order_date + type: datetime + range: ['2024-01-01 00:00:00', '2024-12-31 23:59:59'] + format: '%Y-%m-%d %H:%M:%S' + + - name: status + type: choice + values: [pending, processing, shipped, delivered, cancelled] + weights: [0.05, 0.1, 0.25, 0.55, 0.05] + + - name: payment_method + type: choice + values: [credit_card, debit_card, paypal, bank_transfer] + weights: [0.5, 0.3, 0.15, 0.05] + + # Table 4: Reviews (References customers and products) + - table_name: reviews + rows_count: 150 + columns: + - name: review_id + type: uuid + + - name: customer_id + type: reference + ref_table: customers + ref_column: customer_id + + - name: product_id + type: reference + ref_table: products + ref_column: product_id + + - name: rating + type: choice + values: [1, 2, 3, 4, 5] + weights: [0.05, 0.1, 0.15, 0.35, 0.35] + + - name: review_date + type: date + range: ['2024-01-01', '2024-12-31'] + format: '%Y-%m-%d' + + - name: verified_purchase + type: boolean + true_probability: 0.85 \ 
No newline at end of file diff --git a/data_generator/config/config_simple.yaml b/data_generator/config/config_simple.yaml new file mode 100644 index 0000000..806318c --- /dev/null +++ b/data_generator/config/config_simple.yaml @@ -0,0 +1,61 @@ +# Simple Configuration Example +# This is a basic configuration with two related tables + +output_path: ./output +output_format: both + +tables: + # Table 1: Users (Parent) + - table_name: users + rows_count: 20 + columns: + - name: user_id + type: integer + range: [1, 1000] + unique: true + + - name: username + type: string + length: 8 + prefix: 'user_' + + - name: email + type: email + domains: [example.com, test.com] + unique: true + + - name: age + type: integer + range: [18, 65] + + - name: is_active + type: boolean + true_probability: 0.8 + + # Table 2: Orders (Child - references users) + - table_name: orders + rows_count: 100 + columns: + - name: order_id + type: uuid + + # Foreign key to users table + - name: user_id + type: reference + ref_table: users + ref_column: user_id + + - name: amount + type: float + range: [10.0, 1000.0] + decimals: 2 + + - name: status + type: choice + values: [pending, completed, cancelled] + weights: [0.2, 0.7, 0.1] + + - name: order_date + type: date + range: ['2024-01-01', '2024-12-31'] + format: '%Y-%m-%d' \ No newline at end of file diff --git a/data_generator/data_code_generator_Example.ipynb b/data_generator/data_code_generator_Example.ipynb new file mode 100644 index 0000000..10791e7 --- /dev/null +++ b/data_generator/data_code_generator_Example.ipynb @@ -0,0 +1,175 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "initial_id", + "metadata": { + "execution": { + "iopub.status.busy": "2025-10-30T12:05:09.672Z" + } + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\n", + "================================================================================\n", + "Generating table: users\n", + 
"================================================================================\n", + "Generating 20 rows...\n", + " Progress: 10% (2/20 rows)\n", + " Progress: 20% (4/20 rows)\n", + " Progress: 30% (6/20 rows)\n", + " Progress: 40% (8/20 rows)\n", + " Progress: 50% (10/20 rows)\n", + " Progress: 60% (12/20 rows)\n", + " Progress: 70% (14/20 rows)\n", + " Progress: 80% (16/20 rows)\n", + " Progress: 90% (18/20 rows)\n", + " Progress: 100% (20/20 rows)\n", + "Generated 20 rows\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Exported to: output/users.csv\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Exported to: output/users.json\n", + "\n", + "================================================================================\n", + "Generating table: orders\n", + "================================================================================\n", + "Generating 100 rows...\n", + " Progress: 10% (10/100 rows)\n", + " Progress: 20% (20/100 rows)\n", + " Progress: 30% (30/100 rows)\n", + " Progress: 40% (40/100 rows)\n", + " Progress: 50% (50/100 rows)\n", + " Progress: 60% (60/100 rows)\n", + " Progress: 70% (70/100 rows)\n", + " Progress: 80% (80/100 rows)\n", + " Progress: 90% (90/100 rows)\n", + " Progress: 100% (100/100 rows)\n", + "Generated 100 rows\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Exported to: output/orders.csv\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "Exported to: output/orders.json\n", + "\n", + "================================================================================\n", + "ALL TABLES GENERATED SUCCESSFULLY\n", + "================================================================================\n", + " 📊 users: 20 rows\n", + " 📊 orders: 100 rows\n", + 
"================================================================================\n", + "\n", + "================================================================================\n", + "SAMPLE DATA: users (First 5 rows)\n", + "================================================================================\n", + "\n", + "Row 1:\n", + " user_id : 655\n", + " username : user_gTpigTHK\n", + " email : bhsah@test.com\n", + " age : 32\n", + " is_active : True\n", + "\n", + "Row 2:\n", + " user_id : 285\n", + " username : user_YaXRvj7u\n", + " email : nmmjbq@example.com\n", + " age : 42\n", + " is_active : True\n", + "\n", + "Row 3:\n", + " user_id : 301\n", + " username : user_ZM1JRcor\n", + " email : 45nq4f@test.com\n", + " age : 31\n", + " is_active : True\n", + "\n", + "Row 4:\n", + " user_id : 719\n", + " username : user_6OLkTkx9\n", + " email : yhl1c32oc6uzhr5@test.com\n", + " age : 27\n", + " is_active : True\n", + "\n", + "Row 5:\n", + " user_id : 253\n", + " username : user_UHUAKw9i\n", + " email : dbdw2pcn9t84@example.com\n", + " age : 61\n", + " is_active : True\n", + "================================================================================\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Import the necessary class\n", + "from data_generator.data_generator import MultiTableDataGenerator\n", + "\n", + "# Initialize the data generator with a seed for reproducibility\n", + "gen = MultiTableDataGenerator(seed=42)\n", + "\n", + "# Generate data based on the configuration\n", + "gen.generate_from_config(\"/Workspace/data_generator/config/config_simple.yaml\")\n", + "\n", + "# Print a sample of the generated data for the \"users\" table\n", + "gen.print_sample(\"users\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fd833408-0d0e-497c-8136-19eb4e50fcab", + "metadata": { + "type": "python" + }, + "outputs": [], + "source": [] + } + ], + "metadata": { + "Last_Active_Cell_Index": 1, + 
"kernelspec": { + "name": "notebook" + }, + "language_info": { + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/data_generator/data_generator.py b/data_generator/data_generator.py new file mode 100644 index 0000000..71b93a6 --- /dev/null +++ b/data_generator/data_generator.py @@ -0,0 +1,453 @@ +""" +Multi-Table Data Generator with Foreign Key Support + +This module provides a flexible data generator that supports: +- Multiple tables in one configuration +- Foreign key relationships between tables +- 11+ column types +- Automatic dependency resolution +- Unique constraints +- Weighted choices +- CSV and JSON export + +Author: Data Generator Team +Version: 2.0 +""" + +import random +import string +import json +import yaml +import csv +from datetime import datetime, timedelta +from typing import Any, Dict, List, Optional, Union +from pathlib import Path + + +class MultiTableDataGenerator: + """ + Advanced data generator with multi-table and foreign key support. + + Example: + generator = MultiTableDataGenerator(seed=42) + config = { + 'tables': [ + {'table_name': 'users', 'rows_count': 10, 'columns': [...]}, + {'table_name': 'orders', 'rows_count': 50, 'columns': [...]} + ] + } + results = generator.generate_from_config(config) + """ + + def __init__(self, seed: Optional[int] = None): + """ + Initialize the generator. + + Args: + seed: Random seed for reproducibility + """ + if seed is not None: + random.seed(seed) + self.used_values = {} + self.generated_tables = {} + + def generate_from_config(self, config: Union[str, Dict]) -> Dict[str, Any]: + """ + Generate data for multiple tables from configuration. 
+ + Args: + config: Either config dict or path to config file (YAML/JSON) + + Returns: + Dictionary with all generated tables + """ + # Load config if it's a file path + if isinstance(config, str): + config_path = Path(config) + if config_path.suffix in ['.yaml', '.yml']: + with open(config_path, 'r', encoding='utf-8') as f: + config = yaml.safe_load(f) + elif config_path.suffix == '.json': + with open(config_path, 'r', encoding='utf-8') as f: + config = json.load(f) + else: + raise ValueError("Config file must be .yaml, .yml, or .json") + + # Reset state + self.generated_tables = {} + + # Handle both single table and multiple tables config + if 'tables' in config: + tables_config = config['tables'] + output_path = config.get('output_path', './output') + output_format = config.get('output_format', 'csv') + elif 'table_name' in config: + tables_config = [config] + output_path = config.get('output_path', './output') + output_format = config.get('output_format', 'csv') + else: + raise ValueError("Config must have either 'tables' or 'table_name'") + + # Sort tables by dependencies + sorted_tables = self._sort_tables_by_dependencies(tables_config) + + # Generate each table + results = {} + for table_config in sorted_tables: + table_name = table_config['table_name'] + print(f"\n{'=' * 80}") + print(f"Generating table: {table_name}") + print('=' * 80) + + result = self._generate_single_table(table_config, output_path, output_format) + results[table_name] = result + + print(f"\n{'=' * 80}") + print("ALL TABLES GENERATED SUCCESSFULLY") + print('=' * 80) + for table_name, result in results.items(): + print(f" 📊 {table_name}: {result['rows_count']} rows") + print('=' * 80) + + return results + + def _sort_tables_by_dependencies(self, tables_config: List[Dict]) -> List[Dict]: + """Sort tables based on foreign key dependencies.""" + dependencies = {} + table_map = {t['table_name']: t for t in tables_config} + + for table in tables_config: + table_name = table['table_name'] + 
deps = set() + + for col in table.get('columns', []): + if col.get('type') == 'reference': + ref_table = col.get('ref_table') + if ref_table: + deps.add(ref_table) + + col_name = col.get('name', '') + if '.' in col_name: + ref_table = col_name.split('.')[0] + deps.add(ref_table) + + dependencies[table_name] = deps + + # Topological sort + sorted_tables = [] + visited = set() + + def visit(table_name): + if table_name in visited: + return + visited.add(table_name) + for dep in dependencies.get(table_name, set()): + if dep in table_map: + visit(dep) + if table_name in table_map: + sorted_tables.append(table_map[table_name]) + + for table_name in dependencies: + visit(table_name) + + return sorted_tables + + def _generate_single_table(self, table_config: Dict, output_path: str, output_format: str) -> Dict: + """Generate data for a single table.""" + table_name = table_config['table_name'] + rows_count = table_config.get('rows_count', 100) + columns = table_config.get('columns', []) + + table_output_path = table_config.get('output_path', output_path) + table_output_format = table_config.get('output_format', output_format) + + self.used_values = { + col['name']: set() + for col in columns + if col.get('unique', False) + } + + print(f"Generating {rows_count} rows...") + data = [] + for i in range(rows_count): + row = {} + for col in columns: + row[col['name']] = self._generate_value(col, table_name) + data.append(row) + + if (i + 1) % max(1, rows_count // 10) == 0: + progress = ((i + 1) / rows_count) * 100 + print(f" Progress: {progress:.0f}% ({i + 1}/{rows_count} rows)") + + print(f"Generated {len(data)} rows") + + # Store for references + self.generated_tables[table_name] = data + + # Export + self._export_data(data, table_name, table_output_format, table_output_path) + + return { + 'table_name': table_name, + 'rows_count': len(data), + 'columns': [col['name'] for col in columns], + 'data': data + } + + def _generate_value(self, col: Dict[str, Any], current_table: str) 
-> Any: + """Generate a single value, handling references.""" + col_type = col['type'].lower() + col_name = col['name'] + unique = col.get('unique', False) + + # Handle reference type + if col_type == 'reference': + return self._generate_reference(col) + + # Handle table.column pattern + if '.' in col_name: + return self._generate_reference_from_name(col_name) + + # Regular column types + if col_type == 'integer': + return self._generate_integer(col, unique, col_name) + elif col_type == 'float': + return self._generate_float(col, unique, col_name) + elif col_type == 'string': + return self._generate_string(col, unique, col_name) + elif col_type == 'choice': + return self._generate_choice(col) + elif col_type == 'boolean': + return self._generate_boolean(col) + elif col_type == 'date': + return self._generate_date(col) + elif col_type == 'datetime': + return self._generate_datetime(col) + elif col_type == 'email': + return self._generate_email(col, unique, col_name) + elif col_type == 'phone': + return self._generate_phone(col) + elif col_type == 'uuid': + return self._generate_uuid() + else: + raise ValueError(f"Unsupported column type: {col_type}") + + def _generate_reference(self, col: Dict) -> Any: + """Generate value from referenced table.""" + ref_table = col.get('ref_table') + ref_column = col.get('ref_column') + + if not ref_table or not ref_column: + raise ValueError(f"Reference column must have 'ref_table' and 'ref_column'") + + if ref_table not in self.generated_tables: + raise ValueError(f"Referenced table '{ref_table}' not generated yet") + + ref_data = self.generated_tables[ref_table] + if not ref_data: + raise ValueError(f"Referenced table '{ref_table}' is empty") + + ref_values = [row[ref_column] for row in ref_data if ref_column in row] + if not ref_values: + raise ValueError(f"Column '{ref_column}' not found in table '{ref_table}'") + + return random.choice(ref_values) + + def _generate_reference_from_name(self, col_name: str) -> Any: + """Generate 
value from table.column format in name.""" + parts = col_name.split('.') + if len(parts) != 2: + raise ValueError(f"Invalid reference format: {col_name}") + + ref_table, ref_column = parts + + if ref_table not in self.generated_tables: + raise ValueError(f"Referenced table '{ref_table}' not generated yet") + + ref_data = self.generated_tables[ref_table] + ref_values = [row[ref_column] for row in ref_data if ref_column in row] + + if not ref_values: + raise ValueError(f"Column '{ref_column}' not found in table '{ref_table}'") + + return random.choice(ref_values) + + def _ensure_unique(self, value: Any, col_name: str, generator_func, max_attempts: int = 1000) -> Any: + """Ensure the generated value is unique.""" + attempts = 0 + while value in self.used_values[col_name] and attempts < max_attempts: + value = generator_func() + attempts += 1 + + if attempts >= max_attempts: + raise ValueError(f"Could not generate unique value for '{col_name}'") + + self.used_values[col_name].add(value) + return value + + def _generate_integer(self, col: Dict, unique: bool, col_name: str) -> int: + range_vals = col.get('range', [0, 100]) + + def gen(): + return random.randint(range_vals[0], range_vals[1]) + + value = gen() + if unique: + value = self._ensure_unique(value, col_name, gen) + return value + + def _generate_float(self, col: Dict, unique: bool, col_name: str) -> float: + range_vals = col.get('range', [0.0, 100.0]) + decimals = col.get('decimals', 2) + + def gen(): + return round(random.uniform(range_vals[0], range_vals[1]), decimals) + + value = gen() + if unique: + value = self._ensure_unique(value, col_name, gen) + return value + + def _generate_string(self, col: Dict, unique: bool, col_name: str) -> str: + length = col.get('length', 10) + prefix = col.get('prefix', '') + suffix = col.get('suffix', '') + chars = col.get('chars', string.ascii_letters + string.digits) + + def gen(): + random_str = ''.join(random.choices(chars, k=length)) + return 
f"{prefix}{random_str}{suffix}" + + value = gen() + if unique: + value = self._ensure_unique(value, col_name, gen) + return value + + def _generate_choice(self, col: Dict) -> Any: + values = col.get('values', []) + if not values: + raise ValueError(f"Column '{col['name']}' requires 'values' list") + weights = col.get('weights', None) + return random.choices(values, weights=weights, k=1)[0] + + def _generate_boolean(self, col: Dict) -> bool: + true_probability = col.get('true_probability', 0.5) + return random.random() < true_probability + + def _generate_date(self, col: Dict) -> str: + date_format = col.get('format', '%Y-%m-%d') + if 'range' in col: + start_str, end_str = col['range'] + start_date = datetime.strptime(start_str, date_format) + end_date = datetime.strptime(end_str, date_format) + days_between = (end_date - start_date).days + random_days = random.randint(0, days_between) + random_date = start_date + timedelta(days=random_days) + else: + random_date = datetime.now() - timedelta(days=random.randint(0, 365)) + return random_date.strftime(date_format) + + def _generate_datetime(self, col: Dict) -> str: + dt_format = col.get('format', '%Y-%m-%d %H:%M:%S') + if 'range' in col: + start_str, end_str = col['range'] + start_dt = datetime.strptime(start_str, dt_format) + end_dt = datetime.strptime(end_str, dt_format) + seconds_between = int((end_dt - start_dt).total_seconds()) + random_seconds = random.randint(0, seconds_between) + random_dt = start_dt + timedelta(seconds=random_seconds) + else: + random_dt = datetime.now() - timedelta(seconds=random.randint(0, 31536000)) + return random_dt.strftime(dt_format) + + def _generate_email(self, col: Dict, unique: bool, col_name: str) -> str: + domains = col.get('domains', ['example.com']) + + def gen(): + username_length = random.randint(5, 15) + username = ''.join(random.choices(string.ascii_lowercase + string.digits, k=username_length)) + domain = random.choice(domains) + return f"{username}@{domain}" + + value = 
gen() + if unique: + value = self._ensure_unique(value, col_name, gen) + return value + + def _generate_phone(self, col: Dict) -> str: + pattern = col.get('pattern', '###-###-####') + return ''.join(random.choice(string.digits) if c == '#' else c for c in pattern) + + def _generate_uuid(self) -> str: + import uuid + return str(uuid.uuid4()) + + def _export_data(self, data: List[Dict], table_name: str, output_format: str, output_path: str) -> None: + """Export data in specified format(s).""" + output_dir = Path(output_path) + output_dir.mkdir(parents=True, exist_ok=True) + + formats = output_format.split(',') if ',' in output_format else [output_format] + + for fmt in formats: + fmt = fmt.strip().lower() + if fmt == 'csv': + self._export_csv(data, output_dir / f"{table_name}.csv") + elif fmt == 'json': + self._export_json(data, output_dir / f"{table_name}.json") + elif fmt == 'both': + self._export_csv(data, output_dir / f"{table_name}.csv") + self._export_json(data, output_dir / f"{table_name}.json") + + def _export_csv(self, data: List[Dict], filepath: Path) -> None: + """Export to CSV.""" + if not data: + return + with open(filepath, 'w', newline='', encoding='utf-8') as f: + writer = csv.DictWriter(f, fieldnames=data[0].keys()) + writer.writeheader() + writer.writerows(data) + print(f"Exported to: {filepath}") + + def _export_json(self, data: List[Dict], filepath: Path) -> None: + """Export to JSON.""" + with open(filepath, 'w', encoding='utf-8') as f: + json.dump(data, f, indent=2) + print(f"Exported to: {filepath}") + + def print_sample(self, table_name: str, n: int = 5) -> None: + """Print sample data from a table.""" + if table_name not in self.generated_tables: + print(f"Table '{table_name}' not found") + return + + data = self.generated_tables[table_name] + print(f"\n{'=' * 80}") + print(f"SAMPLE DATA: {table_name} (First {min(n, len(data))} rows)") + print('=' * 80) + for i, row in enumerate(data[:n], 1): + print(f"\nRow {i}:") + for key, value in 
row.items(): + print(f" {key:25s}: {value}") + print('=' * 80) + + def get_dataframe(self, table_name: str): + """ + Get table data as pandas DataFrame (if pandas is available). + + Args: + table_name: Name of the table + + Returns: + pandas DataFrame or None if pandas not available + """ + try: + import pandas as pd + if table_name in self.generated_tables: + return pd.DataFrame(self.generated_tables[table_name]) + else: + print(f"Table '{table_name}' not found") + return None + except ImportError: + print("pandas not installed. Install with: pip install pandas") + return None \ No newline at end of file diff --git a/data_generator/output/orders.csv b/data_generator/output/orders.csv new file mode 100644 index 0000000..e30cac3 --- /dev/null +++ b/data_generator/output/orders.csv @@ -0,0 +1,101 @@ +order_id,user_id,amount,status,order_date +b34c533a-c225-49da-9b20-fd0249050e74,256,405.66,pending,2024-12-18 +272f78cd-fd98-42b2-92a8-af4f2524b21a,655,893.73,completed,2024-02-24 +693575d1-eba5-4f66-8e69-0abfa380c61a,307,226.65,completed,2024-12-22 +fadee840-f431-485e-a8c5-5f876b6182bd,111,469.9,completed,2024-03-03 +c83337b0-8f8b-4677-aa8e-2ad4a2cd2181,342,142.01,completed,2024-09-28 +b348b1b5-44c4-4f7a-af8b-741a32d6e13b,994,599.5,cancelled,2024-08-14 +1900c545-dd98-48bc-8b77-4ba244da51e5,528,816.86,completed,2024-08-06 +a9c9595d-8506-43d0-ad41-90c63963a72f,994,451.45,pending,2024-08-31 +c7518424-fc27-491d-9609-98f193e11bf2,342,266.6,completed,2024-11-22 +3c17efff-decf-4680-a848-5675ea36d2e2,647,768.16,completed,2024-11-16 +76b67058-d737-4b3f-ac4c-38a0a98aef91,970,281.85,pending,2024-05-26 +fea80c99-26c0-4192-9b9e-16eb4a0dc12f,970,279.0,completed,2024-10-03 +1ffad7b2-a9a3-48bb-9509-06d743df7694,301,146.99,completed,2024-12-21 +b32c133f-a478-43a9-a659-5f4e0b969040,253,709.35,pending,2024-07-27 +496376c5-2d06-470f-9b7c-2485477c909a,493,547.19,completed,2024-04-15 +753d2fed-f3dd-4f13-86cf-449480001916,307,395.58,completed,2024-12-22 
+f6c445de-d4fb-4531-983f-392a843abb67,655,858.16,completed,2024-07-13 +507af123-9780-40df-99ce-019177329833,256,15.84,completed,2024-07-18 +7c0cd6b3-cb69-4865-bc69-25509719bdc6,307,542.84,completed,2024-11-04 +701abd25-807f-4da3-9f87-cdce9b006cbd,970,493.35,completed,2024-09-05 +055a45f9-23aa-4112-86cc-6a711d05b2f9,655,394.97,completed,2024-07-26 +3a2ae784-9563-48dd-9768-838f862f1cd8,699,842.1,cancelled,2024-11-14 +bd765d3c-c557-4d86-97c8-1f9d0c1177c2,994,36.7,completed,2024-10-15 +19955fec-f927-4071-861a-2ea7c5a962d1,655,93.11,completed,2024-08-24 +33ee64c4-aebd-4b42-b4b4-6927117e7570,699,59.78,completed,2024-04-18 +7e20ea7b-dfb9-4b5e-b186-a66a0c066e57,342,333.58,completed,2024-07-13 +1deaed08-f988-434e-a6fd-d01c3a288e89,647,754.49,completed,2024-05-09 +d587a230-09dd-4015-9669-4b1d17ddd608,301,475.6,completed,2024-01-27 +05c627ae-3a7b-4b89-bf48-bcd0d770ec01,942,231.98,pending,2024-11-29 +83e75da3-c1ed-4391-8f07-5f3b676df98d,285,756.69,cancelled,2024-04-12 +2be9967b-051c-4dbc-99d1-e1bea3177c7f,655,625.12,completed,2024-08-30 +cf3a9fae-6d22-429f-bc8a-afa995a520d6,719,568.33,completed,2024-12-24 +f5665360-8ed8-457f-9090-a934617e9db8,647,769.23,pending,2024-11-06 +00c68cba-0970-45e2-b7e7-b048e3ee8bec,719,780.01,pending,2024-06-08 +a80edb7b-2a27-4ebd-b572-5b689b18bab8,719,582.9,cancelled,2024-10-21 +ed01304f-89cc-4587-ba0b-b48edfc44781,734,402.69,completed,2024-02-08 +a6828422-bd13-40fe-896b-a89e0a46f1d6,869,693.71,completed,2024-02-22 +95c7f5ed-265a-43ac-ac6e-3eb22e1ed22e,550,851.79,completed,2024-03-02 +fbf3e330-178a-442b-9ea8-d7184a903721,869,784.81,completed,2024-08-07 +c9e2a5c0-6886-4aaf-a039-70fbc3129eda,942,78.27,completed,2024-01-07 +d003991c-9173-479e-98a1-b4a89c595818,307,824.11,pending,2024-07-04 +dee12e45-65c5-44c2-b92c-8efa182da0c2,342,710.24,completed,2024-09-24 +b4314c1f-4a32-4e55-8693-7ccc51b59671,647,619.75,cancelled,2024-09-04 +f413a90a-3ba4-4fef-8263-639f43371f62,342,441.22,completed,2024-05-17 
+5c575819-1c7b-4f83-a73f-f6dfa5c9e30e,493,853.2,completed,2024-02-14 +f7aeeb0c-940b-4ade-8873-4f4ef77ac834,647,882.81,completed,2024-08-25 +cffd84f5-e8cc-4048-8c0d-a353f71d1fe1,869,614.23,completed,2024-01-15 +a4d60d7c-9fa8-4398-a5d0-2b47982e34b9,256,852.44,pending,2024-04-18 +3937eda4-70d8-4781-bfe4-2506c9928859,942,799.85,completed,2024-11-01 +8c6f2ecc-c855-47f5-ad93-54e32048de30,647,560.23,completed,2024-04-07 +56ef1cf4-1189-4bcb-9841-55c5f34799d4,301,248.94,completed,2024-10-11 +b888096e-4e08-4d45-9a87-ae37baa10db1,970,693.7,completed,2024-09-08 +5db97910-9199-4e39-b07c-f0740a90b2b1,342,795.0,pending,2024-04-23 +1ce2d874-1744-419c-ae40-dfc5c6f55c78,734,694.87,completed,2024-10-24 +bc92c886-f66d-4d72-88eb-008537ed97d2,942,478.53,completed,2024-08-05 +4d62be5f-9dd7-446a-b5db-a87af6bf0e94,994,337.48,completed,2024-05-18 +53f84379-d6ab-4c65-94c1-95c054f7525a,550,258.89,pending,2024-04-08 +bd29d67a-3613-4635-9d05-d8cacfbb69db,493,128.36,completed,2024-12-19 +44c4edfb-286e-4061-a3bf-b779a572b6d1,699,199.63,completed,2024-05-21 +fd0b0369-9fd0-4496-bb0a-62eb1da3aaff,869,976.84,completed,2024-05-24 +a450d6d9-29b9-4d36-b5b1-2466a5088dde,719,834.3,completed,2024-07-03 +93634d86-0ac6-4eb5-ab23-cc0b37e72bc5,699,309.24,completed,2024-03-05 +40133049-1655-4d43-8e02-f31bf0d8786e,647,55.06,pending,2024-05-29 +4a9e74c6-b6b0-4132-9a51-0977f403b65b,253,641.49,completed,2024-02-22 +83f794da-bea0-44bb-b2b2-7f20a4fb15e8,655,578.32,completed,2024-08-13 +57b534c8-dd76-4a71-b037-1e89495d7860,493,192.52,pending,2024-09-01 +a729ce53-ac94-4208-ab32-7e07df9430e1,719,823.89,completed,2024-02-07 +2262c9dc-ebb5-44e2-868f-52aaa89839c5,869,633.15,pending,2024-03-17 +ff92999c-7a64-45cc-b9ca-be6153dda3ef,869,949.63,pending,2024-05-07 +d541c64d-9770-46d4-8c0b-e52692886e0a,719,562.49,completed,2024-11-01 +da61ca2a-6da1-4787-be85-7e3339f50804,528,233.43,completed,2024-08-18 +697e4bbc-d011-4eb6-82ea-d6e5dc66408b,342,304.38,completed,2024-08-07 
+859f76e7-8695-466c-b621-281d4272e839,550,572.94,pending,2024-02-20 +15532e84-a443-4ea7-9b6f-a43b8da744d7,945,629.26,completed,2024-02-11 +4019b312-2f8b-414c-a1e7-06795d21ca3f,699,247.47,completed,2024-03-21 +b7d0c547-17ad-4df2-8e91-5bb370992cef,655,414.43,completed,2024-08-28 +b7bea197-1e75-4f99-8b7c-dad5bf902fce,550,42.31,completed,2024-05-24 +eb332f63-ce53-4a57-9764-155c47200cce,342,80.47,completed,2024-05-15 +bd7a50e7-fec2-4108-8554-0f57b6bbb4d9,869,664.57,cancelled,2024-08-05 +c9e7df68-83bf-494e-9008-3c3f1377bfb7,719,549.12,completed,2024-05-16 +14634d9e-a511-4d7f-866f-0303fe20feb7,253,80.7,pending,2024-06-06 +a60277d1-e02d-4ae9-bbf1-fa54c2be73dd,528,751.47,completed,2024-05-27 +b3efc410-9ce6-4e26-ac79-c4de44f73cfc,342,133.11,completed,2024-12-24 +f99f3b95-e517-4650-8fd0-d783856e466d,734,943.25,completed,2024-09-09 +3afc6460-7d20-41ff-a916-abed7d2ed9a8,342,89.64,pending,2024-08-09 +9f3f15c7-3931-4003-a952-9c11eabb8d98,493,607.68,pending,2024-04-27 +9662ec14-a52a-46c3-9ddf-5f68b5b2008c,869,591.25,pending,2024-12-10 +9df02f22-791e-44ec-8f1f-efce531e5e6d,647,580.48,completed,2024-03-30 +31b5ecf3-287b-4d8e-9e09-9535e2b69f99,256,523.77,completed,2024-05-22 +047435c1-2dc0-4626-a6fa-93de8585157d,699,996.98,completed,2024-09-08 +7821a4a4-28bb-463d-b0fe-f6185fb1aa81,301,475.31,completed,2024-06-13 +b0a238c4-fb59-43ee-b0ca-712f2a642b47,719,859.16,completed,2024-12-21 +9654636f-fbf5-4c5d-a3ab-bd1c06ed4d3b,256,295.34,cancelled,2024-10-08 +1d44ef20-ddc8-45fd-aad1-4096aed2e024,285,460.28,completed,2024-06-14 +6ee9f02a-0bb6-4273-a7e4-a8b9bf975c71,719,970.48,completed,2024-09-20 +e3b24b32-984d-45eb-9dec-36dc652bac93,655,661.08,completed,2024-07-30 +54dfcb1d-1b6c-4160-95c5-323ab8c271a2,285,195.71,completed,2024-09-12 +94b3d38f-6693-4d82-935f-b602b31dad65,342,762.39,completed,2024-10-08 +69704cd7-1aea-4277-a317-849643914889,253,928.4,completed,2024-12-23 +b2b61acc-b422-4e6a-838e-a6c9d2e66979,256,130.21,cancelled,2024-11-07 diff --git a/data_generator/output/orders.json 
b/data_generator/output/orders.json new file mode 100644 index 0000000..6396d39 --- /dev/null +++ b/data_generator/output/orders.json @@ -0,0 +1,702 @@ +[ + { + "order_id": "b34c533a-c225-49da-9b20-fd0249050e74", + "user_id": 256, + "amount": 405.66, + "status": "pending", + "order_date": "2024-12-18" + }, + { + "order_id": "272f78cd-fd98-42b2-92a8-af4f2524b21a", + "user_id": 655, + "amount": 893.73, + "status": "completed", + "order_date": "2024-02-24" + }, + { + "order_id": "693575d1-eba5-4f66-8e69-0abfa380c61a", + "user_id": 307, + "amount": 226.65, + "status": "completed", + "order_date": "2024-12-22" + }, + { + "order_id": "fadee840-f431-485e-a8c5-5f876b6182bd", + "user_id": 111, + "amount": 469.9, + "status": "completed", + "order_date": "2024-03-03" + }, + { + "order_id": "c83337b0-8f8b-4677-aa8e-2ad4a2cd2181", + "user_id": 342, + "amount": 142.01, + "status": "completed", + "order_date": "2024-09-28" + }, + { + "order_id": "b348b1b5-44c4-4f7a-af8b-741a32d6e13b", + "user_id": 994, + "amount": 599.5, + "status": "cancelled", + "order_date": "2024-08-14" + }, + { + "order_id": "1900c545-dd98-48bc-8b77-4ba244da51e5", + "user_id": 528, + "amount": 816.86, + "status": "completed", + "order_date": "2024-08-06" + }, + { + "order_id": "a9c9595d-8506-43d0-ad41-90c63963a72f", + "user_id": 994, + "amount": 451.45, + "status": "pending", + "order_date": "2024-08-31" + }, + { + "order_id": "c7518424-fc27-491d-9609-98f193e11bf2", + "user_id": 342, + "amount": 266.6, + "status": "completed", + "order_date": "2024-11-22" + }, + { + "order_id": "3c17efff-decf-4680-a848-5675ea36d2e2", + "user_id": 647, + "amount": 768.16, + "status": "completed", + "order_date": "2024-11-16" + }, + { + "order_id": "76b67058-d737-4b3f-ac4c-38a0a98aef91", + "user_id": 970, + "amount": 281.85, + "status": "pending", + "order_date": "2024-05-26" + }, + { + "order_id": "fea80c99-26c0-4192-9b9e-16eb4a0dc12f", + "user_id": 970, + "amount": 279.0, + "status": "completed", + "order_date": "2024-10-03" 
+ }, + { + "order_id": "1ffad7b2-a9a3-48bb-9509-06d743df7694", + "user_id": 301, + "amount": 146.99, + "status": "completed", + "order_date": "2024-12-21" + }, + { + "order_id": "b32c133f-a478-43a9-a659-5f4e0b969040", + "user_id": 253, + "amount": 709.35, + "status": "pending", + "order_date": "2024-07-27" + }, + { + "order_id": "496376c5-2d06-470f-9b7c-2485477c909a", + "user_id": 493, + "amount": 547.19, + "status": "completed", + "order_date": "2024-04-15" + }, + { + "order_id": "753d2fed-f3dd-4f13-86cf-449480001916", + "user_id": 307, + "amount": 395.58, + "status": "completed", + "order_date": "2024-12-22" + }, + { + "order_id": "f6c445de-d4fb-4531-983f-392a843abb67", + "user_id": 655, + "amount": 858.16, + "status": "completed", + "order_date": "2024-07-13" + }, + { + "order_id": "507af123-9780-40df-99ce-019177329833", + "user_id": 256, + "amount": 15.84, + "status": "completed", + "order_date": "2024-07-18" + }, + { + "order_id": "7c0cd6b3-cb69-4865-bc69-25509719bdc6", + "user_id": 307, + "amount": 542.84, + "status": "completed", + "order_date": "2024-11-04" + }, + { + "order_id": "701abd25-807f-4da3-9f87-cdce9b006cbd", + "user_id": 970, + "amount": 493.35, + "status": "completed", + "order_date": "2024-09-05" + }, + { + "order_id": "055a45f9-23aa-4112-86cc-6a711d05b2f9", + "user_id": 655, + "amount": 394.97, + "status": "completed", + "order_date": "2024-07-26" + }, + { + "order_id": "3a2ae784-9563-48dd-9768-838f862f1cd8", + "user_id": 699, + "amount": 842.1, + "status": "cancelled", + "order_date": "2024-11-14" + }, + { + "order_id": "bd765d3c-c557-4d86-97c8-1f9d0c1177c2", + "user_id": 994, + "amount": 36.7, + "status": "completed", + "order_date": "2024-10-15" + }, + { + "order_id": "19955fec-f927-4071-861a-2ea7c5a962d1", + "user_id": 655, + "amount": 93.11, + "status": "completed", + "order_date": "2024-08-24" + }, + { + "order_id": "33ee64c4-aebd-4b42-b4b4-6927117e7570", + "user_id": 699, + "amount": 59.78, + "status": "completed", + "order_date": 
"2024-04-18" + }, + { + "order_id": "7e20ea7b-dfb9-4b5e-b186-a66a0c066e57", + "user_id": 342, + "amount": 333.58, + "status": "completed", + "order_date": "2024-07-13" + }, + { + "order_id": "1deaed08-f988-434e-a6fd-d01c3a288e89", + "user_id": 647, + "amount": 754.49, + "status": "completed", + "order_date": "2024-05-09" + }, + { + "order_id": "d587a230-09dd-4015-9669-4b1d17ddd608", + "user_id": 301, + "amount": 475.6, + "status": "completed", + "order_date": "2024-01-27" + }, + { + "order_id": "05c627ae-3a7b-4b89-bf48-bcd0d770ec01", + "user_id": 942, + "amount": 231.98, + "status": "pending", + "order_date": "2024-11-29" + }, + { + "order_id": "83e75da3-c1ed-4391-8f07-5f3b676df98d", + "user_id": 285, + "amount": 756.69, + "status": "cancelled", + "order_date": "2024-04-12" + }, + { + "order_id": "2be9967b-051c-4dbc-99d1-e1bea3177c7f", + "user_id": 655, + "amount": 625.12, + "status": "completed", + "order_date": "2024-08-30" + }, + { + "order_id": "cf3a9fae-6d22-429f-bc8a-afa995a520d6", + "user_id": 719, + "amount": 568.33, + "status": "completed", + "order_date": "2024-12-24" + }, + { + "order_id": "f5665360-8ed8-457f-9090-a934617e9db8", + "user_id": 647, + "amount": 769.23, + "status": "pending", + "order_date": "2024-11-06" + }, + { + "order_id": "00c68cba-0970-45e2-b7e7-b048e3ee8bec", + "user_id": 719, + "amount": 780.01, + "status": "pending", + "order_date": "2024-06-08" + }, + { + "order_id": "a80edb7b-2a27-4ebd-b572-5b689b18bab8", + "user_id": 719, + "amount": 582.9, + "status": "cancelled", + "order_date": "2024-10-21" + }, + { + "order_id": "ed01304f-89cc-4587-ba0b-b48edfc44781", + "user_id": 734, + "amount": 402.69, + "status": "completed", + "order_date": "2024-02-08" + }, + { + "order_id": "a6828422-bd13-40fe-896b-a89e0a46f1d6", + "user_id": 869, + "amount": 693.71, + "status": "completed", + "order_date": "2024-02-22" + }, + { + "order_id": "95c7f5ed-265a-43ac-ac6e-3eb22e1ed22e", + "user_id": 550, + "amount": 851.79, + "status": "completed", + 
"order_date": "2024-03-02" + }, + { + "order_id": "fbf3e330-178a-442b-9ea8-d7184a903721", + "user_id": 869, + "amount": 784.81, + "status": "completed", + "order_date": "2024-08-07" + }, + { + "order_id": "c9e2a5c0-6886-4aaf-a039-70fbc3129eda", + "user_id": 942, + "amount": 78.27, + "status": "completed", + "order_date": "2024-01-07" + }, + { + "order_id": "d003991c-9173-479e-98a1-b4a89c595818", + "user_id": 307, + "amount": 824.11, + "status": "pending", + "order_date": "2024-07-04" + }, + { + "order_id": "dee12e45-65c5-44c2-b92c-8efa182da0c2", + "user_id": 342, + "amount": 710.24, + "status": "completed", + "order_date": "2024-09-24" + }, + { + "order_id": "b4314c1f-4a32-4e55-8693-7ccc51b59671", + "user_id": 647, + "amount": 619.75, + "status": "cancelled", + "order_date": "2024-09-04" + }, + { + "order_id": "f413a90a-3ba4-4fef-8263-639f43371f62", + "user_id": 342, + "amount": 441.22, + "status": "completed", + "order_date": "2024-05-17" + }, + { + "order_id": "5c575819-1c7b-4f83-a73f-f6dfa5c9e30e", + "user_id": 493, + "amount": 853.2, + "status": "completed", + "order_date": "2024-02-14" + }, + { + "order_id": "f7aeeb0c-940b-4ade-8873-4f4ef77ac834", + "user_id": 647, + "amount": 882.81, + "status": "completed", + "order_date": "2024-08-25" + }, + { + "order_id": "cffd84f5-e8cc-4048-8c0d-a353f71d1fe1", + "user_id": 869, + "amount": 614.23, + "status": "completed", + "order_date": "2024-01-15" + }, + { + "order_id": "a4d60d7c-9fa8-4398-a5d0-2b47982e34b9", + "user_id": 256, + "amount": 852.44, + "status": "pending", + "order_date": "2024-04-18" + }, + { + "order_id": "3937eda4-70d8-4781-bfe4-2506c9928859", + "user_id": 942, + "amount": 799.85, + "status": "completed", + "order_date": "2024-11-01" + }, + { + "order_id": "8c6f2ecc-c855-47f5-ad93-54e32048de30", + "user_id": 647, + "amount": 560.23, + "status": "completed", + "order_date": "2024-04-07" + }, + { + "order_id": "56ef1cf4-1189-4bcb-9841-55c5f34799d4", + "user_id": 301, + "amount": 248.94, + "status": 
"completed", + "order_date": "2024-10-11" + }, + { + "order_id": "b888096e-4e08-4d45-9a87-ae37baa10db1", + "user_id": 970, + "amount": 693.7, + "status": "completed", + "order_date": "2024-09-08" + }, + { + "order_id": "5db97910-9199-4e39-b07c-f0740a90b2b1", + "user_id": 342, + "amount": 795.0, + "status": "pending", + "order_date": "2024-04-23" + }, + { + "order_id": "1ce2d874-1744-419c-ae40-dfc5c6f55c78", + "user_id": 734, + "amount": 694.87, + "status": "completed", + "order_date": "2024-10-24" + }, + { + "order_id": "bc92c886-f66d-4d72-88eb-008537ed97d2", + "user_id": 942, + "amount": 478.53, + "status": "completed", + "order_date": "2024-08-05" + }, + { + "order_id": "4d62be5f-9dd7-446a-b5db-a87af6bf0e94", + "user_id": 994, + "amount": 337.48, + "status": "completed", + "order_date": "2024-05-18" + }, + { + "order_id": "53f84379-d6ab-4c65-94c1-95c054f7525a", + "user_id": 550, + "amount": 258.89, + "status": "pending", + "order_date": "2024-04-08" + }, + { + "order_id": "bd29d67a-3613-4635-9d05-d8cacfbb69db", + "user_id": 493, + "amount": 128.36, + "status": "completed", + "order_date": "2024-12-19" + }, + { + "order_id": "44c4edfb-286e-4061-a3bf-b779a572b6d1", + "user_id": 699, + "amount": 199.63, + "status": "completed", + "order_date": "2024-05-21" + }, + { + "order_id": "fd0b0369-9fd0-4496-bb0a-62eb1da3aaff", + "user_id": 869, + "amount": 976.84, + "status": "completed", + "order_date": "2024-05-24" + }, + { + "order_id": "a450d6d9-29b9-4d36-b5b1-2466a5088dde", + "user_id": 719, + "amount": 834.3, + "status": "completed", + "order_date": "2024-07-03" + }, + { + "order_id": "93634d86-0ac6-4eb5-ab23-cc0b37e72bc5", + "user_id": 699, + "amount": 309.24, + "status": "completed", + "order_date": "2024-03-05" + }, + { + "order_id": "40133049-1655-4d43-8e02-f31bf0d8786e", + "user_id": 647, + "amount": 55.06, + "status": "pending", + "order_date": "2024-05-29" + }, + { + "order_id": "4a9e74c6-b6b0-4132-9a51-0977f403b65b", + "user_id": 253, + "amount": 641.49, + 
"status": "completed", + "order_date": "2024-02-22" + }, + { + "order_id": "83f794da-bea0-44bb-b2b2-7f20a4fb15e8", + "user_id": 655, + "amount": 578.32, + "status": "completed", + "order_date": "2024-08-13" + }, + { + "order_id": "57b534c8-dd76-4a71-b037-1e89495d7860", + "user_id": 493, + "amount": 192.52, + "status": "pending", + "order_date": "2024-09-01" + }, + { + "order_id": "a729ce53-ac94-4208-ab32-7e07df9430e1", + "user_id": 719, + "amount": 823.89, + "status": "completed", + "order_date": "2024-02-07" + }, + { + "order_id": "2262c9dc-ebb5-44e2-868f-52aaa89839c5", + "user_id": 869, + "amount": 633.15, + "status": "pending", + "order_date": "2024-03-17" + }, + { + "order_id": "ff92999c-7a64-45cc-b9ca-be6153dda3ef", + "user_id": 869, + "amount": 949.63, + "status": "pending", + "order_date": "2024-05-07" + }, + { + "order_id": "d541c64d-9770-46d4-8c0b-e52692886e0a", + "user_id": 719, + "amount": 562.49, + "status": "completed", + "order_date": "2024-11-01" + }, + { + "order_id": "da61ca2a-6da1-4787-be85-7e3339f50804", + "user_id": 528, + "amount": 233.43, + "status": "completed", + "order_date": "2024-08-18" + }, + { + "order_id": "697e4bbc-d011-4eb6-82ea-d6e5dc66408b", + "user_id": 342, + "amount": 304.38, + "status": "completed", + "order_date": "2024-08-07" + }, + { + "order_id": "859f76e7-8695-466c-b621-281d4272e839", + "user_id": 550, + "amount": 572.94, + "status": "pending", + "order_date": "2024-02-20" + }, + { + "order_id": "15532e84-a443-4ea7-9b6f-a43b8da744d7", + "user_id": 945, + "amount": 629.26, + "status": "completed", + "order_date": "2024-02-11" + }, + { + "order_id": "4019b312-2f8b-414c-a1e7-06795d21ca3f", + "user_id": 699, + "amount": 247.47, + "status": "completed", + "order_date": "2024-03-21" + }, + { + "order_id": "b7d0c547-17ad-4df2-8e91-5bb370992cef", + "user_id": 655, + "amount": 414.43, + "status": "completed", + "order_date": "2024-08-28" + }, + { + "order_id": "b7bea197-1e75-4f99-8b7c-dad5bf902fce", + "user_id": 550, + "amount": 
42.31, + "status": "completed", + "order_date": "2024-05-24" + }, + { + "order_id": "eb332f63-ce53-4a57-9764-155c47200cce", + "user_id": 342, + "amount": 80.47, + "status": "completed", + "order_date": "2024-05-15" + }, + { + "order_id": "bd7a50e7-fec2-4108-8554-0f57b6bbb4d9", + "user_id": 869, + "amount": 664.57, + "status": "cancelled", + "order_date": "2024-08-05" + }, + { + "order_id": "c9e7df68-83bf-494e-9008-3c3f1377bfb7", + "user_id": 719, + "amount": 549.12, + "status": "completed", + "order_date": "2024-05-16" + }, + { + "order_id": "14634d9e-a511-4d7f-866f-0303fe20feb7", + "user_id": 253, + "amount": 80.7, + "status": "pending", + "order_date": "2024-06-06" + }, + { + "order_id": "a60277d1-e02d-4ae9-bbf1-fa54c2be73dd", + "user_id": 528, + "amount": 751.47, + "status": "completed", + "order_date": "2024-05-27" + }, + { + "order_id": "b3efc410-9ce6-4e26-ac79-c4de44f73cfc", + "user_id": 342, + "amount": 133.11, + "status": "completed", + "order_date": "2024-12-24" + }, + { + "order_id": "f99f3b95-e517-4650-8fd0-d783856e466d", + "user_id": 734, + "amount": 943.25, + "status": "completed", + "order_date": "2024-09-09" + }, + { + "order_id": "3afc6460-7d20-41ff-a916-abed7d2ed9a8", + "user_id": 342, + "amount": 89.64, + "status": "pending", + "order_date": "2024-08-09" + }, + { + "order_id": "9f3f15c7-3931-4003-a952-9c11eabb8d98", + "user_id": 493, + "amount": 607.68, + "status": "pending", + "order_date": "2024-04-27" + }, + { + "order_id": "9662ec14-a52a-46c3-9ddf-5f68b5b2008c", + "user_id": 869, + "amount": 591.25, + "status": "pending", + "order_date": "2024-12-10" + }, + { + "order_id": "9df02f22-791e-44ec-8f1f-efce531e5e6d", + "user_id": 647, + "amount": 580.48, + "status": "completed", + "order_date": "2024-03-30" + }, + { + "order_id": "31b5ecf3-287b-4d8e-9e09-9535e2b69f99", + "user_id": 256, + "amount": 523.77, + "status": "completed", + "order_date": "2024-05-22" + }, + { + "order_id": "047435c1-2dc0-4626-a6fa-93de8585157d", + "user_id": 699, + 
"amount": 996.98, + "status": "completed", + "order_date": "2024-09-08" + }, + { + "order_id": "7821a4a4-28bb-463d-b0fe-f6185fb1aa81", + "user_id": 301, + "amount": 475.31, + "status": "completed", + "order_date": "2024-06-13" + }, + { + "order_id": "b0a238c4-fb59-43ee-b0ca-712f2a642b47", + "user_id": 719, + "amount": 859.16, + "status": "completed", + "order_date": "2024-12-21" + }, + { + "order_id": "9654636f-fbf5-4c5d-a3ab-bd1c06ed4d3b", + "user_id": 256, + "amount": 295.34, + "status": "cancelled", + "order_date": "2024-10-08" + }, + { + "order_id": "1d44ef20-ddc8-45fd-aad1-4096aed2e024", + "user_id": 285, + "amount": 460.28, + "status": "completed", + "order_date": "2024-06-14" + }, + { + "order_id": "6ee9f02a-0bb6-4273-a7e4-a8b9bf975c71", + "user_id": 719, + "amount": 970.48, + "status": "completed", + "order_date": "2024-09-20" + }, + { + "order_id": "e3b24b32-984d-45eb-9dec-36dc652bac93", + "user_id": 655, + "amount": 661.08, + "status": "completed", + "order_date": "2024-07-30" + }, + { + "order_id": "54dfcb1d-1b6c-4160-95c5-323ab8c271a2", + "user_id": 285, + "amount": 195.71, + "status": "completed", + "order_date": "2024-09-12" + }, + { + "order_id": "94b3d38f-6693-4d82-935f-b602b31dad65", + "user_id": 342, + "amount": 762.39, + "status": "completed", + "order_date": "2024-10-08" + }, + { + "order_id": "69704cd7-1aea-4277-a317-849643914889", + "user_id": 253, + "amount": 928.4, + "status": "completed", + "order_date": "2024-12-23" + }, + { + "order_id": "b2b61acc-b422-4e6a-838e-a6c9d2e66979", + "user_id": 256, + "amount": 130.21, + "status": "cancelled", + "order_date": "2024-11-07" + } +] \ No newline at end of file diff --git a/data_generator/output/users.csv b/data_generator/output/users.csv new file mode 100644 index 0000000..7dc4749 --- /dev/null +++ b/data_generator/output/users.csv @@ -0,0 +1,21 @@ +user_id,username,email,age,is_active +655,user_gTpigTHK,bhsah@test.com,32,True +285,user_YaXRvj7u,nmmjbq@example.com,42,True 
+301,user_ZM1JRcor,45nq4f@test.com,31,True +719,user_6OLkTkx9,yhl1c32oc6uzhr5@test.com,27,True +253,user_UHUAKw9i,dbdw2pcn9t84@example.com,61,True +699,user_2UVvsjaS,81sdw4shn@example.com,52,False +945,user_Gaub52Zt,iuc03@example.com,26,True +970,user_Iq2AnHTm,oyn6qicat@example.com,18,True +647,user_debuFrEH,uirodxmo5bxxc0@example.com,33,True +550,user_BArp5B1I,xtad1@example.com,28,True +493,user_nydx9qWC,z02xrghc0cl@example.com,55,True +942,user_0j7elKPo,7uubcx@test.com,34,True +734,user_tqiOC56a,99chj755nf4zw9@example.com,60,False +307,user_5g2q3Ujr,zhwjr64d@test.com,35,True +342,user_VNqTIAae,t4ufel6@example.com,40,True +256,user_PvI28U5o,935a0l7@example.com,35,True +111,user_xcDm4vY2,agl58kxo@test.com,19,True +994,user_qJ8cKvWB,e6gbps3y70hp8@test.com,57,True +869,user_h3FPuRIl,7y6wktakpuxqphr@example.com,60,True +528,user_PMf7otYj,ir4co@example.com,63,True diff --git a/data_generator/output/users.json b/data_generator/output/users.json new file mode 100644 index 0000000..346c359 --- /dev/null +++ b/data_generator/output/users.json @@ -0,0 +1,142 @@ +[ + { + "user_id": 655, + "username": "user_gTpigTHK", + "email": "bhsah@test.com", + "age": 32, + "is_active": true + }, + { + "user_id": 285, + "username": "user_YaXRvj7u", + "email": "nmmjbq@example.com", + "age": 42, + "is_active": true + }, + { + "user_id": 301, + "username": "user_ZM1JRcor", + "email": "45nq4f@test.com", + "age": 31, + "is_active": true + }, + { + "user_id": 719, + "username": "user_6OLkTkx9", + "email": "yhl1c32oc6uzhr5@test.com", + "age": 27, + "is_active": true + }, + { + "user_id": 253, + "username": "user_UHUAKw9i", + "email": "dbdw2pcn9t84@example.com", + "age": 61, + "is_active": true + }, + { + "user_id": 699, + "username": "user_2UVvsjaS", + "email": "81sdw4shn@example.com", + "age": 52, + "is_active": false + }, + { + "user_id": 945, + "username": "user_Gaub52Zt", + "email": "iuc03@example.com", + "age": 26, + "is_active": true + }, + { + "user_id": 970, + "username": 
"user_Iq2AnHTm", + "email": "oyn6qicat@example.com", + "age": 18, + "is_active": true + }, + { + "user_id": 647, + "username": "user_debuFrEH", + "email": "uirodxmo5bxxc0@example.com", + "age": 33, + "is_active": true + }, + { + "user_id": 550, + "username": "user_BArp5B1I", + "email": "xtad1@example.com", + "age": 28, + "is_active": true + }, + { + "user_id": 493, + "username": "user_nydx9qWC", + "email": "z02xrghc0cl@example.com", + "age": 55, + "is_active": true + }, + { + "user_id": 942, + "username": "user_0j7elKPo", + "email": "7uubcx@test.com", + "age": 34, + "is_active": true + }, + { + "user_id": 734, + "username": "user_tqiOC56a", + "email": "99chj755nf4zw9@example.com", + "age": 60, + "is_active": false + }, + { + "user_id": 307, + "username": "user_5g2q3Ujr", + "email": "zhwjr64d@test.com", + "age": 35, + "is_active": true + }, + { + "user_id": 342, + "username": "user_VNqTIAae", + "email": "t4ufel6@example.com", + "age": 40, + "is_active": true + }, + { + "user_id": 256, + "username": "user_PvI28U5o", + "email": "935a0l7@example.com", + "age": 35, + "is_active": true + }, + { + "user_id": 111, + "username": "user_xcDm4vY2", + "email": "agl58kxo@test.com", + "age": 19, + "is_active": true + }, + { + "user_id": 994, + "username": "user_qJ8cKvWB", + "email": "e6gbps3y70hp8@test.com", + "age": 57, + "is_active": true + }, + { + "user_id": 869, + "username": "user_h3FPuRIl", + "email": "7y6wktakpuxqphr@example.com", + "age": 60, + "is_active": true + }, + { + "user_id": 528, + "username": "user_PMf7otYj", + "email": "ir4co@example.com", + "age": 63, + "is_active": true + } +] \ No newline at end of file diff --git a/data_generator/requirements.txt b/data_generator/requirements.txt new file mode 100644 index 0000000..85f6429 --- /dev/null +++ b/data_generator/requirements.txt @@ -0,0 +1,2 @@ +pandas +pyyaml \ No newline at end of file