Merged
3 changes: 0 additions & 3 deletions .github/workflows/validate_and_fix_markdown.yml
Original file line number Diff line number Diff line change
@@ -4,9 +4,6 @@ on:
pull_request:
branches:
- main
push:
branches:
- main

permissions:
contents: write
12 changes: 6 additions & 6 deletions .github/workflows/validate_and_fix_notebook.yml
Original file line number Diff line number Diff line change
@@ -4,9 +4,6 @@ on:
pull_request:
branches:
- main
push:
branches:
- main

permissions:
contents: write
@@ -35,12 +32,15 @@ jobs:
python -c "
import nbformat
import glob
for file in glob.glob('**/*.ypyb', recursive=True):
for file in glob.glob('**/*.ipynb', recursive=True):
with open(file, 'r') as f:
nb = nbformat.read(f, as_version=4)
nbformat.validate(nb)
if 'application/vnd.beylor-adapt+notebook' not in nb.metadata:
nb.metadata['application/vnd.beylor-adapt+notebook'] = {'version': '1.0'}
if 'widgets' in nb.metadata:
if 'application/vnd.jupyter.widget-state+json' not in nb.metadata['widgets']:
nb.metadata['widgets']['application/vnd.jupyter.widget-state+json'] = {'version': '1.0', 'state': {}}
elif 'state' not in nb.metadata['widgets']['application/vnd.jupyter.widget-state+json']:
nb.metadata['widgets']['application/vnd.jupyter.widget-state+json']['state'] = {}
with open(file, 'w') as f:
nbformat.write(nb, f)
"
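For reference, a minimal local sketch of the inline "python -c" step above, with the indentation that the diff view drops reconstructed (an assumption about the original layout). It assumes nbformat is installed and the script is run from the repository root, mirroring what the workflow does to every notebook.

# Sketch of the workflow's notebook fix-up step; indentation reconstructed.
import glob

import nbformat

for file in glob.glob('**/*.ipynb', recursive=True):
    with open(file, 'r') as f:
        nb = nbformat.read(f, as_version=4)
    # Raises a ValidationError if the notebook is structurally invalid.
    nbformat.validate(nb)
    # If a 'widgets' block exists, make sure it carries the widget-state
    # entry with a 'state' key, matching the metadata this PR adds.
    if 'widgets' in nb.metadata:
        widget_key = 'application/vnd.jupyter.widget-state+json'
        if widget_key not in nb.metadata['widgets']:
            nb.metadata['widgets'][widget_key] = {'version': '1.0', 'state': {}}
        elif 'state' not in nb.metadata['widgets'][widget_key]:
            nb.metadata['widgets'][widget_key]['state'] = {}
    with open(file, 'w') as f:
        nbformat.write(nb, f)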
Original file line number Diff line number Diff line change
@@ -1 +1,107 @@
{"cells":[{"cell_type":"code","source":["# Generates Dummy json file in Files/\n","\n","# Import necessary libraries\n","from pyspark.sql import SparkSession\n","from pyspark.sql.types import *\n","import random\n","from datetime import datetime, timedelta\n","\n","# Initialize Spark session (if not already initialized)\n","spark = SparkSession.builder.appName(\"GenerateRandomData\").getOrCreate()\n","\n","# Function to generate random data\n","def generate_random_data(num_entries):\n"," data = []\n"," for i in range(1, num_entries + 1):\n"," name = f\"User{i}\"\n"," entry = {\n"," \"id\": i,\n"," \"name\": name,\n"," \"age\": random.randint(18, 65),\n"," \"email\": f\"{name.lower()}@example.com\",\n"," \"created_at\": (datetime.now() - timedelta(days=random.randint(0, 365))).strftime(\"%Y-%m-%d %H:%M:%S\")\n"," }\n"," data.append(entry)\n"," return data\n","\n","# Generate 10 random entries\n","random_data = generate_random_data(10)\n","\n","# Define schema for the DataFrame\n","schema = StructType([\n"," StructField(\"id\", IntegerType(), True),\n"," StructField(\"name\", StringType(), True),\n"," StructField(\"age\", IntegerType(), True),\n"," StructField(\"email\", StringType(), True),\n"," StructField(\"created_at\", StringType(), True)\n","])\n","\n","# Create a DataFrame from the random data\n","df_random_data = spark.createDataFrame(random_data, schema=schema)\n","\n","# Write the DataFrame to the Lakehouse in the specified path\n","output_path = \"abfss://{WORKSPACE-NAME}@onelake.dfs.fabric.microsoft.com/raw_Bronze.Lakehouse/Files/random_data\" # Replace {WORKSPACE-NAME}\n","df_random_data.write.format(\"delta\").mode(\"overwrite\").save(output_path)\n","\n","print(f\"Random data has been saved to the Lakehouse at '{output_path}'.\")"],"outputs":[],"execution_count":null,"metadata":{"microsoft":{"language":"python","language_group":"synapse_pyspark"}},"id":"8d820f25-3c2e-45b3-8a08-af78f0d45e1d"}],"metadata":{"kernel_info":{"name":"synapse_pyspark"},"kernelspec":{"name":"synapse_pyspark","language":"Python","display_name":"Synapse PySpark"},"language_info":{"name":"python"},"microsoft":{"language":"python","language_group":"synapse_pyspark","ms_spell_check":{"ms_spell_check_language":"en"}},"nteract":{"version":"[email protected]"},"spark_compute":{"compute_id":"/trident/default","session_options":{"conf":{"spark.synapse.nbs.session.timeout":"1200000"}}},"dependencies":{}},"nbformat":4,"nbformat_minor":5}
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "8d820f25-3c2e-45b3-8a08-af78f0d45e1d",
"metadata": {
"microsoft": {
"language": "python",
"language_group": "synapse_pyspark"
}
},
"outputs": [],
"source": [
"# Generates Dummy json file in Files/\n",
"\n",
"# Import necessary libraries\n",
"from pyspark.sql import SparkSession\n",
"from pyspark.sql.types import *\n",
"import random\n",
"from datetime import datetime, timedelta\n",
"\n",
"# Initialize Spark session (if not already initialized)\n",
"spark = SparkSession.builder.appName(\"GenerateRandomData\").getOrCreate()\n",
"\n",
"# Function to generate random data\n",
"def generate_random_data(num_entries):\n",
" data = []\n",
" for i in range(1, num_entries + 1):\n",
" name = f\"User{i}\"\n",
" entry = {\n",
" \"id\": i,\n",
" \"name\": name,\n",
" \"age\": random.randint(18, 65),\n",
" \"email\": f\"{name.lower()}@example.com\",\n",
" \"created_at\": (datetime.now() - timedelta(days=random.randint(0, 365))).strftime(\"%Y-%m-%d %H:%M:%S\")\n",
" }\n",
" data.append(entry)\n",
" return data\n",
"\n",
"# Generate 10 random entries\n",
"random_data = generate_random_data(10)\n",
"\n",
"# Define schema for the DataFrame\n",
"schema = StructType([\n",
" StructField(\"id\", IntegerType(), True),\n",
" StructField(\"name\", StringType(), True),\n",
" StructField(\"age\", IntegerType(), True),\n",
" StructField(\"email\", StringType(), True),\n",
" StructField(\"created_at\", StringType(), True)\n",
"])\n",
"\n",
"# Create a DataFrame from the random data\n",
"df_random_data = spark.createDataFrame(random_data, schema=schema)\n",
"\n",
"# Write the DataFrame to the Lakehouse in the specified path\n",
"output_path = \"abfss://{WORKSPACE-NAME}@onelake.dfs.fabric.microsoft.com/raw_Bronze.Lakehouse/Files/random_data\" # Replace {WORKSPACE-NAME}\n",
"df_random_data.write.format(\"delta\").mode(\"overwrite\").save(output_path)\n",
"\n",
"print(f\"Random data has been saved to the Lakehouse at '{output_path}'.\")"
]
}
],
"metadata": {
"application/vnd.jupyter.widget-state+json": {
"version": "1.0"
},
"dependencies": {},
"kernel_info": {
"name": "synapse_pyspark"
},
"kernelspec": {
"display_name": "Synapse PySpark",
"language": "Python",
"name": "synapse_pyspark"
},
"language_info": {
"name": "python"
},
"microsoft": {
"language": "python",
"language_group": "synapse_pyspark",
"ms_spell_check": {
"ms_spell_check_language": "en"
}
},
"nteract": {
"version": "[email protected]"
},
"spark_compute": {
"compute_id": "/trident/default",
"session_options": {
"conf": {
"spark.synapse.nbs.session.timeout": "1200000"
}
}
},
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version": "1.0"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
}
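A small verification sketch, not part of the change: after the notebook above runs, the Delta output can be read back to confirm the ten dummy rows landed. It assumes the {WORKSPACE-NAME} placeholder has been replaced and that the predefined Fabric Spark session object spark is available.

# Read back the Delta output written by the notebook above (verification only).
output_path = "abfss://{WORKSPACE-NAME}@onelake.dfs.fabric.microsoft.com/raw_Bronze.Lakehouse/Files/random_data"  # Replace {WORKSPACE-NAME}

df_check = spark.read.format("delta").load(output_path)
df_check.show(10, truncate=False)
print(f"Row count: {df_check.count()}")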
Original file line number Diff line number Diff line change
@@ -1152,6 +1152,9 @@
}
],
"metadata": {
"application/vnd.jupyter.widget-state+json": {
"version": "1.0"
},
"dependencies": {
"environment": {
"environmentId": "766562be-9e21-456c-b270-cac7e4bf8d18",
@@ -1187,7 +1190,12 @@
}
}
},
"widgets": {}
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"state": {},
"version": "1.0"
}
}
},
"nbformat": 4,
"nbformat_minor": 5
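A read-only sketch for checking whether a notebook already carries the widget-state block this change adds, without rewriting the file; the notebook path is illustrative.

# Check (without modifying) that a notebook has the widget-state metadata.
import nbformat

nb = nbformat.read("notebook.ipynb", as_version=4)  # hypothetical path
widget_key = "application/vnd.jupyter.widget-state+json"
widgets = nb.metadata.get("widgets", {})
has_state = widget_key in widgets and "state" in widgets[widget_key]
print("widget-state metadata present:", has_state)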