Commit 59ae01f

authored
moved
1 parent 17e2e02 commit 59ae01f

File tree

1 file changed: +107 -0 lines changed
Lines changed: 107 additions & 0 deletions
@@ -0,0 +1,107 @@
{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "id": "8d820f25-3c2e-45b3-8a08-af78f0d45e1d",
      "metadata": {
        "microsoft": {
          "language": "python",
          "language_group": "synapse_pyspark"
        }
      },
      "outputs": [],
      "source": [
        "# Generates dummy data and writes it to Files/ as a Delta table\n",
        "\n",
        "# Import necessary libraries\n",
        "from pyspark.sql import SparkSession\n",
        "from pyspark.sql.types import *\n",
        "import random\n",
        "from datetime import datetime, timedelta\n",
        "\n",
        "# Initialize Spark session (if not already initialized)\n",
        "spark = SparkSession.builder.appName(\"GenerateRandomData\").getOrCreate()\n",
        "\n",
        "# Function to generate random data\n",
        "def generate_random_data(num_entries):\n",
        "    data = []\n",
        "    for i in range(1, num_entries + 1):\n",
        "        name = f\"User{i}\"\n",
        "        entry = {\n",
        "            \"id\": i,\n",
        "            \"name\": name,\n",
        "            \"age\": random.randint(18, 65),\n",
        "            \"email\": f\"{name.lower()}@example.com\",\n",
        "            \"created_at\": (datetime.now() - timedelta(days=random.randint(0, 365))).strftime(\"%Y-%m-%d %H:%M:%S\")\n",
        "        }\n",
        "        data.append(entry)\n",
        "    return data\n",
        "\n",
        "# Generate 10 random entries\n",
        "random_data = generate_random_data(10)\n",
        "\n",
        "# Define schema for the DataFrame\n",
        "schema = StructType([\n",
        "    StructField(\"id\", IntegerType(), True),\n",
        "    StructField(\"name\", StringType(), True),\n",
        "    StructField(\"age\", IntegerType(), True),\n",
        "    StructField(\"email\", StringType(), True),\n",
        "    StructField(\"created_at\", StringType(), True)\n",
        "])\n",
        "\n",
        "# Create a DataFrame from the random data\n",
        "df_random_data = spark.createDataFrame(random_data, schema=schema)\n",
        "\n",
        "# Write the DataFrame to the Lakehouse at the specified path\n",
        "output_path = \"abfss://{WORKSPACE-NAME}@onelake.dfs.fabric.microsoft.com/raw_Bronze.Lakehouse/Files/random_data\"  # Replace {WORKSPACE-NAME}\n",
        "df_random_data.write.format(\"delta\").mode(\"overwrite\").save(output_path)\n",
        "\n",
        "print(f\"Random data has been saved to the Lakehouse at '{output_path}'.\")"
      ]
    }
  ],
  "metadata": {
    "application/vnd.jupyter.widget-state+json": {
      "version": "1.0"
    },
    "dependencies": {},
    "kernel_info": {
      "name": "synapse_pyspark"
    },
    "kernelspec": {
      "display_name": "Synapse PySpark",
      "language": "Python",
      "name": "synapse_pyspark"
    },
    "language_info": {
      "name": "python"
    },
    "microsoft": {
      "language": "python",
      "language_group": "synapse_pyspark",
      "ms_spell_check": {
        "ms_spell_check_language": "en"
      }
    },
    "nteract": {
      "version": "[email protected]"
    },
    "spark_compute": {
      "compute_id": "/trident/default",
      "session_options": {
        "conf": {
          "spark.synapse.nbs.session.timeout": "1200000"
        }
      }
    },
    "widgets": {
      "application/vnd.jupyter.widget-state+json": {
        "state": {},
        "version": "1.0"
      }
    }
  },
  "nbformat": 4,
  "nbformat_minor": 5
}
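The notebook's only cell generates rows, builds a typed DataFrame, and writes it in Delta format under Files/. Reading the output back in the same session is the quickest sanity check; a minimal sketch, assuming the cell above has already run so that spark and output_path are defined and the Delta files exist:

# Sketch: verify the Delta output by reading it back.
# Assumes the notebook cell above ran in this Spark session,
# so spark and output_path are defined and the files exist.
df_check = spark.read.format("delta").load(output_path)
df_check.printSchema()
df_check.show(10, truncate=False)
print(f"Row count: {df_check.count()}")  # expect 10 for generate_random_data(10)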

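The cell's original header comment mentioned a JSON file even though the write uses Delta format. If newline-delimited JSON under Files/ is what is actually wanted, the Delta write can be swapped for the DataFrame JSON writer. A hedged sketch; json_output_path is a hypothetical sibling folder, not part of this commit:

# Sketch: write the same DataFrame as newline-delimited JSON instead of Delta.
# json_output_path is illustrative; the commit only defines output_path.
json_output_path = output_path + "_json"
df_random_data.write.mode("overwrite").json(json_output_path)
print(f"JSON files written to '{json_output_path}'.")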