|
5 | 5 | "execution_count": 0, |
6 | 6 | "metadata": { |
7 | 7 | "application/vnd.databricks.v1+cell": { |
8 | | - "cellmetadata": { |
9 | | - "bytelimit": 2048000, |
10 | | - "rowlimit": 10000 |
| 8 | + "cellMetadata": { |
| 9 | + "byteLimit": 2048000, |
| 10 | + "rowLimit": 10000 |
11 | 11 | }, |
12 | | - "inputwidgets": {}, |
| 12 | + "inputWidgets": {}, |
13 | 13 | "nuid": "edf2bb67-c3fa-459c-bb4b-83dec2075401", |
14 | | - "showtitle": false, |
| 14 | + "showTitle": false, |
15 | 15 | "title": "" |
16 | 16 | } |
17 | 17 | }, |
18 | 18 | "outputs": [], |
19 | 19 | "source": [ |
20 | | - "cosmosendpoint = \"https://fabianm-oltp-spark-workshop-cdb.documents.azure.com:443/\"\n", |
21 | | - "cosmosmasterkey = \"\"\n", |
22 | | - "cosmosserviceprincipalpassword=\"\"\n" |
| 20 | + "cosmosendpoint = \"<YourEndpoint>\"\n", |
| 21 | + "cosmosmasterkey = \"<YourKey>\"\n", |
| 22 | + "cosmosserviceprincipalpassword=\"\"\n", |
| 23 | + "accountDataResolverName = \"com.azure.cosmos.spark.samples.MasterKeyAccountDataResolver\"\n", |
| 24 | + "#accountDataResolverName = \"com.azure.cosmos.spark.samples.ServicePrincipalAccountDataResolver\"\n", |
| 25 | + "#accountDataResolverName = \"com.azure.cosmos.spark.samples.ManagedIdentityAccountDataResolver\"" |
23 | 26 | ] |
24 | 27 | }, |
25 | 28 | { |
26 | 29 | "cell_type": "code", |
27 | 30 | "execution_count": 0, |
28 | 31 | "metadata": { |
29 | 32 | "application/vnd.databricks.v1+cell": { |
30 | | - "cellmetadata": { |
31 | | - "bytelimit": 2048000, |
32 | | - "rowlimit": 10000 |
| 33 | + "cellMetadata": { |
| 34 | + "byteLimit": 2048000, |
| 35 | + "rowLimit": 10000 |
33 | 36 | }, |
34 | | - "inputwidgets": {}, |
| 37 | + "inputWidgets": {}, |
35 | 38 | "nuid": "67f2404c-a6b6-4342-9dac-638a2bd7731c", |
36 | | - "showtitle": false, |
| 39 | + "showTitle": false, |
37 | 40 | "title": "" |
38 | 41 | } |
39 | 42 | }, |
40 | 43 | "outputs": [], |
41 | 44 | "source": [ |
42 | 45 | "import base64\n", |
43 | 46 | "import os\n", |
44 | | - "cert_file= open(\"/workspace/users/[email protected]/fabianm-spark-auth-sp-cert.pem\",\"rb\")\n", |
| 47 | + "\n", |
| 48 | + "\n", |
| 49 | + "cert_file= open(\"/Workspace/Users/[email protected]/someCert.pem\",\"rb\")\n", |
45 | 50 | "cert_data_binary = cert_file.read()\n", |
46 | 51 | "cert_data = (base64.b64encode(cert_data_binary)).decode('ascii')\n" |
47 | 52 | ] |
|
51 | 56 | "execution_count": 0, |
52 | 57 | "metadata": { |
53 | 58 | "application/vnd.databricks.v1+cell": { |
54 | | - "cellmetadata": { |
55 | | - "bytelimit": 2048000, |
56 | | - "rowlimit": 10000 |
| 59 | + "cellMetadata": { |
| 60 | + "byteLimit": 2048000, |
| 61 | + "rowLimit": 10000 |
57 | 62 | }, |
58 | | - "inputwidgets": {}, |
| 63 | + "inputWidgets": {}, |
59 | 64 | "nuid": "bfbd87f9-7628-489c-8f8a-1f0d5d14d2be", |
60 | | - "showtitle": false, |
| 65 | + "showTitle": false, |
61 | 66 | "title": "" |
62 | 67 | } |
63 | 68 | }, |
64 | | - "outputs": [ |
65 | | - { |
66 | | - "output_type": "stream", |
67 | | - "name": "stdout", |
68 | | - "output_type": "stream", |
69 | | - "text": [ |
70 | | - "pk: 7d90716b-4ea1-4753-8090-4b72e4a2b93b\nroot\n |-- id: string (nullable = false)\n |-- pk: string (nullable = false)\n |-- emptycolumn: string (nullable = true)\n |-- nullcolumn: string (nullable = true)\n |-- defaultcolumn: integer (nullable = true)\n |-- largecolumn: string (nullable = true)\n\n+------------------------------------+------------------------------------+-----------+----------+-------------+----------------------------------------------------------------------------------------------------+\n| id| pk|emptycolumn|nullcolumn|defaultcolumn| largecolumn|\n+------------------------------------+------------------------------------+-----------+----------+-------------+----------------------------------------------------------------------------------------------------+\n|fa1a3854-4d41-4ffb-b992-a00c82585ddc|7d90716b-4ea1-4753-8090-4b72e4a2b93b| | null| 0|ixcqsfjhwqelwcpjtzaqquhaxlmemdpeheyfxosdobyqvbihrvrftuaicllsfllgmfzwrbefkszobvpihkqxqfyulggqgrznd...|\n|3ef8e8c0-e9e7-4a2e-887f-2a9826f7b987|7d90716b-4ea1-4753-8090-4b72e4a2b93b| | null| 0|obltfpuoonfywvusviupkloeojqolqqyabzhcssnefwwptgvwqgnajesmnsyslvogtclasksjwpltsqrqwkeqgazarodmvbmv...|\n+------------------------------------+------------------------------------+-----------+----------+-------------+----------------------------------------------------------------------------------------------------+\n\n" |
71 | | - ] |
72 | | - } |
73 | | - ], |
| 69 | + "outputs": [], |
74 | 70 | "source": [ |
75 | 71 | "import random\n", |
76 | 72 | "import string\n", |
77 | 73 | "import uuid\n", |
78 | | - "from pyspark.sql.types import structtype,structfield, stringtype, integertype\n", |
| 74 | + "from pyspark.sql.types import StructType,StructField, StringType, IntegerType\n", |
79 | 75 | " \n", |
80 | 76 | "def random_string_generator(str_size, allowed_chars):\n", |
81 | 77 | " return ''.join(random.choice(allowed_chars) for x in range(str_size))\n", |
|
85 | 81 | " \n", |
86 | 82 | "chars = string.ascii_letters\n", |
87 | 83 | "data = [\\\n", |
88 | | - " (str(uuid.uuid4()), pk, \"\", none, 0, random_string_generator(16000, chars)),\\\n", |
89 | | - " (str(uuid.uuid4()), pk, \"\", none, 0, random_string_generator(random.randint(16000, 170000), chars)),\\\n", |
| 84 | + " (str(uuid.uuid4()), pk, \"\", None, 0, random_string_generator(16000, chars)),\\\n", |
| 85 | + " (str(uuid.uuid4()), pk, \"\", None, 0, random_string_generator(random.randint(16000, 170000), chars)),\\\n", |
90 | 86 | " ]\n", |
91 | 87 | "\n", |
92 | | - "schema = structtype([ \\\n", |
93 | | - " structfield(\"id\",stringtype(),false), \\\n", |
94 | | - " structfield(\"pk\",stringtype(),false), \\\n", |
95 | | - " structfield(\"emptycolumn\",stringtype(),true), \\\n", |
96 | | - " structfield(\"nullcolumn\",stringtype(),true), \\\n", |
97 | | - " structfield(\"defaultcolumn\",integertype(),true), \\\n", |
98 | | - " structfield(\"largecolumn\", stringtype(), true)\\\n", |
| 88 | + "schema = StructType([ \\\n", |
| 89 | + " StructField(\"id\",StringType(),False), \\\n", |
| 90 | + " StructField(\"pk\",StringType(),False), \\\n", |
| 91 | + " StructField(\"emptycolumn\",StringType(),True), \\\n", |
| 92 | + " StructField(\"nullcolumn\",StringType(),True), \\\n", |
| 93 | + " StructField(\"defaultcolumn\",IntegerType(),True), \\\n", |
| 94 | + " StructField(\"largecolumn\", StringType(), True)\\\n", |
99 | 95 | " ])\n", |
100 | 96 | " \n", |
101 | | - "df = spark.createdataframe(data=data,schema=schema)\n", |
102 | | - "df.printschema()\n", |
| 97 | + "df = spark.createDataFrame(data=data,schema=schema)\n", |
| 98 | + "df.printSchema()\n", |
103 | 99 | "df.show(truncate=100)\n", |
104 | 100 | "\n", |
105 | 101 | "writecfg = {\n", |
106 | 102 | " \"spark.cosmos.accountendpoint\": cosmosendpoint,\n", |
107 | | - " \"spark.cosmos.database\": \"test\",\n", |
108 | | - " \"spark.cosmos.container\": \"testitems\",\n", |
| 103 | + " \"spark.cosmos.accountDataResolverServiceName\": accountDataResolverName,\n", |
| 104 | + " \"spark.cosmos.database\": \"Test\",\n", |
| 105 | + " \"spark.cosmos.container\": \"TestItems\",\n", |
109 | 106 | " \"spark.cosmos.write.strategy\": \"itemappend\",\n", |
110 | 107 | " \"spark.cosmos.write.bulk.enabled\": \"true\", \n", |
111 | 108 | " \"cosmos.auth.sample.enabled\": \"true\",\n", |
112 | 109 | " # masterkey\n", |
113 | | - " #\"cosmos.auth.sample.authtype\": \"masterkey\",\n", |
| 110 | + " #\"cosmos.auth.sample.authType\": \"masterkey\",\n", |
114 | 111 | " #\"cosmos.auth.sample.key.secret\": cosmosmasterkey,\n", |
115 | 112 | " #\n", |
116 | 113 | " # aad auth with managed identity\n", |
117 | | - " #\"cosmos.auth.sample.authtype\": \"managedidentity\",\n", |
118 | | - " #\"cosmos.auth.sample.tenantid\": \"72f988bf-86f1-41af-91ab-2d7cd011db47\",\n", |
119 | | - " #\"cosmos.auth.sample.subscriptionid\": \"8fba6d4f-7c37-4d13-9063-fd58ad2b86e2\",\n", |
| 114 | + " #\"cosmos.auth.sample.authType\": \"managedidentity\",\n", |
| 115 | + " #\"cosmos.auth.sample.tenantId\": \"72f988bf-86f1-41af-91ab-2d7cd011db47\",\n", |
| 116 | + " #\"cosmos.auth.sample.subscriptionId\": \"8fba6d4f-7c37-4d13-9063-fd58ad2b86e2\",\n", |
120 | 117 | " #\"cosmos.auth.sample.resourcegroupname\": \"fabianm-oltp-spark-workshop\"\n", |
121 | 118 | " #\n", |
122 | 119 | " # aad auth with service principal (password)\n", |
123 | | - " #\"cosmos.auth.sample.authtype\": \"serviceprincipal\",\n", |
124 | | - " #\"cosmos.auth.sample.tenantid\": \"72f988bf-86f1-41af-91ab-2d7cd011db47\",\n", |
125 | | - " #\"cosmos.auth.sample.subscriptionid\": \"8fba6d4f-7c37-4d13-9063-fd58ad2b86e2\",\n", |
| 120 | + " #\"cosmos.auth.sample.authType\": \"serviceprincipal\",\n", |
| 121 | + " #\"cosmos.auth.sample.tenantId\": \"72f988bf-86f1-41af-91ab-2d7cd011db47\",\n", |
| 122 | + " #\"cosmos.auth.sample.subscriptionId\": \"8fba6d4f-7c37-4d13-9063-fd58ad2b86e2\",\n", |
126 | 123 | " #\"cosmos.auth.sample.resourcegroupname\": \"fabianm-oltp-spark-workshop\",\n", |
127 | | - " #\"cosmos.auth.sample.serviceprincipal.clientid\": \"bd559cf4-786d-43ae-9ff6-eb83c5952c73\",\n", |
| 124 | + " #\"cosmos.auth.sample.serviceprincipal.clientId\": \"bd559cf4-786d-43ae-9ff6-eb83c5952c73\",\n", |
128 | 125 | " #\"cosmos.auth.sample.serviceprincipal.clientsecret\": cosmosserviceprincipalpassword\n", |
129 | 126 | " #\n", |
130 | 127 | " # aad auth with service principal (cert)\n", |
131 | | - " #\"cosmos.auth.sample.authtype\": \"serviceprincipal\",\n", |
132 | | - " #\"cosmos.auth.sample.tenantid\": \"72f988bf-86f1-41af-91ab-2d7cd011db47\",\n", |
133 | | - " #\"cosmos.auth.sample.subscriptionid\": \"8fba6d4f-7c37-4d13-9063-fd58ad2b86e2\",\n", |
| 128 | + " #\"cosmos.auth.sample.authType\": \"serviceprincipal\",\n", |
| 129 | + " #\"cosmos.auth.sample.tenantId\": \"72f988bf-86f1-41af-91ab-2d7cd011db47\",\n", |
| 130 | + " #\"cosmos.auth.sample.subscriptionId\": \"8fba6d4f-7c37-4d13-9063-fd58ad2b86e2\",\n", |
134 | 131 | " #\"cosmos.auth.sample.resourcegroupname\": \"fabianm-oltp-spark-workshop\",\n", |
135 | | - " #\"cosmos.auth.sample.serviceprincipal.clientid\": \"88436299-945f-4824-8183-2cbddf981388\",\n", |
| 132 | + " #\"cosmos.auth.sample.serviceprincipal.clientId\": \"88436299-945f-4824-8183-2cbddf981388\",\n", |
136 | 133 | " #\"cosmos.auth.sample.serviceprincipal.cert\": cert_data\n", |
137 | 134 | "}\n", |
138 | 135 | "\n", |
|
148 | 145 | "metadata": { |
149 | 146 | "application/vnd.databricks.v1+notebook": { |
150 | 147 | "dashboards": [], |
| 148 | + "environmentMetadata": null, |
151 | 149 | "language": "python", |
152 | | - "notebookmetadata": { |
153 | | - "mostrecentlyexecutedcommandwithimplicitdf": { |
154 | | - "commandid": 3298457839905717, |
155 | | - "dataframes": [ |
156 | | - "_sqldf" |
157 | | - ] |
158 | | - }, |
159 | | - "pythonindentunit": 4 |
| 150 | + "notebookMetadata": { |
| 151 | + "pythonIndentUnit": 4 |
160 | 152 | }, |
161 | | - "notebookname": "accounttokenresolversample", |
| 153 | + "notebookName": "AccountTokenResolverSample", |
162 | 154 | "widgets": {} |
163 | 155 | } |
164 | 156 | }, |
|
0 commit comments