@@ -4,7 +4,7 @@ description: Learn about Azure Cosmos DB transactional (row-based) and analytica
4
4
author : Rodrigossz
5
5
ms.service : cosmos-db
6
6
ms.topic : conceptual
7
- ms.date: 03/24/2022
7
+ ms.date: 04/18/2023
8
8
ms.author : rosouz
9
9
ms.custom : seo-nov-2020, devx-track-azurecli, ignite-2022
10
10
ms.reviewer : mjbrown
@@ -446,24 +446,24 @@ the MongoDB `_id` field is fundamental to every collection in MongoDB and origin
446
446
447
447
###### Working with the MongoDB `_id` field in Spark
448
448
449
+ The following example works with both Spark 2.x and 3.x:
450
+
449
451
```Scala
450
- import org.apache.spark.sql.types._
451
- val simpleSchema = StructType(Array(
452
- StructField("_id", StructType(Array(StructField("objectId",BinaryType,true)) ),true),
453
- StructField("id", StringType, true)
454
- ))
455
-
456
- df = spark.read.format("cosmos.olap")\
457
- .option("spark.synapse.linkedService", "<enter linked service name>")\
458
- .option("spark.cosmos.container", "<enter container name>")\
459
- .schema(simpleSchema)
460
- .load()
452
+ val df = spark.read.format("cosmos.olap").option("spark.synapse.linkedService", "xxxx").option("spark.cosmos.container", "xxxx").load()
461
453
462
- df.select("id", "_id.objectId").show()
463
- ```
454
+ val convertObjectId = udf((bytes: Array[Byte]) => {
455
+ val builder = new StringBuilder
464
456
465
- > [!NOTE]
466
- > This workaround was designed to work with Spark 2.4.
457
+ for (b <- bytes) {
458
+ builder.append(String.format("%02x", Byte.box(b)))
459
+ }
460
+ builder.toString
461
+ }
462
+ )
463
+
464
+ val dfConverted = df.withColumn("objectId", col("_id.objectId")).withColumn("convertedObjectId", convertObjectId(col("_id.objectId"))).select("id", "objectId", "convertedObjectId")
465
+ display(dfConverted)
466
+ ```
467
467
468
468
###### Working with the MongoDB `_id` field in SQL
469
469
0 commit comments