Commit 9049048

Merge pull request #50594 from theresa-i/use-apache-spark
Updates
2 parents a6bc856 + 58156e7 commit 9049048

21 files changed: +28 −88 lines changed

learn-pr/wwl/use-apache-spark-work-files-lakehouse/1-introduction.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Introduction
 metadata:
   title: Introduction
   description: "Introduction"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 1
 content: |
   [!include[](includes/1-introduction.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/2-spark.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Prepare to use Apache Spark
 metadata:
   title: Prepare to use Apache Spark
   description: "Prepare to use Apache Spark"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 3
 content: |
   [!include[](includes/2-spark.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/3-spark-code.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Run Spark code
 metadata:
   title: Run Spark code
   description: "Run Spark code"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 3
 content: |
   [!include[](includes/3-spark-code.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/4-dataframe.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Work with data in a Spark dataframe
 metadata:
   title: Work with data in a Spark dataframe
   description: "Work with data in a Spark dataframe"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 5
 content: |
   [!include[](includes/4-dataframe.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/5-spark-sql.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Work with data using Spark SQL
 metadata:
   title: Work with data using Spark SQL
   description: "Work with data using Spark SQL"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 7
 content: |
   [!include[](includes/5-spark-sql.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/6-visualize-data.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Visualize data in a Spark notebook
 metadata:
   title: Visualize data in a Spark notebook
   description: "Visualize data in a Spark notebook"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 5
 content: |
   [!include[](includes/6-visualize-data.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/7-exercise-spark.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Exercise - Analyze data with Apache Spark
 metadata:
   title: Exercise - Analyze data with Apache Spark
   description: "Exercise - Analyze data with Apache Spark"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 45
 content: |
   [!include[](includes/7-exercise-spark.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/8-knowledge-check.yml

Lines changed: 2 additions & 6 deletions
@@ -6,14 +6,10 @@ metadata:
   ai_generated_module_assessment: true
   title: Module assessment
   description: "Knowledge check"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 3
 quiz:
   title: "Check your knowledge"

learn-pr/wwl/use-apache-spark-work-files-lakehouse/9-summary.yml

Lines changed: 2 additions & 6 deletions
@@ -4,14 +4,10 @@ title: Summary
 metadata:
   title: Summary
   description: "Summary"
-  ms.date: 04/16/2025
-  author: wwlpublish
+  ms.date: 05/22/2025
+  author: theresa-i
   ms.author: theresai
   ms.topic: unit
-  ms.custom:
-  - build-2023
-  - build-2023-dataai
-  - build-2023-fabric
 durationInMinutes: 1
 content: |
   [!include[](includes/9-summary.md)]

learn-pr/wwl/use-apache-spark-work-files-lakehouse/includes/2-spark.md

Lines changed: 8 additions & 8 deletions
@@ -22,9 +22,9 @@ Microsoft Fabric provides a *starter pool* in each workspace, enabling Spark job
 Additionally, you can create custom Spark pools with specific node configurations that support your particular data processing needs.
 
 > [!NOTE]
-> The ability to customize Spark pool settings can be disabled by Fabric administrators at the Fabric Capacity level. For more information, see **[Capacity administration settings for Data Engineering and Data Science](/fabric/data-engineering/capacity-settings-overview)** in the Fabric documentation.
+> The ability to customize Spark pool settings can be disabled by Fabric administrators at the Fabric Capacity level. For more information, see **[Capacity administration settings for Data Engineering and Data Science](/fabric/data-engineering/capacity-settings-overview?azure-portal=true)** in the Fabric documentation.
 
-You can manage settings for the starter pool and create new Spark pools in the **Data Engineering/Science** section of the workspace settings.
+You can manage settings for the starter pool and create new Spark pools in the **Admin portal** section of the workspace settings, under **Capacity settings**, then **Data Engineering/Science Settings**.
 
 ![Screenshot of the Spark settings page in Microsoft Fabric.](../media/spark-settings.png)

@@ -37,7 +37,7 @@ Specific configuration settings for Spark pools include:
 If you create one or more custom Spark pools in a workspace, you can set one of them (or the starter pool) as the default pool to be used if a specific pool is not specified for a given Spark job.
 
 > [!TIP]
-> For more information about managing Spark pools in Microsoft Fabric, see **[Configuring starter pools in Microsoft Fabric](/fabric/data-engineering/configure-starter-pools)** and **[How to create custom Spark pools in Microsoft Fabric](/fabric/data-engineering/create-custom-spark-pools)** in the Microsoft Fabric documentation.
+> For more information about managing Spark pools in Microsoft Fabric, see **[Configuring starter pools in Microsoft Fabric](/fabric/data-engineering/configure-starter-pools?azure-portal=true)** and **[How to create custom Spark pools in Microsoft Fabric](/fabric/data-engineering/create-custom-spark-pools?azure-portal=true)** in the Microsoft Fabric documentation.
 
 ## Runtimes and environments

@@ -50,7 +50,7 @@ In some cases, organizations may need to define multiple *environments* to suppo
 Microsoft Fabric supports multiple Spark runtimes, and will continue to add support for new runtimes as they are released. You can use the workspace settings interface to specify the Spark runtime that is used by the default environment when a Spark pool is started.
 
 > [!TIP]
-> For more information about Spark runtimes in Microsoft Fabric, see **[Apache Spark Runtimes in Fabric](/fabric/data-engineering/runtime)** in the Microsoft Fabric documentation.
+> For more information about Spark runtimes in Microsoft Fabric, see **[Apache Spark Runtimes in Fabric](/fabric/data-engineering/runtime?azure-portal=true)** in the Microsoft Fabric documentation.
 
 ### Environments in Microsoft Fabric

@@ -71,7 +71,7 @@ When creating an environment, you can:
 After creating at least one custom environment, you can specify it as the default environment in the workspace settings.
 
 > [!TIP]
-> For more information about using custom environments in Microsoft Fabric, see **[Create, configure, and use an environment in Microsoft Fabric](/fabric/data-engineering/create-and-use-environment)** in the Microsoft Fabric documentation.
+> For more information about using custom environments in Microsoft Fabric, see **[Create, configure, and use an environment in Microsoft Fabric](/fabric/data-engineering/create-and-use-environment?azure-portal=true)** in the Microsoft Fabric documentation.
 
 ## Additional Spark configuration options

@@ -99,7 +99,7 @@ To enable the native execution engine for a specific script or notebook, you can
 ```
 
 > [!TIP]
-> For more information about the native execution engine, see **[Native execution engine for Fabric Spark](/fabric/data-engineering/native-execution-engine-overview)** in the Microsoft Fabric documentation.
+> For more information about the native execution engine, see **[Native execution engine for Fabric Spark](/fabric/data-engineering/native-execution-engine-overview?azure-portal=true)** in the Microsoft Fabric documentation.
 
 ### High concurrency mode

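The hunk above ends just below a code block whose body falls outside the diff context. As a hedged sketch (not part of this commit), enabling the native execution engine for an individual notebook session typically uses a `%%configure` cell that sets the `spark.native.enabled` property, following the pattern described in the linked Fabric documentation:

```
%%configure
{
    "conf": {
        "spark.native.enabled": "true"
    }
}
```

Run the cell at the start of the notebook, before any Spark code executes, so the session is created with the property applied.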
@@ -108,7 +108,7 @@ When you run Spark code in Microsoft Fabric, a Spark session is initiated. You c
 To enable high concurrency mode, use the **Data Engineering/Science** section of the workspace settings interface.
 
 > [!TIP]
-> For more information about high concurrency mode, see **[High concurrency mode in Apache Spark for Fabric](/fabric/data-engineering/high-concurrency-overview)** in the Microsoft Fabric documentation.
+> For more information about high concurrency mode, see **[High concurrency mode in Apache Spark for Fabric](/fabric/data-engineering/high-concurrency-overview?azure-portal=true)** in the Microsoft Fabric documentation.
 
 ### Automatic MLFlow logging

@@ -119,5 +119,5 @@ MLFlow is an open source library that is used in data science workloads to manag
 Administrators can manage Spark settings at a Fabric capacity level, enabling them to restrict and override Spark settings in workspaces within an organization.
 
 > [!TIP]
-> For more information about managing Spark configuration at the Fabric capacity level, see **[Configure and manage data engineering and data science settings for Fabric capacities](/fabric/data-engineering/capacity-settings-management)** in the Microsoft Fabric documentation.
+> For more information about managing Spark configuration at the Fabric capacity level, see **[Configure and manage data engineering and data science settings for Fabric capacities](/fabric/data-engineering/capacity-settings-management?azure-portal=true)** in the Microsoft Fabric documentation.
