
Commit 80a221e

Learn Build Service GitHub App authored and committed
Merging changes synced from https://github.com/MicrosoftDocs/azure-docs-pr (branch live)
2 parents ba30ac6 + 4a4e541 commit 80a221e

File tree

112 files changed (+913, -2488 lines)


.openpublishing.redirection.azure-monitor.json

Lines changed: 48 additions & 2 deletions
@@ -4131,7 +4131,7 @@
     },
     {
       "source_path_from_root": "/articles/azure-monitor/platform/alerts-using-migration-tool.md",
-      "redirect_url": "/azure/azure-monitor/alerts/alerts-using-migration-tool",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-using-migration-tool",
       "redirect_document_id": false
     },
     {
@@ -6259,6 +6259,46 @@
       "redirect_url": "/previous-versions/azure/azure-monitor/autoscale/tutorial-autoscale-performance-schedule",
       "redirect_document_id": false
     },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-automatic-migration.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-automatic-migration",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-classic.overview.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-classic.overview",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-classic-portal.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-classic-portal",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-enable-template.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-enable-template",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-prepare-migration.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-prepare-migration",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-understand-migration.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-understand-migration",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/alerts-webhooks.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/alerts-webhooks",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/alerts/api-alerts.md",
+      "redirect_url": "/previous-versions/azure/azure-monitor/alerts/api-alerts",
+      "redirect_document_id": false
+    },
     {
       "source_path_from_root": "/articles/azure-monitor/essentials/metrics-supported.md",
       "redirect_url": "/azure/azure-monitor/reference/supported-metrics/metrics-index",
@@ -6621,7 +6661,12 @@
     },
     {
       "source_path_from_root": "/articles/azure-monitor/monitor-reference.md",
-      "redirect_url": "/azure/azure-monitor/overview",
+      "redirect_url": "/azure/azure-monitor/monitor-azure-monitor-reference",
+      "redirect_document_id": false
+    },
+    {
+      "source_path_from_root": "/articles/azure-monitor/azure-monitor-monitoring-reference.md",
+      "redirect_url": "/azure/azure-monitor/monitor-azure-monitor-reference",
       "redirect_document_id": false
     },
     {
@@ -6644,5 +6689,6 @@
       "redirect_url": "/azure/azure-monitor/essentials/data-collection-rule-create-edit?tabs=arm#manually-create-a-dcr",
       "redirect_document_id": false
     }
+
   ]
 }
Lines changed: 15 additions & 0 deletions
@@ -0,0 +1,15 @@
{
  "redirections": [
    {
      "source_path_from_root": "/articles/private-multi-access-edge-compute-mec/affirmed-private-network-service-overview.md ",
      "redirect_url": "/azure/private-multi-access-edge-compute-mec/overview",
      "redirect_document_id": false
    },
    {
      "source_path_from_root": "/articles/private-multi-access-edge-compute-mec/deploy-affirmed-private-network-service-solution.md ",
      "redirect_url": "/azure/private-multi-access-edge-compute-mec/overview",
      "redirect_document_id": false

    }
  ]
}

articles/advisor/advisor-resiliency-reviews.md

Lines changed: 2 additions & 2 deletions
@@ -61,8 +61,8 @@ You can manage access to Advisor personalized recommendations using the following
 | **Name** | **Description** |
 |---|:---:|
 |Subscription Reader|View reviews for a workload and recommendations linked to them.|
-|Subscription Owner<br>Subscription Contributor|View reviews for a workload, triage recommendations linked to those reviews, manage review recommendation lifecycle.|
-|Advisor Recommendations Contributor (Assessments and Reviews)|View review recommendations, accept review recommendations, manage review recommendations' lifecycle.|
+|Subscription Owner<br>Subscription Contributor|View reviews for a workload, triage recommendations linked to those reviews, manage the recommendation lifecycle.|
+|Advisor Recommendations Contributor (Assessments and Reviews)|View accepted recommendations, and manage the recommendation lifecycle.|

 You can find detailed instructions on how to assign a role using the Azure portal - [Assign Azure roles using the Azure portal - Azure RBAC](/azure/role-based-access-control/role-assignments-portal?tabs=delegate-condition). Additional information is available in [Steps to assign an Azure role - Azure RBAC](/azure/role-based-access-control/role-assignments-steps).
Lines changed: 102 additions & 0 deletions
@@ -0,0 +1,102 @@
---
title: Azure OpenAI Service getting started with customizing a large language model (LLM)
titleSuffix: Azure OpenAI Service
description: Learn more about the concepts behind customizing an LLM with Azure OpenAI.
ms.topic: conceptual
ms.date: 03/26/2024
ms.service: azure-ai-openai
manager: nitinme
author: mrbullwinkle
ms.author: mbullwin
recommendations: false
---

# Getting started with customizing a large language model (LLM)

There are several techniques for adapting a pre-trained language model to suit a specific task or domain: prompt engineering, RAG (Retrieval Augmented Generation), and fine-tuning. These three techniques are not mutually exclusive; they are complementary methods that can be combined for a specific use case. In this article, we'll explore each technique with illustrative use cases and things to consider, and provide links to resources to learn more and get started with each.

## Prompt engineering

### Definition

[Prompt engineering](./prompt-engineering.md) is a technique that is both art and science: it involves designing prompts for generative AI models. This process uses in-context learning ([zero shot and few shot](./prompt-engineering.md#examples)) and, with iteration, improves the accuracy and relevancy of responses, optimizing the performance of the model.

### Illustrative use cases

A marketing manager at an environmentally conscious company can use prompt engineering to guide the model toward descriptions that are more aligned with their brand's tone and style. For instance, they can add a prompt like "Write a product description for a new line of eco-friendly cleaning products that emphasizes quality, effectiveness, and highlights the use of environmentally friendly ingredients" to the input. This helps the model generate descriptions that are aligned with their brand's values and messaging.

### Things to consider

- **Prompt engineering** is the starting point for generating desired output from generative AI models.

- **Craft clear instructions**: Instructions are commonly used in prompts and guide the model's behavior. Be specific and leave as little room for interpretation as possible. Use analogies and descriptive language to help the model understand your desired outcome.

- **Experiment and iterate**: Prompt engineering is an art that requires experimentation and iteration. Practice and gain experience in crafting prompts for different tasks. Every model might behave differently, so it's important to adapt prompt engineering techniques accordingly.

### Getting started

- [Introduction to prompt engineering](./prompt-engineering.md)
- [Prompt engineering techniques](./advanced-prompt-engineering.md)
- [15 tips to become a better prompt engineer for generative AI](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/15-tips-to-become-a-better-prompt-engineer-for-generative-ai/ba-p/3882935)
- [The basics of prompt engineering (video)](https://www.youtube.com/watch?v=e7w6QV1NX1c)
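
To make the use case above concrete, here's a minimal sketch of sending that engineered prompt to an Azure OpenAI deployment. It assumes the `openai` Python package (v1.x); the endpoint, key, API version, and deployment name are placeholders:

```python
from openai import AzureOpenAI

# Placeholder resource values: substitute your own.
client = AzureOpenAI(
    azure_endpoint="https://YOUR-RESOURCE-NAME.openai.azure.com",
    api_key="YOUR-API-KEY",
    api_version="2024-02-01",
)

response = client.chat.completions.create(
    model="YOUR-DEPLOYMENT-NAME",  # the name of your model deployment
    messages=[
        # The engineered prompt: specific instructions that steer tone and content.
        {
            "role": "user",
            "content": (
                "Write a product description for a new line of eco-friendly "
                "cleaning products that emphasizes quality, effectiveness, and "
                "highlights the use of environmentally friendly ingredients."
            ),
        },
    ],
)
print(response.choices[0].message.content)
```

Iterating on the wording of that message (adding few-shot examples, constraints, or a system message) is the experimentation loop the tips above describe.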

## RAG (Retrieval Augmented Generation)

### Definition

[RAG (Retrieval Augmented Generation)](../../../ai-studio/concepts/retrieval-augmented-generation.md) is a method that integrates external data into a large language model's prompt to generate relevant responses. This approach is particularly beneficial when you have a large corpus of unstructured text covering different topics. It allows answers to be grounded in the organization's knowledge base (KB), providing a more tailored and accurate response.

RAG is also advantageous when answering questions based on an organization's private data, or when the public data that the model was trained on might have become outdated. This helps ensure that responses stay up to date and relevant, regardless of changes in the data landscape.

### Illustrative use case

A corporate HR department wants to provide an intelligent assistant that answers specific employee health insurance questions such as "Are eyeglasses covered?" RAG is used to ingest the extensive and numerous documents associated with insurance plan policies so the assistant can answer these specific types of questions.

### Things to consider

- RAG helps ground AI output in real-world data and reduces the likelihood of fabrication.

- RAG is helpful when there's a need to answer questions based on private, proprietary data.

- RAG is helpful when you want questions answered that are recent (for example, after the cutoff date of when the [model version](./models.md) was last trained).

### Getting started

- [Retrieval Augmented Generation in Azure AI Studio - Azure AI Studio | Microsoft Learn](../../../ai-studio/concepts/retrieval-augmented-generation.md)
- [Retrieval Augmented Generation (RAG) in Azure AI Search](../../../search/retrieval-augmented-generation-overview.md)
- [Retrieval Augmented Generation using Azure Machine Learning prompt flow (preview)](../../../machine-learning/concept-retrieval-augmented-generation.md)
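
The retrieve-then-generate flow can be sketched in a few lines. This is a hypothetical outline, not a specific Azure API: `search_knowledge_base` stands in for whatever retriever you use (for example, a query against Azure AI Search), and `client`/`deployment` are an AzureOpenAI client and deployment name as in the earlier snippet:

```python
from openai import AzureOpenAI

def answer_with_rag(client: AzureOpenAI, deployment: str, question: str, search_knowledge_base):
    # Retrieval step: fetch the document chunks most relevant to the question.
    chunks = search_knowledge_base(question, top=5)
    context = "\n\n".join(chunks)

    # Generation step: ask the model to ground its answer in the retrieved text.
    response = client.chat.completions.create(
        model=deployment,
        messages=[
            {
                "role": "system",
                "content": (
                    "Answer using only the provided context. If the answer "
                    "isn't in the context, say you don't know.\n\nContext:\n" + context
                ),
            },
            {"role": "user", "content": question},
        ],
    )
    return response.choices[0].message.content
```

Because the context is fetched at query time, answers track the current state of the knowledge base rather than the model's training data.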

## Fine-tuning

### Definition

[Fine-tuning](../how-to/fine-tuning.md), specifically [supervised fine-tuning](https://techcommunity.microsoft.com/t5/ai-azure-ai-services-blog/fine-tuning-now-available-with-azure-openai-service/ba-p/3954693?lightbox-message-images-3954693=516596iC5D02C785903595A) in this context, is an iterative process that adapts an existing large language model to a provided training set in order to improve performance, teach the model new skills, or reduce latency. This approach is used when the model needs to learn and generalize over specific topics, particularly when those topics are small in scope.

Fine-tuning requires the use of high-quality training data, in a [special example-based format](../how-to/fine-tuning.md#example-file-format), to create the new fine-tuned large language model. By focusing on specific topics, fine-tuning allows the model to provide more accurate and relevant responses within those areas of focus.

### Illustrative use case

An IT department has been using GPT-4 to convert natural language queries to SQL, but they have found that the responses are not always reliably grounded in their schema, and the cost is prohibitively high.

They fine-tune GPT-3.5-Turbo with hundreds of requests and correct responses, and produce a model that performs better than the base model with lower costs and latency.

### Things to consider

- Fine-tuning is an advanced capability; it enhances an LLM with post-cutoff-date knowledge and/or domain-specific knowledge. Start by evaluating the baseline performance of a standard model against your requirements before considering this option.

- Having a baseline for performance without fine-tuning is essential for knowing whether fine-tuning improved model performance. Fine-tuning with bad data makes the base model worse, but without a baseline, it's hard to detect regressions.

- Good cases for fine-tuning include steering the model to output content in a specific and customized style, tone, or format, or tasks where the information needed to steer the model is too long or complex to fit into the prompt window.

- Fine-tuning costs:

  - Fine-tuning can reduce costs across two dimensions: (1) by using fewer tokens, depending on the task, and (2) by using a smaller model (for example, GPT-3.5 Turbo can potentially be fine-tuned to achieve the same quality as GPT-4 on a particular task).

  - Fine-tuning has upfront costs for training the model, and additional hourly costs for hosting the custom model once it's deployed.

### Getting started

- [When to use Azure OpenAI fine-tuning](./fine-tuning-considerations.md)
- [Customize a model with fine-tuning](../how-to/fine-tuning.md)
- [Azure OpenAI GPT 3.5 Turbo fine-tuning tutorial](../tutorials/fine-tune.md)
- [To fine-tune or not to fine-tune? (Video)](https://www.youtube.com/watch?v=0Jo-z-MFxJs)
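
For the NL-to-SQL use case above, supervised fine-tuning needs a training file in the example-based format linked earlier; for chat models this is JSON Lines, one complete chat per line ending with the ideal assistant reply. A minimal, hypothetical sketch (the database schema is invented for illustration):

```python
import json

# Each training example is a full conversation ending with the ideal answer.
# The orders table and ship_country column are invented for illustration.
examples = [
    {
        "messages": [
            {"role": "system", "content": "You translate questions about the sales database into SQL."},
            {"role": "user", "content": "How many orders shipped to Denmark?"},
            {"role": "assistant", "content": "SELECT COUNT(*) FROM orders WHERE ship_country = 'Denmark';"},
        ]
    },
    # ...hundreds more request/response pairs, per the use case above...
]

# Training files are JSON Lines: one serialized example object per line.
with open("training_set.jsonl", "w", encoding="utf-8") as f:
    for example in examples:
        f.write(json.dumps(example) + "\n")
```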

articles/ai-services/openai/concepts/use-your-data.md

Lines changed: 10 additions & 3 deletions
@@ -350,6 +350,10 @@ You can modify the following additional settings in the **Data parameters** section
 |**Retrieved documents** | This parameter is an integer that can be set to 3, 5, 10, or 20, and controls the number of document chunks provided to the large language model for formulating the final response. By default, this is set to 5. The search process can be noisy and sometimes, due to chunking, relevant information might be spread across multiple chunks in the search index. Selecting a top-K number, like 5, ensures that the model can extract relevant information, despite the inherent limitations of search and chunking. However, increasing the number too high can potentially distract the model. Additionally, the maximum number of documents that can be effectively used depends on the version of the model, as each has a different context size and capacity for handling documents. If you find that responses are missing important context, try increasing this parameter. This is the `topNDocuments` parameter in the API, and is 5 by default. |
 | **Strictness** | Determines the system's aggressiveness in filtering search documents based on their similarity scores. The system queries Azure Search or other document stores, then decides which documents to provide to large language models like ChatGPT. Filtering out irrelevant documents can significantly enhance the performance of the end-to-end chatbot. Some documents are excluded from the top-K results if they have low similarity scores before forwarding them to the model. This is controlled by an integer value ranging from 1 to 5. Setting this value to 1 means that the system will minimally filter documents based on search similarity to the user query. Conversely, a setting of 5 indicates that the system will aggressively filter out documents, applying a very high similarity threshold. If you find that the chatbot omits relevant information, lower the filter's strictness (set the value closer to 1) to include more documents. Conversely, if irrelevant documents distract the responses, increase the threshold (set the value closer to 5). This is the `strictness` parameter in the API, and set to 3 by default. |

+### Uncited references
+
+It's possible for the model to return `"TYPE":"UNCITED_REFERENCE"` instead of `"TYPE":"CONTENT"` in the API for documents that are retrieved from the data source but not included in the citation. This can be useful for debugging, and you can control this behavior by modifying the **strictness** and **retrieved documents** runtime parameters described above.
+
 ### System message

 You can define a system message to steer the model's reply when using Azure OpenAI On Your Data. This message allows you to customize your replies on top of the retrieval augmented generation (RAG) pattern that Azure OpenAI On Your Data uses. The system message is used in addition to an internal base prompt to provide the experience. To support this, we truncate the system message after a specific [number of tokens](#token-usage-estimation-for-azure-openai-on-your-data) to ensure the model can answer questions using your data. If you are defining extra behavior on top of the default experience, ensure that your system prompt is detailed and explains the exact expected customization.
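
As a sketch of where `topNDocuments` and `strictness` sit in a request, here's a hypothetical chat-with-your-data call against the preview extensions endpoint. The resource names, keys, index, and API version are placeholders, and the exact payload shape varies by API version, so check the reference for the version you target:

```python
import requests

# Placeholder resource values: substitute your own.
url = (
    "https://YOUR-AOAI-RESOURCE.openai.azure.com/openai/deployments/"
    "YOUR-DEPLOYMENT/extensions/chat/completions?api-version=2023-12-01-preview"
)
payload = {
    "messages": [{"role": "user", "content": "Are eyeglasses covered?"}],
    "dataSources": [
        {
            "type": "AzureCognitiveSearch",
            "parameters": {
                "endpoint": "https://YOUR-SEARCH-RESOURCE.search.windows.net",
                "key": "YOUR-SEARCH-ADMIN-KEY",
                "indexName": "YOUR-INDEX",
                "topNDocuments": 5,  # the "Retrieved documents" setting above
                "strictness": 3,     # similarity filtering, 1 (loose) to 5 (strict)
            },
        }
    ],
}
response = requests.post(url, json=payload, headers={"api-key": "YOUR-AOAI-KEY"})
print(response.json())
```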
@@ -550,10 +554,9 @@ token_output = TokenEstimator.estimate_tokens(input_text)

 ## Troubleshooting

-### Failed ingestion jobs
-
-To troubleshoot a failed job, always look out for errors or warnings specified either in the API response or Azure OpenAI studio. Here are some of the common errors and warnings:
+To troubleshoot failed operations, always look out for errors or warnings specified either in the API response or Azure OpenAI Studio. Here are some of the common errors and warnings:

+### Failed ingestion jobs

 **Quota Limitations Issues**

@@ -583,6 +586,10 @@

 This means the storage account isn't accessible with the given credentials. In this case, please review the storage account credentials passed to the API and ensure the storage account isn't hidden behind a private endpoint (if a private endpoint isn't configured for this resource).

+### 503 errors when sending queries with Azure AI Search
+
+Each user message can translate to multiple search queries, all of which get sent to the search resource in parallel. This can produce throttling when the number of search replicas and partitions is low. The maximum number of queries per second that a single partition and single replica can support might not be sufficient. In this case, consider increasing your replicas and partitions, or adding sleep/retry logic to your application. See the [Azure AI Search documentation](../../../search/performance-benchmarks.md) for more information.
+
 ## Regional availability and model support

 You can use Azure OpenAI On Your Data with an Azure OpenAI resource in the following regions:
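
A minimal sketch of the sleep/retry mitigation mentioned in the 503 section above. `send_query` is a placeholder for whatever callable issues the chat-with-data request, and the assumption is that it raises an exception exposing a `status_code` attribute when throttled:

```python
import random
import time

def with_retries(send_query, max_attempts: int = 5):
    """Retry a throttled call with exponential backoff plus jitter."""
    for attempt in range(max_attempts):
        try:
            return send_query()
        except Exception as err:
            status = getattr(err, "status_code", None)
            if status != 503 or attempt == max_attempts - 1:
                raise  # not throttling, or retry budget exhausted
            # Back off 1s, 2s, 4s, ... with jitter before trying again.
            time.sleep(2 ** attempt + random.random())
```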
