diff --git a/data-explorer/kusto-tocs/management/toc.yml b/data-explorer/kusto-tocs/management/toc.yml index d3b1ecee9f..16321c2077 100644 --- a/data-explorer/kusto-tocs/management/toc.yml +++ b/data-explorer/kusto-tocs/management/toc.yml @@ -238,6 +238,8 @@ items: items: - name: Columns management href: /kusto/management/columns?view=azure-data-explorer&preserve-view=true + - name: Change column type without data loss + href: /kusto/management/change-column-type-without-data-loss?view=azure-data-explorer&preserve-view=true - name: .alter column command href: /kusto/management/alter-column?view=azure-data-explorer&preserve-view=true - name: .drop column command @@ -614,25 +616,25 @@ items: items: - name: Row level security policy href: /kusto/management/row-level-security-policy?view=azure-data-explorer&preserve-view=true - displayName: row_level_security policy + displayName: row_level_security policy, row level security - name: .alter materialized-view policy row_level_security command href: /kusto/management/alter-materialized-view-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true - displayName: .alter materialized-view row level security policy + displayName: .alter materialized-view row level security policy, row level security - name: .alter table policy row_level_security command href: /kusto/management/alter-table-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true - displayName: .alter table row level security policy, .alter table row_level_security policy + displayName: .alter table row level security policy, .alter table row_level_security policy, row level security - name: .delete materialized-view policy row_level_security command - displayName: .delete materialized-view policy row_level_security command + displayName: .delete materialized-view policy row_level_security command, row level security href: 
/kusto/management/delete-materialized-view-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true - name: .delete table policy row_level_security command - displayName: .delete table policy row_level_security command + displayName: .delete table policy row_level_security command, row level security href: /kusto/management/delete-table-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true - name: .show materialized-view policy row_level_security command href: /kusto/management/show-materialized-view-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true - displayName: .show materialized view row level security policy, .show materialized-view row_level_security policy, .show materialized view row_level_security policy + displayName: .show materialized view row level security policy, .show materialized-view row_level_security policy, .show materialized view row_level_security policy, row level security - name: .show table policy row_level_security command href: /kusto/management/show-table-row-level-security-policy-command?view=azure-data-explorer&preserve-view=true - displayName: .show table row level security policy, .show table row_level_security policy + displayName: .show table row level security policy, .show table row_level_security policy, row level security - name: Row order items: - name: Row order policy command diff --git a/data-explorer/kusto-tocs/query/toc.yml b/data-explorer/kusto-tocs/query/toc.yml index 99e6d5a541..4287a2c64d 100644 --- a/data-explorer/kusto-tocs/query/toc.yml +++ b/data-explorer/kusto-tocs/query/toc.yml @@ -1441,6 +1441,10 @@ items: href: /kusto/query/graph-match-operator?view=azure-data-explorer&preserve-view=true - name: graph-to-table href: /kusto/query/graph-to-table-operator?view=azure-data-explorer&preserve-view=true + - name: graph-shortest-paths + href: /kusto/query/graph-shortest-paths-operator?view=azure-data-explorer&preserve-view=true + - name: 
graph-mark-components + href: /kusto/query/graph-mark-components-operator?view=azure-data-explorer&preserve-view=true - name: Geospatial items: - name: Geospatial clustering overview @@ -1753,6 +1757,8 @@ items: href: /kusto/query/diffpatterns-text-plugin?view=azure-data-explorer&preserve-view=true - name: Query connectivity plugins items: + - name: ai_embed_text plugin + href: /kusto/query/ai-embed-text-plugin?view=azure-data-explorer&preserve-view=true - name: azure_digital_twins_query_request plugin href: /kusto/query/azure-digital-twins-query-request-plugin?view=azure-data-explorer&preserve-view=true - name: cosmosdb_sql_request plugin diff --git a/data-explorer/kusto/management/managed-identity-policy.md b/data-explorer/kusto/management/managed-identity-policy.md index 3cb53f43ad..f3e20c44a4 100644 --- a/data-explorer/kusto/management/managed-identity-policy.md +++ b/data-explorer/kusto/management/managed-identity-policy.md @@ -3,7 +3,7 @@ title: Kusto ManagedIdentity policy description: Learn about the ManagedIdentity policy to control managed identities. ms.reviewer: slneimer ms.topic: reference -ms.date: 08/11/2024 +ms.date: 11/12/2024 monikerRange: "azure-data-explorer" --- # Managed Identity policy @@ -56,6 +56,7 @@ The following values specify authentication to a `usage` using the configured ma |---|---| | `All` | All current and future usages are allowed. | | `AutomatedFlows`| Run a [Continuous Export](data-export/continuous-data-export.md) or [Update Policy](update-policy.md) automated flow on behalf of a managed identity. | +| `AzureAI`| Authenticate to an Azure OpenAI service using the *ai_embed_text* plugin with a managed identity. | | `DataConnection` | Authenticate to data connections to an Event Hub or an Event Grid. | |`ExternalTable` | Authenticate to external tables using connection strings configured with a managed identity. | | `NativeIngestion` | Authenticate to an SDK for native ingestion from an external source. 
| diff --git a/data-explorer/kusto/query/ai-embed-text-plugin.md b/data-explorer/kusto/query/ai-embed-text-plugin.md new file mode 100644 index 0000000000..e1d628f32d --- /dev/null +++ b/data-explorer/kusto/query/ai-embed-text-plugin.md @@ -0,0 +1,143 @@ +--- +title: ai_embed_text plugin (Preview) +description: Learn how to use the ai_embed_text plugin to embed text via language models, enabling various AI-related scenarios such as RAG application and semantic search. +ms.reviewer: alexans +ms.topic: reference +ms.date: 11/12/2024 +monikerRange: "azure-data-explorer" +--- +# ai_embed_text plugin (Preview) + +> [!INCLUDE [applies](../includes/applies-to-version/applies.md)] [!INCLUDE [azure-data-explorer](../includes/applies-to-version/azure-data-explorer.md)] + +The `ai_embed_text` plugin allows embedding of text using language models, enabling various AI-related scenarios such as Retrieval Augmented Generation (RAG) applications and semantic search. The plugin supports Azure OpenAI Service embedding models accessed using managed identity. + +## Prerequisites + +* An Azure OpenAI Service configured with [managed identity](/azure/ai-services/openai/how-to/managed-identity) +* [Managed identity and callout policies](#configure-managed-identity-and-callout-policies) configured to allow communication with Azure OpenAI services + +## Syntax + +`evaluate` `ai_embed_text` `(`*text*, *connectionString* [`,` *options* [`,` *IncludeErrorMessages*]]`)` + +[!INCLUDE [syntax-conventions-note](../includes/syntax-conventions-note.md)] + +## Parameters + +| Name | Type | Required | Description | +|--|--|--|--| +| *text* | `string` | :heavy_check_mark: | The text to embed. The value can be a column reference or a constant scalar. | +| *connectionString* | `string` | :heavy_check_mark: | The connection string for the language model in the format `<ModelDeploymentUri>;<AuthenticationMethod>`; replace `<ModelDeploymentUri>` and `<AuthenticationMethod>` with the AI model deployment URI and the authentication method respectively. 
| +| *options* | `dynamic` | | The options that control calls to the embedding model endpoint. See [Options](#options). | +| *IncludeErrorMessages* | `bool` | | Indicates whether to output errors in a new column in the output table. Default value: `false`. | + +## Options + +The following table describes the options that control the way the requests are made to the embedding model endpoint. + +| Name | Type | Description | +|--|--|--| +| `RecordsPerRequest` | `int` | Specifies the number of records to process per request. Default value: `1`. | +| `CharsPerRequest` | `int` | Specifies the maximum number of characters to process per request. Default value: `0` (unlimited). Azure OpenAI counts tokens, with each token approximately translating to four characters. | +| `RetriesOnThrottling` | `int` | Specifies the number of retry attempts when throttling occurs. Default value: `0`. | +| `GlobalTimeout` | `timespan` | Specifies the maximum time to wait for a response from the embedding model. Default value: `null`. | +| `ModelParameters` | `dynamic` | Parameters specific to the embedding model, such as embedding dimensions or user identifiers for monitoring purposes. Default value: `null`. | +| `ReturnSuccessfulOnly` | `bool` | Indicates whether to return only the successfully processed items. Default value: `false`. If the *IncludeErrorMessages* parameter is set to `true`, this option is always set to `false`. | + +## Configure managed identity and callout policies + +To use the `ai_embed_text` plugin, you must configure the following policies: + +* [managed identity](../management/managed-identity-policy.md): Allow the system-assigned managed identity to authenticate to Azure OpenAI services. +* [callout](../management/callout-policy.md): Authorize the AI model endpoint domain. + +To configure these policies, use the commands in the following steps: + +1. 
Configure the managed identity: + + + ~~~kusto + .alter-merge cluster policy managed_identity + ``` + [ + { + "ObjectId": "system", + "AllowedUsages": "AzureAI" + } + ] + ``` + ~~~ + +1. Configure the callout policy: + + + ~~~kusto + .alter-merge cluster policy callout + ``` + [ + { + "CalloutType": "azure_openai", + "CalloutUriRegex": "https://[A-Za-z0-9\\-]{3,63}\\.openai\\.azure\\.com/.*", + "CanCall": true + } + ] + ``` + ~~~ + +## Returns + +Returns the following new embedding columns: + +* A column with the **_embedding** suffix that contains the embedding values +* If configured to return errors, a column with the **_embedding_error** suffix, which contains error strings or is left empty if the operation is successful. + +Depending on the input type, the plugin returns different results: + +* **Column reference**: Returns one or more records with additional columns that are prefixed by the reference column name. For example, if the input column is named **TextData**, the output columns are named **TextData_embedding** and, if configured to return errors, **TextData_embedding_error**. +* **Constant scalar**: Returns a single record with additional columns that are not prefixed. The column names are **_embedding** and, if configured to return errors, **_embedding_error**. + +## Examples + +The following example embeds the text `Embed this text using AI` using the Azure OpenAI Embedding model. + + +```kusto +let expression = 'Embed this text using AI'; +let connectionString = 'https://myaccount.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-06-01;managed_identity=system'; +evaluate ai_embed_text(expression, connectionString) +``` + +The following example embeds multiple texts using the Azure OpenAI Embedding model. 
+ + +~~~kusto +let connectionString = 'https://myaccount.openai.azure.com/openai/deployments/text-embedding-3-small/embeddings?api-version=2024-06-01;managed_identity=system'; +let options = dynamic({ + "RecordsPerRequest": 10, + "CharsPerRequest": 10000, + "RetriesOnThrottling": 1, + "GlobalTimeout": 2m +}); +datatable(TextData: string) +[ + "First text to embed", + "Second text to embed", + "Third text to embed" +] +| evaluate ai_embed_text(TextData, connectionString, options , true) +~~~ + +## Best practices + +Azure OpenAI embedding models are subject to heavy throttling, and frequent calls to this plugin can quickly reach throttling limits. + +To efficiently use the `ai_embed_text` plugin while minimizing throttling and costs, follow these best practices: + +* **Control request size**: Adjust the number of records (`RecordsPerRequest`) and characters per request (`CharsPerRequest`). +* **Control query timeout**: Set `GlobalTimeout` to a value lower than the query [timeout](../set-timeout-limits.md) to ensure progress isn't lost on successful calls up to that point. +* **Handle rate limits more gracefully**: Set retries on throttling (`RetriesOnThrottling`). 
+ +## Related content + +* [series_cosine_similarity()](series-cosine-similarity-function.md) diff --git a/data-explorer/kusto/query/series-cosine-similarity-function.md b/data-explorer/kusto/query/series-cosine-similarity-function.md index 604e79401d..5e5a98596c 100644 --- a/data-explorer/kusto/query/series-cosine-similarity-function.md +++ b/data-explorer/kusto/query/series-cosine-similarity-function.md @@ -55,3 +55,7 @@ datatable(s1:dynamic, s2:dynamic) |---|---|---| |[0.1,0.2,0.1,0.2]|[0.11,0.2,0.11,0.21]|0.99935343825504| |[0.1,0.2,0.1,0.2]|[1,2,3,4]|0.923760430703401| + +## Related content + +* [ai_embed_text plugin (Preview)](ai-embed-text-plugin.md) diff --git a/data-explorer/kusto/query/toc.yml b/data-explorer/kusto/query/toc.yml index 300d692e4a..7f763c0884 100644 --- a/data-explorer/kusto/query/toc.yml +++ b/data-explorer/kusto/query/toc.yml @@ -1588,6 +1588,8 @@ items: href: diffpatterns-text-plugin.md - name: Query connectivity plugins items: + - name: ai_embed_text plugin + href: ai-embed-text-plugin.md - name: azure_digital_twins_query_request plugin href: azure-digital-twins-query-request-plugin.md - name: cosmosdb_sql_request plugin