Commit 86310df

Merge pull request #274865 from samtarver/ingestion-agent-docs-fixes: Ingestion agent docs fixes
2 parents: 5119c9b + c21254a

File tree: 3 files changed, +28 -14 lines

articles/operator-insights/ingestion-agent-configuration-reference.md

Lines changed: 12 additions & 10 deletions

@@ -8,6 +8,7 @@ ms.service: operator-insights
 ms.topic: conceptual
 ms.date: 12/06/2023
 ---
+
 # Configuration reference for Azure Operator Insights ingestion agent
 
 This reference provides the complete set of configuration for the [Azure Operator Insights ingestion agent](ingestion-agent-overview.md), listing all fields with explanatory comments.
@@ -22,12 +23,12 @@ This reference shows two pipelines: one with an MCC EDR source and one with an S
 
 ```yaml
 # A unique identifier for this agent instance. Reserved URL characters must be percent-encoded. It's included in the upload path to the Data Product's input storage account.
-agent_id: agent01
+agent_id: agent01
 # Config for secrets providers. We support reading secrets from Azure Key Vault and from the VM's local filesystem.
 # Multiple secret providers can be defined and each must be given a unique name, which is referenced later in the config.
 # A secret provider of type `key_vault` which contains details required to connect to the Azure Key Vault and allow connection to the Data Product's input storage account. This is always required.
 # A secret provider of type `file_system`, which specifies a directory on the VM where secrets are stored. For example for an SFTP pull source, for storing credentials for connecting to an SFTP server.
-secret_providers:
+secret_providers:
   - name: data_product_keyvault_mi
     key_vault:
       vault_name: contoso-dp-kv
@@ -73,7 +74,7 @@ sink:
   # Optional. A string giving an optional base path to use in the container in the Data Product's input storage account. Reserved URL characters must be percent-encoded. See the Data Product for what value, if any, is required.
   base_path: base-path
   sas_token:
-    # This must reference a secret provider configured above.
+    # This must reference a secret provider configured above.
     secret_provider: data_product_keyvault_mi
     # The name of a secret in the corresponding provider.
     # This will be the name of a secret in the Key Vault.
@@ -102,13 +103,13 @@ source:
   mcc_edrs:
     # The maximum amount of data to buffer in memory before uploading. Units are B, KiB, MiB, GiB, etc.
     message_queue_capacity: 32 MiB
-    # Quick check on the maximum RAM that the agent should use.
-    # This is a guide to check the other tuning parameters, rather than a hard limit.
+    # Quick check on the maximum RAM that the agent should use.
+    # This is a guide to check the other tuning parameters, rather than a hard limit.
    maximum_overall_capacity: 1216 MiB
     listener:
       # The TCP port to listen on. Must match the port MCC is configured to send to. Defaults to 36001.
       port: 36001
-      # EDRs greater than this size are dropped. Subsequent EDRs continue to be processed.
+      # EDRs greater than this size are dropped. Subsequent EDRs continue to be processed.
       # This condition likely indicates MCC sending larger than expected EDRs. MCC is not normally expected
       # to send EDRs larger than the default size. If EDRs are being dropped because of this limit,
       # investigate and confirm that the EDRs are valid, and then increase this value. Units are B, KiB, MiB, GiB, etc.
@@ -118,7 +119,7 @@ source:
       # corrupt EDRs to Azure. You should not need to change this value. Units are B, KiB, MiB, GiB, etc.
       hard_maximum_message_size: 100000 B
     batching:
-      # The maximum size of a single blob (file) to store in the Data Product's input storage account.
+      # The maximum size of a single blob (file) to store in the Data Product's input storage account.
       maximum_blob_size: 128 MiB. Units are B, KiB, MiB, GiB, etc.
       # The maximum time to wait when no data is received before uploading pending batched data to the Data Product's input storage account. Examples: 30s, 10m, 1h, 1d.
       blob_rollover_period: 5m
@@ -149,16 +150,17 @@ source:
       # Only for use with password authentication. The name of the file containing the password in the secrets_directory folder
       secret_name: sftp-user-password
       # Only for use with private key authentication. The name of the file containing the SSH key in the secrets_directory folder
-      key_secret: sftp-user-ssh-key
+      key_secret_name: sftp-user-ssh-key
      # Optional. Only for use with private key authentication. The passphrase for the SSH key. This can be omitted if the key is not protected by a passphrase.
       passphrase_secret_name: sftp-user-ssh-key-passphrase
     filtering:
       # The path to a folder on the SFTP server that files will be uploaded to Azure Operator Insights from.
       base_path: /path/to/sftp/folder
       # Optional. A regular expression to specify which files in the base_path folder should be ingested. If not specified, the agent will attempt to ingest all files in the base_path folder (subject to exclude_pattern, settling_time and exclude_before_time).
-      include_pattern: "*\.csv$"
+      include_pattern: ".*\.csv$" # Only include files which end in ".csv"
       # Optional. A regular expression to specify any files in the base_path folder which should not be ingested. Takes priority over include_pattern, so files which match both regular expressions will not be ingested.
-      exclude_pattern: '\.backup$'
+      # The exclude_pattern can also be used to ignore whole directories, but the pattern must still match all files under that directory, e.g. `^excluded-dir/.*$` or `^excluded-dir/` but *not* `^excluded-dir$`.
+      exclude_pattern: "^\.staging/|\.backup$" # Exclude all file paths that start with ".staging/" or end in ".backup"
       # A duration, such as "10s", "5m", "1h". During an upload run, any files last modified within the settling time are not selected for upload, as they may still be being modified.
       settling_time: 1m
       # Optional. A datetime that adheres to the RFC 3339 format. Any files last modified before this datetime will be ignored.
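The include/exclude semantics in the filtering configuration above (exclude_pattern takes priority over include_pattern, and directory exclusions must still match the full file path) can be sketched in a few lines of Python. The `should_ingest` helper is hypothetical, written only to illustrate the documented behavior, and it assumes patterns are matched against the path relative to `base_path`; it is not the agent's actual matching code.

```python
import re

# Hypothetical helper mirroring the documented filtering rules:
# exclude_pattern wins over include_pattern, both matched (re.search)
# against the file path relative to base_path. Illustration only.
def should_ingest(relative_path, include_pattern=None, exclude_pattern=None):
    if exclude_pattern and re.search(exclude_pattern, relative_path):
        return False  # exclude wins even if include also matches
    if include_pattern:
        return bool(re.search(include_pattern, relative_path))
    return True  # no include_pattern: ingest everything not excluded

# Patterns from the sample config above.
include = r".*\.csv$"
exclude = r"^\.staging/|\.backup$"

print(should_ingest("reports/day1.csv", include, exclude))         # True
print(should_ingest(".staging/day1.csv", include, exclude))        # False: excluded directory
print(should_ingest("reports/day1.csv.backup", include, exclude))  # False: excluded suffix
```

Note why `^excluded-dir$` alone doesn't exclude a directory under these rules: the pattern is tested against file paths such as `excluded-dir/file.csv`, which `^excluded-dir$` never matches.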

articles/operator-insights/monitor-troubleshoot-ingestion-agent.md

Lines changed: 3 additions & 0 deletions

@@ -32,6 +32,8 @@ Metrics are reported in a simple human-friendly form.
 
 To collect a diagnostics package, SSH to the Virtual Machine and run the command `/usr/bin/microsoft/az-aoi-ingestion-gather-diags`. This command generates a date-stamped zip file in the current directory that you can copy from the system.
 
+If you have configured collection of logs through the Azure Monitor agent, you can view ingestion agent logs in the portal view of your Log Analytics workspace, and may not need to collect a diagnostics package to debug your issues.
+
 > [!NOTE]
 > Microsoft Support might request diagnostics packages when investigating an issue. Diagnostics packages don't contain any customer data or the value of any credentials.
 
@@ -117,6 +119,7 @@ Symptoms: No data appears in Azure Data Explorer. Logs of category `Ingestion` d
 
 - Check that the agent is running on all VMs and isn't reporting errors in logs.
 - Check that files exist in the correct location on the SFTP server, and that they aren't being excluded due to file source config (see [Files are missing](#files-are-missing)).
+- Ensure that the configured SFTP user can read all directories under the `base_path` that the file source config doesn't exclude.
 - Check the network connectivity and firewall configuration between the ingestion agent VM and the Data Product's input storage account.
 
 ### Files are missing
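The new troubleshooting bullet about directory read permissions can be checked ahead of time. Below is a minimal sketch: the `dirs_needing_exclusion` helper is hypothetical (not an agent tool) and checks local filesystem permissions, so in practice you would run it on the SFTP server as the same user the agent connects with.

```python
import os

# Hypothetical helper: walk base_path and list directories that the
# current user cannot read and descend into. Per the troubleshooting
# advice above, each such directory must either be made readable or be
# covered by the exclude_pattern configuration.
def dirs_needing_exclusion(base_path):
    blocked = []
    for root, dirs, _files in os.walk(base_path):
        for name in list(dirs):
            full = os.path.join(root, name)
            if not os.access(full, os.R_OK | os.X_OK):
                blocked.append(os.path.relpath(full, base_path))
                dirs.remove(name)  # can't descend into it anyway
    return blocked
```

For a fully readable tree this returns an empty list; any entries it does return are candidates for `exclude_pattern` entries like `^<dir>/`.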

articles/operator-insights/set-up-ingestion-agent.md

Lines changed: 13 additions & 4 deletions

@@ -139,6 +139,9 @@ On the SFTP server:
 
 1. Ensure port 22/TCP to the VM is open.
 1. Create a new user, or determine an existing user on the SFTP server that the ingestion agent should use to connect to the SFTP server.
+    - By default, the ingestion agent searches every directory under the base path, so this user must be able to read all of them. Any directories that the user does not have permission to access must be excluded using the `exclude_pattern` configuration.
+    > [!NOTE]
+    > Implicitly excluding directories by not specifying them in the included pattern is not sufficient to stop the agent from searching those directories. See [the configuration reference](ingestion-agent-configuration-reference.md) for more detail on excluding directories.
 1. Determine the authentication method that the ingestion agent should use to connect to the SFTP server. The agent supports:
     - Password authentication
     - SSH key authentication
@@ -277,7 +280,12 @@ The configuration you need is specific to the type of source and your Data Produ
 - `user`: the name of the user on the SFTP server that the agent should use to connect.
 - Depending on the method of authentication you chose in [Prepare the VMs](#prepare-the-vms), set either `password` or `private_key`.
   - For password authentication, set `secret_name` to the name of the file containing the password in the `secrets_directory` folder.
-  - For SSH key authentication, set `key_secret` to the name of the file containing the SSH key in the `secrets_directory` folder. If the private key is protected with a passphrase, set `passphrase_secret_name` to the name of the file containing the passphrase in the `secrets_directory` folder.
+  - For SSH key authentication, set `key_secret_name` to the name of the file containing the SSH key in the `secrets_directory` folder. If the private key is protected with a passphrase, set `passphrase_secret_name` to the name of the file containing the passphrase in the `secrets_directory` folder.
+  - All secret files should have permissions of `600` (`rw-------`) and an owner of `az-aoi-ingestion`, so that only the ingestion agent and privileged users can read them.
+    ```
+    sudo chmod 600 <secrets_directory>/*
+    sudo chown az-aoi-ingestion <secrets_directory>/*
+    ```
 
 For required or recommended values for other fields, refer to the documentation for your Data Product.
 
@@ -327,11 +335,12 @@ If you're running the ingestion agent on an Azure VM or on an on-premises VM con
 To collect ingestion agent logs, follow [the Azure Monitor documentation to install the Azure Monitor Agent and configure log collection](../azure-monitor/agents/data-collection-text-log.md).
 
 - These docs use the Az PowerShell module to create a logs table. Follow the [Az PowerShell module install documentation](/powershell/azure/install-azure-powershell) first.
-- The `YourOptionalColumn` section from the sample `$tableParams` JSON is unnecessary for the ingestion agent, and can be removed.
+- The `YourOptionalColumn` section from the sample `$tableParams` JSON is unnecessary for the ingestion agent, and can be removed.
 - When adding a data source to your data collection rule, add a `Custom Text Logs` source type, with file pattern `/var/log/az-aoi-ingestion/stdout.log`.
-- After adding the data collection rule, you can query these logs through the Log Analytics workspace. Use the following query to make them easier to work with:
+- We also recommend following [the documentation to add a `Linux Syslog` Data source](../azure-monitor/agents/data-collection-syslog.md) to your data collection rule, to allow for auditing of all processes running on the VM.
+- After adding the data collection rule, you can query the ingestion agent logs through the Log Analytics workspace. Use the following query to make them easier to work with:
   ```
-  RawAgentLogs_CL
+  <CustomTableName>
   | extend RawData = replace_regex(RawData, '\\x1b\\[\\d{1,4}m', '') // Remove any color tags
   | parse RawData with TimeGenerated: datetime ' ' Level ' ' Message // Parse the log lines into the TimeGenerated, Level and Message columns for easy filtering
   | order by TimeGenerated desc
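The Log Analytics query above strips ANSI color tags and then splits each log line into timestamp, level, and message columns. The same transformation can be sketched locally in Python; the `<timestamp> <level> <message>` line format and the sample log line are assumptions for illustration, not a guaranteed agent log format.

```python
import re

# Local sketch of what the Log Analytics query does: remove ANSI color
# escape sequences (ESC [ <digits> m), then split the line into the
# timestamp, level, and message fields.
def parse_agent_log_line(raw):
    cleaned = re.sub(r'\x1b\[\d{1,4}m', '', raw)
    timestamp, level, message = cleaned.split(' ', 2)
    return timestamp, level, message

# Invented example line with color tags around the timestamp.
line = '\x1b[32m2023-12-06T10:15:30Z\x1b[0m INFO Upload complete'
print(parse_agent_log_line(line))
# ('2023-12-06T10:15:30Z', 'INFO', 'Upload complete')
```

This mirrors the query's two steps: `replace_regex(..., '\\x1b\\[\\d{1,4}m', '')` corresponds to the `re.sub` call, and the `parse` operator corresponds to the split into three fields.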

0 commit comments