Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
84b5a39
add pester tests for k8s-extension
Mar 13, 2025
feb18f1
fix testcases for nodepool image issues (#5)
bavneetsingh16 May 23, 2025
c39d105
update readme and version release notes (#6)
bavneetsingh16 May 23, 2025
a9ecd65
fix: simplify logic and enable correct recording rule groups for mana…
bragi92 May 29, 2025
4683050
update readme and version release notes (#6)
bavneetsingh16 May 23, 2025
cb0788d
fix: simplify logic and enable correct recording rule groups for mana…
bragi92 May 29, 2025
bb96daf
Extend ContainerInsights Extension for high log scale mode support (#9)
wanlonghenry Jul 22, 2025
91618ab
update python version to 3.13 (#10)
bavneetsingh16 Jul 31, 2025
18d83a8
add pester tests for k8s-extension
Mar 13, 2025
03f1c57
fix testcases for nodepool image issues (#5)
bavneetsingh16 May 23, 2025
077f73d
update readme and version release notes (#6)
bavneetsingh16 May 23, 2025
7eb1321
fix: simplify logic and enable correct recording rule groups for mana…
bragi92 May 29, 2025
66275eb
update readme and version release notes (#6)
bavneetsingh16 May 23, 2025
60c5d8e
fix: simplify logic and enable correct recording rule groups for mana…
bragi92 May 29, 2025
551ae64
Extend ContainerInsights Extension for high log scale mode support (#9)
wanlonghenry Jul 22, 2025
5c64b8e
update python version to 3.13 (#10)
bavneetsingh16 Jul 31, 2025
ba7be02
Merge branch 'main' of https://github.com/AzureArcForKubernetes/k8s-e…
Aug 6, 2025
5fabc3a
update readme and version release notes (#12)
bavneetsingh16 Aug 6, 2025
b11e530
remove extension specific pester tests
Aug 6, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/k8s-extension/HISTORY.rst
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,10 @@
Release History
===============

1.6.7
+++++++++++++++++++
* microsoft.azuremonitor.containers: Extend ContainerInsights Extension for high log scale mode support.

1.6.6
++++++++++++++++++
* microsoft.entraworkloadiam: Remove code that is no longer in use.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,21 @@
logger = get_logger(__name__)
DCR_API_VERSION = "2022-06-01"

ContainerInsightsStreams = [
"Microsoft-ContainerLog",
"Microsoft-ContainerLogV2-HighScale",
"Microsoft-KubeEvents",
"Microsoft-KubePodInventory",
"Microsoft-KubeNodeInventory",
"Microsoft-KubePVInventory",
"Microsoft-KubeServices",
"Microsoft-KubeMonAgentEvents",
"Microsoft-InsightsMetrics",
"Microsoft-ContainerInventory",
"Microsoft-ContainerNodeInventory",
"Microsoft-Perf",
]


class ContainerInsights(DefaultExtension):
def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_type, cluster_rp,
Expand Down Expand Up @@ -83,6 +98,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
# Delete DCR-A if it exists incase of MSI Auth
useAADAuth = False
isDCRAExists = False
enable_high_log_scale_mode = False
cluster_rp, _ = get_cluster_rp_api_version(cluster_type=cluster_type, cluster_rp=cluster_rp)
try:
extension = client.get(resource_group_name, cluster_rp, cluster_type, cluster_name, name)
Expand All @@ -95,10 +111,15 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
return

subscription_id = get_subscription_id(cmd.cli_ctx)
resources = cf_resources(cmd.cli_ctx, subscription_id)
# handle cluster type here
cluster_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/{2}/{3}/{4}'.format(subscription_id, resource_group_name, cluster_rp, cluster_type, cluster_name)
workspace_resource_id = None
if (extension is not None) and (extension.configuration_settings is not None):
configSettings = extension.configuration_settings
# Extract workspace resource ID if present
if 'logAnalyticsWorkspaceResourceID' in configSettings:
workspace_resource_id = configSettings['logAnalyticsWorkspaceResourceID']
# omsagent is being renamed to ama-logs. Check for both for compatibility
if 'omsagent.useAADAuth' in configSettings:
useAADAuthSetting = configSettings['omsagent.useAADAuth']
Expand All @@ -108,6 +129,16 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
useAADAuthSetting = configSettings['amalogs.useAADAuth']
if (isinstance(useAADAuthSetting, str) and str(useAADAuthSetting).lower() == "true") or (isinstance(useAADAuthSetting, bool) and useAADAuthSetting):
useAADAuth = True

# Check if high log scale mode was enabled
if useAADAuth and 'amalogs.enableHighLogScaleMode' in configSettings:
highLogScaleSetting = configSettings['amalogs.enableHighLogScaleMode']
if isinstance(highLogScaleSetting, str):
enable_high_log_scale_mode = (highLogScaleSetting.lower() == "true")
elif isinstance(highLogScaleSetting, bool):
enable_high_log_scale_mode = highLogScaleSetting
else:
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true/false or boolean type')
if useAADAuth:
association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version={DCR_API_VERSION}"
for _ in range(3):
Expand All @@ -131,6 +162,41 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
except Exception:
pass # its OK to ignore the exception since MSI auth in preview

if useAADAuth:
# Get the workspace region if workspace_resource_id is available
workspace_region = None
if workspace_resource_id:
try:
workspace_resource = resources.get_by_id(workspace_resource_id, '2015-11-01-preview')
workspace_region = workspace_resource.location.replace(" ", "").lower()
except Exception as ex:
logger.warning("Skipping DCR and DCE deletion due to inability to determine workspace region")
return
# If workspace_region still couldn't be determined, skip deletion
if not workspace_region:
logger.warning("Workspace region could not be determined. Skipping DCR and DCE deletion.")
return

# Use workspace_region for DCR name to match creation logic
dcr_name = f"MSCI-{workspace_region}-{cluster_name}"
dcr_name = dcr_name[0:64]

dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dcr_name}"
dcr_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dcr_resource_id}?api-version={DCR_API_VERSION}"
response = send_raw_request(cmd.cli_ctx, "GET", dcr_url)
dcr_config = json.loads(response.text)
# Delete the DCR
for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "DELETE", dcr_url,)
logger.info(f"Successfully deleted DCR: {dcr_name}")
break
except Exception as ex:
logger.warning(f"Error deleting DCR: {str(ex)}")
pass

if enable_high_log_scale_mode:
_delete_dce_for_dcr(cmd, subscription_id, resource_group_name, dcr_config)

# Custom Validation Logic for Container Insights

Expand Down Expand Up @@ -464,6 +530,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
subscription_id = get_subscription_id(cmd.cli_ctx)
workspace_resource_id = ''
useAADAuth = True
enableHighLogScaleMode = False # Default value
if 'amalogs.useAADAuth' not in configuration_settings:
configuration_settings['amalogs.useAADAuth'] = "true"
extensionSettings = {}
Expand Down Expand Up @@ -520,6 +587,16 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
raise InvalidArgumentValueError('streams must be an array type')
extensionSettings["dataCollectionSettings"] = dataCollectionSettings

if useAADAuth and 'amalogs.enableHighLogScaleMode' in configuration_settings:
enableHighLogScaleMode = configuration_settings['amalogs.enableHighLogScaleMode']
if isinstance(enableHighLogScaleMode, str):
enableHighLogScaleMode_str = enableHighLogScaleMode.lower()
if enableHighLogScaleMode_str not in ["true", "false"]:
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false')
enableHighLogScaleMode = (enableHighLogScaleMode_str == "true")
elif not isinstance(enableHighLogScaleMode, bool):
raise InvalidArgumentValueError('amalogs.enableHighLogScaleMode value MUST be either true or false')

workspace_resource_id = workspace_resource_id.strip()

if configuration_protected_settings is not None:
Expand Down Expand Up @@ -548,7 +625,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
if is_ci_extension_type:
if useAADAuth:
logger.info("creating data collection rule and association")
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings)
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enableHighLogScaleMode)
elif not _is_container_insights_solution_exists(cmd, workspace_resource_id):
logger.info("Creating ContainerInsights solution resource, since it doesn't exist and it is using legacy authentication")
_ensure_container_insights_for_monitoring(cmd, workspace_resource_id).result()
Expand Down Expand Up @@ -597,6 +674,37 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_r
configuration_settings['amalogs.domain'] = 'opinsights.azure.microsoft.scloud'


def _delete_dce_for_dcr(cmd, subscription_id, cluster_resource_group_name, dcr_config):
"""Delete Data Collection Endpoint associated with a DCR if it exists"""
try:
if ("properties" in dcr_config and
"dataCollectionEndpointId" in dcr_config["properties"] and
dcr_config["properties"]["dataCollectionEndpointId"]):

dce_id = dcr_config["properties"]["dataCollectionEndpointId"]
dce_parts = dce_id.split('/')

if len(dce_parts) > 0:
dce_name = dce_parts[-1]
dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{dce_name}"
dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dce_resource_id}?api-version=2022-06-01"
# Try to delete up to 3 times
for retry in range(3):
try:
send_raw_request(cmd.cli_ctx, "DELETE", dce_url)
logger.info("Successfully deleted DCE: %s", dce_name)
return True
except CLIError as e:
if "ResourceNotFound" in str(e):
return True
if retry == 2:
logger.warning("Failed to delete DCE: %s - %s", dce_name, str(e))
return False
logger.info("Retrying DCE deletion after error: %s", str(e))
except CLIError:
pass
return True

def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
tags = {}
_MAX_RETRY_TIMES = 3
Expand All @@ -617,7 +725,7 @@ def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
return tags


def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings):
def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_rp, cluster_type, cluster_name, workspace_resource_id, extensionSettings, enable_high_log_scale_mode):
from azure.core.exceptions import HttpResponseError

cluster_region = ''
Expand Down Expand Up @@ -652,6 +760,18 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
dataCollectionRuleName = dataCollectionRuleName[0:64]
dcr_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}"

# ingestion DCE MUST be in workspace region
ingestionDataCollectionEndpointName = f"MSCI-ingest-{workspace_region}-{cluster_name}"
# Max length of the DCE name is 43 chars
ingestionDataCollectionEndpointName = _trim_suffix_if_needed(ingestionDataCollectionEndpointName[0:43])
ingestion_dce_resource_id = None

# create ingestion DCE if high log scale mode enabled
if enable_high_log_scale_mode:
ingestion_dce_resource_id = create_data_collection_endpoint(
cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName
)

# first get the association between region display names and region IDs (because for some reason
# the "which RPs are available in which regions" check returns region display names)
region_names_to_id = {}
Expand All @@ -677,6 +797,8 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
# get existing tags on the container insights extension DCR if the customer added any
existing_tags = get_existing_container_insights_extension_dcr_tags(cmd, dcr_url)
streams = ["Microsoft-ContainerInsights-Group-Default"]
if enable_high_log_scale_mode:
streams = ContainerInsightsStreams
if extensionSettings is None:
extensionSettings = {}
if 'dataCollectionSettings' in extensionSettings.keys():
Expand All @@ -691,6 +813,11 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
}
extensionSettings["dataCollectionSettings"] = dataCollectionSettings

if enable_high_log_scale_mode:
for i, v in enumerate(streams):
if v == "Microsoft-ContainerLogV2":
streams[i] = "Microsoft-ContainerLogV2-HighScale"

# create the DCR
dcr_creation_body = json.dumps(
{
Expand Down Expand Up @@ -722,6 +849,7 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
}
]
},
"dataCollectionEndpointId": ingestion_dce_resource_id
},
}
)
Expand Down Expand Up @@ -755,3 +883,34 @@ def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_
error = e
else:
raise error


def create_data_collection_endpoint(cmd, subscription_id, cluster_resource_group_name, workspace_region, ingestionDataCollectionEndpointName):
# create the ingestion DCE
ingestion_dce_resource_id = f"/subscriptions/{subscription_id}/resourceGroups/{cluster_resource_group_name}/providers/Microsoft.Insights/dataCollectionEndpoints/{ingestionDataCollectionEndpointName}"
ingestion_dce_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{ingestion_dce_resource_id}?api-version=2022-06-01"
ingestion_dce_creation_body = json.dumps({
"location": workspace_region,
"kind": "Linux",
"properties": {
"networkAcls": {
"publicNetworkAccess": "Enabled"
}
}
})
error = None
for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "PUT", ingestion_dce_url, body=ingestion_dce_creation_body)
return ingestion_dce_resource_id
except AzCLIError as e:
error = e
if error:
raise error
return ingestion_dce_resource_id


def _trim_suffix_if_needed(s, suffix="-"):
if s.endswith(suffix):
s = s[:-len(suffix)]
return s
2 changes: 1 addition & 1 deletion src/k8s-extension/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
# TODO: Add any additional SDK dependencies here
DEPENDENCIES = []

VERSION = "1.6.6"
VERSION = "1.6.7"

with open("README.rst", "r", encoding="utf-8") as f:
README = f.read()
Expand Down
Loading