Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/CODEOWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@

/src/ip-group/ @necusjz @kairu-ms @jsntcy

/src/connectedk8s/ @bavneetsingh16 @deeksha345 @anagg929
/src/connectedk8s/ @bavneetsingh16 @deeksha345 @anagg929 @atchutbarli

/src/storagesync/ @jsntcy

Expand Down
22 changes: 12 additions & 10 deletions src/connectedk8s/azext_connectedk8s/_constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,13 @@
AHB_Enum_Values = ["True", "False", "NotApplicable"]
Feature_Values = ["cluster-connect", "azure-rbac", "custom-locations"]
CRD_FOR_FORCE_DELETE = [
"arccertificates.clusterconfig.azure.com",
"azureclusteridentityrequests.clusterconfig.azure.com",
"azureextensionidentities.clusterconfig.azure.com",
"connectedclusters.arc.azure.com",
"customlocationsettings.clusterconfig.azure.com",
"extensionconfigs.clusterconfig.azure.com",
"gitconfigs.clusterconfig.azure.com",
"arccertificates.clusterconfig.azure",
"azureclusteridentityrequests.clusterconfig.azure",
"azureextensionidentities.clusterconfig.azure",
"connectedclusters.arc.azure",
"customlocationsettings.clusterconfig.azure",
"extensionconfigs.clusterconfig.azure",
"gitconfigs.clusterconfig.azure",
]
Helm_Install_Release_Userfault_Messages = [
"forbidden",
Expand Down Expand Up @@ -304,6 +304,7 @@
KAP_CR_Save_Failed_Fault_Type = "Error occured while fetching KAP CR snapshot"
Fetch_KAP_CR_Save_Failed_Fault_Type = "Exception occured while fetching KAP CR snapshot"
Fetch_Arc_Agent_Logs_Failed_Fault_Type = "Error occured in arc agents logger"
Fetch_Namespace_Pod_Logs_Failed_Fault_Type = "Error occured in namespace pods logger"
Fetch_Arc_Agents_Events_Logs_Failed_Fault_Type = (
"Error occured in arc agents events logger"
)
Expand Down Expand Up @@ -371,6 +372,7 @@
# Name of the checks and operations
Retrieve_Arc_Agents_Event_Logs = "retrieved_arc_agents_event_logs"
Retrieve_Arc_Agents_Logs = "retrieved_arc_agents_logs"
Retrieve_Namespace_Logs = "retrieved_namespace_logs"
Retrieve_Deployments_Logs = "retrieved_deployments_logs"
Retrieve_Arc_Workload_Identity_Events_Logs = (
"retrieved_arc_workload_identity_event_logs"
Expand Down Expand Up @@ -418,7 +420,7 @@

# Connect Precheck Diagnoser constants
Cluster_Diagnostic_Checks_Job_Registry_Path = (
"mcr.microsoft.com/azurearck8s/helmchart/stable/clusterdiagnosticchecks:0.2.2"
"azurearck8s/helmchart/stable/clusterdiagnosticchecks:0.2.2"
)
Cluster_Diagnostic_Checks_Helm_Install_Failed_Fault_Type = (
"Error while installing cluster diagnostic checks helm release"
Expand Down Expand Up @@ -481,8 +483,8 @@
DEFAULT_MAX_ONBOARDING_TIMEOUT_HELMVALUE_SECONDS = "1200"

# URL constants
CLIENT_PROXY_MCR_TARGET = "mcr.microsoft.com/azureconnectivity/proxy"
HELM_MCR_URL = "mcr.microsoft.com/azurearck8s/helm"
CLIENT_PROXY_MCR_TARGET = "azureconnectivity/proxy"
HELM_MCR_URL = "azurearck8s/helm"
HELM_VERSION = "v3.12.2"
Download_And_Install_Kubectl_Fault_Type = "Failed to download and install kubectl"
Azure_Access_Token_Variable = "AZURE_ACCESS_TOKEN"
Expand Down
5 changes: 5 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_params.py
Original file line number Diff line number Diff line change
Expand Up @@ -605,3 +605,8 @@ def load_arguments(self: Connectedk8sCommandsLoader, _: CLICommand) -> None:
action="store_true",
help="Skip SSL verification for any cluster connection.",
)
c.argument(
"kube_namespace",
options_list=["--kube-namespace"],
help="Kube namespace to troubleshoot from current machine.",
)
8 changes: 7 additions & 1 deletion src/connectedk8s/azext_connectedk8s/_precheckutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import azext_connectedk8s._utils as azext_utils

if TYPE_CHECKING:
from knack.commands import CLICommand
from kubernetes.client import BatchV1Api, CoreV1Api

logger = get_logger(__name__)
Expand All @@ -30,6 +31,7 @@


def fetch_diagnostic_checks_results(
cmd: CLICommand,
corev1_api_instance: CoreV1Api,
batchv1_api_instance: BatchV1Api,
helm_client_location: str,
Expand All @@ -52,6 +54,7 @@ def fetch_diagnostic_checks_results(
# Executing the cluster_diagnostic_checks job and fetching the logs obtained
cluster_diagnostic_checks_container_log = (
executing_cluster_diagnostic_checks_job(
cmd,
corev1_api_instance,
batchv1_api_instance,
helm_client_location,
Expand Down Expand Up @@ -135,6 +138,7 @@ def fetch_diagnostic_checks_results(


def executing_cluster_diagnostic_checks_job(
cmd: CLICommand,
corev1_api_instance: CoreV1Api,
batchv1_api_instance: BatchV1Api,
helm_client_location: str,
Expand Down Expand Up @@ -208,8 +212,10 @@ def executing_cluster_diagnostic_checks_job(
)
return None

mcr_url = azext_utils.get_mcr_path(cmd)

chart_path = azext_utils.get_chart_path(
consts.Cluster_Diagnostic_Checks_Job_Registry_Path,
f"{mcr_url}/{consts.Cluster_Diagnostic_Checks_Job_Registry_Path}",
kube_config,
kube_context,
helm_client_location,
Expand Down
91 changes: 91 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_troubleshootutils.py
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,97 @@ def retrieve_arc_agents_logs(

return consts.Diagnostic_Check_Failed, storage_space_available

def retrieve_namespace_logs(
    corev1_api_instance: CoreV1Api,
    filepath_with_timestamp: str,
    storage_space_available: bool,
    kube_namespace: str,
) -> tuple[str, bool]:
    """Save the container logs of every running pod in ``kube_namespace``.

    Logs are written to
    ``<filepath_with_timestamp>/ns_<kube_namespace>/<pod>/<container>.txt``.
    A folder is created for every pod, but logs are only fetched for pods
    whose phase is ``Running``. Nothing is collected when
    ``storage_space_available`` is already ``False``.

    Args:
        corev1_api_instance: Kubernetes core/v1 client used to list the pods
            and read their logs.
        filepath_with_timestamp: Existing diagnostics folder that receives the
            ``ns_<kube_namespace>`` sub-folder.
        storage_space_available: Whether earlier steps still had disk space.
        kube_namespace: Namespace whose pods are inspected.

    Returns:
        A ``(status, storage_space_available)`` tuple. ``status`` is
        ``consts.Diagnostic_Check_Passed`` when no exception was raised and
        ``consts.Diagnostic_Check_Failed`` otherwise. The flag is set to
        ``False`` when the disk ran out of space, in which case
        ``filepath_with_timestamp`` is removed.
    """
    # Function-scope import: only needed for the disk-full check below.
    import errno

    print(
        f"Step: {get_utctimestring()}: Retrieve logs from pods in '{kube_namespace}' namespace."
    )
    try:
        if storage_space_available:
            # To retrieve all of the pods that are present in the Cluster namespace
            namespace_pod_list = corev1_api_instance.list_namespaced_pod(
                namespace=kube_namespace
            )
            # creating a folder for the namespace inside the timestamp folder
            namespace_folder_name = f"ns_{kube_namespace}"
            namespace_logs_path = os.path.join(
                filepath_with_timestamp, namespace_folder_name
            )
            # Tolerate a pre-existing folder, exactly like the per-pod folders
            # below, instead of failing the whole check.
            with contextlib.suppress(FileExistsError):
                os.mkdir(namespace_logs_path)

            # Traversing through all pods in the namespace
            for each_namespace_pod in namespace_pod_list.items:
                # Fetching the current Pod name and creating a folder with that
                # name inside the namespace folder
                pod_name = each_namespace_pod.metadata.name

                pod_name_logs_path = os.path.join(namespace_logs_path, pod_name)
                with contextlib.suppress(FileExistsError):
                    os.mkdir(pod_name_logs_path)
                # If the pod is not in Running state we wont be able to get logs of the containers
                if each_namespace_pod.status.phase != "Running":
                    continue
                # Traversing through all of the containers present inside each pods
                for each_container in each_namespace_pod.spec.containers:
                    # Fetching the Container name
                    container_name = each_container.name
                    # Creating a text file with the name of the container and
                    # adding that containers logs in it
                    container_log = corev1_api_instance.read_namespaced_pod_log(
                        name=pod_name,
                        container=container_name,
                        namespace=kube_namespace,
                    )
                    # Path to add the pods container logs.
                    namespace_pod_container_logs_path = os.path.join(
                        pod_name_logs_path, container_name + ".txt"
                    )
                    # Explicit UTF-8: container logs may hold characters the
                    # platform's locale encoding cannot represent.
                    with open(
                        namespace_pod_container_logs_path, "w", encoding="utf-8"
                    ) as container_file:
                        container_file.write(str(container_log))

        return consts.Diagnostic_Check_Passed, storage_space_available

    # For handling storage or OS exception that may occur during the execution
    except OSError as e:
        # Check errno first: on Windows the message reads "[WinError 112] ..."
        # while errno is still ENOSPC. The string match is kept as a fallback.
        if e.errno == errno.ENOSPC or "[Errno 28]" in str(e):
            storage_space_available = False
            telemetry.set_exception(
                exception=e,
                fault_type=consts.No_Storage_Space_Available_Fault_Type,
                summary="No space left on device",
            )
            shutil.rmtree(filepath_with_timestamp, ignore_errors=False)
        else:
            logger.exception(
                "An exception has occured while trying to fetch the namespace pod "
                "logs from the cluster."
            )
            telemetry.set_exception(
                exception=e,
                fault_type=consts.Fetch_Namespace_Pod_Logs_Failed_Fault_Type,
                summary="Error occured in namespace pods logger",
            )
            diagnoser_output.append(
                "An exception has occured while trying to fetch the namespace pods logs from "
                f"the cluster. Exception: {e}\n"
            )

    # To handle any exception that may occur during the execution
    except Exception as e:
        logger.exception(
            "An exception has occured while trying to fetch the namespace pods logs "
            "from the cluster."
        )
        telemetry.set_exception(
            exception=e,
            fault_type=consts.Fetch_Namespace_Pod_Logs_Failed_Fault_Type,
            summary="Error occured in namespace pods logger",
        )
        diagnoser_output.append(
            "An exception has occured while trying to fetch the namespace pods logs from the "
            f"cluster. Exception: {e}\n"
        )

    return consts.Diagnostic_Check_Failed, storage_space_available


def retrieve_arc_agents_event_logs(
filepath_with_timestamp: str,
Expand Down
22 changes: 22 additions & 0 deletions src/connectedk8s/azext_connectedk8s/_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,28 @@
# pylint: disable=bare-except


def get_mcr_path(cmd: CLICommand) -> str:
    """Return the ``mcr.microsoft.<suffix>`` registry host for the active cloud.

    ``<suffix>`` is derived from the dot-separated labels of the host in
    ``cmd.cli_ctx.cloud.endpoints.active_directory``. Only the host part of
    the endpoint is examined, so a trailing slash, path or port on the
    endpoint does not leak into the returned registry host.

    Args:
        cmd: CLI command whose ``cli_ctx.cloud`` describes the active cloud.

    Returns:
        The registry host name, e.g. ``mcr.microsoft.com``.

    Raises:
        IndexError: If the endpoint host has fewer than three labels.
    """
    # Function-scope import keeps this helper self-contained.
    from urllib.parse import urlsplit

    endpoint = cmd.cli_ctx.cloud.endpoints.active_directory
    # Fall back to the text before the first "/" when the endpoint carries no
    # scheme and urlsplit therefore cannot identify a host.
    host = urlsplit(endpoint).hostname or endpoint.split("/", 1)[0]
    labels = host.split(".")

    # special cases for USSec, exclude part of suffix
    if len(labels) == 4 and labels[2] == "microsoft":
        mcr_postfix = labels[3]
    # special case for USNat
    elif len(labels) == 5:
        mcr_postfix = ".".join(labels[2:])
    # default for public, mc, ff clouds
    else:
        mcr_postfix = labels[2]

    return f"mcr.microsoft.{mcr_postfix}"


def validate_connect_rp_location(cmd: CLICommand, location: str) -> None:
subscription_id = (
os.getenv("AZURE_SUBSCRIPTION_ID")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,18 @@
from azure.cli.core import azclierror, telemetry
from azure.cli.core.style import Style, print_styled_text
from knack import log
from knack.commands import CLICommand

import azext_connectedk8s._constants as consts
import azext_connectedk8s._fileutils as file_utils
import azext_connectedk8s._utils as utils

logger = log.get_logger(__name__)


# Downloads client side proxy to connect to Arc Connectivity Platform
def install_client_side_proxy(
arc_proxy_folder: Optional[str], debug: bool = False
cmd: CLICommand, arc_proxy_folder: Optional[str], debug: bool = False
) -> str:
client_operating_system = _get_client_operating_system()
client_architecture = _get_client_architeture()
Expand All @@ -48,7 +50,11 @@ def install_client_side_proxy(
)

_download_proxy_from_MCR(
install_dir, proxy_name, client_operating_system, client_architecture
cmd,
install_dir,
proxy_name,
client_operating_system,
client_architecture,
)
_check_proxy_installation(install_dir, proxy_name, debug)

Expand All @@ -64,15 +70,21 @@ def install_client_side_proxy(


def _download_proxy_from_MCR(
dest_dir: str, proxy_name: str, operating_system: str, architecture: str
cmd: CLICommand,
dest_dir: str,
proxy_name: str,
operating_system: str,
architecture: str,
) -> None:
mar_target = f"{consts.CLIENT_PROXY_MCR_TARGET}/{operating_system.lower()}/{architecture}/arc-proxy"
mcr_url = utils.get_mcr_path(cmd)

mar_target = f"{mcr_url}/{consts.CLIENT_PROXY_MCR_TARGET}/{operating_system.lower()}/{architecture}/arc-proxy"
logger.debug(
"Downloading Arc Connectivity Proxy from %s in Microsoft Artifact Regristy.",
mar_target,
)

client = oras.client.OrasClient()
client = oras.client.OrasClient(hostname=mcr_url)
t0 = time.time()

try:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
if TYPE_CHECKING:
from subprocess import Popen

from knack.commands import CLICommmand
from knack.commands import CLICommand
from requests.models import Response

from azext_connectedk8s.vendored_sdks.preview_2024_07_01.models import (
Expand All @@ -30,7 +30,7 @@


def handle_post_at_to_csp(
cmd: CLICommmand,
cmd: CLICommand,
api_server_port: int,
tenant_id: str,
clientproxy_process: Popen[bytes],
Expand Down
Loading
Loading