Skip to content

Commit fc543c4

Browse files
authored
{AKS}: fix BlockingIOError by setting stdout to blocking mode (#9473)
1 parent c337353 commit fc543c4

File tree

6 files changed

+484
-57
lines changed

6 files changed

+484
-57
lines changed

src/aks-agent/HISTORY.rst

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,15 @@ To release a new version, please select a new version number (usually plus 1 to
1212
Pending
1313
+++++++
1414

15+
1.0.0b14
16+
* Fix: set stdout to blocking mode to avoid "BlockingIOError: [Errno 35] write could not complete without blocking"
17+
* Fix: gracefully handle the connection reset error
18+
* Fix: correct the prompt to use `az aks agent-init` to initialize the aks agent
19+
* Fix: don't echo the user input for Linux users
20+
* Close websocket and restore terminal settings after `az aks agent` ends
21+
1522
1.0.0b13
16-
* fix subscription id not correclty set in helm chart
23+
* Fix subscription id not correctly set in helm chart
1724

1825
1.0.0b12
1926
++++++++

src/aks-agent/azext_aks_agent/agent/aks.py

Lines changed: 79 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,10 @@
66

77
import errno
88
import os
9+
import subprocess
910

11+
import yaml
12+
from azure.cli.core.azclierror import AzCLIError
1013
from knack.log import get_logger
1114
from knack.util import CLIError
1215

@@ -24,28 +27,99 @@
2427
def get_aks_credentials(
    client: str,
    resource_group_name: str,
    cluster_name: str,
    admin: bool = False,
    user="clusterUser",
) -> str:
    """Download the AKS cluster kubeconfig and return the path it was written to.

    :param client: Managed-cluster client. It is annotated ``str`` but is used
        as an object exposing ``list_cluster_admin_credentials``,
        ``list_cluster_user_credentials`` and
        ``list_cluster_monitoring_user_credentials``.
    :param resource_group_name: Resource group that contains the cluster.
    :param cluster_name: Name of the managed cluster.
    :param admin: When True, fetch the cluster admin credentials and ignore
        ``user``.
    :param user: Credential type used when ``admin`` is False; either
        ``clusterUser`` or ``clusterMonitoringUser`` (case-insensitive).
    :return: Path of the kubeconfig file that was written.
    :raises AzCLIError: If ``user`` is not a supported credential type, or the
        kubeconfig needs kubelogin (devicecode login) and kubelogin is not
        installed.
    :raises CLIError: If no credentials are returned or no kubeconfig can be
        extracted from them.
    """
    if admin:
        credential_results = client.list_cluster_admin_credentials(
            resource_group_name, cluster_name)
    elif user.lower() == 'clusteruser':
        credential_results = client.list_cluster_user_credentials(
            resource_group_name, cluster_name)
    elif user.lower() == 'clustermonitoringuser':
        credential_results = client.list_cluster_monitoring_user_credentials(
            resource_group_name, cluster_name)
    else:
        raise AzCLIError("invalid user type for get credentials: {}".format(user))

    if not credential_results:
        raise CLIError("No Kubernetes credentials found.")

    # Indexing/decoding is the step that can raise IndexError/ValueError, so
    # this is where the "kubeconfig not found" translation has to happen.
    try:
        kubeconfig = credential_results.kubeconfigs[0].value.decode(
            encoding='UTF-8')
    except (IndexError, ValueError) as exc:
        raise CLIError("Fail to find kubeconfig file.") from exc

    kubeconfig_path = _get_kubeconfig_file_path(resource_group_name, cluster_name)

    # Ensure the kubeconfig file exists (owner read/write only) and write kubeconfig to it
    with os.fdopen(os.open(kubeconfig_path, os.O_RDWR | os.O_CREAT | os.O_TRUNC, 0o600), 'wt') as f:
        f.write(kubeconfig)

    # Check if kubeconfig requires kubelogin with devicecode and convert it
    if _uses_kubelogin_devicecode(kubeconfig):
        import shutil
        if not shutil.which("kubelogin"):
            raise AzCLIError(
                "The kubeconfig uses devicecode authentication which requires kubelogin. "
                "Please install kubelogin from https://github.com/Azure/kubelogin or run "
                "'az aks install-cli' to install both kubectl and kubelogin. "
                "If devicecode login fails, try running "
                "'kubelogin convert-kubeconfig -l azurecli' to unblock yourself."
            )
        try:
            # Point kubelogin at the file written above. Without --kubeconfig
            # it converts the user's default kubeconfig ($KUBECONFIG or
            # ~/.kube/config); the working directory does not select the file.
            subprocess.run(
                ["kubelogin", "convert-kubeconfig", "-l", "azurecli",
                 "--kubeconfig", kubeconfig_path],
                cwd=os.path.dirname(kubeconfig_path) or None,
                check=True,
            )
            logger.info("Converted kubeconfig to use Azure CLI authentication.")
        except subprocess.CalledProcessError as e:
            # Conversion is best-effort: keep the unconverted kubeconfig usable.
            logger.warning("Failed to convert kubeconfig with kubelogin: %s", str(e))
        except Exception as e:  # pylint: disable=broad-except
            logger.warning("Error running kubelogin: %s", str(e))

    logger.info("Kubeconfig downloaded successfully to: %s", kubeconfig_path)
    return kubeconfig_path
4790

4891

92+
def _uses_kubelogin_devicecode(kubeconfig: str) -> bool:
    """Return True if the kubeconfig's first user authenticates via kubelogin devicecode.

    Only the first entry of ``users`` is inspected. It matches when its
    ``exec.command`` contains ``kubelogin`` and its ``exec.args`` select the
    devicecode login mode, written either as two arguments
    (``--login devicecode`` / ``-l devicecode``) or in the ``=`` form
    (``--login=devicecode`` / ``-l=devicecode``).

    :param kubeconfig: Kubeconfig content as YAML text.
    :return: True when devicecode login through kubelogin is configured;
        False otherwise, including when the content cannot be parsed.
    """
    devicecode_flags = (
        '--login devicecode',
        '-l devicecode',
        '--login=devicecode',
        '-l=devicecode',
    )
    try:
        config = yaml.safe_load(kubeconfig)

        # Check if users section exists and has at least one user
        if not config or not config.get('users'):
            return False

        # `or {}` also covers keys that are present but explicitly null.
        first_user = config['users'][0]
        user_info = first_user.get('user') or {}
        exec_info = user_info.get('exec') or {}

        # Check if command is kubelogin
        command = exec_info.get('command') or ''
        if 'kubelogin' not in command:
            return False

        # Join args into a string for easier pattern matching; str() keeps
        # non-string YAML scalars from breaking the join.
        args_str = ' '.join(str(arg) for arg in exec_info.get('args') or [])
        return any(flag in args_str for flag in devicecode_flags)
    except (yaml.YAMLError, KeyError, TypeError, AttributeError) as e:
        # If there's any error parsing the kubeconfig, assume it doesn't require kubelogin
        logger.debug("Error parsing kubeconfig: %s", str(e))
        return False
121+
122+
49123
def _get_kubeconfig_file_path( # pylint: disable=unused-argument
50124
resource_group_name: str,
51125
cluster_name: str,

src/aks-agent/azext_aks_agent/agent/k8s/aks_agent_manager.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -689,7 +689,7 @@ def exec_aks_agent(self, command_flags: str = "") -> bool:
689689
error_msg = f"Failed to find AKS agent pods: {result}\n"
690690
error_msg += (
691691
"The AKS agent may not be deployed. "
692-
"Run 'az aks agent --init' to initialize the deployment."
692+
"Run 'az aks agent-init' to initialize the deployment."
693693
)
694694
raise AzCLIError(error_msg)
695695

@@ -698,7 +698,7 @@ def exec_aks_agent(self, command_flags: str = "") -> bool:
698698
error_msg = "No running AKS agent pods found.\n"
699699
error_msg += (
700700
"The AKS agent may not be deployed. "
701-
"Run 'az aks agent --init' to initialize the deployment."
701+
"Run 'az aks agent-init' to initialize the deployment."
702702
)
703703
raise AzCLIError(error_msg)
704704

0 commit comments

Comments
 (0)