Skip to content

Commit 0b1bc8f

Browse files
Access entry fix (#267)
1 parent 5c42bcd commit 0b1bc8f

File tree

3 files changed

+514
-0
lines changed

3 files changed

+514
-0
lines changed
Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,145 @@
1+
"""
2+
Cluster utilities for EKS access validation and management.
3+
"""
4+
5+
import logging
6+
from typing import Optional, Tuple, Dict, Any
7+
8+
import boto3
9+
import botocore
10+
from botocore.exceptions import ClientError
11+
12+
logger = logging.getLogger(__name__)
13+
14+
15+
def _get_current_aws_identity(session: boto3.Session) -> Tuple[str, str]:
    """
    Get the current AWS identity (ARN and type).

    The caller's ARN is fetched from STS and classified by the prefix of its
    resource field (the part after the fifth colon). Classifying on the
    resource field, rather than searching the whole ARN for a substring,
    keeps a marker such as ':user/' that happens to appear inside a path
    from changing the result.

    For an assumed-role session the STS session ARN is rewritten to the ARN
    of the underlying IAM role:
        arn:aws:sts::123456789012:assumed-role/MyRole/session-name
    becomes
        arn:aws:iam::123456789012:role/MyRole

    Args:
        session: Boto3 session

    Returns:
        Tuple of (principal_arn, identity_type). identity_type is one of
        'user', 'role', 'assumed-role' or 'unknown'. The ARN is returned
        unchanged except in the assumed-role case described above.

    Raises:
        Nothing is caught here; any exception from creating the STS client,
        calling get_caller_identity, or a missing 'Arn' key propagates.
    """
    sts_client = session.client('sts')
    identity = sts_client.get_caller_identity()

    arn = identity['Arn']

    # ARN layout: arn:partition:service:region:account-id:resource
    # maxsplit=5 keeps any further colons inside the resource field intact.
    fields = arn.split(':', 5)
    if len(fields) < 6:
        return arn, 'unknown'

    resource = fields[5]

    if resource.startswith('user/'):
        return arn, 'user'

    if resource.startswith('role/'):
        return arn, 'role'

    if resource.startswith('assumed-role/'):
        # resource is "assumed-role/<role-name>/<session-name>"
        segments = resource.split('/')
        if len(segments) >= 3:
            # Drop the trailing session name and point at the IAM role.
            # NOTE(review): the session ARN does not appear to carry the
            # role's IAM path, so a role created under a path would yield an
            # ARN without it - confirm whether that matters for the lookup.
            fields[2] = 'iam'
            fields[5] = 'role/' + '/'.join(segments[1:-1])
            arn = ':'.join(fields)
        return arn, 'assumed-role'

    return arn, 'unknown'
48+
49+
50+
def _check_access_entry_exists(
    eks_client: botocore.client.BaseClient,
    cluster_name: str,
    principal_arn: str
) -> Tuple[bool, Optional[Dict[str, Any]], Optional[str]]:
    """
    Check if the given principal has an access entry for the EKS cluster.

    Calls describe_access_entry and converts every failure into a message
    instead of raising, so callers only need to inspect the returned tuple.

    Args:
        eks_client: Boto3 EKS client
        cluster_name: Name of the EKS cluster
        principal_arn: ARN of the principal to check

    Returns:
        Tuple of (has_access, access_entry_details, error_message).
        On success: (True, the 'accessEntry' value of the response or None
        if absent, None). On any failure: (False, None, <description>).
    """
    try:
        response = eks_client.describe_access_entry(
            clusterName=cluster_name,
            principalArn=principal_arn
        )
        return True, response.get('accessEntry'), None

    except ClientError as e:
        # Read the error payload defensively: a KeyError raised inside this
        # handler would not be caught by the generic handler below and would
        # escape to the caller.
        error = e.response.get('Error', {})
        error_code = error.get('Code', 'Unknown')

        if error_code == 'ResourceNotFoundException':
            # No access entry found for this principal
            return False, None, f"No access entry found for principal: {principal_arn}"
        elif error_code == 'AccessDeniedException':
            # User doesn't have permission to check access entries
            return False, None, "Access denied when checking access entries. You may not have eks:DescribeAccessEntry permission."
        elif error_code == 'ClusterNotFoundException':
            # Cluster doesn't exist
            # NOTE(review): confirm EKS actually returns this code; a missing
            # cluster may instead surface as ResourceNotFoundException above.
            return False, None, f"EKS cluster '{cluster_name}' not found."
        else:
            # Other error; fall back to the exception text if the service
            # response carries no message.
            error_text = error.get('Message', str(e))
            return False, None, f"Error checking access entry: {error_text}"

    except Exception as e:
        return False, None, f"Unexpected error checking access entry: {str(e)}"
91+
92+
93+
def validate_eks_access_before_kubeconfig_update(
    session: boto3.Session,
    cluster_name: str,
    eks_name: str
) -> Tuple[bool, str]:
    """
    Check, before kubeconfig is touched, that the caller has an EKS access entry.

    Resolves the caller's principal ARN, looks up its access entry on the
    EKS cluster, and turns the outcome into a human-readable message. No
    exception escapes: any failure is reported through the returned tuple.

    Args:
        session: Boto3 session used for both the STS and EKS calls
        cluster_name: Name of the HyperPod cluster (accepted for the caller's
            convenience; not referenced in this function's body)
        eks_name: Name of the EKS cluster

    Returns:
        Tuple of (has_access, message). On success the message confirms the
        principal and, when entry details are available, lists the username
        and Kubernetes groups. On failure it explains the problem and the
        steps to resolve it.
    """
    try:
        # Who is calling?
        principal_arn, identity_type = _get_current_aws_identity(session)
        logger.debug(f"Current AWS identity: {principal_arn} (type: {identity_type})")

        # Does that principal have an access entry on the cluster?
        has_access, access_entry, error_msg = _check_access_entry_exists(
            session.client('eks'), eks_name, principal_arn
        )

        if not has_access:
            # Failure path: headline, explanation, then remediation steps,
            # separated by blank lines.
            headline = f"✗ Cannot connect to EKS cluster '{eks_name}': {error_msg}"
            explanation = (
                f"Your AWS identity '{principal_arn}' (type: {identity_type}) "
                "does not have an access entry for this EKS cluster."
            )
            remediation = "\n".join([
                "To resolve this issue:",
                "1. Contact your cluster administrator to add your identity to the EKS access entries",
                "2. Refer to this documentation to create an access entry: https://docs.aws.amazon.com/cli/latest/reference/eks/create-access-entry.html",
                "3. Verify your AWS credentials and region are correct",
                "4. Ensure you have the necessary EKS permissions (eks:DescribeAccessEntry)",
            ])
            return False, "\n\n".join((headline, explanation, remediation))

        # Success path: one line per detail.
        report = [f"✓ Access confirmed for {principal_arn}"]
        if access_entry:
            groups = access_entry.get('kubernetesGroups', [])
            user = access_entry.get('username', 'N/A')
            group_text = ', '.join(groups) if groups else 'None'
            report.append(f" - Username: {user}")
            report.append(f" - Kubernetes Groups: {group_text}")
        return True, "\n".join(report)

    except Exception as exc:
        return False, f"Unexpected error validating EKS access: {exc!s}"

src/sagemaker/hyperpod/cli/commands/cluster.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@
5757
set_logging_level,
5858
store_current_hyperpod_context,
5959
)
60+
from sagemaker.hyperpod.cli.cluster_utils import (
61+
validate_eks_access_before_kubeconfig_update,
62+
)
6063
from sagemaker.hyperpod.cli.validators.cluster_validator import (
6164
ClusterValidator,
6265
)
@@ -591,6 +594,29 @@ def timeout_handler(signum, frame):
591594
)
592595

593596
eks_name = get_name_from_arn(eks_cluster_arn)
597+
598+
# Proactively validate EKS access before attempting kubeconfig update
599+
logger.debug("Validating EKS access entries before kubeconfig update...")
600+
try:
601+
has_access, message = validate_eks_access_before_kubeconfig_update(
602+
session, cluster_name, eks_name
603+
)
604+
605+
if has_access:
606+
logger.debug(message)
607+
else:
608+
# Access validation failed - provide clear error message
609+
logger.error(message)
610+
sys.exit(1)
611+
612+
except Exception as validation_error:
613+
# If access validation fails unexpectedly, log warning but continue
614+
# This ensures backward compatibility if the validation has issues
615+
logger.warning(
616+
f"Could not validate EKS access entries: {validation_error}. "
617+
f"Proceeding with kubeconfig update..."
618+
)
619+
594620
_update_kube_config(eks_name, region, None)
595621
k8s_client = KubernetesClient()
596622
k8s_client.set_context(eks_cluster_arn, namespace)

0 commit comments

Comments
 (0)