1111# ANY KIND, either express or implied. See the License for the specific
1212# language governing permissions and limitations under the License.
1313from typing import Optional
14+ import boto3
1415
1516from hyperpod_cli .clients .kubernetes_client import (
1617 KubernetesClient ,
1920from hyperpod_cli .service .list_pods import (
2021 ListPods ,
2122)
23+ from hyperpod_cli .utils import (
24+ get_eks_cluster_name ,
25+ get_hyperpod_cluster_region ,
26+ )
2227from kubernetes .client .rest import ApiException
2328from kubernetes .client import V1ResourceAttributes
2429
# Name of the EKS add-on whose presence is used to decide whether Container
# Insights log links can be generated for a cluster.
AMAZON_CLOUDWATCH_OBSERVABILITY = "amazon-cloudwatch-observability"

# Backward-compatible alias: the identifier was originally spelled with a
# lowercase "l" ("ClOUD"). Existing references keep working through this name.
AMAZON_ClOUDWATCH_OBSERVABILITY = AMAZON_CLOUDWATCH_OBSERVABILITY
31+
2532class GetLogs :
2633 def __init__ (self ):
2734 return
@@ -57,7 +64,67 @@ def get_training_job_logs(
5764 raise RuntimeError (
5865 f"Given pod name { pod_name } is not associated with training job { job_name } in namespace { namespace } "
5966 )
60-
6167 return k8s_client .get_logs_for_pod (pod_name , namespace )
6268 except ApiException as e :
6369 raise RuntimeError (f"Unexpected API error: { e .reason } ({ e .status } )" )
70+
def generate_cloudwatch_link(
    self,
    pod_name: str,
    namespace: Optional[str],
):
    """Produce a user-facing message pointing at the pod's CloudWatch log stream.

    Args:
        pod_name: Name of the pod whose log stream should be linked.
        namespace: Namespace the pod lives in; a falsy value makes the link
            impossible to build.

    Returns:
        None when the Container Insights add-on is not enabled on the
        cluster; otherwise a message string that either contains the
        CloudWatch URL or states that the link could not be built.
    """
    cluster_name = get_eks_cluster_name()
    if not self.is_container_insights_addon_enabled(cluster_name):
        return None

    # get_pod_details returns a V1Pod object
    pod = KubernetesClient().get_pod_details(pod_name, namespace)

    # Node the pod is scheduled on
    node_name = None
    if pod.spec and pod.spec.node_name:
        node_name = pod.spec.node_name

    # Name of the pod's first container
    container_name = None
    if pod.spec and pod.spec.containers and pod.spec.containers[0].name:
        container_name = pod.spec.containers[0].name

    # Runtime id of the first container, reported as "containerd://xxxxxxxxxx"
    container_id = None
    runtime_prefix = 'containerd://'
    if pod.status and pod.status.container_statuses and pod.status.container_statuses[0].container_id:
        reported_id = pod.status.container_statuses[0].container_id
        if reported_id.startswith(runtime_prefix):
            container_id = reported_id[len(runtime_prefix):]

    # Every component of the log stream name must be known to build the link
    required_parts = (node_name, pod_name, namespace, container_name, container_id)
    if not all(required_parts):
        return 'Failed to load container insights CloudWatch Link!'

    region = get_hyperpod_cluster_region()
    log_url = self.get_log_url(
        cluster_name,
        region,
        node_name,
        pod_name,
        namespace,
        container_name,
        container_id,
    )
    # The message text, including its trailing space, is kept as-is.
    return f'The pod cloudwatch log stream link is {log_url} '
117+
def get_log_url(self, eks_cluster_name, region, node_name, pod_name, namespace, container_name, container_id):
    """Build the CloudWatch console URL for a pod container's log stream.

    Args:
        eks_cluster_name: EKS cluster name, used in the log group path.
        region: AWS region used for both the console host and the
            ``region`` query parameter.
        node_name: Node the pod runs on; prefixes the log stream name.
        pod_name: Pod name.
        namespace: Pod namespace.
        container_name: Container name within the pod.
        container_id: Container runtime id without the runtime scheme prefix.

    Returns:
        The URL as a single string with no embedded whitespace.
    """
    # Each piece is written without padding around the interpolated values:
    # a stray space anywhere would yield a link the browser cannot open.
    console_prefix = f'https://{region}.console.aws.amazon.com/cloudwatch/home?region={region}#'
    # "$252F" stands in for "/" inside the console's URL fragment, so this
    # addresses the log group /aws/containerinsights/<cluster>/application.
    log_group_prefix = (
        'logsV2:log-groups/log-group/'
        f'$252Faws$252Fcontainerinsights$252F{eks_cluster_name}$252Fapplication/log-events/'
    )
    log_stream = (
        f'{node_name}-application.var.log.containers.'
        f'{pod_name}_{namespace}_{container_name}-{container_id}.log'
    )

    return console_prefix + log_group_prefix + log_stream
124+
def is_container_insights_addon_enabled(self, eks_cluster_name):
    """Report whether the CloudWatch Observability add-on is installed on a cluster.

    Args:
        eks_cluster_name: Name of the EKS cluster whose add-ons are listed.

    Returns:
        True if ``AMAZON_ClOUDWATCH_OBSERVABILITY`` appears in any page of
        the cluster's add-on list, otherwise False.
    """
    # The client is created without an explicit region, as before.
    eks_client = boto3.client("eks")
    request = {"clusterName": eks_cluster_name, "maxResults": 50}

    # list_addons is paginated: keep following nextToken so an add-on that
    # falls beyond the first page of results is not reported as missing.
    while True:
        response = eks_client.list_addons(**request)
        if AMAZON_ClOUDWATCH_OBSERVABILITY in response.get("addons", []):
            return True

        next_token = response.get("nextToken")
        # A missing, empty, or non-string token ends pagination; this also
        # prevents looping forever on an unexpected response shape.
        if not isinstance(next_token, str) or not next_token:
            return False
        request["nextToken"] = next_token
0 commit comments