|  | 
|  | 1 | +#!/usr/bin/env python | 
#
# This script shows the basics of getting data out of Sysdig Monitor by executing a PromQL query
# that returns the top 5 Kubernetes workloads consuming the highest percentage of their allocated
# CPU, computed by comparing actual usage to the defined CPU limits. The query is executed over a
# 5-minute time window.
#
|  | 7 | + | 
|  | 8 | +import sys | 
|  | 9 | +import time | 
|  | 10 | +from datetime import datetime | 
|  | 11 | + | 
|  | 12 | +from sdcclient import SdcClient | 
|  | 13 | + | 
|  | 14 | + | 
def print_prometheus_results_as_table(results):
    """Print a PromQL range-query result as an aligned text table.

    One row is printed per distinct timestamp and one column per distinct
    label set. A series that has no sample at a given timestamp shows "N/A"
    in that row.

    Args:
        results: List of series dicts. Each dict may carry a ``"metric"`` dict
            of label names to values and a ``"values"`` list of
            ``[timestamp, value]`` pairs; missing keys are treated as empty.

    Returns:
        None. The table (or a "no data" notice) is written to stdout.
    """
    if not results:
        print("No data found for the query.")
        return

    # Data cells are at least this wide; columns grow to fit longer labels.
    min_column_width = 20

    all_timestamps = set()
    time_series_by_label = {}

    for series in results:
        metric = series.get("metric", {})
        # Sort the labels so the same label set always yields the same column key.
        label = ','.join(f'{k}={v}' for k, v in sorted(metric.items()))

        # Timestamps may arrive as ints, floats or numeric strings; truncate
        # them to whole seconds so samples from different series line up.
        samples = {
            int(float(timestamp)): value
            for timestamp, value in series.get("values", [])
        }
        all_timestamps.update(samples)
        time_series_by_label[label] = samples

    label_keys = sorted(time_series_by_label)
    widths = [max(min_column_width, len(label)) for label in label_keys]

    # Header and data cells share the same per-column width so the column
    # separators line up even when labels are longer than the values.
    header = f"{'Timestamp':<25} | " + " | ".join(
        f"{label:>{width}}" for label, width in zip(label_keys, widths)
    )
    print(header)
    print("-" * len(header))

    for ts in sorted(all_timestamps):
        # Naive conversion: the timestamp is rendered in the local timezone.
        dt = datetime.fromtimestamp(ts).isoformat()
        cells = (
            f"{str(time_series_by_label[label].get(ts, 'N/A')):>{width}}"
            for label, width in zip(label_keys, widths)
        )
        print(f"{dt:<25} | " + " | ".join(cells))
|  | 51 | + | 
|  | 52 | + | 
#
# Read the Sysdig API token and the hostname from the command line;
# print a usage message and exit with status 1 if either is missing.
#
if len(sys.argv) != 3:
    print('usage: %s <sysdig-token> <hostname>' % sys.argv[0])
    print('You can find your token at https://app.sysdigcloud.com/#/settings/user')
    sys.exit(1)

# Exactly two positional arguments are present at this point.
sdc_token, hostname = sys.argv[1:]

#
# Instantiate the SDC client
#
sdclient = SdcClient(sdc_token, hostname)
|  | 65 | + | 
#
# The PromQL expression to run. For the "dev-cluster" Kubernetes cluster it sums, per
# cluster/namespace/workload, the 10-minute rate of sysdig_container_cpu_cores_used and
# divides it by the summed CPU limits (kube_pod_container_resource_limits, resource="cpu")
# of the same workload, then keeps the 5 highest ratios.
#
query = '''
topk (5,
    sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (
      rate(
        sysdig_container_cpu_cores_used{
          kube_cluster_name="dev-cluster"
        }[10m]
      )
    )
  /
    sum by (kube_cluster_name, kube_namespace_name, kube_workload_name) (
      kube_pod_container_resource_limits{
        kube_cluster_name="dev-cluster",
        resource="cpu"
      }
    )
)
'''

#
# Queried time range, as Unix timestamps in seconds:
#   - it ends now
#   - it starts 5 minutes before that
#
window_seconds = 5 * 60
end = int(time.time())
start = end - window_seconds

#
# Resolution step: the distance, in seconds, between the timestamps of
# consecutive samples in the result
#
step = 60
|  | 104 | + | 
#
# Run the range query
#
ok, response_json = sdclient.get_data_promql(query, start, end, step)

#
# On failure, print whatever the client returned and exit with status 1
#
if not ok:
    print(response_json)
    sys.exit(1)

#
# On success the response JSON looks like this:
#
# {
#     "data":   {
#         "result":     [
#             {
#                 "metric": {},
#                 "values": [
#                     [
#                         1744210080,
#                         "0.58"
#                     ],
#                     [
#                         1744210140,
#                         "0.58"
#                     ],
#                     [
#                         1744210200,
#                         "0.58"
#                     ],
#                     [
#                         1744210260,
#                         "0.5799999999999998"
#                     ],
#                     [
#                         1744210320,
#                         "0.5799999999999998"
#                     ],
#                     [
#                         1744210380,
#                         "0.5799999999999998"
#                     ]
#                 ]
#             }
#         ],
#         "resultType": "matrix"
#     },
#     "status": "success"
# }
#

#
# Pull the list of series out of the response (empty if absent) and
# print it as a table
#
results = response_json.get("data", {}).get("result", [])
print_prometheus_results_as_table(results)
0 commit comments