From e10f818ede207540d90ed7db23efd504c1caa9b6 Mon Sep 17 00:00:00 2001 From: Petr Maj Date: Wed, 23 Apr 2025 16:30:47 +0200 Subject: [PATCH 1/2] Added basic variance metric --- .../java/perfgenie/utils/ArgusQueryT.java | 110 ++++++++++++++++++ .../main/java/perfgenie/utils/SideBySide.java | 70 ++++++++++- utils/src/main/resources/argus.json | 11 +- 3 files changed, 188 insertions(+), 3 deletions(-) diff --git a/utils/src/main/java/perfgenie/utils/ArgusQueryT.java b/utils/src/main/java/perfgenie/utils/ArgusQueryT.java index 65cb406..625d269 100644 --- a/utils/src/main/java/perfgenie/utils/ArgusQueryT.java +++ b/utils/src/main/java/perfgenie/utils/ArgusQueryT.java @@ -288,6 +288,89 @@ public static List getCanaryPods(String startquery, String endquery, Str } } + /* Like getArgusMetric, but returns all of the datapoints in a double[]. + */ + public static DatapointsQueryResponse getArgusMetricDatapoints(String m, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { + if (pods.size() == 0) { + return null; + } + if ((System.currentTimeMillis() - lastUpdated) > 5 * 60 * 1000) {//5 min + updateAccessToken(); + lastUpdated = System.currentTimeMillis(); + } + DatapointsQueryResponse response = new DatapointsQueryResponse(); + String queryT = ac.queries.get(ac.metrics.get(m).get("type")); + + String query = queryT.replaceAll("START", String.valueOf(timestampStart)); + query = query.replaceAll("END", String.valueOf(timestampEnd)); + query = query.replaceAll("SCOPE", ac.metrics.get(m).get("scope")); + query = query.replaceAll("METRIC", ac.metrics.get(m).get("metric")); + query = query.replaceAll("INSTANCE", instance); + query = query.replaceAll("DOMAIN", domain); + query = query.replaceAll("CELL", cell); + String podstr = ""; + for (int i = 0; i < pods.size(); i++) { + if (i == 0) { + podstr = pods.get(i); + } else { + podstr = podstr + "|" + pods.get(i); + } + } + query = query.replaceAll("POD", podstr); + + response.query = query; + + try { + query = URLEncoder.encode(query, StandardCharsets.UTF_8.toString()); + } catch (Exception e) { + System.out.println(cell+ " " +m+"1 " + e.getMessage()); + return null; + } + String metricCommand = "curl -H \"Authorization: Bearer " + accessToken + "\" " + "https://monitoring-api.salesforce.com/argusws/metrics?expression=" + query; + + String metric = ""; + if (accessToken != null) { + metric = "{\"array\":" + executeCurlCommand(metricCommand) + "}"; + } else { + try { + if (substrate == null) { + metric = "{\"array\":" + Resources.toString(Resources.getResource("apt.json"), StandardCharsets.UTF_8) + "}"; + } + } catch (Exception e) { + metric = "{}"; + System.out.println(cell + " " +m+"2 " + e.getMessage()); + } + } + + ArrayList data = new ArrayList<>(); + try { + JSONObject jsonObject = new JSONObject(metric); + JSONArray jsonArray = jsonObject.getJSONArray("array"); + for (int i = 0; i < jsonArray.length(); i++) { + JSONObject object = jsonArray.getJSONObject(i); + JSONObject datapoints = object.getJSONObject("datapoints"); + Iterator keys = datapoints.keys(); + while (keys.hasNext()) { + String k = keys.next().toString(); + data.add(datapoints.getDouble(String.valueOf(k))); + } + } + if (!data.isEmpty()) { + response.setDatapoints(data.stream().mapToDouble(Double::doubleValue).toArray()); + return response; + } + } catch (Exception e) { + System.out.println("query->" + response.query); + System.out.println("metric->" + metric); + System.out.println(cell + " " +m+"3 " + e.getMessage()); + return null; + } + System.out.println("query->" + response.query); + System.out.println("metric->" + metric); + System.out.println(cell + " " +m+"4 "); + return null; + } + public static QueryResponse getArgusMetric(String m, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { if (pods.size() == 0) { return null; @@ -430,6 +513,33 @@ public void setMetric(Double metric) { } } + /* Query response that returns all datapoints in a double array, not just the last one. + */ + static class DatapointsQueryResponse { + double [] datapoints; + + public String getQuery() { return query; } + + public void setQuery(String query) { + this.query = query; + } + + String query; + + DatapointsQueryResponse() { + datapoints = null; + query = null; + } + + public double [] getDatapoints() { + return datapoints; + } + + public void setDatapoints(double [] datapoints) { + this.datapoints = datapoints; + } + } + public static QueryResponse getMetric(String queryT, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { if (pods.size() == 0) { return null; diff --git a/utils/src/main/java/perfgenie/utils/SideBySide.java b/utils/src/main/java/perfgenie/utils/SideBySide.java index 32665d5..fab5c0c 100644 --- a/utils/src/main/java/perfgenie/utils/SideBySide.java +++ b/utils/src/main/java/perfgenie/utils/SideBySide.java @@ -101,7 +101,7 @@ public static CanaryResponse processSideBySideCanaryTask(long timestampStart, lo Double metricPercentChange = 100.0 * ((res1.getMetric() / rCount1) - (res2.getMetric() / rCount2)) / (res1.getMetric() / rCount1); record.add(metricPercentChange); header.add(metricList.get(i) + "/r %c:number"); - }else { + } else { record.add(null); header.add(metricList.get(i) + "1:number"); record.add(null); @@ -125,11 +125,77 @@ public static CanaryResponse processSideBySideCanaryTask(long timestampStart, lo record.add(getMetricDashboardURL(canary.finalStart,canary.finalEnd,instance,domain,cell)); header.add("metrics:url"); + double varianceZulu = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod1); + double varianceZing = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod2); + + record.add(varianceZulu);//cell + header.add("varianceZulu:number"); + record.add(varianceZing);//cell + header.add("varianceZing:number"); + return new CanaryResponse(header,record); } return null; } + static double getVarianceOf(String metric, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { + double result = 0; + ArrayList data = new ArrayList<>(); + + // get the required metric + for (String pod : pods) { + ArgusQueryT.DatapointsQueryResponse res = ArgusQueryT.getArgusMetricDatapoints(metric, timestampStart, timestampEnd, instance, domain, cell, Collections.singletonList(pod)); + if (res == null) + continue; + // we have valid result, create array from the datapoints we got back, sort & reverse so that we go from highest to lowest + double[] arr = res.datapoints; + Arrays.sort(arr); + reverseArray(arr); + data.add(arr); + } + + return calculateVariance(data); + } + + /* calculates the variance of given set of observations. This is *not* the standard statistical variance, but an + integral metric that sorts each dataset from highest to lowest and then at each observation sums up the + difference between min and max. The summarized number is then divided by the number of observations + */ + static double calculateVariance(ArrayList from) { + // sort the arrays in descending order + int maxL = 0; + for (double [] a : from) { + Arrays.sort(a); + reverseArray(a); + // and figure out the max length + if (maxL < a.length) + maxL = a.length; + } + double result = 0; + for (int i = 0; i < maxL; i++) { + double min = Double.POSITIVE_INFINITY; + double max = Double.NEGATIVE_INFINITY; + for (double [] a : from) { + if (i < a.length) { + if (a[i] < min) + min = a[i]; + if (a[i] > max) + max = a[i]; + } + } + result = result + (max - min); + } + return result / maxL; + } + + static void reverseArray(double [] a) { + for (int i = 0; i < a.length / 2; i++) { + double temp = a[i]; + a[i] = a[a.length - 1 - i]; + a[a.length - 1 - i] = temp; + } + } + public static String getCanaryDashboardURL(List pod1, List pod2, long curfinalStart, long curfinalEnd, String instance, String domain, String cell) { String URL = "https://moncloud-grafana.sfproxy.monitoring.aws-esvc1-useast2.aws.sfdc.cl/d/evIw19pHz/zulu-zing-canary-falcon-rpulle-automation?orgId=1&"; @@ -164,7 +230,6 @@ public static String getMetricDashboardURL(long curfinalStart, long curfinalEnd, return URL1; } - public static CanaryDetails processZingCanary(String start, String end, String instance, String domain, String cell) { String metric = ArgusQueryT.getGCMetric(start, end, instance, domain, cell); return parse(metric); @@ -425,6 +490,7 @@ private static void generateCombinationsHelper(List list, int k, int sta } } + // run this test to do stuff public static void main(String[] args) { try { int start = 6; diff --git a/utils/src/main/resources/argus.json b/utils/src/main/resources/argus.json index 6c45000..e15ab90 100644 --- a/utils/src/main/resources/argus.json +++ b/utils/src/main/resources/argus.json @@ -3,7 +3,8 @@ "cumulative": "DIFF(GROUPBYTAG(START:END:SCOPE:METRIC{cell=CELL,k8s_container_name=coreapp,k8s_pod_name=POD,role=app}:max:all-max,#k8s_container_name#,#SUM#),GROUPBYTAG(START:END:SCOPE:METRIC{cell=CELL,k8s_container_name=coreapp,k8s_pod_name=POD,role=app}:min:all-min,#k8s_container_name#,#SUM#))", "average": "DOWNSAMPLE(ALIASBYREGEX(ALIAS(GROUPBYTAG(START:END:SCOPE:METRIC{cell=CELL,k8s_container_name=coreapp,k8s_pod_name=POD,role=app}:avg:1m-avg,#k8s_container_name#,#AVERAGE#),#:#,#literal#,#CELL#,#literal#),#(.*)::\\{#),#1d-avg#,#0#,#abs#)", "ccumulative": "DIFF(GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:max:all-max,#k8s_container_name#,#SUM#),GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:min:all-min,#k8s_container_name#,#SUM#))", - "caverage": "DOWNSAMPLE(ALIASBYREGEX(ALIAS(GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:avg:1m-avg,#k8s_container_name#,#AVERAGE#),#:#,#literal#,#CELL#,#literal#),#(.*)::\\{#),#1d-avg#,#0#,#abs#)" + "caverage": "DOWNSAMPLE(ALIASBYREGEX(ALIAS(GROUPBYTAG(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}:avg:1m-avg,#k8s_container_name#,#AVERAGE#),#:#,#literal#,#CELL#,#literal#),#(.*)::\\{#),#1d-avg#,#0#,#abs#)", + "cdatapointsPerRequest" : "DIVIDE(DERIVATIVE(START:END:SCOPE:METRIC{k8s_container_name=coreapp,k8s_pod_name=POD}),LAG(START:END:SCOPE:SFDC_type-ServerMetrics.LogMetric-COUNT{k8s_container_name=coreapp,k8s_pod_name=POD,role=app},1))" }, "metrics": { "rCPUTime" :{ @@ -38,6 +39,14 @@ "type": "ccumulative", "enabled": true }, + "cCPUTimePerReq":{ + "name": "cCPUTimeReq", + "description": "container CPU time / Req (all datapoints)", + "scope": "cadvisor.aws.INSTANCE.DOMAIN", + "metric": "container_cpu_usage_seconds_total", + "type": "cdatapointsPerRequest", + "enabled": true + }, "5xx":{ "name": "5xx", "description": "5xx response count", From 91a040de455633a90c924a42a3af5b059e035696 Mon Sep 17 00:00:00 2001 From: Petr Maj Date: Wed, 23 Apr 2025 17:31:13 +0200 Subject: [PATCH 2/2] Added bootstrap --- .../main/java/perfgenie/utils/SideBySide.java | 106 ++++++++++++++++-- 1 file changed, 98 insertions(+), 8 deletions(-) diff --git a/utils/src/main/java/perfgenie/utils/SideBySide.java b/utils/src/main/java/perfgenie/utils/SideBySide.java index fab5c0c..79e59c0 100644 --- a/utils/src/main/java/perfgenie/utils/SideBySide.java +++ b/utils/src/main/java/perfgenie/utils/SideBySide.java @@ -4,15 +4,33 @@ import org.json.JSONObject; import java.util.*; +import java.util.stream.DoubleStream; import static perfgenie.utils.Canary.*; +/** Triple for median, confidence and variance. + */ +class VarianceResult { + double median; + double confidence; + double variance; + + VarianceResult(double median, double confidence, double variance) { + this.median = median; + this.confidence = confidence; + this.variance = variance; + } +} + public class SideBySide { public static long mindiff = 3600000; public static long maxTimeWindow = 5 * 60 * 60 * 1000; // 5 hours due to argus query limitations, need to switch to huron + static int bootstrapCount = 1000; + static int bootstrapSize = 5000; + public static CanaryResponse processSideBySideCanary(long timestampStart, long timestampEnd, String cell) { return processSideBySideCanaryTask(timestampStart, timestampEnd, podsInstance.get(cell), podsDomain.get(cell), cell); } @@ -125,12 +143,12 @@ public static CanaryResponse processSideBySideCanaryTask(long timestampStart, lo record.add(getMetricDashboardURL(canary.finalStart,canary.finalEnd,instance,domain,cell)); header.add("metrics:url"); - double varianceZulu = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod1); - double varianceZing = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod2); + VarianceResult varianceZulu = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod1); + VarianceResult varianceZing = getVarianceOf("cCPUTimePerReq", canary.finalStart, canary.finalEnd, instance, domain, cell, canary.pod2); - record.add(varianceZulu);//cell + record.add(varianceZulu.variance);//cell header.add("varianceZulu:number"); - record.add(varianceZing);//cell + record.add(varianceZing.variance);//cell header.add("varianceZing:number"); return new CanaryResponse(header,record); @@ -138,8 +156,16 @@ public static CanaryResponse processSideBySideCanaryTask(long timestampStart, lo return null; } - static double getVarianceOf(String metric, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { - double result = 0; + /* Gets variance info for given metric. + + We get the metric for all selected pods and times and then calculate the variance, which is defined as + sum(max - min) / length across pods and observations with observations sorted from highest to lowest. + + The function also calculates the median value and its confidence using bootstrap, if bootstrap count is greater + than zero. This works by taking N samples from all the pod's values, calculating medians of the those datasets + and then reporting the mean and confidence on this distribution. + */ + static VarianceResult getVarianceOf(String metric, long timestampStart, long timestampEnd, String instance, String domain, String cell, List pods) { ArrayList data = new ArrayList<>(); // get the required metric @@ -154,6 +180,7 @@ static double getVarianceOf(String metric, long timestampStart, long timestampEn data.add(arr); } + // calculate the stats return calculateVariance(data); } @@ -161,7 +188,7 @@ static double getVarianceOf(String metric, long timestampStart, long timestampEn integral metric that sorts each dataset from highest to lowest and then at each observation sums up the difference between min and max. The summarized number is then divided by the number of observations */ - static double calculateVariance(ArrayList from) { + static VarianceResult calculateVariance(ArrayList from) { // sort the arrays in descending order int maxL = 0; for (double [] a : from) { @@ -185,7 +212,46 @@ static double calculateVariance(ArrayList from) { } result = result + (max - min); } - return result / maxL; + + // if bootstrap is disabled we are done + if (bootstrapCount == 0) + return new VarianceResult(0, 0, result / maxL); + + // flatten the pod datasets (we don't care they are sorted as we do random sapling anyways) + double[] input = from.stream() + .flatMapToDouble(DoubleStream::of) + .toArray(); + + // the array of medians for the sampled datasets (not expecting normal distribution) + double[] medians = new double[bootstrapCount]; + // create the sampled datasets and fill the medians array + for (int i = 0; i < medians.length; i++) { + double[] dataset = sample(input, bootstrapSize); + medians[i] = calculateMedian(dataset); + } + // calculate array's median (now we expect normal distribution), and standard deviation & error + double medianMean = calculateMean(medians); + double sd = calculateSd(medians, medianMean); + double se = sd / Math.sqrt(medians.length); + + // determine z-score based on the confidence intervals we want and calculate the confidence + double zScore = 3.291; // for 0.999 + //double zScore = 2.576; // for 0.99 + //double zScore = 1.960; // for 0.95 + double confidence = se * zScore; + + return new VarianceResult(medianMean, confidence, result / maxL); + } + + static double[] sample(double[] input, int sampleSize) { + // Concatenate all double arrays into a single double[] + double [] result = new double [sampleSize]; + Random rand = new Random(); + for (int i = 0; i < sampleSize; i++) { + int index = rand.nextInt(input.length); + result[i] = input[index]; + } + return result; } static void reverseArray(double [] a) { @@ -196,6 +262,30 @@ static void reverseArray(double [] a) { } } + static double calculateMean(double [] from) { + if (from.length == 0) + return 0; + double sum = 0.0; + for (double v : from) + sum += v; + return sum / from.length; + } + + static double calculateSd(double [] from, double mean) { + if (from.length == 0) + return 0; + double sd = 0.0; + for (double v : from) + sd += Math.pow(v - mean, 2); + return Math.sqrt(sd / from.length); + } + + static double calculateMedian(double[] from) { + Arrays.sort(from); + return from[from.length / 2]; + } + + public static String getCanaryDashboardURL(List pod1, List pod2, long curfinalStart, long curfinalEnd, String instance, String domain, String cell) { String URL = "https://moncloud-grafana.sfproxy.monitoring.aws-esvc1-useast2.aws.sfdc.cl/d/evIw19pHz/zulu-zing-canary-falcon-rpulle-automation?orgId=1&";