
Commit 6193059

Appease check formatting
1 parent 38c9bed commit 6193059

File tree

3 files changed, +142 -123 lines changed


devops/scripts/benchmarking/aggregate.py
Lines changed: 81 additions & 68 deletions

@@ -6,17 +6,18 @@
 
 import common
 
+
 # Simple median calculation
 class SimpleMedian:
 
-    def __init__(self):
-        self.elements = []
+    def __init__(self):
+        self.elements = []
 
-    def add(self, n: float):
-        self.elements.append(n)
+    def add(self, n: float):
+        self.elements.append(n)
 
-    def get_median(self) -> float:
-        return statistics.median(elements)
+    def get_median(self) -> float:
+        return statistics.median(elements)
 
 
 # Calculate medians incrementally using a heap: Useful for when dealing with
@@ -26,88 +27,100 @@ def get_median(self) -> float:
 # with precommit in mind, but if this only runs nightly, it would actually be
 # faster to do a normal median calculation.
 class StreamingMedian:
-
+
     def __init__(self):
-        # Gist: we keep a minheap and a maxheap, and store the median as the top
-        # of the minheap. When a new element comes it gets put into the heap
-        # based on if the element is bigger than the current median. Then, the
-        # heaps are heapified and the median is repopulated by heapify.
+        # Gist: we keep a minheap and a maxheap, and store the median as the top
+        # of the minheap. When a new element comes it gets put into the heap
+        # based on if the element is bigger than the current median. Then, the
+        # heaps are heapified and the median is repopulated by heapify.
         self.minheap_larger = []
         self.maxheap_smaller = []
-        # Note: numbers on maxheap should be negative, as heapq
-        # is minheap by default
+
+        # Note: numbers on maxheap should be negative, as heapq
+        # is minheap by default
 
     def add(self, n: float):
         if len(self.maxheap_smaller) == 0 or -self.maxheap_smaller[0] >= n:
             heapq.heappush(self.maxheap_smaller, -n)
         else:
             heapq.heappush(self.minheap_larger, n)
 
-        # Ensure minheap has more elements than maxheap
+        # Ensure minheap has more elements than maxheap
         if len(self.maxheap_smaller) > len(self.minheap_larger) + 1:
-            heapq.heappush(self.minheap_larger,
-                           -heapq.heappop(self.maxheap_smaller))
+            heapq.heappush(self.minheap_larger, -heapq.heappop(self.maxheap_smaller))
         elif len(self.maxheap_smaller) < len(self.minheap_larger):
-            heapq.heappush(self.maxheap_smaller,
-                           -heapq.heappop(self.minheap_larger))
+            heapq.heappush(self.maxheap_smaller, -heapq.heappop(self.minheap_larger))
 
     def get_median(self) -> float:
         if len(self.maxheap_smaller) == len(self.minheap_larger):
-            # Equal number of elements smaller and larger than "median":
-            # thus, there are two median values. The median would then become
-            # the average of both median values.
+            # Equal number of elements smaller and larger than "median":
+            # thus, there are two median values. The median would then become
+            # the average of both median values.
            return (-self.maxheap_smaller[0] + self.minheap_larger[0]) / 2.0
         else:
-            # Otherwise, median is always in minheap, as minheap is always
-            # bigger
+            # Otherwise, median is always in minheap, as minheap is always
+            # bigger
            return -self.maxheap_smaller[0]
 
 
 def aggregate_median(runner: str, benchmark: str, cutoff: str):
 
-    # Get all .csv benchmark samples for the requested runner + benchmark
-    def csv_samples() -> list[str]:
-        # TODO check that the path below is valid directory
-        cache_dir = Path(f"{common.PERF_RES_PATH}/{runner}/{benchmark}")
-        # TODO check for time range; What time range do I want?
-        return filter(lambda f: f.is_file() and
-                      common.valid_timestamp(str(f)[-19:-4]) and str(f)[-19:-4] > cutoff,
-                      cache_dir.glob(f"{benchmark}-*_*.csv"))
-
-    # Calculate median of every desired metric:
-    aggregate_s = dict()
-    for sample_path in csv_samples():
-        with open(sample_path, 'r') as sample_file:
-            for s in csv.DictReader(sample_file):
-                test_case = s["TestCase"]
-                # Construct entry in aggregate_s for test case if it does not
-                # exist already:
-                if test_case not in aggregate_s:
-                    aggregate_s[test_case] = \
-                        { metric: SimpleMedian() for metric in common.metrics_variance }
-
-                for metric in common.metrics_variance:
-                    aggregate_s[test_case][metric].add(common.sanitize(s[metric]))
-
-    # Write calculated median (aggregate_s) as a new .csv file:
-    with open(f"{common.PERF_RES_PATH}/{runner}/{benchmark}/{benchmark}-median.csv", 'w') as output_csv:
-        writer = csv.DictWriter(output_csv,
-                                fieldnames=["TestCase", *common.metrics_variance.keys()])
-        writer.writeheader()
-        for test_case in aggregate_s:
-            writer.writerow({ "TestCase": test_case } |
-                            { metric: aggregate_s[test_case][metric].get_median()
-                              for metric in common.metrics_variance })
-
-
+    # Get all .csv benchmark samples for the requested runner + benchmark
+    def csv_samples() -> list[str]:
+        # TODO check that the path below is valid directory
+        cache_dir = Path(f"{common.PERF_RES_PATH}/{runner}/{benchmark}")
+        # TODO check for time range; What time range do I want?
+        return filter(
+            lambda f: f.is_file()
+            and common.valid_timestamp(str(f)[-19:-4])
+            and str(f)[-19:-4] > cutoff,
+            cache_dir.glob(f"{benchmark}-*_*.csv"),
+        )
+
+    # Calculate median of every desired metric:
+    aggregate_s = dict()
+    for sample_path in csv_samples():
+        with open(sample_path, "r") as sample_file:
+            for s in csv.DictReader(sample_file):
+                test_case = s["TestCase"]
+                # Construct entry in aggregate_s for test case if it does not
+                # exist already:
+                if test_case not in aggregate_s:
+                    aggregate_s[test_case] = {
+                        metric: SimpleMedian() for metric in common.metrics_variance
+                    }
+
+                for metric in common.metrics_variance:
+                    aggregate_s[test_case][metric].add(common.sanitize(s[metric]))
+
+    # Write calculated median (aggregate_s) as a new .csv file:
+    with open(
+        f"{common.PERF_RES_PATH}/{runner}/{benchmark}/{benchmark}-median.csv", "w"
+    ) as output_csv:
+        writer = csv.DictWriter(
+            output_csv, fieldnames=["TestCase", *common.metrics_variance.keys()]
+        )
+        writer.writeheader()
+        for test_case in aggregate_s:
+            writer.writerow(
+                {"TestCase": test_case}
+                | {
+                    metric: aggregate_s[test_case][metric].get_median()
+                    for metric in common.metrics_variance
+                }
+            )
+
+
 if __name__ == "__main__":
-    if len(sys.argv) < 4:
-        print(f"Usage: {sys.argv[0]} <runner name> <test case name> <cutoff date YYYYMMDD_HHMMSS>")
-        exit(1)
-    if not common.valid_timestamp(sys.argv[3]):
-        print(sys.argv)
-        print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.")
-        exit(1)
-    common.load_configs()
-    # <runner>, <test case>, <cutoff>
-    aggregate_median(sys.argv[1], sys.argv[2], sys.argv[3])
+    if len(sys.argv) < 4:
+        print(
+            f"Usage: {sys.argv[0]} <runner name> <test case name> <cutoff date YYYYMMDD_HHMMSS>"
+        )
+        exit(1)
+    if not common.valid_timestamp(sys.argv[3]):
+        print(sys.argv)
+        print(f"Bad cutoff timestamp, please use YYYYMMDD_HHMMSS.")
+        exit(1)
+    common.load_configs()
+    # <runner>, <test case>, <cutoff>
+    aggregate_median(sys.argv[1], sys.argv[2], sys.argv[3])
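
The StreamingMedian class above implements the two-heap running median described in its "Gist" comment: the lower half of the stream sits in a negated max-heap, the upper half in a min-heap, and the heaps are rebalanced after every insert. Below is a minimal self-contained sketch of that same technique, for illustration only and not code from this commit; the names are made up, and note as an aside that SimpleMedian.get_median above refers to elements where self.elements appears intended.

# Minimal sketch of the two-heap running-median technique; illustrative only.
import heapq
import random
import statistics


def running_median(values):
    smaller = []  # lower half, stored negated so heapq behaves as a max-heap
    larger = []   # upper half, a plain min-heap
    for n in values:
        if not smaller or -smaller[0] >= n:
            heapq.heappush(smaller, -n)
        else:
            heapq.heappush(larger, n)
        # Rebalance so that len(larger) <= len(smaller) <= len(larger) + 1
        if len(smaller) > len(larger) + 1:
            heapq.heappush(larger, -heapq.heappop(smaller))
        elif len(smaller) < len(larger):
            heapq.heappush(smaller, -heapq.heappop(larger))
        if len(smaller) == len(larger):
            yield (-smaller[0] + larger[0]) / 2.0  # average of the two middles
        else:
            yield -smaller[0]  # odd count: top of the lower half is the median


data = [random.uniform(0, 100) for _ in range(1001)]
# With an odd number of samples, the final running value is exactly the
# offline median of the whole stream.
assert list(running_median(data))[-1] == statistics.median(data)

Each insert costs O(log n), which is the tradeoff the comment above weighs against a plain sorted-median recomputation when the job only runs nightly.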

devops/scripts/benchmarking/common.py
Lines changed: 7 additions & 7 deletions

@@ -8,10 +8,10 @@
 
 
 def sanitize(stat: str) -> float:
-    # Get rid of %
-    if stat[-1] == '%':
-        stat = stat[:-1]
-    return float(stat)
+    # Get rid of %
+    if stat[-1] == '%':
+        stat = stat[:-1]
+    return float(stat)
 
 
 def load_configs():
@@ -26,11 +26,11 @@ def load_configs():
 
     global PERF_RES_PATH, metrics_variance, metrics_recorded
     global BENCHMARK_ERROR_LOG, BENCHMARK_SLOW_LOG
-    perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M)
+    perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M)
     m_variance_re = re.compile(r'^METRICS_VARIANCE=(.*)$', re.M)
     m_recorded_re = re.compile(r'^METRICS_RECORDED=(.*)$', re.M)
-    b_slow_re = re.compile(r'^BENCHMARK_SLOW_LOG=(.*)$', re.M)
-    b_error_re = re.compile(r'^BENCHMARK_ERROR_LOG=(.*)$', re.M)
+    b_slow_re = re.compile(r'^BENCHMARK_SLOW_LOG=(.*)$', re.M)
+    b_error_re = re.compile(r'^BENCHMARK_ERROR_LOG=(.*)$', re.M)
 
     with open(benchmarking_ci_conf_path, 'r') as configs_file:
         configs_str = configs_file.read()
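
For reference, the re.M patterns above pull values out of a KEY=value style config file, and sanitize() strips a trailing percent sign before converting to float. A small sketch with a made-up config string; the keys shown match the patterns, but the paths and values are hypothetical, not the repository's real benchmarking configuration.

# Illustrative only: made-up config contents, same parsing pattern as above.
import re

sample_config = "PERF_RES_PATH=/tmp/perf-results\nBENCHMARK_SLOW_LOG=./slow.log\n"

perf_res_re = re.compile(r'^PERF_RES_PATH=(.*)$', re.M)
match = perf_res_re.search(sample_config)
assert match is not None and match.group(1) == "/tmp/perf-results"


def sanitize(stat: str) -> float:
    # Same behaviour as common.sanitize: drop a trailing '%' before float().
    if stat[-1] == '%':
        stat = stat[:-1]
    return float(stat)


assert sanitize("12.5%") == 12.5
assert sanitize("42") == 42.0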

devops/scripts/benchmarking/compare.py
Lines changed: 54 additions & 48 deletions

@@ -3,57 +3,63 @@
 import sys
 from pathlib import Path
 
-import common
+import common
+
 
 # TODO compare_to(metric) instead?
 def compare_to_median(runner: str, test_name: str, test_csv_path: str):
-    median_path = f"{common.PERF_RES_PATH}/{runner}/{test_name}/{test_name}-median.csv"
-
-    if not os.path.isfile(test_csv_path):
-        print("Invalid test file provided: " + test_csv_path)
-        exit(-1)
-    if not os.path.isfile(median_path):
-        print(f"Median file for test {test_name} not found at {median_path}.\n" +
-              "Please build the median using the aggregate workflow.")
-        exit(-1)
-
-    median = dict()
-    with open(median_path, 'r') as median_csv:
-        for stat in csv.DictReader(median_csv):
-            median[stat["TestCase"]] = \
-                { metric: float(stat[metric]) for metric in common.metrics_variance }
-
-    # TODO read status codes from a config file instead?
-    status = 0
-    failure_counts = { metric: 0 for metric in common.metrics_variance }
-    with open(test_csv_path, 'r') as sample_csv:
-        for sample in csv.DictReader(sample_csv):
-            # Ignore test cases we haven't profiled before
-            if sample["TestCase"] not in median:
-                continue
-            test_median = median[sample["TestCase"]]
-            for metric, threshold in common.metrics_variance.items():
-                max_tolerated = test_median[metric] * (1 + threshold)
-                if common.sanitize(sample[metric]) > max_tolerated:
-                    print("vvv FAILED vvv")
-                    print(sample['TestCase'])
-                    print(f"{metric}: {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tolerance {threshold*100}%: {max_tolerated})")
-                    print("^^^^^^^^^^^^^^")
-                    with open(common.BENCHMARK_SLOW_LOG, 'a') as slow_log:
-                        slow_log.write(
-                            f"-- {test_name}::{sample['TestCase']}\n"
-                            f"  {metric}: {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tol. {threshold*100}%: {max_tolerated})\n"
-                        )
-                    status = 1
-                    failure_counts[metric] += 1
-    if status != 0:
-        print(f"Failure counts: {failure_counts}")
-    return status
+    median_path = f"{common.PERF_RES_PATH}/{runner}/{test_name}/{test_name}-median.csv"
+
+    if not os.path.isfile(test_csv_path):
+        print("Invalid test file provided: " + test_csv_path)
+        exit(-1)
+    if not os.path.isfile(median_path):
+        print(
+            f"Median file for test {test_name} not found at {median_path}.\n"
+            + "Please build the median using the aggregate workflow."
+        )
+        exit(-1)
+
+    median = dict()
+    with open(median_path, "r") as median_csv:
+        for stat in csv.DictReader(median_csv):
+            median[stat["TestCase"]] = {
+                metric: float(stat[metric]) for metric in common.metrics_variance
+            }
+
+    # TODO read status codes from a config file instead?
+    status = 0
+    failure_counts = {metric: 0 for metric in common.metrics_variance}
+    with open(test_csv_path, "r") as sample_csv:
+        for sample in csv.DictReader(sample_csv):
+            # Ignore test cases we haven't profiled before
+            if sample["TestCase"] not in median:
+                continue
+            test_median = median[sample["TestCase"]]
+            for metric, threshold in common.metrics_variance.items():
+                max_tolerated = test_median[metric] * (1 + threshold)
+                if common.sanitize(sample[metric]) > max_tolerated:
+                    print("vvv FAILED vvv")
+                    print(sample["TestCase"])
+                    print(
+                        f"{metric}: {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tolerance {threshold*100}%: {max_tolerated})"
+                    )
+                    print("^^^^^^^^^^^^^^")
+                    with open(common.BENCHMARK_SLOW_LOG, "a") as slow_log:
+                        slow_log.write(
+                            f"-- {test_name}::{sample['TestCase']}\n"
+                            f"  {metric}: {common.sanitize(sample[metric])} -- Historic avg. {test_median[metric]} (max tol. {threshold*100}%: {max_tolerated})\n"
+                        )
+                    status = 1
+                    failure_counts[metric] += 1
+    if status != 0:
+        print(f"Failure counts: {failure_counts}")
+    return status
 
 
 if __name__ == "__main__":
-    if len(sys.argv) < 4:
-        print(f"Usage: {sys.argv[0]} <runner name> <test name> <test csv path>")
-        exit(-1)
-    common.load_configs()
-    exit(compare_to_median(sys.argv[1], sys.argv[2], sys.argv[3]))
+    if len(sys.argv) < 4:
+        print(f"Usage: {sys.argv[0]} <runner name> <test name> <test csv path>")
+        exit(-1)
+    common.load_configs()
+    exit(compare_to_median(sys.argv[1], sys.argv[2], sys.argv[3]))
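
The check in compare_to_median flags a sample when it exceeds the stored median by more than the per-metric threshold, that is, when sample > median * (1 + threshold). A worked sketch with made-up numbers follows; the metric name, threshold, and values are illustrative, not the repository's configured metrics_variance.

# Illustrative only: mirrors the per-metric tolerance check above.
historic_median = {"Median": 100.0}  # hypothetical value from <test>-median.csv
metrics_variance = {"Median": 0.05}  # hypothetical 5% allowed regression
sample = {"TestCase": "example_case", "Median": "108.0"}

status = 0
for metric, threshold in metrics_variance.items():
    max_tolerated = historic_median[metric] * (1 + threshold)  # 105% of median
    if float(sample[metric]) > max_tolerated:
        print(
            f"{metric}: {sample[metric]} exceeds {max_tolerated} "
            f"({threshold * 100}% over the historic median)"
        )
        status = 1  # non-zero status marks the sample as a regression

In the script itself, compare_to_median returns this status and the __main__ block passes it to exit(), so any metric over its tolerance fails the run.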
