From 0f988ffd88e90f4b786ab16076dcc7d3b36e06a3 Mon Sep 17 00:00:00 2001 From: RohithReddy Nedhunuri Date: Thu, 11 Sep 2025 20:27:59 +0000 Subject: [PATCH 1/2] Add --columns parameter to knnPerfTest.py for custom output selection --- gradle/knn.gradle | 7 ++++++- src/python/knnPerfTest.py | 25 +++++++++++++++++++------ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/gradle/knn.gradle b/gradle/knn.gradle index 745f6660..10ad7497 100644 --- a/gradle/knn.gradle +++ b/gradle/knn.gradle @@ -49,7 +49,12 @@ task runKnnPerfTest (type: Exec) { doFirst { def runs = project.hasProperty('runs') ? project.property('runs') : '1' - commandLine 'python3', 'src/python/knnPerfTest.py', '--runs', runs + def columns = project.hasProperty('columns') ? project.property('columns') : null + def cmdArgs = ['python3', 'src/python/knnPerfTest.py', '--runs', runs] + if (columns != null) { + cmdArgs += ['--columns', columns] + } + commandLine cmdArgs } } diff --git a/src/python/knnPerfTest.py b/src/python/knnPerfTest.py index 2beee34f..89981446 100644 --- a/src/python/knnPerfTest.py +++ b/src/python/knnPerfTest.py @@ -135,8 +135,16 @@ def advance(ix, values): return True return False +def get_skip_headers_from_columns(selected_columns): + if not selected_columns: + return set() -def run_knn_benchmark(checkout, values): + selected_set = set(col.strip() for col in selected_columns.split(',')) + all_headers = set(OUTPUT_HEADERS) + return all_headers - selected_set + + +def run_knn_benchmark(checkout, values, selected_columns=None): indexes = [0] * len(values.keys()) indexes[-1] = -1 args = [] @@ -278,7 +286,11 @@ def run_knn_benchmark(checkout, values): # TODO: be more careful when we skip/show headers e.g. if some of the runs involve filtering, # turn filterType/selectivity back on for all runs # skip_headers = {'selectivity', 'filterType', 'visited'} - skip_headers = {"selectivity", "filterType", "visited"} + if selected_columns: + skip_headers = get_skip_headers_from_columns(selected_columns) + skip_headers.update({"selectivity", "filterType", "visited"}) + else: + skip_headers = {"selectivity", "filterType", "visited"} if "-forceMerge" not in this_cmd: skip_headers.add("force_merge(s)") @@ -431,11 +443,11 @@ def chart_args_label(args): return str(args) -def run_n_knn_benchmarks(LUCENE_CHECKOUT, PARAMS, n): +def run_n_knn_benchmarks(LUCENE_CHECKOUT, PARAMS, n, selected_columns=None): rec, lat, net, avg = [], [], [], [] tests = [] for i in range(n): - results, skip_headers = run_knn_benchmark(LUCENE_CHECKOUT, PARAMS) + results, skip_headers = run_knn_benchmark(LUCENE_CHECKOUT, PARAMS, selected_columns) tests.append(results) first_4_numbers = results[0][0].split("\t")[:4] first_4_numbers = [float(num) for num in first_4_numbers] @@ -475,11 +487,12 @@ def run_n_knn_benchmarks(LUCENE_CHECKOUT, PARAMS, n): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run KNN benchmarks") parser.add_argument("--runs", type=int, default=1, help="Number of times to run the benchmark (default: 1)") + parser.add_argument("--columns", help="Comma-separated list of columns to display (default: all)") n = parser.parse_args() # Where the version of Lucene is that will be tested. Now this will be sourced from gradle.properties LUCENE_CHECKOUT = getLuceneDirFromGradleProperties() if n.runs == 1: - run_knn_benchmark(LUCENE_CHECKOUT, PARAMS) + run_knn_benchmark(LUCENE_CHECKOUT, PARAMS, n.columns) else: - run_n_knn_benchmarks(LUCENE_CHECKOUT, PARAMS, n.runs) + run_n_knn_benchmarks(LUCENE_CHECKOUT, PARAMS, n.runs, n.columns) From 26c37f74a566c58a0d8264994bedf8ca8f36e7f0 Mon Sep 17 00:00:00 2001 From: RohithReddy Nedhunuri Date: Thu, 18 Sep 2025 01:40:45 +0000 Subject: [PATCH 2/2] Add --columns parameter to knnPerfTest.py for custom output selection --- src/python/knnPerfTest.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/python/knnPerfTest.py b/src/python/knnPerfTest.py index 89981446..e15a3514 100644 --- a/src/python/knnPerfTest.py +++ b/src/python/knnPerfTest.py @@ -135,11 +135,12 @@ def advance(ix, values): return True return False + def get_skip_headers_from_columns(selected_columns): if not selected_columns: return set() - selected_set = set(col.strip() for col in selected_columns.split(',')) + selected_set = set(col.strip() for col in selected_columns.split(",")) all_headers = set(OUTPUT_HEADERS) return all_headers - selected_set @@ -487,7 +488,8 @@ def run_n_knn_benchmarks(LUCENE_CHECKOUT, PARAMS, n, selected_columns=None): if __name__ == "__main__": parser = argparse.ArgumentParser(description="Run KNN benchmarks") parser.add_argument("--runs", type=int, default=1, help="Number of times to run the benchmark (default: 1)") - parser.add_argument("--columns", help="Comma-separated list of columns to display (default: all)") + available_columns = ", ".join(OUTPUT_HEADERS) + parser.add_argument("--columns", help=f"Comma-separated list of columns to display. Available columns: {available_columns} (default: all)") n = parser.parse_args() # Where the version of Lucene is that will be tested. Now this will be sourced from gradle.properties