@@ -84,43 +84,36 @@ def collect_benchmark_result(
84
84
path = pathlib .Path (benchmark_path )
85
85
try :
86
86
results_dict : Dict [str , List [Union [int , float , None ]]] = {}
87
- bytes_files = sorted (path .rglob ("*.bytesprocessed" ))
88
- millis_files = sorted (path .rglob ("*.slotmillis" ))
89
- bq_seconds_files = sorted (path .rglob ("*.bq_exec_time_seconds" ))
87
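+ # Use the *.local_exec_time_seconds files as the baseline: each one identifies a benchmark, and the other metric files are looked up next to it by base name.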
90
88
local_seconds_files = sorted (path .rglob ("*.local_exec_time_seconds" ))
91
- query_char_count_files = sorted (path .rglob ("*.query_char_count" ))
92
-
93
89
error_files = sorted (path .rglob ("*.error" ))
94
-
95
- if not (
96
- len (millis_files )
97
- == len (bq_seconds_files )
98
- <= len (bytes_files )
99
- == len (query_char_count_files )
100
- == len (local_seconds_files )
101
- ):
102
- raise ValueError (
103
- "Mismatch in the number of report files for bytes, millis, seconds and query char count: \n "
104
- f"millis_files: { len (millis_files )} \n "
105
- f"bq_seconds_files: { len (bq_seconds_files )} \n "
106
- f"bytes_files: { len (bytes_files )} \n "
107
- f"query_char_count_files: { len (query_char_count_files )} \n "
108
- f"local_seconds_files: { len (local_seconds_files )} \n "
109
- )
110
-
111
- has_full_metrics = len (bq_seconds_files ) == len (local_seconds_files )
112
-
113
- for idx in range (len (local_seconds_files )):
114
- query_char_count_file = query_char_count_files [idx ]
115
- local_seconds_file = local_seconds_files [idx ]
116
- bytes_file = bytes_files [idx ]
117
- filename = query_char_count_file .relative_to (path ).with_suffix ("" )
118
- if filename != local_seconds_file .relative_to (path ).with_suffix (
119
- ""
120
- ) or filename != bytes_file .relative_to (path ).with_suffix ("" ):
121
- raise ValueError (
122
- "File name mismatch among query_char_count, bytes and seconds reports."
123
- )
90
+ benchmarks_with_missing_files = []
91
+
92
+ for local_seconds_file in local_seconds_files :
93
+ base_name = local_seconds_file .name .removesuffix (".local_exec_time_seconds" )
94
+ base_path = local_seconds_file .parent / base_name
95
+ filename = base_path .relative_to (path )
96
+
97
+ # Construct paths for other metric files
98
+ bytes_file = pathlib .Path (f"{ base_path } .bytesprocessed" )
99
+ millis_file = pathlib .Path (f"{ base_path } .slotmillis" )
100
+ bq_seconds_file = pathlib .Path (f"{ base_path } .bq_exec_time_seconds" )
101
+ query_char_count_file = pathlib .Path (f"{ base_path } .query_char_count" )
102
+
103
+ # Check that every corresponding metric file exists; if any is missing, record the benchmark and skip it.
104
+ missing_files = []
105
+ if not bytes_file .exists ():
106
+ missing_files .append (bytes_file .name )
107
+ if not millis_file .exists ():
108
+ missing_files .append (millis_file .name )
109
+ if not bq_seconds_file .exists ():
110
+ missing_files .append (bq_seconds_file .name )
111
+ if not query_char_count_file .exists ():
112
+ missing_files .append (query_char_count_file .name )
113
+
114
+ if missing_files :
115
+ benchmarks_with_missing_files .append ((str (filename ), missing_files ))
116
+ continue
124
117
125
118
with open (query_char_count_file , "r" ) as file :
126
119
lines = file .read ().splitlines ()
@@ -135,26 +128,13 @@ def collect_benchmark_result(
135
128
lines = file .read ().splitlines ()
136
129
total_bytes = sum (int (line ) for line in lines ) / iterations
137
130
138
- if not has_full_metrics :
139
- total_slot_millis = None
140
- bq_seconds = None
141
- else :
142
- millis_file = millis_files [idx ]
143
- bq_seconds_file = bq_seconds_files [idx ]
144
- if filename != millis_file .relative_to (path ).with_suffix (
145
- ""
146
- ) or filename != bq_seconds_file .relative_to (path ).with_suffix ("" ):
147
- raise ValueError (
148
- "File name mismatch among query_char_count, bytes, millis, and seconds reports."
149
- )
150
-
151
- with open (millis_file , "r" ) as file :
152
- lines = file .read ().splitlines ()
153
- total_slot_millis = sum (int (line ) for line in lines ) / iterations
131
+ with open (millis_file , "r" ) as file :
132
+ lines = file .read ().splitlines ()
133
+ total_slot_millis = sum (int (line ) for line in lines ) / iterations
154
134
155
- with open (bq_seconds_file , "r" ) as file :
156
- lines = file .read ().splitlines ()
157
- bq_seconds = sum (float (line ) for line in lines ) / iterations
135
+ with open (bq_seconds_file , "r" ) as file :
136
+ lines = file .read ().splitlines ()
137
+ bq_seconds = sum (float (line ) for line in lines ) / iterations
158
138
159
139
results_dict [str (filename )] = [
160
140
query_count ,
@@ -207,13 +187,9 @@ def collect_benchmark_result(
207
187
f"{ index } - query count: { row ['Query_Count' ]} ,"
208
188
+ f" query char count: { row ['Query_Char_Count' ]} ,"
209
189
+ f" bytes processed sum: { row ['Bytes_Processed' ]} ,"
210
- + (f" slot millis sum: { row ['Slot_Millis' ]} ," if has_full_metrics else "" )
211
- + f" local execution time: { formatted_local_exec_time } seconds"
212
- + (
213
- f", bigquery execution time: { round (row ['BigQuery_Execution_Time_Sec' ], 1 )} seconds"
214
- if has_full_metrics
215
- else ""
216
- )
190
+ + f" slot millis sum: { row ['Slot_Millis' ]} ,"
191
+ + f" local execution time: { formatted_local_exec_time } "
192
+ + f", bigquery execution time: { round (row ['BigQuery_Execution_Time_Sec' ], 1 )} seconds"
217
193
)
218
194
219
195
geometric_mean_queries = geometric_mean_excluding_zeros (
@@ -239,30 +215,26 @@ def collect_benchmark_result(
239
215
f"---Geometric mean of queries: { geometric_mean_queries } ,"
240
216
+ f" Geometric mean of queries char counts: { geometric_mean_query_char_count } ,"
241
217
+ f" Geometric mean of bytes processed: { geometric_mean_bytes } ,"
242
- + (
243
- f" Geometric mean of slot millis: { geometric_mean_slot_millis } ,"
244
- if has_full_metrics
245
- else ""
246
- )
218
+ + f" Geometric mean of slot millis: { geometric_mean_slot_millis } ,"
247
219
+ f" Geometric mean of local execution time: { geometric_mean_local_seconds } seconds"
248
- + (
249
- f", Geometric mean of BigQuery execution time: { geometric_mean_bq_seconds } seconds---"
250
- if has_full_metrics
251
- else ""
252
- )
220
+ + f", Geometric mean of BigQuery execution time: { geometric_mean_bq_seconds } seconds---"
253
221
)
254
222
255
- error_message = (
256
- "\n "
257
- + "\n " .join (
258
- [
259
- f"Failed: { error_file .relative_to (path ).with_suffix ('' )} "
260
- for error_file in error_files
261
- ]
223
+ all_errors : List [str ] = []
224
+ if error_files :
225
+ all_errors .extend (
226
+ f"Failed: { error_file .relative_to (path ).with_suffix ('' )} "
227
+ for error_file in error_files
262
228
)
263
- if error_files
264
- else None
265
- )
229
+ if (
230
+ benchmarks_with_missing_files
231
+ and os .getenv ("BENCHMARK_AND_PUBLISH" , "false" ) == "true"
232
+ ):
233
+ all_errors .extend (
234
+ f"Missing files for benchmark '{ name } ': { files } "
235
+ for name , files in benchmarks_with_missing_files
236
+ )
237
+ error_message = "\n " + "\n " .join (all_errors ) if all_errors else None
266
238
return (
267
239
benchmark_metrics .reset_index ().rename (columns = {"index" : "Benchmark_Name" }),
268
240
error_message ,
0 commit comments