Skip to content

Commit 209d0d4

Browse files
authored
chore: add bq execution time to benchmark (#2033)
* chore: update benchmark metrics
* fix metric calculation
1 parent 5b8bdec commit 209d0d4

File tree

3 files changed

+60
-83
lines changed

3 files changed

+60
-83
lines changed

bigframes/session/metrics.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,12 +45,17 @@ def count_job_stats(
4545
bytes_processed = getattr(row_iterator, "total_bytes_processed", 0) or 0
4646
query_char_count = len(getattr(row_iterator, "query", "") or "")
4747
slot_millis = getattr(row_iterator, "slot_millis", 0) or 0
48-
exec_seconds = 0.0
48+
created = getattr(row_iterator, "created", None)
49+
ended = getattr(row_iterator, "ended", None)
50+
exec_seconds = (
51+
(ended - created).total_seconds() if created and ended else 0.0
52+
)
4953

5054
self.execution_count += 1
5155
self.query_char_count += query_char_count
5256
self.bytes_processed += bytes_processed
5357
self.slot_millis += slot_millis
58+
self.execution_secs += exec_seconds
5459

5560
elif query_job.configuration.dry_run:
5661
query_char_count = len(query_job.query)

scripts/run_and_publish_benchmark.py

Lines changed: 53 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -84,43 +84,36 @@ def collect_benchmark_result(
8484
path = pathlib.Path(benchmark_path)
8585
try:
8686
results_dict: Dict[str, List[Union[int, float, None]]] = {}
87-
bytes_files = sorted(path.rglob("*.bytesprocessed"))
88-
millis_files = sorted(path.rglob("*.slotmillis"))
89-
bq_seconds_files = sorted(path.rglob("*.bq_exec_time_seconds"))
87+
# Use local_seconds_files as the baseline
9088
local_seconds_files = sorted(path.rglob("*.local_exec_time_seconds"))
91-
query_char_count_files = sorted(path.rglob("*.query_char_count"))
92-
9389
error_files = sorted(path.rglob("*.error"))
94-
95-
if not (
96-
len(millis_files)
97-
== len(bq_seconds_files)
98-
<= len(bytes_files)
99-
== len(query_char_count_files)
100-
== len(local_seconds_files)
101-
):
102-
raise ValueError(
103-
"Mismatch in the number of report files for bytes, millis, seconds and query char count: \n"
104-
f"millis_files: {len(millis_files)}\n"
105-
f"bq_seconds_files: {len(bq_seconds_files)}\n"
106-
f"bytes_files: {len(bytes_files)}\n"
107-
f"query_char_count_files: {len(query_char_count_files)}\n"
108-
f"local_seconds_files: {len(local_seconds_files)}\n"
109-
)
110-
111-
has_full_metrics = len(bq_seconds_files) == len(local_seconds_files)
112-
113-
for idx in range(len(local_seconds_files)):
114-
query_char_count_file = query_char_count_files[idx]
115-
local_seconds_file = local_seconds_files[idx]
116-
bytes_file = bytes_files[idx]
117-
filename = query_char_count_file.relative_to(path).with_suffix("")
118-
if filename != local_seconds_file.relative_to(path).with_suffix(
119-
""
120-
) or filename != bytes_file.relative_to(path).with_suffix(""):
121-
raise ValueError(
122-
"File name mismatch among query_char_count, bytes and seconds reports."
123-
)
90+
benchmarks_with_missing_files = []
91+
92+
for local_seconds_file in local_seconds_files:
93+
base_name = local_seconds_file.name.removesuffix(".local_exec_time_seconds")
94+
base_path = local_seconds_file.parent / base_name
95+
filename = base_path.relative_to(path)
96+
97+
# Construct paths for other metric files
98+
bytes_file = pathlib.Path(f"{base_path}.bytesprocessed")
99+
millis_file = pathlib.Path(f"{base_path}.slotmillis")
100+
bq_seconds_file = pathlib.Path(f"{base_path}.bq_exec_time_seconds")
101+
query_char_count_file = pathlib.Path(f"{base_path}.query_char_count")
102+
103+
# Check if all corresponding files exist
104+
missing_files = []
105+
if not bytes_file.exists():
106+
missing_files.append(bytes_file.name)
107+
if not millis_file.exists():
108+
missing_files.append(millis_file.name)
109+
if not bq_seconds_file.exists():
110+
missing_files.append(bq_seconds_file.name)
111+
if not query_char_count_file.exists():
112+
missing_files.append(query_char_count_file.name)
113+
114+
if missing_files:
115+
benchmarks_with_missing_files.append((str(filename), missing_files))
116+
continue
124117

125118
with open(query_char_count_file, "r") as file:
126119
lines = file.read().splitlines()
@@ -135,26 +128,13 @@ def collect_benchmark_result(
135128
lines = file.read().splitlines()
136129
total_bytes = sum(int(line) for line in lines) / iterations
137130

138-
if not has_full_metrics:
139-
total_slot_millis = None
140-
bq_seconds = None
141-
else:
142-
millis_file = millis_files[idx]
143-
bq_seconds_file = bq_seconds_files[idx]
144-
if filename != millis_file.relative_to(path).with_suffix(
145-
""
146-
) or filename != bq_seconds_file.relative_to(path).with_suffix(""):
147-
raise ValueError(
148-
"File name mismatch among query_char_count, bytes, millis, and seconds reports."
149-
)
150-
151-
with open(millis_file, "r") as file:
152-
lines = file.read().splitlines()
153-
total_slot_millis = sum(int(line) for line in lines) / iterations
131+
with open(millis_file, "r") as file:
132+
lines = file.read().splitlines()
133+
total_slot_millis = sum(int(line) for line in lines) / iterations
154134

155-
with open(bq_seconds_file, "r") as file:
156-
lines = file.read().splitlines()
157-
bq_seconds = sum(float(line) for line in lines) / iterations
135+
with open(bq_seconds_file, "r") as file:
136+
lines = file.read().splitlines()
137+
bq_seconds = sum(float(line) for line in lines) / iterations
158138

159139
results_dict[str(filename)] = [
160140
query_count,
@@ -207,13 +187,9 @@ def collect_benchmark_result(
207187
f"{index} - query count: {row['Query_Count']},"
208188
+ f" query char count: {row['Query_Char_Count']},"
209189
+ f" bytes processed sum: {row['Bytes_Processed']},"
210-
+ (f" slot millis sum: {row['Slot_Millis']}," if has_full_metrics else "")
211-
+ f" local execution time: {formatted_local_exec_time} seconds"
212-
+ (
213-
f", bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds"
214-
if has_full_metrics
215-
else ""
216-
)
190+
+ f" slot millis sum: {row['Slot_Millis']},"
191+
+ f" local execution time: {formatted_local_exec_time}"
192+
+ f", bigquery execution time: {round(row['BigQuery_Execution_Time_Sec'], 1)} seconds"
217193
)
218194

219195
geometric_mean_queries = geometric_mean_excluding_zeros(
@@ -239,30 +215,26 @@ def collect_benchmark_result(
239215
f"---Geometric mean of queries: {geometric_mean_queries},"
240216
+ f" Geometric mean of queries char counts: {geometric_mean_query_char_count},"
241217
+ f" Geometric mean of bytes processed: {geometric_mean_bytes},"
242-
+ (
243-
f" Geometric mean of slot millis: {geometric_mean_slot_millis},"
244-
if has_full_metrics
245-
else ""
246-
)
218+
+ f" Geometric mean of slot millis: {geometric_mean_slot_millis},"
247219
+ f" Geometric mean of local execution time: {geometric_mean_local_seconds} seconds"
248-
+ (
249-
f", Geometric mean of BigQuery execution time: {geometric_mean_bq_seconds} seconds---"
250-
if has_full_metrics
251-
else ""
252-
)
220+
+ f", Geometric mean of BigQuery execution time: {geometric_mean_bq_seconds} seconds---"
253221
)
254222

255-
error_message = (
256-
"\n"
257-
+ "\n".join(
258-
[
259-
f"Failed: {error_file.relative_to(path).with_suffix('')}"
260-
for error_file in error_files
261-
]
223+
all_errors: List[str] = []
224+
if error_files:
225+
all_errors.extend(
226+
f"Failed: {error_file.relative_to(path).with_suffix('')}"
227+
for error_file in error_files
262228
)
263-
if error_files
264-
else None
265-
)
229+
if (
230+
benchmarks_with_missing_files
231+
and os.getenv("BENCHMARK_AND_PUBLISH", "false") == "true"
232+
):
233+
all_errors.extend(
234+
f"Missing files for benchmark '{name}': {files}"
235+
for name, files in benchmarks_with_missing_files
236+
)
237+
error_message = "\n" + "\n".join(all_errors) if all_errors else None
266238
return (
267239
benchmark_metrics.reset_index().rename(columns={"index": "Benchmark_Name"}),
268240
error_message,

testing/constraints-3.11.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@ google-auth==2.38.0
152152
google-auth-httplib2==0.2.0
153153
google-auth-oauthlib==1.2.2
154154
google-cloud-aiplatform==1.106.0
155-
google-cloud-bigquery==3.35.1
155+
google-cloud-bigquery==3.36.0
156156
google-cloud-bigquery-connection==1.18.3
157157
google-cloud-bigquery-storage==2.32.0
158158
google-cloud-core==2.4.3

0 commit comments

Comments (0)