Skip to content

Commit b194de5

Browse files
committed
Add configurable block_size to write microbenchmarks and fix parallel test race conditions
1 parent 890a6fa commit b194de5

File tree

6 files changed

+33
-11
lines changed

6 files changed

+33
-11
lines changed

gcsfs/tests/conftest.py

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -330,12 +330,13 @@ def _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate_bucket, **kw
330330
extended_gcsfs = gcs_factory(**kwargs)
331331
# Only create/delete/populate the bucket if we are NOT using the real GCS endpoint.
332332
if not is_real_gcs:
333-
try:
334-
extended_gcsfs.rm(TEST_ZONAL_BUCKET, recursive=True)
335-
except FileNotFoundError:
336-
pass
337-
extended_gcsfs.mkdir(TEST_ZONAL_BUCKET)
338-
buckets_to_delete.add(TEST_ZONAL_BUCKET)
333+
if not extended_gcsfs.exists(TEST_ZONAL_BUCKET):
334+
extended_gcsfs.mkdir(TEST_ZONAL_BUCKET)
335+
buckets_to_delete.add(TEST_ZONAL_BUCKET)
336+
337+
if not extended_gcsfs.exists(TEST_BUCKET):
338+
extended_gcsfs.mkdir(TEST_BUCKET)
339+
buckets_to_delete.add(TEST_BUCKET)
339340
try:
340341
if populate_bucket:
341342
# To avoid hitting object mutation limits, only pipe files if they

gcsfs/tests/perf/microbenchmarks/conftest.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,7 @@ def gcsfs_benchmark_write(extended_gcs_factory, request):
136136
params,
137137
"benchmark-write",
138138
create_files=False,
139+
gcs_kwargs={"block_size": params.block_size_bytes},
139140
)
140141

141142

gcsfs/tests/perf/microbenchmarks/test_configs.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ def test_write_configurator(mock_config_dependencies):
107107
common = {
108108
"bucket_types": ["regional"],
109109
"chunk_sizes_mb": [10],
110+
"block_sizes_mb": [32],
110111
"rounds": 1,
111112
"runtime": 30,
112113
}
@@ -117,9 +118,13 @@ def test_write_configurator(mock_config_dependencies):
117118

118119
assert len(cases) == 1
119120
case = cases[0]
120-
assert case.name == "write_test_2procs_1threads_10MB_chunk_regional_30s_duration"
121+
assert (
122+
case.name
123+
== "write_test_2procs_1threads_10MB_chunk_32MB_block_regional_30s_duration"
124+
)
121125
assert case.file_size_bytes == 0
122126
assert case.chunk_size_bytes == 10 * MB
127+
assert case.block_size_bytes == 32 * MB
123128
assert case.processes == 2
124129
assert case.files == 2 # threads * processes
125130

gcsfs/tests/perf/microbenchmarks/write_fixed_duration/configs.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,22 +12,32 @@ def build_cases(self, scenario, common_config):
1212
threads_list = scenario.get("threads", [1])
1313
bucket_types = common_config.get("bucket_types", ["regional"])
1414
chunk_sizes_mb = common_config.get("chunk_sizes_mb", [64, 100])
15+
block_sizes_mb = common_config.get("block_sizes_mb", [16])
16+
scenario_block_sizes_mb = scenario.get("block_sizes_mb")
17+
if scenario_block_sizes_mb:
18+
block_sizes_mb = scenario_block_sizes_mb
1519
runtime = common_config.get("runtime", 30)
1620
rounds = common_config.get("rounds", 1)
1721

1822
cases = []
1923
param_combinations = itertools.product(
20-
procs_list, threads_list, chunk_sizes_mb, bucket_types
24+
procs_list, threads_list, chunk_sizes_mb, block_sizes_mb, bucket_types
2125
)
2226

23-
for procs, threads, chunk_size_mb, bucket_type in param_combinations:
27+
for (
28+
procs,
29+
threads,
30+
chunk_size_mb,
31+
block_size_mb,
32+
bucket_type,
33+
) in param_combinations:
2434
bucket_name = self.get_bucket_name(bucket_type)
2535
if not bucket_name:
2636
continue
2737

2838
name = (
2939
f"{scenario['name']}_{procs}procs_{threads}threads_"
30-
f"{chunk_size_mb}MB_chunk_{bucket_type}_{runtime}s_duration"
40+
f"{chunk_size_mb}MB_chunk_{block_size_mb}MB_block_{bucket_type}_{runtime}s_duration"
3141
)
3242

3343
params = WriteFixedDurationBenchmarkParameters(
@@ -38,7 +48,8 @@ def build_cases(self, scenario, common_config):
3848
processes=procs,
3949
files=threads * procs,
4050
rounds=rounds,
41-
chunk_size_bytes=chunk_size_mb * MB,
51+
chunk_size_bytes=int(chunk_size_mb * MB),
52+
block_size_bytes=int(block_size_mb * MB),
4253
file_size_bytes=0,
4354
runtime=runtime,
4455
)

gcsfs/tests/perf/microbenchmarks/write_fixed_duration/parameters.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,5 +9,8 @@ class WriteFixedDurationBenchmarkParameters(IOBenchmarkParameters):
99
Defines the parameters for a write benchmark test cases with runtime.
1010
"""
1111

12+
# The block size for gcsfs file buffering.
13+
block_size_bytes: int
14+
1215
# Time in seconds the test should run.
1316
runtime: int

gcsfs/tests/perf/microbenchmarks/write_fixed_duration/test_write.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,5 +127,6 @@ def args_builder(gcs_instance, i, shared_arr):
127127
worker_target=_process_worker_fixed_duration,
128128
args_builder=args_builder,
129129
benchmark_group=BENCHMARK_GROUP,
130+
gcs_kwargs={"block_size": params.block_size_bytes},
130131
request=request,
131132
)

0 commit comments

Comments
 (0)