Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion gcsfs/tests/perf/microbenchmarks/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def _benchmark_listing_fixture_helper(

levels[d] = current_level_folders

# Create empyt folders first if specified
# Create empty folders first if specified
if create_folders:
logging.info(
f"Setting up benchmark '{params.name}': creating {len(target_dirs)} "
Expand Down
6 changes: 3 additions & 3 deletions gcsfs/tests/perf/microbenchmarks/delete/configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,15 @@ common:

scenarios:
- name: "delete_flat"
folders: [256]
folders: [1024, 2048, 4096]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Instead of updating the folders, I'd suggest creating a new scenario with these options. This will impact the daily runs, as it will take a long time to create these as part of setup. So if you really want a daily trigger that compares a large number of folders, it is better to create separate scenarios and a trigger pointing to those scenarios.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just increasing the number of folders won't actually increase the setup time, because we are not making explicit calls to create folders using mkdir; they are implicitly created during file creation.

But since we are increasing the scenarios from one (256) to three (1024, 2048, 4096), more scenarios will run, and hence the delete benchmarks will take more time. However, I suggest keeping them, because the delete benchmark's latency is significantly driven by the number of folders, and we only observe latency differences between HNS and standard buckets at 2k and 4k folders.

files: [65536, 131072]

- name: "delete_recursive"
depth: 8
folders: [256]
folders: [1024, 2048, 4096]
files: [65536, 131072]

- name: "delete_recursive_deep"
depth: 24
folders: [256]
folders: [1024, 2048, 4096]
files: [65536, 131072]
15 changes: 12 additions & 3 deletions gcsfs/tests/perf/microbenchmarks/info/configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,22 @@ class InfoConfigurator(ListingConfigurator):
param_class = InfoBenchmarkParameters

def _get_folders_list(self, scenario, common_config):
return common_config.get("folders", [1])
return scenario.get("folders", [1])

def _get_files_list(self, scenario, common_config):
return common_config.get("files", [1])
return scenario.get("files", [1])

def _get_extra_iterables(self, scenario, common_config):
return [scenario.get("target_types", ["bucket", "folder", "file"])]
target_types = scenario.get("target_type")
if target_types:
if isinstance(target_types, str):
target_types = [target_types]
else:
target_types = scenario.get("target_types", ["bucket", "folder", "file"])

return [
target_types,
]

def _create_case_name(
self,
Expand Down
62 changes: 54 additions & 8 deletions gcsfs/tests/perf/microbenchmarks/info/configs.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,26 +3,72 @@ common:
- "regional"
- "zonal"
- "hns"
files:
- 100
folders:
- 100

scenarios:
- name: "info"
- name: "info_file"
pattern: "info"
depth: 1
target_type: "file"
files: [10000]
folders: [256]

- name: "info_deep"
- name: "info_folder"
pattern: "info"
depth: 1
target_type: "folder"
files: [65536] # list_objects performance can be measured for standard buckets with high number of files
folders: [256]

- name: "info_bucket"
pattern: "info"
depth: 1
target_type: "bucket"
files: [1]
folders: [1]

- name: "info_deep_file"
pattern: "info"
depth: 10
target_type: "file"
files: [10000]
folders: [256]

- name: "info_deep_folder"
pattern: "info"
depth: 10
target_type: "folder"
files: [65536]
folders: [256]

- name: "info_multi_process"
- name: "info_multi_process_file"
pattern: "info"
depth: 1
target_type: "file"
files: [10000]
folders: [256]
processes: [4, 8]

- name: "info_multi_process_folder"
pattern: "info"
depth: 1
target_type: "folder"
files: [65536]
folders: [256]
processes: [4, 8]


- name: "info_multi_process_deep_file"
pattern: "info"
depth: 10
target_type: "file"
files: [10000]
folders: [256]
processes: [4, 8]

- name: "info_multi_process_deep"
- name: "info_multi_process_deep_folder"
pattern: "info"
depth: 10
target_type: "folder"
files: [65536]
folders: [256]
processes: [4, 8]
12 changes: 7 additions & 5 deletions gcsfs/tests/perf/microbenchmarks/info/test_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def _info_op(gcs, path, pattern="info"):
except FileNotFoundError:
pass
duration_ms = (time.perf_counter() - start_time) * 1000
logging.info(f"{pattern.upper()} : {path} - {duration_ms:.2f} ms.")
logging.debug(f"{pattern.upper()} : {path} - {duration_ms:.2f} ms.")


def _info_ops(gcs, paths, pattern="info"):
Expand Down Expand Up @@ -71,12 +71,15 @@ def test_info_multi_threaded(benchmark, gcsfs_benchmark_info, monitor):

paths = _get_target_paths(target_dirs, file_paths, params)

chunks = _chunk_list(paths, params.threads)
args_list = [(gcs, chunks[i], params.pattern) for i in range(params.threads)]

run_multi_threaded(
benchmark,
monitor,
params,
_info_ops,
(gcs, paths, params.pattern),
args_list,
BENCHMARK_GROUP,
)

Expand Down Expand Up @@ -123,9 +126,8 @@ def test_info_multi_process(
):
gcs, target_dirs, file_paths, prefix, params = gcsfs_benchmark_info

chunks = _chunk_list(
_get_target_paths(target_dirs, file_paths, params), params.processes
)
paths = _get_target_paths(target_dirs, file_paths, params)
chunks = _chunk_list(paths, params.processes)

def args_builder(gcs_instance, i, shared_arr):
return (
Expand Down
2 changes: 0 additions & 2 deletions gcsfs/tests/perf/microbenchmarks/rename/test_rename.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,5 +41,3 @@ def test_rename_recursive(benchmark, gcsfs_benchmark_rename, monitor):
(gcs, prefix, prefix_renamed),
BENCHMARK_GROUP,
)
# Adding a sleep of 60 secs here to ensure that deletion works
time.sleep(60)
6 changes: 4 additions & 2 deletions gcsfs/tests/perf/microbenchmarks/test_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,16 @@ def test_listing_configurator(mock_config_dependencies):

def test_info_configurator(mock_config_dependencies):
"""Test that InfoConfigurator correctly builds benchmark parameters."""
common = {"bucket_types": ["regional"], "files": [100], "folders": [1]}
common = {"bucket_types": ["regional"]}
scenario = {
"name": "info_test",
"processes": [1],
"threads": [1],
"depth": 0,
"pattern": "info",
"target_types": ["file"],
"target_type": "file",
"files": [100],
"folders": [1],
}

configurator = InfoConfigurator("dummy")
Expand Down
Loading