-
Notifications
You must be signed in to change notification settings - Fork 52
Open
Description
I'm a big fan of collections defaulting to being empty rather than None, and a lot of APIs default to this, whether you are returning a dict or some sort of queryset from a database.
Then I can always
for k, v in collection.items():
...
It occurs to me that there must be a lot of empty dicts across Python itself, and I wonder if there is a performance hit: is it faster to do the loop below if we default to None, or if we default to empty?
if collection:
for k, v in collection.items():
...
I've added a very basic benchmark, and the `if`-guarded version does seem quicker when run only once — which is often the case in web services as they come up, in scripts as they start, and in command-line Python tools.
import timeit
import statistics
from collections import defaultdict
def iterate_empty():
    """Benchmark body: loop over an empty dict with no emptiness pre-check."""
    d = {}
    total = 0
    for _key, val in d.items():
        total += val
    return total
def check_then_iterate():
    """Benchmark body: truthiness-guard an empty dict before iterating it."""
    d = {}
    total = 0
    if d:  # falsy for an empty dict, so the loop is skipped entirely
        for _key, val in d.items():
            total += val
    return total
def none_then_check_then_iterate():
    """Benchmark body: default to None, test identity, then truthiness, then iterate."""
    maybe_dict = None
    total = 0
    # The nested checks are intentional: this measures the cost of the
    # None-default pattern (identity test first, then truthiness test).
    if maybe_dict is not None:
        if maybe_dict:
            for _key, val in maybe_dict.items():
                total += val
    return total
def defaultdict_iterate():
    """Benchmark body: iterate directly over an empty defaultdict(int)."""
    d = defaultdict(int)
    total = 0
    for _key, val in d.items():
        total += val
    return total
def iterate_non_empty():
    """Baseline benchmark body: iterate a one-item dict without any pre-check."""
    d = {'a': 1}
    total = 0
    for _key, val in d.items():
        total += val
    return total
def check_then_iterate_non_empty():
    """Baseline benchmark body: truthiness-guard a one-item dict, then iterate."""
    d = {'a': 1}
    total = 0
    if d:  # truthy — the guard always passes here; measures its overhead
        for _key, val in d.items():
            total += val
    return total
# --- Benchmark driver --------------------------------------------------
# Each candidate function is timed `number_of_runs` times; the fastest and
# slowest samples are dropped and the remainder averaged (trimmed mean) to
# reduce the influence of outliers.
iterations = 1000000
number_of_runs = 7

# Run benchmarks
results = {}
print("Running benchmarks (lower is better)...")
for func_name, func in [
    ("iterate_empty", iterate_empty),
    ("check_then_iterate", check_then_iterate),
    ("none_then_check_then_iterate", none_then_check_then_iterate),
    ("defaultdict_iterate", defaultdict_iterate),
    ("iterate_non_empty", iterate_non_empty),
    ("check_then_iterate_non_empty", check_then_iterate_non_empty),
]:
    times = []
    for _ in range(number_of_runs):
        times.append(timeit.timeit(func, number=iterations))
    # Remove the highest and lowest times and calculate the average.
    times.sort()
    trimmed_times = times[1:-1]
    avg_time = statistics.mean(trimmed_times)
    std_dev = statistics.stdev(trimmed_times) if len(trimmed_times) > 1 else 0
    results[func_name] = (avg_time, std_dev)
    print(f"{func_name}: {avg_time:.6f} seconds (±{std_dev:.6f})")

# Relative performance against the plain empty-dict loop.
baseline = results["iterate_empty"]
print("\nRelative performance (compared to iterate_empty):")
# Loop variable renamed from `time` to `avg_time`: `time` shadowed the
# stdlib module name and mislabeled what the value actually is.
for name, (avg_time, std_dev) in results.items():
    relative = avg_time / baseline[0]
    print(f"{name}: {relative:.2f}x ({avg_time/iterations*1e9:.2f} ns per operation)")

# Now test with dictionaries of different sizes
print("\n===== Testing with different dictionary sizes =====")
def test_with_size(size):
# Create dictionaries of specified size
test_dict = {i: i for i in range(size)}
def iterate_dict():
result = 0
for _, value in test_dict.items():
result += value
return result
def check_then_iterate_dict():
result = 0
if test_dict:
for _, value in test_dict.items():
result += value
return result
iterate_time = timeit.timeit(iterate_dict, number=100000)
check_time = timeit.timeit(check_then_iterate_dict, number=100000)
print(f"Size {size}:")
print(f" Direct iteration: {iterate_time:.6f} seconds")
print(f" Check then iterate: {check_time:.6f} seconds")
print(f" Ratio (check/direct): {check_time/iterate_time:.2f}x")
# Sweep a range of dict sizes, from empty to large, to see where (if
# anywhere) the emptiness pre-check stops paying for itself.
for size in (0, 1, 10, 100, 1000):
    test_with_size(size)
dg-pb
Metadata
Metadata
Assignees
Labels
No labels