Optimise (non?) iteration of empty dicts/collections ?

I'm a big fan of collections defaulting to being empty and not None, and I a lot APIs default to this, whether you are returning a dict, or some sort of queryset from a database.

Then I can always

```
for k, v in collection.items():
    ...
```

It occurs to me, there must be a *lot* of empty dicts across python itself and I wonder if there is a performance hit, is it faster to do the loop below if we default to None, or if we default to empty.

```
if collection:
    for k, v in collection.items():
        ...
```

I've added a very basic benchmark, and it does seem quicker with the `if` when run only once, which is probably the case often in web services that are coming up, and in scripts as they start or commandline python tools.


```python
import timeit
import statistics
from collections import defaultdict

def iterate_empty():
    """Iterate over an empty dict directly."""
    empty_dict = {}
    result = 0
    for key, value in empty_dict.items():
        result += value
    return result

def check_then_iterate():
    """Check if dict is empty before iterating."""
    empty_dict = {}
    result = 0
    if empty_dict:
        for key, value in empty_dict.items():
            result += value
    return result

def none_then_check_then_iterate():
    """Use None as default, check for None, then check if empty before iterating."""
    empty_dict = None
    result = 0
    if empty_dict is not None:
        if empty_dict:
            for key, value in empty_dict.items():
                result += value
    return result

def defaultdict_iterate():
    """Use defaultdict and iterate directly."""
    empty_dict = defaultdict(int)
    result = 0
    for key, value in empty_dict.items():
        result += value
    return result

def iterate_non_empty():
    """Baseline: Iterate over a dict with one item."""
    non_empty = {'a': 1}
    result = 0
    for key, value in non_empty.items():
        result += value
    return result

def check_then_iterate_non_empty():
    """Baseline: Check then iterate over a dict with one item."""
    non_empty = {'a': 1}
    result = 0
    if non_empty:
        for key, value in non_empty.items():
            result += value
    return result

# Number of iterations for each test
iterations = 1000000
number_of_runs = 7

# Run benchmarks
results = {}

print("Running benchmarks (lower is better)...")

for func_name, func in [
    ("iterate_empty", iterate_empty),
    ("check_then_iterate", check_then_iterate),
    ("none_then_check_then_iterate", none_then_check_then_iterate),
    ("defaultdict_iterate", defaultdict_iterate),
    ("iterate_non_empty", iterate_non_empty),
    ("check_then_iterate_non_empty", check_then_iterate_non_empty),
]:
    times = []
    for _ in range(number_of_runs):
        time_taken = timeit.timeit(func, number=iterations)
        times.append(time_taken)
    
    # Remove the highest and lowest times and calculate the average
    times.sort()
    trimmed_times = times[1:-1]
    avg_time = statistics.mean(trimmed_times)
    std_dev = statistics.stdev(trimmed_times) if len(trimmed_times) > 1 else 0
    
    results[func_name] = (avg_time, std_dev)
    print(f"{func_name}: {avg_time:.6f} seconds (±{std_dev:.6f})")

# Calculate relative performance
baseline = results["iterate_empty"]
print("\nRelative performance (compared to iterate_empty):")
for name, (time, std_dev) in results.items():
    relative = time / baseline[0]
    print(f"{name}: {relative:.2f}x ({time/iterations*1e9:.2f} ns per operation)")

# Now test with dictionaries of different sizes
print("\n===== Testing with different dictionary sizes =====")

def test_with_size(size):
    # Create dictionaries of specified size
    test_dict = {i: i for i in range(size)}
    
    def iterate_dict():
        result = 0
        for _, value in test_dict.items():
            result += value
        return result
    
    def check_then_iterate_dict():
        result = 0
        if test_dict:
            for _, value in test_dict.items():
                result += value
        return result
    
    iterate_time = timeit.timeit(iterate_dict, number=100000)
    check_time = timeit.timeit(check_then_iterate_dict, number=100000)
    
    print(f"Size {size}:")
    print(f"  Direct iteration: {iterate_time:.6f} seconds")
    print(f"  Check then iterate: {check_time:.6f} seconds")
    print(f"  Ratio (check/direct): {check_time/iterate_time:.2f}x")

# Test with different sizes
for size in [0, 1, 10, 100, 1000]:
    test_with_size(size)

```

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

Optimise (non?) iteration of empty dicts/collections ? #720

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Optimise (non?) iteration of empty dicts/collections ? #720

Description

Metadata

Metadata

Assignees

Labels

Type

Projects

Milestone

Relationships

Development

Issue actions