-
-
Notifications
You must be signed in to change notification settings - Fork 132
Open
Labels
Description
What type of report is this?
Improvement
Please describe the issue.
It would be good if a GitHub Action ran a test that generated plots comparing performance to NumPy; these could then be pushed to a GitHub Pages site and viewed there.
If you have a suggestion on how it should be, add it below.
An example is the following plot:
It shows at which density sparse is more memory-efficient than NumPy for different numbers of dimensions.
Which I generated with:
import sparse
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
def test_boolean():
    """Plot the memory footprint of ``sparse`` arrays against dense NumPy arrays.

    For every dimensionality from 1 to 4, random sparse arrays holding roughly
    ``size`` elements in total are generated at 100 densities evenly spaced
    over [0, 1]. Their ``nbytes`` is recorded next to the ``nbytes`` of a dense
    NumPy array of the same shape, and both series are drawn as scatter plots
    in a single figure saved as ``memory_usage.png`` in the current working
    directory. A progress bar and the current dimensionality are printed while
    the measurements run.
    """
    # 100 evenly spaced density values covering the whole range [0, 1].
    densities = np.linspace(0.00, 1.00, num=100)
    dims = range(1, 5)  # 1D through 4D
    size = 2000  # target total number of elements per array

    sparse_mem: list[list[int]] = []
    numpy_mem: list[list[int]] = []

    for dim in dims:
        print(f"dim: {dim}")
        # Side length chosen so that dim_size ** dim stays close to `size`
        # whatever the dimensionality (the root is truncated to an integer).
        dim_size = int(float(size) ** (1 / float(dim)))
        shape = [dim_size] * dim

        # The dense footprint depends only on the shape, not on the density,
        # so it is measured once per dimensionality and repeated for plotting.
        dense_nbytes = np.empty(shape).nbytes
        numpy_mem.append([dense_nbytes] * len(densities))

        # The sparse footprint has to be measured for every density value.
        sparse_mem.append(
            [
                sparse.random(shape, density=density).nbytes
                for density in tqdm(densities)
            ]
        )

    # Plotting: one sparse series and one dense series per dimensionality.
    plt.figure(figsize=(10, 6))
    for d, sparse_series, numpy_series in zip(dims, sparse_mem, numpy_mem):
        plt.plot(densities, sparse_series, "o", alpha=0.5, label=f"Sparse {d}D ")
        plt.plot(densities, numpy_series, "o", alpha=0.5, label=f"Numpy {d}D")
    plt.xlabel("Density")
    plt.ylabel("Memory Usage (bytes)")
    plt.title("Memory Usage vs Density for nD Arrays")
    plt.legend()
    plt.grid(True)
    plt.savefig("memory_usage.png")
    plt.close()