Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions reports/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,45 @@
3. Add any new repositories to config.py
4. Run `uv run main.py`
5. Run `uv run plot.py`

## Running a report on PyPI package download stats

PyPI download stats only go back 180 days. After installing the `pypistats` package, you can run a command like the following to get daily downloads:

```bash
pypistats overall virtualizarr -sd 2025-05-15 -ed 2025-11-11 --daily -f tsv --mirrors without > virtualizarr-report.tsv
```
Then generate more informative stats using the `analyze_downloads.py` script:

```bash
# With default cutoff date (2025-07-21)
python analyze_downloads.py virtualizarr-report.tsv

# With custom cutoff date
python analyze_downloads.py virtualizarr-report.tsv --cutoff-date 2025-08-15

# Show help
python analyze_downloads.py --help
```

Using the report produced by the `pypistats` command above as an example, the script prints the following output:

```bash
$ python analyze_downloads.py virtualizarr-report.tsv
Download Analysis
============================================================
Cutoff date: 2025-07-21

Before 2025-07-21 (inclusive):
- Number of days: 64
- Total downloads: 3,822
- Average daily downloads: 59.72

After 2025-07-21:
- Number of days: 113
- Total downloads: 20,682
- Average daily downloads: 183.03

Change: +206.48%
Absolute difference: +123.31 downloads/day
```
106 changes: 106 additions & 0 deletions reports/analyze_downloads.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
#!/usr/bin/env python3
"""
Analyze download statistics before and after a given date using a pypistats report output from a command like:
pypistats overall virtualizarr -sd 2025-05-15 -ed 2025-11-11 --daily -f tsv --mirrors without > virtualizarr-report.tsv
"""

import csv
from datetime import datetime
from pathlib import Path


def analyze_downloads(tsv_file: str, cutoff_date: str = "2025-07-21"):
    """
    Calculate average daily downloads before and after a cutoff date.

    Reads a tab-separated pypistats report with ``category``, ``date`` and
    ``downloads`` columns, splits the daily rows at the cutoff date and
    prints a summary (day count, total and average per period, plus the
    relative and absolute change of the average) to stdout.

    Only days that appear in the report are counted; the averages are taken
    over the rows present in each period, not over calendar days.

    Args:
        tsv_file: Path to the TSV file
        cutoff_date: The date to split the data (format: YYYY-MM-DD). Rows
            dated exactly on the cutoff are counted in the "before" period.

    Raises:
        ValueError: If ``cutoff_date`` is not in YYYY-MM-DD format.
    """
    cutoff = datetime.strptime(cutoff_date, "%Y-%m-%d")

    before_downloads = []
    after_downloads = []

    # newline="" lets the csv module do its own newline handling.
    with open(tsv_file, "r", newline="") as f:
        reader = csv.DictReader(f, delimiter="\t")

        for row in reader:
            # Skip non-data rows: the "Total" summary row and any footer
            # line (e.g. "Date range: ...") that has no date column.
            if row["category"] == "Total" or not row["date"]:
                continue

            try:
                date = datetime.strptime(row["date"], "%Y-%m-%d")
                # The report may format counts with a thousands separator
                # (e.g. "1,234"); strip it so busy days are not dropped.
                # A missing cell becomes "" and is reported as a bad row.
                downloads = int((row["downloads"] or "").replace(",", ""))
            except (ValueError, KeyError) as e:
                print(f"Skipping row due to error: {e}")
                continue

            # The cutoff date itself belongs to the "before" period.
            if date <= cutoff:
                before_downloads.append(downloads)
            else:
                after_downloads.append(downloads)

    # Calculate averages (0 when a period has no rows)
    avg_before = sum(before_downloads) / len(before_downloads) if before_downloads else 0
    avg_after = sum(after_downloads) / len(after_downloads) if after_downloads else 0

    # Calculate percentage change; reported as 0 when there is no
    # "before" baseline to compare against.
    if avg_before > 0:
        percent_change = ((avg_after - avg_before) / avg_before) * 100
    else:
        percent_change = 0

    # Print results
    print("Download Analysis")
    print("=" * 60)
    print(f"Cutoff date: {cutoff_date}")
    print()
    print(f"Before {cutoff_date} (inclusive):")
    print(f" - Number of days: {len(before_downloads)}")
    print(f" - Total downloads: {sum(before_downloads):,}")
    print(f" - Average daily downloads: {avg_before:.2f}")
    print()
    print(f"After {cutoff_date}:")
    print(f" - Number of days: {len(after_downloads)}")
    print(f" - Total downloads: {sum(after_downloads):,}")
    print(f" - Average daily downloads: {avg_after:.2f}")
    print()
    print(f"Change: {percent_change:+.2f}%")
    print(f"Absolute difference: {avg_after - avg_before:+.2f} downloads/day")


if __name__ == "__main__":
    # Command-line entry point: parse arguments, validate them up front so
    # the user gets a short error message instead of a traceback, then run
    # the analysis.
    import argparse
    import sys

    parser = argparse.ArgumentParser(
        description="Analyze download statistics before and after a given date",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Example:
  %(prog)s virtualizarr-report.tsv --cutoff-date 2025-07-21
        """
    )
    parser.add_argument(
        "tsv_file",
        help="Path to the TSV file containing download statistics"
    )
    parser.add_argument(
        "--cutoff-date",
        default="2025-07-21",
        help="Date to split the analysis (format: YYYY-MM-DD, default: 2025-07-21)"
    )

    args = parser.parse_args()

    # Reject a malformed cutoff date here; parser.error prints the usage
    # line plus the message to stderr and exits with status 2.
    try:
        datetime.strptime(args.cutoff_date, "%Y-%m-%d")
    except ValueError:
        parser.error(
            f"invalid --cutoff-date {args.cutoff_date!r}: expected YYYY-MM-DD"
        )

    tsv_path = Path(args.tsv_file)
    # is_file() also rejects directories, which would otherwise fail later
    # when opened. sys.exit with a string writes it to stderr and exits
    # with status 1.
    if not tsv_path.is_file():
        sys.exit(f"Error: {tsv_path} not found or is not a file")

    analyze_downloads(str(tsv_path), args.cutoff_date)
1 change: 1 addition & 0 deletions reports/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ dependencies = [
"matplotlib>=3.10.3",
"pandas>=2.3.0",
"pygithub>=2.6.1",
"pypistats>=1.11.0"
]
182 changes: 182 additions & 0 deletions reports/virtualizarr-report.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
"category" "date" "percent" "downloads"
"without_mirrors" "2025-09-23" "3.85%" 944
"without_mirrors" "2025-11-10" "3.79%" 929
"without_mirrors" "2025-10-10" "3.22%" 789
"without_mirrors" "2025-08-22" "3.08%" 755
"without_mirrors" "2025-11-07" "3.06%" 750
"without_mirrors" "2025-11-11" "2.60%" 637
"without_mirrors" "2025-11-08" "2.38%" 584
"without_mirrors" "2025-09-02" "2.35%" 577
"without_mirrors" "2025-07-01" "2.32%" 569
"without_mirrors" "2025-08-25" "2.23%" 546
"without_mirrors" "2025-11-06" "2.07%" 508
"without_mirrors" "2025-11-09" "2.04%" 499
"without_mirrors" "2025-08-14" "1.93%" 473
"without_mirrors" "2025-08-27" "1.82%" 446
"without_mirrors" "2025-08-29" "1.82%" 446
"without_mirrors" "2025-09-04" "1.64%" 401
"without_mirrors" "2025-09-16" "1.54%" 378
"without_mirrors" "2025-10-11" "1.39%" 340
"without_mirrors" "2025-08-21" "1.30%" 318
"without_mirrors" "2025-10-27" "1.23%" 301
"without_mirrors" "2025-09-08" "1.15%" 281
"without_mirrors" "2025-08-13" "1.12%" 275
"without_mirrors" "2025-09-25" "1.09%" 268
"without_mirrors" "2025-10-01" "1.05%" 257
"without_mirrors" "2025-09-29" "1.04%" 255
"without_mirrors" "2025-08-23" "1.01%" 248
"without_mirrors" "2025-09-15" "1.00%" 244
"without_mirrors" "2025-09-03" "0.96%" 235
"without_mirrors" "2025-09-24" "0.95%" 232
"without_mirrors" "2025-08-12" "0.93%" 228
"without_mirrors" "2025-07-05" "0.87%" 212
"without_mirrors" "2025-08-04" "0.84%" 207
"without_mirrors" "2025-11-03" "0.82%" 202
"without_mirrors" "2025-06-24" "0.82%" 201
"without_mirrors" "2025-09-22" "0.80%" 195
"without_mirrors" "2025-07-16" "0.78%" 192
"without_mirrors" "2025-08-11" "0.78%" 190
"without_mirrors" "2025-11-04" "0.78%" 190
"without_mirrors" "2025-08-26" "0.77%" 188
"without_mirrors" "2025-11-05" "0.77%" 188
"without_mirrors" "2025-09-27" "0.72%" 176
"without_mirrors" "2025-10-13" "0.72%" 176
"without_mirrors" "2025-07-03" "0.71%" 173
"without_mirrors" "2025-09-05" "0.69%" 170
"without_mirrors" "2025-06-26" "0.68%" 167
"without_mirrors" "2025-08-05" "0.68%" 166
"without_mirrors" "2025-09-10" "0.67%" 165
"without_mirrors" "2025-09-30" "0.66%" 162
"without_mirrors" "2025-10-28" "0.65%" 160
"without_mirrors" "2025-08-19" "0.64%" 157
"without_mirrors" "2025-07-22" "0.64%" 156
"without_mirrors" "2025-07-08" "0.63%" 155
"without_mirrors" "2025-10-31" "0.58%" 141
"without_mirrors" "2025-08-08" "0.57%" 140
"without_mirrors" "2025-09-12" "0.57%" 140
"without_mirrors" "2025-09-06" "0.56%" 137
"without_mirrors" "2025-10-05" "0.56%" 136
"without_mirrors" "2025-10-06" "0.51%" 124
"without_mirrors" "2025-09-11" "0.50%" 123
"without_mirrors" "2025-07-30" "0.50%" 122
"without_mirrors" "2025-09-26" "0.49%" 121
"without_mirrors" "2025-10-23" "0.47%" 114
"without_mirrors" "2025-07-15" "0.44%" 107
"without_mirrors" "2025-08-01" "0.43%" 106
"without_mirrors" "2025-08-28" "0.43%" 106
"without_mirrors" "2025-08-15" "0.42%" 102
"without_mirrors" "2025-10-21" "0.41%" 100
"without_mirrors" "2025-09-07" "0.40%" 99
"without_mirrors" "2025-10-29" "0.40%" 99
"without_mirrors" "2025-10-20" "0.40%" 97
"without_mirrors" "2025-10-09" "0.39%" 96
"without_mirrors" "2025-10-14" "0.39%" 96
"without_mirrors" "2025-07-23" "0.39%" 95
"without_mirrors" "2025-10-03" "0.39%" 95
"without_mirrors" "2025-05-16" "0.38%" 94
"without_mirrors" "2025-09-17" "0.38%" 92
"without_mirrors" "2025-07-14" "0.37%" 91
"without_mirrors" "2025-07-31" "0.37%" 91
"without_mirrors" "2025-08-06" "0.37%" 90
"without_mirrors" "2025-07-28" "0.36%" 89
"without_mirrors" "2025-05-20" "0.35%" 86
"without_mirrors" "2025-07-21" "0.35%" 86
"without_mirrors" "2025-07-24" "0.34%" 84
"without_mirrors" "2025-09-09" "0.34%" 84
"without_mirrors" "2025-05-27" "0.33%" 81
"without_mirrors" "2025-06-02" "0.33%" 81
"without_mirrors" "2025-09-18" "0.33%" 81
"without_mirrors" "2025-10-04" "0.33%" 81
"without_mirrors" "2025-06-25" "0.33%" 80
"without_mirrors" "2025-10-15" "0.32%" 79
"without_mirrors" "2025-08-07" "0.29%" 72
"without_mirrors" "2025-10-07" "0.29%" 72
"without_mirrors" "2025-10-24" "0.29%" 72
"without_mirrors" "2025-10-22" "0.29%" 71
"without_mirrors" "2025-05-17" "0.29%" 70
"without_mirrors" "2025-10-02" "0.29%" 70
"without_mirrors" "2025-05-23" "0.28%" 69
"without_mirrors" "2025-05-28" "0.28%" 68
"without_mirrors" "2025-08-20" "0.28%" 68
"without_mirrors" "2025-06-11" "0.27%" 66
"without_mirrors" "2025-10-30" "0.27%" 66
"without_mirrors" "2025-05-30" "0.27%" 65
"without_mirrors" "2025-08-24" "0.27%" 65
"without_mirrors" "2025-05-21" "0.26%" 64
"without_mirrors" "2025-08-18" "0.26%" 63
"without_mirrors" "2025-06-30" "0.24%" 59
"without_mirrors" "2025-10-16" "0.24%" 58
"without_mirrors" "2025-07-10" "0.23%" 57
"without_mirrors" "2025-09-01" "0.23%" 57
"without_mirrors" "2025-10-12" "0.23%" 57
"without_mirrors" "2025-05-29" "0.22%" 55
"without_mirrors" "2025-06-10" "0.22%" 55
"without_mirrors" "2025-09-19" "0.22%" 55
"without_mirrors" "2025-11-01" "0.22%" 55
"without_mirrors" "2025-07-02" "0.22%" 54
"without_mirrors" "2025-09-28" "0.22%" 54
"without_mirrors" "2025-10-17" "0.22%" 54
"without_mirrors" "2025-07-07" "0.21%" 52
"without_mirrors" "2025-07-18" "0.21%" 51
"without_mirrors" "2025-06-23" "0.18%" 45
"without_mirrors" "2025-07-29" "0.18%" 43
"without_mirrors" "2025-05-15" "0.17%" 42
"without_mirrors" "2025-09-20" "0.17%" 42
"without_mirrors" "2025-06-12" "0.16%" 39
"without_mirrors" "2025-06-14" "0.16%" 38
"without_mirrors" "2025-07-11" "0.14%" 35
"without_mirrors" "2025-08-09" "0.14%" 35
"without_mirrors" "2025-08-16" "0.14%" 35
"without_mirrors" "2025-09-14" "0.14%" 35
"without_mirrors" "2025-09-21" "0.14%" 35
"without_mirrors" "2025-06-16" "0.14%" 34
"without_mirrors" "2025-07-25" "0.13%" 33
"without_mirrors" "2025-10-08" "0.12%" 30
"without_mirrors" "2025-10-26" "0.12%" 29
"without_mirrors" "2025-07-17" "0.11%" 26
"without_mirrors" "2025-06-17" "0.10%" 25
"without_mirrors" "2025-05-22" "0.10%" 24
"without_mirrors" "2025-05-24" "0.10%" 24
"without_mirrors" "2025-06-20" "0.10%" 24
"without_mirrors" "2025-07-13" "0.10%" 24
"without_mirrors" "2025-07-09" "0.09%" 22
"without_mirrors" "2025-10-25" "0.09%" 22
"without_mirrors" "2025-05-19" "0.09%" 21
"without_mirrors" "2025-06-05" "0.09%" 21
"without_mirrors" "2025-10-19" "0.09%" 21
"without_mirrors" "2025-08-17" "0.08%" 20
"without_mirrors" "2025-08-31" "0.08%" 20
"without_mirrors" "2025-05-25" "0.08%" 19
"without_mirrors" "2025-06-06" "0.07%" 18
"without_mirrors" "2025-09-13" "0.07%" 18
"without_mirrors" "2025-10-18" "0.07%" 18
"without_mirrors" "2025-06-01" "0.07%" 16
"without_mirrors" "2025-06-28" "0.07%" 16
"without_mirrors" "2025-08-10" "0.07%" 16
"without_mirrors" "2025-11-02" "0.07%" 16
"without_mirrors" "2025-06-18" "0.06%" 15
"without_mirrors" "2025-08-03" "0.06%" 15
"without_mirrors" "2025-08-30" "0.06%" 15
"without_mirrors" "2025-06-04" "0.06%" 14
"without_mirrors" "2025-06-13" "0.05%" 13
"without_mirrors" "2025-06-09" "0.05%" 12
"without_mirrors" "2025-06-21" "0.05%" 12
"without_mirrors" "2025-05-26" "0.04%" 11
"without_mirrors" "2025-06-03" "0.04%" 11
"without_mirrors" "2025-07-19" "0.04%" 11
"without_mirrors" "2025-07-12" "0.04%" 10
"without_mirrors" "2025-07-27" "0.04%" 9
"without_mirrors" "2025-06-27" "0.03%" 8
"without_mirrors" "2025-07-04" "0.03%" 7
"without_mirrors" "2025-07-20" "0.03%" 7
"without_mirrors" "2025-07-26" "0.03%" 7
"without_mirrors" "2025-08-02" "0.03%" 7
"without_mirrors" "2025-06-07" "0.02%" 6
"without_mirrors" "2025-06-29" "0.02%" 4
"without_mirrors" "2025-06-08" "0.01%" 3
"without_mirrors" "2025-06-22" "0.01%" 3
"without_mirrors" "2025-06-15" "0.01%" 2
"Total" 24,504

Date range: 2025-05-15 - 2025-11-11