Skip to content

Commit 31cf9ec

Browse files
committed
speed up dry run
1 parent fa0d572 commit 31cf9ec

File tree

3 files changed

+8 additions, -6 deletions (lines changed)

pymetrics/main.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -51,7 +51,9 @@ def collect_pypi_downloads(
5151
LOGGER.info(f'Collecting new downloads for projects={projects}')
5252

5353
csv_path = get_path(output_folder, 'pypi.csv')
54-
previous = get_previous_pypi_downloads(input_file=None, output_folder=output_folder)
54+
previous = get_previous_pypi_downloads(
55+
input_file=None, output_folder=output_folder, dry_run=dry_run
56+
)
5557

5658
pypi_downloads = get_pypi_downloads(
5759
projects=projects,

pymetrics/pypi.py

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -129,7 +129,6 @@ def get_pypi_downloads(
129129
if previous is not None:
130130
if isinstance(projects, str):
131131
projects = (projects,)
132-
133132
previous_projects = previous[previous.project.isin(projects)]
134133
min_date = previous_projects.timestamp.min().date()
135134
max_date = previous_projects.timestamp.max().date()

pymetrics/summarize.py

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
import logging
44
import os
5+
import random
56

67
import pandas as pd
78
from packaging.version import Version, parse
@@ -110,7 +111,7 @@ def _sum_counts(base_count, dep_to_count, parent_to_count):
110111
return base_count + sum(parent_to_count.values()) + sum(dep_to_count.values())
111112

112113

113-
def get_previous_pypi_downloads(input_file, output_folder):
114+
def get_previous_pypi_downloads(input_file, output_folder, dry_run=False):
114115
"""Read pypi.csv and return a DataFrame of the downloads.
115116
116117
Args:
@@ -138,12 +139,12 @@ def get_previous_pypi_downloads(input_file, output_folder):
138139
'cpu': pd.CategoricalDtype(),
139140
},
140141
}
142+
if dry_run:
143+
read_csv_kwargs['nrows'] = 10_000
141144
data = load_csv(csv_path, read_csv_kwargs=read_csv_kwargs)
142145
LOGGER.info('Parsing version column to Version class objects')
143-
if data and 'version' in data.columns:
146+
if 'version' in data.columns:
144147
data['version'] = data['version'].apply(parse)
145-
if not data:
146-
data = pd.DataFrame()
147148
return data
148149

149150

0 commit comments

Comments
 (0)