
Commit c06394d

fix dry run
1 parent 7ddaae4 commit c06394d

2 files changed: +19 -4 lines changed


.github/workflows/daily_collection.yaml

Lines changed: 1 addition & 1 deletion
@@ -75,6 +75,6 @@ jobs:
           uv run python -m pymetrics.slack_utils \
             -r ${{ github.run_id }} \
             -c ${{ github.event.inputs.slack_channel || 'sdv-alerts' }} \
-            -m 'Daily Collect PyMetrics failed :fire: :dumpster-fire: :fire:'
+            -m 'Daily Collection PyMetrics failed :fire: :dumpster-fire: :fire:'
         env:
           SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}

pymetrics/anaconda.py

Lines changed: 18 additions & 3 deletions
@@ -23,6 +23,15 @@
 TIME_COLUMN = 'time'
 PKG_COLUMN = 'pkg_name'
 ANACONDA_BUCKET_PATH = 's3://anaconda-package-data/conda'
+ANACONDA_COLUMNS = [
+    TIME_COLUMN,
+    'data_source',
+    PKG_COLUMN,
+    'pkg_version',
+    'pkg_platform',
+    'pkg_python',
+    'counts',
+]
 
 
 def _read_anaconda_parquet(URL, pkg_names=None):
@@ -80,6 +89,9 @@ def _get_previous_anaconda_downloads(output_folder, filename):
     }
     csv_path = get_path(output_folder, filename)
     previous = load_csv(csv_path, read_csv_kwargs=read_csv_kwargs)
+    if not previous:
+        previous = pd.DataFrame(columns=ANACONDA_COLUMNS)
+
     previous[TIME_COLUMN] = pd.to_datetime(previous[TIME_COLUMN])
     return previous
 
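Note (not part of the commit), a minimal stand-alone sketch of what the fallback buys: building the empty frame with the explicit ANACONDA_COLUMNS lets the pd.to_datetime call that follows, and the later per-day filtering, run even when load_csv finds no previous CSV, for example on a dry run. The literal column names below mirror the module constants (TIME_COLUMN = 'time', PKG_COLUMN = 'pkg_name').

import pandas as pd

ANACONDA_COLUMNS = [
    'time', 'data_source', 'pkg_name', 'pkg_version',
    'pkg_platform', 'pkg_python', 'counts',
]

# Simulate the case where no previous CSV exists on disk.
previous = pd.DataFrame(columns=ANACONDA_COLUMNS)

# Succeeds on the empty frame because the 'time' column exists;
# without the explicit columns this line would raise a KeyError.
previous['time'] = pd.to_datetime(previous['time'])

print(previous.empty)          # True
print(previous['time'].dtype)  # datetime64[ns]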

@@ -179,9 +191,12 @@ def collect_anaconda_downloads(
         pkg_names=projects,
     )
     if len(new_downloads) > 0:
-        # Keep only the newest data (on a per day basis) for all packages
-        previous = previous[previous[TIME_COLUMN].dt.date != iteration_datetime.date()]
-        previous = pd.concat([previous, new_downloads], ignore_index=True)
+        if len(previous) == 0:
+            previous = new_downloads
+        else:
+            # Keep only the newest data (on a per day basis) for all packages
+            previous = previous[previous[TIME_COLUMN].dt.date != iteration_datetime.date()]
+            previous = pd.concat([previous, new_downloads], ignore_index=True)
 
     previous = previous.sort_values(TIME_COLUMN)
     LOGGER.info('Obtained %s new downloads', all_downloads_count - len(previous))
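
Note (not part of the commit), a self-contained sketch of the merge step with the new empty-history guard; the merge_downloads helper and the sample rows are hypothetical, adapted from the hunk above. When previous is empty, the new downloads are taken as-is instead of being concatenated onto an empty frame, which, depending on the pandas version, can upcast dtypes or emit a FutureWarning.

import pandas as pd

TIME_COLUMN = 'time'


def merge_downloads(previous, new_downloads, iteration_date):
    # Hypothetical stand-alone version of the logic shown in the diff above.
    if len(new_downloads) > 0:
        if len(previous) == 0:
            # No history yet (e.g. a dry run): take the new data as-is.
            previous = new_downloads
        else:
            # Keep only the newest data (on a per day basis) for all packages
            previous = previous[previous[TIME_COLUMN].dt.date != iteration_date]
            previous = pd.concat([previous, new_downloads], ignore_index=True)

    return previous.sort_values(TIME_COLUMN)


# Usage with an empty history and one day of new data (sample values).
empty_previous = pd.DataFrame(columns=[TIME_COLUMN, 'pkg_name', 'counts'])
new = pd.DataFrame({
    TIME_COLUMN: pd.to_datetime(['2024-01-01']),
    'pkg_name': ['sdv'],
    'counts': [10],
})
print(merge_downloads(empty_previous, new, pd.Timestamp('2024-01-01').date()))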
