diff --git a/.github/workflows/daily_collection.yaml b/.github/workflows/daily_collection.yaml
index 1545d31..7745148 100644
--- a/.github/workflows/daily_collection.yaml
+++ b/.github/workflows/daily_collection.yaml
@@ -40,7 +40,7 @@ jobs:
           uv pip install .
       - name: Collect PyPI Downloads
         run: |
-          uv run download-analytics collect-pypi \
+          uv run pymetrics collect-pypi \
             --verbose \
             --max-days ${{ inputs.max_days_pypi || 30 }} \
             --add-metrics \
@@ -50,7 +50,7 @@ jobs:
           BIGQUERY_CREDENTIALS: ${{ secrets.BIGQUERY_CREDENTIALS }}
       - name: Collect Anaconda Downloads
         run: |
-          uv run download-analytics collect-anaconda \
+          uv run pymetrics collect-anaconda \
             --output-folder gdrive://1UnDYovLkL4gletOF5328BG1X59mSHF-Z \
             --max-days ${{ inputs.max_days_anaconda || 90 }} \
             --verbose
@@ -72,6 +72,6 @@ jobs:
           uv pip install -U pip
           uv pip install -e .[dev]
       - name: Slack alert if failure
-        run: uv run python -m download_analytics.slack_utils -r ${{ github.run_id }} -c ${{ github.event.inputs.slack_channel || 'sdv-alerts' }}
+        run: uv run python -m pymetrics.slack_utils -r ${{ github.run_id }} -c ${{ github.event.inputs.slack_channel || 'sdv-alerts' }}
         env:
           SLACK_TOKEN: ${{ secrets.SLACK_TOKEN }}
diff --git a/.github/workflows/daily_summarize.yaml b/.github/workflows/daily_summarize.yaml
index b5c731e..5c997f2 100644
--- a/.github/workflows/daily_summarize.yaml
+++ b/.github/workflows/daily_summarize.yaml
@@ -30,7 +30,7 @@ jobs:
           uv pip install .
       - name: Run Summarize
         run: |
-          uv run download-analytics summarize \
+          uv run pymetrics summarize \
             --output-folder gdrive://10QHbqyvptmZX4yhu2Y38YJbVHqINRr0n
         env:
           PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
@@ -67,7 +67,7 @@ jobs:
           uv pip install .[dev]
       - name: Slack alert if failure
         run: |
-          uv run python -m download_analytics.slack_utils \
+          uv run python -m pymetrics.slack_utils \
             -r ${{ github.run_id }} \
             -c ${{ github.event.inputs.slack_channel || 'sdv-alerts' }} \
             -m 'Summarize Analytics build failed :fire: :dumpster-fire: :fire:'
diff --git a/.github/workflows/dryrun.yaml b/.github/workflows/dryrun.yaml
index 9811928..00f664d 100644
--- a/.github/workflows/dryrun.yaml
+++ b/.github/workflows/dryrun.yaml
@@ -28,7 +28,7 @@ jobs:
           uv pip install .
       - name: Collect PyPI Downloads - Dry Run
         run: |
-          uv run download-analytics collect-pypi \
+          uv run pymetrics collect-pypi \
             --verbose \
             --max-days 30 \
             --add-metrics \
@@ -39,7 +39,7 @@ jobs:
           BIGQUERY_CREDENTIALS: ${{ secrets.BIGQUERY_CREDENTIALS }}
       - name: Collect Anaconda Downloads - Dry Run
         run: |
-          uv run download-analytics collect-anaconda \
+          uv run pymetrics collect-anaconda \
             --output-folder gdrive://1UnDYovLkL4gletOF5328BG1X59mSHF-Z \
             --max-days 90 \
             --verbose \
@@ -48,7 +48,7 @@ jobs:
           PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
       - name: Summarize - Dry Run
         run: |
-          uv run download-analytics summarize \
+          uv run pymetrics summarize \
            --verbose \
            --output-folder gdrive://10QHbqyvptmZX4yhu2Y38YJbVHqINRr0n \
            --dry-run
diff --git a/.github/workflows/manual.yaml b/.github/workflows/manual.yaml
index 1dc642c..cf6f07c 100644
--- a/.github/workflows/manual.yaml
+++ b/.github/workflows/manual.yaml
@@ -36,7 +36,7 @@ jobs:
           uv pip install .
       - name: Collect Downloads Data
         run: |
-          uv run download-analytics collect-pypi \
+          uv run pymetrics collect-pypi \
            --verbose \
            --projects ${{ github.event.inputs.projects }} \
            ${{ github.event.inputs.max_days && '--max-days ' || '' }} \
diff --git a/README.md b/README.md
index 410f5be..18e8d74 100644
--- a/README.md
+++ b/README.md
@@ -1,11 +1,11 @@
-# Download Analytics
+# PyMetrics

-The Download Analytics project allows you to extract download metrics for Python libraries published on [PyPI](https://pypi.org/) and [Anaconda](https://www.anaconda.com/).
+The PyMetrics project allows you to extract download metrics for Python libraries published on [PyPI](https://pypi.org/) and [Anaconda](https://www.anaconda.com/).
 The DataCebo team uses these scripts to report download counts for the libraries in the [SDV ecosystem](https://sdv.dev/) and other libraries.

 ## Overview

-The Download Analytics project is a collection of scripts and tools to extract information
+The PyMetrics project is a collection of scripts and tools to extract information
 about OSS project downloads from different sources and to analyze them to produce user
 engagement metrics.

diff --git a/docs/COLLECTED_DATA.md b/docs/COLLECTED_DATA.md
index 096dbd9..209cab9 100644
--- a/docs/COLLECTED_DATA.md
+++ b/docs/COLLECTED_DATA.md
@@ -1,13 +1,13 @@
-# Data collected by Download Analytics
+# Data collected by PyMetrics

-The Download Analytics project collects data about downloads from multiple sources.
+The PyMetrics project collects data about downloads from multiple sources.
 This guide explains the exact data that is being collected from each source, as well as the
 aggregations metrics that are computed on them.

 ## PyPI Downloads

-Download Analytics collects information about the downloads from PyPI by making queries to the
+PyMetrics collects information about the downloads from PyPI by making queries to the
 [public PyPI download statistics dataset on BigQuery](https://console.cloud.google.com/bigquery?p=bigquery-public-data&d=pypi&page=dataset)
 by running the following query:

@@ -51,7 +51,7 @@ the given time period, with the following columns:

 ## Aggregation Metrics

-If the `--add-metrics` option is passed to `download-analytics`, a spreadsheet with aggregation
+If the `--add-metrics` option is passed to `pymetrics`, a spreadsheet with aggregation
 metrics will be created alongside the raw PyPI downloads CSV file for each individual project.

 The aggregation metrics spreasheets contain the following tabs:
diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md
index 603eb30..4284702 100644
--- a/docs/DEVELOPMENT.md
+++ b/docs/DEVELOPMENT.md
@@ -1,30 +1,30 @@
-# Download Analytics Development Guide
+# PyMetrics Development Guide

-This guide covers how to download and install **Download Analytics** to run it locally and
+This guide covers how to download and install **PyMetrics** to run it locally and
 modify its code.

 ## Install

-**Download Analytics** is not released to any public Python package repository, so the only
+**PyMetrics** is not released to any public Python package repository, so the only
 way to run it is to download the code from Github and install from source.

-1. Clone the [github repository](https://github.com/datacebo/download-analytics)
+1. Clone the [github repository](https://github.com/datacebo/pymetrics)

 ```bash
-git clone git@github.com:datacebo/download-analytics
+git clone git@github.com:datacebo/pymetrics
 ```

 2. Create a `virtualenv` (or `conda` env) to host the project and its dependencies.
 The example below covers the creation of a `virtualenv` using `virtualenvwrapper` with Python 3.8.

 ```bash
-mkvirtualenv download-analytics -p $(which python3.8)
+mkvirtualenv pymetrics -p $(which python3.8)
 ```

 3. Enter the project folder and install the project:

 ```bash
-cd download-analytics
+cd pymetrics
 make install
 ```
@@ -32,15 +32,15 @@ For development, run `make install-develop` instead.

 ## Command Line Interface

-After the installation, a new `download-analytics` command will have been registered inside your
+After the installation, a new `pymetrics` command will have been registered inside your
 `virtualenv`. This command can be used in conjunction with the `collect-pypi` action to collect
 downloads data from BigQuery and store the output locally or in Google Drive.

 Here is the entire list of arguments that the command line has:

 ```bash
-$ download-analytics collect-pypi --help
-usage: download-analytics collect-pypi [-h] [-v] [-l LOGFILE] [-o OUTPUT_FOLDER] [-a AUTHENTICATION_CREDENTIALS]
+$ pymetrics collect-pypi --help
+usage: pymetrics collect-pypi [-h] [-v] [-l LOGFILE] [-o OUTPUT_FOLDER] [-a AUTHENTICATION_CREDENTIALS]
                                        [-c CONFIG_FILE] [-p [PROJECTS [PROJECTS ...]]] [-s START_DATE] [-m MAX_DAYS]
                                        [-d] [-f] [-M]

@@ -73,7 +73,7 @@ and store the downloads data into a Google Drive folder alongside the correspond
 metric spreadsheets would look like this:

 ```bash
-$ download-analytics collect-pypi --verbose --projects sdv ctgan --start-date 2021-01-01 \
+$ pymetrics collect-pypi --verbose --projects sdv ctgan --start-date 2021-01-01 \
     --add-metrics --output-folder gdrive://10QHbqyvptmZX4yhu2Y38YJbVHqINRr0n
 ```

@@ -83,7 +83,7 @@ have a look at the [COLLECTED_DATA.md](COLLECTED_DATA.md) document.

 ## Python Interface

 The Python entry point that is equivalent to the CLI explained above is the function
-`download_analytics.main.collect_downloads`.
+`pymetrics.main.collect_downloads`.

 This function has the following interface:
diff --git a/docs/SETUP.md b/docs/SETUP.md
index d30a806..6457ab0 100644
--- a/docs/SETUP.md
+++ b/docs/SETUP.md
@@ -1,6 +1,6 @@
-# Download Analytics Setup
+# PyMetrics Setup

-The Download Analytics project requires privileged access to the following resources:
+The PyMetrics project requires privileged access to the following resources:

 - Google Drive, which is accessed via the `PyDrive` library.
 - Google Big Query, which is accessed via the `google-cloud-bigquery` library.
@@ -31,10 +31,10 @@ if contains the application KEY which should never be made public.

 Once the file is created, you can follow these steps:

-1. Run the `download-analytics collect-pypi` command. If the `settings.yaml` file has been properly
+1. Run the `pymetrics collect-pypi` command. If the `settings.yaml` file has been properly
    created, this will **open a new tab on your web browser**, where you need to authenticate.

-   | ![pydrive-collect](imgs/pydrive-collect.png "Run the `download-analytics collect-pypi` Command") |
+   | ![pydrive-collect](imgs/pydrive-collect.png "Run the `pymetrics collect-pypi` Command") |
    | - |

 2. Click on the Google account which you which to authenticate with. Notice that the account that
@@ -67,7 +67,7 @@ be provided to you by a privileged admin.
 Once you have this JSON file, you have two options:

 1. Pass the path to the authentication file with the `-a` or `--authentication-credentials`
-   argument to the `download-analytics collect-pypi` command.
+   argument to the `pymetrics collect-pypi` command.
   | ![bigquery-a](imgs/bigquery-a.png "Pass the credentials on command line") |
   | - |
@@ -80,12 +80,12 @@ Once you have this JSON file, you have two options:

 ## Github Actions Setup

-When using Download Analytics via Github Actions, the authentication credentials for Google
+When using PyMetrics via Github Actions, the authentication credentials for Google
 Drive and Big Query must be passed as repository `secrets`, which will later on be declared
 as environment variables.

-1. Open the [Settings page of the Download Analytics repository](
-   https://github.com/datacebo/download-analytics/settings/secrets/actions) and click on `Secrets`.
+1. Open the [Settings page of the PyMetrics repository](
+   https://github.com/datacebo/pymetrics/settings/secrets/actions) and click on `Secrets`.

   | ![secrets](imgs/secrets.png "Open the secrets page of the repository") |
   | - |
diff --git a/docs/WORKFLOWS.md b/docs/WORKFLOWS.md
index ea54c12..c3a1ad9 100644
--- a/docs/WORKFLOWS.md
+++ b/docs/WORKFLOWS.md
@@ -1,4 +1,4 @@
-# Download Analytics Workflows
+# PyMetrics Workflows

 This document describes how to perform the two most common workflows:

@@ -65,13 +65,13 @@ after it is merged, the downloads of the new library will start to be added to t

 ## One Shot - Collecting data over a specific period.

-Download Analytics is prepared to collect data for one or more libraries over a specific period
-using a [GitHub Actions Workflow](https://github.com/datacebo/download-analytics/actions/workflows/manual.yaml).
+PyMetrics is prepared to collect data for one or more libraries over a specific period
+using a [GitHub Actions Workflow](https://github.com/datacebo/pymetrics/actions/workflows/manual.yaml).
 In order to do this, you will need to follow these steps:

-1. Enter the [GitHub Actions Section of the repository](https://github.com/datacebo/download-analytics/actions)
-   and click on the [Manual Collection Workflow](https://github.com/datacebo/download-analytics/actions/workflows/manual.yaml).
+1. Enter the [GitHub Actions Section of the repository](https://github.com/datacebo/pymetrics/actions)
+   and click on the [Manual Collection Workflow](https://github.com/datacebo/pymetrics/actions/workflows/manual.yaml).
   | ![manual-collection](imgs/manual-collection.png "Manual Collection Workflow") |
   | - |

diff --git a/download_analytics/__init__.py b/pymetrics/__init__.py
similarity index 100%
rename from download_analytics/__init__.py
rename to pymetrics/__init__.py
diff --git a/download_analytics/__main__.py b/pymetrics/__main__.py
similarity index 94%
rename from download_analytics/__main__.py
rename to pymetrics/__main__.py
index d0ebcec..68d3854 100644
--- a/download_analytics/__main__.py
+++ b/pymetrics/__main__.py
@@ -1,4 +1,4 @@
-"""Download Analytics CLI."""
+"""PyMetrics CLI."""

 import argparse
 import logging
@@ -9,9 +9,9 @@
 import yaml

-from download_analytics.anaconda import collect_anaconda_downloads
-from download_analytics.main import collect_downloads
-from download_analytics.summarize import summarize_downloads
+from pymetrics.anaconda import collect_anaconda_downloads
+from pymetrics.main import collect_downloads
+from pymetrics.summarize import summarize_downloads

 LOGGER = logging.getLogger(__name__)

@@ -22,7 +22,7 @@ def _env_setup(logfile, verbosity):
     format_ = '%(asctime)s - %(levelname)s - %(message)s'
     level = (3 - verbosity) * 10
     logging.basicConfig(filename=logfile, level=level, format=format_)
-    logging.getLogger('download_analytics').setLevel(level)
+    logging.getLogger('pymetrics').setLevel(level)
     logging.getLogger().setLevel(logging.WARN)

@@ -119,8 +119,8 @@ def _get_parser():
         help='Do not upload the results. Just calculate them.',
     )
     parser = argparse.ArgumentParser(
-        prog='download-analytics',
-        description='Download Analytics Command Line Interface',
+        prog='pymetrics',
+        description='PyMetrics Command Line Interface',
         parents=[logging_args],
     )
     parser.set_defaults(action=None)
@@ -255,7 +255,7 @@ def _get_parser():


 def main():
-    """Run the Download Analytics CLI."""
+    """Run the PyMetrics CLI."""
     parser = _get_parser()
     if len(sys.argv) < 2:
         parser.print_help()
diff --git a/download_analytics/anaconda.py b/pymetrics/anaconda.py
similarity index 98%
rename from download_analytics/anaconda.py
rename to pymetrics/anaconda.py
index c4cbc3c..f25bef2 100644
--- a/download_analytics/anaconda.py
+++ b/pymetrics/anaconda.py
@@ -9,8 +9,8 @@
 import requests
 from tqdm import tqdm

-from download_analytics.output import append_row, create_csv, get_path, load_csv
-from download_analytics.time_utils import drop_duplicates_by_date
+from pymetrics.output import append_row, create_csv, get_path, load_csv
+from pymetrics.time_utils import drop_duplicates_by_date

 LOGGER = logging.getLogger(__name__)
 dir_path = os.path.dirname(os.path.realpath(__file__))
diff --git a/download_analytics/bq.py b/pymetrics/bq.py
similarity index 100%
rename from download_analytics/bq.py
rename to pymetrics/bq.py
diff --git a/download_analytics/drive.py b/pymetrics/drive.py
similarity index 100%
rename from download_analytics/drive.py
rename to pymetrics/drive.py
diff --git a/download_analytics/main.py b/pymetrics/main.py
similarity index 92%
rename from download_analytics/main.py
rename to pymetrics/main.py
index 69f6f73..6123a93 100644
--- a/download_analytics/main.py
+++ b/pymetrics/main.py
@@ -2,10 +2,10 @@

 import logging

-from download_analytics.metrics import compute_metrics
-from download_analytics.output import create_csv, get_path
-from download_analytics.pypi import get_pypi_downloads
-from download_analytics.summarize import get_previous_pypi_downloads
+from pymetrics.metrics import compute_metrics
+from pymetrics.output import create_csv, get_path
+from pymetrics.pypi import get_pypi_downloads
+from pymetrics.summarize import get_previous_pypi_downloads

 LOGGER = logging.getLogger(__name__)
diff --git a/download_analytics/metrics.py b/pymetrics/metrics.py
similarity index 98%
rename from download_analytics/metrics.py
rename to pymetrics/metrics.py
index dd2a050..f98a595 100644
--- a/download_analytics/metrics.py
+++ b/pymetrics/metrics.py
@@ -5,7 +5,7 @@
 import pandas as pd

-from download_analytics.output import create_spreadsheet
+from pymetrics.output import create_spreadsheet

 LOGGER = logging.getLogger(__name__)
diff --git a/download_analytics/output.py b/pymetrics/output.py
similarity index 97%
rename from download_analytics/output.py
rename to pymetrics/output.py
index 83cbcaf..0125ac6 100644
--- a/download_analytics/output.py
+++ b/pymetrics/output.py
@@ -6,7 +6,7 @@
 import pandas as pd

-from download_analytics import drive
+from pymetrics import drive

 LOGGER = logging.getLogger(__name__)
@@ -118,7 +118,7 @@ def create_csv(output_path, data):


 def load_spreadsheet(spreadsheet):
-    """Load a spreadsheet previously created by download-analytics.
+    """Load a spreadsheet previously created by pymetrics.

     Args:
         spreadsheet (str or stream):
@@ -154,7 +154,7 @@ def load_csv(csv_path, read_csv_kwargs=None):
-    """Load a CSV previously created by download-analytics.
+    """Load a CSV previously created by pymetrics.

     Args:
         csv_path (str):
diff --git a/download_analytics/pypi.py b/pymetrics/pypi.py
similarity index 99%
rename from download_analytics/pypi.py
rename to pymetrics/pypi.py
index e50da07..ab92833 100644
--- a/download_analytics/pypi.py
+++ b/pymetrics/pypi.py
@@ -5,7 +5,7 @@
 import pandas as pd

-from download_analytics.bq import run_query
+from pymetrics.bq import run_query

 LOGGER = logging.getLogger(__name__)
diff --git a/download_analytics/slack_utils.py b/pymetrics/slack_utils.py
similarity index 94%
rename from download_analytics/slack_utils.py
rename to pymetrics/slack_utils.py
index f600551..12b8df0 100644
--- a/download_analytics/slack_utils.py
+++ b/pymetrics/slack_utils.py
@@ -5,7 +5,7 @@
 from slack_sdk import WebClient

-GITHUB_URL_PREFIX = 'https://github.com/datacebo/download-analytics/actions/runs/'
+GITHUB_URL_PREFIX = 'https://github.com/datacebo/pymetrics/actions/runs/'

 DEFAULT_SLACK_CHANNEL = 'sdv-alerts-debug'
@@ -92,7 +92,7 @@ def get_parser():
         '--message',
         type=str,
         help='The message to post.',
-        default='Download Analytics build failed :fire: :dumpster-fire: :fire:',
+        default='PyMetrics build failed :fire: :dumpster-fire: :fire:',
     )
     parser.set_defaults(action=send_alert)
diff --git a/download_analytics/summarize.py b/pymetrics/summarize.py
similarity index 98%
rename from download_analytics/summarize.py
rename to pymetrics/summarize.py
index 5e62799..1cc7d2b 100644
--- a/download_analytics/summarize.py
+++ b/pymetrics/summarize.py
@@ -6,8 +6,8 @@
 import pandas as pd
 from packaging.version import Version, parse

-from download_analytics.output import append_row, create_spreadsheet, get_path, load_csv
-from download_analytics.time_utils import get_current_year, get_min_max_dt_in_year
+from pymetrics.output import append_row, create_spreadsheet, get_path, load_csv
+from pymetrics.time_utils import get_current_year, get_min_max_dt_in_year

 TOTAL_COLUMN_NAME = 'Total Since Beginning'
 ECOSYSTEM_COLUMN_NAME = 'Ecosystem'
diff --git a/download_analytics/time_utils.py b/pymetrics/time_utils.py
similarity index 100%
rename from download_analytics/time_utils.py
rename to pymetrics/time_utils.py
diff --git a/pyproject.toml b/pyproject.toml
index 9d6836e..73105df 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@
 requires = ["setuptools", "wheel"]
 build-backend = "setuptools.build_meta"

 [project]
-name = "download-analytics"
+name = "pymetrics"
 version = "0.0.1.dev0"
 description = "Scripts to extract metrics about OSS project downloads."
 readme = "README.md"
@@ -31,13 +31,13 @@ dependencies = [
 ]

 [project.urls]
-Homepage = "https://github.com/DataCebo/download-analytics"
+Homepage = "https://github.com/DataCebo/pymetrics"

 [project.scripts]
-download-analytics = "download_analytics.__main__:main"
+pymetrics = "pymetrics.__main__:main"

 [tool.setuptools.packages.find]
-include = ['download_analytics', 'download_analytics.*']
+include = ['pymetrics', 'pymetrics.*']

 [project.optional-dependencies]
 dev = [
@@ -53,7 +53,7 @@ test = [
 preview = true
 line-length = 100
 indent-width = 4
-src = ["download_analytics"]
+src = ["pymetrics"]
 target-version = "py313"
 exclude = [
     "docs",
@@ -80,7 +80,7 @@ docstring-code-format = true
 docstring-code-line-length = "dynamic"

 [tool.ruff.lint.isort]
-known-first-party = ["download_analytics"]
+known-first-party = ["pymetrics"]
 lines-between-types = 0

 [tool.ruff.lint.per-file-ignores]
diff --git a/tests/unit/test_time_utils.py b/tests/unit/test_time_utils.py
index fd6f413..67d53a6 100644
--- a/tests/unit/test_time_utils.py
+++ b/tests/unit/test_time_utils.py
@@ -2,7 +2,7 @@
 import pandas as pd

-from download_analytics.time_utils import (
+from pymetrics.time_utils import (
     drop_duplicates_by_date,
     get_current_year,
     get_first_datetime_in_year,
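---

Reviewer note (not part of the patch): every change above is a mechanical `download-analytics` / `download_analytics` → `pymetrics` substitution, so the main things worth verifying locally are the renamed entry points. A minimal smoke test might look like the sketch below; it assumes this branch is checked out and installed with `uv`, and it only re-runs commands that already appear in the diff (the `--help` flags come from argparse, which both entry points use).

```bash
# Editable install from the renamed project root (same call the workflows use).
uv pip install -e .

# The console script declared in [project.scripts] is now `pymetrics`.
uv run pymetrics collect-pypi --help

# The Slack helper module moved along with the package rename.
uv run python -m pymetrics.slack_utils --help
```

If both commands print their usage text, the rename of the script name, package directory, and module imports is wired up consistently.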