2 changes: 1 addition & 1 deletion .github/workflows/manual.yaml
@@ -1,4 +1,4 @@
name: Manual collection
name: Manual Collection

on:
workflow_dispatch:
8 changes: 6 additions & 2 deletions .github/workflows/weekly_collection.yaml
@@ -25,12 +25,16 @@ jobs:
run: |
uv pip install -U pip
uv pip install .
- name: Collect Github Data
- name: Collect GitHub Data
run: |
uv run github-analytics collect -v -q -t ${{ secrets.GITHUB_TOKEN }} -m -c weekly.yaml
env:
PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}

- name: Consolidate GitHub Data
run: |
uv run github-analytics consolidate -v -c weekly.yaml
env:
PYDRIVE_CREDENTIALS: ${{ secrets.PYDRIVE_CREDENTIALS }}
alert:
needs: [weekly_github_collection]
runs-on: ubuntu-latest
45 changes: 43 additions & 2 deletions github_analytics/__main__.py
@@ -9,6 +9,7 @@

import yaml

from github_analytics.consolidate import consolidate_metrics
from github_analytics.main import collect_projects, collect_traffic
from github_analytics.summarize import summarize_metrics

@@ -140,6 +141,19 @@ def _summarize(args, parser):
)


def _consolidate(args, parser):
config = _load_config(args.config_file)
output_folder = args.output_folder or config.get('output_folder', '.')
projects = config['projects']

consolidate_metrics(
projects=projects,
output_folder=output_folder,
dry_run=args.dry_run,
verbose=args.verbose,
)


def _get_parser():
# Logging
logging_args = argparse.ArgumentParser(add_help=False)
@@ -163,7 +177,7 @@ def _get_parser():
action = parser.add_subparsers(title='action')
action.required = True

# collect
# Collect
collect = action.add_parser('collect', help='Collect github metrics.', parents=[logging_args])
collect.set_defaults(action=_collect)

@@ -172,7 +186,7 @@ def _get_parser():
'--output-folder',
type=str,
required=False,
help='Output folder path. Defaults to .',
help='Output folder path. Defaults to output folder in config-file.',
)
collect.add_argument('-t', '--token', type=str, required=False, help='Github Token to use.')
collect.add_argument(
@@ -203,6 +217,31 @@ def _get_parser():
action='store_false',
help='Start from scratch instead of incrementing over existing data.',
)
# Consolidate
consolidate = action.add_parser(
'consolidate', help='Consolidate github metrics', parents=[logging_args]
)
consolidate.set_defaults(action=_consolidate)
consolidate.add_argument(
'-c',
'--config-file',
type=str,
default='config.yaml',
help='Path to the configuration file.',
)
consolidate.add_argument(
'-d',
'--dry-run',
action='store_true',
help='Do not actually create the consolidated overview file. Just calculate it.',
)
consolidate.add_argument(
'-o',
'--output-folder',
type=str,
required=False,
help='Output folder path. Defaults to output folder in config-file.',
)

# Traffic
traffic = action.add_parser(
@@ -229,6 +268,8 @@ def _get_parser():
help='Projects to collect. Defaults to ALL if not given',
)
traffic.add_argument('-r', '--repositories', nargs='*', help='List of repositories to add.')

# Summarize
summarize = action.add_parser(
'summarize', help='Summarize the github analytics information.', parents=[logging_args]
)
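
For context, a minimal sketch of the configuration lookup the new _consolidate handler performs: an explicit --output-folder wins, then the config's output_folder, then the current directory. The config contents below are hypothetical; only the projects list and the optional output_folder key are implied by the code in this diff.

import yaml

# Hypothetical configuration; only 'projects' and 'output_folder' are assumed
# here because _consolidate reads exactly these keys.
config_text = """
projects:
  - ProjectA
  - ProjectB
output_folder: weekly_reports
"""

config = yaml.safe_load(config_text)

args_output_folder = None  # argparse value when -o/--output-folder is not passed
output_folder = args_output_folder or config.get('output_folder', '.')
projects = config['projects']

print(projects, output_folder)  # ['ProjectA', 'ProjectB'] weekly_reports
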
67 changes: 67 additions & 0 deletions github_analytics/consolidate.py
@@ -0,0 +1,67 @@
"""Consolidate Overview Function."""

import logging
import os

import pandas as pd
from tqdm import tqdm

from github_analytics.constants import (
ECOSYSTEM_COLUMN_NAME,
METRIC_COLUMN_NAME,
METRICS_SHEET_NAME,
VALUE_COLUMN_NAME,
)
from github_analytics.output import create_spreadsheet, load_spreadsheet

OUTPUT_FILENAME = 'Consolidated_Overview'
SHEET_NAME = 'Overview'

LOGGER = logging.getLogger(__name__)


def consolidate_metrics(projects, output_folder, dry_run=False, verbose=True):
"""Consolidate GitHub Metrics from multiple spreadsheets on Google Drive.

Args:
projects (list[str]):
List of projects/ecosystems to consolidate. The project name must
exactly match the file in the Google Drive folder.

output_folder (str):
Output folder path on Google Drive that contains the Google Spreadsheets.

dry_run (bool):
Whether or not to actually upload the results to Google Drive.
If True, it just calculates the results. Defaults to False.

verbose (bool):
If True, will log the consolidated values for each project and the
final overview dataframe. Defaults to True.
"""
rows = []
for project in tqdm(projects):
row_info = {ECOSYSTEM_COLUMN_NAME: project}
filepath = os.path.join(output_folder, project)
df = load_spreadsheet(filepath, sheet_name=METRICS_SHEET_NAME)
row = df[[METRIC_COLUMN_NAME, VALUE_COLUMN_NAME]].T
row = row.reset_index(drop=True)

row = row.rename(columns=row.iloc[0])
row = row.drop(labels=row.index[0])

row_values = row.to_dict(orient='records')
row_values = row_values[0]
row_info.update(row_values)
if verbose:
LOGGER.info(f' {project} values: {row_info}')
rows.append(row_info)

consolidated_df = pd.DataFrame(rows)
sheets = {SHEET_NAME: consolidated_df}
if verbose:
LOGGER.info(f'Sheet Name: {SHEET_NAME}')
LOGGER.info(consolidated_df.to_string())
if not dry_run:
output_path = os.path.join(output_folder, OUTPUT_FILENAME)
create_spreadsheet(output_path=output_path, sheets=sheets)
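
The transpose/rename/drop sequence in consolidate_metrics turns each project's two-column Metrics sheet into a single wide row keyed by metric name. A minimal sketch of that reshaping with made-up values:

import pandas as pd

# Made-up Metrics sheet, shaped like the output of compute_metrics.
df = pd.DataFrame({
    'metric': ['num_issues', 'num_stargazers', 'USR'],
    'value': [120, 450, 0.35],
    'description': ['made up', 'made up', 'made up'],
})

row = df[['metric', 'value']].T        # two rows: metric names, then values
row = row.reset_index(drop=True)
row = row.rename(columns=row.iloc[0])  # first row becomes the column labels
row = row.drop(labels=row.index[0])    # drop the metric-name row

row_values = row.to_dict(orient='records')[0]
print(row_values)  # {'num_issues': 120, 'num_stargazers': 450, 'USR': 0.35}
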
8 changes: 8 additions & 0 deletions github_analytics/constants.py
@@ -0,0 +1,8 @@
"""Shared constants between functions."""

ECOSYSTEM_COLUMN_NAME = 'Ecosystem'

METRIC_COLUMN_NAME = 'metric'
VALUE_COLUMN_NAME = 'value'

METRICS_SHEET_NAME = 'Metrics'
1 change: 0 additions & 1 deletion github_analytics/drive.py
@@ -27,7 +27,6 @@ def split_drive_path(path):
"""Extract the folder and filename from the google drive path string."""
assert is_drive_path(path), f'{path} is not a google drive path'
folder, filename = path[9:].split('/')

return folder, filename


3 changes: 2 additions & 1 deletion github_analytics/main.py
@@ -6,6 +6,7 @@

import pandas as pd

from github_analytics.constants import METRICS_SHEET_NAME
from github_analytics.drive import get_or_create_gdrive_folder
from github_analytics.github.repository import RepositoryClient
from github_analytics.github.repository_owner import RepositoryOwnerClient
@@ -227,7 +228,7 @@ def collect_project_metrics(
}
if add_metrics:
metrics = compute_metrics(issues, pull_requests, users, contributors, stargazers)
sheets = dict({'Metrics': metrics}, **sheets)
sheets = dict({METRICS_SHEET_NAME: metrics}, **sheets)

if output_path:
create_spreadsheet(output_path, sheets)
38 changes: 20 additions & 18 deletions github_analytics/metrics.py
@@ -2,6 +2,8 @@

import pandas as pd

from github_analytics.constants import METRIC_COLUMN_NAME, VALUE_COLUMN_NAME


def compute_metrics(issues, pull_requests, users, contributors, stargazers):
"""Compute metrics for the given data.
@@ -39,48 +41,48 @@ def compute_metrics(issues, pull_requests, users, contributors, stargazers):

return pd.DataFrame([
{
'metric': 'num_issues',
'value': num_issues,
METRIC_COLUMN_NAME: 'num_issues',
VALUE_COLUMN_NAME: num_issues,
'description': 'Total number of Issues',
},
{
'metric': 'num_pull_requests',
'value': num_pull_requests,
METRIC_COLUMN_NAME: 'num_pull_requests',
VALUE_COLUMN_NAME: num_pull_requests,
'description': 'Total number of Pull Requests',
},
{
'metric': 'num_users',
'value': num_users,
METRIC_COLUMN_NAME: 'num_users',
VALUE_COLUMN_NAME: num_users,
'description': 'Total number of Issue Users',
},
{
'metric': 'num_contgributors',
'value': num_contributors,
METRIC_COLUMN_NAME: 'num_contgributors',
VALUE_COLUMN_NAME: num_contributors,
'description': 'Total number of Contributors',
},
{
'metric': 'num_stargazers',
'value': num_stargazers,
METRIC_COLUMN_NAME: 'num_stargazers',
VALUE_COLUMN_NAME: num_stargazers,
'description': 'Total number of Stargazers',
},
{
'metric': 'num_non_contributor_users',
'value': num_non_contrib_users,
METRIC_COLUMN_NAME: 'num_non_contributor_users',
VALUE_COLUMN_NAME: num_non_contrib_users,
'description': 'Total number of Users that are not Contributors',
},
{
'metric': 'num_non_contributor_stargazers',
'value': num_non_contrib_stars,
METRIC_COLUMN_NAME: 'num_non_contributor_stargazers',
VALUE_COLUMN_NAME: num_non_contrib_stars,
'description': 'Total number of Stargazers that are not Contributors',
},
{
'metric': 'USR',
'value': usr,
METRIC_COLUMN_NAME: 'USR',
VALUE_COLUMN_NAME: usr,
'description': 'Users / Stargazers ratio',
},
{
'metric': 'USR-C',
'value': usrc,
METRIC_COLUMN_NAME: 'USR-C',
VALUE_COLUMN_NAME: usrc,
'description': 'USR Excluding Contributors',
},
])
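
Since METRIC_COLUMN_NAME and VALUE_COLUMN_NAME resolve to the same 'metric' and 'value' literals used before, this change is a pure refactor: the Metrics sheet keeps the exact column names that consolidate_metrics selects. A quick sanity-check sketch:

import pandas as pd

from github_analytics.constants import METRIC_COLUMN_NAME, VALUE_COLUMN_NAME

# The constants are the same literal strings as before, so the emitted
# DataFrame's column names are unchanged by the refactor.
frame = pd.DataFrame([
    {METRIC_COLUMN_NAME: 'num_issues', VALUE_COLUMN_NAME: 42, 'description': 'made-up value'},
])
assert list(frame.columns) == ['metric', 'value', 'description']
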
11 changes: 8 additions & 3 deletions github_analytics/output.py
@@ -97,10 +97,15 @@ def load_spreadsheet(spreadsheet, sheet_name=None):
path = spreadsheet

sheets = pd.read_excel(spreadsheet, sheet_name=sheet_name)
for sheet in sheets.values(): # noqa
if not sheet_name:
for sheet in sheets.values(): # noqa
for column in DATE_COLUMNS:
if column in sheet:
sheet[column] = pd.to_datetime(sheet[column], utc=True).dt.tz_convert(None)
else:
for column in DATE_COLUMNS:
if column in sheet:
sheet[column] = pd.to_datetime(sheet[column], utc=True).dt.tz_convert(None)
if column in sheets:
sheets[column] = pd.to_datetime(sheets[column], utc=True).dt.tz_convert(None)

LOGGER.info('Loaded spreadsheet %s', path)

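
The new branch in load_spreadsheet reflects how pandas' read_excel behaves: sheet_name=None returns a dict of DataFrames keyed by sheet name, while a specific sheet name returns a single DataFrame, so the date-column conversion has to walk either the dict values or the lone frame. A minimal illustration, assuming a hypothetical local example.xlsx:

import pandas as pd

all_sheets = pd.read_excel('example.xlsx', sheet_name=None)       # dict: {sheet name -> DataFrame}
one_sheet = pd.read_excel('example.xlsx', sheet_name='Metrics')   # single DataFrame

print(type(all_sheets))  # <class 'dict'>
print(type(one_sheet))   # <class 'pandas.core.frame.DataFrame'>
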
6 changes: 3 additions & 3 deletions github_analytics/summarize.py
@@ -5,6 +5,7 @@

import pandas as pd

from github_analytics.constants import ECOSYSTEM_COLUMN_NAME
from github_analytics.output import create_spreadsheet, load_spreadsheet
from github_analytics.time_utils import get_current_year, get_min_max_dt_in_year

@@ -13,7 +14,6 @@

LOGGER = logging.getLogger(__name__)

ECOSYSTEM_COLUMN_NAME = 'Ecosystem'
TOTAL_COLUMN_NAME = 'Total Since Beginning'
OUTPUT_FILENAME = 'GitHub_Summary'
SHEET_NAMES = ['Unique users', 'User issues', 'vendor-mapping']
@@ -60,10 +60,10 @@ def summarize_metrics(

dry_run (bool):
Whether of not to actually upload the summary results.
If true, it just calculate the summary results. Defaults to False.
If True, it just calculate the summary results. Defaults to False.

verbose (bool):
If true, will output the dataframes of the summary metrics
If True, will output the dataframes of the summary metrics
(one dataframe for each sheet). Defaults to False.

"""