Mbed-TLS · mpg · Jun 30, 2020 · Sep 30, 2020 · Dec 24, 2020 · Dec 24, 2020
diff --git a/pr-metrics/.gitignore b/pr-metrics/.gitignore
@@ -0,0 +1,4 @@
+__pycache__
+pr-data.p
+*.png
+*.csv
diff --git a/pr-metrics/Readme.md b/pr-metrics/Readme.md
@@ -0,0 +1,47 @@
+These scripts collect some metrics about mbed TLS PRs over time.
+
+Usage
+-----
+
+1. `./get-pr-data.py` - this takes a long time and requires the environment
+   variable `GITHUB_API_TOKEN` to be set to a valid [github API
+token](https://help.github.com/en/github/authenticating-to-github/creating-a-personal-access-token) (unauthenticated access to the API has a limit on the number or requests that is too low for our number of PRs). It generates `pr-data.p` with pickled data.
+2. `PR_LAST_DATE=20yy-mm-dd ./do.sh` - this works offline from the data in
+   `pr-data.p` and generates a bunch of png and csv files.
+
+Requirements
+------------
+
+These scripts require:
+
+- Python >= 3.6 (required by recent enough matplotlib)
+- matplotlib >= 3.1 (3.0 doesn't work)
+- PyGithub >= 1.43 (any version should work, that was just the oldest tested)
+
+### Ubuntu 20.04 (and probaly 18.04)
-### Ubuntu 20.04 (and probaly 18.04)
+### Ubuntu 20.04 (and probably 18.04)
-### Ubuntu 20.04 (and probaly 18.04)
+### Ubuntu 20.04 (and probably 18.04)
+
+A simple `apt install python3-github python3-matplotlib` is enough.
+
+### Ubuntu 16.04
+
+On Ubuntu 16.04, by default only Python 3.5 is available, which doesn't
+support a recent enough matplotlib to support those scripts, so the following
+was used to run those scripts on 16.04:
+
+    sudo add-apt-repository ppa:deadsnakes/ppa
+    sudo apt update
+    sudo apt install python3.6 python3.6-venv
+    python3.6 -m venv 36env
+    source 36env/bin/activate
+    pip install --upgrade pip
+    pip install matlplotlib
+    pip install pygithub
+
+See `requirements.txt` for an example of a set of working versions.
+
+Note: if you do this, I strongly recommend uninstalling python3.6,
+python3.6-venv and all their dependencies, then removing the deadsnakes PPA
+before any upgrade to 18.04. Failing to do so will result in
+dependency-related headaches as some packages in 18.04 depend on a specific
+version of python3.6 but the version from deadsnakes is higher, so apt won't
+downgrade it and manual intervention will be required.
diff --git a/pr-metrics/do.sh b/pr-metrics/do.sh
@@ -0,0 +1,9 @@
+#!/bin/sh
+
+set -eu
+
+for topic in created closed pending lifetime; do
+    echo "PRs $topic..."
+    rm -f prs-${topic}.png prs-${topic}.csv
+    ./pr-${topic}.py > prs-${topic}.csv
+done
diff --git a/pr-metrics/get-pr-data.py b/pr-metrics/get-pr-data.py
@@ -0,0 +1,41 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Get PR data from github and pickle it."""
+
+import pickle
+import os
+
+from github import Github
+
+if "GITHUB_API_TOKEN" in os.environ:
+    token = os.environ["GITHUB_API_TOKEN"]
+else:
+    print("You need to provide a GitHub API token")
+
+g = Github(token)
+r = g.get_repo("ARMMbed/mbedtls")
+
+prs = list()
+for p in r.get_pulls(state="all"):
+    print(p.number)
+    # Accessing p.mergeable forces completion of PR data (by default, only
+    # basic info such as status and dates is available) but makes things
+    # slower (about 10x). Only do that for open PRs; we don't need the extra
+    # info for old PRs (only the dates which are part of the basic info).
+    if p.state == 'open':
+        dummy = p.mergeable
+    prs.append(p)
+
+# After a branch has been updated, github doesn't immediately go and recompute
+# potential conflicts for all open PRs against this branch; instead it does
+# that when the info is requested and even then it's done asynchronously: the
+# first request might return no data, but if we come back after we've done all
+# the other PRs, the info should have become available in the meantime.
+for p in prs:
+    if p.state == 'open' and p.mergeable is None:
+        print(p.number, 'update')
+        p.update()
+
+with open("pr-data.p", "wb") as f:
+    pickle.dump(prs, f)
diff --git a/pr-metrics/pending-mergeability.py b/pr-metrics/pending-mergeability.py
@@ -0,0 +1,36 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce summary or PRs pending per branch and their mergeability status."""
+
+import pickle
+from datetime import datetime
+from collections import Counter
+
+with open("pr-data.p", "rb") as f:
+    prs = pickle.load(f)
+
+c_open = Counter()
+c_mergeable = Counter()
+c_recent = Counter()
+c_recent2 = Counter()
+
+for p in prs:
+    if p.state != "open":
+        continue
+
+    branch = p.base.ref
+    c_open[branch] += 1
+    if p.mergeable:
+        c_mergeable[branch] += 1
+        days = (datetime.now() - p.updated_at).days
+        if days < 31:
+            c_recent[branch] += 1
+        if days < 8:
+            c_recent2[branch] += 1
+
+
+print("              branch:       open,  mergeable,       <31d,        <8d")
+for b in sorted(c_open, key=lambda b: c_open[b], reverse=True):
+    print("{:>20}: {: 10}, {: 10}, {: 10}, {:10}".format(
+            b, c_open[b], c_mergeable[b], c_recent[b], c_recent2[b]))
diff --git a/pr-metrics/pr-closed.py b/pr-metrics/pr-closed.py
@@ -0,0 +1,46 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce graph of PRs closed by time period."""
+
+from prs import pr_dates, quarter, first, last
+
+from collections import Counter
+
+import matplotlib.pyplot as plt
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+cnt_all = Counter()
+cnt_com = Counter()
+
+for beg, end, com in pr_dates():
+    if end is None:
+        continue
+    q = quarter(end)
+    cnt_all[q] += 1
+    if com:
+        cnt_com[q] += 1
+
+quarters = tuple(sorted(q for q in cnt_all if first_q <= q <= last_q))
+
+prs_com = tuple(cnt_com[q] for q in quarters)
+prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters)
+
+width = 0.9
+fig, ax = plt.subplots()
+ax.bar(quarters, prs_com, width, label="community")
+ax.bar(quarters, prs_team, width, label="core team", bottom=prs_com)
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("Number or PRs closed")
+ax.tick_params(axis="x", labelrotation=90)
+fig.suptitle("Number of PRs closed per quarter")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-closed.png")
+
+print("Quarter,community closed,total closed")
+for q in quarters:
+    print("{},{},{}".format(q, cnt_com[q], cnt_all[q]))
diff --git a/pr-metrics/pr-created.py b/pr-metrics/pr-created.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce graph of PRs created by time period."""
+
+from prs import pr_dates, quarter, first, last
+
+from collections import Counter
+
+import matplotlib.pyplot as plt
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+cnt_all = Counter()
+cnt_com = Counter()
+
+for beg, end, com in pr_dates():
+    q = quarter(beg)
+    cnt_all[q] += 1
+    if com:
+        cnt_com[q] += 1
+
+quarters = tuple(sorted(q for q in cnt_all if first_q <= q <= last_q))
+
+prs_com = tuple(cnt_com[q] for q in quarters)
+prs_team = tuple(cnt_all[q] - cnt_com[q] for q in quarters)
+
+width = 0.9
+fig, ax = plt.subplots()
+ax.bar(quarters, prs_com, width, label="community")
+ax.bar(quarters, prs_team, width, label="core team", bottom=prs_com)
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("Number or PRs created")
+ax.tick_params(axis="x", labelrotation=90)
+fig.suptitle("Number of PRs created per quarter")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-created.png")
+
+print("Quarter,community created,total created")
+for q in quarters:
+    print("{},{},{}".format(q, cnt_com[q], cnt_all[q]))
diff --git a/pr-metrics/pr-lifetime.py b/pr-metrics/pr-lifetime.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce graph of lifetime of PRs over time."""
+
+from prs import pr_dates, quarter, first, last
+
+from collections import defaultdict
+
+import matplotlib.pyplot as plt
+from datetime import datetime, timedelta
+
+first_q = quarter(first)
+last_q = quarter(last)
+
+lifetimes_all = defaultdict(list)
+lifetimes_com = defaultdict(list)
+
+for beg, end, com in pr_dates():
+    # If the PR is still open and it's recent, assign an arbitrary large
+    # lifetime. (The exact value doesn't matter for computing the median, as
+    # long as it's greater than the median - that is, as long as we've closed
+    # at least half the PRs created that quarter. Otherwise the large value
+    # will make that pretty visible.)
+    if end is None:
+        today = datetime.now().date()
+        lt_so_far = (today - beg).days
+        lt = max(365, lt_so_far)
+    else:
+        lt = (end - beg).days
+
+    # Shit one month (that is, for q2 count March to May, not April to July).
+    # This is because we want to measure this at the end of each quarter, but
+    # including PRs raised too recently skew the results. Shifting one month
+    # means we had time to look at the PR by the time we generate quaterly
+    # metrics.
+    q = quarter(beg - timedelta(days=30))
+    lifetimes_all[q].append(lt)
+    if com:
+        lifetimes_com[q].append(lt)
+
+quarters = tuple(sorted(q for q in lifetimes_all if first_q <= q <= last_q))
+
+for q in quarters:
+    lifetimes_all[q].sort()
+    lifetimes_com[q].sort()
+
+
+def median(sl):
+    """Return the median value of a sorted list of numbers (0 if empty)."""
+    index = (len(sl) - 1) / 2
+    if index < 0:
+        return 0
+    if int(index) == index:
+        return sl[int(index)]
+
+    i, j = int(index - 0.5), int(index + 0.5)
+    return (sl[i] + sl[j]) / 2
+
+
+med_all = tuple(median(lifetimes_all[q]) for q in quarters)
+med_com = tuple(median(lifetimes_com[q]) for q in quarters)
+
+fig, ax = plt.subplots()
+ax.plot(quarters, med_all, "b-", label="median overall")
+ax.plot(quarters, med_com, "r-", label="median community")
+ax.legend(loc="upper right")
+ax.grid(True)
+ax.set_xlabel("quarter")
+ax.set_ylabel("median lifetime in days of PRs created that quarter (shifted 1 month)")
+ax.tick_params(axis="x", labelrotation=90)
+bot, top = ax.set_ylim()
+ax.set_ylim(0, min(365, top))  # we don't care about values over 1 year
+fig.suptitle("Median lifetime of PRs per quarter (shifted 1 month) (less is better)")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-lifetime.png")
+
+print("Quarter,median overall,median community")
+for q, a, c in zip(quarters, med_all, med_com):
+    print("{},{},{}".format(q, int(a), int(c)))
diff --git a/pr-metrics/pr-pending.py b/pr-metrics/pr-pending.py
@@ -0,0 +1,54 @@
+#!/usr/bin/env python3
+# coding: utf-8
+
+"""Produce graph of PRs pending over time."""
+
+from prs import pr_dates, first, last
+
+from datetime import datetime, timedelta
+from collections import Counter
+
+import matplotlib.pyplot as plt
+
+cnt_tot = Counter()
+cnt_com = Counter()
+
+for beg, end, com in pr_dates():
+    if end is None:
+        tomorrow = datetime.now().date() + timedelta(days=1)
+        n_days = (tomorrow - beg).days
+    else:
+        n_days = (end - beg).days
+    dates = Counter(beg + timedelta(days=i) for i in range(n_days))
+    cnt_tot.update(dates)
+    if com:
+        cnt_com.update(dates)
+
+dates = tuple(sorted(d for d in cnt_tot.keys() if first <= d <= last))
+
+
+def avg(cnt, date):
+    """Average number of open PRs over a week."""
-    """Average number of open PRs over a week."""
+    """Average number of open PRs over a week ending at date."""
-    """Average number of open PRs over a week."""
+    """Average number of open PRs over a week ending at date."""
+    return sum(cnt[date - timedelta(days=i)] for i in range(7)) / 7
+
+
+nb_tot = tuple(avg(cnt_tot, d) for d in dates)
+nb_com = tuple(avg(cnt_com, d) for d in dates)
+nb_team = tuple(tot - com for tot, com in zip(nb_tot, nb_com))
+
+fig, ax = plt.subplots()
+ax.plot(dates, nb_tot, "b-", label="total")
+ax.plot(dates, nb_team, "c-", label="core team")
+ax.plot(dates, nb_com, "r-", label="community")
+ax.legend(loc="upper left")
+ax.grid(True)
+ax.set_xlabel("date")
+ax.set_ylabel("number of open PRs (sliding average over a week)")
+fig.suptitle("Number of PRs pending over time (less is better)")
+fig.set_size_inches(12.8, 7.2)  # default 100 dpi -> 720p
+fig.savefig("prs-pending.png")
+
+print("date,pending total, pending community")
+for d in dates:
+    tot, com = cnt_tot[d], cnt_com[d]
+    print("{},{},{}".format(d, tot, com))