Skip to content

Commit ee3f5f8

Browse files
committed
move plotting code to shared library
1 parent 5ed2a7d commit ee3f5f8

File tree

2 files changed

+153
-141
lines changed

2 files changed

+153
-141
lines changed

scripts/3-report/gcs_report.py

Lines changed: 9 additions & 141 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,7 @@
1111
import traceback
1212

1313
# Third-party
14-
import matplotlib.pyplot as plt
15-
import matplotlib.ticker as ticker
1614
import pandas as pd
17-
from matplotlib import colormaps
1815
from pygments import highlight
1916
from pygments.formatters import TerminalFormatter
2017
from pygments.lexers import PythonTracebackLexer
@@ -23,6 +20,7 @@
2320
sys.path.append(os.path.join(os.path.dirname(__file__), ".."))
2421

2522
# First-party/Local
23+
import plot # noqa: E402
2624
import shared # noqa: E402
2725

2826
# Setup
@@ -71,136 +69,6 @@ def parse_arguments():
7169
return args
7270

7371

74-
def number_formatter(x, pos):
    """
    Format a tick value with a K/M/B magnitude suffix.

    Values of at least one thousand, one million or one billion are scaled
    down and suffixed with "K", "M" or "B" respectively; smaller values are
    left unscaled. The result always has thousands separators and no
    decimal places.

    The two args are the value (x) and tick position (pos); pos is unused
    and only present to satisfy the formatter callback signature.
    """
    # (lower bound, multiplier, suffix), checked from largest to smallest
    magnitudes = (
        (1e9, 1e-9, "B"),
        (1e6, 1e-6, "M"),
        (1e3, 1e-3, "K"),
    )
    for lower_bound, multiplier, suffix in magnitudes:
        if x >= lower_bound:
            return f"{x * multiplier:,.0f}{suffix}"
    return f"{x:,.0f}"
88-
89-
90-
def annotate_ylabels(ax, data, data_label, colors):
    """
    Annotate each bar of a horizontal bar plot with its total and percent.

    For every row of data (in iteration order) two annotations are added to
    ax at the y position matching the row's ordinal: the formatted count,
    placed to the left of the axes, and the row's share of the column total,
    placed just to the right of the axes.

    Args:
        ax: axes to annotate; must provide yaxis.get_major_ticks() and
            annotate() (assumes a matplotlib Axes)
        data: pandas DataFrame with one row per bar
        data_label: name of the column in data holding the integer counts
        colors: non-empty sequence of colors; reused cyclically when data
            has more rows than there are colors

    Returns:
        The ax object that was passed in.
    """
    ytick = ax.yaxis.get_major_ticks(numticks=1)[0]
    # defaults: ytick.major.size + ytick.major.pad
    indent = -1 * (ytick.get_tick_padding() + ytick.get_pad())
    # the column total is identical for every row, so compute it only once
    total = data[data_label].sum()
    for i, (_, row) in enumerate(data.iterrows()):
        # modulo indexing cycles through the palette, so having more rows
        # than colors can never index past the end of colors
        color = colors[i % len(colors)]

        # annotate totals
        ax.annotate(
            f" {row[data_label]:>15,d}",
            (indent, i - 0.1),
            xycoords=("axes points", "data"),
            color=color,
            fontsize="x-small",
            horizontalalignment="right",
            verticalalignment="top",
        )

        # annotate percentages
        percent = row[data_label] / total * 100
        if percent < 0.1:
            percent = "< .1%"
        else:
            percent = f"{percent:4.1f}%"
        ax.annotate(
            percent,
            (1.02, i),
            xycoords=("axes fraction", "data"),
            backgroundcolor=color,
            color="white",
            fontsize="x-small",
            horizontalalignment="left",
            verticalalignment="center",
        )
    return ax
131-
132-
133-
def combined_plot(
    args, data, title, name_label, data_label, bar_xscale=None, bar_ylabel=None
):
    """
    Draw a figure pairing a horizontal bar chart with a pie chart.

    The left axes show the values of the data_label column as horizontal
    bars (annotated with totals and percentages); the right axes show the
    same column as a pie chart.

    Args:
        args: object providing the attributes quarter (shown in the figure
            footer) and show_plots (when truthy, plt.show() is called)
        data: pandas DataFrame with at most 10 rows; its index supplies the
            bar labels
        title: figure title
        name_label: y-axis label of the bar chart when bar_ylabel is None
        data_label: name of the column in data holding the integer counts
        bar_xscale: "log" for a logarithmic bar axis; any other value is
            treated (and reported in the footer) as "linear"
        bar_ylabel: optional override for the bar chart's y-axis label

    Returns:
        The matplotlib.pyplot module, with the new figure current.

    Raises:
        shared.QuantifyingException: if data has more than 10 rows.
    """
    if len(data) > 10:
        raise shared.QuantifyingException(
            "the combined_plot() function is limited to a maximum of 10 data"
            " points"
        )

    # NOTE: this changes pyplot's global rcParams, not just this figure
    plt.rcParams.update({"font.family": "monospace", "figure.dpi": 300})

    # half an inch per row plus one inch of margin, never below 2.5 inches
    height = max(2.5, 1 + len(data) * 0.5)
    fig, (ax1, ax2) = plt.subplots(
        1, 2, figsize=(8, height), width_ratios=(2, 1), layout="constrained"
    )
    colors = colormaps["tab10"].colors

    # 1st axes: horizontal barplot of counts
    # pad tick labels to make room for annotation: every label gets a second
    # line of blanks as wide as the formatted count
    tick_labels = []
    for label, record in data.iterrows():
        padding = " " * len(f"{record[data_label]:,d}")
        tick_labels.append(f"{label}\n{padding}")

    log = bar_xscale == "log"
    if not log:
        # normalize so the footer annotation reports the scale in use
        bar_xscale = "linear"

    ax1.barh(y=tick_labels, width=data[data_label], color=colors, log=log)
    ax1.tick_params(axis="x", which="major", labelrotation=45)
    ax1.set_xlabel("Number of works")
    ax1.xaxis.set_major_formatter(ticker.FuncFormatter(number_formatter))
    ax1.set_ylabel(bar_ylabel if bar_ylabel is not None else name_label)
    ax1 = annotate_ylabels(ax1, data, data_label, colors)

    # 2nd axes: pie chart of percentages
    data.plot.pie(
        ax=ax2,
        y=data_label,
        colors=colors,
        labels=None,
        legend=False,
        radius=1.25,
    )
    ax2.set_title("Percent")
    ax2.set_ylabel(None)

    # plot
    plt.suptitle(title)
    footer = (
        f"Creative Commons (CC)\nbar x scale: {bar_xscale}, data from"
        f" {args.quarter}"
    )
    plt.annotate(
        footer,
        (0.95, 5),
        xycoords=("figure fraction", "figure points"),
        color="gray",
        fontsize="x-small",
        horizontalalignment="right",
    )

    if args.show_plots:
        plt.show()

    return plt
202-
203-
20472
def gcs_intro(args):
20573
"""
20674
Write Google Custom Search (GCS) introduction.
@@ -247,7 +115,7 @@ def plot_products(args):
247115
data = data[::-1] # reverse order
248116

249117
title = "Products totals and percentages "
250-
plt = combined_plot(
118+
plt = plot.combined_plot(
251119
args=args,
252120
data=data,
253121
title=title,
@@ -292,7 +160,7 @@ def plot_tool_status(args):
292160
data.sort_values(name_label, ascending=False, inplace=True)
293161

294162
title = "CC legal tools status"
295-
plt = combined_plot(
163+
plt = plot.combined_plot(
296164
args=args,
297165
data=data,
298166
title=title,
@@ -335,7 +203,7 @@ def plot_current_tools(args):
335203
data.sort_values(name_label, ascending=False, inplace=True)
336204

337205
title = "Current CC legal tools"
338-
plt = combined_plot(
206+
plt = plot.combined_plot(
339207
args=args,
340208
data=data,
341209
title=title,
@@ -377,7 +245,7 @@ def plot_old_tools(args):
377245
data.sort_values(name_label, ascending=False, inplace=True)
378246

379247
title = "Old CC legal tools"
380-
plt = combined_plot(
248+
plt = plot.combined_plot(
381249
args=args,
382250
data=data,
383251
title=title,
@@ -422,7 +290,7 @@ def plot_retired_tools(args):
422290
data.sort_values(name_label, ascending=False, inplace=True)
423291

424292
title = "Retired CC legal tools"
425-
plt = combined_plot(
293+
plt = plot.combined_plot(
426294
args=args,
427295
data=data,
428296
title=title,
@@ -471,7 +339,7 @@ def plot_countries_highest_usage(args):
471339
data = data[::-1] # reverse order
472340

473341
title = "Countries with highest usage of current tools"
474-
plt = combined_plot(
342+
plt = plot.combined_plot(
475343
args=args,
476344
data=data,
477345
title=title,
@@ -524,7 +392,7 @@ def plot_languages_highest_usage(args):
524392
data = data[::-1] # reverse order
525393

526394
title = "Languages with highest usage of current tools"
527-
plt = combined_plot(
395+
plt = plot.combined_plot(
528396
args=args,
529397
data=data,
530398
title=title,
@@ -574,7 +442,7 @@ def plot_free_culture(args):
574442
data = pd.read_csv(file_path, index_col=name_label)
575443

576444
title = "Approved for Free Cultural Works"
577-
plt = combined_plot(
445+
plt = plot.combined_plot(
578446
args=args,
579447
data=data,
580448
title=title,

scripts/plot.py

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
# Standard library
2+
import os
3+
import sys
4+
5+
# Third-party
6+
import matplotlib.pyplot as plt
7+
import matplotlib.ticker as ticker
8+
from matplotlib import colormaps
9+
10+
# Add this module's directory to sys.path so shared can be imported
11+
sys.path.append(os.path.dirname(__file__))
12+
13+
# First-party/Local
14+
import shared # noqa: E402
15+
16+
17+
def annotate_ylabels(ax, data, data_label, colors):
    """
    Annotate each bar of a horizontal bar plot with its total and percent.

    For every row of data (in iteration order) two annotations are added to
    ax at the y position matching the row's ordinal: the formatted count,
    placed to the left of the axes, and the row's share of the column total,
    placed just to the right of the axes.

    Args:
        ax: axes to annotate; must provide yaxis.get_major_ticks() and
            annotate() (assumes a matplotlib Axes)
        data: pandas DataFrame with one row per bar
        data_label: name of the column in data holding the integer counts
        colors: non-empty sequence of colors; reused cyclically when data
            has more rows than there are colors

    Returns:
        The ax object that was passed in.
    """
    ytick = ax.yaxis.get_major_ticks(numticks=1)[0]
    # defaults: ytick.major.size + ytick.major.pad
    indent = -1 * (ytick.get_tick_padding() + ytick.get_pad())
    # the column total is identical for every row, so compute it only once
    total = data[data_label].sum()
    for i, (_, row) in enumerate(data.iterrows()):
        # modulo indexing cycles through the palette, so having more rows
        # than colors can never index past the end of colors
        color = colors[i % len(colors)]

        # annotate totals
        ax.annotate(
            f" {row[data_label]:>15,d}",
            (indent, i - 0.1),
            xycoords=("axes points", "data"),
            color=color,
            fontsize="x-small",
            horizontalalignment="right",
            verticalalignment="top",
        )

        # annotate percentages
        percent = row[data_label] / total * 100
        if percent < 0.1:
            percent = "< .1%"
        else:
            percent = f"{percent:4.1f}%"
        ax.annotate(
            percent,
            (1.02, i),
            xycoords=("axes fraction", "data"),
            backgroundcolor=color,
            color="white",
            fontsize="x-small",
            horizontalalignment="left",
            verticalalignment="center",
        )
    return ax
58+
59+
60+
def combined_plot(
    args, data, title, name_label, data_label, bar_xscale=None, bar_ylabel=None
):
    """
    Draw a figure pairing a horizontal bar chart with a pie chart.

    The left axes show the values of the data_label column as horizontal
    bars (annotated with totals and percentages); the right axes show the
    same column as a pie chart.

    Args:
        args: object providing the attributes quarter (shown in the figure
            footer) and show_plots (when truthy, plt.show() is called)
        data: pandas DataFrame with at most 10 rows; its index supplies the
            bar labels
        title: figure title
        name_label: y-axis label of the bar chart when bar_ylabel is None
        data_label: name of the column in data holding the integer counts
        bar_xscale: "log" for a logarithmic bar axis; any other value is
            treated (and reported in the footer) as "linear"
        bar_ylabel: optional override for the bar chart's y-axis label

    Returns:
        The matplotlib.pyplot module, with the new figure current.

    Raises:
        shared.QuantifyingException: if data has more than 10 rows.
    """
    if len(data) > 10:
        raise shared.QuantifyingException(
            "the combined_plot() function is limited to a maximum of 10 data"
            " points"
        )

    # NOTE: this changes pyplot's global rcParams, not just this figure
    plt.rcParams.update({"font.family": "monospace", "figure.dpi": 300})

    # half an inch per row plus one inch of margin, never below 2.5 inches
    height = max(2.5, 1 + len(data) * 0.5)
    fig, (ax1, ax2) = plt.subplots(
        1, 2, figsize=(8, height), width_ratios=(2, 1), layout="constrained"
    )
    colors = colormaps["tab10"].colors

    # 1st axes: horizontal barplot of counts
    # pad tick labels to make room for annotation: every label gets a second
    # line of blanks as wide as the formatted count
    tick_labels = []
    for label, record in data.iterrows():
        padding = " " * len(f"{record[data_label]:,d}")
        tick_labels.append(f"{label}\n{padding}")

    log = bar_xscale == "log"
    if not log:
        # normalize so the footer annotation reports the scale in use
        bar_xscale = "linear"

    ax1.barh(y=tick_labels, width=data[data_label], color=colors, log=log)
    ax1.tick_params(axis="x", which="major", labelrotation=45)
    ax1.set_xlabel("Number of works")
    ax1.xaxis.set_major_formatter(ticker.FuncFormatter(number_formatter))
    ax1.set_ylabel(bar_ylabel if bar_ylabel is not None else name_label)
    ax1 = annotate_ylabels(ax1, data, data_label, colors)

    # 2nd axes: pie chart of percentages
    data.plot.pie(
        ax=ax2,
        y=data_label,
        colors=colors,
        labels=None,
        legend=False,
        radius=1.25,
    )
    ax2.set_title("Percent")
    ax2.set_ylabel(None)

    # plot
    plt.suptitle(title)
    footer = (
        f"Creative Commons (CC)\nbar x scale: {bar_xscale}, data from"
        f" {args.quarter}"
    )
    plt.annotate(
        footer,
        (0.95, 5),
        xycoords=("figure fraction", "figure points"),
        color="gray",
        fontsize="x-small",
        horizontalalignment="right",
    )

    if args.show_plots:
        plt.show()

    return plt
129+
130+
131+
def number_formatter(x, pos):
    """
    Format a tick value with a K/M/B magnitude suffix.

    Values of at least one thousand, one million or one billion are scaled
    down and suffixed with "K", "M" or "B" respectively; smaller values are
    left unscaled. The result always has thousands separators and no
    decimal places.

    The two args are the value (x) and tick position (pos); pos is unused
    and only present to satisfy the formatter callback signature.
    """
    # (lower bound, multiplier, suffix), checked from largest to smallest
    magnitudes = (
        (1e9, 1e-9, "B"),
        (1e6, 1e-6, "M"),
        (1e3, 1e-3, "K"),
    )
    for lower_bound, multiplier, suffix in magnitudes:
        if x >= lower_bound:
            return f"{x * multiplier:,.0f}{suffix}"
    return f"{x:,.0f}"

0 commit comments

Comments
 (0)