Skip to content

Commit 0713b37

Browse files
committed
add plot_totals_by_product
1 parent f314773 commit 0713b37

File tree

1 file changed

+81
-6
lines changed

1 file changed

+81
-6
lines changed

scripts/3-report/gcs_report.py

Lines changed: 81 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ def parse_arguments():
6464
return args
6565

6666

67-
def visualize_top_25_tools(args):
67+
def plot_top_25_tools(args):
6868
"""
6969
Create a bar chart for the top 25 legal tools
7070
"""
@@ -125,7 +125,81 @@ def millions_formatter(x, pos):
125125
LOGGER.info("Visualization by license type created.")
126126

127127

128-
# def visualize_by_country(data, args):
128+
def plot_totals_by_product(args):
    """
    Create a bar chart of the totals by product

    Reads gcs_totals_by_product.csv from the 2-process data directory, draws
    one horizontal bar per row (log-scaled x-axis) annotated with the row's
    count, optionally saves the image to the phase data directory, and
    updates the README entry for the image.

    args: parsed command-line arguments; this function reads args.quarter,
        args.show_plots and args.enable_save, and passes args through to
        shared.update_readme.
    """
    file_path = shared.path_join(
        PATHS["data_2-process"], "gcs_totals_by_product.csv"
    )
    # Log this function's own summary line. A bare __doc__ here would resolve
    # to the module docstring rather than the docstring above.
    summary = (plot_totals_by_product.__doc__ or "").strip().splitlines()
    LOGGER.info(summary[0] if summary else "plot_totals_by_product")
    LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
    data = pd.read_csv(file_path)

    plt.figure(figsize=(10, 5))
    y_column = "CC legal tool product"
    ax = sns.barplot(
        data,
        x="Count",
        y=y_column,
        hue=y_column,
        palette="pastel",
        legend=False,
    )
    # Bars are drawn in row order, so the annotation's y coordinate is the
    # row's position (not its index label, which only coincides with the
    # position for a default RangeIndex).
    for position, count in enumerate(data["Count"]):
        ax.annotate(
            f"{count:>15,d}",
            # x is 80 points from the left edge of the axes, y is in data
            # coordinates (see xycoords)
            (80, position),
            xycoords=("axes points", "data"),
            color="black",
            fontsize="x-small",
            horizontalalignment="right",
            verticalalignment="center",
        )
    plt.title(f"Totals by product ({args.quarter})")
    plt.ylabel("Creative Commons (CC) legal tool product")
    plt.xscale("log")
    plt.xlabel("Number of references")

    # Use the millions formatter for x-axis
    def millions_formatter(x, pos):
        """
        The two args are the value and tick position
        """
        return f"{x * 1e-6:,.0f}M"

    ax.xaxis.set_major_formatter(ticker.FuncFormatter(millions_formatter))

    plt.tight_layout()

    image_path = shared.path_join(
        PATHS["data_phase"], "gcs_totals_by_product.png"
    )
    LOGGER.info(f"image file: {image_path.replace(PATHS['repo'], '.')}")

    # Save BEFORE showing: once a blocking plt.show() returns, the figure has
    # been closed and a later plt.savefig() would write an empty image.
    if args.enable_save:
        # Create the directory if it does not exist
        os.makedirs(PATHS["data_phase"], exist_ok=True)
        plt.savefig(image_path)

    if args.show_plots:
        plt.show()

    # NOTE(review): the flattened source does not show whether this call is
    # nested under `if args.enable_save:`; it is kept at function level here
    # because args is passed through -- confirm against the repository.
    shared.update_readme(
        PATHS,
        image_path,
        "Google Custom Search",
        "Bar chart showing how many documents there are for each Creative"
        " Commons (CC) legal tool. **There are a total of"
        f" {data['Count'].sum():,d} documents that are either CC licensed"
        " or put in the public domain using a CC legal tool.**",
        "Totals by product",
        args,
    )

    LOGGER.info("Visualization of totals by product created.")
200+
201+
202+
# def plot_by_country(data, args):
129203
# """
130204
# Create a bar chart for the number of webpages licensed by country.
131205
# """
@@ -202,7 +276,7 @@ def millions_formatter(x, pos):
202276
# LOGGER.info("Visualization by country created.")
203277
#
204278
#
205-
# def visualize_by_language(data, args):
279+
# def plot_by_language(data, args):
206280
# """
207281
# Create a bar chart for the number of webpages licensed by language.
208282
# """
@@ -286,9 +360,10 @@ def main():
286360
shared.log_paths(LOGGER, PATHS)
287361
shared.git_fetch_and_merge(args, PATHS["repo"])
288362

289-
# visualize_by_country(data, args)
290-
visualize_top_25_tools(args)
291-
# visualize_by_language(data, args)
363+
plot_top_25_tools(args)
364+
plot_totals_by_product(args)
365+
# plot_by_country(data, args)
366+
# plot_by_language(data, args)
292367

293368
args = shared.git_add_and_commit(
294369
args,

0 commit comments

Comments
 (0)