@@ -64,7 +64,7 @@ def parse_arguments():
6464 return args
6565
6666
67- def visualize_top_25_tools (args ):
67+ def plot_top_25_tools (args ):
6868 """
6969 Create a bar chart for the top 25 legal tools
7070 """
@@ -125,7 +125,81 @@ def millions_formatter(x, pos):
125125 LOGGER .info ("Visualization by license type created." )
126126
127127
128- # def visualize_by_country(data, args):
def plot_totals_by_product(args):
    """
    Create a bar chart of the totals by product

    Reads "gcs_totals_by_product.csv" from PATHS["data_2-process"], draws a
    horizontal bar chart of the "Count" column per "CC legal tool product"
    on a logarithmic x-axis, and annotates each bar with its count.

    args is the parsed command-line namespace. This function reads:
    - args.quarter: shown in the plot title
    - args.enable_save: when true, the image is written to
      PATHS["data_phase"] and the README is updated through
      shared.update_readme
    - args.show_plots: when true, the plot is displayed with plt.show()
    """
    file_path = shared.path_join(
        PATHS["data_2-process"], "gcs_totals_by_product.csv"
    )
    # NOTE(review): a bare __doc__ inside a function resolves to the module
    # docstring, not this function's docstring -- confirm that is intended.
    LOGGER.info(__doc__)
    LOGGER.info(f"data file: {file_path.replace(PATHS['repo'], '.')}")
    data = pd.read_csv(file_path)

    plt.figure(figsize=(10, 5))
    y_column = "CC legal tool product"
    ax = sns.barplot(
        data,
        x="Count",
        y=y_column,
        hue=y_column,
        palette="pastel",
        legend=False,
    )
    # Categorical bars are drawn at positions 0..n-1, so annotate by position
    # instead of by DataFrame index label; the two only coincide when the
    # frame has a default RangeIndex.
    for position, count in enumerate(data["Count"]):
        ax.annotate(
            f"{count:>15,d}",
            # x is an offset in points from the left edge of the axes,
            # y is the bar position in data coordinates
            (80, position),
            xycoords=("axes points", "data"),
            color="black",
            fontsize="x-small",
            horizontalalignment="right",
            verticalalignment="center",
        )
    plt.title(f"Totals by product ({args.quarter})")
    plt.ylabel("Creative Commons (CC) legal tool product")
    plt.xscale("log")
    plt.xlabel("Number of references")

    # Use the millions formatter for x-axis
    def millions_formatter(x, pos):
        """
        The two args are the value and tick position
        """
        return f"{x * 1e-6:,.0f}M"

    ax.xaxis.set_major_formatter(ticker.FuncFormatter(millions_formatter))

    plt.tight_layout()

    image_path = shared.path_join(
        PATHS["data_phase"], "gcs_totals_by_product.png"
    )
    LOGGER.info(f"image file: {image_path.replace(PATHS['repo'], '.')}")

    # Save BEFORE showing: with a blocking interactive backend the figure is
    # closed when plt.show() returns, and a later plt.savefig() would write
    # a new, empty figure.
    if args.enable_save:
        # Create the directory if it does not exist
        os.makedirs(PATHS["data_phase"], exist_ok=True)
        plt.savefig(image_path)

        # NOTE(review): the README entry references the saved image, so it is
        # only updated when the image is actually written -- confirm.
        shared.update_readme(
            PATHS,
            image_path,
            "Google Custom Search",
            "Bar chart showing how many documents there are for each Creative"
            " Commons (CC) legal tool. **There are a total of"
            f" {data['Count'].sum():,d} documents that are either CC licensed"
            " or put in the public domain using a CC legal tool.**",
            "Totals by product",
            args,
        )

    if args.show_plots:
        plt.show()

    LOGGER.info("Visualization of totals by product created.")
200+
201+
202+ # def plot_by_country(data, args):
129203# """
130204# Create a bar chart for the number of webpages licensed by country.
131205# """
@@ -202,7 +276,7 @@ def millions_formatter(x, pos):
202276# LOGGER.info("Visualization by country created.")
203277#
204278#
205- # def visualize_by_language (data, args):
279+ # def plot_by_language(data, args):
206280# """
207281# Create a bar chart for the number of webpages licensed by language.
208282# """
@@ -286,9 +360,10 @@ def main():
286360 shared .log_paths (LOGGER , PATHS )
287361 shared .git_fetch_and_merge (args , PATHS ["repo" ])
288362
289- # visualize_by_country(data, args)
290- visualize_top_25_tools (args )
291- # visualize_by_language(data, args)
363+ plot_top_25_tools (args )
364+ plot_totals_by_product (args )
365+ # plot_by_country(data, args)
366+ # plot_by_language(data, args)
292367
293368 args = shared .git_add_and_commit (
294369 args ,
0 commit comments