|
7 | 7 | The motivation behind the `StatisticalGraphics` package is the desire to develop a powerful, yet easy-to-use solution for creating statistical graphics in Julia. |
8 | 8 | The package uses [`vega`](https://vega.github.io/vega/) (see also [D3](https://d3js.org/)) for producing the final outputs. |
9 | 9 |
|
10 | | - |
| 10 | +See [documentation](https://sl-solution.github.io/StatisticalGraphics.jl/stable/) for more information. |
11 | 11 |
|
12 | | -# Examples |
| 12 | +[](https://sl-solution.github.io/StatisticalGraphics.jl/stable/) |
13 | 13 |
|
14 | | -```julia |
15 | | -using StatisticalGraphics |
16 | | -using InMemoryDatasets |
17 | | -ds = Dataset(x=1:100, y=rand(100), y2=rand(100) .+ 5, group=rand(1:2, 100)); |
18 | | -sgplot( |
19 | | - ds, |
20 | | - [ |
21 | | - Line(x=:x, y=:y2, y2axis=true, group=:group), |
22 | | - Scatter(x=:x, y=:y2, group=:group) |
23 | | - ], |
24 | | - nominal = [:group], |
25 | | - xaxis = Axis(grid=true), |
26 | | - yaxis = Axis(grid=true) |
27 | | - ) |
28 | | -``` |
29 | | - |
30 | | - |
31 | | - |
32 | | -**Histogram** |
33 | | - |
34 | | -Histogram of a column overlaid by kde and fitted normal distribution. |
35 | | - |
36 | | -```julia |
37 | | -ds = Dataset(x=randn(100)); |
38 | | -sgplot( |
39 | | - ds, |
40 | | - [ |
41 | | - Histogram(x=:x, color=:steelblue, outlinethickness=0.5, space=0.5), |
42 | | - Density(x=:x, type=:kernel, color=:red, fillopacity=0.3), |
43 | | - Density(x=:x, color=:green, fillopacity=0.3) |
44 | | - ], |
45 | | - xaxis = Axis(offset=10, domain=false), |
46 | | - yaxis = Axis(offset=10, domain=false, grid=true) |
47 | | - ) |
48 | | -``` |
49 | | - |
50 | | - |
51 | | - |
52 | | -**Iris Data** |
53 | | - |
54 | | -```julia |
55 | | -iris = filereader("assets/iris.csv") |
56 | | -colors = [:steelblue, :darkorange, :darkred, :green] |
57 | | -sgplot( |
58 | | - iris, |
59 | | - reduce(vcat, |
60 | | - [ |
61 | | - [ |
62 | | - Violin(x=i, category=5, color=:white), |
63 | | - BoxPlot(x=i, category=5, boxwidth=0.1, whiskerdash=[0]), |
64 | | - Scatter(x=i, y=5, jitter=[0,20], color=colors[i], outlinecolor=:white, opacity=0.5) |
65 | | - ] |
66 | | - for i in 1:4 |
67 | | - ] |
68 | | - ), |
69 | | - wallcolor=:lightgray, |
70 | | - xaxis=Axis(offset=10, grid=true, gridcolor=:white, values = -1:9, domain=false, ticksize=0), |
71 | | - yaxis=Axis(offset=10, padding=0.1, domain=false, ticksize=0), |
72 | | - groupcolormodel=colors, |
73 | | - font="Times" |
74 | | - ) |
75 | | -``` |
76 | | - |
77 | | - |
78 | | - |
79 | | -**unemployment stacked area plot across industries** |
80 | | - |
81 | | -Reproducing an example from the [`vega`](https://vega.github.io)'s examples collection. |
82 | | - |
83 | | -```julia |
84 | | -unemployment = filereader("assets/unemployment_across_industry.csv", types = Dict(2=>Date)) |
85 | | -sort!(unemployment, :series, rev=true) # keep alphabetical order |
86 | | -modify!(groupby(unemployment, :date), :count=>cumsum=>:cum_sum) |
87 | | -sort!(unemployment, [:date,:cum_sum], rev=[false,true]) # put the larger areas behind the smaller one |
88 | | - |
89 | | -sgplot( |
90 | | - unemployment, |
91 | | - Band(x=:date, lower=0.0, upper=:cum_sum, group=:series, opacity=1), |
92 | | - nominal = [:series], |
93 | | - xaxis=Axis(type=:time, nice=false), |
94 | | - yaxis=Axis(title=""), |
95 | | - groupcolormodel = Dict(:scheme=>"category20b"), |
96 | | - ) |
97 | | -``` |
98 | | - |
99 | | - |
100 | | - |
101 | | -# Examples - grouped datasets |
102 | | - |
103 | | -**Cars example** |
104 | | - |
105 | | -The following bar chart shows the average horsepower of different cars (bar categories) across different numbers of cylinders (panel). The color of each bar is computed based on the mean of acceleration inside each group of cars (bar categories) and the bars inside each panel are sorted by the maximum horsepower. |
106 | | - |
107 | | -```julia |
108 | | -cars = filereader("assets/cars.csv", types = Dict(9=>Date)) |
109 | | -make_fmt(x) = split(x)[1] |
110 | | -setformat!(cars, :Name => make_fmt) |
111 | | -sgplot( |
112 | | - groupby(cars, :Cylinders), |
113 | | - Bar(response=:Horsepower, x=:Name, |
114 | | - stat=IMD.mean, |
115 | | - colorresponse=:Acceleration, |
116 | | - colorstat=IMD.mean, |
117 | | - orderresponse=:Horsepower, |
118 | | - orderstat=IMD.maximum, |
119 | | - outlinethickness=0.5, |
120 | | - space=0, |
121 | | - colormodel=["#d53e4f", "#fc8d59", "#fee08b", "#e6f598", "#99d594"] |
122 | | - ), |
123 | | - |
124 | | - layout = :row, |
125 | | - columnspace = 5, |
126 | | - linkaxis=:y, |
127 | | - proportional=true, |
128 | | - |
129 | | - stepsize=15, |
130 | | - xaxis=Axis(title="Make", angle=-90, baseline=:middle, align=:right, ticksize=0, domain=false, titlepadding=20), |
131 | | - yaxis=Axis(title="Horsepower", domain=false), |
132 | | - |
133 | | - headercolname = false, |
134 | | - headersize=12, |
135 | | - headerfontweight=900, |
136 | | - |
137 | | - height=400, |
138 | | - ) |
139 | | -``` |
140 | | - |
141 | | - |
142 | | - |
143 | | -**usage** |
144 | | - |
145 | | -```julia |
146 | | -panel_example = Dataset(rand(1:4, 1000, 10), :auto) |
147 | | -sgplot( |
148 | | - gatherby(panel_example, [:x3, :x4]), |
149 | | - Bar(x=:x1, group=:x2), |
150 | | - nominal = [:x2], |
151 | | - layout = :lattice, |
152 | | - width = 100, |
153 | | - height = 100 |
154 | | - ) |
155 | | -``` |
156 | | - |
157 | | - |
158 | | - |
159 | | -**panel** |
160 | | - |
161 | | -```julia |
162 | | -sgplot( |
163 | | - groupby(panel_example, [:x5, :x6]), |
164 | | - Pie(category=:x7, |
165 | | - label=:both, |
166 | | - labelsize=8, |
167 | | - innerradius=0.4 |
168 | | - ), |
169 | | - width = 100, |
170 | | - height = 100, |
171 | | - columns=5, |
172 | | - legend=false |
173 | | - ) |
174 | | -``` |
175 | | - |
176 | | - |
177 | | - |
178 | | - |
179 | | -```julia |
180 | | -fun_example = Dataset(rand(1:4, 1000, 4), :auto) |
181 | | -sgplot( |
182 | | - gatherby(fun_example, [:x3, :x4]), |
183 | | - Bar(x=:x1, group=:x2, barcorner=15), |
184 | | - nominal = :x2, |
185 | | - layout = :lattice, |
186 | | - rowspace=5, |
187 | | - columnspace=5, |
188 | | - width = 100, |
189 | | - height = 100, |
190 | | - wallcolor=:lightgray, |
191 | | - showheaders = false, |
192 | | - xaxis=Axis(show=false), |
193 | | - yaxis=Axis(show=false), |
194 | | - legend=false, |
195 | | - clip=false |
196 | | - ) |
197 | | -``` |
198 | | - |
199 | | - |
200 | | - |
201 | | -**[U-District Cuisine Example](https://vega.github.io/vega/examples/u-district-cuisine/)** |
202 | | - |
203 | | -Reproducing an example from the [`vega`](https://vega.github.io)'s examples collection. |
204 | | - |
205 | | -```julia |
206 | | -udistrict = filereader("assets/udistrict.csv") |
207 | | -# contains some information - use to customise the appearance |
208 | | -udistrict_info = filereader("assets/udistrict_info.csv", quotechar='"') |
209 | | - |
210 | | -# order data |
211 | | -leftjoin!(udistrict, udistrict_info, on = :key) |
212 | | -sort!(udistrict, :order) |
213 | | - |
214 | | -# actual graph |
215 | | -sgplot( |
216 | | - gatherby(udistrict, :names), |
217 | | - |
218 | | - Density(x=:lat, type=:kernel, bw=0.0005, npoints=200, |
219 | | - scale=(x; samplesize, args...)->x .* samplesize, # to match the scale in the original example |
220 | | - |
221 | | - group=:names, |
222 | | - grouporder=:data, |
223 | | - |
224 | | - fillopacity=0.7, |
225 | | - color=:white |
226 | | - ), |
227 | | - yaxis=Axis(show=false), |
228 | | - xaxis=Axis(title="", |
229 | | - grid=true, |
230 | | - griddash=[2], |
231 | | - values=([47.6516, 47.655363, 47.6584, 47.6614, 47.664924, 47.668519], ["Boat St.", "40th St.", "42nd St.", "45th St.", "50th St.", "55th St."]) |
232 | | - ), |
233 | | - |
234 | | - layout=:column, |
235 | | - width=800, |
236 | | - height=70, |
237 | | - rowspace=-50, # to force overlaps |
238 | | - panelborder=false, |
239 | | - |
240 | | - headercolname=false, |
241 | | - headerangle=0, |
242 | | - headerloc=:start, |
243 | | - headeralign=:left, |
244 | | - |
245 | | - # set the font for the whole graph |
246 | | - font="Times", |
247 | | - italic=true, |
248 | | - fontweight=100, |
249 | | - |
250 | | - # change default colors |
251 | | - groupcolormodel=udistrict_info[:, :color], |
252 | | - |
253 | | - legend=false |
254 | | - ) |
255 | | -``` |
256 | | - |
257 | | - |
258 | | - |
259 | | -**Scatter plot with regression line** |
260 | | - |
261 | | -```julia |
262 | | -movies = filereader("assets/movies.csv", dlmstr="::") |
263 | | -using Chain |
264 | | -@chain movies begin |
265 | | - delete("Major Genre", by = contains("Concert"), missings=false) |
266 | | - groupby("Major Genre") |
267 | | - sgplot( |
268 | | - [ |
269 | | - Scatter(x="Rotten Tomatoes Rating", y="IMDB Rating", size=10), |
270 | | - Reg( |
271 | | - x="Rotten Tomatoes Rating", y="IMDB Rating", |
272 | | - degree=3, |
273 | | - clm=true, |
274 | | - ) |
275 | | - ], |
276 | | - xaxis=Axis(grid=true,gridcolor=:white), |
277 | | - yaxis=Axis(grid=true,gridcolor=:white), |
278 | | - height=200, |
279 | | - width=200, |
280 | | - columns=4, |
281 | | - columnspace=15, |
282 | | - rowspace=15, |
283 | | - headercolname=false, |
284 | | - headeroffset=-20, |
285 | | - headercolor=:white, |
286 | | - headersize=20, |
287 | | - headeritalic=true, |
288 | | - wallcolor=:lightgray, |
289 | | - clip=false |
290 | | - ) |
291 | | -end |
292 | | -``` |
293 | | - |
294 | | - |
295 | | - |
296 | | -**automatic labelling for Scatter and Bubble plots** |
297 | | - |
298 | | -```julia |
299 | | -using Chain |
300 | | -using DLMReader |
301 | | -nations = filereader("assets/nations.csv", emptycolname=true, quotechar='"') |
302 | | -@chain nations begin |
303 | | - sort([:population, :continent], rev=[true, false]); |
304 | | - filter(:year, by = ==(2010)); |
305 | | - sgplot( |
306 | | - Bubble(x=:gdpPercap, |
307 | | - y=:lifeExp, |
308 | | - colorresponse=:region, |
309 | | - colormodel=:category, |
310 | | - size=:population, |
311 | | - outlinecolor=:white, |
312 | | - labelresponse=:country, |
313 | | - labelsize=8, |
314 | | - labelcolor=:colorresponse, |
315 | | - maxsize=70, |
316 | | - tooltip=true |
317 | | - ), |
318 | | - clip=false, |
319 | | - xaxis=Axis(type=:log, nice=false), |
320 | | - ) |
321 | | -end |
322 | | -``` |
323 | | - |
324 | | - |
325 | | - |
326 | | -**Polygon example** |
327 | | - |
328 | | -```julia |
329 | | -using Chain |
330 | | -triangle(a, mul=[1,1,1]) = [(0.0, 0.0) .* mul[1], (sqrt(2 * a^2 - 2 * a^2 * cos(a)), 0.0) .* mul[2], ((a^2 - a^2 * cos(a)) / sqrt( |
331 | | - 2 * a^2 - 2 * a^2 * cos(a)), (a^2 * sin(a)) / sqrt(2 * a^2 - 2 * a^2 * cos(a))) .* mul[3]] |
332 | | -ds = Dataset(x=range(0.01, 3, step=0.091)) |
333 | | -@chain ds begin |
334 | | - modify!( |
335 | | - :x => byrow(x->x/10) => :opacity, |
336 | | - :x => byrow(triangle) => :t1, |
337 | | - :x => byrow(x->triangle(x, [(1,-1), (1,-1), (3.1,-1)])) => :t2 |
338 | | - ) |
339 | | - |
340 | | - flatten!(r"^t") |
341 | | - |
342 | | - modify!( |
343 | | - :t1 => splitter => [:x1, :y1], |
344 | | - :t2 => splitter => [:x2, :y2] |
345 | | - ) |
346 | | - sgplot( |
347 | | - [ |
348 | | - Polygon(x="x$i", y="y$i", |
349 | | - id=:x, |
350 | | - opacityresponse=:opacity, |
351 | | - color=:darkgreen, |
352 | | - outline=false) |
353 | | - for i in 1:2 |
354 | | - ], |
355 | | - height=200, |
356 | | - width=800, |
357 | | - xaxis=Axis(show=false), |
358 | | - yaxis=Axis(show=false) |
359 | | - ) |
360 | | -end |
361 | | -``` |
362 | | - |
363 | | - |
364 | | - |
365 | | -## the `sggrid` function |
366 | | - |
367 | | -```julia |
368 | | -ds = Dataset(x=randn(100), y=randn(100)); |
369 | | -h_x = sgplot(ds, Histogram(x=:x, space=0), xaxis=Axis(show=false), yaxis=Axis(show=false), height=200); |
370 | | -h_y = sgplot(ds, Histogram(y=:y, space=0), xaxis=Axis(show=false), yaxis=Axis(show=false), width=200); |
371 | | -xy = sgplot(ds, Scatter(x=:x, y=:y), xaxis=Axis(domain=false), yaxis=Axis(domain=false)); |
372 | | -sggrid(h_x, sggrid(xy, h_y), columns=1) |
373 | | -``` |
374 | | - |
375 | | - |
0 commit comments