Skip to content

Commit 071cefa

Browse files
committed
Add linear regression to data science example using kixi.stats
1 parent 6abc223 commit 071cefa

File tree

1 file changed

+33
-12
lines changed

1 file changed

+33
-12
lines changed

notebooks/data_science.clj

Lines changed: 33 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
;; # A small data science example 🔢
22
^{:nextjournal.clerk/visibility {:code :hide}}
33
(ns data-science
4-
(:require [clojure.string :as str]
5-
[clojure.set :refer [join rename-keys project]]
4+
(:require [clojure.set :refer [join rename-keys project]]
5+
[clojure.string :as str]
6+
[dk.ative.docjure.spreadsheet :as ss]
7+
[kixi.stats.core :as kixi-stats]
8+
[kixi.stats.protocols :as kixi-p]
69
[meta-csv.core :as csv]
7-
[next.jdbc.sql :as sql]
810
[next.jdbc :as jdbc]
911
[next.jdbc.result-set :as rs]
10-
[dk.ative.docjure.spreadsheet :as ss]
12+
[next.jdbc.sql :as sql]
1113
[nextjournal.clerk :as clerk]))
1214

1315
;; # Exploring the world in data
@@ -179,21 +181,40 @@
179181

180182
(clerk/table world-happiness)
181183

184+
;; Next, we're computing a linear regression for this dataset using [kixi.stats](https://github.com/MastodonC/kixi.stats).
185+
^{::clerk/viewer kixi-p/parameters}
186+
(def linear-regression
187+
(transduce identity (kixi-stats/simple-linear-regression :score :gdp) world-happiness))
188+
189+
;; We'll use this linear regression to augment our dataset so each datapoint also gets a `:regression` value.
190+
(def world-happiness+regression
191+
(mapv (fn [{:as datapoint :keys [score]}]
192+
(assoc datapoint :regression (kixi-p/measure linear-regression score)))
193+
world-happiness))
194+
182195
;; Let's graph the relationship between happiness and GDP to get a
183196
;; bird's eye view on the situation over our entire dataset. You can
184197
;; mouse over individual data points to get more info:
185198

199+
^{:nextjournal.clerk/visibility {:code :hide :result :show}}
186200
(clerk/vl
187-
{:data {:values world-happiness}
201+
{:data {:values world-happiness+regression}
188202
:width 700
189203
:height 500
190-
:mark {:type "point"
191-
:tooltip {:field :country}}
192-
:encoding {:x {:field :score
193-
:type :quantitative
194-
:scale {:zero false}}
195-
:y {:field :gdp
196-
:type :quantitative}}})
204+
:layer [{:mark {:type "point"
205+
:tooltip {:field :country}}
206+
:encoding {:x {:field :score
207+
:type :quantitative
208+
:scale {:zero false}}
209+
:y {:field :gdp
210+
:type :quantitative}}}
211+
{:mark {:type "line" :color "#ccc"}
212+
:encoding {:x {:field :score
213+
:type :quantitative
214+
:scale {:zero false}}
215+
:y {:field :regression
216+
:type :quantitative}}}]})
217+
197218

198219
;; It looks, as we might have expected, like richer countries are
199220
;; happier than poor ones in general, though with variations and

0 commit comments

Comments
 (0)