|
1 | 1 | ;; # A small data science example 🔢
|
2 | 2 | ^{:nextjournal.clerk/visibility {:code :hide}}
|
3 | 3 | (ns data-science
|
4 |
| - (:require [clojure.string :as str] |
5 |
| - [clojure.set :refer [join rename-keys project]] |
| 4 | + (:require [clojure.set :refer [join rename-keys project]] |
| 5 | + [clojure.string :as str] |
| 6 | + [dk.ative.docjure.spreadsheet :as ss] |
| 7 | + [kixi.stats.core :as kixi-stats] |
| 8 | + [kixi.stats.protocols :as kixi-p] |
6 | 9 | [meta-csv.core :as csv]
|
7 |
| - [next.jdbc.sql :as sql] |
8 | 10 | [next.jdbc :as jdbc]
|
9 | 11 | [next.jdbc.result-set :as rs]
|
10 |
| - [dk.ative.docjure.spreadsheet :as ss] |
| 12 | + [next.jdbc.sql :as sql] |
11 | 13 | [nextjournal.clerk :as clerk]))
|
12 | 14 |
|
13 | 15 | ;; # Exploring the world in data
|
|
179 | 181 |
|
180 | 182 | (clerk/table world-happiness)
|
181 | 183 |
|
| 184 | +;; Next, we're computing a linear regression for this dataset using [kixi.stats](https://github.com/MastodonC/kixi.stats). |
| 185 | +^{::clerk/viewer kixi-p/parameters} |
| 186 | +(def linear-regression |
| 187 | + (transduce identity (kixi-stats/simple-linear-regression :score :gdp) world-happiness)) |
| 188 | + |
| 189 | +;; We'll use this linear regression to augment our dataset so each datapoint also gets a `:regression` value. |
| 190 | +(def world-happiness+regression |
| 191 | + (mapv (fn [{:as datapoint :keys [score]}] |
| 192 | + (assoc datapoint :regression (kixi-p/measure linear-regression score))) |
| 193 | + world-happiness)) |
| 194 | + |
182 | 195 | ;; Let's graph the relationship between happiness and GDP to get a
|
183 | 196 | ;; bird's eye view on the situation over our entire dataset. You can
|
184 | 197 | ;; mouse over individual data points to get more info:
|
185 | 198 |
|
| 199 | +^{:nextjournal.clerk/visibility {:code :hide :result :show}} |
186 | 200 | (clerk/vl
|
187 |
| - {:data {:values world-happiness} |
| 201 | + {:data {:values world-happiness+regression} |
188 | 202 | :width 700
|
189 | 203 | :height 500
|
190 |
| - :mark {:type "point" |
191 |
| - :tooltip {:field :country}} |
192 |
| - :encoding {:x {:field :score |
193 |
| - :type :quantitative |
194 |
| - :scale {:zero false}} |
195 |
| - :y {:field :gdp |
196 |
| - :type :quantitative}}}) |
| 204 | + :layer [{:mark {:type "point" |
| 205 | + :tooltip {:field :country}} |
| 206 | + :encoding {:x {:field :score |
| 207 | + :type :quantitative |
| 208 | + :scale {:zero false}} |
| 209 | + :y {:field :gdp |
| 210 | + :type :quantitative}}} |
| 211 | + {:mark {:type "line" :color "#ccc"} |
| 212 | + :encoding {:x {:field :score |
| 213 | + :type :quantitative |
| 214 | + :scale {:zero false}} |
| 215 | + :y {:field :regression |
| 216 | + :type :quantitative}}}]}) |
| 217 | + |
197 | 218 |
|
198 | 219 | ;; It looks, as we might have expected, like richer countries are
|
199 | 220 | ;; happier than poor ones in general, though with variations and
|
|
0 commit comments