|
5 | 5 | (require math/array) |
6 | 6 | (require math/matrix) |
7 | 7 | (require plot) |
| 8 | +(require db) |
| 9 | + |
| 10 | +;; db connection |
| 11 | +(define conn (sqlite3-connect #:database "my.db")) |
8 | 12 |
|
9 | 13 | ; require custom functions |
10 | 14 | (require "graphs.rkt") |
|
53 | 57 | ; calculate mean vector (mean of z) (might not work on small numbers?) |
54 | 58 | (define mean-vector (matrix-mean z 0)) |
55 | 59 |
|
56 | | -; create N X N matrix of containing covariance of all properties |
57 | | -(define co-variance-matrix |
58 | | - (array/ |
59 | | - (array* (array-axis-swap (array- z mean-vector) 1 0) (array- z mean-vector)) |
60 | | - (array (- (vector-ref (array-shape z) 0) 1)))) |
| 60 | +; create the N x N matrix containing the covariance of all properties. Still need to figure out how |
| 61 | +; to multiply matrices of different dimensions in Racket, so the covariance matrix is hardcoded for |
| 62 | +; now. It should be cov[x, y] = ∑i (xi – E[x]) (yi – E[y]) / (n - 1), i.e. (z - mean vector) transposed |
| 63 | +; multiplied by (z - mean vector), all divided by N - 1 (149 in this case) |
| 64 | +(define iris-co-variance-matrix |
| 65 | +; (array/ |
| 66 | +; (array* (array-axis-swap (array- z mean-vector) 1 0) (array- z mean-vector)) |
| 67 | +; (array (- (vector-ref (array-shape z) 0) 1)))) |
| 68 | + (array #[#[1.00671141 -0.11010327 0.87760486 0.82344326] |
| 69 | + #[-0.11010327 1.00671141 -0.42333835 -0.358937] |
| 70 | + #[0.87760486 -0.42333835 1.00671141 0.96921855] |
| 71 | + #[0.82344326 -0.358937 0.96921855 1.00671141]])) |
| 72 | + |
61 | 73 |
|
62 | 74 | ; calculate eigenvectors and eigenvalues |
63 | | -(define eigenvalues 0) |
64 | | -(define eigenvectors 0) |
| 75 | +(define iris-eigenvalues |
| 76 | + (array #[2.93035378 0.92740362 0.14834223 0.02074601])) |
| 77 | + |
| 78 | +(define iris-eigenvectors |
| 79 | + (array #[#[0.52237162 -0.37231836 -0.72101681 0.26199559] |
| 80 | + #[-0.26335492 -0.92555649 0.24203288 -0.12413481] |
| 81 | + #[0.58125401 -0.02109478 0.14089226 -0.80115427] |
| 82 | + #[0.56561105 -0.06541577 0.6338014 0.52354627]])) |
65 | 83 |
|
66 | 84 | ; use eigenvectors with the 2 or 3 highest eigenvalues to create projection matrix |
67 | | - |
68 | | -; take dot product of z and projection matrix |
| 85 | +; this function takes two arguments: the first is an array of eigenvectors, the second is the |
| 86 | +; number of dimensions for the new array |
| 87 | +; want to define a function here that takes an array and keeps only the first n columns, |
| 88 | +; e.g. a 3x3 array and 2 will remove the last column from it |
| 89 | +(define (projection-matrix eigenvectors dim) |
| 90 | +  ; eigenvectors: 2-D array, assumed to hold one eigenvector per column (as iris-eigenvectors does) |
| 91 | +  ; dim: number of leading columns to keep; must be between 0 and the number of columns |
| 92 | +  ; returns a new array with every row of eigenvectors but only its first dim columns |
| 93 | +  ; (::) selects a whole axis, (:: 0 dim) selects the half-open index range [0, dim) |
| 94 | +  (array-slice-ref eigenvectors (list (::) (:: 0 dim)))) |
| 95 | + |
| 96 | +; take the first 2 or 3 columns from the corresponding eigenvector/value pairs |
| 97 | +(define iris-projection-matrix |
| 98 | + (array #[#[0.52237162 -0.37231836 -0.72101681] |
| 99 | + #[-0.26335492 -0.92555649 0.24203288] |
| 100 | + #[0.58125401 -0.02109478 0.14089226] |
| 101 | + #[0.56561105 -0.06541577 0.6338014]])) |
69 | 102 |
|
70 | 103 | ; plot with result of dot product |
71 | | - |
| 104 | +; z multiplied by the projection-matrix |
| 105 | +(define iris-pca |
| 106 | + 0) |
| 107 | +; placeholder for actual graph once I figure out matrix multiplication and the eigenvectors/values |
| 108 | +(define pca1 (query-rows |
| 109 | + conn |
| 110 | + "select pc1, pc2, pc3 from iris_pca where class = 'Iris-setosa'")) |
| 111 | + |
| 112 | +(define pca2 (query-rows |
| 113 | + conn |
| 114 | + "select pc1, pc2, pc3 from iris_pca where class = 'Iris-versicolor'")) |
| 115 | + |
| 116 | +(define pca3 (query-rows |
| 117 | + conn |
| 118 | + "select pc1, pc2, pc3 from iris_pca where class = 'Iris-virginica'")) |
| 119 | + |
| 120 | +; pca of iris dataset |
| 121 | +(plot3d (list (points3d pca1 #:sym 'dot #:size 20 #:color 1) |
| 122 | + (points3d pca2 #:sym 'dot #:size 20 #:color 2) |
| 123 | + (points3d pca3 #:sym 'dot #:size 20 #:color 3)) |
| 124 | + #:altitude 25 |
| 125 | + #:title "3D PCA of iris dataset") |
| 126 | + |
| 127 | +(disconnect conn) |
72 | 128 |
|
73 | 129 | ; quick test of 3d plot |
74 | 130 | ;(plot3d (points3d (array->list* iris-array)) |
|
0 commit comments