-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathpca.rkt
More file actions
147 lines (112 loc) · 5.38 KB
/
pca.rkt
File metadata and controls
147 lines (112 loc) · 5.38 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
#lang racket
(require net/url)
(require csv-reading)
(require math/array)
(require math/matrix)
(require plot)
;; require custom functions
(require "graphs.rkt")
(require "data-filtering.rkt")
;; Location of the iris dataset; the data is downloaded from this URL every time the module runs.
(define iris-url "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data")
;; iris-raw is the downloaded CSV parsed into a list of rows.
;; call/input-url opens the port with get-pure-port, hands it to csv->list, and closes the port
;; once csv->list returns, so the HTTP connection is not left open for the life of the program.
(define iris-raw
  (call/input-url (string->url iris-url) get-pure-port csv->list))
;; Run the raw CSV rows through filter-last-csv. Per the original note this drops the last
;; column of every row and leaves the numbers as strings, giving a list of lists
;; (helper is defined outside this file — TODO confirm its handling of blank rows).
(define iris-raw-num-str (filter-last-csv iris-raw))
;; Build a 2D array from the rows after strlst-to-numlsts has processed them (presumably
;; converting each string to a number — verify against the helper). number? is the predicate
;; list*->array uses to recognise array elements.
;; NOTE(review): the original comment called this array "mutable"; nothing in this file mutates it.
(define iris-array (list*->array (strlst-to-numlsts iris-raw-num-str) number?))
;; The mean and standard deviation of the iris data are calculated below.
;; iris-sum is the array of column sums (array-axis-sum along axis 0).
(define iris-sum (array-axis-sum iris-array 0))
;; matrix-mean : 2D array -> array of column means
;; Sums the entries along axis 0 and divides each sum by the number of rows, i.e. the first
;; entry of the array's shape. The row count is wrapped in a zero-dimensional array so that
;; array/ broadcasts it over every column sum.
(define (matrix-mean matrix)
  (let* ([column-totals (array-axis-sum matrix 0)]
         [row-count (vector-ref (array-shape matrix) 0)])
    (array/ column-totals (array row-count))))
;; column means of iris-array, as computed by matrix-mean
(define iris-mean (matrix-mean iris-array))
;; matrix-std : 2D array -> array of column standard deviations
;; For every column: sqrt( sum_i (x_i - mean)^2 / N ), where N is the number of rows.
;; Note the divisor is N itself, not N - 1.
(define (matrix-std matrix)
  (let* ([deviations (array- matrix (matrix-mean matrix))]
         [squared-totals (array-axis-sum (array-map sqr deviations) 0)]
         [row-count (vector-ref (array-shape matrix) 0)])
    (array-map sqrt (array/ squared-totals (array row-count)))))
;; column standard deviations of iris-array, as computed by matrix-std
(define iris-std (matrix-std iris-array))
;; standardize-matrix : N x M array -> N x M array
;; Replaces every entry x by z = (x - mean) / std, where mean and std are those of the
;; entry's own column (see matrix-mean and matrix-std).
(define (standardize-matrix matrix)
  (let ([centered (array- matrix (matrix-mean matrix))]
        [spread (matrix-std matrix)])
    (array/ centered spread)))
;; z is the standardized iris data: iris-array passed through standardize-matrix
(define z (standardize-matrix iris-array))
;; mean-vector holds the column means of z; it is subtracted from z again when the covariance
;; matrix is computed below
(define mean-vector (matrix-mean z))
;; Square covariance matrix of the standardized data (one row and one column per property):
;;   cov[x, y] = sum_i (x_i - E[x]) (y_i - E[y]) / (n - 1)
;; Computed as transpose(z - mean-vector) multiplied by (z - mean-vector), with every entry
;; divided by the number of rows of z minus one.
(define iris-co-variance-matrix
  (let* ([centered (array- z mean-vector)]
         [scatter (matrix* (array-axis-swap centered 1 0) centered)]
         [rows-minus-one (- (vector-ref (array-shape z) 0) 1)])
    (array/ scatter (array rows-minus-one))))
;; Eigenvalues and eigenvectors of the covariance matrix.
;; These are hard-coded constants: per the original note they were calculated with Python, so
;; they are NOT derived from iris-co-variance-matrix at run time and must be regenerated by hand
;; if the input data changes.
;; NOTE(review): the i-th eigenvalue presumably belongs to the i-th *column* of
;; iris-eigenvectors (the code below keeps leading columns) — confirm against the Python output.
(define iris-eigenvalues
(array #[2.93035378 0.92740362 0.14834223 0.02074601]))
(define iris-eigenvectors
(array #[#[0.52237162 -0.37231836 -0.72101681 0.26199559]
#[-0.26335492 -0.92555649 0.24203288 -0.12413481]
#[0.58125401 -0.02109478 0.14089226 -0.80115427]
#[0.56561105 -0.06541577 0.6338014 0.52354627]]))
;; Projection matrix built from the eigenvectors with the highest eigenvalues.
;; Every row of iris-eigenvectors is passed through remove-last (assumed to drop the row's final
;; entry — helper defined outside this file), which removes the last column of the matrix and
;; keeps the leading ones.
;; TODO (from the original author): replace this with a function that takes an array of
;; eigenvectors and a number of dimensions n and keeps only the first n columns, e.g. a 3x3
;; array with n = 2 loses its last column.
(define iris-projection-matrix
  (let ([eigenvector-rows (array->list* iris-eigenvectors)])
    (list*->array
     (for/list ([row (in-list eigenvector-rows)])
       (remove-last row))
     number?)))
;; Data to plot: the matrix product of z and the projection matrix, converted to a list of rows.
(define iris-pca
  (let ([projected (matrix* z iris-projection-matrix)])
    (array->list* projected)))
;; The same rows with a class label attached: the labels come from applying `class` to each row
;; of (remove-last iris-raw), and append-last joins the i-th label to the i-th projected row.
;; map is given both lists at once, so they must have the same length.
(define iris-pca-classes
  (let ([class-labels (for/list ([raw-row (in-list (remove-last iris-raw))])
                        (class raw-row))])
    (map (lambda (coords label) (append-last coords label))
         iris-pca
         class-labels)))
;; identity : any -> any
;; Returns its argument unchanged. Passed as the value test to the filter helpers below, where
;; any non-#f result counts as "keep the row".
(define (identity x) x)
;; pca-class : row-or-'name -> any
;; Accessor for the class field of a PCA row, which is the row's fourth element.
;; Given the symbol 'name it returns the column title "Class" instead.
(define (pca-class x)
  (cond
    [(eqv? x 'name) "Class"]
    [else (cadddr x)]))
;; filter-3c : data parm expr [class-t] -> list of rows
;; Row filter for the PCA rows, whose class label is read with pca-class.
;;   data    - list of rows
;;   parm    - accessor applied to each row; its result is passed through string-to-number
;;   expr    - test applied to that value; a row is kept when the result is not #f
;;   class-t - class to restrict to; the default "none" disables the class restriction
;; When a class is given, rows are first narrowed to that class (same-class on pca-class) and
;; the value test is applied to the survivors. Kept rows stay in their original order.
;; NOTE(review): callers in this file pass petal-width as parm on PCA rows — confirm which
;; element it reads there and what string-to-number returns for it.
(define (filter-3c data parm expr [class-t "none"])
  ;; right fold that collects the rows accepted by keep?
  (define (select keep? rows)
    (foldr (lambda (row kept) (if (keep? row) (cons row kept) kept))
           '()
           rows))
  (define (value-ok? row)
    (expr (string-to-number (parm row))))
  (define (class-ok? row)
    (same-class (pca-class row) class-t))
  (if (same-class class-t "none")
      (select value-ok? data)
      (select value-ok? (select class-ok? data))))
;; pca1, pca2, pca3: the PCA rows of each iris class, in the order setosa, versicolor,
;; virginica. Each set is selected with filter-3c and then passed through remove-last-col
;; (presumably stripping the class label so only coordinates remain — helper defined elsewhere).
(define-values (pca1 pca2 pca3)
  (let ([points-for
         (lambda (species)
           (remove-last-col
            (filter-3c iris-pca-classes petal-width identity species)))])
    (values (points-for "Iris-setosa")
            (points-for "Iris-versicolor")
            (points-for "Iris-virginica"))))
;; 3D scatter plot of the PCA result: one points3d renderer per class, each with its own
;; color index and legend label, all drawn as size-20 dots.
(plot3d (map (lambda (pts color label)
               (points3d pts #:sym 'dot #:size 20 #:color color #:label label))
             (list pca1 pca2 pca3)
             (list 1 2 3)
             (list "Iris-setosa" "Iris-versicolor" "Iris-virginica"))
        #:altitude 25
        #:title "3D PCA of iris dataset")
;; lambdas for filter
;; less-than : number -> (number -> boolean)
;; ((less-than y) x) is true when x < y.
(define ((less-than y) x)
  (< x y))
;; greater-than : number -> (number -> boolean)
;; ((greater-than y) x) is true when x > y.
(define ((greater-than y) x)
  (> x y))
;; equal-to : number -> (number -> boolean)
;; ((equal-to y) x) is true when x and y are numerically equal (compared with =).
(define ((equal-to y) x)
  (= x y))
;; Per-class subsets of the raw rows, kept as test data for plotting.
;; `filter` is called here with four arguments (rows, accessor, value test, class name), so it
;; is not Racket's two-argument filter — presumably the one exported by "data-filtering.rkt".
(define-values (Iris-virginica Iris-versicolor Iris-setosa)
  (let ([rows-of
         (lambda (species)
           (filter (remove-last iris-raw) petal-width identity species))])
    (values (rows-of "Iris-virginica")
            (rows-of "Iris-versicolor")
            (rows-of "Iris-setosa"))))