-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathdata-filtering.rkt
More file actions
258 lines (214 loc) · 8.81 KB
/
data-filtering.rkt
File metadata and controls
258 lines (214 loc) · 8.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
#lang racket
(require math/statistics)
(provide total count average standard-deviation min max filter make-linear-regression)
(provide petal-width sepal-length sepal-width petal-length class)
(provide same-class)
(provide remove-last)
(provide filter-last-csv)
(provide str-to-num-lst strlst-to-numlsts)
(provide merge-lists)
(provide append-last)
(provide string-to-number)
(provide remove-last-col)
;;used for data abstractions
;;Data abstractions of iris dataset
;; Accessor for the "Petal Width" field.
;; x: either the symbol 'name or a data row (a list).
;; Returns the label "Petal Width" when x is 'name, otherwise the first
;; element of the row.
;; NOTE(review): this assumes petal width is stored first in each row —
;; confirm against the column order of the CSV being loaded.
(define (petal-width x)
  (cond
    [(eqv? x 'name) "Petal Width"]
    [else (car x)]))
;; Accessor for the "Sepal Length" field.
;; x: either the symbol 'name or a data row (a list).
;; Returns the label "Sepal Length" when x is 'name, otherwise the second
;; element of the row.
(define (sepal-length x)
  (cond
    [(eqv? x 'name) "Sepal Length"]
    [else (cadr x)]))
;; Accessor for the "Sepal Width" field.
;; x: either the symbol 'name or a data row (a list).
;; Returns the label "Sepal Width" when x is 'name, otherwise the third
;; element of the row.
(define (sepal-width x)
  (cond
    [(eqv? x 'name) "Sepal Width"]
    [else (caddr x)]))
;; Accessor for the "Petal Length" field.
;; x: either the symbol 'name or a data row (a list).
;; Returns the label "Petal Length" when x is 'name, otherwise the fourth
;; element of the row.
(define (petal-length x)
  (cond
    [(eqv? x 'name) "Petal Length"]
    [else (cadddr x)]))
;; Accessor for the "Class" field.
;; x: either the symbol 'name or a data row (a list).
;; Returns the label "Class" when x is 'name, otherwise the fifth element
;; of the row.
(define (class x)
  (cond
    [(eqv? x 'name) "Class"]
    [else (car (cddddr x))]))
;; data helper functions
;; Predicate: are two class labels the same?
;; class1, class2: strings, compared with string=? (exact, case-sensitive).
(define same-class
  (lambda (class1 class2)
    (string=? class1 class2)))
;; Returns a copy of lst without its final element. Used to strip the class
;; column from an iris row so the remaining fields can form a numeric array.
;; lst: list
;; An empty list is returned unchanged (as '()) instead of raising, so blank
;; rows do not abort processing.
(define (remove-last lst)
  (if (null? lst)
      '()
      (drop-right lst 1)))
;; Cleans a list of rows read from a CSV file: the last top-level list is
;; discarded, and the final element of every remaining row is dropped.
;; lst-of-lsts: list of lists
(define (filter-last-csv lst-of-lsts)
  (for/list ([row (in-list (remove-last lst-of-lsts))])
    (remove-last row)))
;; Like filter-last-csv, but only drops the last column: every row is kept
;; and each one loses its final element.
;; lst-of-lsts: list of lists
(define (remove-last-col lst-of-lsts)
  (for/list ([row (in-list lst-of-lsts)])
    (remove-last row)))
;; Helper used to put the class column back onto a row after clustering.
;; lst: list
;; elm: any value
;; Returns a new list consisting of lst followed by elm.
(define (append-last lst elm)
  (append lst (list elm)))
;; Converts a list of strings to a list of numbers.
;; items: list of strings
;; Each string is parsed with string->number, so a string that is not a
;; valid number becomes #f in the result.
(define (str-to-num-lst items)
  (for/list ([item (in-list items)])
    (string->number item)))
;; Converts a list of lists of strings to a list of lists of numbers by
;; applying str-to-num-lst to every inner list.
;; lst-of-str: list of lists of strings
(define (strlst-to-numlsts lst-of-str)
  (for/list ([row (in-list lst-of-str)])
    (str-to-num-lst row)))
;; Data manipulation functions
;; (total data parm [class-t]) → number?
;; data: list of rows
;; parm: procedure selecting a field from a row (should be one of the data
;;       abstractions); the selected field must be a numeric string
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Sums the selected field over every row, or only over the rows whose class
;; equals class-t.
;; usage:
;; > (total (remove-last iris-raw) petal-width)
;; > (total (remove-last iris-raw) petal-width "Iris-virginica")
(define (total data parm [class-t "none"])
  ;; rows that take part in the sum
  (define selected
    (if (same-class class-t "none")
        data
        (for/list ([row (in-list data)]
                   #:when (same-class (class row) class-t))
          row)))
  ;; a right fold keeps the additions in right-to-left order
  (foldr + 0 (map (lambda (row) (string->number (parm row))) selected)))
;; (count data [class-t]) → number?
;; data: list of rows
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Returns the number of rows, or the number of rows whose class equals
;; class-t.
;; usage:
;; > (count (remove-last iris-raw))
;; > (count (remove-last iris-raw) "Iris-virginica")
(define (count data [class-t "none"])
  (if (same-class class-t "none")
      (length data)
      ;; tally only the rows of the requested class
      (for/sum ([row (in-list data)])
        (if (same-class (class row) class-t) 1 0))))
;; (average data parm [class-t]) → number?
;; data: list of rows
;; parm: procedure selecting a field from a row (should be one of the data
;;       abstractions)
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Returns (total data parm class-t) divided by (count data class-t).
;; usage:
;; > (average (remove-last iris-raw) petal-width)
;; > (average (remove-last iris-raw) petal-width "Iris-virginica")
(define (average data parm [class-t "none"])
  (let ([sum (total data parm class-t)]
        [n (count data class-t)])
    (/ sum n)))
;; (standard-deviation data parm class) → number?
;; data: list
;; param: procedure (should be one of the data abstractions)
;; class: string
;; usage:
;; > (standard-deviation (remove-last iris-raw) petal-width)
;; > (standard-deviation(remove-last iris-raw) petal-width "Iris-virginica")
;;(define standard-deviation
;; (lambda (data parm [class-t "none"])
;; (sqrt (/ (foldr
;; (lambda (x y)
;; (+ (expt (- (string->number (parm x)) (average data parm class-t)) 2) y)) 0 data)
;; (total data parm class-t)))))
;;(define standard-deviation
;; (lambda (data parm [class-t "none"])
;; (let* ([total-points (total data parm class-t)]
;; [mean (average data parm class-t)])
;; (sqrt (/ (foldr + 0 (map (lambda (x)
;; (expt (- (string->number (parm x)) mean) 2))
;; data))total-points)))))
;; (standard-deviation data parm [class-t]) → number?
;; data: list of rows
;; parm: procedure selecting a field from a row (should be one of the data
;;       abstractions); the selected field must be a numeric string
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Returns the standard deviation of the selected field, as computed by
;; `stddev` from math/statistics with its default arguments. When class-t is
;; given, only rows whose class equals class-t are considered, matching the
;; behaviour of total, count, min and max.
(define standard-deviation
  (lambda (data parm [class-t "none"])
    ;; rows that take part in the computation
    (define selected
      (if (same-class class-t "none")
          data
          (for/list ([row (in-list data)]
                     #:when (same-class (class row) class-t))
            row)))
    (stddev (map (lambda (row) (string->number (parm row))) selected))))
;; (sqrt (/ (foldr + 0 (map (lambda (x)
;; (expt (- (string->number (parm x)) mean) 2))
;; data))total-points)))))
;; (min data parm [class-t]) → number?
;; data: list of rows
;; parm: procedure selecting a field from a row (should be one of the data
;;       abstractions); the selected field must be a numeric string
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Returns the smallest value of the selected field, over every row or only
;; over the rows whose class equals class-t.
;; When no row is selected the historical sentinel 9999999999 is returned.
(define min
  (lambda (data parm [class-t "none"])
    ;; rows that take part in the comparison
    (define selected
      (if (same-class class-t "none")
          data
          (for/list ([row (in-list data)]
                     #:when (same-class (class row) class-t))
            row)))
    ;; parse each field once
    (define nums
      (map (lambda (row) (string->number (parm row))) selected))
    (if (null? nums)
        9999999999
        ;; seed with a real data value so numbers above the old sentinel
        ;; are still handled correctly
        (foldr (lambda (n best) (if (< n best) n best))
               (last nums)
               nums))))
;; (max data parm [class-t]) → number?
;; data: list of rows
;; parm: procedure selecting a field from a row (should be one of the data
;;       abstractions); the selected field must be a numeric string
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Returns the largest value of the selected field, over every row or only
;; over the rows whose class equals class-t.
;; When no row is selected the historical default 0 is returned.
(define max
  (lambda (data parm [class-t "none"])
    ;; rows that take part in the comparison
    (define selected
      (if (same-class class-t "none")
          data
          (for/list ([row (in-list data)]
                     #:when (same-class (class row) class-t))
            row)))
    ;; parse each field once
    (define nums
      (map (lambda (row) (string->number (parm row))) selected))
    (if (null? nums)
        0
        ;; seed with a real data value so all-negative data does not
        ;; wrongly report 0 as its maximum
        (foldr (lambda (n best) (if (> n best) n best))
               (last nums)
               nums))))
;; Coerces arg to a number.
;; arg: a number or a string
;; A number is returned unchanged; anything else is parsed with
;; string->number (which yields #f for a string that is not a valid number).
(define (string-to-number arg)
  (cond
    [(number? arg) arg]
    [else (string->number arg)]))
;; (filter data parm expr [class-t]) → list?
;; data: list of rows
;; parm: procedure selecting a field from a row (should be one of the data
;;       abstractions)
;; expr: one-argument predicate applied to the selected field after it has
;;       been converted with string-to-number
;; class-t: string, optional; the default "none" means "do not filter by class"
;; Returns, in their original order, the rows for which expr holds. When
;; class-t is given, rows of other classes are removed first, so expr is only
;; applied to rows of that class.
(define (filter data parm expr [class-t "none"])
  ;; order-preserving selection of the rows satisfying pred
  (define (keep pred rows)
    (foldr (lambda (row kept) (if (pred row) (cons row kept) kept))
           '()
           rows))
  (define (passes? row)
    (expr (string-to-number (parm row))))
  (cond
    [(same-class class-t "none") (keep passes? data)]
    [else
     (keep passes?
           (keep (lambda (row) (same-class (class row) class-t)) data))]))
;; Least-squares fit of the line y = slope * x + intercept.
;; x-points -> list of numbers
;; y-points -> list of numbers
;; returns -> a two-element list: (list slope intercept)
;;
;; slope     = sum((x - x-mean) * (y - y-mean)) / sum((x - x-mean)^2)
;; intercept = y-mean - slope * x-mean
;;
;; The points are paired position by position; if the lists differ in length
;; the extra elements of the longer list are ignored when pairing (the means
;; are still taken over each full list).
;; Both lists must be non-empty and the x values must not all be equal,
;; otherwise a division by zero occurs.
(define (calc-linear-regression x-points y-points)
  (let* ([x-mean (/ (foldr + 0 x-points) (length x-points))]
         [y-mean (/ (foldr + 0 y-points) (length y-points))]
         ;; sum over all (x, y) pairs of the product of the deviations
         [numerator (for/sum ([x (in-list x-points)]
                              [y (in-list y-points)])
                      (* (- x x-mean) (- y y-mean)))]
         ;; sum of squared deviations of x from its own mean
         [denominator (for/sum ([x (in-list x-points)])
                        (expt (- x x-mean) 2))]
         [slope (/ numerator denominator)]
         [intercept (- y-mean (* slope x-mean))])
    (list slope intercept)))
;; Extracts two columns from a data set, converts them from strings to
;; numbers, and fits a line through them.
;; data-set -> list of rows, each row holding all the fields of one entry
;; col1 -> accessor for the x values
;; col2 -> accessor for the y values
;; returns -> the result of calc-linear-regression on the two columns,
;;            i.e. (list slope intercept)
(define (make-linear-regression data-set col1 col2)
  ;; numeric values of one column, in row order
  (define (column-of col)
    (map (lambda (row) (string->number (col row))) data-set))
  (let* ([xs (column-of col1)]
         [ys (column-of col2)])
    (calc-linear-regression xs ys)))
;(foldr (lambda(x y) (cons (col1 x) y)) '() data-set))
;data-set)
;; Flattens a list of lists by one level into a single list.
;; list-of-lists: list of lists
;; The inner lists are concatenated in order; an empty input gives '().
(define (merge-lists list-of-lists)
  (foldr append '() list-of-lists))