@@ -234,16 +234,17 @@ set.seed(1)
234
234
# load data
235
235
cancer <- read_csv("data/unscaled_wdbc.csv") |>
236
236
# convert the character Class variable to the factor datatype
237
- mutate(Class = as_factor(Class))
237
+ mutate(Class = as_factor(Class)) |>
238
+ # rename the factor values to be more readable
239
+ mutate(Class = fct_recode(Class, "Malignant" = "M", "Benign" = "B"))
238
240
239
241
# create scatter plot of tumor cell concavity versus smoothness,
240
242
# labeling the points by diagnosis class
241
243
perim_concav <- cancer |>
242
244
ggplot(aes(x = Smoothness, y = Concavity, color = Class)) +
243
245
geom_point(alpha = 0.5) +
244
246
labs(color = "Diagnosis") +
245
- scale_color_manual(labels = c("Malignant", "Benign"),
246
- values = c("orange2", "steelblue2")) +
247
+ scale_color_manual(values = c("orange2", "steelblue2")) +
247
248
theme(text = element_text(size = 12))
248
249
249
250
perim_concav
@@ -268,7 +269,7 @@ in the data does not influence the data that ends up in the training and testing
268
269
Second, it **stratifies** the \index{stratification} data by the class label, to ensure that roughly
269
270
the same proportion of each class ends up in both the training and testing sets. For example,
270
271
in our data set, roughly 63% of the
271
- observations are from the benign class ( ` B ` ) , and 37% are from the malignant class ( ` M ` ) ,
272
+ observations are from the benign class, and 37% are from the malignant class,
272
273
so `initial_split` ensures that roughly 63% of the training data are benign,
273
274
37% of the training data are malignant,
274
275
and the same proportions exist in the testing data.
@@ -958,8 +959,7 @@ for (i in 1:length(ks)) {
958
959
size = 5.) +
959
960
labs(color = "Diagnosis") +
960
961
ggtitle(paste("K = ", ks[[i]])) +
961
- scale_color_manual(labels = c("Malignant", "Benign"),
962
- values = c("orange2", "steelblue2")) +
962
+ scale_color_manual(values = c("orange2", "steelblue2")) +
963
963
theme(text = element_text(size = 18), axis.title=element_text(size=18))
964
964
}
965
965
0 commit comments