Skip to content

Commit 6a7b5b2

Browse files
replace M/B with malignant/benign in clsfn2
1 parent 4a8b565 commit 6a7b5b2

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

source/classification2.Rmd

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -234,16 +234,17 @@ set.seed(1)
234234
# load data
235235
cancer <- read_csv("data/unscaled_wdbc.csv") |>
236236
# convert the character Class variable to the factor datatype
237-
mutate(Class = as_factor(Class))
237+
mutate(Class = as_factor(Class)) |>
238+
# rename the factor values to be more readable
239+
mutate(Class = fct_recode(Class, "Malignant" = "M", "Benign" = "B"))
238240
239241
# create scatter plot of tumor cell concavity versus smoothness,
240242
# labeling the points by diagnosis class
241243
perim_concav <- cancer |>
242244
ggplot(aes(x = Smoothness, y = Concavity, color = Class)) +
243245
geom_point(alpha = 0.5) +
244246
labs(color = "Diagnosis") +
245-
scale_color_manual(labels = c("Malignant", "Benign"),
246-
values = c("orange2", "steelblue2")) +
247+
scale_color_manual(values = c("orange2", "steelblue2")) +
247248
theme(text = element_text(size = 12))
248249
249250
perim_concav
@@ -268,7 +269,7 @@ in the data does not influence the data that ends up in the training and testing
268269
Second, it **stratifies** the \index{stratification} data by the class label, to ensure that roughly
269270
the same proportion of each class ends up in both the training and testing sets. For example,
270271
in our data set, roughly 63% of the
271-
observations are from the benign class (`B`), and 37% are from the malignant class (`M`),
272+
observations are from the benign class, and 37% are from the malignant class,
272273
so `initial_split` ensures that roughly 63% of the training data are benign,
273274
37% of the training data are malignant,
274275
and the same proportions exist in the testing data.
@@ -958,8 +959,7 @@ for (i in 1:length(ks)) {
958959
size = 5.) +
959960
labs(color = "Diagnosis") +
960961
ggtitle(paste("K = ", ks[[i]])) +
961-
scale_color_manual(labels = c("Malignant", "Benign"),
962-
values = c("orange2", "steelblue2")) +
962+
scale_color_manual(values = c("orange2", "steelblue2")) +
963963
theme(text = element_text(size = 18), axis.title=element_text(size=18))
964964
}
965965

0 commit comments

Comments
 (0)