-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathmodel.Rmd
More file actions
72 lines (62 loc) · 1.9 KB
/
model.Rmd
File metadata and controls
72 lines (62 loc) · 1.9 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
---
title: "R Notebook"
output: html_notebook
---
```{r}
# Package setup for the loan-default modelling notebook.
# Attach order decides which package wins a name clash. MASS and plyr are
# attached BEFORE dplyr so that dplyr's verbs (select, summarise, mutate, ...)
# are the ones found on the search path; plyr itself warns when it is attached
# after dplyr.
library(lubridate)
library(ggplot2)
library(MASS)
library(plyr)
library(dplyr)
library(stringr)
library(caret)
library(rpart)
library(rpart.plot)
library(rattle)
library(ROSE)
library(ROCR)
library(ipred)
```
```{r}
# Load the loan sample from the working directory.
# stringsAsFactors is set explicitly because the read.csv default changed to
# FALSE in R 4.0.0. The levels() call and the caret confusion matrix used
# later in this notebook expect the text columns (DefaultedLoans in
# particular) to be factors, which is what TRUE guarantees on every R version.
mydata <- read.csv("Sample1.csv", stringsAsFactors = TRUE)
# Report the number of rows and columns that were read.
dim(mydata)
```
```{r}
# Response variable overview: the distinct class labels first, then the
# number of rows in each class.
levels(mydata[["DefaultedLoans"]])
table(mydata[["DefaultedLoans"]])
```
```{r}
# Cross-tabulate the response (rows) against CreditRating (columns).
table(mydata[["DefaultedLoans"]], mydata[["CreditRating"]])

# Histogram of InterestRate with bars filled by CreditRating, drawn as one
# panel per DefaultedLoans value and laid out in a single column.
ggplot(data = mydata, mapping = aes(x = InterestRate)) +
  geom_histogram(mapping = aes(fill = CreditRating)) +
  facet_wrap(~DefaultedLoans, ncol = 1)
```
```{r}
# Draw a 90% partition stratified on the response and keep the complement
# (roughly 10% of the rows) as a lighter sample for plotting.
index <- createDataPartition(y = mydata$DefaultedLoans, p = 0.90)[[1]]
loans.sample <- mydata[-index, ]

# Scatter plot of LoanBalance against InterestRate, coloured by MortgageYears.
ggplot(
  data = loans.sample,
  mapping = aes(x = LoanBalance, y = InterestRate)
) +
  geom_point(mapping = aes(color = MortgageYears))
```
```{r}
# Train/test split and decision-tree fitting.
# Fix the RNG seed so the random 80/20 partition, and every model and metric
# derived from it, is reproducible between notebook runs.
set.seed(13)
index <- createDataPartition(y = mydata$DefaultedLoans, p = 0.8)[[1]]
mydata.train <- mydata[index, ]
mydata.test <- mydata[-index, ]

# Baseline tree using rpart's default control settings.
mydata.rpart.0 <- rpart(DefaultedLoans ~ ., data = mydata.train)

# Larger tree: a smaller minsplit/minbucket and a low complexity parameter
# let rpart keep splits that the default settings would reject.
mydata.rpart.1 <- rpart(
  DefaultedLoans ~ .,
  data = mydata.train,
  control = rpart.control(minsplit = 10, minbucket = 3, cp = 0.0006)
)
fancyRpartPlot(mydata.rpart.1)
```
```{r}
# Score the held-out rows with the larger tree, asking for hard class labels.
predictions.1 <- predict(mydata.rpart.1, newdata = mydata.test, type = "class")
# Compare the predicted labels with the true labels of the test set.
confusionMatrix(data = predictions.1, reference = mydata.test$DefaultedLoans)
```
```{r}
# ROC curve and AUC for the larger tree on the test set.
# The plotting switch of ROSE::roc.curve is spelled `plotit`; the previous
# `plot = TRUE` only reached it through partial argument matching.
# Column 1 of the "prob" matrix is the predicted probability of the first
# level of DefaultedLoans.
# NOTE(review): confirm the first level is the class the curve should be
# scored on; if it is not, column 2 is the one to pass here.
roc.curve(
  response = mydata.test$DefaultedLoans,
  predicted = predict(mydata.rpart.1, mydata.test, type = "prob")[, 1],
  plotit = TRUE
)
```
```{r}
# Oversample the training set with ROSE::ovun.sample until it holds
# N = 52410 rows in total; seed = 13 makes the resampling repeatable.
# NOTE(review): N is hard-coded, presumably sized for the current training
# split; re-derive it if the data or the split proportion changes.
mydata.oversampled <- ovun.sample(
  DefaultedLoans ~ .,
  data = mydata.train,
  method = "over",
  N = 52410,
  seed = 13
)[["data"]]
# Class counts after oversampling.
table(mydata.oversampled[["DefaultedLoans"]])
```
```{r}
# Cross-validated rpart fit on the oversampled training data.
# Single-row tuning grid: caret evaluates only cp = 0.001.
tune <- data.frame(cp = 0.001)
# 10-fold cross-validation, logging progress while it runs.
tr_control <- trainControl(method = "cv", number = 10, verboseIter = TRUE)
# Seed the RNG so the fold assignment is reproducible between runs.
# NOTE(review): the rows were oversampled before cross-validation, so copies
# of the same minority row can land in both the fitting and held-out folds;
# treat the cross-validated accuracy as optimistic.
set.seed(13)
mydata.rpart.oversampled <- train(
  DefaultedLoans ~ .,
  data = mydata.oversampled,
  method = "rpart",
  trControl = tr_control,
  tuneGrid = tune,
  # Forwarded through train() to rpart for every fit.
  control = rpart.control(minsplit = 10, minbucket = 3)
)
```