diff --git a/.RData b/.RData index 5567909..fd1d1c7 100644 Binary files a/.RData and b/.RData differ diff --git a/.Rhistory b/.Rhistory index ea91c6f..fa112b9 100644 --- a/.Rhistory +++ b/.Rhistory @@ -1,512 +1,512 @@ -dm <- dim(GAX) -dm -colnames(GAX)[dm[2]] -colnames(GAX)[dm[2]] <- "Direction" -colnames(GAX) -issd <- "2013-01-01" -ised <- "2017-12-31" -ossd <- "2018-01-01" -osed <- "2018-11-20" -isrow <- which(index(GAX) >= issd & index(GAX) <= ised) -osrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -isGAX <- GAX[isrow,] -osGAX <- GAX[osrow,] -#표준화# -isme <- apply(isGAX, 2, mean) -isstd <- apply(isGAX, 2, sd) -isidn <- matrix(1, dim(isGAX)[1], dim(isGAX)[2]) -norm_isGAX <- (isGAX - t(isme * t(isGAX))) / t(isstd * t(isidn)) -dm<-dim(isGAX) -norm_isGAX[,dm[2]] <- direction[isrow] -formula <- paste("Direction ~ .", sep ="") -model <-glm(formula, family = "binomial", norm_isGAX) -summary(model) -pred <- predict(model, norm_isGAX) -prob <- 1/(1+exp(-(pred))) -par(mflow = c(2,1)) -plot(pred,type = "l") -plot(prob, type = "l") -head(prob) -tail(prob) -pred_direction <- NULL -pred_direction[prob > 0.5] <- 1 -pred_direction[prob <= 0.5] <- 0 -pred_direction -#오차행렬에 집어넣을 때, pred_direction이랑 norm_isdji 간 객체 차 존재. 
Table로 집어넣어야 함, e1071패키지 설치 후 실행 -matrix <- confusionMatrix(table(pred_direction,norm_isGAX$Direction)) -matrix -#예측 정확도 94% -#정규화 -osidn <- matrix(1,dim(osGAX)[1], dim(osGAX)[2]) -norm_osGAX <- (osGAX - t(isme*t(osidn))) / t(isstd*t(osidn)) -dm <- dim(osGAX) -norm_osGAX[,dm[2]] <- direction[osrow] -#표본 외 데이터 값, 확률 -ospred <- predict(model, norm_osGAX) -osprob <- 1/(1+exp(-(ospred))) -ospred_direction <- NULL -ospred_direction[osprob >0.5]<-1 -ospred_direction[osprob<=0.5]<-0 -osmatrix <- confusionMatrix(table(ospred_direction, norm_osGAX$Direction)) -osmatrix -#84% accuracy, 트레이딩 비용, 시장 슬리피지 고려X, 다른 전략 X 오로지 예측 -#### 트레이딩 후 Cummulative return 구하는 코딩 -#로지스틱 CUM RET 구하는 법 -signal <- ifelse(ospred_direction == 1, 1, ifelse(ospred_direction == 0,-1,0)) -testrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -GAX <- GOLD.AX$GOLD.AX.Close -ret <- GAX/lag(GAX) -1 -ret <- ret[testrow] -ret -cost <- 0 -length(signal) -length(ret) -dim(ret) -length(testrow) -trade_ret <- ret * Lag(signal) - cost -dim(ret) -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -### Data ### -getSymbols("GOLD.AX", src = "yahoo") -GAX <- GOLD.AX -GAX <- GAX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -#Varaible # -avg10 <- rollapply(GAX, 10, mean) -avg20 <- rollapply(GAX, 20, mean) -std10 <- rollapply(GAX,10,sd) -std20 <- rollapply(GAX,20,sd) -rsi5 <- RSI(GAX, 5, "SMA") -rsi14 <- RSI(GAX, 14, "SMA") -macd12269 <- MACD(GAX, 12, 26, 9, "SMA") -macd7205 <- MACD(GAX, 7, 20, 5, "SMA") -bbands <- BBands(GAX, 20, "SMA", 2) -direction <- NULL -direction[GAX > Lag(GAX, 20)] <- 1 -direction[GAX < Lag(GAX, 20)] <- 0 -GAX <- cbind(GAX, avg10, avg20, std10, std20, rsi5, rsi14, macd12269, macd7205, bbands, direction) -dm <- dim(GAX) -dm -colnames(GAX)[dm[2]] -colnames(GAX)[dm[2]] <- "Direction" -colnames(GAX) -issd <- "2013-01-01" -ised <- "2017-12-31" -ossd <- "2018-01-01" -osed <- "2018-11-20" -isrow <- which(index(GAX) >= issd & index(GAX) 
<= ised) -osrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -isGAX <- GAX[isrow,] -osGAX <- GAX[osrow,] -#표준화# -isme <- apply(isGAX, 2, mean) -isstd <- apply(isGAX, 2, sd) -isidn <- matrix(1, dim(isGAX)[1], dim(isGAX)[2]) -norm_isGAX <- (isGAX - t(isme * t(isGAX))) / t(isstd * t(isidn)) -dm<-dim(isGAX) -norm_isGAX[,dm[2]] <- direction[isrow] -formula <- paste("Direction ~ .", sep ="") -model <-glm(formula, family = "binomial", norm_isGAX) -summary(model) -pred <- predict(model, norm_isGAX) -prob <- 1/(1+exp(-(pred))) -par(mflow = c(2,1)) -plot(pred,type = "l") -plot(prob, type = "l") -head(prob) -tail(prob) -pred_direction <- NULL -pred_direction[prob > 0.5] <- 1 -pred_direction[prob <= 0.5] <- 0 -pred_direction -#오차행렬에 집어넣을 때, pred_direction이랑 norm_isdji 간 객체 차 존재. Table로 집어넣어야 함, e1071패키지 설치 후 실행 -matrix <- confusionMatrix(table(pred_direction,norm_isGAX$Direction)) -matrix -#예측 정확도 94% -#정규화 -osidn <- matrix(1,dim(osGAX)[1], dim(osGAX)[2]) -norm_osGAX <- (osGAX - t(isme*t(osidn))) / t(isstd*t(osidn)) -dm <- dim(osGAX) -norm_osGAX[,dm[2]] <- direction[osrow] -#표본 외 데이터 값, 확률 -ospred <- predict(model, norm_osGAX) -osprob <- 1/(1+exp(-(ospred))) -ospred_direction <- NULL -ospred_direction[osprob >0.5]<-1 -ospred_direction[osprob<=0.5]<-0 -osmatrix <- confusionMatrix(table(ospred_direction, norm_osGAX$Direction)) -osmatrix -#84% accuracy, 트레이딩 비용, 시장 슬리피지 고려X, 다른 전략 X 오로지 예측 -#### 트레이딩 후 Cummulative return 구하는 코딩 -#로지스틱 CUM RET 구하는 법 -signal <- ifelse(ospred_direction == 1, 1, ifelse(ospred_direction == 0,-1,0)) -testrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -GAX <- GOLD.AX$GOLD.AX.Close -ret <- GAX/lag(GAX) -1 -ret <- ret[testrow] -ret -cost <- 0 -length(signal) -length(ret) -dim(ret) -length(testrow) -trade_ret <- ret * Lag(signal) - cost -dim(ret) -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -cumm_ret -anual_ret -### Neural Network Machine Learning ### -rm(list=ls()) 
-getSymbols("GOLD.AX", src = "yahoo") -GAX <- GOLD.AX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -chartSeries(ClCl(GAX)) -plot(GAX) -#수익률 계산, 델타 함수가 안되어요 ㅜㅜ -ret <- GAX/lag(GAX) -1 # 중요함 -avg10 <- rollapply(GAX,10,mean) -avg20 <- rollapply(GAX,20,mean) -std10<-rollapply(GAX,10,sd) -std20<-rollapply(GAX,20,sd) -rsi5 <- RSI(GAX,5,"SMA") -rsi14<-RSI(GAX,14,"SMA") -macd12269 <- MACD(GAX,12,26,9,"SMA") -macd7205 <- MACD(GAX,7,20,5,"SMA") -bbands<-BBands(GAX,20,"SMA",2) -#지난 20일간 수익률 2% 이상이면 up, -2% down, 그사이 nowhere -direction <- data.frame(matrix(NA,dim(GAX)[1],1)) -#20일 수익률 -lagret <- (GAX - Lag(GAX,20)) / Lag(GAX,20) -direction[lagret > 0.02] <- "Up" -direction[lagret < -0.02] <- "Down" -direction[lagret < 0.02 & lagret > -0.02] <- "Nowhere" -GAX <- cbind(GAX,avg10,avg20, std10, std20,rsi5,rsi14,macd12269,macd7205,bbands) -#훈련용, 검증용, 평가용 데이터집합 -train_sdate <- "2013-01-01" -train_edate <- "2016-12-31" -vali_sdate <- "2017-01-01" -vali_edate <- "2017-12-31" -test_sdate <- "2018-01-01" -test_edate <- "2018-12-31" -trainrow <- which(index(GAX) >= train_sdate & index(GAX) <= train_edate) -valirow <- which(index(GAX) >= vali_sdate & index(GAX) <= vali_edate) -testrow <- which(index(GAX) >= test_sdate & index(GAX) <= test_edate) -trainGAX <- GAX[trainrow,] -valiGAX <- GAX[valirow,] -testGAX <- GAX[testrow,] -trainme <- apply(trainGAX,2,mean) -trainstd <- apply(trainGAX,2,sd) -#정규화 -trainidn <- (matrix(1,dim(trainGAX)[1],dim(trainGAX)[2])) -valiidn <- (matrix(1,dim(valiGAX)[1],dim(valiGAX)[2])) -testidn <- (matrix(1,dim(testGAX)[1],dim(testGAX)[2])) -norm_trainGAX <- (trainGAX - t(trainme*t(trainidn)))/t(trainstd*t(trainidn)) -norm_valiGAX <- (valiGAX - t(trainme*t(valiidn)))/t(trainstd*t(valiidn)) -norm_testGAX <- (testGAX - t(trainme*t(testidn)))/t(trainstd*t(testidn)) -traindir <- direction[trainrow,1] -validir <- direction[valirow,1] -testdir<-direction[testrow,1] -#신경망 적합(정규화열,날짜별 방향, 신경수, 트레이스 출력 여부) -set.seed(1) -model <- nnet(norm_trainGAX, class.ind(traindir),size = 4, 
trace = F) -model -dim(norm_trainGAX) -vali_pred <- predict(model, norm_valiGAX) -head(vali_pred) -vali_pred_class <- data.frame(matrix(NA,dim(vali_pred)[1],1)) -vali_pred_class[vali_pred[,"Down"]>0.5, 1]<- "Down" -vali_pred_class[vali_pred[,"Nowhere"]>0.5,1] <- "Nowhere" -vali_pred_class[vali_pred[,"Up"]>0.5, 1] <- "Up" -vali_pred_class -matrix<- confusionMatrix(table(vali_pred_class[,1],validir)) -matrix -#87.65% -test_pred <- predict(model,norm_testGAX) -test_pred_class <- data.frame(matrix(NA,dim(test_pred)[1],1)) -test_pred_class[test_pred[,"Down"]>0.5, 1]<- "Down" -test_pred_class[test_pred[,"Nowhere"]>0.5,1] <- "Nowhere" -test_pred_class[test_pred[,"Up"]>0.5, 1] <- "Up" -test_matrix <- confusionMatrix(table(test_pred_class[,1],testdir)) -test_matrix -#82pro -#Signal generator -signal <- ifelse(test_pred_class == "Up", 1, ifelse(test_pred_class == "Down",-1,0)) -ret <- ret[testrow] -ret -cost <- 0 -trade_ret <- ret * Lag(signal) - cost -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -#charts.PerformanceSummary(cumm_ret) -#plot(cumm_ret) -#Deep Neural Network -set.seed(1) -model <- dbn.dnn.train(norm_testGAX,class.ind(traindir),hidden=c(3,4,6)) -nn.predict(model,norm_valiGAX) -nn.test(model,norm_valiGAX,class.ind(validir),t=0.4) -data <- cbind(as.data.frame(norm_trainGAX),traindir) -class(norm_trainGAX) -class(traindir) -h2o.init() -datah2o <- as.h2o(data,"h2o") -class(datah2o) -dim(datah2o) -#은닉층이 4개, 각 뉴런수 4,5,2,7 -model <- h2o.deeplearning(1:15, 16,training_frame = datah2o,hidden = c(4,5,2,7)) -vali_pred <- predict(model, as.h2o(norm_valiGAX,"h2o")) -vali_pred <- as.data.frame(vali_pred) -vali_pred_class <- data.frame(matrix(NA,dim(vali_pred)[1],1)) -vali_pred_class[vali_pred[,"Down"]>0.5, 1]<- "Down" -vali_pred_class[vali_pred[,"Nowhere"]>0.5,1] <- "Nowhere" -vali_pred_class[vali_pred[,"Up"]>0.5, 1] <- "Up" -vali_matrix <- confusionMatrix(table(vali_pred_class[,1],validir)) 
-vali_matrix -## Deep Graph ## -signal <- ifelse(vali_pred_class == "Up", 1, ifelse(vali_pred_class == "Down",-1,0)) -ret <- GAX/lag(GAX) -1 # 중요함 -ret <- ret[valirow] -ret <- ret[,1] -cost <- 0 -trade_ret <- ret * Lag(signal) - cost -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -rm(list=ls()) -getSymbols("GOLD.AX", src = "yahoo") -GAX <- GOLD.AX -GAX <- GAX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -avg10 <- rollapply(GAX, 10, mean) -avg20 <- rollapply(GAX, 20, mean) -std10 <- rollapply(GAX,10,sd) -std20 <- rollapply(GAX,20,sd) -rsi5 <- RSI(GAX, 5, "SMA") -rsi14 <- RSI(GAX, 14, "SMA") -macd12269 <- MACD(GAX, 12, 26, 9, "SMA") -macd7205 <- MACD(GAX, 7, 20, 5, "SMA") -bbands <- BBands(GAX, 20, "SMA", 2) -direction <- NULL -direction[GAX > Lag(GAX, 20)] <- 1 -direction[GAX < Lag(GAX, 20)] <- 0 -GAX <- cbind(GAX, avg10, avg20, std10, std20, rsi5, rsi14, macd12269, macd7205, bbands, direction) -dm <- dim(GAX) -dm -colnames(GAX)[dm[2]] -colnames(GAX)[dm[2]] <- "Direction" -colnames(GAX) -issd <- "2013-01-01" -ised <- "2017-12-31" -ossd <- "2018-01-01" -osed <- "2018-12-31" -isrow <- which(index(GAX) >= issd & index(GAX) <= ised) -osrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -isGAX <- GAX[isrow,] -osGAX <- GAX[osrow,] -#표준화 -isme <- apply(isGAX, 2, mean) -isstd <- apply(isGAX, 2, sd) -isidn <- matrix(1, dim(isGAX)[1], dim(isGAX)[2]) -norm_isGAX <- (isGAX - t(isme * t(isidn))) / t(isstd * t(isidn)) -norm_isGAX -dm<-dim(isGAX) -norm_isGAX[,dm[2]] <- direction[isrow] -#정규화 -osidn <- matrix(1,dim(osGAX)[1], dim(osGAX)[2]) -norm_osGAX <- (osGAX - t(isme*t(osidn))) / t(isstd*t(osidn)) -dm <- dim(osGAX) -norm_osGAX[,dm[2]] <- direction[osrow] -#방향설정 -GAX <- GOLD.AX -GAX <- GAX[,"GOLD.AX.Close"] #다시 설정 -GAX <- na.omit(GAX) -lagret <- (GAX - Lag(GAX,20))/Lag(GAX,20) -direction[lagret >0.02]<-"Up" -direction[lagret < -0.02]<-"Down" -direction[lagret < 0.02 & lagret > -0.02]<-"Nowhere" -isdir <- 
direction[isrow] -osdir <- direction[osrow] -model <- svm(norm_isGAX, as.factor(isdir)) -model -pred <- predict(model, norm_osGAX) -head(pred) -table(pred,osdir) -model -#적중률 -sum(diag(table(pred,osdir)))/sum(table(pred,osdir)) -####서포트벡터 트레이딩 후 Cummulative return 구하는 코딩 -signal <- ifelse(pred == "Up", 1, ifelse(pred == "Down",-1,0)) -#테스트할 날짜 데이터 집합 -testrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -testrow -#리턴 구하는 공식과 그 공식 활용 테스트 날짜별 리턴 -ret <- GAX/lag(GAX) -1 -ret <- ret[testrow] -ret -cost <- 0 -trade_ret <- ret * Lag(signal) - cost -length(signal) -dim(ret) -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -### K-clustering ### -rm(list=ls()) -#Data -getSymbols("GOLD.AX", src = "yahoo") -GAX <- GOLD.AX -GAX <- GAX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -#variable -avg10 <- rollapply(GAX, 10, mean) -avg20 <- rollapply(GAX, 20, mean) -std10 <- rollapply(GAX,10,sd) -std20 <- rollapply(GAX,20,sd) -rsi5 <- RSI(GAX, 5, "SMA") -rsi14 <- RSI(GAX, 14, "SMA") -macd12269 <- MACD(GAX, 12, 26, 9, "SMA") -macd7205 <- MACD(GAX, 7, 20, 5, "SMA") -bbands <- BBands(GAX, 20, "SMA", 2) -direction <- NULL -direction[GAX > Lag(GAX, 20)] <- 1 -direction[GAX < Lag(GAX, 20)] <- 0 -GAX <- cbind(GAX, avg10, avg20, std10, std20, rsi5, rsi14, macd12269, macd7205, bbands, direction) -dm <- dim(GAX) -dm -colnames(GAX)[dm[2]] -colnames(GAX)[dm[2]] <- "Direction" -colnames(GAX) -issd <- "2013-01-01" -ised <- "2017-12-31" -ossd <- "2018-01-01" -osed <- "2018-12-31" -isrow <- which(index(GAX) >= issd & index(GAX) <= ised) -osrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -isGAX <- GAX[isrow,] -osGAX <- GAX[osrow,] -#표준화 -isme <- apply(isGAX, 2, mean) -isstd <- apply(isGAX, 2, sd) -isidn <- matrix(1, dim(isGAX)[1], dim(isGAX)[2]) -norm_isGAX <- (isGAX - t(isme * t(isidn))) / t(isstd * t(isidn)) -dm<-dim(isGAX) -norm_isGAX[,dm[2]] <- direction[isrow] -osidn <- matrix(1,dim(osGAX)[1], dim(osGAX)[2]) 
-norm_osGAX <- (osGAX - t(isme*t(osidn))) / t(isstd*t(osidn)) -dm <- dim(osGAX) -norm_osGAX[,dm[2]] <- direction[osrow] -clusters <- 3 -set.seed(1) -#디렉션 제거 -dm<- dim(isGAX) -isGAX[,-dm[2]] -isGAX <- isGAX[,-dm[2]] -norm_isGAX <- norm_isGAX[,-dm[2]] -dm<-dim(osGAX) -osGAX<-osGAX[,-dm[2]] -norm_osGAX<-norm_osGAX[,-dm[2]] -#isdji 바탕으로 클러스터화 모델 -model <- kmeans(norm_isGAX, clusters) -head(model$cluster) -model$cluster -model$center -model$size -#클러스터 내제곱합과 총제곱합 비율 최소화 -model$tot.withinss -model$totss -model$tot.withinss/model$totss -ospredict <- cl_predict(model, norm_osGAX) -norm_osGAX -head(ospredict) -ospredict -GAX <- GOLD.AX -GAX <- GAX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -lagret <- (GAX - Lag(GAX,20))/Lag(GAX,20) -direction[lagret >0.02]<-"Up" -direction[lagret < -0.02]<-"Down" -direction[lagret < 0.02 & lagret > -0.02]<-"Nowhere" -isdir <- direction[isrow] -osdir <- direction[osrow] -neighborhood <- 3 -set.seed(1) -dim(norm_isGAX) -dim(norm_osGAX) -model <- knn(norm_isGAX, norm_osGAX, isdir, neighborhood) -model -head(model) -summary(model) -matrix <- confusionMatrix(table(model, osdir)) -matrix -diag(matrix$table) -#for 사용해 confusionmatrix의 행렬 계산, 총 대각선 요소-총대각선 요소 수/총데이터요소수 -accuracy<- NULL -for(i in c(1:100)){ -model <- knn(isGAX, osGAX, isdir, i) -matrix <- confusionMatrix(table(model,osdir)) -diag <- sum(diag(matrix$table)) -total <- sum(matrix$table) -accuracy[i] <- (total - diag)/total +plot(ROCperf) +data("GermanCredit") +FraudData <- GermanCredit[,1:10] +head(FraudData) +# +len <- dim(FraudData)[1] +train <- sample(1:len, 0.8*len) +TrainData <- FraudData[train,] +TestData <- FraudData[-train,] +library(randomForest) +fraud_model <- randomForest(Class ~. 
, data=TrainData, ntree =50, proximity = TRUE) +print(fraud_model) +plot(fraud_model) +plot(fraud_model) +importance(fraud_model) +# +TestPred <- predict(fraud_model, newdata = TestData) +table(TestPred, TestData$Class) +library(PerformanceAnalytics) +data(edhec) +data <- edhec["1999", 3:5] +data +colnames(data) = c("DC", "EM", "EMN") +data +# +wts <- xtx(matrix(c(0.3, 0.3, 0.4), nrow =1, ncol = 3), as.Date("1998-12-31")) +# +wts <- wtx(matrix(c(0.3, 0.3, 0.4), nrow =1, ncol = 3), as.Date("1998-12-31")) +# +wts <- xts(matrix(c(0.3, 0.3, 0.4), nrow =1, ncol = 3), as.Date("1998-12-31")) +colnames(wts) <- colnames(data) +wts +# +Return.portfolio(data, weights = wts, rebalance_on = "months", verbose =TRUE) +library(randomForest) +library(mlbench) +library(caret) +data("Shuttle") +Analsis_Data <- head(Shuttle, 10000) +Analysis_Data <- head(Shuttle, 10000) +X <- Analysis_Data +X <- Analysis_Data[,1:9] +Y <- Analysis_Data[,10] +X <- Analysis_Data[,1:9] #~9열 +Y <- Analysis_Data[,10] #10열 +control <- trainControl(method = "repeatedcv", number =5, repeats = 3) +seed <- 4 +metric <- "Accuracy" +set.seed(seed) +Count_var <- sqrt(ncol(X)) +tunegrid <- expand.grid(.mtry = Count_var) +set.seed(seed) +Count_var <- sqrt(ncol(X)) +tunegrid <- expand.grid(.mtry = Count_var) +rf_baseline <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control) +print(rf_baseline) +plot(rf_gridsearch_method) +# +control <- trainControl(method = "repeatedcv", number =5, repeats = 3, search= "grid") +set.seed(seed) +tunegrid <- expand.grid(.mtry = c(1:8)) +rf_gridsearch_method <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control) +set.seed(seed) +tunegrid <- expand.grid(.mtry = c(1:8)) +rf_gridsearch_method <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control) +# +control <- trainControl(method = "repeatedcv", number =5, 
repeats = 3, search= "grid") +set.seed(seed) +tunegrid <- expand.grid(.mtry = c(1:8)) +rf_gridsearch_method <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control) +#유전자알고리즘 +library(genalg) +library(ggplot2) +data("Shuttle") +Analysis_Data <- head(Shuttle, 10000) +X <- Analysis_Data[,1:9] #~9열 +Y <- Analysis_Data[,10] #10열 +control <- trainControl(method = "repeatedcv", number =5, repeats = 3) +seed <- 4 +metric <- "Accuracy" +set.seed(seed) +Count_var <- sqrt(ncol(X)) +tunegrid <- expand.grid(.mtry = Count_var) +rf_baseline <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control) +print(rf_baseline) +# better tool +control <- trainControl(method = "repeatedcv", number =5, repeats = 3, search= "grid") +set.seed(seed) +tunegrid <- expand.grid(.mtry = c(1:8)) +rf_gridsearch_method <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control) +print(rf_gridsearch_method) +plot(rf_gridsearch_method) +plot(rf_gridsearch_method) +View(rf_gridsearch_method) +plot(rf_gridsearch_method +plot(rf_gridsearch_method) +plot(rf_gridsearch_method) +plot(data) +plot(rf_gridsearch_method) +print(rf_gridsearch_method) +plot(rf_gridsearch_method) +source('C:/Users/Shinhyunjin/Dropbox/금융공학/R Programming/Learning Quantitative Finance with R/Ch8 Optimization.R', encoding = 'UTF-8', echo=TRUE) +InputDataset <- data.frame(Stocks = c("Stock1", "Stock2", "Stock3","Stock4","Stock5", "Stock6"), retruns = c(10,11,15,20,12,13), weight = c(0.1, 0.2, 0.1, 0.2, 0.2, 0.3)) +WTlimit <- 1 +InputDataset +# +evaluationFunc <- function(x){ +current_solution_returns <- x %*% InputDataset$retruns +current_solution_weight <- x %*% InputDataset$weight +if(current_solution_weight > WTlimit) +return(0) else return(-current_solution_returns) +} +# +GAmodel <- rbga.bin(size =6, popSize =100, iters =50, mutationChange = 0.01, elitism = T, 
evalFunc = evaluationFunc) +cat(summary(GAmodel)) +# +GAmodel <- rbga.bin(size =6, popSize =100, iters =50, mutationChance = 0.01, elitism = T, evalFunc = evaluationFunc) +cat(summary(GAmodel)) +install.packages("GA") +library(GA) +install.packages("GA") +install.packages("GA") +library(PerformanceAnalytics) +library(randomForest) +library(mlbench) +library(caret) +#유전자알고리즘 +library(genalg) +library(ggplot2) +install.packages("GA") +library(GA) +InputDataset <- data.frame(Stocks = c("Stock1", "Stock2", "Stock3","Stock4","Stock5", "Stock6"), retruns = c(10,11,15,20,12,13), weight = c(0.1, 0.2, 0.1, 0.2, 0.2, 0.3)) +WTlimit <- 1 +InputDataset +# +evaluationFunc <- function(x){ +current_solution_returns <- x %*% InputDataset$retruns +current_solution_weight <- x %*% InputDataset$weight +if(current_solution_weight > WTlimit) +return(0) else return(-current_solution_returns) +} +# +GAmodel <- rbga.bin(size =6, popSize =100, iters =50, mutationChance = 0.01, elitism = T, evalFunc = evaluationFunc) +cat(summary(GAmodel)) #GA result에서 제외되어야할 주식의 비중을 말해준다. 
+# +# +data(economics) +Data_Analysis <- data.frame(economics[,2:4]) +head(Data_Analysis) } -accuracy -plot(accuracy,type = "l") -plot(accuracy,label = "Accuracy of K % " ,type = "l") -#for 사용해 confusionmatrix의 행렬 계산, 총 대각선 요소-총대각선 요소 수/총데이터요소수 -accuracy<- NULL -for(i in c(1:100)){ -model <- knn(isGAX, osGAX, isdir, i) -matrix <- confusionMatrix(table(model,osdir)) -diag <- sum(diag(matrix$table)) -total <- sum(matrix$table) -accuracy[i] <- diag/total +return(SSE) +OLS_GA <- funciton(Data_Analysis, a0, a1, a2){ +attach(Data_Analysis, warn.conflicts = F) +Y_hat <- a0 + a1*pop + a2*psavert +SSE = t(pce-Y_hat) %*% (pce-Y_har) +detach(Data_Analysis) +return(SSE) +} +# +OLS_GA <- funciton(Data_Analysis, a0, a1, a2){ +attach(Data_Analysis, warn.conflicts = F) +Y_hat <- a0 + a1*pop + a2*psavert +SSE = t(pce-Y_hat) %*% (pce-Y_har) +detach(Data_Analysis) +return(SSE) +} +# +OLS_GA <- funciton(Data_Analysis, a0, a1, a2){ +attach(Data_Analysis, warn.conflicts = F) +Y_hat <- a0 + a1*pop + a2*psavert +SSE = t(pce-Y_hat) %*% (pce-Y_har) +detach(Data_Analysis) +return(SSE) +} +detach(Data_Analysis) +OLS_GA <- funciton(Data_Analysis, a0, a1, a2){ +attach(Data_Analysis, warn.conflicts = F) +Y_hat <- a0 + a1*pop + a2*psavert +SSE = t(pce-Y_hat) %*% (pce-Y_hat) +detach(Data_Analysis) +return(SSE) +} +Data_Analysis +# +OLS_GA <- function(Data_Analysis, a0, a1, a2){ +attach(Data_Analysis, warn.conflicts = F) +Y_hat <- a0 + a1*pop + a2*psavert +SSE = t(pce-Y_hat) %*% (pce-Y_hat) +detach(Data_Analysis) +return(SSE) +} +# +ga.OLS_GA <- ga(type = 'real-valued', min= c(-100,-100,-100), max = c(100,100,100), popSize=500, maxiter = 500, names=c('intercept', 'pop','psavert'), keepBest=T, fitness = function(a) - OLS_GA(Data_Analysis, a[1],a[2],a[3])) +summary(ga.OLS_GA) +install.packages("fOptions") +library(fOptions) +model <- GBSOption(TypeFlag = "c", S = 900, X = 950, Time = 1/4, r =0.02, sigma =0.22, b= 0.02) +# type : c or p, 기초자산, 행사가격, 만기, 무위험이자율, 변동성, 보유비용순 +model +GBSOption(TypeFlag = 
"p", S = 900, X = 950, Time = 1/4, r = 0.02, sigma = 0.22, b = 0.02) +CRRBinomialTreeOption(TypeFlag = "ce", S =900, X =950, Time = 1/4, r = 0.02, b = 0.02, sigma = 0.22, n = 3) +CRRBinomialTreeOption(TypeFlag = "pe", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma = 0.22, n = 3) +# +model <- BinomialTreeOption(TypeFlag = "ce", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma =0.22, n=3) +# +model <- BinomialTreeOption(TypeFlag = "ce", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma =0.22, n=3) +BinomialTreePlot(model, dy =1, xlab = "Time steps", ylab = "Option Value", xlim = c(0,4), ylim=c(-3,4)) +title(main = "Call Option Tree") +retrun(pr)} +retrun(pr)} +return(pr)} +return(pr)} +##함수로 정의 +func <- function(n){ +pr <- CRRBinomialTreeOption(TypeFlag = "ce", S = 900, X = 950, Time = 1/4, r = 0.02, b =0.02, sigma = 0.22, n = n)@price +return(pr) } -accuracy -plot(accuracy ,type = "l") -install.packages('tidyverse') -### ##K Cluster 모형 CumRet 구하는법 -GAX <- GOLD.AX -GAx <- GOLD.AX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -signal <- ifelse( model== "Up", 1, ifelse( model== "Down",-1,0)) -testrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -ret <- GAX/lag(GAX,1) -1 -ret <- ret[testrow] -ret -cost <- 0 -length(signal) -length(ret) -length(testrow) -trade_ret <- ret * Lag(signal) - cost -signal -length(signal) -dim(ret) -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -GAX <- GOLD.AX -GAx <- GOLD.AX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -GAX <- GOLD.AX -GAX <- GOLD.AX[,"GOLD.AX.Close"] -GAX <- na.omit(GAX) -signal <- ifelse( model== "Up", 1, ifelse( model== "Down",-1,0)) -testrow <- which(index(GAX) >= ossd & index(GAX) <= osed) -ret <- GAX/lag(GAX,1) -1 -ret <- ret[testrow] -ret -cost <- 0 -length(signal) -length(ret) -length(testrow) -trade_ret <- ret * Lag(signal) - cost -signal -length(signal) -dim(ret) -cumm_ret <- Return.cumulative(trade_ret) -anual_ret <- 
Return.annualized(trade_ret) -charts.PerformanceSummary(trade_ret) -plot(GAX) +# +price <- sapply(1:100, func) # 1~100반 +# +price <- sapply(1:100, func) # 1~100반 +# +price <- sapply(1:100, func) # 1~100반복 +plot(price, type="l", xlab - "Number of steps", ylab = "Option Value") +plot(price, type="l", xlab = "Number of steps", ylab = "Option Value") +bs_price <- GBSOption(TypeFlag = "c", S = 900, X = 950, Time = 1/4, r = 0.02, sigma = 0.22, b = 0.02)@price +abline(h = bs_price, col = 'red') +legend("topright", legend = c('CRR-price', 'BS-price'), col = c('black', 'red'), pch = 19) +title(main = "Call Option Pricing models") +GBSGreeks(Selection = "delta", TypeFlag = "c", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma = 0.22) +GBSGreeks(Selection = "gamma", TypeFlag = "c", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma =0.22) +# +portfolio <- sapply(c('c', 'p'), function(otype){sapply(500:1500, function(price){ +GBSGreeks(Selection = 'delta', +TypeFlag = otype, +S=price, X = 950, +Time = 1/4, r = 0.02, +b = 0.02, +sigma =0.22) +}) +}) +head(protfolio) +head(portfolio) +## Straddle Delta +plot(500:1500, rowSums(portfolio), type = 'l', xlab = 'underlying Price', ylab = 'Straddle Delta') +install.packages("RQuantLib", type = 'binary') +library(RQuantLib) +iv <- EuropeanOptionImpliedVolatility("call", 11.10, 100, 100, 0.01, 0.03 ,05, 0.4) +iv +iv_a <- AmericanOptionImpliedVolatility("call", 11.10, 100, 100, 0.01, 0.03 ,05, 0.4) +iv_a +library(fOptions) +install.packages("RQuantLib", type = 'binary') +library(RQuantLib) +install.packages("termstrc") +library(termstrc) +install.packages("termstrc") +library(termstrc) +install.packages("termstrc") +library(termstrc) +install.packages("termstrc") +library(termstrc) +library(termstrc) +library(termstrc) +install.packages("termstrc") +library(termstrc) +library(termstrc) +data(govbonds) +install.packages("termstrc +") +install.packages("termstrc", lib = "/data/Rpackges/") +install.packages("termstrc", lib = 
"C:/Users/Shinhyunjin/Documents/R/win-library/") +library(termstrc) +install.packages("~/R/win-library/3.4/termstrc.zip", repos = NULL, type = "win.binary") +install.packages("termstrc", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("termstrc", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("termstrc-master", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("termstrc-master", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("termstrc", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +library(termstrc, lib.loc= "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("termstrc.zip", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("termstrc", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("~/R/win-library/3.4/termstrc_1.3.7.tar.gz", repos = NULL, type = "source") +library(termstrc) +install.packages("~/R/win-library/3.4/termstrc.tar.gz", repos = NULL, type = "source") +install.packages("termstrc", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +library(termstrc) +install.packages("termstrc", lib = "C:/Users/Shinhyunjin/Documents/R/win-library/3.4/") +install.packages("https://cran.r-project.org/src/contrib/Archive/termstrc/") +install.packages("termstrc", repos="http://R-Forge.R-project.org") +library(termstrc) +library(rgl) +library(termstrc) +library(termstrc.tar.gz) +install.packages("termstrc", repos="http://R-Forge.R-project.org") +install.packages("termstrc", repos="http://R-Forge.R-project.org") +install.packages("CreditMetrics") +library(CreditMetrics) +rc <- c("AAA", "AA", "A", "BBB","BB","B", "CCC", "D") +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creidmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +raw.data 
+cm.cs(raw.data, lgd) +raw.data +type(raw.data) +dtype(raw.data) +raw.data <- data.frame(raw.data) +lgd <- 0.2 # 부도시손실률 +cm.cs(raw.data, lgd) +raw.data <- data.frame(raw.data) +raw.data +raw.data <- matrix(c(raw.data)) +raw.data +lgd <- 0.2 # 부도시손실률 +cm.cs(raw.data, lgd) +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +raw.data <- data.frame(raw.data) +### 9-2 신용파생상품 ### +rc <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "Default") +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +raw.data <- data.frame(raw.data) +raw.data <- matrix(c(raw.data), 8,8,dimnames = list(rc,rc), byrow=TRUE) +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +#raw.data <- data.frame(raw.data) +raw.data <- matrix(c(raw.data), 8,8,dimnames = list(rc,rc), byrow=TRUE) +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +View(raw.data) +raw.data +### 9-2 신용파생상품 ### +rc <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "Default") +#raw.data <- data.frame(raw.data) +raw.data <- matrix(c(raw.data), 8,8,dimnames = list(rc,rc), byrow=TRUE) +raw.data +### 9-2 신용파생상품 ### +rc <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "Default") +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +#raw.data <- data.frame(raw.data) +raw.data <- matrix(c(raw.data), dimnames = list(rc,rc), byrow=TRUE) +#raw.data <- data.frame(raw.data) +raw.data <- matrix(c(raw.data), dimnames = list(rc,rc)) +### 9-2 신용파생상품 ### +rc <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "Default") +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T, fileEncoding="UTF-8-BOM") +raw.data +raw.data[1,1] +raw.data <- raw.data[,1:8] +raw.data +raw.data <- 
read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data <- raw.data[,2:9] +raw.data +raw.data <- matrix(c(raw.data), dimnames = list(rc,rc)) +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data <- raw.data[1:8,2:9] +raw.data +raw.data +raw.data <- matrix(c(raw.data), 8,8,dimnames = list(rc,rc)) +raw.data +lgd <- 0.2 # 부도시손실률 +cm.cs(raw.data, lgd) +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data <- raw.data[1:8,2:9] +raw.data +raw.data <- matrix(c(raw.data), dimnames = list(rc,rc)) +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data +raw.data <- raw.data[1:8,2:9] +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data <- raw.data[0:8,2:9] +raw.data +raw.data <- matrix(c(raw.data)) +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data <- raw.data[2:9,2:9] +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', header=T , fileEncoding="UTF-8-BOM") +raw.data +View(raw.data) +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', fileEncoding="UTF-8-BOM") +raw.data <- raw.data[1:8,2:9] +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', fileEncoding="UTF-8-BOM") +raw.data +View(raw.data) +raw.data <- raw.data[],1:9] +raw.data <- raw.data[,1:9] +raw.data +View(raw.data) +raw.data <- raw.data[2:8, 2:8] +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', fileEncoding="UTF-8-BOM") +raw.data <- raw.data[1:8, 1:8] +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', fileEncoding="UTF-8-BOM") +raw.data <- 
raw.data[1:8, 2:9] +raw.data +raw.data <- matrix(raw.data, nrwo = 8, ncol = 8, byrow = TRUE, dimnames = list(rc,rc)) +raw.data <- matrix(raw.data, nrow = 8, ncol = 8, byrow = TRUE, dimnames = list(rc,rc)) +raw.data +lgd <- 0.2 # 부도시손실률 +cm.cs(raw.data, lgd) +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv', fileEncoding="UTF-8-BOM") +raw.data <- raw.data[1:8, 2:9] +#raw.data <- raw.data[1:8, 2:9] +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv',header= F fileEncoding="UTF-8-BOM") +#raw.data <- raw.data[1:8, 2:9] +raw.data +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv',header= F ,fileEncoding="UTF-8-BOM") +#raw.data <- raw.data[1:8, 2:9] +raw.data +raw.data <- matrix(raw.data, nrow = 8, ncol = 8, byrow = TRUE, dimnames = list(rc,rc)) +raw.data +raw.data <- matrix(c(raw.data), nrow = 8, ncol = 8, byrow = TRUE, dimnames = list(rc,rc)) +raw.data +lgd <- 0.2 # 부도시손실률 +cm.cs(raw.data, lgd) +#raw.data <- raw.data[1:8, 2:9] +raw.data +### 9-2 신용파생상품 ### +rc <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "Default") +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv',header= F ,fileEncoding="UTF-8-BOM") +raw.data +c(raw.data) +raw.data <- matrix(raw.data, nrow = 8, ncol = 8, byrow = TRUE, dimnames = list(rc,rc)) +raw.data <- read.csv('C:/Users/Shinhyunjin/Dropbox/data/creditmatrix.csv',header= F ,fileEncoding="UTF-8-BOM") +#raw.data <- raw.data[1:8, 2:9] +raw.data +raw.data <- matrix(raw.data, nrow = 8, ncol = 8, byrow = TRUE, dimnames = list(rc,rc)) +raw.data +M <- matrix(c(90.81, 8.33, 0.68, 0.06, 0.08, 0.02, 0.01, 0.01, +0.70, 90.65, 7.79, 0.64, 0.06, 0.13, 0.02, 0.01, +0.09, 2.27, 91.05, 5.52, 0.74, 0.26, 0.01, 0.06, +0.02, 0.33, 5.95, 85.93, 5.30, 1.17, 1.12, 0.18, +0.03, 0.14, 0.67, 7.73, 80.53, 8.84, 1.00, 1.06, +0.01, 0.11, 0.24, 0.43, 6.48, 83.46, 4.07, 5.20, +0.21, 0, 0.22, 1.30, 2.38, 11.24, 64.86, 19.79, +0, 0, 0, 0, 0, 0, 0, 100 +)/100, 8, 8, dimnames 
= list(rc, rc), byrow = TRUE) +lgd <- 0.2 # 부도시손실률 +cm.cs(raw.data, lgd) +cm.cs(M, lgd) +# +ead <- c(140000,100000,100000) # 부도시 exposure +N <- 3 # firms +n <- 50000 #난수 +r <- 0.03 #rf +rating <- c("BBB", "AA", "B") +firmnames <- c("Blizzard", "Activision", "Nexon") +alpha <- 0.99 +rho <- matrix(c(1,0.4,0.6,0.4,1,0.5,0.6,0.5,1),3,3,dimnames=list(firmnames, firmnames),byrow=TRUE) +cm.CVaR(M, lgd, ead, N, n,r,rho,alpha,rating) +pnl <- cm.gain(M, lgd, ead, N, n, r, rho,rating) +pnl +install.packages("credule") +library(credule) #신용파생상 +yct = c(1,2,3,4,5,7) +ycr = c(0.0050, 0.0070, 0.0080, 0.0100, 0.0120, 0.0150) +cct = c(1,3,5,7) +ccsp = c(0.99, 0.98, 0.95, 0.92) +tenors = c(1,3,5,7) +yct = c(1,2,3,4,5,7) #테너 (테너 : 채무발생일 ~ 만기일간 기한, 결제기간이라고함) +ycr = c(0.0050, 0.0070, 0.0080, 0.0100, 0.0120, 0.0150) #수익률곡선할인률 +cct = c(1,3,5,7) #테너 +ccsp = c(0.99, 0.98, 0.95, 0.92) #생존확률 +tenors = c(1,3,5,7) #만기 +r = 0.4 #회수율 +priceCDS(yct, ycr, cct, ccsp, tenor, r) +priceCDS(yct, ycr, cct, ccsp, tenors, r) +# +cdsSpreads = c(0.0050, 0.0070, 0.0090, 0.0110) +bootstrapCDS(yct,ycr,cct,ccsp,r) +install.packages("GUIDE") +library(GUIDE) +## 금리파생상품 ## +irswapvalue() +## 금리파생상품 ## +irswapvalue() +install.packages("fExoticOptions") +library(fExoticOptions) +price <- GeometricAverageRateOption("c", 110, 120, 0.5, 0.03, 0.05 ,0.1) +price +#2번 방법 +TurnbullWakemanAsianApproxOption(TypeFlag = "p", S = 100, SA = 102, X = 120, Time = 0.5, +time = 0.25, tau = 0, r = 0.03, b = 0.05, sigma = 0.1)@price +#3번 방법 +LevyAsianApproxOption(TypeFlag = "p", S = 100, SA = 102, X = 120, Time = 0.5, +time = 0.25, r = 0.03, b = 0.05, sigma = 0.1) +#3번 방법 +LevyAsianApproxOption(TypeFlag = "p", S = 100, SA = 102, X = 120, Time = 0.5, +time = 0.25, r = 0.03, b = 0.05, sigma = 0.1)@price +# Down and Out +StandardBarrierOption(TypeFlag = "cdo", S =100, X=90, H = 95, K = 3, Time=0.5, r = 0.08, b = 0.04, sigma = 0.25)@price +#2번 Up and out down and out call (Double Barrier) +DoubleBarrierOption(TypeFlag = "co", S = 
100,X=100, L = 50, U = 150, TIme =0.25, +r = 0.1 ,b=0.1, sigma = 0.15, delta1 = -0.1, delta2 = 0.1)@price +#2번 Up and out down and out call (Double Barrier) +DoubleBarrierOption(TypeFlag = "co", S = 100,X=100, L = 50, U = 150, Time =0.25, +r = 0.1 ,b=0.1, sigma = 0.15, delta1 = -0.1, delta2 = 0.1)@price +#3번 룩백 Barrier Up and out +LookBarrierOption(TypeFlag = "cuo", S = 100, X = 100, H = 130, Time1 = 0.25, Time2 = 1, r=0.1,b=0.1, sigma=0.15)@price +#3번 룩백 Barrier Up and out +LookBarrierOption(TypeFlag = "cuo", S = 100, X = 100, H = 130, time1 = 0.25, Time2 = 1, r=0.1,b=0.1, sigma=0.15)@price +#4번 Gap Digital +GapOption(TypeFlag = "c", S = 50, X1= 50, X2= 57, Time = 0.5, r = 0.09, b = 0.09, sigma = 0.20) +## 5번 Cash or Nothing : 만기시점에 기초자산가격이 행사가격에 도달할 경우 미리 정한가 지불 ## +CashOrNothingOption(TypeFlag = "p", S = 100, X = 80, K = 10, Time = 9/12, r = 0.06, b = 0, sigma = 0.35) +TwoAssetCashOrNothingOption(TypeFlag = "c", S1 = 100, S2 = 100, X1 = 110, X2 = 90, +K = 10, Time = 0.5, r=0.1, b1 = 0.05, b2= 0.06, sigma1 = 0.2, sigma2=0.25, +rho = 0.5)@price diff --git a/Ch1 Basic R.R b/Ch1 Basic R.R new file mode 100644 index 0000000..b3e97e5 --- /dev/null +++ b/Ch1 Basic R.R @@ -0,0 +1,197 @@ +### Ch1. R 속으로 ### + +## 패키지 설치 ## +#자동설치법 +install.packages("ggplot2") +install.packages("forecast") +library(ggplot2) +library(forecast) +#수동설치법 +install.packages("ggplot2", lib = "/data/Rpackages/") +library(ggplot2, lib.loc = "/data/Rpackages/") + +## Data ## +#class(변수)는 변수의 데이터 형태를 말해준다. +# Character +a <- "hello" +print(class(a)) +# numeric +b <- 2.5 +print(class(b)) +# integer +c <- 6L +print(class(c)) +#complex +d <- 1 + 2i +print(class(d)) +# Logic +e<- FALSE +print(class(e)) + +## Data 유형 ## +#1. Vector + +a <- "Quantitative" +b <- "Finance" +c(a,b) + +v <- c(1,2,3) +v + +#2. 
List + +list1 = list(c(4,5,6), "hello" ,24.5) +print(list1) +print(list1[2]) #두번째 요소 + +#list merge +list1 <- list(5,6,7) #3행 +list2 <- list("a","b","c") #3행 +combined_list <- c(list1, list2) +print(combined_list) #6행 + +#3.Matrix +m <- matrix(c(1,2,3,4,5,6), nrow =2, ncol =3) #nrow행, ncol열 +print(m) + +#4. array +a <- array(c(4,5), dim = c(3,3,2)) #c(행,열,차원) +print(a) + +#5. Factor + +a <- c(2,3,4,2,3) +fact <- factor(a) #서로 다른 값을 구분해 저장 +print(fact) +print(nlevels(fact)) #서로 다른 값의 개수 + +#6. Data Frame + +data <- data.frame( + Name = c("A","B","c"), age = c(18,20,23), gender = c("F","M","M") +) +print(data) + + +## 데이터 내보내고 갖고 오기 import and export ## + +print(getwd()) #디렉토리 경로 +#setwd는 디렉토리를 설정해준다. + +#csv +data <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/gold.csv") +data +print(is.data.frame(data)) #dataframe을 확인 + +write.csv(data,"result.csv") #csv 쓰기 +output <- read.csv("result.csv") +print(output) + +#xlsx +install.packages('rJava') +library(rJava) +install.packages("xlsx", INSTALL_opts=c("--no-multiarch")) +any(grepl("xlsx", installed.packages())) #설치확인 +library(xlsx) +xlsxdata <- read.xlsx("C:/Users/Shinhyunjin/Dropbox/data/hw1data.xlsx", sheetIndex=1) + +output <- write.xlsx(data, "result.xlsx") +output <- read.csv("result.csv") +print(output) + +#Web data + +URL <- "http://finance.google.co.uk/finance/historical?q=GOOGL ++&startdate=JAN+01%2c+2016 ++&enddate=DEC+31%2c+2016 ++&output=csv" +google<- as.data.frame(read.csv(URL)) +head(google) + +#Database +install.packages("RMySQL") +library(RMySQL) + +## 함수 ## +#value까지 순차적으로 제곱을 반복적으로 도출하는 함수 +findingSqrFunc <- function(value){ + for(j in 1:value){ + sqr <- j^2 + print(sqr) + } +} + +findingSqrFunc(4) + +#인수없이 5의 배수 도출하는 함수 + +function_test <- function(){ + for(i in 1:3){ + print(i*5) + } +} +function_test() + +#인수가 있는 함수 +function_test2 <- function(a,b,c){ + result<-a*b+c + print(result) +} +function_test2(1,2,3) + +### 반복문 ### + +#1. if +x <-5 +if(x>0){ + print("참") +} else{ + print("거짓") +} + +#2. 
for 정의된 갯수만큼 실행 +var <- c(3,6,8,9,11,16) +counter <- 0 +for (val in var){ + if(val%%2 !=0) counter=counter+1 #홀수를 세는 코드 -> 2의 배수가 아니다! +} +print(counter) + +#3. While 계속 반복함 +var <- c("hello") +counter <- 4 +while(counter < 7){ + print(var) + counter = counter+1 +} + +#4. apply + +sample = matrix(c(1:10), nrow =5, ncol = 2) #5행 2열 +apply(sample, 1, sum) # 데이터, 행별(2이면 열별) , 합 의미 + +#5. sapply +sapply(1:5, function(x) x^3) + +#6. break + +vec <- c("hello") +counter <- 5 +repeat { + print(vec) + counter <- counter +1 + if(counter>8){ + break + } +} + +#7. next +vec<- c(2,3,4,5,6) +for (i in vec){ + if(i==4){ + next # 4에서 건너뛰는데 이때 4를 제외하고 건너뜀 (next) + } + print(i) +} + } +} \ No newline at end of file diff --git a/Ch1. Basic R b/Ch1. Basic R new file mode 100644 index 0000000..86c4c3a --- /dev/null +++ b/Ch1. Basic R @@ -0,0 +1,140 @@ +### Ch1. R 속으로 ### + +## 패키지 설치 ## +#자동설치법 +install.packages("ggplot2") +install.packages("forecast") +library(ggplot2) +library(forecast) +#수동설치법 +install.packages("ggplot2", lib = "/data/Rpackages/") +library(ggplot2, lib.loc = "/data/Rpackages/") + +## Data ## +#class(변수)는 변수의 데이터 형태를 말해준다. +# Character +a <- "hello" +print(class(a)) +# numeric +b <- 2.5 +print(class(b)) +# integer +c <- 6L +print(class(c)) +#complex +d <- 1 + 2i +print(class(d)) +# Logic +e<- FALSE +print(class(e)) + +## Data 유형 ## +#1. Vector + +a <- "Quantitative" +b <- "Finance" +c(a,b) + +v <- c(1,2,3) +v + +#2. List + +list1 = list(c(4,5,6), "hello" ,24.5) +print(list1) +print(list1[2]) #두번째 요소 + +#list merge +list1 <- list(5,6,7) #3행 +list2 <- list("a","b","c") #3행 +combined_list <- c(list1, list2) +print(combined_list) #6행 + +#3.Matrix +m <- matrix(c(1,2,3,4,5,6), nrow =2, ncol =3) #nrow행, ncol열 +print(m) + +#4. array +a <- array(c(4,5), dim = c(3,3,2)) #c(행,열,차원) +print(a) + +#5. Factor + +a <- c(2,3,4,2,3) +fact <- factor(a) #서로 다른 값을 구분해 저장 +print(fact) +print(nlevels(fact)) #서로 다른 값의 개수 + +#6. 
Data Frame + +data <- data.frame( + Name = c("A","B","c"), age = c(18,20,23), gender = c("F","M","M") +) +print(data) + + +## 데이터 내보내고 갖고 오기 import and export ## + +print(getwd()) #디렉토리 경로 +#setwd는 디렉토리를 설정해준다. + +#csv +data <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/gold.csv") +data +print(is.data.frame(data)) #dataframe을 확인 + +write.csv(data,"result.csv") #csv 쓰기 +output <- read.csv("result.csv") +print(output) + +#xlsx +install.packages('rJava') +library(rJava) +install.packages("xlsx", INSTALL_opts=c("--no-multiarch")) +any(grepl("xlsx", installed.packages())) #설치확인 +library(xlsx) +xlsxdata <- read.xlsx("C:/Users/Shinhyunjin/Dropbox/data/hw1data.xlsx", sheetIndex=1) + +output <- write.xlsx(data, "result.xlsx") +output <- read.csv("result.csv") +print(output) + +#Web data + +URL <- "http://finance.google.co.uk/finance/historical?q=GOOGL ++&startdate=JAN+01%2c+2016 ++&enddate=DEC+31%2c+2016 ++&output=csv" +google<- as.data.frame(read.csv(URL)) +head(google) + +#Database +install.packages("RMySQL") +library(RMySQL) + +## 함수 ## +#value까지 순차적으로 제곱을 반복적으로 도출하는 함수 +findingSqrFunc <- function(value){ + for(j in 1:value){ + sqr <- j^2 + print(sqr) + } +} + +findingSqrFunc(4) + +#인수없이 5의 배수 도출하는 함수 + +function_test <- function(){ + for(i in 1:3){ + print(i*5) + } +} +function_test() + +#인수가 있는 함수 +function_test2 <- function(a,b,c){ + result<-a*b+c + print(result) +} +function_test2(1,2,3) \ No newline at end of file diff --git a/Ch1. Basic R Programming b/Ch1. Basic R Programming new file mode 100644 index 0000000..a4cce2b --- /dev/null +++ b/Ch1. Basic R Programming @@ -0,0 +1 @@ +### Ch1. Basic R Programming ## diff --git a/Ch1.R b/Ch1.R new file mode 100644 index 0000000..b3e97e5 --- /dev/null +++ b/Ch1.R @@ -0,0 +1,197 @@ +### Ch1. 
R 속으로 ### + +## 패키지 설치 ## +#자동설치법 +install.packages("ggplot2") +install.packages("forecast") +library(ggplot2) +library(forecast) +#수동설치법 +install.packages("ggplot2", lib = "/data/Rpackages/") +library(ggplot2, lib.loc = "/data/Rpackages/") + +## Data ## +#class(변수)는 변수의 데이터 형태를 말해준다. +# Character +a <- "hello" +print(class(a)) +# numeric +b <- 2.5 +print(class(b)) +# integer +c <- 6L +print(class(c)) +#complex +d <- 1 + 2i +print(class(d)) +# Logic +e<- FALSE +print(class(e)) + +## Data 유형 ## +#1. Vector + +a <- "Quantitative" +b <- "Finance" +c(a,b) + +v <- c(1,2,3) +v + +#2. List + +list1 = list(c(4,5,6), "hello" ,24.5) +print(list1) +print(list1[2]) #두번째 요소 + +#list merge +list1 <- list(5,6,7) #3행 +list2 <- list("a","b","c") #3행 +combined_list <- c(list1, list2) +print(combined_list) #6행 + +#3.Matrix +m <- matrix(c(1,2,3,4,5,6), nrow =2, ncol =3) #nrow행, ncol열 +print(m) + +#4. array +a <- array(c(4,5), dim = c(3,3,2)) #c(행,열,차원) +print(a) + +#5. Factor + +a <- c(2,3,4,2,3) +fact <- factor(a) #서로 다른 값을 구분해 저장 +print(fact) +print(nlevels(fact)) #서로 다른 값의 개수 + +#6. Data Frame + +data <- data.frame( + Name = c("A","B","c"), age = c(18,20,23), gender = c("F","M","M") +) +print(data) + + +## 데이터 내보내고 갖고 오기 import and export ## + +print(getwd()) #디렉토리 경로 +#setwd는 디렉토리를 설정해준다. 
+ +#csv +data <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/gold.csv") +data +print(is.data.frame(data)) #dataframe을 확인 + +write.csv(data,"result.csv") #csv 쓰기 +output <- read.csv("result.csv") +print(output) + +#xlsx +install.packages('rJava') +library(rJava) +install.packages("xlsx", INSTALL_opts=c("--no-multiarch")) +any(grepl("xlsx", installed.packages())) #설치확인 +library(xlsx) +xlsxdata <- read.xlsx("C:/Users/Shinhyunjin/Dropbox/data/hw1data.xlsx", sheetIndex=1) + +output <- write.xlsx(data, "result.xlsx") +output <- read.csv("result.csv") +print(output) + +#Web data + +URL <- "http://finance.google.co.uk/finance/historical?q=GOOGL ++&startdate=JAN+01%2c+2016 ++&enddate=DEC+31%2c+2016 ++&output=csv" +google<- as.data.frame(read.csv(URL)) +head(google) + +#Database +install.packages("RMySQL") +library(RMySQL) + +## 함수 ## +#value까지 순차적으로 제곱을 반복적으로 도출하는 함수 +findingSqrFunc <- function(value){ + for(j in 1:value){ + sqr <- j^2 + print(sqr) + } +} + +findingSqrFunc(4) + +#인수없이 5의 배수 도출하는 함수 + +function_test <- function(){ + for(i in 1:3){ + print(i*5) + } +} +function_test() + +#인수가 있는 함수 +function_test2 <- function(a,b,c){ + result<-a*b+c + print(result) +} +function_test2(1,2,3) + +### 반복문 ### + +#1. if +x <-5 +if(x>0){ + print("참") +} else{ + print("거짓") +} + +#2. for 정의된 갯수만큼 실행 +var <- c(3,6,8,9,11,16) +counter <- 0 +for (val in var){ + if(val%%2 !=0) counter=counter+1 #홀수를 세는 코드 -> 2의 배수가 아니다! +} +print(counter) + +#3. While 계속 반복함 +var <- c("hello") +counter <- 4 +while(counter < 7){ + print(var) + counter = counter+1 +} + +#4. apply + +sample = matrix(c(1:10), nrow =5, ncol = 2) #5행 2열 +apply(sample, 1, sum) # 데이터, 행별(2이면 열별) , 합 의미 + +#5. sapply +sapply(1:5, function(x) x^3) + +#6. break + +vec <- c("hello") +counter <- 5 +repeat { + print(vec) + counter <- counter +1 + if(counter>8){ + break + } +} + +#7. 
next +vec<- c(2,3,4,5,6) +for (i in vec){ + if(i==4){ + next # 4에서 건너뛰는데 이때 4를 제외하고 건너뜀 (next) + } + print(i) +} + } +} \ No newline at end of file diff --git a/Ch2 Basic Statistics.R b/Ch2 Basic Statistics.R new file mode 100644 index 0000000..8a31e18 --- /dev/null +++ b/Ch2 Basic Statistics.R @@ -0,0 +1,257 @@ +### Ch2. Basic Statistics : Modeling ### + + +### 1. Probability Distribution ### + +#1. Normal Distribution # + +Sampledata = read.csv("C:/Users/Shinhyunjin/Dropbox/data/WTI etf.csv") +head(Sampledata) + +#누적밀도함수 dnorm +y <- dnorm(Sampledata$price, mean = mean(Sampledata$price), sd = sd(Sampledata$price, na.rm= FALSE)) +plot(Sampledata$price, y) + +#누적분포함수 pnorm +pnorm(.02, mean= mean(Sampledata$price), sd = sd(Sampledata$price, na.rm=FALSE)) + +#분위수함수 qnorm +qnorm(0.159837, mean= mean(Sampledata$price), sd=sd(Sampledata$price, na.rm=FALSE),lower.tail=FALSE) + +#난수행성함수 rnorm +rnorm(5, mean=mean(Sampledata$price), sd = sd(Sampledata$price, na.rm =FALSE)) + +#2. Lognormal Distribution + +#dlnorm 로그정규분포의 밀도함수 + +y <- dlnorm(Sampledata$volume, meanlog = mean(Sampledata$volume), sd=sd(Sampledata$volume, na.rm=FALSE) + ) +plot(Sampledata$volume, y) + +#plnorm 로그정규분포의 누적확률분포함수 +y <- plnorm(Sampledata$volume, meanlog = mean(Sampledata$volume), sdlog = sd(Sampledata$volume, na.r=FALSE)) +plot(Sampledata$volume,y) + +#qlnorm 로그정규분포의 q분위수 구하기 +#rlnorm 로그정ㅂ분포의 난수생성 + + +##3. 포아송 ## + +ppois(15, lambda = 10) +ppois(15, lambda = 10, lower= FALSE) #오른쪽 꼬리 확률 + +##4. 균등분포 이론- 연속균등분포 ## + +runif(10, min = 1, max= 5) + +##5. 극단값 이론 ## + +install.packages("POT") # peaks over threshold by generalized pareto distribution +library(POT) +# +data(ardieres) +abc <- ardieres[1:10000,] +events <- clust(abc,u=1.5, tim.cond=8/365, clust.max= TRUE) +par(mfrow = c(2,2)) +mrlplot(events[,"obs"]) +diplot(events) +tcplot(events[,"obs"], which = 1) +tcplot(events[,"obs"], which = 2) +# +obs <- events[,"obs"] +ModelFit <- fitgpd(obs, thresh= 5, "pwmu") +ModelFit + + +### 3. 
표본 추출 ### + +##3-1 무작위 표본추출 ## +#비복원 -> 중복을 불가하게한 무작위표본 추출 +RandomSample <- Sampledata[sample(1:nrow(Sampledata), 10, replace = FALSE),] +RandomSample +#복원 -> 중복을 허용한 무작위표본 추출 +RandomSample <- Sampledata[sample(1:nrow(Sampledata), 10, replace = TRUE),] +RandomSample + +## 3-2 층화 표본추출 ## + +install.packages("sampling") +library(sampling) +table(Sampledata$volume, Sampledata$price) + +#다른 그룹에서 표본 추출 + +Stratsubset = strata(Sampledata, c("volume", "price"), size = c(2,2,2,2), method="srswor") +Stratsubset + +### 4. 통계량 ### + +#평균값 +mean(Sampledata$price) +#중앙값 +median(Sampledata$price) +#최빈값 +findmode <- function(x){ + uniqx <- unique(x) + uniqx[which.max(tabulate(match(x, uniqx)))] +} +findmode(Sampledata$price) +#요약 +summary(Sampledata$price) +#적률 +library(e1071) +moment(Sampledata$price, order =3, center=TRUE) +#첨도 -> 뾰족한 정도 +kurtosis(Sampledata$price) +#왜도 -> 분포의 대칭성, 평균이 중앙갑보다 작으면 left-skewed, 크면 right-skewed. +skewness(Sampledata$price) #크다 -> right-skewed네 + +### 5. 상관관계 ### + +install.packages("Hmisc") +library(Hmisc) + +x <- Sampledata[,2:3] +x<-na.omit(x) +rcorr(as.matrix(x), type = "pearson") +acf(Sampledata$price) +pacf(Sampledata$price) +ccf(Sampledata$price, Sampledata$volume, main = "ccf plot") + +### 6. 가설검정 ### + +## 6-1 분산을 아는 모평균의 왼쪽꼬리검정 ## +xbar = 9.9 +mu0 = 10 +sig = 1.1 +n = 30 +z= (xbar-mu0)/(sig/sqrt(n)) +z + +alpha = 0.05 +z.alpha = qnorm(1-alpha) +-z.alpha + +pnorm(z) #0.05보다 크므로 채택 + +## 6-2 분산을 아는 모평균의 오른쪽꼬리검정 ## + +xbar = 5.1 +mu0 = 5 +sig = 0.25 +n = 30 +z = (xbar-mu0)/(sig/sqrt(n)) +z +alpha=0.5 +z.alpha = qnorm(1-alpha) +z.alpha +pnorm(z, lower.tail=FALSE) #0.05보다 작으므로 기각 + +## 6-3 분산을 아는 모평균의 양측검정 ## + +xbar = 1.5 +mu0 = 2 +sig = 0.1 +n = 30 +z= (xbar-mu0)/(sig/sqrt(n)) +z + +alpha=0.05 +z.half.alpha=qnorm(1-alpha/2) +c(-z.half.alpha, z.half.alpha) +2*pnorm(z) # 0.05보다 작으므로 귀무가설을 기각한다. + +## 6-4. 
분산을 모르는 t 모평균의 왼쪽꼬리검정 + +xbar = 0.9 +mu0 = 1 +sig = 0.1 +n = 30 +t = (xbar-mu0)/(sig/sqrt(n)) + +alpha= 0.05 +t.alpha = qt(1-alpha, df= n-1) +-t.alpha + +pt(t, df=n-1) # 0.05보다 작으므로 귀무가설을 기각한다 . + +## 6-5. 분산을 모르는 t 모평균의 오른쪽꼬리검정 + +xbar = 3.1 +mu0 = 3 +sig = 0.2 +n = 30 +t = (xbar-mu0)/(sig/sqrt(n)) +t +alpha= 0.05 +t.alpha = qt(1-alpha, df= n-1) +t.alpha + +alpha = 0.05 +t.alpha = qt(1-alpha, df = n-1) +t.alpha +pt(t, df = n-1, lower.tail = FALSE) #0.05보다 작으므로 귀무가설을 기각한다 . + +## 6-6. 분산을 모르는 모평균의 양측검정 \ + +xbar = 1.9 +mu0 = 2 +sig = 0.1 +n = 30 +t = (xbar-mu0)/(sig/sqrt(n)) +t + +alpha= 0.05 +t.half.alpha = qt(1-alpha, df= n-1) +c(-t.half.alpha, t.half.alpha) + +### 7. 파라미터 추정 ### + +## 7-1 MLE (Maximum Likelihood Estimation ##) ## +install.packages("stats4") +library(stats4) +set.seed(100) +NO_values <- 100 +Y <- rnorm(NO_values, mean =5, sd =1) +mean(Y) +sd(Y) +LogL <- function(mu, sigma){ + A = dnorm(Y, mu, sigma) + - sum(log(A)) +} + +mle(LogL, start = list(mu=2, sigma=2), method = "L-BFGS-B", lower = c(-Inf, 0), upper=c(Inf,Inf)) + +## 7-2 선형모델 + +Y <- Sampledata$price +X <- Sampledata$volume +fit <- lm (Y~X) +summary(fit) + +### 8 이상치검출 ### + +## 8-1 상자 +boxplot(Sampledata$volume, main = "Volume", boxwex=0.1) + +## 8-2 Local outlier factor dkfrhflwma + +install.packages("DMwR") +library(DMwR) +outlier.scores <- lofactor(Sampledata$volume, k =4) +plot(density(outlier.scores)) +order(outlier.scores, decreasing = T)[1:5] #상위 5개의 이상치 , 행번호가 출력됨 + +### 9. 표준화와 정규화 ### + +## 9-1 표준화 +scale(Sampledata$volume, center =TRUE, scale =FALSE) # 중심화 +scale(Sampledata$volume, center = TRUE, scale =TRUE) # 표준화 + +## 9-2 정규화 + +normalized = (Sampledata$volume-min(Sampledata$volume))/ + (max(Sampledata$volume)-min(Sampledata$volume)) +normalized diff --git a/Ch2.R b/Ch2.R new file mode 100644 index 0000000..e63f844 --- /dev/null +++ b/Ch2.R @@ -0,0 +1,3 @@ +### Ch2. 
Basic Statistics ### + +# diff --git a/Ch3 Statistical Analysis.R b/Ch3 Statistical Analysis.R new file mode 100644 index 0000000..2208f28 --- /dev/null +++ b/Ch3 Statistical Analysis.R @@ -0,0 +1,167 @@ +### Ch3. Statistical Analysis ### + + +### 1. Linear Regression ## + +data <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/rdata1.csv") +head(data) + +#scatter +Yprice = data$price.etf.autos +Xprice = data$price.etf.insurance +scatter.smooth(Yprice, Xprice, type="p",xlab = "ETF insur", ylab = "ETF autos") + +#regression +LinearR.lm = lm(Yprice ~ Xprice, data = data) +coeffs = coefficients(LinearR.lm);coeffs + +predict(LinearR.lm) + +summary(LinearR.lm)$r.squared +summary(LinearR.lm) + +#선형회귀모델의 신뢰구간 +Predictdata = data.frame(Xprice = 8000) +predict(LinearR.lm, Predictdata, interval = "confidence") # 예측값 신뢰구간 + +#잔차도 +LinearR.res = resid(LinearR.lm) +plot(Xprice, LinearR.res, ylab = "Residuals",xlab = "Xprice",main = "Residual Plot") +#표준화잔차도 +LinearRSTD.res = rstandard(LinearR.lm) +plot(Xprice, LinearRSTD.res, ylab = "Standardized Residuals", xlab = "Xprice", main = "Residual Plot") + +#오차의 정규분포 +qqnorm(LinearRSTD.res, ylab = "Standardized Residuals", xlab = "Normal Scores", main = "Error Normal Distribution plot") +qqline(LinearRSTD.res) + +### 2. Multiple Regression ### + +data2 <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/rdata1.csv") +Yprice <- data2$etf.kospi +X1price <- data2$price.etf.autos +X2price <- data2$price.etf.insurance +X3price <- data2$price.etf.energy.chemical +X4price <- data2$price.etf.bank +MultipleR.lm = lm(Yprice ~ X1price + X2price + X3price + X4price, data = data2) +summary(MultipleR.lm) + +#예측값 +newdata = data.frame(X1price = 13000, X2price = 7900, X3price = 11200, X4price = 7400) +predict(MultipleR.lm, newdata) +predict(MultipleR.lm, newdata, interval = "confidence")#confidence level prediction + +### 3. Multicollinearity " + +install.packages("car") +library(car) +vif(MultipleR.lm) + +### 4. 
ANOVA ### +data3 <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/rdata1.csv") +Yprice <- data3$etf.kospi +X1price <- data3$price.etf.autos +boxplot(Yprice ~ X1price) + +oneway.test(data3$etf.kospi ~ data3$price.etf.autos, var.equal = TRUE) + +### 4. Feature Selection ### +#상관관계 +data4 <- read.csv("C:/Users/Shinhyunjin/Dropbox/data/rdata1.csv") +Yprice <- data4$etf.kospi +X1price <- data4$price.etf.autos +X2price <- data4$price.etf.insurance +X3price <- data4$price.etf.energy.chemical +X4price <- data4$price.etf.bank +correlationMatrix <- cor(data4[,2:5]) +correlationMatrix + +### 5. 단계식 변수선택 ### + +install.packages("MASS") +library(MASS) +MultipleR.lm = lm(Yprice ~ X1price+X2price+X3price+X4price, data = data4) +step <- stepAIC(MultipleR.lm, direction = "both") +step$anova + +#분류에의한 변수선택 +install.packages("mlbench") +install.packages("caret") +library(mlbench) +library(caret) +library(randomForest) + +control <-rfeControl(functions = rfFuncs, method = "cv", number = 10) +Output <- rfe(data4[,3:6],data4[,2:3], sizes = c(3:6), rfeControl = control) +predictors(Output) +plot(Output, type = c("g","o")) + +###6. 웨이블릿 분석 ### + +install.packages("wavelets") +library(wavelets) +library(quantmod) + +getSymbols("HSPX", from = "2018-01-01", to = "2018-12-29") +HSPX + +hspx <- HSPX$HSPX.Close +ret_hspx <- Delt(hspx, k=1) +par(mfrow = c(2,1)) +plot(xts(hspx), type = "l") +plot(xts(ret_hspx), type = "l") + +head(hspx) +tail(hspx) + +#웨이블릿 변환 +hspx2 <- as.ts(hspx) +model <- wavelets::dwt(hspx2, filter = "la8", n.levels = 3) +model +model@W #웨이블릿계수 +model@V #척도화계수 +plot(model) + +#하르필터 +model2 <- wavelets::dwt(hspx2, filter = "haar", n.levels = 3) +plot(model2) + +#역이산 웨이블릿 +imodel <- idwt(model2, fast = TRUE) + +#다중해상도분석 +model3 <- mra(hspx2, filter = "la8", n.levels = 3) + +#최대중첩이산웨이블릿(MODWT) +model4 <- modwt(hspx2, filter = "la8",n.levels = 5) +plot.modwt(model4) + +### 7. 
고속푸리에변환 (FFT) ### + +model5 <- fft(hspx2) +rp = Re(model5) #실수부real part +ip = Im(model5) #허수부imaginary part +absmodel <- abs(model5) #절대값 +plot(absmodel) + +norm_absmodel <- absmodel[1:(length(hspx2)/2)] +Angle = atan2(ip,rp) + +spec_density <- spectrum(hspx2, method = c("pgram", "ar")) + +### 8. 힐버트 변환 ### + +install.packages("seewave", repos="http://cran.at.r-project.org/")수 +library(seewave) +model <- hilbert(hspx2,1) #1은 주파수 +summary(model) + +rp<-Re(model) +ip <-Im(model) +ifreq(hspx2, 1, ylim = c(0,0.0001)) +ifreq(hspx2, 1, phase = "TRUE", ylim= c(-0.5,1)) #위상을 준다 phase +phase_diff <- phase1 - phase2 +#값도출 +output = ifreq(hspx2, 1, plot =FALSE) +freq <- output$f +phase <- output$p diff --git a/Ch4 time series analysis.R b/Ch4 time series analysis.R new file mode 100644 index 0000000..ba52d0f --- /dev/null +++ b/Ch4 time series analysis.R @@ -0,0 +1,60 @@ +### Ch4 Time Series Analysis ### + +### 3. ARCH GARCH 시리즈 ### + +## 3-1 GARCH BAsic ## +install.packages("rugarch") +library(rugarch) + +gspec.ru <- ugarchspec(mean.model = list(armaOrder = c(0,0)),distribution = "std") +gfit.ru <- ugarchfit(gspec.ru, price) +coef(gfit.ru) + +#forecast +FutureForecast = ugarchforecast(gfit.ru, n.ahead=5) +FutureForecast + +## 3-2 EGARCH -> 지수형 GARCH로서 시장시나리오에 적합함 ## +# return form + no N/A data +return <- na.omit(return) +egarchetf.spec = ugarchspec(variance.model = list(model = "eGARCH", garchOrder=c(1,1)),mean.model = list(armaOrder = c(0,0))) +egarchetf.fit = ugarchfit(egarchetf.spec, return) +egarchetf.fit +coef(egarchetf.fit) + +FutureForecast = ugarchforecast(egarchetf.fit, n.ahead =5) +FutureForecast + +## 3-3 VGARCH -> 벡터 GARCH 및 다변량 GARCH라고 한다.## + +install.packages("rmgarch") +install.packages("PerformanceAnalytics") +library(rmgarch) +library(PerformanceAnalytics) + +getSymbols("114800.KS", from = "2018-01-01", to = "2018-12-31") +data1 <- `114800.KS` +price1 <- data1$`114800.KS.Close` +return1 <- Delt(price1) +return1 <- na.omit(return1) + 
+getSymbols("122630.KS", from = "2018-01-01", to = "2018-12-31") +data2 <- `122630.KS` +price2 <- data2$`122630.KS.Close` +return2 <- Delt(price2) +return2 <- na.omit(return2) + +data <- cbind(return1, return2) + +garch_spec = ugarchspec(mean.model = list(armaOrder = c(2,1)), variance.model = list(garchOrder = c(1,1),model = "sGARCH"),distribution.model = "norm") +dcc.garch_spec = dccspec(uspec = multispec(replicate(2, garch_spec)), dccOrder = c(1,1),distribution = "mvnorm") +dcc_fit = dccfit(dcc.garch_spec, data = data) +fcst = dccforecast(dcc_fit, n.ahead =5) +fcst + +## 3-4 DCC 동적조건부 상관관계-> 간결하고 설명력 좋음.## + +garch_spec2 = ugarchspec(mean.model = list(armaOrder = c(0,0)), variance.model = list(garchOrder = c(1,1),model = "sGARCH"),distribution.model = "norm") +dcc.garch_spec2 = dccspec(uspec = multispec(replicate(2, garch_spec2)), dccOrder = c(1,1),distribution = "mvnorm") +dcc_fit2 = dccfit(dcc.garch_spec2, data = data, fit.control = list(scale=TRUE)) +dcc_fit2 diff --git a/Ch5 Algorithm Trading.R b/Ch5 Algorithm Trading.R new file mode 100644 index 0000000..6487bf4 --- /dev/null +++ b/Ch5 Algorithm Trading.R @@ -0,0 +1,420 @@ +### Ch5 Algorithm Trading ### + +library(quantmod) +library(PerformanceAnalytics) +library(tseries) +library(fPortfolio) + +## 5-1 모멘텀 / 방향성 트레이딩 ## + +getSymbols("226490.KS", from = "2015-09-09", to= "2018-12-31") +getSymbols("169950.KS", from = "2015-09-09", to = "2018-12-31") + +kospietf <- `226490.KS` +kospietf_p <- kospietf$`226490.KS.Close` +kospietf_p <- na.omit(kospietf_p) + +kospietf_r <- Delt(kospietf_p, k = 1) +kospietf_r <- na.omit(kospietf_r) + +chinaetf <- `169950.KS` +chinaetf_p <- chinaetf$`169950.KS.Close` +chinaetf_p <- na.omit(chinaetf_p) + +chinaetf_r <- Delt(chinaetf_p, k = 1) +chinaetf_r <- na.omit(chinaetf_r) + +#추세분석 +par(mfrow = c(2,1)) +plot(kospietf_p,type="l") +plot(kospietf_r, type = "l") + +#날짜 나누기 +in_sd <- "2015-09-09" +in_ed <- "2017-12-31" +out_sd <- "2018-01-01" +out_ed <- Sys.Date() + +in_kospi <- 
kospietf_p[(index(kospietf_p) >= in_sd & index(kospietf_p) <= in_ed),] +in_kospi <- na.omit(in_kospi) +in_ret_kospit <- kospietf_r[(index(kospietf_r) >= in_sd & index(kospietf_r) <= in_ed),] +out_kospi <- kospietf_p[(index(kospietf_p) >= out_sd & index(kospietf_p) <= out_ed),] +out_kospi <- na.omit(out_kospi) +out_ret_kospit <- kospietf_r[(index(kospietf_r) >= out_sd & index(kospietf_r) <= out_ed),] + + +#Variables + +macd <- MACD(in_kospi, nFast=7, nSlow = 12, nSig = 15, maType="SMA", percent = FALSE) +#macd : 이동평균선 사이의 관계 -> 추세방향과 주가 움직임 분석에 용이 +bb <- BBands(in_kospi, n=20, maType="SMA", sd = 2) +#bb : 상하한선의 폭 분석, 변동성에 비례함, 과매수매도 분석 +signal <- NULL +signal <- ifelse(in_kospi > bb[,'up'] & macd[,'macd'] > macd[,'signal'],1,ifelse(in_kospi 추세방향과 주가 움직임 분석에 용이 +bb_o <- BBands(out_kospi, n=20, maType="SMA", sd = 2) +#bb : 상하한선의 폭 분석, 변동성에 비례함, 과매수매도 분석 +signal_o <- NULL +signal_o <- ifelse(out_kospi > bb_o[,'up'] & macd_o[,'macd'] > macd_o[,'signal'],1,ifelse(out_kospiub, 1, ifelse(diff roll_ub, -1, ifelse(hedge_ratio < roll_lb, 1, 0)) +lagsignal <- Lag(signal, 1) +signal <- ifelse(lagsignal == -1 & hedge_ratio > roll_me,-1,ifelse(lagsignal ==1 & hedge_ratio < roll_me, 1,0)) + +spread_return <- kospietf_r - chinaetf_r +trade_return <- spread_return * lag(signal) - cost + +### 5-4 공적분 기반 페어 트레이딩 ### +adf.test(kospietf_p) + +diff <- kospietf_p - Lag(kospietf_p, 1) #1차차분 +adf.test(diff[!is.na(diff)]) + +model <- lm(kospietf_p ~ chinaetf_p +0) +model +summary(model) +adf.test(as.ts(model$residuals)) + +plot(kospietf_p, type = "l", main = "KODEX KOSPI ETF & CHINA ETF") +par(mfrow = c(2,1)) +lines(chinaetf_p * model$coefficients, col = "red") +plot(as.xts(model$residuals), type = "l") + +roll_me <- rollapply(model$residuals, 14, mean) +roll_std <- rollapply(model$residuals, 14, sd) +n <- 1 +roll_ub <- roll_me + n * roll_std +roll_lb <- roll_me - n * roll_std +signal <- NULL +signal <- ifelse(model$residuals > roll_ub, -1, ifelse(model$residuals < roll_lb, 1, 0)) +lagsignal 
<- Lag(signal, 1) +signal <- ifelse(lagsignal ==-1 & model$residuals > roll_me, -1, ifelse(lagsignal ==1 & model$residuals < roll_me, 1, 0)) + +#### 5-4 CAPM #### +pfdata <- cbind(kospietf_r, chinaetf_r) + +rf <- rep(0, dim(pfdata)[1]) +model <- lm((pfdata[,2] -rf ) ~ (pfdata[,1]- rf)) +model + +CAPM.beta(pfdata[,2], pfdata[,1]) +CAPM.alpha(pfdata[,2], pfdata[,1]) +plot(as.ts(chinaetf_r), as.ts(kospietf_r), xlim = c(0,0.04), ylim = c(0,0.04),xlab = "CHINA ETF Return", ylab="KOSPI ETF Return") +abline(model, col = "red") + +#### 5-5 Multi Factor Model -> do not Run #### + +con = gzcon(url('http://www.systematicportfolio.com/sit.gz', 'rb')) +source(con) +close(con) + +dow.jones.components <- function(){ + url = 'http://money.cnn.com/data/dow30/' + txt = join(readLines(url)) + temp = gsub(pattern = '">', replacement = '', txt, perl =TRUE) + temp = gsub(pattern = '', replacement = '', temp, perl = TRUE) + temp = extract.table.from.webpage(temp, 'Volume', has.header=T) + trim(temp[,'Company']) +} +tickers=dow.jones.components() + +#ticker 크로울링 +data.fund <- new.env() +temp = paste(iif(nchar(tickers) <=3, 'NYSE:', 'NASDAQ:'), tickers, sep = '') +for(i in 1:len(tickers)) data.fund[[tickers[i]]] = fund.data(temp[i], 80) +save(data.fund, file = 'data.fund.Rdata') +#가격 크로울링 +data <- new.env() +getSymbols(tickers, src = 'yahoo', from = '1970-01-01', env = data, auto.assign = T) +for(i in ls(data)) data[[i]] = adjustOHLC(data[[i]], use.Adusted=T) +save(data,file='data.Rdata') +#날짜 변수 +date.fund.data <- function(data){ + quarter.end.date = as.Date(paste(data['quarter end date',],'/1',sep=''),'%Y/%m/%d') + quarterly.indicator = data['quarterly indicator',] + date.preliminary.data.loaded = as.Date(data['date preliminary data loaded',],],'%Y-%m-%d')+1 + months = seq(quarter.end.date[1], tail(quarter.end.date, 1)+365, by = '1 month') + index = match(quarter.end.date, months) + quarter.end.date = months[iif(quarterly.indicator =='4', index +3, index+2)+1]-1 + fund.date = 
date.preliminary.data.loaded + fund.date[is.na(fund.date)] = quarter.end.date[is.na(fund.date)] + return(fund.date) +} + +## fundamental analysis ## + +for(i in tickers){ + fund = data.fund[[i]] + fund.date = date.fund.data(fund) + EPS = get.fund.data('Diluted EPS from Total Operations', fund, fund.date, is.12m.rolling= T) + CSHO = get.fund.data('total common shares out', fund, fund.date) # common shares outstanding + CEQ = get.fund.data('total equity', fund, fund.date) #common equity + data[[i]] = merge(data[[i]], EPS, CSHO, CEQ) +} +# +bt.prep(data, align = 'keep.all', dates='1995::2011') +prices= data$prices +prices = bt.apply.matrix(prices, function(x) ifna.prev(x)) +# +factors = list() +#재무비율 +factors$TV = list() +#시가총액 +CSHO = bt.apply(data, function(x), ifna.prev(x[,'CSHO'])) +MKVAL = prices * CSHO +# EPS +EPS = bt.apply(data, function(x) ifna.prev(x[,'EPS'])) +factors$TV$EP = EPS / prices +# 자기자본대 주가비율 +CEQ = bt.apply(data, function(x) ifna.prev(x[,'CEQ'])) +factors$TV$BP = CEQ / MKVAL + +#횡단면 Z점수로 변환 +for(i in names(factors$TV)){ + factors$TV[[i]] = (factors$TV[[i]] - cap.wewighted.mean(factors$TV[[i]], MKVAL))/apply(factors$TV[[i]], 1, sd, na.rm =T) +} + +load.packages("abind") +temp = abind(factors$TV, along = 3) + +factors$TV$AVG = factors$TV[[1]] +factors$TV$AVG[] = apply(temp, c(1,2), mean, na.rm= T) + +#월말 찾기 +month.ends = endpoints(prices, 'months') +prices = prices[month.ends, ] +n = ncol (prices) +nperiods = nrow(prices) + +ret = prices / mlag(prices) -1 +next.month.ret = mlag(ret,-1) +MKVAL = MKVAL[month.ends,] +for(j in 1:len(factors)){ + for(i in 1:len(factors[[j]])){ + factors[[j]][[i]] = factors[[j]][[i]][month.ends,] + } +} + +out = compute.quantiles(factors$TV$AVG, next.month.ret, plot = F) +models=list() +for(i in 1:5){ + data$weight[] = NA + data$weight[month.ends,] = iif(out$qunatiles ==i, out$weights,0) + capital = 100000 + data$weight[] = (capital/prices) * (data$weight) + models[[paste('Q',i,sep='')]] = bt.run(data,type='share', 
captial = capital) +} + +#스프레드 +data$weight[] = NA +data$weight[month.ends,] = iif(out$qunatiles ==5, out$weights, iif(out$qunatiles==1, -out$weights, 0)) +capital = 10000 +data$weight[]=(capital/prices)*(data$weight) +models$Q5_Q1 = bt.run(data, type = 'share', capital = capital) + +factors.avg = list() +for(j in names(factors)) factors.avg[[j]] = factors[[j]]$AVG +factors.avg = add.avg.factor(factors.avg) +nperiods = nrow(nex.month.ret) +n = ncol(next.month.ret) + +#각 요인에 대한 행렬 생성 +factors.matrix = abind(factors.avg, along = 3) +all.data = factors.matrix + +#베타 +beta = all.data[,1,] * NA + +# all.data에 next.month.ret추가 +all.data = abind(next.month.ret, all.data, along = 3) +dimnames(all.data)[[3]][1] = 'Ret' +all.data[is.na(all.data)] <- 0 +#베타예측(요인수익률) +for(t in 30:(nperiods-1)){ + temp = all.data[t:t,,] + x = temp[,-1] + y = temp[,1] + beta[(t+1),] = lm(y~x-1)$coefficients +} + +#알파수익률 예측 생성 +alpha = next.month.ret * NA +for(t in 40:(nperiods-1)){ + coef = colMeans(beta[(t-5):t,], na.rm=T) + alpha[t,] = rowSums(all.data[t,,-1] * t(repmat(coef, 1, n)), na.rm = T) +} + +#### 5-5 Portfolio Theory #### + +stockData <- new.env() +symbols <- c("278530.KS","069500.KS","114800.KS","122630.KS") +start_date <- as.Date("2018-01-01") +getSymbols(symbols, src= "yahoo", env = stockData, from =start_date) +x<-list() +# +for (i in 1:length(symbols)){ + x[[i]] <- get(symbols[i], pos = stockData) + x[[i]]$gl <- ((Cl(x[[i]]) - Op(x[[i]])) / Op(x[[i]]))*100 + if(i==1) + data<-Cl(x[[i]]) + else + data <- cbind(data, Cl(x[[i]])) +} +# +data_ret <- apply(data,2,Delt) +data_ret <- na.omit(data_ret) +napos <- which(apply(data_ret,2,is.na)) +avg_ret <- apply(data_ret,2,mean) +covariance_mat <- cov(data_ret, use = 'na') +weights <- c(0.2, 0.3,0.35,0.15) +# +source("C:/Users/Shinhyunjin/Dropbox/data/portfolio.R") +# Normal +weightedport = getPortfolio(er=avg_ret, cov.mat = covariance_mat, weights =weights) +weightedport +# MVP +minvar_port <- globalMin.portfolio(avg_ret, covariance_mat) 
+minvar_port
+# Efficient
+rf <-0
+efficient_port <- efficient.portfolio(avg_ret, covariance_mat, rf)
+efficient_port
+# Tangent
+tangency_port <- tangency.portfolio(avg_ret, covariance_mat, rf)
+tangency_port
+
+## EF
+efficient_frontier <- efficient.frontier(avg_ret, covariance_mat, alpha.min =-2,alpha.max=2, nport=50)
+plot(efficient_frontier, plot.assets =T)
+points(minvar_port$sd, minvar_port$er, col = "blue")
+points(tangency_port$sd, tangency_port$er, col = "red")
+tangent_sharpe_ratio = (tangency_port$er - rf) / tangency_port$sd
+abline(a=rf, b=tangent_sharpe_ratio)
\ No newline at end of file
diff --git a/Ch7 Risk Management.R b/Ch7 Risk Management.R
new file mode 100644
index 0000000..41ba62c
--- /dev/null
+++ b/Ch7 Risk Management.R
@@ -0,0 +1,204 @@
+#### Ch7. Risk Management ####
+
+library(quantmod)
+library(tseries)
+library(PerformanceAnalytics)
+if (!requireNamespace("SACCR", quietly = TRUE)) install.packages("SACCR") # Basel SA-CCR package; install only when missing
+library(SACCR)
+library(caret)
+library(caTools)
+library(ROCR)
+library(randomForest)
+### 7-1 Market risk ###
+## Measured by beta ##
+# Data -> KOSDAQ ETF and KOSPI ETF
+getSymbols("232080.KS", from = "2016-01-01")
+getSymbols("277630.KS", from = "2016-01-01")
+# Variables
+KOSDA <- `232080.KS`
+KOSPI <- `277630.KS`
+KOSDA <- KOSDA$`232080.KS.Close`
+KOSPI <- KOSPI$`277630.KS.Close`
+KOSDA <- na.omit(KOSDA)
+KOSPI <- na.omit(KOSPI)
+KOSDA_m <- to.monthly(KOSDA)[,"KOSDA.Close"]
+KOSPI_m <- to.monthly(KOSPI)[,"KOSPI.Close"]
+# Monthly returns via quantmod::Delt (one-period percentage change of the monthly close)
+KOSDA_r_m <- Delt(KOSDA_m)
+KOSPI_r_m <- Delt(KOSPI_m)
+
+# Sample period
+KOSDA_r_m <- KOSDA_r_m[(index(KOSDA_r_m)>="2017-10")]
+KOSPI_r_m <- KOSPI_r_m[(index(KOSPI_r_m)>="2017-10")]
+
+# Result
+betafit <- lm(KOSPI_r_m ~ KOSDA_r_m)
+result <- summary(betafit)
+result
+result$coefficients[2,1] # slope of the regression, i.e. the beta estimate
+
+### 7-2 Portfolio risk ###
+
+getSymbols("099140.KS")
+getSymbols("117680.KS")
+getSymbols("226490.KS")
+getSymbols("140710.KS")
+
+China <- `099140.KS`
+Steel <- `117680.KS`
+KOSPI <- `226490.KS`
+Trans <- `140710.KS`
+
+China <- na.omit(China)
+Steel <- na.omit(Steel)
+KOSPI <- na.omit(KOSPI)
+Trans <- na.omit(Trans)
+
+China_m <- to.monthly(China)[,"China.Close"]
+Steel_m <- to.monthly(Steel)[,"Steel.Close"]
+KOSPI_m <- to.monthly(KOSPI)[,"KOSPI.Close"]
+Trans_m <- to.monthly(Trans)[,"Trans.Close"]
+
+China_m_r <- Delt(China_m)
+Steel_m_r <- Delt(Steel_m)
+KOSPI_m_r <- Delt(KOSPI_m)
+Trans_m_r <- Delt(Trans_m)
+
+China_m_r <- China_m_r[(index(China_m_r) >= "2016-01")]
+Steel_m_r <- Steel_m_r[(index(Steel_m_r) >= "2016-01")]
+KOSPI_m_r <- KOSPI_m_r[(index(KOSPI_m_r) >= "2016-01")]
+Trans_m_r <- Trans_m_r[(index(Trans_m_r) >= "2016-01")]
+
+China_m_r_p <- China_m_r - 0.0175
+Steel_m_r_p <- Steel_m_r - 0.0175
+KOSPI_m_r_p <- KOSPI_m_r - 0.0175
+Trans_m_r_p <- Trans_m_r - 0.0175
+
+MeanSD <- rbind(cbind("China ETF", mean(China_m_r_p), sd(China_m_r_p)),
+                cbind("Steel ETF", mean(Steel_m_r_p), sd(Steel_m_r_p)),
+                cbind("KOSPI ETF", mean(KOSPI_m_r_p), sd(KOSPI_m_r_p)),
+                cbind("Trans ETF", mean(Trans_m_r_p), sd(Trans_m_r_p)))
+MeanSD
+
+lm1 <- lm(China_m_r_p ~ KOSPI_m_r_p)
+lm2 <- lm(Steel_m_r_p ~ KOSPI_m_r_p)
+lm3 <- lm(Trans_m_r_p ~ KOSPI_m_r_p)
+
+return_avg <- matrix(c((lm1$coefficients[2]*mean(KOSPI_m_r_p)),
+                       (lm2$coefficients[2]*mean(KOSPI_m_r_p)),
+                       (lm3$coefficients[2]*mean(KOSPI_m_r_p))),nrow=1)
+covariance <- cov(cbind(China_m_r_p, Steel_m_r_p, Trans_m_r_p), use = "complete.obs") # sample covariance of the three assets (was the undefined `Data`); strictly the single index model should be used here
+covariance <- matrix(c(covariance), nrow=3)
+sol <- portfolio.optim(x = return_avg, covmat = covariance, shorts = FALSE)
+sol$pw # Weight
+
+
+### 7-3 Value at Risk (VaR) ###
+
+## 7-3-1 Parametric VaR - variance-covariance method ##
+
+mean = 2
+sigma = 4
+alpha = 0.05
+Var_paramatic = qnorm(alpha, mean, sigma)
+Var_paramatic
+
+# Expected shortfall of the lower alpha tail of a normal: mean - sigma * dnorm(z_alpha) / alpha (must lie below the VaR)
+alpha_z = qnorm(alpha)
+ES_paramatic = mean - sigma * (dnorm(alpha_z) / alpha)
+ES_paramatic
+
+## 7-3-2 Historical VaR ##
+# Data
+symbollist = c("AMD", "AAPL", "ORCL")
+getSymbols(symbollist, from = "2017-01-01", to = "2019-01-10")
+AMD = AMD[,"AMD.Adjusted", drop = FALSE]
+AAPL = AAPL[,"AAPL.Adjusted", drop = FALSE]
+ORCL = ORCL[,"ORCL.Adjusted", drop = FALSE]
+AMD_return = CalculateReturns(AMD, method = "log")
+AAPL_return = CalculateReturns(AAPL, method = "log")
+ORCL_return = CalculateReturns(ORCL, method = "log")
+AMD_return = AMD_return[-1,]
+AAPL_return =AAPL_return[-1,]
+ORCL_return = ORCL_return[-1,]
+bind_return <- cbind(AMD_return, AAPL_return, ORCL_return)
+head(bind_return)
+# Historical VaR
+HVAR <- VaR(bind_return, p=0.95, method = "historical")
+HVAR
+# ES
+HCVAR <- ES(bind_return, p =0.95, method = "historical")
+HCVAR
+# component VaR
+VaR(bind_return, p=0.95, portfolio_method = "component")
+# Marginal VaR
+VaR(bind_return, p=0.95, portfolio_method = "marginal")
+
+### 7-4 Monte Carlo simulation ###
+
+Sample_Size <- 2000
+set.seed(2345)
+Z <- rnorm(Sample_Size)
+mean <- 0.2
+sigma <- 0.25
+deltat <- 0.0833333
+returns <- mean*deltat + sigma*Z*sqrt(deltat)
+hist(returns, breaks =50)
+# Scale the simulated per-step mean and standard deviation by 12 and sqrt(12)
+Mean_new <- mean(returns) *12
+Mean_new
+std_new <- sd(returns)*(12)^(0.5)
+std_new
+# Historical VaR of the simulated returns
+VaR(returns, p = 0.95, method = "historical")
+
+### 7-5 Basel ###
+
+CalcEAD(50, 400) # argument order: replacement cost, potential future exposure
+
+### 7-6 Retail credit risk ###
+data("GermanCredit")
+LRData <- GermanCredit[,1:10]
+str(LRData)
+summary(LRData)
+# Split on Class with a 0.6 ratio: TRUE rows train, FALSE rows test
+set.seed(100)
+res = sample.split(LRData$Class, 0.6)
+Train_data = subset(LRData, res == TRUE)
+Test_data = subset(LRData, res == FALSE)
+
+lgfit = glm(Class ~. , data = Train_data, family = "binomial")
+summary(lgfit)
+# Refit using only the significant variables
+lgfit = glm(Class ~ Duration + InstallmentRatePercentage + Age, family = "binomial", data = Train_data)
+summary(lgfit)
+# In-sample predicted probabilities and confusion table at a 0.5 cut-off (was 0.05, which labels almost every row positive)
+Train_data$predicted.risk = predict(lgfit, newdata = Train_data, type = "response")
+table(Train_data$Class, as.numeric(Train_data$predicted.risk >=0.5))
+# Area under the ROC curve
+pred= prediction(Train_data$predicted.risk, Train_data$Class)
+as.numeric(performance(pred, "auc")@y.values)
+# Plot (ROC curve)
+predict_Train = predict(lgfit, type = "response")
+ROCpred = prediction(predict_Train, Train_data$Class)
+ROCperf = performance(ROCpred, "tpr", "fpr")
+plot(ROCperf)
+
+### 7-7 Fraud detection ###
+
+data("GermanCredit")
+FraudData <- GermanCredit[,1:10]
+head(FraudData)
+# Random 80/20 train/test split by row index
+len <- dim(FraudData)[1]
+train <- sample(1:len, 0.8*len)
+TrainData <- FraudData[train,]
+TestData <- FraudData[-train,]
+fraud_model <- randomForest(Class ~. , data=TrainData, ntree =50, proximity = TRUE)
+# Results
+print(fraud_model)
+plot(fraud_model)
+importance(fraud_model)
+# Predictions on the held-out rows versus the actual classes
+TestPred <- predict(fraud_model, newdata = TestData)
+table(TestPred, TestData$Class)
diff --git a/Ch8 Optimization.R b/Ch8 Optimization.R
new file mode 100644
index 0000000..8c913aa
--- /dev/null
+++ b/Ch8 Optimization.R
@@ -0,0 +1,80 @@
+### Ch8 Optimization ###
+
+library(PerformanceAnalytics)
+library(randomForest)
+library(mlbench)
+library(caret)
+# Genetic algorithms
+library(genalg)
+library(ggplot2)
+if (!requireNamespace("GA", quietly = TRUE)) install.packages("GA") # install only when missing
+library(GA)
+### 8-1 Periodic rebalancing (dynamic rebalancing) ###
+
+data(edhec)
+data <- edhec["1999", 3:5] # columns 3 to 5
+colnames(data) = c("DC", "EM", "EMN")
+data
+# Starting weights as a one-row xts dated 1998-12-31
+wts <- xts(matrix(c(0.3, 0.3, 0.4), nrow =1, ncol = 3), as.Date("1998-12-31"))
+colnames(wts) <- colnames(data)
+wts
+### Dynamic rebalancing ###
+Return.portfolio(data, weights = wts, rebalance_on = "months", verbose =TRUE)
+
+### 8-2 Walk forward testing ###
+## Choosing the optimal parameters ##
+
+# Grid evaluation #
+
+data("Shuttle")
+Analysis_Data <- head(Shuttle, 10000)
+X <- Analysis_Data[,1:9] # columns 1 to 9
+Y <- Analysis_Data[,10] # column 10
+control <- trainControl(method = "repeatedcv", number =5, repeats = 3)
+seed <- 4
+metric <- "Accuracy"
+set.seed(seed)
+Count_var <- sqrt(ncol(X))
+tunegrid <- expand.grid(.mtry = Count_var)
+rf_baseline <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control)
+print(rf_baseline)
+# better tool
+control <- trainControl(method = "repeatedcv", number =5, repeats = 3, search= "grid")
+set.seed(seed)
+tunegrid <- expand.grid(.mtry = c(1:8))
+rf_gridsearch_method <- train(Class ~ ., data = Analysis_Data, method = "rf", metric = metric, tuneGrid = tunegrid, trControl = control)
+print(rf_gridsearch_method)
+plot(rf_gridsearch_method)
+
+
+### 8-3 Genetic algorithm ###
+
+InputDataset <- data.frame(Stocks = c("Stock1", "Stock2", "Stock3","Stock4","Stock5", "Stock6"), retruns = c(10,11,15,20,12,13), weight = c(0.1, 0.2, 0.1, 0.2, 0.2, 0.3))
+WTlimit <- 1
+InputDataset
+# Evaluation function for rbga.bin: 0 when the selected weight exceeds WTlimit, otherwise the negated selected return
+evaluationFunc <- function(x){
+  current_solution_returns <- x %*% InputDataset$retruns
+  current_solution_weight <- x %*% InputDataset$weight
+  if(current_solution_weight > WTlimit)
+    return(0) else return(-current_solution_returns)
+}
+# Run the binary GA over the six stocks
+GAmodel <- rbga.bin(size =6, popSize =100, iters =50, mutationChance = 0.01, elitism = TRUE, evalFunc = evaluationFunc)
+cat(summary(GAmodel)) # original note: the GA result tells which stocks (weights) should be excluded
+# Regression data: columns 2 to 4 of economics
+data(economics)
+Data_Analysis <- data.frame(economics[,2:4])
+head(Data_Analysis)
+# Sum of squared errors of pce against a0 + a1*pop + a2*psavert
+OLS_GA <- function(Data_Analysis, a0, a1, a2){
+  # Columns are read explicitly; attach()/detach() leaked the attached frame whenever an error occurred in between
+  Y_hat <- a0 + a1*Data_Analysis$pop + a2*Data_Analysis$psavert
+  resid <- Data_Analysis$pce - Y_hat
+  SSE = t(resid) %*% resid
+  return(SSE)
+}
+# ga() maximises fitness, so the SSE is negated
+ga.OLS_GA <- ga(type = 'real-valued', min= c(-100,-100,-100), max = c(100,100,100), popSize=500, maxiter = 500, names=c('intercept', 'pop','psavert'), keepBest=TRUE, fitness = function(a) - OLS_GA(Data_Analysis, a[1],a[2],a[3]))
+summary(ga.OLS_GA) # the intercept, pop and psavert coefficients are the result
diff --git a/Ch9 Derivative Pricing.R b/Ch9 Derivative Pricing.R
new file mode 100644
index 0000000..e08d843
--- /dev/null
+++ b/Ch9 Derivative Pricing.R
@@ -0,0 +1,154 @@
+##### Ch9. Derivative pricing #####
+
+library(fOptions)
+if (!requireNamespace("RQuantLib", quietly = TRUE)) install.packages("RQuantLib", type = 'binary')
+library(RQuantLib)
+if (!requireNamespace("CreditMetrics", quietly = TRUE)) install.packages("CreditMetrics")
+library(CreditMetrics) # credit derivatives
+if (!requireNamespace("credule", quietly = TRUE)) install.packages("credule")
+library(credule) # credit derivatives
+if (!requireNamespace("GUIDE", quietly = TRUE)) install.packages("GUIDE") # interest-rate derivatives
+library(GUIDE)
+if (!requireNamespace("fExoticOptions", quietly = TRUE)) install.packages("fExoticOptions")
+library(fExoticOptions)
+### 9-1 Vanilla options ###
+
+## 9-1-1 Black-Scholes ##
+
+GBSOption(TypeFlag = "c", S = 900, X = 950, Time = 1/4, r =0.02, sigma =0.22, b= 0.02)
+GBSOption(TypeFlag = "p", S = 900, X = 950, Time = 1/4, r = 0.02, sigma = 0.22, b = 0.02)
+# arguments: type (c or p), underlying price, strike, time to maturity, risk-free rate, volatility, cost of carry
+
+## 9-1-2 Cox Ross Rubinstein -> binomial model ##
+
+CRRBinomialTreeOption(TypeFlag = "ce", S =900, X =950, Time = 1/4, r = 0.02, b = 0.02, sigma = 0.22, n = 3)
+CRRBinomialTreeOption(TypeFlag = "pe", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma = 0.22, n = 3)
+# Build the three-step tree and plot it
+model <- BinomialTreeOption(TypeFlag = "ce", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma =0.22, n=3)
+BinomialTreePlot(model, dy =1, xlab = "Time steps", ylab = "Option Value", xlim = c(0,4), ylim=c(-3,4))
+title(main = "Call Option Tree")
+## CRR call price as a function of the number of steps
+func <- function(n){
+  pr <- CRRBinomialTreeOption(TypeFlag = "ce", S = 900, X = 950, Time = 1/4, r = 0.02, b =0.02, sigma = 0.22, n = n)@price
+  return(pr)
+}
+# Final comparison plot
+price <- vapply(1:100, func, numeric(1)) # steps 1 to 100; vapply pins the result to one number per step
+plot(price, type="l", xlab = "Number of steps", ylab = "Option Value")
+bs_price <- GBSOption(TypeFlag = "c", S = 900, X = 950, Time = 1/4, r = 0.02, sigma = 0.22, b = 0.02)@price
+abline(h = bs_price, col = 'red')
+legend("topright", legend = c('CRR-price', 'BS-price'), col = c('black', 'red'), pch = 19)
+title(main = "Call Option Pricing models")
+
+## Greeks
+GBSGreeks(Selection = "delta", TypeFlag = "c", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma = 0.22)
+GBSGreeks(Selection = "gamma", TypeFlag = "c", S = 900, X = 950, Time = 1/4, r = 0.02, b = 0.02, sigma =0.22)
+# Call and put deltas for underlying prices 500..1500 (one column per option type)
+portfolio <- sapply(c('c', 'p'), function(otype){sapply(500:1500, function(price){
+  GBSGreeks(Selection = 'delta',
+            TypeFlag = otype,
+            S=price, X = 950,
+            Time = 1/4, r = 0.02,
+            b = 0.02,
+            sigma =0.22)
+})
+})
+
+head(portfolio)
+
+## Straddle Delta
+plot(500:1500, rowSums(portfolio), type = 'l', xlab = 'underlying Price', ylab = 'Straddle Delta')
+
+## Implied Volatility ###
+
+iv <- EuropeanOptionImpliedVolatility("call", 11.10, 100, 100, 0.01, 0.03, 0.5, 0.4)
+# argument order: option type, option price, underlying, strike, dividend yield, risk-free rate, time to maturity (0.5; was the typo `05`), initial volatility guess
+iv
+iv_a <- AmericanOptionImpliedVolatility("call", 11.10, 100, 100, 0.01, 0.03, 0.5, 0.4)
+iv_a
+
+### 9-2 Credit derivatives ###
+rc <- c("AAA", "AA", "A", "BBB", "BB", "B", "CCC", "Default")
+M <- matrix(c(90.81, 8.33, 0.68, 0.06, 0.08, 0.02, 0.01, 0.01,
+              0.70, 90.65, 7.79, 0.64, 0.06, 0.13, 0.02, 0.01,
+              0.09, 2.27, 91.05, 5.52, 0.74, 0.26, 0.01, 0.06,
+              0.02, 0.33, 5.95, 85.93, 5.30, 1.17, 1.12, 0.18,
+              0.03, 0.14, 0.67, 7.73, 80.53, 8.84, 1.00, 1.06,
+              0.01, 0.11, 0.24, 0.43, 6.48, 83.46, 4.07, 5.20,
+              0.21, 0, 0.22, 1.30, 2.38, 11.24, 64.86, 19.79,
+              0, 0, 0, 0, 0, 0, 0, 100
+)/100, 8, 8, dimnames = list(rc, rc), byrow = TRUE)
+lgd <- 0.2 # loss given default
+cm.cs(M, lgd) # credit spreads
+# Portfolio inputs for the credit VaR
+ead <- c(140000,100000,100000) # exposure at default
+N <- 3 # firms
+n <- 50000 # number of random draws
+r <- 0.03 # risk-free rate
+rating <- c("BBB", "AA", "B")
+firmnames <- c("Blizzard", "Activision", "Nexon")
+alpha <- 0.99
+rho <- matrix(c(1,0.4,0.6,0.4,1,0.5,0.6,0.5,1),3,3,dimnames=list(firmnames, firmnames),byrow=TRUE)
+cm.CVaR(M, lgd, ead, N, n,r,rho,alpha,rating) # credit VaR
+pnl <- cm.gain(M, lgd, ead, N, n, r, rho,rating) # simulation for the credit VaR
+pnl
+
+## CDS ##
+
+yct = c(1,2,3,4,5,7) # yield-curve tenors (tenor: time from the obligation date to maturity, also called the settlement period)
+ycr = c(0.0050, 0.0070, 0.0080, 0.0100, 0.0120, 0.0150) # yield-curve discount rates
+cct = c(1,3,5,7) # credit-curve tenors
+ccsp = c(0.99, 0.98, 0.95, 0.92) # survival probabilities
+tenors = c(1,3,5,7) # maturities
+r = 0.4 # recovery rate
+priceCDS(yct, ycr, cct, ccsp, tenors, r)
+# Bootstrap the credit curve from CDS spreads: the 4th argument is the spreads, not the survival probabilities
+cdsSpreads = c(0.0050, 0.0070, 0.0090, 0.0110)
+bootstrapCDS(yct,ycr,cct,cdsSpreads,r)
+
+## Interest-rate derivatives ##
+irswapvalue()
+
+
+### 9-3 Exotic Option ###
+
+## 9-3-1 Asian Option - average price ##
+# Method 1
+price <- GeometricAverageRateOption("c", 110, 120, 0.5, 0.03, 0.05 ,0.1)
+price
+# Method 2
+TurnbullWakemanAsianApproxOption(TypeFlag = "p", S = 100, SA = 102, X = 120, Time = 0.5,
+                                 time = 0.25, tau = 0, r = 0.03, b = 0.05, sigma = 0.1)@price
+# Method 3
+LevyAsianApproxOption(TypeFlag = "p", S = 100, SA = 102, X = 120, Time = 0.5,
+                      time = 0.25, r = 0.03, b = 0.05, sigma = 0.1)@price
+
+## 9-3-2 Barrier Option ##
+
+## 1. Down and Out
+StandardBarrierOption(TypeFlag = "cdo", S =100, X=90, H = 95, K = 3, Time=0.5, r = 0.08, b = 0.04, sigma = 0.25)@price
+# H is the barrier level, K is the rebate
+
+## 2. Up and out down and out call (Double Barrier)
+DoubleBarrierOption(TypeFlag = "co", S = 100,X=100, L = 50, U = 150, Time =0.25,
+                    r = 0.1 ,b=0.1, sigma = 0.15, delta1 = -0.1, delta2 = 0.1)@price
+# L is the lower barrier, U the upper barrier, with the matching delta1 and delta2
+
+## 3. Look-barrier up and out
+LookBarrierOption(TypeFlag = "cuo", S = 100, X = 100, H = 130, time1 = 0.25, Time2 = 1, r=0.1,b=0.1, sigma=0.15)@price
+
+## 4. Gap Digital
+GapOption(TypeFlag = "c", S = 50, X1= 50, X2= 57, Time = 0.5, r = 0.09, b = 0.09, sigma = 0.20)
+# two strike prices
+
+## 5. Cash or Nothing: pays a pre-agreed amount if the underlying price reaches the strike at expiry ##
+CashOrNothingOption(TypeFlag = "p", S = 100, X = 80, K = 10, Time = 9/12, r = 0.06, b = 0, sigma = 0.35)
+
+## 6. Two-asset cash-or-nothing down-up option ##
+# pays a fixed cash amount when asset 1 price > strike and asset 2 price > strike
+
+TwoAssetCashOrNothingOption(TypeFlag = "c", S1 = 100, S2 = 100, X1 = 110, X2 = 90,
+                            K = 10, Time = 0.5, r=0.1, b1 = 0.05, b2= 0.06, sigma1 = 0.2, sigma2=0.25,
+                            rho = 0.5)@price
+# K is the fixed cash amount paid at expiry
+# rho is the correlation
\ No newline at end of file
diff --git a/Learning Quantitative Finance with R.Rproj b/Learning Quantitative Finance with R.Rproj
new file mode 100644
index 0000000..3af27f6
--- /dev/null
+++ b/Learning Quantitative Finance with R.Rproj
@@ -0,0 +1,13 @@
+Version: 1.0
+
+RestoreWorkspace: Default
+SaveWorkspace: Default
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX