-
h2o 패키지를 이용한 로지스틱 회귀분석 (R/DeepLearning, 2018. 7. 3. 21:41)
# Install the required packages only when they are missing, so that re-running
# the script does not download and reinstall them every time.
for (pkg in c("h2o", "pROC")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
library(h2o)
library(pROC)

# Connect to a local H2O instance on localhost:54321, starting one if needed
# (startH2O = TRUE).
# NOTE(review): min_mem_size = "20G" and nthreads = 8 are machine-specific;
# confirm they fit the host before running.
localH2O <- h2o.init(
  ip = "localhost",
  port = 54321,
  startH2O = TRUE,
  min_mem_size = "20G",
  nthreads = 8
)

# Load the occupancy training and test sets from local text files.
occupancy_train <- read.csv(
  file = "e:/R/Study/occupancy_data/datatraining.txt",
  stringsAsFactors = TRUE
)
occupancy_test <- read.csv(
  file = "e:/R/Study/occupancy_data/datatest.txt",
  stringsAsFactors = TRUE
)

# Predictor names: columns 2 through 6 of the training data.
x <- colnames(occupancy_train)[2:6]
# Response name: column 7 of the training data.
y <- colnames(occupancy_train)[7]

# Make the response a factor in both data sets so the binomial GLM treats it
# as a class label rather than a number.
occupancy_train$Occupancy <- as.factor(occupancy_train$Occupancy)
occupancy_test$Occupancy <- as.factor(occupancy_test$Occupancy)

# Upload both data frames to the H2O cluster under fixed frame ids.
occupancy_train.hex <- as.h2o(
  x = occupancy_train,
  destination_frame = "occupancy_train.hex"
)
occupancy_test.hex <- as.h2o(
  x = occupancy_test,
  destination_frame = "occupancy_test.hex"
)

# Fit a regularized logistic regression (binomial family) with 5-fold
# cross-validation. The training frame is referenced by the id it was
# uploaded under; the seed is fixed for reproducibility.
occupancy_train.glm <- h2o.glm(
  x = x,
  y = y,
  training_frame = "occupancy_train.hex",
  seed = 1234567,
  family = "binomial",
  lambda_search = TRUE,
  alpha = 0.5,
  nfolds = 5
)

# Training AUC
# Read the metric through the public accessor rather than the model object's
# internal slots, whose layout is not part of the package API.
h2o.auc(occupancy_train.glm, train = TRUE)

# Cross-validated AUC
h2o.auc(occupancy_train.glm, xval = TRUE)

# Predict on the test frame and print the resulting prediction frame.
yhat <- h2o.predict(occupancy_train.glm, occupancy_test.hex)
yhat

# Test-set AUC
# Score the ROC curve with the predicted probability of the positive class
# (p1). The earlier pmax(p0, p1) score only measured how confident the model
# was in whichever class it chose, so it did not rank observations by their
# likelihood of being occupied and produced a misleading AUC. The helper
# column yhat$pmax is therefore no longer created.
actual_class <- c(as.matrix(occupancy_test.hex$Occupancy))
prob_positive <- c(as.matrix(yhat$p1))
roc_obj <- pROC::roc(actual_class, prob_positive)
auc(roc_obj)

# Variable importance: plot the top 5 features of the fitted model.
h2o.varimp_plot(occupancy_train.glm, num_of_features = 5)

# Source: "R Deep Learning Cookbook" by PKS Prakash and Achyutuni Sri Krishna
# Rao, Korean translation by Jeong Ji-wan, pp. 62-66.
'R > DeepLearning' 카테고리의 다른 글
신경망(ANN : Artificial Neural Network)의 특징 (0) 2018.08.08