ABOUT ME

-

Today
-
Yesterday
-
Total
-
  • h2o 패키지를 이용한 로지스틱 회귀분석
    R/DeepLearning 2018. 7. 3. 21:41

    # Install each dependency only when it is missing, so that re-running the
    # script does not download and reinstall the packages every time.
    if (!requireNamespace("h2o", quietly = TRUE)) {
      install.packages("h2o")
    }
    if (!requireNamespace("pROC", quietly = TRUE)) {
      install.packages("pROC")
    }

    library(h2o)
    library(pROC)



    # Initialise the H2O connection on localhost:54321. startH2O = TRUE is
    # passed together with a minimum memory size of "20G" and 8 threads.
    localH2O <- h2o.init(
      ip = "localhost",
      port = 54321,
      startH2O = TRUE,
      nthreads = 8,
      min_mem_size = "20G"
    )



    # Read the training and test splits of the occupancy data, turning
    # string columns into factors.
    occupancy_train <- read.csv(
      file = "e:/R/Study/occupancy_data/datatraining.txt",
      stringsAsFactors = TRUE
    )
    occupancy_test <- read.csv(
      file = "e:/R/Study/occupancy_data/datatest.txt",
      stringsAsFactors = TRUE
    )

    # Predictors are columns 2 through 6 and the response is column 7
    # (presumably "Occupancy" -- confirm against the file header).
    x <- names(occupancy_train)[2:6]
    y <- names(occupancy_train)[7]

    # Convert the Occupancy column to a factor in both data frames.
    occupancy_train[["Occupancy"]] <- as.factor(occupancy_train[["Occupancy"]])
    occupancy_test[["Occupancy"]] <- as.factor(occupancy_test[["Occupancy"]])

    # Upload both data frames to H2O under fixed destination frame ids.
    occupancy_train.hex <- as.h2o(
      x = occupancy_train,
      destination_frame = "occupancy_train.hex"
    )
    occupancy_test.hex <- as.h2o(
      x = occupancy_test,
      destination_frame = "occupancy_test.hex"
    )



    # Fit a binomial GLM on the uploaded training frame (referenced by its
    # frame id) with alpha = 0.5, lambda search enabled, 5 folds and a fixed
    # seed.
    occupancy_train.glm <- h2o.glm(
      x = x,
      y = y,
      training_frame = "occupancy_train.hex",
      family = "binomial",
      alpha = 0.5,
      lambda_search = TRUE,
      nfolds = 5,
      seed = 1234567
    )



    # Training AUC, read through the public h2o.auc() accessor instead of
    # reaching into the model object's internal slots.
    h2o.auc(occupancy_train.glm, train = TRUE)

    # Cross-validation AUC (the model is fit with nfolds = 5).
    h2o.auc(occupancy_train.glm, xval = TRUE)



    # Predict on the test frame and print the prediction frame.
    yhat <- h2o.predict(occupancy_train.glm, occupancy_test.hex)
    yhat

    # Test-set AUC.
    # The ROC curve must be built from the predicted probability of the
    # positive class (p1). Using pmax(p0, p1) scores every row by the
    # confidence of whichever class won, which does not rank observations by
    # how likely they are to be class "1" and therefore gives a meaningless
    # AUC.
    # levels/direction are fixed so that "0" is the control, "1" is the case,
    # and a higher p1 means "more likely a case"; pROC then cannot silently
    # flip the direction of the comparison.
    roc_obj <- pROC::roc(
      response = c(as.matrix(occupancy_test.hex$Occupancy)),
      predictor = c(as.matrix(yhat$p1)),
      levels = c("0", "1"),
      direction = "<"
    )
    pROC::auc(roc_obj)



    # Plot variable importance for the fitted GLM, limited to 5 features.
    h2o.varimp_plot(
      model = occupancy_train.glm,
      num_of_features = 5
    )


    자동 대체 텍스트를 사용할 수 없습니다.


    [출처] R 딥러닝 쿡북, PKS 프라카시, 아슈투니 스리 크리슈나 라오 지음, 정지완 옮김, p62~66


    'R > DeepLearning' 카테고리의 다른 글

    신경망(ANN : Artificial Neural Network)의 특징  (0) 2018.08.08
Designed by Tistory.