## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE, fig.height = 5, fig.width = 7, collapse = TRUE) library(recipes) library(dplyr) library(knitr) library(kableExtra) library(sparseR) ## ----------------------------------------------------------------------------- data(iris) summary(iris) ## ----------------------------------------------------------------------------- srl <- sparseR(Sepal.Width ~ ., data = iris, k = 1, seed = 1) ## ----------------------------------------------------------------------------- srl ## ----------------------------------------------------------------------------- # At the lambda which minimizes CVE summary(srl, at = "cvmin") # At the lambda which is within 1 SE of the minimum CVE summary(srl, at = "cv1se") ## ----------------------------------------------------------------------------- plot(srl, plot_type = "cv") ## ----------------------------------------------------------------------------- plot(srl, plot_type = "path") ## ----------------------------------------------------------------------------- X <- model.matrix(Sepal.Width ~ .*., data = iris)[,-1] ## ----------------------------------------------------------------------------- set.seed(1) srl2 <- sparseR(pre_process = FALSE, model_matrix = X, y = iris$Sepal.Width) ## ----------------------------------------------------------------------------- set.seed(1) srl2 <- sparseR(pre_process = FALSE, model_matrix = X, y = iris$Sepal.Width, max.iter = 1e6) ## ----------------------------------------------------------------------------- srl2 summary(srl2, at = "cv1se") ## ----echo = TRUE, eval = FALSE------------------------------------------------ # plot(srl2) ## ----echo = FALSE, eval = TRUE, fig.height=8---------------------------------- old_par <- par(mfrow = c(2,1)) plot(srl2) par(old_par) ## ----------------------------------------------------------------------------- set.seed(1) srl3 <- sparseR(Sepal.Width ~ ., data = iris, k = 1, pre_proc_opts = c("none"), max.iter = 1e6) srl3 summary(srl3, at = "cv1se") ## ----------------------------------------------------------------------------- cc <- iris %>% select(Sepal.Length, Petal.Length, Petal.Width) %>% apply(2, min, na.rm = TRUE) p1 <- sparseR_prep(Sepal.Width ~ ., iris, k = 0, extra_opts = list(centers = cc)) (c2min <- bake(p1, iris)) summary(c2min) ## ----------------------------------------------------------------------------- srl_centered2min <- sparseR(Sepal.Width ~ ., iris, extra_opts = list(centers = cc), seed = 1) ## ----------------------------------------------------------------------------- p2 <- sparseR_prep(Sepal.Width ~ ., iris, k = 0, extra_opts = list(center_fn = function(x, ...) min(x, ...))) (c2min2 <- bake(p2, iris)) identical(c2min2, c2min) ## ----warning=FALSE------------------------------------------------------------ effect_plot(srl, "Petal.Width", by = "Species") ## ----warning=FALSE------------------------------------------------------------ effect_plot(srl_centered2min, "Petal.Width", by = "Species") ## ----echo = TRUE, eval = FALSE------------------------------------------------ # plot(srl3, plot_type = "cv", ylim = c(0,.2)) # abline(h = min(srl3$fit$cve), col = "red") # plot(srl_centered2min, plot_type = "cv", ylim = c(0,.2)) # abline(h = min(srl3$fit$cve), col = "red") # plot(srl, plot_type = "cv", ylim = c(0,.2)) # abline(h = min(srl3$fit$cve), col = "red") # # effect_plot(srl3, "Petal.Width", by = "Species", # plot.args = list(ylim = c(1.5, 4.8))) # effect_plot(srl_centered2min, "Petal.Width", by = "Species", # plot.args = list(ylim = c(1.5, 4.8))) # effect_plot(srl, "Petal.Width", by = "Species", # plot.args = list(ylim = c(1.5, 4.8))) ## ----echo = FALSE, warning=FALSE, fig.height=6, fig.width=8, out.width="100%"---- old_par <- par(mfrow = c(2,3), mar = c(4,4,5,2) + .1) plot(srl3, plot_type = "cv", ylim = c(0,.2)) title("Centered to zero", line = 3) abline(h = min(srl3$fit$cve), col = "red") plot(srl_centered2min, plot_type = "cv", ylim = c(0,.2)) title("Centered to minimum values", line = 3) abline(h = min(srl3$fit$cve), col = "red") plot(srl, plot_type = "cv", ylim = c(0,.2)) title("Centered to mean", line = 3) abline(h = min(srl3$fit$cve), col = "red") par(mar = c(4,4,2,2) +.1) effect_plot(srl3, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 4.8))) effect_plot(srl_centered2min, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 4.8))) effect_plot(srl, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 4.8))) par(old_par) ## ----------------------------------------------------------------------------- p1 <- predict(srl3, at = "cvmin") p2 <- predict(srl_centered2min, at = "cvmin") p3 <- predict(srl, at = "cvmin") cor(cbind(p1, p2 ,p3)) pairs(cbind(p1, p2 ,p3)) ## ----------------------------------------------------------------------------- # At CV1se p4 <- predict(srl3, at = "cv1se") p5 <- predict(srl_centered2min, at = "cv1se") p6 <- predict(srl, at = "cv1se") cor(cbind(p1, p2 ,p3, p4,p5,p6)) pairs(cbind(p1, p2 ,p3, p4,p5,p6)) ## ----echo = FALSE, eval = TRUE, warning=FALSE--------------------------------- old_par <- par(mfrow = c(1,3)) effect_plot(srl3, "Petal.Width", by = "Species", at = "cv1se", plot.args = list(ylim = c(1.5, 5))) effect_plot(srl_centered2min, "Petal.Width", by = "Species", at = "cv1se", plot.args = list(ylim = c(1.5, 5))) effect_plot(srl, "Petal.Width", by = "Species", at = "cv1se", plot.args = list(ylim = c(1.5, 5))) par(old_par) ## ----echo = TRUE, eval = FALSE, warning = FALSE------------------------------- # effect_plot(srl3, "Petal.Width", by = "Species", at = "cv1se") # effect_plot(srl_centered2min, "Petal.Width", by = "Species", at = "cv1se") # effect_plot(srl, "Petal.Width", by = "Species", at = "cv1se") ## ----warning = FALSE---------------------------------------------------------- ## Centered model (rbic1 <- sparseRBIC_step(Sepal.Width ~ ., iris, pre_proc_opts = c("center", "scale"))) # Non-centered model (rbic2 <- sparseRBIC_step(Sepal.Width ~ ., iris, pre_proc_opts = c("scale"))) ## ----echo = TRUE, eval=FALSE, warning = FALSE--------------------------------- # effect_plot(rbic1, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 5))) # effect_plot(rbic2, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 5))) # effect_plot(rbic1, "Sepal.Length", by = "Species") # effect_plot(rbic2, "Sepal.Length", by = "Species") ## ----echo = FALSE, eval=TRUE, warning = FALSE, fig.height=8, fig.width = 6---- old_par <- par(mfrow = c(2,2)) effect_plot(rbic1, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 5))) effect_plot(rbic2, "Petal.Width", by = "Species", plot.args = list(ylim = c(1.5, 5))) effect_plot(rbic1, "Sepal.Length", by = "Species") effect_plot(rbic2, "Sepal.Length", by = "Species") par(old_par) ## ----------------------------------------------------------------------------- summary(rbic1) summary(rbic2) ## ----eval = FALSE------------------------------------------------------------- # s1 <- sparseRBIC_sampsplit(rbic1) ## ----echo = FALSE------------------------------------------------------------- s1 <- sparseRBIC_sampsplit(rbic1, S = 10) s1$results %>% kable(digits = 5) %>% kable_styling(full_width = FALSE) ## ----eval = FALSE------------------------------------------------------------- # s2 <- sparseRBIC_sampsplit(rbic2) ## ----echo = FALSE------------------------------------------------------------- s2 <- sparseRBIC_sampsplit(rbic2, S = 10) s2$results %>% kable(digits = 5) %>% kable_styling(full_width = FALSE) ## ----eval = FALSE, message=FALSE---------------------------------------------- # set.seed(1) # ## Centered model # b1 <- sparseRBIC_bootstrap(rbic1) ## ----echo = FALSE, message= FALSE--------------------------------------------- set.seed(1) b1 <- sparseRBIC_bootstrap(rbic1, B = 10) b1$results %>% kable(digits = 5) %>% kable_styling(full_width = FALSE) ## ----eval = FALSE, message = FALSE-------------------------------------------- # set.seed(1) # ## Uncentered model # b2 <- sparseRBIC_bootstrap(rbic2) ## ----echo = FALSE------------------------------------------------------------- set.seed(1) b2 <- sparseRBIC_bootstrap(rbic2, B = 10) b2$results %>% kable(digits = 5) %>% kable_styling(full_width = FALSE) ## ----------------------------------------------------------------------------- data("irlcs_radon_syn") summary(irlcs_radon_syn) ## ----------------------------------------------------------------------------- irlcs_radon_syn <- select(irlcs_radon_syn, -ID) ## ----------------------------------------------------------------------------- set.seed(13) N <- nrow(irlcs_radon_syn) trainIDX <- sample(1:N, N * .75) train <- irlcs_radon_syn[trainIDX,] test <- irlcs_radon_syn[-trainIDX,] ## ----results = "hold"--------------------------------------------------------- prep_obj <- sparseR_prep(CASE ~ ., data = train, k = 0, poly = 1) prep_obj ## ----------------------------------------------------------------------------- MASS::truehist(train$SMKYRS) ## ----------------------------------------------------------------------------- sparseR_prep(CASE ~ ., data = train, k = 0, poly = 1, filter = "zv") ## ----------------------------------------------------------------------------- sparseR_prep(CASE ~ ., data = train, k = 0, poly = 1, extra_opts = list(unique_cut = 5)) ## ----------------------------------------------------------------------------- sparseR_prep(CASE ~ ., data = train, k = 1, poly = 1, extra_opts = list(unique_cut = 5)) sparseR_prep(CASE ~ ., data = train, k = 1, poly = 2, extra_opts = list(unique_cut = 5)) ## ----------------------------------------------------------------------------- lso <- list( SRL = sparseR(CASE ~ ., train, seed = 1), ## SRL model APL = sparseR(CASE ~ ., train, seed = 1, gamma = 0), ## APL model ME = sparseR(CASE ~ ., train, seed = 1, k = 0), ## Main effects model SRLp = sparseR(CASE ~ ., train, seed = 1, poly = 2) ## SRL + polynomials ) ## ----------------------------------------------------------------------------- lso ## ----echo = TRUE, eval = FALSE------------------------------------------------ # n <- lapply(lso, plot, log.l = TRUE) ## ----echo = FALSE, eval = TRUE, fig.height= 9, fig.width=7-------------------- old_par <- par(mfrow = c(4,2), mar = c(3, 4, 4, 2)) n <- lapply(lso, plot, log.l = TRUE) par(old_par) ## ----------------------------------------------------------------------------- mcp <- list( SRM = sparseR(CASE ~ ., train, seed = 1, penalty = "MCP"), ## SRM model APM = sparseR(CASE ~ ., train, seed = 1, gamma = 0, penalty = "MCP"), ## APM model MEM = sparseR(CASE ~ ., train, seed = 1, k = 0, penalty = "MCP"), ## Main effects MCP model SRMp = sparseR(CASE ~ ., train, seed = 1, poly = 2, penalty = "MCP") ## SRM + polynomials ) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # n <- lapply(mcp, plot, log.l = TRUE) ## ----echo = FALSE, eval = TRUE, fig.height= 9, fig.width=7-------------------- old_par <- par(mfrow = c(4,2), mar = c(3, 4, 4, 2)) n <- lapply(mcp, plot, log.l = TRUE) par(old_par) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # scad <- list( # SRS = sparseR(CASE ~ ., train, seed = 1, penalty = "SCAD"), ## SRS model # APS = sparseR(CASE ~ ., train, seed = 1, gamma = 0, penalty = "SCAD"), ## APS model # MES = sparseR(CASE ~ ., train, seed = 1, k = 0, penalty = "SCAD"), ## Main effects SCAD model # SRSp = sparseR(CASE ~ ., train, seed = 1, poly = 2, penalty = "SCAD") ## SRS + polynomials # ) # # n <- lapply(scad, plot, log.l = TRUE) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # lapply(lso, function(x) bind_rows(x$results_summary, x$results1se_summary)) # lapply(mcp, function(x) bind_rows(x$results_summary, x$results1se_summary)) ## ----echo = FALSE, message = FALSE-------------------------------------------- lso_sum <- lapply(lso, function(x) bind_cols(x$results_summary[, c(1, 5, 2:4)], x$results1se_summary[, -c(1:2, 5)], .name_repair = "universal")) %>% bind_rows() mcp_sum <- lapply(mcp, function(x) bind_cols(x$results_summary[, c(1, 5, 2:4)], x$results1se_summary[, -c(1:2, 5)])) %>% bind_rows() names(mcp_sum)[6:7] <- names(lso_sum)[6:7] <- names(lso_sum)[4:5] <- names(mcp_sum)[4:5] <- c("Selected", "Saturation") lso_sum %>% kable(digits = 3) %>% kable_styling("striped", full_width = FALSE) %>% add_header_above(header = c(" " = 3, "Min CV" = 2, "CV1se" = 2)) %>% group_rows(index = c("SRL" = 2, "APL" = 2, "MEL" = 1, "SRLp" = 3)) mcp_sum %>% kable(digits = 3) %>% kable_styling("striped", full_width = FALSE) %>% add_header_above(header = c(" " = 3, "Min CV" = 2, "CV1se" = 2)) %>% group_rows(index = c("SRM" = 2, "APM" = 2, "MEM" = 1, "SRMp" = 3)) ## ----------------------------------------------------------------------------- ## Load Data set, correctly code factors + outcome data("Detrano") cleveland$thal <- factor(cleveland$thal) cleveland$case <- 1*(cleveland$num > 0) # Convert variables into factor variables if necessary! summary(cleveland) sapply(cleveland, function(x) length(unique(x))) cleveland$sex <- factor(cleveland$sex) cleveland$fbs <- factor(cleveland$fbs) cleveland$exang <- factor(cleveland$exang) # Set seed for reproducibility set.seed(167) # Split data into test and train N <- nrow(cleveland) trainIDX <- sample(1:N, N*.5) trainDF <- cleveland[trainIDX,] %>% select(-num) testDF <- cleveland[-trainIDX,] %>% select(-num) # Simulate missing data trainDF$thal[2] <- trainDF$thalach[1] <- NA lso <- list( SRL = sparseR(case ~ ., trainDF, seed = 1), ## SRL model APL = sparseR(case ~ ., trainDF, seed = 1, gamma = 0), ## APL model ME = sparseR(case ~ ., trainDF, seed = 1, k = 0), ## Main effects model SRLp = sparseR(case ~ ., trainDF, seed = 1, poly = 2) ## SRL + polynomials ) lso plot(lso$SRL) ## ----echo = TRUE, eval = FALSE------------------------------------------------ # lapply(lso, plot, log.l = TRUE) ## ----eval = TRUE, echo=FALSE, fig.height=8, fig.width=7----------------------- old_par <- par(mfrow = c(4,2)) res <- lapply(lso, plot, log.l = TRUE) par(old_par)