## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set( fig.width = 5 , fig.height = 3.5, fig.align = 'center' ) oldpar <- list(mar = par()$mar, mfrow = par()$mfrow) ## ----------------------------------------------------------------------------- library(nnet) library(classmap) ## ----------------------------------------------------------------------------- set.seed(123) nn.iris <- nnet(Species ~ ., data = iris, size = 3) names(nn.iris) head(nn.iris$fitted.values) # matrix of posterior probabilities. nn.iris$wts # the "weights" (coefficients, some are negative). summary(nn.iris) ## ----------------------------------------------------------------------------- train_x <- as.matrix(iris[, -5]) # original input: 4 variables. # extract "weights" (coefficients) from each layer: bias1 <- nn.iris$wts[c(1, 6, 11)] # intercepts to hidden layer betas1 <- cbind(nn.iris$wts[2:5], nn.iris$wts[7:10], nn.iris$wts[12:15]) # slopes to hidden layer bias2 <- nn.iris$wts[c(16, 20, 24)] # intercepts to output layer betas2 <- cbind(nn.iris$wts[17:19], nn.iris$wts[21:23], nn.iris$wts[25:27]) # slopes to output layer. ## ----------------------------------------------------------------------------- sigmoid <- function(x) 1 / (1 + exp(-x)) H <- t(t(train_x %*% betas1) + bias1) # = hidden layer X <- t(t(sigmoid(H) %*% betas2) + bias2) # = layer before softmax. pairs(X, col = unclass(iris$Species)) # G=3 classes ==> 3 dimensions. ## ----------------------------------------------------------------------------- softmax <- function(x) exp(x) / sum(exp(x)) outprobs <- t(apply(X, 1, softmax)) trainprobs <- nn.iris$fitted.values # this is after softmax range(trainprobs - outprobs) # near 0, so we have a match. ## ----------------------------------------------------------------------------- y <- iris[, 5] vcrtrain <- vcr.neural.train(X, y, trainprobs) names(vcrtrain) vcrtrain$predint[c(1:10, 51:60, 101:110)] # prediction as integer vcrtrain$pred[c(1:10, 51:60, 101:110)] # prediction as label vcrtrain$altint[c(1:10, 51:60, 101:110)] # alternative label as integer vcrtrain$altlab[c(1:10, 51:60, 101:110)] # alternative label # Probability of Alternative Class (PAC) of each object: vcrtrain$PAC[1:3] # summary(vcrtrain$PAC) # f(i, g) is the distance from case i to class g: vcrtrain$fig[1:5, ] # for the first 5 objects: # The farness of an object i is the f(i, g) to its own class: vcrtrain$farness[1:5] # summary(vcrtrain$farness) # The "overall farness" of an object is defined as the # lowest f(i, g) it has to any class g (including its own): summary(vcrtrain$ofarness) confmat.vcr(vcrtrain) ## ----------------------------------------------------------------------------- cols <- c("red", "darkgreen", "blue") # stacked mosaic plot: stplot <- stackedplot(vcrtrain, classCols = cols, main = "Stacked plot of nnet on iris data") stplot # silhouette plot: silplot(vcrtrain, classCols = cols, main = "Silhouette plot of nnet on iris data") # class maps: classmap(vcrtrain, "setosa", classCols = cols) # Very tight class (low PAC, no high farness). classmap(vcrtrain, "versicolor", classCols = cols) # Not so tight, one point is predicted as virginica. classmap(vcrtrain, "virginica", classCols = cols) # Also tight. ## ----------------------------------------------------------------------------- test_x <- train_x[1:100, ] ynew <- y[1:100] ynew[c(1:10, 51:60)] <- NA pairs(train_x, col = as.numeric(y) + 1, pch = 19) # 3 colors pairs(test_x, col = as.numeric(ynew) + 1, pch = 19) # only red and green ## ----------------------------------------------------------------------------- predprobs <- predict(nn.iris, test_x, type = "raw") range(predprobs - trainprobs[1:100, ]) # perfect match # Reconstruct this prediction: Hnew <- t(t(test_x %*% betas1) + bias1) # hidden layer Xnew <- t(t(sigmoid(Hnew) %*% betas2) + bias2) # layer before softmax probsnew <- t(apply(Xnew, 1, softmax)) range(probsnew - predprobs) # ~0, so we have a match ## ----------------------------------------------------------------------------- vcrtest <- vcr.neural.newdata(Xnew, ynew, probsnew, vcrtrain) plot(vcrtest$predint, vcrtrain$predint[1:100]); abline(0, 1) # identical, OK plot(vcrtest$altint, vcrtrain$altint[1:100]); abline(0, 1) # identical when not NA, OK plot(vcrtest$PAC, vcrtrain$PAC[1:100]); abline(0, 1) # OK vcrtest$farness # length 100, with NA's where ynew is NA plot(vcrtest$farness, vcrtrain$farness[1:100]); abline(0, 1) # identical where not NA plot(vcrtest$fig, vcrtrain$fig[1:100, ]); abline(0, 1) # same, OK vcrtest$ofarness # 100, withOUT NA's: ofarness even exists for cases with missing ynew, for which farness cannot exist. plot(vcrtest$ofarness, vcrtrain$ofarness[1:100]); abline(0, 1) # same, OK confmat.vcr(vcrtest) # as expected: ## ----------------------------------------------------------------------------- stplot # to compare with: stackedplot(vcrtest, classCols = cols, separSize = 1.5, minSize = 1, main = "Stacked plot of nnet on subset of iris data") silplot(vcrtest, classCols = cols, main = "Silhouette plot of nnet on subset of iris data") classmap(vcrtrain, 1, classCols = cols) classmap(vcrtest, 1, classCols = cols) # same, but fewer points # classmap(vcrtrain, 2, classCols = cols) classmap(vcrtest, 2, classCols = cols) # same, but fewer points # # classmap(vcrtrain, 3, classCols = cols) # classmap(vcrtest, 3, classCols = cols) # # Class number 3 with label virginica has no objects to visualize. ## ----------------------------------------------------------------------------- data("data_floralbuds") ## ----------------------------------------------------------------------------- set.seed(123) nn.buds <- nnet(y ~ ., data = data_floralbuds, size = 2) names(nn.buds) head(nn.buds$fitted.values) # matrix of posterior probabilities of each class nn.buds$wts # the "weights" (coefficients, some negative) summary(nn.buds) # A hidden layer with 2 neurons: ## ----------------------------------------------------------------------------- train_x <- as.matrix(data_floralbuds[, -7]) # extract weights for each layer: bias1 <- nn.buds$wts[c(1, 8)] betas1 <- cbind(nn.buds$wts[2:7], nn.buds$wts[9:14]) bias2 <- nn.buds$wts[c(15, 18, 21, 24)] betas2 <- cbind(nn.buds$wts[16:17], nn.buds$wts[19:20], nn.buds$wts[22:23], nn.buds$wts[25:26]) ## ----------------------------------------------------------------------------- H <- t(t(train_x %*% betas1) + bias1) # hidden layer X <- t(t(sigmoid(H) %*% betas2) + bias2) # layer before softmax outprobs <- t(apply(X, 1, softmax)) trainprobs <- nn.buds$fitted.values # posterior probabilities range(trainprobs - outprobs) # ~0 # OK, we have reconstructed the posterior probabilities pairs(X, col = data_floralbuds$y) ## ----------------------------------------------------------------------------- y <- data_floralbuds$y vcrtrain <- vcr.neural.train(X, y, trainprobs) cols <- c("saddlebrown", "orange", "olivedrab4", "royalblue3") ## ----------------------------------------------------------------------------- stackedplot(vcrtrain, classCols = cols, main = "Stacked plot of nnet on floral buds data") # Silhouette plot: silplot(vcrtrain, classCols = cols, main = "Silhouette plot of nnet on floral buds data") # Quasi residual plot: PAC <- vcrtrain$PAC feat <- rowSums(train_x); xlab <- "rowSums(X)" # pdf("Floralbuds_quasi_residual_plot.pdf", width = 5, height = 4.8) qresplot(PAC, feat, xlab = xlab, plotErrorBars = TRUE, fac = 2, main = "Floral buds: quasi residual plot") # images with higher sum are easier to classify # dev.off() cor.test(feat, PAC, method = "spearman") # rho = -0.238, p-value < 2e-8 # ==> decreasing trend is significant # Class maps: classmap(vcrtrain, "branch", classCols = cols) classmap(vcrtrain, "bud", classCols = cols) classmap(vcrtrain, "scales", classCols = cols) classmap(vcrtrain, "support", classCols = cols)