## ----echo = FALSE, results = "hide", message = FALSE-------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") library(bit) library(microbenchmark) # rmarkdown::render("vignettes/bit-performance.Rmd") # these are the real settings for the performance vignette times <- 5 Domain <- c(small=1e3, big=1e6) Sample <- c(small=1e3, big=1e6) # these are the settings to keep the cost of CRAN low #times <- 5 #Domain <- c(small=1e1, big=1e3) #Sample <- c(small=1e1, big=1e3) pagebreak <- function() { if (knitr::is_latex_output()) "\\newpage" else '
' } ## ----echo=TRUE, results='asis'------------------------------------------------ a <- 1L b <- 1e7L i <- sample(a:b, 1e3) x <- c( R = median(microbenchmark((a:b)[-i], times=times)$time), bit = median(microbenchmark(bit_rangediff(c(a, b), i), times=times)$time), merge = median(microbenchmark(merge_rangediff(c(a, b), bit_sort(i)), times=times)$time) ) knitr::kable(as.data.frame(as.list(x / x["R"] * 100)), caption="% of time relative to R", digits=1) ## ----echo=FALSE, results='asis', echo=3:5------------------------------------- # TODO(r-lib/lintr#773): nolint as a chunk option. # nolint start: strings_as_factors_linter. knitr::kable( data.frame( coin="random 50%", often="random 99%", rare="random 1%", chunk="contiguous chunk of 5%" ), caption="selection characteristic" ) # nolint end: strings_as_factors_linter. ## ----echo=FALSE, results='asis'----------------------------------------------- B <- booltypes[c("logical", "bit", "bitwhich", "which", "ri")] M <- c("size", "[]", "[which]", "[which]<-TRUE", "[]<-logical", "!", "&", "|", "==", "!=", "summary") G <- list( coin = function(n) sample(c(FALSE, TRUE), n, replace=TRUE, prob=c(0.5, 0.5)), often = function(n) sample(c(FALSE, TRUE), n, replace=TRUE, prob=c(0.01, 0.99)), rare = function(n) sample(c(FALSE, TRUE), n, replace=TRUE, prob=c(0.99, 0.01)), chunk = function(n) ri(n %/% 20, 2L * n %/% 20, n) ) X <- vector("list", length(B) * length(G)) dim(X) <- c(booltype=length(B), data=length(G)) dimnames(X) <- list(booltype=names(B), data=names(G)) tim <- array(NA, dim=c(booltype=length(B), metric=length(M), data=length(G)), dimnames=list(booltype=names(B), metric=M, data=names(G)) ) for (g in names(G)) { x <- G[[g]](Sample[["big"]]) if (g %in% c("coin", "often", "rare")) w <- as.which(as.logical(x)) for (b in B) { if (booltypes[[b]] < 'ri' || (b == 'ri' && g == 'chunk')) { X[[b, g]] <- as.booltype(x, b) if (g %in% c("coin", "often", "rare") && b %in% c("logical", "bit", "bitwhich")) { l <- as.booltype(logical(Sample[["big"]]), b) tim[b, "[which]", g] <- median(microbenchmark(l[w], times=times)$time) tim[b, "[which]<-TRUE", g] <- median(microbenchmark(l[w] <- TRUE, times=times)$time) tim[b, "[]", g] <- median(microbenchmark(l[], times=times)$time) tim[b, "[]<-logical", g] <- median(microbenchmark(l[] <- x, times=times)$time) } tim[b, "size", g] <- object.size(X[[b, g]]) } } } for (g in names(G)) { for (b in c("logical", "bit", "bitwhich")) { x <- X[[b, g]] if (!is.null(x)) { tim[b, "!", g] <- median(microbenchmark(!x, times=times)$time) tim[b, "&", g] <- median(microbenchmark(x & x, times=times)$time) tim[b, "|", g] <- median(microbenchmark(x | x, times=times)$time) tim[b, "==", g] <- median(microbenchmark(x == x, times=times)$time) tim[b, "!=", g] <- median(microbenchmark(x != x, times=times)$time) tim[b, "summary", g] <- median(microbenchmark(summary.booltype(x), times=times)$time) } } } i <- match("size", M) for (b in rev(names(B))) { # logical was in first position, so we do this last! tim[b, i, ] <- 100 * tim[b, i, ] / tim["logical", i, ] tim[b, -i, ] <- 100 * tim[b, -i, ] / max(tim["logical", -i, ], na.rm=TRUE) } #rm(X) ## ----echo=FALSE, fig.cap = "% size and execution time for bit (b) and bitwhich (w) relative to logical (R) in the 'rare' scenario"---- x <- tim[1:3, , "rare"] m <- rep("", ncol(x)) m <- as.vector(rbind(m, colnames(x), m)) dotchart(x, xlim=c(0, max(100, max(x))), labels=m, pch=c("R", "b", "w"), col=c("black", "blue", "red"), main="% size and timings in 'rare' scenario", sub="l='logical' b='bit' w='bitwhich' % of max(R) in all scenarios" ) ## ----echo=FALSE, fig.cap = "% size and execution time for bit (b) and bitwhich (w) relative to logical (R) in the 'often' scenario"---- x <- tim[1:3, , "often"] dotchart(x, xlim=c(0, max(100, max(x))), labels=m, pch=c("R", "b", "w"), col=c("black", "blue", "red"), main="% size and timings in 'often' scenario", sub="l='logical' b='bit' w='bitwhich' % of max(R) in all scenarios" ) ## ----echo=FALSE, fig.cap = "% size and execution time for bit (b) and bitwhich (w) relative to logical (R) in the 'coin' scenario"---- x <- tim[1:3, , "coin"] dotchart(x, xlim=c(0, max(100, max(x))), labels=m, pch=c("R", "b", "w"), col=c("black", "blue", "red"), main="% size and timings in 'coin' scenario", sub="l='logical' b='bit' w='bitwhich' % of max(R) in all scenarios" ) ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "size", ], 1), caption="% bytes of logical") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "[]", ], 1), caption="% time of logical") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "[]<-logical", ], 1), caption="% time of logical") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "[which]", ], 1), caption="% time of logical") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "[which]<-TRUE", ], 1), caption="% time of logical") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "!", ], 1), caption="% time for Boolean NOT") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "&", ], 1), caption="% time for Boolean &") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "|", ], 1), caption="% time for Boolean |") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "==", ], 1), caption="% time for Boolean ==") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "!=", ], 1), caption="% time for Boolean !=") ## ----echo=FALSE, results='asis'----------------------------------------------- knitr::kable(round(tim[, "summary", ][1:2, 1:2], 1), caption="% time for Boolean summary") ## ----echo=FALSE, results='asis'----------------------------------------------- binaryDomain <- list( smallsmall = rep(Domain["small"], 2L), smallbig=Domain, bigsmall=rev(Domain), bigbig=rep(Domain["big"], 2L) ) binarySample <- list( smallsmall = rep(Sample["small"], 2L), smallbig=Sample, bigsmall=rev(Sample), bigbig=rep(Sample["big"], 2L) ) M <- c("R", "bit", "merge") G <- c("sort", "sortunique") D <- c("unsorted", "sorted") sortM <- vector("list", length(M) * length(G)) dim(sortM) <- c(method=length(M), goal=length(G)) dimnames(sortM) <- list(method=M, goal=G) sortM[["R", "sort"]] <- sort sortM[["R", "sortunique"]] <- function(x) sort(unique(x)) sortM[["bit", "sort"]] <- bit_sort sortM[["bit", "sortunique"]] <- bit_sort_unique timsort <- array(NA_integer_, dim=c(M=2, G=length(G), D=length(D), N=length(Domain)), dimnames=list(M=M[1:2], G=G, D=D, N=names(Domain)) ) for (n in names(Domain)) { x <- sample(Domain[[n]], Sample[[n]], replace = TRUE) d <- "unsorted" for (m in c("R", "bit")) { for (g in G) { timsort[m, g, d, n] <- median(microbenchmark(sortM[[m, g]](x), times=times)$time) } } x <- bit_sort(x) d <- "sorted" for (m in 1:2) { for (g in G) { timsort[m, g, d, n] <- median(microbenchmark(sortM[[m, g]](x), times=times)$time) } } } binaryU <- c("match", "in", "notin", "union", "intersect", "setdiff", "symdiff", "setequal", "setearly") binaryM <- vector("list", length(M) * length(binaryU)) dim(binaryM) <- c(method=length(M), task=length(binaryU)) dimnames(binaryM) <- list(method=M, task=binaryU) binaryM[["R", "match"]] <- match binaryM[["merge", "match"]] <- merge_match binaryM[["R", "in"]] <- get("%in%") binaryM[["bit", "in"]] <- bit_in binaryM[["merge", "in"]] <- merge_in binaryM[["R", "notin"]] <- function(x, y) !(x %in% y) binaryM[["bit", "notin"]] <- function(x, y) !bit_in(x, y) binaryM[["merge", "notin"]] <- merge_notin binaryM[["R", "union"]] <- union binaryM[["bit", "union"]] <- bit_union binaryM[["merge", "union"]] <- merge_union binaryM[["R", "intersect"]] <- intersect binaryM[["bit", "intersect"]] <- bit_intersect binaryM[["merge", "intersect"]] <- merge_intersect binaryM[["R", "setdiff"]] <- setdiff binaryM[["bit", "setdiff"]] <- bit_setdiff binaryM[["merge", "setdiff"]] <- merge_setdiff binaryM[["R", "symdiff"]] <- function(x, y) union(setdiff(x, y), setdiff(y, x)) binaryM[["bit", "symdiff"]] <- bit_symdiff binaryM[["merge", "symdiff"]] <- merge_symdiff binaryM[["R", "setequal"]] <- function(x, y) setequal(x, x) # we compare x to x which avoids early termination binaryM[["bit", "setequal"]] <- function(x, y) bit_setequal(x, x) binaryM[["merge", "setequal"]] <- function(x, y) merge_setequal(x, x) binaryM[["R", "setearly"]] <- function(x, y) setequal(x, y) # we compare x to x which avoids early termination binaryM[["bit", "setearly"]] <- function(x, y) bit_setequal(x, y) binaryM[["merge", "setearly"]] <- function(x, y) merge_setequal(x, y) unaryU <- c("unique", "duplicated", "anyDuplicated", "sumDuplicated") unaryM <- vector("list", length(M) * length(unaryU)) dim(unaryM) <- c(method=length(M), task=length(unaryU)) dimnames(unaryM) <- list(method=M, task=unaryU) unaryM[["R", "unique"]] <- unique unaryM[["bit", "unique"]] <- bit_unique unaryM[["merge", "unique"]] <- merge_unique unaryM[["R", "duplicated"]] <- duplicated unaryM[["bit", "duplicated"]] <- bit_duplicated unaryM[["merge", "duplicated"]] <- merge_duplicated unaryM[["R", "anyDuplicated"]] <- anyDuplicated unaryM[["bit", "anyDuplicated"]] <- bit_anyDuplicated unaryM[["merge", "anyDuplicated"]] <- merge_anyDuplicated unaryM[["R", "sumDuplicated"]] <- function(x) sum(duplicated(x)) unaryM[["bit", "sumDuplicated"]] <- bit_sumDuplicated unaryM[["merge", "sumDuplicated"]] <- merge_sumDuplicated tim <- array(NA_integer_, dim=c(M=length(M), U=length(unaryU) + length(binaryU), N=length(binaryDomain), D=length(D)), dimnames=list(M=M, U=c(unaryU, binaryU), N=names(binaryDomain), D=D) ) for (n in names(binaryDomain)) { xnam <- names(binaryDomain[[n]])[1] ynam <- names(binaryDomain[[n]])[2] x <- sample(binaryDomain[[n]][1], binarySample[[n]][1], replace = FALSE) y <- sample(binaryDomain[[n]][2], binarySample[[n]][2], replace = FALSE) d <- "unsorted" if (length(x) == length(y)) { for (u in unaryU) { for (m in setdiff(M, "merge")) { f <- unaryM[[m, u]] if (!is.null(f)) tim[m, u, n, d] <- median(microbenchmark(f(x), times=times)$time) } } } for (u in binaryU) { for (m in setdiff(M, "merge")) { f <- binaryM[[m, u]] if (!is.null(f)) tim[m, u, n, d] <- median(microbenchmark(f(x, y), times=times)$time) } } x <- bit_sort(x) y <- bit_sort(y) d <- "sorted" if (length(x) == length(y)) { for (u in unaryU) { for (m in M) { f <- unaryM[[m, u]] if (!is.null(f)) { tim[m, u, n, d] <- median(microbenchmark(f(x), times=times)$time) # now plug-in measures for unsorted merge if (m == "merge") { tim["merge", u, n, "unsorted"] <- timsort["bit", "sort", "unsorted", xnam] + tim["merge", u, n, "sorted"] } } } } } for (u in binaryU) { for (m in M) { f <- binaryM[[m, u]] if (!is.null(f)) { tim[m, u, n, d] <- median(microbenchmark(f(x, y), times=times)$time) # now plug-in measures for unsorted merge if (m == "merge") { tim["merge", u, n, "unsorted"] = timsort["bit", "sort", "unsorted", xnam] + timsort["bit", "sort", "unsorted", ynam] + tim["merge", u, n, "sorted"] } } } } } ## ----echo=FALSE, fig.cap = "Execution time for R (R) and bit (b)"------------- y <- timsort[, , , "big"] y <- 100 * y / max(y["R", , ], na.rm=TRUE) oldpar <- par(mfrow=c(2, 1), mar=c(5, 8, 2, 1)) x <- y[, , "unsorted"] dotchart(x, xlim=c(0, max(100, max(y))), labels="", pch=c("R", "b"), xlab="execution time", main="unsorted", col=c("red", "blue") ) x <- y[, , "sorted"] dotchart(x, xlim=c(0, max(100, max(y))), labels="", pch=c("R", "b"), xlab="execution time", main="sorted", col=c("red", "blue") ) par(oldpar) ## ----echo=FALSE, results='hide'----------------------------------------------- tim2 <- tim for (n in names(binaryDomain)) { for (d in D) { tim2[, , n, d] <- 100 * tim[, , n, d] / max(tim["R", , n, d], na.rm=TRUE) } } ## ----echo=FALSE, fig.cap = "Execution time for R, bit and merge relative to most expensive R in 'unsorted bigbig' scenario"---- y <- tim2[, , "bigbig", ] y <- 100 * y / max(y["R", , ], na.rm=TRUE) x <- y[, , "unsorted"] m <- rep("", ncol(x)) m <- as.vector(rbind(m, colnames(x), m)) dotchart(x, xlim=c(0, max(100, max(y, na.rm=TRUE))), labels=m, pch=c("R", "b", "m"), col=c("red", "blue", "black"), main="Timings in 'unsorted bigbig' scenario", sub="R='hash' b='bit' m='merge'" ) ## ----echo=FALSE, fig.cap = "Execution time for R, bit and merge in 'sorted bigbig' scenario"---- x <- y[, , "sorted"] dotchart(x, xlim=c(0, max(y, na.rm=TRUE)), labels=m, pch=c("R", "b", "m"), col=c("red", "blue", "black"), main="Timings in 'sorted bigbig' scenario", sub="R='hash' b='bit' m='merge'" ) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- 100 * timsort["bit", , , ] / timsort["R", , , ] s <- "sorted" knitr::kable(x[, s, ], caption=paste(s, "data relative to R's sort"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, s, ], caption=paste(s, "data relative to R's sort"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- f <- function(u) { n <- c("smallsmall", "bigbig") x <- tim[c("bit", "merge", "merge"), u, n, ] dimnames(x)$M[3] <- "sort" dimnames(x)$N <- c("small", "big") x["sort", , "unsorted"] <- timsort["bit", "sort", "unsorted", ] x["sort", , "sorted"] <- 0 for (m in dimnames(x)$M) { x[m, , ] <- x[m, , ] / tim["R", u, n, ] * 100 } x } x <- f("unique") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("duplicated") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("anyDuplicated") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("sumDuplicated") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- f <- function(u) { x <- tim[c("bit", "merge", "merge"), u, , ] dimnames(x)$M[3] <- "sort" s <- timsort["bit", "sort", "unsorted", ] x["sort", , "unsorted"] <- rep(s, c(2, 2)) + c(s, s) x["sort", , "sorted"] <- 0 for (m in dimnames(x)$M) { x[m, , ] <- x[m, , ] / tim["R", u, , ] * 100 } x } x <- f("match") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("in") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("notin") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("union") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("intersect") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("setdiff") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("symdiff") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("setequal") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- x <- f("setearly") s <- "sorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1) ## ----echo=FALSE, results='asis'----------------------------------------------- s <- "unsorted" knitr::kable(x[, , s], caption=paste(s, "data relative to R"), digits=1)