---
title: "Introduction to the parglm package"
author: "Benjamin Christoffersen and Tom Palmer"
date: "2026-05-12"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Introduction to the parglm package}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

The motivation for the `parglm` package is a parallel version of the `glm`
function. It solves the iteratively re-weighted least squares problem using a
QR decomposition with column pivoting, with the `DGEQP3` function from LAPACK.
The computation is done in parallel as in the `bam` function in the `mgcv`
package. The cost is an additional $O(Mp^2 + p^3)$ where $p$ is the number of
coefficients and $M$ is the number of chunks to be computed in parallel. The
advantage is that you do not need to compile the package with an optimized
BLAS or LAPACK that supports multithreading.

The package also includes a method that computes the Fisher information and
then solves the normal equation as in the `speedglm` package. This is faster
but less numerically stable.

## Example of computation time

Below, we estimate a logistic regression with 1000000 observations and 50
covariates. We vary the number of cores being used with the `nthreads`
argument to `parglm.control`. The `method` argument sets which method is used.
`LINPACK` uses the same pivoted QR (`dqrdc2`) as `glm.fit` for the final
decomposition; `LAPACK` uses `DGEQP3` from LAPACK throughout; and `FAST`
solves the normal equations from the Fisher information, similar to the
`speedglm` package.

``` r
#####
# simulate
n # number of observations
#> [1] 1000000
p # number of covariates
#> [1] 50

set.seed(68024947)
X <- matrix(rnorm(n * p, 1/p, 1/sqrt(p)), n, ncol = p)
df <- data.frame(y = 1/(1 + exp(-(rowSums(X) - 1))) > runif(n), X)
```

``` r
#####
# compute and measure time. Setup call to make
library(microbenchmark)
library(speedglm)
#> Loading required package: Matrix
#> Loading required package: MASS
#> Loading required package: biglm
#> Loading required package: DBI
library(fastglm)
library(glm2)
#> 
#> Attaching package: 'glm2'
#> The following object is masked from 'package:MASS':
#> 
#>     crabs
library(biglm)
library(mgcv)
#> Loading required package: nlme
#> This is mgcv 1.9-4. For overview type '?mgcv'.
#> 
#> Attaching package: 'mgcv'
#> The following object is masked from 'package:fastglm':
#> 
#>     negbin
library(parglm)

X_mat <- model.matrix(y ~ ., df)
biglm_form <- reformulate(names(df)[-1], response = "y")
bam_form <- biglm_form

cl <- list(
  quote(microbenchmark),
  glm      = quote(glm     (y ~ ., binomial(), df)),
  speedglm = quote(speedglm(y ~ ., family = binomial(), data = df)),
  glm2     = quote(glm2    (y ~ ., binomial(), df)),
  fastglm  = quote(fastglm (X_mat, as.numeric(df$y), family = binomial())),
  fastglm3 = quote(fastglm (X_mat, as.numeric(df$y), family = binomial(),
                            method = 3L)),
  bigglm   = quote(bigglm  (biglm_form, df, family = binomial())),
  bam      = quote(bam     (bam_form, family = binomial(), data = df)),
  times = 11L)

tfunc <- function(method = "LINPACK", nthreads)
  parglm(y ~ ., binomial(), df,
         control = parglm.control(method = method, nthreads = nthreads))

cl <- c(
  cl, lapply(1:n_threads, function(i) bquote(tfunc(nthreads = .(i)))))
names(cl)[10:(10L + n_threads - 1L)] <- paste0("parglm-LINPACK-", 1:n_threads)

cl <- c(
  cl, lapply(1:n_threads, function(i) bquote(tfunc(
    nthreads = .(i), method = "FAST"))))
names(cl)[(10L + n_threads):(10L + 2L * n_threads - 1L)] <-
  paste0("parglm-FAST-", 1:n_threads)

cl <- c(
  cl, lapply(1:n_threads, function(i) bquote(tfunc(
    nthreads = .(i), method = "LAPACK"))))
names(cl)[(10L + 2L * n_threads):(10L + 3L * n_threads - 1L)] <-
  paste0("parglm-LAPACK-", 1:n_threads)

cl <- as.call(cl)
cl # the call we make
#> microbenchmark(glm = glm(y ~ ., binomial(), df), speedglm = speedglm(y ~
#> ., family = binomial(), data = df), glm2 = glm2(y ~ ., binomial(),
#> df), fastglm = fastglm(X_mat, as.numeric(df$y), family = binomial()),
#> fastglm3 = fastglm(X_mat, as.numeric(df$y), family = binomial(),
#> method = 3L), bigglm = bigglm(biglm_form, df, family = binomial()),
#> bam = bam(bam_form, family = binomial(), data = df), times = 11L,
#> `parglm-LINPACK-1` = tfunc(nthreads = 1L), `parglm-LINPACK-2` = tfunc(nthreads = 2L),
#> `parglm-LINPACK-3` = tfunc(nthreads = 3L), `parglm-LINPACK-4` = tfunc(nthreads = 4L),
#> `parglm-LINPACK-5` = tfunc(nthreads = 5L), `parglm-LINPACK-6` = tfunc(nthreads = 6L),
#> `parglm-LINPACK-7` = tfunc(nthreads = 7L), `parglm-LINPACK-8` = tfunc(nthreads = 8L),
#> `parglm-LINPACK-9` = tfunc(nthreads = 9L), `parglm-LINPACK-10` = tfunc(nthreads = 10L),
#> `parglm-FAST-1` = tfunc(nthreads = 1L, method = "FAST"),
#> `parglm-FAST-2` = tfunc(nthreads = 2L, method = "FAST"),
#> `parglm-FAST-3` = tfunc(nthreads = 3L, method = "FAST"),
#> `parglm-FAST-4` = tfunc(nthreads = 4L, method = "FAST"),
#> `parglm-FAST-5` = tfunc(nthreads = 5L, method = "FAST"),
#> `parglm-FAST-6` = tfunc(nthreads = 6L, method = "FAST"),
#> `parglm-FAST-7` = tfunc(nthreads = 7L, method = "FAST"),
#> `parglm-FAST-8` = tfunc(nthreads = 8L, method = "FAST"),
#> `parglm-FAST-9` = tfunc(nthreads = 9L, method = "FAST"),
#> `parglm-FAST-10` = tfunc(nthreads = 10L, method = "FAST"),
#> `parglm-LAPACK-1` = tfunc(nthreads = 1L, method = "LAPACK"),
#> `parglm-LAPACK-2` = tfunc(nthreads = 2L, method = "LAPACK"),
#> `parglm-LAPACK-3` = tfunc(nthreads = 3L, method = "LAPACK"),
#> `parglm-LAPACK-4` = tfunc(nthreads = 4L, method = "LAPACK"),
#> `parglm-LAPACK-5` = tfunc(nthreads = 5L, method = "LAPACK"),
#> `parglm-LAPACK-6` = tfunc(nthreads = 6L, method = "LAPACK"),
#> `parglm-LAPACK-7` = tfunc(nthreads = 7L, method = "LAPACK"),
#> `parglm-LAPACK-8` = tfunc(nthreads = 8L, method = "LAPACK"),
#> `parglm-LAPACK-9` = tfunc(nthreads = 9L, method = "LAPACK"),
#> `parglm-LAPACK-10` = tfunc(nthreads = 10L, method = "LAPACK"))

out <- eval(cl)
#> Warning in microbenchmark(glm = glm(y ~ ., binomial(), df), speedglm =
#> speedglm(y ~ : less accurate nanosecond times to avoid potential integer
#> overflows
```

``` r
s <- summary(out) # result from `microbenchmark`
print(s[, c("expr", "min", "mean", "median", "max")], digits = 3,
      row.names = FALSE)
#>               expr  min mean median  max
#>                glm 2404 2480   2460 2663
#>           speedglm  725  787    761 1065
#>               glm2 2427 2526   2482 2976
#>            fastglm 1895 1992   1940 2162
#>           fastglm3  547  558    555  585
#>             bigglm 6173 6416   6379 7011
#>                bam 4906 5067   5055 5340
#>   parglm-LINPACK-1 1825 1879   1870 1963
#>   parglm-LINPACK-2 1646 1704   1676 1832
#>   parglm-LINPACK-3 1569 1608   1607 1648
#>   parglm-LINPACK-4 1560 1611   1605 1743
#>   parglm-LINPACK-5 1397 1439   1427 1537
#>   parglm-LINPACK-6 1398 1421   1412 1471
#>   parglm-LINPACK-7 1379 1473   1419 1713
#>   parglm-LINPACK-8 1356 1457   1406 1846
#>   parglm-LINPACK-9 1382 1418   1413 1465
#>  parglm-LINPACK-10 1347 1372   1374 1393
#>      parglm-FAST-1  449  510    491  592
#>      parglm-FAST-2  387  414    408  488
#>      parglm-FAST-3  372  414    414  460
#>      parglm-FAST-4  367  423    405  527
#>      parglm-FAST-5  381  431    387  675
#>      parglm-FAST-6  348  413    395  584
#>      parglm-FAST-7  347  374    368  406
#>      parglm-FAST-8  348  393    381  454
#>      parglm-FAST-9  348  407    387  556
#>     parglm-FAST-10  338  373    372  440
#>    parglm-LAPACK-1 1825 1885   1873 2058
#>    parglm-LAPACK-2 1646 1670   1665 1733
#>    parglm-LAPACK-3 1586 1617   1609 1685
#>    parglm-LAPACK-4 1557 1598   1588 1676
#>    parglm-LAPACK-5 1390 1444   1440 1580
#>    parglm-LAPACK-6 1378 1399   1391 1451
#>    parglm-LAPACK-7 1368 1411   1403 1489
#>    parglm-LAPACK-8 1376 1403   1396 1475
#>    parglm-LAPACK-9 1365 1420   1400 1597
#>   parglm-LAPACK-10 1363 1413   1401 1507
```

The plot below shows median run times versus the number of cores. Coloured
horizontal lines show the single-threaded reference times for `glm`,
`speedglm`, `glm2`, `fastglm`, `fastglm3` (labelled "fastglm (LDLT)" in the
plot legend), `bigglm`, and `bam`. We could have used `glm.fit` and
`parglm.fit`.
This would make the relative difference bigger, as both call, e.g.,
`model.matrix` and `model.frame`, which do take some time. To show this
point, we first compute how much time this takes and then we make the plot.
The black solid line is the computation time of `model.matrix` and
`model.frame`.

``` r
modmat_time <- microbenchmark(
  modmat_time = {
    mf <- model.frame(y ~ ., df); model.matrix(terms(mf), mf)
  }, times = 10)
modmat_time # time taken by `model.matrix` and `model.frame`
#> Unit: milliseconds
#>        expr       min        lq       mean    median        uq       max
#> modmat_time 75.486453 93.180659 98.3097262 95.229306 112.323805 121.203257
#>  neval
#>     10
```

``` r
par(mar = c(4.5, 4.5, .5, 9))
o <- aggregate(time ~ expr, out, median)[, 2] / 10^9
ylim <- range(o, 0); ylim[2] <- ylim[2] + .04 * diff(ylim)

o_linpack <- o[8L:(n_threads + 7L)]
o_fast    <- o[(n_threads + 8L):(2L * n_threads + 7L)]
o_lapack  <- o[(2L * n_threads + 8L):(3L * n_threads + 7L)]

ref_cols <- c("#E69F00", "#56B4E9", "#009E73", "#0072B2", "#F0E442",
              "#D55E00", "#CC79A7")
ref_lty <- c("dashed", "dotted", "dotdash", "longdash", "44", "twodash",
             "1373")

plot(1:n_threads, o_linpack, xlab = "Number of cores", yaxs = "i",
     ylim = ylim, ylab = "Run time (s)", pch = 16, cex = 1.4)
points(1:n_threads, o_fast, pch = 1, cex = 1.4, lwd = 2)
points(1:n_threads, o_lapack, pch = 4, cex = 1.4, lwd = 2)
for (i in 1:7)
  abline(h = o[i], lty = ref_lty[i], col = ref_cols[i], lwd = 2)
abline(h = median(modmat_time$time) / 10^9, lty = "solid", col = "black",
       lwd = 2)

usr <- par("usr")
par(xpd = TRUE)
legend(x = usr[2], y = usr[4], xjust = 0, yjust = 1,
       legend = c("glm", "speedglm", "glm2", "fastglm", "fastglm (LDLT)",
                  "bigglm", "bam", "model.matrix", "parglm LINPACK",
                  "parglm FAST", "parglm LAPACK"),
       lty = c(ref_lty, "solid", NA, NA, NA),
       lwd = c(2, 2, 2, 2, 2, 2, 2, 2, NA, NA, NA),
       col = c(ref_cols, "black", "black", "black", "black"),
       pch = c(NA, NA, NA, NA, NA, NA, NA, NA, 16, 1, 4),
       bty = "n")
```
Plot of runtime versus number of cores.
Plot of runtime versus number of cores for n = 100,000.
Plot of runtime versus number of cores for n = 10,000.
Plot of runtime versus number of cores for n = 100,000 and p = 5.
Plot of runtime versus number of cores for n = 1,000,000 and p = 20.