Prerequisites

R packages

library(TmCalculator)
library(BSgenome)
library(GenomicRanges)

Step 1 — Building the E. coli BSgenome data package

TmCalculator retrieves sequences from BSgenome data packages. A prebuilt E. coli package is available on Bioconductor (BSgenome.Ecoli.NCBI.20080805), but it is an older multi-strain snapshot and does not provide the RefSeq assembly used here (GCF_000005845.2, ASM584v2, sequence U00096.3). Because the MutL-AR peaks, microsatellite, cruciform, ssDNA, and GATC tracks are all defined against this assembly, we forge a matching BSgenome package locally from the NCBI RefSeq sequence using BSgenomeForge, ensuring that genome sequence and feature coordinates share a single reference. This step only needs to be run once.

library(BSgenomeForge)

forgeBSgenomeDataPkgFromNCBI(
  assembly_accession = "GCF_000005845.2",
  pkg_maintainer     = "Junhui Li <ljh.biostat@gmail.com>",
  destdir            = "."
)

install.packages(
  "./BSgenome.Ecoli.NCBI.ASM584v2",
  repos = NULL,
  type  = "source"
)

During TmCalculator package development and vignette building (e.g. R CMD check), we cannot call forgeBSgenomeDataPkgFromNCBI() in the vignette environment, so the chunks below install the pre-forged BSgenome.Ecoli.NCBI.ASM584v2 package from GitHub instead.

ecoli_pkg   <- "BSgenome.Ecoli.NCBI.ASM584v2"
genome_obj  <- "Ecoli"   # BSgenomeObjname in DESCRIPTION; not the package name

.ecoli_genome_ready <- function() {
  if (!requireNamespace(ecoli_pkg, quietly = TRUE)) return(FALSE)
  exists(genome_obj, envir = asNamespace(ecoli_pkg), inherits = FALSE)
}

if (!requireNamespace(ecoli_pkg, quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    utils::install.packages("remotes", repos = "https://cloud.r-project.org")
  }
  remotes::install_github(
    "JunhuiLi1017/BSgenome.Ecoli.NCBI.ASM584v2",
    upgrade = "never",
    quiet = TRUE
  )
}

if (!.ecoli_genome_ready()) {
  if (!requireNamespace("BiocManager", quietly = TRUE)) {
    utils::install.packages("BiocManager", repos = "https://cloud.r-project.org")
  }
  if (!requireNamespace("BSgenomeForge", quietly = TRUE)) {
    BiocManager::install("BSgenomeForge", ask = FALSE, update = FALSE)
  }
  pkgdir <- BSgenomeForge::forgeBSgenomeDataPkgFromNCBI(
    assembly_accession = "GCF_000005845.2",
    pkg_maintainer     = "Junhui Li <ljh.biostat@gmail.com>",
    destdir            = tempdir()
  )
  utils::install.packages(pkgdir, repos = NULL, type = "source", quiet = TRUE)
  if (ecoli_pkg %in% loadedNamespaces()) {
    unloadNamespace(ecoli_pkg)
  }
}

if (!.ecoli_genome_ready()) {
  stop(
    "Could not load genome object '", genome_obj, "' from package '", ecoli_pkg, "'.\n",
    "Run the manual 'forge' chunk above, or forgeBSgenomeDataPkgFromNCBI() locally.",
    call. = FALSE
  )
}

Load the genome (object Ecoli; package BSgenome.Ecoli.NCBI.ASM584v2 for coordinates):

ecoli_pkg  <- "BSgenome.Ecoli.NCBI.ASM584v2"
genome_obj <- "Ecoli"

suppressPackageStartupMessages(library(ecoli_pkg, character.only = TRUE))
genome      <- base::get(genome_obj, envir = asNamespace(ecoli_pkg))
genome_name <- ecoli_pkg
chr_name    <- "U00096.3"
chr_length  <- length(genome[[chr_name]])

cat("Chromosome:", chr_name, "\n")

## Chromosome: U00096.3

cat("Length:    ", format(chr_length, big.mark = ","), "bp\n")

## Length:     4,641,652 bp

Step 2 — Generate Genomic Windows

make_genomiccoord() tiles the chromosome into non-overlapping 200 bp bins.

bins_gc <- make_genomiccoord(
  bsgenome    = genome_name,
  chromosomes = chr_name,
  window      = 200L,
  slide       = 200L,
  start       = 1,
  end         = chr_length,
  strand      = "+"
)

cat("Total windows:", length(bins_gc), "\n")

## Total windows: 23208

Resolve coordinates against the BSgenome package:

input_new <- list(pkg_name = genome_name, seq = bins_gc)
runtime1 <- system.time({
  gr_batch <- to_genomic_ranges_fast(input_new)
})

cat(sprintf(
  "Coordinate resolution: %.2f s (elapsed)\n",
  runtime1["elapsed"]
))

## Coordinate resolution: 0.44 s (elapsed)

Step 3 — Compute Genome-wide Tm

We use the nearest-neighbour method (SantaLucia & Hicks 2004) at 50 mM Na+.

runtime2 <- system.time({
  tm_ASM584v2 <- tm_calculate(
    gr_batch,
    method   = "tm_nn",
    nn_table = "DNA_NN_SantaLucia_2004",
    Na       = 50            # mM; standard PCR-like conditions
  )
})

cat(sprintf(
  "Tm calculation: %.2f s (elapsed) for %s windows\n",
  runtime2["elapsed"],
  format(length(bins_gc), big.mark = ",")
))

## Tm calculation: 5.47 s (elapsed) for 23,208 windows

Tm <- as.data.frame(tm_ASM584v2$gr[, c("Tm", "GC")])
summary(Tm[, c("Tm", "GC")])

##        Tm              GC        
##  Min.   :64.86   Min.   :0.2050  
##  1st Qu.:77.67   1st Qu.:0.4750  
##  Median :79.67   Median :0.5200  
##  Mean   :79.21   Mean   :0.5079  
##  3rd Qu.:81.28   3rd Qu.:0.5500  
##  Max.   :91.23   Max.   :0.7500

Step 4 — Visualise with `plot_genome_track()`

plot_genome_track() is the unified plotting function in TmCalculator. It supports both linear (karyoploteR-based) and circular (base R graphics) layouts from the same track list, with features including ideogram tracks, per-track highlights, proportional track heights, multi-region zoom, and customisable legends.

Assemble the track list

We overlay the Tm/GC profile with four independently measured genomic datasets:

Track	Source
MutL-AR	MutL protein ChIP-seq peaks (mismatch-repair activity)
Microsatellites	Tandem-repeat density per 200 bp bin
Cruciform	Cruciform-forming sequence density per 200 bp bin
ssDNA	Single-stranded DNA regions
GATC sites	GATC methylation-site density per 200 bp bin

# Reference labels: replication origin (ori) and terminus (dif)
label <- data.frame(
  seqnames = genome_name,
  start    = c(3925804, 1590777),
  end      = c(3925804, 1590777),
  label    = c("ori", "dif")
)
data(ecoli_rep_hotspots)
tracks <- list(
  # Ideogram: MutL-AR peaks shown inside the chromosome bar
  list(type = "rect", data = ecoli_rep_hotspots$all_peaks_IP_mutH,
       col = "#2C3E50", bg.col = "grey", name = "MutL-AR",
       legend_font_col = "#2C3E50", ideogram = TRUE, height = 0.5),

  # Sequence thermodynamics
  list(type = "line", data = Tm, value_col = "GC",
       name = "GC content", col = "#4A90E2",
       legend_font_col = "#4A90E2"),
  list(type = "line", data = Tm, value_col = "Tm",
       name = "Melting temp", col = "#E06666",
       legend_font_col = "#E06666", height = 2),

  # Repeat / structural features
  list(type = "line", data = ecoli_rep_hotspots$bins_rep,
       value_col = "count", name = "Microsatellites", col = "#2ECC71",
       legend_font_col = "#2ECC71"),
  list(type = "line", data = ecoli_rep_hotspots$bins_cru,
       value_col = "count", name = "Cruciform", col = "#3B3E6B",
       legend_font_col = "#3B3E6B"),

  # ssDNA regions
  list(data = ecoli_rep_hotspots$ssdna, name = "ssDNA",
       col = "#8E44AD", legend_font_col = "#8E44AD"),

  # GATC methylation sites
  list(type = "line", data = ecoli_rep_hotspots$bins_gatc,
       value_col = "count", name = "GATC sites", col = "#D35400",
       legend_font_col = "#D35400"),

  # Global highlight: translucent bands at MutL-AR peaks across all tracks
  list(type = "highlight", data = ecoli_rep_hotspots$all_peaks_IP_mutH,
       col = "#F1C40F", alpha = 0.18)
)

Circular genome map

plot_genome_track(
  genome_name = genome_name,
  genome_size = chr_length,
  track_list  = tracks,
  circular    = TRUE,
  label       = label
)

Circular genome map of E. coli K-12 MG1655. Concentric rings from outside in: MutL-AR peaks (grey ideogram), GC content, melting temperature, microsatellite density, cruciform sequences, ssDNA regions, and GATC site density. Yellow highlight bands mark MutL-AR peak regions.

Linear genome map

The same tracks list works for a linear karyoploteR layout — simply omit circular = TRUE. The ideogram track is drawn inside the chromosome bar; all other tracks are stacked as horizontal panels.

plot_genome_track(
  genome_name = genome_name,
  genome_size = chr_length,
  track_list  = tracks
)

Linear genome view of E. coli K-12 MG1655. MutL-AR peaks are drawn inside the chromosome ideogram bar.

Zoom — single region

The zoom parameter accepts a character string (e.g. "chr:start-end") or a GRanges object. In linear mode, the karyoploteR view is restricted to that region; in circular mode, only data overlapping the region is drawn.

plot_genome_track(
  genome_name = genome_name,
  genome_size = chr_length,
  track_list  = tracks,
  zoom        = c("U00096.3:100000-500000")
)

Zoomed linear view of the 1.0-2.0 Mb region.

Zoom — multiple regions

Pass a character vector to zoom to view several disjoint regions at once. In linear mode each region is drawn as a separate stacked panel; in circular mode the regions are concatenated around the circle with small gaps between them.

plot_genome_track(
  genome_name = genome_name,
  genome_size = chr_length,
  track_list  = tracks,
  zoom        = c("U00096.3:100000-500000",
                  "U00096.3:3600000-4500000")
)

Two zoomed regions displayed as stacked linear panels.

Circular canvas panning

Use canvas.xlim and canvas.ylim to pan and magnify a portion of the circular plot. The circle.margin parameter controls whitespace around the plot.

plot_genome_track(
  genome_name   = genome_name,
  genome_size   = chr_length,
  track_list    = tracks,
  circular      = TRUE,
  canvas.xlim   = c(0.5, 1),
  canvas.ylim   = c(0,   1),
  circle.margin = c(0.05, 0.05)
)

Panned circular view showing the upper-right quadrant of the E. coli chromosome.

Per-track highlights

Individual tracks can carry a highlight field to draw coloured bands within that track only. This is useful for marking specific regions of interest on a per-track basis:

tracks_hl <- tracks
tracks_hl[[4]]$highlight <- list(
  data  = ecoli_rep_hotspots$bins_rep[1100:1200, ],
  col   = "black",
  alpha = 0.12
)

plot_genome_track(
  genome_name = genome_name,
  genome_size = chr_length,
  track_list  = tracks_hl,
  circular    = TRUE
)

Circular plot with per-track highlight bands on the Microsatellites track.

Step 5 — Statistical Testing

We test whether Tm values inside MutL-AR peaks differ significantly from the rest of the genome.

Build MutL-AR peak GRanges

mutH_peaks <- GRanges(
  seqnames = ecoli_rep_hotspots$all_peaks_IP_mutH$chr,
  ranges   = IRanges(start = ecoli_rep_hotspots$all_peaks_IP_mutH$start,
                     end   = ecoli_rep_hotspots$all_peaks_IP_mutH$end)
)
seqlevels(mutH_peaks) <- "U00096.3"

Annotate Tm tiles with peak membership

mutH_peaks$peak_id <- paste0("mutH_", seq_along(mutH_peaks))

tm_annot <- integrate_granges(
  gr_tm          = tm_ASM584v2$gr,
  gr_features    = mutH_peaks,
  strategy       = "overlap",
  feature_cols   = "peak_id",
  keep_unmatched = TRUE
)

tm_annot$in_mutH <- ifelse(is.na(tm_annot$peak_id), "non_peak", "peak")
table(tm_annot$in_mutH)

## 
## non_peak     peak 
##    22402      806

Wilcoxon test on Tm and GC

res <- compare_groups(
  gr          = tm_annot,
  target      = c("Tm", "GC"),
  method      = "wilcoxon",
  group       = "in_mutH",
  alternative = "greater",
  posthoc     = FALSE
)
res$results

##   target   group   method              test statistic df      p.value n_groups
## 1     Tm in_mutH wilcoxon Wilcoxon rank-sum  10720489 NA 6.715524e-20        2
## 2     GC in_mutH wilcoxon Wilcoxon rank-sum  10806568 NA 8.549859e-22        2
##   n_total    group_levels                  group_n
## 1   23208 non_peak | peak non_peak=22402, peak=806
## 2   23208 non_peak | peak non_peak=22402, peak=806

res$summary

##      group     n       mean         sd   median target
## 1 non_peak 22402 79.2584239 3.07166962 79.68406     Tm
## 2     peak   806 77.9959119 3.61358037 78.79222     Tm
## 3 non_peak 22402  0.5088340 0.06346952  0.52000     GC
## 4     peak   806  0.4822333 0.07394710  0.50000     GC

Interpreting the Results

MutL-AR-associated windows have lower Tm and higher GC content than the rest of the genome (p < 0.001 for both). MutL-AR peaks cluster near the replication terminus, where replication forks converge and mismatch density is highest. This spatial coincidence with locally elevated microsatellite density and cruciform-forming sequences is consistent with the replication stress model of MMR recruitment.

GATC methylation sites are distributed genome-wide but show local density fluctuations that partially anti-correlate with GC content, reflecting the sequence context requirements for Dam methyltransferase (5’-GATC-3’).

Computational Performance

On a standard desktop computer (single core, R 4.3):

Step	Function	Time (elapsed)
Coordinate generation	`make_genomiccoord()` + `coor_to_genomic_ranges()`	~0.44 s
Tm calculation (23,208 windows)	`tm_calculate()`	~5.47 s
Total		~5.91 s

For human-scale analyses (non-overlapping 200 bp windows across the ~3.1 Gb haploid genome, ~15.5 M windows), we recommend running tm_calculate() in parallelly using BiocParallel and splitting input by chromosome. Memory requirements scale approximately linearly with the number of windows.

Session Information

sessionInfo()

## R version 4.4.1 (2024-06-14)
## Platform: x86_64-apple-darwin20
## Running under: macOS Sonoma 14.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] BSgenome.Ecoli.NCBI.ASM584v2_1.0.0 BSgenome_1.72.0                   
##  [3] rtracklayer_1.64.0                 BiocIO_1.14.0                     
##  [5] Biostrings_2.72.1                  XVector_0.44.0                    
##  [7] GenomicRanges_1.56.2               GenomeInfoDb_1.40.1               
##  [9] IRanges_2.38.1                     S4Vectors_0.42.1                  
## [11] BiocGenerics_0.50.0                TmCalculator_1.0.7                
## 
## loaded via a namespace (and not attached):
##   [1] DBI_1.2.3                   bitops_1.0-9               
##   [3] gridExtra_2.3               rlang_1.1.7                
##   [5] magrittr_2.0.4              biovizBase_1.52.0          
##   [7] otel_0.2.0                  matrixStats_1.5.0          
##   [9] compiler_4.4.1              RSQLite_2.4.0              
##  [11] GenomicFeatures_1.56.0      png_0.1-8                  
##  [13] vctrs_0.7.1                 ProtGenerics_1.36.0        
##  [15] stringr_1.6.0               pkgconfig_2.0.3            
##  [17] crayon_1.5.3                fastmap_1.2.0              
##  [19] backports_1.5.0             Rsamtools_2.20.0           
##  [21] rmarkdown_2.30              UCSC.utils_1.0.0           
##  [23] bit_4.6.0                   xfun_0.58                  
##  [25] zlibbioc_1.50.0             cachem_1.1.0               
##  [27] jsonlite_2.0.0              blob_1.2.4                 
##  [29] DelayedArray_0.30.1         BiocParallel_1.38.0        
##  [31] parallel_4.4.1              cluster_2.1.6              
##  [33] R6_2.6.1                    VariantAnnotation_1.50.0   
##  [35] bslib_0.10.0                stringi_1.8.7              
##  [37] RColorBrewer_1.1-3          bezier_1.1.2               
##  [39] rpart_4.1.23                jquerylib_0.1.4            
##  [41] Rcpp_1.1.1                  SummarizedExperiment_1.34.0
##  [43] knitr_1.51                  base64enc_0.1-6            
##  [45] Matrix_1.7-0                nnet_7.3-19                
##  [47] tidyselect_1.2.1            rstudioapi_0.18.0          
##  [49] dichromat_2.0-0.1           abind_1.4-8                
##  [51] yaml_2.3.12                 codetools_0.2-20           
##  [53] curl_6.2.3                  lattice_0.22-6             
##  [55] tibble_3.2.1                regioneR_1.36.0            
##  [57] Biobase_2.64.0              KEGGREST_1.44.1            
##  [59] evaluate_1.0.5              foreign_0.8-87             
##  [61] karyoploteR_1.30.0          pillar_1.10.2              
##  [63] MatrixGenerics_1.16.0       checkmate_2.3.2            
##  [65] generics_0.1.4              RCurl_1.98-1.17            
##  [67] ensembldb_2.28.1            ggplot2_3.5.2              
##  [69] scales_1.4.0                glue_1.8.0                 
##  [71] lazyeval_0.2.2              Hmisc_5.2-3                
##  [73] tools_4.4.1                 data.table_1.17.4          
##  [75] GenomicAlignments_1.40.0    XML_3.99-0.18              
##  [77] grid_4.4.1                  colorspace_2.1-1           
##  [79] AnnotationDbi_1.66.0        GenomeInfoDbData_1.2.12    
##  [81] htmlTable_2.4.3             restfulr_0.0.15            
##  [83] Formula_1.2-5               cli_3.6.5                  
##  [85] S4Arrays_1.4.1              dplyr_1.1.4                
##  [87] AnnotationFilter_1.28.0     gtable_0.3.6               
##  [89] sass_0.4.10                 digest_0.6.39              
##  [91] SparseArray_1.4.8           rjson_0.2.23               
##  [93] htmlwidgets_1.6.4           farver_2.1.2               
##  [95] memoise_2.0.1               htmltools_0.5.9            
##  [97] lifecycle_1.0.5             httr_1.4.7                 
##  [99] bit64_4.6.0-1               bamsignals_1.36.0

Genome-wide Melting Temperature Profiling: An E. coli Case Study

Junhui Li, Lihua Julie Zhu

2026-06-25

Introduction

Prerequisites

R packages

Step 1 — Building the E. coli BSgenome data package

Step 2 — Generate Genomic Windows

Step 3 — Compute Genome-wide Tm

Step 4 — Visualise with `plot_genome_track()`

Assemble the track list

Circular genome map

Linear genome map

Zoom — single region

Zoom — multiple regions

Circular canvas panning

Per-track highlights

Step 5 — Statistical Testing

Build MutL-AR peak GRanges

Annotate Tm tiles with peak membership

Wilcoxon test on Tm and GC

Interpreting the Results

Computational Performance

Session Information

Genome-wide Melting Temperature Profiling: An E. coli Case Study

Junhui Li, Lihua Julie Zhu

2026-06-25

Introduction

Prerequisites

R packages

Step 1 — Building the E. coli BSgenome data package

Step 2 — Generate Genomic Windows

Step 3 — Compute Genome-wide Tm

Step 4 — Visualise with plot_genome_track()

Assemble the track list

Circular genome map

Linear genome map

Zoom — single region

Zoom — multiple regions

Circular canvas panning

Per-track highlights

Step 5 — Statistical Testing

Build MutL-AR peak GRanges

Annotate Tm tiles with peak membership

Wilcoxon test on Tm and GC

Interpreting the Results

Computational Performance

Session Information

Step 4 — Visualise with `plot_genome_track()`