The TmCalculator package provides comprehensive options for calculating melting temperatures (Tm) of DNA/RNA sequences. This vignette demonstrates the package’s functionality, including:
tm_wallace via tm_calculate())tm_nn)plot_karyotype_genome(),
plot_tm_genome_tracks(), plot_tm_heatmap(),
and plot_tm_linear()plot_tm_*_interactive()
helpersseqs <- c("ATGCGCGAAAT", "GCTAGCTAGCT")
names(seqs) <- c("chr1:1-11:+:seq1", "chr2:1-11:+:seq2")
result <- tm_calculate(seqs, method = "tm_wallace")
result$gr
#> GRanges object with 2 ranges and 4 metadata columns:
#> seqnames ranges strand | sequence complement GC Tm
#> <Rle> <IRanges> <Rle> | <character> <character> <numeric> <numeric>
#> 1 chr1 1-11 + | ATGCGCGAAAT TACGCGCTTTA 45.4545 32
#> 2 chr2 1-11 + | GCTAGCTAGCT CGATCGATCGA 54.5455 34
#> -------
#> seqinfo: 2 sequences from an unspecified genome; no seqlengthsCoordinates should follow
"chr:start-end:strand:genome_pkg:seq_name".
coords <- c(
"chr1:1898000-1898050:+:BSgenome.Hsapiens.UCSC.hg38:seq1",
"chr2:2563000-2563050:-:BSgenome.Hsapiens.UCSC.hg38:seq2"
)
result <- tm_calculate(coords, method = "tm_nn")
result$gr
#> GRanges object with 2 ranges and 6 metadata columns:
#> seqnames ranges strand | sequence region_id
#> <Rle> <IRanges> <Rle> | <character> <character>
#> seq1 chr1 1898000-1898050 + | AAAAACCCCACAAACCTCCC.. seq1
#> seq2 chr2 2563000-2563050 - | GCAGCCTTTTGAGGAGCCCA.. seq2
#> genome_pkg GC complement Tm
#> <character> <numeric> <character> <numeric>
#> seq1 BSgenome.Hsapiens.UC.. 0.411765 TTTTTGGGGTGTTTGGAGGG.. 66.2208
#> seq2 BSgenome.Hsapiens.UC.. 0.686275 CGTCGGAAAACTCCTCGGGT.. 78.5226
#> -------
#> seqinfo: 2 sequences from an unspecified genomeresult$options
#> $Ambiguous
#> [1] FALSE
#>
#> $Shift
#> [1] 0
#>
#> $`Thermodynamic NN values`
#> [1] "DNA_NN_SantaLucia_2004: SantaLucia J (2004) <doi:10.1146/annurev.biophys.32.110601.141800>"
#>
#> $`Thermodynamic values for terminal mismatches`
#> [1] "DNA_TMM_Bommarito_2000: Bommarito S (2000) <doi:10.1093/nar/28.9.1929>"
#>
#> $`Thermodynamic values for internal mismatches`
#> [1] "DNA_IMM_Peyret_1999: Peyret N (1999) <doi:10.1021/bi9825091> & Allawi H T (1997) <doi:10.1021/bi962590c> & Santalucia N (2005) <doi:10.1093/nar/gki918>"
#>
#> $`Thermodynamic values for dangling ends`
#> [1] "DNA_DE_Bommarito_2000: Bommarito S (2000) <doi:10.1093/nar/28.9.1929>"
#>
#> $`Concentration of the higher concentrated strand`
#> [1] 25
#>
#> $`Concentration of the lower concentrated strand`
#> [1] 25
#>
#> $`Sequence self-complementary`
#> [1] FALSE
#>
#> $Na
#> [1] 50
#>
#> $K
#> [1] 0
#>
#> $Tris
#> [1] 0
#>
#> $Mg
#> [1] 0
#>
#> $dNTPs
#> [1] 0
#>
#> $`Salt correction method`
#> [1] "Schildkraut2010"
#>
#> $`Percent of DMSO`
#> [1] 0
#>
#> $`Formamide concentration`
#> [1] 0
#>
#> $`DMSO factor`
#> [1] 0.75
#>
#> $`Formamide concentration unit`
#> [1] "percent"
#>
#> $`Formamide factor`
#> [1] 0.65
#>
#> $`Skipped regions containing N`
#> GRanges object with 0 ranges and 5 metadata columns:
#> seqnames ranges strand | sequence region_id genome_pkg GC
#> <Rle> <IRanges> <Rle> | <DNAStringSet> <character> <character> <numeric>
#> complement
#> <character>
#> -------
#> seqinfo: 2 sequences from an unspecified genomefasta_file <- system.file("extdata", "example1.fasta", package = "TmCalculator")
result_fa <- tm_calculate(fasta_file, method = "tm_nn")
result_fa$gr
#> GRanges object with 3 ranges and 4 metadata columns:
#> seqnames ranges strand | sequence
#> <Rle> <IRanges> <Rle> | <character>
#> 1 chr22 13209021-13209099 + | ATGCGATCGATCGATCGATC..
#> 2 chr1 1-76 * | GCAACGACGACGACGACGAC..
#> 3 chr14 13200-13222 * | GCGATCGATCGATCGATCGA..
#> complement GC Tm
#> <character> <numeric> <numeric>
#> 1 TACGCTAGCTAGCTAGCTAG.. 50.0000 69.4905
#> 2 CGTTGCTGCTGCTGCTGCTG.. 62.5000 70.9932
#> 3 CGCTAGCTAGCTAGCTAGCT.. 56.5217 50.8499
#> -------
#> seqinfo: 3 sequences from an unspecified genome; no seqlengthsGCTAGCCGA[C]AATGGGCAGATAGTAGAAA
CGATCGGCT[A]TTACCCGTCTATCATCTTT
seqs <- c("GCTAGCCGACAATGGGCAGATAGTAGAAA")
comp_seqs <- c("CGATCGGCTATTACCCGTCTATCATCTTT")
result <- tm_calculate(
input_seq = seqs,
complement_seq = comp_seqs,
method = "tm_nn"
)
result$gr
#> GRanges object with 1 range and 4 metadata columns:
#> seqnames ranges strand | sequence complement
#> <Rle> <IRanges> <Rle> | <character> <character>
#> 1 chr1 1-29 * | GCTAGCCGACAATGGGCAGA.. CGATCGGCTATTACCCGTCT..
#> GC Tm
#> <numeric> <numeric>
#> 1 48.2759 55.1755
#> -------
#> seqinfo: 1 sequence from an unspecified genome; no seqlengthsGCTAGCCGA[C]AATGGGCAGATAGTAGAAA
GATCGGCT[A]TTACCCGTCTATCATCTTT
seqs <- c("GCTAGCCGACAATGGGCAGATAGTAGAAA")
comp_seqs <- c("GATCGGCTATTACCCGTCTATCATCTTT")
result <- tm_calculate(
input_seq = seqs,
complement_seq = comp_seqs,
method = "tm_nn",
shift = -1
)
result$gr
#> GRanges object with 1 range and 4 metadata columns:
#> seqnames ranges strand | sequence complement
#> <Rle> <IRanges> <Rle> | <character> <character>
#> 1 chr1 1-29 * | GCTAGCCGACAATGGGCAGA.. GATCGGCTATTACCCGTCTA..
#> GC Tm
#> <numeric> <numeric>
#> 1 48.2759 53.3375
#> -------
#> seqinfo: 1 sequence from an unspecified genome; no seqlengthsmake_genomiccoord() and
to_genomic_ranges_fast()For large tiling jobs, generate coordinate strings with
make_genomiccoord() and convert them with the fast
backend:
coords <- make_genomiccoord(
bsgenome = BSgenome.Hsapiens.UCSC.hg38,
chromosomes = "chr1",
window = 200L,
slide = 50L,
trim_N = "ends",
verbose = FALSE
)
gr <- to_genomic_ranges_fast(
list(pkg_name = "BSgenome.Hsapiens.UCSC.hg38", seq = head(coords, 20))
)
tm_tiled <- tm_calculate(gr, method = "tm_nn")Visualization functions expect a GRanges object with a
numeric Tm metadata column (and optionally GC
from tm_calculate()).
set.seed(123)
chr1_starts <- sort(sample(1:249250621, 100))
chr1_ends <- chr1_starts + sample(50:200, 100, replace = TRUE)
chr2_starts <- sort(sample(1:243199373, 50))
chr2_ends <- chr2_starts + sample(50:200, 50, replace = TRUE)
tm_results <- GRanges(
seqnames = Rle(c(rep("chr1", 100), rep("chr2", 50))),
ranges = IRanges(
start = c(chr1_starts, chr2_starts),
end = c(chr1_ends, chr2_ends)
),
strand = Rle(sample(c("+", "-"), 150, replace = TRUE)),
Tm = c(runif(100, 60, 80), runif(50, 60, 80)),
GC = c(runif(100, 30, 70), runif(50, 30, 70))
)
genome(seqinfo(tm_results)) <- "hg38"
gr_tm <- GRanges(
seqnames = c("chr1", "chr2", "chr1", "chr2", "chr1"),
ranges = IRanges(
start = c(100, 200, 300, 400, 150),
end = c(150, 250, 350, 450, 200)
),
Tm = c(65.5, 68.2, 70.1, 63.8, 72.0)
)plot_karyotype_genome)plot_karyotype_genome() uses
karyoploteR with a flexible track_list
(same idea as plot_circos_genome()).
track_list <- list(
list(type = "points", data = tm_results, value_col = "Tm",
col = "#e41a1c", name = "Tm (deg C)"),
list(type = "bars", data = tm_results, value_col = "GC",
col = "#377eb8", name = "GC (%)")
)
plot_karyotype_genome(
genome_assembly = "hg38",
track_list = track_list,
chromosomes = c("chr1", "chr2"),
title_name = "Tm and GC - hg38"
)
plot_karyotype_genome(
genome_assembly = "hg38",
track_list = list(
list(type = "line", data = tm_results[seqnames(tm_results) == "chr1"],
value_col = "Tm", col = "#984ea3", name = "Tm (deg C)")
),
chromosomes = "chr1",
zoom = "chr1:50000000-150000000",
title_name = "Tm - chr1 zoom"
)plot_tm_genome_tracks)plot_tm_genome_tracks() uses Gviz for a
genome-browser layout, or ggplot2 in
x_axis = "regions" mode for sparse probes.
plot_tm_genome_tracks(
tm_results,
chromosome_to_plot = "chr1",
genome_assembly = "hg38",
track_type = "gradient",
zoom = "chr1:10000000-80000000"
)
p_regions <- plot_tm_genome_tracks(
tm_results,
chromosome_to_plot = "chr1",
genome_assembly = "hg38",
x_axis = "regions",
track_type = "points",
color_by = "Tm"
)
print(p_regions)plot_tm_heatmap)plot_tm_linear)plot_tm_linear(
gr_tm,
color_by = "Tm",
facet_by_chr = FALSE,
add_line = TRUE
)
#> `geom_line()`: Each group consists of only one observation.
#> ℹ Do you need to adjust the group aesthetic?plot_tm_heatmap_interactive(gr_tm, genome_assembly = "hg19", plot_type = "karyogram")
plot_tm_genome_tracks_interactive(
tm_results,
chromosome_to_plot = "chr1",
genome_assembly = "hg38",
x_axis = "regions"
)
plot_tm_karyotype_interactive(
tm_results,
genome_assembly = "hg38",
chromosomes = c("chr1", "chr2")
)The TmCalculator package provides tools for calculating and analyzing
melting temperatures of DNA/RNA sequences. Use
plot_karyotype_genome() and
plot_tm_genome_tracks() for genome-context views,
plot_tm_heatmap() for interactive overviews, and
plot_tm_linear() for compact region-by-region
comparisons.
For more information, see the package documentation and function examples.