## ----include = FALSE---------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>" ) ## ----setup, message=FALSE----------------------------------------------------- library(CIDER) library(Seurat) library(pheatmap) library(ggplot2) library(cowplot) library(viridis) ## ----------------------------------------------------------------------------- # Download the data data_url <- "https://figshare.com/ndownloader/files/31469387" data_file <- file.path(tempdir(), "pancreas_counts.RData") if (!file.exists(data_file)) { message("Downloading count data to temporary directory...") download.file(data_url, destfile = data_file, mode = "wb") } ## ----------------------------------------------------------------------------- # Load counts matrix and metadata data_env <- new.env() loaded_objects <- load(file = data_file, envir = data_env) pancreas_counts <- data_env[[loaded_objects[1]]] load("../data/pancreas_meta.RData") # meta data/cell information seu <- CreateSeuratObject(counts = pancreas_counts, meta.data = pancreas_meta) table(seu$Batch) ## ----seurat-pipeline---------------------------------------------------------- seu <- NormalizeData(seu, verbose = FALSE) seu <- FindVariableFeatures(seu, selection.method = "vst", nfeatures = 2000, verbose = FALSE) seu <- ScaleData(seu, verbose = FALSE) seu <- RunPCA(seu, npcs = 20, verbose = FALSE) seu <- RunTSNE(seu, reduction = "pca", dims = 1:12) ## ----scatterPlot-confounded, fig.width=7, fig.height=3------------------------ p1 <- scatterPlot(seu, "pca",colour.by = "Batch", title = "PCA") p2 <- scatterPlot(seu, "tsne",colour.by = "Batch", title = "t-SNE") plot_grid(p1, p2) ## ----fig.width=5, fig.height=3------------------------------------------------ seu <- FindNeighbors(seu, dims = 1:12) seu <- FindClusters(seu) scatterPlot(seu, "tsne",colour.by = "seurat_clusters", title = "t-SNE") ## ----fig.width=6, fig.height=3------------------------------------------------ res <- data.frame(table(seu$seurat_clusters, seu$Batch)) ggplot(res, aes(fill=Var2, y=Freq, x=Var1)) + geom_bar(position="stack", stat="identity") + xlab("CIDER_cluster") + ylab("Proportions") ## ----initial_cluster---------------------------------------------------------- seu$initial_cluster <- paste(seu$Group, seu$Batch, sep = "_") table(seu$initial_cluster) ## ----run-getIDER-noparallel--------------------------------------------------- ider <- getIDEr(seu, group.by.var = "initial_cluster", batch.by.var = "Batch", downsampling.size = 35, use.parallel = FALSE, verbose = FALSE) ## ----heatmap, fig.height=7, fig.width=7--------------------------------------- groups <- c("alpha","beta","delta", "gamma","ductal","endothelial", "activated_stellate", "quiescent_stellate", "macrophage") idx1 <- paste0(groups, "_human") idx2 <- paste0(groups, "_mouse") pheatmap::pheatmap( ider[[1]][idx1, idx2], color = inferno(10), border_color = NA, display_numbers = TRUE, cluster_rows = FALSE, cluster_cols = FALSE, width = 7, height = 5, cellwidth = 22, cellheight = 22 ) ## ----final-clustering--------------------------------------------------------- seu <- finalClustering(seu, ider, cutree.h = 0.45) head(seu@meta.data) ## ----summary-tsne-plots, fig.width=10, fig.height=4--------------------------- plot_list <- list() plot_list[[1]] <- scatterPlot(seu, "tsne", colour.by = "CIDER_cluster", title = "asCIDER clustering results") plot_list[[2]] <- scatterPlot(seu, "tsne", colour.by = "Group", title = "Ground truth of cell populations") plot_grid(plotlist = plot_list, ncol = 2) ## ----fig.height=3, fig.width=5------------------------------------------------ res <- data.frame(table(seu$CIDER_cluster, seu$Batch)) ggplot(res, aes(fill=Var2, y=Freq, x=Var1)) + geom_bar(position="stack", stat="identity") + xlab("CIDER_cluster") + ylab("Proportions") ## ----sessionInfo-------------------------------------------------------------- sessionInfo()