TL;DR

You may click on the following links to fast forward to the codes directly producing the Figures:

Back to index page.

About this vignette

This vignette aims to reproduces Figure 3 in the original publication (Lin et al., 2022).

Here we only show how plain plots were generated directly from data. To reach the final illustrations in the publication, these plots may require further editing in graphic design softwares (i.e. Adobe Illustrator). Those beautification steps are not central to our analysis and therefore will be omitted.

Initialization

suppressPackageStartupMessages({
  library(Seurat)
  library(SeuratDisk)
  library(tidyverse)
  library(ggplot2)
  library(patchwork)
  library(RColorBrewer)
  library(cvms)
  library(epitools)
})
querydata <- LoadH5Seurat('../data/AAV_MG_scRNAseq/query_clean.h5Seurat', verbose = F)
query_transferred <- LoadH5Seurat('../data/AAV_MG_scRNAseq/query_transferred.h5Seurat', verbose = F)
refdata <- LoadH5Seurat('../data/AAV_MG_scRNAseq/refdata_clean.h5Seurat', verbose = F)
refquery <- LoadH5Seurat('../data/AAV_MG_scRNAseq/refquery.h5Seurat', verbose = F)

Reproducing Plots

Figure 3a

Schematics showing the workflows of scRNAseq experiments and analysis.

f3a1 <- DimPlot(querydata, pt.size = 2, group.by = "group", cols = brewer.pal(n = 12, name = "Paired")[c(7,6)]) +
  ggtitle(NULL) +
  theme_minimal() +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank()
  )
f3a2 <- DimPlot(refdata, group.by = "treat", pt.size = 2, shuffle = TRUE) +
  scale_color_manual(values = brewer.pal(n = 12, name = "Paired")[c(5,3,1)])+
  theme_minimal() +
  ggtitle(NULL) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank()
  )
f3a1 / f3a2

Figure 3b-d

Descriptive analysis of the Query Dataset (Smart-seq2). Discrimination of “Transduced” and “Untransduced” cells are based on whether mScarlet transcripts are expressed in the cells (cell is “Transduced” if mScarlet > 0).

Figure 3b

DimPlot(querydata, group.by = "transduce_serotype", pt.size = 2, 
        cols = c(brewer.pal(n = 12, name = "Paired")[c(12,8,6)],"#CCCCCC")) +
  NoAxes() +
  ggtitle(NULL) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank()
  )

Figure 3c

queryaav_sub <- subset(querydata, subset = group == "AAV")
queryaav_sub <- queryaav_sub %>%
  NormalizeData() %>%
  FindVariableFeatures() %>%
  ScaleData() %>%
  RunPCA() %>%
  FindNeighbors(dims = 1:20) %>%
  FindClusters(resolution = 0.3) %>%
  RunUMAP(dims = 1:20)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 407
## Number of edges: 18673
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.7391
## Number of communities: 2
## Elapsed time: 0 seconds
DimPlot(queryaav_sub, group.by = "transduce_serotype", pt.size = 2, 
        cols = c(brewer.pal(12, "Paired")[c(12,8)],"#CCCCCC")) +
  NoAxes() +
  ggtitle(NULL) +
  theme(
    legend.position = "none",
    axis.title = element_blank(),
    axis.text = element_blank()
  )

Figure 3d

## Order by factor
querydata$transduce_serotype <- factor(querydata$transduce_serotype, 
                                       levels = c("LPS","AAV-MG1.1","AAV-MG1.2","Untransduced"))
umap_lps <- as.data.frame(Embeddings(querydata[["umap"]])) %>% 
  mutate(transduce_serotype = querydata$transduce_serotype) %>%
  ggplot(mapping = aes(x = UMAP_1, y = UMAP_2, color = transduce_serotype)) +
  geom_point(aes(size = transduce_serotype), position = "jitter") +
  scale_size_manual(values = c(1.2,0.6,0.6,0.6)) +
  scale_color_manual(values = c(brewer.pal(12, "Paired")[c(6,11,7)],"#CCCCCC")) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_rect(color = "gray", size = 1, fill = NA),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.key = element_blank()
    )

queryaav_sub$transduce_serotype <- factor(queryaav_sub$transduce_serotype,
                                          levels = c("AAV-MG1.1","AAV-MG1.2","Untransduced"))
umap_mg1.1 <- as.data.frame(Embeddings(queryaav_sub[["umap"]])) %>% 
  mutate(transduce_serotype = queryaav_sub$transduce_serotype) %>%
  ggplot(mapping = aes(x = UMAP_1, y = UMAP_2, color = transduce_serotype)) +
  geom_point(aes(size = transduce_serotype), position = "jitter") +
  scale_size_manual(values = c(1.2,0.6,0.6)) +
  scale_color_manual(values = c(brewer.pal(12, "Paired")[c(12,7)],"#CCCCCC")) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_rect(color = "gray", size = 1, fill = NA),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.key = element_blank()
    )

umap_mg1.2 <- as.data.frame(Embeddings(queryaav_sub[["umap"]])) %>% 
  mutate(transduce_serotype = queryaav_sub$transduce_serotype) %>%
  ggplot(mapping = aes(x = UMAP_1, y = UMAP_2, color = transduce_serotype)) +
  geom_point(aes(size = transduce_serotype), position = "jitter") +
  scale_size_manual(values = c(0.6,1.2,0.6)) +
  scale_color_manual(values = c(brewer.pal(12, "Paired")[c(11,8)],"#CCCCCC")) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_rect(color = "gray", size = 1, fill = NA),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.key = element_blank()
    )

umap_non_transduced <- as.data.frame(Embeddings(queryaav_sub[["umap"]])) %>% 
  mutate(transduce_serotype = queryaav_sub$transduce_serotype) %>%
  ggplot(mapping = aes(x = UMAP_1, y = UMAP_2, color = transduce_serotype)) +
  geom_point(aes(size = transduce_serotype), position = "jitter") +
  scale_size_manual(values = c(0.6,0.6,1.2)) +
  scale_color_manual(values = c(brewer.pal(12, "Paired")[c(11,7)],"#999999")) +
  theme(
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_rect(color = "gray", size = 1, fill = NA),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.key = element_blank()
    )

querydata$transduce_serotype <- factor(querydata$transduce_serotype, 
                                       levels = c("Untransduced","AAV-MG1.2","AAV-MG1.1","LPS"))
violin_mScarlet <- VlnPlot(querydata, 
                           features = "mScarlet_norm", 
                           group.by = "transduce_serotype", 
                           cols = c("#CCCCCC",brewer.pal(12, "Paired")[c(7,11,5)]),
                           pt.size = 1) + 
  coord_flip() + 
  NoLegend() +
  ggtitle(NULL) +
  ylab("mScarlet Expression") +
  theme(
    legend.position = "none",
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.title.x = element_text(size = 18)
  )
((umap_lps / umap_mg1.1 / umap_mg1.2 / umap_non_transduced) | violin_mScarlet) + plot_layout(widths = c(0.9,1.6))

Figure 3e

We notice that in the UMAP embedding showing the subset of microglia subjected to AAV delivery, the transduced and untransduced microglia are positioned in a “well-mixed” fashion, suggesting that the transduced microglia may share a similar transcriptional state with untransduced microglia.

DimPlot(queryaav_sub, group.by = "transduction", 
        cols = brewer.pal(n = 12, name = "Paired")[c(8,2)],
        pt.size = 2) + 
  NoAxes() + 
  NoLegend() + 
  theme_minimal() +
  ggtitle(NULL) +
  theme(
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.position = "bottom"
  )

To examine the relationship between transduction and transcriptional state in a more rigorous manner, we perform unsupervised clustering in the AAV delivery group using Louvain algorithm. Two cluster were obtained with distinct transcriptional states, as shown in the plots below.

Idents(queryaav_sub) <- "seurat_clusters"
queryaav_sub <- RenameIdents(queryaav_sub, `0` = "C1", `1` = "C2")
DimPlot(queryaav_sub, pt.size = 2) + 
  NoAxes() + 
  NoLegend() + 
  theme_minimal() +
  ggtitle(NULL) +
  theme(
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.position = "bottom"
  )

Figure 3f

Then, we collect the confusion matrix summarizing the microglia transduction state and Louvain cluster, and use epitools package to calculate the Odds Ratio (OR) from the confusion matrix. The Odds Ratio quantifies the association between two everts. In essence, an OR equal to 1 indicates that two events are independent. Here, we use epitools package to compute the OR, and test its value against 1 using “wald” method. The test result suggests that the OR does not deviate significantly from 1, suggesting that the trancriptional state and transduction state in microglia are independent.

Finally, we use plot_confusion_matrix() from the cvms package to visualize the confusion matrix.

df <- queryaav_sub[[c("transduction","seurat_clusters")]]
df <- dplyr::rename(df, "Louvain clusters" = "seurat_clusters")

basic_table <- table(df)

epitools::oddsratio(basic_table, method = "wald")$measure[-1,]
##  estimate     lower     upper 
## 0.7932876 0.5268557 1.1944544
epitools::oddsratio(basic_table, method = "wald")$p.value
##               two-sided
## transduction   midp.exact fisher.exact chi.square
##   Transduced           NA           NA         NA
##   Untransduced  0.2696457    0.2978506  0.2670871
## cvms::plot_confusion_matrix() use integer levels
df$transduction <- dplyr::recode(df$transduction, "Transduced" = 1, "Untransduced" = 0)

basic_table <- table(df)
cfm <- as_tibble(basic_table)

g <- cvms::plot_confusion_matrix(cfm, 
                                 target_col = "Louvain clusters",
                                 prediction_col = "transduction",
                                 counts_col = "n",
                                 add_arrows = FALSE)
g + 
  xlab("Louvain clusters") +
  ylab("Transduction") +
  scale_x_discrete(labels = c("C1","C2"), position = "top") +
  scale_y_discrete(labels = c("Untransduced","Transduced")) +
  theme(
    text = element_text(size = 18)
  )

Figure 3g

Joint visualization by ‘De novo’ recomputed UMAP of query and reference datasets.

as.data.frame(Embeddings(refquery[["umap"]])) %>%
  mutate(id = refquery$id, ident = refquery$ident) %>%
  ggplot(mapping = aes(x = UMAP_1, y = UMAP_2, color = ident, group = id)) +
  geom_point(aes(shape = id), size = 1.5) +
  scale_color_manual(values = brewer.pal(n = 12, name = "Paired")[c(5,6,3,1,8,2)]) +
  scale_shape_manual(values = c(20,23)) +
  theme_minimal() +
  theme(
    panel.background = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.position = "none"
    )

Figure 3h

query_transferred[[c("predicted.state","ident")]] %>%
  ggplot(mapping = aes(predicted.state)) +
  geom_bar(aes(fill = ident), width = 0.5) +
  scale_x_discrete(labels = c("Homeostatic" = "Saline", "Reactive" = "LPS & Poly(i:c)")) +
  scale_y_continuous(expand = c(0,0), limits = c(0,390)) +
  ylab("Number of cells") +
  xlab("Predicted state") +
  scale_fill_manual(values = brewer.pal(n = 12, name = "Paired")[c(2,8,6)]) +
  theme_classic() +
  theme(
    legend.position = "right",
    axis.title = element_text(size = 18),
    axis.text.y = element_text(size = 12),
    axis.text.x = element_text(size = 13),
  )

Session info

sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.4 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/liblapack.so.3
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=zh_CN.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=zh_CN.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=zh_CN.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=zh_CN.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] epitools_0.5-10.1     cvms_1.3.3            RColorBrewer_1.1-3   
##  [4] patchwork_1.1.1       forcats_0.5.1         stringr_1.4.0        
##  [7] dplyr_1.0.9           purrr_0.3.4           readr_2.1.2          
## [10] tidyr_1.2.0           tibble_3.1.7          ggplot2_3.3.6        
## [13] tidyverse_1.3.1       SeuratDisk_0.0.0.9020 sp_1.5-0             
## [16] SeuratObject_4.1.0    Seurat_4.1.1         
## 
## loaded via a namespace (and not attached):
##   [1] readxl_1.4.0          backports_1.4.1       plyr_1.8.7           
##   [4] igraph_1.3.2          lazyeval_0.2.2        splines_4.0.5        
##   [7] listenv_0.8.0         scattermore_0.8       digest_0.6.29        
##  [10] htmltools_0.5.2       fansi_1.0.3           checkmate_2.1.0      
##  [13] magrittr_2.0.3        tensor_1.5            cluster_2.1.3        
##  [16] ROCR_1.0-11           tzdb_0.3.0            globals_0.15.0       
##  [19] modelr_0.1.8          matrixStats_0.62.0    spatstat.sparse_2.1-1
##  [22] colorspace_2.0-3      rvest_1.0.2           ggrepel_0.9.1        
##  [25] haven_2.5.0           xfun_0.31             crayon_1.5.1         
##  [28] jsonlite_1.8.0        progressr_0.10.1      spatstat.data_2.2-0  
##  [31] survival_3.3-1        zoo_1.8-10            glue_1.6.2           
##  [34] polyclip_1.10-0       gtable_0.3.0          leiden_0.4.2         
##  [37] future.apply_1.9.0    abind_1.4-5           scales_1.2.0         
##  [40] DBI_1.1.2             spatstat.random_2.2-0 miniUI_0.1.1.1       
##  [43] Rcpp_1.0.8.3          viridisLite_0.4.0     xtable_1.8-4         
##  [46] reticulate_1.25       spatstat.core_2.4-4   bit_4.0.4            
##  [49] htmlwidgets_1.5.4     httr_1.4.3            ellipsis_0.3.2       
##  [52] ica_1.0-2             farver_2.1.0          pkgconfig_2.0.3      
##  [55] sass_0.4.1            uwot_0.1.11           dbplyr_2.2.0         
##  [58] deldir_1.0-6          utf8_1.2.2            labeling_0.4.2       
##  [61] tidyselect_1.1.2      rlang_1.0.2           reshape2_1.4.4       
##  [64] later_1.3.0           munsell_0.5.0         cellranger_1.1.0     
##  [67] tools_4.0.5           cli_3.3.0             generics_0.1.2       
##  [70] broom_0.8.0           ggridges_0.5.3        evaluate_0.15        
##  [73] fastmap_1.1.0         yaml_2.3.5            goftest_1.2-3        
##  [76] fs_1.5.2              knitr_1.39            bit64_4.0.5          
##  [79] fitdistrplus_1.1-8    RANN_2.6.1            pbapply_1.5-0        
##  [82] future_1.26.1         nlme_3.1-158          mime_0.12            
##  [85] ggrastr_1.0.1         xml2_1.3.3            hdf5r_1.3.5          
##  [88] compiler_4.0.5        rstudioapi_0.13       beeswarm_0.4.0       
##  [91] plotly_4.10.0         png_0.1-7             spatstat.utils_2.3-1 
##  [94] reprex_2.0.1          bslib_0.3.1           stringi_1.7.6        
##  [97] highr_0.9             RSpectra_0.16-1       rgeos_0.5-9          
## [100] lattice_0.20-45       Matrix_1.4-1          vctrs_0.4.1          
## [103] pillar_1.7.0          lifecycle_1.0.1       spatstat.geom_2.4-0  
## [106] lmtest_0.9-40         jquerylib_0.1.4       RcppAnnoy_0.0.19     
## [109] data.table_1.14.2     cowplot_1.1.1         irlba_2.3.5          
## [112] httpuv_1.6.5          R6_2.5.1              promises_1.2.0.1     
## [115] KernSmooth_2.23-20    gridExtra_2.3         vipor_0.4.5          
## [118] parallelly_1.32.0     codetools_0.2-18      MASS_7.3-57          
## [121] assertthat_0.2.1      withr_2.5.0           sctransform_0.3.3    
## [124] mgcv_1.8-40           parallel_4.0.5        hms_1.1.1            
## [127] grid_4.0.5            rpart_4.1-15          rmarkdown_2.14       
## [130] Rtsne_0.16            shiny_1.7.1           lubridate_1.8.0      
## [133] ggbeeswarm_0.6.0