scGOclust is a package that leverages Gene Ontology to
analyse the functional profile of cells with scRNA-seq data.
# load required libraries
library(Seurat)
#> Attaching SeuratObject
library(pheatmap)
library(httr)
## if (!require("devtools")) install.packages("devtools")
## install latest from source
## for reproducibility we do not update dependencies
# devtools::install_github("YY-SONG0718/scGOclust", upgrade_dependencies = FALSE)
library(scGOclust)
# get a gene to GO BP terms mapping table
# remove electronically inferred records
# sometimes Ensembl complains that its SSL certificate has expired; this is a
# known issue, and running the line below works around it
# NOTE(review): this switches off SSL peer verification through httr's
# session-wide config, so it affects later httr requests too -- only run it
# when the certificate error actually occurs
httr::set_config(httr::config(ssl_verifypeer = FALSE))
# mmu_tbl <- ensemblToGo(species = 'mmusculus', GO_linkage_type = c('experimental', 'phylogenetic', 'computational', 'author', 'curator' ))
# dme_tbl <- ensemblToGo(species = 'dmelanogaster', GO_linkage_type = c('experimental', 'phylogenetic', 'computational', 'author', 'curator' ))
# for convenience, load the example gene-to-GO mapping tables
# (in place of the commented-out ensemblToGo() queries)
data(list = c("mmu_tbl", "dme_tbl"))
# load the raw gene expression count objects
data(list = c("mmu_subset", "dme_subset"))
# list the objects in the current environment
ls()
#> [1] "dme_subset" "dme_tbl" "mmu_subset" "mmu_tbl"
## construct a Seurat object with GO BP as features
## (one object per species, built from its mapping table and count object)
mmu_go_obj <- makeGOSeurat(
  ensembl_to_GO = mmu_tbl,
  feature_type = "external_gene_name",
  seurat_obj = mmu_subset
)
#> collect data
#> compute GO to cell matrix, might take a few secs
#> time used: 0.67 secs
#> returning GO Seurat object
dme_go_obj <- makeGOSeurat(
  ensembl_to_GO = dme_tbl,
  feature_type = "external_gene_name",
  seurat_obj = dme_subset
)
#> collect data
#> compute GO to cell matrix, might take a few secs
#> time used: 0.21 secs
#> returning GO Seurat object
# specify the column with cell type annotation in seurat_obj@meta.data
# (the column name differs between the two example datasets)
mmu_ct_go <- getCellTypeGO(
  go_seurat_obj = mmu_go_obj,
  cell_type_col = "cell_type_annotation"
)
#> perform normalization and log1p for mmu_go_obj
#> Centering and scaling data matrix
dme_ct_go <- getCellTypeGO(
  go_seurat_obj = dme_go_obj,
  cell_type_col = "annotation"
)
#> perform normalization and log1p for dme_go_obj
#> Centering and scaling data matrix
# heatmap of Pearson's correlation coefficient of cell type average BP profiles within species
mmu_corr <- cellTypeGOCorr(cell_type_go = mmu_ct_go, corr_method = "pearson")
pheatmap(mmu_corr)
dme_corr <- cellTypeGOCorr(cell_type_go = dme_ct_go, corr_method = "pearson")
pheatmap(dme_corr)
# calculate Pearson's correlation coefficient of cell type average BP profiles across species
corr <- crossSpeciesCellTypeGOCorr(
  species_1 = "mmusculus",
  species_2 = "dmelanogaster",
  cell_type_go_sp1 = mmu_ct_go,
  cell_type_go_sp2 = dme_ct_go,
  corr_method = "pearson"
)
# cross-species cell type profile heatmap
pheatmap(corr, width = 9, height = 10)
# same heatmap with pheatmap's column scaling switched on
pheatmap(corr, scale = "column", width = 9, height = 10)
# sheatmap tries to put cells with higher values on the diagonal
# helpful when cross-species cell type similarity signal is less clear
# NOTE(review): the + 0.5 offset is kept from the original; presumably it
# shifts the correlations to values sheatmap accepts -- confirm
slanter::sheatmap((corr + 0.5), width = 9, height = 10)
# scale by row or column to see relative similarity
slanter::sheatmap((corr + 0.5), scale = "column", width = 9, height = 10)
# analyze the cell-by-GO BP profile as a count matrix
mmu_go_analyzed <- analyzeGOSeurat(
  go_seurat_obj = mmu_go_obj,
  cell_type_col = "cell_type_annotation"
)
#> perform normalization and log1p for mmu_go_obj
#> Computing nearest neighbor graph
#> Computing SNN
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#>
#> Number of nodes: 219
#> Number of edges: 9890
#>
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.4728
#> Number of communities: 4
#> Elapsed time: 0 seconds
#> Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
#> To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
#> This message will be shown once per session
#> 23:44:45 UMAP embedding parameters a = 0.9922 b = 1.112
#> 23:44:45 Read 219 rows and found 50 numeric columns
#> 23:44:45 Using Annoy for neighbor search, n_neighbors = 30
#> 23:44:45 Building Annoy index with metric = cosine, n_trees = 50
#> 0% 10 20 30 40 50 60 70 80 90 100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 23:44:45 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpsSBDIl/filee12716dbcd69
#> 23:44:45 Searching Annoy index using 1 thread, search_k = 3000
#> 23:44:45 Annoy recall = 100%
#> 23:44:45 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
#> 23:44:46 Initializing from normalized Laplacian + noise (using irlba)
#> 23:44:46 Commencing optimization for 500 epochs, with 7820 positive edges
#> 23:44:47 Optimization finished
# UMAP plot of the analyzed cell-by-GO BP profile,
# labeled by the cell annotation column in meta.data specified earlier;
# the legend is dropped
DimPlot(object = mmu_go_analyzed, label = TRUE) +
  NoLegend()
# run the same analysis on the dme GO object; its annotation column is 'annotation'
dme_go_analyzed <- analyzeGOSeurat(
  go_seurat_obj = dme_go_obj,
  cell_type_col = "annotation"
)
#> perform normalization and log1p for dme_go_obj
#> Computing nearest neighbor graph
#> Computing SNN
#> Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
#>
#> Number of nodes: 180
#> Number of edges: 6418
#>
#> Running Louvain algorithm...
#> Maximum modularity in 10 random starts: 0.5467
#> Number of communities: 4
#> Elapsed time: 0 seconds
#> 23:44:47 UMAP embedding parameters a = 0.9922 b = 1.112
#> 23:44:47 Read 180 rows and found 50 numeric columns
#> 23:44:47 Using Annoy for neighbor search, n_neighbors = 30
#> 23:44:47 Building Annoy index with metric = cosine, n_trees = 50
#> 0% 10 20 30 40 50 60 70 80 90 100%
#> [----|----|----|----|----|----|----|----|----|----|
#> **************************************************|
#> 23:44:47 Writing NN index file to temp file /var/folders/37/wf962dk574750g0xnnlxjwvm0000gp/T//RtmpsSBDIl/filee12755a8512d
#> 23:44:47 Searching Annoy index using 1 thread, search_k = 3000
#> 23:44:47 Annoy recall = 100%
#> 23:44:48 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
#> 23:44:48 Initializing from normalized Laplacian + noise (using irlba)
#> 23:44:48 Commencing optimization for 500 epochs, with 6610 positive edges
#> 23:44:49 Optimization finished
# labeled dimensional reduction plot of the analyzed dme object, legend dropped
DimPlot(object = dme_go_analyzed, label = TRUE) +
  NoLegend()
## calculation takes a few minutes due to the Wilcoxon signed rank test
ct_shared_go <- getCellTypeSharedGO(
  species_1 = "mmusculus",
  species_2 = "dmelanogaster",
  analyzed_go_seurat_sp1 = mmu_go_analyzed,
  analyzed_go_seurat_sp2 = dme_go_analyzed,
  cell_type_col_sp1 = "cell_type_annotation",
  cell_type_col_sp2 = "annotation"
)
## preview the first entries of the result
head(ct_shared_go)
# query shared GO terms for specific cell type pairs
# (one cell type label per species; return_full is left at FALSE)
getCellTypeSharedTerms(
  shared_go = ct_shared_go,
  cell_type_sp1 = "intestine_Enteroendocrine cell",
  cell_type_sp2 = "enteroendocrine cell",
  return_full = FALSE
)
# record the R version, platform and package versions used for this run
utils::sessionInfo()
#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#>
#> locale:
#> [1] C/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] scGOclust_0.1.2 httr_1.4.5 pheatmap_1.0.12 SeuratObject_4.1.3
#> [5] Seurat_4.3.0
#>
#> loaded via a namespace (and not attached):
#> [1] BiocFileCache_2.2.1 plyr_1.8.8 igraph_1.4.1
#> [4] lazyeval_0.2.2 sp_1.6-0 splines_4.1.2
#> [7] listenv_0.9.0 scattermore_0.8 GenomeInfoDb_1.30.1
#> [10] ggplot2_3.4.1 digest_0.6.31 htmltools_0.5.5
#> [13] fansi_1.0.4 magrittr_2.0.3 memoise_2.0.1
#> [16] tensor_1.5 cluster_2.1.3 ROCR_1.0-11
#> [19] limma_3.50.3 globals_0.16.2 Biostrings_2.62.0
#> [22] matrixStats_0.63.0 spatstat.sparse_3.0-1 prettyunits_1.1.1
#> [25] colorspace_2.1-0 rappdirs_0.3.3 blob_1.2.3
#> [28] ggrepel_0.9.3 xfun_0.38 dplyr_1.1.1
#> [31] crayon_1.5.2 RCurl_1.98-1.6 jsonlite_1.8.4
#> [34] progressr_0.13.0 spatstat.data_3.0-1 survival_3.3-1
#> [37] zoo_1.8-11 glue_1.6.2 slanter_0.2-0
#> [40] polyclip_1.10-4 gtable_0.3.3 zlibbioc_1.40.0
#> [43] XVector_0.34.0 leiden_0.4.3 future.apply_1.10.0
#> [46] BiocGenerics_0.40.0 abind_1.4-5 scales_1.2.1
#> [49] DBI_1.1.2 spatstat.random_3.1-4 miniUI_0.1.1.1
#> [52] Rcpp_1.0.10 progress_1.2.2 viridisLite_0.4.1
#> [55] xtable_1.8-4 reticulate_1.28 bit_4.0.4
#> [58] stats4_4.1.2 htmlwidgets_1.6.2 RColorBrewer_1.1-3
#> [61] ellipsis_0.3.2 ica_1.0-3 farver_2.1.1
#> [64] pkgconfig_2.0.3 XML_3.99-0.9 dbplyr_2.1.1
#> [67] sass_0.4.5 uwot_0.1.14 deldir_1.0-6
#> [70] utf8_1.2.3 labeling_0.4.2 tidyselect_1.2.0
#> [73] rlang_1.1.0 reshape2_1.4.4 later_1.3.0
#> [76] AnnotationDbi_1.56.2 munsell_0.5.0 tools_4.1.2
#> [79] cachem_1.0.7 cli_3.6.1 generics_0.1.3
#> [82] RSQLite_2.2.18 ggridges_0.5.4 evaluate_0.20
#> [85] stringr_1.5.0 fastmap_1.1.1 yaml_2.3.7
#> [88] goftest_1.2-3 knitr_1.42 bit64_4.0.5
#> [91] fitdistrplus_1.1-8 purrr_1.0.1 RANN_2.6.1
#> [94] KEGGREST_1.34.0 pbapply_1.7-0 future_1.32.0
#> [97] nlme_3.1-157 mime_0.12 pracma_2.4.2
#> [100] xml2_1.3.3 biomaRt_2.50.3 compiler_4.1.2
#> [103] rstudioapi_0.13 filelock_1.0.2 curl_5.0.0
#> [106] plotly_4.10.1 png_0.1-8 spatstat.utils_3.0-2
#> [109] tibble_3.2.1 bslib_0.4.2 stringi_1.7.12
#> [112] highr_0.10 lattice_0.20-45 Matrix_1.5-1
#> [115] vctrs_0.6.1 networkD3_0.4 pillar_1.9.0
#> [118] lifecycle_1.0.3 spatstat.geom_3.1-0 lmtest_0.9-40
#> [121] jquerylib_0.1.4 RcppAnnoy_0.0.20 data.table_1.14.8
#> [124] cowplot_1.1.1 bitops_1.0-7 irlba_2.3.5.1
#> [127] httpuv_1.6.9 patchwork_1.1.2 R6_2.5.1
#> [130] promises_1.2.0.1 KernSmooth_2.23-20 gridExtra_2.3
#> [133] IRanges_2.28.0 parallelly_1.35.0 codetools_0.2-18
#> [136] assertthat_0.2.1 MASS_7.3-57 withr_2.5.0
#> [139] sctransform_0.3.5 GenomeInfoDbData_1.2.7 S4Vectors_0.32.4
#> [142] hms_1.1.1 parallel_4.1.2 grid_4.1.2
#> [145] tidyr_1.3.0 rmarkdown_2.21 Rtsne_0.16
#> [148] spatstat.explore_3.1-0 Biobase_2.54.0 shiny_1.7.4