
Cell-Type Deconvolution with VISTA (airway)
VISTA Development Team
VISTA-deconvolution.Rmd
Overview
This vignette demonstrates how to run cell-type
deconvolution with VISTA using the Bioconductor
airway dataset.
Workflow covered:
- Build a `VISTA` object from airway counts and metadata.
- Run `run_cell_deconvolution()` (xCell2 backend).
- Extract cell-fraction estimates with `get_cell_fractions()`.
- Visualize sample-level composition with `plot_celltype_barplot()`.
- Compare treatment groups with `get_celltype_group_dotplot()`.
- Inspect cell-type/sample structure with `get_celltype_heatmap()`.
Prepare Airway Data
# Load the airway dataset, then pull out its "counts" assay and its per-sample
# annotations.
data("airway", package = "airway")
counts_matrix <- SummarizedExperiment::assay(airway, "counts")
sample_metadata <- as.data.frame(SummarizedExperiment::colData(airway))

# Expose the sample IDs as a regular column (used below to order the count
# columns) and add a readable treatment label derived from `dex`.
sample_metadata$sample_names <- rownames(sample_metadata)
sample_metadata$cond_long <- ifelse(
  sample_metadata$dex == "trt",
  "dexamethasone",
  "untreated"
)

# Move the row names of the count matrix into a leading "gene_id" column.
count_data <- tibble::rownames_to_column(
  as.data.frame(counts_matrix),
  "gene_id"
)

# Put the sample columns in the same order as the rows of sample_metadata.
count_data <- count_data[, c("gene_id", sample_metadata$sample_names)]
dim(count_data)
#> [1] 63677 9
sample_metadata[, c("sample_names", "cell", "dex", "cond_long")]
#> sample_names cell dex cond_long
#> SRR1039508 SRR1039508 N61311 untrt untreated
#> SRR1039509 SRR1039509 N61311 trt dexamethasone
#> SRR1039512 SRR1039512 N052611 untrt untreated
#> SRR1039513 SRR1039513 N052611 trt dexamethasone
#> SRR1039516 SRR1039516 N080611 untrt untreated
#> SRR1039517 SRR1039517 N080611 trt dexamethasone
#> SRR1039520 SRR1039520 N061011 untrt untreated
#> SRR1039521 SRR1039521 N061011 trt dexamethasone
Create VISTA Object
# Build the VISTA object from the count table and sample annotations.
vista_airway <- create_vista(
  # Inputs: count table (gene IDs in "gene_id") and per-sample metadata.
  counts = count_data,
  sample_info = sample_metadata,
  column_geneid = "gene_id",
  # Comparison: dexamethasone (numerator) vs untreated (denominator), with
  # `cell` passed as a covariate.
  group_column = "cond_long",
  group_numerator = "dexamethasone",
  group_denominator = "untreated",
  covariates = "cell",
  method = "deseq2",
  # NOTE(review): presumably low-count filtering thresholds -- confirm against
  # the create_vista() documentation.
  min_counts = 10,
  min_replicates = 1
)

# Add gene annotations (used for downstream interpretation and fallback symbol
# mapping), looked up by Ensembl ID in org.Hs.eg.db.
vista_airway <- set_rowdata(
  vista_airway,
  orgdb = org.Hs.eg.db,
  columns = c("SYMBOL", "GENENAME", "ENTREZID"),
  keytype = "ENSEMBL"
)

# Print a summary of the annotated object.
vista_airway
#> class: VISTA
#> dim: 18086 8
#> metadata(12): de_results de_summary ... design comparison
#> assays(1): norm_counts
#> rownames(18086): ENSG00000000003 ENSG00000000419 ... ENSG00000273487
#> ENSG00000273488
#> rowData names(4): baseMean SYMBOL GENENAME ENTREZID
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(12): SampleName cell ... sizeFactor sample_names
Run Deconvolution
run_cell_deconvolution() currently uses xCell2.
If xCell2 is unavailable, this vignette will skip
deconvolution sections.
# Only when the optional xCell2 package is missing: explain why the
# deconvolution sections are skipped and how to install it. Without this guard
# the notice would be printed even when xCell2 is available.
if (!requireNamespace("xCell2", quietly = TRUE)) {
  cat("Package 'xCell2' is not installed; deconvolution steps are skipped.\n")
  cat("Install it to run these sections:\n")
  cat(" BiocManager::install('xCell2')\n")
}
# Run xCell2 deconvolution on vista_airway. If the call fails, return the error
# condition object instead of stopping. Extra arguments are forwarded to
# run_cell_deconvolution().
try_deconvolution <- function(...) {
  tryCatch(
    run_cell_deconvolution(
      x = vista_airway,
      method = "xCell2",
      gene_id_type = "ensembl",
      ...
    ),
    error = function(e) e
  )
}

# First attempt: default settings.
deconv_try <- try_deconvolution()

# Second attempt: relax minSharedGenes, but only when the first failure message
# mentions that parameter. Any other error is left as is.
needs_retry <- inherits(deconv_try, "error") &&
  grepl("minSharedGenes", conditionMessage(deconv_try), fixed = TRUE)
if (needs_retry) {
  message("Retrying xCell2 deconvolution with minSharedGenes = 0.6")
  deconv_try <- try_deconvolution(minSharedGenes = 0.6)
}

# Record whether a result is available. On success, keep the result and print
# it; on failure, report the error message.
has_deconv <- !inherits(deconv_try, "error")
if (has_deconv) {
  vista_deconv <- deconv_try
  vista_deconv
} else {
  message(
    "Deconvolution could not be completed in this environment:\n",
    conditionMessage(deconv_try)
  )
}
#> class: VISTA
#> dim: 18086 8
#> metadata(13): de_results de_summary ... comparison cell_fractions
#> assays(1): norm_counts
#> rownames(18086): ENSG00000000003 ENSG00000000419 ... ENSG00000273487
#> ENSG00000273488
#> rowData names(4): baseMean SYMBOL GENENAME ENTREZID
#> colnames(8): SRR1039508 SRR1039509 ... SRR1039520 SRR1039521
#> colData names(12): SampleName cell ... sizeFactor sample_names
Inspect Cell Fractions
# Extract the cell-fraction table from the deconvolved object and report its
# dimensions.
cell_fractions <- get_cell_fractions(vista_deconv)
dim(cell_fractions)
#> [1] 8 43
# Preview the top-left corner (at most 4 rows by 6 columns). seq_len() gives an
# empty index when a dimension is 0, whereas 1:min(...) would count down to
# c(1, 0). drop = FALSE keeps the preview two-dimensional even when only one
# row or column is selected.
cell_fractions[
  seq_len(min(4L, nrow(cell_fractions))),
  seq_len(min(6L, ncol(cell_fractions))),
  drop = FALSE
]
#> neutrophil monocyte megakaryocyte-erythroid progenitor cell
#> SRR1039508 2.746005e-22 0.000000e+00 0.000000e+00
#> SRR1039509 4.648547e-23 0.000000e+00 1.157681e-05
#> SRR1039512 0.000000e+00 1.220985e-20 5.108035e-05
#> SRR1039513 4.611114e-22 0.000000e+00 1.347784e-05
#> CD4-positive, alpha-beta T cell regulatory T cell
#> SRR1039508 2.783925e-05 1.816553e-24
#> SRR1039509 3.985213e-05 0.000000e+00
#> SRR1039512 1.816519e-05 0.000000e+00
#> SRR1039513 2.921939e-05 0.000000e+00
#> central memory CD4-positive, alpha-beta T cell
#> SRR1039508 0.000000e+00
#> SRR1039509 6.472498e-31
#> SRR1039512 2.746680e-23
#> SRR1039513 0.000000e+00
Plot Cell-Type Composition
# Bar plot of sample-level cell-type composition, grouped by treatment label.
plot_celltype_barplot(
  x = vista_deconv,
  # Grouping: use the treatment label and facet the plot by it.
  group_column = "cond_long",
  facet_by_group = TRUE,
  # Cell-type selection and scaling.
  # NOTE(review): presumably keeps 12 cell types, pools the rest, and
  # normalizes within each sample -- confirm against the function docs.
  top_n = 12,
  collapse_other = TRUE,
  normalize = "sample",
  # Appearance.
  font_size = 11
)
Group-Level Dot Plot
This plot summarizes deconvolution signal by treatment group while keeping sample-level points visible.
# Group-level dot plot of the deconvolution scores, by treatment label.
get_celltype_group_dotplot(
  x = vista_deconv,
  group_column = "cond_long",
  top_n = 12,
  # Group summary: mean, with "se" as the error setting.
  # NOTE(review): "se" presumably means standard error -- confirm.
  summary_fun = "mean",
  error = "se",
  # Overlay the individual sample points on the group summary.
  add_points = TRUE,
  point_size = 2.5,
  # Appearance.
  font_size = 11
)
Cell-Type Heatmap
The heatmap is useful to inspect sample-level deconvolution structure and concordance within groups.
# Heatmap of deconvolution scores across samples, annotated by treatment label.
get_celltype_heatmap(
  x = vista_deconv,
  group_column = "cond_long",
  top_n = 20,
  # Values are passed through the "zscore" transform before plotting.
  transform = "zscore",
  # Cluster the rows only; column clustering is switched off.
  cluster_rows = TRUE,
  cluster_columns = FALSE,
  # Appearance: no per-cell value labels.
  show_values = FALSE,
  font_size = 10
)
Notes on Interpretation
- xCell2 outputs are typically enrichment-like abundance scores rather than absolute percentages.
- Use group-level differences and consistency across replicates as the primary interpretation signal.
- Treat results as hypothesis-generating; validate with orthogonal assays when possible.
Session Info
# Record the R version, platform, and package versions used for this run.
sessionInfo()
#> R version 4.5.2 (2025-10-31)
#> Platform: x86_64-pc-linux-gnu
#> Running under: Ubuntu 24.04.3 LTS
#>
#> Matrix products: default
#> BLAS: /usr/lib/x86_64-linux-gnu/openblas-pthread/libblas.so.3
#> LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.26.so; LAPACK version 3.12.0
#>
#> locale:
#> [1] LC_CTYPE=C.UTF-8 LC_NUMERIC=C LC_TIME=C.UTF-8
#> [4] LC_COLLATE=C.UTF-8 LC_MONETARY=C.UTF-8 LC_MESSAGES=C.UTF-8
#> [7] LC_PAPER=C.UTF-8 LC_NAME=C LC_ADDRESS=C
#> [10] LC_TELEPHONE=C LC_MEASUREMENT=C.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: UTC
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats4 stats graphics grDevices datasets utils methods
#> [8] base
#>
#> other attached packages:
#> [1] org.Hs.eg.db_3.22.0 AnnotationDbi_1.72.0
#> [3] ggplot2_4.0.2 tidyr_1.3.2
#> [5] tibble_3.3.1 dplyr_1.2.0
#> [7] airway_1.30.0 SummarizedExperiment_1.40.0
#> [9] Biobase_2.70.0 GenomicRanges_1.62.1
#> [11] Seqinfo_1.0.0 IRanges_2.44.0
#> [13] S4Vectors_0.48.0 BiocGenerics_0.56.0
#> [15] generics_0.1.4 MatrixGenerics_1.22.0
#> [17] matrixStats_1.5.0 VISTA_0.99.0
#> [19] BiocStyle_2.38.0
#>
#> loaded via a namespace (and not attached):
#> [1] splines_4.5.2 filelock_1.0.3
#> [3] ggplotify_0.1.3 R.oo_1.27.1
#> [5] polyclip_1.10-7 graph_1.88.1
#> [7] minpack.lm_1.2-4 XML_3.99-0.22
#> [9] httr2_1.2.2 lifecycle_1.0.5
#> [11] rstatix_0.7.3 edgeR_4.8.2
#> [13] lattice_0.22-7 MASS_7.3-65
#> [15] backports_1.5.0 magrittr_2.0.4
#> [17] limma_3.66.0 sass_0.4.10
#> [19] rmarkdown_2.30 jquerylib_0.1.4
#> [21] yaml_2.3.12 otel_0.2.0
#> [23] ggtangle_0.1.1 cowplot_1.2.0
#> [25] DBI_1.2.3 RColorBrewer_1.1-3
#> [27] abind_1.4-8 quadprog_1.5-8
#> [29] purrr_1.2.1 R.utils_2.13.0
#> [31] msigdbr_25.1.1 pracma_2.4.6
#> [33] yulab.utils_0.2.4 tweenr_2.0.3
#> [35] rappdirs_0.3.4 gdtools_0.5.0
#> [37] enrichplot_1.30.4 ggrepel_0.9.6
#> [39] tidytree_0.4.7 annotate_1.88.0
#> [41] pkgdown_2.2.0 codetools_0.2-20
#> [43] DelayedArray_0.36.0 DOSE_4.4.0
#> [45] ggforce_0.5.0 tidyselect_1.2.1
#> [47] aplot_0.2.9 farver_2.1.2
#> [49] BiocFileCache_3.0.0 jsonlite_2.0.0
#> [51] Formula_1.2-5 systemfonts_1.3.1
#> [53] progress_1.2.3 tools_4.5.2
#> [55] ggnewscale_0.5.2 treeio_1.34.0
#> [57] xCell2_1.2.3 ragg_1.5.0
#> [59] Rcpp_1.1.1 glue_1.8.0
#> [61] SparseArray_1.10.8 xfun_0.56
#> [63] DESeq2_1.50.2 qvalue_2.42.0
#> [65] withr_3.0.2 BiocManager_1.30.27
#> [67] fastmap_1.2.0 GGally_2.4.0
#> [69] digest_0.6.39 R6_2.6.1
#> [71] gridGraphics_0.5-1 textshaping_1.0.4
#> [73] colorspace_2.1-2 GO.db_3.22.0
#> [75] RSQLite_2.4.6 R.methodsS3_1.8.2
#> [77] fontLiberation_0.1.0 renv_1.1.4
#> [79] data.table_1.18.2.1 prettyunits_1.2.0
#> [81] httr_1.4.8 htmlwidgets_1.6.4
#> [83] S4Arrays_1.10.1 ontologyIndex_2.12
#> [85] scatterpie_0.2.6 ggstats_0.12.0
#> [87] pkgconfig_2.0.3 gtable_0.3.6
#> [89] blob_1.3.0 S7_0.2.1
#> [91] SingleCellExperiment_1.32.0 XVector_0.50.0
#> [93] clusterProfiler_4.18.4 htmltools_0.5.9
#> [95] fontBitstreamVera_0.1.1 carData_3.0-6
#> [97] bookdown_0.46 fgsea_1.36.2
#> [99] zigg_0.0.2 GSEABase_1.72.0
#> [101] scales_1.4.0 png_0.1-8
#> [103] ggfun_0.2.0 knitr_1.51
#> [105] tzdb_0.5.0 reshape2_1.4.5
#> [107] nlme_3.1-168 curl_7.0.0
#> [109] cachem_1.1.0 stringr_1.6.0
#> [111] BiocVersion_3.22.0 parallel_4.5.2
#> [113] desc_1.4.3 pillar_1.11.1
#> [115] grid_4.5.2 vctrs_0.7.1
#> [117] ggpubr_0.6.3 car_3.1-5
#> [119] tidydr_0.0.6 dbplyr_2.5.2
#> [121] xtable_1.8-8 cluster_2.1.8.1
#> [123] singscore_1.30.0 evaluate_1.0.5
#> [125] readr_2.2.0 cli_3.6.5
#> [127] locfit_1.5-9.12 compiler_4.5.2
#> [129] rlang_1.1.7 crayon_1.5.3
#> [131] ggsignif_0.6.4 labeling_0.4.3
#> [133] plyr_1.8.9 fs_1.6.6
#> [135] ggiraph_0.9.6 stringi_1.8.7
#> [137] viridisLite_0.4.3 BiocParallel_1.44.0
#> [139] assertthat_0.2.1 babelgene_22.9
#> [141] Biostrings_2.78.0 lazyeval_0.2.2
#> [143] GOSemSim_2.36.0 fontquiver_0.2.1
#> [145] Matrix_1.7-4 hms_1.1.4
#> [147] patchwork_1.3.2 bit64_4.6.0-1
#> [149] KEGGREST_1.50.0 statmod_1.5.1
#> [151] AnnotationHub_4.0.0 Rfast_2.1.5.2
#> [153] igraph_2.2.2 broom_1.0.12
#> [155] memoise_2.0.1 RcppParallel_5.1.11-1
#> [157] bslib_0.10.0 ggtree_4.0.4
#> [159] fastmatch_1.1-8 bit_4.6.0
#> [161] ape_5.8-1 gson_0.1.0