The following is an example of using the Vitessce widget to visualize a reference and mapped query dataset, with mapping performed by Seurat v4 and scripts from Azimuth.
First, install the R dependencies:
install.packages("Seurat")
install.packages("devtools")
install.packages("BiocManager")
BiocManager::install("glmGamPoi")
devtools::install_github("satijalab/azimuth")
devtools::install_github("mojaveazure/seurat-disk")
Download the dataset and map the query to the reference:
library(vitessceR)
library(Seurat)
library(Azimuth)
source("https://raw.githubusercontent.com/satijalab/azimuth/master/R/helpers.R")
library(Matrix)
# Download query dataset
url <- "https://www.dropbox.com/s/cmbvq2og93lnl9z/pbmc_10k_v3_filtered_feature_bc_matrix.h5?dl=1"
data_dir <- file.path("data", "azimuth")
h5_file <- file.path(data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5")
dir.create(data_dir, showWarnings = FALSE)
if(!file.exists(h5_file)) {
download.file(url, destfile = h5_file)
}
# Download the reference
# Change the file path based on where the reference is located on your system.
reference <- LoadReference(path = "https://seurat.nygenome.org/azimuth/references/v1.0.0/human_pbmc", seconds = 30L)
# Load the query object for mapping
# Change the file path based on where the query file is located on your system.
query <- LoadFileInput(path = h5_file)
# Calculate nCount_RNA and nFeature_RNA if the query does not
# contain them already
if (!all(c("nCount_RNA", "nFeature_RNA") %in% c(colnames(x = query[[]])))) {
calcn <- as.data.frame(x = Seurat:::CalcN(object = query))
colnames(x = calcn) <- paste(
colnames(x = calcn),
"RNA",
sep = '_'
)
query <- AddMetaData(
object = query,
metadata = calcn
)
rm(calcn)
}
# Calculate percent mitochondrial genes if the query contains genes
# matching the regular expression "^MT-"
if (any(grepl(pattern = '^MT-', x = rownames(x = query)))) {
query <- PercentageFeatureSet(
object = query,
pattern = '^MT-',
col.name = 'percent.mt',
assay = "RNA"
)
}
# Filter cells based on the thresholds for nCount_RNA and nFeature_RNA
# you set in the app
cells.use <- query[["nCount_RNA", drop = TRUE]] <= 79534 &
query[["nCount_RNA", drop = TRUE]] >= 501 &
query[["nFeature_RNA", drop = TRUE]] <= 7211 &
query[["nFeature_RNA", drop = TRUE]] >= 54
# If the query contains mitochondrial genes, filter cells based on the
# thresholds for percent.mt you set in the app
if ("percent.mt" %in% c(colnames(x = query[[]]))) {
cells.use <- cells.use & (query[["percent.mt", drop = TRUE]] <= 97 &
query[["percent.mt", drop = TRUE]] >= 0)
}
# Remove filtered cells from the query
query <- query[, cells.use]
# Preprocess with SCTransform
query <- SCTransform(
object = query,
assay = "RNA",
new.assay.name = "refAssay",
residual.features = rownames(x = reference$map),
reference.SCT.model = reference$map[["refAssay"]]@SCTModel.list$refmodel,
method = 'glmGamPoi',
ncells = 2000,
n_genes = 2000,
do.correct.umi = FALSE,
do.scale = FALSE,
do.center = TRUE
)
# Find anchors between query and reference
anchors <- FindTransferAnchors(
reference = reference$map,
query = query,
k.filter = NA,
reference.neighbors = "refdr.annoy.neighbors",
reference.assay = "refAssay",
query.assay = "refAssay",
reference.reduction = "refDR",
normalization.method = "SCT",
features = intersect(rownames(x = reference$map), VariableFeatures(object = query)),
dims = 1:50,
n.trees = 20,
mapping.score.k = 100
)
# Transfer cell type labels and impute protein expression
#
# Transferred labels are in metadata columns named "predicted.*"
# The maximum prediction score is in a metadata column named "predicted.*.score"
# The prediction scores for each class are in an assay named "prediction.score.*"
# The imputed assay is named "impADT" if computed
refdata <- lapply(X = "celltype.l2", function(x) {
reference$map[[x, drop = TRUE]]
})
names(x = refdata) <- "celltype.l2"
if (TRUE) {
refdata[["impADT"]] <- GetAssayData(
object = reference$map[['ADT']],
slot = 'data'
)
}
query <- TransferData(
reference = reference$map,
query = query,
dims = 1:50,
anchorset = anchors,
refdata = refdata,
n.trees = 20,
store.weights = TRUE
)
# Calculate the embeddings of the query data on the reference SPCA
query <- IntegrateEmbeddings(
anchorset = anchors,
reference = reference$map,
query = query,
reductions = "pcaproject",
reuse.weights.matrix = TRUE
)
# Calculate the query neighbors in the reference
# with respect to the integrated embeddings
query[["query_ref.nn"]] <- FindNeighbors(
object = Embeddings(reference$map[["refDR"]])[, 1:50],
query = Embeddings(query[["integrated_dr"]]),
return.neighbor = TRUE,
l2.norm = TRUE,
n.trees = 20
)
# The reference used in the app is downsampled compared to the reference on which
# the UMAP model was computed. This step, using the helper function NNTransform,
# corrects the Neighbors to account for the downsampling.
query <- NNTransform(
object = query,
meta.data = reference$map[[]]
)
# Project the query to the reference UMAP.
query[["umap.proj"]] <- RunUMAP(
object = query[["query_ref.nn"]],
reduction.model = reference$map[["refUMAP"]],
reduction.key = 'UMAP_'
)
# Calculate mapping score and add to metadata
query <- AddMetaData(
object = query,
metadata = MappingScore(anchors = anchors),
col.name = "mapping.score"
)
ref_obj <- reference$plot
qry_obj <- query
# Trick SeuratDisk into saving the UMAP even though it is not based on an internal assay
ref_obj@reductions$refUMAP@assay.used <- "RNA"
#### Use Vitessce for visualization ####
ref_adata_path <- file.path(data_dir, "ref.h5ad.zarr")
qry_adata_path <- file.path(data_dir, "qry.h5ad.zarr")
vitessceAnalysisR::seurat_to_anndata_zarr(ref_obj, ref_adata_path, assay = Seurat::DefaultAssay(ref_obj))
vitessceAnalysisR::seurat_to_anndata_zarr(qry_obj, qry_adata_path, assay = Seurat::DefaultAssay(qry_obj))
# Create Vitessce view config
vc <- VitessceConfig$new(schema_version = "1.0.16", name = "Azimuth")
ref_dataset <- vc$add_dataset("Reference")$add_object(
AnnDataWrapper$new(
adata_path = ref_adata_path,
obs_embedding_paths = c("obsm/X_refUMAP"),
obs_embedding_names = c("UMAP"),
obs_set_paths = c("obs/celltype.l2")
)
)
qry_dataset <- vc$add_dataset("Query")$add_object(
AnnDataWrapper$new(
adata_path = qry_adata_path,
obs_embedding_paths = c("obsm/X_umap.proj"),
obs_embedding_names = c("UMAP"),
obs_set_paths = c("obs/predicted.celltype.l2"),
obs_set_names = c("celltype.l2"),
obs_set_score_paths = c("obs/predicted.celltype.l2.score")
)
)
ref_plot <- vc$add_view(ref_dataset, Component$SCATTERPLOT, mapping = "UMAP")
qry_plot <- vc$add_view(qry_dataset, Component$SCATTERPLOT, mapping = "UMAP")
cell_sets <- vc$add_view(ref_dataset, Component$OBS_SETS)
cell_sets_2 <- vc$add_view(qry_dataset, Component$OBS_SETS)
vc$link_views(
c(ref_plot, qry_plot),
c(CoordinationType$EMBEDDING_ZOOM, CoordinationType$EMBEDDING_TARGET_X, CoordinationType$EMBEDDING_TARGET_Y),
c_values = c(1, 0, 0)
)
vc$layout(hconcat(vconcat(ref_plot, qry_plot), vconcat(cell_sets, cell_sets_2)))
# Render the Vitessce widget
vc$widget(theme = "light")