Usage with Seurat: Reference Mapping • vitessceR

The following is an example of using the Vitessce widget to visualize a reference and mapped query dataset, with mapping performed by Seurat v4 and scripts from Azimuth.

First, install the R dependencies:

install.packages("Seurat")
install.packages("devtools")
install.packages("BiocManager")
BiocManager::install("glmGamPoi")
devtools::install_github("satijalab/azimuth")
devtools::install_github("mojaveazure/seurat-disk")

Download the dataset and map the query to the reference:

library(vitessceR)
library(Seurat)
library(Azimuth)
source("https://raw.githubusercontent.com/satijalab/azimuth/master/R/helpers.R")
library(Matrix)

# Download query dataset
url <- "https://www.dropbox.com/s/cmbvq2og93lnl9z/pbmc_10k_v3_filtered_feature_bc_matrix.h5?dl=1"
data_dir <- file.path("data", "azimuth")
h5_file <- file.path(data_dir, "pbmc_10k_v3_filtered_feature_bc_matrix.h5")
dir.create(data_dir, showWarnings = FALSE)
if(!file.exists(h5_file)) {
  download.file(url, destfile = h5_file)
}

# Download the reference
# Change the file path based on where the reference is located on your system.
reference <- LoadReference(path = "https://seurat.nygenome.org/azimuth/references/v1.0.0/human_pbmc", seconds = 30L)

# Load the query object for mapping
# Change the file path based on where the query file is located on your system.
query <- LoadFileInput(path = h5_file)

# Calculate nCount_RNA and nFeature_RNA if the query does not
# contain them already
if (!all(c("nCount_RNA", "nFeature_RNA") %in% c(colnames(x = query[[]])))) {
    calcn <- as.data.frame(x = Seurat:::CalcN(object = query))
    colnames(x = calcn) <- paste(
      colnames(x = calcn),
      "RNA",
      sep = '_'
    )
    query <- AddMetaData(
      object = query,
      metadata = calcn
    )
    rm(calcn)
}

# Calculate percent mitochondrial genes if the query contains genes
# matching the regular expression "^MT-"
if (any(grepl(pattern = '^MT-', x = rownames(x = query)))) {
  query <- PercentageFeatureSet(
    object = query,
    pattern = '^MT-',
    col.name = 'percent.mt',
    assay = "RNA"
  )
}

# Filter cells based on the thresholds for nCount_RNA and nFeature_RNA
# you set in the app
cells.use <- query[["nCount_RNA", drop = TRUE]] <= 79534 &
  query[["nCount_RNA", drop = TRUE]] >= 501 &
  query[["nFeature_RNA", drop = TRUE]] <= 7211 &
  query[["nFeature_RNA", drop = TRUE]] >= 54

# If the query contains mitochondrial genes, filter cells based on the
# thresholds for percent.mt you set in the app
if ("percent.mt" %in% c(colnames(x = query[[]]))) {
  cells.use <- cells.use & (query[["percent.mt", drop = TRUE]] <= 97 &
    query[["percent.mt", drop = TRUE]] >= 0)
}

# Remove filtered cells from the query
query <- query[, cells.use]

# Preprocess with SCTransform
query <- SCTransform(
  object = query,
  assay = "RNA",
  new.assay.name = "refAssay",
  residual.features = rownames(x = reference$map),
  reference.SCT.model = reference$map[["refAssay"]]@SCTModel.list$refmodel,
  method = 'glmGamPoi',
  ncells = 2000,
  n_genes = 2000,
  do.correct.umi = FALSE,
  do.scale = FALSE,
  do.center = TRUE
)

# Find anchors between query and reference
anchors <- FindTransferAnchors(
  reference = reference$map,
  query = query,
  k.filter = NA,
  reference.neighbors = "refdr.annoy.neighbors",
  reference.assay = "refAssay",
  query.assay = "refAssay",
  reference.reduction = "refDR",
  normalization.method = "SCT",
  features = intersect(rownames(x = reference$map), VariableFeatures(object = query)),
  dims = 1:50,
  n.trees = 20,
  mapping.score.k = 100
)

# Transfer cell type labels and impute protein expression
#
# Transferred labels are in metadata columns named "predicted.*"
# The maximum prediction score is in a metadata column named "predicted.*.score"
# The prediction scores for each class are in an assay named "prediction.score.*"
# The imputed assay is named "impADT" if computed

refdata <- lapply(X = "celltype.l2", function(x) {
  reference$map[[x, drop = TRUE]]
})
names(x = refdata) <- "celltype.l2"
if (TRUE) {
  refdata[["impADT"]] <- GetAssayData(
    object = reference$map[['ADT']],
    slot = 'data'
  )
}
query <- TransferData(
  reference = reference$map,
  query = query,
  dims = 1:50,
  anchorset = anchors,
  refdata = refdata,
  n.trees = 20,
  store.weights = TRUE
)

# Calculate the embeddings of the query data on the reference SPCA
query <- IntegrateEmbeddings(
  anchorset = anchors,
  reference = reference$map,
  query = query,
  reductions = "pcaproject",
  reuse.weights.matrix = TRUE
)

# Calculate the query neighbors in the reference
# with respect to the integrated embeddings
query[["query_ref.nn"]] <- FindNeighbors(
  object = Embeddings(reference$map[["refDR"]])[, 1:50],
  query = Embeddings(query[["integrated_dr"]]),
  return.neighbor = TRUE,
  l2.norm = TRUE,
  n.trees = 20
)

# The reference used in the app is downsampled compared to the reference on which
# the UMAP model was computed. This step, using the helper function NNTransform,
# corrects the Neighbors to account for the downsampling.
query <- NNTransform(
  object = query,
  meta.data = reference$map[[]]
)

# Project the query to the reference UMAP.
query[["umap.proj"]] <- RunUMAP(
  object = query[["query_ref.nn"]],
  reduction.model = reference$map[["refUMAP"]],
  reduction.key = 'UMAP_'
)

# Calculate mapping score and add to metadata
query <- AddMetaData(
  object = query,
  metadata = MappingScore(anchors = anchors),
  col.name = "mapping.score"
)

ref_obj <- reference$plot
qry_obj <- query

# Trick SeuratDisk into saving the UMAP even though it is not based on an internal assay
ref_obj@reductions$refUMAP@assay.used <- "RNA"

#### Use Vitessce for visualization ####

ref_adata_path <- file.path(data_dir, "ref.h5ad.zarr")
qry_adata_path <- file.path(data_dir, "qry.h5ad.zarr")

vitessceAnalysisR::seurat_to_anndata_zarr(ref_obj, ref_adata_path, assay = Seurat::DefaultAssay(ref_obj))
vitessceAnalysisR::seurat_to_anndata_zarr(qry_obj, qry_adata_path, assay = Seurat::DefaultAssay(qry_obj))

# Create Vitessce view config
vc <- VitessceConfig$new(schema_version = "1.0.16", name = "Azimuth")
ref_dataset <- vc$add_dataset("Reference")$add_object(
  AnnDataWrapper$new(
    adata_path = ref_adata_path,
    obs_embedding_paths = c("obsm/X_refUMAP"),
    obs_embedding_names = c("UMAP"),
    obs_set_paths = c("obs/celltype.l2")
  )
)
qry_dataset <- vc$add_dataset("Query")$add_object(
  AnnDataWrapper$new(
    adata_path = qry_adata_path,
    obs_embedding_paths = c("obsm/X_umap.proj"),
    obs_embedding_names = c("UMAP"),
    obs_set_paths = c("obs/predicted.celltype.l2"),
    obs_set_names = c("celltype.l2"),
    obs_set_score_paths = c("obs/predicted.celltype.l2.score")
  )
)

ref_plot <- vc$add_view(ref_dataset, Component$SCATTERPLOT, mapping = "UMAP")
qry_plot <- vc$add_view(qry_dataset, Component$SCATTERPLOT, mapping = "UMAP")
cell_sets <- vc$add_view(ref_dataset, Component$OBS_SETS)
cell_sets_2 <- vc$add_view(qry_dataset, Component$OBS_SETS)

vc$link_views(
  c(ref_plot, qry_plot),
  c(CoordinationType$EMBEDDING_ZOOM, CoordinationType$EMBEDDING_TARGET_X, CoordinationType$EMBEDDING_TARGET_Y),
  c_values = c(1, 0, 0)
)

vc$layout(hconcat(vconcat(ref_plot, qry_plot), vconcat(cell_sets, cell_sets_2)))

# Render the Vitessce widget
vc$widget(theme = "light")