Perhaps the most useful analysis we can perform with this data is to study how cells change in response to their spatial context. Here some expertise/taste is required: how we define spatial context should depend on our understanding our how our tissues are spatially organized and on the biological questions we want to answer.
There are two general approaches we can employ:
We start by loading the data we’ll need, which was output by the earlier scripts from the workflow vignette:
# packages:
library(mclust)
#> Package 'mclust' version 6.1.1
#> Type 'citation("mclust")' for citing this R package in publications.
library(irlba)
#> Loading required package: Matrix
library(spatstat)
#> Loading required package: spatstat.data
#> Warning: package 'spatstat.data' was built under R version 4.4.0
#> Loading required package: spatstat.geom
#> spatstat.geom 3.2-9
#> Loading required package: spatstat.random
#> Warning: package 'spatstat.random' was built under R version 4.4.0
#> spatstat.random 3.2-3
#> Loading required package: spatstat.explore
#> Warning: package 'spatstat.explore' was built under R version 4.4.0
#> Loading required package: nlme
#> spatstat.explore 3.2-7
#> Loading required package: spatstat.model
#> Loading required package: rpart
#> spatstat.model 3.2-11
#> Loading required package: spatstat.linnet
#> spatstat.linnet 3.1-5
#>
#> spatstat 3.0-8
#> For an introduction to spatstat, type 'beginner'
library(spatstat.geom)
library(spatstat.explore)
# load data:
# note these files are for convenience during analysis, and are not a NanoString-supported format
mydir <- "../"
norm <- readRDS(paste0(mydir, "processed_data/norm.RDS"))
metadata <- readRDS(paste0(mydir, "/processed_data/metadata.RDS"))
celltype <- readRDS(paste0(mydir, "processed_data/celltype.RDS"))
xy <- readRDS(paste0(mydir, "/processed_data/xy.RDS"))
The functions we need for spatial annotations are all here:
source("utils/spatial functions.R")
#equivalently, source from github:
#source("https://raw.githubusercontent.com/Nanostring-Biostats/CosMx-Analysis-Scratch-Space/vignette-melanoma/code/vignette/utils/spatial%20functions.R")
Quite a few algorithms now exist for spatial clustering, many quite complex. Be warned that some of these become computationally intractable with the very large datasets now routinely produced by spatial platforms. We find that a simple, computationally quick approach works well: we record the mean expression in each cell’s neighborhood, and we cluster cells based on these values.
# define neighbors:
neighbors_sparsematrix <- nearestNeighborGraph(x = xy[, 1], y = xy[, 2], N = 50, subset = metadata$tissue)
# get matrix of mean expression in each cell's neighbors:
neighborhoodexpression <- neighbor_colMeans(x = norm, neighbors = neighbors_sparsematrix)
# reduce the matrix to its top PCs:
pc <- irlba::prcomp_irlba(neighborhoodexpression, n = 25)$x
# cluster the top PCs:
temp <- mclust::Mclust(pc,
G = 6, # 6 clusters
modelNames = c("EEI"))
spatialclust <- temp$classification
# plot the results:
cols <- c("#616161","#4285f4","#db4437","#f4b400","#0f9d58","#ab47bc","#00acc1",
"#ff7043","#9e9d24","#5c6bc0","#f06292","#00796b","#c2185b","#7e57c2",
"#03a9f4","#8bc34a","#fdd835","#fb8c00","#8d6e63","#9e9e9e","#607d8b")
# plot:
sub = TRUE
plot(xy[sub, ], pch = 16, cex = 0.1,
asp = 1, xlab = "", ylab = "",
col = cols[as.numeric(as.factor(spatialclust[sub]))])
# add to metadata:
metadata$spatialclust <- spatialclust
Alternatively, we can cluster cells based on the cell type composition of their neighborhoods:
## get neighborhood cell type abundance:
# create a point process object:
pp <- spatstat.geom::ppp(xy[, 1], xy[, 2], xrange = range(xy[, 1]), yrange = range(xy[, 2]))
marks(pp) <- celltype
spatstat.geom::marks(pp) <- as.factor(spatstat.geom::marks(pp))
# count neighbors of each db cluster:
neighbormarks <- spatstat.explore::marktable(X = pp, R = NULL, N = 50, exclude=TRUE, collapse=FALSE)
neighbormarks <- as.matrix(neighbormarks)
rownames(neighbormarks) <- rownames(xy)
head(neighbormarks)
#> mark
#> point b B.cell d_1 d_2 Dendritic.cell e Endothelial f Fibroblast
#> c_1_1_1 0 0 3 3 3 25 4 4 0
#> c_1_1_2 0 0 3 4 3 25 4 4 0
#> c_1_1_3 0 0 3 4 2 25 4 5 0
#> c_1_1_4 0 0 2 4 3 27 3 5 0
#> c_1_1_5 0 0 1 6 3 26 2 5 0
#> c_1_1_6 0 0 0 17 0 32 0 0 0
#> mark
#> point Macrophage Mast.cell Monocyte Neutrophil NK.cell Plasma Plasmablast
#> c_1_1_1 5 1 1 1 0 0 0
#> c_1_1_2 4 1 2 0 0 0 0
#> c_1_1_3 4 1 1 1 0 0 0
#> c_1_1_4 3 1 1 1 0 0 0
#> c_1_1_5 3 1 1 1 1 0 0
#> c_1_1_6 0 0 0 0 0 0 0
#> mark
#> point Plasmacytoid.dendritic.cell T-cell T.cell.regulatory
#> c_1_1_1 0 0 0
#> c_1_1_2 0 0 0
#> c_1_1_3 0 0 0
#> c_1_1_4 0 0 0
#> c_1_1_5 0 0 0
#> c_1_1_6 0 1 0
## cluster it:
spatialclust <- mclust::Mclust(neighbormarks, #sweep(neighbormarks, 1, rowSums(neighbormarks), "/"),
G = 8, # 8 clusters
modelNames = c("EEI"))$classification
## plot the results:
# subset for faster plotting:
sub <- sample(1:nrow(xy), round(nrow(xy) / 20), replace = FALSE)
# color palette:
cols <- c("#616161","#4285f4","#db4437","#f4b400","#0f9d58","#ab47bc","#00acc1",
"#ff7043","#9e9d24","#5c6bc0","#f06292","#00796b","#c2185b","#7e57c2",
"#03a9f4","#8bc34a","#fdd835","#fb8c00","#8d6e63","#9e9e9e","#607d8b")
# plot:
plot(xy[sub, ], pch = 16, cex = 0.2,
asp = 1,
col = cols[as.numeric(as.factor(spatialclust[sub])) %% length(cols) + 1])
Here we present three ways to annotate cells based on their spatial context:
Example: you might categorize cells by how far they are from the nearest tumor cell:
# record IDs and distances to nearest 50 neighbors:
tumorcelltypes <- c("b", "e", "f", "d_1", "d_2")
neighbors_dists <- FNN::get.knnx(data = xy[is.element(celltype, tumorcelltypes), ],
query = xy,
k = 5)
str(neighbors_dists)
#> List of 2
#> $ nn.index: int [1:111804, 1:5] 17 17 17 20 46 1 2 3 4 5 ...
#> $ nn.dist : num [1:111804, 1:5] 0.01971 0.01171 0.00497 0.0063 0.01001 ...
# record distance to a cell type:
metadata$disttonearesttumor <- neighbors_dists$nn.dist[, 1]
# record 3rd-closest distance to the cell type:
metadata$distto3rdnearesttumor <- neighbors_dists$nn.dist[, 3]
Example: you might categorize cells by how many of a given cell type are in their neighborhood:
# create a point process object:
pp <- spatstat.geom::ppp(xy[, 1], xy[, 2], xrange = range(xy[, 1]), yrange = range(xy[, 2]))
marks(pp) <- celltype
spatstat.geom::marks(pp) <- celltype
spatstat.geom::marks(pp) <- as.factor(spatstat.geom::marks(pp))
# count neighbors of each db cluster:
neighbormarks <- spatstat.explore::marktable(X = pp, R = NULL, N = 50, exclude=TRUE, collapse=FALSE)
neighbormarks <- as.matrix(neighbormarks)
rownames(neighbormarks) <- rownames(xy)
head(neighbormarks)[, 1:10]
#> mark
#> point b B.cell d_1 d_2 Dendritic.cell e Endothelial f Fibroblast
#> c_1_1_1 0 0 3 3 3 25 4 4 0
#> c_1_1_2 0 0 3 4 3 25 4 4 0
#> c_1_1_3 0 0 3 4 2 25 4 5 0
#> c_1_1_4 0 0 2 4 3 27 3 5 0
#> c_1_1_5 0 0 1 6 3 26 2 5 0
#> c_1_1_6 0 0 0 17 0 32 0 0 0
#> mark
#> point Macrophage
#> c_1_1_1 5
#> c_1_1_2 4
#> c_1_1_3 4
#> c_1_1_4 3
#> c_1_1_5 3
#> c_1_1_6 0
# save results for your desired cell types:
metadata$n_tumor_neighbors <- rowSums(neighbormarks[, tumorcelltypes])
metadata$n_fibroblast_neighbors <- neighbormarks[, "Fibroblast"]
# save the sum of several related cell types:
metadata$n_lymphoid_neighbors <- rowSums(neighbormarks[, c("T-cell", "T.cell.regulatory", "B.cell", "NK.cell", "Plasma", "Plasmablast")])
Example: immune cells respond to cytokine signaling, so you might want to see how much of a given cytokine each cell is exposed to.
# get cells' nearest 50 spatial neighbors:
neighbors_sparsematrix <- nearestNeighborGraph(x = xy[, 1], y = xy[, 2], N = 50, subset = metadata$tissue)
# get neighborhood expression of a chosen gene:
gene <- "C1QA"
metadata$neighborhood_C1QA_expression <- neighbor_mean(x = norm[, gene], neighbors = neighbors_sparsematrix)
Save all these results for later:
saveRDS(metadata, file = paste0(mydir, "/processed_data/metadata_with_spatialcontext.RDS"))