scSeq Data

Single cell data from kidney published in (Lamarthée et al. 2023) under PID E-MTAB-12051 was re-analyzed from a read count level with Seurat v5.3.1.1000 (Hao et al. 2023) in R version 4.5.1 (2025-06-13).

counts = fread('data/E_MTAB_12051_raw_counts.csv', sep=',', header=TRUE) %>% data.frame(row.names=1)
meta = fread('data/E_MTAB_12051_metadata.csv', sep=',', header=TRUE, skip=0, col.names=c('Barcode', 'Sample')) %>% data.frame() %>% mutate(Barcode = gsub('-', '.', Barcode)) %>% arrange(Barcode)
row.names(meta) = meta$Barcode
stopifnot(all(meta$Barcode %in% colnames(counts)))
stopifnot(all(colnames(counts) %in% meta$Barcode))
expressed_genes = rowSums(counts>0)/ncol(counts)*100
# filter low occurance genes and reorder barcode columns to align with meta data
counts = counts[, meta$Barcode]
# counts = counts[expressed_genes > 0.01, meta$Barcode]
meta$nCounts = colSums(counts)
meta$nFeatures = colSums(counts>0)
meta$percent.mt = colSums(counts[grep('^MT-', row.names(counts)), ])/meta$nCounts*100
meta$Reject = meta$nFeatures < 100 | meta$nCounts < 500 | meta$percent.mt > 50
# meta$Reject = meta$nFeatures < 400 | meta$nFeatures > 10000 | meta$percent.mt > 25

ggplot(meta, aes(x=factor(Sample), fill=Sample, alpha=factor(!Reject))) + scale_alpha_ordinal(range=c(.5, 1)) +
   geom_bar(show.legend=F) + coord_flip() + labs(y='# cells', x='', title='Cells per sample') + theme_classic() + scale_y_continuous(expand=c(0.001, 0.001))

if(use_cached) {
  so = readRDS('data/kidney_seurat.RDS')
} else {
  so <- CreateSeuratObject(counts=counts, meta.data=meta, assay='RNA', project='kidney')
}

rm(counts); gc()
##              used    (Mb) gc trigger    (Mb)   max used    (Mb)
## Ncells   13425527   717.1   19786340  1056.8   19786340  1056.8
## Vcells 1649714836 12586.4 3372386219 25729.3 3372384347 25729.3

Data quality

VlnPlot(so, features=c('nFeatures', 'nCounts', 'percent.mt'), group.by='Sample', alpha=0.1)
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
##   Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

FeatureScatter(so, feature1='nCounts', feature2='nFeatures', group.by='Sample', shuffle=T)

Clustering

Before integration

Neighbor computation and subsequent clustering and UMAP generation depend on comparability of the different samples. Since these were prepared separately, transcriptional shifts can occur between samples that make comparison across samples unsuitable. The authors in (Lamarthée et al. 2023) state in their methods that all datasets were previously integrated by anchors.

DimPlot(so, reduction="unintegrated.umap", group.by="Sample", label=T, label.size=3, repel=T)

After integration

DimPlot(so, reduction="CCA.umap", group.by="Sample")

Idents(so) = so@meta.data$CCA.cluster
DimPlot(so, reduction="CCA.umap", group.by="CCA.cluster", label=T, label.size=3, repel=T)

Marker genes

Top five up-regulated genes per cluster against all others are shown.

markers = FindAllMarkers(so, group.by='CCA.cluster', min.pct=0.25, only.pos=T, verbose=F)
markers.top = markers %>% group_by(cluster) %>% top_n(5, avg_log2FC)
markers.top = markers.top[, c('gene', 'cluster', 'avg_log2FC', 'p_val_adj')]
markers.top$avg_log2FC = signif(markers.top$avg_log2FC, digits=2)
markers.top$p_val_adj = signif(markers.top$p_val_adj, digits=2)
DotPlot(so, features=unique(markers.top$gene), scale.by='size', scale=T) + scico::scale_color_scico(palette="vik") + RotatedAxis() + ggtitle('Top 2 markers per cluster')

DoHeatmap(subset(so, downsample=2000), features=unique(markers.top$gene), assay='RNA', group.by='CCA.cluster', slot='counts') + scale_fill_continuous(type='viridis')

Reference Annotation

Cells are mapped reference data based on overlapping transcripts and dimensionally reduced anchors.

Azimuth Kidney

Locally computed annotation prediction based on a reference kidney dataset at three different granularities.

DimPlot(so, group.by='predicted.annotation.l1', reduction='CCA.umap') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l1))))

DimPlot(so, group.by='predicted.annotation.l2', reduction='CCA.umap', repel=T, label=T) + theme(legend.position='none') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l2))))

DimPlot(so, group.by='predicted.annotation.l3', reduction='CCA.umap', repel=T, label=T) + theme(legend.position='none') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l3))))

ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l1)) + geom_bar(position='fill') + 
   theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l1)))) + scale_y_continuous(expand=c(0.001, 0.001))

ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l2)) + geom_bar(position='fill') + 
   theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l2)))) + scale_y_continuous(expand=c(0.001, 0.001))

ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l3)) + geom_bar(position='fill') + 
   theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l3)))) + scale_y_continuous(expand=c(0.001, 0.001))

CloudAzimuth

Cloud based annotation on two different granularities.

DimPlot(so, group.by='azimuth_medium', reduction='CCA.umap', repel=T, label=T) + scale_color_manual(values=biramp(length(unique(so@meta.data$azimuth_medium))))

DimPlot(so, group.by='azimuth_fine', reduction='CCA.umap', repel=T, label=T) +
  scale_color_manual(values=biramp(length(unique(so@meta.data$azimuth_fine)))) +
  theme(legend.position='none')

ggplot(so@meta.data, aes(x=CCA.cluster, fill=azimuth_medium)) + geom_bar(position='fill') + 
   theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$azimuth_medium)))) + scale_y_continuous(expand=c(0.001, 0.001))

ggplot(so@meta.data, aes(x=CCA.cluster, fill=azimuth_fine)) + geom_bar(position='fill') + 
   theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$azimuth_fine)))) + scale_y_continuous(expand=c(0.001, 0.001))

Genes of interest

Transcripts previously described in (Tamassia et al. 2023) as specific for non-classical CD16^+ monocytes in human blood were manually screened for differential expression in the above dataset.

gois = c('CUX1', 'SIGLEC10', 'SPN', 'IFITM2', 'MS4A7', 'NAP1L1', 'TCF7L2', 'LYPD2', 'FCGR3A', 'CDKN1C')
stopifnot(all(gois %in% Features(so)))
FeaturePlot(so, gois, ncol=2, reduction='CCA.umap')

VlnPlot(so, gois, ncol=2, layer='counts')

DotPlot(so, features=gois)

Pseudo bulk DEG

bulk = bulk_counts(so, c('Sample', 'Mono'))

dds <- DESeqDataSetFromMatrix(countData=bulk$counts, colData=bulk$col_data, design= ~ Mono + Sample)  # customize as you want
dds <- dds[rowSums(counts(dds)) >= 5, ]
dds <- DESeq(dds)

# Example: Get DE genes for Health comparison (e.g. Disease vs Control)
res <- results(dds, contrast = c("Mono", "Mono", "other"))
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, log2FoldChange) %>% round(3), filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
DoHeatmap(subset(so, downsample=10000), features=res %>% as.data.frame() %>% top_n(10, log2FoldChange) %>% rownames(), slot='counts')

# Shrink log2 fold changes (optional but recommended for visualizations)
res_shrink <- lfcShrink(dds, coef = "Mono_other_vs_Mono", type = "apeglm")
datatable(res_shrink %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, log2FoldChange) %>% round(3), filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
DoHeatmap(subset(so, downsample=10000), features=res_shrink %>% as.data.frame() %>% top_n(10, -log2FoldChange) %>% rownames(), slot='counts')

References

Hao, Yuhan, Tim Stuart, Madeline H Kowalski, Saket Choudhary, Paul Hoffman, Austin Hartman, Avi Srivastava, et al. 2023. “Dictionary Learning for Integrative, Multimodal and Scalable Single-Cell Analysis.” Nature Biotechnology. https://doi.org/10.1038/s41587-023-01767-y.
Lamarthée, Baptiste, Jasper Callemeyn, Yannick Van Herck, Asier Antoranz, Dany Anglicheau, Patrick Boada, Jan Ulrich Becker, et al. 2023. Transcriptional and spatial profiling of the kidney allograft unravels a central role for FcyRIII+ innate immune cells in rejection.” Nat. Commun. 14 (4359): 1–22. https://doi.org/10.1038/s41467-023-39859-7.
Tamassia, Nicola, Francisco Bianchetto-Aguilera, Sara Gasperini, Alessio Grimaldi, Claudia Montaldo, Federica Calzetti, Elisa Gardiman, et al. 2023. The slan antigen identifies the prototypical non-classical CD16+-monocytes in human blood.” Front. Immunol. 14 (October): 1287656. https://doi.org/10.3389/fimmu.2023.1287656.

  1. Center for Synthetic Microbiology (SYNMIKRO), Philipps-Universität Marburg, ↩︎

  2. Faculty of Medicine, Institute of Pharmacology, University of Marburg, 35043 Marburg, Germany↩︎

  3. Faculty of Medicine, Institute of Pharmacology, University of Marburg, 35043 Marburg, Germany, Max-Planck-Institute for Heart and Lung Research, 61231 Bad Nauheim, Germany↩︎