Single cell data from kidney published in (Lamarthée et al. 2023) under PID E-MTAB-12051 was re-analyzed from a read count level with Seurat v5.3.1.1000 (Hao et al. 2023) in R version 4.5.1 (2025-06-13).
counts = fread('data/E_MTAB_12051_raw_counts.csv', sep=',', header=TRUE) %>% data.frame(row.names=1)
meta = fread('data/E_MTAB_12051_metadata.csv', sep=',', header=TRUE, skip=0, col.names=c('Barcode', 'Sample')) %>% data.frame() %>% mutate(Barcode = gsub('-', '.', Barcode)) %>% arrange(Barcode)
row.names(meta) = meta$Barcode
stopifnot(all(meta$Barcode %in% colnames(counts)))
stopifnot(all(colnames(counts) %in% meta$Barcode))
expressed_genes = rowSums(counts>0)/ncol(counts)*100
# filter low occurance genes and reorder barcode columns to align with meta data
counts = counts[, meta$Barcode]
# counts = counts[expressed_genes > 0.01, meta$Barcode]
meta$nCounts = colSums(counts)
meta$nFeatures = colSums(counts>0)
meta$percent.mt = colSums(counts[grep('^MT-', row.names(counts)), ])/meta$nCounts*100
meta$Reject = meta$nFeatures < 100 | meta$nCounts < 500 | meta$percent.mt > 50
# meta$Reject = meta$nFeatures < 400 | meta$nFeatures > 10000 | meta$percent.mt > 25
ggplot(meta, aes(x=factor(Sample), fill=Sample, alpha=factor(!Reject))) + scale_alpha_ordinal(range=c(.5, 1)) +
geom_bar(show.legend=F) + coord_flip() + labs(y='# cells', x='', title='Cells per sample') + theme_classic() + scale_y_continuous(expand=c(0.001, 0.001))
if(use_cached) {
so = readRDS('data/kidney_seurat.RDS')
} else {
so <- CreateSeuratObject(counts=counts, meta.data=meta, assay='RNA', project='kidney')
}
rm(counts); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 13425527 717.1 19786340 1056.8 19786340 1056.8
## Vcells 1649714836 12586.4 3372386219 25729.3 3372384347 25729.3
VlnPlot(so, features=c('nFeatures', 'nCounts', 'percent.mt'), group.by='Sample', alpha=0.1)
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
FeatureScatter(so, feature1='nCounts', feature2='nFeatures', group.by='Sample', shuffle=T)
Neighbor computation and subsequent clustering and UMAP generation depend on comparability of the different samples. Since these were prepared separately, transcriptional shifts can occur between samples that make comparison across samples unsuitable. The authors in (Lamarthée et al. 2023) state in their methods that all datasets were previously integrated by anchors.
DimPlot(so, reduction="unintegrated.umap", group.by="Sample", label=T, label.size=3, repel=T)
DimPlot(so, reduction="CCA.umap", group.by="Sample")
Idents(so) = so@meta.data$CCA.cluster
DimPlot(so, reduction="CCA.umap", group.by="CCA.cluster", label=T, label.size=3, repel=T)
Top five up-regulated genes per cluster against all others are shown.
markers = FindAllMarkers(so, group.by='CCA.cluster', min.pct=0.25, only.pos=T, verbose=F)
markers.top = markers %>% group_by(cluster) %>% top_n(5, avg_log2FC)
markers.top = markers.top[, c('gene', 'cluster', 'avg_log2FC', 'p_val_adj')]
markers.top$avg_log2FC = signif(markers.top$avg_log2FC, digits=2)
markers.top$p_val_adj = signif(markers.top$p_val_adj, digits=2)
DotPlot(so, features=unique(markers.top$gene), scale.by='size', scale=T) + scico::scale_color_scico(palette="vik") + RotatedAxis() + ggtitle('Top 2 markers per cluster')
DoHeatmap(subset(so, downsample=2000), features=unique(markers.top$gene), assay='RNA', group.by='CCA.cluster', slot='counts') + scale_fill_continuous(type='viridis')
Cells are mapped reference data based on overlapping transcripts and dimensionally reduced anchors.
Locally computed annotation prediction based on a reference kidney dataset at three different granularities.
DimPlot(so, group.by='predicted.annotation.l1', reduction='CCA.umap') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l1))))
DimPlot(so, group.by='predicted.annotation.l2', reduction='CCA.umap', repel=T, label=T) + theme(legend.position='none') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l2))))
DimPlot(so, group.by='predicted.annotation.l3', reduction='CCA.umap', repel=T, label=T) + theme(legend.position='none') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l3))))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l1)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l1)))) + scale_y_continuous(expand=c(0.001, 0.001))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l2)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l2)))) + scale_y_continuous(expand=c(0.001, 0.001))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l3)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l3)))) + scale_y_continuous(expand=c(0.001, 0.001))
Cloud based annotation on two different granularities.
DimPlot(so, group.by='azimuth_medium', reduction='CCA.umap', repel=T, label=T) + scale_color_manual(values=biramp(length(unique(so@meta.data$azimuth_medium))))
DimPlot(so, group.by='azimuth_fine', reduction='CCA.umap', repel=T, label=T) +
scale_color_manual(values=biramp(length(unique(so@meta.data$azimuth_fine)))) +
theme(legend.position='none')
ggplot(so@meta.data, aes(x=CCA.cluster, fill=azimuth_medium)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$azimuth_medium)))) + scale_y_continuous(expand=c(0.001, 0.001))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=azimuth_fine)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$azimuth_fine)))) + scale_y_continuous(expand=c(0.001, 0.001))
Transcripts previously described in (Tamassia et al. 2023) as specific for non-classical CD16^+ monocytes in human blood were manually screened for differential expression in the above dataset.
gois = c('CUX1', 'SIGLEC10', 'SPN', 'IFITM2', 'MS4A7', 'NAP1L1', 'TCF7L2', 'LYPD2', 'FCGR3A', 'CDKN1C')
stopifnot(all(gois %in% Features(so)))
FeaturePlot(so, gois, ncol=2, reduction='CCA.umap')
VlnPlot(so, gois, ncol=2, layer='counts')
DotPlot(so, features=gois)
bulk = bulk_counts(so, c('Sample', 'Mono'))
dds <- DESeqDataSetFromMatrix(countData=bulk$counts, colData=bulk$col_data, design= ~ Mono + Sample) # customize as you want
dds <- dds[rowSums(counts(dds)) >= 5, ]
dds <- DESeq(dds)
# Example: Get DE genes for Health comparison (e.g. Disease vs Control)
res <- results(dds, contrast = c("Mono", "Mono", "other"))
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, log2FoldChange) %>% round(3), filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
DoHeatmap(subset(so, downsample=10000), features=res %>% as.data.frame() %>% top_n(10, log2FoldChange) %>% rownames(), slot='counts')
# Shrink log2 fold changes (optional but recommended for visualizations)
res_shrink <- lfcShrink(dds, coef = "Mono_other_vs_Mono", type = "apeglm")
datatable(res_shrink %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, log2FoldChange) %>% round(3), filter='top', extensions='Buttons', options=list(dom='Bfrtip', buttons=c('copy', 'csv', 'excel', 'pdf', 'print')))
DoHeatmap(subset(so, downsample=10000), features=res_shrink %>% as.data.frame() %>% top_n(10, -log2FoldChange) %>% rownames(), slot='counts')
Center for Synthetic Microbiology (SYNMIKRO), Philipps-Universität Marburg, clemens@thoelken.org↩︎
Faculty of Medicine, Institute of Pharmacology, University of Marburg, 35043 Marburg, Germany↩︎
Faculty of Medicine, Institute of Pharmacology, University of Marburg, 35043 Marburg, Germany, Max-Planck-Institute for Heart and Lung Research, 61231 Bad Nauheim, Germany↩︎