Single cell data from kidney published in (Lamarthée et al. 2023) under PID E-MTAB-12051 was re-analyzed from a read count level with Seurat v5.3.1.1000 (Hao et al. 2023) in R version 4.5.1 (2025-06-13).
counts = fread('data/E_MTAB_12051_raw_counts.csv', sep=',', header=TRUE) %>% data.frame(row.names=1)
meta = fread('data/E_MTAB_12051_metadata.csv', sep=',', header=TRUE, skip=0, col.names=c('Barcode', 'Sample')) %>% data.frame() %>% mutate(Barcode = gsub('-', '.', Barcode)) %>% arrange(Barcode)
row.names(meta) = meta$Barcode
stopifnot(all(meta$Barcode %in% colnames(counts)))
stopifnot(all(colnames(counts) %in% meta$Barcode))
expressed_genes = rowSums(counts>0)/ncol(counts)*100
# filter low occurance genes and reorder barcode columns to align with meta data
counts = counts[, meta$Barcode]
# counts = counts[expressed_genes > 0.01, meta$Barcode]
meta$nCounts = colSums(counts)
meta$nFeatures = colSums(counts>0)
meta$percent.mt = colSums(counts[grep('^MT-', row.names(counts)), ])/meta$nCounts*100
meta$Reject = meta$nFeatures < 100 | meta$nCounts < 500 | meta$percent.mt > 50
# meta$Reject = meta$nFeatures < 400 | meta$nFeatures > 10000 | meta$percent.mt > 25
ggplot(meta, aes(x=factor(Sample), fill=Sample, alpha=factor(!Reject))) + scale_alpha_ordinal(range=c(.5, 1)) +
geom_bar(show.legend=F) + coord_flip() + labs(y='# cells', x='', title='Cells per sample') + theme_classic() + scale_y_continuous(expand=c(0.001, 0.001))
if(use_cached) {
so = readRDS('data/kidney_seurat.RDS')
# so.full = readRDS('data/kidney_full.RDS')
} else {
so <- CreateSeuratObject(counts=counts, meta.data=meta, assay='RNA', project='kidney')
}
rm(counts); gc()
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 13802745 737.2 20016877 1069.1 20016877 1069.1
## Vcells 190823299 1455.9 2698490995 20587.9 3373110941 25734.8
VlnPlot(so, features=c('nFeatures', 'nCounts', 'percent.mt'), group.by='Sample', alpha=0.1)
## Warning: `PackageCheck()` was deprecated in SeuratObject 5.0.0.
## ℹ Please use `rlang::check_installed()` instead.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: `aes_string()` was deprecated in ggplot2 3.0.0.
## ℹ Please use tidy evaluation idioms with `aes()`.
## ℹ See also `vignette("ggplot2-in-packages")` for more information.
## ℹ The deprecated feature was likely used in the Seurat package.
## Please report the issue at <https://github.com/satijalab/seurat/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
FeatureScatter(so, feature1='nCounts', feature2='nFeatures', group.by='Sample', shuffle=T)
Neighbor computation and subsequent clustering and UMAP generation depend on comparability of the different samples. Since these were prepared separately, transcriptional shifts can occur between samples that make comparison across samples unsuitable. The authors in (Lamarthée et al. 2023) state in their methods that all datasets were previously integrated by anchors.
DimPlot(so, reduction="unintegrated.umap", group.by="Sample", label=T, label.size=3, repel=T)
DimPlot(so, reduction="CCA.umap", group.by="Sample")
Idents(so) = factor(so@meta.data$annotation)
DimPlot(so, reduction="CCA.umap", group.by="annotation", label=T, label.size=3, repel=T)
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", group.by="annotation", label=T, label.size=4, repel=T) + xlim(c(1,4.5)) + ylim(c(9,12))
DimPlot(so, reduction="CCA.umap", group.by="cd14v16", label.size=4, repel=T, pt.size=2)
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", group.by="cd14v16", label.size=4, repel=T, pt.size=1) + xlim(c(1,4.5)) + ylim(c(9,11.5)) + scale_color_manual(values=c('purple', 'red', 'blue', 'gray'))
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", split.by='cd14v16', group.by="cd14v16", label.size=4, repel=T, pt.size=1) + xlim(c(1,4.5)) + ylim(c(9,11.5)) + scale_color_manual(values=c('purple', 'red', 'blue', 'gray'))
Cells are mapped reference data based on overlapping transcripts and dimensionally reduced anchors.
Locally computed annotation prediction based on a reference kidney dataset at three different granularities.
most coarse annotation level
DimPlot(so, group.by='predicted.annotation.l1', reduction='CCA.umap') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l1))))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l1)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l1)))) + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(so, group.by='predicted.annotation.l2', reduction='CCA.umap', repel=T, label=T) + theme(legend.position='none') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l2))))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l2)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l2)))) + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", group.by="predicted.annotation.l2", label=T, label.size=4, repel=T) + xlim(c(1,4.5)) + ylim(c(9,12))
ggplot(subset(so, CCA.cluster==4)@meta.data, aes(x=annotation, fill=predicted.annotation.l2)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_y_continuous(expand=c(0.001, 0.001))
most granular annotation level
DimPlot(so, group.by='predicted.annotation.l3', reduction='CCA.umap', repel=T, label=T) + theme(legend.position='none') + scale_color_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l3))))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=predicted.annotation.l3)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$predicted.annotation.l3)))) + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", group.by="predicted.annotation.l3", label=T, label.size=4, repel=T) + xlim(c(1,4.5)) + ylim(c(9,12))
ggplot(subset(so, CCA.cluster==4)@meta.data, aes(x=annotation, fill=predicted.annotation.l3)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_y_continuous(expand=c(0.001, 0.001))
ggplot(subset(so, CCA.cluster==4)@meta.data, aes(x=cd14v16, fill=predicted.annotation.l3)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_y_continuous(expand=c(0.001, 0.001))
Cloud based annotation on two different granularities.
DimPlot(so, group.by='azimuth_medium', reduction='CCA.umap', repel=T, label=T) + scale_color_manual(values=biramp(length(unique(so@meta.data$azimuth_medium))))
ggplot(so@meta.data, aes(x=CCA.cluster, fill=azimuth_medium)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$azimuth_medium)))) + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", group.by="azimuth_medium", label=T, label.size=4, repel=T) + xlim(c(1,4.5)) + ylim(c(9,12))
ggplot(subset(so, CCA.cluster==4)@meta.data, aes(x=annotation, fill=azimuth_medium)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(so, group.by='azimuth_fine', reduction='CCA.umap', repel=T, label=T) +
scale_color_manual(values=biramp(length(unique(so@meta.data$azimuth_fine)))) +
theme(legend.position='none')
ggplot(so@meta.data, aes(x=CCA.cluster, fill=azimuth_fine)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_fill_manual(values=biramp(length(unique(so@meta.data$azimuth_fine)))) + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", group.by="azimuth_fine", label=T, label.size=4, repel=T) + xlim(c(1,4.5)) + ylim(c(9,12))
ggplot(subset(so, CCA.cluster==4)@meta.data, aes(x=annotation, fill=azimuth_fine)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_y_continuous(expand=c(0.001, 0.001))
DimPlot(subset(so, CCA.cluster==4), reduction="CCA.umap", split.by="cd14v16", group.by='cd14v16', label=T, label.size=4, repel=T) + xlim(c(1,4.5)) + ylim(c(9,12))
ggplot(subset(so, CCA.cluster==4)@meta.data, aes(x=cd14v16, fill=azimuth_fine)) + geom_bar(position='fill') +
theme_classic() + coord_flip() + scale_y_continuous(expand=c(0.001, 0.001))
Transcripts previously described in (Tamassia et al. 2023) as specific for non-classical CD16^+ monocytes in human blood were manually screened for differential expression in the above dataset.
gois = c('CUX1', 'SIGLEC10', 'SPN', 'IFITM2', 'MS4A7', 'NAP1L1', 'TCF7L2', 'LYPD2', 'FCGR3A', 'CDKN1C', 'C1QA', 'C1QB', 'C1QC')
stopifnot(all(gois %in% Features(so)))
FeaturePlot(so, gois, ncol=3, reduction='CCA.umap')
VlnPlot(so, gois, group.by='annotation', ncol=3, layer='counts')
VlnPlot(subset(so, CCA.cluster==4), gois, group.by='annotation', ncol=3, layer='counts')
DotPlot(so, features=gois)
mat <- GetAssayData(so.mono, assay="RNA", layer="data")[gois, ]
mat <- mat[, colSums(mat) > 1]
df <- mat %>%
t() %>%
as.data.frame() %>%
tibble::rownames_to_column("cell") %>%
pivot_longer(
cols = all_of(gois),
names_to = "gene",
values_to = "expression"
) %>%
mutate(gene = factor(gene, levels = gois)) %>%
left_join(
so.mono@meta.data %>%
tibble::rownames_to_column("cell") %>%
select(cell, annotation),
by = "cell"
)
df_norm <- df %>%
group_by(gene) %>%
mutate(
expression = log(expression+1),
expression = (expression - min(expression)) /
(max(expression) - min(expression) + 1e-9)
) %>%
ungroup()
df_mean <- df_norm %>% group_by(annotation, gene) %>% summarize(expression = mean(expression), .groups = "drop")
ggplot() +
# background lines
geom_line(
data = df_norm,
aes(x = gene, y = expression, group = cell, color = annotation),
alpha = 0.05,
linewidth = 0.5
) +
geom_line( data = df_mean, aes(x = gene, y = expression, color = annotation, group=annotation), linewidth = 2 ) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
theme_minimal() +
labs(
x = NULL,
y = "Normalized expression (per gene)",
title = "Parallel coordinates plot with boxplots for gene expression"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank()
)
ggplot() +
geom_boxplot(
data = df_norm,
aes(x = gene, y = expression, fill = annotation),
width = 0.6,
position = position_dodge(width = 0.7),
alpha = 0.6,
outlier.size = 0.5
) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
theme_minimal() +
labs(
x = NULL,
y = "Normalized expression (per gene)",
title = "Parallel coordinates plot with boxplots for gene expression"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank()
)
ggplot() +
geom_violin(
data = df_norm,
aes(x = gene, y = expression, fill = annotation),
width = 0.6,
position = position_dodge(width = 0.7),
alpha = 0.6,
outlier.size = 0.5
) +
scale_color_brewer(palette = "Dark2") +
scale_fill_brewer(palette = "Dark2") +
theme_minimal() +
labs(
x = NULL,
y = "Normalized expression (per gene)",
title = "Parallel coordinates plot with boxplots for gene expression"
) +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.minor = element_blank()
)
Top five up-regulated genes per cluster against all others are shown.
markers = FindAllMarkers(so, group.by='CCA.cluster', min.pct=0.25, only.pos=T, verbose=F)
markers.top = markers %>% group_by(cluster) %>% top_n(5, avg_log2FC)
markers.top = markers.top[, c('gene', 'cluster', 'avg_log2FC', 'p_val_adj')]
datatable(markers.top %>% mutate(across(where(is.numeric), signif, 3)), filter='top')
DotPlot(so, features=unique(markers.top$gene), scale.by='size', scale=T) + scico::scale_color_scico(palette="vik") + RotatedAxis() + ggtitle('Top markers per cluster')
DoHeatmap(subset(so, downsample=10000), features=unique(markers.top$gene), assay='RNA', group.by='CCA.cluster', slot='counts') + scale_fill_continuous(type='viridis')
Differentially Expressed Genes in pseudo bulk RNA-Seq analysis (cell expression levels are aggregated per sample)
dds = bulk_deseq(so.mono, c('Sample', 'annotation'), design=~annotation)
res <- results(dds, contrast=c("annotation", "Monocyte", "Monocyte derived/Macrophage"))
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
# Shrink log2 fold changes (optional but recommended for visualizations)
res_shrink <- lfcShrink(dds, coef="annotation_Monocyte.derived.Macrophage_vs_Monocyte", type="apeglm")
datatable(res_shrink %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, log2FoldChange) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res_shrink %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, -log2FoldChange) %>% rownames(), group.by='annotation', slot='counts')
dds = bulk_deseq(so.mono, c('Sample', 'cd14pos', 'cd16pos'), design=~cd14pos*cd16pos)
res <- results(dds, contrast=c("cd14pos", "CD14", "noCD14"))
volcan(res)
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
res <- results(dds, contrast=c("cd16pos", "CD16", "noCD16"))
volcan(res)
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
dds = bulk_deseq(so.mono, c('Sample', 'cd14v16'), design=~cd14v16)
res <- results(dds, contrast=c("cd14v16", "CD14andCD16", "noCD14butCD16"))
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
res <- results(dds, contrast=c("cd14v16", "CD14andCD16", "CD14noCD16"))
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
res <- results(dds, contrast=c("cd14v16", "CD14noCD16", "noCD14noCD16"))
datatable(res %>% as.data.frame() %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
res <- results(dds, contrast=c("cd14v16", "noCD14butCD16", "noCD14noCD16"))
res$padj[is.na(res$padj)] = 1
volcan(res)
datatable(res %>% as.data.frame() %>% filter(padj < 0.05) %>% select(log2FoldChange, padj) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% round(3), filter='top')
DoHeatmap(so.mono, features=res %>% as.data.frame() %>% filter(padj < 0.05) %>% arrange(-log2FoldChange) %>% top_n(20, abs(log2FoldChange)) %>% rownames(), group.by='annotation', slot='counts')
Center for Synthetic Microbiology (SYNMIKRO), Philipps-Universität Marburg, clemens@thoelken.org↩︎
Faculty of Medicine, Institute of Pharmacology, University of Marburg, 35043 Marburg, Germany↩︎
Faculty of Medicine, Institute of Pharmacology, University of Marburg, 35043 Marburg, Germany, Max-Planck-Institute for Heart and Lung Research, 61231 Bad Nauheim, Germany↩︎