写在开头
上周基于文章给的代码以及自己的尝试,复现了一下文章中 FeaturePlot 批量绘图与美化实战
也到了我看上的Figure1D啦! 用热图的方式可视化了top20的marker基因,也结合了相对应的GO富集分析结果图。
那就一起来看看如何得到这个好看的图叭!
读取数据及获取marker基因
首先把 注释信息以及降维聚类分群的结果数据读取进来,并且将注释的metadata信息匹配到降维聚类结果中
###加载包和数据----
rm(list=ls())
options(stringsAsFactors = F)
source('scRNA_scripts/lib.R')
load("sce.all.Rdata")
sce.all.int = readRDS('2-harmony/sce.all_int.rds')
sce.all.int@meta.data = sce.all
sel.clust = "celltype"
sce.all.int <- SetIdent(sce.all.int, value = sel.clust)
table(sce.all.int@active.ident)
marker基因获取: 使用FindAllMarker计算全部细胞亚群的marker基因,然后根据文章图表结果,提取前20的marker基因
#使用FindAllMarker计算marker基因,并提取top20基因----
cell_marker <- FindAllMarkers(sce.all.int, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25, verbose = FALSE)
top = cell_marker %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC )
文章代码简析
照例文章也是给了Figure1D的实现代码,但是没有采用文章的分析代码,原因如下:
-
由于在分析过程中,文章使用的是SCT的处理方式,而复现的时候数据分析用的是三步法,注释分群信息也有少许区别,所以结果数据不完全一致
-
根据文章代码提取数据可视化没有得到类似的结果,所以进行了调整
######## Figure 1D
top = cell_marker %>% group_by(cluster) %>% top_n(n = 20, wt = avg_log2FC )
top = rbind(filter(top,cluster == 'T cell'),filter(top,cluster == 'Myeloid'),filter(top,cluster == 'B cell'),filter(top,cluster == 'Endo'),
filter(top,cluster == 'Mesenchyme'),filter(top,cluster == 'Epi'),filter(top,cluster == 'Malignant'))
exp = AverageExpression(scdata4,group.by = 'celltype2',features = top$gene)
scdata4$celltype2 = factor(scdata4$celltype2,levels = c('T cell','Myeloid','B cell','Endo','Mesenchyme','Epi','Malignant'))
exp = exp$SCT
exp = exp[,c('T cell','Myeloid','B cell','Endo','Mesenchyme','Epi','Malignant')]
bk <- c(seq(-2,2,by=0.01))
color = c(colorRampPalette(colors = c("white","#FEF9F7"))(length(bk)/2),colorRampPalette(colors = c("#FEF9F7","#FB6848"))(length(bk)/2))
pheatmap(exp, fontsize=6,
color = color,scale = 'row',
border_color = "grey60",border=T,
cluster_cols = F, cluster_rows = F,
show_rownames = F, show_colnames = T)
library(clusterProfiler)
res1 = bitr(top$gene,fromType= "SYMBOL" ,toType="ENTREZID",OrgDb="org.Hs.eg.db")
rownames(res1) = res1$SYMBOL
kk <- enrichKEGG(res1[filter(top,cluster == 'T cell')$gene ,2], pvalueCutoff = 0.05,qvalueCutoff = 0.1)
ego_BP = data.frame()
for (i in as.character(unique(top$cluster))) {
ego_BP2 <- enrichGO(filter(top,cluster == i)$gene,
OrgDb = org.Hs.eg.db,ont = "BP", keyType = 'SYMBOL',
pAdjustMethod = "BH",pvalueCutoff = 0.05,qvalueCutoff = 0.1,readable = FALSE)
ego_BP2@result$celltype = i
ego_BP = rbind(ego_BP,ego_BP2@result[1:10,])
}
ego_subset = ego_BP[c('GO:0140131','GO:0035747','GO:0010820','GO:0019886','GO:0060333','GO:0043312','GO:0050864','GO:0050853',
'GO:0042113','GO:0010739','GO:0042118','GO:004544','GO:0006936','GO:0033275','GO:0030239','GO:0070268',
'GO:0097284','GO:0043588', 'GO:0002576','GO:0034375','GO:0072376'),c(2,5,6,7,8,9,10)]
ego_subset$log10 = -log10(ego_subset$p.adjust)
ego_subset$Description = factor(ego_subset$Description,levels = rev(ego_subset$Description))
ggplot(ego_subset, aes(x=Description, y=log10,fill = Count)) +
geom_bar(position = "dodge",stat = "identity",colour="black",width= 0.6)+coord_flip()+
scale_fill_gradient2(low = "#EFAE95", high = "#F37132")
复现过程及代码简析
top20基因热图绘制
因为基于文章的代码没有得到理想的结果,所以还是结合了top20基因的dotplot结果数据进行了可视化
##气泡图联动热图
top_dotplot <- DotPlot(sce.all.int, features = top$gene)+
theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
top_dotplot
# 假设数据存储在 dotplot_data 中
dotplot_data <- top_dotplot$data
class(dotplot_data)
#将数据转换为宽格式,以 `avg.exp.scaled` 为数值
library(tidyr)
library(dplyr)
heatmap_data <- dotplot_data %>%
select(features.plot, id, avg.exp.scaled) %>%
pivot_wider(names_from = features.plot, values_from = avg.exp.scaled)
#将细胞分群ID列设置为行名
library(tibble)
heatmap_data = column_to_rownames(heatmap_data,var = "id")
heatmap_data = t(heatmap_data)
# 构建细胞亚群注释
cell_types <- data.frame(
CellType = colnames(heatmap_data)
)
rownames(cell_types) <- colnames(heatmap_data)
# 设定颜色映射(根据你图片的色调调整)
ann_colors <- list(
CellType = c(
"Tcell" = "#E88C28", # 橙
"myeloids" = "#E2511D", # 深橙
"Bcell" = "#EAD8BC", # 浅米色
"endothelial" = "#8BC6B6", # 青绿色
"myofibroblasts" = "#3BA935", # 草绿色
"macrophages" = "#D1796A", # 土红色
"epithelial" = "#DCAAC2", # 粉灰色
"cycle" = "#F09BA0", # 淡粉红
"mast" = "#E3A088", # 橙粉色
"hepatocytes" = "#F73C2E" # 红色
)
)
# 色带
bk <- seq(-2, 2, by = 0.01)
color <- c(
colorRampPalette(colors = c("white", "#FEF9F7"))(length(bk) / 2),
colorRampPalette(colors = c("#FEF9F7", "#FB6848"))(length(bk) / 2)
)
library(pheatmap)
# 绘制热图
pdf("pheatmap_top20.pdf", width = 10, height = 14)
pheatmap(
heatmap_data,
fontsize = 6,
color = color,
scale = 'row',
#border_color = "grey60",
cluster_cols = FALSE,
cluster_rows = FALSE,
show_rownames = TRUE,
show_colnames = TRUE,
fontsize_row = 4,
fontsize_col = 12,
angle_col = "45",
annotation_col = cell_types,
annotation_colors = ann_colors,
legend = T
)
dev.off()
代码简析:
基因富集分析及可视化
#绘制富集分析结果图----
library(clusterProfiler)
res1 = bitr(top$gene,fromType= "SYMBOL" ,toType="ENTREZID",OrgDb="org.Hs.eg.db")
rownames(res1) = res1$SYMBOL
kk <- enrichKEGG(res1[filter(top,cluster == 'Tcell')$gene ,2], pvalueCutoff = 0.05,qvalueCutoff = 0.1)
ego_BP = data.frame()
library(org.Hs.eg.db)
for (i in as.character(unique(top$cluster))) {
ego_BP2 <- enrichGO(filter(top,cluster == i)$gene,
OrgDb = org.Hs.eg.db,ont = "BP", keyType = 'SYMBOL',
pAdjustMethod = "BH",pvalueCutoff = 0.05,qvalueCutoff = 0.1,readable = FALSE)
ego_BP2@result$celltype = i
ego_BP = rbind(ego_BP,ego_BP2@result[1:10,])
}
# 获取所有需要的细胞类型
celltypes <- unique(ego_BP$celltype)
# 初始化一个空的数据框来存储所有细胞类型前三的结果
top3_all_celltypes <- data.frame()
# 对每个细胞类型进行处理
for (celltype in celltypes) {
# 筛选出当前细胞类型的数据
celltype_results <- ego_BP[ego_BP$celltype == celltype, ]
# 根据p值进行排序并选择前三行
top3_results <- celltype_results %>%
arrange(p.adjust) %>%
head(3)
# 将结果添加到总的数据框中
top3_all_celltypes <- rbind(top3_all_celltypes, top3_results)
}
# 计算-log10调整后的p值
top3_all_celltypes$log10 = -log10(top3_all_celltypes$p.adjust)
# 设置GO条目的显示顺序
top3_all_celltypes$Description = factor(top3_all_celltypes$Description, levels = rev(top3_all_celltypes$Description))
# 绘制条形图
ggplot(top3_all_celltypes, aes(x=Description, y=log10, fill=celltype)) +
geom_bar(stat="identity", position="dodge") +
coord_flip() +
scale_fill_manual(values = ann_colors$CellType) + # 使用手动设置的颜色
theme_minimal() +
labs(x = "Enriched GO terms", y = "-log10(adj.p.value)", fill = "Cell Type") +
theme(legend.position = "bottom")
ggsave("Enriched_GO_terms.pdf", width = 10, height = 14, units = "in")
使用AI进行拼图得到最终结果
不足之处:
-
最开始拼图的时候使用的是带有灰色背景的热图,后续使用代码去除掉了,大家可以使用没有灰色背景的内容
-
GO条码的注释信息,因为有些较长,所以没有能够完全排列整齐,大家可以使用A3画板拼图试试
-
没有给细胞信息也修改颜色(偷懒了),确实使用AI改色也比较方便,大家可以试试看
文末友情宣传
如果你也想做单细胞转录组数据分析,最好是有自己的计算机资源哦,比如我们的 满足你生信分析计算需求的低价解决方案 ,而且还需要有基本的生物信息学基础,也可以看看我们的 生物信息学马拉松授课 ,你的生物信息学入门课。
2025年也会继续学习分享单细胞内容,并且组建了交流群—— 承包你2025全部的单细胞转录组降维聚类分群 ,欢迎一起讨论交流学习!