首页> 关于我们 >新闻中心>技术分享>新闻详情

单细胞高分图片复现(独家代码附上)之系列(三): 高分主Fig-富集分析圈图

2025-05-28

GO/KEGG富集,每个人都做过,但富集结果中有很多条目,每个条目中又有很多参数需要展示(比如富集到的基因数,P值,richfactor等),如果用常规的Barplot, Dotplot来展示的话,就需要多张图片分别展示部分信息,就会显得图片比较繁杂。那有没有能一张图就展示完所有上述参数信息,又比较美观的图呢?有的兄弟,有的。富集分析圈图就是你要找的图。

图1 富集分析圈图

图1中展示的就是富集分析圈图,由4层环构成,圆圈中心和右侧是各层的图例。从内圈到外分别展示了:

1.richfactor: 最内圈代表富集倍数,在0-1范围内取值,值越大格数越多,颜色代表GO/KEGG类别;

2. 分组基因数:第二圈颜色代表上下调基因,数字代表基因数量;

3. 富集到的基因数与P值: 第三圈的条形长度和数字代表富集到的基因数,颜色深浅代表-log10(adjust.Pvalue)值的高低;

4. 通路id及分类:第四圈代表GO/KEGG条目ID,颜色代表GO/KEGG类别。

下面,我们来一层一层地画这张图。注意:绘制时是从最外圈开始、由外向内依次添加,与上面由内到外的介绍顺序相反。

1.数据准备:从富集结果中提取每个Term生成下面这张表格(需包含GO、Category、Total、adjustPvalue、Up、Down、RichFactor这几列),在R中读取它

2.第一圈:我们根据表格中的通路分类给通路涂上不同的颜色,并且用通路总基因数给圈图加上坐标。

library(circlize)

library(grid)

library(gridBase)

library(graphics)

library(ComplexHeatmap)

# ---- Data preparation and first (outermost) track ----

# Reset any circlize state left over from a previous run so that the
# circos.par() call below is applied to a fresh layout.
circos.clear()

# Read the enrichment table. "富集结果表格" is a placeholder: replace it with
# the path to your own CSV file. Columns used by this script:
# GO, Category, Total, adjustPvalue, Up, Down, RichFactor.
circlize_df <- read.csv("富集结果表格")

# One colour per pathway category. A character vector `color` defined by the
# user beforehand is respected; otherwise a qualitative palette is generated.
category_factor <- factor(circlize_df$Category)
if (!exists("color", mode = "character")) {
  color <- grDevices::hcl.colors(nlevels(category_factor), palette = "Set 2")
}
circlize_df$Category_color <- color[as.numeric(category_factor)]

# Row names allow later per-sector lookups by GO ID.
rownames(circlize_df) <- circlize_df$GO

# Every sector spans 0 .. max(Total) on its x axis; `gene_num.rich` is the
# per-term value drawn in the second track.
circlize_df$gene_num.min <- 0
circlize_df$gene_num.rich <- circlize_df$Total
circlize_df$gene_num.max <- max(circlize_df$gene_num.rich)

# Colour scale value for the second track and the title of its legend.
circlize_df$stat_val <- -log10(circlize_df$adjustPvalue)
title_second_data <- "-Log10(adjust.Pvalue)"

# The circle lives in a square viewport on the left of the page; the legends
# drawn at the end of the script are placed relative to `circle_size`.
circle_size <- unit(1, "snpc")
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- circlize_df[, c("GO", "gene_num.min", "gene_num.max")]

plot.new()
pushViewport(viewport(
  x = 0, y = 0.5, width = circle_size, height = circle_size,
  just = c("left", "center")
))
# Align base-graphics margins with the grid viewport so circlize (base
# graphics) draws inside it.
par(omi = gridOMI(), new = TRUE)

# One sector per GO term, no default ideogram/axis tracks.
circos.genomicInitialize(plot_data, plotType = NULL)

# First track: category-coloured band with an axis on top and the sector
# name (GO ID) as label.
circos.track(
  ylim = c(0, 1), track.margin = c(0, 0), track.height = 0.08,
  bg.border = NA, bg.col = circlize_df$Category_color,
  panel.fun = function(x, y) {
    label_y <- get.cell.meta.data("ycenter")
    label_x <- get.cell.meta.data("xcenter")
    sector_name <- get.cell.meta.data("sector.index")
    circos.axis(
      h = "top", labels.cex = 0.6, labels.pos.adjust = TRUE,
      labels.col = "#000000", labels.niceFacing = FALSE
    )
    circos.text(
      label_x, label_y, sector_name,
      cex = 0.6, col = "#000000", niceFacing = FALSE
    )
  }
)

图2 富集分析圈图的第一圈

3.第二圈:绘制富集的基因和富集p值。

# ---- Second track: gene count (bar length + label) coloured by -log10(adj. p) ----

# Each sector gets one rectangle from 0 to `gene_num.rich`; `stat_val` is the
# value column used for the fill colour.
plot_data <- circlize_df[, c("GO", "gene_num.min", "gene_num.rich", "stat_val")]

# Per-sector label lookup (row names = GO IDs, column 1 = gene count).
label_data <- circlize_df[, "gene_num.rich"]
label_data <- as.data.frame(label_data)
rownames(label_data) <- circlize_df$GO

# Gradient end points for the p-value colour scale. A character vector
# `second_col` defined by the user beforehand is respected.
if (!exists("second_col", mode = "character")) {
  second_col <- c("#FFF5F0", "#CB181D")
}

# Upper break of the colour scale. Only finite values are considered so that
# an adjusted p-value of exactly 0 (-log10 = Inf) or an NA does not break
# `0:p_max`; colorRamp2 assigns the extreme colour to values beyond the breaks.
finite_stat <- circlize_df[["stat_val"]]
finite_stat <- finite_stat[is.finite(finite_stat)]
p_max <- round(if (length(finite_stat) > 0) max(finite_stat) else 0) + 1

colorsChoice <- colorRampPalette(second_col)
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data,
  track.margin = c(0, 0), track.height = 0.1, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(
      region, value,
      col = color_assign(value[[1]]), border = NA, ...
    )
    # Print the gene count in the middle of the bar.
    sector_id <- get.cell.meta.data("sector.index")
    gene_count <- label_data[sector_id, 1]
    circos.text(
      gene_count / 2, get.cell.meta.data("ycenter"), gene_count,
      cex = 0.8, col = "#000000", niceFacing = FALSE
    )
  }
)

图3 添加富集分析圈图的第二圈

4.第三圈:分组基因数。这里我们用上下调基因作为分组。

# ---- Third track: up-/down-regulated gene split ----

# Fill colours for the two groups. Character values `upcol` / `downcol`
# defined by the user beforehand are respected.
if (!exists("upcol", mode = "character")) {
  upcol <- "#E41A1C"
}
if (!exists("downcol", mode = "character")) {
  downcol <- "#377EB8"
}

# Share of up- and down-regulated genes per term. Terms with no up or down
# genes get 0 for both shares instead of NaN (0 / 0).
updown_total <- circlize_df$Up + circlize_df$Down
circlize_df$up.proportion <- ifelse(
  updown_total > 0, circlize_df$Up / updown_total, 0
)
circlize_df$down.proportion <- ifelse(
  updown_total > 0, circlize_df$Down / updown_total, 0
)

# The two shares are scaled to the full sector width: the "up" segment runs
# from 0 to up_v, the "down" segment from up_v to down_v.
circlize_df$up_v <- circlize_df$up.proportion * circlize_df$gene_num.max
plot_data_up <- circlize_df[, c("GO", "gene_num.min", "up_v")]
names(plot_data_up) <- c("id", "start", "end")
plot_data_up$type <- 1

circlize_df$down_v <- circlize_df$down.proportion * circlize_df$gene_num.max +
  circlize_df$up_v
plot_data_down <- circlize_df[, c("GO", "up_v", "down_v")]
names(plot_data_down) <- c("id", "start", "end")
plot_data_down$type <- 2

plot_data <- rbind(plot_data_up, plot_data_down)

# Per-sector lookup for label positions (up_v, down_v) and texts (Up, Down).
label_data <- circlize_df[, c("up_v", "down_v", "Up", "Down")]
label_data <- as.data.frame(label_data)
rownames(label_data) <- circlize_df$GO

# type 1 -> upcol, type 2 -> downcol.
color_assign <- colorRamp2(breaks = c(1, 2), col = c(upcol, downcol))

# Label text for a gene count: the number itself, or a blank when it is not
# positive.
count_label <- function(n) {
  if (isTRUE(n > 0)) n else " "
}

circos.genomicTrackPlotRegion(
  plot_data,
  track.margin = c(0, 0), track.height = 0.1, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(
      region, value,
      col = color_assign(value[[1]]), border = NA, ...
    )
    sector_labels <- label_data[get.cell.meta.data("sector.index"), ]
    # NOTE(review): y is derived from the track's bottom radius rather than
    # from the cell's y range; kept exactly as in the original -- confirm the
    # label placement on your figure.
    label_y <- get.cell.meta.data("cell.bottom.radius") - 0.5

    # Up count, centred on the "up" segment.
    circos.text(
      sector_labels$up_v / 2, label_y, count_label(sector_labels$Up),
      cex = 0.6, col = "#000000", niceFacing = FALSE
    )
    # Down count, centred on the "down" segment.
    circos.text(
      (sector_labels$down_v + sector_labels$up_v) / 2, label_y,
      count_label(sector_labels$Down),
      cex = 0.6, col = "#000000", niceFacing = FALSE
    )
  }
)

图4 添加富集分析圈图的第三圈

5.第四圈:绘制富集因子。

# ---- Fourth (innermost) track: rich factor bars ----

# Full-width region per sector; RichFactor is the value column that sets the
# bar height.
plot_data <- circlize_df[, c("GO", "gene_num.min", "gene_num.max", "RichFactor")]

# Per-sector lookup: column 2 holds the rich factor rounded to two decimals.
label_data <- as.data.frame(circlize_df[, c("Category", "RichFactor")])
label_data$RichFactor <- round(label_data$RichFactor, 2)
rownames(label_data) <- circlize_df$GO

circos.genomicTrack(
  plot_data,
  track.margin = c(0.01, 0.04), track.height = 0.3,
  ylim = c(0.05, 0.95), # alternative: ylim = c(0, 1)
  bg.col = "gray95", bg.border = NA,
  panel.fun = function(region, value, ...) {
    sector_id <- get.cell.meta.data("sector.index")

    # Horizontal grid lines at 0.1, 0.2, ..., 0.9.
    for (grid_y in seq(0.1, 0.9, by = 0.1)) {
      circos.lines(
        c(0, max(region)), c(grid_y, grid_y),
        col = "gray", lwd = 0.3
      )
    }

    # Bar from 0 up to the rich factor, filled with the category colour.
    circos.genomicRect(
      region, value,
      col = circlize_df[sector_id, "Category_color"], border = NA,
      ytop.column = 1, ybottom = 0, ...
    )

    # Rounded rich factor printed slightly below the top of the bar.
    rich_factor <- label_data[sector_id, 2]
    circos.text(
      get.cell.meta.data("xcenter"), rich_factor - 0.05, rich_factor,
      cex = 0.8, col = "#000000", niceFacing = FALSE
    )
  }
)

图5 添加富集分析圈图的第四圈

6.最后一步:绘制图例。

# ---- Legends ----

# Legend shown in the centre of the circle. Each entry is drawn by a custom
# graphics function; the functions deliberately ignore the `x` they are
# given and draw at x = 0, as in the original layout.
updown_legend <- Legend(
  labels = c(
    "Number of Genes", "Up-regulated", "Down-regulated", "Rich Factor(0-1)"
  ),
  graphics = list(
    # Grey box with a sample number, mimicking a gene-count label.
    function(x = 0, y, w, h) {
      grid.draw(gTree(
        children = gList(
          rectGrob(
            x = 0, y = y, width = w * 2.2, height = h * 1,
            gp = gpar(fill = "gray80", col = "gray80")
          ),
          textGrob("100", x = 0, y = y)
        ),
        gp = gpar(col = "black", fontsize = 10, fontface = "bold")
      ))
    },
    # Swatch for up-regulated genes.
    function(x = 0, y, w, h) {
      grid.rect(
        x = 0, y = y, width = w * 2.2, height = h * 1,
        gp = gpar(fill = upcol, col = upcol)
      )
    },
    # Swatch for down-regulated genes.
    function(x = 0, y, w, h) {
      grid.rect(
        x = 0, y = y, width = w * 2.2, height = h * 1,
        gp = gpar(fill = downcol, col = downcol)
      )
    },
    # Quadrilateral glyph for the rich-factor bars; coordinates are fixed
    # and do not use x, y, w or h.
    function(x = 0, y, w, h) {
      grid.polygon(
        y = c(0.09, -0.05, -0.12, 0.16),
        x = c(-0.13, -0.13, 0.13, 0.13),
        gp = gpar(fill = "gray85", col = "gray85")
      )
    }
  ),
  # optional: grid_height = unit(0.6, "cm"), grid_width = unit(0.8, "cm")
  labels_gp = gpar(fontsize = 10),
  row_gap = unit(2, "mm")
)

# Category legend. Labels and colours are taken from the same rows (first
# occurrence of each category) so they stay paired even if two categories
# happen to share a colour.
first_of_category <- !duplicated(circlize_df$Category)
category_legend <- Legend(
  labels = circlize_df$Category[first_of_category],
  type = "points", pch = NA,
  background = circlize_df$Category_color[first_of_category],
  # optional: grid_height = unit(0.6, "cm"), grid_width = unit(0.8, "cm")
  labels_gp = gpar(fontsize = 10), row_gap = unit(1, "mm")
)

# Continuous legend for the second track; rebuilt from p_max / colorsChoice
# because `color_assign` has been reused for the up/down colours since.
pvalue_legend <- Legend(
  col_fun = colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1)),
  legend_height = unit(3, "cm"), labels_gp = gpar(fontsize = 10),
  title_gp = gpar(fontsize = 10), title_position = "topleft",
  title = paste0(title_second_data, "\n"), row_gap = unit(3, "mm")
)

lgd_list_vertical <- packLegend(updown_legend)
lgd_list_vertical2 <- packLegend(category_legend, pvalue_legend)

# Centre legend is drawn inside the circle's viewport ...
draw(lgd_list_vertical, just = "center")
upViewport()
# ... the other legends go to the right of the circle, in the parent viewport.
draw(lgd_list_vertical2, x = circle_size, just = "left")

# The plot is complete: reset circlize so the next plot starts from a clean
# state.
circos.clear()

应用场景与注意事项

1.适用场景:

可用于解析生物学机制,如揭示疾病、发育等过程的关键通路;注释细胞亚群功能,明确其特性;筛选生物标志物;辅助药物靶点发现与重定位;整合多组学数据及指导实验验证方向等,助力生物医学研究。

2.可视化调整:

可根据研究重点调整颜色方案(如按通路类别分组着色)、过滤低富集因子的条目,或添加热图刻度条增强可读性。

总 结

富集圈图通过多维度信息的空间整合,为复杂的基因功能分析提供了直观的解读框架,助力研究者快速定位关键通路与核心基因,推动单细胞转录组数据的深度挖掘。绘制图片或者复现代码过程中,如果老师遇到疑惑,欢迎拨打我们的热线电话或者联系我们的驻地销售。