当前位置：首页 > news >正文

【R】meme格式绘制logo图

news 2026/7/13 6:38:42

一、目标

将meme格式转化成seqlog

二、seqlog

出图不完整

# 1. 创建全新的环境（避免任何冲突） conda deactivate conda create -n ggmotif_fresh -c conda-forge r-base=4.2.3 # 2. 激活新环境 conda activate ggmotif_fresh R if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install("seqLogo") # 加载seqLogo library(seqLogo) # 从MEME格式文件提取PFM并绘图 # 你需要先将MEME格式转换为位置频率矩阵 # 这里是一个简单示例 pfm <- matrix(c( 10, 12, 4, 14, 2, 16, 2, 10, 18, 0, 0, 2, 0, 0, 0, 20, 4, 6, 10, 0 ), nrow=4, byrow=TRUE, dimnames=list(c("A", "C", "G", "T"))) # 转换为PWM并绘图 pwm <- pfm / colSums(pfm) seqLogo(pwm) # 保存图片 png("motif_logo.png", width=1000, height=1000) seqLogo(pwm) dev.off()

二、ggseqlog

1、单个motif

R install.packages("ggseqlogo") library(ggseqlogo) library(ggplot2) # 读取MEME文件 # 假设你的MEME文件名为 "motifs.meme" 或 "meme.txt" meme_file <- "/root/test/motifs_results/pwms/xylem_motifs.meme" # 替换为你的文件路径 # 函数：从MEME文件读取概率矩阵 read_meme_matrix <- function(meme_file, motif_num = 1) { # 读取文件 lines <- readLines(meme_file) # 找到motif开始的标志 motif_start <- grep("letter-probability matrix:", lines) if(length(motif_start) < motif_num) { stop(paste("文件只有", length(motif_start), "个motif")) } # 定位到指定的motif start_line <- motif_start[motif_num] # 获取矩阵维度信息 matrix_info <- lines[start_line] # 解析w=后面的数字（宽度） w <- as.numeric(sub(".*w=\\s*(\\d+).*", "\\1", matrix_info)) # 读取矩阵数据 matrix_lines <- lines[(start_line + 1):(start_line + w)] # 解析每一行 matrix_data <- do.call(rbind, lapply(matrix_lines, function(line) { as.numeric(strsplit(trimws(line), "\\s+")[[1]]) })) # 转置为4行（A,C,G,T）x w列的矩阵 # 通常MEME格式每行是A,C,G,T的概率 if(ncol(matrix_data) == 4) { # 已经是每行一个位置，列是碱基 matrix_data <- t(matrix_data) } # 设置行名 rownames(matrix_data) <- c("A", "C", "G", "T") return(matrix_data) } # 读取第一个motif motif_matrix <- read_meme_matrix(meme_file, motif_num = 1) # 绘制logo p <- ggseqlogo(motif_matrix) + theme_classic() + labs(title = "Motif from MEME file", x = "Position", y = "Information content (bits)") + scale_x_continuous(breaks = 1:ncol(motif_matrix)) # 保存 ggsave("meme_motif_logo.png", plot = p, width = 10, height = 4, dpi = 300)

2、绘制多个motif并排版

重新编号输出

library(ggseqlogo) library(ggplot2) library(patchwork) # 用于排版 # 定义读取MEME文件的函数（方法2的改进版） read_meme_matrix <- function(meme_file, motif_num = 1) { # 读取文件 lines <- readLines(meme_file) # 找到所有motif开始的标志 motif_start <- grep("letter-probability matrix:", lines) if(length(motif_start) < motif_num) { stop(paste("文件只有", length(motif_start), "个motif")) } # 定位到指定的motif start_line <- motif_start[motif_num] # 获取矩阵维度信息 matrix_info <- lines[start_line] # 解析w=后面的数字（宽度） w <- as.numeric(sub(".*w=\\s*(\\d+).*", "\\1", matrix_info)) # 读取矩阵数据 matrix_lines <- lines[(start_line + 1):(start_line + w)] # 解析每一行 matrix_data <- do.call(rbind, lapply(matrix_lines, function(line) { as.numeric(strsplit(trimws(line), "\\s+")[[1]]) })) # 转置为4行（A,C,G,T）x w列的矩阵 if(ncol(matrix_data) == 4) { matrix_data <- t(matrix_data) } # 设置行名 rownames(matrix_data) <- c("A", "C", "G", "T") return(matrix_data) } # 主程序 meme_file <- "/root/test/motifs_results/pwms/xylem_motifs.meme" # 替换为你的文件路径 # 第一步：获取文件中的motif总数 lines <- readLines(meme_file) motif_starts <- grep("letter-probability matrix:", lines) total_motifs <- length(motif_starts) cat(paste("找到", total_motifs, "个motif\n")) # 第二步：循环读取所有motif并分别保存 motif_matrices <- list() # 存储所有矩阵 for(i in 1:total_motifs) { cat(paste("处理第", i, "个motif...\n")) # 读取矩阵 motif_matrix <- read_meme_matrix(meme_file, motif_num = i) motif_matrices[[i]] <- motif_matrix # 创建单独的logo图 p <- ggseqlogo(motif_matrix) + theme_classic() + labs(title = paste("Motif", i), x = "Position", y = "Information content (bits)") + scale_x_continuous(breaks = 1:ncol(motif_matrix)) + theme( plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), axis.title = element_text(size = 10), axis.text = element_text(size = 9) ) # 保存单个motif图片 ggsave(paste0("motif_", i, "_individual.png"), plot = p, width = 8, height = 3, dpi = 300) cat(paste(" 已保存: motif_", i, "_individual.png\n", sep="")) } # 第三步：创建排版图形 ## 方法A：使用patchwork包（推荐，更灵活） if(total_motifs > 0) { # 创建所有motif的图形列表 plot_list <- list() for(i in 1:total_motifs) { p <- ggseqlogo(motif_matrices[[i]]) + theme_classic() + labs(title = paste("Motif", i), x = "Position", y = "Bits") + scale_x_continuous(breaks = 1:ncol(motif_matrices[[i]])) + theme( plot.title = element_text(hjust = 0.5, size = 12, face = "bold"), axis.title = element_text(size = 9), axis.text = element_text(size = 8), plot.margin = margin(5, 5, 5, 5) ) plot_list[[i]] <- p } # 决定排版布局 if(total_motifs <= 3) { # 少数motif用一行显示 combined_plot <- wrap_plots(plot_list, ncol = total_motifs) + plot_annotation( title = "All Motifs from MEME Analysis", theme = theme( plot.title = element_text(hjust = 0.5, size = 16, face = "bold"), plot.margin = margin(10, 10, 10, 10) ) ) height <- 4 width <- 4 * total_motifs } else { # 多个motif用多行显示 ncol <- ceiling(sqrt(total_motifs)) nrow <- ceiling(total_motifs / ncol) combined_plot <- wrap_plots(plot_list, ncol = ncol) + plot_annotation( title = "All Motifs from MEME Analysis", theme = theme( plot.title = element_text(hjust = 0.5, size = 16, face = "bold"), plot.margin = margin(10, 10, 10, 10) ) ) height <- 3 * nrow width <- 4 * ncol } # 保存排版后的图片 ggsave("all_motifs_patchwork.png", plot = combined_plot, width = width, height = height, dpi = 300, limitsize = FALSE) # 允许大尺寸 cat(paste("\n已保存排版图片: all_motifs_patchwork.png (", width, "x", height, "英寸)\n", sep="")) } # 第四步：生成汇总报告（可选） cat("\n========== 汇总报告 ==========\n") cat(paste("总motif数:", total_motifs, "\n")) for(i in 1:total_motifs) { cat(paste("Motif", i, ": 长度 =", ncol(motif_matrices[[i]]), "bp\n")) } cat("==============================\n") # 第五步：如果你想自定义每个motif的颜色 cat("\n是否要自定义颜色？输入数字选择方案 (1-4)，或直接运行默认方案:\n") cat("1: 经典颜色 (A=绿色, C=蓝色, G=橙色, T=红色)\n") cat("2: 紫色系\n") cat("3: 暖色系\n") cat("4: 冷色系\n") # 如果你想要自定义颜色，可以运行这部分 customize_colors <- function(choice = 1) { if(choice == 1) { # 经典颜色 col_scheme <- list(A = "green", C = "blue", G = "orange", T = "red") } else if(choice == 2) { # 紫色系 col_scheme <- list(A = "#9e9ac8", C = "#6a51a3", G = "#cbc9e2", T = "#3f007d") } else if(choice == 3) { # 暖色系 col_scheme <- list(A = "#fd8d3c", C = "#fc4e2a", G = "#e31a1c", T = "#bd0026") } else if(choice == 4) { # 冷色系 col_scheme <- list(A = "#74c476", C = "#31a354", G = "#006d2c", T = "#00441b") } # 应用颜色重新绘制 custom_plots <- list() for(i in 1:total_motifs) { p <- ggseqlogo(motif_matrices[[i]], col_scheme = col_scheme) + theme_classic() + labs(title = paste("Motif", i), x = "Position", y = "Bits") custom_plots[[i]] <- p } # 保存自定义颜色的排版 combined_custom <- wrap_plots(custom_plots, ncol = ifelse(total_motifs <= 3, total_motifs, 2)) ggsave("all_motifs_custom_colors.png", combined_custom, width = 12, height = 4*ceiling(total_motifs/3), dpi = 300) cat("已保存自定义颜色版本: all_motifs_custom_colors.png\n") } # 取消下面一行的注释来使用自定义颜色（修改数字选择方案） # customize_colors(1)

按原名称保存

library(ggseqlogo) library(ggplot2) library(patchwork) # 改进的读取函数：同时返回矩阵和motif名称 read_meme_matrix_with_name <- function(meme_file, motif_num = 1) { lines <- readLines(meme_file) # 找到所有MOTIF行和矩阵开始的位置 motif_lines <- grep("^MOTIF", lines) motif_start <- grep("letter-probability matrix:", lines) if(length(motif_start) < motif_num) { stop(paste("文件只有", length(motif_start), "个motif")) } # 获取motif名称 motif_name_line <- lines[motif_lines[motif_num]] # 提取MOTIF后面的名称（通常是第二个字段） motif_name <- strsplit(motif_name_line, "\\s+")[[1]][2] # 定位到矩阵开始 start_line <- motif_start[motif_num] # 获取矩阵维度 matrix_info <- lines[start_line] w <- as.numeric(sub(".*w=\\s*(\\d+).*", "\\1", matrix_info)) # 读取矩阵数据 matrix_lines <- lines[(start_line + 1):(start_line + w)] matrix_data <- do.call(rbind, lapply(matrix_lines, function(line) { as.numeric(strsplit(trimws(line), "\\s+")[[1]]) })) if(ncol(matrix_data) == 4) matrix_data <- t(matrix_data) rownames(matrix_data) <- c("A", "C", "G", "T") # 返回列表，包含矩阵和名称 return(list( matrix = matrix_data, name = motif_name )) } # 主程序 meme_file <- "你的MEME文件.meme" # 替换为你的文件路径 # 获取基本信息 lines <- readLines(meme_file) motif_lines <- grep("^MOTIF", lines) total_motifs <- length(motif_lines) cat(paste("找到", total_motifs, "个motif\n\n")) # 循环读取所有motif并保存 matrices <- list() motif_names <- c() for(i in 1:total_motifs) { # 读取矩阵和名称 result <- read_meme_matrix_with_name(meme_file, i) matrices[[i]] <- result$matrix motif_names <- c(motif_names, result$name) cat(paste("处理 Motif", i, ":", result$name, "\n")) cat(paste(" 长度:", ncol(result$matrix), "bp\n")) # 创建安全的文件名（替换特殊字符） safe_name <- gsub("[^A-Za-z0-9_-]", "_", result$name) # 创建单独的logo图 p <- ggseqlogo(result$matrix) + theme_classic() + labs(title = result$name, x = "Position", y = "Information content (bits)") + scale_x_continuous(breaks = 1:ncol(result$matrix)) + theme( plot.title = element_text(hjust = 0.5, size = 14, face = "bold"), axis.title = element_text(size = 10), axis.text = element_text(size = 9) ) # 使用motif名称保存文件 filename <- paste0(safe_name, "_logo.png") ggsave(filename, plot = p, width = 8, height = 3, dpi = 300) cat(paste(" 已保存:", filename, "\n\n")) } # 创建所有motif的排版图 plot_list <- list() for(i in 1:total_motifs) { p <- ggseqlogo(matrices[[i]]) + theme_classic() + labs(title = motif_names[i], x = "Position", y = "Bits") + scale_x_continuous(breaks = 1:ncol(matrices[[i]])) + theme( plot.title = element_text(hjust = 0.5, size = 10, face = "bold"), axis.title = element_text(size = 8), axis.text = element_text(size = 7), plot.margin = margin(5, 5, 5, 5) ) plot_list[[i]] <- p } # 智能布局排版 if(total_motifs <= 3) { # 少数motif用一行 combined_plot <- wrap_plots(plot_list, ncol = total_motifs) + plot_annotation( title = "All Motifs from MEME Analysis", theme = theme( plot.title = element_text(hjust = 0.5, size = 16, face = "bold") ) ) width <- 4 * total_motifs height <- 4 } else if(total_motifs <= 6) { # 中等数量用2行 ncol <- ceiling(total_motifs / 2) combined_plot <- wrap_plots(plot_list, ncol = ncol) + plot_annotation( title = "All Motifs from MEME Analysis", theme = theme( plot.title = element_text(hjust = 0.5, size = 16, face = "bold") ) ) width <- 4 * ncol height <- 8 } else { # 大量motif用网格布局 ncol <- ceiling(sqrt(total_motifs)) combined_plot <- wrap_plots(plot_list, ncol = ncol) + plot_annotation( title = "All Motifs from MEME Analysis", theme = theme( plot.title = element_text(hjust = 0.5, size = 16, face = "bold") ) ) width <- 4 * ncol height <- 3 * ceiling(total_motifs / ncol) } # 保存排版图 ggsave("all_motifs_combined.png", plot = combined_plot, width = width, height = height, dpi = 300, limitsize = FALSE) # 生成汇总文件（包含motif名称和长度信息） summary_file <- "motif_summary.txt" sink(summary_file) cat("MEME Motif Summary\n") cat("==================\n\n") cat(paste("总motif数:", total_motifs, "\n")) cat(paste("分析时间:", date(), "\n\n")) cat("详细信息:\n") cat("---------\n") for(i in 1:total_motifs) { cat(sprintf("%2d. %-20s 长度: %2d bp 文件: %s_logo.png\n", i, motif_names[i], ncol(matrices[[i]]), gsub("[^A-Za-z0-9_-]", "_", motif_names[i]))) } sink() cat("\n========== 完成！ ==========\n") cat(paste("生成了", total_motifs, "个单独的logo图\n")) cat(paste("排版图: all_motifs_combined.png\n")) cat(paste("汇总文件: motif_summary.txt\n")) cat("============================\n") # 可选：显示所有motif名称 cat("\nMotif名称列表:\n") for(i in 1:total_motifs) { cat(sprintf(" %2d: %s\n", i, motif_names[i])) }

查看全文

http://www.jsqmd.com/news/492022/