当前位置: 首页 > news >正文

样本TCR库相似性计算Morisita–Horn

样本TCR Morisita–Horn聚类热图

 

image

 

样本TCR杰卡德聚类热图

 

#TCR库在同一个样本不同区域的相似度能够达到0.7这样,但是在不同样本之间的相似度还是较低,但是也可以看到会有聚类簇出现的情况,效果没有杰卡德好

#Morisita–Horn可以运用vegan 包计算,也可以运用horn_morisita计算

R: The Morisita index and Horn-Morisita index

 

(91 封私信 / 84 条消息) 生信分析 8+T细胞受体多样性分析 - 知乎

1、数据准备

 

image

 

2、代码

 

####################数据读取#########################

####加载R包
library(dplyr)
library(ggplot2)
library(ComplexHeatmap)
library(circlize)
library(data.table)
library(readxl)

####构建函数

####数据读取

####WGD信息
WGD<-read.table("E:\\课题\\胰腺癌\\1-数据处理\\WES\\13-WGD\\4-结果整合\\0-SampleWGD.txt",header = T,sep = "\t",stringsAsFactors = F,quote = "",fill = T,check.names = F)
colnames(WGD)[1]<-"sample"
WGD<-WGD[,c("sample","WGD")]

####KRAS信息
KRAS<-read.table("E:\\课题\\胰腺癌\\1-数据处理\\WES\\10-突变过滤\\4-结果整合\\3-non-synonymous-mutation.txt",header = T,sep = "\t",stringsAsFactors = F,quote = "",fill = T,check.names = F)
KRAS<-KRAS %>% select(patientID,geneSymbol) %>% unique()
colnames(KRAS)[1]<-"sample"
KRAS_sample<-unique(KRAS$sample)
KRAS<-unique(KRAS$sample[which(KRAS$geneSymbol=="KRAS")])
KRAS_sample<-c(KRAS_sample,setdiff(WGD$sample,KRAS_sample))

####原发复发
Recurrence <-read_xlsx("E:\\课题\\胰腺癌\\0-原始数据\\1-临床信息\\胰腺癌数据编号及临床信息-20250912.xlsx",sheet = 2)
colnames(Recurrence)<-Recurrence[1,]
Recurrence<-Recurrence[-1,]
Recurrence<-Recurrence %>% select(patientID,Recurrence)
Recurrence<-Recurrence[which(Recurrence$Recurrence!="2"),]
colnames(Recurrence)[1]<-"sample"
Recurrence$Recurrence<-gsub("本次入院即为胰腺癌肝转移",1,Recurrence$Recurrence)
Recurrence$Recurrence<-gsub("1,入院时已经是胰腺癌肝转移",1,Recurrence$Recurrence)
Recurrence$Recurrence<-ifelse(Recurrence$Recurrence=="1","Recurrence","Primary")

####TNM分期
TNM <-read_xlsx("E:\\课题\\胰腺癌\\0-原始数据\\1-临床信息\\胰腺癌数据编号及临床信息-20250912.xlsx",sheet = 2)
colnames(TNM)<-TNM[1,]
TNM<-TNM[-1,]
TNM<-TNM %>% select(patientID,TNM_stage)
TNM$TNM_stage<-gsub("III|IV","III",TNM$TNM_stage)
TNM$TNM_stage<-gsub("IIA|IIB","II",TNM$TNM_stage)
TNM$TNM_stage<-gsub("IA|IB","I",TNM$TNM_stage)
TNM<-TNM[which(!(TNM$TNM_stage %in% NA)),]
colnames(TNM)<-c("sample","TNM")

#TCR
filepath<-dir(path="E:\\课题\\胰腺癌\\runtime\\0-原始数据\\5-BulkTCR\\data\\",pattern = ".clonotypes.TRB.txt$",full.names = TRUE)

#样本信息
sample_info <-read_xlsx("E:\\课题\\胰腺癌\\0-原始数据\\1-临床信息\\胰腺癌数据编号及临床信息-20250912.xlsx")
sample_info<-sample_info[!is.na(sample_info$`BulkTCR(T)`) ,]
sample_info<-sample_info[which(!(sample_info$`WGS(T)` %in% NA &sample_info$`BulkRNA(T)` %in% NA)),]
sample_info<-unique(sample_info[,c("patientID","BulkTCR(T)")])
sample_info$`BulkTCR(T)`<-gsub("-.*","",sample_info$`BulkTCR(T)`)
clin <- setNames(sample_info$patientID,
sample_info$`BulkTCR(T)`)


####################1-获取样本TCR的CDR3#########################
data<-data.frame()
for(i in 1:length(filepath)){
if(strsplit(basename(filepath[i]),".clonotypes.TRB.txt")[[1]][1] %in% sample_info$`BulkTCR(T)`){
sample<-unname(clin[strsplit(basename(filepath[i]),".clonotypes.TRB.txt")[[1]][1]])
TCR<-read.table(filepath[i],header = T, sep = "\t", stringsAsFactors = F, fill = T, check.names = F)
TCR<-TCR %>% select(cloneCount,aaSeqCDR3)
TCR <- TCR %>%
group_by(aaSeqCDR3) %>%
summarise(cloneCount = sum(cloneCount), .groups = "drop")
TCR$sample<-sample
TCR<-unique(TCR[,c("sample","aaSeqCDR3","cloneCount")])
data<-rbind(data,TCR)
}
print(i)
}

fwrite(data,"E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_CDR3seq_count.txt",quote = F,sep = "\t",row.names = F,col.names = T)

####################2-计算样本Morisita–Horn指数#########################
data<-fread("E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_CDR3seq_count.txt",header = T,sep = "\t",stringsAsFactors = F,quote = "",fill = T,check.names = F)

mat <- dcast(data, aaSeqCDR3 ~ sample,
value.var = "cloneCount",
fun.aggregate = sum,
fill = 0)

rownames(mat) <- mat$aaSeqCDR3
mat <- as.matrix(mat[, -1])

dist_mh <- vegdist(t(mat), method = "horn")
sim_mh <- 1 - as.matrix(dist_mh)

fwrite(sim_mh,"E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_Morisita–Horn指数.txt",quote = F,sep = "\t",row.names = T,col.names = T)

####################3-样本TCR Morisita–Horn聚类#########################

jaccard_matrix<-fread("E:\\课题\\胰腺癌\\5-文章结果\\TCR\\3-output\\1_sampleTCR_Morisita–Horn指数.txt",header = T,sep = "\t",stringsAsFactors = F,quote = "",fill = T,check.names = F)
jaccard_matrix<-as.data.frame(jaccard_matrix)
rownames(jaccard_matrix)<-jaccard_matrix$V1
jaccard_matrix<-jaccard_matrix[,-1]

####顶部注释
group_KRAS <- ifelse(colnames(jaccard_matrix) %in% KRAS,
"KRAS", ifelse(colnames(jaccard_matrix) %in% KRAS_sample,"no KRAS",NA))
group_KRAS[is.na(group_KRAS)] <- "Unknown"
names(group_KRAS) <- colnames(jaccard_matrix)

group_WGD <- ifelse(colnames(jaccard_matrix) %in% WGD$sample[which(WGD$WGD!="no WGD")],
"WGD", ifelse(colnames(jaccard_matrix) %in% WGD$sample[which(WGD$WGD=="no WGD")],"no WGD",NA))
group_WGD[is.na(group_WGD)] <- "Unknown"
names(group_WGD) <- colnames(jaccard_matrix)

group_Recurrence <- ifelse(colnames(jaccard_matrix) %in% Recurrence$sample[which(Recurrence$Recurrence=="Recurrence")],
"Recurrence", ifelse(colnames(jaccard_matrix) %in% Recurrence$sample[which(Recurrence$Recurrence=="Primary")],"Primary",NA))
group_Recurrence[is.na(group_Recurrence)] <- "Unknown"
names(group_Recurrence) <- colnames(jaccard_matrix)

group_TNM <- ifelse(colnames(jaccard_matrix) %in% TNM$sample[which(TNM$TNM=="I")],
"I", ifelse(colnames(jaccard_matrix) %in% TNM$sample[which(TNM$TNM=="II")],"II",ifelse(colnames(jaccard_matrix) %in% TNM$sample[which(TNM$TNM=="III")],"III",NA)))
group_TNM[is.na(group_TNM)] <- "Unknown"
names(group_TNM) <- colnames(jaccard_matrix)

Top_Annotation <- HeatmapAnnotation(
KRAS = group_KRAS,
WGD = group_WGD,
Recurrence = group_Recurrence,
TNM = group_TNM,

col = list(
KRAS = c("KRAS" = "#AABCDB",
"no KRAS" = "#C0D6EA","Unknown"="grey90"),

WGD = c("no WGD" = "#EFC99B",
"WGD" = "#E8B574","Unknown"="grey90"),

TNM = c("I" = "#B5A8CA",
"II" = "#9A8AB4",
"III" = "#826BA2","Unknown"="grey90"),

Recurrence = c("Primary" = "#DBDBA7",
"Recurrence" = "#C0C05A","Unknown"="grey90")
),
annotation_height = unit(c(1,1,1,1), "mm")
)

####左边注释
left_Annotation <- rowAnnotation(
KRAS = group_KRAS,
WGD = group_WGD,
Recurrence = group_Recurrence,
TNM = group_TNM,

col = list(
KRAS = c("KRAS" = "#AABCDB",
"no KRAS" = "#C0D6EA",
"Unknown" = "grey90"),

WGD = c("no WGD" = "#EFC99B",
"WGD" = "#E8B574",
"Unknown" = "grey90"),

TNM = c("I" = "#B5A8CA",
"II" = "#9A8AB4",
"III" = "#826BA2",
"Unknown" = "grey90"),

Recurrence = c("Primary" = "#DBDBA7",
"Recurrence" = "#C0C05A",
"Unknown" = "grey90")
),

annotation_width = unit(c(5,5,5,5), "mm") # ⭐控制左侧宽度
)

####设置映射颜色
mat <- jaccard_matrix
diag(mat) <- NA

col_fun <- colorRamp2(
c(0, 0.005, 0.01, 0.02, 0.05, 0.25),
c("white", "#deebf7", "#9ecae1", "#6baed6", "#3182bd", "#08306b")
)

# col_fun <- circlize::colorRamp2(
# c(0.75, 0.90, 0.95, 0.98, 0.99, 0.992, 0.994),
# rev(c("white", "#deebf7", "#9ecae1", "#6baed6", "#3182bd", "#08519c", "#08306b"))
# )

# col_fun <- colorRamp2(
# c(0, 0.005, 0.01, 0.02, 0.05, 0.25),
# c("white", "#FAE5D8", "#FACEB7", "#E6866A", "#BE3137", "#720320")
# )

main_ht <- Heatmap(
mat,
name = "MAPs",
cluster_rows = TRUE,
cluster_columns = TRUE,
show_row_names = FALSE,
show_column_names = FALSE,
show_heatmap_legend = TRUE, # 保留
clustering_distance_rows = as.dist(1 - mat),
clustering_distance_columns = as.dist(1 - mat),
col = col_fun ,

# row_order = Allele_order$Var1,

na_col = "white",
rect_gp = gpar(col = "grey90", lwd = 1),

top_annotation = Top_Annotation,
left_annotation = left_Annotation,

# cell_fun = function(j,i,x,y,w,h,fill){
# if(!is.na(data_use[i,j])){
# grid.text(
# data_use[i,j],
# x,y,
# gp = gpar(fontsize = 8)
# )
# }
# }
)

pdf("E:\\课题\\胰腺癌\\5-文章结果\\TCR\\2-图\\样本TCR Morisita–Horn聚类热图.pdf",width = 13, height =12, onefile = FALSE)

main_ht

dev.off()

 

http://www.jsqmd.com/news/977195/

相关文章:

  • 京东商品批量采集系统:整店下载、SKU图提取与主图视频下载
  • 2026年 2,4-二甲基苯酚厂家推荐排行榜:工业级/医药级中间体,高纯度与稳定供货实力品牌深度解析 - 品牌发掘
  • VC++6.0开发的轻量级网络资产探测工具:支持主机发现、端口扫描、服务识别与常见漏洞初筛
  • LGTV Companion终极指南:5步实现智能电视与电脑的完美同步
  • 2026年广东正规婚恋平台口碑优选推荐指南:线上相亲、同城交友、靠谱婚介、本地婚恋机构怎么选 - 海棠依旧大
  • 2026 泉州本地人必选防水补漏 TOP5|卫生间免砸砖、屋顶 / 外墙 / 地下室防水|同城上门 1–2h|2026 年 6 月最新调研 - 吉林同城获客
  • AI-Shoujo HF Patch终极指南:一站式游戏增强解决方案 [特殊字符]
  • 把 PyTorch 的 Autograd 压进 280 行 C++:从 Dual Number 到一台 Kahn 拓扑排序引擎
  • NXP eIQ机器学习环境在QorIQ Layerscape嵌入式平台的部署与实战指南
  • Path of Building终极指南:流放之路离线构筑计算器完全教程
  • 2026年卡板厂家推荐榜单:实木/熏蒸/出口/免检/胶合/欧标/美标/IPPC卡板,多场景坚固承重之选 - 品牌发掘
  • p3关系代数与元组演算
  • 计算机小程序毕设实战-ssm基于springboot+微信小程序的中小学生个性化阅读平台小程序的设计与实现【完整源码+LW+部署说明+演示视频,全bao一条龙等】
  • 2.初识网络代码——python基础代码
  • 摆脱论文困扰!2026年好用AI论文网站榜单,毕业论文免费写还合规
  • 2026年深圳工业设计公司推荐榜单:产品外观/结构设计/医疗器械/机械设备/机器人设计一站式优选 - 企业推荐官【官方】
  • ArduPilot自动驾驶系统核心技术架构深度解析
  • 天辛大师再谈高考作文,词语的理解,AI时代的何以传承
  • 大湾区哪家 EMBA 机构比较靠谱?优选机构详细盘点 - 品牌测评鉴赏家
  • 若依框架导出Excel合并行功能详解:从注解配置到源码改造的完整指南
  • 通达信ChanlunX缠论插件:3分钟实现股票走势智能识别,告别手动画线烦恼
  • 百步亭空调维修|百步亭空调移机|百步亭空调加氟|百步亭空调回收 高性价比宅到家快速上门 - 武汉宅到家
  • 6/8
  • 手机 Vibe Coding 半年,终于从能跑到真爽了
  • 终极杀戮尖塔模组管理器:3步开启无限游戏可能
  • 深度解析XHS-Downloader数据持久化架构:高级实战与性能优化指南
  • OpenClaw + Ollama + 火山引擎:本地化 AI Agent 完整部署指南
  • LPC55系列ADC硬件触发与采样时间计算实战指南
  • MC68HC12嵌入式开发:D-Bug12监控程序函数库调用全解析
  • 开源LCA软件openLCA:3小时从零搭建专业级生命周期评估平台