1.环境加载
suppressPackageStartupMessages({library(Seurat)library(dplyr)library(ggplot2)library(DoubletFinder)library(harmony)library(cowplot)library(future)library(sceasy)library(reticulate)library(anndata)library(glmGamPoi)
})
2.数据读取
sample_dirs <- c("/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi00_rep1/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi00_rep2/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi02_rep1/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi02_rep2/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi10_rep1/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi10_rep2/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi21_rep1/filtered_feature_bc_matrix/","/share/org/YZWL/yzwl_caojian/caojian/singlecell/singlecell_1103/ChiHei/raw_file/dpi21_rep2/filtered_feature_bc_matrix/"
)sample_list <- c("dpi00_rep1","dpi00_rep2","dpi02_rep1","dpi02_rep2","dpi10_rep1","dpi10_rep2","dpi21_rep1","dpi21_rep2"
)stage_labels <- c("dpi00","dpi00","dpi02","dpi02","dpi10","dpi10","dpi21","dpi21")
3.双细胞去除
objs <- list()
for (i in 1:length(sample_list)){data <- Read10X(data.dir = sample_dirs[i])TenX_data <- CreateSeuratObject(counts =data,project=sample_list[i],min.cells=3,min.features=200)TenX_data$orig.ident <- sample_list[i]TenX_data$stage <- stage_labels[i]quantile(TenX_data$nFeature_RNA,c(0.05,0.95))# normalizationTenX_data <- subset(TenX_data, subset = nFeature_RNA > 200)TenX_data <- NormalizeData(TenX_data, normalization.method = "LogNormalize", scale.factor = 10000)TenX_data <- FindVariableFeatures(TenX_data, selection.method = "vst", nfeatures = 3000)TenX_data <- ScaleData(TenX_data,features=VariableFeatures(TenX_data))TenX_data <- RunPCA(TenX_data,npcs=50,verbose =FALSE)# 参数扫描PKsweep.res <- paramSweep(TenX_data, PCs = 1:20, sct = FALSE)sweep.stats <- summarizeSweep(sweep.res, GT = FALSE)bcmvn <- find.pK(sweep.stats)best_pK <- bcmvn$pK[which.max(bcmvn$BCmetric)]best_pK <- as.numeric(as.character(best_pK)) # 估计doublefindernExp <- round(ncol(TenX_data) * 0.075)TenX_data <- doubletFinder(TenX_data,PCs = 1:20,pN=0.25,pK=best_pK, nExp=nExp)# 去除双细胞head(TenX_data@meta.data[, grep("pANN|DF.classification|DoubletFinder", colnames(TenX_data@meta.data))])df_col <- colnames(TenX_data@meta.data)[grep("DF.classification",colnames(TenX_data@meta.data))]table(TenX_data@meta.data[[df_col]])TenX_data <- subset(TenX_data,subset= !!sym(df_col) == "Singlet")objs[[i]] <- TenX_dataassign(sample_list[i],TenX_data)}
4.数据整合及拆分策略
merge_data <- merge(objs[[1]],y=objs[-1],add.cell.ids=sample_list)
saveRDS(merge_data,file="./RDS_file/data_all.rds")obj_0_2 <- subset(merge_data,subset=stage %in%c("dpi00","dpi02"))
obj_10_21 <- subset(merge_data,subset=stage %in%c("dpi10","dpi21"))
saveRDS(obj_0_2,file="./RDS_file/data_all_0_2.rds")
saveRDS(obj_10_21,file="./RDS_file/data_all_10_21.rds")
通过上面步骤就可以获取到不同时期或者整个时期经过质控及去除双细胞之后的单细胞数据了。
