3 Star 10 Fork 2

李农 / 生信代码们

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
克隆/下载
scRNA.R 71.53 KB
一键复制 编辑 原始数据 按行查看 历史
李农 提交于 2024-01-05 02:47 . update scRNA.R.
1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534
setwd("D:\\ProgramFiles\\R\\Work\\scRNA")
set.seed(2023)
library(Seurat)
library(tidyverse)
library(cowplot)
load("heart_h.RData")
dir.create("./heart")
fs=list.files('./GSE207363/','^GSM')
samples=str_split(fs,'_',simplify = T)[,2]
#为每个样本创建子文件夹,重命名文件,并移动到相应的子文件夹里
lapply(unique(samples),function(x){
y=fs[grepl(x,fs)]
folder=paste0("GSE207363/", str_split(y[1],'_',simplify = T)[,2])
dir.create(folder,recursive = T)
file.rename(paste0("GSE207363/",y[1]),file.path(folder,"barcodes.tsv.gz"))
file.rename(paste0("GSE207363/",y[2]),file.path(folder,"features.tsv.gz"))
file.rename(paste0("GSE207363/",y[3]),file.path(folder,"matrix.mtx.gz"))
})
samples <- c("sham","CLP")
dir <- file.path('./GSE207363',samples)
names(dir) <- samples
counts <- Read10X(data.dir = dir)
#循环读入在合并,与上面的一并读入结果不太一样。
for (file in samples){
seurat_data <- Read10X(data.dir = paste0("GSE207363/", file))
seurat_obj <- CreateSeuratObject(counts = seurat_data, min.features = 200,
min.cells = 20, project = file)
assign(file, seurat_obj)
}
scRNA <- merge(x = sham, y = CLP, add.cell.ids = c("sham", "CLP"))
rm(list = setdiff(ls(), "scRNA"))
scRNA$orig.ident <- factor(scRNA$orig.ident, levels = c("sham", "CLP"))
scRNA <- CreateSeuratObject(counts[["Gene Expression"]])
adt <- CreateAssayObject(counts[["Antibody Capture"]])
all.equal(colnames(counts[["Gene Expression"]]), colnames(counts[["Antibody Capture"]]))
scRNA[["ADT"]] <- adt
#如需ID转换,使用EnsDb.Hsapiens.v86而不是org.Hs.eg.db,可以保留MT-
#筛选线粒体,核糖体基因和ERCC,人是MT-,有些是MT_,鼠是mt-
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^mt-")
scRNA[["percent.rb"]] <- PercentageFeatureSet(scRNA, pattern = "^Rp[sl]")
scRNA[["percent.ercc"]] <- PercentageFeatureSet(scRNA, pattern = "^Ercc")
scRNA$log10GenesPerUMI <- log10(scRNA$nFeature_RNA) / log10(scRNA$nCount_RNA)
scRNA$mitoRatio <- PercentageFeatureSet(object = scRNA, pattern = "^mt-")
scRNA$mitoRatio <- scRNA@meta.data$mitoRatio / 100
#查看头部数据
head(scRNA@meta.data)
View(scRNA@meta.data)
VlnPlot(scRNA, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")
scRNA <- subset(scRNA, subset = nFeature_RNA > 250 & nCount_RNA > 500
& percent.mt < 10 & log10GenesPerUMI > 0.80)
#选择只保留在10个或更多细胞中表达的基因。
counts <- GetAssayData(object = scRNA, slot = "counts")
nonzero <- counts > 0
keep_genes <- Matrix::rowSums(nonzero) >= 10
filtered_counts <- counts[keep_genes,]
scRNA <- CreateSeuratObject(filtered_counts, meta.data = scRNA@meta.data)
scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA,selection.method = "vst", nfeatures = 2000)
VariableFeaturePlot(scRNA)
#调大内存
options(future.globals.maxSize = 10000 * 1024^2)
#对所有基因数据都缩放,内存占用太多
all.genes <- rownames(scRNA)
scRNA <- ScaleData(scRNA, features = all.genes)
#对高可变基因的子集进行数据缩放,要不内存不够用。。
scRNA <- ScaleData(scRNA)
#提取细胞周期基因
#把人的基因转换成鼠的对应基因,
convertHumanGeneList <- function(x){
require("biomaRt")
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
genesV2 = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T)
humanx <- unique(genesV2[, 2])
print(head(humanx))
return(humanx)
}
s.genes <- convertHumanGeneList(cc.genes.updated.2019$s.genes)
g2m.genes <- convertHumanGeneList(cc.genes.updated.2019$g2m.genes)
#biomaRt网络不行,用homologenen
library(homologene)
s.genes <- homologene(cc.genes.updated.2019$s.genes, inTax = 9606, outTax = 10090)[,2]
g2m.genes <- homologene(cc.genes.updated.2019$g2m.genes, inTax = 9606, outTax = 10090)[,2]
scRNA <- CellCycleScoring(scRNA, s.features = s.genes, g2m.features = g2m.genes,
set.ident = T)
#根据PCA看s和g是否分开决定是否进行去除细胞周期
scRNA <- RunPCA(scRNA, features = c(s.genes, g2m.genes))
DimPlot(scRNA, reduction = "pca", group.by = "Phase", split.by = "Phase")
scRNA <- ScaleData(scRNA, vars.to.regress = c("S.Score", "G2M.Score"),
features = rownames(scRNA))
#如果不想分G1和G2,使用s和g的差值进行去除细胞周期
marrow$CC.Difference <- marrow$S.Score - marrow$G2M.Score
marrow <- ScaleData(marrow, vars.to.regress = "CC.Difference", features = rownames(marrow))
#SCT流程
scRNA <- CreateSeuratObject(counts, project = "sham", min.cells = 10, min.features = 200)
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^mt-")
scRNA <- subset(scRNA, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 10)
scRNA <- CellCycleScoring(scRNA, s.features = s.genes, g2m.features = g2m.genes,
set.ident = T)
scRNA <- SCTransform(scRNA, method = "glmGamPoi", ncells = 8824, verbose = T,
vars.to.regress = c("percent.mt","S.Score","G2M.Score"))
#分组分别进行SCT
scRNA <- SplitObject(scRNA, split.by = "orig.ident")
for (i in 1:length(scRNA)){
scRNA[[i]] <- NormalizeData(scRNA[[i]], verbose = T)
scRNA[[i]] <- CellCycleScoring(scRNA[[i]],s.features = s.genes,
g2m.features = g2m.genes,
set.ident = T )
scRNA[[i]] <- SCTransform(scRNA[[i]], method = "glmGamPoi", ncells = 8824, verbose = T,
vars.to.regress = c("percent.mt","S.Score","G2M.Score"))
}
scRNA$sham@assays
scRNA$CLP@assays
#将各个集落进行整合
integ_features <- SelectIntegrationFeatures(object.list = scRNA,
nfeatures = 3000)
scRNA <- PrepSCTIntegration(object.list = scRNA,
anchor.features = integ_features)
integ_anchor <- FindIntegrationAnchors(object.list = scRNA,
normalization.method = "SCT",
anchor.features = integ_features)
scRNA <- IntegrateData(anchorset = integ_anchor, normalization.method = "SCT")
saveRDS(scRNA, "lung_integ.rds")
rm(list = c("integ_features", "integ_anchor", "counts", "nonzero",
"filtered_counts", "keep_genes", "all.genes"))
#常规PCA,筛选PCA数量。
scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA))
DimPlot(scRNA)
VizDimLoadings(scRNA, dims = 1:9, reduction = "pca")
DimHeatmap(scRNA, dims = 1:6, nfeatures = 20, cells = 500, balanced = T)
#不同的方式来帮助我们选择PC的数目,并且分别都对应不同的可视化
scRNA <- JackStraw(scRNA, num.replicate = 100)
scRNA <- ScoreJackStraw(scRNA, dims = 1:20)
JackStrawPlot(scRNA, dims = 1:20)
ElbowPlot(scRNA, ndims = 40)
#选择拐点处的PCA数量,sct的结果直接选择40个pca。
scRNA <- RunPCA(scRNA)
PCAPlot(scRNA,split.by = "orig.ident")
scRNA <- FindNeighbors(scRNA, dims = 1:40)
scRNA <- FindClusters(scRNA, resolution = c(0.4, 0.6, 0.8, 1.0, 1.4))
scRNA <- RunUMAP(scRNA, dims = 1:40)
#选择不同的resolution,可以画不同的图。
Idents(object = scRNA) <- "orig.ident"
DimPlot(scRNA, reduction = "umap", label = TRUE, label.size = 6) + theme(legend.position= "none")
#获取不同集落中的细胞数量
ncells <- FetchData(scRNA, vars = c("ident", "orig.ident")) %>%
dplyr::count(ident, orig.ident) %>% tidyr::spread(ident, n)
#按细胞周期和分组进行分群
DimPlot(scRNA,reduction = "umap", label = TRUE, label.size = 6,
#split.by = "orig.ident"
split.by = "Phase"
) + NoLegend()
#按各种无意义变异源分群,观察分布不均匀的集落,注意是否可利用细胞类型进行区分。
metrics <- c("nFeature_RNA", "nCount_RNA", "S.Score", "G2M.Score", "mitoRatio")
FeaturePlot(scRNA, reduction = "umap", features = metrics, pt.size = 0.4,
sort.cell = TRUE, min.cutoff = 'q10', label = TRUE)
#按PC进行分群
columns <- c(paste0("PC_", 1:16), "ident", "UMAP_1", "UMAP_2")
pc_data <- FetchData(scRNA, vars = columns)
umap_label <- FetchData(scRNA,vars = c("ident", "UMAP_1", "UMAP_2")) %>%
group_by(ident) %>% summarise(x=mean(UMAP_1), y=mean(UMAP_2))
map(paste0("PC_", 1:16), function(pc){
ggplot(pc_data,aes(UMAP_1, UMAP_2)) +
geom_point(aes_string(color=pc), alpha = 0.7) +
scale_color_gradient(guide = FALSE, low = "grey90", high = "blue") +
geom_text(data=umap_label, aes(label=ident, x, y)) +
ggtitle(pc)
}) %>% plot_grid(plotlist = .)
print(scRNA[["pca"]], dims = 1:10, nfeatures = 5)
#已知细胞marker的可视化。
DefaultAssay(scRNA) <- "RNA"
scRNA <- NormalizeData(scRNA, verbose = FALSE)
FeaturePlot(scRNA,reduction = "umap", sort.cell = TRUE, min.cutoff = 'q10',
features = c("Cd3d", "Cd8A"),
label = TRUE)
#寻找到双细胞
library(DoubletFinder)
#SCT的方法的话sct = T。
sweep.res.list <- paramSweep_v3(scRNA, PCs = 1:40, sct = T)
sweep.stats <- summarizeSweep(sweep.res.list, GT = F)
bcmvn <- find.pK(sweep.stats)
pk_bcmvn <- bcmvn$pK[which.max(bcmvn$BCmetric)] %>% as.character() %>% as.numeric()
DoubletRate = ncol(scRNA)*8*1e-6
homotypic.prop <- modelHomotypic(scRNA$seurat_clusters)
nExp_poi <- round(DoubletRate*ncol(scRNA))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
scRNA <- doubletFinder_v3(scRNA, PCs = 1:30,pN = 0.25, pK = pk_bcmvn,
nExp = nExp_poi, reuse.pANN = F, sct = T)
DimPlot(scRNA, reduction = "umap", label = T,
#group.by = "DF.classifications_0.25_0.06_3032")
group.by = "DF.classifications_0.25_0.16_1919")
#寻找差异基因,sct下来的需要重新质控,不是sct下来的可以直接算差异。
DefaultAssay(scRNA) <- "RNA"
scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA,selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(scRNA)
scRNA <- ScaleData(scRNA, features = all.genes)
scRNA.markers <- FindAllMarkers(scRNA, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
DEMs <- scRNA.markers %>% group_by(cluster) %>% filter(avg_log2FC > 1 & p_val_adj < 1e-5) %>%
#slice_max(n = 20, order_by = avg_log2FC) %>%
as.data.frame()
write.table(DEMs, "./heart/DEMs.csv", row.names = F, sep = ",")
#查看某一基因的表达情况
VlnPlot(object = scRNA, features ='Spp1',log =T )
FeaturePlot(scRNA, features = c("Spi1", "Lcn2", "Saa3", "S100a8", "Retnlg", "Pglyrp1",
"S100a9", "Ngp", "Hdc", "C1qa", "Pvr", "H2-q4",
"Icam1", "Ccl9", "Selplg", "Ptprc", "Cd22", "Ccl6" ))
#使用singleR识别细胞,并绘图
library(SingleR)
scRNA_sin <- GetAssayData(scRNA, slot = "data")
clusters <- scRNA@meta.data$seurat_clusters
#mousImmu <- celldex::ImmGenData()
#saveRDS(mouseRNA,'mousRNA.rds')
mousImmu <- readRDS("mousImmu.rds")
pred.mouseImmu <- SingleR(test = scRNA_sin, ref = mousImmu, labels = mousImmu$label.main,
clusters = clusters,
assay.type.test = "logcounts", assay.type.ref = "logcounts")
#mouseRNA <- celldex::MouseRNAseqData()
mouseRNA <- readRDS("mousRNA.rds")
pred.mouseRNA <- SingleR(test = scRNA_sin, ref = mouseRNA, labels = mouseRNA$label.fine ,
clusters = clusters,
assay.type.test = "logcounts", assay.type.ref = "logcounts")
cellType <- data.frame(ClusterID = levels(scRNA@meta.data$seurat_clusters),
mouseImmu = pred.mouseImmu$labels,
mouseRNA = pred.mouseRNA$labels )
head(cellType)
scRNA@meta.data$singleR <- cellType[match(clusters,cellType$ClusterID),'mouseRNA']
pro='first_anno'
DimPlot(scRNA,reduction = "umap",label=T, group.by = 'singleR')
ggplot2::ggsave(filename = paste0(pro,'_tSNE_singleR.pdf'))
DimPlot(scRNA,reduction = "umap",label=T,split.by ='orig.ident',group.by = 'singleR')
ggplot2::ggsave(filename = paste0(pro,'_tSNE_singleR_by_orig.ident.pdf'))
#细胞注释,用cellmarker上的细胞marker记录,清晰数据
options(java.parameters= '-Xmx16g')
library(xlsx)
cell_raw <- read.xlsx("Cell_marker_All.xlsx", 1)
mice_lung <- cell_raw[cell_raw$species == "Mouse" & cell_raw$tissue_type == "Heart",][,c(7,9)]
mice_lung$marker <- str_to_sentence(mice_lung$marker)
scell_raw <- read.table("Single_cell_markers.txt",sep = "\t",header = T)
smice_lung <- scell_raw[scell_raw$speciesType == "Mouse" & scell_raw$tissueType == "Heart",][,c(6,8)] %>%
separate_rows(cellMarker, sep = ", ")
colnames(smice_lung) <- colnames(mice_lung)
cell_markers <- rbind(mice_lung,smice_lung)
#计算每个集落中差异基因占细胞marker基因的比例,以确定集落的marker基因。
#方法一,构建两个矩阵,按照位置进行对应。
clRate<-function(x,y){
cellMar <- cell_markers[cell_markers$cell_name == x,][,2]
clMar <- DEMs[DEMs$cluster == y,][,7]
inters <- intersect(cellMar, clMar)
MarRat <- ifelse(length(inters) == 0, 0,sum(sapply(inters, function(x){ sum(cellMar == x)})))
rate <- MarRat/length(cellMar)
return(rate)
}
clInters <- function(x,y){
cellMar <- cell_markers[cell_markers$cell_name == x,][,2]
clMar <- DEMs[DEMs$cluster == y,][,7]
inters <- intersect(cellMar, clMar)
return(paste0(inters, collapse = ", "))
}
clrate <- sapply(unique(cell_markers$cell_name),function(x)
sapply(unique(DEMs$cluster), function(y) clRate(x,y)))
clinter <- sapply(unique(cell_markers$cell_name),function(x)
sapply(unique(DEMs$cluster), function(y) clInters(x,y)))
clrate1 <- clrate
Top5cells <- vector()
Inter <- vector()
Cells <- vector()
Inter2 <- vector()
for (j in 1:5){
Cells <- colnames(clrate1)[max.col(clrate1,"first")]
Top5cells <- cbind(Top5cells,Cells)
for (i in 1:30){
Inter <- rbind(Inter, clinter[i, max.col(clrate1,"first")[i]])
Inter2 <- rbind(Inter2, clrate[i, max.col(clrate1,"first")[i]])
clrate1[i, max.col(clrate1,"first")[i]] <- 0
}
colnames(Inter) <- paste0("Makers ", j)
colnames(Inter2) <- paste0("Rates ", j)
Top5cells <- cbind(Top5cells, Inter, Inter2)
Inter <- vector()
Cells <- vector()
Inter2 <- vector()
}
#方法而,构建一个长数据类型的datafram。
ccRate <- function(x,y){
cellMar <- cell_markers[cell_markers$cell_name == x,][,2]
clMar <- DEMs[DEMs$cluster == y,][,7]
inters <- intersect(cellMar, clMar)
MarRat <- ifelse(length(inters) == 0, 0,sum(sapply(inters, function(x){ sum(cellMar == x)})))
rate <- MarRat/length(cellMar)
return(c(y, x, rate, paste0(inters, collapse = ", ")))
}
Top5cells <- do.call(rbind, lapply(unique(cell_markers$cell_name),function(x)
do.call(rbind, lapply(unique(DEMs$cluster), function(y) ccRate(x,y)))))
colnames(Top5cells) <- c("Cluster", "Cell", "Rate", "Marker")
Top5cells[,3] <- as.numeric(Top5cells[,3])
Top5cells[,1] <- as.numeric(Top5cells[,1]) - 1
Top5cells <- Top5cells %>% as.data.frame() %>% group_by(Cluster) %>% filter(Rate > 0) %>%
slice_max(n = 5, order_by = Rate)
View(Top5cells)
write.table(Top5cells, "./heart/Top5_cell_markers_rate.csv", sep = ",", row.names = FALSE)
#某一集落中所有的marker。
intersect(scRNA.markers[scRNA.markers$cluster == 25,]$gene, cell_markers$marker)
#根据已知文献中记录的marker确定细胞种类
cell_marker <- read.table("cell_markers.csv", header = F, sep = ",")
cell_markers <- cell_marker %>% pivot_longer(cols = 2:4, values_drop_na = T, ) %>%
select(-2)
DEMs <- read.table("DEMs.csv", header = T, sep = ",")
res <- lapply(unique(DEMs$cluster), function (x) {
do.call(rbind, lapply(intersect(DEMs[DEMs$cluster == x,][,7], cell_markers$value),
function(x) cell_markers[cell_markers$value == x,]))
})
sink("heart.txt")
print(res)
sink()
#手工注释细胞。#lung
scRNA <- RenameIdents(object = scRNA,
'0'='Stromal cell',
'1'='Adventitial Fibroblast',
'2'='Monocyte',
'3'='Clara cell',
'4'='Capillary',
'5'='Monocyte',
'6'='Monocyte',
'7'='Monocyte',
'8'='T cell',
'9'='Stromal cell',
'10'='Macrophage',
'11'='Smooth Muscle',
'12'='Natural killer cell',
'13'='Stromal cell',
'14'='Myofibroblast',
'15'='Capillary',
'16'='Mesothelial',
'17'='Monocyte',
'18'='Capillary Aerocyte',
'19'='Proliferating NK/T',
'20'='Endothelial cell',
'21'='Pericyte',
'22'='B cell',
'23'='Macrophage',
'24'='Ccr7+ Dendritic',
'25'='Ciliated',
'26'='Lymphatic',
'27'='Alveolar Epithelial Type 2',
'28'='Capillary Aerocyte',
'29'='Alveolar Epithelial Type 1')
#heart
scRNA <- RenameIdents(object = scRNA,
'0'='Monocyte',
'1'='Endothelial',
'2'='Monocyte',
'3'='Endothelial',
'4'='Endothelial',
'5'='Fibroblast',
'6'='Fibroblast',
'7'='Fibroblast',
'8'='Macrophage',
'9'='Monocyte',
'10'='Fibroblast',
'11'='Macrophage',
'12'='Pericytes',
'13'='B cell',
'14'='Ventricular cardiomyocytes',
'15'='Natural killer cell',
'16'='Endothelial',
'17'='Pericytes',
'18'='Smooth muscle',
'19'='Smooth muscle',
'20'='Neuronal',
'21'='Endothelial',
'22'='T cell')
scRNA$cell_type <- Idents(scRNA)
#保存图片
Idents(object = scRNA) <- "cell_type"
pdf("./lung/UAMP_imm.pdf", height = 8, width = 10)
DimPlot(subRNA, reduction = "umap", label = T, label.size = 5, repel = T)
#theme(legend.position= "none")
dev.off()
#获取细胞亚群比例。
cellnum <- table(scRNA@meta.data$cell_type, scRNA@meta.data[,"orig.ident"]) #统计每个样本种每种细胞亚型的细胞数目
cellfreq <- prop.table(x = table(scRNA@meta.data$cell_type, scRNA@meta.data[,"orig.ident"]),
margin = 2) %>% as.data.frame() %>% filter(Freq > 0) #统计每个每个样本中每个细胞亚型的比例
colnames(cellfreq) = c("Cell_type", "Group", "Freq")
#ggalluvial包加堆叠图之间的连接线
library(ggalluvial)
cellfreq$Group <- factor(cellfreq$Group, levels = c("sham", "CLP"))
ggplot(cellfreq, aes(x = Group, y = Freq, fill = Cell_type,
stratum = Cell_type, alluvium = Cell_type))+
geom_stratum(color = "white", linewidth = 1, width = 1/2)+
geom_alluvium(color = "black", linewidth = 0.5, width =1/2)+
#scale_fill_brewer(type = "qual", palette = "Accent") +
#geom_flow()+
theme_classic()+
theme(legend.position = 'right')+
scale_y_continuous(labels = c("0","25%","50%","75%","100%"),expand = c(0,0))
pdf("./heart/cell_rate.pdf", height = 6, width = 8)
dev.off()
#不严谨的进行细胞亚群里面的差异分析,严谨的应该是每组有2个以上的重复,利用DESeq2工具对特定细胞类型聚类进行pseudobulk差异表达分析
#https://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247490575&idx=1&sn=f453fbb0206a1be7b67058b10a27e5dd&chksm=ea1f1a8ddd68939b5446f21c046e85aa179bf00a6a70ea4f707942a944a76b1d75f3111fb62f&scene=178&cur_album_id=1645776134791315457#rd
write.table(table(scRNA$cell_type, scRNA$orig.ident),"./heart/cell_counts.csv", sep = ",")
DEGs <- lapply(unique(scRNA$cell_type), function(x){
FindMarkers(scRNA[,scRNA$cell_type == x],ident.1 = 'sham', ident.2 = 'CLP',
assay = "RNA", slot = "counts", logfc.threshold = 0, min.pct = 0)
})
names(DEGs) <- unique(scRNA$cell_type)
do.call(rbind,lapply(DEGs, function(x){
table(abs(x$avg_log2FC) > 1 & x$p_val_adj < 0.05)
}))
mo_degs <- rownames(DEGs$Monocyte[abs(DEGs$Monocyte$avg_log2FC) >1 & DEGs$Monocyte$p_val_adj < 1e-5,])
rm(list = setdiff(ls(), c("mo_degs", "pred_gene_l")))
#绘制各个细胞的差异表达基因火山图。
library(ggrepel)
lapply(1:length(DEGs), function(i){
df_hsd <- DEGs[[i]]
df_hsd$ID <- rownames(df_hsd)
df_hsd$Threshold<-factor(ifelse(df_hsd$p_val_adj < 1e-5&
abs(df_hsd$avg_log2FC) > 1,
ifelse(df_hsd$avg_log2FC > 1,"UP","DOWN"),"NO"))
pdf(paste0("./heart/", names(DEGs[i]), "_hsplot.pdf"), height = 8, width = 10)
print(ggplot(df_hsd,aes(x= avg_log2FC, y = -log10(p_val_adj), color = Threshold))+
geom_point(alpha=0.5,size = 4)+
scale_color_manual(values = c("blue","grey","red"))+
theme_bw() + xlim(-4,4)+
theme_classic()+
theme(
plot.title = element_text(hjust=0.5),
axis.title = element_text(size = 15),
axis.text = element_text(size = 12),
legend.title = element_text(size = 15),
legend.text = element_text(size = 12),
legend.position = c(1,0.5),
legend.justification = c(1,1),
legend.background = element_rect(fill = NULL, colour = "black"))+
geom_vline(xintercept = c(-1,1),lty = 2, col = "black", lwd = 0.4)+
geom_hline(yintercept = -log10(1e-5),lty = 2, col = "black", lwd = 0.4)+
geom_text_repel(data = subset(df_hsd, p_val_adj < 1e-5&
abs(avg_log2FC) > 1),
aes(label = ID))+
labs(title = paste0(names(DEGs[i]), " DEGs"), x = "logFC", y = "-log10(adj.P.Val)"))
dev.off()
})
#提取lung和heart共有的免疫细胞进行下面的分析
Idents(scRNA) <- scRNA$cell_type
subRNA <- subset(scRNA, ident = c("Monocyte", "T cell", "B cell", "Macrophage",
"Natural killer cell"))
#增殖细胞纳入T细胞
subRNA <- RenameIdents(object = subRNA, "Proliferating NK/T" = "T cell")
subRNA$cell_type <- Idents(subRNA)
#用subRNA,再出一遍上UMAP和cellrate。
#调控网络分析,将数据导出,用python跑。
#subRNA <- subset(scRNA, downsample = 1000)#随机抽1000个细胞
Idents(subRNA)
saveRDS(subRNA, "tf.rds")
library(SCopeLoomR)
build_loom(file.name = "tf.loom", dgem = subRNA@assays$RNA@counts)
write.table(subRNA@meta.data, "sub_meta.csv", sep = "\t", quote = F)
#计算RSS,找到细胞类型特异的TF
library(SCopeLoomR)
library(AUCell)
library(SCENIC)
library(ggrepel)
library(BiocParallel)
loom <- open_loom("aucell.loom")
regulons_incidMat <- get_regulons(loom, column.attr.name="Regulons")
regulons_incidMat[1:4,1:4]
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonAUC <- get_regulons_AUC(loom,column.attr.name='RegulonsAUC')
regulonAucThresholds <- get_regulon_thresholds(loom)
tail(regulonAucThresholds[order(as.numeric(names(regulonAucThresholds)))])
embeddings <- get_embeddings(loom)
close_loom(loom)
meta <- read.table("sub_meta.csv", sep = "\t", header = T, stringsAsFactors = F)
cellinfo <- meta[,c("cell_type", "orig.ident", "nFeature_RNA", "nCount_RNA")]
colnames(cellinfo) <- c("celltype", "group", "nGene", "nUMI")
#整体细胞和两组比较
cellTypes <- as.data.frame(subset(cellinfo, select = "celltype"))
groups <- as.data.frame(subset(cellinfo, select = "group"))
rss <- calcRSS(AUC = getAUC(regulonAUC), cellAnnotation = cellTypes[
colnames(regulonAUC), "celltype"])
rss_group <- calcRSS(AUC = getAUC(regulonAUC), cellAnnotation = groups[
colnames(regulonAUC), "group"])
rss <- na.omit(rss)
rss_group <- na.omit(rss_group)
#单独某个细胞在两组间的比较,如Adipocytes
lapply(c("Monocyte", "T cell", "B cell", "Macrophage", "Natural killer cell"),
function (x){
sub_info <- cellinfo[cellinfo$celltype == x,] %>% subset(select = "group")
sub_regauc <- regulonAUC[,match(rownames(sub_info), colnames(regulonAUC))]
sub_rss <- calcRSS(AUC = getAUC(sub_regauc), cellAnnotation = sub_info[
colnames(sub_regauc), "group"])
sub_rss <- na.omit(sub_rss)
sub_rssplot <- plotRSS(sub_rss)
pdf(paste0("./heart/", x, "_tf.pdf"), height = 20, width = 4)
print(sub_rssplot$plot)
dev.off()
write.table(sub_rssplot$df, paste0("./heart/", x, "_tf.csv"), sep = ",",
col.names = T, row.names = F)
})
#可设置zThreshold,大于1.96相当于p<0.05,1.65相当于p<0.1。
rssplot <- plotRSS(rss)
rssplot_group <- plotRSS(rss_group, zThreshold = 1)
#SCENIC自带的点图,和ggplot的点图。
plotly::ggplotly(rssplot$plot)
ggplot(rssplot_group$df, aes(cellType, Topic))+
geom_tile(aes(fill = RSS))+
scale_fill_gradient(low="white",high ='red')
#在seurat对象里查看某些转录因子。
subRNA <- readRDS("tf.rds")
Idents(object = subRNA) <- "cell_type"
DotPlot(subRNA, features = "Hltf")
#绘制各个细胞的有意义的转录因子图。类似火山图
df_cell <- rssplot$df
df_cell$Threshold<-as.factor(ifelse(abs(df_cell$Z) >1.96, "Yes","No"))
#每个细胞的特异性转录因子绘图。并将有差异的表示出来。
df_p <- df_cell[df_cell$cellType == "T cell",]
df_p <- arrange(df_p, desc(RSS))
df_p$Topic <- factor(df_p$Topic, levels = df_p$Topic)
df_p$Threshold <- factor(df_p$Threshold, levels = c("Yes", "No"))
ggplot(data = df_p, aes(Topic, RSS, color = Threshold))+
geom_point(alpha = 0.6, size = 4)+
geom_text_repel(data = subset(df_p, df_p$Threshold == "Yes"),
aes(label = Topic), color = "black")+
theme_bw()+
scale_color_discrete(name = "Z score", labels = c("Z > 1.96", "Z < 1.96"))+
theme(panel.grid = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(),plot.title = element_text(hjust=0.5),
legend.title.align = 0.5)+
labs(x = "Regulons", y = "Regulons specificity score")+
ggtitle("Adipocytes")
#拟时态分析
library(monocle3)
#提取某一种细胞
ns_cell <- "Monocyte"
cell_ns <- subRNA[,subRNA$cell_type == ns_cell]
cell_ns <- SCTransform(cell_ns) %>% RunPCA()
cell_ns <- RunUMAP(cell_ns, dims = 1:40) %>%
FindNeighbors(dims = 1:40) %>%
FindClusters(resolution=0.8)
DimPlot(cell_ns, reduction = "umap")
data <- GetAssayData(cell_ns, assay = 'RNA', slot = 'counts')
cell_metadata <- cell_ns@meta.data
gene_annotation <- data.frame(gene_short_name = rownames(data))
rownames(gene_annotation) <- rownames(data)
cds <- new_cell_data_set(data,
cell_metadata = cell_metadata,
gene_metadata = gene_annotation)
cds <- preprocess_cds(cds, num_dim = 50)
plot_pc_variance_explained(cds)
cds <- align_cds(cds)
cds <- reduce_dimension(cds, cores = 32, reduction_method = "UMAP", preprocess_method = 'PCA')
#整合seurat的坐标
cds.embed <- cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(cell_ns, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
cds@int_colData$reducedDims$UMAP <- int.embed
plot_cells(cds)
cds <- cluster_cells(cds)
cds <- learn_graph(cds)
cds <- order_cells(cds)
#可以在弹出来的框中选择根细胞,也根据坐标进行选择。
p + geom_vline(xintercept = seq(-7,-6,0.25)) + geom_hline(yintercept = seq(0,1,0.25))
embed <- data.frame(Embeddings(scRNAsub, reduction = "umap"))
embed <- subset(embed, UMAP_1 > -6.75 & UMAP_1 < -6.5 & UMAP_2 > 0.24 & UMAP_2 < 0.25)
root.cell <- rownames(embed)
cds <- order_cells(cds, root_cells = root.cell)
cds@colData@listData[["orig.ident"]] <- factor(cds@colData@listData[["orig.ident"]], levels = c("CLP", "sham"))
plot_cells(cds, color_cells_by = "pseudotime", group_label_size = 5,graph_label_size = 5, cell_size = 1)
pdf(paste0("./lung/",ns_cell, "_ns.pdf"), height = 6, width = 8)
plot_cells(cds, color_cells_by = "orig.ident", group_label_size = 5,graph_label_size = 5, cell_size = 1)
dev.off()
save_monocle_objects(cds = cds, directory_path = paste0("./lung/",ns_cell, "_ns1"))
cds <- load_monocle_objects("./lung/Monocyte_ns")
##寻找拟时轨迹差异基因
#graph_test分析最重要的结果是莫兰指数(morans_I),其值在-1至1之间,0代表此基因没有
#空间共表达效应,1代表此基因在空间距离相近的细胞中表达值高度相似。
Track_genes <- graph_test(cds, neighbor_graph = "principal_graph", cores = 16)
write.table(Track_genes, paste0("./lung/",ns_cell, "_track_genes.csv"), row.names = T, col.names = T)
#挑选top10画图展示
Track_genes_sig <- Track_genes %>% top_n(n = 10, morans_I) %>%
pull(gene_short_name) %>% as.character()
#基因表达趋势图
pdf(paste0("./lung/",ns_cell, "_ns_g.pdf"), height = 6, width = 8)
plot_genes_in_pseudotime(cds[Track_genes_sig,], color_cells_by = "orig.ident",
min_expr = 0.5, ncol = 2)
dev.off()
#FeaturePlot图
plot_cells(cds, genes = Track_genes_sig, show_trajectory_graph = FALSE,
label_cell_groups = FALSE, label_leaves = FALSE)
##寻找共表达模块
genelist <- pull(Track_genes, gene_short_name) %>% as.character()
gene_module <- find_gene_modules(cds[genelist,], resolution = 1e-2, cores = 16)
cell_group <- tibble::tibble(cell = row.names(colData(cds)),
cell_group = colData(cds)$orig.ident)
agg_mat <- aggregate_gene_expression(cds, gene_module, cell_group)
row.names(agg_mat) <- stringr::str_c("Module ", row.names(agg_mat))
agg_mat <- cbind(agg_mat[,2], agg_mat[,1])
colnames(agg_mat) <- c("sham", "CLP")
pdf(paste0("./lung/",ns_cell, "_ns_m.pdf"), height = 12, width = 8)
print(pheatmap::pheatmap(agg_mat, scale = "column", clustering_method = "ward.D2"))
dev.off()
names(cds@colData)
Track_genes_sig <- c("Spi1", "Lcn2", "Saa3", "S100a8", "Retnlg", "Pglyrp1", "S100a9",
"Ngp", "Hdc", "C1qa", "Pvr","Icam1", "Ccl9", "Selplg",
"Ptprc", "Cd22", "Ccl6" )
library(CellChat)
#cellchat要求输入标准化后的表达数据,并添加细胞类型信息。
subbRNA <- subset(subRNA, orig.ident == "sham")
data_input <- GetAssayData(subbRNA)
identity <- subset(subbRNA@meta.data, select = c( "cell_type"))
cellchat <- createCellChat(data_input)
cellchat <- addMeta(cellchat, meta = identity, meta.name = c("cell_type"))
levels(cellchat@idents)
cellchat <- setIdent(cellchat, ident.use = "cell_type")
groupSize <- as.numeric(table(cellchat@idents))
#选择参考物种,人是CellChatDB.human, 鼠是CellChatDB.mouse,展示一下数据库内容
CellChatDB <- CellChatDB.mouse
showDatabaseCategory(CellChatDB)
#有secreted signaling, ECM-receptor, cell-cell contant。可选,全部可不选。
#CellChatDB.use <- subsetDB(CellChatDB, search = "Secreted Signaling")
CellChatDB.use <- CellChatDB
cellchat@DB <- CellChatDB.use
##配体-受体分析
# 提取数据库支持的数据子集,即便用所有数据,经过此操作也可节省计算资源
cellchat <- subsetData(cellchat)
future::plan("multisession", workers = 8)
# 识别过表达基因
cellchat <- identifyOverExpressedGenes(cellchat)
# 识别配体-受体对象
cellchat <- identifyOverExpressedInteractions(cellchat)
# 将配体、受体投射到PPI网络,可不执行此步骤,若执行,则将computeCommunProb()加上`raw.use = FALSE`
cellchat <- projectData(cellchat, PPI.mouse)
##推测细胞通讯网络,默认25%截断值,type = "truncatedMean"和trim = 0.1,取10%。
#CellChat在概率计算中也可以考虑各细胞群体中细胞比例的影响。设置 population. size = TRUE.
cellchat <- computeCommunProb(cellchat, raw.use = FALSE, population.size = T)
cellchat <- filterCommunication(cellchat, min.cells = 10)
cellchat <- computeCommunProbPathway(cellchat)
cellchat <- aggregateNet(cellchat)
#细胞通讯网络及权重。
par(mfrow = c(1,2), xpd=TRUE)
netVisual_circle(cellchat@net$count, vertex.weight = groupSize,
weight.scale = T, label.edge= F,
title.name = "Number of interactions")
netVisual_circle(cellchat@net$weight, vertex.weight = groupSize,
weight.scale = T, label.edge= F,
title.name = "Interaction weights/strength")
#每个细胞发出连接及权重。
mat <- cellchat@net$weight
par(mfrow = c(3,2), xpd=TRUE)
for (i in 1:nrow(mat)) {
mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
mat2[i, ] <- mat[i, ]
netVisual_circle(mat2, vertex.weight = groupSize/100,
weight.scale = T, edge.weight.max = max(mat),
title.name = rownames(mat)[i])
}
levels(cellchat@idents) #查看细胞顺序
vertex.receiver = c(1,2) #指定靶细胞的索引,用于hierarchy图
cellchat@netP$pathways #查看富集到的信号通路
pathways.show <- "CCL" #指定需要展示的通路
# Hierarchy plot,层次图
netVisual_aggregate(cellchat, signaling = pathways.show, vertex.size = groupSize/100,
vertex.receiver = vertex.receiver, layout = "hierarchy")
# Circle plot,圆圈图
netVisual_aggregate(cellchat, signaling = pathways.show, vertex.size = groupSize/100,
layout = "circle")
netVisual_aggregate(cellchat, signaling = pathways.show, layout = "chord")
#细胞和基因水平上的和弦图,solt.name可以选择在配体和受体基因层面"net",或者信号通路层面"netP"。
netVisual_chord_cell(cellchat, signaling = pathways.show, slot.name = "netP")
#通过signaling指定需要展示的特定通路下的受配体对,以及用pairLR.use指定需要展示的受配体对
netVisual_chord_gene(cellchat, sources.use = 1, lab.cex = 0.5, targets.use = c(5:6))
#热图
netVisual_heatmap(cellchat, signaling = pathways.show, color.heatmap = "Reds")
# 计算配体-受体对信号网络的贡献度
netAnalysis_contribution(cellchat, signaling = pathways.show)
#提取通路中的受配体信号值,并单独画图
pairLR.CCL <- extractEnrichedLR(cellchat, signaling = pathways.show, geneLR.return = FALSE)
netVisual_individual(cellchat, signaling = pathways.show,
pairLR.use = pairLR.CCL[1,], layout = "chord")
#选择相关受配细胞的配对和信号通路绘制气泡图。
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(1:5), remove.isolate = FALSE)
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(1:5),
signaling = c("MIF","MHC-I"), remove.isolate = FALSE)
pairLR.use <- extractEnrichedLR(cellchat, signaling = c("MIF","MHC-I"))
pairLR.use = pairLR.use[c(1,3),,drop=F]
netVisual_bubble(cellchat, sources.use = c(1), targets.use = c(1:5),
pairLR.use = pairLR.use, remove.isolate = TRUE)
#小提琴绘制信号基因表达分布,默认只显示有统计差异的。
plotGeneExpression(cellchat, signaling = "CCL")
# 分析细胞在信号网络中角色
cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP")
netAnalysis_signalingRole_network(cellchat, signaling = pathways.show)
#可以展示所有的通路,也可展示某几个通路。
netAnalysis_signalingRole_scatter(cellchat) +
ggtitle("All pathway")
netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing")+
netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming")
#细胞通讯模式和信号网络.寻找最佳的k值,给out和in赋值。
library(NMF)
selectK(cellchat, pattern = "outgoing")
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = 3)
library(ggalluvial)
# river plot
netAnalysis_river(cellchat, pattern = "outgoing")
# dot plot
netAnalysis_dot(cellchat, pattern = "outgoing")
selectK(cellchat, pattern = "incoming")
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "incoming", k = 3)
# river plot
netAnalysis_river(cellchat, pattern = "incoming")
# dot plot
netAnalysis_dot(cellchat, pattern = "incoming")
##信号网络聚类
# 按结构相似性聚类
#修改r调用python的路径。
library(reticulate)
use_condaenv("D:\\ProgramFiles\\Anaconda3")
cellchat <- computeNetSimilarity(cellchat, type = "structural")
cellchat <- netEmbedding(cellchat, type = "structural")
#由于future包升级,将netClustering函数里面的'multiprocess'改成了,'multisession'
trace(netClustering, edit = T)
cellchat <- netClustering(cellchat, type = "structural")
# Visualization in 2D-space
netVisual_embedding(cellchat, type = "structural")
netVisual_embeddingZoomIn(cellchat, type = "structural")
# 按功能相似性聚类
cellchat <- computeNetSimilarity(cellchat, type = "functional")
cellchat <- netEmbedding(cellchat, type = "functional")
cellchat <- netClustering(cellchat, type = "functional")
# Visualization in 2D-space
netVisual_embedding(cellchat, type = "functional")
netVisual_embeddingZoomIn(cellchat, type = "functional")
cellchat_sham <- cellchat
cellchat_CLP <- cellchat
save(cellchat, file = "cellchat.rds")
#多组比较可以分别跑完上述的在合并一起.
#https://mp.weixin.qq.com/s?__biz=MzUzMTEwODk0Ng==&mid=2247507275&idx=1&sn=47a8b040307b660ebe483aaec2cc4547&chksm=fa451876cd32916030e713de041b56c79c495753edffd44476daa31fcf61c9819cb30028fcb2&cur_album_id=2487778019613540353&scene=189#wechat_redirect
#https://htmlpreview.github.io/?https://github.com/sqjin/CellChat/blob/master/tutorial/Comparison_analysis_of_multiple_datasets.html
object.list <- list(sham = cellchat_sham, CLP = cellchat_CLP)
cellchat <- mergeCellChat(object.list, add.names = names(object.list))
#比较细胞通讯相互作用的总数和相互作用的强度
pdf("./heart/cell_inter_num.pdf", height = 4.65, width = 8.05)
compareInteractions(cellchat, show.legend = F, group = c(1,2), measure = "count",
color.use = c( "#00BFC4", "#F8766D")) +
compareInteractions(cellchat, show.legend = F, group = c(1,2), measure = "weight",
color.use = c( "#00BFC4", "#F8766D"))
dev.off()
#不同细胞群间相互作用次数或相互作用强度的差异
#红色(或蓝色)颜色的边表示第二个数据集中与第一个数据集相比增加(或减少)的信号。
pdf("./lung/cell_diff.pdf", height = 4.65, width = 8.05)
par(mfrow = c(1,2), xpd=TRUE)
netVisual_diffInteraction(cellchat, weight.scale = T,comparison = c(1,2))
netVisual_diffInteraction(cellchat, weight.scale = T, measure = "weight",comparison = c(1,2))
dev.off()
pdf("./heart/cell_diff_h.pdf", height = 4.65, width = 8.05)
netVisual_heatmap(cellchat,comparison = c(1,2)) +
netVisual_heatmap(cellchat, measure = "weight",comparison = c(1,2))
dev.off()
weight.max <- getMaxWeight(object.list, attribute = c("idents","count"))
pdf("./heart/cell_inter_circle.pdf", height = 4.65, width = 8.05)
par(mfrow = c(1,2), xpd=TRUE)
for (i in 1:length(object.list)) {
netVisual_circle(object.list[[i]]@net$count, weight.scale = T, label.edge= F, edge.weight.max = weight.max[2], edge.width.max = 12, title.name = paste0("Number of interactions - ", names(object.list)[i]))
}
dev.off()
num.link <- sapply(object.list, function(x) {rowSums(x@net$count) + colSums(x@net$count)-diag(x@net$count)})
weight.MinMax <- c(min(num.link), max(num.link)) # control the dot size in the different datasets
gg <- list()
for (i in 1:length(object.list)) {
gg[[i]] <- netAnalysis_signalingRole_scatter(object.list[[i]], title = names(object.list)[i], weight.MinMax = weight.MinMax)
}
pdf("./heart/cell_diff_sg.pdf", height = 4.65, width = 8.05)
patchwork::wrap_plots(plots = gg)
dev.off()
cellchat
signaling_change <- netAnalysis_signalingChanges_scatter(cellchat, idents.use = "T cell")
signaling_share <- signaling_change$data[signaling_change$data$specificity == "Shared",]$labels
#"T cell", Monocyte, Macrophage
netAnalysis_signalingChanges_scatter(cellchat, idents.use = "T cell",
signaling.exclude = signaling_share )
patchwork::wrap_plots(plots = list(gg1,gg2))
cellchat <- computeNetSimilarityPairwise(cellchat, type = "functional",comparison = c(1,2))
cellchat <- netEmbedding(cellchat, type = "functional",comparison = c(1,2))
trace(netClustering, edit = T)
cellchat <- netClustering(cellchat, type = "functional",comparison = c(1,2))
netVisual_embeddingPairwise(cellchat, type = "functional", label.size = 3.5,comparison = c(1,2))
netVisual_embeddingPairwiseZoomIn(cellchat, type = "structural", nCol = 2)
cellchat <- computeNetSimilarityPairwise(cellchat, type = "structural")
cellchat <- netEmbedding(cellchat, type = "structural")
cellchat <- netClustering(cellchat, type = "structural")
netVisual_embeddingPairwise(cellchat, type = "structural", label.size = 3.5)
netVisual_embeddingPairwiseZoomIn(cellchat, type = "structural", nCol = 2)
# 我们只计算两个数据集之间重叠信号通路的距离。 这里不考虑那些只在一个数据集中识别的信号通路。
rankSimilarity(cellchat, type = "functional")
pdf("./heart/cell_diff_xh.pdf", height = 6.5, width = 9)
rankNet(cellchat, mode = "comparison", stacked = T, do.stat = TRUE,
color.use = c( "#00BFC4", "#F8766D"))+
rankNet(cellchat, mode = "comparison", stacked = F, do.stat = TRUE,
color.use = c( "#00BFC4", "#F8766D"))
dev.off()
i = 1
pathway.union <- union(object.list[[i]]@netP$pathways, object.list[[i+1]]@netP$pathways)
netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "outgoing", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 6) +
netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "outgoing", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 6)
netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "incoming", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 6) +
netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "incoming", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 6)
pdf("./heart/cell_diff_hs.pdf", height = 7.5, width = 7)
netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "all", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 14) +
netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "all", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 14)
dev.off()
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(2:5), comparison = c(1, 2), angle.x = 45)
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(2:5), max.dataset = 2,
title.name = "Increased signaling in CLP", comparison = c(1, 2), angle.x = 45)
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(2:5), max.dataset = 1,
title.name = "Decreased signaling in CLP", comparison = c(1, 2), angle.x = 45)
pos.dataset = "CLP"
features.name = pos.dataset
cellchat <- identifyOverExpressedGenes(cellchat, group.dataset = "datasets", pos.dataset = pos.dataset, features.name = features.name, only.pos = FALSE, thresh.pc = 0.1, thresh.fc = 0.1, thresh.p = 1)
net <- netMappingDEG(cellchat, features.name = features.name)
net.up <- subsetCommunication(cellchat, net = net, datasets = "sham",ligand.logFC = 0.2, receptor.logFC = NULL)
net.down <- subsetCommunication(cellchat, net = net, datasets = "CLP",ligand.logFC = -0.1, receptor.logFC = -0.1)
gene.up <- extractGeneSubsetFromPair(net.up, cellchat)
gene.down <- extractGeneSubsetFromPair(net.down, cellchat)
pairLR.use.up = net.up[,"interaction_name", drop = F]
pairLR.use.down = net.down[, "interaction_name", drop = F]
pdf("./lung/cell_signaling.pdf", height = 6, width = 8.5)
netVisual_bubble(cellchat, pairLR.use = pairLR.use.up, sources.use = 2, targets.use = c(1:5), comparison = c(1, 2), angle.x = 45, remove.isolate = T,title.name = paste0("Up-regulated signaling in ", names(object.list)[2]), color.text = c( "#00BFC4", "#F8766D"))+
netVisual_bubble(cellchat, pairLR.use = pairLR.use.down, sources.use = 2, targets.use = c(1:5), comparison = c(1, 2), angle.x = 45, remove.isolate = T,title.name = paste0("Down-regulated signaling in ", names(object.list)[2]), color.text = c( "#00BFC4", "#F8766D"))
dev.off()
par(mfrow = c(1,2), xpd=TRUE)
netVisual_chord_gene(object.list[[2]], sources.use = 1, targets.use = c(2:5), slot.name = 'net', net = net.up, lab.cex = 0.8, small.gap = 3.5, title.name = paste0("Up-regulated signaling in ", names(object.list)[2]))
netVisual_chord_gene(object.list[[1]], sources.use = 1, targets.use = c(2:5), slot.name = 'net', net = net.down, lab.cex = 0.8, small.gap = 3.5, title.name = paste0("Down-regulated signaling in ", names(object.list)[2]))
computeEnrichmentScore(net.down, species = 'mouse')
computeEnrichmentScore(net.up, species = 'mouse')
pathways.show <- c("CXCL")
weight.max <- getMaxWeight(object.list, slot.name = c("netP"), attribute = pathways.show) # control the edge weights across different datasets
par(mfrow = c(1,2), xpd=TRUE)
for (i in 1:length(object.list)) {
netVisual_aggregate(object.list[[i]], signaling = pathways.show, layout = "circle", edge.weight.max = weight.max[1], edge.width.max = 10, signaling.name = paste(pathways.show, names(object.list)[i]))
}
#cellchat@meta$datasets <- factor(cellchat@meta$datasets, levels = c("sham", "CLP")) # set factor level
plotGeneExpression(cellchat, signaling = "TNF", split.by = "datasets", colors.ggplot = T,
color.use = c( "#00BFC4", "#F8766D"))
#筛选基因
mo_tf <- read.csv("./lung/Monocyte_tf.csv")
mo_tf <- unique(mo_tf[,1])
mo_tf <- gsub("(+)", "", mo_tf, fixed = T)
mo_ns <- read.csv("./lung/Monocyte_track_genes.csv", sep = " ")
mo_ns <- na.omit(mo_ns)
mo_ns <- rownames(mo_ns[mo_ns$morans_I > 0.4,])
mo_tn <- union(mo_tf, mo_ns)
#heart细胞通讯差异表达信号通路配对。
mo_tn <- union(mo_tn, c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
"Cxcl16", "Cxcl10","App", "Ccl4", "Ccl6", "Ptprc", "Cd22",
"Ccl2", "Ccl7", "Ccl9", "Selplg", "Cd86"))
#lung
mo_tn <- union(mo_tn, c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
"Cxcl2", "App", "Ccl4", "Ccl6", "Ccl3", "Ptprc", "Cd22",
"Ccl9", "Selplg", "Cd86", "Tnbs1", "Fn1", "Cd22", "Lamc1",
"Grn", "Anxa1", "C3", "Nampt", "Tnf", "Tgfb1","H2-t23",
"H2-t24", "H2-t22"))
pred_gene_h <- intersect(mo_degs, mo_tn)
pred_gene_l <- intersect(mo_degs, mo_tn)
pred_gene <- intersect(pred_gene_l, pred_gene_h)
convertMouseGeneList <- function(x){
require("biomaRt")
human = useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
genesV2 = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = x , mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows=T)
humanx <- unique(genesV2[, 2])
print(head(humanx))
return(humanx)
}
pred_gene1 <- convertMouseGeneList(pred_gene)
write.table(pred_gene1, "genes.txt", col.names = F, row.names = F, quote = F )
mo_sub <- subset(scRNA, ident = "Monocyte")
Idents(mo_sub) <- "orig.ident"
pdf("./heart/gene_m.pdf", height = 15, width = 20)
VlnPlot(mo_sub, features = pred_gene, cols = c( "#00BFC4", "#F8766D"))
dev.off()
#绘制韦恩图
library(venn)
load("lung.rdata")
rm(list = setdiff(ls(), c("DEGs")))
mo_degs_l <- rownames(DEGs$Monocyte[abs(DEGs$Monocyte$avg_log2FC) >1 & DEGs$Monocyte$p_val_adj < 1e-5,])
mo_tf_l <- read.csv("./lung/Monocyte_tf.csv")
mo_tf_l <- unique(mo_tf_l[,1])
mo_tf_l <- gsub("(+)", "", mo_tf_l, fixed = T)
mo_ns_l <- read.csv("./lung/Monocyte_track_genes.csv", sep = " ")
mo_ns_l <- na.omit(mo_ns_l)
mo_ns_l <- rownames(mo_ns_l[mo_ns_l$morans_I > 0.4,])
mo_p_l <- c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
"Cxcl2", "App", "Ccl4", "Ccl6", "Ccl3", "Ptprc", "Cd22",
"Ccl9", "Selplg", "Cd86", "Tnbs1", "Fn1", "Cd22", "Lamc1",
"Grn", "Anxa1", "C3", "Nampt", "Tnf", "Tgfb1","H2-t23",
"H2-t24", "H2-t22")
th_l <- union(union(mo_tf_l, mo_ns_l), mo_p_l)
load("heart.rdata")
rm(list = setdiff(ls(), c("DEGs")))
mo_degs_h <- rownames(DEGs$Monocyte[abs(DEGs$Monocyte$avg_log2FC) >1 & DEGs$Monocyte$p_val_adj < 1e-5,])
mo_tf_h <- read.csv("./heart/Monocyte_tf.csv")
mo_tf_h <- unique(mo_tf_h[,1])
mo_tf_h <- gsub("(+)", "", mo_tf_h, fixed = T)
mo_ns_h <- read.csv("./heart/Monocyte_track_genes.csv", sep = " ")
mo_ns_h <- na.omit(mo_ns_h)
mo_ns_h <- rownames(mo_ns_h[mo_ns_h$morans_I > 0.4,])
mo_p_h <- c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
"Cxcl16", "Cxcl10","App", "Ccl4", "Ccl6", "Ptprc", "Cd22",
"Ccl2", "Ccl7", "Ccl9", "Selplg", "Cd86")
th_h <- union(union(mo_tf_h, mo_ns_h), mo_p_h)
venn_list <- list(th_h, mo_degs_h, mo_degs_l, th_l)
pdf("final_ven.pdf", height = 4, width = 5)
venn(venn_list, ellipse = T, box = F, ilcs = 1, sncs = 1,
snames = c("heart three", "heart DEGs", "lung DEGs", "lung three"),
zcolor = c("#7CAE00", "#C77CFF","#F8766D", "#00BFC4"),
col = c("#7CAE00", "#C77CFF","#F8766D", "#00BFC4"))
dev.off()
#绘制effect图,,plot
source("plot//plot_SMR.r")
SMRData <- ReadSMRData("plot//heart.ENSG00000179344.txt")
pdf("DQB1.pdf", height = 6, width = 8)
SMREffectPlot(data=SMRData) #, trait_name="Sepsis")
dev.off()
#转换id
library(AnnotationDbi)
library(org.Hs.eg.db)
results_eqt$Gene = mapIds(org.Hs.eg.db, results_eqt$probeID,"SYMBOL","ENSEMBL")
eqtl <- merge(results_eqt, gels, by = "Gene")
results_eqt[results_eqt$Gene == "BIRC3",]
write.table(heart, "heart.txt", col.names = T, row.names = F, quote = F)
write.table(lung, "lung.txt", col.names = T, row.names = F, quote = F)
write.table(atrial, "atrial.txt", col.names = T, row.names = F, quote = F)
write.table(eqtl, "eqtl.txt", col.names = T, row.names = F, quote = F)
write.table(blood, "blood.txt", col.names = T, row.names = F, quote = F)
#GEO中3个队列研究数据验证。
library(GEOquery)
library(DESeq2)
library(tidyverse)
ges <- getGEO("GSE185263", GSEMatrix = T)
exp <- read.csv("GSE185263_raw_counts.csv.gz", header = T, row.names = 1)
exp <- exp[rowMeans(exp) > 1,]
group <- data.frame(ges[[1]]$title, ges[[1]]$`disease state:ch1`, ges[[1]]$`in hospital mortality:ch1`)
colnames(group) <- c("case", "disease", "death")
health <- group[group$disease == "healthy",]
health <- data.frame(health$disease, row.names = health$case)
colnames(health) <- "disease"
sepsis <- group[group$disease == "sepsis" & group$death == "Survived",]
sepsis <- group[group$disease == "sepsis",]
sepsis <- data.frame(sepsis$disease, row.names = sepsis$case)
colnames(sepsis) <- "disease"
death <- group[group$death == "Died",]
death <- data.frame(death$death, row.names = death$case)
colnames(death) <- "disease"
h2s <- rbind(health, sepsis)
h2s$disease <- as.factor(h2s$disease)
exda <- exp[,row.names(h2s)]
s2d <- rbind(sepsis, death)
s2d$disease <- as.factor(s2d$disease)
exda <- exp[,row.names(s2d)]
h2d <- rbind(health, death)
h2d$disease <- as.factor(h2d$disease)
exda <- exp[,row.names(h2d)]
dds <- DESeqDataSetFromMatrix(exda, colData = h2s, design = ~ disease)
dds <- DESeq(dds)
res <- results(dds)
res$pvalue[res@rownames == "ENSG00000179344"]
res$padj[res@rownames == "ENSG00000196735"]
write.table(res[c("ENSG00000179344", "ENSG00000196735"),], "GSE185263.txt", quote = F)
#画个柱状图吧
plotda <- cbind(group, t(exp["ENSG00000196735",]))
pdf("GSE185263_A.pdf", height = 4, width = 4)
ggplot(plotda, aes(x = disease, y = ENSG00000196735, fill = disease))+
geom_violin(trim = FALSE, color = "black")+
theme_classic()+
theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
stat_summary(fun = "mean", geom ="point", shape = 18)+
stat_summary(fun.data = "mean_se", geom ="errorbar", width = 0.15)+
scale_fill_manual(values = c( "#00BFC4", "#F8766D"))+
geom_segment(aes(x = 1, xend = 2, y = max(plotda$ENSG00000196735) *1.1, yend = max(plotda$ENSG00000196735) *1.1))+
annotate("text", x = 1.5, y = max(plotda$ENSG00000196735) *1.11, label = "*", size = 10)+
xlab("group")+
ylab("Expression of HLA-DQA1")+
ggtitle("GSE185263")
dev.off()
ges1 <- getGEO("GSE154918", GSEMatrix = T)
exp1 <- read.delim("GSE154918.tsv.gz", header = T)
exp1 <- exp1[rowMeans(exp1) > 1,]
row.names(exp1) <- exp1$GeneID
group1 <- data.frame(ges1[[1]]$geo_accession, ges1[[1]]$`status:ch1`)
health1 <- group1[group1$ges1..1....status.ch1. == "Hlty",]
colnames(health1) <- c("id","disease")
sepsis1 <- group1[(group1$ges1..1....status.ch1. != "Hlty" & group1$ges1..1....status.ch1. != "Inf1_P"), ]
colnames(sepsis1) <- c("id","disease")
sepsis1$disease <- rep("sepsis", times = length(sepsis1$id))
h2s1 <- rbind(health1, sepsis1)
h2s1$disease <- as.factor(h2s1$disease)
exda1 <- exp1[,h2s1$id]
dds1 <- DESeqDataSetFromMatrix(exda1, colData = h2s1, design = ~ disease)
dds1 <- DESeq(dds1)
res1 <- results(dds1)
res1$padj[res1@rownames == 3119]
write.table(res1[c("3117", "3119"),], "GSE154918.txt", quote = F )
plotda <- cbind(h2s1, t(exp1['3117',][,h2s1$id]))
colnames(plotda) <- c("id", "disease", "ge" )
pdf("GSE154918_A.pdf", height = 4, width = 4)
ggplot(plotda, aes(x = disease, y = ge, fill = disease))+
geom_violin(trim = FALSE, color = "black")+
theme_classic()+
theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
stat_summary(fun = "mean", geom ="point", shape = 18)+
stat_summary(fun.data = "mean_se", geom ="errorbar", width = 0.15)+
scale_fill_manual(values = c( "#00BFC4", "#F8766D"))+
scale_x_discrete(labels = c("healthy", "sepsis"))+
geom_segment(aes(x = 1, xend = 2, y = max(plotda$ge) *1.3, yend = max(plotda$ge) *1.3))+
annotate("text", x = 1.5, y = max(plotda$ge) *1.31, label = "*", size = 10)+
xlab("group")+
ylab("Expression of HLA-DQA1")+
ggtitle("GSE154918")
dev.off()
ges2 <- getGEO(filename = "./GSE134347_series_matrix.txt.gz", getGPL = F)#Limma
exp2 <- data.frame(ges2@assayData$exprs)
ann <- read.delim("GPL17586-45144.txt", comment.char = "#", header = T)
ann1 <- separate_wider_delim(ann, gene_assignment, "//", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$b, row.names = ann1$ID)
exp3 <- merge(exp2, ann2, by = "row.names")
exp3 <- exp3[exp3$ann1.b != " --- ",]
exp4 <- exp3 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
arrange(desc(rowMean)) %>%
distinct(ann1.b, .keep_all = TRUE) %>%
select(-rowMean)
row.names(exp4) <- exp4$ann1.b
exp4 <- exp4 %>% select(-Row.names) %>% select(-ann1.b)
sum(is.na(exp4))
group2 <- data.frame(ges2$geo_accession, ges2$`disease state:ch1`)
health2 <- group2[group2$ges2..disease.state.ch1. == "healthy",]
colnames(health2) <- c("id", "disease")
sepsis2 <- group2[group2$ges2..disease.state.ch1. == "sepsis",]
colnames(sepsis2) <- c("id", "disease")
h2s2 <- rbind(health2, sepsis2)
my <- exp4[,h2s2$id]
library(limma)
design <- model.matrix(~0+h2s2$disease)
colnames(design) <- c("healthy", "sepsis")
row.names(design) <- h2s2$id
contrast.matrix <-makeContrasts(sepsis - healthy, levels = design)
fit <- lmFit(my,design)
fit1 <- contrasts.fit(fit,contrast.matrix)
fit2 <- eBayes(fit1, 0.01)
tempOutput <- topTable(fit2,adjust = "fdr", coef = 1, n = Inf)
nrDEG <- na.omit(tempOutput)
nrDEG[" HLA-DQA1 ",]
write.table(nrDEG[c(" HLA-DQB1 ", " HLA-DQA1 "),], "GSE134347.txt", quote = F)
plotda <- cbind(h2s2, t(my[" HLA-DQB1 ",]))
head(plotda)
colnames(plotda) <- c("id", "disease", "ge" )
pdf("GSE134347.pdf", height = 4, width = 4)
ggplot(plotda, aes(x = disease, y = ge, fill = disease))+
geom_violin(trim = FALSE, color = "black")+
theme_classic()+
theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
stat_summary(fun = "mean", geom ="point", shape = 18)+
stat_summary(fun.data = "mean_se", geom ="errorbar", width = 0.15)+
scale_fill_manual(values = c( "#00BFC4", "#F8766D"))+
scale_x_discrete(labels = c("healthy", "sepsis"))+
geom_segment(aes(x = 1, xend = 2, y = max(plotda$ge) *1.1, yend = max(plotda$ge) *1.1))+
annotate("text", x = 1.5, y = max(plotda$ge) *1.11, label = "*", size = 10)+
xlab("group")+
ylab("Expression of HLA-DQB1")+
ggtitle("GSE134347")
dev.off()
###根据HLA-DQA1和HLA-DQB1在11个队列中绘制ROC曲线
library(pROC)
rocdata <- t(exp[c("ENSG00000196735", "ENSG00000179344"),])
rocdata <- data.frame(rocdata, group$disease)
ggplot(rocdata, aes(x = log(ENSG00000196735), y = log(ENSG00000179344), colour = group.disease))+
geom_point()
pdf("GSE185263_r.pdf", height = 4.5, width = 4.5)
roc_a <- plot.roc(rocdata$group.disease, rocdata[,1], col = "#00828B", main = "GSE185263")
roc_b <- lines.roc(rocdata$group.disease, rocdata[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b$auc, 3)),
col = "#F03752")
dev.off()
rocdata1 <- t(exp1[c("3117", "3119"),])[-1,]
rocdata1 <- data.frame(rocdata1, group1$ges1..1....status.ch1.)
rocdata1 <- do.call(rbind, lapply(c("Hlty", "Shock_P","Seps_P"), function (x) {rocdata1[rocdata1$group1.ges1..1....status.ch1. == x,]}))
rocdata1[rocdata1$group1.ges1..1....status.ch1. == "Shock_P", ]$group1.ges1..1....status.ch1. <- "Seps_P"
ggplot(rocdata1, aes(x = log(X3117), y = log(X3119), colour = group1.ges1..1....status.ch1.))+
geom_point()
pdf("GSE154918_r.pdf", height = 4.5, width = 4.5)
roc_a1 <- plot.roc(rocdata1[,3], rocdata1[,1], col = "#00828B",
main = "GSE154918")
roc_b1 <- lines.roc(rocdata1[,3], rocdata1[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a1$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b1$auc, 3)),
col = "#F03752")
dev.off()
rocdata2 <- t(exp4[c(" HLA-DQA1 ", " HLA-DQB1 "),])
rocdata2 <- data.frame(rocdata2, group2$ges2..disease.state.ch1.)
rocdata2 <- rocdata2[rocdata2$group2.ges2..disease.state.ch1. != "noninfectious",]
ggplot(rocdata2, aes(x = X.HLA.DQA1., y = X.HLA.DQB1.,
colour = group2.ges2..disease.state.ch1.))+geom_point()
pdf("GSE134347_r.pdf", height = 4.5, width = 4.5)
roc_a2 <- plot.roc(rocdata2$group2.ges2..disease.state.ch1., rocdata2[,1], col = "#00828B", main = "GSE134347")
roc_b2 <- lines.roc(rocdata2$group2.ges2..disease.state.ch1., rocdata2[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a2$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b2$auc, 3)),
col = "#F03752")
dev.off()
ges40 <- getGEO(filename = "./GSE137340_series_matrix.txt.gz", getGPL = F)
group40 <- data.frame(ges40$`diagnosis:ch1`, ges40$`time of sample collection:ch1`)
exp40 <- data.frame(ges40@assayData$exprs)
ann <- read.delim("GPL10558.annot.gz", skip = 28, header = T)
ann1 <- data.frame(ann$Gene.symbol, row.names = ann$ID)
exp40 <- merge(ann1, exp40, by = "row.names")
#无HLA-DQA1,只有HLA-DQA2,
rocdata40 <- t(exp40[exp40$ann.Gene.symbol == "HLA-DQB1",])[-c(1,2),]
rocdata40 <- data.frame(rocdata40, group40)
rocdata40 <- rocdata40[rocdata40$ges40..time.of.sample.collection.ch1. != "24 hrs",]
rocdata40[rocdata40$ges40..diagnosis.ch1. != "Healthy",][2] <- "sepsis"
ggplot(rocdata2, aes(x = X.HLA.DQA1., y = X.HLA.DQB1.,
colour = group2.ges2..disease.state.ch1.))+geom_point()
pdf("GSE137340_r.pdf", height = 4.5, width = 4.5)
roc_b40 <- plot.roc(rocdata40$ges40..diagnosis.ch1., as.numeric(rocdata40[,1]),
col = "#F03752", main = "GSE137340")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b40$auc, 3)),
col = "#F03752")
dev.off()
ges63 <- getGEO(filename = "./GSE69063_series_matrix.txt.gz", getGPL = F)
group63 <- data.frame(ges63$`disease status:ch1`, ges63$`time point:ch1`)
exp63 <- data.frame(ges63@assayData$exprs)
rocdata63 <- t(exp63[c("3117_at", "3119_at"),])
rocdata63 <- data.frame(rocdata63, group63)
rocdata63 <- do.call(rbind, lapply(c("Healthy control","Sepsis"), function (x) {
rocdata63[rocdata63$ges63..disease.status.ch1.== x,]}))
rocdata63 <- rocdata63[rocdata63$ges63..time.point.ch1. != "T1",]
rocdata63 <- rocdata63[rocdata63$ges63..time.point.ch1. != "T2",]
ggplot(rocdata63, aes(x = X3117_at, y = X3119_at,
colour = ges63..disease.status.ch1.))+geom_point()
pdf("GSE69063_r.pdf", height = 4.5, width = 4.5)
roc_a63 <- plot.roc(rocdata63$ges63..disease.status.ch1., rocdata63[,1], col = "#00828B", main = "GSE69063")
roc_b63 <- lines.roc(rocdata63$ges63..disease.status.ch1., rocdata63[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a63$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b63$auc, 3)),
col = "#F03752")
dev.off()
ges59 <- getGEO(filename = "./GSE100159_series_matrix.txt.gz", getGPL = F)
group59 <- ges59$`sample class:ch1`
exp59 <- data.frame(ges59@assayData$exprs)
rocdata59 <- data.frame(t(exp59["ILMN_1661266",]), group59)
rocdata59[rocdata59$group59 != "Control", ][2] <- "sepsis"
pdf("GSE100159_r.pdf", height = 4.5, width = 4.5)
roc_b59 <- plot.roc(rocdata59$group59, rocdata59[,1], col = "#F03752",
main = "GSE100159")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b59$auc, 3)),
col = "#F03752")
dev.off()
ges33 <- getGEO(filename = "./GSE95233_series_matrix.txt.gz", getGPL = F)
exp33 <- data.frame(ges33@assayData$exprs)
group33 <- data.frame(ges33$source_name_ch1, ges33$`time point:ch1`)
ann <- read.delim("GPL570.annot.gz", skip = 27, header = T)
ann1 <- separate_wider_delim(ann, Gene.symbol, "///", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$a, row.names = ann1$ID)
exp33 <- merge(ann2, exp33, by = "row.names")
exp33 <- exp33 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
arrange(desc(rowMean)) %>%
distinct(ann1.a, .keep_all = TRUE) %>%
select(-rowMean)
row.names(exp33) <- exp33$ann1.a
exp33 <- exp33 %>% select(-Row.names) %>% select(-ann1.a)
rocdata33 <- t(exp33[c("HLA-DQA1", "HLA-DQB1"),])
rocdata33 <- data.frame(rocdata33, group33)
rocdata33 <- rocdata33[rocdata33$ges33..time.point.ch1. != "D02",]
rocdata33 <- rocdata33[rocdata33$ges33..time.point.ch1. != "D03",]
pdf("GSE95233_r.pdf", height = 4.5, width = 4.5)
roc_a33 <- plot.roc(rocdata33$ges33..time.point.ch1., rocdata33[,1], col = "#00828B",
main = "GSE95233")
roc_b33 <- lines.roc(rocdata33$ges33..time.point.ch1., rocdata33[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a33$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b33$auc, 3)),
col = "#F03752")
dev.off()
ges65 <- getGEO(filename = "./GSE57065_series_matrix.txt.gz", getGPL = F)
exp65 <- data.frame(ges65@assayData$exprs)
group65 <- data.frame(ges65$source_name_ch1, ges65$description)
ann <- read.delim("GPL570.annot.gz", skip = 27, header = T)
ann1 <- separate_wider_delim(ann, Gene.symbol, "///", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$a, row.names = ann1$ID)
exp65 <- merge(ann2, exp65, by = "row.names")
exp65 <- exp65 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
arrange(desc(rowMean)) %>%
distinct(ann1.a, .keep_all = TRUE) %>%
select(-rowMean)
row.names(exp65) <- exp65$ann1.a
exp65 <- exp65 %>% select(-Row.names) %>% select(-ann1.a)
rocdata65 <- t(exp65[c("HLA-DQA1", "HLA-DQB1"),])
rocdata65 <- data.frame(rocdata65, group65)
rocdata65 <- rocdata65[nchar(rocdata65$ges65.description) != 7,]
rocdata65$group <-c(rep("sepsis", times = 28), rep("health", times = 25))
pdf("GSE57065_r.pdf", height = 4.5, width = 4.5)
roc_a65 <- plot.roc(rocdata65$group, rocdata65[,1], col = "#00828B",
main = "GSE57065")
roc_b65 <- lines.roc(rocdata65$group, rocdata65[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a65$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b65$auc, 3)),
col = "#F03752")
dev.off()
ges28 <- getGEO(filename = "./GSE69528_series_matrix.txt.gz", getGPL = F)
group28 <- data.frame(ges28$`study group:ch1`, ges28$`pathogens:ch1`)
exp28 <- data.frame(ges28@assayData$exprs)
rocdata28 <- data.frame(t(exp28["ILMN_1661266",]), group28)
rocdata28 <- rocdata28[rocdata28$ges28..study.group.ch1. != "Uninfected type 2 diabetes mellitus",]
rocdata28[rocdata28$ges28..study.group.ch1. != "Uninfected healthy", ][2] <- "sepsis"
pdf("GSE69528_r.pdf", height = 4.5, width = 4.5)
roc_b28 <- plot.roc(rocdata28$ges28..study.group.ch1., rocdata28[,1], col = "#F03752",
main = "GSE69528")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b28$auc, 3)),
col = "#F03752")
dev.off()
ges50 <- getGEO(filename = "./GSE28750_series_matrix.txt.gz", getGPL = F)
exp50 <- data.frame(ges50@assayData$exprs)
group50 <- data.frame(ges50$`health status:ch1`)
ann <- read.delim("GPL570.annot.gz", skip = 27, header = T)
ann1 <- separate_wider_delim(ann, Gene.symbol, "///", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$a, row.names = ann1$ID)
exp50 <- merge(ann2, exp50, by = "row.names")
exp50 <- exp50 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
arrange(desc(rowMean)) %>%
distinct(ann1.a, .keep_all = TRUE) %>%
select(-rowMean)
row.names(exp50) <- exp50$ann1.a
exp50 <- exp50 %>% select(-Row.names) %>% select(-ann1.a)
rocdata50 <- t(exp50[c("HLA-DQA1", "HLA-DQB1"),])
rocdata50 <- data.frame(rocdata50, group50)
rocdata50 <- rocdata50[rocdata50$ges50..health.status.ch1. != "POST_SURGICAL",]
pdf("GSE28750_r.pdf", height = 4.5, width = 4.5)
roc_a50 <- plot.roc(rocdata50$ges50..health.status.ch1., rocdata50[,1], col = "#00828B",
main = "GSE28750")
roc_b50 <- lines.roc(rocdata50$ges50..health.status.ch1., rocdata50[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a50$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b50$auc, 3)),
col = "#F03752")
dev.off()
ges14 <- getGEO(filename = "./GSE54514_series_matrix.txt.gz", getGPL = F)
group14 <- data.frame(strsplit(ges14$`group_day:ch1`, "_"))
exp14 <- data.frame(ges14@assayData$exprs)
rocdata14 <- data.frame(t(exp14["ILMN_1661266",]), t(group14))
rocdata14 <- rocdata14[rocdata14$X2 == "D1", ]
rocdata14 <- rocdata14[]
pdf("GSE54514_r.pdf", height = 4.5, width = 4.5)
roc_b14 <- plot.roc(rocdata14$X1, rocdata14[,1], col = "#F03752",
main = "GSE54514")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b14$auc, 3)),
col = "#F03752")
dev.off()
ges13 <- getGEO(filename = "./GSE139913_series_matrix.txt.gz", getGPL = F)
exp13 <- read.table("GSE139913_norm_data.txt.gz", header = T, row.names = 1)
group13 <- ges13$description
rocdata13 <- t(exp13[c("HLA-DQA1", "HLA-DQB1"),])
rocdata13 <- data.frame(rocdata13, group13)
rocdata13 <- rocdata13[rocdata13$group13 != "Non-Sepsis",]
pdf("GSE139913_r.pdf", height = 4.5, width = 4.5)
roc_a13 <- plot.roc(rocdata13$group13, rocdata13[,1], col = "#00828B",
main = "GSE139913")
roc_b13 <- lines.roc(rocdata13$group13, rocdata13[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a13$auc, 3)),
col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b13$auc, 3)),
col = "#F03752")
dev.off()
##########################################################################################################################################################
#以下分析代码不是使用的r,使用相应软件。
##########################################################################################################################################################
###1000gene转换为PLINK格式
https://mirrors.sjtug.sjtu.edu.cn/cran/web/packages/snpsettest/vignettes/reference_1000Genomes.html
解压
plink2 --zst-decompress all_phase3.pgen.zst > all_phase3.pgen
筛选染色体和人群
plink2 --pfile all_phase3 vzs --chr 1-22 --output-chr 26 --max-alleles 2 --rm-dup exclude-mismatch --set-missing-var-ids '@_#_$1_$2' --make-pgen --out all_phase3_autosomes --allow-extra-chr
plink2 --pfile all_phase3_autosomes --keep EUR_1kg_samples.txt --make-pgen --out EUR_phase3_autosomes
转为PLINK 1二进制文件
plink2 --pfile EUR_phase3_autosomes --maf 0.005 --make-bed --out EUR_phase3_autosomes
然后运行文件将每一条染色体的数据都单独划分出来
@echo off
set /a count=1
:loop
if %count% leq 22 (
plink2 --bfile EUR_phase3_autosomes --chr %count% --make-bed --out EUR_phase3_chr%count%
set /a count=%count%+1
goto loop
)
echo All chr created!
############################################################################################################################################
#给GWAS数据按照变异位点注释出rs号
#参考链接
#https://gwaslab.org/2021/05/25/annovar/
#https://annovar.openbioinformatics.org/en/latest/
#下载注释文件。Download appropriate database files avsnp150
perl annotate_variation.pl -buildver hg19 -downdb -webfrom annovar avsnp150 humandb/
#按照染色体号和位置进行注释。Annotate the variants base on chr:pos
perl annotate_variation.pl -filter -dbtype avsnp150 -buildver hg19 ana.avinput humandb/ -out ana
##############################################################################################################################################
SMR & HEIDI analysis
smr-1.3.1 --bfile EUR_phase3_chr21 --gwas-summary 21gwas.ma --beqtl-summary cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense --out mysmr --thread-num 10 --extract-probe myprobe.list
SMR locus plot data
smr-1.3.1 --bfile 1000gen/EUR_phase3_chr6 --gwas-summary GCST90270871.ma --beqtl-summary eQTL_besd_lite\Whole_Blood.lite --out dqb1_plot --plot --probe ENSG00000179344 --probe-wind 500 --gene-list glist-hg19
#设置个循环
@echo off
set /a count=1
mkdir results_blood
:loop
if %count% leq 22 (
echo doing chr %count%
smr-1.3.1 --bfile 1000gen\EUR_phase3_chr%count% --gwas-summary GCST90270871.ma --beqtl-summary eQTL_besd_lite\Whole_Blood.lite --out results_blood\%count%smr --thread-num 13
set /a count=%count%+1
goto loop
)
echo done!
pause
#################################################################################################################################################
R
1
https://gitee.com/lzh23/GEO1.git
git@gitee.com:lzh23/GEO1.git
lzh23
GEO1
生信代码们
master

搜索帮助