GEO1
/
scRNA.R

setwd("D:\\ProgramFiles\\R\\Work\\scRNA")

set.seed(2023)

library(Seurat)
library(tidyverse)
library(cowplot)

load("heart_h.RData")

dir.create("./heart")

fs=list.files('./GSE207363/','^GSM')
samples=str_split(fs,'_',simplify = T)[,2]

#为每个样本创建子文件夹,重命名文件，并移动到相应的子文件夹里
lapply(unique(samples),function(x){
  y=fs[grepl(x,fs)]
  folder=paste0("GSE207363/", str_split(y[1],'_',simplify = T)[,2])
  dir.create(folder,recursive = T)
  file.rename(paste0("GSE207363/",y[1]),file.path(folder,"barcodes.tsv.gz"))
  file.rename(paste0("GSE207363/",y[2]),file.path(folder,"features.tsv.gz"))
  file.rename(paste0("GSE207363/",y[3]),file.path(folder,"matrix.mtx.gz"))
})

samples <- c("sham","CLP")
dir <- file.path('./GSE207363',samples)
names(dir) <- samples
counts <- Read10X(data.dir = dir)

#循环读入在合并，与上面的一并读入结果不太一样。
for (file in samples){
    seurat_data <- Read10X(data.dir = paste0("GSE207363/", file))
    seurat_obj <- CreateSeuratObject(counts = seurat_data, min.features = 200,
                                     min.cells = 20, project = file)
    assign(file, seurat_obj)
}

scRNA <- merge(x = sham, y = CLP, add.cell.ids = c("sham", "CLP"))
rm(list = setdiff(ls(), "scRNA"))

scRNA$orig.ident <- factor(scRNA$orig.ident, levels = c("sham", "CLP"))

scRNA <- CreateSeuratObject(counts[["Gene Expression"]])
adt <- CreateAssayObject(counts[["Antibody Capture"]])
all.equal(colnames(counts[["Gene Expression"]]), colnames(counts[["Antibody Capture"]]))
scRNA[["ADT"]] <- adt

#如需ID转换，使用EnsDb.Hsapiens.v86而不是org.Hs.eg.db，可以保留MT-
#筛选线粒体,核糖体基因和ERCC，人是MT-，有些是MT_，鼠是mt-
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^mt-")
scRNA[["percent.rb"]] <- PercentageFeatureSet(scRNA, pattern = "^Rp[sl]")
scRNA[["percent.ercc"]] <- PercentageFeatureSet(scRNA, pattern = "^Ercc")
scRNA$log10GenesPerUMI <- log10(scRNA$nFeature_RNA) / log10(scRNA$nCount_RNA)
scRNA$mitoRatio <- PercentageFeatureSet(object = scRNA, pattern = "^mt-")
scRNA$mitoRatio <- scRNA@meta.data$mitoRatio / 100

#查看头部数据
head(scRNA@meta.data)
View(scRNA@meta.data)

VlnPlot(scRNA, features = c("nFeature_RNA", "nCount_RNA", "percent.mt"), ncol = 3)
FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "percent.mt")
FeatureScatter(scRNA, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

scRNA <- subset(scRNA, subset = nFeature_RNA > 250 & nCount_RNA > 500
                & percent.mt < 10 & log10GenesPerUMI > 0.80)

#选择只保留在10个或更多细胞中表达的基因。
counts <- GetAssayData(object = scRNA, slot = "counts")
nonzero <- counts > 0
keep_genes <- Matrix::rowSums(nonzero) >= 10
filtered_counts <- counts[keep_genes,]
scRNA <- CreateSeuratObject(filtered_counts, meta.data = scRNA@meta.data)
scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA,selection.method = "vst", nfeatures = 2000)
VariableFeaturePlot(scRNA)

#调大内存
options(future.globals.maxSize = 10000 * 1024^2)
#对所有基因数据都缩放，内存占用太多
all.genes <- rownames(scRNA)
scRNA <- ScaleData(scRNA, features = all.genes)
#对高可变基因的子集进行数据缩放，要不内存不够用。。
scRNA <- ScaleData(scRNA)

#提取细胞周期基因
#把人的基因转换成鼠的对应基因，
convertHumanGeneList <- function(x){
  require("biomaRt")
  human = useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
  mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
  genesV2 = getLDS(attributes = c("hgnc_symbol"), filters = "hgnc_symbol", values = x , mart = human, attributesL = c("mgi_symbol"), martL = mouse, uniqueRows=T)
  humanx <- unique(genesV2[, 2])
  print(head(humanx))
  return(humanx)
}
s.genes <- convertHumanGeneList(cc.genes.updated.2019$s.genes)
g2m.genes <- convertHumanGeneList(cc.genes.updated.2019$g2m.genes)

#biomaRt网络不行，用homologenen
library(homologene)
s.genes <- homologene(cc.genes.updated.2019$s.genes, inTax = 9606, outTax = 10090)[,2]
g2m.genes <- homologene(cc.genes.updated.2019$g2m.genes, inTax = 9606, outTax = 10090)[,2]

scRNA <- CellCycleScoring(scRNA, s.features = s.genes, g2m.features = g2m.genes,
                          set.ident = T)

#根据PCA看s和g是否分开决定是否进行去除细胞周期
scRNA <- RunPCA(scRNA, features = c(s.genes, g2m.genes))
DimPlot(scRNA, reduction = "pca", group.by = "Phase", split.by = "Phase")
scRNA <- ScaleData(scRNA, vars.to.regress = c("S.Score", "G2M.Score"),
                   features = rownames(scRNA))

#如果不想分G1和G2，使用s和g的差值进行去除细胞周期
marrow$CC.Difference <- marrow$S.Score - marrow$G2M.Score
marrow <- ScaleData(marrow, vars.to.regress = "CC.Difference", features = rownames(marrow))

#SCT流程
scRNA <- CreateSeuratObject(counts, project = "sham", min.cells = 10, min.features = 200)
scRNA[["percent.mt"]] <- PercentageFeatureSet(scRNA, pattern = "^mt-")
scRNA <- subset(scRNA, subset = nFeature_RNA > 200 & nFeature_RNA < 7500 & percent.mt < 10)
scRNA <- CellCycleScoring(scRNA, s.features = s.genes, g2m.features = g2m.genes,
                          set.ident = T)
scRNA <- SCTransform(scRNA, method = "glmGamPoi", ncells = 8824, verbose = T,
                     vars.to.regress = c("percent.mt","S.Score","G2M.Score"))

#分组分别进行SCT
scRNA <- SplitObject(scRNA, split.by = "orig.ident")
for (i in 1:length(scRNA)){
  scRNA[[i]] <- NormalizeData(scRNA[[i]], verbose = T)
  scRNA[[i]] <- CellCycleScoring(scRNA[[i]],s.features = s.genes,
                                       g2m.features = g2m.genes,
                                       set.ident = T )
  scRNA[[i]] <- SCTransform(scRNA[[i]], method = "glmGamPoi", ncells = 8824, verbose = T,
                                  vars.to.regress = c("percent.mt","S.Score","G2M.Score"))
}
scRNA$sham@assays
scRNA$CLP@assays

#将各个集落进行整合
integ_features <-  SelectIntegrationFeatures(object.list = scRNA,
                                             nfeatures = 3000)
scRNA <- PrepSCTIntegration(object.list = scRNA,
                                  anchor.features = integ_features)
integ_anchor <- FindIntegrationAnchors(object.list = scRNA,
                                       normalization.method = "SCT",
                                       anchor.features = integ_features)
scRNA <- IntegrateData(anchorset = integ_anchor, normalization.method = "SCT")

saveRDS(scRNA, "lung_integ.rds")
rm(list = c("integ_features", "integ_anchor", "counts", "nonzero",
            "filtered_counts", "keep_genes", "all.genes"))

#常规PCA，筛选PCA数量。
scRNA <- RunPCA(scRNA, features = VariableFeatures(object = scRNA))
DimPlot(scRNA)
VizDimLoadings(scRNA, dims = 1:9, reduction = "pca")
DimHeatmap(scRNA, dims = 1:6, nfeatures = 20, cells = 500, balanced = T)

#不同的方式来帮助我们选择PC的数目，并且分别都对应不同的可视化
scRNA <- JackStraw(scRNA, num.replicate = 100)
scRNA <- ScoreJackStraw(scRNA, dims = 1:20)
JackStrawPlot(scRNA, dims = 1:20)
ElbowPlot(scRNA, ndims = 40)

#选择拐点处的PCA数量,sct的结果直接选择40个pca。
scRNA <- RunPCA(scRNA)
PCAPlot(scRNA,split.by = "orig.ident")
scRNA <- FindNeighbors(scRNA, dims = 1:40)
scRNA <- FindClusters(scRNA, resolution = c(0.4, 0.6, 0.8, 1.0, 1.4))
scRNA <- RunUMAP(scRNA, dims = 1:40)

#选择不同的resolution，可以画不同的图。
Idents(object = scRNA) <- "orig.ident"
DimPlot(scRNA, reduction = "umap", label = TRUE, label.size = 6) + theme(legend.position= "none")

#获取不同集落中的细胞数量
ncells <- FetchData(scRNA, vars = c("ident", "orig.ident")) %>%
  dplyr::count(ident, orig.ident) %>% tidyr::spread(ident, n)

#按细胞周期和分组进行分群
DimPlot(scRNA,reduction = "umap", label = TRUE, label.size = 6,
        #split.by = "orig.ident"
        split.by = "Phase"
        ) + NoLegend()
#按各种无意义变异源分群，观察分布不均匀的集落，注意是否可利用细胞类型进行区分。
metrics <-  c("nFeature_RNA", "nCount_RNA", "S.Score", "G2M.Score", "mitoRatio")
FeaturePlot(scRNA, reduction = "umap", features = metrics, pt.size = 0.4,
            sort.cell = TRUE, min.cutoff = 'q10', label = TRUE)
#按PC进行分群
columns <- c(paste0("PC_", 1:16), "ident", "UMAP_1", "UMAP_2")
pc_data <- FetchData(scRNA, vars = columns)
umap_label <- FetchData(scRNA,vars = c("ident", "UMAP_1", "UMAP_2"))  %>%
  group_by(ident) %>% summarise(x=mean(UMAP_1), y=mean(UMAP_2))
map(paste0("PC_", 1:16), function(pc){
  ggplot(pc_data,aes(UMAP_1, UMAP_2)) +
    geom_point(aes_string(color=pc), alpha = 0.7) +
    scale_color_gradient(guide = FALSE, low = "grey90", high = "blue") +
    geom_text(data=umap_label, aes(label=ident, x, y)) +
    ggtitle(pc)
}) %>% plot_grid(plotlist = .)
print(scRNA[["pca"]], dims = 1:10, nfeatures = 5)

#已知细胞marker的可视化。
DefaultAssay(scRNA) <- "RNA"
scRNA <- NormalizeData(scRNA, verbose = FALSE)
FeaturePlot(scRNA,reduction = "umap", sort.cell = TRUE, min.cutoff = 'q10',
            features = c("Cd3d", "Cd8A"),
            label = TRUE)

#寻找到双细胞
library(DoubletFinder)
#SCT的方法的话sct = T。
sweep.res.list <- paramSweep_v3(scRNA, PCs = 1:40, sct = T)
sweep.stats <- summarizeSweep(sweep.res.list, GT = F)
bcmvn <- find.pK(sweep.stats)
pk_bcmvn <- bcmvn$pK[which.max(bcmvn$BCmetric)] %>% as.character() %>% as.numeric()
DoubletRate = ncol(scRNA)*8*1e-6
homotypic.prop <- modelHomotypic(scRNA$seurat_clusters)
nExp_poi <- round(DoubletRate*ncol(scRNA))
nExp_poi.adj <- round(nExp_poi*(1-homotypic.prop))
scRNA <- doubletFinder_v3(scRNA, PCs = 1:30,pN = 0.25, pK = pk_bcmvn,
                          nExp = nExp_poi, reuse.pANN = F, sct = T)
DimPlot(scRNA, reduction = "umap", label = T,
        #group.by = "DF.classifications_0.25_0.06_3032")
        group.by = "DF.classifications_0.25_0.16_1919")

#寻找差异基因，sct下来的需要重新质控，不是sct下来的可以直接算差异。
DefaultAssay(scRNA) <- "RNA"
scRNA <- NormalizeData(scRNA, normalization.method = "LogNormalize", scale.factor = 10000)
scRNA <- FindVariableFeatures(scRNA,selection.method = "vst", nfeatures = 2000)
all.genes <- rownames(scRNA)
scRNA <- ScaleData(scRNA, features = all.genes)
scRNA.markers <- FindAllMarkers(scRNA, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
DEMs <- scRNA.markers %>% group_by(cluster) %>% filter(avg_log2FC > 1 & p_val_adj < 1e-5) %>%
  #slice_max(n = 20, order_by = avg_log2FC) %>%
    as.data.frame()
write.table(DEMs, "./heart/DEMs.csv", row.names = F, sep = ",")

#查看某一基因的表达情况
VlnPlot(object = scRNA, features ='Spp1',log =T )
FeaturePlot(scRNA, features = c("Spi1", "Lcn2", "Saa3", "S100a8", "Retnlg", "Pglyrp1",
                                "S100a9", "Ngp", "Hdc", "C1qa", "Pvr", "H2-q4",
                                "Icam1", "Ccl9", "Selplg", "Ptprc", "Cd22", "Ccl6" ))

#使用singleR识别细胞，并绘图
library(SingleR)
scRNA_sin <- GetAssayData(scRNA, slot = "data")
clusters <- scRNA@meta.data$seurat_clusters
#mousImmu <- celldex::ImmGenData()
#saveRDS(mouseRNA,'mousRNA.rds')
mousImmu <- readRDS("mousImmu.rds")
pred.mouseImmu <- SingleR(test = scRNA_sin, ref = mousImmu, labels = mousImmu$label.main,
                          clusters = clusters,
                          assay.type.test = "logcounts", assay.type.ref = "logcounts")

#mouseRNA <- celldex::MouseRNAseqData()
mouseRNA <- readRDS("mousRNA.rds")
pred.mouseRNA <- SingleR(test = scRNA_sin, ref = mouseRNA, labels = mouseRNA$label.fine ,
                         clusters = clusters,
                         assay.type.test = "logcounts", assay.type.ref = "logcounts")

cellType <- data.frame(ClusterID = levels(scRNA@meta.data$seurat_clusters),
                       mouseImmu = pred.mouseImmu$labels,
                       mouseRNA = pred.mouseRNA$labels )
head(cellType)
scRNA@meta.data$singleR <- cellType[match(clusters,cellType$ClusterID),'mouseRNA']

pro='first_anno'
DimPlot(scRNA,reduction = "umap",label=T, group.by = 'singleR')
ggplot2::ggsave(filename = paste0(pro,'_tSNE_singleR.pdf'))
DimPlot(scRNA,reduction = "umap",label=T,split.by ='orig.ident',group.by = 'singleR')
ggplot2::ggsave(filename = paste0(pro,'_tSNE_singleR_by_orig.ident.pdf'))


#细胞注释，用cellmarker上的细胞marker记录，清晰数据
options(java.parameters= '-Xmx16g')
library(xlsx)
cell_raw <- read.xlsx("Cell_marker_All.xlsx", 1)
mice_lung <- cell_raw[cell_raw$species == "Mouse" & cell_raw$tissue_type == "Heart",][,c(7,9)]
mice_lung$marker <- str_to_sentence(mice_lung$marker)
scell_raw <- read.table("Single_cell_markers.txt",sep = "\t",header = T)
smice_lung <- scell_raw[scell_raw$speciesType == "Mouse" & scell_raw$tissueType == "Heart",][,c(6,8)] %>%
  separate_rows(cellMarker, sep = ", ")
colnames(smice_lung) <- colnames(mice_lung)
cell_markers <- rbind(mice_lung,smice_lung)

#计算每个集落中差异基因占细胞marker基因的比例，以确定集落的marker基因。
#方法一，构建两个矩阵，按照位置进行对应。
clRate<-function(x,y){
  cellMar <- cell_markers[cell_markers$cell_name == x,][,2]
  clMar <- DEMs[DEMs$cluster == y,][,7]
  inters <- intersect(cellMar, clMar)
  MarRat <- ifelse(length(inters) == 0, 0,sum(sapply(inters, function(x){ sum(cellMar == x)})))
  rate <- MarRat/length(cellMar)
  return(rate)
}
clInters <- function(x,y){
  cellMar <- cell_markers[cell_markers$cell_name == x,][,2]
  clMar <- DEMs[DEMs$cluster == y,][,7]
  inters <- intersect(cellMar, clMar)
  return(paste0(inters, collapse = ", "))
}

clrate <- sapply(unique(cell_markers$cell_name),function(x)
  sapply(unique(DEMs$cluster), function(y) clRate(x,y)))

clinter <- sapply(unique(cell_markers$cell_name),function(x)
  sapply(unique(DEMs$cluster), function(y) clInters(x,y)))

clrate1 <- clrate
Top5cells <- vector()
Inter <- vector()
Cells <- vector()
Inter2 <- vector()
for (j in 1:5){
  Cells <- colnames(clrate1)[max.col(clrate1,"first")]
  Top5cells <- cbind(Top5cells,Cells)
  for (i in 1:30){
    Inter <- rbind(Inter, clinter[i, max.col(clrate1,"first")[i]])
    Inter2 <- rbind(Inter2, clrate[i, max.col(clrate1,"first")[i]])
    clrate1[i, max.col(clrate1,"first")[i]] <- 0
  }
  colnames(Inter) <- paste0("Makers ", j)
  colnames(Inter2) <- paste0("Rates ", j)
  Top5cells <- cbind(Top5cells, Inter, Inter2)
  Inter <- vector()
  Cells <- vector()
  Inter2 <- vector()
}

#方法而，构建一个长数据类型的datafram。
ccRate <- function(x,y){
  cellMar <- cell_markers[cell_markers$cell_name == x,][,2]
  clMar <- DEMs[DEMs$cluster == y,][,7]
  inters <- intersect(cellMar, clMar)
  MarRat <- ifelse(length(inters) == 0, 0,sum(sapply(inters, function(x){ sum(cellMar == x)})))
  rate <- MarRat/length(cellMar)
  return(c(y, x, rate, paste0(inters, collapse = ", ")))
}

Top5cells <- do.call(rbind, lapply(unique(cell_markers$cell_name),function(x)
  do.call(rbind, lapply(unique(DEMs$cluster), function(y) ccRate(x,y)))))
colnames(Top5cells) <- c("Cluster", "Cell", "Rate", "Marker")
Top5cells[,3] <- as.numeric(Top5cells[,3])
Top5cells[,1] <- as.numeric(Top5cells[,1]) - 1
Top5cells <- Top5cells %>% as.data.frame() %>% group_by(Cluster) %>% filter(Rate > 0) %>%
  slice_max(n = 5, order_by = Rate)

View(Top5cells)
write.table(Top5cells, "./heart/Top5_cell_markers_rate.csv", sep = ",", row.names = FALSE)
#某一集落中所有的marker。
intersect(scRNA.markers[scRNA.markers$cluster == 25,]$gene, cell_markers$marker)

#根据已知文献中记录的marker确定细胞种类
cell_marker <- read.table("cell_markers.csv", header = F, sep = ",")
cell_markers <- cell_marker %>% pivot_longer(cols = 2:4, values_drop_na = T, ) %>%
  select(-2)
DEMs <- read.table("DEMs.csv", header = T, sep = ",")

res <- lapply(unique(DEMs$cluster), function (x) {
  do.call(rbind, lapply(intersect(DEMs[DEMs$cluster == x,][,7], cell_markers$value),
                        function(x) cell_markers[cell_markers$value == x,]))
})
sink("heart.txt")
print(res)
sink()

#手工注释细胞。#lung
scRNA <- RenameIdents(object = scRNA,
                      '0'='Stromal cell',
                      '1'='Adventitial Fibroblast',
                      '2'='Monocyte',
                      '3'='Clara cell',
                      '4'='Capillary',
                      '5'='Monocyte',
                      '6'='Monocyte',
                      '7'='Monocyte',
                      '8'='T cell',
                      '9'='Stromal cell',
                      '10'='Macrophage',
                      '11'='Smooth Muscle',
                      '12'='Natural killer cell',
                      '13'='Stromal cell',
                      '14'='Myofibroblast',
                      '15'='Capillary',
                      '16'='Mesothelial',
                      '17'='Monocyte',
                      '18'='Capillary Aerocyte',
                      '19'='Proliferating NK/T',
                      '20'='Endothelial cell',
                      '21'='Pericyte',
                      '22'='B cell',
                      '23'='Macrophage',
                      '24'='Ccr7+ Dendritic',
                      '25'='Ciliated',
                      '26'='Lymphatic',
                      '27'='Alveolar Epithelial Type 2',
                      '28'='Capillary Aerocyte',
                      '29'='Alveolar Epithelial Type 1')
#heart
scRNA <- RenameIdents(object = scRNA,
                      '0'='Monocyte',
                      '1'='Endothelial',
                      '2'='Monocyte',
                      '3'='Endothelial',
                      '4'='Endothelial',
                      '5'='Fibroblast',
                      '6'='Fibroblast',
                      '7'='Fibroblast',
                      '8'='Macrophage',
                      '9'='Monocyte',
                      '10'='Fibroblast',
                      '11'='Macrophage',
                      '12'='Pericytes',
                      '13'='B cell',
                      '14'='Ventricular cardiomyocytes',
                      '15'='Natural killer cell',
                      '16'='Endothelial',
                      '17'='Pericytes',
                      '18'='Smooth muscle',
                      '19'='Smooth muscle',
                      '20'='Neuronal',
                      '21'='Endothelial',
                      '22'='T cell')
scRNA$cell_type <- Idents(scRNA)
#保存图片
Idents(object = scRNA) <- "cell_type"
pdf("./lung/UAMP_imm.pdf", height = 8, width = 10)
DimPlot(subRNA, reduction = "umap", label = T, label.size = 5, repel = T)
  #theme(legend.position= "none")
dev.off()

#获取细胞亚群比例。
cellnum <- table(scRNA@meta.data$cell_type, scRNA@meta.data[,"orig.ident"]) #统计每个样本种每种细胞亚型的细胞数目
cellfreq <- prop.table(x = table(scRNA@meta.data$cell_type, scRNA@meta.data[,"orig.ident"]),
                  margin = 2)  %>% as.data.frame() %>% filter(Freq > 0) #统计每个每个样本中每个细胞亚型的比例
colnames(cellfreq) = c("Cell_type", "Group", "Freq")

#ggalluvial包加堆叠图之间的连接线
library(ggalluvial)
cellfreq$Group <- factor(cellfreq$Group, levels = c("sham", "CLP"))
ggplot(cellfreq, aes(x = Group, y = Freq, fill = Cell_type,
                     stratum = Cell_type, alluvium = Cell_type))+
  geom_stratum(color = "white", linewidth = 1, width = 1/2)+
  geom_alluvium(color = "black", linewidth = 0.5, width =1/2)+
  #scale_fill_brewer(type = "qual", palette = "Accent") +
  #geom_flow()+
  theme_classic()+
  theme(legend.position = 'right')+
  scale_y_continuous(labels = c("0","25%","50%","75%","100%"),expand = c(0,0))

pdf("./heart/cell_rate.pdf", height = 6, width = 8)
dev.off()
#不严谨的进行细胞亚群里面的差异分析，严谨的应该是每组有2个以上的重复，利用DESeq2工具对特定细胞类型聚类进行pseudobulk差异表达分析
#https://mp.weixin.qq.com/s?__biz=MzI1Njk4ODE0MQ==&mid=2247490575&idx=1&sn=f453fbb0206a1be7b67058b10a27e5dd&chksm=ea1f1a8ddd68939b5446f21c046e85aa179bf00a6a70ea4f707942a944a76b1d75f3111fb62f&scene=178&cur_album_id=1645776134791315457#rd
write.table(table(scRNA$cell_type, scRNA$orig.ident),"./heart/cell_counts.csv", sep = ",")
DEGs <- lapply(unique(scRNA$cell_type), function(x){
  FindMarkers(scRNA[,scRNA$cell_type == x],ident.1 = 'sham', ident.2 = 'CLP',
              assay = "RNA", slot = "counts", logfc.threshold = 0, min.pct = 0)
})
names(DEGs) <- unique(scRNA$cell_type)
do.call(rbind,lapply(DEGs, function(x){
  table(abs(x$avg_log2FC) > 1 & x$p_val_adj < 0.05)
}))
mo_degs <- rownames(DEGs$Monocyte[abs(DEGs$Monocyte$avg_log2FC) >1 & DEGs$Monocyte$p_val_adj < 1e-5,])
rm(list = setdiff(ls(), c("mo_degs", "pred_gene_l")))

#绘制各个细胞的差异表达基因火山图。
library(ggrepel)

lapply(1:length(DEGs), function(i){
  df_hsd <- DEGs[[i]]
  df_hsd$ID <- rownames(df_hsd)
  df_hsd$Threshold<-factor(ifelse(df_hsd$p_val_adj < 1e-5&
                                  abs(df_hsd$avg_log2FC) > 1,
                                ifelse(df_hsd$avg_log2FC > 1,"UP","DOWN"),"NO"))
  pdf(paste0("./heart/", names(DEGs[i]), "_hsplot.pdf"), height = 8, width = 10)
  print(ggplot(df_hsd,aes(x= avg_log2FC, y = -log10(p_val_adj), color = Threshold))+
    geom_point(alpha=0.5,size = 4)+
    scale_color_manual(values = c("blue","grey","red"))+
    theme_bw() + xlim(-4,4)+
    theme_classic()+
    theme(
      plot.title = element_text(hjust=0.5),
      axis.title = element_text(size = 15),
      axis.text = element_text(size = 12),
      legend.title = element_text(size = 15),
      legend.text = element_text(size = 12),
      legend.position = c(1,0.5),
      legend.justification = c(1,1),
      legend.background = element_rect(fill = NULL, colour = "black"))+
    geom_vline(xintercept = c(-1,1),lty = 2, col = "black", lwd = 0.4)+
    geom_hline(yintercept = -log10(1e-5),lty = 2, col = "black", lwd = 0.4)+
    geom_text_repel(data = subset(df_hsd, p_val_adj < 1e-5&
                                    abs(avg_log2FC) > 1),
                    aes(label = ID))+
    labs(title = paste0(names(DEGs[i]), " DEGs"), x = "logFC", y = "-log10(adj.P.Val)"))
  dev.off()
})


#提取lung和heart共有的免疫细胞进行下面的分析
Idents(scRNA) <- scRNA$cell_type
subRNA <- subset(scRNA, ident = c("Monocyte", "T cell", "B cell", "Macrophage",
                                   "Natural killer cell"))
#增殖细胞纳入T细胞
subRNA <- RenameIdents(object = subRNA, "Proliferating NK/T" = "T cell")
subRNA$cell_type <- Idents(subRNA)
#用subRNA，再出一遍上UMAP和cellrate。

#调控网络分析，将数据导出，用python跑。
#subRNA <- subset(scRNA, downsample = 1000)#随机抽1000个细胞
Idents(subRNA)
saveRDS(subRNA, "tf.rds")
library(SCopeLoomR)
build_loom(file.name = "tf.loom", dgem = subRNA@assays$RNA@counts)
write.table(subRNA@meta.data, "sub_meta.csv", sep = "\t", quote = F)

#计算RSS，找到细胞类型特异的TF
library(SCopeLoomR)
library(AUCell)
library(SCENIC)
library(ggrepel)
library(BiocParallel)

loom <- open_loom("aucell.loom")
regulons_incidMat <- get_regulons(loom, column.attr.name="Regulons")
regulons_incidMat[1:4,1:4]
regulons <- regulonsToGeneLists(regulons_incidMat)
regulonAUC <- get_regulons_AUC(loom,column.attr.name='RegulonsAUC')
regulonAucThresholds <- get_regulon_thresholds(loom)
tail(regulonAucThresholds[order(as.numeric(names(regulonAucThresholds)))])
embeddings <- get_embeddings(loom)
close_loom(loom)

meta <- read.table("sub_meta.csv", sep = "\t", header = T, stringsAsFactors = F)
cellinfo <- meta[,c("cell_type", "orig.ident", "nFeature_RNA", "nCount_RNA")]
colnames(cellinfo) <- c("celltype", "group", "nGene", "nUMI")

#整体细胞和两组比较
cellTypes <-  as.data.frame(subset(cellinfo, select = "celltype"))
groups <- as.data.frame(subset(cellinfo, select = "group"))
rss <- calcRSS(AUC = getAUC(regulonAUC), cellAnnotation = cellTypes[
  colnames(regulonAUC), "celltype"])
rss_group <- calcRSS(AUC = getAUC(regulonAUC), cellAnnotation = groups[
  colnames(regulonAUC), "group"])
rss <- na.omit(rss)
rss_group <- na.omit(rss_group)

#单独某个细胞在两组间的比较，如Adipocytes
lapply(c("Monocyte", "T cell", "B cell", "Macrophage", "Natural killer cell"),
       function (x){
         sub_info <- cellinfo[cellinfo$celltype == x,] %>% subset(select = "group")
         sub_regauc <- regulonAUC[,match(rownames(sub_info), colnames(regulonAUC))]
         sub_rss <- calcRSS(AUC = getAUC(sub_regauc), cellAnnotation = sub_info[
           colnames(sub_regauc), "group"])
         sub_rss <- na.omit(sub_rss)
         sub_rssplot <- plotRSS(sub_rss)
         pdf(paste0("./heart/", x, "_tf.pdf"), height = 20, width = 4)
         print(sub_rssplot$plot)
         dev.off()
         write.table(sub_rssplot$df, paste0("./heart/", x, "_tf.csv"), sep = ",",
                     col.names = T, row.names = F)
         })


#可设置zThreshold，大于1.96相当于p<0.05，1.65相当于p<0.1。
rssplot <- plotRSS(rss)
rssplot_group <- plotRSS(rss_group, zThreshold = 1)

#SCENIC自带的点图，和ggplot的点图。
plotly::ggplotly(rssplot$plot)
ggplot(rssplot_group$df, aes(cellType, Topic))+
  geom_tile(aes(fill = RSS))+
  scale_fill_gradient(low="white",high ='red')

#在seurat对象里查看某些转录因子。
subRNA <- readRDS("tf.rds")
Idents(object = subRNA) <- "cell_type"
DotPlot(subRNA, features = "Hltf")

#绘制各个细胞的有意义的转录因子图。类似火山图
df_cell <- rssplot$df
df_cell$Threshold<-as.factor(ifelse(abs(df_cell$Z) >1.96, "Yes","No"))

#每个细胞的特异性转录因子绘图。并将有差异的表示出来。
df_p <- df_cell[df_cell$cellType == "T cell",]
df_p <- arrange(df_p, desc(RSS))
df_p$Topic <- factor(df_p$Topic, levels = df_p$Topic)
df_p$Threshold <- factor(df_p$Threshold, levels = c("Yes", "No"))
ggplot(data = df_p, aes(Topic, RSS, color = Threshold))+
  geom_point(alpha = 0.6, size = 4)+
  geom_text_repel(data = subset(df_p, df_p$Threshold == "Yes"),
                  aes(label = Topic), color = "black")+
  theme_bw()+
  scale_color_discrete(name = "Z score", labels = c("Z > 1.96", "Z < 1.96"))+
  theme(panel.grid = element_blank(), axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),plot.title = element_text(hjust=0.5),
        legend.title.align = 0.5)+
  labs(x = "Regulons", y = "Regulons specificity score")+
  ggtitle("Adipocytes")

#拟时态分析
library(monocle3)
#提取某一种细胞
ns_cell <- "Monocyte"

cell_ns <- subRNA[,subRNA$cell_type == ns_cell]
cell_ns <- SCTransform(cell_ns) %>% RunPCA()
cell_ns <- RunUMAP(cell_ns, dims = 1:40) %>%
  FindNeighbors(dims = 1:40) %>%
  FindClusters(resolution=0.8)
DimPlot(cell_ns, reduction = "umap")

data <- GetAssayData(cell_ns, assay = 'RNA', slot = 'counts')
cell_metadata <- cell_ns@meta.data
gene_annotation <- data.frame(gene_short_name = rownames(data))
rownames(gene_annotation) <- rownames(data)
cds <- new_cell_data_set(data,
                         cell_metadata = cell_metadata,
                         gene_metadata = gene_annotation)

cds <- preprocess_cds(cds, num_dim = 50)
plot_pc_variance_explained(cds)
cds <- align_cds(cds)
cds <- reduce_dimension(cds, cores = 32, reduction_method = "UMAP", preprocess_method = 'PCA')
#整合seurat的坐标
cds.embed <- cds@int_colData$reducedDims$UMAP
int.embed <- Embeddings(cell_ns, reduction = "umap")
int.embed <- int.embed[rownames(cds.embed),]
cds@int_colData$reducedDims$UMAP <- int.embed
plot_cells(cds)

cds <- cluster_cells(cds)
cds <- learn_graph(cds)
cds <- order_cells(cds)

#可以在弹出来的框中选择根细胞，也根据坐标进行选择。
p + geom_vline(xintercept = seq(-7,-6,0.25)) + geom_hline(yintercept = seq(0,1,0.25))
embed <- data.frame(Embeddings(scRNAsub, reduction = "umap"))
embed <- subset(embed, UMAP_1 > -6.75 & UMAP_1 < -6.5 & UMAP_2 > 0.24 & UMAP_2 < 0.25)
root.cell <- rownames(embed)
cds <- order_cells(cds, root_cells = root.cell)

cds@colData@listData[["orig.ident"]] <- factor(cds@colData@listData[["orig.ident"]], levels = c("CLP", "sham"))

plot_cells(cds, color_cells_by = "pseudotime", group_label_size = 5,graph_label_size = 5, cell_size = 1)

pdf(paste0("./lung/",ns_cell, "_ns.pdf"), height = 6, width = 8)
plot_cells(cds, color_cells_by = "orig.ident", group_label_size = 5,graph_label_size = 5, cell_size = 1)
dev.off()

save_monocle_objects(cds = cds, directory_path = paste0("./lung/",ns_cell, "_ns1"))
cds <- load_monocle_objects("./lung/Monocyte_ns")
##寻找拟时轨迹差异基因
#graph_test分析最重要的结果是莫兰指数（morans_I），其值在-1至1之间，0代表此基因没有
#空间共表达效应，1代表此基因在空间距离相近的细胞中表达值高度相似。
Track_genes <- graph_test(cds, neighbor_graph = "principal_graph", cores = 16)
write.table(Track_genes, paste0("./lung/",ns_cell, "_track_genes.csv"), row.names = T, col.names = T)
#挑选top10画图展示
Track_genes_sig <- Track_genes %>% top_n(n = 10, morans_I) %>%
  pull(gene_short_name) %>% as.character()
#基因表达趋势图
pdf(paste0("./lung/",ns_cell, "_ns_g.pdf"), height = 6, width = 8)
plot_genes_in_pseudotime(cds[Track_genes_sig,], color_cells_by = "orig.ident",
                         min_expr = 0.5, ncol = 2)
dev.off()
#FeaturePlot图
plot_cells(cds, genes = Track_genes_sig, show_trajectory_graph = FALSE,
           label_cell_groups = FALSE,  label_leaves = FALSE)
##寻找共表达模块
genelist <- pull(Track_genes, gene_short_name) %>% as.character()
gene_module <- find_gene_modules(cds[genelist,], resolution = 1e-2, cores = 16)
cell_group <- tibble::tibble(cell = row.names(colData(cds)),
                             cell_group = colData(cds)$orig.ident)
agg_mat <- aggregate_gene_expression(cds, gene_module, cell_group)
row.names(agg_mat) <- stringr::str_c("Module ", row.names(agg_mat))
agg_mat <- cbind(agg_mat[,2], agg_mat[,1])
colnames(agg_mat) <- c("sham", "CLP")

pdf(paste0("./lung/",ns_cell, "_ns_m.pdf"), height = 12, width = 8)
print(pheatmap::pheatmap(agg_mat, scale = "column", clustering_method = "ward.D2"))
dev.off()
names(cds@colData)

Track_genes_sig <- c("Spi1", "Lcn2", "Saa3", "S100a8", "Retnlg", "Pglyrp1", "S100a9",
                     "Ngp", "Hdc", "C1qa", "Pvr","Icam1", "Ccl9", "Selplg",
                     "Ptprc", "Cd22", "Ccl6" )

library(CellChat)
#cellchat要求输入标准化后的表达数据，并添加细胞类型信息。
subbRNA <- subset(subRNA, orig.ident == "sham")
data_input <- GetAssayData(subbRNA)
identity <- subset(subbRNA@meta.data, select = c( "cell_type"))
cellchat <- createCellChat(data_input)
cellchat <- addMeta(cellchat, meta = identity, meta.name = c("cell_type"))
levels(cellchat@idents)
cellchat <- setIdent(cellchat, ident.use = "cell_type")
groupSize <- as.numeric(table(cellchat@idents))

#选择参考物种，人是CellChatDB.human, 鼠是CellChatDB.mouse，展示一下数据库内容
CellChatDB <- CellChatDB.mouse
showDatabaseCategory(CellChatDB)
#有secreted signaling, ECM-receptor, cell-cell contant。可选，全部可不选。
#CellChatDB.use <- subsetDB(CellChatDB, search = "Secreted Signaling")
CellChatDB.use <- CellChatDB
cellchat@DB <- CellChatDB.use

##配体-受体分析
# 提取数据库支持的数据子集，即便用所有数据，经过此操作也可节省计算资源
cellchat <- subsetData(cellchat)
future::plan("multisession", workers = 8)
# 识别过表达基因
cellchat <- identifyOverExpressedGenes(cellchat)
# 识别配体-受体对象
cellchat <- identifyOverExpressedInteractions(cellchat)
# 将配体、受体投射到PPI网络，可不执行此步骤，若执行，则将computeCommunProb()加上`raw.use = FALSE`
cellchat <- projectData(cellchat, PPI.mouse)
##推测细胞通讯网络，默认25%截断值，type = "truncatedMean"和trim = 0.1，取10%。
#CellChat在概率计算中也可以考虑各细胞群体中细胞比例的影响。设置 population. size = TRUE.
cellchat <- computeCommunProb(cellchat, raw.use = FALSE, population.size = T)
cellchat <- filterCommunication(cellchat, min.cells = 10)
cellchat <- computeCommunProbPathway(cellchat)
cellchat <- aggregateNet(cellchat)
#细胞通讯网络及权重。
par(mfrow = c(1,2), xpd=TRUE)
netVisual_circle(cellchat@net$count, vertex.weight = groupSize,
                 weight.scale = T, label.edge= F,
                 title.name = "Number of interactions")
netVisual_circle(cellchat@net$weight, vertex.weight = groupSize,
                 weight.scale = T, label.edge= F,
                 title.name = "Interaction weights/strength")
#每个细胞发出连接及权重。
mat <- cellchat@net$weight
par(mfrow = c(3,2), xpd=TRUE)
for (i in 1:nrow(mat)) {
  mat2 <- matrix(0, nrow = nrow(mat), ncol = ncol(mat), dimnames = dimnames(mat))
  mat2[i, ] <- mat[i, ]
  netVisual_circle(mat2, vertex.weight = groupSize/100,
                   weight.scale = T, edge.weight.max = max(mat),
                   title.name = rownames(mat)[i])
}

levels(cellchat@idents)            #查看细胞顺序
vertex.receiver = c(1,2)          #指定靶细胞的索引，用于hierarchy图
cellchat@netP$pathways             #查看富集到的信号通路
pathways.show <- "CCL"             #指定需要展示的通路
# Hierarchy plot，层次图
netVisual_aggregate(cellchat, signaling = pathways.show, vertex.size = groupSize/100,
                    vertex.receiver = vertex.receiver, layout = "hierarchy")

# Circle plot，圆圈图
netVisual_aggregate(cellchat, signaling = pathways.show, vertex.size = groupSize/100,
                    layout = "circle")
netVisual_aggregate(cellchat, signaling = pathways.show, layout = "chord")

#细胞和基因水平上的和弦图，solt.name可以选择在配体和受体基因层面"net"，或者信号通路层面"netP"。
netVisual_chord_cell(cellchat, signaling = pathways.show, slot.name = "netP")

#通过signaling指定需要展示的特定通路下的受配体对，以及用pairLR.use指定需要展示的受配体对
netVisual_chord_gene(cellchat, sources.use = 1, lab.cex = 0.5, targets.use = c(5:6))
#热图
netVisual_heatmap(cellchat, signaling = pathways.show, color.heatmap = "Reds")
# 计算配体-受体对信号网络的贡献度
netAnalysis_contribution(cellchat, signaling = pathways.show)

#提取通路中的受配体信号值，并单独画图
pairLR.CCL <- extractEnrichedLR(cellchat, signaling = pathways.show, geneLR.return = FALSE)
netVisual_individual(cellchat, signaling = pathways.show,
                     pairLR.use = pairLR.CCL[1,], layout = "chord")

#选择相关受配细胞的配对和信号通路绘制气泡图。
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(1:5), remove.isolate = FALSE)
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(1:5),
                 signaling = c("MIF","MHC-I"), remove.isolate = FALSE)
pairLR.use <- extractEnrichedLR(cellchat, signaling = c("MIF","MHC-I"))
pairLR.use  = pairLR.use[c(1,3),,drop=F]
netVisual_bubble(cellchat, sources.use = c(1), targets.use = c(1:5),
                 pairLR.use = pairLR.use, remove.isolate = TRUE)
#小提琴绘制信号基因表达分布，默认只显示有统计差异的。
plotGeneExpression(cellchat, signaling = "CCL")

# 分析细胞在信号网络中角色
cellchat <- netAnalysis_computeCentrality(cellchat, slot.name = "netP")

netAnalysis_signalingRole_network(cellchat, signaling = pathways.show)
#可以展示所有的通路，也可展示某几个通路。
netAnalysis_signalingRole_scatter(cellchat) +
  ggtitle("All pathway")
netAnalysis_signalingRole_heatmap(cellchat, pattern = "outgoing")+
netAnalysis_signalingRole_heatmap(cellchat, pattern = "incoming")

#细胞通讯模式和信号网络.寻找最佳的k值，给out和in赋值。
library(NMF)
selectK(cellchat, pattern = "outgoing")
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "outgoing", k = 3)

library(ggalluvial)
# river plot
netAnalysis_river(cellchat, pattern = "outgoing")
# dot plot
netAnalysis_dot(cellchat, pattern = "outgoing")

selectK(cellchat, pattern = "incoming")
cellchat <- identifyCommunicationPatterns(cellchat, pattern = "incoming", k = 3)
# river plot
netAnalysis_river(cellchat, pattern = "incoming")
# dot plot
netAnalysis_dot(cellchat, pattern = "incoming")

##信号网络聚类
# 按结构相似性聚类
#修改r调用python的路径。
library(reticulate)
use_condaenv("D:\\ProgramFiles\\Anaconda3")
cellchat <- computeNetSimilarity(cellchat, type = "structural")
cellchat <- netEmbedding(cellchat, type = "structural")

#由于future包升级，将netClustering函数里面的'multiprocess'改成了，'multisession'
trace(netClustering, edit = T)
cellchat <- netClustering(cellchat, type = "structural")
# Visualization in 2D-space
netVisual_embedding(cellchat, type = "structural")
netVisual_embeddingZoomIn(cellchat, type = "structural")

# 按功能相似性聚类
cellchat <- computeNetSimilarity(cellchat, type = "functional")
cellchat <- netEmbedding(cellchat, type = "functional")
cellchat <- netClustering(cellchat, type = "functional")
# Visualization in 2D-space
netVisual_embedding(cellchat, type = "functional")
netVisual_embeddingZoomIn(cellchat, type = "functional")

cellchat_sham <- cellchat
cellchat_CLP <- cellchat
save(cellchat, file = "cellchat.rds")

#多组比较可以分别跑完上述的在合并一起.
#https://mp.weixin.qq.com/s?__biz=MzUzMTEwODk0Ng==&mid=2247507275&idx=1&sn=47a8b040307b660ebe483aaec2cc4547&chksm=fa451876cd32916030e713de041b56c79c495753edffd44476daa31fcf61c9819cb30028fcb2&cur_album_id=2487778019613540353&scene=189#wechat_redirect
#https://htmlpreview.github.io/?https://github.com/sqjin/CellChat/blob/master/tutorial/Comparison_analysis_of_multiple_datasets.html
object.list <- list(sham = cellchat_sham, CLP = cellchat_CLP)
cellchat <- mergeCellChat(object.list, add.names = names(object.list))


#比较细胞通讯相互作用的总数和相互作用的强度
pdf("./heart/cell_inter_num.pdf", height = 4.65, width = 8.05)
compareInteractions(cellchat, show.legend = F, group = c(1,2), measure = "count",
                    color.use = c( "#00BFC4", "#F8766D")) +
compareInteractions(cellchat, show.legend = F, group = c(1,2), measure = "weight",
                    color.use = c( "#00BFC4", "#F8766D"))
dev.off()
#不同细胞群间相互作用次数或相互作用强度的差异
#红色(或蓝色)颜色的边表示第二个数据集中与第一个数据集相比增加(或减少)的信号。
pdf("./lung/cell_diff.pdf", height = 4.65, width = 8.05)
par(mfrow = c(1,2), xpd=TRUE)
netVisual_diffInteraction(cellchat, weight.scale = T,comparison = c(1,2))
netVisual_diffInteraction(cellchat, weight.scale = T, measure = "weight",comparison = c(1,2))
dev.off()
pdf("./heart/cell_diff_h.pdf", height = 4.65, width = 8.05)
netVisual_heatmap(cellchat,comparison = c(1,2)) +
netVisual_heatmap(cellchat, measure = "weight",comparison = c(1,2))
dev.off()

weight.max <- getMaxWeight(object.list, attribute = c("idents","count"))

pdf("./heart/cell_inter_circle.pdf", height = 4.65, width = 8.05)
par(mfrow = c(1,2), xpd=TRUE)
for (i in 1:length(object.list)) {
  netVisual_circle(object.list[[i]]@net$count, weight.scale = T, label.edge= F, edge.weight.max = weight.max[2], edge.width.max = 12, title.name = paste0("Number of interactions - ", names(object.list)[i]))
}
dev.off()

num.link <- sapply(object.list, function(x) {rowSums(x@net$count) + colSums(x@net$count)-diag(x@net$count)})
weight.MinMax <- c(min(num.link), max(num.link)) # control the dot size in the different datasets
gg <- list()
for (i in 1:length(object.list)) {
  gg[[i]] <- netAnalysis_signalingRole_scatter(object.list[[i]], title = names(object.list)[i], weight.MinMax = weight.MinMax)
}
pdf("./heart/cell_diff_sg.pdf", height = 4.65, width = 8.05)
patchwork::wrap_plots(plots = gg)
dev.off()
cellchat
signaling_change <- netAnalysis_signalingChanges_scatter(cellchat, idents.use = "T cell")
signaling_share <-  signaling_change$data[signaling_change$data$specificity == "Shared",]$labels

#"T cell", Monocyte, Macrophage
netAnalysis_signalingChanges_scatter(cellchat, idents.use = "T cell",
                                     signaling.exclude = signaling_share )

patchwork::wrap_plots(plots = list(gg1,gg2))

cellchat <- computeNetSimilarityPairwise(cellchat, type = "functional",comparison = c(1,2))
cellchat <- netEmbedding(cellchat, type = "functional",comparison = c(1,2))
trace(netClustering, edit = T)
cellchat <- netClustering(cellchat, type = "functional",comparison = c(1,2))
netVisual_embeddingPairwise(cellchat, type = "functional", label.size = 3.5,comparison = c(1,2))
netVisual_embeddingPairwiseZoomIn(cellchat, type = "structural", nCol = 2)

cellchat <- computeNetSimilarityPairwise(cellchat, type = "structural")
cellchat <- netEmbedding(cellchat, type = "structural")
cellchat <- netClustering(cellchat, type = "structural")
netVisual_embeddingPairwise(cellchat, type = "structural", label.size = 3.5)
netVisual_embeddingPairwiseZoomIn(cellchat, type = "structural", nCol = 2)

# 我们只计算两个数据集之间重叠信号通路的距离。 这里不考虑那些只在一个数据集中识别的信号通路。
rankSimilarity(cellchat, type = "functional")

pdf("./heart/cell_diff_xh.pdf", height = 6.5, width = 9)
rankNet(cellchat, mode = "comparison", stacked = T, do.stat = TRUE,
        color.use = c( "#00BFC4", "#F8766D"))+
rankNet(cellchat, mode = "comparison", stacked = F, do.stat = TRUE,
        color.use = c( "#00BFC4", "#F8766D"))
dev.off()

i = 1
pathway.union <- union(object.list[[i]]@netP$pathways, object.list[[i+1]]@netP$pathways)
netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "outgoing", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 6) +
  netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "outgoing", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 6)

netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "incoming", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 6) +
  netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "incoming", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 6)

pdf("./heart/cell_diff_hs.pdf", height = 7.5, width = 7)
netAnalysis_signalingRole_heatmap(object.list[[i]], pattern = "all", signaling = pathway.union, title = names(object.list)[i], width = 5, height = 14) +
  netAnalysis_signalingRole_heatmap(object.list[[i+1]], pattern = "all", signaling = pathway.union, title = names(object.list)[i+1], width = 5, height = 14)
dev.off()

netVisual_bubble(cellchat, sources.use = 1, targets.use = c(2:5),  comparison = c(1, 2), angle.x = 45)

netVisual_bubble(cellchat, sources.use = 1, targets.use = c(2:5),  max.dataset = 2,
                 title.name = "Increased signaling in CLP", comparison = c(1, 2), angle.x = 45)
netVisual_bubble(cellchat, sources.use = 1, targets.use = c(2:5),  max.dataset = 1,
                 title.name = "Decreased signaling in CLP", comparison = c(1, 2), angle.x = 45)

pos.dataset = "CLP"
features.name = pos.dataset
cellchat <- identifyOverExpressedGenes(cellchat, group.dataset = "datasets", pos.dataset = pos.dataset, features.name = features.name, only.pos = FALSE, thresh.pc = 0.1, thresh.fc = 0.1, thresh.p = 1)
net <- netMappingDEG(cellchat, features.name = features.name)
net.up <- subsetCommunication(cellchat, net = net, datasets = "sham",ligand.logFC = 0.2, receptor.logFC = NULL)
net.down <- subsetCommunication(cellchat, net = net, datasets = "CLP",ligand.logFC = -0.1, receptor.logFC = -0.1)
gene.up <- extractGeneSubsetFromPair(net.up, cellchat)
gene.down <- extractGeneSubsetFromPair(net.down, cellchat)
pairLR.use.up = net.up[,"interaction_name", drop = F]
pairLR.use.down = net.down[, "interaction_name", drop = F]

pdf("./lung/cell_signaling.pdf", height = 6, width = 8.5)
netVisual_bubble(cellchat, pairLR.use = pairLR.use.up, sources.use = 2, targets.use = c(1:5), comparison = c(1, 2),  angle.x = 45, remove.isolate = T,title.name = paste0("Up-regulated signaling in ", names(object.list)[2]), color.text = c( "#00BFC4", "#F8766D"))+
  netVisual_bubble(cellchat, pairLR.use = pairLR.use.down, sources.use = 2, targets.use = c(1:5), comparison = c(1, 2),  angle.x = 45, remove.isolate = T,title.name = paste0("Down-regulated signaling in ", names(object.list)[2]), color.text = c( "#00BFC4", "#F8766D"))
dev.off()

par(mfrow = c(1,2), xpd=TRUE)
netVisual_chord_gene(object.list[[2]], sources.use = 1, targets.use = c(2:5), slot.name = 'net', net = net.up, lab.cex = 0.8, small.gap = 3.5, title.name = paste0("Up-regulated signaling in ", names(object.list)[2]))
netVisual_chord_gene(object.list[[1]], sources.use = 1, targets.use = c(2:5), slot.name = 'net', net = net.down, lab.cex = 0.8, small.gap = 3.5, title.name = paste0("Down-regulated signaling in ", names(object.list)[2]))
computeEnrichmentScore(net.down, species = 'mouse')
computeEnrichmentScore(net.up, species = 'mouse')

pathways.show <- c("CXCL")
weight.max <- getMaxWeight(object.list, slot.name = c("netP"), attribute = pathways.show) # control the edge weights across different datasets
par(mfrow = c(1,2), xpd=TRUE)
for (i in 1:length(object.list)) {
  netVisual_aggregate(object.list[[i]], signaling = pathways.show, layout = "circle", edge.weight.max = weight.max[1], edge.width.max = 10, signaling.name = paste(pathways.show, names(object.list)[i]))
}

#cellchat@meta$datasets <- factor(cellchat@meta$datasets, levels = c("sham", "CLP")) # set factor level
plotGeneExpression(cellchat, signaling = "TNF", split.by = "datasets", colors.ggplot = T,
                   color.use = c( "#00BFC4", "#F8766D"))

#筛选基因
mo_tf <- read.csv("./lung/Monocyte_tf.csv")
mo_tf <- unique(mo_tf[,1])
mo_tf <- gsub("(+)", "", mo_tf, fixed = T)
mo_ns <- read.csv("./lung/Monocyte_track_genes.csv", sep = " ")
mo_ns <- na.omit(mo_ns)
mo_ns <- rownames(mo_ns[mo_ns$morans_I > 0.4,])
mo_tn <- union(mo_tf, mo_ns)

#heart细胞通讯差异表达信号通路配对。
mo_tn <- union(mo_tn, c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
                        "Cxcl16", "Cxcl10","App", "Ccl4", "Ccl6", "Ptprc", "Cd22",
                        "Ccl2", "Ccl7", "Ccl9", "Selplg", "Cd86"))
#lung
mo_tn <- union(mo_tn, c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
                        "Cxcl2", "App", "Ccl4", "Ccl6", "Ccl3", "Ptprc", "Cd22",
                        "Ccl9", "Selplg", "Cd86", "Tnbs1", "Fn1", "Cd22", "Lamc1",
                        "Grn", "Anxa1", "C3", "Nampt", "Tnf", "Tgfb1","H2-t23",
                        "H2-t24", "H2-t22"))
pred_gene_h <- intersect(mo_degs, mo_tn)
pred_gene_l <- intersect(mo_degs, mo_tn)
pred_gene <- intersect(pred_gene_l, pred_gene_h)

convertMouseGeneList <- function(x){
  require("biomaRt")
  human = useMart("ensembl", dataset = "hsapiens_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
  mouse = useMart("ensembl", dataset = "mmusculus_gene_ensembl", host = "https://dec2021.archive.ensembl.org/")
  genesV2 = getLDS(attributes = c("mgi_symbol"), filters = "mgi_symbol", values = x , mart = mouse, attributesL = c("hgnc_symbol"), martL = human, uniqueRows=T)
  humanx <- unique(genesV2[, 2])
  print(head(humanx))
  return(humanx)
}
pred_gene1 <- convertMouseGeneList(pred_gene)
write.table(pred_gene1, "genes.txt", col.names = F, row.names = F, quote = F )

mo_sub <- subset(scRNA, ident = "Monocyte")
Idents(mo_sub) <- "orig.ident"
pdf("./heart/gene_m.pdf", height = 15, width = 20)
VlnPlot(mo_sub, features = pred_gene, cols =  c( "#00BFC4", "#F8766D"))
dev.off()

#绘制韦恩图
library(venn)
load("lung.rdata")
rm(list = setdiff(ls(), c("DEGs")))
mo_degs_l <- rownames(DEGs$Monocyte[abs(DEGs$Monocyte$avg_log2FC) >1 & DEGs$Monocyte$p_val_adj < 1e-5,])
mo_tf_l <- read.csv("./lung/Monocyte_tf.csv")
mo_tf_l <- unique(mo_tf_l[,1])
mo_tf_l <- gsub("(+)", "", mo_tf_l, fixed = T)
mo_ns_l <- read.csv("./lung/Monocyte_track_genes.csv", sep = " ")
mo_ns_l <- na.omit(mo_ns_l)
mo_ns_l <- rownames(mo_ns_l[mo_ns_l$morans_I > 0.4,])
mo_p_l <- c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
  "Cxcl2", "App", "Ccl4", "Ccl6", "Ccl3", "Ptprc", "Cd22",
  "Ccl9", "Selplg", "Cd86", "Tnbs1", "Fn1", "Cd22", "Lamc1",
  "Grn", "Anxa1", "C3", "Nampt", "Tnf", "Tgfb1","H2-t23",
  "H2-t24", "H2-t22")
th_l <- union(union(mo_tf_l, mo_ns_l), mo_p_l)

load("heart.rdata")
rm(list = setdiff(ls(), c("DEGs")))
mo_degs_h <- rownames(DEGs$Monocyte[abs(DEGs$Monocyte$avg_log2FC) >1 & DEGs$Monocyte$p_val_adj < 1e-5,])
mo_tf_h <- read.csv("./heart/Monocyte_tf.csv")
mo_tf_h <- unique(mo_tf_h[,1])
mo_tf_h <- gsub("(+)", "", mo_tf_h, fixed = T)
mo_ns_h <- read.csv("./heart/Monocyte_track_genes.csv", sep = " ")
mo_ns_h <- na.omit(mo_ns_h)
mo_ns_h <- rownames(mo_ns_h[mo_ns_h$morans_I > 0.4,])
mo_p_h <- c("Pvr", "H2-q4","H2-k1", "H2-d1", "H2-q7", "Icam1", "Mif",
            "Cxcl16", "Cxcl10","App", "Ccl4", "Ccl6", "Ptprc", "Cd22",
            "Ccl2", "Ccl7", "Ccl9", "Selplg", "Cd86")
th_h <- union(union(mo_tf_h, mo_ns_h), mo_p_h)

venn_list <- list(th_h, mo_degs_h, mo_degs_l, th_l)
pdf("final_ven.pdf", height = 4, width = 5)
venn(venn_list, ellipse = T, box = F, ilcs = 1, sncs = 1,
     snames = c("heart three", "heart DEGs", "lung DEGs", "lung three"),
     zcolor = c("#7CAE00", "#C77CFF","#F8766D", "#00BFC4"),
     col = c("#7CAE00",  "#C77CFF","#F8766D", "#00BFC4"))
dev.off()

#绘制effect图，，plot
source("plot//plot_SMR.r")
SMRData <- ReadSMRData("plot//heart.ENSG00000179344.txt")
pdf("DQB1.pdf", height = 6, width = 8)
SMREffectPlot(data=SMRData) #, trait_name="Sepsis")
dev.off()

#转换id
library(AnnotationDbi)
library(org.Hs.eg.db)

results_eqt$Gene = mapIds(org.Hs.eg.db, results_eqt$probeID,"SYMBOL","ENSEMBL")
eqtl <- merge(results_eqt, gels, by = "Gene")
results_eqt[results_eqt$Gene == "BIRC3",]

write.table(heart, "heart.txt", col.names = T, row.names = F, quote = F)
write.table(lung, "lung.txt", col.names = T, row.names = F, quote = F)
write.table(atrial, "atrial.txt", col.names = T, row.names = F, quote = F)
write.table(eqtl, "eqtl.txt", col.names = T, row.names = F, quote = F)
write.table(blood, "blood.txt", col.names = T, row.names = F, quote = F)

#GEO中3个队列研究数据验证。
library(GEOquery)
library(DESeq2)
library(tidyverse)

ges <- getGEO("GSE185263", GSEMatrix = T)
exp <- read.csv("GSE185263_raw_counts.csv.gz", header = T, row.names = 1)
exp <- exp[rowMeans(exp) > 1,]
group <- data.frame(ges[[1]]$title, ges[[1]]$`disease state:ch1`, ges[[1]]$`in hospital mortality:ch1`)
colnames(group) <- c("case", "disease", "death")
health <- group[group$disease == "healthy",]
health <- data.frame(health$disease, row.names = health$case)
colnames(health) <- "disease"
sepsis <- group[group$disease == "sepsis" & group$death == "Survived",]
sepsis <- group[group$disease == "sepsis",]
sepsis <- data.frame(sepsis$disease, row.names = sepsis$case)
colnames(sepsis) <- "disease"
death <- group[group$death == "Died",]
death <- data.frame(death$death, row.names = death$case)
colnames(death) <- "disease"

h2s <- rbind(health, sepsis)
h2s$disease <- as.factor(h2s$disease)
exda <- exp[,row.names(h2s)]

s2d <- rbind(sepsis, death)
s2d$disease <- as.factor(s2d$disease)
exda <- exp[,row.names(s2d)]

h2d <- rbind(health, death)
h2d$disease <- as.factor(h2d$disease)
exda <- exp[,row.names(h2d)]

dds <- DESeqDataSetFromMatrix(exda, colData = h2s, design = ~ disease)
dds <- DESeq(dds)
res <- results(dds)
res$pvalue[res@rownames == "ENSG00000179344"]
res$padj[res@rownames == "ENSG00000196735"]
write.table(res[c("ENSG00000179344", "ENSG00000196735"),], "GSE185263.txt", quote =  F)

#画个柱状图吧
plotda <- cbind(group, t(exp["ENSG00000196735",]))

pdf("GSE185263_A.pdf", height = 4, width = 4)
ggplot(plotda, aes(x = disease, y = ENSG00000196735, fill = disease))+
  geom_violin(trim = FALSE, color = "black")+
  theme_classic()+
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
  stat_summary(fun = "mean", geom ="point", shape = 18)+
  stat_summary(fun.data = "mean_se", geom ="errorbar", width = 0.15)+
  scale_fill_manual(values =  c( "#00BFC4", "#F8766D"))+
  geom_segment(aes(x = 1, xend = 2, y = max(plotda$ENSG00000196735) *1.1, yend = max(plotda$ENSG00000196735) *1.1))+
  annotate("text", x = 1.5, y = max(plotda$ENSG00000196735) *1.11, label = "*", size = 10)+
  xlab("group")+
  ylab("Expression of HLA-DQA1")+
  ggtitle("GSE185263")
dev.off()

ges1 <- getGEO("GSE154918", GSEMatrix = T)
exp1 <- read.delim("GSE154918.tsv.gz", header = T)
exp1 <- exp1[rowMeans(exp1) > 1,]
row.names(exp1) <- exp1$GeneID

group1 <- data.frame(ges1[[1]]$geo_accession, ges1[[1]]$`status:ch1`)
health1 <- group1[group1$ges1..1....status.ch1. == "Hlty",]
colnames(health1) <- c("id","disease")
sepsis1 <- group1[(group1$ges1..1....status.ch1. != "Hlty" & group1$ges1..1....status.ch1. != "Inf1_P"), ]
colnames(sepsis1) <- c("id","disease")
sepsis1$disease <- rep("sepsis", times = length(sepsis1$id))

h2s1 <- rbind(health1, sepsis1)
h2s1$disease <- as.factor(h2s1$disease)
exda1 <- exp1[,h2s1$id]

dds1 <- DESeqDataSetFromMatrix(exda1, colData = h2s1, design = ~ disease)
dds1 <- DESeq(dds1)
res1 <- results(dds1)
res1$padj[res1@rownames == 3119]
write.table(res1[c("3117", "3119"),], "GSE154918.txt", quote = F )

plotda <- cbind(h2s1, t(exp1['3117',][,h2s1$id]))
colnames(plotda) <- c("id", "disease", "ge"  )

pdf("GSE154918_A.pdf", height = 4, width = 4)
ggplot(plotda, aes(x = disease, y = ge, fill = disease))+
  geom_violin(trim = FALSE, color = "black")+
  theme_classic()+
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
  stat_summary(fun = "mean", geom ="point", shape = 18)+
  stat_summary(fun.data = "mean_se", geom ="errorbar", width = 0.15)+
  scale_fill_manual(values =  c( "#00BFC4", "#F8766D"))+
  scale_x_discrete(labels = c("healthy", "sepsis"))+
  geom_segment(aes(x = 1, xend = 2, y = max(plotda$ge) *1.3, yend = max(plotda$ge) *1.3))+
  annotate("text", x = 1.5, y = max(plotda$ge) *1.31, label = "*", size = 10)+
  xlab("group")+
  ylab("Expression of HLA-DQA1")+
  ggtitle("GSE154918")
dev.off()

ges2 <- getGEO(filename = "./GSE134347_series_matrix.txt.gz", getGPL = F)#Limma
exp2 <- data.frame(ges2@assayData$exprs)
ann <- read.delim("GPL17586-45144.txt", comment.char = "#", header = T)
ann1 <- separate_wider_delim(ann, gene_assignment, "//", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
                                   too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$b, row.names = ann1$ID)
exp3 <- merge(exp2, ann2, by = "row.names")
exp3 <- exp3[exp3$ann1.b != " --- ",]
exp4 <- exp3 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
  arrange(desc(rowMean)) %>%
  distinct(ann1.b, .keep_all = TRUE) %>%
  select(-rowMean)
row.names(exp4) <- exp4$ann1.b
exp4 <- exp4 %>% select(-Row.names) %>% select(-ann1.b)
sum(is.na(exp4))

group2 <- data.frame(ges2$geo_accession, ges2$`disease state:ch1`)
health2 <- group2[group2$ges2..disease.state.ch1. == "healthy",]
colnames(health2) <- c("id", "disease")
sepsis2 <- group2[group2$ges2..disease.state.ch1. == "sepsis",]
colnames(sepsis2) <- c("id", "disease")
h2s2 <- rbind(health2, sepsis2)

my <- exp4[,h2s2$id]

library(limma)
design <- model.matrix(~0+h2s2$disease)
colnames(design) <- c("healthy", "sepsis")
row.names(design) <- h2s2$id

contrast.matrix <-makeContrasts(sepsis - healthy, levels = design)
fit <- lmFit(my,design)
fit1 <- contrasts.fit(fit,contrast.matrix)
fit2 <- eBayes(fit1, 0.01)
tempOutput <- topTable(fit2,adjust = "fdr", coef = 1, n = Inf)
nrDEG <- na.omit(tempOutput)
nrDEG[" HLA-DQA1 ",]
write.table(nrDEG[c(" HLA-DQB1 ", " HLA-DQA1 "),], "GSE134347.txt", quote = F)

plotda <- cbind(h2s2, t(my[" HLA-DQB1 ",]))
head(plotda)
colnames(plotda) <- c("id", "disease", "ge"  )

pdf("GSE134347.pdf", height = 4, width = 4)
ggplot(plotda, aes(x = disease, y = ge, fill = disease))+
  geom_violin(trim = FALSE, color = "black")+
  theme_classic()+
  theme(legend.position = "none", plot.title = element_text(hjust = 0.5))+
  stat_summary(fun = "mean", geom ="point", shape = 18)+
  stat_summary(fun.data = "mean_se", geom ="errorbar", width = 0.15)+
  scale_fill_manual(values =  c( "#00BFC4", "#F8766D"))+
  scale_x_discrete(labels = c("healthy", "sepsis"))+
  geom_segment(aes(x = 1, xend = 2, y = max(plotda$ge) *1.1, yend = max(plotda$ge) *1.1))+
  annotate("text", x = 1.5, y = max(plotda$ge) *1.11, label = "*", size = 10)+
  xlab("group")+
  ylab("Expression of HLA-DQB1")+
  ggtitle("GSE134347")
dev.off()

###根据HLA-DQA1和HLA-DQB1在11个队列中绘制ROC曲线
library(pROC)
rocdata <- t(exp[c("ENSG00000196735", "ENSG00000179344"),])
rocdata <- data.frame(rocdata, group$disease)
ggplot(rocdata, aes(x = log(ENSG00000196735), y = log(ENSG00000179344), colour = group.disease))+
  geom_point()
pdf("GSE185263_r.pdf", height = 4.5, width = 4.5)
roc_a <- plot.roc(rocdata$group.disease, rocdata[,1], col = "#00828B", main = "GSE185263")
roc_b <- lines.roc(rocdata$group.disease, rocdata[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b$auc, 3)),
     col = "#F03752")
dev.off()

rocdata1 <- t(exp1[c("3117", "3119"),])[-1,]
rocdata1 <- data.frame(rocdata1, group1$ges1..1....status.ch1.)
rocdata1 <- do.call(rbind, lapply(c("Hlty", "Shock_P","Seps_P"), function (x) {rocdata1[rocdata1$group1.ges1..1....status.ch1. == x,]}))
rocdata1[rocdata1$group1.ges1..1....status.ch1. == "Shock_P", ]$group1.ges1..1....status.ch1. <- "Seps_P"

ggplot(rocdata1, aes(x = log(X3117), y = log(X3119), colour = group1.ges1..1....status.ch1.))+
  geom_point()

pdf("GSE154918_r.pdf", height = 4.5, width = 4.5)
roc_a1 <- plot.roc(rocdata1[,3], rocdata1[,1], col = "#00828B",
                  main = "GSE154918")
roc_b1 <- lines.roc(rocdata1[,3], rocdata1[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a1$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b1$auc, 3)),
     col = "#F03752")
dev.off()

rocdata2 <- t(exp4[c(" HLA-DQA1 ", " HLA-DQB1 "),])
rocdata2 <- data.frame(rocdata2, group2$ges2..disease.state.ch1.)
rocdata2 <- rocdata2[rocdata2$group2.ges2..disease.state.ch1. != "noninfectious",]
ggplot(rocdata2, aes(x = X.HLA.DQA1., y = X.HLA.DQB1.,
                     colour = group2.ges2..disease.state.ch1.))+geom_point()

pdf("GSE134347_r.pdf", height = 4.5, width = 4.5)
roc_a2 <- plot.roc(rocdata2$group2.ges2..disease.state.ch1., rocdata2[,1], col = "#00828B", main = "GSE134347")
roc_b2 <- lines.roc(rocdata2$group2.ges2..disease.state.ch1., rocdata2[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a2$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b2$auc, 3)),
     col = "#F03752")
dev.off()

ges40 <- getGEO(filename = "./GSE137340_series_matrix.txt.gz", getGPL = F)
group40 <- data.frame(ges40$`diagnosis:ch1`, ges40$`time of sample collection:ch1`)
exp40 <- data.frame(ges40@assayData$exprs)
ann <- read.delim("GPL10558.annot.gz", skip = 28, header = T)
ann1 <- data.frame(ann$Gene.symbol, row.names = ann$ID)
exp40 <- merge(ann1, exp40, by = "row.names")

#无HLA-DQA1，只有HLA-DQA2,
rocdata40 <- t(exp40[exp40$ann.Gene.symbol == "HLA-DQB1",])[-c(1,2),]
rocdata40 <- data.frame(rocdata40, group40)
rocdata40 <- rocdata40[rocdata40$ges40..time.of.sample.collection.ch1. != "24 hrs",]
rocdata40[rocdata40$ges40..diagnosis.ch1. != "Healthy",][2] <- "sepsis"
ggplot(rocdata2, aes(x = X.HLA.DQA1., y = X.HLA.DQB1.,
                     colour = group2.ges2..disease.state.ch1.))+geom_point()

pdf("GSE137340_r.pdf", height = 4.5, width = 4.5)
roc_b40 <- plot.roc(rocdata40$ges40..diagnosis.ch1., as.numeric(rocdata40[,1]),
                    col = "#F03752", main = "GSE137340")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b40$auc, 3)),
     col = "#F03752")
dev.off()

ges63 <- getGEO(filename = "./GSE69063_series_matrix.txt.gz", getGPL = F)
group63 <- data.frame(ges63$`disease status:ch1`, ges63$`time point:ch1`)
exp63 <- data.frame(ges63@assayData$exprs)
rocdata63 <- t(exp63[c("3117_at", "3119_at"),])
rocdata63 <- data.frame(rocdata63, group63)
rocdata63 <- do.call(rbind, lapply(c("Healthy control","Sepsis"), function (x) {
  rocdata63[rocdata63$ges63..disease.status.ch1.== x,]}))
rocdata63 <- rocdata63[rocdata63$ges63..time.point.ch1. != "T1",]
rocdata63 <- rocdata63[rocdata63$ges63..time.point.ch1. != "T2",]

ggplot(rocdata63, aes(x = X3117_at, y = X3119_at,
                     colour = ges63..disease.status.ch1.))+geom_point()

pdf("GSE69063_r.pdf", height = 4.5, width = 4.5)
roc_a63 <- plot.roc(rocdata63$ges63..disease.status.ch1., rocdata63[,1], col = "#00828B", main = "GSE69063")
roc_b63 <- lines.roc(rocdata63$ges63..disease.status.ch1., rocdata63[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a63$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b63$auc, 3)),
     col = "#F03752")
dev.off()

ges59 <- getGEO(filename = "./GSE100159_series_matrix.txt.gz", getGPL = F)
group59 <- ges59$`sample class:ch1`
exp59 <- data.frame(ges59@assayData$exprs)
rocdata59 <- data.frame(t(exp59["ILMN_1661266",]), group59)
rocdata59[rocdata59$group59 != "Control", ][2] <- "sepsis"

pdf("GSE100159_r.pdf", height = 4.5, width = 4.5)
roc_b59 <- plot.roc(rocdata59$group59, rocdata59[,1], col = "#F03752",
                    main = "GSE100159")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b59$auc, 3)),
     col = "#F03752")
dev.off()

ges33 <- getGEO(filename = "./GSE95233_series_matrix.txt.gz", getGPL = F)
exp33 <- data.frame(ges33@assayData$exprs)
group33 <- data.frame(ges33$source_name_ch1, ges33$`time point:ch1`)
ann <- read.delim("GPL570.annot.gz", skip = 27, header = T)
ann1 <- separate_wider_delim(ann, Gene.symbol, "///", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
                             too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$a, row.names = ann1$ID)
exp33 <- merge(ann2, exp33, by = "row.names")
exp33 <- exp33 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
  arrange(desc(rowMean)) %>%
  distinct(ann1.a, .keep_all = TRUE) %>%
  select(-rowMean)
row.names(exp33) <- exp33$ann1.a
exp33 <- exp33 %>% select(-Row.names) %>% select(-ann1.a)

rocdata33 <- t(exp33[c("HLA-DQA1", "HLA-DQB1"),])
rocdata33 <- data.frame(rocdata33, group33)
rocdata33 <- rocdata33[rocdata33$ges33..time.point.ch1. != "D02",]
rocdata33 <- rocdata33[rocdata33$ges33..time.point.ch1. != "D03",]

pdf("GSE95233_r.pdf", height = 4.5, width = 4.5)
roc_a33 <- plot.roc(rocdata33$ges33..time.point.ch1., rocdata33[,1], col = "#00828B",
                    main = "GSE95233")
roc_b33 <- lines.roc(rocdata33$ges33..time.point.ch1., rocdata33[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a33$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b33$auc, 3)),
     col = "#F03752")
dev.off()

ges65 <- getGEO(filename = "./GSE57065_series_matrix.txt.gz", getGPL = F)
exp65 <- data.frame(ges65@assayData$exprs)
group65 <- data.frame(ges65$source_name_ch1, ges65$description)
ann <- read.delim("GPL570.annot.gz", skip = 27, header = T)
ann1 <- separate_wider_delim(ann, Gene.symbol, "///", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
                             too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$a, row.names = ann1$ID)
exp65 <- merge(ann2, exp65, by = "row.names")
exp65 <- exp65 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
  arrange(desc(rowMean)) %>%
  distinct(ann1.a, .keep_all = TRUE) %>%
  select(-rowMean)
row.names(exp65) <- exp65$ann1.a
exp65 <- exp65 %>% select(-Row.names) %>% select(-ann1.a)

rocdata65 <- t(exp65[c("HLA-DQA1", "HLA-DQB1"),])
rocdata65 <- data.frame(rocdata65, group65)
rocdata65 <- rocdata65[nchar(rocdata65$ges65.description) != 7,]
rocdata65$group <-c(rep("sepsis", times = 28), rep("health", times = 25))

pdf("GSE57065_r.pdf", height = 4.5, width = 4.5)
roc_a65 <- plot.roc(rocdata65$group, rocdata65[,1], col = "#00828B",
                    main = "GSE57065")
roc_b65 <- lines.roc(rocdata65$group, rocdata65[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a65$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b65$auc, 3)),
     col = "#F03752")
dev.off()

ges28 <- getGEO(filename = "./GSE69528_series_matrix.txt.gz", getGPL = F)
group28 <- data.frame(ges28$`study group:ch1`, ges28$`pathogens:ch1`)
exp28 <- data.frame(ges28@assayData$exprs)
rocdata28 <- data.frame(t(exp28["ILMN_1661266",]), group28)
rocdata28 <- rocdata28[rocdata28$ges28..study.group.ch1. != "Uninfected type 2 diabetes mellitus",]
rocdata28[rocdata28$ges28..study.group.ch1. != "Uninfected healthy", ][2] <- "sepsis"

pdf("GSE69528_r.pdf", height = 4.5, width = 4.5)
roc_b28 <- plot.roc(rocdata28$ges28..study.group.ch1., rocdata28[,1], col = "#F03752",
                    main = "GSE69528")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b28$auc, 3)),
     col = "#F03752")
dev.off()

ges50 <- getGEO(filename = "./GSE28750_series_matrix.txt.gz", getGPL = F)
exp50 <- data.frame(ges50@assayData$exprs)
group50 <- data.frame(ges50$`health status:ch1`)
ann <- read.delim("GPL570.annot.gz", skip = 27, header = T)
ann1 <- separate_wider_delim(ann, Gene.symbol, "///", names = c("a", "b", "c", "d", "e", "f","g", "h", "i", "j", "k", "l", "m", "n", "o"),
                             too_many = "merge", too_few = "align_start")
ann2 <- data.frame(ann1$a, row.names = ann1$ID)
exp50 <- merge(ann2, exp50, by = "row.names")
exp50 <- exp50 %>% mutate(rowMean = rowMeans(.[grep("GSM", names(.))])) %>%
  arrange(desc(rowMean)) %>%
  distinct(ann1.a, .keep_all = TRUE) %>%
  select(-rowMean)
row.names(exp50) <- exp50$ann1.a
exp50 <- exp50 %>% select(-Row.names) %>% select(-ann1.a)

rocdata50 <- t(exp50[c("HLA-DQA1", "HLA-DQB1"),])
rocdata50 <- data.frame(rocdata50, group50)
rocdata50 <- rocdata50[rocdata50$ges50..health.status.ch1. != "POST_SURGICAL",]

pdf("GSE28750_r.pdf", height = 4.5, width = 4.5)
roc_a50 <- plot.roc(rocdata50$ges50..health.status.ch1., rocdata50[,1], col = "#00828B",
                    main = "GSE28750")
roc_b50 <- lines.roc(rocdata50$ges50..health.status.ch1., rocdata50[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a50$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b50$auc, 3)),
     col = "#F03752")
dev.off()

ges14 <- getGEO(filename = "./GSE54514_series_matrix.txt.gz", getGPL = F)
group14 <- data.frame(strsplit(ges14$`group_day:ch1`, "_"))
exp14 <- data.frame(ges14@assayData$exprs)
rocdata14 <- data.frame(t(exp14["ILMN_1661266",]), t(group14))
rocdata14 <- rocdata14[rocdata14$X2 == "D1", ]
rocdata14 <- rocdata14[]

pdf("GSE54514_r.pdf", height = 4.5, width = 4.5)
roc_b14 <- plot.roc(rocdata14$X1, rocdata14[,1], col = "#F03752",
                    main = "GSE54514")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b14$auc, 3)),
     col = "#F03752")
dev.off()

ges13 <- getGEO(filename = "./GSE139913_series_matrix.txt.gz", getGPL = F)
exp13 <- read.table("GSE139913_norm_data.txt.gz", header = T, row.names = 1)
group13 <- ges13$description
rocdata13 <- t(exp13[c("HLA-DQA1", "HLA-DQB1"),])
rocdata13 <- data.frame(rocdata13, group13)
rocdata13 <- rocdata13[rocdata13$group13 != "Non-Sepsis",]

pdf("GSE139913_r.pdf", height = 4.5, width = 4.5)
roc_a13 <- plot.roc(rocdata13$group13, rocdata13[,1], col = "#00828B",
                    main = "GSE139913")
roc_b13 <- lines.roc(rocdata13$group13, rocdata13[,2], col = "#F03752")
text(0.4, 0.2, labels = paste("HLA-DQA1_AUC = ", round(roc_a13$auc, 3)),
     col = "#00828B")
text(0.4, 0.15, labels = paste("HLA-DQB1_AUC = ", round(roc_b13$auc, 3)),
     col = "#F03752")
dev.off()


##########################################################################################################################################################
#以下分析代码不是使用的r，使用相应软件。
##########################################################################################################################################################

###1000gene转换为PLINK格式
https://mirrors.sjtug.sjtu.edu.cn/cran/web/packages/snpsettest/vignettes/reference_1000Genomes.html
解压
plink2 --zst-decompress all_phase3.pgen.zst > all_phase3.pgen

筛选染色体和人群
plink2 --pfile all_phase3 vzs --chr 1-22 --output-chr 26 --max-alleles 2 --rm-dup exclude-mismatch --set-missing-var-ids '@_#_$1_$2' --make-pgen --out all_phase3_autosomes --allow-extra-chr
plink2 --pfile all_phase3_autosomes --keep EUR_1kg_samples.txt --make-pgen --out EUR_phase3_autosomes

转为PLINK 1二进制文件。
plink2 --pfile EUR_phase3_autosomes --maf 0.005 --make-bed --out EUR_phase3_autosomes

然后运行文件，将每一条染色体的数据都单独划分出来。

@echo off
set /a count=1
:loop
if %count% leq 22 (
    plink2 --bfile EUR_phase3_autosomes --chr %count% --make-bed --out EUR_phase3_chr%count%
    set /a count=%count%+1
    goto loop
)
echo All chr created!
############################################################################################################################################

#给GWAS数据按照变异位点注释出rs号
#参考链接
#https://gwaslab.org/2021/05/25/annovar/
#https://annovar.openbioinformatics.org/en/latest/

#下载注释文件。Download appropriate database files avsnp150
perl annotate_variation.pl -buildver hg19 -downdb -webfrom annovar avsnp150 humandb/

#按照染色体号和位置进行注释。Annotate the variants base on chr:pos
perl annotate_variation.pl -filter -dbtype avsnp150 -buildver hg19 ana.avinput humandb/ -out ana
##############################################################################################################################################

SMR & HEIDI analysis

smr-1.3.1 --bfile EUR_phase3_chr21 --gwas-summary 21gwas.ma --beqtl-summary cis-eQTLs-full_eQTLGen_AF_incl_nr_formatted_20191212.new.txt_besd-dense --out mysmr --thread-num 10 --extract-probe myprobe.list

SMR locus plot data

smr-1.3.1 --bfile 1000gen/EUR_phase3_chr6 --gwas-summary GCST90270871.ma --beqtl-summary eQTL_besd_lite\Whole_Blood.lite --out dqb1_plot --plot --probe ENSG00000179344 --probe-wind 500 --gene-list glist-hg19

#设置个循环
@echo off
set /a count=1
mkdir results_blood
:loop
if %count% leq 22 (
    echo doing chr %count%
    smr-1.3.1 --bfile 1000gen\EUR_phase3_chr%count% --gwas-summary GCST90270871.ma --beqtl-summary eQTL_besd_lite\Whole_Blood.lite --out results_blood\%count%smr --thread-num 13
    set /a count=%count%+1
    goto loop
)
echo done!
pause

#################################################################################################################################################