rm(list=ls()) # library(edgeR) # library(DESeq2) # library(gplots) require(Biobase) require(DESeq2) require(gplots) require(VennDiagram) dummy <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/htseq-countFiles/Gene Name Sorted/All/CombinedNameSorted.txt", header=TRUE, row.names="gene_name") # dummy2groups <- dummy[,13:20] angio1 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Old Duplicates Removed/GO Angiogenesis.txt") rownames(angio1) <- angio1[,1] vasculo1 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Old Duplicates Removed/GO EC Differentiation.txt") rownames(vasculo1) <- vasculo1[,1] signal1 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Old Duplicates Removed/Hallmark Protein Secretion.txt") rownames(signal1) <- signal1[,1] # decay1 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Old Duplicates Removed/GO Necrotic Cell Death.txt") # rownames(decay1) <- decay1[,1] # decay2 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Old Duplicates Removed/Hallmark Hypoxia.txt") # rownames(decay2) <- decay2[,1] migra1 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Duplicates Removed/GO Cell Cell Adhesion.txt") rownames(migra1) <- migra1[,1] migra2 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Duplicates Removed/GO Cell Matrix Adhesion.txt") rownames(migra2) <- migra2[,1] immuno1 <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/GO Analysis/Old Duplicates Removed/GO Immune Response.txt") rownames(immuno1) <- immuno1[,1] # angioGenesList <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/Gene Clustering/Angiogenesis-gene-list-EN.txt", header=TRUE) # rownames(angioGenesList) <- angioGenesList[,2] # immuneGenesList <- read.delim("/Users/Beloved/Desktop/Research/Data/Sequencing/Gene Clustering/Immune Response gene list.txt", header=TRUE) # rownames(immuneGenesList) <- immuneGenesList[,6] # newCountFile <- dummy2groups # angioCountFile <- dummy2groups[rownames(angioGenesList),] # Big list # immuneCountFile <- dummy2groups[rownames(immuneGenesList),] angio1Count <- dummy[rownames(angio1),] # decay1Count <- dummy[rownames(decay1),] # decay2Count <- dummy[rownames(decay2),] immuno1Count <- dummy[rownames(immuno1),] migra1Count <- dummy[rownames(migra1),] migra2Count <- dummy[rownames(migra2),] signal1Count <- dummy[rownames(signal1),] vasculo1Count <- dummy[rownames(vasculo1),] # angio1Count <- dummy2groups[rownames(angio1),] # decay1Count <- dummy2groups[rownames(decay1),] # decay2Count <- dummy2groups[rownames(decay2),] # immuno1Count <- dummy2groups[rownames(immuno1),] # migra1Count <- dummy2groups[rownames(migra1),] # migra2Count <- dummy2groups[rownames(migra2),] # signal1Count <- dummy2groups[rownames(signal1),] # vasculo1Count <- dummy2groups[rownames(vasculo1),] # Remvoe N/A rows and those with fewer than 10 hits # angioCountFile <- angioCountFile[complete.cases(angioCountFile[,]),] # angioCountFile <- angioCountFile[rowSums(angioCountFile) > 10, ] # immuneCountFile <- immuneCountFile[complete.cases(immuneCountFile[,]),] # immuneCountFile <- immuneCountFile[rowSums(immuneCountFile) > 10, ] angio1Count <- angio1Count[complete.cases(angio1Count[,]),] angio1Count <- angio1Count[rowSums(angio1Count) > 10, ] # decay1Count <- decay1Count[complete.cases(decay1Count[,]),] # decay1Count <- decay1Count[rowSums(decay1Count) > 10, ] # decay2Count <- decay2Count[complete.cases(decay2Count[,]),] # decay2Count <- decay2Count[rowSums(decay2Count) > 10, ] immuno1Count <- immuno1Count[complete.cases(immuno1Count[,]),] immuno1Count <- immuno1Count[rowSums(immuno1Count) > 10, ] migra1Count <- migra1Count[complete.cases(migra1Count[,]),] migra1Count <- migra1Count[rowSums(migra1Count) > 10, ] migra2Count <- migra2Count[complete.cases(migra2Count[,]),] migra2Count <- migra2Count[rowSums(migra2Count) > 10, ] signal1Count <- signal1Count[complete.cases(signal1Count[,]),] signal1Count <- signal1Count[rowSums(signal1Count) > 10, ] vasculo1Count <- vasculo1Count[complete.cases(vasculo1Count[,]),] vasculo1Count <- vasculo1Count[rowSums(vasculo1Count) > 10, ] # Bind gene ontology lists together newCountFile <- rbind(angio1Count, # decay1Count, decay2Count, immuno1Count, migra1Count, migra2Count, signal1Count, vasculo1Count ) # antiAngioCountFile, necroticDeathCountFile, hallmarkImmuneCountFile) # newCountFile <- dummy[c(rownames(angioGenesList) newCountFile <- newCountFile[complete.cases(newCountFile[,]),] cells <- factor(c(rep("MNC",4),rep("CD31N",4), rep("CD31Pos",4),rep("CD31P14N",4),rep("CD14P",4))) # cells <- factor(c(rep("CD31P14N",4),rep("CD14P",4))) ##Processing I don't yet understand #z <- DGEList(counts=newCountFile,group=cells) #z <- calcNormFactors(y) #designGO <- model.matrix(~group) #z <- estimateDisp(z, designGO) #create sample table sampleGOTable <- data.frame(cells = as.factor(cells)) rownames(sampleGOTable) <- colnames(newCountFile) # sampleGOTable #DeSeq Analysis deseqGO <- DESeqDataSetFromMatrix(countData = newCountFile, colData = sampleGOTable, design = ~cells) # deseqGO deseqGO <- deseqGO[rowSums(counts(deseqGO)) > 10, ] # deseqGO d.deseqGO <- DESeq(deseqGO) vsdBGO <- varianceStabilizingTransformation(d.deseqGO) # plotPCA(vsdBGO, intgroup=c("cells")) vsdB_tableGO <- as.data.frame(assay(vsdBGO)) nameVector <- c("MNC", "CD31N", "CD31Pos", "CD31P14N", "CD14P") namesList <- rep(0, 10) pvalue <- rep(0,nrow(vsdB_tableGO)*10) dim(pvalue) <- c(nrow(vsdB_tableGO),10) avgList <- rep(0,nrow(vsdB_tableGO)*10*2) dim(avgList) <- c(nrow(vsdB_tableGO),2,10) for (k in 1:((ncol(vsdB_tableGO)-4)/4)){ if (k==1){ l <- 0 } for (j in (k+1):(ncol(vsdB_tableGO)/4)){ l <- l + 1 namesList[l] <- paste(nameVector[k],"-",nameVector[j]) for (i in 1:nrow(vsdB_tableGO)){ if(sum(vsdB_tableGO[i,(4*k-3):(4*k)])!=sum(vsdB_tableGO[i,(4*j-3):(4*j)])){ pvalue[i,l] <- try(t.test(vsdB_tableGO[i,(4*k-3):(4*k)],vsdB_tableGO[i,(4*j-3):(4*j)])$p.value) }else{ pvalue[i,l] <- 1 } mean1 <- 0 mean2 <- 0 for (m in 0:3){ mean1 <- mean1 + vsdB_tableGO[i,(4*k-3+m)] mean2 <- mean2 + vsdB_tableGO[i,(4*j-3+m)] } avgList[i,1,l] <- (mean1/4) avgList[i,2,l] <- (mean2/4) } } } colnames(pvalue) <- namesList rownames(pvalue) <- rownames(vsdB_tableGO) largeDEList <- rep(0, nrow(vsdB_tableGO)*3*(ncol(pvalue)+1)) dim(largeDEList) <- c(nrow(vsdB_tableGO) , 3 , (ncol(pvalue)+1)) for (k in 1:ncol(pvalue)){ for (j in 1:nrow(pvalue)){ largeDEList[j,3,k] <- pvalue[j,k] } } for (j in 1:((ncol(vsdB_tableGO)-4)/4)){ if (j==1){ l <- 0 } for (i in (j+1):(ncol(vsdB_tableGO)/4)){ l <- l + 1 largeDEList[,1,l] <- rep(nameVector[j],nrow(pvalue)) largeDEList[,2,l] <- rep(nameVector[i],nrow(pvalue)) } } # largeDEList[1,2,1] # pvalue <- rep(0,nrow(vsdB_tableGO)) # for (i in 1:nrow(vsdB_tableGO)){ # pvalue[i] <- t.test(vsdB_tableGO[i,1:4],vsdB_tableGO[i,5:8])$p.value # } # Count file ordering is as follows: # angio1Count, decay1Count, decay2Count, immuno1Count, migra1Count, migra2Count, signal1Count, vasculo1Count GOlabel <- c(rep("GO Angiogenesis", nrow(angio1Count)), # rep("GO Necrotic Cell Death", nrow(decay1Count)), # rep("Hallmark Hypoxia ", nrow(decay2Count)), rep("GO Immune Response", nrow(immuno1Count)), rep("GO Cell-Cell Adhesion", nrow(migra1Count)), rep("GO Cell-Matrix Adhesion",nrow(migra2Count)), rep("Hallmark Protein Secretion", nrow(signal1Count)), rep("GO Endothelial Diffrentiation", nrow(vasculo1Count))) largeDEList[,1,11] <- rownames(pvalue) largeDEList[,2,11] <- GOlabel largeDEList[,3,11] <- c(1:nrow(pvalue)) largeDEList[200,2,3] fullSet <- largeDEList[,1:3,11] colnames(fullSet) <- c("Genes", "GO Group", "Chronology") nomenclature <- colnames(fullSet) for (i in 1:(ncol(pvalue))){ fullSet <- cbind(fullSet, largeDEList[,3,i], avgList[,1:2,i]) nomenclature <- c(nomenclature, "pvalue", largeDEList[2,1,i], largeDEList[2,2,i]) colnames(fullSet) <- nomenclature } write.table(fullSet, "/Users/Beloved/Desktop/Research/Data/Sequencing/Gene Clustering/Differentially expressed genes/DE Genes All Groups/Combined DE.txt", sep="\t") for (i in 1:(ncol(pvalue))){ factors <- cbind(largeDEList[,1:3,11], largeDEList[,3,i], avgList[,1:2,i]) colnames(factors) <- c("Genes", "GO Group", "Chronology", "pvalue", largeDEList[2,1,i], largeDEList[2,2,i]) write.table(factors, paste("/Users/Beloved/Desktop/Research/Data/Sequencing/Gene Clustering/Differentially expressed genes/DE Genes All Groups/DE", colnames(pvalue)[i], "Data.txt"), sep="\t") } pTableGO <- cbind(vsdB_tableGO,data.frame(pvalue),data.frame(GOlabel)) pTableGO.a05 <- pTableGO[pTableGO$pvalue < 0.05,] chronology <- c(1:nrow(pTableGO.a05)) expression <- rep("0",nrow(pTableGO.a05)) for (i in 1:nrow(pTableGO.a05)){ if((rowSums(pTableGO.a05[i,1:4])/rowSums(pTableGO.a05[i,5:8]))>1){ expression[i] <- "CD14N" } else { expression[i] <- "CD14P" } } pTableGO.a05 <- cbind(pTableGO.a05,data.frame(chronology),data.frame(expression)) # Order table according to ratio of average expression # pTableGO.a05 <- pTableGO.a05[order((pTableGO.a05$CD14P.Bio1.Tech1 + pTableGO.a05$CD14P.Bio1.Tech2 + pTableGO.a05$CD14P.Bio2.Tech1 + pTableGO.a05$CD14P.Bio2.Tech2) / (pTableGO.a05$CD31P14N.Bio1.Tech1 + pTableGO.a05$CD31P14N.Bio1.Tech2 + pTableGO.a05$CD31P14N.Bio2.Tech1 + pTableGO.a05$CD31P14N.Bio2.Tech2), decreasing = TRUE),] # Set table order back to default # pTableGO.a05 <- pTableGO.a05[order(pTableGO.a05$chronology, decreasing = FALSE),] # Order table by p-value pTableGO.a05 <- pTableGO.a05[order(pTableGO.a05$pvalue, decreasing = FALSE),] importantInfo <- cbind(rownames(pTableGO.a05),pTableGO.a05[,9:12]) write.table(importantInfo, "/Users/Beloved/Desktop/Research/Data/Sequencing/Gene Clustering/Differentially expressed genes/No Death Differential Genes and Category.txt",sep="\t") # write.xlsx(importantInfo, ) vsdB_tableGO <- pTableGO.a05[,1:8] # Create new count file of analyzed, transformed data # write.table(as.matrix(vsdB_tableGO), "/Users/Beloved/Desktop/Research/Data/Sequencing/CD31 CD14 Study/htseq-countFiles/transformedDataCountFile.txt",sep="\t") # orders table from comparativly high CD14+ to comparatively low CD31+14- vsdB_tableGO <- vsdB_tableGO[order((vsdB_tableGO$CD14P.Bio1.Tech1 + vsdB_tableGO$CD14P.Bio1.Tech2 + vsdB_tableGO$CD14P.Bio2.Tech1 + vsdB_tableGO$CD14P.Bio2.Tech2) / (vsdB_tableGO$CD31P14N.Bio1.Tech1 + vsdB_tableGO$CD31P14N.Bio1.Tech2 + vsdB_tableGO$CD31P14N.Bio2.Tech1 + vsdB_tableGO$CD31P14N.Bio2.Tech2), decreasing = TRUE),] # Create Heatmap # Create heatmap using new, reordered dendogram heatmap.2(as.matrix(vsdB_tableGO), dendrogram = "column", col=greenred(75), # scale goes from low (green) to high (red) scale="row", key=T, key.title = NA, keysize=1, density.info="none", na.rm = TRUE, labRow=FALSE, trace="none", cexRow = 0.75, # Size of row labels cexCol=0.75, # Size of column labels key.par = list(cex=0.5), # Size of words on Color Key margins=c(8,5), # Vector describing height and width for column and row names, respectively lmat=rbind(c(4,3),c(2,1)), lwid=c(0.5,2), # Width distribution of figure lhei=c(0.5,5), # Height distribution of figure Rowv = FALSE) # No ordering of rows grid.newpage() venn.RNA_Seq_MNC <- draw.quad.venn( area1 = 200, area2 = 240, area3 = 166, area4 = 273, n12 = 22, n13 = 47, n14 = 20, n23 = 65, n24 = 176, n34 = 36, n123 = 7, n124 = 16, n134 = 4, n234 = 23, n1234 = 2, category = c("CD31- (200)", "CD31+ (240)", "CD31+14- (166)", "CD31+14+ (273)"), fill = c("cornflower blue", "pink", "light green", "mediumorchid"), fontface = rep("plain", 15), fontfamily = rep("sans", 15), cat.fontface = rep("plain", 4), cat.fontfamily = rep("sans", 4), sclae = F, lty = "blank", cex = 2, cat.cex = 1.5 ) grid.newpage() venn.RNA_Seq_CD31N <- draw.triple.venn( area3 = 286, area1 = 241, area2 = 314, n13 = 134, n23 = 226, n12 = 129, n123 = 100, category = c("CD31+14- (241)", "CD31+14+ (314)", "CD31+ (286)"), fill = c("dark blue", "red", "yellow"), fontface = rep("plain", 7), fontfamily = rep("sans", 7), cat.fontface = rep("plain", 3), cat.fontfamily = rep("sans", 3), sclae = F, lty = "blank", cex = 2, cat.cex = 1.5 )