calculating p-value for GO terms
1
0
Entering edit mode
5 months ago

Hello scholars I have a list of GO ID, GO term, and gene ID. I want to analyze gene enrichment via topGO. but I found out, I need the p-values for my gene list. Through which package or script can I get the p-value for my gene list?

GO_ID • 575 views
ADD COMMENT
1
Entering edit mode
5 months ago

You don't necessarily need p-values for TopGO, see this previous answer, specifically to look at the 'Predefined list of interesting genes' With No P Values Of Gene Difference Expression, How Could I Do Go Analysis With R Package Gostats? (in fact, there are many similar questions to yours on biostars :) )

ADD COMMENT
0
Entering edit mode

thank you so much I used the following script:

library(topGO)
library(AnnotationDbi)
library(S4Vectors)
library(ALL)
library(Rgraphviz)
library(ggplot2)
library(RColorBrewer)
library(wesanderson)
library(viridis)
library(tidyverse)
data(ALL)
data(geneList)
setwd("C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO")
gene.go <- read.delim("GO_Gene.txt", stringsAsFactors = F)

gene.go <- gene.go[which(gene.go$GO != ""),] # take out genes without GO terms
head(gene.go)
gene2GO <- tapply(gene.go$GO, gene.go$geneID, function(x)x)
head(gene2GO)


#gene list of chromosome ...###
#Enter the gene of the desired list as a list of characters 
x <- c(1:6)
print(x)
#Enter the gene of the desired list as a list of characters 
##belowe methods make a vector from your gene list because GOdata needs vector format to calculate GO enrichment
names(x) <-  c ("CGRA01v4_09953",
                "CGRA01v4_09952",
                "CGRA01v4_09951",
                "CGRA01v4_09950",
                "CGRA01v4_09949",
                "CGRA01v4_09948")


all_genes<- x
print(all_genes)



#prepare GOdata according to Biological Process####

GOdata <- new("topGOdata",
              ontology = "BP",
              allGenes = all_genes,
              geneSelectionFun = function(x)x,
              annot =  annFUN.gene2GO, gene2GO=gene2GO)
print(GOdata)

resultFisher <- runTest(GOdata, algorithm = "elim", statistic = "fisher")

tab <- GenTable(GOdata, raw.p.value = resultFisher, topNodes = length(resultFisher@score),
                numChar = 120)
head(tab)

# Kolmogorov-Smirnov test for Biological Process####
resultKS <- runTest(GOdata, algorithm = "weight01", statistic = "ks")
tabKS <- GenTable(GOdata, raw.p.value = resultKS, topNodes = length(resultKS@score), numChar = 120)
head(tabKS, 50)
write.csv(tabKS, 'C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO/chr6_KSresults.csv')

resultKS_1 <- runTest(GOdata, algorithm = "classic", statistic = "ks")
resultKS.elim <- runTest(GOdata, algorithm = "elim", statistic = "ks")
allRes <- GenTable(GOdata, classicFisher = resultFisher, classicKS = resultKS.elim, elimKS = resultKS.elim, orderBy = "elimKS", ranksOf = "classicFisher", topNodes = 10)
head(allRes, 70)
write.csv(allRes, 'C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO/chr6_elim.KSresults.csv')

par(cex = 0.3)
showSigOfNodes(GOdata, score(resultKS.elim), firstSigNodes = 20, useInfo = "all")

#visualize GOTerms####
goEnrichment <- GenTable(
  GOdata,
  resultKS.elim = resultKS.elim,
  orderBy = "resultKS",
  topNodes = length(resultKS.elim@score),
  numChar = 82)

goEnrichment$resultKS.elim <- as.numeric(goEnrichment$resultKS.elim)
goEnrichment <- goEnrichment[goEnrichment$resultKS.elim < 1,] # filter terms for Fisher p<0.1
goEnrichment <- goEnrichment[,c("GO.ID","Term","resultKS.elim")]

# Create a data frame
goEnrichment <- as.data.frame(goEnrichment)
# Use mutate to add a new column
goEnrichment <- goEnrichment %>% mutate(ontology = c("BP"))
print(goEnrichment)

write.csv(goEnrichment, 'C:/Users/azadeh/Desktop/PHD_thesis/resaults/topGO/chr6_goEnrichment.csv')





ntop <- 20
ggdata <- goEnrichment[1:ntop,]
ggdata$Term <- factor(ggdata$Term, levels = rev(ggdata$Term)) # fixes order
gg1 <- ggplot(ggdata,
              aes(x = Term, y = -log10(resultKS.elim), size =frequency(Term), fill = -log10(resultKS.elim)))+
  expand_limits(y = 1) +
  geom_point(shape = 21) +
  scale_size(range = c(2.5,12.5)) +
  scale_fill_continuous(low = 'royalblue', high = 'red4') +

  xlab('') + ylab('Enrichment score') +
  labs(
    title = 'GO Biological processes chromosome 6',
    subtitle = 'Top 20 terms ordered by Kolmogorov-Smirnov p-value',
    caption = 'Cut-off lines drawn at equivalents of p=0.01, p=0.001, p=0.0001') +

  geom_hline(yintercept = c(-log10(0.01), -log10(0.001), -log10(0.0001)),
             linetype = c("dotted", "longdash", "solid"),
             colour = c("black", "black", "black"),
             size = c(0.5, 1.5, 3)) +

  theme_bw(base_size = 24) +
  theme(
    legend.position = 'right',
    legend.background = element_rect(),
    plot.title = element_text(angle = 0, size = 16, face = 'bold', vjust = 1),
    plot.subtitle = element_text(angle = 0, size = 14, face = 'bold', vjust = 1),
    plot.caption = element_text(angle = 0, size = 12, face = 'bold', vjust = 1),

    axis.text.x = element_text(angle = 0, size = 12, face = 'bold', hjust = 1.10),
    axis.text.y = element_text(angle = 0, size = 12, face = 'bold', vjust = 0.5),
    axis.title = element_text(size = 12, face = 'bold'),
    axis.title.x = element_text(size = 12, face = 'bold'),
    axis.title.y = element_text(size = 12, face = 'bold'),
    axis.line = element_line(colour = 'black'),

    #Legend
    legend.key = element_blank(), # removes the border
    legend.key.size = unit(2, "cm"), # Sets overall area/size of the legend
    legend.text = element_text(size = 14, face = "bold"), # Text size
    title = element_text(size = 14, face = "bold")) +

  coord_flip()

print(gg1)
ADD REPLY

Login before adding your answer.

Traffic: 1857 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6