I am a graduate student from India. I have been using tool, SPIA on my lung cancer data downloaded from TCGA to analyze pathway perturbation. It is a RNA- Seq FPKM normalized data. I have further applied between array normalization using EdgeR bioconductor package and used limma to obtain logFC and P-value. The problem is when I run the "spia" function, the results that I get have FDR=1 for all pathways, pSize and pNDE values are equal in each pathway. Below is the codes I've used for limma:
args = commandArgs(TRUE)
control_file = "CONTROL.csv";
disease_file = "LUAD1.csv";
DEfile = "DELUAD1.tsv";
suppressMessages(library(limma));
options(warn=-1)
DE <- function(data1, data2)
{
merged12 = merge(data1,data2,by="row.names")
rn = merged12[,1]
rownames(merged12) = rn;
Eset = ExpressionSet(assayData=as.matrix(merged12[-1]))
ID <- featureNames(Eset)
#Symbol <- getSYMBOL(ID, "org.Hs.eg.db")
tmp <- data.frame(ID=ID, Symbol=ID, stringsAsFactors = F)
# Clean up
tmp[tmp=="NA"] <- NA
fData(Eset) <- tmp
# Clean up
fvarLabels(Eset) <- make.names(fvarLabels(Eset))
sml=NULL
for(i in 1:dim(data1)[[2]]) { sml = append(sml,"G0")}
for(i in 1:dim(data2)[[2]]) { sml = append(sml,"G1")}
ex <- exprs(Eset)
# set up the data and proceed with analysis
fl <- as.factor(sml)
Eset$description <- fl
design <- model.matrix(~ description + 0, Eset)
colnames(design) <- levels(fl)
fit <- lmFit(Eset, design)
cont.matrix <- makeContrasts(G1-G0, levels=design)
fit2 <- contrasts.fit(fit, cont.matrix)
fit2 <- eBayes(fit2, 0.01)
gene_list <- topTable(fit2, coef=1, number=1000000, sort.by="logFC")
write.table (gene_list, file = DEfile, sep = "\t", row.names=F, quote=F);
return (gene_list)
}
df1= as.data.frame(read.table(file="CONTROL.csv", sep = " ", header=T, row.names=1))
df2= as.data.frame(read.csv(file="LUAD1.csv", sep = ",", header=T,row.names=1))
message("Removing non-variable genes in each dataset...")
library(genefilter)
X = transform(as.matrix(df1), SD=rowSds(as.matrix(df1), na.rm=TRUE)); ## GETTING THE SD OF EACH ROW I.E. GENE
names = rownames(df1); ## GETTING THE IDS OF ALL GENES
SAMPLE1 = X[names[X$SD > 0.000001],]; ## GETTING THE GENEIDS AND THEIR EXPRESSION VALUES IN ALL SAMPLES IF THE SD OF EXPRESSION VALUES IS > 0
SAMPLE1$SD <- NULL
r1 = rownames(SAMPLE1);
r1 = rownames(df1)
X = transform(as.matrix(df2), SD=rowSds(as.matrix(df2), na.rm=TRUE)); ## GETTING THE SD OF EACH ROW I.E. GENE
names = rownames(df2); ## GETTING THE IDS OF ALL GENES
SAMPLE2 = X[names[X$SD > 0.000001],]; ## GETTING THE GENEIDS AND THEIR EXPRESSION VALUES IN ALL SAMPLES IF THE SD OF EXPRESSION VALUES IS > 0
SAMPLE2$SD <- NULL
r2 = rownames(SAMPLE2);
genes = intersect(r1,r2); ## getting the list of common genes present in both the datasets
S1 = SAMPLE1[genes,];
S2 = SAMPLE2[genes,];
library(Biobase)
DELUAD1 = DE(S2,S1)
and the code for SPIA is:
library(SPIA)
options(digits=3)
head(DELUAD1)
tg1<-DELUAD1[DELUAD1$adj.P.Val<0.1,]
DE_LUADI=tg1$logFC
names(DE_LUADI)<-as.vector(tg1$ID)
ALL_LUADI=tg1$ID
DE_LUADI[1:10]
ALL_LUADI[1:10]
# pathway analysis based on combined evidence; # use nB=2000 or more for more accurate results
res=spia(de=DE_LUADI,all=ALL_LUADI,organism="hsa",nB=2000,plots=T,beta=NULL,combine="fisher",verbose=FALSE)
#show first 15 pathways, omit KEGG links
res[1:20,-12]
The results looks like this:
Name ID pSize NDE pNDE tA pPERT pG pGFdr pGFWER Status
1 Olfactory transduction 04740 290 290 1 -1710.8 0.002 0.0144 1 1 Inhibited
2 Serotonergic synapse 04726 71 71 1 -114.1 0.009 0.0514 1 1 Inhibited
3 Antigen processing and presentation 04612 49 49 1 609.8 0.011 0.0606 1 1 Activated
4 Systemic lupus erythematosus 05322 95 95 1 458.3 0.013 0.0695 1 1 Activated
5 Staphylococcus aureus infection 05150 36 36 1 833.3 0.024 0.1135 1 1 Activated
6 Natural killer cell mediated cytotoxicity 04650 74 74 1 -736.4 0.032 0.1421 1 1 Inhibited
7 Legionellosis 05134 32 32 1 336.6 0.035 0.1523 1 1 Activated
8 Progesterone-mediated oocyte maturation 04914 48 48 1 344.0 0.037 0.1590 1 1 Activated
9 Fanconi anemia pathway 03460 30 30 1 -12.8 0.038 0.1623 1 1 Inhibited
10 Pertussis 05133 46 46 1 368.6 0.039 0.1655 1 1 Activated
Please help as to where I am going wrong. It'll be a great help. I have been struggling on it from a very long time.
It solved the error. Thank you!