library(limma)
library(edgeR)
Read the count table
cc <- read.csv("counts.csv")
Do the table formatting
rownames(cc) <- cc[,2]
cc <- cc[, -c(1,2)]
o <- order(colnames(cc))
cc <- cc[, o]
ccl <- apply(cc, c(1,2), function(x) log2(x+1))
Adjust the column names
tmp <- colnames(cc)
colnames(cc) <- sapply(tmp, function(x) substr(x, 14, 22))
colnames(ccl) <- sapply(tmp, function(x) substr(x, 14, 22))
head(cc)
head(ccl)
boxplot(ccl)
Read the design/metadata
meta <- read.table("metadata.txt", sep="\t", header=T)
rownames(meta) <- as.character(meta[,1])
o <- order(rownames(meta))
meta <- meta[o, ]
meta
targets <- meta[, c("celline", "treatment")]
targets <- as.data.frame(targets)
targets
Create design object
Treat <- factor(paste(targets$celline,targets$treatment,sep="."))
design <- model.matrix(~0+Treat)
colnames(design) <- levels(Treat)
design
Do the dispesion estimate and model fitting
y <- DGEList(counts=cc)
y <- estimateDisp(y, design)
fit <- glmFit(y, design)
Set up the biological question with a contrast
cm <- makeContrasts( mfc.cnt-mfc.dac, levels=design)
cm
Do the model fit and get top genes
lrt <- glmLRT(fit, contrast=cm)
topOnes <- topTags(lrt, n=dim(cc)[1])$table
head(topOnes)
cc2 <- cc[,1:6]
group <- factor(c(1,1,1,2,2,2))
y <- DGEList(counts=cc2,group=group)
y <- calcNormFactors(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)
fit <- glmFit(y,design)
lrt <- glmLRT(fit)
topOnes_pairwise <- topTags(lrt, n=dim(cc2)[1])$table
head(topOnes_pairwise)
length(intersect(rownames(topOnes)[1:500], rownames(topOnes_pairwise)[1:500]))
DESeq2 way
library(DESeq2)
cc2 <- cc[,1:6]
conds <- c("cnt","cnt","cnt", "dac", "dac", "dac")
colData<-data.frame(condition=as.factor(conds))
dds <- DESeqDataSetFromMatrix(countData = cc2,colData = colData,design = formula(colData) )
dds <- DESeq(dds)
res <- results(dds)
o <- order(res[,"pvalue"])
res <- res[o, ]
DESeq2 and edgeR agree quite a lot
length(intersect(rownames(res)[1:500], rownames(topOnes_pairwise)[1:500]))
library(Vennerable)
i1 <- rownames(res)[1:500]
i2 <- rownames(topOnes_pairwise)[1:500]
i3 <- rownames(topOnes)[1:500]
vennD <- Venn ( list(i1, i2, i3), SetNames = c("DESeq2", "edgeR_pairwise","edgeR_glm"))
plot(vennD, doWeights = T, type = "circles")