library(limma)
library(edgeR)

Warning message:
“package ‘limma’ was built under R version 3.4.2”

Read the count table

cc <- read.csv("counts.csv")

Do the table formatting

rownames(cc) <- cc[,2]
cc <- cc[, -c(1,2)]
o <- order(colnames(cc))
cc <- cc[, o]
ccl <- apply(cc, c(1,2), function(x) log2(x+1))

Adjust the column names

tmp <- colnames(cc)
colnames(cc) <- sapply(tmp, function(x) substr(x, 14, 22))
colnames(ccl) <- sapply(tmp, function(x) substr(x, 14, 22))

head(cc)

head(ccl)

boxplot(ccl)

Read the design/metadata

meta <- read.table("metadata.txt", sep="\t", header=T)
rownames(meta) <- as.character(meta[,1])
o <- order(rownames(meta))
meta <- meta[o, ]
meta

targets <- meta[, c("celline", "treatment")]
targets <- as.data.frame(targets)

targets

Create design object

Treat <- factor(paste(targets$celline,targets$treatment,sep="."))
design <- model.matrix(~0+Treat)
colnames(design) <- levels(Treat)

design

Do the dispesion estimate and model fitting

y <- DGEList(counts=cc)
y <- estimateDisp(y, design)
fit <- glmFit(y, design)

Set up the biological question with a contrast

cm <- makeContrasts( mfc.cnt-mfc.dac, levels=design)
cm

Do the model fit and get top genes

lrt <- glmLRT(fit, contrast=cm)
topOnes <- topTags(lrt, n=dim(cc)[1])$table

head(topOnes)

Simple pairwise comparison¶

cc2 <- cc[,1:6]
group <- factor(c(1,1,1,2,2,2))
y <- DGEList(counts=cc2,group=group)
y <- calcNormFactors(y)
design <- model.matrix(~group)
y <- estimateDisp(y,design)

fit <- glmFit(y,design)
lrt <- glmLRT(fit)
topOnes_pairwise <- topTags(lrt, n=dim(cc2)[1])$table

head(topOnes_pairwise)

length(intersect(rownames(topOnes)[1:500], rownames(topOnes_pairwise)[1:500]))

DESeq2 way

library(DESeq2)

cc2 <- cc[,1:6]
conds <- c("cnt","cnt","cnt",  "dac", "dac", "dac")
colData<-data.frame(condition=as.factor(conds))
dds <- DESeqDataSetFromMatrix(countData = cc2,colData = colData,design = formula(colData) ) 
dds <- DESeq(dds)
res <- results(dds)
o <- order(res[,"pvalue"])
res <- res[o, ]

estimating size factors
estimating dispersions
gene-wise dispersion estimates
mean-dispersion relationship
final dispersion estimates
fitting model and testing

DESeq2 and edgeR agree quite a lot

length(intersect(rownames(res)[1:500], rownames(topOnes_pairwise)[1:500]))

library(Vennerable)

i1 <- rownames(res)[1:500]
i2 <- rownames(topOnes_pairwise)[1:500]
i3 <- rownames(topOnes)[1:500]

vennD <- Venn ( list(i1, i2, i3), SetNames = c("DESeq2", "edgeR_pairwise","edgeR_glm"))
plot(vennD, doWeights = T, type = "circles")

	mfc_cnt_1	mfc_cnt_2	mfc_cnt_3	mfc_dac_1	mfc_dac_2	mfc_dac_3	yb5_cnt_1	yb5_cnt_2	yb5_cnt_3	yb5_dac_1	yb5_dac_2	yb5_dac_3
ENSG00000223972	11	18	55	46	57	56	59	35	32	278	203	215
ENSG00000227232	2089	2441	7559	3644	4172	4329	4055	2568	2568	6803	4957	5629
ENSG00000278267	0	0	0	0	0	0	0	0	0	0	0	0
ENSG00000243485	2	3	8	8	11	13	12	7	9	33	26	23
ENSG00000237613	0	0	0	0	0	0	1	0	0	2	1	0
ENSG00000268020	0	0	0	0	0	0	0	0	1	0	0	0

	mfc_cnt_1	mfc_cnt_2	mfc_cnt_3	mfc_dac_1	mfc_dac_2	mfc_dac_3	yb5_cnt_1	yb5_cnt_2	yb5_cnt_3	yb5_dac_1	yb5_dac_2	yb5_dac_3
ENSG00000223972	3.584963	4.247928	5.807355	5.554589	5.857981	5.832890	5.906891	5.169925	5.044394	8.124121	7.672425	7.754888
ENSG00000227232	11.029287	11.253847	12.884171	11.831703	12.026869	12.080151	11.985842	11.326991	11.326991	12.732167	12.275543	12.458919
ENSG00000278267	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000
ENSG00000243485	1.584963	2.000000	3.169925	3.169925	3.584963	3.807355	3.700440	3.000000	3.321928	5.087463	4.754888	4.584963
ENSG00000237613	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000	0.000000	1.584963	1.000000	0.000000
ENSG00000268020	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	1.000000	0.000000	0.000000	0.000000

	FileName	celline	treatment
mfc_cnt_1	mfc_cnt_1	mfc	cnt
mfc_cnt_2	mfc_cnt_2	mfc	cnt
mfc_cnt_3	mfc_cnt_3	mfc	cnt
mfc_dac_1	mfc_dac_1	mfc	dac
mfc_dac_2	mfc_dac_2	mfc	dac
mfc_dac_3	mfc_dac_3	mfc	dac
yb5_cnt_1	yb5_cnt_1	yb5	cnt
yb5_cnt_2	yb5_cnt_2	yb5	cnt
yb5_cnt_3	yb5_cnt_3	yb5	cnt
yb5_dac_1	yb5_dac_1	yb5	cnt
yb5_dac_2	yb5_dac_2	yb5	cnt
yb5_dac_3	yb5_dac_3	yb5	cnt

	mfc.cnt	mfc.dac	yb5.cnt
1	1	0	0
2	1	0	0
3	1	0	0
4	0	1	0
5	0	1	0
6	0	1	0
7	0	0	1
8	0	0	1
9	0	0	1
10	0	0	1
11	0	0	1
12	0	0	1

	mfc.cnt - mfc.dac
mfc.cnt	1
mfc.dac	-1
yb5.cnt	0

	logFC	logCPM	LR	PValue	FDR
ENSG00000233276	-5.703099	5.916299	243.9207	5.493388e-55	3.185726e-50
ENSG00000141756	-2.501588	6.007440	193.9030	4.471368e-44	1.296518e-39
ENSG00000226383	-5.534574	3.531377	173.8707	1.056378e-39	2.042050e-35
ENSG00000164850	-2.078655	5.068626	173.1630	1.507899e-39	2.186152e-35
ENSG00000084207	-4.253757	8.232289	170.8907	4.727468e-39	5.483106e-35
ENSG00000163362	-2.521056	5.212185	151.6937	7.392005e-35	7.144619e-31

	logFC	logCPM	LR	PValue	FDR
ENSG00000130600	5.505566	9.008124	1041.4478	1.758697e-228	1.019903e-223
ENSG00000135480	4.606337	5.098251	792.1216	2.786028e-174	8.078366e-170
ENSG00000070985	6.610261	2.469754	625.4816	4.803161e-138	9.284831e-134
ENSG00000147174	3.857756	3.692567	606.1356	7.749302e-134	1.123494e-129
ENSG00000105974	-2.766143	5.721369	605.2399	1.213603e-133	1.407585e-129
ENSG00000198883	7.718994	2.154230	589.9396	2.582475e-130	2.496048e-126