## ----setup, message=FALSE, echo = FALSE---------------------------------------
library(limma)
library(edgeR)
options(timeout=1000)
url = "http://bioinf.wehi.edu.au/EGSEA/mam.rnaseq.rdata"
utils::download.file(url, destfile="mam.rnaseq.rdata", mode="wb")
load("mam.rnaseq.rdata")
names(mam.rnaseq.data)
dim(mam.rnaseq.data)
x = calcNormFactors(mam.rnaseq.data, method = "TMM")
design = model.matrix(~0+x$samples$group+x$samples$lane)
colnames(design) = gsub("x\\$samples\\$group", "", colnames(design))
colnames(design) = gsub("x\\$samples\\$lane", "", colnames(design))
head(design)
contr.matrix = makeContrasts(
         BasalvsLP = Basal-LP,
         BasalvsML = Basal - ML,
         LPvsML = LP - ML,
         levels = colnames(design))
head(contr.matrix)

## ----setup2, eval=TRUE--------------------------------------------------------
v = voom(x, design, plot=FALSE)
names(v)

## ----collections--------------------------------------------------------------
library(EGSEAdata)
egsea.data("mouse")

## ----collectionlist-----------------------------------------------------------
info = egsea.data("mouse", returnInfo = TRUE)
names(info)
info$msigdb$info$collections

## ----loadegsea, message=FALSE, warning=FALSE----------------------------------
library(EGSEA)

## ----indexing-----------------------------------------------------------------
gs.annots = buildIdx(entrezIDs=v$genes$ENTREZID, species="mouse", 
           msigdb.gsets=c("c2", "c5"), go.part = TRUE)
names(gs.annots)

## ----exploresets--------------------------------------------------------------
class(gs.annots$c2)
summary(gs.annots$c2)
show(gs.annots$c2)
s = getSetByName(gs.annots$c2, "SMID_BREAST_CANCER_LUMINAL_A_DN")
class(s)
names(s)
names(s$SMID_BREAST_CANCER_LUMINAL_A_DN)

## ----indexclass---------------------------------------------------------------
slotNames(gs.annots$c2)

## ----symbolmap----------------------------------------------------------------
colnames(v$genes)
symbolsMap = v$genes[, c(1, 2)]
colnames(symbolsMap) = c("FeatureID", "Symbols")
symbolsMap[, "Symbols"] = as.character(symbolsMap[, "Symbols"])

## ----base---------------------------------------------------------------------
egsea.base()

## ----selectbasemethods--------------------------------------------------------
baseMethods = egsea.base()[-2]
baseMethods

## ----combine------------------------------------------------------------------
egsea.combine()

## ----sort---------------------------------------------------------------------
egsea.sort()

## ----egseatest----------------------------------------------------------------
gsa = egsea(voom.results=v, contrasts=contr.matrix,  
         gs.annots=gs.annots, symbolsMap=symbolsMap,
         baseGSEAs=baseMethods, sort.by="med.rank",
         num.threads = 16, report = FALSE)

## ----showegsea----------------------------------------------------------------
show(gsa)

## ----summariseegsea-----------------------------------------------------------
summary(gsa)

## ----topsets------------------------------------------------------------------
topSets(gsa, gs.label="c2", contrast = "comparison", names.only=TRUE)

## ----topsetslim---------------------------------------------------------------
t = topSets(gsa, contrast = "comparison",
             names.only=FALSE, number = Inf, verbose = FALSE)
t[grep("LIM_", rownames(t)), c("p.adj", "Rank", "med.rank", "vote.rank")]

## ----topsets2-----------------------------------------------------------------
topSets(gsa, gs.label="kegg", contrast="BasalvsLP", sort.by="med.rank")
topSets(gsa, gs.label="kegg", contrast="comparison", sort.by="med.rank")

## ----heatmaps-----------------------------------------------------------------
plotHeatmap(gsa, gene.set="LIM_MAMMARY_STEM_CELL_UP", gs.label="c2",
         contrast = "comparison", file.name = "hm_cmp_LIM_MAMMARY_STEM_CELL_UP", format="png")
plotHeatmap(gsa, gene.set="LIM_MAMMARY_STEM_CELL_DN", gs.label="c2",
         contrast = "comparison", file.name = "hm_cmp_LIM_MAMMARY_STEM_CELL_DN", format="png")

## ----pathwayplot1, eval=FALSE-------------------------------------------------
#  plotPathway(gsa, gene.set = "Vascular smooth muscle contraction",
#               contrast = "BasalvsLP", gs.label = "kegg",
#               file.name = "Vascular_smooth_muscle_contraction")

## ----pathwayplot2, eval=FALSE-------------------------------------------------
#  plotPathway(gsa, gene.set = "Vascular smooth muscle contraction",
#               contrast = "comparison", gs.label = "kegg",
#               file.name = "Vascular_smooth_muscle_contraction_cmp")

## ----mdsplot------------------------------------------------------------------
plotMethods(gsa, gs.label = "c2", contrast = "BasalvsLP", 
         file.name = "mds_c2_BasalvsLP", format="png")
plotMethods(gsa, gs.label = "c5BP", contrast = "BasalvsLP", 
         file.name = "mds_c5_BasalvsLP", format="png")

## ----keggsummaryplot1---------------------------------------------------------
plotSummary(gsa, gs.label = 3, contrast = 3, 
         file.name = "summary_kegg_LPvsML", format="png")

## ----c2summaryplot2-----------------------------------------------------------
plotSummary(gsa, gs.label = 1, contrast = 3, 
         file.name = "summary_c2_LPvsML", 
         x.axis = "med.rank", format="png")

## ----c2summaryplot3-----------------------------------------------------------
plotSummary(gsa, gs.label = 1, contrast = 3, 
         file.name = "summary_sig_c2_LPvsML", 
         x.axis = "med.rank", x.cutoff=300, format="png")

## ----summaryplotkegg1and2-----------------------------------------------------
plotSummary(gsa, gs.label = "kegg", contrast = c(1,2), 
         file.name = "summary_kegg_1vs2", format="png")

## ----gographs-----------------------------------------------------------------
plotGOGraph(gsa, gs.label="c5BP", contrast = 1, file.name="BasalvsLP-c5BP-top-", format="png")
plotGOGraph(gsa, gs.label="c5CC", contrast = 1, file.name="BasalvsLP-c5CC-top-", format="png")

## ----summarybarplot-----------------------------------------------------------
plotBars(gsa, gs.label = "c2", contrast="comparison", file.name="comparison-c2-bars", format="png")

## ----summaryheatmap-----------------------------------------------------------
plotSummaryHeatmap(gsa, gs.label="c2", hm.vals = "avg.logfc.dir",
         file.name="summary_heatmaps_c2", format="png")
plotSummaryHeatmap(gsa, gs.label="kegg", hm.vals = "avg.logfc.dir",
         file.name="summary_heatmaps_kegg", format="png")

## ----toptable-----------------------------------------------------------------
t = limmaTopTable(gsa, contrast=1)
head(t)

## ----htmlreport, warning=FALSE, eval=FALSE------------------------------------
#  generateReport(gsa, number = 20, report.dir="./mam-rnaseq-egsea-report")

## ----mareadidats, eval=TRUE---------------------------------------------------
library(limma)
library(illuminaio)
options(timeout=1000)
url = "http://bioinf.wehi.edu.au/EGSEA/arraydata.zip"
utils::download.file(url, destfile="arraydata.zip", mode="wb")
utils::unzip("arraydata.zip", exdir = ".")
targets = read.delim("targets.txt", header=TRUE, sep=" ")
data = read.idat(as.character(targets$File), 
                   bgxfile="GPL6887_MouseWG-6_V2_0_R0_11278593_A.bgx",
                   annotation=c("Entrez_Gene_ID","Symbol", "Chromosome"))
data$other$Detection = detectionPValues(data)
data$targets = targets
colnames(data) = targets$Sample

## ----manormalize, eval=TRUE---------------------------------------------------
data = neqc(data)

## ----mafilter, eval=TRUE------------------------------------------------------
table(targets$Celltype)
keep.exprs = rowSums(data$other$Detection<0.05)>=5
table(keep.exprs)
data = data[keep.exprs,]
dim(data)
head(data$genes)
sum(is.na(data$genes$Entrez_Gene_ID))
data1 = data[!is.na(data$genes$Entrez_Gene_ID), ]
dim(data1)
ord = order(lmFit(data1)$Amean, decreasing=TRUE)
ids2keep = data1$genes$Array_Address_Id[ord][!duplicated(data1$genes$Entrez_Gene_ID[ord])]
data1 = data1[match(ids2keep, data1$genes$Array_Address_Id),]
dim(data1)

expr = data1$E
group = as.factor(data1$targets$Celltype)
probe.annot = data1$genes[, 2:4]
head(probe.annot)

## ----malinearmodel, eval=TRUE-------------------------------------------------
head(data1$targets)
experiment = as.character(data1$targets$Experiment)
design = model.matrix(~0 + group + experiment)
colnames(design) = gsub("group", "", colnames(design))
design
contr.matrix = makeContrasts(
         BasalvsLP = Basal-LP,
         BasalvsML = Basal-ML,
         LPvsML = LP-ML,
         levels = colnames(design))
contr.matrix

## ----maindex, eval=FALSE------------------------------------------------------
#  library(EGSEA)
#  library(EGSEAdata)
#  gs.annots = buildIdx(entrezIDs=probe.annot[, 2],
#               species="mouse",
#               msigdb.gsets=c("c2", "c5"), go.part = TRUE)
#  names(gs.annots)

## ----maegsea, eval=FALSE------------------------------------------------------
#  baseMethods = egsea.base()[-2]
#  baseMethods
#  
#  gsam = egsea.ma(expr=expr, group=group,
#   		  	 probe.annot = probe.annot,
#   		  	 design = design,
#           contrasts=contr.matrix,
#           gs.annots=gs.annots,
#           baseGSEAs=baseMethods, sort.by="med.rank",
#           num.threads = 8, report = FALSE)

## ----mareport, warning=FALSE, eval=FALSE--------------------------------------
#  generateReport(gsam, number = 20, report.dir="./mam-ma-egsea-report")

## ----matopsets, eval=FALSE----------------------------------------------------
#  topSets(gsam, gs.label="c2", contrast="comparison", names.only=TRUE, number=5)

## ----softwareinfo-------------------------------------------------------------
sessionInfo()