## ----options, results="hide", include=FALSE, cache=FALSE, results='hide', message=FALSE----
knitr::opts_chunk$set(fig.align="center", cache=FALSE,error=FALSE, #make it stop on error
fig.width=7, fig.height=7, autodep=TRUE, out.width="600px", out.height="600px", results="markup", echo=TRUE, eval=TRUE)
#knitr::opts_knit$set(stop_on_error = 2L) #really make it stop
#knitr::dep_auto()
options(getClass.msg=FALSE) #get rid of annoying messages about cache until fixed internally in R

set.seed(98883) ## for reproducibility

library(bioc2016singlecell)

## for now load individual dependencies
library(clusterExperiment) ## use develop for now
library(SummarizedExperiment)
library(cluster)
library(MAST)

## ----datain, eval=TRUE---------------------------------------------------
data(scone_res)
data(ws_input)

# Summarized Experiment
se <- SummarizedExperiment(list(counts=norm_logcounts),
                           colData=data.frame(batch=batch[colnames(norm_logcounts)],
                                              time_points=bio[colnames(norm_logcounts)]))

## ----pca-----------------------------------------------------------------
pca <- prcomp(t(assay(se)), center=TRUE, scale=TRUE)
plot(pca$x, pch=19, col=bigPalette[bio])
legend("topleft", levels(bio), fill=bigPalette)

res1 <- pam(pca$x[,1:2], k=3)

table(res1$clustering)

res2 <- pam(pca$x[,1:3], k=3)

table(res2$clustering)

plot(pca$sdev^2/sum(pca$sdev^2), xlab="PC", ylab="Percent of explained variance")

res3 <- pam(pca$x[,1:6], k=3)

table(res3$clustering)

## ----pca_cm--------------------------------------------------------------
cm <- clusterMany(se, dimReduce="PCA",  nPCADims=c(2, 3, 6), 
                  ks = 3, clusterFunction = "pam")
cm
apply(clusterMatrix(cm), 2, table)

## ----plot_cm-------------------------------------------------------------
defaultMar <- par("mar")
plotCMar <- c(1.1,8.1,4.1,1.1)
par(mar=plotCMar)

plotClusters(cm)

## ----combine_cm----------------------------------------------------------
cm <- combineMany(cm)
plotClusters(cm)
cm

## ----subsample-----------------------------------------------------------
cl3 <- clusterSingle(se, subsample = TRUE, sequential = FALSE, 
                     clusterFunction = c("hierarchical01"), 
                     clusterDArgs = list(minSize=5, alpha=0.3), 
                     subsampleArgs = list("k"=3,"clusterFunction"="pam"),
                     dimReduce = c("PCA"), ndims=10)

plotCoClustering(cl3)

cl7 <- clusterSingle(se, subsample = TRUE, sequential = FALSE, 
                     clusterFunction = c("hierarchical01"), 
                     clusterDArgs = list(minSize=5, alpha=0.3),
                     subsampleArgs = list("k"=7,"clusterFunction"="pam"),
                     dimReduce = c("PCA"), ndims=10)

plotCoClustering(cl7)

## ----sequential----------------------------------------------------------
cl <- clusterSingle(se, subsample = TRUE, sequential = TRUE, 
                     clusterFunction = c("hierarchical01"), 
                     clusterDArgs = list(minSize=5, alpha=0.3),
                     subsampleArgs = list("clusterFunction"="pam"),
                     seqArgs = list(k0=5, remain.n=10, top.can=5),
                     dimReduce = c("PCA"), ndims=10)
cl

## ----rsec----------------------------------------------------------------
rs <- RSEC(se, k0s = 4:15, dimReduce = "PCA", nPCADims=c(10, 20), 
           alphas=c(0.2, 0.3), clusterFunction="hierarchical01", betas=0.7,
           combineProportion=0.5, combineMinSize=3,
           dendroReduce = "mad", dendroNDims = 1000,
           mergeMethod = "adjP", mergeCutoff=0.01, 
           seqArgs = list(remain.n=10, top.can=5))
rs

## ----plotClusterEx1------------------------------------------------------
par(mar=plotCMar)
plotClusters(rs, main="Clusters from RSEC", axisLine=-1,
             sampleData=c("time_points", "batch"))

## ----plotCoClustering----------------------------------------------------
plotCoClustering(rs)

## ----clusterMatrix-------------------------------------------------------
head(clusterMatrix(rs)[,1:3])
table(primaryClusterNamed(rs))

## ----makeDendrogram------------------------------------------------------
manual <- makeDendrogram(rs, whichCluster = "combineMany", dimReduce="mad", ndims=1000)
plotDendrogram(manual)

## ----mergeClustersPlot---------------------------------------------------
mergeClusters(manual, mergeMethod="adjP", plot="adjP", cutoff=0.01)

## ----mergeClusters-------------------------------------------------------
manual <- mergeClusters(manual, mergeMethod="adjP", plot="none", cutoff=0.01)
par(mar=plotCMar)
plotClusters(manual, whichClusters = c("mergeClusters", "combineMany"))
plotCoClustering(manual, whichClusters=c("mergeClusters","combineMany"))

## ----dendro_merge--------------------------------------------------------
rs <- makeDendrogram(rs, dimReduce="mad", ndims=1000)

## set good breaks for heatmap colors
breaks <- c(min(norm_logcounts), seq(0, quantile(norm_logcounts[norm_logcounts > 0], .99, na.rm = TRUE), length = 50), max(norm_logcounts))

## ----getBestFeatures-----------------------------------------------------
genesF <- getBestFeatures(rs, contrastType="F", number=500, isCount=FALSE)
head(genesF)

## ----getBestFeatures_heatmap---------------------------------------------
plotHeatmap(rs, clusterSamplesData="dendrogramValue",
            clusterFeaturesData=unique(genesF[,"IndexInOriginal"]),
            main="F statistics",
            breaks=breaks, sampleData=c("time_points"))

## ----pairwise------------------------------------------------------------
genesPairs <- getBestFeatures(rs, contrastType="Pairs", number=50, isCount=FALSE)

plotHeatmap(rs, clusterSamplesData="dendrogramValue",
            clusterFeaturesData=unique(genesPairs[,"IndexInOriginal"]),
            main="All pairwise comparisons",
            breaks=breaks, sampleData=c("time_points"))

## ----one_all-------------------------------------------------------------
genesOneAll <- getBestFeatures(rs, contrastType="OneAgainstAll", number=50, isCount=FALSE)

plotHeatmap(rs, clusterSamplesData="dendrogramValue",
            clusterFeaturesData=unique(genesOneAll[,"IndexInOriginal"]),
            main="One versus All",
            breaks=breaks, sampleData=c("time_points"))

## ----dendro--------------------------------------------------------------
genesDendro <- getBestFeatures(rs, contrastType="Dendro", number=50, isCount=FALSE)

plotHeatmap(rs, clusterSamplesData="dendrogramValue",
            clusterFeaturesData=unique(genesDendro[,"IndexInOriginal"]),
            main="Constrasts based on dendrogram",
            breaks=breaks, sampleData=c("time_points"))


## ----get_contrasts-------------------------------------------------------
## get contrasts from clusterExperiment object
contrMat <- clusterContrasts(rs, contrastType="Dendro")$contrastMatrix
wh_rm <- which(primaryCluster(rs)==-1)

## ----mast----------------------------------------------------------------
## threshold data to apply MAST
norm <- transform(rs)
norm[norm<0] <- 0

## create MAST object
sca <- FromMatrix("SingleCellAssay", t(norm[,-wh_rm]), cData=data.frame(Clusters=primaryClusterNamed(rs)[-wh_rm]))

## model fit
fit <- zlm(~0+Clusters, sca)

## hp test
cont_levels <- paste("Clustersm", 1:max(primaryCluster(rs)), sep="")
mast_res <- lapply(1:ncol(contrMat), function(i) {
  cont <- gsub("X", "Clustersm", colnames(contrMat))[i]
  hp <- Hypothesis(cont, cont_levels)
  wald <- waldTest(fit, hp)
  retval <- data.frame(Feature=rownames(wald), ContrastName=paste("Node", i, sep=""), Contrast=colnames(contrMat)[i], P.Value=wald[,1,3], stringsAsFactors = FALSE)
  retval <- retval[order(retval$P.Value),]
  return(retval[1:50,])
})

mast_res <- do.call(rbind, mast_res)

plotHeatmap(rs, clusterSamplesData="dendrogramValue",
            clusterFeaturesData=unique(mast_res$Feature),
            main="Constrasts based on dendrogram",
            breaks=breaks, sampleData=c("time_points"))

## ----session-------------------------------------------------------------
sessionInfo()