## ----style, echo=FALSE, results='asis'-----------------------------------
BiocStyle::markdown()

## ----setup, echo=FALSE, results='hide'-----------------------------------
library(knitr)
opts_chunk$set(cache=TRUE, error=FALSE)

## ----vectorize-----------------------------------------------------------
x <- 1:10
log(x)     ## NOT for (i in seq_along) x[i] <- log(x[i])

## ----pre-allocate--------------------------------------------------------
result <- numeric(10)
result[1] <- runif(1)
for (i in 2:length(result))
       result[i] <- runif(1) * result[i - 1]
result

## ----inefficient---------------------------------------------------------
f0 <- function(n, a=2) {
    ## stopifnot(is.integer(n) && (length(n) == 1) &&
    ##           !is.na(n) && (n > 0))
    result <- numeric()
    for (i in seq_len(n))
        result[[i]] <- a * log(i)
    result
}

## ----system-time---------------------------------------------------------
system.time(f0(10000))
n <- 1000 * seq(1, 20, 2)
t <- sapply(n, function(i) system.time(f0(i))[[3]])
plot(t ~ n, type="b")

## ----correct-init--------------------------------------------------------
n <- 10000
system.time(expected <- f0(n))
head(expected)

## ----hoist---------------------------------------------------------------
f1 <- function(n, a=2) {
    result <- numeric()
    for (i in seq_len(n))
        result[[i]] <- log(i)
    a * result
}
identical(expected, f1(n))

library(microbenchmark)
microbenchmark(f0(n), f1(n), times=5)

## ----preallocate-and-fill------------------------------------------------
f2 <- function(n, a=2) {
    result <- numeric(n)
    for (i in seq_len(n))
        result[[i]] <- log(i)
    a * result
}
identical(expected, f2(n))
microbenchmark(f0(n), f2(n), times=5)

## ----use-apply-----------------------------------------------------------
f3 <- function(n, a=2)
    a * sapply(seq_len(n), log)

identical(expected, f3(n))
microbenchmark(f0(n), f2(n), f3(n), times=10)

## ----use-vectorize-------------------------------------------------------
f4 <- function(n, a=2)
    a * log(seq_len(n))
identical(expected, f4(n))
microbenchmark(f0(n), f3(n), f4(n), times=10)

## ----vectorized-scale----------------------------------------------------
n <- 10 ^ (5:8)                         # 100x larger than f0
t <- sapply(n, function(i) system.time(f4(i))[[3]])
plot(t ~ n, log="xy", type="b")

## ----reduceByYield-setup-------------------------------------------------
suppressPackageStartupMessages({
    library(GenomicFiles)
    library(GenomicAlignments)
    library(Rsamtools)
    library(TxDb.Hsapiens.UCSC.hg19.knownGene)
})

yield <-     # how to input the next chunk of data
    function(X, ...)
{
    readGAlignments(X)
}

map <-       # what to do to each chunk
    function(VALUE, ..., roi)
{
    olaps <- findOverlaps(VALUE, roi, type="within", ignore.strand=TRUE)
    count <- tabulate(subjectHits(olaps), subjectLength(olaps))
    setNames(count, names(roi))
}

reduce <- `+`   # how to combine mapped chunks

## ----yieldFactory--------------------------------------------------------
yieldFactory <-  # return a function with local state 
    function() 
{
    n_records <- 0L
    function(X, ...) {
        aln <- readGAlignments(X)
        n_records <<- n_records + length(aln)
        message(n_records)
        aln
    }
}

## ----count-overlaps-roi, eval=FALSE--------------------------------------
#  exByTx <- exonsBy(TxDb.Hsapiens.UCSC.hg19.knownGene, "tx")
#  
#  fl <- "/home/ubuntu/data/vobencha/LargeData/srarchive/hg19_alias.tab"
#  map0 <- read.delim(fl, header=FALSE, stringsAsFactors=FALSE)
#  seqlevels(exByTx, force=TRUE) <- setNames(map0$V1, map0$V2)

## ----count-overlaps, eval=FALSE------------------------------------------
#  count1 <- function(filename, roi) {
#      message(filename)
#      ## Create and open BAM file
#      bf <- BamFile(filename, yieldSize=1000000)
#      reduceByYield(bf, yieldFactory(), map, reduce, roi=roi)
#  }

## ----count-overlaps-doit, eval=FALSE-------------------------------------
#  bam <- "/home/ubuntu/data/vobencha/LargeData/srarchive/SRR1039508_sorted.bam"
#  count <- count1(bam, exByTx)

## ----rtracklayer-file-classes--------------------------------------------
## rtracklayer menagerie
suppressPackageStartupMessages(library(rtracklayer))
names(getClass("RTLFile")@subclasses)

## ----parallel-sleep------------------------------------------------------
library(BiocParallel)

fun <- function(i) {
    Sys.sleep(1)
    i
}

## serial
f0 <- function(n)
    lapply(seq_len(n), fun)

## parallel
f1 <- function(n)
    bplapply(seq_len(n), fun)

## ----parallel-bpredo-param-----------------------------------------------
param <- MulticoreParam(workers = 3)

## ----parallel-bpredo-bplapply, error=TRUE--------------------------------
X <- list(1, "2", 3)
res <- bplapply(X, sqrt, BPPARAM = param)

## ----parallel-bptry------------------------------------------------------
res <- bptry(bplapply(X, sqrt, BPPARAM=param))
res

## ----parallel-bpredo-----------------------------------------------------
X.redo <- list(1, 2, 3)
bplapply(X.redo, sqrt, BPREDO = res)

## ----parallel-debug, eval=FALSE------------------------------------------
#  > fun = function(i) { browser(); sqrt(i) }
#  > bplapply(X, fun, BPREDO=res, BPPARAM=SerialParam())
#  resuming previous calculation ...
#  Called from: FUN(...)
#  Browse[1]>
#  debug at #1: sqrt(i)
#  Browse[2]> i
#  [1] "2"
#  Browse[2]> i = 2
#  Browse[2]> c
#  [[1]]
#  [1] 1
#  
#  [[2]]
#  [1] 1.414214
#  
#  [[3]]
#  [1] 1.732051

## ----logging, eval=FALSE-------------------------------------------------
#  FUN <- function(i) {
#    flog.debug(paste0("value of 'i': ", i))
#  
#    if (!length(i)) {
#        flog.warn("'i' is missing")
#        NA
#    } else if (!is(i, "numeric")) {
#        flog.info("coercing to numeric")
#        as.numeric(i)
#    } else {
#        i
#    }
#  }

## ----logging-WARN, eval=FALSE--------------------------------------------
#  param <- SnowParam(3, log = TRUE, threshold = "WARN")
#  bplapply(list(1, "2", integer()), FUN, BPPARAM = param)

## ----timeout_constructor-------------------------------------------------
param <- SnowParam(timeout = 20)
param

## ----timeout_setter------------------------------------------------------
bptimeout(param) <- 2 
param

## ----timeout_bplapply----------------------------------------------------
fun <- function(i) {
  Sys.sleep(i)
  i
}

## ----co-setup------------------------------------------------------------
suppressPackageStartupMessages({
    library(BiocParallel)
    library(GenomicFiles)
    library(GenomicAlignments)
    library(Rsamtools)
})

## ----co-param------------------------------------------------------------
param <- MulticoreParam(4, log = TRUE, timeout = 60)

## ----co-param-snow, eval=FALSE-------------------------------------------
#  param <- SnowParam(4, log = TRUE, timeout = 60)

## ----co-bams-------------------------------------------------------------
fls <- dir("/home/ubuntu/data/vobencha/LargeData/copynumber", ".bam$", full=TRUE)
names(fls) <- basename(fls)
bfl <- BamFileList(fls)

## ----co-GRanges----------------------------------------------------------
ranges <- GRanges("chr6", IRanges(c(28477797, 29527797, 32448354),
                                  c(29477797, 30527797, 33448354)))

## ----co-map, eval=FALSE--------------------------------------------------
#  MAP <- function(range, file, ...) {
#      library(GenomicAlignments)         ## readGAlignments(), ScanBamParam()
#      param = ScanBamParam(which=range)  ## restriction
#      gal = readGAlignments(file, param=param)
#      ## log messages
#      flog.info(paste0("file: ", basename(file)))
#      flog.debug(paste0("records: ", length(gal)))
#      ## overlaps
#      olaps <- findOverlaps(gal, range, type="within", ignore.strand=TRUE)
#      tabulate(subjectHits(olaps), subjectLength(olaps))
#  }

## ----co-doit, eval=FALSE-------------------------------------------------
#  cts <- reduceByFile(ranges, fls, MAP, BPPARAM = param)

## ----co-length, eval=FALSE-----------------------------------------------
#  length(cts)

## ----co-elementlengths, eval=FALSE---------------------------------------
#  elementLengths(cts)

## ----co-tables, eval=FALSE-----------------------------------------------
#  cts[[1]]