## ----setup, include = FALSE--------------------------------------------------- options(tinytex.verbose=TRUE) getCaps <- function () { label <- knitr::opts_current$get("label") txts <- list( figcover=c( "Variation in the sharpness of the peaks using \\texttt{trim}", "attribute." ), figmnase=c( "Toy example of MNase biase correction. Random nucleosomal and", "control reads have been generated using \\texttt{synteticNucMap}", "function and corrected using \\texttt{controlCorrect}." ), fignoise=c( "Original intensities from tiling array experiment. Smoothing", "using a sliding window of variable length (0, 20, 50 and 100 bp)", "is presented." ), figfft=c( "Power spectrum of the example Tiling Array data, percentile 1", "marked with a dashed line." ), figfft2=c( "Filtering in Tiling Array (up, blue) (1\\% comp.) and NGS (down", "red) (2\\% comp.)." ), figpeak=c( "Output of \\texttt{plotPeaks} function. Peaks are spotted in red", "and detection threshold marked with an horitzontal line." ), figpeak2="\\texttt{plotPeaks} function with \\texttt{score=TRUE}.", figpeak3=c( "\\texttt{plotPeaks} output with \\texttt{score=TRUE} and", "\\texttt{width=140}." ), figranges=c( "Simple example of ranges manipulation to plot fuzzy nucleosomes." ), figsyn=c( "Example synthetic coverage map of 90 well-positioned (100-10)", "and 20 fuzzy nucleosomes." ) ) caps <- lapply(txts, paste, collapse=" ") return (caps[[label]]) } knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning = FALSE, fig.width = 6, fig.height = 2, fig.wide = TRUE ) ## ----processtilling, eval=FALSE----------------------------------------------- # processTilingArray(data, exprName, chrPattern, inferLen=50) ## ----loadTA------------------------------------------------------------------- library(nucleR) library(ggplot2) library(IRanges) library(GenomicRanges) data(nucleosome_tiling) head(nucleosome_tiling, n=25) ## ----readBAM------------------------------------------------------------------ sample.file <- system.file("extdata", "cellCycleM_chrII_5000-25000.bam", package="nucleR") reads <- readBAM(sample.file, type="paired") head(reads) ## ----import------------------------------------------------------------------- data(nucleosome_htseq) class(nucleosome_htseq) nucleosome_htseq ## ----processReads------------------------------------------------------------- # Process the paired end reads, but discard those with length > 200 reads_pair <- processReads(nucleosome_htseq, type="paired", fragmentLen=200) # Process the reads, but now trim each read to 40bp around the dyad reads_trim <- processReads(nucleosome_htseq, type="paired", fragmentLen=200, trim=40) ## ----coverage----------------------------------------------------------------- # Calculate the coverage, directly in reads per million (r.p.m) cover_pair <- coverage.rpm(reads_pair) cover_trim <- coverage.rpm(reads_trim) ## ----figcover, echo=FALSE, fig.cap=getCaps(), fig.width=5--------------------- # Compare both coverages t1 <- as.vector(cover_pair[[1]])[1:2000] t2 <- as.vector(cover_trim[[1]])[1:2000] t1 <- (t1 - min(t1)) / max(t1 - min(t1)) # Normalization t2 <- (t2 - min(t2)) / max(t2 - min(t2)) # Normalization plot_data <- rbind( data.frame( x=seq_along(t1), y=t1, coverage="original" ), data.frame( x=seq_along(t1), y=t2, coverage="trimmed" ) ) ggplot(plot_data, aes(x=x, y=y)) + geom_line(aes(color=coverage)) + xlab("position") + ylab("norm coverage") ## ----figmnase, echo=c(1:5), fig.cap=getCaps(), fig.width=5-------------------- # Toy example map <- syntheticNucMap(as.ratio=TRUE, wp.num=50, fuz.num=25) exp <- coverage(map$syn.reads) ctr <- coverage(map$ctr.reads) corrected <- controlCorrection(exp, ctr) plot_data <- rbind( data.frame( x=seq_along(exp), y=as.vector(exp), coverage="normal" ), data.frame( x=seq_along(corrected), y=as.vector(corrected), coverage="corrected" ) ) ggplot(plot_data, aes(x=x, y=y)) + geom_line(aes(color=coverage)) + xlab("position") + ylab("coverage") ## ----fignoise, echo=FALSE, fig.cap=getCaps(), fig.height=1.5------------------ windowFilter <- function (x, w) { if (missing(w)) { return(x) } else { y <- filter(x, rep(1, w)/w) return(as.vector(y)) } } mkEntry <- function (x, i, w, lab) { if (missing(lab)) { lab <- sprintf("slinding w. %i bp", w) } df <- data.frame(x=i, y=windowFilter(x[i], w), lab=lab) df[!is.na(df[, "y"]), ] } i <- 1:2000 plot_data <- rbind( mkEntry(nucleosome_tiling, i, 1, "original"), mkEntry(nucleosome_tiling, i, 20), mkEntry(nucleosome_tiling, i, 50), mkEntry(nucleosome_tiling, i, 100) ) ggplot(plot_data, aes(x=x, y=y)) + geom_line() + facet_grid(.~lab) + xlab("position") + ylab("intensity") ## ----figfft, fig.cap=getCaps(), fig.width=5----------------------------------- fft_ta <- filterFFT(nucleosome_tiling, pcKeepComp=0.01, showPowerSpec=TRUE) ## ----figfft2, echo=FALSE, fig.cap=getCaps(), fig.width=5, fig.height=4-------- i <- 1:3000 tiling_raw <- nucleosome_tiling[i] tiling_fft <- filterFFT(tiling_raw, pcKeepComp=0.01) htseq_raw <- as.vector(cover_trim[[1]])[i] htseq_fft <- filterFFT(htseq_raw, pcKeepComp=0.02) plot_data <- rbind( data.frame(x=i, y=tiling_raw, lab="intensity", treatment="raw"), data.frame(x=i, y=tiling_fft, lab="intensity", treatment="filtered"), data.frame(x=i, y=htseq_raw, lab="coverage", treatment="raw"), data.frame(x=i, y=htseq_fft, lab="coverage", treatment="filtered") ) ggplot(plot_data, aes(x=x, y=y)) + geom_line(aes(color=treatment)) + facet_grid(lab~., scales="free_y") + ylab("") + xlab("position") ## ----corfft------------------------------------------------------------------- tiling_raw <- nucleosome_tiling tiling_fft <- filterFFT(tiling_raw, pcKeepComp=0.01) htseq_raw <- as.vector(cover_trim[[1]]) htseq_fft <- filterFFT(htseq_raw, pcKeepComp=0.02) cor(tiling_raw, tiling_fft, use="complete.obs") cor(htseq_raw, htseq_fft, use="complete.obs") ## ----figpeak, fig.cap=getCaps()----------------------------------------------- peaks <- peakDetection(htseq_fft, threshold="25%", score=FALSE) peaks plotPeaks(peaks, htseq_fft, threshold="25%", ylab="coverage") ## ----figpeak2, fig.cap=getCaps()---------------------------------------------- peaks <- peakDetection(htseq_fft, threshold="25%", score=TRUE) head(peaks) plotPeaks(peaks, htseq_fft, threshold="25%") ## ----figpeak3, fig.cap=getCaps()---------------------------------------------- peaks <- peakDetection(htseq_fft, threshold="25%", score=TRUE, width=140) peaks plotPeaks(peaks, htseq_fft, threshold="25%") ## ----figranges, echo=-6, fig.cap=getCaps()------------------------------------ nuc_calls <- peaks[peaks$score > 0.1, ] red_calls <- reduce(nuc_calls) red_class <- GRanges(red_calls, isFuzzy=width(red_calls) > 140) red_class plotPeaks(red_calls, htseq_fft, threshold="25%") ## ----figsyn, fig.cap=getCaps(), fig.height=3---------------------------------- syn <- syntheticNucMap(wp.num=100, wp.del=10, wp.var=30, fuz.num=20, fuz.var=50, max.cover=20, nuc.len=147, lin.len=20, rnd.seed=1, as.ratio=TRUE, show.plot=TRUE)