## ----style, echo = FALSE, results = 'asis'-------------------------------
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE"))
)
options(width = 75)


## ------------------------------------------------------------------------
x <- c(1, 3, 5)
y <- 1:5


## ------------------------------------------------------------------------
x <- c(TRUE, FALSE, NA)
outer(x, x, `&`)
outer(x, x, `|`)


## ------------------------------------------------------------------------
x <- c("Male", "Female", "male")
gender <- factor(x, levels=c("Female", "Male"))
gender


## ------------------------------------------------------------------------
x <- rnorm(100)
hist(x)
y <- x + rnorm(100)
df <- data.frame(x, y)
plot(y ~ x, df)


## ------------------------------------------------------------------------
ridx <- df$x > 0
table(ridx)
plot(y ~ x, df[ridx, ])


## ------------------------------------------------------------------------
fit <- lm(y ~ x, df)
class(fit)
anova(fit)
plot(y ~ x, df)
abline(fit)


## ---- messgae = FALSE----------------------------------------------------
library(ggplot2)


## ------------------------------------------------------------------------
ggplot(df, aes(x, y)) + 
    geom_point() +
    geom_smooth(method="lm")


## ---- message = FALSE----------------------------------------------------
library(readr)


## ---- eval = FALSE-------------------------------------------------------
## pdata_file <- file.choose()    # ALL-sample-sheet.csv

## ---- echo = FALSE-------------------------------------------------------
pdata_file <- 
    system.file(package="BiocIntro", "extdata", "ALL-sample-sheet.csv")


## ------------------------------------------------------------------------
pdata <- read_csv(pdata_file)
pdata


## ---- message = FALSE----------------------------------------------------
library(dplyr)


## ------------------------------------------------------------------------
pdata %>% select(sample, sex, age, mol.biol)
pdata %>% filter(sex == "F", age < 50)
pdata %>% mutate(sex = factor(sex, levels = c("F", "M")))
pdata %>% summarize(
    n = n(),
    ave_age = mean(age, na.rm=TRUE)
)


## ------------------------------------------------------------------------
pdata %>%
    group_by(sex) %>%
    summarize(
        n = n(),
        ave_age = mean(age, na.rm = TRUE)
    )


## ---- eval = FALSE-------------------------------------------------------
## pdata_file <- file.choose()    # airway-sample-sheet.csv
## count_file <- file.choose()    # airway-read-counts.csv

## ---- echo = FALSE-------------------------------------------------------
pdata_file <- 
    system.file(package="BiocIntro", "extdata", "airway-sample-sheet.csv")
count_file <- 
    system.file(package="BiocIntro", "extdata", "airway-read-counts.csv")


## ------------------------------------------------------------------------
pdata <- read_csv(pdata_file)
pdata <- 
    pdata %>% 
    select(Run, cell, dex)


## ------------------------------------------------------------------------
counts <- read_csv(count_file)
eg <- counts[, 1:6]    # make it easy to work with
eg


## ------------------------------------------------------------------------
data <- left_join(pdata, eg)
data


## ---- message = FALSE----------------------------------------------------
library(tidyr)


## ------------------------------------------------------------------------
tbl <- gather(data, "Gene", "Count", -(1:3))
tbl


## ------------------------------------------------------------------------
tbl %>%
    group_by(Run) %>%
    summarize(lib_size = sum(Count))
tbl %>%
    group_by(Gene) %>%
    summarize(
        ave_count = mean(Count),
        ave_log_count = mean(log(1 + Count))
    )


## ------------------------------------------------------------------------
counts_tbl <- gather(counts, "Gene", "Count", -Run)
data_tbl <- left_join(pdata, counts_tbl)
data_tbl


## ------------------------------------------------------------------------
gene_summaries <-
    data_tbl %>%
    group_by(Gene) %>%
    summarize(
        ave_count = mean(Count),
        ave_log_count = mean(log(1 + Count))
    )
gene_summaries


## ---- message=FALSE------------------------------------------------------
library(ggplot2)


## ------------------------------------------------------------------------
ggplot(gene_summaries, aes(ave_log_count)) +
    geom_density()


## ---- message = FALSE----------------------------------------------------
library(Biostrings)


## ------------------------------------------------------------------------
seq = c("AAACA", "CATGC")
dna <- DNAStringSet(seq)
reverseComplement(dna)


## ------------------------------------------------------------------------
dm3_upstream_file <-
    system.file(package="Biostrings", "extdata", "dm3_upstream2000.fa.gz")
readLines(dm3_upstream_file, 10)


## ------------------------------------------------------------------------
dna <- readDNAStringSet(dm3_upstream_file)
dna


## ------------------------------------------------------------------------
gc <- letterFrequency(dna, "GC", as.prob = TRUE)
hist(gc)


## ---- message = FALSE----------------------------------------------------
library(BSgenome)
library(BSgenome.Hsapiens.UCSC.hg38)


## ------------------------------------------------------------------------
BSgenome.Hsapiens.UCSC.hg38
chr17 <- BSgenome.Hsapiens.UCSC.hg38[["chr17"]]
chr17
letterFrequency(chr17, "GC", as.prob=TRUE)


## ---- message = FALSE----------------------------------------------------
library(S4Vectors)


## ------------------------------------------------------------------------
DataFrame(x = rnorm(100), y = rnorm(100))


## ------------------------------------------------------------------------
length(dna)
dna[1:4]


## ------------------------------------------------------------------------
nms = names(dna)
pos = sub(".* ", "", nms)
df <- DataFrame(
    dna = unname(dna),
    pos = pos
)
df$gc <- letterFrequency(df$dna, "GC", as.prob=TRUE)[,1]
df


## ---- message = FALSE----------------------------------------------------
library(BiocManager)


## ------------------------------------------------------------------------
BiocManager::available("BSgenome.Hsapiens")


## ---- eval=FALSE---------------------------------------------------------
## ## also possible to install CRAN, github packages
## BiocManager::install("BSgenome.Hsapiens.UCSC.hg38")


## ------------------------------------------------------------------------
sessionInfo()