## ----setup, include=FALSE-----------------------------------------------------
library(knitr)
library(kableExtra)

## ----comparison_table, include=FALSE,eval=FALSE-------------------------------
#  qckitfastq <- c("yes","yes","yes+","yes")
#  seqTools <- c("no","yes","yes","yes")
#  ShortRead <- c("no","no","no","yes")
#  FASTQC <- c("yes","yes*","yes*","yes*")
#  metrics <- data.frame(qckitfastq,seqTools,ShortRead,FASTQC)
#  rownames(metrics) <- c("Read Length Distribution",
#                         "Per Base Read Quality", "Nucleotide Read Content", "GC Content")
#  kable(metrics)
#  # need to do per read sequence quality
#  # + indicates that the program...
#  # Here, '*' indicates that the program truncates the file or computes on only the first x samples

## ----loading_file-------------------------------------------------------------
library(qckitfastq)
# Locate the example FASTQ file bundled with qckitfastq. mustWork = TRUE makes
# a missing file fail right here; by default system.file() returns "" and the
# problem would only surface later, inside the functions that read infile.
infile <- system.file(
    "extdata", "10^5_reads_test.fq.gz",
    package = "qckitfastq", mustWork = TRUE
)
# Build the seqTools summary object once; the chunks below that take fseq
# reuse it instead of the file path.
fseq <- seqTools::fastqq(infile)

## ----read_length--------------------------------------------------------------
# Derive read_len from the fastqq summary object (fseq), preview its first
# rows as a styled table, then draw the matching plot.
read_len <- read_length(fseq)
read_len %>%
    head() %>%
    kable() %>%
    kable_styling()
plot_read_length(read_len)

## ----per_base_sequence_quality------------------------------------------------
# per_base_quality() is given the file path (infile), not the fastqq summary.
# Preview the first rows of its result as a styled table, then plot it.
bs <- per_base_quality(infile)
bs %>%
    head() %>%
    kable() %>%
    kable_styling()
plot_per_base_quality(bs)

## ----per_read_quality---------------------------------------------------------
# Run per_read_quality() on the file path, preview the first rows of the
# result as a styled table, then plot it.
prq <- per_read_quality(infile)
prq %>%
    head() %>%
    kable() %>%
    kable_styling()
plot_per_read_quality(prq)

## ----gc_content---------------------------------------------------------------
# Run GC_content() on the file path, preview the first rows of the result as
# a styled table, then plot it.
gc_df <- GC_content(infile)
gc_df %>%
    head() %>%
    kable() %>%
    kable_styling()
plot_GC_content(gc_df)

## ----nucleotide_read_content--------------------------------------------------
# Content for a single base: read_base_content() is called on the fastqq
# summary with content = "A"; preview the first rows of the result.
scA <- read_base_content(fseq, content = "A")
scA %>%
    head() %>%
    kable() %>%
    kable_styling()

# read_content() is called on the same summary without a base argument; its
# result is previewed and then passed to the plotting function.
rc <- read_content(fseq)
rc %>%
    head() %>%
    kable() %>%
    kable_styling()
plot_read_content(rc)

## ----kmer_count---------------------------------------------------------------
# Run kmer_count() on the file path with k = 6 and preview the first rows of
# the result as a styled table.
km <- kmer_count(infile, k = 6)
km %>%
    head() %>%
    kable() %>%
    kable_styling()

## ----overrep_reads------------------------------------------------------------
# Keep the result under a name distinct from the function overrep_reads(), so
# the function is not masked by a non-function object of the same name in the
# global environment.
overrep_reads_res <- overrep_reads(infile)
# Show the first five rows of the result as a styled table, then plot it.
knitr::kable(head(overrep_reads_res, n = 5)) %>% kable_styling()
plot_overrep_reads(overrep_reads_res)

## ----overrep_kmer-------------------------------------------------------------
# Run overrep_kmer() on the file path.
# NOTE(review): the positional 7 is presumably the k-mer length -- confirm
# against the overrep_kmer() signature.
overkm <- overrep_kmer(infile, 7)
# Show the first ten rows of the result as a styled table, then plot it.
overkm %>%
    head(n = 10) %>%
    knitr::kable() %>%
    kable_styling()
plot_overrep_kmer(overkm)

## ----adapter_content----------------------------------------------------------
# The adapter-content example only runs on non-Windows platforms.
# NOTE(review): presumably adapter_content() is unsupported on Windows --
# confirm against the package documentation.
run_adapter_demo <- .Platform$OS.type != "windows"
if (run_adapter_demo) {
    infile2 <- system.file("extdata", "test.fq.gz", package = "qckitfastq")
    ac_sorted <- adapter_content(infile2)
}
# The two display steps are kept as separate top-level expressions. A braced
# block only yields the value of its last expression, so when the table call
# sat in the middle of one block its value was dropped and never shown.
if (run_adapter_demo) {
    kable(head(ac_sorted)) %>% kable_styling()
}
if (run_adapter_demo) {
    plot_adapter_content(ac_sorted)
}

## ----eval=FALSE, include=FALSE------------------------------------------------
#  ### Benchmarking
#  
#  #To demonstrate the utility of our functions on large datasets...
#  #(need to benchmark against ShortRead)
#  #library(seqTools)
#  #library(ShortRead)
#  #library(rbenchmark)
#  #sampler <- FastqSampler('E-MTAB-1147/fastq/ERR127302_1.fastq.gz', 20000)