## ----setup, echo=FALSE, cache=FALSE-------------------------------------------
## Global options  
options(max.print="100")  
knitr::opts_chunk$set(  
  collapse = TRUE,  
  comment = "#>")  
knitr::opts_knit$set(width=75)  

## ----silentload, include=FALSE------------------------------------------------
#Silent load all dependencies for vignette  
library(factR)  
library(AnnotationHub)  
library(Biostrings)  
library(BSgenome.Mmusculus.UCSC.mm10)  
library(GenomicFeatures)  
library(rtracklayer)  
library(tidyverse) 

## ----install.factR, eval = FALSE----------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("factR")

## ----loadfactR----------------------------------------------------------------
library(factR)  

## ----importGTF----------------------------------------------------------------
gtf <- system.file("extdata", "sc_merged_sample.gtf.gz", package = "factR")
custom.gtf <- importGTF(gtf)  

## ----checktype----------------------------------------------------------------
class(custom.gtf)  

## ----headobj------------------------------------------------------------------
head(custom.gtf)  

## ----counttx------------------------------------------------------------------
length(unique(custom.gtf$transcript_id))  

## ----countcds-----------------------------------------------------------------
length(unique(custom.gtf[custom.gtf$type=="CDS"]$transcript_id))  

## ----plottranscripts, message=FALSE, warning=FALSE----------------------------
viewTranscripts(custom.gtf, "Zfr")  

## ----load.gencode, eval = TRUE------------------------------------------------
# query database for mouse gencode basic annotation  
library(AnnotationHub)  
ah <- AnnotationHub()  
query(ah, c('Mus musculus', 'gencode', 'gff'))
  
# Download full annotation  
ref.gtf <- ah[['AH49546']]  

## ----import.gencode, warning=FALSE--------------------------------------------
tmp <- tempfile()
download.file("ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/gencode.vM25.annotation.gtf.gz",  
              destfile = tmp)
ref.gtf <- importGTF(tmp) 

## ----matchgenemeta, warning=FALSE---------------------------------------------
# matching gene metadata  
custom_matched_1.gtf <- matchGeneInfo(custom.gtf, ref.gtf)  

## ----matchgenemeta2, warning=FALSE--------------------------------------------
# matching gene metadata  
custom_matched_2.gtf <- matchGeneInfo(custom.gtf, ref.gtf,   
                            primary_gene_id = "gene_id",   
                            secondary_gene_id = "ref_gene_id")  

## ----plottranscripts2---------------------------------------------------------
viewTranscripts(custom_matched_2.gtf, "Zfr")  

## ----subsettx, warning=FALSE--------------------------------------------------
custom_new.gtf <- subsetNewTranscripts(custom_matched_2.gtf, ref.gtf)  

viewTranscripts(custom_new.gtf,"Zfr")  

## ----subsettx2, warning=FALSE-------------------------------------------------
custom_new.gtf <- subsetNewTranscripts(custom_matched_2.gtf, ref.gtf, refine.by = "intron")  

viewTranscripts(custom_new.gtf, "Zfr")  

## ----genomeBSgenome, eval=FALSE-----------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("BSgenome.Mmusculus.UCSC.mm10")

## ----loadBSgenome-------------------------------------------------------------
library(BSgenome.Mmusculus.UCSC.mm10)
Mmusculus <- BSgenome.Mmusculus.UCSC.mm10

## ----genomeAhub---------------------------------------------------------------
library(AnnotationHub)   
ah <- AnnotationHub()  
query(ah, c("mm10","2bit"))   

## ----eval=FALSE---------------------------------------------------------------
#  # Retrieve mouse genome
#  Mmusculus <- ah[['AH14005']]

## ----downloadGencode, eval=FALSE----------------------------------------------
#  tmp <- tempfile()
#  download.file("ftp://ftp.ebi.ac.uk/pub/databases/gencode/Gencode_mouse/release_M25/GRCm38.primary_assembly.genome.fa.gz",
#                tmp)
#  Mmusculus <- importFASTA(paste0(tmp, "/GRCm38.primary_assembly.genome.fa.gz"))

## ----buildcds, warning=FALSE--------------------------------------------------
custom_new_CDS.gtf <- buildCDS(custom_new.gtf, ref.gtf, Mmusculus)  

## ----viewafterCDS-------------------------------------------------------------
viewTranscripts(custom_new_CDS.gtf, "Zfr")  

## ----viewafterCDSscale--------------------------------------------------------
viewTranscripts(custom_new_CDS.gtf, "Zfr", rescale_introns = TRUE)  

## ----viewrefCDS---------------------------------------------------------------
viewTranscripts(ref.gtf, "Zfr", rescale_introns = TRUE)  

## ----predictNMD1, warning=FALSE-----------------------------------------------
NMDprediction.out <- predictNMD(custom_new_CDS.gtf)  

head(NMDprediction.out)  

## ----predictNMD2--------------------------------------------------------------
NMDprediction.Zfr <- predictNMD(custom_new_CDS.gtf, gene_name == "Zfr") 

head(NMDprediction.Zfr)  

## ----predDomain2--------------------------------------------------------------
predictDomains(custom_new_CDS.gtf, Mmusculus, gene_name == "Zfr", progress_bar = FALSE)  

## ----predDomain3, eval = F----------------------------------------------------
#  domains.Zfr <- predictDomains(custom_new_CDS.gtf, Mmusculus, gene_name == "Zfr", plot = TRUE, progress_bar = FALSE)

## ----predDomain1, eval=FALSE--------------------------------------------------
#  domains.out <- predictDomains(custom_new_CDS.gtf, Mmusculus, progress_bar = FALSE)

## ----exportgtf, eval=FALSE----------------------------------------------------
#  library(rtracklayer)
#  export(custom_new_CDS.gtf, "Custom_new.gtf", format = "GTF")

## ----exporttable, eval=FALSE--------------------------------------------------
#  write.table(NMDprediction.out, "Custom_new_NMD.txt", sep = "\t", row.names = FALSE, quote = FALSE)
#  write.table(domains.out, "Custom_new_domains.txt", sep = "\t", row.names = FALSE, quote = FALSE)

## ----sessioninfo--------------------------------------------------------------
sessionInfo()