## ---- fig.show='hold'------------------------------------------------------
library(TCGAWorkflowData)
data("elmerExample")
data("GBMnocnvhg19")
data("GBMIllumina_HiSeq")
data("LGGIllumina_HiSeq")
data("mafMutect2LGGGBM")
data("markersMatrix")
data("histoneMarks")
data("biogrid")
data("genes_GR")

## ---- eval = FALSE, message=FALSE,warning=FALSE, include=TRUE--------------
#  library(GenomicRanges)
#  library(TCGAbiolinks)
#  ##############################
#  ## Recurrent CNV annotation ##
#  ##############################
#  # Get gene information from GENCODE using biomart
#  genes <- TCGAbiolinks:::get.GRCh.bioMart(genome = "hg19")
#  genes <- genes[genes$external_gene_id != "" & genes$chromosome_name %in% c(1:22,"X","Y"),]
#  genes[genes$chromosome_name == "X", "chromosome_name"] <- 23
#  genes[genes$chromosome_name == "Y", "chromosome_name"] <- 24
#  genes$chromosome_name <- sapply(genes$chromosome_name,as.integer)
#  genes <- genes[order(genes$start_position),]
#  genes <- genes[order(genes$chromosome_name),]
#  genes <- genes[,c("external_gene_id", "chromosome_name", "start_position","end_position")]
#  colnames(genes) <- c("GeneSymbol","Chr","Start","End")
#  genes_GR <- makeGRangesFromDataFrame(genes,keep.extra.columns = TRUE)
#  save(genes_GR,genes,file = "genes_GR.rda", compress = "xz")

## ---- eval=FALSE, message=FALSE, warning=FALSE, include=TRUE---------------
#  library(RTCGAToolbox)
#  # Download GISTIC results
#  lastAnalyseDate <- getFirehoseAnalyzeDates(1)
#  gistic <- getFirehoseData("GBM",gistic2_Date = lastAnalyseDate)
#  
#  # get GISTIC results
#  gistic.allbygene <- getData(gistic,type = "GISTIC", CN = "All")
#  gistic.allbygene <- gistic.allbygene[1:10,]
#  gistic.thresholedbygene <- getData(gistic,type = "GISTIC", CN = "Thresholed")
#  gistic.thresholedbygene <- gistic.thresholedbygene[1:10,]
#  save(gistic.allbygene,gistic.thresholedbygene,file = "GBMGistic.rda", compress = "xz")

## ---- eval=FALSE, include=TRUE, results='asis'-----------------------------
#  library(TCGAbiolinks)
#  query.gbm.nocnv <- GDCquery(project = "TCGA-GBM",
#                              data.category = "Copy number variation",
#                              legacy = TRUE,
#                              file.type = "nocnv_hg19.seg",
#                              sample.type = c("Primary solid Tumor"))
#  # to reduce time we will select only 20 samples
#  query.gbm.nocnv$results[[1]] <- query.gbm.nocnv$results[[1]][1:20,]
#  
#  GDCdownload(query.gbm.nocnv, chunks.per.download = 100)
#  
#  gbm.nocnv <- GDCprepare(query.gbm.nocnv, save = TRUE, save.filename = "GBMnocnvhg19.rda")

## ---- eval=FALSE, include=TRUE, results='asis'-----------------------------
#  query <- GDCquery(project = "TCGA-GBM",
#                    data.category = "Gene expression",
#                    data.type = "Gene expression quantification",
#                    platform = "Illumina HiSeq",
#                    file.type  = "results",
#                    sample.type = c("Primary solid Tumor"),
#                    legacy = TRUE)
#  # We will use only 20 samples to make the example faster
#  query$results[[1]] <-  query$results[[1]][1:20,]
#  GDCdownload(query)
#  gbm.exp <- GDCprepare(query, save = TRUE, summarizedExperiment = TRUE, save.filename = "GBMIllumina_HiSeq.rda")
#  
#  query <- GDCquery(project = "TCGA-LGG",
#                    data.category = "Gene expression",
#                    data.type = "Gene expression quantification",
#                    platform = "Illumina HiSeq",
#                    file.type  = "results",
#                    sample.type = c("Primary solid Tumor"),
#                    legacy = TRUE)
#  # We will use only 20 samples to make the example faster
#  query$results[[1]] <-  query$results[[1]][1:20,]
#  GDCdownload(query)
#  lgg.exp <- GDCprepare(query, save = TRUE, summarizedExperiment = TRUE, save.filename = "LGGIllumina_HiSeq.rda")

## ---- eval=FALSE, include=TRUE, results='asis'-----------------------------
#  #----------- 8.3 Identification of Regulatory Enhancers   -------
#  library(TCGAbiolinks)
#  # Samples: primary solid tumor w/ DNA methylation and gene expression
#  matched_met_exp <- function(project, n = NULL){
#      # get primary solid tumor samples: DNA methylation
#      message("Download DNA methylation information")
#      met450k <- GDCquery(project = project,
#                          data.category = "DNA methylation",
#                          platform = "Illumina Human Methylation 450",
#                          legacy = TRUE,
#                          sample.type = c("Primary solid Tumor"))
#      met450k.tp <-  met450k$results[[1]]$cases
#  
#      # get primary solid tumor samples: RNAseq
#      message("Download gene expression information")
#      exp <- GDCquery(project = project,
#                      data.category = "Gene expression",
#                      data.type = "Gene expression quantification",
#                      platform = "Illumina HiSeq",
#                      file.type  = "normalized_results",
#                      sample.type = c("Primary solid Tumor"),
#                      legacy = TRUE)
#      exp.tp <- exp$results[[1]]$cases
#      # Get patients with samples in both platforms
#      patients <- unique(substr(exp.tp,1,15)[substr(exp.tp,1,12) %in% substr(met450k.tp,1,12)] )
#      if(!is.null(n)) patients <- patients[1:n] # get only n samples
#      return(patients)
#  }
#  lgg.samples <- matched_met_exp("TCGA-LGG", n = 10)
#  gbm.samples <- matched_met_exp("TCGA-GBM", n = 10)
#  samples <- c(lgg.samples,gbm.samples)
#  
#  #-----------------------------------
#  # 1 - Methylation
#  # ----------------------------------
#  query.met <- GDCquery(project = c("TCGA-LGG","TCGA-GBM"),
#                        data.category = "DNA methylation",
#                        platform = "Illumina Human Methylation 450",
#                        legacy = TRUE,
#                        barcode = samples)
#  GDCdownload(query.met)
#  met <- GDCprepare(query.met, save = FALSE)
#  met <- subset(met,subset = as.character(GenomicRanges::seqnames(met)) %in% c("chr9"))
#  
#  #-----------------------------------
#  # 2 - Expression
#  # ----------------------------------
#  query.exp <- GDCquery(project = c("TCGA-LGG","TCGA-GBM"),
#                       data.category = "Gene expression",
#                       data.type = "Gene expression quantification",
#                       platform = "Illumina HiSeq",
#                       file.type  = "normalized_results",
#                       legacy = TRUE,
#                       barcode =  samples)
#  GDCdownload(query.exp)
#  exp <- GDCprepare(query.exp, save = FALSE)
#  save(exp, met, gbm.samples, lgg.samples, file = "elmerExample.rda", compress = "xz")

## ---- eval=FALSE, include=TRUE, results='asis'-----------------------------
#  library(TCGAbiolinks)
#  LGGmut <- GDCquery_Maf(tumor = "LGG", pipelines = "mutect2")
#  GBMmut <- GDCquery_Maf(tumor = "GBM", pipelines = "mutect2")
#  mut <- plyr::rbind.fill(LGGmut, GBMmut)
#  save(mut, LGGmut, GBMmut,file = "mafMutect2LGGGBM.rda")

## ---- eval=FALSE, include=TRUE, results='asis'-----------------------------
#  gdac.root <- "ftp://ftp.broadinstitute.org/pub/GISTIC2.0/hg19_support/"
#  file <- paste0(gdac.root, "genome.info.6.0_hg19.na31_minus_frequent_nan_probes_sorted_2.1.txt")
#  # Retrieve probes meta file from broadinstitute website
#  if(!file.exists(basename(file))) downloader::download(file, basename(file))
#  # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-==--=--==---=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-==--=--==--
#  # For hg38 analysis please take a look on:
#  # https://gdc.cancer.gov/about-data/data-harmonization-and-generation/gdc-reference-files
#  # File: SNP6 GRCh38 Liftover Probeset File for Copy Number Variation Analysis
#  # -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-==--=--==---=-=-=-=-=-=-=-=-=-=-=-=-=-=-=--=-==--=--==--
#  markersMatrix <-  readr::read_tsv(basename(file), col_names = FALSE, col_types = "ccn", progress = FALSE)
#  save(markersMatrix, file = "markersMatrix.rda", compress = "xz")

## ---- eval=FALSE, include=TRUE, results='asis'-----------------------------
#  ### read biogrid info
#  ### Check last version in https://thebiogrid.org/download.php
#  file <- "http://thebiogrid.org/downloads/archives/Release%20Archive/BIOGRID-3.4.146/BIOGRID-ALL-3.4.146.tab2.zip"
#  if(!file.exists(gsub("zip","txt",basename(file)))){
#    downloader::download(file,basename(file))
#    unzip(basename(file),junkpaths =TRUE)
#  }
#  
#  tmp.biogrid <- read.csv(gsub("zip","txt",basename(file)), header=TRUE, sep="\t", stringsAsFactors=FALSE)
#  save(tmp.biogrid, file = "biogrid.rda", compress = "xz")

## ----results='hide', eval=FALSE, echo=FALSE, message=FALSE,warning=FALSE----
#  library(ChIPseeker)
#  library(AnnotationHub)
#  library(pbapply)
#  library(ggplot2)
#  #------------------ Working with ChipSeq data ---------------
#  # Step 1: download histone marks for a brain and non-brain samples.
#  #------------------------------------------------------------
#  # loading annotation hub database
#  ah = AnnotationHub()
#  
#  # Searching for brain consolidated epigenomes in the roadmap database
#  bpChipEpi_brain <- query(ah , c("EpigenomeRoadMap", "narrowPeak", "chip", "consolidated","brain","E068"))
#  
#  # Get chip-seq data
#  histone.marks <- pblapply(names(bpChipEpi_brain), function(x) {ah[[x]]})
#  names(histone.marks) <- names(bpChipEpi_brain)
#  save(histone.marks, file = "histoneMarks.rda", compress = "xz")

## ----sessionInfo, results='asis', echo=FALSE-------------------------------
pander::pander(sessionInfo(), compact = FALSE)