## ----setup, include=TRUE------------------------------------------------------
library(EWCE)
library(ggplot2)
set.seed(1234)

## ----error=TRUE---------------------------------------------------------------
## Load merged cortex and hypothalamus dataset generated by Karolinska institute
ctd <- ewceData::ctd() # i.e. ctd_MergedKI
try({
  plt <- EWCE::plot_ctd(ctd = ctd,
                        level = 1,
                        genes = c("Apoe","Gfap","Gapdh"),
                        metric = "mean_exp")
})

## -----------------------------------------------------------------------------
hits <- ewceData::example_genelist()
print(hits)

## -----------------------------------------------------------------------------
exp <- ctd[[1]]$mean_exp

#### Old conversion method ####
#if running offline pass localhub = TRUE
m2h <- ewceData::mouse_to_human_homologs()
exp_old <- exp[rownames(exp) %in% m2h$MGI.symbol,]

#### New conversion method (used by EWCE internally) ####
exp_new <- orthogene::convert_orthologs(gene_df = exp,
                                        input_species = "mouse", 
                                        output_species = "human", 
                                        method = "homologene")
#### Report ####
message("The new method retains ",
        formatC(nrow(exp_new) - nrow(exp_old), big.mark = ","),
        " more genes than the old method.")

## -----------------------------------------------------------------------------
# Use 100 bootstrap lists for speed, for publishable analysis use >=10000
reps <- 100 
# Use level 1 annotations (i.e. Interneurons)
annotLevel <- 1 

## -----------------------------------------------------------------------------
# Bootstrap significance test, no control for transcript length and GC content 
full_results <- EWCE::bootstrap_enrichment_test(sct_data = ctd,
                                                sctSpecies = "mouse",
                                                genelistSpecies = "human",
                                                hits = hits, 
                                                reps = reps,
                                                annotLevel = annotLevel)

## -----------------------------------------------------------------------------
knitr::kable(full_results$results)

## ----error=TRUE---------------------------------------------------------------
try({
  plot_list <- EWCE::ewce_plot(total_res = full_results$results,
                             mtc_method ="BH",
                               ctd = ctd) 
  # print(plot_list$plain)
})

## -----------------------------------------------------------------------------
# Bootstrap significance test controlling for transcript length and GC content
#if running offline pass localhub = TRUE
cont_results <- EWCE::bootstrap_enrichment_test(
  sct_data = ctd,
  hits = hits,
  sctSpecies = "mouse",
  genelistSpecies = "human", 
  reps = reps,
  annotLevel = annotLevel,
  geneSizeControl = TRUE)

## ----error=TRUE---------------------------------------------------------------
try({
  plot_list <- EWCE::ewce_plot(total_res = cont_results$results,
                             mtc_method = "BH")
  print(plot_list$plain)
})


## -----------------------------------------------------------------------------
# Bootstrap significance test controlling for transcript length and GC content
#if running offline pass localhub = TRUE
cont_results <- EWCE::bootstrap_enrichment_test(sct_data = ctd,
                                                hits = hits, 
                                                sctSpecies = "mouse",
                                                genelistSpecies = "human",
                                                reps = reps,
                                                annotLevel = 2,
                                                geneSizeControl = TRUE)

## ----error=TRUE---------------------------------------------------------------
try({
  plot_list <- EWCE::ewce_plot(total_res = cont_results$results,
                             mtc_method = "BH")
  print(plot_list$plain)
})

## -----------------------------------------------------------------------------
#### Generate random gene list ####
#if running offline pass localhub = TRUE
human.bg <- ewceData::mouse_to_human_homologs()$HGNC.symbol
gene.list.2 <- sample(human.bg,size = 30)

#if running offline pass localhub = TRUE
second_results <- EWCE::bootstrap_enrichment_test(sct_data = ctd,
                                                  sctSpecies = "mouse",
                                                  hits = gene.list.2, 
                                                  reps = reps,
                                                  annotLevel = 1)
full_res2 = data.frame(full_results$results,
                       list="Alzheimers")
rando_res2 = data.frame(second_results$results,
                        list="Random")
merged_results = rbind(full_res2, rando_res2)

## ----error=TRUE---------------------------------------------------------------
try({
  plot_list <- EWCE::ewce_plot(total_res = merged_results,
                             mtc_method = "BH")
  print(plot_list$plain)
})

## ----error=TRUE---------------------------------------------------------------
# example datasets held in ewceData package
#if running offline pass localhub = TRUE
cortex_mrna <- ewceData::cortex_mrna()

gene <- "Necab1"
cellExpDist <- data.frame(Expression=cortex_mrna$exp[gene,],
                          Celltype=cortex_mrna$annot[
                            colnames(cortex_mrna$exp),]$level1class)
try({
  ggplot(cellExpDist) + 
  geom_boxplot(aes(x=Celltype, y=Expression), outlier.alpha = .5) + 
  theme_bw() + 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
})

## -----------------------------------------------------------------------------
nKeep <- 1000
must_keep <- c("Apoe","Gfap","Gapdh") 
keep_genes <- c(must_keep,sample(rownames(cortex_mrna$exp),997))
cortex_mrna$exp <- cortex_mrna$exp[keep_genes,]
dim(cortex_mrna$exp)

## ----style="max-height: 100px;", warning=FALSE, eval = FALSE------------------
#  cortex_mrna$exp_scT_normed <- EWCE::sct_normalize(cortex_mrna$exp)

## -----------------------------------------------------------------------------
# Generate cell type data for just the cortex/hippocampus data  
exp_CortexOnly_DROPPED <- EWCE::drop_uninformative_genes(
  exp = cortex_mrna$exp, 
  input_species = "mouse",
  output_species = "human",
  level2annot = cortex_mrna$annot$level2class) 

## -----------------------------------------------------------------------------
annotLevels <- list(level1class=cortex_mrna$annot$level1class,
                    level2class=cortex_mrna$annot$level2class)

fNames_CortexOnly <- EWCE::generate_celltype_data(
  exp = exp_CortexOnly_DROPPED,
  annotLevels = annotLevels,
  groupName = "kiCortexOnly") 

ctd_CortexOnly <- EWCE::load_rdata(fNames_CortexOnly)

## -----------------------------------------------------------------------------
#if running offline pass localhub = TRUE
hypothalamus_mrna <- ewceData::hypothalamus_mrna()

## -----------------------------------------------------------------------------
#if running offline pass localhub = TRUE
hypothalamus_mrna$exp <- EWCE::fix_bad_mgi_symbols(exp = hypothalamus_mrna$exp)
cortex_mrna$exp <- EWCE::fix_bad_mgi_symbols(exp = cortex_mrna$exp)

## -----------------------------------------------------------------------------
merged_KI <- EWCE::merge_two_expfiles(exp1 = hypothalamus_mrna$exp, 
                                      exp2 = cortex_mrna$exp,
                                      annot1 = hypothalamus_mrna$annot, 
                                      annot2 = cortex_mrna$annot,
                                      name1 = "Hypothalamus (KI)", 
                                      name2 = "Cortex/Hippo (KI)")

## -----------------------------------------------------------------------------
exp_merged_DROPPED <- EWCE::drop_uninformative_genes(
  exp = merged_KI$exp, 
  drop_nonhuman_genes = TRUE, 
  input_species = "mouse",
  level2annot = merged_KI$annot$level2class)

## -----------------------------------------------------------------------------
annotLevels = list(level1class=merged_KI$annot$level1class,
                   level2class=merged_KI$annot$level2class)

# Update file path to where you want the cell type data files to save on disk
fNames_MergedKI <- EWCE::generate_celltype_data(exp = exp_merged_DROPPED,
                                                annotLevels = annotLevels,
                                                groupName = "MergedKI",
                                                savePath = tempdir()) 
ctd_MergedKI <- EWCE::load_rdata(fNames_MergedKI) 

## ----eval=FALSE---------------------------------------------------------------
#  ## You can also import the pre-merged cortex and hypothalamus dataset
#  # generated by Karolinska institute. `ewceData::ctd()` is the same file as
#  # ctd_MergedKI above.
#  #if running offline pass localhub = TRUE
#  ctd <- ewceData::ctd()

## ----fig.width = 7, fig.height = 4, error=TRUE--------------------------------
ctd <- ctd_MergedKI 

try({
  plt <- EWCE::plot_ctd(ctd = ctd,
                      level = 1,
                      genes = c("Apoe","Gfap","Gapdh"),
                      metric = "mean_exp") 
})

## ----fig.width = 7, fig.height = 4, error=TRUE--------------------------------
try({
  plt <- EWCE::plot_ctd(ctd = ctd,
                      level = 1,
                      genes = c("Apoe","Gfap","Gapdh"),
                      metric = "specificity")
})

## ----fig.width = 7, fig.height = 4, error=TRUE--------------------------------
try({
  plt <- EWCE::plot_ctd(ctd = ctd,
                      level = 2,
                      genes = c("Apoe","Gfap","Gapdh"),
                      metric = "specificity")
})

## -----------------------------------------------------------------------------
#if running offline pass localhub = TRUE
ctd <- ewceData::ctd()
hits <- ewceData::example_genelist()

## NOTE: rep=100 for demo purposes only. 
## Use >=10,000 for publication-quality results.
reps <- 100 
annotLevel <- 1

## -----------------------------------------------------------------------------
#if running offline pass localhub = TRUE
unconditional_results <- EWCE::bootstrap_enrichment_test(
  sct_data = ctd,
  hits = hits,
  sctSpecies = "mouse",
  genelistSpecies = "human",
  reps = reps,
  annotLevel = annotLevel)

conditional_results_micro <- EWCE:: bootstrap_enrichment_test(
  sct_data = ctd,
  hits = hits,
  sctSpecies = "mouse",
  genelistSpecies = "human",
  reps = reps,
  annotLevel = annotLevel,
  controlledCT = "microglia")

conditional_results_astro <- EWCE::bootstrap_enrichment_test(
  sct_data = ctd,
  hits = hits,
  sctSpecies = "mouse",
  genelistSpecies = "human",
  reps = reps,
  annotLevel = annotLevel,
  controlledCT = "astrocytes_ependymal")


## ----error=TRUE---------------------------------------------------------------
merged_results <- rbind(
  data.frame(unconditional_results$results,
             list="Unconditional Enrichment"),
  data.frame(conditional_results_micro$results,
             list="Conditional Enrichment (Microglia controlled)"),
  data.frame(conditional_results_astro$results,
             list="Conditional Enrichment (Astrocyte controlled)")
)
try({
 plot_list <- EWCE::ewce_plot(total_res = merged_results,
                             mtc_method = "BH") 
 print(plot_list$plain)
})

## -----------------------------------------------------------------------------
#if running offline pass localhub = TRUE
ctd <- ewceData::ctd()
tt_alzh <- ewceData::tt_alzh()

## NOTE: rep=100 for demo purposes only. 
## Use >=10,000 for publication-quality results.
reps <- 100 

## ----results="hide"-----------------------------------------------------------
# ewce_expression_data calls bootstrap_enrichment_test so  
tt_results <- EWCE::ewce_expression_data(sct_data = ctd,
                                          tt = tt_alzh,
                                          annotLevel = 1,
                                          ttSpecies = "human",
                                          sctSpecies = "mouse")

## ----fig.width = 7, fig.height = 4, error=TRUE--------------------------------
try({
  plot_list <- EWCE::ewce_plot(tt_results$joint_results)
  print(plot_list$plain)
})

## ----Session Info-------------------------------------------------------------
utils::sessionInfo()