## --------------------------------------------------------------------------
suppressPackageStartupMessages({
  library(DuoClustering2018)
})

scename <- "sce_filteredExpr10_Koh"
sce <- sce_filteredExpr10_Koh()
method <- "PCAHC"

## --------------------------------------------------------------------------
## Load parameter files. General dataset and method parameters as well as
## dataset/method-specific parameters
params <- duo_clustering_all_parameter_settings_v2()[[paste0(scename, "_", 
                                                             method)]]
params

## ---- eval=FALSE-----------------------------------------------------------
#  ## Set number of times to run clustering for each k
#  n_rep <- 5
#  
#  ## Run clustering
#  set.seed(1234)
#  L <- lapply(seq_len(n_rep), function(i) {  ## For each run
#    cat(paste0("run = ", i, "\n"))
#    if (method == "Seurat") {
#      tmp <- lapply(params$range_resolutions, function(resolution) {
#        ## For each resolution
#        cat(paste0("resolution = ", resolution, "\n"))
#        ## Run clustering
#        res <- get(paste0("apply_", method))(sce = sce, params = params,
#                                             resolution = resolution)
#  
#        ## Put output in data frame
#        df <- data.frame(dataset = scename,
#                         method = method,
#                         cell = names(res$cluster),
#                         run = i,
#                         k = length(unique(res$cluster)),
#                         resolution = resolution,
#                         cluster = res$cluster,
#                         stringsAsFactors = FALSE, row.names = NULL)
#        tm <- data.frame(dataset = scename,
#                         method = method,
#                         run = i,
#                         k = length(unique(res$cluster)),
#                         resolution = resolution,
#                         user.self = res$st[["user.self"]],
#                         sys.self = res$st[["sys.self"]],
#                         user.child = res$st[["user.child"]],
#                         sys.child = res$st[["sys.child"]],
#                         elapsed = res$st[["elapsed"]],
#                         stringsAsFactors = FALSE, row.names = NULL)
#        kest <- data.frame(dataset = scename,
#                           method = method,
#                           run = i,
#                           k = length(unique(res$cluster)),
#                           resolution = resolution,
#                           est_k = res$est_k,
#                           stringsAsFactors = FALSE, row.names = NULL)
#        list(clusters = df, timing = tm, kest = kest)
#      })  ## End for each resolution
#    } else {
#      tmp <- lapply(params$range_clusters, function(k) {  ## For each k
#        cat(paste0("k = ", k, "\n"))
#        ## Run clustering
#        res <- get(paste0("apply_", method))(sce = sce, params = params, k = k)
#  
#        ## Put output in data frame
#        df <- data.frame(dataset = scename,
#                         method = method,
#                         cell = names(res$cluster),
#                         run = i,
#                         k = k,
#                         resolution = NA,
#                         cluster = res$cluster,
#                         stringsAsFactors = FALSE, row.names = NULL)
#        tm <- data.frame(dataset = scename,
#                         method = method,
#                         run = i,
#                         k = k,
#                         resolution = NA,
#                         user.self = res$st[["user.self"]],
#                         sys.self = res$st[["sys.self"]],
#                         user.child = res$st[["user.child"]],
#                         sys.child = res$st[["sys.child"]],
#                         elapsed = res$st[["elapsed"]],
#                         stringsAsFactors = FALSE, row.names = NULL)
#        kest <- data.frame(dataset = scename,
#                           method = method,
#                           run = i,
#                           k = k,
#                           resolution = NA,
#                           est_k = res$est_k,
#                           stringsAsFactors = FALSE, row.names = NULL)
#        list(clusters = df, timing = tm, kest = kest)
#      })  ## End for each k
#    }
#  
#    ## Summarize across different values of k
#    assignments <- do.call(rbind, lapply(tmp, function(w) w$clusters))
#    timings <- do.call(rbind, lapply(tmp, function(w) w$timing))
#    k_estimates <- do.call(rbind, lapply(tmp, function(w) w$kest))
#    list(assignments = assignments, timings = timings, k_estimates = k_estimates)
#  })  ## End for each run
#  
#  ## Summarize across different runs
#  assignments <- do.call(rbind, lapply(L, function(w) w$assignments))
#  timings <- do.call(rbind, lapply(L, function(w) w$timings))
#  k_estimates <- do.call(rbind, lapply(L, function(w) w$k_estimates))
#  
#  ## Add true group for each cell
#  truth <- data.frame(cell = as.character(rownames(colData(sce))),
#                      trueclass = as.character(colData(sce)$phenoid),
#                      stringsAsFactors = FALSE)
#  assignments$trueclass <- truth$trueclass[match(assignments$cell, truth$cell)]
#  
#  ## Combine results
#  res <- list(assignments = assignments, timings = timings,
#              k_estimates = k_estimates)
#  
#  df <- dplyr::full_join(res$assignments %>%
#                           dplyr::select(dataset, method, cell, run, k,
#                                         resolution, cluster, trueclass),
#                         res$k_estimates %>%
#                           dplyr::select(dataset, method, run, k,
#                                         resolution, est_k)
#  ) %>% dplyr::full_join(res$timings %>% dplyr::select(dataset, method, run, k,
#                                                       resolution, elapsed))

## --------------------------------------------------------------------------
sessionInfo()