## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
    collapse = TRUE,
    comment = "#>"
)

## ----eval=FALSE---------------------------------------------------------------
#  To install this package, start R (version "4.2") and enter:
#  if (!require("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  BiocManager::install("ccImpute")

## ----setup, message=FALSE, warning=FALSE--------------------------------------
library(scRNAseq)
library(scater)
library(ccImpute)
library(SingleCellExperiment)
library(stats)
library(mclust)

## ----message=FALSE, warning=FALSE---------------------------------------------
sce <- UsoskinBrainData()
X <- cpm(sce)
labels <- colData(sce)$"Level 1"

#Filter bad cells
filt <- !grepl("Empty well", labels) &
        !grepl("NF outlier", labels) &
        !grepl("TH outlier", labels) &
        !grepl("NoN outlier", labels) &
        !grepl("NoN", labels) &
        !grepl("Central, unsolved", labels) &
        !grepl(">1 cell", labels) &
        !grepl("Medium", labels)
        
labels <-labels[filt]
X <- as.matrix(X[,filt])

#Remove genes that are not expressed in any cells:
X <- X[rowSums(X)>0,]

#Recreate the SingleCellExperiment and add log-transformed data:
ann <- data.frame(cell_id = labels)
sce <- SingleCellExperiment(assays = list(normcounts = as.matrix(X)), 
                            colData = ann)
logcounts(sce) <- log(normcounts(sce) + 1)

## -----------------------------------------------------------------------------
# Set seed for reproducibility purposes.
set.seed(0) 
# Compute PCA reduction of the dataset
reducedDims(sce) <- list(PCA=prcomp(t(logcounts(sce)))$x)

# Get an actual number of cell types
k <- length(unique(colData(sce)$cell_id))

# Cluster the PCA reduced dataset and store the assignments
set.seed(0) 
assgmts <- kmeans(reducedDim(sce, "PCA"), centers = k, iter.max = 1e+09,
                    nstart = 1000)$cluster

# Use ARI to compare the k-means assignments to label assignments
adjustedRandIndex(assgmts, colData(sce)$cell_id)

## -----------------------------------------------------------------------------
library(BiocParallel)
BPPARAM = MulticoreParam(2)
sce <- ccImpute(sce, BPPARAM = BPPARAM)

## -----------------------------------------------------------------------------
# Recompute PCA reduction of the dataset
reducedDim(sce, "PCA_imputed") <- prcomp(t(assay(sce, "imputed")))$x

# Cluster the PCA reduced dataset and store the assignments
assgmts <- kmeans(reducedDim(sce, "PCA_imputed"), centers = k, 
                    iter.max = 1e+09, nstart = 1000)$cluster

# Use ARI to compare the k-means assignments to label assignments
adjustedRandIndex(assgmts, colData(sce)$cell_id)

## ----eval=FALSE---------------------------------------------------------------
#  ccImpute(sce, dist, nCeil = 2000, svdMaxRatio = 0.08,
#              maxSets = 8, k, consMin=0.75, kmNStart, kmMax=1000,
#              fastSolver = TRUE, BPPARAM=bpparam(), verbose = TRUE)

## ----eval=FALSE---------------------------------------------------------------
#  cores <- 2
#  BPPARAM = MulticoreParam(cores)
#  
#  w <- rowVars_fast(logcounts(sce), cores)
#  corMat <- getCorM("spearman", logcounts(sce), w, cores)
#  
#  v <- doSVD(corMat, svdMaxRatio=.08, nCeil=2000, nCores=cores)
#  
#  consMtx <- runKM(logX, v, maxSets = 8, k, consMin=0.75, kmNStart, kmMax=1000,
#                      BPPARAM=bpparam())
#  
#  dropIds <- findDropouts(logX, consMtx)
#  
#  iLogX <- computeDropouts(consMtx, logX, dropIds,
#                              fastSolver=TRUE, nCores=cores)
#  assay(sce, "imputed") <- iLogX

## ----eval=FALSE---------------------------------------------------------------
#  corMat <- getCorM("pearson", logcounts(sce), nCores=2)

## ----eval=FALSE---------------------------------------------------------------
#  BPPARAM = MulticoreParam(4)
#  sce <- ccImpute(sce, BPPARAM = BPPARAM)

## ----eval=FALSE---------------------------------------------------------------
#  library(RhpcBLASctl)
#  blas_set_num_threads(4)

## ----reproduce3, echo=FALSE-------------------------------------------------------------------------------------------
## Session info
library("sessioninfo")
options(width = 120)
session_info()