NIDAP-Community · escauley · Apr 24, 2023 · Mar 7, 2023 · Mar 7, 2023 · Apr 7, 2023
diff --git a/.github/workflows/gitflow-R-action.yml b/.github/workflows/gitflow-R-action.yml
@@ -13,5 +13,5 @@ jobs:
   Activating_Parser:
     uses: fnlcr-bids-sdsi/gitflow-R/.github/workflows/parser.yml@DSPWorkflow
     with:
-      image_to_use: ghcr.io/nidap-community/dsp_github_workflow_env:r_env_v1.0
+      image_to_use: ghcr.io/nidap-community/dsp_github_workflow_env:DSPv0.9.3
 
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,10 +1,20 @@
 Package: DSPWorkflow
-Title: What the Package Does (One Line, Title Case)
-Version: 1.0.0.0
-Authors@R: 
-    person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: The DSP Workflow addresses a growing need to streamline the analysis of Spatial Transcriptomics data produced from Digital Spatial Profiling Technology (NanoString). It can be run in a docker container, and for biologists, in user-friendly web-based interactive notebooks (NIDAP, Palantir Foundry).
+Title: A Workflow for Analyzing Digital Spatial Profiling RNA Data
+Version: 0.9.2.0
+Authors@R: c(person("Rui", "He", email = "rui.he@nih.gov", role = "aut"), 
+    person("Maggie", "Cam", email = "maggie.cam@nih.gov", role = "aut", comment = c(ORCID = "0000-0001-8190-9766")), 
+    person("Ned", "Cauley", email = "ned.cauley@nih.gov", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8968-6621")), 
+    person("Jing", "Bian", email = "bianjh@nih.gov", role = "aut", comment = c(ORCID = "0000-0001-7109-716X")), 
+    person("Difei", "Wang", email = "difei.wang2@nih.gov", role = "aut", comment = c(ORCID = "0000-0003-4088-3859")),
+    person("Chad", "Highfill", email = "chad.highfill@nih.gov", role = "aut", comment = c(ORCID = "0000-0003-0046-3593")))
+Description: A set of functions for analyzing RNA data from the spatial 
+    transcriptomics approach Digital Spatial Profiling (Nanostring). The user 
+    provides read count data and annotations, and the package outputs 
+    normalized differential expression of genes and further visualizations and 
+    analysis based on user input. It can be run in a docker container and in 
+    user-friendly web-based interactive notebooks (NIDAP, Palantir Foundry).
+URL: https://github.com/NIDAP-Community/DSPWorkflow
+BugReports: https://github.com/NIDAP-Community/DSPWorkflow/issues
 License: MIT + file LICENSE
 Encoding: UTF-8
 Roxygen: list(markdown = TRUE)
@@ -14,7 +24,6 @@ Suggests:
 Depends:
     R (>= 3.6)
 Imports:
-    backports (>= 1.4.1),
     Biobase (>= 2.54.0),
     BiocGenerics (>= 0.40.0),
     cowplot (>= 1.1.1),
@@ -23,21 +32,20 @@ Imports:
     ggforce (>= 0.3.4),
     ggplot2 (>= 3.3.6),
     gridExtra (>= 2.3),
+    grid (>= 4.1.3),
     gtable (>= 0.3.0),
     knitr (>= 1.40),
     NanoStringNCTools (>= 1.2.0),
     patchwork (>= 1.1.2),
     reshape2 (>= 1.4.4),
-    Rmpfr (>= 0.8-9),
     Rtsne (>= 0.16),
     scales (>= 1.2.1),
     stats (>= 4.1.3),
     SpatialDecon (>= 1.4.3),
     tibble (>= 3.1.8),
     tidyr (>= 1.2.1),
-    tidyverse (>= 1.3.2),
     umap (>= 0.2.9.0),
     pheatmap (>= 1.0.12),
-    stringr,
-    magrittr
+    magrittr (>= 2.0.3),
+    ComplexHeatmap (>= 2.10.0)
 Config/testthat/edition: 3
diff --git a/NAMESPACE b/NAMESPACE
@@ -3,19 +3,15 @@
 export(diffExpr)
 export(dimReduct)
 export(filtering)
-export(geomxnorm)
+export(geomxNorm)
 export(heatMap)
 export(qcProc)
 export(spatialDeconvolution)
 export(studyDesign)
 export(violinPlot)
-import(GeomxTools)
+import(Biobase)
 import(NanoStringNCTools)
-import(SpatialDecon)
 import(ggplot2)
-import(gridExtra)
-import(pheatmap)
-import(stats)
 importFrom(Biobase,assayDataElement)
 importFrom(Biobase,exprs)
 importFrom(Biobase,fData)
@@ -26,8 +22,10 @@ importFrom(BiocGenerics,annotation)
 importFrom(BiocGenerics,colnames)
 importFrom(BiocGenerics,rbind)
 importFrom(BiocGenerics,rownames)
+importFrom(ComplexHeatmap,pheatmap)
 importFrom(GeomxTools,aggregateCounts)
 importFrom(GeomxTools,mixedModelDE)
+importFrom(GeomxTools,ngeoMean)
 importFrom(GeomxTools,normalize)
 importFrom(GeomxTools,readNanoStringGeoMxSet)
 importFrom(GeomxTools,setBioProbeQCFlags)
@@ -38,6 +36,9 @@ importFrom(NanoStringNCTools,esBy)
 importFrom(NanoStringNCTools,negativeControlSubset)
 importFrom(NanoStringNCTools,sData)
 importFrom(Rtsne,Rtsne)
+importFrom(SpatialDecon,create_profile_matrix)
+importFrom(SpatialDecon,derive_GeoMx_background)
+importFrom(SpatialDecon,spatialdecon)
 importFrom(cowplot,plot_grid)
 importFrom(dplyr,arrange)
 importFrom(dplyr,count)
@@ -72,27 +73,29 @@ importFrom(ggplot2,scale_y_continuous)
 importFrom(ggplot2,theme)
 importFrom(ggplot2,theme_bw)
 importFrom(ggplot2,theme_classic)
-importFrom(graphics,boxplot)
 importFrom(grid,gpar)
 importFrom(grid,grid.draw)
 importFrom(grid,grid.newpage)
 importFrom(grid,grobHeight)
 importFrom(grid,textGrob)
+importFrom(gridExtra,arrangeGrob)
 importFrom(gridExtra,grid.arrange)
 importFrom(gridExtra,tableGrob)
+importFrom(gridExtra,ttheme_default)
 importFrom(gtable,gtable_add_grob)
 importFrom(gtable,gtable_add_rows)
 importFrom(knitr,kable)
 importFrom(magrittr,"%>%")
+importFrom(parallel,detectCores)
 importFrom(patchwork,guide_area)
 importFrom(patchwork,patchworkGrob)
 importFrom(patchwork,plot_annotation)
 importFrom(patchwork,plot_layout)
 importFrom(patchwork,wrap_elements)
 importFrom(patchwork,wrap_plots)
-importFrom(pheatmap,pheatmap)
 importFrom(reshape2,melt)
 importFrom(scales,percent)
+importFrom(stats,as.formula)
 importFrom(stats,p.adjust)
 importFrom(stats,prcomp)
 importFrom(stats,quantile)

diff --git a/R/differential_expression_analysis.R b/R/differential_expression_analysis.R
@@ -46,10 +46,11 @@
 #' @importFrom grid grid.newpage textGrob gpar grobHeight grid.draw
 #' @importFrom gtable gtable_add_rows gtable_add_grob
 #' @importFrom tibble rownames_to_column
-#' @importFrom gridExtra tableGrob
+#' @importFrom gridExtra tableGrob ttheme_default
 #' @importFrom BiocGenerics rownames colnames rbind
 #' @importFrom magrittr %>%
 #' @importFrom Biobase pData assayDataElement
+#' @importFrom parallel detectCores
 #' @export
 #'
 #' @return a list containing mixed model output data frame, grid tables for
@@ -71,6 +72,19 @@ diffExpr <- function(object,
                      pval.lim.1 = 0.05,
                      pval.lim.2 = 0.01) {
 
+  # Check the number of cores available for the current machine
+  available.cores <- detectCores()
+
+  if (n.cores > available.cores) {
+    print(paste0("The number of cores selected is greater than the number of available cores, reducing number of cores to maximum of ", available.cores))
+    n.cores <- available.cores
+  }
+
+  # Adjust the number of cores selected within the machine's range
+
+
+
+
   testClass <- testRegion <- Gene <- Subset <- NULL
 
   # convert test variables to factors after checking input

diff --git a/R/filtering.R b/R/filtering.R
@@ -22,7 +22,7 @@
 # loq.cutoff 2 is recommended, loq.min 2 is recommend, 
 # cut.segment = remove segments with less than 10% of the genes detected; .05-.1 recommended,
 # goi = goi (genes of interest). Must be a vector of genes (i.e c("PDCD1", "CD274")),
-filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
+filtering <- function(object, loq.cutoff, loq.min, cut.segment, goi) {
 
   if(class(object)[1] != "NanoStringGeoMxSet"){
     stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
@@ -41,7 +41,8 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
     stop(paste0("Error: You have the wrong data class, must be numeric" ))
   }
   # Define Modules
-  pkc.file <- pkc.file
+  #pkc.file <- pkc.file
+  pkc.file <- annotation(object)
   if(class(pkc.file)[1] != "character"){
     stop(paste0("Error: You have the wrong data class, must be character" ))
   }
@@ -65,22 +66,22 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
   pData(object)$loq <- loq
 
   ## 4.5.0 Filtering
-  loq_mat <- c()
+  loq.mat <- c()
   for(module in modules) {
     ind <- fData(object)$Module == module
-    mat_i <- t(esApply(object[ind, ], MARGIN = 1,
+    mat.i <- t(esApply(object[ind, ], MARGIN = 1,
                        FUN = function(x) {
                          x > loq[, module]
                        }))
-    loq_mat <- rbind(loq_mat, mat_i)
+    loq.mat <- rbind(loq.mat, mat.i)
   }
   # ensure ordering since this is stored outside of the geomxSet
-  loq_mat <- loq_mat[fData(object)$TargetName, ]
+  loq.mat <- loq.mat[fData(object)$TargetName, ]
 
   ##4.5.1S egment Gene Detection
   # Save detection rate information to pheno data
   pData(object)$GenesDetected <- 
-    colSums(loq_mat, na.rm = TRUE)
+    colSums(loq.mat, na.rm = TRUE)
   pData(object)$GeneDetectionRate <-
     pData(object)$GenesDetected / nrow(object)
 
@@ -114,17 +115,17 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
 
   # select the annotations we want to show, use `` to surround column names with
   # spaces or special symbols
-  count_mat <- count(pData(object), `slide name`, class, region, segment)
+  count.mat <- count(pData(object), `slide name`, class, region, segment)
   if(class(object)[1] != "NanoStringGeoMxSet"){
     stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
   }
   # simplify the slide names
-  count_mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count_mat$`slide name`))
+  count.mat$`slide name` <- gsub("disease", "d", gsub("normal", "n", count.mat$`slide name`))
   # gather the data and plot in order: class, slide name, region, segment
-  test_gr <- gather_set_data(count_mat, 1:4)
-  test_gr$x <-factor(test_gr$x, levels = c("class", "slide name", "region", "segment"))
+  test.gr <- gather_set_data(count.mat, 1:4)
+  test.gr$x <-factor(test.gr$x, levels = c("class", "slide name", "region", "segment"))
   # plot Sankey
-  sankey.plot<- ggplot(test_gr, aes(x, id = id, split = y, value = n)) +
+  sankey.plot<- ggplot(test.gr, aes(x, id = id, split = y, value = n)) +
     geom_parallel_sets(aes(fill = region), alpha = 0.5, axis.width = 0.1) +
     geom_parallel_sets_axes(axis.width = 0.2) +
     geom_parallel_sets_labels(color = "white", size = 5) +
@@ -143,8 +144,8 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
 
   ##4.5.2 Gene Detection Rate
   # Calculate detection rate:
-  loq_mat <- loq_mat[, colnames(object)]
-  fData(object)$DetectedSegments <- rowSums(loq_mat, na.rm = TRUE)
+  loq.mat <- loq.mat[, colnames(object)]
+  fData(object)$DetectedSegments <- rowSums(loq.mat, na.rm = TRUE)
   fData(object)$DetectionRate <-
     fData(object)$DetectedSegments / nrow(pData(object))
 
@@ -153,20 +154,20 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
   if(class(goi)[1] != "character"){
     stop(paste0("Error: You have the wrong data class, must be character vector" ))
   }
-  goi_df <- data.frame(Gene = goi,
+  goi.df <- data.frame(Gene = goi,
                        Number = fData(object)[goi, "DetectedSegments"],
                        DetectionRate = percent(fData(object)[goi, "DetectionRate"]))
 
   ## 4.5.3 Gene Filtering
   # Plot detection rate:
-  plot_detect <- data.frame(Freq = c(1, 5, 10, 20, 30, 50))
-  plot_detect$Number <-
+  plot.detect <- data.frame(Freq = c(1, 5, 10, 20, 30, 50))
+  plot.detect$Number <-
     unlist(lapply(c(0.01, 0.05, 0.1, 0.2, 0.3, 0.5),
                   function(x) {sum(fData(object)$DetectionRate >= x)}))
-  plot_detect$Rate <- plot_detect$Number / nrow(fData(object))
-  rownames(plot_detect) <- plot_detect$Freq
+  plot.detect$Rate <- plot.detect$Number / nrow(fData(object))
+  rownames(plot.detect) <- plot.detect$Freq
 
-  genes.detected.plot <- ggplot(plot_detect, aes(x = as.factor(Freq), y = Rate, fill = Rate)) +
+  genes.detected.plot <- ggplot(plot.detect, aes(x = as.factor(Freq), y = Rate, fill = Rate)) +
     geom_bar(stat = "identity") +
     geom_text(aes(label = formatC(Number, format = "d", big.mark = ",")),
               vjust = 1.6, color = "black", size = 4) +
@@ -182,10 +183,10 @@ filtering <- function(object, pkc.file, loq.cutoff, loq.min, cut.segment, goi) {
 
   # Subset to target genes detected in at least 10% of the samples.
   #   Also manually include the negative control probe, for downstream use
-  negativeProbefData <- subset(fData(object), CodeClass == "Negative")
-  neg_probes <- unique(negativeProbefData$TargetName)
+  negative.probe.fData <- subset(fData(object), CodeClass == "Negative")
+  neg.probes <- unique(negative.probe.fData$TargetName)
   object <- object[fData(object)$DetectionRate >= 0.1 |
-                     fData(object)$TargetName %in% neg_probes, ]
+                     fData(object)$TargetName %in% neg.probes, ]
 
   # retain only detected genes of interest
   goi <- goi[goi %in% rownames(object)]

diff --git a/R/heatmap.R b/R/heatmap.R
@@ -52,11 +52,11 @@
 #'
 #' @importFrom NanoStringNCTools assayDataApply
 #' @importFrom Biobase assayDataElement
-#' @importFrom pheatmap pheatmap
+#' @importFrom ComplexHeatmap pheatmap
 #'
 #' @export
 #'
-#' @return A list containing the plot genes data matrix, and the heatmap plot.
+#' @return A list containing the plot genes data matrix, and the heatmap plot
 ##
 heatMap <- function(
   object,

diff --git a/R/normalization.R b/R/normalization.R
@@ -23,7 +23,6 @@
 #' @importFrom cowplot plot_grid
 #' @importFrom Biobase exprs
 #' @importFrom Biobase pData
-#' @importFrom graphics boxplot
 #' @importFrom stats quantile
 #' @importFrom utils head
 #' @importFrom GeomxTools normalize
@@ -34,40 +33,39 @@
 
 
 # To call function, must have object = object; norm = c(quant or neg)
-geomxnorm <- function(object, norm) {
+geomxNorm <- function(object, norm) {
 
   if(class(object)[1] != "NanoStringGeoMxSet"){
     stop(paste0("Error: You have the wrong data class, must be NanoStringGeoMxSet" ))
   }
 
   # run reductions ====
-  color.variable <-
-    Value <- Statistic <- NegProbe <- Q3 <- Annotation <- NULL
+  color.variable <- Value <- Statistic <- NegProbe <- Q3 <- Annotation <- NULL
 
   # Start Function
   neg.probes<- "NegProbe-WTX"
   ann.of.interest <- "region"
 
-  Stat.data <- base::data.frame(row.names = colnames(exprs(object)),
+  stat.data <- base::data.frame(row.names = colnames(exprs(object)),
                                 Segment = colnames(exprs(object)),
                                 Annotation = Biobase::pData(object)[, ann.of.interest],
                                 Q3 = unlist(apply(exprs(object), 2,
                                                   quantile, 0.75, na.rm = TRUE)),
                                 NegProbe = exprs(object)[neg.probes, ])
 
-  Stat.data.m <- melt(Stat.data, measures.vars = c("Q3", "NegProbe"),
+  stat.data.m <- melt(stat.data, measures.vars = c("Q3", "NegProbe"),
                       variable.name = "Statistic", value.name = "Value")
 
 
-  plt1 <- ggplot(Stat.data.m,
+  plt1 <- ggplot(stat.data.m,
                  aes(x = Value, fill = Statistic)) +
     geom_histogram(bins = 40) + theme_bw() +
     scale_x_continuous(trans = "log2") +
     facet_wrap(~Annotation, nrow = 1) + 
     scale_fill_brewer(palette = 3, type = "qual") +
     labs(x = "Counts", y = "Segments, #")
 
-  plt2 <- ggplot(Stat.data,
+  plt2 <- ggplot(stat.data,
                  aes(x = NegProbe, y = Q3, color = Annotation)) +
     geom_abline(intercept = 0, slope = 1, lty = "dashed", color = "darkgray") +
     geom_point() + guides(color = "none") + theme_bw() +
@@ -76,7 +74,7 @@ geomxnorm <- function(object, norm) {
     theme(aspect.ratio = 1) +
     labs(x = "Negative Probe GeoMean, Counts", y = "Q3 Value, Counts")
 
-  plt3 <- ggplot(Stat.data,
+  plt3 <- ggplot(stat.data,
                  aes(x = NegProbe, y = Q3 / NegProbe, color = Annotation)) +
     geom_hline(yintercept = 1, lty = "dashed", color = "darkgray") +
     geom_point() + theme_bw() +