Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/api/comp_method_expression_correction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ arguments:
required: false
direction: input
__merge__: /src/api/file_scrnaseq_reference.yaml
- name: --input_ist
direction: input
required: false
__merge__: /src/api/file_transcript_assignments.yaml
- name: --output
required: true
direction: output
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
__merge__: /src/api/comp_method_expression_correction.yaml

name: denoist_correction
label: "denoist_correction"
summary: "Correct counts / remove contamination using the DenoIST method"
description: >-
DenoIST is a package for denoising image-based spatial transcriptomics data. It takes an IST count matrix and returns an adjusted count matrix with contamination removed.
links:
documentation: "https://github.com/aaronkwc/DenoIST"
repository: "https://github.com/aaronkwc/DenoIST"
references:
doi: "10.1101/2025.11.13.688387"

arguments:
- name: --celltype_key
required: false
direction: input
type: string
default: cell_type

- name: --nbins
required: false
direction: input
type: integer
default: 200
description: Number of bins to use for hexagonal binning, which is used for calculating background transcript contamination

- name: --distance
required: false
direction: input
type: integer
default: 50
description: Maximum distance to consider for local background estimation

- name: --keep_all_cells
required: false
direction: input
type: boolean
default: false
description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)


resources:
- type: r_script
path: script.R

engines:
- type: docker
image: openproblems/base_r:1
setup:
#- type: docker
# run: |
# apt-get update && apt-get install -y wget
- type: r
bioc: [anndataR, rhdf5, scuttle, devtools, DelayedMatrixStats]
cran: [arrow]
- type: r
bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
# bioc_force_install: true
- type: docker
run: |
Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('aaronkwc/DenoIST')"

# SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
# https://github.com/drighelli/SpatialExperiment/issues/171
# https://github.com/satijalab/seurat/issues/9889
# The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment.

# DenoIST is not available for bioconductor 3.22, only 3.23 :/
# if bioconductor is updated, it should make things easier

- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ hightime, highcpu, highmem ]
82 changes: 82 additions & 0 deletions src/methods_expression_correction/denoist_correction/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
library(Matrix)
library(DenoIST)
library(SpatialExperiment)
library(SingleCellExperiment)
library(anndataR)
library(scuttle)
library(arrow)

## VIASH START
# Fallback values used when the script is run outside of viash; viash
# replaces everything between the START/END markers at build time.
# NOTE: the original block was not valid R — `par` had a trailing comma
# (list(...,) errors with "argument is empty") and `meta` used Python-style
# `'cpus': 4` syntax instead of `cpus = 4`.
par <- list(
  input_spatial_with_cell_types = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
  input_ist = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr",
  output = "task_ist_preprocessing/tmp/denoist_corrected.h5ad",
  keep_all_cells = FALSE,
  distance = 50,
  nbins = 200
)

# Execution metadata normally injected by the viash runner (cpu allocation).
meta <- list(
  cpus = 4
)

## VIASH END

# Read the cell-level expression input (h5ad) and convert it to a
# SingleCellExperiment via anndataR, then wrap it in a SpatialExperiment.
cat("Reading input files\n")
sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")

# Optionally drop cells with zero total counts. Per the component config,
# keeping them (--keep_all_cells TRUE) may cause downstream errors.
if (!par$keep_all_cells) {
  cat("Filtering cells with 0 counts\n")
  sce <- sce[, colSums(counts(sce)) > 0]
}

# Build the SpatialExperiment that DenoIST consumes, taking the spatial
# coordinates from the colData columns.
# NOTE(review): assumes `centroid_x` / `centroid_y` exist in the input obs —
# confirm against the spatial-input file API spec.
spe <- SpatialExperiment(
  assay = counts(sce),
  colData = colData(sce),
  spatialCoordsNames = c("centroid_x", "centroid_y"))

# Read the transcript-level table from the IST zarr store (stored as a
# parquet dataset under points/transcripts/) via arrow, then materialize it
# as a plain data.frame for DenoIST.
tx_dataset <- arrow::open_dataset(file.path(par$input_ist, "points/transcripts/points.parquet"))
tx <- as.data.frame(tx_dataset)

# DenoIST expects a Xenium-style quality-value ("qv") column. If the input
# platform does not provide one, add a constant dummy value of 20 —
# presumably high enough that no transcript is dropped on quality; confirm
# against DenoIST's filtering defaults.
# Fixes vs. original: added the missing trailing newline to the status
# message and corrected the "unecessary" typo.
if (!("qv" %in% names(tx))) {
  cat("QV column not found, adding dummy column of 20 (should be unnecessary in future updates?)\n")
  tx[["qv"]] <- 20
}

# Decide how many worker cores to hand to DenoIST: use the cpu count the
# runner provided in `meta`, falling back to serial execution otherwise.
cores <- if ("cpus" %in% names(meta) && !is.null(meta[["cpus"]])) {
  meta[["cpus"]]
} else {
  1
}
cat(sprintf("Number of cores: %s\n", cores))

# Run the DenoIST contamination correction on the SpatialExperiment,
# using the transcript table for local background estimation.
res <- denoist(
  mat = spe,
  tx = tx,
  feature_label = "feature_name",
  coords = NULL,
  distance = par$distance,
  nbins = par$nbins,
  cl = cores
)

# Extract the contamination-adjusted count matrix from the DenoIST result.
corrected_counts <- res$adjusted_counts

# Create a corrected-counts layer in the original SingleCellExperiment.
cat("Normalizing counts\n")

# Start from a full copy of the raw counts so entries DenoIST did not
# return keep their original values...
assay(sce, "corrected_counts") <- assay(sce, "counts")

# ...then overwrite only the features/cells present in the corrected matrix.
assay(sce, "corrected_counts")[rownames(corrected_counts), colnames(corrected_counts)] <- corrected_counts

# Library size normalization - see note in resolVI.
# FIX(review): scuttle::logNormCounts takes `size.factors` (dot-separated),
# not `size_factors`; the underscore spelling does not partial-match and was
# silently swallowed by `...`, so the explicitly computed factors were never
# used. (Behavior happened to coincide with the default, but the intent now
# actually takes effect.)
size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
assay(sce, "normalized") <- assay(
  logNormCounts(sce, size.factors = size_factors, assay.type = "corrected_counts"),
  "logcounts"
)

# Write the final object to h5ad format.
cat("Writing to h5ad\n")
dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
write_h5ad(sce, par$output, mode = "w")