From 0c38992747199886806475ed097c24dc9d8aa305 Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Mon, 23 Feb 2026 07:33:05 -0600 Subject: [PATCH 1/3] denoist, but transcripts aren't working yet --- .../denoist_correction/config.vsh.yaml | 68 +++++++++++++++++ .../denoist_correction/script.R | 73 +++++++++++++++++++ 2 files changed, 141 insertions(+) create mode 100644 src/methods_expression_correction/denoist_correction/config.vsh.yaml create mode 100644 src/methods_expression_correction/denoist_correction/script.R diff --git a/src/methods_expression_correction/denoist_correction/config.vsh.yaml b/src/methods_expression_correction/denoist_correction/config.vsh.yaml new file mode 100644 index 00000000..76e88194 --- /dev/null +++ b/src/methods_expression_correction/denoist_correction/config.vsh.yaml @@ -0,0 +1,68 @@ +__merge__: /src/api/comp_method_expression_correction.yaml + +name: denoist_correction +label: "denoist_correction" +summary: "Correct counts / remove contamination using the DenoIST methods" +description: >- + DenoIST is a package for denoising image-based spatial transcriptomics data. It takes an IST count matrix and returns an adjusted count matrix with contamination removed. 
+links: + documentation: "https://github.com/aaronkwc/DenoIST" + repository: "https://github.com/aaronkwc/DenoIST" +references: + doi: "10.1101/2025.11.13.688387" + +arguments: + - name: --celltype_key + required: false + direction: input + type: string + default: cell_type + + - name: --nbins + required: false + direction: input + type: integer + default: 200 + + - name: --distance + required: false + direction: input + type: integer + default: 50 + + +resources: + - type: r_script + path: script.R + +engines: + - type: docker + image: openproblems/base_r:1 + setup: + #- type: docker + # run: | + # apt-get update && apt-get install -y wget + - type: r + bioc: [anndataR, rhdf5, scuttle, DenoIST, devtools] + #- type: r + # bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment] + # bioc_force_install: true + # - type: docker + # run: | + # Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('aaronkwc/DenoIST')" + + # SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues: + # https://github.com/drighelli/SpatialExperiment/issues/171 + # https://github.com/satijalab/seurat/issues/9889 + # The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment. + + # Is there a better way to install an r package from github? 
+ # The 6 million timeout thing stops it from breaking + + - type: native + +runners: + - type: executable + - type: nextflow + directives: + label: [ hightime, highcpu, highmem ] \ No newline at end of file diff --git a/src/methods_expression_correction/denoist_correction/script.R b/src/methods_expression_correction/denoist_correction/script.R new file mode 100644 index 00000000..423a6bba --- /dev/null +++ b/src/methods_expression_correction/denoist_correction/script.R @@ -0,0 +1,73 @@ +library(Matrix) +library(DenoIST) +library(SpatialExperiment) +library(SingleCellExperiment) +library(anndataR) +library(scuttle) + +## VIASH START +par <- list( + "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad", + "input_tx" = "mouse_combined_transcripts.csv", + "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad", +# "keep_all_cells" = FALSE, + "distance" = 50, + "nbins" = 200, +) + +meta <- list( + 'cpus': 4, +) + +## VIASH END + +# Read the input h5ad file and convert to SingleCellExperiment and Seurat +sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment") +spe <- SpatialExperiment( + assay = counts(sce), + colData = colData(sce), + spatialCoordsNames = c("centroid_x", "centroid_y")) + +tx <- read.csv(par$input_tx) + +# filter out 0 cells +# if (!par$keep_all_cells) { +# cat("Filtering cells with 0 counts\n") +# sce <- sce[, colSums(counts(sce)) > 0] +# xe <- subset(xe, subset = nCount_RNA > 0) +# } + + +# check cores +cores <- 1 +if ("cpus" %in% names(meta) && !is.null(meta$cpus)) cores <- meta$cpus +cat(sprintf("Number of cores: %s\n", cores)) + +# Run the algorithm + +res <- denoist(mat = spe, + tx = tx, + feature_label = "feature_name", + coords = NULL, + distance = par$distance, nbins = par$nbins, cl = cores) #TODO add in params + +# format name +corrected_counts <- res$adjusted_counts + +# create corrected counts layer in original 
SingleCell object +cat("Normalizing counts\n") + +# First copy in counts +assay(sce, "corrected_counts") <- assay(sce, "counts") + +# Then, replace only the updated cells +assay(sce, "corrected_counts")[rownames(corrected_counts), colnames(corrected_counts)] <- corrected_counts + +# Library size normalization - see note in resolVI +size_factors <- librarySizeFactors(assay(sce, "corrected_counts")) +assay(sce, "normalized") <- assay(logNormCounts(sce, size_factors=size_factors, assay.type = "corrected_counts"),"logcounts") + +# Write the final object to h5ad format +cat("Writing to h5ad\n") +dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE) +write_h5ad(sce, par$output, mode = "w") \ No newline at end of file From e34165ecda36b66b5d5bb0c02350930a95b0a06a Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Thu, 26 Feb 2026 15:21:38 -0500 Subject: [PATCH 2/3] Adding denoist method --- .../denoist_correction/config.vsh.yaml | 33 ++++++++++++++----- .../denoist_correction/script.R | 29 ++++++++++------ 2 files changed, 43 insertions(+), 19 deletions(-) diff --git a/src/methods_expression_correction/denoist_correction/config.vsh.yaml b/src/methods_expression_correction/denoist_correction/config.vsh.yaml index 76e88194..d451d915 100644 --- a/src/methods_expression_correction/denoist_correction/config.vsh.yaml +++ b/src/methods_expression_correction/denoist_correction/config.vsh.yaml @@ -12,6 +12,11 @@ references: doi: "10.1101/2025.11.13.688387" arguments: + - name: --input_ist + direction: input + required: true + type: file + - name: --celltype_key required: false direction: input @@ -23,12 +28,21 @@ arguments: direction: input type: integer default: 200 + description: Number of bins to use for hexagonal binning, which is used for calculating background transcript contamination - name: --distance required: false direction: input type: integer default: 50 + description: Maximum distance to consider for local background estimation + + - name: 
--keep_all_cells + required: false + direction: input + type: boolean + default: false + description: Whether to keep cells with 0 counts (may cause errors if set to TRUE) resources: @@ -43,21 +57,22 @@ engines: # run: | # apt-get update && apt-get install -y wget - type: r - bioc: [anndataR, rhdf5, scuttle, DenoIST, devtools] - #- type: r - # bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment] - # bioc_force_install: true - # - type: docker - # run: | - # Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('aaronkwc/DenoIST')" + bioc: [anndataR, rhdf5, scuttle, devtools, DelayedMatrixStats] + cran: [arrow] + - type: r + bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment] + # bioc_force_install: true + - type: docker + run: | + Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('aaronkwc/DenoIST')" # SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues: # https://github.com/drighelli/SpatialExperiment/issues/171 # https://github.com/satijalab/seurat/issues/9889 # The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment. - # Is there a better way to install an r package from github? 
- # The 6 million timeout thing stops it from breaking + # DenoIST is not available for bioconductor 3.22, only 3.23 :/ + # if bioconductor is updated, it should make things easier - type: native diff --git a/src/methods_expression_correction/denoist_correction/script.R b/src/methods_expression_correction/denoist_correction/script.R index 423a6bba..68e09c97 100644 --- a/src/methods_expression_correction/denoist_correction/script.R +++ b/src/methods_expression_correction/denoist_correction/script.R @@ -4,12 +4,13 @@ library(SpatialExperiment) library(SingleCellExperiment) library(anndataR) library(scuttle) +library(arrow) ## VIASH START par <- list( "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad", - "input_tx" = "mouse_combined_transcripts.csv", - "output" = "task_ist_preprocessing/tmp/split_corrected.h5ad", + "input_ist" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr", + "output" = "task_ist_preprocessing/tmp/denoist_corrected.h5ad", # "keep_all_cells" = FALSE, "distance" = 50, "nbins" = 200, @@ -22,21 +23,29 @@ meta <- list( ## VIASH END # Read the input h5ad file and convert to SingleCellExperiment and Seurat +cat("Reading input files\n") sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment") + +# filter out 0 cells +if (!par$keep_all_cells) { + cat("Filtering cells with 0 counts\n") + sce <- sce[, colSums(counts(sce)) > 0] +} + spe <- SpatialExperiment( assay = counts(sce), colData = colData(sce), spatialCoordsNames = c("centroid_x", "centroid_y")) -tx <- read.csv(par$input_tx) - -# filter out 0 cells -# if (!par$keep_all_cells) { -# cat("Filtering cells with 0 counts\n") -# sce <- sce[, colSums(counts(sce)) > 0] -# xe <- subset(xe, subset = nCount_RNA > 0) -# } +# Read in transcripts +tx_dataset <- arrow::open_dataset(file.path(par$input_ist, "points/transcripts/points.parquet")) +tx <- 
as.data.frame((tx_dataset)) +#If no QV column +if(!("qv" %in% names(tx))) { + cat("QV column not found, adding dummy column of 20 (should be unecessary in future updates?)") + tx["qv"] <- 20 +} # check cores cores <- 1 From fbbdcb23470ff8a2d34c78ca81ee1d719bbc2790 Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Fri, 27 Feb 2026 15:59:21 -0500 Subject: [PATCH 3/3] Adding to API file for exp correction and fixing comments --- src/api/comp_method_expression_correction.yaml | 4 ++++ .../denoist_correction/config.vsh.yaml | 5 ----- .../denoist_correction/script.R | 6 +++--- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/src/api/comp_method_expression_correction.yaml b/src/api/comp_method_expression_correction.yaml index ff726d55..fbe8cb3f 100644 --- a/src/api/comp_method_expression_correction.yaml +++ b/src/api/comp_method_expression_correction.yaml @@ -15,6 +15,10 @@ arguments: required: false direction: input __merge__: /src/api/file_scrnaseq_reference.yaml + - name: --input_ist + direction: input + required: false + __merge__: /src/api/file_transcript_assignments.yaml - name: --output required: true direction: output diff --git a/src/methods_expression_correction/denoist_correction/config.vsh.yaml b/src/methods_expression_correction/denoist_correction/config.vsh.yaml index d451d915..3443db90 100644 --- a/src/methods_expression_correction/denoist_correction/config.vsh.yaml +++ b/src/methods_expression_correction/denoist_correction/config.vsh.yaml @@ -12,11 +12,6 @@ references: doi: "10.1101/2025.11.13.688387" arguments: - - name: --input_ist - direction: input - required: true - type: file - - name: --celltype_key required: false direction: input diff --git a/src/methods_expression_correction/denoist_correction/script.R b/src/methods_expression_correction/denoist_correction/script.R index 68e09c97..9939f365 100644 --- a/src/methods_expression_correction/denoist_correction/script.R +++ b/src/methods_expression_correction/denoist_correction/script.R @@ 
-11,7 +11,7 @@ par <- list( "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad", "input_ist" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr", "output" = "task_ist_preprocessing/tmp/denoist_corrected.h5ad", -# "keep_all_cells" = FALSE, + "keep_all_cells" = FALSE, "distance" = 50, "nbins" = 200, ) @@ -22,7 +22,7 @@ meta <- list( ## VIASH END -# Read the input h5ad file and convert to SingleCellExperiment and Seurat +# Read the input h5ad file and convert to SingleCellExperiment -> SpatialExperiment cat("Reading input files\n") sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment") @@ -58,7 +58,7 @@ res <- denoist(mat = spe, tx = tx, feature_label = "feature_name", coords = NULL, - distance = par$distance, nbins = par$nbins, cl = cores) #TODO add in params + distance = par$distance, nbins = par$nbins, cl = cores) # format name corrected_counts <- res$adjusted_counts