Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/api/comp_method_expression_correction.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ arguments:
required: false
direction: input
__merge__: /src/api/file_scrnaseq_reference.yaml
- name: --input_ist
direction: input
required: false
__merge__: /src/api/file_transcript_assignments.yaml
- name: --output
required: true
direction: output
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
__merge__: /src/api/comp_method_expression_correction.yaml

name: denoist_correction
label: "denoist_correction"
summary: "Correct counts / remove contamination using the DenoIST method"
description: >-
DenoIST is a package for denoising image-based spatial transcriptomics data. It takes an IST count matrix and returns an adjusted count matrix with contamination removed.
links:
documentation: "https://github.com/aaronkwc/DenoIST"
repository: "https://github.com/aaronkwc/DenoIST"
references:
doi: "10.1101/2025.11.13.688387"

arguments:
- name: --celltype_key
required: false
direction: input
type: string
default: cell_type

- name: --nbins
required: false
direction: input
type: integer
default: 200
description: Number of bins to use for hexagonal binning, which is used for calculating background transcript contamination

- name: --distance
required: false
direction: input
type: integer
default: 50
description: Maximum distance to consider for local background estimation

- name: --keep_all_cells
required: false
direction: input
type: boolean
default: false
description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)


resources:
- type: r_script
path: script.R

engines:
- type: docker
image: openproblems/base_r:1
setup:
#- type: docker
# run: |
# apt-get update && apt-get install -y wget
- type: r
bioc: [anndataR, rhdf5, scuttle, devtools, DelayedMatrixStats]
cran: [arrow]
- type: r
bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
# bioc_force_install: true
- type: docker
run: |
Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('aaronkwc/DenoIST')"

# SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
# https://github.com/drighelli/SpatialExperiment/issues/171
# https://github.com/satijalab/seurat/issues/9889
# The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment.

# DenoIST is not available for bioconductor 3.22, only 3.23 :/
# if bioconductor is updated, it should make things easier

- type: native

runners:
- type: executable
- type: nextflow
directives:
label: [ hightime, highcpu, highmem ]
82 changes: 82 additions & 0 deletions src/methods_expression_correction/denoist_correction/script.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
library(Matrix)
library(DenoIST)
library(SpatialExperiment)
library(SingleCellExperiment)
library(anndataR)
library(scuttle)
library(arrow)

## VIASH START
# Fallback values used when the script is run outside of viash; viash
# replaces everything between the START/END markers at build time.
# NOTE: the original block was not valid R — `par` had a trailing comma
# (list(...,) errors with "argument is empty") and `meta` used Python-style
# `'cpus': 4` syntax instead of `cpus = 4`.
par <- list(
  input_spatial_with_cell_types = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
  input_ist = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr",
  output = "task_ist_preprocessing/tmp/denoist_corrected.h5ad",
  keep_all_cells = FALSE,
  distance = 50,
  nbins = 200
)

# Execution metadata normally injected by the viash runner (cpu allocation).
meta <- list(
  cpus = 4
)

## VIASH END

# Read the cell-level expression input (h5ad) and convert it to a
# SingleCellExperiment via anndataR, then wrap it in a SpatialExperiment.
cat("Reading input files\n")
sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")

# Optionally drop cells with zero total counts. Per the component config,
# keeping them (--keep_all_cells TRUE) may cause downstream errors.
if (!par$keep_all_cells) {
  cat("Filtering cells with 0 counts\n")
  sce <- sce[, colSums(counts(sce)) > 0]
}

# Build the SpatialExperiment that DenoIST consumes, taking the spatial
# coordinates from the colData columns.
# NOTE(review): assumes `centroid_x` / `centroid_y` exist in the input obs —
# confirm against the spatial-input file API spec.
spe <- SpatialExperiment(
  assay = counts(sce),
  colData = colData(sce),
  spatialCoordsNames = c("centroid_x", "centroid_y"))

# Read the transcript-level table from the IST zarr store (stored as a
# parquet dataset under points/transcripts/) via arrow, then materialize it
# as a plain data.frame for DenoIST.
tx_dataset <- arrow::open_dataset(file.path(par$input_ist, "points/transcripts/points.parquet"))
tx <- as.data.frame(tx_dataset)

# DenoIST expects a Xenium-style quality-value ("qv") column. If the input
# platform does not provide one, add a constant dummy value of 20 —
# presumably high enough that no transcript is dropped on quality; confirm
# against DenoIST's filtering defaults.
# Fixes vs. original: added the missing trailing newline to the status
# message and corrected the "unecessary" typo.
if (!("qv" %in% names(tx))) {
  cat("QV column not found, adding dummy column of 20 (should be unnecessary in future updates?)\n")
  tx[["qv"]] <- 20
}

# Decide how many worker cores to hand to DenoIST: use the cpu count the
# runner provided in `meta`, falling back to serial execution otherwise.
cores <- if ("cpus" %in% names(meta) && !is.null(meta[["cpus"]])) {
  meta[["cpus"]]
} else {
  1
}
cat(sprintf("Number of cores: %s\n", cores))

# Run the DenoIST contamination correction on the SpatialExperiment,
# using the transcript table for local background estimation.
res <- denoist(
  mat = spe,
  tx = tx,
  feature_label = "feature_name",
  coords = NULL,
  distance = par$distance,
  nbins = par$nbins,
  cl = cores
)

# Extract the contamination-adjusted count matrix from the DenoIST result.
corrected_counts <- res$adjusted_counts

# Create a corrected-counts layer in the original SingleCellExperiment.
cat("Normalizing counts\n")

# Start from a full copy of the raw counts so entries DenoIST did not
# return keep their original values...
assay(sce, "corrected_counts") <- assay(sce, "counts")

# ...then overwrite only the features/cells present in the corrected matrix.
assay(sce, "corrected_counts")[rownames(corrected_counts), colnames(corrected_counts)] <- corrected_counts

# Library size normalization - see note in resolVI.
# FIX(review): scuttle::logNormCounts takes `size.factors` (dot-separated),
# not `size_factors`; the underscore spelling does not partial-match and was
# silently swallowed by `...`, so the explicitly computed factors were never
# used. (Behavior happened to coincide with the default, but the intent now
# actually takes effect.)
size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
assay(sce, "normalized") <- assay(
  logNormCounts(sce, size.factors = size_factors, assay.type = "corrected_counts"),
  "logcounts"
)

# Write the final object to h5ad format.
cat("Writing to h5ad\n")
dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
write_h5ad(sce, par$output, mode = "w")