diff --git a/src/api/comp_method_expression_correction.yaml b/src/api/comp_method_expression_correction.yaml
index ff726d55..fbe8cb3f 100644
--- a/src/api/comp_method_expression_correction.yaml
+++ b/src/api/comp_method_expression_correction.yaml
@@ -15,6 +15,10 @@ arguments:
     required: false
     direction: input
     __merge__: /src/api/file_scrnaseq_reference.yaml
+  - name: --input_ist
+    direction: input
+    required: false
+    __merge__: /src/api/file_transcript_assignments.yaml
   - name: --output
     required: true
     direction: output
diff --git a/src/methods_expression_correction/denoist_correction/config.vsh.yaml b/src/methods_expression_correction/denoist_correction/config.vsh.yaml
new file mode 100644
index 00000000..3443db90
--- /dev/null
+++ b/src/methods_expression_correction/denoist_correction/config.vsh.yaml
@@ -0,0 +1,78 @@
+__merge__: /src/api/comp_method_expression_correction.yaml
+
+name: denoist_correction
+label: "denoist_correction"
+summary: "Correct counts / remove contamination using the DenoIST methods"
+description: >-
+  DenoIST is a package for denoising image-based spatial transcriptomics data. It takes an IST count matrix and returns an adjusted count matrix with contamination removed.
+links:
+  documentation: "https://github.com/aaronkwc/DenoIST"
+  repository: "https://github.com/aaronkwc/DenoIST"
+references:
+  doi: "10.1101/2025.11.13.688387"
+
+arguments:
+  - name: --celltype_key
+    required: false
+    direction: input
+    type: string
+    default: cell_type
+
+  - name: --nbins
+    required: false
+    direction: input
+    type: integer
+    default: 200
+    description: Number of bins to use for hexagonal binning, which is used for calculating background transcript contamination
+
+  - name: --distance
+    required: false
+    direction: input
+    type: integer
+    default: 50
+    description: Maximum distance to consider for local background estimation
+
+  - name: --keep_all_cells
+    required: false
+    direction: input
+    type: boolean
+    default: false
+    description: Whether to keep cells with 0 counts (may cause errors if set to TRUE)
+
+
+resources:
+  - type: r_script
+    path: script.R
+
+engines:
+  - type: docker
+    image: openproblems/base_r:1
+    setup:
+      #- type: docker
+      #  run: |
+      #    apt-get update && apt-get install -y wget
+      - type: r
+        bioc: [anndataR, rhdf5, scuttle, devtools, DelayedMatrixStats]
+        cran: [arrow]
+      - type: r
+        bioc: [SummarizedExperiment,SingleCellExperiment,SpatialExperiment]
+        # bioc_force_install: true
+      - type: docker
+        run: |
+          Rscript -e "BiocManager::install('SingleCellExperiment', type = 'source', force = TRUE, ask = FALSE); devtools::install_github('aaronkwc/DenoIST')"
+
+      # SingleCellExperiment part can probably be left out again in the future. It currently fixes a bug described in these issues:
+      # https://github.com/drighelli/SpatialExperiment/issues/171
+      # https://github.com/satijalab/seurat/issues/9889
+      # The reinstall of SingleCellExperiment triggers the correct re-install of SpatialExperiment.
+
+      # DenoIST is not available for bioconductor 3.22, only 3.23 :/
+      # if bioconductor is updated, it should make things easier
+
+  - type: native
+
+runners:
+  - type: executable
+  - type: nextflow
+    directives:
+      label: [ hightime, highcpu, highmem ]
\ No newline at end of file
diff --git a/src/methods_expression_correction/denoist_correction/script.R b/src/methods_expression_correction/denoist_correction/script.R
new file mode 100644
index 00000000..9939f365
--- /dev/null
+++ b/src/methods_expression_correction/denoist_correction/script.R
@@ -0,0 +1,93 @@
+# Runs DenoIST on the aggregated spatial counts and stores the result in the
+# `corrected_counts` and `normalized` assays of the object written to `output`.
+
+library(Matrix)
+library(DenoIST)
+library(SpatialExperiment)
+library(SingleCellExperiment)
+library(anndataR)
+library(scuttle)
+library(arrow)
+
+## VIASH START
+# Defaults used when running this script directly during development.
+par <- list(
+  "input_spatial_with_cell_types" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/spatial_aggregated_counts.h5ad",
+  "input_ist" = "task_ist_preprocessing/resources_test/task_ist_preprocessing/mouse_brain_combined/raw_ist.zarr",
+  "output" = "task_ist_preprocessing/tmp/denoist_corrected.h5ad",
+  "keep_all_cells" = FALSE,
+  "distance" = 50,
+  "nbins" = 200
+)
+
+meta <- list(
+  "cpus" = 4
+)
+
+## VIASH END
+
+# The transcripts are read unconditionally below, but the API declares
+# --input_ist as optional, so fail early with a clear message when it is unset.
+if (is.null(par$input_ist)) {
+  stop("--input_ist is required by denoist_correction", call. = FALSE)
+}
+
+# Read the input h5ad file and convert to SingleCellExperiment -> SpatialExperiment
+cat("Reading input files\n")
+sce <- read_h5ad(par$input_spatial_with_cell_types, as = "SingleCellExperiment")
+
+# filter out 0 cells
+if (!par$keep_all_cells) {
+  cat("Filtering cells with 0 counts\n")
+  sce <- sce[, colSums(counts(sce)) > 0]
+}
+
+# `assays` is spelled out in full rather than relying on partial argument matching
+spe <- SpatialExperiment(
+  assays = counts(sce),
+  colData = colData(sce),
+  spatialCoordsNames = c("centroid_x", "centroid_y"))
+
+# Read in transcripts
+tx_dataset <- arrow::open_dataset(file.path(par$input_ist, "points/transcripts/points.parquet"))
+tx <- as.data.frame(tx_dataset)
+
+# If no QV column
+if (!("qv" %in% names(tx))) {
+  cat("QV column not found, adding dummy column of 20 (should be unecessary in future updates?)\n")
+  tx["qv"] <- 20
+}
+
+# check cores
+cores <- 1
+if ("cpus" %in% names(meta) && !is.null(meta$cpus)) cores <- meta$cpus
+cat(sprintf("Number of cores: %s\n", cores))
+
+# Run the algorithm
+
+res <- denoist(mat = spe,
+  tx = tx,
+  feature_label = "feature_name",
+  coords = NULL,
+  distance = par$distance, nbins = par$nbins, cl = cores)
+
+# format name
+corrected_counts <- res$adjusted_counts
+
+# create corrected counts layer in original SingleCell object
+cat("Normalizing counts\n")
+
+# First copy in counts
+assay(sce, "corrected_counts") <- assay(sce, "counts")
+
+# Then, replace only the updated cells
+assay(sce, "corrected_counts")[rownames(corrected_counts), colnames(corrected_counts)] <- corrected_counts
+
+# Library size normalization - see note in resolVI
+size_factors <- librarySizeFactors(assay(sce, "corrected_counts"))
+assay(sce, "normalized") <- assay(logNormCounts(sce, size_factors=size_factors, assay.type = "corrected_counts"),"logcounts")
+
+# Write the final object to h5ad format
+cat("Writing to h5ad\n")
+dir.create(dirname(par$output), showWarnings = FALSE, recursive = TRUE)
+write_h5ad(sce, par$output, mode = "w")
\ No newline at end of file