Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/clean_DIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
getOption("MSstatsLog")("INFO", msg)
getOption("MSstatsMsg")("INFO", msg)

dn_input = dn_input[QValue >= global_qvalue_cutoff, quantificationColumn := 0]
dn_input = dn_input[QValue >= global_qvalue_cutoff, (quantificationColumn) := 0]
if (MBR) {
msg = '** MBR was used to analyze the data. Now setting names and filtering'
msg_1_mbr = paste0('-- LibPGQValue < ', pg_qvalue_cutoff)
Expand Down
38 changes: 28 additions & 10 deletions inst/tinytest/test_clean_DIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,31 @@ output = MSstatsConvert:::.cleanRawDIANN(input, quantificationColumn = "Fragment
.validateOutput(output)

# Q-value filtering
output = MSstatsConvert:::.cleanRawDIANN(input, global_qvalue_cutoff = 0.005)
expect_equal(sum(output$DetectionQValue < 0.005), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, qvalue_cutoff = 0.00001)
expect_equal(sum(output$LibQValue < 0.00001), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, pg_qvalue_cutoff = 0.001)
expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, qvalue_cutoff = 0.005)
expect_equal(sum(output$LibQValue < 0.005), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, pg_qvalue_cutoff = 0.001)
expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))
# Assert that q-value-based filtering behaved correctly on cleaned DIA-NN
# output.
#
# The cleaning step does not drop rows whose q-value exceeds the cutoff;
# it zeroes their quantification instead. This helper therefore checks that
# every row with `col` above `cutoff` has Intensity == 0.
#
# Args:
#   output: data.frame/data.table returned by .cleanRawDIANN().
#   col:    name (string) of the q-value column to check.
#   cutoff: numeric cutoff that was passed to .cleanRawDIANN().
#
# Note: the previous version also asserted
#   sum(col <= cutoff) == nrow(output) - sum(col > cutoff),
# which is a tautology (always TRUE when the column has no NAs) and so
# tested nothing. It is replaced by an explicit NA check, which also
# prevents the sums below from silently becoming NA.
expect_qvalue_cutoff <- function(output, col, cutoff) {
  expect_false(
    anyNA(output[[col]]),
    info = sprintf("Column %s should not contain NA values", col)
  )
  above_cutoff <- output[[col]] > cutoff
  # Every row failing the cutoff must have been zeroed out, not removed.
  expect_equal(
    sum(above_cutoff),
    sum(output[["Intensity"]] == 0 & above_cutoff),
    info = sprintf(
      "All rows with %s > %s should have %s == 0",
      col, cutoff, "Intensity"
    )
  )
}
# Each call below exercises one q-value cutoff argument of .cleanRawDIANN()
# and verifies, via expect_qvalue_cutoff(), that rows failing the cutoff had
# their Intensity set to 0 rather than being removed from the output.

# Global (experiment-wide) q-value cutoff.
output <- MSstatsConvert:::.cleanRawDIANN(input, global_qvalue_cutoff = 0.005)
expect_qvalue_cutoff(output, "DetectionQValue", 0.005)
# Run-level q-value cutoff (default MBR setting).
output <- MSstatsConvert:::.cleanRawDIANN(input, qvalue_cutoff = 0.00001)
expect_qvalue_cutoff(output, "LibQValue", 0.00001)
# Protein-group q-value cutoff (default MBR setting).
output <- MSstatsConvert:::.cleanRawDIANN(input, pg_qvalue_cutoff = 0.001)
expect_qvalue_cutoff(output, "LibPGQValue", 0.001)
# With MBR disabled the Global* q-value columns are filtered instead of the
# Lib* columns -- NOTE(review): inferred from the column names checked here;
# confirm against .cleanRawDIANN()'s implementation.
output <- MSstatsConvert:::.cleanRawDIANN(input, MBR = FALSE, qvalue_cutoff = 0.001)
expect_qvalue_cutoff(output, "GlobalQValue", 0.001)
output <- MSstatsConvert:::.cleanRawDIANN(input, MBR = FALSE, pg_qvalue_cutoff = 0.0002)
expect_qvalue_cutoff(output, "GlobalPGQValue", 0.0002)
4 changes: 2 additions & 2 deletions inst/tinytest/test_converters_DIANNtoMSstatsFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ input = data.table::fread(input_file_path)
annot = data.table::fread(annotation_file_path)
output = DIANNtoMSstatsFormat(input, annotation = annot, MBR = FALSE, use_log_file = FALSE)
expect_equal(ncol(output), 11)
expect_equal(nrow(output), 174)
expect_equal(nrow(output), 348)
expect_true("Run" %in% colnames(output))
expect_true("ProteinName" %in% colnames(output))
expect_true("PeptideSequence" %in% colnames(output))
Expand All @@ -25,7 +25,7 @@ input = arrow::read_parquet(input_file_path)
annot = data.table::fread(annotation_file_path)
output = DIANNtoMSstatsFormat(input, annotation = annot, MBR = FALSE, use_log_file = FALSE, quantificationColumn = 'auto')
expect_equal(ncol(output), 11)
expect_equal(nrow(output), 180)
expect_equal(nrow(output), 192)
expect_true("Run" %in% colnames(output))
expect_true("ProteinName" %in% colnames(output))
expect_true("PeptideSequence" %in% colnames(output))
Expand Down
36 changes: 27 additions & 9 deletions inst/tinytest/test_utils_anomaly_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ baseline_scores = run_quality_metrics(
# Data with progressively higher cumulative sums
high_scores = run_quality_metrics(
base_df_10,
c(rep(0.1, 5), seq(2.0, 5.0, length.out = 5)), # mean_increase
c(rep(0.1, 5), seq(2.0, 5.0, length.out = 5)), # mean_decrease
c(rep(0.1, 5), seq(2.0, 5.0, length.out = 5)) # dispersion_increase
c(seq(0, 0.1, length.out = 5), seq(2.0, 5.0, length.out = 5)), # mean_increase
c(seq(0, 0.1, length.out = 5), seq(2.0, 5.0, length.out = 5)), # mean_decrease
c(seq(0, 0.1, length.out = 5), seq(2.0, 5.0, length.out = 5)) # dispersion_increase
)

# The last 5 rows (with high values) should have higher mean anomaly scores
Expand All @@ -51,9 +51,9 @@ base_df_20 = create_base_df(20)

extreme_scores = run_quality_metrics(
base_df_20,
c(rep(0.1, 19), 10.0), # Last value is extreme
c(rep(0.1, 19), 8.0), # Last value is extreme
c(rep(0.1, 19), 12.0) # Last value is extreme
c(seq(0, 0.1, length.out = 19), 10.0), # Last value is extreme
c(seq(0, 0.1, length.out = 19), 8.0), # Last value is extreme
c(seq(0, 0.1, length.out = 19), 12.0) # Last value is extreme
)

# The extreme outlier (last row) should have the highest anomaly score
Expand Down Expand Up @@ -267,9 +267,9 @@ base_df_6_rank = create_base_df(6)
# Create data with obvious ranking: Row 6 > Row 5 > Row 4 > Rows 1,2,3
ranking_scores = run_quality_metrics(
base_df_6_rank,
c(0.1, 0.1, 0.1, 1.0, 2.0, 5.0),
c(0.1, 0.1, 0.1, 1.0, 2.0, 5.0),
c(0.1, 0.1, 0.1, 1.0, 2.0, 5.0)
c(0.1, 0.11, 0.12, 1.0, 2.0, 5.0),
c(0.1, 0.11, 0.12, 1.0, 2.0, 5.0),
c(0.1, 0.11, 0.12, 1.0, 2.0, 5.0)
)

# Row 5 should have highest score, Row 4 second highest, etc.
Expand Down Expand Up @@ -367,3 +367,21 @@ low_abundance_excluded = MSstatsConvert:::.prepareSpectronautAnomalyInput(
missing_run_count = 0.95)
expect_true("AFPLAEWQPSDVDQR" %in% low_abundance_excluded$PeptideSequence)
expect_false("LowAbundancePeptide" %in% low_abundance_excluded$PeptideSequence)


# Test 11: duplicated quality metrics, as occurs when multiple fragments
# share the same precursor-level metrics

# Five identical low values (duplicates) followed by five values clustered
# between 2 and 4; the same vector feeds all three metric columns.
duplicate_metrics = run_quality_metrics(
base_df_10,
c(rep(0.1, 5), seq(2.0, 4.0, length.out = 5)), # mean_increase
c(rep(0.1, 5), seq(2.0, 4.0, length.out = 5)), # mean_decrease
c(rep(0.1, 5), seq(2.0, 4.0, length.out = 5)) # dispersion_increase
)

# Expectation: the last 5 rows (values spread over 2-4) score lower than the
# first 5 rows (five identical 0.1 values).
# NOTE(review): the direction of this expectation depends on how
# run_quality_metrics scores exact duplicates versus clustered values;
# confirm against that function's documentation.
expect_true(mean(duplicate_metrics$AnomalyScores[6:10]) < mean(duplicate_metrics$AnomalyScores[1:5]),
info = "Rows 6-10 (values clumped 2-4) should have lower
anomaly scores than rows 1-5 (isolated value of 0.1)")
Loading