Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion R/clean_DIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@
getOption("MSstatsLog")("INFO", msg)
getOption("MSstatsMsg")("INFO", msg)

dn_input = dn_input[QValue >= global_qvalue_cutoff, quantificationColumn := 0]
dn_input = dn_input[QValue >= global_qvalue_cutoff, (quantificationColumn) := 0]
if (MBR) {
msg = '** MBR was used to analyze the data. Now setting names and filtering'
msg_1_mbr = paste0('-- LibPGQValue < ', pg_qvalue_cutoff)
Expand Down
38 changes: 28 additions & 10 deletions inst/tinytest/test_clean_DIANN.R
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,31 @@ output = MSstatsConvert:::.cleanRawDIANN(input, quantificationColumn = "Fragment
.validateOutput(output)

# Q-value filtering
output = MSstatsConvert:::.cleanRawDIANN(input, global_qvalue_cutoff = 0.005)
expect_equal(sum(output$DetectionQValue < 0.005), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, qvalue_cutoff = 0.00001)
expect_equal(sum(output$LibQValue < 0.00001), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, pg_qvalue_cutoff = 0.001)
expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, qvalue_cutoff = 0.005)
expect_equal(sum(output$LibQValue < 0.005), nrow(output))
output = MSstatsConvert:::.cleanRawDIANN(input, MBR = TRUE, pg_qvalue_cutoff = 0.001)
expect_equal(sum(output$LibPGQValue < 0.001), nrow(output))
# Assert that q-value-based filtering behaved correctly on cleaned DIA-NN
# output.
#
# The cleaning step does not drop rows whose q-value exceeds the cutoff;
# it zeroes their quantification instead. This helper therefore checks that
# every row with `col` above `cutoff` has Intensity == 0.
#
# Args:
#   output: data.frame/data.table returned by .cleanRawDIANN().
#   col:    name (string) of the q-value column to check.
#   cutoff: numeric cutoff that was passed to .cleanRawDIANN().
#
# Note: the previous version also asserted
#   sum(col <= cutoff) == nrow(output) - sum(col > cutoff),
# which is a tautology (always TRUE when the column has no NAs) and so
# tested nothing. It is replaced by an explicit NA check, which also
# prevents the sums below from silently becoming NA.
expect_qvalue_cutoff <- function(output, col, cutoff) {
  expect_false(
    anyNA(output[[col]]),
    info = sprintf("Column %s should not contain NA values", col)
  )
  above_cutoff <- output[[col]] > cutoff
  # Every row failing the cutoff must have been zeroed out, not removed.
  expect_equal(
    sum(above_cutoff),
    sum(output[["Intensity"]] == 0 & above_cutoff),
    info = sprintf(
      "All rows with %s > %s should have %s == 0",
      col, cutoff, "Intensity"
    )
  )
}
# Each call below exercises one q-value cutoff argument of .cleanRawDIANN()
# and verifies, via expect_qvalue_cutoff(), that rows failing the cutoff had
# their Intensity set to 0 rather than being removed from the output.

# Global (experiment-wide) q-value cutoff.
output <- MSstatsConvert:::.cleanRawDIANN(input, global_qvalue_cutoff = 0.005)
expect_qvalue_cutoff(output, "DetectionQValue", 0.005)
# Run-level q-value cutoff (default MBR setting).
output <- MSstatsConvert:::.cleanRawDIANN(input, qvalue_cutoff = 0.00001)
expect_qvalue_cutoff(output, "LibQValue", 0.00001)
# Protein-group q-value cutoff (default MBR setting).
output <- MSstatsConvert:::.cleanRawDIANN(input, pg_qvalue_cutoff = 0.001)
expect_qvalue_cutoff(output, "LibPGQValue", 0.001)
# With MBR disabled the Global* q-value columns are filtered instead of the
# Lib* columns -- NOTE(review): inferred from the column names checked here;
# confirm against .cleanRawDIANN()'s implementation.
output <- MSstatsConvert:::.cleanRawDIANN(input, MBR = FALSE, qvalue_cutoff = 0.001)
expect_qvalue_cutoff(output, "GlobalQValue", 0.001)
output <- MSstatsConvert:::.cleanRawDIANN(input, MBR = FALSE, pg_qvalue_cutoff = 0.0002)
expect_qvalue_cutoff(output, "GlobalPGQValue", 0.0002)
4 changes: 2 additions & 2 deletions inst/tinytest/test_converters_DIANNtoMSstatsFormat.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ input = data.table::fread(input_file_path)
annot = data.table::fread(annotation_file_path)
output = DIANNtoMSstatsFormat(input, annotation = annot, MBR = FALSE, use_log_file = FALSE)
expect_equal(ncol(output), 11)
expect_equal(nrow(output), 174)
expect_equal(nrow(output), 348)
expect_true("Run" %in% colnames(output))
expect_true("ProteinName" %in% colnames(output))
expect_true("PeptideSequence" %in% colnames(output))
Expand All @@ -25,7 +25,7 @@ input = arrow::read_parquet(input_file_path)
annot = data.table::fread(annotation_file_path)
output = DIANNtoMSstatsFormat(input, annotation = annot, MBR = FALSE, use_log_file = FALSE, quantificationColumn = 'auto')
expect_equal(ncol(output), 11)
expect_equal(nrow(output), 180)
expect_equal(nrow(output), 192)
expect_true("Run" %in% colnames(output))
expect_true("ProteinName" %in% colnames(output))
expect_true("PeptideSequence" %in% colnames(output))
Expand Down
36 changes: 27 additions & 9 deletions inst/tinytest/test_utils_anomaly_score.R
Original file line number Diff line number Diff line change
Expand Up @@ -37,9 +37,9 @@ baseline_scores = run_quality_metrics(
# Data with progressively higher cumulative sums
high_scores = run_quality_metrics(
base_df_10,
c(rep(0.1, 5), seq(2.0, 5.0, length.out = 5)), # mean_increase
c(rep(0.1, 5), seq(2.0, 5.0, length.out = 5)), # mean_decrease
c(rep(0.1, 5), seq(2.0, 5.0, length.out = 5)) # dispersion_increase
c(seq(0, 0.1, length.out = 5), seq(2.0, 5.0, length.out = 5)), # mean_increase
c(seq(0, 0.1, length.out = 5), seq(2.0, 5.0, length.out = 5)), # mean_decrease
c(seq(0, 0.1, length.out = 5), seq(2.0, 5.0, length.out = 5)) # dispersion_increase
)

# The last 5 rows (with high values) should have higher mean anomaly scores
Expand All @@ -51,9 +51,9 @@ base_df_20 = create_base_df(20)

extreme_scores = run_quality_metrics(
base_df_20,
c(rep(0.1, 19), 10.0), # Last value is extreme
c(rep(0.1, 19), 8.0), # Last value is extreme
c(rep(0.1, 19), 12.0) # Last value is extreme
c(seq(0, 0.1, length.out = 19), 10.0), # Last value is extreme
c(seq(0, 0.1, length.out = 19), 8.0), # Last value is extreme
c(seq(0, 0.1, length.out = 19), 12.0) # Last value is extreme
)

# The extreme outlier (last row) should have the highest anomaly score
Expand Down Expand Up @@ -267,9 +267,9 @@ base_df_6_rank = create_base_df(6)
# Create data with obvious ranking: Row 6 > Row 5 > Row 4 > Rows 1,2,3
ranking_scores = run_quality_metrics(
base_df_6_rank,
c(0.1, 0.1, 0.1, 1.0, 2.0, 5.0),
c(0.1, 0.1, 0.1, 1.0, 2.0, 5.0),
c(0.1, 0.1, 0.1, 1.0, 2.0, 5.0)
c(0.1, 0.11, 0.12, 1.0, 2.0, 5.0),
c(0.1, 0.11, 0.12, 1.0, 2.0, 5.0),
c(0.1, 0.11, 0.12, 1.0, 2.0, 5.0)
)

# Row 5 should have highest score, Row 4 second highest, etc.
Expand Down Expand Up @@ -367,3 +367,21 @@ low_abundance_excluded = MSstatsConvert:::.prepareSpectronautAnomalyInput(
missing_run_count = 0.95)
expect_true("AFPLAEWQPSDVDQR" %in% low_abundance_excluded$PeptideSequence)
expect_false("LowAbundancePeptide" %in% low_abundance_excluded$PeptideSequence)


# Test 11: duplicated quality metrics, as occurs when multiple fragments
# share the same precursor-level metrics

# Five identical low values (duplicates) followed by five values clustered
# between 2 and 4; the same vector feeds all three metric columns.
duplicate_metrics = run_quality_metrics(
base_df_10,
c(rep(0.1, 5), seq(2.0, 4.0, length.out = 5)), # mean_increase
c(rep(0.1, 5), seq(2.0, 4.0, length.out = 5)), # mean_decrease
c(rep(0.1, 5), seq(2.0, 4.0, length.out = 5)) # dispersion_increase
)

# Expectation: the last 5 rows (values spread over 2-4) score lower than the
# first 5 rows (five identical 0.1 values).
# NOTE(review): the direction of this expectation depends on how
# run_quality_metrics scores exact duplicates versus clustered values;
# confirm against that function's documentation.
expect_true(mean(duplicate_metrics$AnomalyScores[6:10]) < mean(duplicate_metrics$AnomalyScores[1:5]),
info = "Rows 6-10 (values clumped 2-4) should have lower
anomaly scores than rows 1-5 (isolated value of 0.1)")
Loading