# Compute a deterministic chapter ordering from the ICD-10 chapter numeral
# prefix, e.g. "I. Infectious Diseases", "II. Neoplasms", ..., "XXII. ...".
#
# Args:
#   chapters: vector of chapter labels (character or factor); it is coerced
#     with as.character().
#
# Returns:
#   Character vector of the unique labels, sorted as follows:
#     1. labels whose Roman numeral is I..XXII, in numeric order;
#     2. labels starting with "Death" (case-insensitive);
#     3. every other label (including NA).
#   Labels that share a rank keep their order of first appearance.
#   Empty input yields character(0).
compute_chapter_levels <- function(chapters) {
  # Rank each distinct label once; duplicates share the same sort keys, so
  # this gives the same result as ranking every row and de-duplicating after.
  labels <- unique(as.character(chapters))

  # ICD-10 defines 22 chapters (I-XXII).
  roman_levels <- c(
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
    "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX",
    "XXI", "XXII"
  )

  # Extract the leading Roman numeral before a dot, like "XVI. ..." -> "XVI".
  # The match is case-insensitive so "xvi. ..." is recognised as well. When
  # the pattern does not match, sub() returns the label unchanged, which lets
  # a bare numeral such as "XVI" still be looked up below.
  roman <- toupper(
    sub("^\\s*([IVXLCDM]+)\\..*$", "\\1", labels, ignore.case = TRUE)
  )

  # Position in the table is the chapter number; NA when not a known numeral.
  idx <- match(roman, roman_levels)

  # Special-case Death after all numbered chapters.
  idx[grepl("^\\s*Death\\b", labels, ignore.case = TRUE)] <- 99L
  # Unknowns go to the very end.
  idx[is.na(idx)] <- 100L

  # The second key makes the first-appearance tie-break explicit.
  labels[order(idx, seq_along(labels))]
}