R: order ICD-10 chapters by chapter number (Roman numeral prefix) in boxplots; handle 'Death' and unknowns at end
This commit is contained in:
@@ -15,7 +15,7 @@ args <- commandArgs(trailingOnly = TRUE)
|
||||
one_year_csv <- if (length(args) >= 1) args[1] else "model_comparison_auc_1year.csv"
|
||||
no_gap_csv <- if (length(args) >= 2) args[2] else "model_comparison_auc_no_gap.csv"
|
||||
out_dir <- if (length(args) >= 3) args[3] else "."
|
||||
orientation <- if (length(args) >= 4) tolower(args[4]) else "horizontal" # "horizontal" (flipped) or "vertical"
|
||||
orientation <- if (length(args) >= 4) tolower(args[4]) else "vertical" # "horizontal" (flipped) or "vertical"
|
||||
|
||||
if (!dir.exists(out_dir)) {
|
||||
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
|
||||
@@ -38,6 +38,29 @@ get_chapter_col <- function(df) {
|
||||
return(NA_character_)
|
||||
}
|
||||
|
||||
# Compute a deterministic chapter ordering using the ICD-10 chapter numeral
# prefix, e.g. "I. Infectious Diseases", "II. Neoplasms", ..., "XXII. ...",
# with a fallback for "Death" and unknowns.
#
# Args:
#   chapters: vector of chapter labels (character or factor); it is coerced
#     with as.character().
#
# Returns:
#   Character vector of the unique labels in `chapters`, ordered by chapter
#   number, then labels starting with "Death", then everything else. Labels
#   sharing the same rank keep their order of first appearance.
compute_chapter_levels <- function(chapters) {
  ch <- as.character(chapters)

  # ICD-10 has 22 chapters (I-XXII); stopping at XX would push chapters XXI
  # and XXII into the "unknown" bucket, i.e. after "Death".
  roman_levels <- c(
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
    "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX",
    "XXI", "XXII"
  )
  roman_map <- setNames(seq_along(roman_levels), roman_levels)

  # Extract the leading Roman numeral, like "XVI. ..." -> "XVI". The match is
  # case-insensitive so "xvi. ..." is recognised too, and the dot is optional
  # so a label that is only a numeral ("X") still counts. Candidates that are
  # not real chapter numerals (e.g. "Mild") are rejected by the lookup below.
  prefix_pattern <- "^\\s*([IVXLCDM]+)\\s*(\\..*)?$"
  has_prefix <- grepl(prefix_pattern, ch, ignore.case = TRUE)
  roman <- rep(NA_character_, length(ch))
  roman[has_prefix] <- toupper(
    sub(prefix_pattern, "\\1", ch[has_prefix], ignore.case = TRUE)
  )

  idx <- rep(NA_integer_, length(ch))
  hit <- roman %in% names(roman_map)
  idx[hit] <- unname(roman_map[roman[hit]])

  # Special-case Death at the end (after every numbered chapter)
  idx[grepl("^\\s*Death\\b", ch, ignore.case = TRUE)] <- 99L
  # Unknowns to the very end
  idx[is.na(idx)] <- 100L

  # Order chapters by idx; ties are broken by first appearance in the input
  o <- order(idx, match(ch, unique(ch)))
  unique(ch[o])
}
|
||||
|
||||
# Build long-format data.frame with columns: chapter, model, auc
|
||||
# It will include any of the known model columns that exist in the input df
|
||||
build_long_df <- function(df) {
|
||||
@@ -85,14 +108,8 @@ build_long_df <- function(df) {
|
||||
|
||||
# Make the boxplot grouped by chapter
|
||||
make_boxplot <- function(long_df, title_text, flip = TRUE) {
|
||||
# Order chapters by median AUC of Delphi if available, otherwise overall median
|
||||
has_delphi <- any(long_df$model == "Delphi")
|
||||
if (has_delphi) {
|
||||
med <- aggregate(auc ~ chapter, data = subset(long_df, model == "Delphi"), median, na.rm = TRUE)
|
||||
} else {
|
||||
med <- aggregate(auc ~ chapter, data = long_df, median, na.rm = TRUE)
|
||||
}
|
||||
chap_levels <- med[order(med$auc, decreasing = TRUE), "chapter"]
|
||||
# Order chapters by their ICD-10 chapter number prefix (Roman numerals)
|
||||
chap_levels <- compute_chapter_levels(long_df$chapter)
|
||||
long_df$chapter <- factor(long_df$chapter, levels = chap_levels)
|
||||
|
||||
p <- ggplot(long_df, aes(x = chapter, y = auc, fill = model)) +
|
||||
|
||||
Reference in New Issue
Block a user