R: order ICD-10 chapters by chapter number (Roman numeral prefix) in boxplots; handle 'Death' and unknowns at end
This commit is contained in:
@@ -15,7 +15,7 @@ args <- commandArgs(trailingOnly = TRUE)
|
|||||||
one_year_csv <- if (length(args) >= 1) args[1] else "model_comparison_auc_1year.csv"
|
one_year_csv <- if (length(args) >= 1) args[1] else "model_comparison_auc_1year.csv"
|
||||||
no_gap_csv <- if (length(args) >= 2) args[2] else "model_comparison_auc_no_gap.csv"
|
no_gap_csv <- if (length(args) >= 2) args[2] else "model_comparison_auc_no_gap.csv"
|
||||||
out_dir <- if (length(args) >= 3) args[3] else "."
|
out_dir <- if (length(args) >= 3) args[3] else "."
|
||||||
orientation <- if (length(args) >= 4) tolower(args[4]) else "horizontal" # "horizontal" (flipped) or "vertical"
|
orientation <- if (length(args) >= 4) tolower(args[4]) else "vertical" # "horizontal" (flipped) or "vertical"
|
||||||
|
|
||||||
if (!dir.exists(out_dir)) {
|
if (!dir.exists(out_dir)) {
|
||||||
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
|
dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)
|
||||||
@@ -38,6 +38,29 @@ get_chapter_col <- function(df) {
|
|||||||
return(NA_character_)
|
return(NA_character_)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Compute a deterministic chapter ordering using the ICD-10 chapter numeral
# prefix, e.g. "I. Infectious Diseases", "II. Neoplasms", ..., "XXII. ...".
#
# Args:
#   chapters: vector of chapter labels (character or factor); duplicates are
#     allowed and are collapsed in the result.
#
# Returns:
#   Character vector of the unique labels ordered by chapter number. Labels
#   starting with "Death" follow all numbered chapters, and labels without a
#   recognised "<numeral>." prefix come last. Labels sharing a rank keep their
#   order of first appearance.
compute_chapter_levels <- function(chapters) {
  ch <- as.character(chapters)

  # ICD-10 defines 22 chapters (I-XXII); position in this vector is the rank.
  roman_levels <- c(
    "I", "II", "III", "IV", "V", "VI", "VII", "VIII", "IX", "X",
    "XI", "XII", "XIII", "XIV", "XV", "XVI", "XVII", "XVIII", "XIX", "XX",
    "XXI", "XXII"
  )

  # Extract the leading Roman numeral before a dot, like "XVI." -> "XVI".
  # The numeral is only taken when the "<numeral>." prefix is really present,
  # so a label without a dot is never mistaken for a chapter number, and the
  # match is case-insensitive so "xvi." ranks the same as "XVI.".
  prefix_re <- "^\\s*([IVXLCDM]+)\\..*$"
  has_prefix <- grepl(prefix_re, ch, ignore.case = TRUE)
  roman <- rep(NA_character_, length(ch))
  roman[has_prefix] <- toupper(
    sub(prefix_re, "\\1", ch[has_prefix], ignore.case = TRUE)
  )

  # Rank of each label; NA when there is no known numeral prefix
  idx <- match(roman, roman_levels)

  # Special-case Death at the end
  idx[grepl("^\\s*Death\\b", ch, ignore.case = TRUE)] <- 99L

  # Unknowns to the very end
  idx[is.na(idx)] <- 100L

  # Order chapters by idx, stable within same idx by appearance
  o <- order(idx, match(ch, unique(ch)))
  unique(ch[o])
}
|
||||||
|
|
||||||
# Build long-format data.frame with columns: chapter, model, auc
|
# Build long-format data.frame with columns: chapter, model, auc
|
||||||
# It will include any of the known model columns that exist in the input df
|
# It will include any of the known model columns that exist in the input df
|
||||||
build_long_df <- function(df) {
|
build_long_df <- function(df) {
|
||||||
@@ -85,14 +108,8 @@ build_long_df <- function(df) {
|
|||||||
|
|
||||||
# Make the boxplot grouped by chapter
|
# Make the boxplot grouped by chapter
|
||||||
make_boxplot <- function(long_df, title_text, flip = TRUE) {
|
make_boxplot <- function(long_df, title_text, flip = TRUE) {
|
||||||
# Order chapters by median AUC of Delphi if available, otherwise overall median
|
# Order chapters by their ICD-10 chapter number prefix (Roman numerals)
|
||||||
has_delphi <- any(long_df$model == "Delphi")
|
chap_levels <- compute_chapter_levels(long_df$chapter)
|
||||||
if (has_delphi) {
|
|
||||||
med <- aggregate(auc ~ chapter, data = subset(long_df, model == "Delphi"), median, na.rm = TRUE)
|
|
||||||
} else {
|
|
||||||
med <- aggregate(auc ~ chapter, data = long_df, median, na.rm = TRUE)
|
|
||||||
}
|
|
||||||
chap_levels <- med[order(med$auc, decreasing = TRUE), "chapter"]
|
|
||||||
long_df$chapter <- factor(long_df$chapter, levels = chap_levels)
|
long_df$chapter <- factor(long_df$chapter, levels = chap_levels)
|
||||||
|
|
||||||
p <- ggplot(long_df, aes(x = chapter, y = auc, fill = model)) +
|
p <- ggplot(long_df, aes(x = chapter, y = auc, fill = model)) +
|
||||||
|
|||||||
Reference in New Issue
Block a user