Add function to drop zero-positive rows and update CSV export logic in age-bin evaluation

This commit is contained in:
2026-01-16 17:51:00 +08:00
parent 4068310a12
commit a637beb220
2 changed files with 521 additions and 83 deletions

View File

@@ -177,6 +177,7 @@ def _worker_eval_mcs_on_gpu(
df_all = pd.concat(frames, ignore_index=True) if len(
frames) else pd.DataFrame()
df_all = _drop_zero_positives_rows(df_all, "n_positives")
df_all.to_csv(out_path, index=False)
queue.put({"ok": True, "out_path": out_path})
except Exception as e:
@@ -211,6 +212,20 @@ def build_criterion_and_out_dims(loss_type: str, n_disease: int, bin_edges, lamb
raise ValueError(f"Unsupported loss_type: {loss_type}")
def _drop_zero_positives_rows(df: pd.DataFrame, positive_col: str) -> pd.DataFrame:
    """Return only the rows of *df* whose positives count is strictly positive.

    Intended to reduce CSV size by omitting (cause, horizon, bin) rows that have
    no positives, which otherwise yield undefined/NaN metrics.

    The column is coerced with ``pd.to_numeric(..., errors="coerce")``, so
    values that are missing or cannot be parsed as numbers become NaN and are
    dropped too (``NaN > 0`` evaluates to False), not only values ``<= 0``.

    Args:
        df: Frame to filter. ``None`` and empty frames are returned unchanged.
        positive_col: Name of the column holding the positives count. When the
            column is absent, *df* is returned unchanged.

    Returns:
        A filtered copy of *df* that keeps the original index labels, or the
        very same *df* object when no filtering applies (``None``, empty frame,
        or missing column).
    """
    # Nothing to filter: hand the input back untouched (no copy is made).
    if df is None or df.empty:
        return df
    if positive_col not in df.columns:
        return df

    # Coerce so that string-typed counts (e.g. read back from CSV) compare
    # numerically; unparseable entries turn into NaN and fail the mask below.
    positives = pd.to_numeric(df[positive_col], errors="coerce")
    keep = positives > 0

    # Copy so callers can mutate the result without touching the input frame.
    return df.loc[keep].copy()
def build_model(model_type: str, *, dataset: HealthDataset, cfg: dict):
if model_type == "delphi_fork":
return DelphiFork(
@@ -404,8 +419,10 @@ def main() -> None:
device=device,
)
df_by_bin.to_csv(out_bin, index=False)
df_agg.to_csv(out_agg, index=False)
df_by_bin_csv = _drop_zero_positives_rows(df_by_bin, "n_positives")
df_agg_csv = _drop_zero_positives_rows(df_agg, "n_positives_total_mean")
df_by_bin_csv.to_csv(out_bin, index=False)
df_agg_csv.to_csv(out_agg, index=False)
print(f"Wrote: {out_bin}")
print(f"Wrote: {out_agg}")
return
@@ -464,8 +481,13 @@ def main() -> None:
frames = [pd.read_csv(p) for p in tmp_paths if os.path.exists(p)]
df_by_bin = pd.concat(frames, ignore_index=True) if len(
frames) else pd.DataFrame()
# Ensure we don't keep zero-positive rows even if a temp file was produced
# by an older version of the worker.
df_by_bin = _drop_zero_positives_rows(df_by_bin, "n_positives")
df_agg = aggregate_age_bin_results(df_by_bin)
df_agg = _drop_zero_positives_rows(df_agg, "n_positives_total_mean")
df_by_bin.to_csv(out_bin, index=False)
df_agg.to_csv(out_agg, index=False)