Add function to drop zero-positive rows and update CSV export logic in age-bin evaluation
This commit is contained in:
@@ -177,6 +177,7 @@ def _worker_eval_mcs_on_gpu(
|
||||
|
||||
df_all = pd.concat(frames, ignore_index=True) if len(
|
||||
frames) else pd.DataFrame()
|
||||
df_all = _drop_zero_positives_rows(df_all, "n_positives")
|
||||
df_all.to_csv(out_path, index=False)
|
||||
queue.put({"ok": True, "out_path": out_path})
|
||||
except Exception as e:
|
||||
@@ -211,6 +212,20 @@ def build_criterion_and_out_dims(loss_type: str, n_disease: int, bin_edges, lamb
|
||||
raise ValueError(f"Unsupported loss_type: {loss_type}")
|
||||
|
||||
|
||||
def _drop_zero_positives_rows(df: pd.DataFrame, positive_col: str) -> pd.DataFrame:
    """Return only the rows of ``df`` whose ``positive_col`` value is > 0.

    Used before CSV export to leave out (cause, horizon, bin) rows without
    any positives; such rows otherwise carry undefined/NaN metrics and only
    inflate the file.

    Values in ``positive_col`` are coerced to numbers first. Entries that
    cannot be parsed become NaN and are dropped together with the zero and
    negative ones, because ``NaN > 0`` evaluates to False.

    Args:
        df: Frame to filter. May be ``None`` or empty.
        positive_col: Name of the column holding the positives count.

    Returns:
        ``df`` itself (no copy) when it is ``None``, has no rows, or lacks
        ``positive_col``; otherwise an independent copy of the retained
        rows with their original index labels.
    """
    # Pass-through cases: nothing to filter, or nothing to filter on.
    nothing_to_filter = df is None or len(df) == 0
    if nothing_to_filter or positive_col not in df.columns:
        return df

    counts = pd.to_numeric(df[positive_col], errors="coerce")
    keep = counts > 0
    # Copy so that callers can modify the result without touching ``df``.
    return df.loc[keep].copy()
|
||||
|
||||
|
||||
def build_model(model_type: str, *, dataset: HealthDataset, cfg: dict):
|
||||
if model_type == "delphi_fork":
|
||||
return DelphiFork(
|
||||
@@ -404,8 +419,10 @@ def main() -> None:
|
||||
device=device,
|
||||
)
|
||||
|
||||
df_by_bin.to_csv(out_bin, index=False)
|
||||
df_agg.to_csv(out_agg, index=False)
|
||||
df_by_bin_csv = _drop_zero_positives_rows(df_by_bin, "n_positives")
|
||||
df_agg_csv = _drop_zero_positives_rows(df_agg, "n_positives_total_mean")
|
||||
df_by_bin_csv.to_csv(out_bin, index=False)
|
||||
df_agg_csv.to_csv(out_agg, index=False)
|
||||
print(f"Wrote: {out_bin}")
|
||||
print(f"Wrote: {out_agg}")
|
||||
return
|
||||
@@ -464,8 +481,13 @@ def main() -> None:
|
||||
frames = [pd.read_csv(p) for p in tmp_paths if os.path.exists(p)]
|
||||
df_by_bin = pd.concat(frames, ignore_index=True) if len(
|
||||
frames) else pd.DataFrame()
|
||||
|
||||
# Ensure we don't keep zero-positive rows even if a temp file was produced
|
||||
# by an older version of the worker.
|
||||
df_by_bin = _drop_zero_positives_rows(df_by_bin, "n_positives")
|
||||
df_agg = aggregate_age_bin_results(df_by_bin)
|
||||
|
||||
df_agg = _drop_zero_positives_rows(df_agg, "n_positives_total_mean")
|
||||
df_by_bin.to_csv(out_bin, index=False)
|
||||
df_agg.to_csv(out_agg, index=False)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user