Refactor run_evaluations_multi_gpu.sh to remove --tau-short requirement and add support for next-args and horizon-args files
This commit is contained in:
@@ -4,12 +4,12 @@ set -euo pipefail
|
|||||||
usage() {
|
usage() {
|
||||||
cat <<'USAGE'
|
cat <<'USAGE'
|
||||||
Usage:
|
Usage:
|
||||||
./run_evaluations_multi_gpu.sh --gpus 0,1,2 --tau-short 0.5 [options] [-- <extra eval args>]
|
./run_evaluations_multi_gpu.sh --gpus 0,1,2 [options] [-- <common eval args>]
|
||||||
|
|
||||||
Description:
|
Description:
|
||||||
Discovers trained run directories (containing best_model.pt + train_config.json)
|
Discovers trained run directories (containing best_model.pt + train_config.json)
|
||||||
and runs BOTH evaluations on each run:
|
and runs BOTH evaluations on each run:
|
||||||
1) evaluate_next_event.py (requires --tau_short)
|
1) evaluate_next_event.py
|
||||||
2) evaluate_horizon.py
|
2) evaluate_horizon.py
|
||||||
|
|
||||||
Jobs are distributed round-robin across the provided GPU list and each GPU runs
|
Jobs are distributed round-robin across the provided GPU list and each GPU runs
|
||||||
@@ -17,24 +17,29 @@ Description:
|
|||||||
|
|
||||||
Options:
|
Options:
|
||||||
--gpus Comma-separated GPU ids (required), e.g. 0,1,2
|
--gpus Comma-separated GPU ids (required), e.g. 0,1,2
|
||||||
--tau-short Short-window horizon (years) for evaluate_next_event.py (required)
|
|
||||||
--runs-root Root directory containing run subfolders (default: runs)
|
--runs-root Root directory containing run subfolders (default: runs)
|
||||||
--pattern Shell glob to filter run folder basenames (default: *)
|
--pattern Shell glob to filter run folder basenames (default: *)
|
||||||
--run-dirs-file Text file with one run_dir per line (overrides --runs-root)
|
--run-dirs-file Text file with one run_dir per line (overrides --runs-root)
|
||||||
--horizons Horizon grid in years (space-separated list). If omitted, uses script defaults.
|
--horizons Horizon grid in years (space-separated list). If omitted, uses script defaults.
|
||||||
--age-bins Age bin boundaries in years (space-separated list). If omitted, uses script defaults.
|
--age-bins Age bin boundaries in years (space-separated list). If omitted, uses script defaults.
|
||||||
|
--next-args-file File with one CLI argument per line appended only to evaluate_next_event.py
|
||||||
|
--horizon-args-file File with one CLI argument per line appended only to evaluate_horizon.py
|
||||||
--python Python executable/command (default: python)
|
--python Python executable/command (default: python)
|
||||||
--log-dir Directory for logs (default: eval_logs)
|
--log-dir Directory for logs (default: eval_logs)
|
||||||
--dry-run Print commands without executing
|
--dry-run Print commands without executing
|
||||||
--help|-h Show this help
|
--help|-h Show this help
|
||||||
|
|
||||||
Extra eval args:
|
Common eval args:
|
||||||
Anything after `--` is appended to BOTH evaluation commands.
|
Anything after `--` is appended to BOTH evaluation commands.
|
||||||
Examples: -- --batch_size 512 --num_workers 4 --seed 0 --min_pos 20 --no_tqdm
|
Use this only for flags supported by BOTH scripts (e.g. --batch_size, --num_workers, --seed, --min_pos, --no_tqdm).
|
||||||
|
|
||||||
|
Per-eval args:
|
||||||
|
For eval-specific flags, use --next-args-file (appended only to evaluate_next_event.py) or --horizon-args-file (appended only to evaluate_horizon.py, e.g. --topk_list / --workload_fracs).
|
||||||
|
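Args files are "one argument per line": each non-blank line is passed as exactly one argument, so put a flag and each of its values on separate lines. Blank lines are ignored.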
|
||||||
|
|
||||||
Examples:
|
Examples:
|
||||||
./run_evaluations_multi_gpu.sh --gpus 0,1 --tau-short 0.5
|
./run_evaluations_multi_gpu.sh --gpus 0,1
|
||||||
./run_evaluations_multi_gpu.sh --gpus 0,1 --tau-short 0.5 --runs-root runs --pattern "delphi_*" \
|
./run_evaluations_multi_gpu.sh --gpus 0,1 --runs-root runs --pattern "delphi_*" \
|
||||||
--horizons 0.25 0.5 1 2 5 10 --age-bins 40 45 50 55 60 65 70 75 inf -- --batch_size 512 --num_workers 4
|
--horizons 0.25 0.5 1 2 5 10 --age-bins 40 45 50 55 60 65 70 75 inf -- --batch_size 512 --num_workers 4
|
||||||
|
|
||||||
USAGE
|
USAGE
|
||||||
@@ -47,11 +52,11 @@ gpu_list=""
|
|||||||
python_cmd="python"
|
python_cmd="python"
|
||||||
log_dir="eval_logs"
|
log_dir="eval_logs"
|
||||||
dry_run=0
|
dry_run=0
|
||||||
|
|
||||||
tau_short=""
|
|
||||||
horizons=()
|
horizons=()
|
||||||
age_bins=()
|
age_bins=()
|
||||||
extra_args=()
|
extra_args=()
|
||||||
|
next_args_file=""
|
||||||
|
horizon_args_file=""
|
||||||
|
|
||||||
while [[ $# -gt 0 ]]; do
|
while [[ $# -gt 0 ]]; do
|
||||||
case "$1" in
|
case "$1" in
|
||||||
@@ -59,10 +64,6 @@ while [[ $# -gt 0 ]]; do
|
|||||||
gpu_list="${2-}"
|
gpu_list="${2-}"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
--tau-short)
|
|
||||||
tau_short="${2-}"
|
|
||||||
shift 2
|
|
||||||
;;
|
|
||||||
--runs-root)
|
--runs-root)
|
||||||
runs_root="${2-}"
|
runs_root="${2-}"
|
||||||
shift 2
|
shift 2
|
||||||
@@ -75,6 +76,14 @@ while [[ $# -gt 0 ]]; do
|
|||||||
run_dirs_file="${2-}"
|
run_dirs_file="${2-}"
|
||||||
shift 2
|
shift 2
|
||||||
;;
|
;;
|
||||||
|
--next-args-file)
|
||||||
|
next_args_file="${2-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
|
--horizon-args-file)
|
||||||
|
horizon_args_file="${2-}"
|
||||||
|
shift 2
|
||||||
|
;;
|
||||||
--python)
|
--python)
|
||||||
python_cmd="${2-}"
|
python_cmd="${2-}"
|
||||||
shift 2
|
shift 2
|
||||||
@@ -125,10 +134,21 @@ if [[ -z "$gpu_list" ]]; then
|
|||||||
exit 2
|
exit 2
|
||||||
fi
|
fi
|
||||||
|
|
||||||
if [[ -z "$tau_short" ]]; then
|
read_args_file() {
|
||||||
echo "Error: --tau-short is required (e.g. --tau-short 0.5)." >&2
|
local f="${1-}"
|
||||||
exit 2
|
if [[ -z "$f" ]]; then
|
||||||
fi
|
return 0
|
||||||
|
fi
|
||||||
|
if [[ ! -f "$f" ]]; then
|
||||||
|
echo "Error: args file not found: $f" >&2
|
||||||
|
exit 2
|
||||||
|
fi
|
||||||
|
while IFS= read -r line || [[ -n "$line" ]]; do
|
||||||
|
line="${line%$'\r'}" # handle CRLF
|
||||||
|
[[ -z "$line" ]] && continue
|
||||||
|
printf '%s\n' "$line"
|
||||||
|
done < "$f"
|
||||||
|
}
|
||||||
|
|
||||||
mkdir -p "$log_dir"
|
mkdir -p "$log_dir"
|
||||||
|
|
||||||
@@ -226,13 +246,18 @@ for i in "${!gpus[@]}"; do
|
|||||||
echo "gpu: $gpu"
|
echo "gpu: $gpu"
|
||||||
echo "job_id: $jid"
|
echo "job_id: $jid"
|
||||||
echo "run_dir: $run_dir"
|
echo "run_dir: $run_dir"
|
||||||
echo "tau_short: $tau_short"
|
|
||||||
if [[ ${#horizons[@]} -gt 0 ]]; then
|
if [[ ${#horizons[@]} -gt 0 ]]; then
|
||||||
echo "horizons: ${horizons[*]}"
|
echo "horizons: ${horizons[*]}"
|
||||||
fi
|
fi
|
||||||
if [[ ${#age_bins[@]} -gt 0 ]]; then
|
if [[ ${#age_bins[@]} -gt 0 ]]; then
|
||||||
echo "age_bins: ${age_bins[*]}"
|
echo "age_bins: ${age_bins[*]}"
|
||||||
fi
|
fi
|
||||||
|
if [[ -n "${next_args_file}" ]]; then
|
||||||
|
echo "next_args_file: ${next_args_file}"
|
||||||
|
fi
|
||||||
|
if [[ -n "${horizon_args_file}" ]]; then
|
||||||
|
echo "horizon_args_file: ${horizon_args_file}"
|
||||||
|
fi
|
||||||
if [[ ${#extra_args[@]} -gt 0 ]]; then
|
if [[ ${#extra_args[@]} -gt 0 ]]; then
|
||||||
echo "extra_args: ${extra_args[*]}"
|
echo "extra_args: ${extra_args[*]}"
|
||||||
fi
|
fi
|
||||||
@@ -240,10 +265,15 @@ for i in "${!gpus[@]}"; do
|
|||||||
} > "$log_file"
|
} > "$log_file"
|
||||||
|
|
||||||
# Build argv arrays
|
# Build argv arrays
|
||||||
next_cmd=("$python_cmd" evaluate_next_event.py --run_dir "$run_dir" --tau_short "$tau_short")
|
next_cmd=("$python_cmd" evaluate_next_event.py --run_dir "$run_dir")
|
||||||
if [[ ${#age_bins[@]} -gt 0 ]]; then
|
if [[ ${#age_bins[@]} -gt 0 ]]; then
|
||||||
next_cmd+=(--age_bins "${age_bins[@]}")
|
next_cmd+=(--age_bins "${age_bins[@]}")
|
||||||
fi
|
fi
|
||||||
|
if [[ -n "${next_args_file}" ]]; then
|
||||||
|
while IFS= read -r a; do
|
||||||
|
next_cmd+=("$a")
|
||||||
|
done < <(read_args_file "${next_args_file}")
|
||||||
|
fi
|
||||||
if [[ ${#extra_args[@]} -gt 0 ]]; then
|
if [[ ${#extra_args[@]} -gt 0 ]]; then
|
||||||
next_cmd+=("${extra_args[@]}")
|
next_cmd+=("${extra_args[@]}")
|
||||||
fi
|
fi
|
||||||
@@ -255,6 +285,11 @@ for i in "${!gpus[@]}"; do
|
|||||||
if [[ ${#age_bins[@]} -gt 0 ]]; then
|
if [[ ${#age_bins[@]} -gt 0 ]]; then
|
||||||
hor_cmd+=(--age_bins "${age_bins[@]}")
|
hor_cmd+=(--age_bins "${age_bins[@]}")
|
||||||
fi
|
fi
|
||||||
|
if [[ -n "${horizon_args_file}" ]]; then
|
||||||
|
while IFS= read -r a; do
|
||||||
|
hor_cmd+=("$a")
|
||||||
|
done < <(read_args_file "${horizon_args_file}")
|
||||||
|
fi
|
||||||
if [[ ${#extra_args[@]} -gt 0 ]]; then
|
if [[ ${#extra_args[@]} -gt 0 ]]; then
|
||||||
hor_cmd+=("${extra_args[@]}")
|
hor_cmd+=("${extra_args[@]}")
|
||||||
fi
|
fi
|
||||||
|
|||||||
Reference in New Issue
Block a user