diff --git a/run_evaluations_multi_gpu.sh b/run_evaluations_multi_gpu.sh index f9136e0..9bb498a 100644 --- a/run_evaluations_multi_gpu.sh +++ b/run_evaluations_multi_gpu.sh @@ -4,12 +4,12 @@ set -euo pipefail usage() { cat <<'USAGE' Usage: - ./run_evaluations_multi_gpu.sh --gpus 0,1,2 --tau-short 0.5 [options] [-- ] + ./run_evaluations_multi_gpu.sh --gpus 0,1,2 [options] [-- ] Description: Discovers trained run directories (containing best_model.pt + train_config.json) and runs BOTH evaluations on each run: - 1) evaluate_next_event.py (requires --tau_short) + 1) evaluate_next_event.py 2) evaluate_horizon.py Jobs are distributed round-robin across the provided GPU list and each GPU runs @@ -17,24 +17,29 @@ Description: Options: --gpus Comma-separated GPU ids (required), e.g. 0,1,2 - --tau-short Short-window horizon (years) for evaluate_next_event.py (required) --runs-root Root directory containing run subfolders (default: runs) --pattern Shell glob to filter run folder basenames (default: *) --run-dirs-file Text file with one run_dir per line (overrides --runs-root) --horizons Horizon grid in years (space-separated list). If omitted, uses script defaults. --age-bins Age bin boundaries in years (space-separated list). If omitted, uses script defaults. + --next-args-file File with one CLI argument per line appended only to evaluate_next_event.py + --horizon-args-file File with one CLI argument per line appended only to evaluate_horizon.py --python Python executable/command (default: python) --log-dir Directory for logs (default: eval_logs) --dry-run Print commands without executing --help|-h Show this help -Extra eval args: +Common eval args: Anything after `--` is appended to BOTH evaluation commands. - Examples: -- --batch_size 512 --num_workers 4 --seed 0 --min_pos 20 --no_tqdm + Use this only for flags supported by BOTH scripts (e.g. --batch_size, --num_workers, --seed, --min_pos, --no_tqdm). + +Per-eval args: + For eval-specific flags (e.g. evaluate_horizon.py --topk_list / --workload_fracs), use --horizon-args-file. + Args files are "one argument per line"; blank lines are ignored. Examples: - ./run_evaluations_multi_gpu.sh --gpus 0,1 --tau-short 0.5 - ./run_evaluations_multi_gpu.sh --gpus 0,1 --tau-short 0.5 --runs-root runs --pattern "delphi_*" \ + ./run_evaluations_multi_gpu.sh --gpus 0,1 + ./run_evaluations_multi_gpu.sh --gpus 0,1 --runs-root runs --pattern "delphi_*" \ --horizons 0.25 0.5 1 2 5 10 --age-bins 40 45 50 55 60 65 70 75 inf -- --batch_size 512 --num_workers 4 USAGE @@ -47,11 +52,11 @@ gpu_list="" python_cmd="python" log_dir="eval_logs" dry_run=0 - -tau_short="" horizons=() age_bins=() extra_args=() +next_args_file="" +horizon_args_file="" while [[ $# -gt 0 ]]; do case "$1" in @@ -59,10 +64,6 @@ while [[ $# -gt 0 ]]; do gpu_list="${2-}" shift 2 ;; - --tau-short) - tau_short="${2-}" - shift 2 - ;; --runs-root) runs_root="${2-}" shift 2 @@ -75,6 +76,14 @@ while [[ $# -gt 0 ]]; do run_dirs_file="${2-}" shift 2 ;; + --next-args-file) + next_args_file="${2-}" + shift 2 + ;; + --horizon-args-file) + horizon_args_file="${2-}" + shift 2 + ;; --python) python_cmd="${2-}" shift 2 @@ -125,10 +134,21 @@ if [[ -z "$gpu_list" ]]; then exit 2 fi -if [[ -z "$tau_short" ]]; then - echo "Error: --tau-short is required (e.g. --tau-short 0.5)." >&2 - exit 2 -fi +read_args_file() { + local f="${1-}" + if [[ -z "$f" ]]; then + return 0 + fi + if [[ ! -f "$f" ]]; then + echo "Error: args file not found: $f" >&2 + exit 2 + fi + while IFS= read -r line || [[ -n "$line" ]]; do + line="${line%$'\r'}" # handle CRLF + [[ -z "$line" ]] && continue + printf '%s\n' "$line" + done < "$f" +} mkdir -p "$log_dir" @@ -226,13 +246,18 @@ for i in "${!gpus[@]}"; do echo "gpu: $gpu" echo "job_id: $jid" echo "run_dir: $run_dir" - echo "tau_short: $tau_short" if [[ ${#horizons[@]} -gt 0 ]]; then echo "horizons: ${horizons[*]}" fi if [[ ${#age_bins[@]} -gt 0 ]]; then echo "age_bins: ${age_bins[*]}" fi + if [[ -n "${next_args_file}" ]]; then + echo "next_args_file: ${next_args_file}" + fi + if [[ -n "${horizon_args_file}" ]]; then + echo "horizon_args_file: ${horizon_args_file}" + fi if [[ ${#extra_args[@]} -gt 0 ]]; then echo "extra_args: ${extra_args[*]}" fi @@ -240,10 +265,15 @@ for i in "${!gpus[@]}"; do } > "$log_file" # Build argv arrays - next_cmd=("$python_cmd" evaluate_next_event.py --run_dir "$run_dir" --tau_short "$tau_short") + next_cmd=("$python_cmd" evaluate_next_event.py --run_dir "$run_dir") if [[ ${#age_bins[@]} -gt 0 ]]; then next_cmd+=(--age_bins "${age_bins[@]}") fi + if [[ -n "${next_args_file}" ]]; then + while IFS= read -r a; do + next_cmd+=("$a") + done < <(read_args_file "${next_args_file}") + fi if [[ ${#extra_args[@]} -gt 0 ]]; then next_cmd+=("${extra_args[@]}") fi @@ -255,6 +285,11 @@ for i in "${!gpus[@]}"; do if [[ ${#age_bins[@]} -gt 0 ]]; then hor_cmd+=(--age_bins "${age_bins[@]}") fi + if [[ -n "${horizon_args_file}" ]]; then + while IFS= read -r a; do + hor_cmd+=("$a") + done < <(read_args_file "${horizon_args_file}") + fi if [[ ${#extra_args[@]} -gt 0 ]]; then hor_cmd+=("${extra_args[@]}") fi