diff --git a/extract_sequence_lengths.py b/extract_sequence_lengths.py index 6d0ae48..8165656 100644 --- a/extract_sequence_lengths.py +++ b/extract_sequence_lengths.py @@ -63,10 +63,14 @@ def main() -> None: arr = np.asarray(lengths, dtype=np.int64) print(f"Wrote: {out_csv}") - print( - "Summary: " - f"n={arr.size}, min={arr.min()}, p50={int(np.median(arr))}, mean={arr.mean():.2f}, max={arr.max()}" - ) + + percentiles = [5, 10, 25, 50, 75, 90, 95, 99] + pct_values = np.percentile(arr, percentiles) + print("Summary:") + print(f" n={arr.size}") + print(f" min={arr.min()} max={arr.max()} mean={arr.mean():.2f}") + for p, v in zip(percentiles, pct_values): + print(f" p{p:02d}={int(v)}") # Plot histogram plt.figure(figsize=(8, 5))