Expand the summary output of the extract_sequence_lengths script from a single line into a multi-line report that keeps n, min, max, and mean and adds a set of percentile values (p05 through p99) for better data insights
This commit is contained in:
@@ -63,10 +63,14 @@ def main() -> None:
|
|||||||
|
|
||||||
arr = np.asarray(lengths, dtype=np.int64)
|
arr = np.asarray(lengths, dtype=np.int64)
|
||||||
print(f"Wrote: {out_csv}")
|
print(f"Wrote: {out_csv}")
|
||||||
print(
|
|
||||||
"Summary: "
|
percentiles = [5, 10, 25, 50, 75, 90, 95, 99]
|
||||||
f"n={arr.size}, min={arr.min()}, p50={int(np.median(arr))}, mean={arr.mean():.2f}, max={arr.max()}"
|
pct_values = np.percentile(arr, percentiles)
|
||||||
)
|
print("Summary:")
|
||||||
|
print(f" n={arr.size}")
|
||||||
|
print(f" min={arr.min()} max={arr.max()} mean={arr.mean():.2f}")
|
||||||
|
for p, v in zip(percentiles, pct_values):
|
||||||
|
print(f" p{p:02d}={int(v)}")
|
||||||
|
|
||||||
# Plot histogram
|
# Plot histogram
|
||||||
plt.figure(figsize=(8, 5))
|
plt.figure(figsize=(8, 5))
|
||||||
|
|||||||
Reference in New Issue
Block a user