You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
42 lines
962 B
42 lines
962 B
2 years ago
|
#!/bin/bash
#
# check-gpu-utilization.sh
#
# Report GPU usage on a SLURM cluster:
#   1. list the queued/running jobs in the GPU partition(s),
#   2. run nvidia-smi (over ssh) on every GPU node whose state is
#      "mix" or "alloc", i.e. partially or fully allocated,
#   3. print "scontrol show job" for each of the listed jobs.
#
# Requires sinfo, squeue, scontrol, awk and ssh access to the GPU nodes.
#
# Created: 2023-04-06

# Abort on the first failing command (-e) and on any use of an unset
# variable (-u).
set -eu

# Must be a valid regex.
# It is used as an awk regular expression and matched against the partition
# column of both the sinfo and the squeue output, so it has to be written
# against the bare partition name.
GPU_PARTITIONS='^gpu$'
|
||
|
|
||
|
#######################################
# Filter a "sinfo -N" listing down to the GPU nodes that are being utilized
# (partially/fully).
#
# A line is selected when its partition (3rd column) matches the regex and
# its state (4th column) is exactly "mix" or "alloc"; states carrying any
# extra suffix are not selected.
# sinfo appends "*" to the name of the default partition; that marker is
# stripped before matching so the same regex works for sinfo and squeue.
# A node that belongs to several matching partitions is printed only once.
#
# The regex is handed to awk through the environment rather than spliced
# into the awk program, so characters such as "/" cannot break the program.
#
# Arguments: $1 - awk regex matched against the partition name
# Inputs:    stdin - output of "sinfo -N"
# Outputs:   one node name per line on stdout
#######################################
select_busy_gpu_nodes() {
  GPU_PARTITIONS_RE=$1 awk '
    {
      partition = $3
      sub(/\*$/, "", partition)
    }
    partition ~ ENVIRON["GPU_PARTITIONS_RE"] && $4 ~ /^(mix|alloc)$/ && !seen[$1]++ {
      print $1
    }
  '
}

# list GPU nodes being utilized (partially/fully)
# mapfile stores one node per array element without the word-splitting and
# pathname expansion an unquoted $(...) would apply.
LIST_GPU_NODES=()
mapfile -t LIST_GPU_NODES < <(sinfo -N | select_busy_gpu_nodes "$GPU_PARTITIONS")
|
||
|
|
||
|
# Report banner: the script path as invoked, then the current date/time.
echo "$0"
date

#######################################
# Keep only the squeue lines whose partition (2nd column) matches a regex.
# Matching lines are passed through unchanged.
#
# The regex is handed to awk through the environment rather than spliced
# into the awk program, so characters such as "/" cannot break the program.
#
# Arguments: $1 - awk regex matched against the partition column
# Inputs:    stdin - output of "squeue"
# Outputs:   the matching lines on stdout
#######################################
filter_gpu_jobs() {
  GPU_PARTITIONS_RE=$1 awk '$2 ~ ENVIRON["GPU_PARTITIONS_RE"]'
}

# list all the jobs:

echo "=== LISTING OF ALL GPU JOBS ==="
# Kept in a variable because the job IDs are extracted from it again later
# in the script.
LIST_GPU_JOBS=$(squeue | filter_gpu_jobs "$GPU_PARTITIONS")
printf '%s\n' "$LIST_GPU_JOBS"
echo
|
||
|
|
||
|
#######################################
# Print the nvidia-smi report of every node given as an argument.
#
# A node that cannot be queried (ssh or nvidia-smi fails) produces a warning
# on stderr and the loop moves on to the next node. Without the explicit
# check, "set -e" would abort the whole script at the first failing node.
#
# Arguments: $@ - node names to query (may be empty)
# Outputs:   per node: a " :: node: NAME" line, the nvidia-smi output and a
#            blank line on stdout; warnings on stderr
#######################################
report_gpu_utilization() {
  local node
  for node in "$@"; do
    echo " :: node: $node"
    if ! ssh "$node" nvidia-smi; then
      echo "WARNING: could not run nvidia-smi on node $node" >&2
    fi
    echo
  done
}

echo "=== LISTING OF GPU UTILIZATIONS PER NODE ==="
# The ${array[@]+...} form expands to nothing when the array has no elements.
# A plain "${LIST_GPU_NODES[@]}" is treated as an unset variable by
# bash < 4.4 under "set -u" when no GPU node is currently in use.
report_gpu_utilization ${LIST_GPU_NODES[@]+"${LIST_GPU_NODES[@]}"}
echo
|
||
|
|
||
|
#######################################
# Extract the job IDs (1st column) from squeue-style text.
# Blank lines and the "JOBID" header line are skipped.
#
# Inputs:  stdin - squeue output lines
# Outputs: one job ID per line on stdout
#######################################
extract_job_ids() {
  awk 'NF && $1 != "JOBID" { print $1 }'
}

#######################################
# Print "scontrol show job" for every job ID given as an argument.
#
# A job that scontrol cannot show (for example because it finished after the
# job list was taken) produces a warning on stderr and the loop moves on.
# Without the explicit check, "set -e" would abort the script there.
#
# Arguments: $@ - job IDs (may be empty)
# Outputs:   scontrol output on stdout; warnings on stderr
#######################################
show_gpu_job_specs() {
  local job_id
  for job_id in "$@"; do
    if ! scontrol show job "$job_id"; then
      echo "WARNING: could not show job $job_id" >&2
    fi
  done
}

echo "=== LISTING OF GPU JOB SPECIFICATIONS ==="
# mapfile keeps every ID as one literal array element. An unquoted $(...)
# would also apply pathname expansion, which can mangle IDs that contain
# glob characters such as "[" and "]".
GPU_JOB_IDS=()
mapfile -t GPU_JOB_IDS < <(extract_job_ids <<<"${LIST_GPU_JOBS}")
# ${array[@]+...} expands to nothing for an empty array; bash < 4.4 treats a
# plain "${GPU_JOB_IDS[@]}" as unset under "set -u" when there are no jobs.
show_gpu_job_specs ${GPU_JOB_IDS[@]+"${GPU_JOB_IDS[@]}"}
|
||
|
|
||
|
#squeue | awk '$2 ~ /'"$GPU_PARTITIONS"'/ { print }'
|