#!/bin/bash
#
# sinfo-report-node-stats.sh
#
# Simple tool to report the status of compute nodes based on SLURM's
# "sinfo" output.
#
# Provenance: slurm/sinfo-report-node-stats.sh, introduced by commit
# 19c833c3ff998594eb2f46a7de1747bf1d602182
# (Wirawan Purwanto, Mon, 29 Jun 2020 13:30:04 -0400).
#
# Environment variables that affect this script:
#
#   OUTDIR       Directory that receives every output file (default: ".").
#                It must already exist.
#   TIMESTAMP    Suffix inserted into the output file names.  The values
#                "-", "none" and "(none)" mean "no suffix".  When it is
#                unset/empty and fresh data is fetched from SLURM, it
#                defaults to "_YYYY-MM-DDTHH.MM.SS" (current local time).
#   SINFO_NODES  Path to a previously saved `sinfo -N` output.  When set,
#                SLURM is not queried and that file is analysed instead.
#
# Files written to ${OUTDIR}:
#
#   sinfo${TIMESTAMP}.txt, sinfo-s${TIMESTAMP}.txt, sinfo-N${TIMESTAMP}.txt
#       Raw `sinfo`, `sinfo -s`, `sinfo -N` output (only when SINFO_NODES
#       was not supplied).
#   nodes-active${TIMESTAMP}.txt
#       Sorted unique node names whose 4th column does not match
#       fail|down.
#   nodes-active-types${TIMESTAMP}.txt
#       Count of active nodes per node type (node name minus the
#       trailing "-NNN" host number).
#   nodes-all-types${TIMESTAMP}.txt
#       Same count, over all nodes regardless of state.
#
# Exit status: 0 on success, non-zero if SLURM could not be queried or an
# input/output location is unusable.

# Kept verbatim from the original script (built-in help text; not
# consumed by any code in this file).
DOC="Get information about active nodes from SLURM perspective.

Environment variables that affect this script:

* OUTDIR
* TIMESTAMP
* SINFO_NODES

"

#######################################
# Print a diagnostic message on stderr, prefixed with the script name.
# Arguments: $* - message text
#######################################
_err() {
  printf '%s: %s\n' "${0##*/}" "$*" >&2
}

#######################################
# Save the raw sinfo reports into OUTDIR.
# Globals:   OUTDIR (read), TIMESTAMP (read)
# Returns:   0 on success, non-zero if sinfo is missing or any call fails
#######################################
_fetch_sinfo_snapshots() {
  if ! command -v sinfo > /dev/null 2>&1; then
    _err "sinfo not found in PATH; set SINFO_NODES to reuse a saved 'sinfo -N' output"
    return 1
  fi

  # FIXME (from the original author): the three calls are not atomic, so
  # the cluster state may change between them.  `sinfo -N` should be
  # considered the authoritative output.
  sinfo > "${OUTDIR}/sinfo${TIMESTAMP}.txt" \
    && sinfo -s > "${OUTDIR}/sinfo-s${TIMESTAMP}.txt" \
    && sinfo -N > "${OUTDIR}/sinfo-N${TIMESTAMP}.txt" \
    || {
      _err "sinfo failed; cannot collect node information"
      return 1
    }
}

#######################################
# Print the sorted, unique node names found in a `sinfo -N` listing.
# `sinfo -N` emits one line per node:partition combination, hence the
# de-duplication.  The first line (column header) is skipped.
# Arguments: $1 - "active" to drop rows whose 4th field matches
#                 fail|down; any other value keeps every row
#            $2 - path of the `sinfo -N` listing
# Outputs:   node names, one per line, on stdout
#######################################
_list_nodes() {
  local mode=$1 listing=$2
  local filter='NR > 1 { print $1 }'

  if [[ "${mode}" == active ]]; then
    # 4th field is presumably the STATE column of sinfo's default -N
    # layout (NODELIST NODES PARTITION STATE) -- same test as the
    # original script.
    filter='NR > 1 && $4 !~ /fail|down/ { print $1 }'
  fi

  # Feed the file through stdin so that a path containing '=' is never
  # mistaken by awk for a variable assignment.
  awk "${filter}" < "${listing}" | sort | uniq
}

#######################################
# Read node names on stdin, strip the trailing host number (-NNN) and
# print "<count> <type>" for every distinct node type.
# The second sort is essential: removing the suffix can leave equal type
# names on non-adjacent lines (e.g. a-1, a-1-2, a-2 -> a, a-1, a), and
# `uniq -c` only merges adjacent duplicates.
#######################################
_count_node_types() {
  sed -e 's/-[0-9][0-9]*$//' | sort | uniq -c
}

#######################################
# Entry point: collect (or reuse) the `sinfo -N` listing and write the
# node reports described in the file header.
# Globals:   OUTDIR, TIMESTAMP, SINFO_NODES (read and written)
# Returns:   0 on success, non-zero on error
#######################################
main() {
  : "${OUTDIR:=.}"

  if [[ ! -d "${OUTDIR}" ]]; then
    _err "output directory '${OUTDIR}' does not exist"
    return 1
  fi

  # Normalise TIMESTAMP.  The "no suffix" sentinels are honoured in both
  # the fetch and the reuse mode, so that the report names stay
  # consistent with the snapshot they were derived from.
  local stamp_missing=0
  case "${TIMESTAMP-}" in
    "")
      stamp_missing=1
      ;;
    - | "(none)" | none)
      TIMESTAMP= # BLANK
      ;;
  esac

  # If the output of "sinfo -N" is not specified, fetch it from SLURM
  # and save the additional sinfo reports as well.
  if [[ -z "${SINFO_NODES-}" ]]; then
    if ((stamp_missing)); then
      TIMESTAMP=$(date +"_%Y-%m-%dT%H.%M.%S")
    fi
    _fetch_sinfo_snapshots || return
    SINFO_NODES="${OUTDIR}/sinfo-N${TIMESTAMP}.txt"
  else
    echo "Reusing node info from ${SINFO_NODES}"
    echo "Assigned TIMESTAMP=${TIMESTAMP:-(none)}"
  fi

  if [[ ! -r "${SINFO_NODES}" ]]; then
    _err "cannot read node listing '${SINFO_NODES}'"
    return 1
  fi

  local active_list="${OUTDIR}/nodes-active${TIMESTAMP}.txt"

  # Active nodes: names only, fail/down states excluded.
  _list_nodes active "${SINFO_NODES}" > "${active_list}" || return

  # Active nodes per node type, derived from the list just written.
  _count_node_types < "${active_list}" \
    > "${OUTDIR}/nodes-active-types${TIMESTAMP}.txt" || return

  # All nodes per node type, regardless of state.
  _list_nodes all "${SINFO_NODES}" \
    | _count_node_types > "${OUTDIR}/nodes-all-types${TIMESTAMP}.txt"
}

# Must stay the last command: its status becomes the script's exit status.
main "$@"