Custom HPC software & tools from Wirawan. Primarily tailored toward ODU HPC systems.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 

215 lines
5.8 KiB

#!/usr/bin/env python
#
# Created: 20160829
# Wirawan Purwanto
"""
show-usage-current.py
---------------------
Shows the instantaneous usage of the cluster per user at current time.
Based on qstat -f output.
Usage:
1) Using the current qstat -f data (add `--save` to also save that data
to the `qstat-f-<date>-<time>.txt` file):
python show-cluster-usage.py [--save]
2) Using a saved qstat -f output:
python show-cluster-usage.py <saved-qstat-f.txt>
"""
import os
import re
import subprocess
import sys
class ParseError(RuntimeError):
    """RuntimeError subclass named for parse failures.

    It adds no behavior of its own; nothing in the code visible here
    raises it.
    """
class ProgramError(RuntimeError):
    """RuntimeError subclass for internal program errors.

    main_default() raises it when getopt accepts an option that the
    option-processing loop has no handler for.
    """
# Program name substituted for %(CMD)s in the text printed by help().
MYSELF = 'show-cluster-usage.py'
def analyze_cluster_usage_by_users(qstat_f):
    """Aggregate the running jobs listed by `qstat -f` into per-user totals.

    Input: `qstat_f` is a list (or any iterable) of text lines as printed
    by the `qstat -f` command.

    Output: a dict keyed by username.  Each value is a record (dict) with:
      'user'    -- the username,
      'jobids'  -- set of job IDs seen in the running state,
      'xjobids' -- set of "jobid:taskid" strings (plain job ID when the
                   line carries no ninth field),
      'cores'   -- sum of the core-count field over all matching lines.

    Only lines whose status field is exactly "r" are counted.
    """
    per_user = {}
    for line in qstat_f:
        # Job lines are recognized as: one space, a run of digits, a space.
        # NOTE(review): the pattern demands exactly one leading space --
        # confirm this against the real qstat -f column layout.
        if not re.match(r'^ [0-9]+ ', line):
            continue

        fields = line.split()
        # Running jobs may carry more than 8 fields; only the first 8 are
        # unpacked (a shorter line raises ValueError here).
        (jobid, _priority, _jobname, user,
         status, _date, _time, numcores) = fields[:8]
        if status != "r":
            continue

        # A ninth field, when present, is the task ID; fold it into the
        # extended job ID so that individual tasks are counted separately.
        if len(fields) > 8:
            xjobid = jobid + ":" + fields[8]
        else:
            xjobid = jobid

        record = per_user.setdefault(user, {
            'user': user,
            'jobids': set(),
            'xjobids': set(),
            'cores': 0,
        })
        record['jobids'].add(jobid)
        record['xjobids'].add(xjobid)
        record['cores'] += int(numcores)
    return per_user
def print_cluster_usage_by_users(usage):
    """Print the instantaneous usage-per-user breakdown of the cluster.

    Input: `usage` is the aggregated instantaneous cluster usage as reported
    by the analyze_cluster_usage_by_users() function: a dict keyed by
    username whose values are records with the keys 'user', 'cores',
    'jobids' and 'xjobids'.

    Output: nothing is returned.  A heading line and then one line per user
    (user, numcores, numjobs, numtasks) are printed to stdout, ordered by
    core count from largest to smallest.
    """
    # Sort based on total core usage, in descending manner.  A key function
    # with reverse=True replaces the cmp() builtin and the `cmp=` argument,
    # neither of which exists on Python 3; the sort stays stable, so users
    # with equal core counts keep their original relative order.
    cur_users_sorted = sorted(usage,
                              key=lambda u: usage[u]['cores'],
                              reverse=True)

    fmt = "%-12s %8d %8d %8d"
    # The heading reuses the row format with every numeric conversion turned
    # into a string conversion of the same width.
    print(str_fmt_heading(fmt) % ("user", "numcores", "numjobs", "numtasks"))
    for u in cur_users_sorted:
        urec = usage[u]
        print(fmt % (urec['user'],
                     urec['cores'],
                     len(urec['jobids']),
                     len(urec['xjobids'])))
def help():
    """Print the command-line help text to stdout.

    The program name shown in the text is taken from the module-level
    MYSELF constant.
    """
    template = """\
%(CMD)s - Shows cluster usage from SGE information
The information is mainly drawn from `qstat -f` output,
and analyzes the usage of the cluster in various ways..
Usage:
%(CMD)s
%(CMD)s [qstat_file] [--save]
Shows the cluster usage aggregated per user.
"""
    print(template % {'CMD': MYSELF})
def main_default(argv):
    """Main default function: print the per-user cluster usage summary.

    - By default we invoke `qstat -f` and print the analysis.
    - If a positional argument is given, it names a file holding saved
      `qstat -f` output; that file is read and used for the analysis
      instead of running qstat.

    Input: `argv` is the full argument vector, program name included
    (i.e. sys.argv).  Recognized options:
      -h, --help   print the help text and return 0.
      -s, --save   when the data comes from a live `qstat -f` run, also
                   write it to `qstat-f-<date>-<time>.txt`.
    Options must precede the positional argument, since plain getopt stops
    option parsing at the first non-option argument.

    Output: the process exit status: 0 on success, 2 when the command line
    cannot be parsed.

    Raises ProgramError if getopt accepts an option that has no handler
    here.
    """
    from time import localtime, strftime
    from getopt import getopt, GetoptError

    # Timestamp taken up front; used to name the saved qstat snapshot.
    dtime = localtime()
    dtimestr = strftime("%Y%m%d-%H%M", dtime)

    # Skip program name:
    cmdargs_in = argv[1:]
    try:
        cmdopts, cmdargs = getopt(cmdargs_in,
                                  "hs",
                                  ["save",
                                   "help"])
    except GetoptError as err:
        sys.stderr.writelines([str(err), "\n"])
        return 2

    # Process flag arguments
    save_qstat = False
    for o, a in cmdopts:
        if o in ('-h', '--help'):
            help()
            return 0
        elif o in ('-s', '--save'):
            save_qstat = True
        else:
            # Call syntax (not the `raise E, msg` statement form) so that the
            # module also compiles on Python 3.
            raise ProgramError(
                "Unhandled option in main program: %s %s" % (o, a))

    if cmdargs:
        # getopt has already stripped the options, so the saved-output file
        # is the FIRST remaining argument (index 0, not 1).
        with open(cmdargs[0], "r") as qstat_file:
            qstat_f_current = qstat_file.read().splitlines()
    else:
        qstat_f_current = pipe_out(('qstat', '-f'), split=True)
        # Only freshly fetched data is saved; data that came from a file
        # already exists on disk.
        if save_qstat:
            with open("qstat-f-%s.txt" % dtimestr, "w") as F:
                F.write("\n".join(qstat_f_current))
                F.write("\n")

    summary = analyze_cluster_usage_by_users(qstat_f_current)
    print_cluster_usage_by_users(summary)
    return 0
# ---------------------------------------------------------------------------
# Support tools below
# ---------------------------------------------------------------------------
def pipe_out(args, split=False, shell=False):
    """Execute a command and return what it wrote to stdout, as text.

    This is my customary shortcut for the backtick operator.

    Input:
      args  -- the command, passed as-is to subprocess.Popen (a sequence of
               arguments, or a single string when `shell` is true).
      split -- if true, return a list of lines instead of one string.
      shell -- passed through to subprocess.Popen.

    Output: either a single string (if split==False) or a list of strings
    with EOLs removed (if split==True).  The command's exit status is not
    checked, and its stderr is not captured.
    """
    # universal_newlines=True makes the pipe a text stream, so the result is
    # `str` on Python 3 as well (it would otherwise be `bytes` there, which
    # cannot be matched against str regexes or joined with "\n").
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, shell=shell,
                            universal_newlines=True)
    retval = proc.communicate()[0]
    if split:
        return retval.splitlines()
    return retval
# Internal variable: don't mess!
# Holds the compiled pattern used by str_fmt_heading(); it is None until the
# first call compiles it.
_str_fmt_heading_rx = None


def str_fmt_heading(fmt):
    """Turn a printf-style row format into one usable for a table heading.

    Every conversion other than `%s` is rewritten as a string conversion:
    the optional mapping key, flags and minimum field width are kept, while
    the precision and the original conversion character are dropped.
    Existing `%s` conversions and `%%` are left as they are.

    Input: `fmt` is the printf-style format string.
    Output: the rewritten format string.
    """
    # Originally from: $PWQMC77/scripts/cost.py and later Cr2_analysis_cbs.py .
    import re
    global _str_fmt_heading_rx

    pattern = _str_fmt_heading_rx
    if pattern is None:
        # Compiled once and cached in the module-level variable.  The regex
        # is complicated, hence the verbose layout.
        pattern = re.compile(r"""
(
% # % sign
(?:\([^)]+\))? # optional '(keyname)' mapping key
[-+#0 hlL]* # optional conversion flag
[0-9*]* # optional minimum field width
)
((?:\.[0-9]*)?) # optional precision
[^-+#*0 hlL0-9.%s] # not conv flag, dimensions, nor literal '%',
# nor 's' conversion specifiers
""", re.VERBOSE)
        _str_fmt_heading_rx = pattern

    # Group 1 is everything up to and including the width; append 's'.
    return pattern.sub(r'\1s', fmt)
# Script entry point: run main_default() and exit with its return value.
# Skipped when `get_ipython` is defined in the module globals (presumably
# meaning the file is being run inside an IPython session).
if __name__ == "__main__" and "get_ipython" not in globals():
    sys.exit(main_default(sys.argv))