You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
215 lines
5.8 KiB
215 lines
5.8 KiB
#!/usr/bin/env python
|
|
#
|
|
# Created: 20160829
|
|
# Wirawan Purwanto
|
|
|
|
"""
|
|
show-usage-current.py
|
|
---------------------
|
|
|
|
Shows the instantaneous usage of the cluster per user at current time.
|
|
Based on qstat -f output.
|
|
|
|
Usage:
|
|
|
|
1) Using the current qstat -f data (add `--save` to also save that data to
the `qstat-f-<date>-<time>.txt` file):
|
|
|
|
python show-cluster-usage.py
|
|
|
|
2) Using a saved qstat -f output:
|
|
|
|
python show-cluster-usage.py <saved-qstat-f.txt>
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
class ParseError(RuntimeError):
    """RuntimeError subclass, presumably meant for input that cannot be parsed.

    NOTE(review): nothing in the visible code raises it -- confirm intended use.
    """
|
|
|
|
class ProgramError(RuntimeError):
    """RuntimeError subclass raised for internal errors of this program.

    main_default() raises it when it meets a command-line option that getopt
    accepted but the option-processing loop does not handle.
    """
|
|
|
|
# Program name substituted for %(CMD)s in the text printed by help().
MYSELF = 'show-cluster-usage.py'
|
|
|
|
|
|
def analyze_cluster_usage_by_users(qstat_f):
    """Aggregates the cluster usage of running jobs, grouped by user.

    Input: `qstat_f` is a list (or any iterable) of text lines as produced
    by the `qstat -f` command.

    Only the lines that look like a job entry (a space, a run of digits,
    then a space at the start of the line) are examined, and among those
    only the jobs whose status field is exactly "r" are counted.

    Output: a dict keyed by username.  Each value is a dict with the keys:
      'user'    - the username itself
      'jobids'  - set of the job IDs seen for this user
      'xjobids' - set of "jobid:taskid" strings (or plain job IDs for
                  entries without a ninth field), i.e. one item per task
      'cores'   - sum of the eighth field (core count) over the counted
                  entries
    """
    job_entry = re.compile(r'^ [0-9]+ ')
    per_user = {}

    for line in qstat_f:
        if not job_entry.match(line):
            continue

        fields = line.split()
        # Running jobs may carry more than 8 fields, but only the first 8
        # are mandatory; a ninth one, when present, is the task ID.
        (jobid, _priority, _jobname, user,
         status, _date, _time, numcores) = fields[:8]

        if status != "r":
            continue

        if len(fields) > 8:
            xjobid = jobid + ":" + fields[8]
        else:
            xjobid = jobid

        # Fetch this user's record, creating an empty one on first sight.
        record = per_user.setdefault(user, {
            'user': user,
            'jobids': set(),
            'xjobids': set(),
            'cores': 0,
        })
        record['jobids'].add(jobid)
        record['xjobids'].add(xjobid)
        record['cores'] += int(numcores)

    return per_user
|
|
|
|
|
|
def print_cluster_usage_by_users(usage):
    """Prints the instantaneous usage-per-user breakdown of the cluster.

    Input: `usage` is the aggregated instantaneous cluster usage as reported
    by the analyze_cluster_usage_by_users() function: a dict keyed by
    username whose values carry the 'user', 'cores', 'jobids' and 'xjobids'
    entries.

    Output: nothing is returned.  A heading line followed by one line per
    user (username, number of cores, number of jobs, number of tasks) is
    printed to stdout, users with the largest core count first.
    """
    # Sort based on total core usage, in descending manner.
    # key=/reverse= works on both Python 2 and 3 (cmp() and sorted(cmp=...)
    # exist only in Python 2); the sort stays stable, so users with equal
    # core counts keep the iteration order of `usage`.
    users_by_cores = sorted(usage,
                            key=lambda u: usage[u]['cores'],
                            reverse=True)
    fmt = "%-12s %8d %8d %8d"

    # The heading reuses the column widths of `fmt` with all-string fields.
    print(str_fmt_heading(fmt) % ("user", "numcores", "numjobs", "numtasks"))

    for u in users_by_cores:
        urec = usage[u]
        print(fmt % (urec['user'],
                     urec['cores'],
                     len(urec['jobids']),
                     len(urec['xjobids'])))
|
|
|
|
|
|
def help():
    """Prints the usage text of this tool to stdout.

    The program name shown in the text is taken from the module-level
    MYSELF constant.
    """
    template = """\
%(CMD)s - Shows cluster usage from SGE information

The information is mainly drawn from `qstat -f` output,
and analyzes the usage of the cluster in various ways..

Usage:

%(CMD)s
%(CMD)s [qstat_file] [--save]
Shows the cluster usage aggregated per user.
"""
    print(template % {"CMD": MYSELF})
|
|
|
|
|
|
def main_default(argv):
    """Main default function:
    - By default we invoke `qstat -f` and print the analysis.
    - If a positional argument is given, it is taken as the name of a
      file holding a saved `qstat -f` output; we read that file and use
      it for the analysis instead of invoking qstat.

    Input: `argv` is the full argument list, program name included
    (i.e. the same layout as sys.argv).

    Recognized options:
      -h, --help   print the help text and stop
      -s, --save   when qstat is invoked, also save its output to
                   `qstat-f-<YYYYmmdd>-<HHMM>.txt` in the current directory

    Output: the process exit code: 0 on success, 2 when the command-line
    options cannot be parsed (the getopt message goes to stderr).

    Raises ProgramError if getopt accepts an option that the processing
    loop below does not know about (an internal inconsistency).
    """
    from time import localtime, strftime
    from getopt import getopt, GetoptError

    # Timestamp used to name the saved qstat snapshot
    dtimestr = strftime("%Y%m%d-%H%M", localtime())

    # Skip program name:
    cmdargs_in = argv[1:]
    try:
        cmdopts, cmdargs = getopt(cmdargs_in,
                                  "hs",
                                  ["save",
                                   "help"])
    except GetoptError as err:
        sys.stderr.writelines([str(err), "\n"])
        return 2

    # Process flag arguments
    save_qstat = False
    for o, a in cmdopts:
        if o in ('-h', '--help'):
            help()
            return 0
        elif o in ('-s', '--save'):
            save_qstat = True
        else:
            raise ProgramError("Unhandled option in main program: %s %s" % (o, a))

    # getopt has already stripped the options, so the saved qstat file (if
    # any) is the FIRST remaining positional argument.
    if cmdargs:
        with open(cmdargs[0], "r") as F:
            qstat_f_current = F.read().splitlines()
    else:
        qstat_f_current = pipe_out(('qstat', '-f'), split=True)
        # Saving only makes sense for freshly captured data.
        if save_qstat:
            with open("qstat-f-%s.txt" % dtimestr, "w") as F:
                F.write("\n".join(qstat_f_current))
                F.write("\n")

    summary = analyze_cluster_usage_by_users(qstat_f_current)
    print_cluster_usage_by_users(summary)
    return 0
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Support tools below
|
|
# ---------------------------------------------------------------------------
|
|
|
|
def pipe_out(args, split=False, shell=False):
    """Executes a command, piping out its stdout to python for parsing.
    This is my customary shortcut for backtick operator.

    Input:
      `args`  - the command, in any form accepted by subprocess.Popen
                (a sequence of arguments, or a string when shell=True)
      `split` - whether to break the output into lines
      `shell` - passed through to subprocess.Popen

    Output: either a single text string (if split==False) or a list of
    strings with EOLs removed (if split==True).

    The exit status of the command is not checked, and its stderr is not
    captured.
    """
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, shell=shell)
    retval = proc.communicate()[0]
    # On Python 3 the pipe yields bytes, which the text-based parsing in
    # this file cannot handle; turn it into text.  On Python 2 the output
    # is already a str and is returned untouched.
    if not isinstance(retval, str):
        retval = retval.decode("utf-8", "replace")
    if split:
        return retval.splitlines()
    return retval
|
|
|
|
|
|
# Internal variable: don't mess!
# Lazily-compiled regex used by str_fmt_heading(); None until first use.
_str_fmt_heading_rx = None


def str_fmt_heading(fmt):
    """Replaces a printf-style formatting with one suitable for table heading:
    all non-string conversions are replaced with string conversions,
    preserving the minimum widths.

    Input: `fmt` is a printf-style format string, e.g. "%-12s %8d".

    Output: the same string where every conversion other than `%s` has
    become `%s`; the mapping key, flags and minimum field width are kept
    while the precision is dropped (e.g. "%8.3f" becomes "%8s").
    Existing `%s` conversions and literal `%%` signs are left as they are.
    """
    # Originally from: $PWQMC77/scripts/cost.py and later Cr2_analysis_cbs.py .
    import re
    global _str_fmt_heading_rx
    if _str_fmt_heading_rx is None:
        # Because of complicated regex, I verbosely write it out here:
        _str_fmt_heading_rx = re.compile(r"""
            (%%)                    # literal percent sign: consumed as a
                                    # unit so that its second '%' is never
                                    # mistaken for the start of a conversion
            |
            (
              %                     # % sign
              (?:\([^)]+\))?        # optional '(keyname)' mapping key
              [-+#0 hlL]*           # optional conversion flag
              [0-9*]*               # optional minimum field width
            )
            (?:\.[0-9]*)?           # optional precision (dropped)
            [^-+#*0 hlL0-9.%s]      # not conv flag, dimensions, nor literal '%',
                                    # nor 's' conversion specifiers
            """, re.VERBOSE)

    def _as_string_conversion(m):
        # A literal '%%' goes back unchanged; anything else keeps its
        # key/flags/width prefix and gets the 's' conversion type.
        if m.group(1) is not None:
            return m.group(1)
        return m.group(2) + 's'

    return _str_fmt_heading_rx.sub(_as_string_conversion, fmt)
|
|
|
|
|
|
|
|
# Script entry point: run main_default() and exit with its return code.
# Skipped when `get_ipython` is defined in the module globals (presumably
# meaning the file is being run inside an IPython session).
if __name__ == "__main__" and "get_ipython" not in globals():
    sys.exit(main_default(sys.argv))
|
|
|