WP-hpc-tools/sge/show-cluster-usage.py

#!/usr/bin/env python
#
# Created: 20160829
# Wirawan Purwanto

"""
show-cluster-usage.py
---------------------

Shows the instantaneous usage of the cluster per user at current time.
Based on qstat -f output.

Usage:

1) Using the current qstat -f data (add `--save` to also save that data
   to the `qstat-f-<date>-<time>.txt` file):

    python show-cluster-usage.py [--save]

2) Using a saved qstat -f output:

    python show-cluster-usage.py  <saved-qstat-f.txt>
"""

import os
import re
import subprocess
import sys

class ParseError(RuntimeError):
  """Error type for parsing failures.

  NOTE(review): nothing in the visible code raises this class; it is
  presumably meant for malformed qstat output -- confirm before use.
  """

class ProgramError(RuntimeError):
  """Error type for internal inconsistencies of the program itself,
  e.g. a command-line option that was parsed but is not handled by the
  option-processing loop of the main function.
  """

# Program name substituted into the usage text printed by help()
MYSELF = 'show-cluster-usage.py'


def analyze_cluster_usage_by_users(qstat_f):
  """Provides a summary analysis of cluster usage by users.

  Input: `qstat_f` is a list (or iterable) of text lines yielded by
  `qstat -f` command.

  Output: total aggregate usage per user, given as dict with usernames
  as the keys.  Each value is itself a dict with these keys:

    'user'     the username (same as the outer key)
    'jobids'   set of job IDs found on the user's running-job lines
    'xjobids'  set of "jobid:taskid" strings; the plain job ID is used
               for lines that carry no ninth (task ID) field
    'cores'    sum of the eighth field (slot count) over those lines

  Only job lines whose state field is exactly "r" are counted.
  A job line with fewer than 8 fields raises ValueError.
  """
  # A job line starts with a numeric job ID.  The amount of blank padding
  # in front of the ID depends on how many digits the ID has (SGE's qstat
  # right-justifies that column), so accept any amount of leading
  # whitespace, including none, instead of exactly one space.
  job_line_rx = re.compile(r'^\s*[0-9]+\s')

  usage = {}
  for L in qstat_f:
    if not job_line_rx.match(L):
      continue

    F = L.split()
    # For running jobs there are possibly more than 8 fields,
    # but we care only for these 8
    (jobid, priority, jobname, user, status, Date, Time, numcores) = F[:8]
    if status != "r":
      continue

    # Array-job tasks carry their task ID as a ninth field; fold it into
    # an extended ID so that every task is counted separately.
    if len(F) > 8:
      xjobid = jobid + ":" + F[8]
    else:
      xjobid = jobid

    urec = usage.setdefault(user, {
      'user': user,
      'jobids': set(),
      'xjobids': set(),
      'cores': 0,
    })
    urec['jobids'].add(jobid)
    urec['xjobids'].add(xjobid)
    urec['cores'] += int(numcores)
  return usage


def print_cluster_usage_by_users(usage):
  """Prints the instantaneous usage-per-user breakdown of the cluster.

  Input: `usage` is the aggregated instantaneous cluster usage as reported
  by the analyze_cluster_usage_by_users() function: a dict keyed by
  username whose values provide the 'user', 'cores', 'jobids' and
  'xjobids' entries.

  Output: none; a heading line followed by one line per user is printed
  to standard output, ordered by descending core count.
  """
  # Sort based on total core usage, descending manner.
  # A key function with reverse=True is used instead of cmp(), which does
  # not exist on Python 3.  The sort is stable in both forms, so users
  # with equal core counts keep their original relative order.
  cur_users_sorted = sorted(usage,
                            key=lambda u: usage[u]['cores'],
                            reverse=True)
  fmt = "%-12s  %8d  %8d %8d"

  # The heading reuses the row format (with all conversions turned into
  # %s) so that the column widths line up with the data rows.
  print(str_fmt_heading(fmt) % ("user", "numcores", "numjobs", "numtasks"))

  for u in cur_users_sorted:
    urec = usage[u]
    print(fmt % (urec['user'],
                 urec['cores'],
                 len(urec['jobids']),
                 len(urec['xjobids'])))


def help():
  """Prints the usage text of this program to standard output.

  The program name shown in the text is taken from the module-level
  MYSELF constant.
  """
  usage_text = """\
%(CMD)s - Shows cluster usage from SGE information

The information is mainly drawn from `qstat -f` output,
and analyzes the usage of the cluster in various ways..

Usage:

%(CMD)s
%(CMD)s [qstat_file] [--save]
    Shows the cluster usage aggregated per user.
"""
  print(usage_text % {'CMD': MYSELF})


def main_default(argv):
  """Main default function:
  - By default we invoke qstat -f and print the analysis.
  - If a file name is given as the first positional argument, then we
    read in that file and use it for the analysis.

  Input: `argv` is the full argument list, including the program name
  at index 0 (i.e. laid out like sys.argv).

  Recognized flags (they must precede the file name, because getopt()
  stops scanning for options at the first non-option argument):
    -h, --help   prints the help text and returns 0
    -s, --save   saves the freshly captured `qstat -f` output to the
                 `qstat-f-<YYYYmmdd>-<HHMM>.txt` file in the current
                 directory; not acted upon when a saved file is read

  Output: the exit status of the program: 0 on success, 2 if the
  command-line options cannot be parsed.

  Raises ProgramError if getopt() yields an option that is not handled
  here (that would be a programming error).
  """
  from time import localtime, strftime
  from getopt import getopt, GetoptError

  dtime = localtime()
  dtimestr = strftime("%Y%m%d-%H%M", dtime)

  # Skip program name:
  cmdargs_in = argv[1:]
  try:
    cmdopts, cmdargs = getopt(cmdargs_in,
                              "hs",
                              ["save",
                               "help"])
  except GetoptError as err:
    sys.stderr.writelines([str(err), "\n"])
    return 2

  # Process flag arguments
  save_qstat = False
  for o, a in cmdopts:
    if o in ('-h', '--help'):
      help()
      return 0
    elif o in ('-s', '--save'):
      save_qstat = True
    else:
      raise ProgramError("Unhandled option in main program: %s %s" % (o, a))

  if cmdargs:
    # getopt() hands back only the positional arguments, so the saved
    # qstat output file is the first element of cmdargs.
    with open(cmdargs[0], "r") as F:
      qstat_f_current = F.read().splitlines()
  else:
    qstat_f_current = pipe_out(('qstat', '-f'), split=True)
    if save_qstat:
      with open("qstat-f-%s.txt" % dtimestr, "w") as F:
        F.write("\n".join(qstat_f_current))
        F.write("\n")

  summary = analyze_cluster_usage_by_users(qstat_f_current)
  print_cluster_usage_by_users(summary)
  return 0


# ---------------------------------------------------------------------------
# Support tools below
# ---------------------------------------------------------------------------

def pipe_out(args, split=False, shell=False):
  """Executes a command, piping out the stdout to python for parsing.
  This is my customary shortcut for backtick operator.

  Input:
  - `args` is the command to run, passed as-is to subprocess.Popen
    (a sequence of program name and arguments, or a command string).
  - `split` selects the form of the result (see below).
  - `shell` is passed on to subprocess.Popen; the command goes through
    the shell only if this is true.

  Output: the text captured from the command's stdout, either as a single
  string (if split==False) or as a list of strings with EOLs removed
  (if split==True).

  The exit status of the command is not checked, and its stderr is not
  captured.
  """
  # universal_newlines=True makes the captured output a text string on
  # Python 3 as well (it would be bytes otherwise), so that callers can
  # apply str regexes and str.join() to the result.
  proc = subprocess.Popen(args, stdout=subprocess.PIPE, shell=shell,
                          universal_newlines=True)
  retval = proc.communicate()[0]
  if split:
    return retval.splitlines()
  return retval


# Cache for the regex compiled on the first str_fmt_heading() call.
# Internal variable: don't mess!
_str_fmt_heading_rx = None
def str_fmt_heading(fmt):
  """Converts a printf-style format string into one usable for a table
  heading: every conversion other than %s is turned into a %s conversion.
  The mapping key, flags and minimum field width of each conversion are
  kept, so the columns retain their widths; any precision is dropped.
  Literal '%%' and existing %s conversions are left alone."""
  # Originally from: $PWQMC77/scripts/cost.py and later Cr2_analysis_cbs.py .
  global _str_fmt_heading_rx
  rx = _str_fmt_heading_rx
  if rx is None:
    import re
    # The regex is complicated, hence the verbose, commented form:
    rx = re.compile(r"""
      (
        %                 # % sign
        (?:\([^)]+\))?    # optional '(keyname)' mapping key
        [-+#0 hlL]*       # optional conversion flag
        [0-9*]*           # optional minimum field width
      )
      ((?:\.[0-9]*)?)     # optional precision
      [^-+#*0 hlL0-9.%s]  # not conv flag, dimensions, nor literal '%',
                          # nor 's' conversion specifiers
    """, re.VERBOSE)
    _str_fmt_heading_rx = rx
  # Keep only group 1 (everything up to the width) and append 's'
  return rx.sub(r'\1s', fmt)


# Script entry point: runs the main function when this file is executed
# as a program.  It is skipped when a `get_ipython` name exists in the
# module globals (presumably an interactive IPython session).

if __name__ == "__main__" and "get_ipython" not in globals():
  sys.exit(main_default(sys.argv))