From 52619c36888421a25293996c3fe4a3de3cffdca5 Mon Sep 17 00:00:00 2001 From: Wirawan Purwanto Date: Fri, 26 Aug 2016 15:09:36 -0400 Subject: [PATCH] * Added tools to dump compute node info in batch. --- sge/dump-cluster-info.py | 156 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 156 insertions(+) create mode 100755 sge/dump-cluster-info.py diff --git a/sge/dump-cluster-info.py b/sge/dump-cluster-info.py new file mode 100755 index 0000000..d27d8c9 --- /dev/null +++ b/sge/dump-cluster-info.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python +# +# 20160826 +# Wirawan Purwanto +# +# A tool that dumps every possibly imaginable info I want to get from +# a SGE-managed cluster. + +import os +import re +import subprocess +import sys + + +def pipe_out(args, split=False, shell=False): + """Executes a shell command, piping out the stdout to python for parsing. + This is my customary shortcut for backtick operator. + The result is either a single string (if split==False) or a list of strings + with EOLs removed (if split==True).""" + retval = subprocess.Popen(args, stdout=subprocess.PIPE, shell=shell).communicate()[0] + if not split: + return retval + else: + return retval.splitlines() + + +class pipe_in(object): + """Executes a shell command, piping in the stdin from python for driving. + This is the reverse of pipe_out. + Commands are given through file-like write() or writelines() methods.""" + def __init__(self, args, shell=False): + self.px = subprocess.Popen(args, stdin=subprocess.PIPE, shell=shell) + self.args = args + def write(self, line): + self.px.stdin.write(line) + def writelines(self, lines): + for line in lines: + self.write(line) + def flush(self): + self.px.stdin.flush() + def close(self): + self.px.stdin.close() + + +def errchk(cmd, args, retcode): + """Checking for error after the invocation of an external command.""" + if retcode == 0: return + + print >>sys.stderr, "Error executing ", cmd, " ".join(args) + if retcode < 0: + err = "Command %s was terminated by signal %d" % (cmd, -retcode) + else: + err = "Command %s returned %d" % (cmd, retcode) + raise RuntimeError, err + + +class sh(object): + @staticmethod + def run(prg, args): + retcode = subprocess.call((prg,) + tuple(args)) + errchk(prg, args, retcode) + return 0 + + + +globals().setdefault("NODE_LIST", []) +globals().setdefault("NODE_BAD_LIST", set()) + + +def get_node_list(): + """Reads node list from SGE configuration.""" + node_list = pipe_out(("qconf", "-sel"), split=True) + return node_list + + +def node_list(): + global NODE_LIST + if not NODE_LIST: + NODE_LIST = get_node_list() + + return NODE_LIST + + +def rhost_pipe_out(host, cmdline, split=False): + cmdline_full = ["ssh", "-o", "PreferredAuthentications=publickey", host] \ + + (list(cmdline) if not isinstance(cmdline, basestring) else cmdline.split()) + rslt = pipe_out(cmdline_full, split=split) + return rslt + + +def rhost_run(host, cmdline): + cmdline_full = ["ssh", "-o", "PreferredAuthentications=publickey", host] \ + + (list(cmdline) if not isinstance(cmdline, basestring) else cmdline.split()) + rslt = sh.run(cmdline_full[0], cmdline_full[1:]) + return rslt + + +def rhosts_pipe_out(cmdline, filename, hosts=None, rootdir="cluster-info"): + """Executes cmdline on each remote host (the list is given in and + """ + from os.path import dirname, join, isdir + path_join = join + Verb = 100 + if hosts is None: + hosts = node_list() + for H in hosts: + host_base = H.split(".")[0] + outfname = path_join(rootdir, host_base, filename) + outdir = dirname(outfname) + if not isdir(outdir): + os.makedirs(outdir) + if Verb >= 1: + print(" exec: %s %s" % (H, cmdline)) + out = rhost_pipe_out(H, cmdline, split=False) + with open(outfname, "w") as F: + F.write(out) + + +def test_accessible_hosts(hosts=None): + """Tests ssh connectivity for all the hosts and return a two-tuple + containing lists of good and inaccessible hosts, respectively.""" + from os.path import dirname, join, isdir + path_join = join + Verb = 100 + if hosts is None: + hosts = node_list() + good_hosts = [] + bad_hosts = [] + for H in hosts: + host_base = H.split(".")[0] + msg_send = "Success login from host " + host_base + msg_recv = rhost_pipe_out(H, ("echo", msg_send)) + if msg_send == msg_recv.rstrip(): + good_hosts.append(H) + else: + bad_hosts.append(H) + return good_hosts, bad_hosts + + +# Below are the main gather tools + +def gather_cpuinfo(hosts=None): + """Gather tool: for cpuinfo""" + rhosts_pipe_out(("cat", "/proc/cpuinfo"), "cpuinfo.txt", hosts=hosts) + + +def gather_lspci(hosts=None): + """Gather tool: for lspci""" + rhosts_pipe_out(("lspci"), "lspci.txt", hosts=hosts) + +def gather_free(hosts=None): + """Gather tool: for free""" + rhosts_pipe_out(("free"), "free.txt", hosts=hosts) + + +