#!/usr/bin/env python
#
# 20160826
# Wirawan Purwanto
#
# A tool that dumps every possibly imaginable info I want to get from
# a SGE-managed cluster.

import os
import re
import subprocess
import sys

# String types that mark a command line given as one whitespace-separated
# string rather than as a sequence of arguments.  `basestring` exists only on
# Python 2; fall back to `str` on Python 3.
try:
    _STRING_TYPES = (basestring,)
except NameError:
    _STRING_TYPES = (str,)

# Leading part of every ssh invocation made by this module.
# NOTE(review): restricting authentication to public keys presumably keeps
# ssh from stopping at a password prompt -- confirm.
_SSH_PREFIX = ("ssh", "-o", "PreferredAuthentications=publickey")


def pipe_out(args, split=False, shell=False):
    """Executes a shell command, piping out the stdout to python for parsing.

    This is my customary shortcut for backtick operator.
    The result is either a single string (if split==False) or a list of
    strings with EOLs removed (if split==True).

    On Python 3 the captured bytes are decoded as UTF-8 (undecodable bytes
    are replaced), so that callers always get text, as they do on Python 2."""
    output = subprocess.Popen(args, stdout=subprocess.PIPE,
                              shell=shell).communicate()[0]
    if not isinstance(output, str):
        # Python 3: communicate() hands back bytes for a binary pipe.
        output = output.decode("utf-8", "replace")
    if split:
        return output.splitlines()
    return output


class pipe_in(object):
    """Executes a shell command, piping in the stdin from python for driving.

    This is the reverse of pipe_out.
    Commands are given through file-like write() or writelines() methods.
    Note that close() only closes the child's stdin; it does not wait for
    the child process to finish."""

    def __init__(self, args, shell=False):
        self.px = subprocess.Popen(args, stdin=subprocess.PIPE, shell=shell)
        self.args = args

    def write(self, line):
        """Sends one chunk of text to the child's stdin."""
        if not isinstance(line, bytes):
            # The pipe is binary; text (Python 3 str, Python 2 unicode)
            # has to be encoded first.
            line = line.encode("utf-8")
        self.px.stdin.write(line)

    def writelines(self, lines):
        """Sends every item of `lines` to the child's stdin, in order."""
        for line in lines:
            self.write(line)

    def flush(self):
        """Flushes the pipe connected to the child's stdin."""
        self.px.stdin.flush()

    def close(self):
        """Closes the child's stdin."""
        self.px.stdin.close()


def errchk(cmd, args, retcode):
    """Checking for error after the invocation of an external command.

    Returns silently when retcode is zero.  Otherwise reports the failed
    command on stderr and raises RuntimeError; a negative retcode is
    reported as termination by signal number -retcode."""
    if retcode == 0:
        return

    # The double space reproduces the output of the original
    # `print >>sys.stderr, "Error executing ", ...` statement.
    sys.stderr.write("Error executing  %s %s\n" % (cmd, " ".join(args)))
    if retcode < 0:
        err = "Command %s was terminated by signal %d" % (cmd, -retcode)
    else:
        err = "Command %s returned %d" % (cmd, retcode)
    raise RuntimeError(err)


class sh(object):
    """Namespace for running external programs with error checking."""

    @staticmethod
    def run(prg, args):
        """Runs program `prg` with argument sequence `args`.

        Returns 0 on success; a nonzero exit status makes errchk() raise
        RuntimeError."""
        retcode = subprocess.call((prg,) + tuple(args))
        errchk(prg, args, retcode)
        return 0


# setdefault() keeps values that already exist in the module namespace
# instead of overwriting them.
globals().setdefault("NODE_LIST", [])
globals().setdefault("NODE_BAD_LIST", set())


def get_node_list():
    """Reads node list from SGE configuration.

    Returns the output lines of `qconf -sel` as a list of strings."""
    return pipe_out(("qconf", "-sel"), split=True)


def node_list():
    """Returns the node list, querying SGE only while the cached global
    NODE_LIST is still empty."""
    global NODE_LIST
    if not NODE_LIST:
        NODE_LIST = get_node_list()
    return NODE_LIST


def _ssh_cmdline(host, cmdline):
    """Builds the full ssh argument list for running `cmdline` on `host`.

    `cmdline` is either a sequence of arguments or a single string, which is
    split on whitespace (no shell-style quoting is interpreted)."""
    if isinstance(cmdline, _STRING_TYPES):
        remote_args = cmdline.split()
    else:
        remote_args = list(cmdline)
    return list(_SSH_PREFIX) + [host] + remote_args


def rhost_pipe_out(host, cmdline, split=False):
    """Executes cmdline on a remote host through ssh and returns its stdout,
    in the same form as pipe_out() (a string, or a list of lines if split)."""
    return pipe_out(_ssh_cmdline(host, cmdline), split=split)


def rhost_run(host, cmdline):
    """Executes cmdline on a remote host through ssh without capturing output.

    Returns 0 on success; raises RuntimeError if ssh exits with a nonzero
    status."""
    full_cmd = _ssh_cmdline(host, cmdline)
    return sh.run(full_cmd[0], full_cmd[1:])


def rhosts_pipe_out(cmdline, filename, hosts=None, rootdir="cluster-info"):
    """Executes cmdline on each remote host and saves each host's stdout.

    The host list is given in `hosts`; when it is None, node_list() is used.
    The output of every host goes to <rootdir>/<short hostname>/<filename>,
    where the short hostname is the part of the host name before the first
    dot.  Missing directories are created."""
    verbosity = 100
    if hosts is None:
        hosts = node_list()
    for host in hosts:
        host_base = host.split(".")[0]
        outfname = os.path.join(rootdir, host_base, filename)
        outdir = os.path.dirname(outfname)
        if not os.path.isdir(outdir):
            os.makedirs(outdir)
        if verbosity >= 1:
            print(" exec: %s %s" % (host, cmdline))
        out = rhost_pipe_out(host, cmdline, split=False)
        with open(outfname, "w") as F:
            F.write(out)


def test_accessible_hosts(hosts=None):
    """Tests ssh connectivity for all the hosts and return a two-tuple
    containing lists of good and inaccessible hosts, respectively.

    A host counts as good when a remote `echo` of a known message comes back
    unchanged (ignoring trailing whitespace).  When `hosts` is None,
    node_list() is used."""
    if hosts is None:
        hosts = node_list()
    good_hosts = []
    bad_hosts = []
    for host in hosts:
        host_base = host.split(".")[0]
        msg_send = "Success login from host " + host_base
        msg_recv = rhost_pipe_out(host, ("echo", msg_send))
        if msg_send == msg_recv.rstrip():
            good_hosts.append(host)
        else:
            bad_hosts.append(host)
    return good_hosts, bad_hosts


# The name above matches the test_* pattern that pytest/nose use for
# collecting unit tests; this is a connectivity probe, not a unit test.
test_accessible_hosts.__test__ = False


# Below are the main gather tools

def gather_cpuinfo(hosts=None):
    """Gather tool: for cpuinfo"""
    rhosts_pipe_out(("cat", "/proc/cpuinfo"), "cpuinfo.txt", hosts=hosts)


def gather_lspci(hosts=None):
    """Gather tool: for lspci"""
    # A plain string command line; it is split on whitespace downstream.
    rhosts_pipe_out("lspci", "lspci.txt", hosts=hosts)


def gather_free(hosts=None):
    """Gather tool: for free"""
    # A plain string command line; it is split on whitespace downstream.
    rhosts_pipe_out("free", "free.txt", hosts=hosts)