diff --git a/math/stats/array_stats.py b/math/stats/array_stats.py index 2994b39..6057eb1 100644 --- a/math/stats/array_stats.py +++ b/math/stats/array_stats.py @@ -189,3 +189,112 @@ class ArrayStat(object): else: out.write(rslt) out.flush() + + +def study_sparsity(M, binrange=(-16.5,6.5,1.0), kind='log10x', minzero=1e-30, args={}): + # Adapted from V2b_inspect.py research module. + """Study the distribution of values or sparsity of a given array, M, + using histogram. + + Returns a tuple of length N; the first (N-2) elements are the + histogram() output, while the last two are the number of elements + whose values are smaller (greater) than the lower (upper) bound of + the histogram bind range. + + Argument 'kind': By default, the 'x' values of the histogram is on + logarithmic-10 scale. + Other common log scales can be chosen using 'log2x' and 'logx'. + Choose 'linx' if you want the linear scale instead. + + Argument 'binrange' can be one of the following: + - 3-tuple: the left edge of the first bin, the right edge of the last bin, + and the width of each bin + - 2-tuple: the left MIDPOINT of the first bin, the right MIDPOINT + of the last bin; the width is assumed to be 1. + + The defaults are a 3-tuple, intended for log10x scale, where + we check order-of-magnitudes between 1e-16 and 1e+5 (roughly). + """ + from numpy import abs, count_nonzero, arange, histogram, log10, log2, log + if kind == 'log10x': # usual way, for broad view + log_val = log10(abs(M.flatten()) + minzero) + elif kind == 'log2x': + log_val = log2(abs(M.flatten()) + minzero) + elif kind == 'logx': + log_val = log(abs(M.flatten()) + minzero) + elif kind == 'linx': # linear x scale, usually for more detailed view + log_val = abs(M.flatten()) + else: + raise ValueError, "Invalid kind=%s" % (str(kind)) + if len(binrange) == 3: + binedges = numpy.arange(*binrange) + elif len(binrange) == 2: + l = binrange[0]-0.5 + r = binrange[1]+0.5 + binedges = numpy.arange(l,r) + else: + raise ValueError, "Invalid binrange parameter value" + #print binedges + hist = histogram(log_val, bins=binedges, **args) + # Count values outside the range being considered: + l = 0 + r = 0 + llim = binedges[0] + rlim = binedges[-1] + l = count_nonzero(log_val < llim) + r = count_nonzero(log_val > rlim) + return hist + (l,r) + + +def print_histogram(hist, xaxis='midpt', + # output formatting + xwidth=3, xprec=0, + out=sys.stdout): + # Adapted from V2b_inspect.py research module. + """Prints histogram in an easier way to visualize in text fashion. + """ + if len(hist) >= 4: + # special case: for study_sparsity output. + (bar,edges,lexcess,rexcess) = hist + else: + # for all other cases-- + (bar,edges) = hist[:2] + lexcess = 0 + rexcess = 0 + if xaxis == 'midpt': + midpt = (edges[1:] + edges[:-1]) * 0.5 + elif xaxis == 'leftpt': + midpt = edges[:-1] + elif xaxis == 'rightpt': + midpt = edges[1:] + else: + raise ValueError, "Invalid xaxis parameter value" + width = max(int(numpy.log10(max(bar.max(), lexcess, rexcess))+1), xwidth) + #print "width = %d" % width + barfmt = " %" + str(width) + "d" + midfmt = " %" + str(width) + "." + str(xprec) + "f" + if out == str: + from StringIO import StringIO + out = StringIO() + def retval(): + return out.getvalue() + else: + def retval(): + return None + + def Print(x): + out.write(x) + + out.write((barfmt % lexcess) \ + + "".join([ barfmt % i for i in bar ]) \ + + (barfmt % rexcess) \ + + "\n") + out.write(" " * width + "<" \ + + "".join([ midfmt % i for i in midpt ]) \ + + " " * width + ">" \ + + "\n") + + return retval() + + +