My tools of the trade for python programming.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

304 lines
8.2 KiB

# $Id: __init__.py,v 1.1 2010-10-07 15:57:29 wirawan Exp $
#
# wpylib.math.stats.array_stats module
# Created: 20140404
# Wirawan Purwanto
#
"""
wpylib.math.stats.array_stats module
Tools for studying the statistics of an array, the difference of
two or more arrays, etc.
"""
import os
import sys
import numpy
# Functions below were imported from V2b_inspect.py, 20140404
"""
CONVENTIONS
- Usually, M1 is the matrix under examination, M2 is the reference matrix.
PRINCIPLES
- Make sure functions are dimension-independent as much as possible
(i.e. not limited to 1D or 2D only).
- Text output should not hardwired only to sys.stdout via vanilla print
statement.
"""
def rms(m):
return numpy.sqrt(numpy.sum(m**2) / m.size)
def report_diff_stat(M1, M2, out=sys.stdout):
# Original function name: statdiff
"""Studies the difference of two arrays (or matrices).
Usually, M1 is the matrix under examination, M2 is the reference
matrix.
Prints out standard report to a given text file, or returns
the report as a string.
"""
dM = M1 - M2
if len(M1.shape) == 2:
dM_diag = numpy.diagonal(dM)
stats = (
dM.max(),
dM.min(),
dM.mean(),
numpy.abs(dM).mean(),
rms(dM),
dM_diag.max(),
dM_diag.min(),
dM_diag.mean(),
numpy.abs(dM_diag).mean(),
rms(dM_diag),
)
if out == tuple:
return stats
rslt = """\
- max difference = %13.6e
- min difference = %13.6e
- mean difference = %13.6e
- mean abs diff = %13.6e
- rms difference = %13.6e
- max difference = %13.6e on diagonal
- min difference = %13.6e on diagonal
- mean difference = %13.6e on diagonal
- mean abs diff = %13.6e on diagonal
- rms difference = %13.6e on diagonal
""" % stats
else:
stats = (
dM.max(),
dM.min(),
dM.mean(),
numpy.abs(dM).mean(),
rms(dM),
)
if out == tuple:
return stats
rslt = """\
- max difference = %13.6e
- min difference = %13.6e
- mean difference = %13.6e
- mean abs diff = %13.6e
- rms difference = %13.6e
""" % stats
if out == str:
return rslt
else:
out.write(rslt)
out.flush()
def maxadiff(M1, M2):
# Original function name: maxdiff
"""Returns the maximum absolute difference between two matrices.
Used for checking whether two matrices are identical."""
return numpy.abs(M1 - M2).max()
def maxadiff_by_col(M1, M2):
# Original function name: maxdiff_by_col
"""Assuming two-dimensional array (where first index is the row
index), computes the maximum absolute difference.
"""
return numpy.abs(M1 - M2).max(axis=0)
def maxradiff(M1, M2, ztol=1e-15):
# Original function name: maxrdiff
"""Returns the maximum *relative* absolute difference between two matrices.
Used for checking whether two matrices are identical.
CAVEATS:
- Zero elements in M2 (reference matrix) are converted to unity for
calculating the relative difference.
"""
(mapprox, mref) = (M1, M2)
diff = numpy.asarray(numpy.abs(mapprox - mref))
mref0 = numpy.asarray(numpy.abs(mref))
numpy.putmask(mref0, mref0 < ztol, [1.0])
return (diff / mref0).max()
def rdiff(M1, M2, ztol=1e-15, Abs=False):
"""Returns the *relative* [absolute] difference between two matrices.
Used for checking whether two matrices are identical.
Here, M1 is the array being compared, and M2 is the reference array.
CAVEATS:
- Zero elements in M2 (reference matrix) are converted to unity for
calculating the relative difference.
"""
from numpy import abs, array, asarray
(mapprox, mref) = (M1, M2)
if Abs:
diff = abs(asarray(mapprox) - asarray(mref))
mref0 = abs(asarray(mref))
numpy.putmask(mref0, mref0 < ztol, [1.0])
else:
diff = asarray(mapprox) - asarray(mref)
mref0 = array(mref, copy=True) # must make a copy
numpy.putmask(mref0, abs(mref0) < ztol, [1.0])
return (diff / mref0)
class ArrayStat(object):
"""A class to compute the statistics of an array.
"""
def __init__(self, mat, save=False):
mat = numpy.asarray(mat)
amat = numpy.abs(mat)
if save:
self.mat = mat
self.min = mat.min()
self.max = mat.max()
self.mean = mat.mean()
self.amin = amat.min()
self.amax = amat.max()
self.amean = amat.mean()
self.rms = numpy.sqrt(numpy.sum(amat**2) / amat.size)
def report(self, out=sys.stdout):
"""Prints out a standard report.
"""
#print_histogram(study_sparsity(self.diff, (log_delta-5.5,log_delta+4.5+0.1)), xaxis='left_edge')
rslt = """\
. min = %12.6g
. max = %12.6g
. mean = %12.6g
. absmin = %12.6g
. absmax = %12.6g
. absmean = %12.6g
. rms = %12.6g
""" % (self.min, self.max, self.mean,
self.amin, self.amax, self.amean,
self.rms)
if out == str:
return rslt
else:
out.write(rslt)
out.flush()
def study_sparsity(M, binrange=(-16.5,6.5,1.0), kind='log10x', minzero=1e-30, args={}):
# Adapted from V2b_inspect.py research module.
"""Study the distribution of values or sparsity of a given array, M,
using histogram.
Returns a tuple of length N; the first (N-2) elements are the
histogram() output, while the last two are the number of elements
whose values are smaller (greater) than the lower (upper) bound of
the histogram bind range.
Argument 'kind': By default, the 'x' values of the histogram is on
logarithmic-10 scale.
Other common log scales can be chosen using 'log2x' and 'logx'.
Choose 'linx' if you want the linear scale instead.
Argument 'binrange' can be one of the following:
- 3-tuple: the left edge of the first bin, the right edge of the last bin,
and the width of each bin
- 2-tuple: the left MIDPOINT of the first bin, the right MIDPOINT
of the last bin; the width is assumed to be 1.
The defaults are a 3-tuple, intended for log10x scale, where
we check order-of-magnitudes between 1e-16 and 1e+5 (roughly).
"""
from numpy import abs, count_nonzero, arange, histogram, log10, log2, log
if kind == 'log10x': # usual way, for broad view
log_val = log10(abs(M.flatten()) + minzero)
elif kind == 'log2x':
log_val = log2(abs(M.flatten()) + minzero)
elif kind == 'logx':
log_val = log(abs(M.flatten()) + minzero)
elif kind == 'linx': # linear x scale, usually for more detailed view
log_val = abs(M.flatten())
else:
raise ValueError, "Invalid kind=%s" % (str(kind))
if len(binrange) == 3:
binedges = numpy.arange(*binrange)
elif len(binrange) == 2:
l = binrange[0]-0.5
r = binrange[1]+0.5
binedges = numpy.arange(l,r)
else:
raise ValueError, "Invalid binrange parameter value"
#print binedges
hist = histogram(log_val, bins=binedges, **args)
# Count values outside the range being considered:
l = 0
r = 0
llim = binedges[0]
rlim = binedges[-1]
l = count_nonzero(log_val < llim)
r = count_nonzero(log_val > rlim)
return hist + (l,r)
def print_histogram(hist, xaxis='midpt',
# output formatting
xwidth=3, xprec=0,
out=sys.stdout):
# Adapted from V2b_inspect.py research module.
"""Prints histogram in an easier way to visualize in text fashion.
"""
if len(hist) >= 4:
# special case: for study_sparsity output.
(bar,edges,lexcess,rexcess) = hist
else:
# for all other cases--
(bar,edges) = hist[:2]
lexcess = 0
rexcess = 0
if xaxis == 'midpt':
midpt = (edges[1:] + edges[:-1]) * 0.5
elif xaxis == 'leftpt':
midpt = edges[:-1]
elif xaxis == 'rightpt':
midpt = edges[1:]
else:
raise ValueError, "Invalid xaxis parameter value"
width = max(int(numpy.log10(max(bar.max(), lexcess, rexcess))+1), xwidth)
#print "width = %d" % width
barfmt = " %" + str(width) + "d"
midfmt = " %" + str(width) + "." + str(xprec) + "f"
if out == str:
from StringIO import StringIO
out = StringIO()
def retval():
return out.getvalue()
else:
def retval():
return None
def Print(x):
out.write(x)
out.write((barfmt % lexcess) \
+ "".join([ barfmt % i for i in bar ]) \
+ (barfmt % rexcess) \
+ "\n")
out.write(" " * width + "<" \
+ "".join([ midfmt % i for i in midpt ]) \
+ " " * width + ">" \
+ "\n")
return retval()