You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
304 lines
8.2 KiB
304 lines
8.2 KiB
# $Id: __init__.py,v 1.1 2010-10-07 15:57:29 wirawan Exp $
|
|
#
|
|
# wpylib.math.stats.array_stats module
|
|
# Created: 20140404
|
|
# Wirawan Purwanto
|
|
#
|
|
|
|
"""
|
|
wpylib.math.stats.array_stats module
|
|
Tools for studying the statistics of an array, the difference of
|
|
two or more arrays, etc.
|
|
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import numpy
|
|
|
|
|
|
# Functions below were imported from V2b_inspect.py, 20140404
|
|
|
|
"""
|
|
CONVENTIONS
|
|
|
|
- Usually, M1 is the matrix under examination, M2 is the reference matrix.
|
|
|
|
|
|
PRINCIPLES
|
|
|
|
- Make sure functions are dimension-independent as much as possible
|
|
(i.e. not limited to 1D or 2D only).
|
|
- Text output should not hardwired only to sys.stdout via vanilla print
|
|
statement.
|
|
"""
|
|
|
|
def rms(m):
|
|
return numpy.sqrt(numpy.sum(m**2) / m.size)
|
|
|
|
|
|
def report_diff_stat(M1, M2, out=sys.stdout):
|
|
# Original function name: statdiff
|
|
"""Studies the difference of two arrays (or matrices).
|
|
Usually, M1 is the matrix under examination, M2 is the reference
|
|
matrix.
|
|
Prints out standard report to a given text file, or returns
|
|
the report as a string.
|
|
"""
|
|
dM = M1 - M2
|
|
if len(M1.shape) == 2:
|
|
dM_diag = numpy.diagonal(dM)
|
|
stats = (
|
|
dM.max(),
|
|
dM.min(),
|
|
dM.mean(),
|
|
numpy.abs(dM).mean(),
|
|
rms(dM),
|
|
dM_diag.max(),
|
|
dM_diag.min(),
|
|
dM_diag.mean(),
|
|
numpy.abs(dM_diag).mean(),
|
|
rms(dM_diag),
|
|
)
|
|
if out == tuple:
|
|
return stats
|
|
|
|
rslt = """\
|
|
- max difference = %13.6e
|
|
- min difference = %13.6e
|
|
- mean difference = %13.6e
|
|
- mean abs diff = %13.6e
|
|
- rms difference = %13.6e
|
|
- max difference = %13.6e on diagonal
|
|
- min difference = %13.6e on diagonal
|
|
- mean difference = %13.6e on diagonal
|
|
- mean abs diff = %13.6e on diagonal
|
|
- rms difference = %13.6e on diagonal
|
|
""" % stats
|
|
else:
|
|
stats = (
|
|
dM.max(),
|
|
dM.min(),
|
|
dM.mean(),
|
|
numpy.abs(dM).mean(),
|
|
rms(dM),
|
|
)
|
|
if out == tuple:
|
|
return stats
|
|
|
|
rslt = """\
|
|
- max difference = %13.6e
|
|
- min difference = %13.6e
|
|
- mean difference = %13.6e
|
|
- mean abs diff = %13.6e
|
|
- rms difference = %13.6e
|
|
""" % stats
|
|
|
|
if out == str:
|
|
return rslt
|
|
else:
|
|
out.write(rslt)
|
|
out.flush()
|
|
|
|
|
|
def maxadiff(M1, M2):
|
|
# Original function name: maxdiff
|
|
"""Returns the maximum absolute difference between two matrices.
|
|
Used for checking whether two matrices are identical."""
|
|
return numpy.abs(M1 - M2).max()
|
|
|
|
|
|
def maxadiff_by_col(M1, M2):
|
|
# Original function name: maxdiff_by_col
|
|
"""Assuming two-dimensional array (where first index is the row
|
|
index), computes the maximum absolute difference.
|
|
"""
|
|
return numpy.abs(M1 - M2).max(axis=0)
|
|
|
|
|
|
def maxradiff(M1, M2, ztol=1e-15):
|
|
# Original function name: maxrdiff
|
|
"""Returns the maximum *relative* absolute difference between two matrices.
|
|
Used for checking whether two matrices are identical.
|
|
|
|
CAVEATS:
|
|
- Zero elements in M2 (reference matrix) are converted to unity for
|
|
calculating the relative difference.
|
|
"""
|
|
(mapprox, mref) = (M1, M2)
|
|
diff = numpy.asarray(numpy.abs(mapprox - mref))
|
|
mref0 = numpy.asarray(numpy.abs(mref))
|
|
numpy.putmask(mref0, mref0 < ztol, [1.0])
|
|
|
|
return (diff / mref0).max()
|
|
|
|
|
|
def rdiff(M1, M2, ztol=1e-15, Abs=False):
|
|
"""Returns the *relative* [absolute] difference between two matrices.
|
|
Used for checking whether two matrices are identical.
|
|
Here, M1 is the array being compared, and M2 is the reference array.
|
|
|
|
CAVEATS:
|
|
- Zero elements in M2 (reference matrix) are converted to unity for
|
|
calculating the relative difference.
|
|
"""
|
|
from numpy import abs, array, asarray
|
|
(mapprox, mref) = (M1, M2)
|
|
if Abs:
|
|
diff = abs(asarray(mapprox) - asarray(mref))
|
|
mref0 = abs(asarray(mref))
|
|
numpy.putmask(mref0, mref0 < ztol, [1.0])
|
|
else:
|
|
diff = asarray(mapprox) - asarray(mref)
|
|
mref0 = array(mref, copy=True) # must make a copy
|
|
numpy.putmask(mref0, abs(mref0) < ztol, [1.0])
|
|
|
|
return (diff / mref0)
|
|
|
|
|
|
class ArrayStat(object):
|
|
"""A class to compute the statistics of an array.
|
|
"""
|
|
def __init__(self, mat, save=False):
|
|
mat = numpy.asarray(mat)
|
|
amat = numpy.abs(mat)
|
|
if save:
|
|
self.mat = mat
|
|
self.min = mat.min()
|
|
self.max = mat.max()
|
|
self.mean = mat.mean()
|
|
self.amin = amat.min()
|
|
self.amax = amat.max()
|
|
self.amean = amat.mean()
|
|
self.rms = numpy.sqrt(numpy.sum(amat**2) / amat.size)
|
|
|
|
def report(self, out=sys.stdout):
|
|
"""Prints out a standard report.
|
|
"""
|
|
#print_histogram(study_sparsity(self.diff, (log_delta-5.5,log_delta+4.5+0.1)), xaxis='left_edge')
|
|
rslt = """\
|
|
. min = %12.6g
|
|
. max = %12.6g
|
|
. mean = %12.6g
|
|
. absmin = %12.6g
|
|
. absmax = %12.6g
|
|
. absmean = %12.6g
|
|
. rms = %12.6g
|
|
""" % (self.min, self.max, self.mean,
|
|
self.amin, self.amax, self.amean,
|
|
self.rms)
|
|
|
|
if out == str:
|
|
return rslt
|
|
else:
|
|
out.write(rslt)
|
|
out.flush()
|
|
|
|
|
|
def study_sparsity(M, binrange=(-16.5,6.5,1.0), kind='log10x', minzero=1e-30, args={}):
|
|
# Adapted from V2b_inspect.py research module.
|
|
"""Study the distribution of values or sparsity of a given array, M,
|
|
using histogram.
|
|
|
|
Returns a tuple of length N; the first (N-2) elements are the
|
|
histogram() output, while the last two are the number of elements
|
|
whose values are smaller (greater) than the lower (upper) bound of
|
|
the histogram bind range.
|
|
|
|
Argument 'kind': By default, the 'x' values of the histogram is on
|
|
logarithmic-10 scale.
|
|
Other common log scales can be chosen using 'log2x' and 'logx'.
|
|
Choose 'linx' if you want the linear scale instead.
|
|
|
|
Argument 'binrange' can be one of the following:
|
|
- 3-tuple: the left edge of the first bin, the right edge of the last bin,
|
|
and the width of each bin
|
|
- 2-tuple: the left MIDPOINT of the first bin, the right MIDPOINT
|
|
of the last bin; the width is assumed to be 1.
|
|
|
|
The defaults are a 3-tuple, intended for log10x scale, where
|
|
we check order-of-magnitudes between 1e-16 and 1e+5 (roughly).
|
|
"""
|
|
from numpy import abs, count_nonzero, arange, histogram, log10, log2, log
|
|
if kind == 'log10x': # usual way, for broad view
|
|
log_val = log10(abs(M.flatten()) + minzero)
|
|
elif kind == 'log2x':
|
|
log_val = log2(abs(M.flatten()) + minzero)
|
|
elif kind == 'logx':
|
|
log_val = log(abs(M.flatten()) + minzero)
|
|
elif kind == 'linx': # linear x scale, usually for more detailed view
|
|
log_val = abs(M.flatten())
|
|
else:
|
|
raise ValueError, "Invalid kind=%s" % (str(kind))
|
|
if len(binrange) == 3:
|
|
binedges = numpy.arange(*binrange)
|
|
elif len(binrange) == 2:
|
|
l = binrange[0]-0.5
|
|
r = binrange[1]+0.5
|
|
binedges = numpy.arange(l,r)
|
|
else:
|
|
raise ValueError, "Invalid binrange parameter value"
|
|
#print binedges
|
|
hist = histogram(log_val, bins=binedges, **args)
|
|
# Count values outside the range being considered:
|
|
l = 0
|
|
r = 0
|
|
llim = binedges[0]
|
|
rlim = binedges[-1]
|
|
l = count_nonzero(log_val < llim)
|
|
r = count_nonzero(log_val > rlim)
|
|
return hist + (l,r)
|
|
|
|
|
|
def print_histogram(hist, xaxis='midpt',
|
|
# output formatting
|
|
xwidth=3, xprec=0,
|
|
out=sys.stdout):
|
|
# Adapted from V2b_inspect.py research module.
|
|
"""Prints histogram in an easier way to visualize in text fashion.
|
|
"""
|
|
if len(hist) >= 4:
|
|
# special case: for study_sparsity output.
|
|
(bar,edges,lexcess,rexcess) = hist
|
|
else:
|
|
# for all other cases--
|
|
(bar,edges) = hist[:2]
|
|
lexcess = 0
|
|
rexcess = 0
|
|
if xaxis == 'midpt':
|
|
midpt = (edges[1:] + edges[:-1]) * 0.5
|
|
elif xaxis == 'leftpt':
|
|
midpt = edges[:-1]
|
|
elif xaxis == 'rightpt':
|
|
midpt = edges[1:]
|
|
else:
|
|
raise ValueError, "Invalid xaxis parameter value"
|
|
width = max(int(numpy.log10(max(bar.max(), lexcess, rexcess))+1), xwidth)
|
|
#print "width = %d" % width
|
|
barfmt = " %" + str(width) + "d"
|
|
midfmt = " %" + str(width) + "." + str(xprec) + "f"
|
|
if out == str:
|
|
from StringIO import StringIO
|
|
out = StringIO()
|
|
def retval():
|
|
return out.getvalue()
|
|
else:
|
|
def retval():
|
|
return None
|
|
|
|
def Print(x):
|
|
out.write(x)
|
|
|
|
out.write((barfmt % lexcess) \
|
|
+ "".join([ barfmt % i for i in bar ]) \
|
|
+ (barfmt % rexcess) \
|
|
+ "\n")
|
|
out.write(" " * width + "<" \
|
|
+ "".join([ midfmt % i for i in midpt ]) \
|
|
+ " " * width + ">" \
|
|
+ "\n")
|
|
|
|
return retval()
|
|
|
|
|
|
|
|
|