My tools of the trade for python programming.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

153 lines
3.9 KiB

#
# wpylib.math.fitting.linear module
# Created: 20121015
# Wirawan Purwanto
#
"""
wpylib.math.fitting.linear module
Linear fitting tools.
"""
import numpy
from wpylib.math.fitting import fit_result
def linregr2d_SZ(x, y, sigma=None):
"""Performs a linear least square regression to according to a
linear model
y(x) = a + b*x ,
where the input y has uncertainty given by sigma.
"""
from numpy import sum, sqrt
# Based on Shiwei's regr.F code (from email received 20060102).
# See Linear-regression.txt in my repository of Shiwei's files.
# See also Numerical Recipes in C, 2nd ed, Sec. 15.2.
xx = numpy.array(x, copy=False)
yy = numpy.array(y, copy=False)
if sigma == None:
# My addition -- can be dangerous
# In case of no errorbar, we proceed as if all measurement
# data have the same uncertainty, taken to be 1.
ww = numpy.ones_like(y)
else:
ww = numpy.array(sigma, copy=False)
ww **= -2 # make 1/sigma**2 array
e1 = sum(xx * yy * ww)
e2 = sum(yy * ww)
d11 = sum(xx * ww)
d12 = sum(xx**2 * ww)
d21 = sum(ww)
d22 = d11
detinv = 1.0 / (d11*d22 - d12*d21)
a = (e1*d22 - e2*d12) * detinv
b = (e2*d11 - e1*d21) * detinv
# Shiwei's old method of computing the uncertainty of the
# y-intersect (sigma_a):
varsum = sum((xx*d11 - d12)**2 * ww)
var = varsum * detinv**2
sigma = sqrt(var)
# New method based on NR chapter: sqrt(sigma_a2) must give
# identical result to sigma or else something is screwy!
sigma_a2 = d12 * (-detinv)
sigma_b2 = d21 * (-detinv)
print sigma_a2
print sigma_b2
return fit_result(
fit_method='linregr2d_SZ',
fit_model='linear',
a=a,
b=b,
sigma=sigma,
sigma_a=sqrt(sigma_a2),
sigma_b=sqrt(sigma_b2),
)
def Test_1():
"""Testcase 1.
>>> wpylib.math.fitting.linear.Test_1()
...
{'a': -1392.3182324234213,
'b': -0.82241012516149792,
'fit_method': 'linregr2d_SZ',
'fit_model': 'linear',
'sigma': 0.00048320905704467775,
'sigma_a': 0.00048320905704467786,
'sigma_b': 0.080335909573397646}
My wlinreg tool (via 'dtextrap' shell script alias gives:
a stats:
Total number of data : 100000
Average : -1392.32
Sample standard deviation: 0.000460341
Error of the average : 1.45573e-06 (-1.046e-07%)
b stats:
Total number of data : 100000
Average : -0.822099
Sample standard deviation: 0.0803118
Error of the average : 0.00025397 (-0.03089%)
Summary
a = -1392.31823569246 +/- 0.000460341146124978 = -1392.31824(46)
b = -0.822098515674071 +/- 0.0803118207916705 = -0.822(80)
which is close enough for this purpose!
"""
from wpylib.text_tools import make_matrix as mtx
M = mtx("""
# Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015
0.01 -1392.32619 0.00047
0.005 -1392.32284 0.00037
0.0025 -1392.31994 0.00038
""")
x = M[:,0]
y = M[:,1]
dy = M[:,2]
rslt = linregr2d_SZ(x,y,dy)
print rslt
return rslt
def Test_2():
"""Testcase 2.
Similar to testcase 1 but with all uncertainties == 1.
>>> wpylib.math.fitting.linear.Test_2()
polyfit result = -0.809999999961 -1392.318265
{'a': -1392.3182649999987,
'b': -0.81000000006627304,
'fit_method': 'linregr2d_SZ',
'fit_model': 'linear',
'sigma': 1.2247448713915881,
'sigma_a': 1.2247448713915885,
'sigma_b': 185.16401995451022}
This is to be compared with the polyfit output.
"""
from wpylib.text_tools import make_matrix as mtx
M = mtx("""
# Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015
0.01 -1392.32619 1.0
0.005 -1392.32284 1.0
0.0025 -1392.31994 1.0
""")
x = M[:,0]
y = M[:,1]
dy = M[:,2]
rslt = linregr2d_SZ(x,y,dy)
polyfit_result = numpy.polyfit(x,y,deg=1,full=False)
print "polyfit result = ", polyfit_result[0], polyfit_result[1]
return rslt