wpylib/math/fitting/linear.py

#
# wpylib.math.fitting.linear module
# Created: 20121015
# Wirawan Purwanto
#

"""
wpylib.math.fitting.linear module

Linear fitting tools.
"""

import numpy
from wpylib.math.fitting import fit_result

def linregr2d_SZ(x, y, sigma=None):
  """Performs a linear least square regression to according to a
  linear model

    y(x) = a + b*x ,

  where the input y has uncertainty given by sigma.
  """
  from numpy import sum, sqrt

  # Based on Shiwei's regr.F code (from email received 20060102).
  # See Linear-regression.txt in my repository of Shiwei's files.
  # See also Numerical Recipes in C, 2nd ed, Sec. 15.2.
  xx = numpy.array(x, copy=False)
  yy = numpy.array(y, copy=False)
  if sigma == None:
    # My addition -- can be dangerous
    # In case of no errorbar, we proceed as if all measurement
    # data have the same uncertainty, taken to be 1.
    ww = numpy.ones_like(y)
  else:
    ww = numpy.array(sigma, copy=False)
    ww **= -2      # make 1/sigma**2 array

  e1 = sum(xx * yy * ww)
  e2 = sum(yy * ww)
  d11 = sum(xx * ww)
  d12 = sum(xx**2 * ww)
  d21 = sum(ww)
  d22 = d11

  detinv = 1.0 / (d11*d22 - d12*d21)
  a = (e1*d22 - e2*d12) * detinv
  b = (e2*d11 - e1*d21) * detinv

  # Shiwei's old method of computing the uncertainty of the
  # y-intersect (sigma_a):
  varsum = sum((xx*d11 - d12)**2 * ww)
  var = varsum * detinv**2
  sigma = sqrt(var)

  # New method based on NR chapter: sqrt(sigma_a2) must give
  # identical result to sigma or else something is screwy!
  sigma_a2 = d12 * (-detinv)
  sigma_b2 = d21 * (-detinv)

  #print sigma_a2
  #print sigma_b2

  return fit_result(
    fit_method='linregr2d_SZ',
    fit_model='linear',
    a=a,
    b=b,
    sigma=sigma,
    sigma_a=sqrt(sigma_a2),
    sigma_b=sqrt(sigma_b2),
  )


def Test_1():
  """Testcase 1.

      >>> wpylib.math.fitting.linear.Test_1()
      ...
      {'a': -1392.3182324234213,
       'b': -0.82241012516149792,
       'fit_method': 'linregr2d_SZ',
       'fit_model': 'linear',
       'sigma': 0.00048320905704467775,
       'sigma_a': 0.00048320905704467786,
       'sigma_b': 0.080335909573397646}

  My wlinreg tool (via 'dtextrap' shell script alias gives:

      a stats:
      Total number of data     : 100000
      Average                  : -1392.32
      Sample standard deviation: 0.000460341
      Error of the average     : 1.45573e-06 (-1.046e-07%)
      b stats:
      Total number of data     : 100000
      Average                  : -0.822099
      Sample standard deviation: 0.0803118
      Error of the average     : 0.00025397 (-0.03089%)
      Summary
      a =      -1392.31823569246 +/-   0.000460341146124978 = -1392.31824(46)
      b =     -0.822098515674071 +/-     0.0803118207916705 = -0.822(80)

  which is close enough for this purpose!
  """
  from wpylib.text_tools import make_matrix as mtx
  M = mtx("""
  # Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015
   0.01    -1392.32619 0.00047
   0.005   -1392.32284 0.00037
   0.0025  -1392.31994 0.00038
  """)
  x = M[:,0]
  y = M[:,1]
  dy = M[:,2]
  rslt = linregr2d_SZ(x,y,dy)
  print rslt
  return rslt


def Test_2():
  """Testcase 2.
  Similar to testcase 1 but with all uncertainties == 1.

      >>> wpylib.math.fitting.linear.Test_2()
      polyfit result = -0.809999999961 -1392.318265
      {'a': -1392.3182649999987,
       'b': -0.81000000006627304,
       'fit_method': 'linregr2d_SZ',
       'fit_model': 'linear',
       'sigma': 1.2247448713915881,
       'sigma_a': 1.2247448713915885,
       'sigma_b': 185.16401995451022}

  This is to be compared with the polyfit output.
  """
  from wpylib.text_tools import make_matrix as mtx
  M = mtx("""
  # Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015
   0.01    -1392.32619 1.0
   0.005   -1392.32284 1.0
   0.0025  -1392.31994 1.0
  """)
  x = M[:,0]
  y = M[:,1]
  dy = M[:,2]
  rslt = linregr2d_SZ(x,y,dy)

  polyfit_result = numpy.polyfit(x,y,deg=1,full=False)
  print "polyfit result = ", polyfit_result[0], polyfit_result[1]
  return rslt


def Test_3():
  """Testcase 3.
  Feed of test from 1/x**3 extrapolation, Ca+4H2 Z=2.3 cc-pCV[TQ5]Z basis

      >>> wpylib.math.fitting.linear.Test_3()
      {'a': -0.92257959784330612,
       'b': 10.193612525866801,
       'fit_method': 'linregr2d_SZ',
       'fit_model': 'linear',
       'sigma': 0.0010352279401853368,
       'sigma_a': 0.0010352279401853377,
       'sigma_b': 0.036555525396641586}

  """
  from wpylib.text_tools import make_matrix as mtx
  M = mtx("""
  # Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015
  # Groff notebook 1.90, table "testZ23 geometry: Z=2.3 dHH=0.7682", near
  # "/// begin extra (for paper)"
   0.03703704 -0.54533  0.00061
   0.015625   -0.76167  0.00074
   0.008      -0.8442   0.0012
  """)
  x = M[:,0]
  y = M[:,1]
  dy = M[:,2]
  rslt = linregr2d_SZ(x,y,dy)
  return rslt
* wpylib.math.fitting.linear: for linear fit methods. First method: linregr2d_SZ() from Shiwei's email in 2006. 12 years ago			`#`
			`# wpylib.math.fitting.linear module`
			`# Created: 20121015`
			`# Wirawan Purwanto`
			`#`

			`"""`
			`wpylib.math.fitting.linear module`

			`Linear fitting tools.`
			`"""`

			`import numpy`
			`from wpylib.math.fitting import fit_result`

			`def linregr2d_SZ(x, y, sigma=None):`
			`"""Performs a linear least square regression to according to a`
			`linear model`

			`y(x) = a + b*x ,`

			`where the input y has uncertainty given by sigma.`
			`"""`
			`from numpy import sum, sqrt`

			`# Based on Shiwei's regr.F code (from email received 20060102).`
			`# See Linear-regression.txt in my repository of Shiwei's files.`
			`# See also Numerical Recipes in C, 2nd ed, Sec. 15.2.`
			`xx = numpy.array(x, copy=False)`
			`yy = numpy.array(y, copy=False)`
			`if sigma == None:`
			`# My addition -- can be dangerous`
			`# In case of no errorbar, we proceed as if all measurement`
			`# data have the same uncertainty, taken to be 1.`
			`ww = numpy.ones_like(y)`
			`else:`
			`ww = numpy.array(sigma, copy=False)`
			`ww = -2 # make 1/sigma2 array`

			`e1 = sum(xx * yy * ww)`
			`e2 = sum(yy * ww)`
			`d11 = sum(xx * ww)`
			`d12 = sum(xx*2 ww)`
			`d21 = sum(ww)`
			`d22 = d11`

			`detinv = 1.0 / (d11d22 - d12d21)`
			`a = (e1d22 - e2d12) * detinv`
			`b = (e2d11 - e1d21) * detinv`
* Provide uncertainty of both a and b fit parameters. 12 years ago
			`# Shiwei's old method of computing the uncertainty of the`
			`# y-intersect (sigma_a):`
* wpylib.math.fitting.linear: for linear fit methods. First method: linregr2d_SZ() from Shiwei's email in 2006. 12 years ago			`varsum = sum((xxd11 - d12)2 ww)`
			`var = varsum * detinv**2`
			`sigma = sqrt(var)`

* Provide uncertainty of both a and b fit parameters. 12 years ago			`# New method based on NR chapter: sqrt(sigma_a2) must give`
			`# identical result to sigma or else something is screwy!`
			`sigma_a2 = d12 * (-detinv)`
			`sigma_b2 = d21 * (-detinv)`

* Mute debug printing. 12 years ago			`#print sigma_a2`
			`#print sigma_b2`
* Provide uncertainty of both a and b fit parameters. 12 years ago
* wpylib.math.fitting.linear: for linear fit methods. First method: linregr2d_SZ() from Shiwei's email in 2006. 12 years ago			`return fit_result(`
			`fit_method='linregr2d_SZ',`
			`fit_model='linear',`
			`a=a,`
			`b=b,`
			`sigma=sigma,`
* Provide uncertainty of both a and b fit parameters. 12 years ago			`sigma_a=sqrt(sigma_a2),`
			`sigma_b=sqrt(sigma_b2),`
* wpylib.math.fitting.linear: for linear fit methods. First method: linregr2d_SZ() from Shiwei's email in 2006. 12 years ago			`)`



			`def Test_1():`
			`"""Testcase 1.`

			`>>> wpylib.math.fitting.linear.Test_1()`
			`...`
			`{'a': -1392.3182324234213,`
			`'b': -0.82241012516149792,`
			`'fit_method': 'linregr2d_SZ',`
			`'fit_model': 'linear',`
* Provide uncertainty of both a and b fit parameters. 12 years ago			`'sigma': 0.00048320905704467775,`
			`'sigma_a': 0.00048320905704467786,`
			`'sigma_b': 0.080335909573397646}`
* wpylib.math.fitting.linear: for linear fit methods. First method: linregr2d_SZ() from Shiwei's email in 2006. 12 years ago
			`My wlinreg tool (via 'dtextrap' shell script alias gives:`

			`a stats:`
			`Total number of data : 100000`
			`Average : -1392.32`
			`Sample standard deviation: 0.000460341`
			`Error of the average : 1.45573e-06 (-1.046e-07%)`
			`b stats:`
			`Total number of data : 100000`
			`Average : -0.822099`
			`Sample standard deviation: 0.0803118`
			`Error of the average : 0.00025397 (-0.03089%)`
			`Summary`
			`a = -1392.31823569246 +/- 0.000460341146124978 = -1392.31824(46)`
			`b = -0.822098515674071 +/- 0.0803118207916705 = -0.822(80)`

* Provide uncertainty of both a and b fit parameters. 12 years ago			`which is close enough for this purpose!`
* wpylib.math.fitting.linear: for linear fit methods. First method: linregr2d_SZ() from Shiwei's email in 2006. 12 years ago			`"""`
			`from wpylib.text_tools import make_matrix as mtx`
			`M = mtx("""`
			`# Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015`
			`0.01 -1392.32619 0.00047`
			`0.005 -1392.32284 0.00037`
			`0.0025 -1392.31994 0.00038`
			`""")`
			`x = M[:,0]`
			`y = M[:,1]`
			`dy = M[:,2]`
			`rslt = linregr2d_SZ(x,y,dy)`
			`print rslt`
			`return rslt`
* Added testcase wpylib.math.fitting.linear.Test_2() against numpy.polyfit(). 12 years ago

			`def Test_2():`
			`"""Testcase 2.`
			`Similar to testcase 1 but with all uncertainties == 1.`

			`>>> wpylib.math.fitting.linear.Test_2()`
			`polyfit result = -0.809999999961 -1392.318265`
			`{'a': -1392.3182649999987,`
			`'b': -0.81000000006627304,`
			`'fit_method': 'linregr2d_SZ',`
			`'fit_model': 'linear',`
			`'sigma': 1.2247448713915881,`
			`'sigma_a': 1.2247448713915885,`
			`'sigma_b': 185.16401995451022}`

			`This is to be compared with the polyfit output.`
			`"""`
			`from wpylib.text_tools import make_matrix as mtx`
			`M = mtx("""`
			`# Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015`
			`0.01 -1392.32619 1.0`
			`0.005 -1392.32284 1.0`
			`0.0025 -1392.31994 1.0`
			`""")`
			`x = M[:,0]`
			`y = M[:,1]`
			`dy = M[:,2]`
			`rslt = linregr2d_SZ(x,y,dy)`

			`polyfit_result = numpy.polyfit(x,y,deg=1,full=False)`
			`print "polyfit result = ", polyfit_result[0], polyfit_result[1]`
			`return rslt`
* Added Test_3(). 12 years ago

			`def Test_3():`
			`"""Testcase 3.`
			`Feed of test from 1/x**3 extrapolation, Ca+4H2 Z=2.3 cc-pCV[TQ5]Z basis`

			`>>> wpylib.math.fitting.linear.Test_3()`
			`{'a': -0.92257959784330612,`
			`'b': 10.193612525866801,`
			`'fit_method': 'linregr2d_SZ',`
			`'fit_model': 'linear',`
			`'sigma': 0.0010352279401853368,`
			`'sigma_a': 0.0010352279401853377,`
			`'sigma_b': 0.036555525396641586}`

			`"""`
			`from wpylib.text_tools import make_matrix as mtx`
			`M = mtx("""`
			`# Source: Co+ QMC/CAS(8,11)d26 cc-pwCVQZ-DK result dated 20121015`
			`# Groff notebook 1.90, table "testZ23 geometry: Z=2.3 dHH=0.7682", near`
			`# "/// begin extra (for paper)"`
			`0.03703704 -0.54533 0.00061`
			`0.015625 -0.76167 0.00074`
			`0.008 -0.8442 0.0012`
			`""")`
			`x = M[:,0]`
			`y = M[:,1]`
			`dy = M[:,2]`
			`rslt = linregr2d_SZ(x,y,dy)`
			`return rslt`