From 564d1f43648bde6ee246ad15bc6e5c916ecb2019 Mon Sep 17 00:00:00 2001
From: Wirawan Purwanto
Date: Mon, 19 Aug 2013 15:43:49 -0400
Subject: [PATCH] * wpylib.math.fitting.fit_func: Added support for weight (or
 y uncertainty).

* For `leastsq' fit method, also introduced 'xerr' output (in outfmt=0) as
  the error estimate of the fitted parameters.
---
 math/fitting/__init__.py | 102 ++++++++++++++++++++++++++++-----------
 1 file changed, 73 insertions(+), 29 deletions(-)

diff --git a/math/fitting/__init__.py b/math/fitting/__init__.py
index 68ced01..b193374 100644
--- a/math/fitting/__init__.py
+++ b/math/fitting/__init__.py
@@ -121,7 +121,9 @@ class Poly_order4(Poly_base):
 class fit_result(result_base):
   pass
 
-def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
+def fit_func(Funct, Data=None, Guess=None,
+             x=None, y=None,
+             w=None, dy=None,
              debug=0,
              outfmt=1,
              Funct_hook=None,
@@ -145,6 +147,11 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
   The "y" array is a 1-D array of length M, which contain the "measured"
   value of the function at every domain point given in "x".
 
+  The "w" or "dy" array (only one of them can be specified in a call),
+  if given, specifies either the weight or the standard error of the y data.
+  If "dy" is specified, then "w" is defined to be (1.0 / dy**2), per the
+  usual convention.
+
   Inspect Poly_base, Poly_order2, and other similar function classes in
   this module to see the example of the Funct function.
 
@@ -157,9 +164,9 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
   * via Data argument (which is a multi-column dataset, where
     the first row is the "y" argument).
 
-  Debugging and other investigations can be done with Funct_hook, which,
-  if defined, will be called every time right after Funct is called.
-  It is called with the following parameters:
+  Debugging and other investigations can be done with "Funct_hook", which,
+  if defined, will be called every time right after "Funct" is called.
+  It is called with the following signature:
 
     Funct_hook(C, x, y, f, r)
 
   where f := f(C,x)
@@ -173,6 +180,10 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
   if Data != None: # an alternative way to specifying x and y
     y = Data[0]
     x = Data[1:] # possibly multidimensional!
+
+  if debug >= 10:
+    print "Dimensionality of the domain is: ", len(x)
+
   if Guess != None:
     pass
   elif hasattr(Funct, "Guess_xy"):
@@ -185,40 +196,57 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
   elif Guess == None: # VERY OLD, DO NOT USE ANYMORE!
     Guess = [ y.mean() ] + [0.0, 0.0] * len(x)
 
+  if debug >= 5:
+    print "Guess params:"
+    print Guess
+
   if Funct_hook != None:
     if not hasattr(Funct_hook, "__call__"):
       raise TypeError, "Funct_hook argument must be a callable function."
-    def fun_err(CC, xx, yy):
+    def fun_err(CC, xx, yy, ww):
+      """Computes the error of the fitted functional against the
+      reference data points:
+
+      * CC = current function parameters
+      * xx = domain points of the ("experimental") data
+      * yy = target points of the ("experimental") data
+      * ww = weights of the ("experimental") data
+      """
       ff = Funct(CC,xx)
-      r = (ff - yy)
+      r = (ff - yy) * ww
       Funct_hook(CC, xx, yy, ff, r)
       return r
-    fun_err2 = lambda CC, xx, yy: numpy.sum(abs(fun_err(CC, xx, yy))**2)
   elif debug < 20:
-    fun_err = lambda CC, xx, yy: (Funct(CC,xx) - yy)
-    fun_err2 = lambda CC, xx, yy: numpy.sum(abs(Funct(CC,xx) - yy)**2)
-  else:
-    def fun_err(CC, xx, yy):
+    def fun_err(CC, xx, yy, ww):
       ff = Funct(CC,xx)
-      r = (ff - yy)
-      print " err: %s << %s << %s, %s" % (r, ff, CC, xx)
+      r = (ff - yy) * ww
       return r
-    def fun_err2(CC, xx, yy):
+  else:
+    def fun_err(CC, xx, yy, ww):
       ff = Funct(CC,xx)
-      r = numpy.sum(abs(ff - yy)**2)
-      print " err: %s << %s << %s, %s" % (r, ff, CC, xx)
+      r = (ff - yy) * ww
+      print " err: %s << %s << %s, %s, %s" % (r, ff, CC, xx, ww)
       return r
 
-  if debug >= 5:
-    print "Guess params:"
-    print Guess
+  fun_err2 = lambda CC, xx, yy, ww: numpy.sum(abs(fun_err(CC, xx, yy, ww))**2)
+
+  if w != None and dy != None:
+    raise TypeError, "Only one of w or dy can be specified."
+  if dy != None:
+    sqrtw = 1.0 / dy
+  elif w != None:
+    sqrtw = numpy.sqrt(w)
+  else:
+    sqrtw = 1.0
+  # Full result is stored in rec
+  rec = fit_result()
   extra_keys = {}
   if method == 'leastsq':
     # modified Levenberg-Marquardt algorithm
     rslt = leastsq(fun_err,
                    x0=Guess, # initial coefficient guess
-                   args=(x,y), # data onto which the function is fitted
+                   args=(x,y,sqrtw), # data onto which the function is fitted
                    full_output=1,
                    **opts
                    )
@@ -227,11 +255,22 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
       # map the output values to the same keyword as other methods below:
       'funcalls': (lambda : rslt[2]['nfev']),
     }
+    # Added estimate of fit parameter uncertainty (matching GNUPLOT parameter
+    # uncertainty).
+    # The error is estimated to be the diagonal of cov_x, multiplied by the WSSR
+    # (chi_square below) and divided by the number of fit degrees of freedom.
+    # I used the newer scipy.optimize.curve_fit() routine as my cheat sheet here.
+    if outfmt == 0:
+      if rslt[1] != None and len(y) > len(rslt[0]):
+        NDF = len(y) - len(rslt[0])
+        extra_keys['xerr'] = (lambda:
+            numpy.sqrt(numpy.diagonal(rslt[1]) * rec['chi_square'] / NDF)
+          )
   elif method == 'fmin':
     # Nelder-Mead Simplex algorithm
     rslt = fmin(fun_err2,
                 x0=Guess, # initial coefficient guess
-                args=(x,y), # data onto which the function is fitted
+                args=(x,y,sqrtw), # data onto which the function is fitted
                 full_output=1,
                 **opts
                 )
@@ -240,7 +279,7 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
     # Broyden-Fletcher-Goldfarb-Shanno (BFGS) algorithm
     rslt = fmin_bfgs(fun_err2,
                      x0=Guess, # initial coefficient guess
-                     args=(x,y), # data onto which the function is fitted
+                     args=(x,y,sqrtw), # data onto which the function is fitted
                      full_output=1,
                      **opts
                      )
@@ -248,14 +287,14 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
   elif method == 'anneal':
     rslt = anneal(fun_err2,
                   x0=Guess, # initial coefficient guess
-                  args=(x,y), # data onto which the function is fitted
+                  args=(x,y,sqrtw), # data onto which the function is fitted
                   full_output=1,
                   **opts
                   )
     keys = ('xopt', 'fopt', 'T', 'funcalls', 'iter', 'accept', 'retval')
   else:
     raise ValueError, "Unsupported minimization method: %s" % method
-  chi_sqr = fun_err2(rslt[0], x, y)
+  chi_sqr = fun_err2(rslt[0], x, y, sqrtw)
   last_chi_sqr = chi_sqr
   last_fit_rslt = rslt
   if (debug >= 10):
@@ -265,14 +304,19 @@ def fit_func(Funct, Data=None, Guess=None, x=None, y=None,
   if debug >= 1:
     print "params = ", rslt[0]
     print "chi square = ", last_chi_sqr / len(y)
-  if outfmt == 1:
-    return rslt[0]
-  else: # outfmt == 0 -- full result.
-    rec = fit_result(dict(zip(keys, rslt)))
+  if outfmt == 0: # outfmt == 0 -- full result.
+    rec.update(dict(zip(keys, rslt)))
     rec['chi_square'] = chi_sqr
     rec['fit_method'] = method
     # If there are extra keys, record them here:
     for (k,v) in extra_keys.iteritems():
      rec[k] = v()
     return rec
-
+  elif outfmt == 1:
+    return rslt[0]
+  else:
+    try:
+      x = str(outfmt)
+    except:
+      x = "(?)"
+    raise ValueError, "Invalid `outfmt' argument = " + x
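
Usage note (not part of the patch): a minimal sketch of a weighted fit using the
new keywords. The model `line`, the arrays t/y/dy, and their values are made up
for illustration; only fit_func and its keywords (Data, Guess, w, dy, method,
outfmt) plus the result fields 'chi_square' and 'xerr' come from the patch, and
passing method='leastsq' explicitly assumes `method` is a keyword argument of
fit_func (its default is not shown in this diff).

  import numpy
  from wpylib.math.fitting import fit_func

  # Hypothetical model: a straight line y = C[0] + C[1]*t, written in the
  # Funct(C, x) convention, where x[0] is the 1-D array of domain points.
  line = lambda C, x: C[0] + C[1] * x[0]

  t  = numpy.array([0.0, 1.0, 2.0, 3.0, 4.0])   # made-up domain points
  y  = numpy.array([1.1, 2.9, 5.2, 6.8, 9.1])   # made-up measurements
  dy = numpy.array([0.2, 0.2, 0.3, 0.3, 0.4])   # made-up std. errors of y

  # outfmt=1 (the default): returns only the optimized parameters.
  C_opt = fit_func(line, Data=numpy.vstack([y, t]), Guess=[1.0, 2.0],
                   dy=dy, method='leastsq')

  # outfmt=0: returns the full fit_result record; for 'leastsq' it also
  # carries 'xerr', the gnuplot-style parameter uncertainties
  # sqrt(diag(cov_x) * chi_square / NDF).
  rec = fit_func(line, Data=numpy.vstack([y, t]), Guess=[1.0, 2.0],
                 dy=dy, method='leastsq', outfmt=0)
  print "params      = ", C_opt
  print "chi square  = ", rec['chi_square']   # weighted SSR: sum(((f-y)/dy)**2)
  print "param errs  = ", rec['xerr']

  # Passing w = 1/dy**2 instead of dy applies the same weighting, per the
  # convention documented in the revised docstring.
  rec2 = fit_func(line, Data=numpy.vstack([y, t]), Guess=[1.0, 2.0],
                  w=1.0/dy**2, method='leastsq', outfmt=0)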
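
The "Funct_hook" callback documented in the revised docstring can be used to
trace each evaluation during the fit. A small hypothetical hook following the
Funct_hook(C, x, y, f, r) signature (where r is now the weighted residual),
reusing the arrays from the sketch above:

  def my_hook(C, x, y, f, r):
    # r is the weighted residual (f - y)*sqrt(w) handed to the minimizer
    print "C = %s  |r|^2 = %s" % (C, numpy.sum(abs(r)**2))

  rec = fit_func(line, Data=numpy.vstack([y, t]), Guess=[1.0, 2.0],
                 dy=dy, method='leastsq', outfmt=0, Funct_hook=my_hook)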