### * Added wpylib.math.stats.jackknife1, initial implementation of delete-one

```jackknife resampling method.

This module also contains a hack for weighted average
(warning: the theory is not established yet, at least I have not seen
it).```
master
parent f0ba6f4068
commit 4e2b2c66c2
1 changed files with 162 additions and 0 deletions
1. 162
math/stats/jackknife1.py

#### 162 math/stats/jackknife1.py Unescape Escape View File

 `@ -0,0 +1,162 @@` `"""` `REFERENCES:` ``` ``` `Jackknife and Bootstrap Resampling Methods in Statistical Analysis to Correct for Bias.` `P. Young` `http://young.physics.ucsc.edu/jackboot.pdf` ``` ``` ``` ``` `Notes on Bootstrapping` ``` ``` ``` ``` ``` ``` `"""` ``` ``` `import numpy` ``` ``` `from numpy import pi, cos` `from numpy.random import normal` ``` ``` `def test1_generate_data(ndata=1000):` ` """` ``` ``` ` """` ` return pi / 3 + normal(size=ndata)` ``` ``` ``` ``` `def test1():` ` global test1_dset` ` test1_dset = test1_generate_data()` ` dset = test1_dset` ` print "first jackknife routine: jk_generate_datasets -> jk_wstats"` ` dset_jk = jk_generate_datasets(dset)` ` cos_avg1 = jk_wstats(dset_jk, func=numpy.cos)` ` print cos_avg1` ``` ``` ` print "second jackknife routine: jk_generate_averages -> jk_stats_aa"` ` aa_jk = jk_generate_averages(dset)` ` cos_avg2 = jk_stats_aa(aa_jk, func=numpy.cos)` ` print cos_avg2` ``` ``` ` # the two results above must be identical` ``` ``` ``` ``` `def test2_generate_data():` ` rootdir = "/home/wirawan/Work/PWQMC-77/expt/qmc/MnO/AFM2/rh.1x1x1/Opium-GFRG/vol10.41/k-0772+3780+2187.run"` ` srcfile = rootdir + "/measurements.h5"` ` from pyqmc.results.pwqmc_meas import meas_hdf5` ``` ``` ` global test2_db` ` test2_db = meas_hdf5(srcfile)` ``` ``` ``` ``` `def jk_select_dataset(a, i):` ` """Selects the i-th dataset for jackknife operation from a` ` given dataset 'a'.` ` The argument i must be: 0 <= 0 < len(a).` ` This is essentially deleting the i-th data point from the` ` original dataset.` ` """` ` a = numpy.asarray(a)` ` N = a.shape[0]` ` assert len(a.shape) == 1` ` assert 0 <= i < N` ` rslt = numpy.empty(shape=(N-1,), dtype=a.dtype)` ` rslt[:i] = a[:i]` ` rslt[i:] = a[i+1:]` ` return rslt` ``` ``` `def jk_generate_datasets(a):` ` """Generates ALL the datasets for jackknife operation from` ` the original dataset 'a'.` ` For the i-th dataset, this is essentially deleting the` ` i-th data point from 'a'.` ` """` ` a = numpy.asarray(a)` ` N = a.shape[0]` ` assert len(a.shape) == 1` ` rslt = numpy.empty(shape=(N,N-1,), dtype=a.dtype)` ` for i in xrange(N):` ` rslt[i, :i] = a[:i]` ` rslt[i, i:] = a[i+1:]` ` return rslt` ``` ``` `def jk_generate_averages(a, weights=None):` ` """Generates ALL the average samples for jackknife operation` ` from the original dataset 'a'.` ` For the i-th dataset, this is essentially deleting the` ` i-th data point from 'a', then taking the average.` ` ` ` This version does not store N*(N-1) data points; only (N).` ` """` ` a = numpy.asarray(a)` ` N = a.shape[0]` ` assert len(a.shape) == 1` ` aa_jk = numpy.empty(shape=(N,), dtype=a.dtype)` ` dset_i = numpy.empty(shape=(N-1,), dtype=a.dtype)` ` if weights != None:` ` weights_i = numpy.empty(shape=(N-1,), dtype=weights.dtype)` ` for i in xrange(N):` ` dset_i[:i] = a[:i]` ` dset_i[i:] = a[i+1:]` ` if weights != None:` ` weights_i[:i] = weights[:i]` ` weights_i[i:] = weights[i+1:]` ` aa_jk[i] = numpy.average(dset_i, weights=weights_i)` ` else:` ` aa_jk[i] = numpy.mean(dset_i)` ``` ``` ` return aa_jk` ``` ``` `'''` `def jk_stats_old(a_jk, func=None):` ` """a_jk must be in the same format as that produced by` ``` ``` ` """` ` # get all the jackknived stats.` ` if func == None:` ` jk_mean = numpy.mean(a_jk, axis=1)` ` else:` ` jk_mean = numpy.mean(func(a_jk), axis=1)` `'''` ``` ``` `def jk_wstats_dsets(a_jk, w_jk=None, func=None):` ` """a_jk and w_jk must be in the same format as that produced by` ` jk_generate_datasets.` ``` ``` ` """` ` # get all the jackknived stats.` ` N = len(a_jk)` ` # reconstruct full "a" array:` ` a = numpy.empty(shape=(N,), dtype=a_jk.dtype)` ` a[1:] = a_jk[0]` ` a[0] = a_jk[1][0]` ` if func == None:` ` func = lambda x : x` ` aa_jk = numpy.average(a_jk, axis=1, weights=w_jk)` ` #print aa_jk` ` f_jk = func(aa_jk)` ` mean = numpy.mean(f_jk)` ` var = numpy.std(f_jk) * numpy.sqrt(N-1)` ` mean_unbiased = N * func(a.mean()) - (N-1) * mean` ` return (mean, var, mean_unbiased)` ``` ``` ``` ``` `def jk_stats_aa(aa_jk, func=None, a=None):` ` """Computes the jackknife statistics from the preprocessed` ` jackknife averages (aa_jk).` ` The input array aa_jk is computed by jk_generate_averages().` ` """` ` # get all the jackknived stats.` ` N = len(aa_jk)` ` # reconstruct full "a" array:` ` if func == None:` ` func = lambda x : x` ` f_jk = func(aa_jk)` ` mean = numpy.mean(f_jk)` ` var = numpy.std(f_jk) * numpy.sqrt(N-1)` ` if a != None:` ` mean_unbiased = N * func(a.mean()) - (N-1) * mean` ` else:` ` mean_unbiased = None` ` return (mean, var, mean_unbiased)`