From 7017fdc6af6fb1f495a97a86ab15c270ca3f36f9 Mon Sep 17 00:00:00 2001 From: wirawan Date: Thu, 6 Oct 2011 19:15:05 +0000 Subject: [PATCH] * Committing all dirty work to CVS. We are migrating to GIT. Beware: changes in this commit may or may not work properly. --- db/tables.py | 191 ++++++++++++++++++++++++++++++++ math/fft.py | 55 ++++++++- math/linalg/__init__.py | 8 +- math/stats/linear_regression.py | 13 +++ params/params_flat_test.py | 6 +- py/wrapper.py | 57 ++++++++++ 6 files changed, 326 insertions(+), 4 deletions(-) create mode 100644 db/tables.py create mode 100644 math/stats/linear_regression.py create mode 100644 py/wrapper.py diff --git a/db/tables.py b/db/tables.py new file mode 100644 index 0000000..9f45b72 --- /dev/null +++ b/db/tables.py @@ -0,0 +1,191 @@ +# $Id: tables.py,v 1.1 2011-10-06 19:14:47 wirawan Exp $ +# +# wpylib.db.tables module +# Created: 20100223 +# Wirawan Purwanto +# + +"""Simple table accessors for sqlite database.""" + +import sys +import os +import os.path +import time + +try: + import sqlite3 +except: + # For Python < 2.5: + import pysqlite2.dbapi2 as sqlite3 + + +# dtype map from python types to sqlite3 types: +dtype_map = { + str: 'TEXT', + int: 'INTEGER', + float: 'REAL', +} + +# +simple_row_type = None # returns tuple +indexable_row_type = sqlite3.Row + + +class simple_table(object): + """Simple table with no primary key.""" + dtypes_default = [] + def __init__(self, src_name, table_name, dtypes=None): + self.src_name = src_name + self.table_name = table_name + if isinstance(src_name, str): # os.path.isfile(src_name): + self.db = sqlite3.connect(src_name) + self.dbc = self.db.cursor() + elif isinstance(src_name, sqlite3.Connection): + self.src_name = None + self.db = src_name + self.dbc = self.db.cursor() + else: + raise ValueError, "Invalid src_name data type" + self.db.text_factory = str + self.sql_params = { + 'table_name': table_name, + } + self.debug = 1 + + create_sql = """\ + CREATE TABLE IF NOT EXISTS 
'%(table_name)s' ( + """ \ + + ", ".join(["'%s' %s" % (dname, self.sqlite_dtype_map[dtyp]) + for (dname,dtyp) in self.dtypes_default + list(dtypes) + ]) \ + + """ + ); + """ + self.exec_sql(create_sql) + self.db.commit() + + def exec_sql(self, stmt, params=None): + sql_stmt = stmt % self.sql_params + if params: + if self.debug: + print "--SQL::", sql_stmt.rstrip() + print "--val::", params + return self.dbc.execute(sql_stmt, params) + else: + if self.debug: + print "--SQL::", sql_stmt.rstrip() + return self.dbc.execute(sql_stmt) + + def add_fields(self, dtypes): + """Adds columns to the table.""" + for (dname, dtyp) in dtypes: + self.exec_sql("ALTER TABLE '%(table_name)s' ADD COLUMN" \ + + " '%s' %s;" % (dname, self.sqlite_dtype_map[dtyp]) + ) + self.db.commit() + + def register_file(self, filename, replace=False, extra_values=None): + """Register a file, note its mtime, and size, and digests its content.""" + filestats = get_file_stats(filename) + fields = [ + ('md5sum', filestats['md5sum']), + ('date', filestats['mdate']), + ('time', filestats['mtime']), + ('size', filestats['size']), + ] + [ + kwpair for kwpair in extra_values + ] + dnames = [ dname for (dname,dval) in fields ] + dvals = [ dval for (dname,dval) in fields ] + + if replace: + # Test if we want to replace or to add. + count = [ + x for x in self.exec_sql( + "SELECT count(*) from '%(table_name)s' where filename = ?;", + (filename,) + ) + ][0][0] + if count == 0: replace = False + + if replace: + # WARNING: This will replace all the occurences of the entry with + # the same filename. + # Replaceable insert is not intended for tables with duplicate entries + # of the same filename. + insert_sql = "UPDATE '%(table_name)s' SET " \ + + ', '.join(["'%s' = ?" % d for d in dnames]) \ + + " WHERE filename = ?;" + vals = tuple(dvals + [filename]) + else: + insert_sql = "INSERT INTO '%(table_name)s' (filename, " \ + + ", ".join(["'%s'" % d for d in dnames]) \ + + ") VALUES (?" 
+ ',?'*(len(fields)) + ");" + vals = tuple([filename] + dvals) + self.exec_sql(insert_sql, vals) + + def flush(self): + self.db.commit() + + def get_filenames(self): + """Reads all the file names in the table to memory.""" + return [ + rslt[0] for rslt in + self.exec_sql("SELECT filename FROM '%(table_name)s' ORDER BY filename;") + ] + + def __getitem__(self, **criteria): + # Criteria could be SQL stmt + """Reads all the entries matching in the `filename' field.""" + if filename.find("%") >= 0: + sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename LIKE ?;" + else: + sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename = ?;" + return [ rslt for rslt in self.exec_sql(sql_stmt, (filename,)) ] + + def __setitem__(self, filename, newdata): + """Updates the metadata on the filename. Any other field than the filename + can be updated. The filename serves as a unique key here. + The newdata can be a hash, like this: + + A_file_table[filename] = {'date': 20041201, 'time': 122144} + + or a list of tuples: + + A_file_table[filename] = [('date': 20041201), ('time': 122144)] + """ + if isinstance(newdata, dict) or "keys" in dir(newdata): + dnames = newdata.keys() + dvals = [ newdata[k] for k in dnames ] + else: + # Assuming an iterable with ('field', 'value') tuples. + dnames = [ dname for (dname,dval) in newdata ] + dvals = [ dval for (dname,dval) in newdata ] + update_sql = "UPDATE '%(table_name)s' SET " \ + + ', '.join(["'%s' = ?" 
% d for d in dnames]) \ + + " WHERE filename = ?;" + vals = tuple(dvals + [filename]) + self.exec_sql(update_sql, vals) + + def __contains__(self, filename): + """Counts the number of record entries matching in the `filename' field.""" + if filename.find("%") >= 0: + sql_stmt = "SELECT count(*) FROM '%(table_name)s' WHERE filename LIKE ?;" + else: + sql_stmt = "SELECT count(*) FROM '%(table_name)s' WHERE filename = ?;" + return [ rslt for rslt in self.exec_sql(sql_stmt, (filename,)) ][0][0] + + count = __contains__ + + def fields(self): + """Returns the field names of the table of the latest query.""" + return [ z[0] for z in self.dbc.description ] + + def row_kind(self, kind=None): + if kind: + self.db.row_factory = kind + # We will reload the cursor to account for the new factory + self.dbc = self.db.cursor() + return self.db.row_factory + + diff --git a/math/fft.py b/math/fft.py index d07e5cb..df40a60 100644 --- a/math/fft.py +++ b/math/fft.py @@ -1,4 +1,4 @@ -# $Id: fft.py,v 1.1 2010-02-24 14:27:23 wirawan Exp $ +# $Id: fft.py,v 1.2 2011-10-06 19:14:48 wirawan Exp $ # # wpylib.math.fft module # Created: 20100205 @@ -37,7 +37,7 @@ The slice [gmin:gmax:gstep] will certainly result in an empty slice. To do this, we define two functions below. First, fft_grid_ranges1 generates the ranges for each dimension, then fft_grid_ranges itself generates all the combination of ranges (which cover -all combinations of positive and ndgative frequency domains for all +all combinations of positive and negative frequency domains for all dimensions.) For a (5x8) FFT grid, we will have @@ -70,6 +70,57 @@ fft_grid_ranges = lambda Gmin, Gmax, Gstep : \ all_combinations(fft_grid_ranges1(Gmin, Gmax, Gstep)) +class fft_grid(object): + """A class describing a N-dimensional grid for plane wave + (or real-space) basis. + In this version, the grid is centered at (0,0,...) coordinate. + To actually create a grid, use the new_dens() method. 
+ """ + dtype = complex + def __init__(self, Gsize=None, Gmin=None, Gmax=None, dtype=None): + """Creates a new grid descriptor. + There are two possible methods, and you must choose either one for + initialization: + * Gsize = an N-dimensional array (list, tuple, ndarray) specifying + the number of grid points in each dimension. + or + * Gmin, Gmax = a pair of N-dimensional arrays (list, tuple, ndarray) + specifying the smallest (most negative) and largest (most positive) + coordinates in each dimension. + The grid size will be specified to fit this range. + """ + from numpy import maximum + if Gsize != None: + self.Gsize = numpy.array(Gsize, dtype=int) + (self.Gmin, self.Gmax) = fft_grid_bounds(self.Gsize) + elif Gmin != None and Gmax != None: + self.Gmin = numpy.array(Gmin, dtype=int) + self.Gmax = numpy.array(Gmax, dtype=int) + # Figure out the minimum grid size to fit this data: + Gsize_min = abs(self.Gmin) * 2 + Gsize_max = abs(self.Gmax) * 2 + (abs(self.Gmax) % 2) + Gsize_def = self.Gmax - self.Gmin + 1 + self.Gsize = maximum(maximum(Gsize_min, Gsize_max), Gsize_def) + else: + raise ValueError, \ + "Either Gsize or (Gmin,Gmax) parameters have to be specified." + if dtype != None: + self.dtype = dtype + self.ndim = len(self.Gsize) + + def new_dens(self, zero=False, dtype=None): + """Creates a new N-dimensional array (grid).""" + if dtype == None: dtype = self.dtype + if zero: + return numpy.zeros(self.Gsize, dtype=dtype) + else: + return numpy.empty(self.Gsize, dtype=dtype) + + def check_index(self, G): + """Check if an index is valid according to Gmin, Gmax boundary.""" + return numpy.all(self.Gmin <= G) and numpy.all(G <= self.Gmax) + + def fft_r2g(dens): """Do real-to-G space transformation. 
According to our convention
+# Used for automatic wrapping of (especially) methods to +# dispatch it to a host of object possibilities. +# + + +class wrapper_base(object): + """Wrapper or proxy object to provide uniform API to other routines, + etc. + + This class allows dirty tricks such as injecting external functions + to accomplish certain required tasks in object-oriented manner. + If using external procedure, it must be callable with "self" as + its first argument. + + Reserved attributes: + * _obj_ = the wrapped object + * _procnames_[:] = method names to wrap automatically. + * _obj_path_[:] = list of objects (instances) from which to look + for the methods. + * _set_obj_path_() = object method to define what objects to be + included in the object path (_obj_path_). + + """ + def __init__(self, obj): + """Creates a wrapper.""" + self._obj_ = obj + if hasattr(self, '_set_obj_path_'): + self._set_obj_path_() + else: + self._obj_path_ = [ obj ] + + def _autoset_proc_(self, procname, extproc=None): + from wpylib.py import make_unbound_method + from wpylib.py.im_weakref import im_ref + from weakref import ref + + procname_ = procname + '_' + procname_proc = procname + '_proc' + if hasattr(self, procname_proc): + # In case the derived-class has the procedure, we will use + # that. + setattr(self, procname, im_ref(getattr(self, procname_proc))) + else: + for o in self._obj_path_: + if hasattr(o, procname): + setattr(self, procname, im_ref(getattr(o, procname))) + return + # May implement a global fallback hook here? + pass +