From dfc7a780f99d4cd40987671e0b9d83ab62794063 Mon Sep 17 00:00:00 2001 From: wirawan Date: Mon, 8 Feb 2010 01:46:12 +0000 Subject: [PATCH] * Renaming file_db to file_table class. * Allow record update in somewhat pythonic way (instead of using SQL). --- db/file_db.py | 43 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/db/file_db.py b/db/file_db.py index e613001..ae0680e 100644 --- a/db/file_db.py +++ b/db/file_db.py @@ -1,4 +1,4 @@ -# $Id: file_db.py,v 1.1 2010-02-06 23:21:09 wirawan Exp $ +# $Id: file_db.py,v 1.2 2010-02-08 01:46:12 wirawan Exp $ # # wpylib.db.filedb module # Created: 20100205 @@ -20,7 +20,7 @@ except: class file_rec(tuple): pass -class file_db(object): +class file_table(object): # dtype for numpy (if wanted) dtype = numpy.dtype([ ('filename', 'S256'), @@ -39,12 +39,14 @@ class file_db(object): def __init__(self, src_name, table_name='filedb', extra_fields=[]): self.src_name = src_name self.table_name = table_name - if os.path.isfile(src_name): + if isinstance(src_name, str): # os.path.isfile(src_name): self.db = sqlite3.connect(src_name) self.dbc = self.db.cursor() - else: - self.db = sqlite3.connect(src_name) + elif isinstance(src_name, sqlite3.Connection): + self.db = src_name self.dbc = self.db.cursor() + else: + raise ValueError, "Invalid src_name data type" self.db.text_factory = str self.sql_params = { 'table_name': table_name, @@ -117,12 +119,12 @@ class file_db(object): # Replaceable insert is not intended for tables with duplicate entries # of the same filename. insert_sql = "UPDATE '%(table_name)s' SET " \ - + ', '.join(["'%s' = ?" % dname for dname in dnames]) \ + + ', '.join(["'%s' = ?" % d for d in dnames]) \ + " WHERE filename = ?;" vals = tuple(dvals + [filename]) else: insert_sql = "INSERT INTO '%(table_name)s' (filename, " \ - + ", ".join(["'%s'" % dname for dname in dnames]) \ + + ", ".join(["'%s'" % d for d in dnames]) \ + ") VALUES (?" + ',?'*(len(fields)) + ");" vals = tuple([filename] + dvals) self.exec_sql(insert_sql, vals) @@ -145,6 +147,22 @@ class file_db(object): sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename = ?;" return [ rslt for rslt in self.exec_sql(sql_stmt, (filename,)) ] + def __setitem__(self, filename, newdata): + """Updates the metadata on the filename. Any other field than the filename + can be updated. The filename serves as a unique key here.""" + if isinstance(newdata, dict) or "keys" in dir(newdata): + dnames = newdata.keys() + dvals = [ newdata[k] for k in dnames ] + else: + # Assuming an iterable with ('field', 'value') tuples. + dnames = [ dname for (dname,dval) in newdata ] + dvals = [ dval for (dname,dval) in newdata ] + update_sql = "UPDATE '%(table_name)s' SET " \ + + ', '.join(["'%s' = ?" % d for d in dnames]) \ + + " WHERE filename = ?;" + vals = tuple(dvals + [filename]) + self.exec_sql(update_sql, vals) + def __contains__(self, filename): """Counts the number of record entries matching in the `filename' field.""" if filename.find("%") >= 0: @@ -155,6 +173,10 @@ class file_db(object): count = __contains__ + def fields(self): + """Returns the field names of the table of the latest query.""" + return [ z[0] for z in self.dbc.description ] + def md5_digest_file(filename): """Digests the content of a file.""" @@ -174,13 +196,16 @@ def str2hexstr(md5sum): return "".join([ "%02x" % ord(c) for c in md5sum ]) -def get_file_stats(filename): +def get_file_stats(filename, get_md5sum=True): stats = os.stat(filename) mtime = time.localtime(stats.st_mtime) Mdate = mtime.tm_year * 10000 + mtime.tm_mon * 100 + mtime.tm_mday Mtime = mtime.tm_hour * 10000 + mtime.tm_min * 100 + mtime.tm_sec size = stats.st_size - md5sum = str2hexstr(md5_digest_file(filename)) # this step is EXPEN$IVE + if get_md5sum: + md5sum = str2hexstr(md5_digest_file(filename)) # this step is EXPEN$IVE + else: + md5sum = None return { 'filename': filename, 'mdate': Mdate,