From dfc7a780f99d4cd40987671e0b9d83ab62794063 Mon Sep 17 00:00:00 2001
From: wirawan <wirawan>
Date: Mon, 8 Feb 2010 01:46:12 +0000
Subject: [PATCH] * Rename file_db class to file_table. * Allow record update
 in a somewhat pythonic way (instead of using SQL).

---
 db/file_db.py | 43 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 34 insertions(+), 9 deletions(-)
diff --git a/db/file_db.py b/db/file_db.py
index e613001..ae0680e 100644
--- a/db/file_db.py
+++ b/db/file_db.py
@@ -1,4 +1,4 @@
-# $Id: file_db.py,v 1.1 2010-02-06 23:21:09 wirawan Exp $
+# $Id: file_db.py,v 1.2 2010-02-08 01:46:12 wirawan Exp $
 #
 # wpylib.db.filedb module
 # Created: 20100205
@@ -20,7 +20,7 @@ except:
 class file_rec(tuple):
   pass
 
-class file_db(object):
+class file_table(object):
   # dtype for numpy (if wanted)
   dtype = numpy.dtype([
                        ('filename', 'S256'),
@@ -39,12 +39,14 @@ class file_db(object):
   def __init__(self, src_name, table_name='filedb', extra_fields=[]):
     self.src_name = src_name
     self.table_name = table_name
-    if os.path.isfile(src_name):
+    if isinstance(src_name, str): # os.path.isfile(src_name):
       self.db = sqlite3.connect(src_name)
       self.dbc = self.db.cursor()
-    else:
-      self.db = sqlite3.connect(src_name)
+    elif isinstance(src_name, sqlite3.Connection):
+      self.db = src_name
       self.dbc = self.db.cursor()
+    else:
+      raise ValueError, "Invalid src_name data type"
     self.db.text_factory = str
     self.sql_params = {
         'table_name': table_name,
@@ -117,12 +119,12 @@ class file_db(object):
       # Replaceable insert is not intended for tables with duplicate entries
       # of the same filename.
       insert_sql = "UPDATE '%(table_name)s' SET " \
-        + ', '.join(["'%s' = ?" % dname for dname in dnames]) \
+        + ', '.join(["'%s' = ?" % d for d in dnames]) \
         + " WHERE filename = ?;"
       vals = tuple(dvals + [filename])
     else:
       insert_sql = "INSERT INTO '%(table_name)s' (filename, " \
-        + ", ".join(["'%s'" % dname for dname in dnames]) \
+        + ", ".join(["'%s'" % d for d in dnames]) \
         + ") VALUES (?" + ',?'*(len(fields)) + ");"
       vals = tuple([filename] + dvals)
     self.exec_sql(insert_sql, vals)
@@ -145,6 +147,22 @@ class file_db(object):
       sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename = ?;"
     return [ rslt for rslt in self.exec_sql(sql_stmt, (filename,)) ]
 
+  def __setitem__(self, filename, newdata):
+    """Updates the metadata of the record for `filename'. Any field other than
+    the filename can be updated. The filename serves as a unique key here."""
+    if isinstance(newdata, dict) or "keys" in dir(newdata):
+      dnames = newdata.keys()
+      dvals = [ newdata[k] for k in dnames ]
+    else:
+      # Assuming an iterable with ('field', 'value') tuples.
+      dnames = [ dname for (dname,dval) in newdata ]
+      dvals = [ dval for (dname,dval) in newdata ]
+    update_sql = "UPDATE '%(table_name)s' SET " \
+      + ', '.join(["'%s' = ?" % d for d in dnames]) \
+      + " WHERE filename = ?;"
+    vals = tuple(dvals + [filename])
+    self.exec_sql(update_sql, vals)
+
   def __contains__(self, filename):
     """Counts the number of record entries matching in the `filename' field."""
     if filename.find("%") >= 0:
@@ -155,6 +173,10 @@ class file_db(object):
 
   count = __contains__
 
+  def fields(self):
+    """Returns the field names of the table of the latest query."""
+    return [ z[0] for z in self.dbc.description ]
+
 
 def md5_digest_file(filename):
   """Digests the content of a file."""
@@ -174,13 +196,16 @@ def str2hexstr(md5sum):
   return "".join([ "%02x" % ord(c) for c in md5sum ])
 
 
-def get_file_stats(filename):
+def get_file_stats(filename, get_md5sum=True):
   stats = os.stat(filename)
   mtime = time.localtime(stats.st_mtime)
   Mdate = mtime.tm_year * 10000 + mtime.tm_mon * 100 + mtime.tm_mday
   Mtime = mtime.tm_hour * 10000 + mtime.tm_min * 100 + mtime.tm_sec
   size = stats.st_size
-  md5sum = str2hexstr(md5_digest_file(filename))  # this step is EXPEN$IVE
+  if get_md5sum:
+    md5sum = str2hexstr(md5_digest_file(filename))  # this step is EXPEN$IVE
+  else:
+    md5sum = None
   return {
     'filename': filename,
     'mdate': Mdate,