You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
192 lines
5.5 KiB
192 lines
5.5 KiB
15 years ago
|
# $Id: file_db.py,v 1.1 2010-02-06 23:21:09 wirawan Exp $
|
||
|
#
|
||
|
# wpylib.db.file_db module
|
||
|
# Created: 20100205
|
||
|
# Wirawan Purwanto
|
||
|
#
|
||
|
|
||
|
"""File fingerprint database."""
|
||
|
|
||
|
import md5
|
||
|
import numpy
|
||
|
import os.path
|
||
|
import time
|
||
|
|
||
|
try:
|
||
|
import sqlite3
|
||
|
except:
|
||
|
import pysqlite2 as sqlite3
|
||
|
|
||
|
class file_rec(tuple):
    """Plain tuple subclass used as a distinct type for a file record.

    It adds no fields or methods of its own; it behaves exactly like `tuple'.
    """
|
||
|
|
||
|
class file_db(object):
    """File fingerprint table stored in an SQLite database.

    Every row holds a file name together with its MD5 digest, modification
    date, modification time and size (as produced by `get_file_stats'), plus
    any extra columns requested by the caller.

    Statements passed to `exec_sql' are %-formatted with `self.sql_params'
    before execution, which is how the '%(table_name)s' placeholder gets
    resolved.
    """

    # dtype for numpy (if wanted)
    dtype = numpy.dtype([
        ('filename', 'S256'),
        ('md5', 'S32'),
        ('date', 'i4'),
        ('time', 'i4'),
        ('size', 'i8'),
    ])
    # dtype map from python types to sqlite3 types:
    sqlite_dtype_map = {
        str: 'TEXT',
        int: 'INTEGER',
        float: 'REAL',
    }

    def __init__(self, src_name, table_name='filedb', extra_fields=()):
        """Opens (or creates) the database and makes sure the table exists.

        src_name     -- database name handed to sqlite3.connect().
        table_name   -- name of the table holding the records.
        extra_fields -- iterable of (column_name, python_type) pairs for
                        additional columns; python_type must be a key of
                        `sqlite_dtype_map' (KeyError otherwise).
        """
        self.src_name = src_name
        self.table_name = table_name
        # sqlite3.connect() both opens an existing database and creates a
        # missing one, so no check for the file's existence is needed.
        self.db = sqlite3.connect(src_name)
        self.dbc = self.db.cursor()
        self.db.text_factory = str
        self.sql_params = {
            'table_name': table_name,
        }
        self.debug = 1

        extra_columns = "".join([
            ", '%s' %s" % (dname, self.sqlite_dtype_map[dtyp])
            for (dname, dtyp) in (extra_fields or ())
        ])
        create_sql = (
            "CREATE TABLE IF NOT EXISTS '%(table_name)s' (\n"
            "  filename TEXT,\n"
            "  md5sum TEXT,\n"
            "  date INTEGER,\n"
            "  time INTEGER,\n"
            "  size INTEGER"
            + extra_columns
            + "\n);\n"
        )
        self.exec_sql(create_sql)
        self.db.commit()

    def exec_sql(self, stmt, params=None):
        """Executes one SQL statement on the internal cursor and returns it.

        stmt   -- SQL text; it is %-formatted with `self.sql_params' first,
                  so a literal percent sign in the statement must be doubled.
        params -- optional values bound to the `?' placeholders.

        When `self.debug' is true the statement (and the bound values, if
        any) are echoed to standard output.
        """
        sql_stmt = stmt % self.sql_params
        if self.debug:
            # Single-argument print(...) gives the same output under both
            # the Python 2 print statement and the Python 3 print function.
            print("--SQL:: %s" % sql_stmt.rstrip())
            if params:
                print("--val:: %s" % (params,))
        if params:
            return self.dbc.execute(sql_stmt, params)
        return self.dbc.execute(sql_stmt)

    def add_fields(self, dtypes):
        """Adds columns to the table.

        dtypes -- iterable of (column_name, python_type) pairs; python_type
                  must be a key of `sqlite_dtype_map'.
        The change is committed once all columns have been added.
        """
        for (dname, dtyp) in dtypes:
            self.exec_sql(
                "ALTER TABLE '%(table_name)s' ADD COLUMN"
                + " '%s' %s;" % (dname, self.sqlite_dtype_map[dtyp])
            )
        self.db.commit()

    def register_file(self, filename, replace=False, extra_values=None):
        """Register a file, note its mtime, and size, and digests its content.

        filename     -- path of the file to fingerprint.
        replace      -- if true and the file name is already in the table,
                        the existing record(s) are updated instead of a new
                        row being inserted.
        extra_values -- optional iterable of (column_name, value) pairs for
                        the extra columns; None means no extra values.

        The change is not committed here; call `flush' to commit.
        """
        filestats = get_file_stats(filename)
        fields = [
            ('md5sum', filestats['md5sum']),
            ('date', filestats['mdate']),
            ('time', filestats['mtime']),
            ('size', filestats['size']),
        ]
        # The default extra_values=None must not be iterated.
        if extra_values is not None:
            fields.extend(extra_values)
        dnames = [dname for (dname, dval) in fields]
        dvals = [dval for (dname, dval) in fields]

        if replace:
            # Test if we want to replace or to add.
            count = self.exec_sql(
                "SELECT count(*) from '%(table_name)s' where filename = ?;",
                (filename,)
            ).fetchone()[0]
            if count == 0:
                replace = False

        if replace:
            # WARNING: This will replace all the occurrences of the entry
            # with the same filename.
            # Replaceable insert is not intended for tables with duplicate
            # entries of the same filename.
            insert_sql = "UPDATE '%(table_name)s' SET " \
                + ', '.join(["'%s' = ?" % dname for dname in dnames]) \
                + " WHERE filename = ?;"
            vals = tuple(dvals + [filename])
        else:
            insert_sql = "INSERT INTO '%(table_name)s' (filename, " \
                + ", ".join(["'%s'" % dname for dname in dnames]) \
                + ") VALUES (?" + ',?' * len(fields) + ");"
            vals = tuple([filename] + dvals)
        self.exec_sql(insert_sql, vals)

    def flush(self):
        """Commits the pending changes to the database."""
        self.db.commit()

    def get_filenames(self):
        """Reads all the file names in the table to memory."""
        cursor = self.exec_sql(
            "SELECT filename FROM '%(table_name)s' ORDER BY filename;"
        )
        return [rslt[0] for rslt in cursor]

    def __getitem__(self, filename):
        """Reads all the entries matching in the `filename' field.

        A name containing `%' is matched with SQL LIKE, any other name is
        matched exactly.  Returns a list of row tuples (possibly empty).
        """
        if "%" in filename:
            sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename LIKE ?;"
        else:
            sql_stmt = "SELECT * FROM '%(table_name)s' WHERE filename = ?;"
        return list(self.exec_sql(sql_stmt, (filename,)))

    def __contains__(self, filename):
        """Counts the number of record entries matching in the `filename' field.

        A name containing `%' is matched with SQL LIKE, any other name is
        matched exactly.  Called directly (or through the `count' alias) it
        returns the integer count; the `in' operator reduces it to a bool.
        """
        if "%" in filename:
            sql_stmt = "SELECT count(*) FROM '%(table_name)s' WHERE filename LIKE ?;"
        else:
            sql_stmt = "SELECT count(*) FROM '%(table_name)s' WHERE filename = ?;"
        return self.exec_sql(sql_stmt, (filename,)).fetchone()[0]

    count = __contains__
|
||
|
|
||
|
|
||
|
def md5_digest_file(filename):
    """Digests the content of a file.

    The file is read in binary mode in fixed-size chunks, so large files are
    not loaded into memory at once.  Returns the raw (binary) MD5 digest.
    Errors from opening or reading the file propagate to the caller.
    """
    # hashlib supersedes the standalone `md5' module, which is deprecated
    # since Python 2.5 and no longer exists in Python 3.
    import hashlib

    bufsize = 32768
    digest = hashlib.md5()
    # The `with' block closes the file even if a read fails midway.
    with open(filename, "rb") as ff:
        stuff = ff.read(bufsize)
        while stuff:
            digest.update(stuff)
            stuff = ff.read(bufsize)
    return digest.digest()
|
||
|
|
||
|
|
||
|
def str2hexstr(md5sum):
    """Return the hex representation of a string.

    Every item of `md5sum' becomes two (or more) lowercase hex digits.
    Both byte strings, such as a raw digest, and text strings are accepted.
    """
    # Iterating a Python 3 `bytes' object yields integers, on which ord()
    # would fail; one-character strings still go through ord().
    return "".join([
        "%02x" % (c if isinstance(c, int) else ord(c))
        for c in md5sum
    ])
|
||
|
|
||
|
|
||
|
def get_file_stats(filename):
    """Collects the fingerprint data of a file into a dict.

    Keys of the returned dict:
      'filename' -- the name as given
      'mdate'    -- local modification date encoded as the integer YYYYMMDD
      'mtime'    -- local modification time encoded as the integer HHMMSS
      'size'     -- st_size as reported by os.stat()
      'md5sum'   -- hex string of the MD5 digest of the file content
    """
    info = os.stat(filename)
    when = time.localtime(info.st_mtime)
    # this step is EXPEN$IVE
    checksum = str2hexstr(md5_digest_file(filename))
    return {
        'filename': filename,
        'mdate': (when.tm_year * 100 + when.tm_mon) * 100 + when.tm_mday,
        'mtime': (when.tm_hour * 100 + when.tm_min) * 100 + when.tm_sec,
        'size': info.st_size,
        'md5sum': checksum,
    }
|
||
|
|