My tools of the trade for Python programming.
# $Id: fortbin.py,v 1.4 2011-08-31 20:27:31 wirawan Exp $
#
# wpylib.iofmt.fortbin module
# Created: 20100208
# Wirawan Purwanto
#
"""
Fortran binary format.
"""
import numpy
import sys
from wpylib.sugar import ifelse
class fortran_types(object):
    """A description of Fortran data types.
    Useful for estimating memory use, file sizes, etc."""
    desc = {
        numpy.int32: dict(
            size=4,
        ),
        numpy.int64: dict(
            size=8,
        ),
        numpy.float32: dict(  # IEEE single precision
            size=4,
        ),
        numpy.float64: dict(  # IEEE double precision
            size=8,
        ),
        numpy.complex64: dict(  # Fortran 90 complex(4)
            size=8,
        ),
        numpy.complex128: dict(  # Fortran 90 complex(8)
            size=16,
        ),
    }
    # defaults: OK for "LP64" systems or typical 32-bit systems
    desc[int] = desc[numpy.int32]
    desc[float] = desc[numpy.float64]
    desc[complex] = desc[numpy.complex128]
    record_marker_size = 4  # size of a record marker (2x this per record)

    # special data types
    class fortran_dtype(object):
        """Base class for all special Fortran data types."""
        pass

    class character(fortran_dtype):
        """Fortran fixed-width character string."""
        def __init__(self, len):
            self.len = len
        @property
        def size(self):
            return self.len

    def size(self, dtype):
        """Computes the size of a single datatype."""
        if isinstance(dtype, self.fortran_dtype):
            return dtype.size  # `size` is a property, not a method
        else:
            return self.desc[dtype]['size']

    def file_rec_size(self, dtypes):
        """Computes the size of a record on disk."""
        return sum([ self.size(d) for d in dtypes ]) + 2 * self.record_marker_size

    def file_data_size(self, recs):
        """Computes the size of a sequence of records on disk."""
        return sum([ self.file_rec_size(r) for r in recs ])
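
# A minimal usage sketch (the field lists are hypothetical): estimating the
# on-disk size of an unformatted file that holds one header record (a default
# integer and a double) followed by 100 records of two doubles each:
#
#   >>> ft = fortran_types()
#   >>> ft.file_rec_size([int, float])    # 4 + 8 + 2*4 = 20 bytes
#   >>> ft.file_data_size([[int, float]] + [[float, float]] * 100)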
class fortran_bin_file(object):
    """A tool for reading and writing Fortran binary files.

    Caveat: On 64-bit systems, typical Fortran implementations still have
    int == int32 (i.e. the LP64 programming model), unless an "-i8" kind of
    option is enabled.
    To use a 64-bit default integer, set the default_int attribute to
    numpy.int64 .
    """
    record_marker_type = numpy.uint32
    default_int = numpy.int32
    default_float = numpy.float64
    default_complex = numpy.complex128
    default_str = numpy.str_
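
    # For files written with 8-byte default integers (e.g. a hypothetical
    # file produced under gfortran's -fdefault-integer-8), a per-instance
    # override is enough:
    #
    #   >>> f = fortran_bin_file("bigint.bin")   # hypothetical file name
    #   >>> f.default_int = numpy.int64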
    def __init__(self, filename=None, mode="r"):
        self.debug = 0
        if filename:
            self.open(filename, mode)

    def open(self, filename, mode="r"):
        self.F = open(filename, mode + "b")

    def close(self):
        if getattr(self, "F", None):
            self.F.close()
            self.F = None
    @staticmethod
    def fld_count(f):
        """Determines how many items are in a Fortran data field.
        The `f` argument is a field descriptor, which can be given as
        either a (name, dtype) or a (name, dtype, length) tuple.
        If length is not specified, then a scalar value is read.
        Length is a scalar for a 1-D array, or a tuple or list for a
        multidimensional array.
        """
        if len(f) > 2:
            if isinstance(f[2], (list, tuple)):
                return numpy.product(f[2])
            else:
                return f[2]
        else:
            return 1
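
    # A quick sketch of how descriptors are counted (names are hypothetical):
    #
    #   >>> fortran_bin_file.fld_count(("n", numpy.int32))            # -> 1
    #   >>> fortran_bin_file.fld_count(("v", numpy.float64, 7))       # -> 7
    #   >>> fortran_bin_file.fld_count(("A", numpy.float64, (3, 4)))  # -> 12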
    def byte_length(self, *fields):
        """Given a list of field descriptors, determines how many bytes this
        set of fields would occupy.
        """
        expected_len = sum([ self.fld_count(f) * numpy.dtype(f[1]).itemsize
                             for f in fields ])
        return expected_len
    def read(self, *fields, **opts):
        """Reads a Fortran record.
        This corresponds to a single READ statement in a Fortran program.
        Each field is described by either a (name, dtype) or a
        (name, dtype, length) tuple.
        If length is not specified, then a scalar value is read.
        Length is a scalar for a 1-D array, or a tuple or list for a
        multidimensional array.

        Optional argument:
        * dest = a structure to contain the result.
        """
        if self.debug or opts.get("debug"):
            dbg = lambda msg: sys.stderr.write(msg)
        else:
            dbg = lambda msg: None

        fld_count = self.fld_count
        reclen = numpy.fromfile(self.F, self.record_marker_type, 1)[0]
        dbg("Record length = %d\n" % reclen)
        expected_len = self.byte_length(*fields)
        dbg("Expected length = %d\n" % expected_len)
        if expected_len > reclen:
            raise IOError("Attempting to read %d bytes from a record of length %d bytes"
                          % (expected_len, reclen))
        if "dest" in opts:
            rslt = opts["dest"]
        else:
            rslt = {}
        if hasattr(rslt, "__setitem__"):
            # dict-like destination: store the values by key
            def setval(d, k, v):
                d[k] = v
        else:
            # otherwise, assume we can use setattr:
            setval = setattr

        for f in fields:
            if len(f) > 2:
                (name, Dtyp, Len) = f
                dtyp = numpy.dtype(Dtyp)
                Len2 = fld_count(f)
                if isinstance(Len, (list, tuple)):
                    # Special handling for shaped (multidimensional) arrays
                    arr = numpy.fromfile(self.F, dtyp, Len2)
                    setval(rslt, name, arr.reshape(tuple(Len), order='F'))
                else:
                    setval(rslt, name, numpy.fromfile(self.F, dtyp, Len2))
            else:
                # Special handling for scalars
                name = f[0]
                dtyp = numpy.dtype(f[1])
                setval(rslt, name, numpy.fromfile(self.F, dtyp, 1)[0])

        if expected_len < reclen:
            # skip any unread trailing bytes in this record
            self.F.seek(reclen - expected_len, 1)
        reclen2 = numpy.fromfile(self.F, self.record_marker_type, 1)[0]
        dbg("Record length tail = %d\n" % reclen2)
        if reclen2 != reclen:
            raise IOError("Inconsistency in record: end-marker length = %d; was expecting %d"
                          % (reclen2, reclen))
        return rslt
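
    # A minimal sketch of matching READs (names and file are hypothetical).
    # For records written by a Fortran program as
    #   write(unit) n, e
    #   write(unit) coord(1:3, 1:n)
    # the corresponding reads would be:
    #
    #   >>> f = fortran_bin_file("data.bin")
    #   >>> rec = f.read(("n", numpy.int32), ("e", numpy.float64))
    #   >>> coords = f.read(("coord", numpy.float64, (3, rec["n"])))["coord"]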
    def bulk_read_array1(self, dtype, shape):
        """Reads data that is regularly stored as an array of Fortran records
        (all of the same type and length).
        Normally, each record would have to be read individually and its
        record-length markers validated; this routine instead bulk-reads all
        the records at once and shapes them into an array of the given shape.

        Warning: because we load all the leading and trailing record-length
        markers as well, the resulting array is larger than the actual data,
        and the data will not be contiguous in memory.
        Use copy_subarray below to create a contiguous representation of the
        data (per field name).
        """
        from numpy import product, fromfile, all
        dtype1 = numpy.dtype([('reclen', self.record_marker_type),
                              ('content', dtype),
                              ('reclen2', self.record_marker_type)])
        dtype_itemsize = dtype1['content'].itemsize
        size = product(shape)  # total number of records to read in bulk
        # read in *ALL* the records in a linear fashion, in one read statement
        arr = fromfile(self.F, dtype1, size)
        if not all(arr['reclen'] == dtype_itemsize) \
           or not all(arr['reclen2'] == dtype_itemsize):
            raise IOError("Inconsistency detected in record array: "
                          "one or more records do not have the expected record length=%d"
                          % (dtype_itemsize,))
        # Return only the content--this WILL NOT be contiguous in memory.
        return arr['content'].reshape(shape, order='F')
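
    # A hedged sketch (the file layout is hypothetical): a 10x10 Fortran
    # matrix stored as 100 single-double records (one element per record,
    # column by column) can be slurped in one call:
    #
    #   >>> mat = f.bulk_read_array1(numpy.float64, (10, 10))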
    def write_vals(self, *vals, **opts):
        """Writes a Fortran record.
        Only the values need to be given, because the types are known.
        This is the direct converse of the read subroutine."""
        if self.debug:
            dbg = lambda msg: sys.stderr.write(msg)
        else:
            dbg = lambda msg: None

        vals0 = vals
        vals = []
        for v in vals0:
            if isinstance(v, int):
                v2 = self.default_int(v)
                if v2 != v:
                    raise OverflowError("Integer too large to represent by default int: %d" % v)
                vals.append(v2)
            elif isinstance(v, float):
                v2 = self.default_float(v)
                # FIXME: check for overflow error like in the integer conversion above
                vals.append(v2)
            elif isinstance(v, basestring):
                v2 = self.default_str(v)
                vals.append(v2)
            elif "itemsize" in dir(v):
                vals.append(v)
            else:
                raise NotImplementedError("Unsupported object of type %s of value %s"
                                          % (str(type(v)), str(v)))
        reclen = numpy.sum([ v.size * v.itemsize for v in vals ],
                           dtype=self.record_marker_type)
        dbg("Record length = %d\n" % reclen)
        dbg("Item count = %d\n" % len(vals))
        reclen.tofile(self.F)
        for v in vals:
            if isinstance(v, numpy.ndarray):
                # Always store in "Fortran" (column-major) order.
                # Since tofile() always writes in row-major order,
                # we transpose the array before writing:
                v.T.tofile(self.F)
            else:
                v.tofile(self.F)
        reclen.tofile(self.F)
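
    # A small sketch (hypothetical values), mirroring the Fortran statement
    #   write(unit) n, e, coord(1:3, 1:n) :
    #
    #   >>> f = fortran_bin_file("out.bin", "w")
    #   >>> f.write_vals(10, 1.5, numpy.zeros((3, 10)))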
    def write_fields(self, src, *fields, **opts):
        """Writes a Fortran record, taking the values from the named fields
        of `src` (a dict-like container or an object with attributes)."""
        if hasattr(src, "__getitem__"):
            # dict-like source: fetch the values by key
            def getval(d, k):
                return d[k]
        else:
            # otherwise, assume we can use getattr:
            getval = getattr

        vals = []
        for f in fields:
            if isinstance(f, basestring):
                vals.append(getval(src, f))
            elif isinstance(f, (list, tuple)):
                v = getval(src, f[0])
                # FIXME: check the datatype and do the necessary conversion if f[1] exists.
                # Exception: if a string spec is found, we will retrofit the string
                # to that kind of object. Strings that are too long are silently
                # truncated, and those that are too short are padded with whitespace
                # (ASCII 32).
                if len(f) > 1:
                    dtyp = numpy.dtype(f[1])
                    if dtyp.char == 'S':
                        strlen = dtyp.itemsize
                        v = self.default_str("%-*s" % (strlen, v[:strlen]))
                # FIXME: check dimensionality if f[2] exists
                vals.append(v)
            else:
                raise ValueError("Invalid field type: %s" % str(type(f)))
        self.write_vals(*vals, **opts)
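
    # A matching sketch (the record layout is hypothetical): a 32-character
    # title field followed by an integer count, pulled from a dict:
    #
    #   >>> hdr = {"title": "test run", "n": 10}
    #   >>> f.write_fields(hdr, ("title", "S32"), "n")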
    def peek_next_rec_len(self):
        """Fetches the length of the next record, while preserving
        the position of the file read pointer.
        """
        filepos = self.F.tell()
        reclen = numpy.fromfile(self.F, self.record_marker_type, 1)
        self.F.seek(filepos)
        return reclen[0]
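
    # Handy for records whose length is not known in advance (a sketch with
    # a hypothetical file layout): peek, derive the element count, then read.
    #
    #   >>> nelem = f.peek_next_rec_len() // numpy.dtype(numpy.float64).itemsize
    #   >>> data = f.read(("data", numpy.float64, nelem))["data"]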
def array_major_dim(arr):
    """Tests whether a numpy array is column or row major.
    It will return the following:
      -1 : row major
      +1 : column major
       0 : unknown (e.g. no indication one way or the other)
    In the case of inconsistent ordering, an exception is raised."""
    if len(arr.shape) <= 1:
        return 0
    elif arr.flags['C_CONTIGUOUS']:
        return -1
    elif arr.flags['F_CONTIGUOUS']:
        return +1
    # In the case of a noncontiguous array, we will have to test it
    # based on the strides
    else:
        Lstrides = numpy.array(arr.strides[:-1])
        Rstrides = numpy.array(arr.strides[1:])
        if numpy.all(Lstrides >= Rstrides):
            # Categorizes equal consecutive strides as "row major" as well
            return -1
        elif numpy.all(Lstrides <= Rstrides):
            return +1
        else:
            raise RuntimeError("Unable to determine whether this is a row or column major object.")
def copy_subarray(arr, key, order='F'):
    """Given a numpy array of structured datatype, copies out a subarray field
    into a new array in contiguous format.
    The field accessed by arr[key] must be a fixed-size array.
    The order argument can be either 'F' or 'C':
    - For 'F' ordering, the subarray index becomes the *first* index.
    - For 'C' ordering, the subarray index becomes the *last* index.
    """
    subarr = arr[key]
    dim = len(arr.shape)
    subdim = len(subarr.shape) - dim
    if order == 'F':
        rslt = numpy.transpose(subarr,
                               axes=list(range(dim, subdim + dim)) + list(range(dim)))
    elif order == 'C':
        rslt = subarr
    else:
        raise ValueError("Invalid order argument")
    # Always return a copy!
    if numpy.may_share_memory(rslt, arr):
        return rslt.copy(order=order)
    else:
        return rslt
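
# A short sketch (the structured dtype is hypothetical): pulling the 'v'
# field out of a bulk-read record array into a contiguous block:
#
#   >>> rdt = numpy.dtype([('v', numpy.float64, (3,))])
#   >>> arr = numpy.zeros(100, dtype=rdt)
#   >>> copy_subarray(arr, 'v').shape        # -> (3, 100), Fortran order
#   >>> copy_subarray(arr, 'v', 'C').shape   # -> (100, 3), C order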