wpylib/iofmt/text_input.py

#!/usr/bin/python
# $Id: text_input.py,v 1.6 2011-09-16 21:21:23 wirawan Exp $
#
# wpylib.iofmt.text_input module
# Quick-n-dirty text input utilities
#
# Wirawan Purwanto
# Created: 20090601
#
# Routines put here are commonly used in my own scripts.
# They are not necessarily suitable for general-purpose uses; evaluate
# your needs and see if they can meet them as well.
#
# 20090601: Created as pyqmc.utils.text_input .
# 20100927: Moved to wpylib.iofmt.text_input .
#
# TODO
# - book-keep the line number. Also note superfile must have its own line
#   number keeping.
#
"""
Simple text-based input reader.

This module is part of wpylib project.
"""

import re
import numpy

from wpylib.sugar import zip_gen
from wpylib.file.file_utils import open_input_file
from wpylib.py import make_unbound_instance_method
import wpylib.py.im_weakref

def make_match_proc(match):
  """Turn `match' into a one-argument matching procedure.

  - A string is compiled as a regular expression; the returned procedure
    calls its search() method on the argument.
  - An object with a callable `search' attribute (e.g. a compiled regular
    expression) is wrapped the same way.
  - Anything else is returned unchanged and is expected to be directly
    callable by the user of the result.
  """
  def searcher(pattern):
    # Wrap pattern.search in a plain single-argument function
    return lambda x: pattern.search(x)

  if isinstance(match, basestring):
    return searcher(re.compile(match))
  if hasattr(getattr(match, "search", None), "__call__"):
    return searcher(match)
  return match


class text_input(object):
  '''Text input reader with support for UNIX-style comment marker (#) and
  standard field separation (tabs and whitespaces).
  Used for quick and dirty data reading (iterating only once in forward
  direction without the need of rewinding or skipping).
  This object can be treated like an input file, e.g. used as an iterator,
  etc.

  By default every iteration step returns one line as a string, with the
  comment part and trailing whitespace removed (see next_line).
  Use set_next_proc() to select another reader such as next_rec, which
  returns the line split into a list of fields.

  To support more fancy options (e.g., rewinding), use "superize=1" when
  creating the instance.

  Other valid constructor flags:
  - expand_errorbar (default: False)
  - comment_char (default: "#")
  - skip_blank_lines (default: True)

  Attributes:
  - file: the underlying input object; it must provide next() and close().
  - lineno: incremented before every read from `file', so blank and
    comment-only lines are counted as well.
  '''

  def __init__(self, fname, **opts):
    '''Opens `fname' through open_input_file() and applies the options.

    The `superize' keyword is forwarded to open_input_file() when it is
    true; all remaining keywords go to set_options(), which raises
    ValueError for unknown names.
    '''
    # Always remove "superize" from opts: an explicit falsy value
    # (superize=0) must not reach set_options(), which would reject it.
    superize = opts.pop("superize", 0)
    if superize:
      open_opts = { "superize" : superize }
    else:
      open_opts = {}
    self.file = open_input_file(fname, **open_opts)
    # Do NOT touch the "next_" field below unless you know what you're doing:
    self.set_next_proc(self.next_line)
    # field_filtering_proc field can be used to filter unwanted fields, or do
    # some additional transformations before final feed to the main iteration.
    self.field_filtering_proc = lambda flds : flds
    # Default fancy options:
    self.skip_blank_lines = True
    if opts:
      self.set_options(**opts)
    self.lineno = 0

  def __del__(self):
    # `file' is absent if opening failed or close() was already called.
    if getattr(self, "file", None):
      self.file.close()

  def close(self):
    '''Closes the underlying file (if still attached) and detaches it.'''
    if getattr(self, "file", None):
      self.file.close()
      del self.file

  def __iter__(self):
    return self

  def _next_uncommented(self):
    # Reads one physical line and returns the text before the comment
    # character. lineno is bumped before the read, as the public readers
    # always did.
    comment_char = getattr(self, "comment_char", "#")
    self.lineno += 1
    return self.file.next().split(comment_char)[0]

  def next_rec(self):
    '''Returns the next record as a list of whitespace-separated fields
    (after field_filtering_proc has been applied).
    Records that come out empty are skipped unless skip_blank_lines is
    false. The StopIteration raised by the underlying file at the end of
    input is propagated.'''
    while True:
      F = self.field_filtering_proc(self._next_uncommented().split())
      if len(F) > 0 or not self.skip_blank_lines:
        return F

  def next_line(self):
    '''Returns the next line as a single string: the comment part and
    trailing whitespace are removed, but the line is NOT split into fields.
    The string is passed through field_filtering_proc before being returned.
    Lines that come out empty are skipped unless skip_blank_lines is false.
    The StopIteration raised by the underlying file at the end of input is
    propagated.'''
    while True:
      F = self.field_filtering_proc(self._next_uncommented().rstrip())
      if len(F) > 0 or not self.skip_blank_lines:
        return F

  def set_next_proc(self, proc):
    '''Selects the procedure used by next(), e.g. self.next_line or
    self.next_rec.
    The procedure is stored via make_unbound_instance_method() and later
    called as self.next_(self) -- presumably so that the instance does not
    hold a strong reference to itself; confirm against wpylib.py.'''
    self.next_ = make_unbound_instance_method(proc)

  def next(self):
    '''Iterator step: dispatches to the procedure set by set_next_proc().'''
    return self.next_(self)

  def __next__(self):
    # Same iterator step under the name used by the newer iterator protocol.
    return self.next()

  def seek_text(self, regex=None, match=None):
    '''Seeks the file until a particular piece text is encountered.
    We ignore all comments.
    The `regex' argument can be either a regex string or a standard python
    regular expression object.
    If `regex' is not given (or is false), `match' must be a procedure
    taking the line text and returning a true value on the wanted line.

    Returns the matching line (as produced by next_line).
    Raises TypeError if neither `regex' nor `match' is given, and lets
    StopIteration through if the input ends without a match.'''

    if regex:
      # Same test as make_match_proc(): anything with a callable search()
      # is used as a compiled pattern, everything else is compiled.
      if hasattr(getattr(regex, "search", None), "__call__"):
        Regexp = regex
      else:
        Regexp = re.compile(regex)
      match_proc = lambda x: Regexp.search(x)
    elif match is not None:
      match_proc = match
    else:
      # Fail before consuming any input rather than on the first line.
      raise TypeError("seek_text() requires either `regex' or `match'")

    while True:
      L = self.next_line()
      if match_proc(L):
        return L

  def _capped_lines(self, kwd):
    # Returns an iterator over the remaining items of this reader, limited
    # to kwd["maxcount"] items when that key is present. islice is lazy and
    # never pulls more than the requested number of items.
    if "maxcount" in kwd:
      from itertools import islice
      return islice(self, max(kwd["maxcount"], 0))
    return self

  def read_floats(self, *cols, **kwd):
    """Quickly reads a set of floats from a text file.
    Returns a numpy array of the values in double precision.

    Example usage:
      >>> arr = text_input("/tmp/file.txt").read_floats(0, 2, 3)
    to read columns 1, 3, and 4 of the text file /tmp/file.txt, while disregarding
    comments.

    Additional keyword options:
    * maxcount: maximum number of records to be read

    The reader must yield strings (the default next_line procedure),
    because every item is split into fields here.
    """
    # float_fields extracts the desired columns and converts them to floats
    float_fields = lambda vals : [ float(vals[col]) for col in cols ]
    rslt = [ float_fields(vals.split()) for vals in self._capped_lines(kwd) ]
    # finally convert them to a numpy ndarray:
    return numpy.array(rslt)

  def read_items(self, *col_desc, **kwd):
    """Quickly reads a set of items from records of whitespace-separated fields
    in a text file.
    Returns a structured numpy array of the values read.

    Example usage:

      >>> arr = text_input("/tmp/file.txt").read_items(0, (2, int), (3, "S10", "Atom"))

    reads columns 1 (as floats, by default), 3 (as integers), and 4 (as strings of
    max length of 10, which field is named "Atom") from the text file /tmp/file.txt,
    while disregarding comments.

    If the tuple contains the third field, it is used as the name of the field;
    otherwise the fields are named f0, f1, f2, ....

    Preliminary ability to read in complex data has been added!
    Complex data (floating-point only) must be specified as a tuple of two columns
    containing the real and imaginary data, like this:
       ((2, 3), complex, 'ampl')
    or
       ((7, 9), complex)     # fine to interleave column with something else


    Additional keyword options:
    * deftype: default datatype
    * maxcount: maximum number of records to be read
    * end_line_match: a regular expression or test subroutine accepting a
      single argument (i.e. the text line) marking the end boundary of the list
      to be read (i.e. one line past the list contents)
    * last_line_match: a regular expression or test subroutine accepting a
      single argument (i.e. the text line) marking the last element of the list
      to be read

    end_line_match takes precedence if both match options are given.
    maxcount can be combined with either of them; reading stops at
    whichever limit is reached first.

    Raises ValueError for a column specification that is neither an
    integer nor a sequence of 1 to 3 elements.
    """
    deftype = kwd.get("deftype", float)

    cols = []   # (converter, column spec) pairs, one per output field
    flds = []   # (field name, numpy dtype) pairs of the structured dtype

    def register(col, fldname, typespec):
      # Registers one output field together with its string converter.
      dtype = numpy.dtype(typespec)
      t = dtype.type
      if issubclass(t, numpy.complexfloating):
        # Complex field: `col' is a (real column, imaginary column) pair
        # and each part is parsed with the matching real scalar type.
        t_elem = t().real.dtype.type
        conv_func = lambda v, c: t(t_elem(v[c[0]]) + 1j*t_elem(v[c[1]]))
      else:
        # other datatypes: much easier
        # Simply get the string, and use numpy to convert to the datatype
        # on-the-fly
        conv_func = lambda v, c: t(v[c])
      cols.append((conv_func, col))
      flds.append((fldname, dtype))

    for (i, c) in enumerate(col_desc):
      if isinstance(c, int):
        register(c, 'f' + str(i), deftype)
      elif len(c) == 1:
        register(c[0], 'f' + str(i), deftype)
      elif len(c) == 2:
        register(c[0], 'f' + str(i), c[1])
      elif len(c) == 3:
        register(c[0], c[2], c[1])
      else:
        raise ValueError("Invalid column specification: %s" % (c,))

    get_fields = lambda vals : tuple([ filt(vals,col) for (filt,col) in cols ])

    # Lazy source of lines; already honors maxcount if it was given.
    src_iter = self._capped_lines(kwd)
    if 'end_line_match' in kwd:
      rslt = []
      match = make_match_proc(kwd['end_line_match'])
      for vals in src_iter:
        # The boundary line itself is consumed but not stored.
        if match(vals):
          break
        rslt.append(get_fields(vals.split()))
    elif 'last_line_match' in kwd:
      rslt = []
      match = make_match_proc(kwd['last_line_match'])
      for vals in src_iter:
        # The matching line is the last element of the dataset.
        rslt.append(get_fields(vals.split()))
        if match(vals):
          break
    else:
      rslt = [ get_fields(vals.split()) for vals in src_iter ]
    # finally convert them to a numpy ndarray:
    return numpy.array(rslt, dtype=flds)

  # Sets fancy options
  def set_options(self, **opts):
    '''Sets one or more of the options expand_errorbar, skip_blank_lines
    and comment_char. Returns self.
    Raises ValueError on any other option name.'''
    for (o,v) in opts.items():
      if o == "expand_errorbar":
        self.expand_errorbar(v)
      elif o == "skip_blank_lines":
        self.skip_blank_lines = v
      elif o == "comment_char":
        self.comment_char = v
      else:
        raise ValueError("Invalid option: %s" % (o,))
    return self

  # Option for errorbar expansion:
  def expand_errorbar(self, v=True):
    '''Enables or disables errorbar expansion. Returns self.

    When enabled, field_filtering_proc is replaced by expand_errorbar_hook
    wrapped in im_ref (presumably a weak reference to the bound method, to
    avoid a reference cycle -- confirm against wpylib.py.im_weakref);
    when disabled, the identity filter is restored.'''
    from wpylib.py.im_weakref import im_ref
    if v:
      self.opt_expand_errorbar = True
      self.field_filtering_proc = im_ref(self.expand_errorbar_hook)
    else:
      self.opt_expand_errorbar = False
      self.field_filtering_proc = lambda flds : flds
    return self

  def expand_errorbar_hook(self, F):
    # A hook for field_filtering_proc for expanding errorbars:
    from pyqmc.stats.errorbar import expand
    return expand(F, flatten=True)


# Various sundry tools

def head(filename, maxlines):
  """Emulates UNIX head(1) command by reading at most `maxlines`
  text lines.
  It is intended for plain text files only!
  It also supports compressed files through text_input() facility.

  Returns a list of strings, one per line, in file order. Blank lines are
  kept; trailing whitespace (including the newline) is removed by the
  text_input reader. The list is shorter than `maxlines` if the file ends
  early, and empty if `maxlines` is zero or negative.
  """
  # comment_char='\0' effectively disables comment stripping for text files.
  F = text_input(filename, skip_blank_lines=False, comment_char='\0')
  out = []
  try:
    while len(out) < maxlines:
      out.append(F.next())
  except StopIteration:
    pass
  finally:
    # Release the file right away instead of waiting for garbage collection.
    F.close()
  return out


def tail(filename, maxlines):
  """Emulates UNIX tail(1) command by reading at most `maxlines`
  text lines at the end of a text file.
  It is intended for plain text files only!
  It also supports compressed files through text_input() facility.

  Returns a list of strings, one per line, in file order. Blank lines are
  kept; trailing whitespace (including the newline) is removed by the
  text_input reader. The list is empty if `maxlines` is zero or negative.

  Warning: this algorithm is far less optimal than head() since it
  has to read the whole file.
  It's okay for moderately small files.
  """
  from collections import deque
  # comment_char='\0' effectively disables comment stripping for text files.
  F = text_input(filename, skip_blank_lines=False, comment_char='\0')
  # A bounded deque discards the oldest line on overflow, so only the last
  # `maxlines` lines are ever held. With maxlen=0 nothing is kept, which
  # a plain out[-maxlines:] slice gets wrong (out[-0:] is the whole list).
  out = deque(maxlen=max(maxlines, 0))
  try:
    while True:
      out.append(F.next())
  except StopIteration:
    pass
  finally:
    # Release the file right away instead of waiting for garbage collection.
    F.close()
  return list(out)
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`#!/usr/bin/python`
* Allow comment character to be changed from "#" using the instance.comment_char attribute. 14 years ago			`# $Id: text_input.py,v 1.6 2011-09-16 21:21:23 wirawan Exp $`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`#`
			`# wpylib.iofmt.text_input module`
			`# Quick-n-dirty text input utilities`
			`#`
			`# Wirawan Purwanto`
			`# Created: 20090601`
			`#`
			`# Routines put here are commonly used in my own scripts.`
			`# They are not necessarily suitable for general-purpose uses; evaluate`
			`# your needs and see if they can them as well.`
			`#`
			`# 20090601: Created as pyqmc.utils.text_input .`
			`# 20100927: Moved to wpylib.iofmt.text_input .`
			`#`
			`# TODO`
			`# - book-keep the line number. Also note superfile must have its own line`
			`# number keeping.`
			`#`
			`"""`
			`Simple text-based input reader.`

			`This module is part of wpylib project.`
			`"""`

			`import re`
			`import numpy`

* text_input.read_items(): added option `end_line_match' and `last_line_match' (mutually exclusive options) to allow the dataset reading to end upon encountering certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used to work with the `maxcount' option; the shortest of the two (maxcount records read first, or end/last_line_match finds a match) will end the reading of the dataset. 13 years ago			`from wpylib.sugar import zip_gen`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`from wpylib.file.file_utils import open_input_file`
* Added text_input.close() method. 14 years ago			`from wpylib.py import make_unbound_instance_method`
* Added comment_char option for text_input object creation. * Memory leak bugfix: Use instance method weakref. 13 years ago			`import wpylib.py.im_weakref`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago
* text_input.read_items(): added option `end_line_match' and `last_line_match' (mutually exclusive options) to allow the dataset reading to end upon encountering certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used to work with the `maxcount' option; the shortest of the two (maxcount records read first, or end/last_line_match finds a match) will end the reading of the dataset. 13 years ago			`def make_match_proc(match):`
			`"""Make matching procedure: simple string becomes regexp,`
			`regexp remains regexp, and other callable object is passed as is."""`
			`if isinstance(match, basestring):`
			`Regexp = re.compile(match)`
			`match_proc = lambda x: Regexp.search(x)`
			`elif hasattr(getattr(match, "search", None), "__call__"):`
			`Regexp = match`
			`match_proc = lambda x: Regexp.search(x)`
			`else:`
			`match_proc = match`
			`return match_proc`


* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`class text_input(object):`
			`'''Text input reader with support for UNIX-style comment marker (#) and`
			`standard field separation (tabs and whitespaces).`
			`Used for quick and dirty data reading (iterating only once in forward`
			`direction without the need of rewinding or skipping).`
			`This object can be treated like an input file, e.g. used as an iterator,`
			`etc.`

			`To support more fancy options (e.g., rewinding), use "superize=1" when`
* In module text_input: Added head() and tail() to emulate the basic behavior of UNIX commands with the same name. Output lines are already split into a list of strings. 13 years ago			`creating the instance.`

			`Other valid constructor flags:`
			`- expand_errorbar (default: False)`
			`- comment_char (default: "#")`
			`- skip_blank_lines (default: True)`
			`'''`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago
			`def __init__(self, fname, **opts):`
			`if opts.get("superize", 0):`
			`open_opts = { "superize" : opts["superize"] }`
			`del opts["superize"]`
			`else:`
			`open_opts = {}`
			`self.file = open_input_file(fname, **open_opts)`
* Added set_next_proc() method to set which procedure should be called when "next()" method dispatcher is called. This approach should get rid of the problem with circular reference which causes the text_input instance not be erasable because of strong reference to `self' in `self.next.im_self'. 14 years ago			`# Do NOT touch the "next_" field below unless you know what you're doing:`
			`self.set_next_proc(self.next_line)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`# field_filtering_proc field can be used to filter unwanted fields, or do`
			`# some additional transformations before final feed to the main iteration.`
			`self.field_filtering_proc = lambda flds : flds`
			`# Default fancy options:`
			`self.skip_blank_lines = True`
			`if len(opts) > 0:`
			`self.set_options(**opts)`
* Introducing self.lineno counter for line numbers (including the blanks and comment lines). 14 years ago			`self.lineno = 0`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago
			`def __del__(self):`
			`if getattr(self, "file", None):`
			`self.file.close()`

* Added text_input.close() method. 14 years ago			`def close(self):`
			`if getattr(self, "file", None):`
			`self.file.close()`
			`del self.file`

* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`def __iter__(self):`
			`return self`

			`"""`
			`def next(self):`
			`while True:`
			`L = self.file.next()`
			`F = self.field_filtering_proc(L.split("#")[0].split())`
			`if len(F) > 0:`
			`return F`
			`"""`

			`def next_rec(self):`
			`'''Yields the next record, which is already separated into fields.'''`
* Allow comment character to be changed from "#" using the instance.comment_char attribute. 14 years ago			`comment_char = getattr(self, "comment_char", "#")`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`while True:`
* Introducing self.lineno counter for line numbers (including the blanks and comment lines). 14 years ago			`self.lineno += 1`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`L = self.file.next()`
* Allow comment character to be changed from "#" using the instance.comment_char attribute. 14 years ago			`F = self.field_filtering_proc(L.split(comment_char)[0].split())`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`if len(F) > 0 or not self.skip_blank_lines:`
			`return F`

			`def next_line(self):`
			`'''Yields the next line, which is already separated into fields.'''`
* Allow comment character to be changed from "#" using the instance.comment_char attribute. 14 years ago			`comment_char = getattr(self, "comment_char", "#")`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`while True:`
* Introducing self.lineno counter for line numbers (including the blanks and comment lines). 14 years ago			`self.lineno += 1`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`L = self.file.next()`
* Allow comment character to be changed from "#" using the instance.comment_char attribute. 14 years ago			`F = self.field_filtering_proc(L.split(comment_char)[0].rstrip())`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`if len(F) > 0 or not self.skip_blank_lines:`
			`return F`

* Added set_next_proc() method to set which procedure should be called when "next()" method dispatcher is called. This approach should get rid of the problem with circular reference which causes the text_input instance not be erasable because of strong reference to `self' in `self.next.im_self'. 14 years ago			`def set_next_proc(self, proc):`
			`self.next_ = make_unbound_instance_method(proc)`
			`def next(self):`
			`return self.next_(self)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago
			`def seek_text(self, regex=None, match=None):`
			`'''Seeks the file until a particular piece text is encountered.`
			`We ignore all comments.`
			The `regex' argument can be either a regex string or a standard python
			`regular expression object.'''`

			`if regex:`
* Changing all `isinstance(STUFF, str)' to `isinstance(STUFF, basestring)' for future-proofing this code. 13 years ago			`if isinstance(regex, basestring):`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`Regexp = re.compile(regex)`
			`else:`
			`Regexp = regex`
			`match_proc = lambda x: Regexp.search(x)`
			`else:`
			`match_proc = match`

			`while True:`
			`L = self.next_line()`
			`if match_proc(L):`
			`return L`


			`def read_floats(self, *cols, **kwd):`
			`"""Quickly reads a set of floats from a text file.`
			`Returns a numpy array of the values in double precision.`

			`Example usage:`
			`>>> arr = text_input("/tmp/file.txt").read_floats(0, 2, 3)`
			`to read columns 1, 3, and 4 of the text file /tmp/file.txt, while disregarding`
			`comments.`
			`"""`
			`# float_fields extracts the desired columns and converts them to floats`
			`float_fields = lambda vals : [ float(vals[col]) for col in cols ]`
			`if "maxcount" in kwd:`
			`rslt = [ float_fields(vals.split()) for (c,vals) in zip(xrange(kwd['maxcount']),self) ]`
			`else:`
			`rslt = [ float_fields(vals.split()) for vals in self ]`
			`# finally convert them to a numpy ndarray:`
			`return numpy.array(rslt)`

			`def read_items(self, *col_desc, **kwd):`
			`"""Quickly reads a set of items from records of whitespace-separated fields`
			`in a text file.`
			`Returns a structured numpy array of the values read.`

			`Example usage:`

			`>>> arr = text_input("/tmp/file.txt").read_items(0, (2, int), (3, "S10", "Atom"))`

			`reads columns 1 (as floats, by default), 3 (as integers), and 4 (as strings of`
			`max length of 10, which field is named "Atom") from the text file /tmp/file.txt,`
			`while disregarding comments.`

			`If the tuple contains the third field, it is used as the name of the field;`
			`otherwise the fields are named f0, f1, f2, ....`

* text_input.read_items(): added option `end_line_match' and `last_line_match' (mutually exclusive options) to allow the dataset reading to end upon encountering certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used to work with the `maxcount' option; the shortest of the two (maxcount records read first, or end/last_line_match finds a match) will end the reading of the dataset. 13 years ago			`Preliminary ability to read in complex data has been added!`
* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`Complex data (floating-point only) must be specified as a tuple of two columns`
			`containing the real and imaginary data, like this:`
			`((2, 3), complex, 'ampl')`
			`or`
			`((7, 9), complex) # fine to interleave column with something else`


* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`Additional keyword options:`
			`* deftype: default datatype`
			`* maxcount: maximum number of records to be read`
* text_input.read_items(): added option `end_line_match' and `last_line_match' (mutually exclusive options) to allow the dataset reading to end upon encountering certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used to work with the `maxcount' option; the shortest of the two (maxcount records read first, or end/last_line_match finds a match) will end the reading of the dataset. 13 years ago			`* end_line_match: a regular expression or test subroutine accepting a`
			`single argument (i.e. the text line) marking the end boundary of the list`
			`to be read (i.e. one line past the list contents)`
			`* last_line_match: a regular expression or test subroutine accepting a`
			`single argument (i.e. the text line) marking the last element of the list`
			`to be read`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago
			`"""`
			`deftype = kwd.get("deftype", float)`

* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`class register_item_t:`
			`flds = []`
			`cols = []`
			`complex_types = (complex, numpy.complexfloating)`
			`def add(self, col, fldname, type):`
			`dtype = numpy.dtype(type)`
			`t = dtype.type`
			`dsamp = t() # create a sample`
			`# Special handling for complex:`
			`# -- unfortunately this detection fails because even real`
			`# numbers have its 'imag' attribute:`
			`#dattrs = dir(dsamp)`
			`#if "imag" in dattrs and "real" in dattrs:`
			`if isinstance(dsamp, numpy.complexfloating):`
			`dtype_elem = dsamp.real.dtype`
			`t_elem = dtype_elem.type`
			`conv_func = lambda v, c: t(t_elem(v[c[0]]) + 1j*t_elem(v[c[1]]))`
			`self.cols.append((conv_func, col))`
			`self.flds.append((fldname, dtype))`
			`else:`
			`# other datatypes: much easier`
			`# Simply get the string, and use numpy to convert to the datatype`
			`# on-the-fly`
			`conv_func = lambda v, c: t(v[c])`
			`self.cols.append((conv_func, col))`
			`self.flds.append((fldname, dtype))`
			`reg = register_item_t()`

* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`for (i,c) in zip(xrange(len(col_desc)), col_desc):`
			`if type(c) == int:`
* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`reg.add(c, 'f' + str(i), deftype)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`elif len(c) == 1:`
* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`reg.add(c[0], 'f' + str(i), deftype)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`elif len(c) == 2:`
* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`reg.add(c[0], 'f' + str(i), c[1])`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`elif len(c) == 3:`
* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`reg.add(c[0], c[2], c[1])`
			`else:`
			`raise ValueError, \`
			`"Invalid column specification: %s" % (c,)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago
* Upgrade to allow reading complex-valued data directly. Please see the documentation of text_input.read_items for more detail. 15 years ago			`cols = reg.cols`
			`flds = reg.flds`
			`get_fields = lambda vals : tuple([ filt(vals,col) for (filt,col) in cols ])`
* text_input.read_items(): added option `end_line_match' and `last_line_match' (mutually exclusive options) to allow the dataset reading to end upon encountering certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used to work with the `maxcount' option; the shortest of the two (maxcount records read first, or end/last_line_match finds a match) will end the reading of the dataset. 13 years ago
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`if "maxcount" in kwd:`
* text_input.read_items(): added option `end_line_match' and `last_line_match' (mutually exclusive options) to allow the dataset reading to end upon encountering certain text pattern (or a more complicated match, if we specify a function for the option value). These options can be used to work with the `maxcount' option; the shortest of the two (maxcount records read first, or end/last_line_match finds a match) will end the reading of the dataset. 13 years ago			`src_iter = zip_gen(xrange(kwd['maxcount']),self)`
			`else:`
			`src_iter = enumerate(self)`
			`# FIXME below: zip() evaluates the function before the loop, thus may`
			`# eat a lot of memory.`
			`if 'end_line_match' in kwd:`
			`rslt = []`
			`match = make_match_proc(kwd['end_line_match'])`
			`for (c,vals) in src_iter:`
			`if match(vals):`
			`break`
			`rslt.append(get_fields(vals.split()))`
			`elif 'last_line_match' in kwd:`
			`rslt = []`
			`match = make_match_proc(kwd['end_line_match'])`
			`for (c,vals) in src_iter:`
			`rslt.append(get_fields(vals.split()))`
			`if match(vals):`
			`break`
			`elif "maxcount" in kwd:`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`#print "hello"`
			`rslt = [ get_fields(vals.split()) for (c,vals) in zip(xrange(kwd['maxcount']),self) ]`
			`else:`
			`rslt = [ get_fields(vals.split()) for vals in self ]`
			`#print rslt`
			`# finally convert them to a numpy ndarray:`
			`return numpy.array(rslt, dtype=flds)`

			`# Sets fancy options`
			`def set_options(self, **opts):`
			`for (o,v) in opts.iteritems():`
			`if o == "expand_errorbar":`
			`self.expand_errorbar(v)`
* Added comment_char option for text_input object creation. * Memory leak bugfix: Use instance method weakref. 13 years ago			`elif o == "skip_blank_lines":`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`self.skip_blank_lines = v`
* Added comment_char option for text_input object creation. * Memory leak bugfix: Use instance method weakref. 13 years ago			`elif o == "comment_char":`
			`self.comment_char = v`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`else:`
* Added comment_char option for text_input object creation. * Memory leak bugfix: Use instance method weakref. 13 years ago			`raise ValueError, "Invalid option: %s" % (o,)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`return self`

			`# Option for errorbar expansion:`
			`def expand_errorbar(self, v=True):`
			`'''Enables or disables errorbar expansion.'''`
* Added comment_char option for text_input object creation. * Memory leak bugfix: Use instance method weakref. 13 years ago			`from wpylib.py.im_weakref import im_ref`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`if v:`
			`self.opt_expand_errorbar = True`
* Added comment_char option for text_input object creation. * Memory leak bugfix: Use instance method weakref. 13 years ago			`self.field_filtering_proc = im_ref(self.expand_errorbar_hook)`
* Simple text input reader. Imported from pyqmc.utils.text_input . 15 years ago			`else:`
			`self.opt_expand_errorbar = False`
			`self.field_filtering_proc = lambda flds : flds`
			`return self`

			`def expand_errorbar_hook(self, F):`
			`# A hook for field_filtering_proc for expanding errorbars:`
			`from pyqmc.stats.errorbar import expand`
			`return expand(F, flatten=True)`
* In module text_input: Added head() and tail() to emulate the basic behavior of UNIX commands with the same name. Output lines are already split into a list of strings. 13 years ago

			`# Various sundry tools`

			`def head(filename, maxlines):`
			"""Emulates UNIX head(1) command by reading at most `maxlines`
			`text lines.`
			`It is intended for plain text files only!`
			`It also supports compressed files through text_input() facility.`
			`"""`
			`# head is easy to implement. But how about tail?`
			`F = text_input(filename, skip_blank_lines=False, comment_char='\0')`
			`out = []`
			`try:`
			`for x in xrange(maxlines):`
			`out.append(F.next())`
			`except StopIteration:`
			`pass`
			`return out`


			`def tail(filename, maxlines):`
			"""Emulates UNIX tail(1) command by reading at most `maxlines`
			`text lines at the end of a text file.`
			`It is intended for plain text files only!`
			`It also supports compressed files through text_input() facility.`

			`Warning: this algorithm is far less optimal than head() since it`
			`has to read the whole file.`
			`It's okay for moderately small files.`
			`"""`
			`F = text_input(filename, skip_blank_lines=False, comment_char='\0')`
			`out = []`
			`lines2read = max(2*maxlines, 100)`
			`try:`
			`while True:`
			`for x in xrange(lines2read):`
			`out.append(F.next())`
			`out = out[-maxlines:]`
			`except StopIteration:`
			`pass`
			`return out[-maxlines:]`