symbian-qemu-0.9.1-12/python-win32-2.6.1/lib/csv.py
author andy simpson <andrews@symbian.org>
Fri, 08 Oct 2010 16:52:59 +0100
changeset 119 931f2bbaabf7
parent 1 2fb8b9db1c86
permissions -rw-r--r--
work-a-round for Bug 3645 change ARGB->XRGB pixel format
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     2
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     3
csv.py - read/write/investigate CSV files
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     4
"""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     5
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     6
import re
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     7
from functools import reduce
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     8
from _csv import Error, __version__, writer, reader, register_dialect, \
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
     9
                 unregister_dialect, get_dialect, list_dialects, \
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    10
                 field_size_limit, \
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    11
                 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    12
                 __doc__
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    13
from _csv import Dialect as _Dialect
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    14
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    15
try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    16
    from cStringIO import StringIO
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    17
except ImportError:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    18
    from StringIO import StringIO
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    19
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    20
__all__ = [ "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    21
            "Error", "Dialect", "__doc__", "excel", "excel_tab",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    22
            "field_size_limit", "reader", "writer",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    23
            "register_dialect", "get_dialect", "list_dialects", "Sniffer",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    24
            "unregister_dialect", "__version__", "DictReader", "DictWriter" ]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    25
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    26
class Dialect:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    27
    """Describe an Excel dialect.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    28
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    29
    This must be subclassed (see csv.excel).  Valid attributes are:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    30
    delimiter, quotechar, escapechar, doublequote, skipinitialspace,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    31
    lineterminator, quoting.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    32
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    33
    """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    34
    _name = ""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    35
    _valid = False
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    36
    # placeholders
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    37
    delimiter = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    38
    quotechar = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    39
    escapechar = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    40
    doublequote = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    41
    skipinitialspace = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    42
    lineterminator = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    43
    quoting = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    44
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    45
    def __init__(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    46
        if self.__class__ != Dialect:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    47
            self._valid = True
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    48
        self._validate()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    49
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    50
    def _validate(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    51
        try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    52
            _Dialect(self)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    53
        except TypeError, e:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    54
            # We do this for compatibility with py2.3
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    55
            raise Error(str(e))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    56
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    57
class excel(Dialect):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    58
    """Describe the usual properties of Excel-generated CSV files."""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    59
    delimiter = ','
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    60
    quotechar = '"'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    61
    doublequote = True
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    62
    skipinitialspace = False
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    63
    lineterminator = '\r\n'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    64
    quoting = QUOTE_MINIMAL
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    65
register_dialect("excel", excel)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    66
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    67
class excel_tab(excel):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    68
    """Describe the usual properties of Excel-generated TAB-delimited files."""
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    69
    delimiter = '\t'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    70
register_dialect("excel-tab", excel_tab)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    71
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    72
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    73
class DictReader:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    74
    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    75
                 dialect="excel", *args, **kwds):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    76
        self._fieldnames = fieldnames   # list of keys for the dict
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    77
        self.restkey = restkey          # key to catch long rows
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    78
        self.restval = restval          # default value for short rows
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    79
        self.reader = reader(f, dialect, *args, **kwds)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    80
        self.dialect = dialect
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    81
        self.line_num = 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    82
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    83
    def __iter__(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    84
        return self
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    85
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    86
    @property
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    87
    def fieldnames(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    88
        if self._fieldnames is None:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    89
            try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    90
                self._fieldnames = self.reader.next()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    91
            except StopIteration:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    92
                pass
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    93
        self.line_num = self.reader.line_num
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    94
        return self._fieldnames
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    95
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    96
    @fieldnames.setter
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    97
    def fieldnames(self, value):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    98
        self._fieldnames = value
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
    99
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   100
    def next(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   101
        if self.line_num == 0:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   102
            # Used only for its side effect.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   103
            self.fieldnames
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   104
        row = self.reader.next()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   105
        self.line_num = self.reader.line_num
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   106
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   107
        # unlike the basic reader, we prefer not to return blanks,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   108
        # because we will typically wind up with a dict full of None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   109
        # values
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   110
        while row == []:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   111
            row = self.reader.next()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   112
        d = dict(zip(self.fieldnames, row))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   113
        lf = len(self.fieldnames)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   114
        lr = len(row)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   115
        if lf < lr:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   116
            d[self.restkey] = row[lf:]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   117
        elif lf > lr:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   118
            for key in self.fieldnames[lr:]:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   119
                d[key] = self.restval
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   120
        return d
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   121
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   122
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   123
class DictWriter:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   124
    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   125
                 dialect="excel", *args, **kwds):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   126
        self.fieldnames = fieldnames    # list of keys for the dict
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   127
        self.restval = restval          # for writing short dicts
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   128
        if extrasaction.lower() not in ("raise", "ignore"):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   129
            raise ValueError, \
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   130
                  ("extrasaction (%s) must be 'raise' or 'ignore'" %
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   131
                   extrasaction)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   132
        self.extrasaction = extrasaction
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   133
        self.writer = writer(f, dialect, *args, **kwds)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   134
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   135
    def _dict_to_list(self, rowdict):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   136
        if self.extrasaction == "raise":
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   137
            wrong_fields = [k for k in rowdict if k not in self.fieldnames]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   138
            if wrong_fields:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   139
                raise ValueError("dict contains fields not in fieldnames: " +
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   140
                                 ", ".join(wrong_fields))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   141
        return [rowdict.get(key, self.restval) for key in self.fieldnames]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   142
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   143
    def writerow(self, rowdict):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   144
        return self.writer.writerow(self._dict_to_list(rowdict))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   145
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   146
    def writerows(self, rowdicts):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   147
        rows = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   148
        for rowdict in rowdicts:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   149
            rows.append(self._dict_to_list(rowdict))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   150
        return self.writer.writerows(rows)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   151
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   152
# Guard Sniffer's type checking against builds that exclude complex()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   153
try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   154
    complex
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   155
except NameError:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   156
    complex = float
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   157
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   158
class Sniffer:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   159
    '''
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   160
    "Sniffs" the format of a CSV file (i.e. delimiter, quotechar)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   161
    Returns a Dialect object.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   162
    '''
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   163
    def __init__(self):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   164
        # in case there is more than one possible delimiter
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   165
        self.preferred = [',', '\t', ';', ' ', ':']
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   166
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   167
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   168
    def sniff(self, sample, delimiters=None):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   169
        """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   170
        Returns a dialect (or None) corresponding to the sample
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   171
        """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   172
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   173
        quotechar, delimiter, skipinitialspace = \
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   174
                   self._guess_quote_and_delimiter(sample, delimiters)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   175
        if not delimiter:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   176
            delimiter, skipinitialspace = self._guess_delimiter(sample,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   177
                                                                delimiters)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   178
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   179
        if not delimiter:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   180
            raise Error, "Could not determine delimiter"
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   181
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   182
        class dialect(Dialect):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   183
            _name = "sniffed"
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   184
            lineterminator = '\r\n'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   185
            quoting = QUOTE_MINIMAL
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   186
            # escapechar = ''
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   187
            doublequote = False
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   188
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   189
        dialect.delimiter = delimiter
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   190
        # _csv.reader won't accept a quotechar of ''
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   191
        dialect.quotechar = quotechar or '"'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   192
        dialect.skipinitialspace = skipinitialspace
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   193
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   194
        return dialect
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   195
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   196
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   197
    def _guess_quote_and_delimiter(self, data, delimiters):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   198
        """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   199
        Looks for text enclosed between two identical quotes
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   200
        (the probable quotechar) which are preceded and followed
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   201
        by the same character (the probable delimiter).
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   202
        For example:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   203
                         ,'some text',
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   204
        The quote with the most wins, same with the delimiter.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   205
        If there is no quotechar the delimiter can't be determined
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   206
        this way.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   207
        """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   208
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   209
        matches = []
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   210
        for restr in ('(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   211
                      '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   212
                      '(?P<delim>>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',  # ,".*?"
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   213
                      '(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   214
            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   215
            matches = regexp.findall(data)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   216
            if matches:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   217
                break
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   218
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   219
        if not matches:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   220
            return ('', None, 0) # (quotechar, delimiter, skipinitialspace)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   221
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   222
        quotes = {}
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   223
        delims = {}
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   224
        spaces = 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   225
        for m in matches:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   226
            n = regexp.groupindex['quote'] - 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   227
            key = m[n]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   228
            if key:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   229
                quotes[key] = quotes.get(key, 0) + 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   230
            try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   231
                n = regexp.groupindex['delim'] - 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   232
                key = m[n]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   233
            except KeyError:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   234
                continue
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   235
            if key and (delimiters is None or key in delimiters):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   236
                delims[key] = delims.get(key, 0) + 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   237
            try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   238
                n = regexp.groupindex['space'] - 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   239
            except KeyError:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   240
                continue
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   241
            if m[n]:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   242
                spaces += 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   243
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   244
        quotechar = reduce(lambda a, b, quotes = quotes:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   245
                           (quotes[a] > quotes[b]) and a or b, quotes.keys())
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   246
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   247
        if delims:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   248
            delim = reduce(lambda a, b, delims = delims:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   249
                           (delims[a] > delims[b]) and a or b, delims.keys())
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   250
            skipinitialspace = delims[delim] == spaces
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   251
            if delim == '\n': # most likely a file with a single column
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   252
                delim = ''
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   253
        else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   254
            # there is *no* delimiter, it's a single column of quoted data
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   255
            delim = ''
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   256
            skipinitialspace = 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   257
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   258
        return (quotechar, delim, skipinitialspace)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   259
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   260
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   261
    def _guess_delimiter(self, data, delimiters):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   262
        """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   263
        The delimiter /should/ occur the same number of times on
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   264
        each row. However, due to malformed data, it may not. We don't want
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   265
        an all or nothing approach, so we allow for small variations in this
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   266
        number.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   267
          1) build a table of the frequency of each character on every line.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   268
          2) build a table of freqencies of this frequency (meta-frequency?),
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   269
             e.g.  'x occurred 5 times in 10 rows, 6 times in 1000 rows,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   270
             7 times in 2 rows'
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   271
          3) use the mode of the meta-frequency to determine the /expected/
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   272
             frequency for that character
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   273
          4) find out how often the character actually meets that goal
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   274
          5) the character that best meets its goal is the delimiter
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   275
        For performance reasons, the data is evaluated in chunks, so it can
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   276
        try and evaluate the smallest portion of the data possible, evaluating
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   277
        additional chunks as necessary.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   278
        """
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   279
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   280
        data = filter(None, data.split('\n'))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   281
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   282
        ascii = [chr(c) for c in range(127)] # 7-bit ASCII
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   283
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   284
        # build frequency tables
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   285
        chunkLength = min(10, len(data))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   286
        iteration = 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   287
        charFrequency = {}
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   288
        modes = {}
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   289
        delims = {}
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   290
        start, end = 0, min(chunkLength, len(data))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   291
        while start < len(data):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   292
            iteration += 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   293
            for line in data[start:end]:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   294
                for char in ascii:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   295
                    metaFrequency = charFrequency.get(char, {})
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   296
                    # must count even if frequency is 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   297
                    freq = line.count(char)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   298
                    # value is the mode
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   299
                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   300
                    charFrequency[char] = metaFrequency
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   301
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   302
            for char in charFrequency.keys():
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   303
                items = charFrequency[char].items()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   304
                if len(items) == 1 and items[0][0] == 0:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   305
                    continue
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   306
                # get the mode of the frequencies
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   307
                if len(items) > 1:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   308
                    modes[char] = reduce(lambda a, b: a[1] > b[1] and a or b,
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   309
                                         items)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   310
                    # adjust the mode - subtract the sum of all
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   311
                    # other frequencies
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   312
                    items.remove(modes[char])
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   313
                    modes[char] = (modes[char][0], modes[char][1]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   314
                                   - reduce(lambda a, b: (0, a[1] + b[1]),
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   315
                                            items)[1])
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   316
                else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   317
                    modes[char] = items[0]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   318
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   319
            # build a list of possible delimiters
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   320
            modeList = modes.items()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   321
            total = float(chunkLength * iteration)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   322
            # (rows of consistent data) / (number of rows) = 100%
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   323
            consistency = 1.0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   324
            # minimum consistency threshold
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   325
            threshold = 0.9
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   326
            while len(delims) == 0 and consistency >= threshold:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   327
                for k, v in modeList:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   328
                    if v[0] > 0 and v[1] > 0:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   329
                        if ((v[1]/total) >= consistency and
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   330
                            (delimiters is None or k in delimiters)):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   331
                            delims[k] = v
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   332
                consistency -= 0.01
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   333
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   334
            if len(delims) == 1:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   335
                delim = delims.keys()[0]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   336
                skipinitialspace = (data[0].count(delim) ==
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   337
                                    data[0].count("%c " % delim))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   338
                return (delim, skipinitialspace)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   339
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   340
            # analyze another chunkLength lines
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   341
            start = end
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   342
            end += chunkLength
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   343
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   344
        if not delims:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   345
            return ('', 0)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   346
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   347
        # if there's more than one, fall back to a 'preferred' list
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   348
        if len(delims) > 1:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   349
            for d in self.preferred:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   350
                if d in delims.keys():
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   351
                    skipinitialspace = (data[0].count(d) ==
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   352
                                        data[0].count("%c " % d))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   353
                    return (d, skipinitialspace)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   354
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   355
        # nothing else indicates a preference, pick the character that
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   356
        # dominates(?)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   357
        items = [(v,k) for (k,v) in delims.items()]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   358
        items.sort()
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   359
        delim = items[-1][1]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   360
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   361
        skipinitialspace = (data[0].count(delim) ==
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   362
                            data[0].count("%c " % delim))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   363
        return (delim, skipinitialspace)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   364
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   365
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   366
    def has_header(self, sample):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   367
        # Creates a dictionary of types of data in each column. If any
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   368
        # column is of a single type (say, integers), *except* for the first
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   369
        # row, then the first row is presumed to be labels. If the type
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   370
        # can't be determined, it is assumed to be a string in which case
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   371
        # the length of the string is the determining factor: if all of the
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   372
        # rows except for the first are the same length, it's a header.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   373
        # Finally, a 'vote' is taken at the end for each column, adding or
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   374
        # subtracting from the likelihood of the first row being a header.
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   375
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   376
        rdr = reader(StringIO(sample), self.sniff(sample))
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   377
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   378
        header = rdr.next() # assume first row is header
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   379
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   380
        columns = len(header)
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   381
        columnTypes = {}
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   382
        for i in range(columns): columnTypes[i] = None
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   383
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   384
        checked = 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   385
        for row in rdr:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   386
            # arbitrary number of rows to check, to keep it sane
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   387
            if checked > 20:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   388
                break
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   389
            checked += 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   390
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   391
            if len(row) != columns:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   392
                continue # skip rows that have irregular number of columns
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   393
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   394
            for col in columnTypes.keys():
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   395
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   396
                for thisType in [int, long, float, complex]:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   397
                    try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   398
                        thisType(row[col])
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   399
                        break
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   400
                    except (ValueError, OverflowError):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   401
                        pass
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   402
                else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   403
                    # fallback to length of string
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   404
                    thisType = len(row[col])
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   405
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   406
                # treat longs as ints
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   407
                if thisType == long:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   408
                    thisType = int
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   409
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   410
                if thisType != columnTypes[col]:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   411
                    if columnTypes[col] is None: # add new column type
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   412
                        columnTypes[col] = thisType
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   413
                    else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   414
                        # type is inconsistent, remove column from
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   415
                        # consideration
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   416
                        del columnTypes[col]
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   417
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   418
        # finally, compare results against first row and "vote"
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   419
        # on whether it's a header
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   420
        hasHeader = 0
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   421
        for col, colType in columnTypes.items():
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   422
            if type(colType) == type(0): # it's a length
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   423
                if len(header[col]) != colType:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   424
                    hasHeader += 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   425
                else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   426
                    hasHeader -= 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   427
            else: # attempt typecast
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   428
                try:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   429
                    colType(header[col])
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   430
                except (ValueError, TypeError):
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   431
                    hasHeader += 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   432
                else:
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   433
                    hasHeader -= 1
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   434
2fb8b9db1c86 Initial QEMU (symbian-qemu-0.9.1-12) import
martin.trojer@nokia.com
parents:
diff changeset
   435
        return hasHeader > 0