symbian-qemu-0.9.1-12/python-2.6.1/Demo/scripts/markov.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 #! /usr/bin/env python
       
     2 
       
     3 class Markov:
       
     4     def __init__(self, histsize, choice):
       
     5         self.histsize = histsize
       
     6         self.choice = choice
       
     7         self.trans = {}
       
     8     def add(self, state, next):
       
     9         if not self.trans.has_key(state):
       
    10             self.trans[state] = [next]
       
    11         else:
       
    12             self.trans[state].append(next)
       
    13     def put(self, seq):
       
    14         n = self.histsize
       
    15         add = self.add
       
    16         add(None, seq[:0])
       
    17         for i in range(len(seq)):
       
    18             add(seq[max(0, i-n):i], seq[i:i+1])
       
    19         add(seq[len(seq)-n:], None)
       
    20     def get(self):
       
    21         choice = self.choice
       
    22         trans = self.trans
       
    23         n = self.histsize
       
    24         seq = choice(trans[None])
       
    25         while 1:
       
    26             subseq = seq[max(0, len(seq)-n):]
       
    27             options = trans[subseq]
       
    28             next = choice(options)
       
    29             if not next: break
       
    30             seq = seq + next
       
    31         return seq
       
    32 
       
    33 def test():
       
    34     import sys, string, random, getopt
       
    35     args = sys.argv[1:]
       
    36     try:
       
    37         opts, args = getopt.getopt(args, '0123456789cdw')
       
    38     except getopt.error:
       
    39         print 'Usage: markov [-#] [-cddqw] [file] ...'
       
    40         print 'Options:'
       
    41         print '-#: 1-digit history size (default 2)'
       
    42         print '-c: characters (default)'
       
    43         print '-w: words'
       
    44         print '-d: more debugging output'
       
    45         print '-q: no debugging output'
       
    46         print 'Input files (default stdin) are split in paragraphs'
       
    47         print 'separated blank lines and each paragraph is split'
       
    48         print 'in words by whitespace, then reconcatenated with'
       
    49         print 'exactly one space separating words.'
       
    50         print 'Output consists of paragraphs separated by blank'
       
    51         print 'lines, where lines are no longer than 72 characters.'
       
    52     histsize = 2
       
    53     do_words = 0
       
    54     debug = 1
       
    55     for o, a in opts:
       
    56         if '-0' <= o <= '-9': histsize = eval(o[1:])
       
    57         if o == '-c': do_words = 0
       
    58         if o == '-d': debug = debug + 1
       
    59         if o == '-q': debug = 0
       
    60         if o == '-w': do_words = 1
       
    61     if not args: args = ['-']
       
    62     m = Markov(histsize, random.choice)
       
    63     try:
       
    64         for filename in args:
       
    65             if filename == '-':
       
    66                 f = sys.stdin
       
    67                 if f.isatty():
       
    68                     print 'Sorry, need stdin from file'
       
    69                     continue
       
    70             else:
       
    71                 f = open(filename, 'r')
       
    72             if debug: print 'processing', filename, '...'
       
    73             text = f.read()
       
    74             f.close()
       
    75             paralist = string.splitfields(text, '\n\n')
       
    76             for para in paralist:
       
    77                 if debug > 1: print 'feeding ...'
       
    78                 words = string.split(para)
       
    79                 if words:
       
    80                     if do_words: data = tuple(words)
       
    81                     else: data = string.joinfields(words, ' ')
       
    82                     m.put(data)
       
    83     except KeyboardInterrupt:
       
    84         print 'Interrupted -- continue with data read so far'
       
    85     if not m.trans:
       
    86         print 'No valid input files'
       
    87         return
       
    88     if debug: print 'done.'
       
    89     if debug > 1:
       
    90         for key in m.trans.keys():
       
    91             if key is None or len(key) < histsize:
       
    92                 print repr(key), m.trans[key]
       
    93         if histsize == 0: print repr(''), m.trans['']
       
    94         print
       
    95     while 1:
       
    96         data = m.get()
       
    97         if do_words: words = data
       
    98         else: words = string.split(data)
       
    99         n = 0
       
   100         limit = 72
       
   101         for w in words:
       
   102             if n + len(w) > limit:
       
   103                 print
       
   104                 n = 0
       
   105             print w,
       
   106             n = n + len(w) + 1
       
   107         print
       
   108         print
       
   109 
       
   110 def tuple(list):
       
   111     if len(list) == 0: return ()
       
   112     if len(list) == 1: return (list[0],)
       
   113     i = len(list)//2
       
   114     return tuple(list[:i]) + tuple(list[i:])
       
   115 
       
   116 if __name__ == "__main__":
       
   117     test()