symbian-qemu-0.9.1-12/python-2.6.1/Tools/scripts/parseentities.py
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 #!/usr/local/bin/python
       
     2 """ Utility for parsing HTML entity definitions available from:
       
     3 
       
     4       http://www.w3.org/ as e.g.
       
     5       http://www.w3.org/TR/REC-html40/HTMLlat1.ent
       
     6 
       
     7     Input is read from stdin, output is written to stdout in form of a
       
     8     Python snippet defining a dictionary "entitydefs" mapping literal
       
     9     entity name to character or numeric entity.
       
    10 
       
    11     Marc-Andre Lemburg, mal@lemburg.com, 1999.
       
    12     Use as you like. NO WARRANTIES.
       
    13 
       
    14 """
       
    15 import re,sys
       
    16 import TextTools
       
    17 
       
    18 entityRE = re.compile('<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
       
    19 
       
    20 def parse(text,pos=0,endpos=None):
       
    21 
       
    22     pos = 0
       
    23     if endpos is None:
       
    24         endpos = len(text)
       
    25     d = {}
       
    26     while 1:
       
    27         m = entityRE.search(text,pos,endpos)
       
    28         if not m:
       
    29             break
       
    30         name,charcode,comment = m.groups()
       
    31         d[name] = charcode,comment
       
    32         pos = m.end()
       
    33     return d
       
    34 
       
    35 def writefile(f,defs):
       
    36 
       
    37     f.write("entitydefs = {\n")
       
    38     items = defs.items()
       
    39     items.sort()
       
    40     for name,(charcode,comment) in items:
       
    41         if charcode[:2] == '&#':
       
    42             code = int(charcode[2:-1])
       
    43             if code < 256:
       
    44                 charcode = "'\%o'" % code
       
    45             else:
       
    46                 charcode = repr(charcode)
       
    47         else:
       
    48             charcode = repr(charcode)
       
    49         comment = TextTools.collapse(comment)
       
    50         f.write("    '%s':\t%s,  \t# %s\n" % (name,charcode,comment))
       
    51     f.write('\n}\n')
       
    52 
       
    53 if __name__ == '__main__':
       
    54     if len(sys.argv) > 1:
       
    55         infile = open(sys.argv[1])
       
    56     else:
       
    57         infile = sys.stdin
       
    58     if len(sys.argv) > 2:
       
    59         outfile = open(sys.argv[2],'w')
       
    60     else:
       
    61         outfile = sys.stdout
       
    62     text = infile.read()
       
    63     defs = parse(text)
       
    64     writefile(outfile,defs)