symbian-qemu-0.9.1-12/python-2.6.1/Tools/scripts/parseentities.py
changeset 1 2fb8b9db1c86
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/symbian-qemu-0.9.1-12/python-2.6.1/Tools/scripts/parseentities.py	Fri Jul 31 15:01:17 2009 +0100
@@ -0,0 +1,64 @@
+#!/usr/local/bin/python
+""" Utility for parsing HTML entity definitions available from:
+
+      http://www.w3.org/ as e.g.
+      http://www.w3.org/TR/REC-html40/HTMLlat1.ent
+
+    Input is read from stdin, output is written to stdout in form of a
+    Python snippet defining a dictionary "entitydefs" mapping literal
+    entity name to character or numeric entity.
+
+    Marc-Andre Lemburg, mal@lemburg.com, 1999.
+    Use as you like. NO WARRANTIES.
+
+"""
+import re,sys
+import TextTools
+
+# Matches one SGML entity declaration of the form
+#     <!ENTITY name CDATA "value" -- comment -->
+# Groups: 1 = entity name, 2 = quoted value, 3 = comment text (may span
+# several lines; matched non-greedily up to the closing "-->").
+# A raw string keeps the backslash escapes intact for the regex engine.
+entityRE = re.compile(r'<!ENTITY +(\w+) +CDATA +"([^"]+)" +-- +((?:.|\n)+?) *-->')
+
+def parse(text,pos=0,endpos=None):
+
+    """ Collect the entity declarations found in text.
+
+        text is scanned with entityRE from index pos up to (but not
+        including) endpos; endpos defaults to len(text).
+
+        Returns a dictionary mapping each entity name to the tuple
+        (charcode, comment), both taken verbatim from the declaration.
+        If a name is declared more than once, the last declaration
+        wins.
+
+    """
+    if endpos is None:
+        endpos = len(text)
+    d = {}
+    # Start at the caller-supplied pos; resetting it to 0 here would
+    # make the parameter meaningless.
+    for m in entityRE.finditer(text,pos,endpos):
+        name,charcode,comment = m.groups()
+        d[name] = (charcode,comment)
+    return d
+
+def writefile(f,defs):
+
+    """ Write defs to the file object f as a Python dictionary literal.
+
+        defs maps entity names to (charcode, comment) tuples. The
+        output is a snippet of the form "entitydefs = { ... }" with one
+        entry per line, sorted by entity name, and the comment appended
+        as a trailing "#" comment.
+
+        Numeric character references ("&#NNN;" or "&#xHH;") below 256
+        are written as octal string escapes; all other values are
+        written as the repr() of the original charcode string.
+
+        Raises ValueError if a charcode starts with "&#" but does not
+        contain a valid number.
+
+    """
+    f.write("entitydefs = {\n")
+    # sorted() works whether dict.items() returns a list or a view.
+    for name,(charcode,comment) in sorted(defs.items()):
+        if charcode[:2] == '&#':
+            # Drop the leading '&#' and the trailing ';'.
+            digits = charcode[2:-1]
+            if digits[:1] in ('x','X'):
+                # Hexadecimal form, e.g. '&#xA0;'
+                code = int(digits[1:],16)
+            else:
+                code = int(digits)
+            if code < 256:
+                # Produces a quoted octal escape such as '\240'.
+                charcode = "'\\%o'" % code
+            else:
+                charcode = repr(charcode)
+        else:
+            charcode = repr(charcode)
+        # NOTE(review): TextTools.collapse presumably squeezes runs of
+        # whitespace so a multi-line comment fits on one line -- confirm
+        # against the TextTools package.
+        comment = TextTools.collapse(comment)
+        f.write("    '%s':\t%s,  \t# %s\n" % (name,charcode,comment))
+    f.write('\n}\n')
+
+if __name__ == '__main__':
+    # Usage: parseentities.py [infile [outfile]]
+    # A missing infile means stdin, a missing outfile means stdout.
+    if len(sys.argv) > 1:
+        infile = open(sys.argv[1])
+    else:
+        infile = sys.stdin
+    try:
+        if len(sys.argv) > 2:
+            outfile = open(sys.argv[2],'w')
+        else:
+            outfile = sys.stdout
+        try:
+            text = infile.read()
+            defs = parse(text)
+            writefile(outfile,defs)
+        finally:
+            # Close only files opened here, never the standard streams.
+            if outfile is not sys.stdout:
+                outfile.close()
+    finally:
+        if infile is not sys.stdin:
+            infile.close()