equal
deleted
inserted
replaced
|
1 #!/usr/bin/env python |
|
2 |
|
3 """ |
|
For each argument on the command line, look for it in the set of all Unicode
names. Arguments are treated as case-insensitive regular expressions, e.g.:

    % find-uname 'small letter a$' 'horizontal line'
    *** small letter a$ matches ***
    LATIN SMALL LETTER A (97)
    COMBINING LATIN SMALL LETTER A (867)
    CYRILLIC SMALL LETTER A (1072)
    PARENTHESIZED LATIN SMALL LETTER A (9372)
    CIRCLED LATIN SMALL LETTER A (9424)
    FULLWIDTH LATIN SMALL LETTER A (65345)
    *** horizontal line matches ***
    HORIZONTAL LINE EXTENSION (9135)
|
17 """ |
|
18 |
|
19 import unicodedata |
|
20 import sys |
|
21 import re |
|
22 |
|
def main(args):
    """Print every Unicode character name matched by each pattern in *args*.

    Each element of ``args`` is compiled as a case-insensitive regular
    expression and searched for (``re.search``, so unanchored unless the
    pattern anchors itself) in the name of every code point from 0 through
    ``sys.maxunicode``.  For each pattern with at least one hit, a
    ``*** <pattern> matches ***`` header is printed, followed by one
    ``NAME (codepoint)`` line per match with the code point in decimal.
    Patterns with no hits print nothing.

    Works on both Python 2 and Python 3.

    Raises:
        re.error: if an argument is not a valid regular expression.
    """
    # Python 3 has no ``unichr`` (``chr`` covers the full range there), so
    # pick whichever builtin exists instead of failing with NameError.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    # Build the (code point, name) table once; it is reused for every pattern.
    unicode_names = []
    for codepoint in range(sys.maxunicode + 1):
        try:
            unicode_names.append(
                (codepoint, unicodedata.name(to_char(codepoint))))
        except ValueError:  # no name for the character
            pass

    for arg in args:
        pattern = re.compile(arg, re.I)
        matches = [(codepoint, name) for (codepoint, name) in unicode_names
                   if pattern.search(name) is not None]
        if matches:
            # A single pre-formatted argument makes ``print`` emit the same
            # text whether it is the Python 2 statement or the Python 3
            # function.
            print("*** %s matches ***" % arg)
            for (codepoint, name) in matches:
                print("%s (%d)" % (name, codepoint))
|
38 |
|
if __name__ == "__main__":
    # Run as a script: every command-line argument after the program name is
    # handed to main() as one pattern.
    cli_patterns = sys.argv[1:]
    main(cli_patterns)