author | William Roberts <williamr@symbian.org> |
Fri, 29 May 2009 14:13:23 +0100 | |
changeset 1 | 4a4ca5a019bb |
parent 0 | 02cd6b52f378 |
permissions | -rw-r--r-- |
0
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
1 |
#
# TODO:
#
# - Implement SlowMappers for the expat builtin encodings (for which there
#   are no .enc files), e.g. UTF-16, US-ASCII, ISO-8859-1.
# - Instead of parsing the .xml file with XML::Encoding, we should use XS.
#   If this will not be implemented for a while, we could try reading the
#   .enc file directly, instead of the .xml file.
#   I started writing XML::UM::EncParser to do this (see EOF), but got stuck.
#
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
11 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
12 |
use strict; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
13 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
14 |
package XML::UM::SlowMapper; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
15 |
use Carp; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
16 |
use XML::Encoding; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
17 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
18 |
use vars qw{ $ENCDIR %DEFAULT_ASCII_MAPPINGS };

# Regex source (not yet compiled) that matches one UTF-8 encoded character,
# selected by its lead byte: 1 byte for 0x00-0xBF, 2 bytes for 0xC0-0xDF,
# 3 bytes for 0xE0-0xEF and 4 bytes for 0xF0-0xFF.
my $UTFCHAR = '[\\x00-\\xBF]|[\\xC0-\\xDF].|[\\xE0-\\xEF]..|[\\xF0-\\xFF]...';

#
# The directory that contains the .xml files that come with XML::Encoding.
# Include the terminating '\' or '/' !!
#
$ENCDIR = "/home1/enno/perlModules/XML-Encoding-1.01/maps/";
#$ENCDIR = "c:\\src\\perl\\xml\\XML-Encoding-1.01\\maps\\";

#
# From xmlparse.h in expat distribution:
#
# Expat places certain restrictions on the encodings that are supported
# using this mechanism.
#
# 1. Every ASCII character that can appear in a well-formed XML document,
#    other than the characters
#
#    $@\^`{}~
#
#    must be represented by a single byte, and that byte must be the
#    same byte that represents that character in ASCII.
#
# [end of excerpt]

#?? Which ASCII characters can appear in a well-formed XML document ??

# Identity mapping for all ASCII codes 0 - 127, except
# 36,64,92,94,96,123,125,126 i.e. $@\^`{}~
%DEFAULT_ASCII_MAPPINGS = ();
{
    my %excluded = map { $_ => 1 } (36, 64, 92, 94, 96, 123, 125, 126);

    for my $code (0 .. 127)
    {
        next if $excluded{$code};

        my $char = chr ($code);
        $DEFAULT_ASCII_MAPPINGS{$char} = $char;
    }
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
51 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
52 |
# Constructor. The key/value pairs that follow the class name become the
# fields of the new object (the rest of this package reads Encoding and
# Factory). The encoding definition is loaded right away by calling
# read_encoding_file on the new object.
sub new
{
    my $class = shift;
    my %fields = @_;

    my $self = bless \%fields, $class;
    $self->read_encoding_file;

    return $self;
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
61 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
62 |
# Detaches this mapper from its factory so that the circular reference
# between the two can be garbage collected, and drops the cached Encode
# entry.
#
# The original code called $factory->dispose_mapper, a method that
# XML::UM::SlowMapperFactory does not define, so dispose always died.
# A factory that does provide dispose_mapper is still called that way;
# otherwise the factory is asked to dispose of this mapper's encoding.
# Calling dispose more than once is harmless.
sub dispose
{
    my $self = shift;

    my $factory = $self->{Factory};
    if (defined $factory)
    {
        if ($factory->can ('dispose_mapper'))
        {
            $factory->dispose_mapper ($self);
        }
        else
        {
            $factory->dispose_encoding ($self->{Encoding});
        }
    }

    # Make sure the back reference is gone even if the factory left it.
    delete $self->{Factory};
    delete $self->{Encode};
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
68 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
69 |
# Reads the XML file that contains the encoding definition.
# These files come with XML::Encoding.
#
# Reads $self->{Encoding} and fills in:
#   $self->{EncMapName} - 'US-ASCII', or whatever XML::Encoding's parsefile
#                         returns for the encoding file
#   $self->{Map}        - hash ref, keyed by the result of
#                         XML::UM::unicode_to_utf8, giving the byte
#                         sequence in the target encoding
sub read_encoding_file
{
    #?? This should parse the .enc files (the .xml files are not installed) !!

    my ($self) = @_;
    my $encoding = $self->{Encoding};

    # There is no .enc (or .xml) file for US-ASCII, but the mapping is simple
    # so here it goes...
    if ($encoding eq 'US-ASCII')
    {
        $self->{EncMapName} = 'US-ASCII';
        # NOTE: this is the shared package-level hash, not a copy.
        $self->{Map} = \%DEFAULT_ASCII_MAPPINGS;	# I hope this is right
        return;
    }

    my $file = $self->find_encoding_file ($encoding);

    # Start from the ASCII identity mappings; the handlers below add to them.
    my %uni = %DEFAULT_ASCII_MAPPINGS;
    my $prefix = "";
    my $DIR = "file:$ENCDIR";

    # Use Class->new rather than "new Class": this package defines its own
    # 'new', which makes the indirect object form fragile to parse.
    my $enc = XML::Encoding->new (
        Handlers => {
            Init =>
                sub {
                    my $base = shift->base ($DIR);
                }
        },

        # Appends the character for the given byte value to the prefix.
        PushPrefixFcn =>
            sub {
                $prefix .= chr (shift);
                undef;
            },

        # Removes the last character from the prefix.
        PopPrefixFcn =>
            sub {
                chop $prefix;
                undef;
            },

        # Maps $len consecutive code values, starting at $code_point, to
        # the current prefix followed by consecutive bytes from $byte on.
        RangeSetFcn =>
            sub {
                my ($byte, $code_point, $len) = @_;
                while ($len--)
                {
                    $uni{XML::UM::unicode_to_utf8 ($code_point)} =
                        $prefix . chr ($byte++);
                    $code_point++;
                }
                undef;
            });

    $self->{EncMapName} = $enc->parsefile ($file);

#print "Parsed Encoding " . $self->{Encoding} . " MapName=" . $self->{EncMapName} . "\n";

    $self->{Map} = \%uni;
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
128 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
129 |
# Returns the path of the .xml encoding definition for encoding name $enc:
# $ENCDIR, followed by the lower-cased name, followed by ".xml".
sub find_encoding_file
{
    my $self = shift;
    my ($enc) = @_;

    # .xml filename is lower case
    return $ENCDIR . lc ($enc) . ".xml";
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
135 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
136 |
# Returns a closure (method) that converts a UTF-8 encoded string to an
# encoded byte sequence.
#
# Options:
#   EncodeUnmapped - code ref called with each UTF-8 character that has no
#                    entry in this mapper's Map; its return value is used
#                    in place of that character.
#                    Defaults to \&XML::UM::encode_unmapped_dec.
#
# The returned closure takes one string and returns the converted copy.
# It is an ordinary closure with a precompiled pattern; the earlier
# version assembled the same code as a string and eval'ed it on each call.
sub get_encode
{
    my ($self, %hash) = @_;
    my $MAP = $self->{Map};
    my $ENCODE_UNMAPPED = $hash{EncodeUnmapped} || \&XML::UM::encode_unmapped_dec;

    # One UTF-8 character, captured in $1. /s lets '.' match "\n" bytes too.
    my $utf_char = qr/($UTFCHAR)/s;

    return sub {
        my $str = shift;
        $str =~ s{$utf_char}
                 {defined $MAP->{$1} ? $MAP->{$1} : $ENCODE_UNMAPPED->($1)}eg;
        $str;
    };
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
158 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
159 |
#
# Optimized version for when the encoding is UTF-8.
# (In that case no conversion takes place.)
#
package XML::UM::SlowMapper::UTF8;
use vars qw{ @ISA };
@ISA = ('XML::UM::SlowMapper');

# Nothing to load for UTF-8; overrides the inherited method with a no-op.
sub read_encoding_file
{
    # ignore it
    return;
}

# Returns the encode function for UTF-8, which is the identity function.
sub get_encode
{
    return \&dont_convert;
}

# Returns its first argument unchanged.
sub dont_convert	# static
{
    my ($str) = @_;
    return $str;
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
181 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
182 |
package XML::UM::SlowMapperFactory; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
183 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
184 |
# Constructor. The key/value pairs that follow the class name become the
# fields of the new factory object.
sub new
{
    my $class = shift;
    my %fields = @_;

    return bless \%fields, $class;
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
189 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
190 |
# Returns the encode function for the encoding named by the Encoding
# option. Looks up the mapper for that encoding with get_mapper and
# passes all options on to the mapper's own get_encode.
sub get_encode
{
    my $self = shift;
    my %options = @_;

    return $self->get_mapper ($options{Encoding})->get_encode (%options);
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
198 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
199 |
# Returns the mapper object for $encoding, creating it on first use and
# caching it in $self->{Mapper}. "UTF-8" gets the pass-through
# XML::UM::SlowMapper::UTF8; every other name gets an XML::UM::SlowMapper.
#
# Each mapper is given a Factory reference back to $self, which creates
# a reference cycle (see dispose_encoding).
#
# Uses Class->new rather than "new Class (...)": this package defines its
# own 'new', which makes the indirect object form fragile to parse.
sub get_mapper
{
    my ($self, $encoding) = @_;

    $self->{Mapper}->{$encoding} ||= do {
        my $class = $encoding eq "UTF-8"
                  ? 'XML::UM::SlowMapper::UTF8'
                  : 'XML::UM::SlowMapper';

        $class->new (Encoding => $encoding, Factory => $self);
    };
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
209 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
210 |
#
# Prepare for garbage collection (remove circular refs)
#
# Drops the cached mapper for $encoding, if there is one, together with
# the mapper's reference back to this factory.
#
sub dispose_encoding
{
    my $self = shift;
    my ($encoding) = @_;

    my $cached = $self->{Mapper}->{$encoding};
    if (defined $cached)
    {
        delete $cached->{Factory};
        return delete $self->{Mapper}->{$encoding};
    }

    return;
}
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
222 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
223 |
package XML::UM; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
224 |
use Carp; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
225 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
226 |
use vars qw{ $FACTORY %XML_MAPPING_CRITERIA }; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
227 |
$FACTORY = XML::UM::SlowMapperFactory->new; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
228 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
229 |
sub get_encode # static |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
230 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
231 |
$FACTORY->get_encode (@_); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
232 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
233 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
234 |
sub dispose_encoding # static |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
235 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
236 |
$FACTORY->dispose_encoding (@_); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
237 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
238 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
239 |
# Convert UTF-8 byte sequence to Unicode index; then to '&#xNN;' string |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
240 |
sub encode_unmapped_hex # static |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
241 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
242 |
my $n = utf8_to_unicode (shift); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
243 |
sprintf ("&#x%X;", $n); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
244 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
245 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
246 |
sub encode_unmapped_dec # static |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
247 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
248 |
my $n = utf8_to_unicode (shift); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
249 |
"&#$n;" |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
250 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
251 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
252 |
# Converts a UTF-8 byte sequence that represents one character, |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
253 |
# to its Unicode index. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
254 |
sub utf8_to_unicode # static |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
255 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
256 |
my $str = shift; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
257 |
my $len = length ($str); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
258 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
259 |
if ($len == 1) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
260 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
261 |
return ord ($str); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
262 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
263 |
if ($len == 2) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
264 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
265 |
my @n = unpack "C2", $str; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
266 |
return (($n[0] & 0x3f) << 6) + ($n[1] & 0x3f); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
267 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
268 |
elsif ($len == 3) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
269 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
270 |
my @n = unpack "C3", $str; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
271 |
return (($n[0] & 0x1f) << 12) + (($n[1] & 0x3f) << 6) + |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
272 |
($n[2] & 0x3f); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
273 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
274 |
elsif ($len == 4) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
275 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
276 |
my @n = unpack "C4", $str; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
277 |
return (($n[0] & 0x0f) << 18) + (($n[1] & 0x3f) << 12) + |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
278 |
(($n[2] & 0x3f) << 6) + ($n[3] & 0x3f); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
279 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
280 |
else |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
281 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
282 |
croak "bad UTF8 sequence [$str] hex=" . hb($str); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
283 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
284 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
285 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
286 |
# Converts a Unicode character index to the byte sequence |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
287 |
# that represents that character in UTF-8. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
288 |
sub unicode_to_utf8 # static |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
289 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
290 |
my $n = shift; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
291 |
if ($n < 0x80) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
292 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
293 |
return chr ($n); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
294 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
295 |
elsif ($n < 0x800) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
296 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
297 |
return pack ("CC", (($n >> 6) | 0xc0), (($n & 0x3f) | 0x80)); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
298 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
299 |
elsif ($n < 0x10000) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
300 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
301 |
return pack ("CCC", (($n >> 12) | 0xe0), ((($n >> 6) & 0x3f) | 0x80), |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
302 |
(($n & 0x3f) | 0x80)); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
303 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
304 |
elsif ($n < 0x110000) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
305 |
{ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
306 |
return pack ("CCCC", (($n >> 18) | 0xf0), ((($n >> 12) & 0x3f) | 0x80), |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
307 |
((($n >> 6) & 0x3f) | 0x80), (($n & 0x3f) | 0x80)); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
308 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
309 |
croak "number [$n] is too large for Unicode in \&unicode_to_utf8"; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
310 |
} |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
311 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
312 |
#?? The following package is unfinished. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
313 |
#?? It should parse the .enc file and create an array that maps |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
314 |
#?? Unicode-index to encoded-str. I got stuck... |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
315 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
316 |
# package XML::UM::EncParser; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
317 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
318 |
# sub new |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
319 |
# { |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
320 |
# my ($class, %hash) = @_; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
321 |
# my $self = bless \%hash, $class; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
322 |
# $self; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
323 |
# } |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
324 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
325 |
# sub parse |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
326 |
# { |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
327 |
# my ($self, $filename) = @_; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
328 |
# open (FILE, $filename) || die "can't open .enc file $filename"; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
329 |
# binmode (FILE); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
330 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
331 |
# my $buf; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
332 |
# read (FILE, $buf, 4 + 40 + 2 + 2 + 1024); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
333 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
334 |
# my ($magic, $name, $pfsize, $bmsize, @map) = unpack ("NA40nnN256", $buf); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
335 |
# printf "magic=%04x name=$name pfsize=$pfsize bmsize=$bmsize\n", $magic; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
336 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
337 |
# if ($magic != 0xFEEBFACE) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
338 |
# { |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
339 |
# close FILE; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
340 |
# die sprintf ("bad magic number [0x%08X] in $filename, expected 0xFEEBFACE", $magic); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
341 |
# } |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
342 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
343 |
# for (my $i = 0; $i < 256; $i++) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
344 |
# { |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
345 |
# printf "[%d]=%d ", $i, $map[$i]; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
346 |
# print "\n" if ($i % 8 == 7); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
347 |
# } |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
348 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
349 |
# for (my $i = 0; $i < $pfsize; $i++) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
350 |
# { |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
351 |
# print "----- PrefixMap $i ----\n"; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
352 |
# read (FILE, $buf, 2 + 2 + 32 + 32); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
353 |
# my ($min, $len, $bmap_start, @ispfx) = unpack ("CCnC64", $buf); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
354 |
# my (@ischar) = splice @ispfx, 32, 32, (); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
355 |
# #?? could use b256 instead of C32 for bitvector a la vec() |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
356 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
357 |
# print "ispfx=@ispfx\n"; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
358 |
# print "ischar=@ischar\n"; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
359 |
# $len = 256 if $len == 0; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
360 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
361 |
# print " min=$min len=$len bmap_start=$bmap_start\n"; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
362 |
# } |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
363 |
# |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
364 |
# close FILE; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
365 |
# } |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
366 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
367 |
1; # package return code |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
368 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
369 |
__END__ |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
370 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
371 |
=head1 NAME |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
372 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
373 |
XML::UM - Convert UTF-8 strings to any encoding supported by XML::Encoding |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
374 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
375 |
=head1 SYNOPSIS |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
376 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
377 |
use XML::UM; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
378 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
379 |
# Set directory with .xml files that come with the XML::Encoding distribution |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
380 |
# Always include the trailing slash! |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
381 |
$XML::UM::ENCDIR = '/home1/enno/perlModules/XML-Encoding-1.01/maps/'; |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
382 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
383 |
# Create the encoding routine |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
384 |
my $encode = XML::UM::get_encode ( |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
385 |
Encoding => 'ISO-8859-2', |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
386 |
EncodeUnmapped => \&XML::UM::encode_unmapped_dec); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
387 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
388 |
# Convert a string from UTF-8 to the specified Encoding |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
389 |
my $encoded_str = $encode->($utf8_str); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
390 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
391 |
# Remove circular references for garbage collection |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
392 |
XML::UM::dispose_encoding ('ISO-8859-2'); |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
393 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
394 |
=head1 DESCRIPTION |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
395 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
396 |
This module provides methods to convert UTF-8 strings to any XML encoding |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
397 |
that L<XML::Encoding> supports. It creates mapping routines from the .xml |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
398 |
files that can be found in the maps/ directory in the L<XML::Encoding> |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
399 |
distribution. Note that the XML::Encoding distribution does install the |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
400 |
.enc files in your perl directory, but not the .xml files they were created |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
401 |
from. That's why you have to specify $ENCDIR as in the SYNOPSIS. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
402 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
403 |
This implementation uses the XML::Encoding class to parse the .xml |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
404 |
file and creates a hash that maps UTF-8 characters (each consisting of up |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
405 |
to 4 bytes) to their equivalent byte sequence in the specified encoding. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
406 |
Note that large mappings may consume a lot of memory! |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
407 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
408 |
Future implementations may parse the .enc files directly, or |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
409 |
do the conversions entirely in XS (i.e. C code.) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
410 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
411 |
=head1 get_encode (Encoding => STRING, EncodeUnmapped => SUB) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
412 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
413 |
The central entry point to this module is the XML::UM::get_encode() method. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
414 |
It forwards the call to the global $XML::UM::FACTORY, which is defined as |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
415 |
an instance of XML::UM::SlowMapperFactory by default. Override this variable |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
416 |
to plug in your own mapper factory. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
417 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
418 |
The XML::UM::SlowMapperFactory creates an instance of XML::UM::SlowMapper |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
419 |
(and caches it for subsequent use) that reads in the .xml encoding file and |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
420 |
creates a hash that maps UTF-8 characters to encoded characters. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
421 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
422 |
The get_encode() method of XML::UM::SlowMapper is called, finally, which |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
423 |
generates an anonymous subroutine that uses the hash to convert |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
424 |
multi-character UTF-8 blocks to the proper encoding. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
425 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
426 |
=head1 dispose_encoding ($encoding_name) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
427 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
428 |
Call this to free the memory used by the SlowMapper for a specific encoding. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
429 |
Note that in order to free the big conversion hash, the user should no longer |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
430 |
have references to the subroutines generated by get_encode(). |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
431 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
432 |
The parameters to the get_encode() method (defined as name/value pairs) are: |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
433 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
434 |
=over 4 |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
435 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
436 |
=item * Encoding |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
437 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
438 |
The name of the desired encoding, e.g. 'ISO-8859-2' |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
439 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
440 |
=item * EncodeUnmapped (Default: \&XML::UM::encode_unmapped_dec) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
441 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
442 |
Defines how Unicode characters not found in the mapping file (of the |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
443 |
specified encoding) are printed. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
444 |
By default, they are converted to decimal entity references, like '&#123;' |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
445 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
446 |
Use \&XML::UM::encode_unmapped_hex for hexadecimal constants, like '&#xAB;' |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
447 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
448 |
=back |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
449 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
450 |
=head1 CAVEATS |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
451 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
452 |
I'm not exactly sure about which Unicode characters in the range (0 .. 127) |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
453 |
should be mapped to themselves. See comments in XML/UM.pm near |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
454 |
%DEFAULT_ASCII_MAPPINGS. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
455 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
456 |
The encodings that expat supports by default are currently not supported |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
457 |
(e.g. UTF-16, ISO-8859-1), |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
458 |
because there are no .enc files available for these encodings. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
459 |
This module needs some more work. If you have the time, please help! |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
460 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
461 |
=head1 AUTHOR |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
462 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
463 |
Send bug reports, hints, tips, suggestions to Enno Derksen at |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
464 |
<F<enno@att.com>>. |
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
465 |
|
02cd6b52f378
adding synch hg to p4 & create dummy foundation structure scripts
tahirm@symbian.org
parents:
diff
changeset
|
466 |
=cut |