author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Fri, 12 Mar 2010 15:51:09 +0200 | |
branch | RCL_3 |
changeset 11 | 6971d1c87c9a |
parent 0 | 1fb32624e06b |
permissions | -rw-r--r-- |
0
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
{\rtf1\ansi \deff4\deflang1033{\fonttbl{\f1\froman\fcharset2\fprq2 Symbol;}{\f4\froman\fcharset0\fprq2 Times New Roman;}{\f5\fswiss\fcharset0\fprq2 Arial;}{\f11\fmodern\fcharset0\fprq1 Courier New;} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
{\f56\fswiss\fcharset0\fprq2 Verdana;}}{\colortbl;\red0\green0\blue0;\red0\green0\blue255;\red0\green255\blue255;\red0\green255\blue0;\red255\green0\blue255;\red255\green0\blue0;\red255\green255\blue0;\red255\green255\blue255;\red0\green0\blue128; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
\red0\green128\blue128;\red0\green128\blue0;\red128\green0\blue128;\red128\green0\blue0;\red128\green128\blue0;\red128\green128\blue128;\red192\green192\blue192;}{\stylesheet{\widctlpar \f4\fs20 \snext0 Normal;}{\s1\sb240\sa60\keepn\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
\b\f5\fs28\kerning28 \sbasedon0\snext0 heading 1;}{\s2\sb240\sa60\keepn\widctlpar \b\i\f5 \sbasedon0\snext0 heading 2;}{\s3\sb240\sa60\keepn\widctlpar \f5 \sbasedon0\snext0 heading 3;}{\s4\sb240\sa60\keepn\widctlpar \b\f5 \sbasedon0\snext0 heading 4;}{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
\s5\sb240\sa60\widctlpar \f5\fs22 \sbasedon0\snext0 heading 5;}{\*\cs10 \additive Default Paragraph Font;}{\s15\widctlpar \f4\fs20 \sbasedon0\snext15 footnote text;}{\*\cs16 \additive\super \sbasedon10 footnote reference;}}{\info |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
{\author Preferred Customer}{\operator Preferred Customer}{\creatim\yr1999\mo3\dy16\hr14\min36}{\revtim\yr1999\mo11\dy26\hr14\min22}{\printim\yr1999\mo3\dy17\hr16\min34}{\version8}{\edmins11}{\nofpages9}{\nofwords5010}{\nofchars28557} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
{\*\company Dell Computer Corporation}{\vern57443}}\paperw11906\paperh16838 \deftab340\widowctrl\ftnbj\aenddoc\hyphcaps0\formshade \fet0\sectd \linex0\headery709\footery709\colsx709\endnhere {\*\pnseclvl1\pnlcrm\pnstart1\pnindent283\pnhang}{\*\pnseclvl2 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
8 |
\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}{\*\pnseclvl3\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}{\*\pnseclvl4\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}{\*\pnseclvl5\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}{\*\pnseclvl6\pnf1\pnstart1\pnindent283 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
9 |
{\pntxtb \'b7}}{\*\pnseclvl7\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}{\*\pnseclvl8\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}{\*\pnseclvl9\pnf1\pnstart1\pnindent283 {\pntxtb \'b7}}\pard\plain \s1\sb240\sa60\keepn\widctlpar \b\f5\fs28\kerning28 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
Converting text between different character-encoding systems |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 EPOC machines internally store text encoded in Unicode - the character set{\cs16\super \chftn {\footnote \pard\plain \s15\qj\widctlpar \f4\fs20 {\cs16\super \chftn } The term \ldblquote character set\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
is used throughout this document to refer to {\i a set of characters and their encodings} as defined by a particular body (e.g. a government organization, a consortium, etc).}} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
defined by The Unicode Consortium. However, much of the text sent between the EPOC machine and the outside world (e.g. in emails) will be encoded in character sets other than Unicode. The CHARCONV component provides a DLL for converting text (both ways) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
14 |
between Unicode and other standard character sets. It also supports converting (both ways) between 2-byte Unicode (also known as UC |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
15 |
S-2) and its two transformation formats UTF-7 and UTF-8. There is no direct way to convert between UTF-7 and UTF-8 though - in fact CHARCONV as a whole does not provide any direct conversion facilities other than to or from Unicode (UCS-2). |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
16 |
\par \pard\plain \s2\sb240\sa60\keepn\widctlpar \b\i\f5 Converting text between Unicode and other standard character sets |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
17 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 The class {\f11 CCnvCharacterSetConverter} is provided for converting text between Unicode and other standard character sets. The public parts of the class are shown below: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
18 |
\par \pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
19 |
\par \pard \widctlpar {\f11\fs14 class CCnvCharacterSetConverter : public CBase |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
20 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
21 |
\par public: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
22 |
\par \tab enum TAvailability |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
23 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
\par \tab \tab EAvailable, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
\par \tab \tab ENotAvailable |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
26 |
\par \tab \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
27 |
\par \tab enum TError |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
\par \tab \tab EErrorIllFormedInput=-1 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
30 |
\par \tab \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
31 |
\par \tab enum TEndianness |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
32 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
33 |
\par \tab \tab ELittleEndian, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
34 |
\par \tab \tab EBigEndian |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
35 |
\par \tab \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
36 |
\par \tab enum \{KStateDefault=0\}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
37 |
\par \tab struct SCharacterSet |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
38 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
39 |
\par \tab \tab inline TUint Identifier() const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
40 |
\par \tab \tab inline TBool NameIsFileName() const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
41 |
\par \tab \tab inline TPtrC Name() const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
42 |
\par \tab private: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
43 |
\par \tab \tab // ... |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
44 |
\par \tab \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
45 |
\par public: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
46 |
\par \tab IMPORT_C static CCnvCharacterSetConverter* NewL(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
47 |
\par \tab IMPORT_C static CCnvCharacterSetConverter* NewLC(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
\par \tab IMPORT_C virtual ~CCnvCharacterSetConverter(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
49 |
\par \tab IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableL(RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
50 |
\par \tab IMPORT_C static CArrayFix<SCharacterSet>* CreateArrayOfCharacterSetsAvailableLC(RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
51 |
\par \tab IMPORT_C TUint ConvertStandardNameOfCharacterSetToIdentifierL(const TDesC8&, RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
52 |
\par \tab IMPORT_C HBufC8* ConvertCharacterSetIdentifierToStandardNameL(TUint, RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
53 |
\par \tab IMPORT_C TUint ConvertMibEnumOfCharacterSetToIdentifierL(TInt, RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
54 |
\par \tab IMPORT_C TInt ConvertCharacterSetIdentifierToMibEnumL(TUint, RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
55 |
\par \tab IMPORT_C void PrepareToConvertToOrFromL(TUint, const CArrayFix<SCharacterSet>&, RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
56 |
\par \tab IMPORT_C TAvailability PrepareToConvertToOrFromL(TUint, RFs&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
57 |
\par \tab IMPORT_C void SetDefaultEndiannessOfForeignCharacters(TEndianness); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
58 |
\par \tab IMPORT_C void SetReplacementForUnconvertibleUnicodeCharactersL(const TDesC8&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
59 |
\par \tab IMPORT_C TInt ConvertFromUnicode(TDes8&, const TDesC16&) const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
60 |
\par \tab IMPORT_C TInt ConvertFromUnicode(TDes8&, const TDesC16&, TInt&) const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
61 |
\par \tab IMPORT_C TInt ConvertFromUnicode(TDes8&, const TDesC16&, TInt&, TInt&) const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
62 |
\par \tab IMPORT_C TInt ConvertToUnicode(TDes16&, const TDesC8&, TInt&) const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
63 |
\par \tab IMPORT_C TInt ConvertToUnicode(TDes16&, const TDesC8&, TInt&, TInt&) const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
64 |
\par \tab IMPORT_C TInt ConvertToUnicode(TDes16&, const TDesC8&, TInt&, TInt&, TInt&) const; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
65 |
\par private: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
66 |
\par \tab // ... |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
67 |
\par \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
68 |
\par }\pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Overview |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
69 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 There are two basic stages involved in using a {\f11 CCnvCharacterSetConverter} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
70 |
object to change the encoding of a piece of text from one character set to another. It is first necessary to select the foreign character set of interest. Then having done th |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
71 |
at, the actual conversion of text to or from that character set can be performed. Selecting the foreign character set is done by calling one of the overloads of {\f11 PrepareToConvertToOrFromL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
72 |
. Text conversion itself is done by calling one of the overloads of {\f11 ConvertFromUnicode} or {\f11 ConvertToUnicode}. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
73 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Creating and destroying {\f11 CCnvCharacterSetConverter} objects |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
74 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 Objects of the {\f11 CCnvCharacterSetConverter} class are created by calling either of the static member functions {\f11 NewL} or {\f11 NewLC} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
75 |
, the latter leaving the object on the cleanup stack. These objects are created on the heap and are thus destroyed using the {\f11 delete} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
76 |
operator as normal. Creating one of these objects only involves some allocation on the heap and initialization of member data - it does not involve anything as onerous as scanning for files, or loading data from a file. Such \ldblquote heavyweight |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
77 |
\rdblquote activities are done at a later stage (see \ldblquote Selecting the foreign character set to convert to/from\rdblquote below). |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
78 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 The member functions for doing the conversion |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
79 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 The most important member functions of the class are {\f11 ConvertFromUnicode} and {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
80 |
. These all convert the text passed in the second parameter and output the resulting text in the first parameter. Sixteen-bit descriptors are used to hold any text encoded in Unicode (UCS-2), and eight-bit descriptors are used to hold any text encoded in |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
81 |
any other character set. Foreign character sets (i.e. character sets other than Unicode) may use a single byte per character (e.g. Code Page 1252) or more than one byte per character (e.g. China\rquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
82 |
s GB 2312-80) or even a variable number of bytes per character (e.g. Japan\rquote s Shift-JIS) - hence the need to use eight-bit descriptors. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
83 |
\par The third parameter to the last two overloads of {\f11 ConvertFromUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
84 |
(which is purely an output parameter) tells the caller the number of characters in the input Unicode descriptor that could not be converted into the target character set because the target character set has no equivalents of those Unicode characters. Sim |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
85 |
ilarly, the fourth parameters to the last two overloads of {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
86 |
(which are also purely output parameters) tell the caller the number of characters in the input descriptor that could not be converted into Unicode (for the analogous reason). In neither {\f11 ConvertFromUnicode} nor {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
87 |
is conversion aborted because of unconvertible characters. (What is written to the output descriptor for such unconvertible characters is discussed in a separate paragraph below.) The purpose of these parameters in the case of both {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
88 |
ConvertFromUnicode} and {\f11 ConvertToUnicode} is to allow the caller to get a rough idea of the proportion of characters that are failing to be converted. (Recall that a character in the input descriptor to {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
89 |
does not necessarily occupy only a single byte of that descriptor. However, since Unicode is intended to cover all possible characters and thus be the superset of all character sets, this will rarely, if ever, report anything other than zero characters, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
90 |
although it will report the existence of unconvertible characters if the input descriptor contains illegal \ldblquote characters\rdblquote , i.e. values not actually in the foreign character set.) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
91 |
\par The last parameter to the last overloads of both {\f11 ConvertFromUnicode} and {\f11 ConvertToUnicode} (these are again purely output parameters) indicates the index of the first character (or in the case of {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
92 |
, the index of the first {\i byte} of the first character) in the input descriptor that could not be converted. If all the characters in the input descriptor could be converted, this parameter is set to a negative value. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
93 |
\par The {\f11 TInt} value returned by {\f11 ConvertFromUnicode} and {\f11 ConvertToUnicode} is either one of the negative error values defined in {\f11 CCnvCharacterSetConverter}\rquote s {\f11 enum TError}{\cs16\super \chftn {\footnote \pard\plain |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
94 |
\s15\qj\widctlpar \f4\fs20 {\cs16\super \chftn } Although there is currently only one error code in {\f11 CCnvCharacterSetConverter::TError}, this may be extended in the future, so CHARCONV clients should program defensively to allow for this.}} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
95 |
or the number of elements left at the end of the input descriptor which were not converted, ei |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
96 |
ther (i) because there was no room left in the output descriptor, or (ii) because the input descriptor ended with an incomplete sequence (e.g. half-way through a multi-byte character). |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
97 |
\par {\pntext\pard\plain\f1\fs20 \'b7\tab}\pard \qj\fi-283\li283\widctlpar{\*\pn \pnlvlblt\pnf1\pnstart1\pnindent283\pnhang{\pntxtb \'b7}} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
98 |
The ability to do partial conversions means that the CHARCONV client does not have to try to guess how big to make the output descriptor for a given input descriptor; they can simply do the conversion in a loop using a smallish output descriptor (an outpu |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
99 |
t descriptor whose {\f11 MaxLength()} is less than about 20 elements would not be recommended) - the code in the \ldblquote Sample code\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
100 |
section below gives a demonstration of how to do this. It is worth noting that even if the output descriptor was not big enough to hold the entire converted input text, it will not necessarily be full in the sense of having its {\f11 Length()} equal to its { |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
101 |
\f11 MaxLength()}. The only sure way to find out whether the whole input descriptor has been converted is to test whether the return value (if not negative) is zero or not. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
102 |
\par {\pntext\pard\plain\f1\fs20 \'b7\tab}The ability to cope with an input descriptor be |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
103 |
ing truncated is useful if the client cannot guarantee whether the input descriptor will be complete, e.g. if they themselves are receiving it in chunks from an external source. All that the client needs to do to handle this situation is to add the specif |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
104 |
ied number of descriptor elements to the start of the input descriptor passed to the next call. However, there is a binary compatibility issue here. For those clients whose input descriptor is known to be complete, if they perform the sort of loop found i |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
105 |
n the first two examples in the \ldblquote Sample code\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
106 |
section, they would get into an infinite loop if the input descriptor was corrupt such that it ended with an incomplete sequence, as the conversion function would always return a value greater than zero. To avoi |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
107 |
d this problem arising, the behavior of the conversion functions is actually such that if none of the input descriptor can be consumed because it consists {\i exclusively} of an incomplete sequence, the error {\f11 EErrorIllFormedInput} (defined in {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
108 |
CCnvCharacterSetConverter}\rquote s {\f11 enum TError}) will be returned. In order for this error not to be returned when the presence of an incomplete sequence at the end of the input descriptor does {\i not} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
109 |
mean it is corrupt, the input descriptor must be long enough such that it cannot consist exclusively of an incomplete sequence - an input descriptor of at least 20 elements should be plenty long enough. (The third example in the \ldblquote Sample code |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
110 |
\rdblquote section below gives an illustration of how to use CHARCONV to convert fragmented input.) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
111 |
\par \pard \qj\fi720\widctlpar The third parameter to {\f11 ConvertToUnicode} is used to save state information across multiple calls to {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
112 |
where each call starts off where the previous call reached in the input descriptor. At the start of each such series of calls to {\f11 ConvertToUnicode} this parameter must be set to {\f11 CCnvCharacterSetConverter::KStateDefault} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
113 |
and thereafter not tampered with, but simply passed in to each {\f11 ConvertToUnicode} call. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
114 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Selecting the foreign character set to convert to/from |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
115 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 When the {\f11 CCnvCharacterSetConverter} object has been created, before either {\f11 ConvertFromUnicode} or {\f11 ConvertToUnicode} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
116 |
can be called, it is necessary to state which foreign character set is to be converted to/from. This is done by calling either of the two overloads of {\f11 PrepareToConvertToOrFromL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
117 |
. Each takes the identifier (a Symbian-defined UID) of the required character set as its first parameter, and a file-server session object as its last parameter. The first overload requires the array returned by either of {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
118 |
CCnvCharacterSetConverter}\rquote s static member functions {\f11 CreateArrayOfCharacterSetsAvailableL} or {\f11 CreateArrayOfCharacterSetsAvailableLC} to be passed in as its second parameter. This array contains one {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
119 |
CCnvCharacterSetConverter::SCharacterSet} item for each foreign character set that is available (at run-time) for text conversion. The {\f11 Identifier} member function of {\f11 SCharacterSet} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
120 |
returns the Symbian-UID identifier of the character set represented by that object. (See below for a discussion of {\f11 SCharacterSet}\rquote s other member functions.) Creating this array is not required for calling the second overload. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
121 |
\par If the first overload of {\f11 PrepareToConvertToOrFromL} is used, the character-set identifier passed into the first parameter must correspond to an item in the array passed to the second parameter (i.e. that item\rquote s {\f11 Identifier} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
122 |
member function returns the same number), or a panic will occur. The second overload, however, is less demanding - the character set identified in the first parameter does not have to be availabl |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
123 |
e at run-time. Depending on whether the character set is available or not, it returns {\f11 EAvailable} or {\f11 ENotAvailable} (from {\f11 CCnvCharacterSetConverter}\rquote s {\f11 enum TAvailability}). Calls to {\f11 ConvertFromUnicode} or {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
124 |
ConvertToUnicode} convert to or from the character set whose identifier was passed into the last {\f11 PrepareToConvertToOrFromL} call which did not return {\f11 ENotAvailable}. If all calls previously returned {\f11 ENotAvailable}, a panic will occur. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
125 |
|
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
126 |
\par The two overloads of {\f11 PrepareToConvertToOrFromL} are designed for different usage. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
127 |
In a situation where it may be necessary to present the user with a list of all the possible character sets for them to choose which to convert to/from, then the first overload would need to be used. However, if the CHARCONV client wishes to convert the c |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
128 |
ontent of, say, an email message, or an HTML document, i.e. something that describes within itself what character set its content is in, then the second overload would be more appropriate. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
129 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Efficiency considerations |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
130 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 The first overload of {\f11 PrepareToConvertToOrFromL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
131 |
should be used in preference to the second overload if a number of different character sets are to be converted to/from in a short space of time. The reason for this is that the second overload, unlike the first, internally has to do a file scan{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
132 |
\cs16\super \chftn {\footnote \pard\plain \s15\qj\widctlpar \f4\fs20 {\cs16\super \chftn } The second overload of {\f11 PrepareToConvertToOrFromL} is fast to call if the required conversion data/code is hard-coded within CHARCONV ( |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
133 |
this currently applies to Code Page 1252, ISO 8859-1, ASCII and SMS 7-bit, and also the Unicode transformation formats UTF-7 and UTF-8). For other character sets, file scanning is necessary.}} which is similar to the one performed by {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
134 |
CreateArrayOfCharacterSetsAvailableL} and {\f11 CreateArrayOfCharacterSetsAvailableLC} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
135 |
. Thus it is more efficient to do the file-scanning once to start with by calling one of these array-creating functions, and then pass that array in to the first overload of {\f11 PrepareToConvertToOrFromL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
136 |
each time the target/source character set is to be changed (first checking to see if each required character set is in the array, to avoid the panic that occurs if it is not). However, if the different calls to {\f11 PrepareToConvertToOrFromL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
137 |
are spread out over a {\i considerable} time, it may be best to use the second overload for this as it will perform the necessary re-scanning of files each time and thus pick up any new files that have been installed in the meantime. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
138 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 {\f11 SCharacterSet} and the names of foreign character sets |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
139 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 {\f11 SCharacterSet}\rquote s member function {\f11 Name} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
140 |
returns the full path and file name of the conversion-data file for that character set, although for those character sets for which CHARCONV hard codes conversion data (rather than getting it from a file) {\f11 Name} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
141 |
returns an arbitrary name for that character set. {\i Note that the name returned cannot be treated as an Internet-standard name. Converting between the Internet-standard names of character sets and the Symbian-UID identifiers that }{\i\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
142 |
PrepareToConvertToOrFromL}{\i expects is provided by the member functions }{\i\f11 ConvertStandardNameOfCharacterSetToIdentifierL}{\i and }{\i\f11 ConvertCharacterSetIdentifierToStandardNameL}{\i (see \ldblquote Other member functions\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
143 |
below).} Also, the name returned is locale-independent and therefore should be hidden from the user{\cs16\super \chftn {\footnote \pard\plain \s15\qj\widctlpar \f4\fs20 {\cs16\super \chftn } |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
144 |
If a character-set name needs to be shown to the user (and therefore is locale-dependent), that name should be mapped to the locale-independent name returned by {\f11 Name} by some software at a higher level than CHARCONV.}}. {\f11 NameIsFileName} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
145 |
returns whether {\f11 Name} returns a file name or just an arbitrary name. (This may prove useful, for example, to find out whether F32\rquote s file-name parsing class {\f11 TParse} can be used on the return value of {\f11 Name}.) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
146 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Other member functions |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
147 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 For character sets using more than one byte per character, the \ldblquote endian\rdblquote -ness of those characters may be defined or left open{\cs16\super \chftn {\footnote \pard\plain \s15\qj\widctlpar \f4\fs20 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
148 |
{\cs16\super \chftn } As an example of each, China\rquote s GB2312-80 defines the byte order of its two-byte characters, whereas Unicode leaves the byte order of its two-byte characters undefined.}}. Although none of the foreign character s |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
149 |
ets currently supported by CHARCONV are of undefined endianness, the member function {\f11 SetDefaultEndiannessOfForeignCharacters} which takes either {\f11 ELittleEndian} or {\f11 EBigEndian} (from {\f11 CCnvCharacterSetConverter}\rquote s {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
150 |
enum TEndianness}) allows the CHARCONV client to set the endianness that {\f11 ConvertFromUnicode} and {\f11 ConvertToUnicode} use when converting to or from foreign character sets of undefined endianness. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
151 |
\par When converting to Unicode, each character in the input descriptor for which there is no equivalent in Unicode is c |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
152 |
onverted to the Unicode character specifically designated for that purpose - 0xFFFD. (As already noted, this should rarely, if ever, be the case, unless the input is corrupt.) When converting the other way, each foreign character set provides a default ch |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
153 |
aracter to be used for each Unicode character lacking an equivalent in that character set. This default may be overridden by calling the member function {\f11 SetReplacementForUnconvertibleUnicodeCharactersL}. It should be noted that {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
154 |
PrepareToConvertToOrFromL} undoes the effect of any previous {\f11 SetReplacementForUnconvertibleUnicodeCharactersL} call, and thus for the latter to have any effect, it should be called between the {\f11 PrepareToConvertToOrFromL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
155 |
call and the subsequent {\f11 ConvertFromUnicode} call(s). The parameter to {\f11 SetReplacementForUnconvertibleUnicodeCharactersL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
156 |
should be passed a single, possibly multi-byte, character, whose byte order (if multi-byte) should be little-endian if the endianness of the character set is undefined, otherwise the same endianness as is defined by the character set. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
157 |
\par {\f11 ConvertStandardNameOfCharacterSetToIdentifierL} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
158 |
is provided for the CHARCONV client to be able to find out the Symbian UID of a character set for a given Internet-standard name of that character set. It returns zero if the name of the character set was not known. This function |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
159 |
can be called at any time in the {\f11 CCnvCharacterSetConverter} object\rquote s lifetime. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
160 |
\par {\f11 ConvertCharacterSetIdentifierToStandardNameL} is provided for the CHARCONV client to be able to find out an Internet-standard name of a character set (which is a \ldblquote preferred MIME name\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
161 |
where possible) for the Symbian UID of that character set. It returns {\f11 NULL} if the UID of the character set was not known. This function can be called at any time in the {\f11 CCnvCharacterSetConverter} object\rquote s lifetime. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
162 |
\par {\f11 ConvertMibEnumOfCharacterSetToIdentifierL} is provided for the CHARCONV client to be able to find out the Symbian UID of a character set for a given MIB-enum{\cs16\super \chftn {\footnote \pard\plain \s15\qj\widctlpar \f4\fs20 {\cs16\super \chftn } |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
163 |
MIB enum values are defined in {\i ftp.isi.edu/in-notes/iana/assignments/character-sets}.}} of that character set. It returns zero if the MIB-enum of the character set was not known. This function can be called at any time in the {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
164 |
CCnvCharacterSetConverter} object\rquote s lifetime. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
165 |
\par {\f11 ConvertCharacterSetIdentifierToMibEnumL} is provided for the CHARCONV client to be able to find out a MIB-enum of a character |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
166 |
set for the Symbian UID of that character set. It returns zero if the UID of the character set was not known. This function can be called at any time in the {\f11 CCnvCharacterSetConverter} object\rquote s lifetime. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
167 |
\par The four functions described above all behave as follows. The first time any of them is called with a first parameter that is not in {\f11 CCnvCharacterSetConverter}\rquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
168 |
s internal hard-coded list, a file scan is done to try to find out the required information from a file. The results of this are stored in the {\f11 CCnvCharacterSetConverter} object, so that no subsequent call to {\i any} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
169 |
of these four functions will repeat the file scanning. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
170 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Sample code |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
171 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 The sample code below illustrates typical usage of {\f11 CCnvCharacterSetConverter}. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
172 |
\par \pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
173 |
\par An example of how to convert, in small chunks, Unicode text to another character set: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
174 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
175 |
\par \pard \widctlpar {\f11\fs14 LOCAL_C void ConvertUnicodeTextL(CCnvCharacterSetConverter& aCharacterSetConverter, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
176 |
\par \tab \tab \tab \tab \tab \tab \tab \tab RFs& aFileServerSession, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
177 |
\par \tab \tab \tab \tab \tab \tab \tab \tab TUint aForeignCharacterSet, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
178 |
\par \tab \tab \tab \tab \tab \tab \tab \tab const TDesC16& aUnicodeText) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
179 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
180 |
\par \tab if (aCharacterSetConverter.PrepareToConvertToOrFromL(aForeignCharacterSet, aFileServerSession) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
181 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab !=CCnvCharacterSetConverter::EAvailable) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
182 |
\par \tab \tab User::Leave(KErrNotSupported); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
183 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
184 |
\par \tab TBuf8<20> outputBuffer; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
185 |
\par \tab TPtrC16 remainderOfUnicodeText(aUnicodeText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
186 |
\par \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
187 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
188 |
\par \tab \tab const TInt returnValue=aCharacterSetConverter.ConvertFromUnicode(outputBuffer, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
189 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab remainderOfUnicodeText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
190 |
\par \tab \tab if (returnValue==CCnvCharacterSetConverter::EErrorIllFormedInput) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
191 |
\par \tab \tab \tab User::Leave(KErrCorrupt); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
192 |
\par \tab \tab else if (returnValue<0) // future-proof against "TError" expanding |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
193 |
\par \tab \tab \tab User::Leave(KErrGeneral); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
194 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
195 |
\par \tab \tab // ? - do something here with outputBuffer |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
196 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
197 |
\par \tab \tab if (returnValue==0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
198 |
\par \tab \tab \tab break; // all of aUnicodeText has been converted and handled |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
199 |
\par \tab \tab remainderOfUnicodeText.Set(remainderOfUnicodeText.Right(returnValue)); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
200 |
\par \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
201 |
\par \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
202 |
\par }\pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
203 |
\par An example of how to convert, in small chunks, text in another character set to Unicode: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
204 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
205 |
\par \pard \widctlpar {\f11\fs14 LOCAL_C void ConvertForeignTextL(CCnvCharacterSetConverter& aCharacterSetConverter, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
206 |
\par \tab \tab \tab \tab \tab \tab \tab \tab RFs& aFileServerSession, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
207 |
\par \tab \tab \tab \tab \tab \tab \tab \tab TUint aForeignCharacterSet, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
208 |
\par \tab \tab \tab \tab \tab \tab \tab \tab const TDesC8& aForeignText) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
209 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
210 |
\par \tab if (aCharacterSetConverter.PrepareToConvertToOrFromL(aForeignCharacterSet, aFileServerSession) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
211 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab !=CCnvCharacterSetConverter::EAvailable) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
212 |
\par \tab \tab User::Leave(KErrNotSupported); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
213 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
214 |
\par \tab TBuf16<20> outputBuffer; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
215 |
\par \tab TPtrC8 remainderOfForeignText(aForeignText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
216 |
\par \tab // "state" must be initialized to CCnvCharacterSetConverter::KStateDefault - it must not be |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
217 |
\par \tab // tampered with thereafter but simply passed into each call of "ConvertToUnicode" |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
218 |
\par \tab TInt state=CCnvCharacterSetConverter::KStateDefault; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
219 |
\par \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
220 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
221 |
\par \tab \tab const TInt returnValue=aCharacterSetConverter.ConvertToUnicode(outputBuffer, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
222 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab remainderOfForeignText, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
223 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab state); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
224 |
\par \tab \tab if (returnValue==CCnvCharacterSetConverter::EErrorIllFormedInput) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
225 |
\par \tab \tab \tab User::Leave(KErrCorrupt); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
226 |
\par \tab \tab else if (returnValue<0) // future-proof against "TError" expanding |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
227 |
\par \tab \tab \tab User::Leave(KErrGeneral); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
228 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
229 |
\par \tab \tab // ? - do something here with outputBuffer |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
230 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
231 |
\par \tab \tab if (returnValue==0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
232 |
\par \tab \tab \tab break; // all of aForeignText has been converted and handled |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
233 |
\par \tab \tab remainderOfForeignText.Set(remainderOfForeignText.Right(returnValue)); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
234 |
\par \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
235 |
\par \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
236 |
\par }\pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
237 |
\par An example of how to convert, in small chunks, {\i fragmented} text in another character set to Unicode: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
238 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
239 |
\par \pard \widctlpar {\f11\fs14 LOCAL_C void ConvertForeignTextL(CCnvCharacterSetConverter& aCharacterSetConverter, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
240 |
\par \tab \tab \tab \tab \tab \tab \tab \tab RFs& aFileServerSession, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
241 |
\par \tab \tab \tab \tab \tab \tab \tab \tab TUint aForeignCharacterSet) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
242 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
243 |
\par \tab if (aCharacterSetConverter.PrepareToConvertToOrFromL(aForeignCharacterSet, aFileServerSession) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
244 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab !=CCnvCharacterSetConverter::EAvailable) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
245 |
\par \tab \tab User::Leave(KErrNotSupported); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
246 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
247 |
\par \tab const TInt KMaximumLengthOfBufferForForeignText=200; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
248 |
\par \tab TUint8 bufferForForeignText[KMaximumLengthOfBufferForForeignText]; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
249 |
\par \tab TInt lengthOfBufferForForeignText=0; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
250 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
251 |
\par \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
252 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
253 |
\par \tab \tab TPtr8 nextChunkOfForeignText(bufferForForeignText+lengthOfBufferForForeignText, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
254 |
\par \tab \tab \tab \tab \tab \tab \tab KMaximumLengthOfBufferForForeignText-lengthOfBufferForForeignText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
255 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
256 |
\par \tab \tab // ? load nextChunkOfForeignText here |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
257 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
258 |
\par \tab \tab const TInt lengthOfNextChunkOfForeignText=nextChunkOfForeignText.Length(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
259 |
\par \tab \tab lengthOfBufferForForeignText+=lengthOfNextChunkOfForeignText; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
260 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
261 |
\par \tab \tab const TBool isLastChunkOfForeignText= // ? find out from the source of the foreign text |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
262 |
\par \tab \tab // whether this is the last chunk and set this variable accordingly, e.g. the source may |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
263 |
\par \tab \tab // define that the last chunk is of length zero, in which case the expression |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
264 |
\par \tab \tab // "(lengthOfNextChunkOfForeignText==0)" would be assigned to this variable; note that |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
265 |
\par \tab \tab // even if the length of this chunk is zero, we can't just exit this function here as |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
266 |
\par \tab \tab // bufferForForeignText may not be empty (i.e. lengthOfBufferForForeignText>0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
267 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
268 |
\par \tab \tab TBuf16<20> outputBuffer; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
269 |
\par \tab \tab TPtrC8 remainderOfForeignText(bufferForForeignText, lengthOfBufferForForeignText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
270 |
\par \tab \tab // "state" must be initialized to CCnvCharacterSetConverter::KStateDefault - it must not |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
271 |
\par \tab \tab // be tampered with thereafter but simply passed into each call of "ConvertToUnicode" |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
272 |
\par \tab \tab TInt state=CCnvCharacterSetConverter::KStateDefault; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
273 |
\par \tab \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
274 |
\par \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
275 |
\par \tab \tab \tab const TInt lengthOfRemainderOfForeignText=remainderOfForeignText.Length(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
276 |
\par \tab \tab \tab if (isLastChunkOfForeignText) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
277 |
\par \tab \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
278 |
\par \tab \tab \tab \tab if (lengthOfRemainderOfForeignText==0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
279 |
\par \tab \tab \tab \tab \tab return; // the single point of exit of this function |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
280 |
\par \tab \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
281 |
\par \tab \tab \tab else |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
282 |
\par \tab \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
283 |
\par \tab \tab \tab \tab // As this isn't the last chunk, we don't want ConvertToUnicode to return |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
284 |
\par \tab \tab \tab \tab // CCnvCharacterSetConverter::EErrorIllFormedInput if the input descriptor ends |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
285 |
\par \tab \tab \tab \tab // with an incomplete sequence - but it will only do this if *none* of the input |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
286 |
\par \tab \tab \tab \tab // descriptor can be consumed. Therefore if the input descriptor is long enough |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
287 |
\par \tab \tab \tab \tab // (20 elements or longer is plenty adequate) there is no danger of this error |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
288 |
\par \tab \tab \tab \tab // being returned for this reason. If it's shorter than that, we'll simply put it |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
289 |
\par \tab \tab \tab \tab // at the start of the buffer so that it gets converted with the next chunk. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
290 |
\par \tab \tab \tab \tab if (lengthOfRemainderOfForeignText<20) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
291 |
\par \tab \tab \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
292 |
\par \tab \tab \tab \tab \tab // put any remaining foreign text at the start of bufferForForeignText |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
293 |
\par \tab \tab \tab \tab \tab lengthOfBufferForForeignText=lengthOfRemainderOfForeignText; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
294 |
\par \tab \tab \tab \tab \tab Mem::Copy(bufferForForeignText, remainderOfForeignText.Ptr(), |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
295 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab lengthOfBufferForForeignText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
296 |
\par \tab \tab \tab \tab \tab break; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
297 |
\par \tab \tab \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
298 |
\par \tab \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
299 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
300 |
\par \tab \tab \tab const TInt returnValue=aCharacterSetConverter.ConvertToUnicode(outputBuffer, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
301 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab remainderOfForeignText, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
302 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab state); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
303 |
\par \tab \tab \tab if (returnValue==CCnvCharacterSetConverter::EErrorIllFormedInput) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
304 |
\par \tab \tab \tab \tab User::Leave(KErrCorrupt); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
305 |
\par \tab \tab \tab else if (returnValue<0) // future-proof against "TError" expanding |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
306 |
\par \tab \tab \tab \tab User::Leave(KErrGeneral); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
307 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
308 |
\par \tab \tab \tab // ? - do something here with outputBuffer |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
309 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
310 |
\par \tab \tab \tab remainderOfForeignText.Set(remainderOfForeignText.Right(returnValue)); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
311 |
\par \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
312 |
\par \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
313 |
\par \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
314 |
\par }\pard\plain \s2\sb240\sa60\keepn\widctlpar \b\i\f5 Converting text between UCS-2 and Unicode\rquote s transformation formats |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
315 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 The class {\f11 CnvUtfConverter} is provided for converting text between UCS-2 and the two Unicode transformation formats UTF-7 and UTF-8. (For the definition of these formats, see {\i The Unicode Standard} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
316 |
published by The Unicode Consortium.) The public parts of the class are shown below: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
317 |
\par \pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
318 |
\par \pard \widctlpar {\f11\fs14 class CnvUtfConverter |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
319 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
320 |
\par public: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
321 |
\par \tab enum TError |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
322 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
323 |
\par \tab \tab EErrorIllFormedInput=-1 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
324 |
\par \tab \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
325 |
\par \tab enum \{KStateDefault=0\}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
326 |
\par public: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
327 |
\par \tab IMPORT_C static TInt ConvertFromUnicodeToUtf7(TDes8&, const TDesC16&, TBool); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
328 |
\par \tab IMPORT_C static TInt ConvertFromUnicodeToUtf8(TDes8&, const TDesC16&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
329 |
\par \tab IMPORT_C static TInt ConvertToUnicodeFromUtf7(TDes16&, const TDesC8&, TInt&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
330 |
\par \tab IMPORT_C static TInt ConvertToUnicodeFromUtf8(TDes16&, const TDesC8&); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
331 |
\par private: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
332 |
\par \tab // ... |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
333 |
\par \tab \}; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
334 |
\par }\pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
335 |
\par \pard \qj\fi720\widctlpar In fact the functionality of this class is also available via the {\f11 CCnvCharacterSetConverter} class by passing it the UID for UTF-7 or UTF-8. However, {\f11 CnvUtfConverter} is provided for ease of use as it |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
336 |
requires no instantiation (all the member functions are static) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
337 |
. The four functions all convert the text passed in the second parameter and output the resulting text in the first parameter. Sixteen-bit descriptors are used to hold text encoded in UCS-2 (i.e. n |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
338 |
ormal 2-byte Unicode), and eight-bit descriptors are used to hold text encoded in either of the transformation formats. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
339 |
\par UTF-7 divides the Unicode character set into 3 subsets: set D which is the set of characters that are encoded directly (i.e. unaltered), set B which is the set of characters that are encoded in modified \ldblquote base 64\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
340 |
, and set O which is the set of characters that can either be encoded directly, or in modified \ldblquote base 64\rdblquote . The {\f11 TBool} parameter to {\f11 ConvertFromUnicodeToUtf7} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
341 |
controls the way that the characters in set O are encoded: passing in {\f11 ETrue} causes them to be encoded in \ldblquote base 64\rdblquote (which is the \ldblquote safer\rdblquote option), and {\f11 EFalse} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
342 |
causes them to be encoded directly. The contents of set O are listed in their entirety below. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
343 |
\par \pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
344 |
\par \pard \qc\widctlpar {\b\f56 !"#$%&*;<=>@[]^_`\{|\}}{\f56 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
345 |
\par }\pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
346 |
\par \pard \qj\fi720\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
347 |
The variant of UTF-8 used internally by Java differs from standard UTF-8 in two ways. First, the specific case of the NULL character (0x0000) is encoded in the two-byte format, and second, only the one-, two- and three-byte formats are used |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
348 |
(i.e. not the four-byte format which is normally used for Unicode surrogate-pairs). Generating this variant of UTF-8 is not supported via {\f11 CnvUtfConverter}; however, it is possible by using {\f11 CCnvCharacterSetConverter} with a UID of {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
349 |
KCharacterSetIdentifierJavaConformantUtf8}. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
350 |
\par The {\f11 TInt} value returned by all four member functions is either one of the negative error values defined in {\f11 CnvUtfConverter}\rquote s {\f11 enum TError}{\cs16\super \chftn {\footnote \pard\plain \s15\qj\widctlpar \f4\fs20 {\cs16\super \chftn } |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
351 |
Although there is currently only one error code in {\f11 CnvUtfConverter::TError}, this may be extended in the future, so CHARCONV clients should program defensively to allow for this.}} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
352 |
or the number of elements left at the end of the input descriptor which were not converted, either (i) because there was no room left in the output descriptor, or (ii) because the input descriptor ended with an incomplete sequence (e.g. half-way through |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
353 |
a multi-byte character). |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
354 |
\par {\pntext\pard\plain\f1\fs20 \'b7\tab}\pard \qj\fi-283\li283\widctlpar{\*\pn \pnlvlblt\pnf1\pnstart1\pnindent283\pnhang{\pntxtb \'b7}} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
355 |
The ability to do partial conversions means that the CHARCONV client does not have to try to guess how big to make the output descriptor for a given inp |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
356 |
ut descriptor; they can simply do the conversion in a loop using a smallish output descriptor (an output descriptor whose {\f11 MaxLength()} is less than about 20 elements would not be recommended) - the code in the \ldblquote Sample code\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
357 |
section below gives a demonstration of how to do this. It is worth noting that even if the output descriptor was not big enough to hold the entire converted input text, it will not necessarily be full in the sense of having its {\f11 Length()} equal to its { |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
358 |
\f11 MaxLength()}. The only sure way to find out whether the whole input descriptor has been converted is to test whether the return value (if not negative) is zero or not. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
359 |
\par {\pntext\pard\plain\f1\fs20 \'b7\tab} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
360 |
The ability to cope with an input descriptor being truncated is useful if the client cannot guarantee whether the input descriptor will be complete, e.g. if they themselves are receiving it in chunks from an external source. All that the client needs to d |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
361 |
o to handle this situation is to add the specified number of descriptor elements to the start of the input descriptor passed to the |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
362 |
next call. However, there is a binary compatibility issue here. For those clients whose input descriptor is known to be complete, if they perform the sort of loop found in the first two examples in the \ldblquote Sample code\rdblquote |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
363 |
section, they would get into an infinit |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
364 |
e loop if the input descriptor was corrupt such that it ended with an incomplete sequence, as the conversion function would always return a value greater than zero. To avoid this problem arising, the behavior of the conversion functions is actually such t |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
365 |
hat if none of the input descriptor can be consumed because it consists {\i exclusively} of an incomplete sequence, the error {\f11 EErrorIllFormedInput} (defined in {\f11 CnvUtfConverter}\rquote s {\f11 enum TError} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
366 |
) will be returned. In order for this error not to be returned when the presence of an incomplete sequence at the end of the input descriptor does {\i not} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
367 |
mean it is corrupt, the input descriptor must be long enough such that it cannot consist exclusively of an incomplete sequence - an input descriptor of at least 20 elements should be plenty long enough. (The third example in the \ldblquote Sample code |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
368 |
\rdblquote section below gives an illustration of how to use CHARCONV to convert fragmented input.) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
369 |
\par \pard \qj\fi720\widctlpar The third parameter to {\f11 ConvertToUnicodeFromUtf7} is used to save state information across multiple calls to {\f11 ConvertToUnicodeFromUtf7} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
370 |
where each call starts off where the previous call reached in the input descriptor. At the start of each such series of calls to {\f11 ConvertToUnicodeFromUtf7}, this parameter must be set to {\f11 CnvUtfConverter::KStateDefault} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
371 |
and thereafter not tampered with, but simply passed in to each {\f11 ConvertToUnicodeFromUtf7} call. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
372 |
\par \pard\plain \s3\sb240\sa60\keepn\widctlpar \f5 Sample Code |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
373 |
\par \pard\plain \qj\fi720\widctlpar \f4\fs20 The sample code below illustrates typical usage of {\f11 CnvUtfConverter} to convert to and from UTF-7. (Converting to and from UTF-8 is almost identical, the only differences being that {\f11 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
374 |
ConvertFromUnicodeToUtf8} and {\f11 ConvertToUnicodeFromUtf8} each only take two parameters.) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
375 |
\par \pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
376 |
\par An example of how to convert, in small chunks, Unicode (UCS-2) to UTF-7: |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
377 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
378 |
\par \pard \widctlpar {\f11\fs14 LOCAL_C void EncodeL(const TDesC16& aUnicodeText) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
379 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
380 |
\par \tab TBuf8<20> outputBuffer; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
381 |
\par \tab TPtrC16 remainderOfUnicodeText(aUnicodeText); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
382 |
\par \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
383 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
384 |
\par \tab \tab const TInt returnValue=CnvUtfConverter::ConvertFromUnicodeToUtf7(outputBuffer, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
385 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab remainderOfUnicodeText, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
386 |
\par }\pard \fi340\li5780\widctlpar {\f11\fs14 ETrue); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
387 |
\par }\pard \widctlpar {\f11\fs14 \tab \tab if (returnValue==CnvUtfConverter::EErrorIllFormedInput) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
388 |
\par \tab \tab \tab User::Leave(KErrCorrupt); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
389 |
\par \tab \tab else if (returnValue<0) // future-proof against "TError" expanding |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
390 |
\par \tab \tab \tab User::Leave(KErrGeneral); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
391 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
392 |
\par \tab \tab // ? - do something here with outputBuffer |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
393 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
394 |
\par \tab \tab if (returnValue==0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
395 |
\par \tab \tab \tab break; // all of aUnicodeText has been converted and handled |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
396 |
\par \tab \tab remainderOfUnicodeText.Set(remainderOfUnicodeText.Right(returnValue)); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
397 |
\par \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
398 |
\par \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
399 |
\par }\pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
400 |
\par An example of how to convert, in small chunks, UTF-7 to Unicode (UCS-2): |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
401 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
402 |
\par \pard \widctlpar {\f11\fs14 LOCAL_C void DecodeL(const TDesC8& aUtf7) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
403 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
404 |
\par \tab TBuf16<20> outputBuffer; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
405 |
\par \tab TPtrC8 remainderOfUtf7(aUtf7); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
406 |
\par \tab // "state" must be initialized to CnvUtfConverter::KStateDefault - it must not be tampered |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
407 |
\par }\pard \fi340\widctlpar {\f11\fs14 // with thereafter but simply passed into each call of "ConvertToUnicodeFromUtf7" |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
408 |
\par }\pard \widctlpar {\f11\fs14 \tab TInt state=CnvUtfConverter::KStateDefault; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
409 |
\par \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
410 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
411 |
\par \tab \tab const TInt returnValue=CnvUtfConverter::ConvertToUnicodeFromUtf7(outputBuffer, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
412 |
\par }\pard \fi340\li5780\widctlpar {\f11\fs14 remainderOfUtf7, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
413 |
\par state); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
414 |
\par }\pard \widctlpar {\f11\fs14 \tab \tab if (returnValue==CnvUtfConverter::EErrorIllFormedInput) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
415 |
\par \tab \tab \tab User::Leave(KErrCorrupt); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
416 |
\par \tab \tab else if (returnValue<0) // future-proof against "TError" expanding |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
417 |
\par \tab \tab \tab User::Leave(KErrGeneral); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
418 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
419 |
\par \tab \tab // ? - do something here with outputBuffer |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
420 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
421 |
\par \tab \tab if (returnValue==0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
422 |
\par \tab \tab \tab break; // all of aUtf7 has been converted and handled |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
423 |
\par \tab \tab remainderOfUtf7.Set(remainderOfUtf7.Right(returnValue)); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
424 |
\par \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
425 |
\par \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
426 |
\par }\pard \qj\widctlpar |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
427 |
\par An example of how to convert, in small chunks, {\i fragmented} UTF-7 to Unicode (UCS-2): |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
428 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
429 |
\par \pard \widctlpar {\f11\fs14 LOCAL_C void DecodeL() |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
430 |
\par \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
431 |
\par \tab const TInt KMaximumLengthOfBufferForUtf7=200; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
432 |
\par \tab TUint8 bufferForUtf7[KMaximumLengthOfBufferForUtf7]; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
433 |
\par \tab TInt lengthOfBufferForUtf7=0; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
434 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
435 |
\par \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
436 |
\par \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
437 |
\par \tab \tab TPtr8 nextChunkOfUtf7(bufferForUtf7+lengthOfBufferForUtf7, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
438 |
\par \tab \tab \tab \tab \tab \tab \tab KMaximumLengthOfBufferForUtf7-lengthOfBufferForUtf7); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
439 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
440 |
\par \tab \tab // ? load nextChunkOfUtf7 here |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
441 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
442 |
\par \tab \tab const TInt lengthOfNextChunkOfUtf7=nextChunkOfUtf7.Length(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
443 |
\par \tab \tab lengthOfBufferForUtf7+=lengthOfNextChunkOfUtf7; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
444 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
445 |
\par \tab \tab const TBool isLastChunkOfUtf7= // ? find out from the source of the UTF-7 whether this is |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
446 |
\par \tab \tab // the last chunk and set this variable accordingly, e.g. the source may define that the |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
447 |
\par \tab \tab // last chunk is of length zero, in which case the expression |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
448 |
\par \tab \tab // "(lengthOfNextChunkOfUtf7==0)" would be assigned to this variable; note that even if |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
449 |
\par \tab \tab // the length of this chunk is zero, we can't just exit this function here as |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
450 |
\par \tab \tab // bufferForUtf7 may not be empty (i.e. lengthOfBufferForUtf7>0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
451 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
452 |
\par \tab \tab TBuf16<20> outputBuffer; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
453 |
\par \tab \tab TPtrC8 remainderOfUtf7(bufferForUtf7, lengthOfBufferForUtf7); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
454 |
\par \tab \tab // "state" must be initialized to CnvUtfConverter::KStateDefault - it must not be tampered |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
455 |
\par \tab \tab // with thereafter but simply passed into each call of "ConvertToUnicodeFromUtf7" |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
456 |
\par \tab \tab TInt state=CnvUtfConverter::KStateDefault; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
457 |
\par \tab \tab FOREVER |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
458 |
\par \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
459 |
\par \tab \tab \tab const TInt lengthOfRemainderOfUtf7=remainderOfUtf7.Length(); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
460 |
\par \tab \tab \tab if (isLastChunkOfUtf7) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
461 |
\par \tab \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
462 |
\par \tab \tab \tab \tab if (lengthOfRemainderOfUtf7==0) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
463 |
\par \tab \tab \tab \tab \tab return; // the single normal (non-leaving) point of exit of this function |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
464 |
\par \tab \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
465 |
\par \tab \tab \tab else |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
466 |
\par \tab \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
467 |
\par \tab \tab \tab \tab // As this isn't the last chunk, we don't want ConvertToUnicodeFromUtf7 to return |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
468 |
\par \tab \tab \tab \tab // CnvUtfConverter::EErrorIllFormedInput if the input descriptor ends with an |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
469 |
\par \tab \tab \tab \tab // incomplete sequence - but it will only do this if *none* of the input |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
470 |
\par \tab \tab \tab \tab // descriptor can be consumed. Therefore if the input descriptor is long enough |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
471 |
\par \tab \tab \tab \tab // (20 elements or longer is more than adequate) there is no danger of this error |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
472 |
\par \tab \tab \tab \tab // being returned for this reason. If it's shorter than that, we'll simply put it |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
473 |
\par \tab \tab \tab \tab // at the start of the buffer so that it gets converted with the next chunk. |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
474 |
\par \tab \tab \tab \tab if (lengthOfRemainderOfUtf7<20) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
475 |
\par \tab \tab \tab \tab \tab \{ |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
476 |
\par \tab \tab \tab \tab \tab // put any remaining UTF-7 at the start of bufferForUtf7 |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
477 |
\par \tab \tab \tab \tab \tab lengthOfBufferForUtf7=lengthOfRemainderOfUtf7; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
478 |
\par \tab \tab \tab \tab \tab Mem::Copy(bufferForUtf7, remainderOfUtf7.Ptr(), lengthOfBufferForUtf7); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
479 |
\par \tab \tab \tab \tab \tab break; |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
480 |
\par \tab \tab \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
481 |
\par \tab \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
482 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
483 |
\par \tab \tab \tab const TInt returnValue=CnvUtfConverter::ConvertToUnicodeFromUtf7(outputBuffer, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
484 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab remainderOfUtf7, |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
485 |
\par \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab \tab state); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
486 |
\par \tab \tab \tab if (returnValue==CnvUtfConverter::EErrorIllFormedInput) |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
487 |
\par \tab \tab \tab \tab User::Leave(KErrCorrupt); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
488 |
\par \tab \tab \tab else if (returnValue<0) // future-proof against "TError" expanding |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
489 |
\par \tab \tab \tab \tab User::Leave(KErrGeneral); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
490 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
491 |
\par \tab \tab \tab // ? - do something here with outputBuffer |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
492 |
\par |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
493 |
\par \tab \tab \tab remainderOfUtf7.Set(remainderOfUtf7.Right(returnValue)); |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
494 |
\par \tab \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
495 |
\par \tab \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
496 |
\par \tab \} |
1fb32624e06b
Revision: 201003
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
497 |
\par }} |