|
1 /* |
|
2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies). |
|
3 * All rights reserved. |
|
4 * This component and the accompanying materials are made available |
|
5 * under the terms of "Eclipse Public License v1.0" |
|
6 * which accompanies this distribution, and is available |
|
7 * at the URL "http://www.eclipse.org/legal/epl-v10.html". |
|
8 * |
|
9 * Initial Contributors: |
|
10 * Nokia Corporation - initial contribution. |
|
11 * |
|
12 * Contributors: |
|
13 * |
|
14 * Description: |
|
15 * |
|
16 */ |
|
17 #ifndef TINYUTF16_H_ |
|
18 #define TINYUTF16_H_ |
|
19 |
|
20 #include "tinyiterator.h" |
|
21 |
|
22 namespace analysis { |
|
23 |
|
24 namespace tiny { |
|
25 |
|
/**
 * Encodes the given unicode character as utf16 and stores the
 * resulting utf16 code unit(s) in the output stream.
 *
 * Characters below 0x10000 are written as one code unit. Any other
 * character is reduced by 0x10000 and written as a surrogate pair:
 * first the high surrogate (0xd800 + upper 10 bits), then the
 * low surrogate (0xdc00 + lower 10 bits).
 *
 * @param out destination; only needs to support out << wchar_t
 * @param c   unicode character to encode
 */
template <typename Stream>
void utf16put(Stream& out, int c) {
    if ( c >= 0x00010000L ) {
        c -= 0x00010000L;
        // high (lead) surrogate carries the upper 10 bits
        out<<(wchar_t)(0xd800 + ((c >> 10) & 0x03ffL));
        // low (trail) surrogate carries the lower 10 bits; its range
        // starts at 0xdc00, not at 0xd800
        out<<(wchar_t)(0xdc00 + (c & 0x03ffL));
    } else {
        out<<(wchar_t)(c);
    }
}
|
40 |
|
/**
 * Writes unicode characters into the wrapped output
 * as utf16 code units.
 *
 * The object stores its own Output instance (out_), initialised from
 * the constructor argument. Every character is encoded by utf16put(),
 * so Output has to support whatever utf16put() requires of its stream.
 */
template <typename Output>
struct Utf16Output {
public:
    /** Initialises the internal output from out. */
    Utf16Output(const Output& out) : out_(out) {}

    /** Writes one unicode character. */
    inline Utf16Output& operator<<(int c) {
        utf16put(out_, c);
        return *this;
    }

    /** Writes one character given as wchar_t (forwarded as int). */
    inline Utf16Output& operator<<(wchar_t c) {
        return (*this)<<(int)c;
    }

    /**
     * Writes exactly length characters read from source.
     *
     * @param source iterator positioned at the first character
     * @param length number of characters to write
     */
    template <typename I>
    Utf16Output& write(I source, int length) {
        for (int i = 0; i < length; i++) {
            // Stream the character, not the iterator itself: streaming
            // the iterator would pick the zero-terminated overload below
            // and emit the whole remaining sequence on every pass.
            (*this)<<*source; ++source;
        }
        return *this;
    }

    /**
     * Writes characters read from source until a character that
     * evaluates to false (zero) is reached; that character is not written.
     */
    template <typename I>
    Utf16Output& operator<<(I source) {
        for (;*source; ++source) {
            (*this)<<*source;
        }
        return *this;
    }
private:
    Output out_;
};
|
74 |
|
75 /** |
|
76 * Writes unicode characters into the given iterator as utf16 codes |
|
77 */ |
|
78 template <typename Iterator> |
|
79 struct Utf16Writer : public Utf16Output<IteratorOutput<Iterator> > { |
|
80 public: |
|
81 Utf16Writer(Iterator i) : Utf16Output<IteratorOutput<Iterator> >(IteratorOutput<Iterator>(i)) {} |
|
82 }; |
|
83 |
|
/**
 * Counts the utf16 code units needed to encode the characters
 * delivered by the iterator. Iteration stops at the first character
 * that evaluates to false (zero); that terminator is not counted.
 *
 * @param i iterator positioned at the first unicode character
 * @return  2 for every character >= 0x10000 plus 1 for every other one
 */
template<typename Iterator>
int utf16size(Iterator i) {
    int units = 0;
    while (*i) {
        if (*i >= 0x10000) {
            units += 2; // needs a surrogate pair
        } else {
            units += 1;
        }
        ++i;
    }
    return units;
}
|
96 |
|
/**
 * Reads utf16 code units from the given iterator and translates them
 * into unicode characters.
 *
 * The current character is cached, so operator*() is cheap and the
 * wrapped iterator is always positioned after the cached character.
 * The wrapped Iterator must be dereferenceable, pre-incrementable and
 * convertible to int; the converted value is recorded as the offset of
 * the cached character.
 *
 * A high surrogate (0xd800-0xdbff) directly followed by a low surrogate
 * (0xdc00-0xdfff) is combined into one character >= 0x10000. An unpaired
 * surrogate is returned unchanged and the code unit after it is left
 * in place, so it is neither lost nor is a terminating 0 skipped.
 */
template <typename Iterator>
struct Utf16Iterator {
public:
    /** Wraps i and immediately decodes the first character. */
    Utf16Iterator(Iterator i) : i_(i), c_(0), offset_(0) {
        operator++(); // cache first character
    }

    /** Creates an iterator whose cached character and offset are 0. */
    Utf16Iterator() : i_(), c_(0), offset_(0) {}

    /** Returns the cached unicode character. */
    inline int operator*() const {
        return c_;
    }

    /** Decodes the next character from the wrapped iterator. */
    Utf16Iterator& operator++() {
        offset_ = i_; // position where the character about to be read starts
        c_ = *i_; ++i_;
        if ( c_ >= 0xd800 && c_ <= 0xdbff ) {
            // Only a high surrogate can start a pair. Look at the next
            // unit first and consume it only if it completes the pair.
            const int c2 = *i_;
            if ( c2 >= 0xdc00 && c2 <= 0xdfff ) {
                ++i_;
                c_ = (((c_ & 0x03ffL) << 10) | (c2 & 0x03ffL)) + 0x00010000L;
            }
        }
        return *this;
    }

    /** Returns the offset recorded for the cached character. */
    operator int() const {return offset_;}
private:
    Iterator i_;  // wrapped iterator, positioned after the cached character
    int c_;       // current unicode character, cached
    int offset_;  // wrapped iterator converted to int before c_ was read
};
|
128 |
|
/**
 * Builds a wstring from the unicode characters delivered by the
 * iterator. Every character is encoded with utf16put(); reading stops
 * at the first character that evaluates to false (zero), which is not
 * included in the result.
 *
 * @param i iterator positioned at the first unicode character
 * @return  the collected string
 */
template<class Iterator>
std::wstring utf16str(Iterator i) {
    std::wostringstream buffer;
    for (; *i; ++i) {
        utf16put(buffer, *i);
    }
    return buffer.str();
}
|
141 |
|
142 } |
|
143 } |
|
144 |
|
145 #endif /* TINYUTF16_H_ */ |