--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/searchengine/oss/loc/analysis/inc/public/tinyutf16.h Fri Oct 15 12:09:28 2010 +0530
@@ -0,0 +1,145 @@
+/*
+* Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
+* All rights reserved.
+* This component and the accompanying materials are made available
+* under the terms of "Eclipse Public License v1.0"
+* which accompanies this distribution, and is available
+* at the URL "http://www.eclipse.org/legal/epl-v10.html".
+*
+* Initial Contributors:
+* Nokia Corporation - initial contribution.
+*
+* Contributors:
+*
+* Description:
+*
+*/
+#ifndef TINYUTF16_H_
+#define TINYUTF16_H_
+
+#include "tinyiterator.h"
+
+namespace analysis {
+
+ namespace tiny {
+
+ /**
+ * Translates given unicode character as utf16 and
+ * stores utf16 codes in the output stream
+ */
+ template <typename Stream>
+ void utf16put(Stream& out, int c) {
+ if ( c >= 0x00010000L ) {
+ c -= 0x00010000L;
+ out<<(wchar_t)(0xd800 + ((c >> 10) & 0x03ffL));
+ out<<(wchar_t)(0xd800 + (c & 0x03ffL));
+ } else {
+ out<<(wchar_t)(c);
+ }
+ }
+
+ /**
+ * Writes unicode characters into the output
+ * stream as utf16 codes.
+ */
+ template <typename Output>
+ struct Utf16Output {
+ public:
+ Utf16Output(const Output& out) : out_(out) {};
+ inline Utf16Output& operator<<(int c) {
+ utf16put(out_, c);
+ return *this;
+ }
+
+ inline Utf16Output& operator<<(wchar_t c) {
+ return (*this)<<(int)c;
+ }
+ template <typename I>
+ Utf16Output& write(I source, int length) {
+ for (int i = 0; i < length; i++) {
+ (*this)<<source; ++source;
+ }
+ return *this;
+ }
+ template <typename I>
+ Utf16Output& operator<<(I source) {
+ for (;*source; ++source) {
+ (*this)<<*source;
+ }
+ return *this;
+ }
+ private:
+ Output out_;
+ };
+
+ /**
+ * Writes unicode characters into the given iterator as utf16 codes
+ */
+ template <typename Iterator>
+ struct Utf16Writer : public Utf16Output<IteratorOutput<Iterator> > {
+ public:
+ Utf16Writer(Iterator i) : Utf16Output<IteratorOutput<Iterator> >(IteratorOutput<Iterator>(i)) {}
+ };
+
+ /**
+ * Calculates the size of all characters with the iterator as utf16
+ * code points
+ */
+ template<typename Iterator>
+ int utf16size(Iterator i) {
+ int rv = 0;
+ for (;*i; ++i) {
+ rv += (*i >= 0x10000 ? 2 : 1);
+ }
+ return rv;
+ }
+
+ /**
+ * Reads utf16 code points from given iterator and translates them
+ * as unicode characters.
+ */
+ template <typename Iterator>
+ struct Utf16Iterator {
+ public:
+ Utf16Iterator(Iterator i) : i_(i) {
+ operator++(); // cache first character
+ }
+ Utf16Iterator() : i_(), c_(0), offset_(0) {}
+ inline int operator*() const {
+ return c_;
+ };
+ Utf16Iterator& operator++() {
+ offset_ = i_;
+ c_ = *i_; ++i_;
+ if ( c_ >= 0xd800 && c_ <= 0xdfff ) {
+ int c2 = *i_; ++i_;
+ if ( c2 >= 0xdc00 && c2 <= 0xdfff ){
+ c_ = (((c_ & 0x03ffL) << 10) | ((c2 & 0x03ffL) << 0)) + 0x00010000L;
+ }
+ }
+ return *this;
+ }
+ operator int() {return offset_;}
+ private:
+ Iterator i_;
+ int c_; // current utf cached
+ int offset_; // characters read
+ };
+
+ /**
+ * Copies the iterator content into a wstring
+ */
+ template<class Iterator>
+ std::wstring utf16str(Iterator i) {
+ std::wostringstream ret;
+ while (*i) {
+ utf16put(ret, *i);
+ ++i;
+ }
+ return ret.str();
+ }
+
+ }
+}
+
+#endif /* TINYUTF16_H_ */