searchengine/oss/loc/analysis/src/tinyunicode.cpp
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 #include "tinyunicode.h"
       
    18 
       
    19 namespace analysis {
       
    20 
       
    21 	namespace unicode {
       
    22 
       
    23         // See Chapter 16 Hangul in http://unicode.org/reports/tr15/ for reference
       
    24         // 
       
    25         // Ported from Hangul Java code example
       
    26         //
       
    27 	
       
    28         int IsHangulSyllable(int c) {
       
    29             return (c >= 0xAC00 && c <= 0xD7AF);
       
    30         }
       
    31         int IsHangulJamo(int c) {
       
    32             return (c >= 0x1100 && c < 0x1200)     // Hangul Jamo
       
    33                 || (c >= 0x3130 && c <= 0x318F)    // Hangul compatibility Jamo
       
    34                 || (c >= 0xA960 && c < 0xA97F)     // Hangul Jamo Extended-A
       
    35                 || (c >= 0xD780 && c <= 0xD7FF)    // Hangul Jamo Extended-B
       
    36                 || (c >= 0xff00 && c <= 0xffef);   // Hangul halfwidth and fullwidth forms
       
    37         }
       
    38 
       
    39         int IsHangul(int c) {
       
    40             return IsHangulSyllable(c) || IsHangulJamo(c); 
       
    41         }
       
    42 		int IsCjk(int c) {
       
    43 			return (c >= 0x4E00 && c < 0xa000)  // CJK Unified ideographs block
       
    44                  || IsHangul(c)              // Korean alphabet
       
    45 				 || (c >= 0x3400 && c < 0x4Dc0)     // CJK Unified ideographs extension A
       
    46 				 || (c >= 0x3040 && c <= 0x309f)    // Hiragana
       
    47 				 || (c >= 0x20000 && c < 0x30000);  // CJK Unified ideographs extension B, C, D E and so forth
       
    48 		}
       
    49 		
       
    50 		int IsThai(int c) {
       
    51 			return (c >= 0x0E00) && (c < 0x0F00); // Thai unicode block 
       
    52 		}
       
    53 	}
       
    54 	
       
    55 }