searchengine/oss/loc/analysis/inc/private/thaistatemachine.h
changeset 24 65456528cac2
equal deleted inserted replaced
23:d4d56f5e7c55 24:65456528cac2
       
     1 /*
       
     2 * Copyright (c) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 #ifndef THAISTATEMACHINE_H_
       
    19 #define THAISTATEMACHINE_H_
       
    20 
       
    21 #include "breakiterator.h"
       
    22 
       
    23 namespace analysis {
       
    24 
       
    25 	class ThaiSmEncoding 
       
    26 	{
       
    27 		public: 
       
    28 			static const byte_t FINAL_BIT = 0x80; 
       
    29 			static const byte_t LENGTH_BITS = 0x7f;
       
    30 			
       
    31 			static const byte_t DOT_CODE = 0;
       
    32 			static const wchar_t THAI_CHART_OFFSET = 0x0E00;
       
    33 		
       
    34 			static inline bool getFinal(StatePointer pointer) 
       
    35 			{
       
    36 				return ((*pointer) & FINAL_BIT) > 0; 
       
    37 			}
       
    38 			static inline int readSuccCount(StatePointer& pointer)
       
    39 			{
       
    40 				return (*(pointer++)) & LENGTH_BITS;
       
    41 			}
       
    42 			static const int SIZEOF_CHAR = 1;
       
    43 			static inline wchar_t readChar(byte_t*& pointer)
       
    44 			{
       
    45 				byte_t c = (*(pointer++));
       
    46 				return c == DOT_CODE ? '.' : c + THAI_CHART_OFFSET;
       
    47 			}
       
    48 			static const int SIZEOF_OFFSET = 3;
       
    49 			static inline StateOffset getOffset(byte_t* pointer)
       
    50 			{
       
    51 				// big endian
       
    52 				byte_t high = *(pointer+0);
       
    53 				byte_t mid = *(pointer+1);
       
    54 				byte_t low = *(pointer+2);
       
    55 				return static_cast<StateOffset>((high<<16) + (mid<<8) + low); 
       
    56 			}
       
    57 			
       
    58 	};
       
    59 	
       
    60 	class ThaiAnalysisInfraNotInitialized : std::exception 
       
    61 		{
       
    62 		public:
       
    63 			const char* what() const throw();
       
    64 		};
       
    65 
       
    66 	class StateMachineFileNotFound : std::exception 
       
    67 		{
       
    68 		public:
       
    69 			const char* what() const throw();
       
    70 		};
       
    71 	
       
    72 	class StateMachineLoadingFailed : std::exception
       
    73 		{
       
    74 		public:
       
    75 			const char* what() const throw();
       
    76 		};
       
    77 
       
    78 
       
    79 	class ThaiAnalysisInfra 
       
    80 	{
       
    81 		public: // public static API 
       
    82 		
       
    83 			static void init(const char* dataFile); 
       
    84 			
       
    85 			static ThaiAnalysisInfra* theInstance();
       
    86 			
       
    87 			static void shutdown();
       
    88 			
       
    89 		public: // public non-static API 
       
    90 			
       
    91 			std::auto_ptr<BreakIterator> createBreakIterator();
       
    92 			
       
    93 		private: // construction
       
    94 		
       
    95 			ThaiAnalysisInfra(const char* dataFile);
       
    96 			
       
    97 			~ThaiAnalysisInfra(); 
       
    98 
       
    99 		private: 
       
   100 			
       
   101 			static ThaiAnalysisInfra* theInstance_;
       
   102 			
       
   103 			Cpt::auto_array<byte_t> blob_;
       
   104 			
       
   105 			StateMachine<ThaiSmEncoding> stateMachine_;
       
   106 			
       
   107 			std::string dataFile_; 
       
   108 	};
       
   109 	
       
   110 }
       
   111 	
       
   112 #endif