fontservices/textshaperplugin/IcuSource/common/rbbisetb.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 //
       
     2 //  rbbisetb.h
       
     3 /*
       
     4 **********************************************************************
       
     5 *   Copyright (c) 2001-2005, International Business Machines
       
     6 *   Corporation and others.  All Rights Reserved.
       
     7 **********************************************************************
       
     8 */
       
     9 
       
    10 #ifndef RBBISETB_H
       
    11 #define RBBISETB_H
       
    12 
       
    13 #include "unicode/utypes.h"
       
    14 #include "unicode/uobject.h"
       
    15 #include "rbbirb.h"
       
    16 #include "uvector.h"
       
    17 
       
    18 struct  UNewTrie;
       
    19 
       
    20 U_NAMESPACE_BEGIN
       
    21 
       
    22 //
       
    23 //  RBBISetBuilder   Derives the character categories used by the runtime RBBI engine
       
    24 //                   from the Unicode Sets appearing in the source  RBBI rules, and
       
    25 //                   creates the TRIE table used to map from Unicode to the
       
    26 //                   character categories.
       
    27 //
       
    28 
       
    29 
       
    30 //
       
    31 //  RangeDescriptor
       
    32 //
       
    33 //     Each of the non-overlapping character ranges gets one of these descriptors.
       
    34 //     All of them are strung together in a linked list, which is kept in order
       
    35 //     (by character)
       
    36 //
       
     37 class RangeDescriptor : public UMemory {   // One node in the ordered linked list of non-overlapping character ranges.
       
     38 public:
       
     39     UChar32            fStartChar;      // Start of range, Unicode 32 bit value.
       
     40     UChar32            fEndChar;        // End of range, Unicode 32 bit value.
       
     41     int32_t            fNum;            // Runtime-mapped input value for this range.
       
     42     UVector           *fIncludesSets;   // Vector of the original
       
     43                                         //   Unicode sets that include this range.
       
     44                                         //    (Contains ptrs to uset nodes)
       
     45     RangeDescriptor   *fNext;           // Next RangeDescriptor in the linked list.
       
     46 
       
     47     RangeDescriptor(UErrorCode &status);   // NOTE(review): presumably reports failure through status - confirm in the .cpp
       
     48     RangeDescriptor(const RangeDescriptor &other, UErrorCode &status);   // Copying form that also takes an error code; the plain copy constructor is private (see below).
       
     49     ~RangeDescriptor();
       
     50     void split(UChar32 where, UErrorCode &status);   // Split this range in two at "where", with
       
     51                                         //   "where" appearing in the second (higher) part.
       
     52     void setDictionaryFlag();           // Check whether this range appears as part of
       
     53                                         //   the Unicode set named "dictionary"
       
     54 
       
     55 private:
       
     56     RangeDescriptor(const RangeDescriptor &other); // Declared private to forbid copying of this class.
       
     57     RangeDescriptor &operator=(const RangeDescriptor &other); // Declared private to forbid assignment of this class.
       
     58 };
       
    59 
       
    60 
       
    61 //
       
    62 //  RBBISetBuilder   Handles processing of Unicode Sets from RBBI rules.
       
    63 //
       
    64 //      Starting with the rules parse tree from the scanner,
       
    65 //
       
    66 //                   -  Enumerate the set of UnicodeSets that are referenced
       
    67 //                      by the RBBI rules.
       
    68 //                   -  compute a derived set of non-overlapping UnicodeSets
       
    69 //                      that will correspond to columns in the state table for
       
    70 //                      the RBBI execution engine.
       
    71 //                   -  construct the trie table that maps input characters
       
    72 //                      to set numbers in the non-overlapping set of sets.
       
    73 //
       
    74 
       
    75 
       
     76 class RBBISetBuilder : public UMemory {   // Processes the Unicode sets of the RBBI rules into character categories and a trie.
       
     77 public:
       
     78     RBBISetBuilder(RBBIRuleBuilder *rb);   // NOTE(review): rb is presumably stored in fRB - confirm in the .cpp
       
     79     ~RBBISetBuilder();
       
     80 
       
     81     void     build();   // NOTE(review): presumably performs the set processing and trie construction - confirm in the .cpp
       
     82     void     addValToSets(UVector *sets,      uint32_t val);   // NOTE(review): presumably applies addValToSet to each node in sets - confirm
       
     83     void     addValToSet (RBBINode *usetNode, uint32_t val);
       
     84     int32_t  getNumCharCategories() const;   // CharCategories are the same as the input symbol set to the
       
     85                                    //    runtime state machine, which are the same as
       
     86                                    //    columns in the DFA state table
       
     87     int32_t  getTrieSize() /*const*/;        // Size in bytes of the serialized Trie. Declared non-const: the qualifier is commented out.
       
     88     void     serializeTrie(uint8_t *where);  // Write out the serialized Trie. NOTE(review): presumably needs getTrieSize() bytes at where - confirm
       
     89     UChar32  getFirstChar(int32_t  val) const;   // NOTE(review): presumably the first character mapped to category val - confirm
       
     90 #ifdef RBBI_DEBUG
       
     91     void     printSets();
       
     92     void     printRanges();
       
     93     void     printRangeGroups();
       
     94 #else   // Without RBBI_DEBUG the three print calls expand to nothing; these macros are not scoped to the class.
       
     95     #define printSets()
       
     96     #define printRanges()
       
     97     #define printRangeGroups()
       
     98 #endif
       
     99 
       
    100 private:
       
    101     void           numberSets();
       
    102 
       
    103     RBBIRuleBuilder       *fRB;             // The RBBI Rule Compiler that owns us.
       
    104     UErrorCode            *fStatus;         // NOTE(review): presumably points at the rule builder's error code - confirm
       
    105 
       
    106     RangeDescriptor       *fRangeList;      // Head of the linked list of RangeDescriptors
       
    107 
       
    108     UNewTrie              *fTrie;           // The mapping TRIE that is the end result of processing the Unicode Sets.
       
    109     uint32_t              fTrieSize;        // NOTE(review): presumably the size reported by getTrieSize() - confirm
       
    110 
       
    111     // Groups correspond to character categories -
       
    112     //       groups of ranges that are in the same original UnicodeSets.
       
    113     //       fGroupCount is the index of the last used group.
       
    114     //       fGroupCount+1 is also the number of columns in the RBBI state table being compiled.
       
    115     //       State table column 0 is not used.  Column 1 is for end-of-input.
       
    116     //       Column 2 is for group 0.  Funny counting.
       
    117     int32_t               fGroupCount;
       
    118 
       
    119     RBBISetBuilder(const RBBISetBuilder &other); // Declared private to forbid copying of this class.
       
    120     RBBISetBuilder &operator=(const RBBISetBuilder &other); // Declared private to forbid assignment of this class.
       
    121 };
       
   122 
       
   123 
       
   124 
       
   125 U_NAMESPACE_END
       
   126 #endif