fontservices/textshaperplugin/IcuSource/common/rbbirb.h
changeset 0 1fb32624e06b
equal deleted inserted replaced
-1:000000000000 0:1fb32624e06b
       
     1 //
       
     2 //  rbbirb.h
       
     3 //
       
     4 //  Copyright (C) 2002-2004, International Business Machines Corporation and others.
       
     5 //  All Rights Reserved.
       
     6 //
       
     7 //  This file contains declarations for several classes from the
       
     8 //    Rule Based Break Iterator rule builder.
       
     9 //
       
    10 
       
    11 
       
    12 #ifndef RBBIRB_H
       
    13 #define RBBIRB_H
       
    14 
       
    15 #include "unicode/utypes.h"
       
    16 #include "unicode/uobject.h"
       
    17 #include "unicode/rbbi.h"
       
    18 #include "unicode/uniset.h"
       
    19 #include "unicode/parseerr.h"
       
    20 #include "uhash.h"
       
    21 #include "uvector.h"
       
    22 #include "unicode/symtable.h"// For UnicodeSet parsing, is the interface that
       
    23                           //    looks up references to $variables within a set.
       
    24 
       
    25 
       
    26 
       
    27 U_NAMESPACE_BEGIN
       
    28 
       
    29 class               RBBIRuleScanner;      // Used below as RBBIRuleScanner* (fRuleScanner, fScanner).
       
    30 struct              RBBIRuleTableEl;      // Not referenced again in the visible part of this header.
       
    31 class               RBBISetBuilder;       // Used below as RBBISetBuilder* (fSetBuilder).
       
    32 class               RBBINode;             // Used below as RBBINode* / RBBINode** (symbol values, parse trees).
       
    33 class               RBBITableBuilder;     // Used below as RBBITableBuilder* (the four f*Tables members).
       
    34 
       
    35 
       
    36 
       
    37 //--------------------------------------------------------------------------------
       
    38 //
       
    39 //   RBBISymbolTable.    Implements SymbolTable interface that is used by the
       
    40 //                       UnicodeSet parser to resolve references to $variables.
       
    41 //
       
    42 //--------------------------------------------------------------------------------
       
    43 class RBBISymbolTableEntry : public UMemory { // The symbol table hash table contains one
       
    44 public:                                       //   of these structs for each entry.
       
    45     RBBISymbolTableEntry();
       
    46     UnicodeString          key;               // String this entry is stored under.
       
    47     RBBINode               *val;              // Node associated with key.
       //                                            // NOTE(review): ownership of *val is not visible
       //                                            //   in this header - check the destructor.
       
    48     ~RBBISymbolTableEntry();
       
    49 
       
    50 private:
       
    51     RBBISymbolTableEntry(const RBBISymbolTableEntry &other); // forbid copying of this class
       
    52     RBBISymbolTableEntry &operator=(const RBBISymbolTableEntry &other); // forbid copying of this class
       
    53 };
       
    54 
       
    55 
       
    56 class RBBISymbolTable : public UMemory, public SymbolTable {
       // SymbolTable implementation used to resolve references to $variables
       //   while UnicodeSet patterns are being parsed.
       
    57 private:
       
    58     const UnicodeString      &fRules;         // Held by reference, not copied: the string passed to
       //                                            //   the constructor must outlive this object.
       
    59     UHashtable               *fHashTable;
       
    60     RBBIRuleScanner          *fRuleScanner;
       
    61 
       
    62     // These next two fields are part of the mechanism for passing references to
       
    63     //   already-constructed UnicodeSets back to the UnicodeSet constructor
       
    64     //   when the pattern includes $variable references.
       
    65     const UnicodeString      ffffString;      // = "\uffff"
       
    66     UnicodeSet              *fCachedSetLookup;
       
    67 
       
    68 public:
       
    69     //  API inherited from class SymbolTable
       
    70     virtual const UnicodeString*  lookup(const UnicodeString& s) const;
       
    71     virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const;
       
    72     virtual UnicodeString parseReference(const UnicodeString& text,
       
    73                                          ParsePosition& pos, int32_t limit) const;
       
    74 
       
    75     //  Additional Functions
       
    76     RBBISymbolTable(RBBIRuleScanner *, const UnicodeString &fRules, UErrorCode &status);
       
    77     virtual ~RBBISymbolTable();
       
    78 
       
    79     virtual RBBINode *lookupNode(const UnicodeString &key) const;
       
    80     virtual void      addEntry  (const UnicodeString &key, RBBINode *val, UErrorCode &err);
       
    81 
       
    82 #ifdef RBBI_DEBUG
       
    83     virtual void      rbbiSymtablePrint() const;
       
    84 #else
       
    85     // Non-debug builds: there is no rbbiSymtablePrint member function.  Instead a
       
    86     //  macro of the same name expands to a dummy assignment to fFakeField (the
       //  macro body already ends in ';'), so call sites written as a member call
       //  still compile.
       //  NOTE(review): the macro is a preprocessor definition and is therefore not
       //  scoped to this class; it also assigns a non-mutable member, so it cannot be
       //  used on a const object or inside a const member function.
       
    87     int  fFakeField;
       
    88     #define rbbiSymtablePrint() fFakeField=0; 
       
    89 #endif
       
    90 
       
    91 private:
       
    92     RBBISymbolTable(const RBBISymbolTable &other); // forbid copying of this class
       
    93     RBBISymbolTable &operator=(const RBBISymbolTable &other); // forbid copying of this class
       
    94 };
       
    95 
       
    96 
       
    97 //--------------------------------------------------------------------------------
       
    98 //
       
    99 //  class RBBIRuleBuilder       The top-level class handling RBBI rule compiling.
       
   100 //
       
   101 //--------------------------------------------------------------------------------
       
   102 class RBBIRuleBuilder : public UMemory {
       
   103 public:
       
   104 
       
   105     //  Create a rule based break iterator from a set of rules.
       
   106     //  This function is the main entry point into the rule builder.  The
       
   107     //   public ICU API for creating RBBIs uses this function to do the actual work.
       
   108     //
       //    rules       the source text of the break rules.
       //    parseError  out-parameter (non-const reference) for parse error details.
       //    status      ICU error code, passed by reference.
       
   109     static BreakIterator * createRuleBasedBreakIterator( const UnicodeString    &rules,
       
   110                                     UParseError      &parseError,
       
   111                                     UErrorCode       &status);
       
   112 
       
   113 public:
       
   114     // The "public" functions and data members that appear below are accessed
       
   115     //  (and shared) by the various parts that make up the rule builder.  They
       
   116     //  are NOT intended to be accessed by anything outside of the
       
   117     //  rule builder implementation.
       
   118     RBBIRuleBuilder(const UnicodeString  &rules,
       
   119                     UParseError          &parseErr,
       
   120                     UErrorCode           &status
       
   121         );
       
   122 
       
   123     virtual    ~RBBIRuleBuilder();
       
   124     char                          *fDebugEnv;        // controls debug trace output
       
   125     UErrorCode                    *fStatus;          // Error reporting.  Keeping status
       
   126     UParseError                   *fParseError;      //   here avoids passing it everywhere.
       
   127     const UnicodeString           &fRules;           // The rule string that we are compiling
       //                                                   //   (held by reference, so the caller's
       //                                                   //   string must outlive this builder).
       
   128 
       
   129     RBBIRuleScanner               *fScanner;         // The scanner.
       
   130     RBBINode                      *fForwardTree;     // The parse trees, generated by the scanner,
       
   131     RBBINode                      *fReverseTree;     //   then manipulated by subsequent steps.
       
   132     RBBINode                      *fSafeFwdTree;
       
   133     RBBINode                      *fSafeRevTree;
       
   134 
       
   135     RBBINode                      **fDefaultTree;    // For rules not qualified with a !
       
   136                                                      //   the tree to which they belong.
       
   137 
       
   138     UBool                         fChainRules;       // True for chained Unicode TR style rules.
       
   139                                                      // False for traditional regexp rules.
       
   140 
       
   141     UBool                         fLBCMNoChain;      // True:  suppress chaining of rules on
       
   142                                                      //   chars with LineBreak property == CM.
       
   143 
       
   144     UBool                         fLookAheadHardBreak;  // True:  Look ahead matches cause an
       
   145                                                      // immediate break, no continuing for the
       
   146                                                      // longest match.
       
   147 
       
   148     RBBISetBuilder                *fSetBuilder;      // Set and Character Category builder.
       
   149     UVector                       *fUSetNodes;       // Vector of all uset nodes.
       
   150 
       
   151     RBBITableBuilder              *fForwardTables;   // State transition tables
       
   152     RBBITableBuilder              *fReverseTables;
       
   153     RBBITableBuilder              *fSafeFwdTables;
       
   154     RBBITableBuilder              *fSafeRevTables;
       
   155 
       
   156     UVector                       *fRuleStatusVals;  // The values that can be returned
       
   157                                                      //   from getRuleStatus().
       
   158 
       
   159     RBBIDataHeader                *flattenData();    // Create the flattened (runtime format)
       
   160                                                      //   data tables.
       
   161 private:
       
   162     RBBIRuleBuilder(const RBBIRuleBuilder &other); // forbid copying of this class
       
   163     RBBIRuleBuilder &operator=(const RBBIRuleBuilder &other); // forbid copying of this class
       
   164 };
       
   165 
       
   166 
       
   167 
       
   168 
       
   169 //----------------------------------------------------------------------------
       
   170 //
       
   171 //   RBBISetTableEl   is an entry in the hash table of UnicodeSets that have
       
   172 //                    been encountered.  The val Node will be of nodetype uset
       
   173 //                    and contain pointers to the actual UnicodeSets.
       
   174 //                    The Key is the source string for initializing the set.
       
   175 //
       
   176 //                    The hash table is used to avoid creating duplicate
       
   177 //                    unnamed (not $var references) UnicodeSets.
       
   178 //
       
   179 //                    Memory Management:
       
   180 //                       The Hash Table owns these RBBISetTableEl structs and
       
   181 //                            the key strings.  It does NOT own the val nodes.
       
   182 //
       
   183 //----------------------------------------------------------------------------
       
   184 struct RBBISetTableEl {
       
   185     UnicodeString *key;   // Source string for initializing the set; owned by the hash table.
       
   186     RBBINode      *val;   // Node of type uset; NOT owned by the hash table.
       
   187 };
       
   188 
       
   189 
       
   190 //----------------------------------------------------------------------------
       
   191 //
       
   192 //   RBBIDebugPrintf    Printf equivalent, for debugging output.
       
   193 //                      Conditional compilation of the implementation lets us
       
   194 //                      get rid of the stdio dependency in environments where it
       
   195 //                      is unavailable.
       
   196 //
       
   197 //----------------------------------------------------------------------------
       
   198 #ifdef RBBI_DEBUG
       // Debug builds: both macros forward directly to the stdio functions.
       // NOTE(review): this #include appears between U_NAMESPACE_BEGIN and
       //   U_NAMESPACE_END.  If <stdio.h> has not already been included earlier in the
       //   translation unit, its declarations would be processed inside that scope -
       //   confirm the include order at the sites that define RBBI_DEBUG.
       
   199 #include <stdio.h>
       
   200 #define RBBIDebugPrintf printf
       
   201 #define RBBIDebugPuts puts
       
   202 #else
       // Non-debug builds: RBBIDebugPuts(arg) expands to nothing.  RBBIDebugPrintf is
       //   only #undef'd here, not given an empty definition, so this header does not
       //   compile out uses of it; such uses must be guarded separately.
       
   203 #undef RBBIDebugPrintf 
       
   204 #define RBBIDebugPuts(arg)
       
   205 #endif
       
   206 
       
   207 U_NAMESPACE_END
       
   208 #endif
       
   209 
       
   210 
       
   211