FCL/sf/os/textandloc: comparison fontservices/textshaperplugin/IcuSource/common/brkdict.h

equal deleted inserted replaced

--1:000000000000
+:1fb32624e06b
+/*
+**********************************************************************
+*   Copyright (C) 1999-2004 IBM and others. All rights reserved.
+**********************************************************************
+*   Date        Name        Description
+*   12/1/99     rtg         Ported from Java
+*   01/13/2000  helena      Added UErrorCode to ctors.
+**********************************************************************
+*/
+#ifndef BRKDICT_H
+#define BRKDICT_H
+#include "unicode/utypes.h"
+#include "unicode/uobject.h"
+#include "ucmp8.h"
+U_NAMESPACE_BEGIN
+/**
+* This is the class that represents the list of known words used by
+* DictionaryBasedBreakIterator.  The conceptual data structure used
+* here is a trie: there is a node hanging off the root node for every
+* letter that can start a word.  Each of these nodes has a node hanging
+* off of it for every letter that can be the second letter of a word
+* if this node is the first letter, and so on.  The trie is represented
+* as a two-dimensional array that can be treated as a table of state
+* transitions.  Indexes are used to compress this array, taking
+* advantage of the fact that this array will always be very sparse.
+*/
+class BreakDictionary : public UMemory {
+//=================================================================================
+// data members
+//=================================================================================
+private:
+/**
+* Maps from characters to column numbers.  The main use of this is to
+* avoid making room in the array for empty columns.
+*/
+CompactByteArray* columnMap;
+/**
+* The number of actual columns in the table
+*/
+int32_t numCols;
+/**
+* Columns are organized into groups of 32.  This says how many
+* column groups.  (We could calculate this, but we store the
+* value to avoid having to repeatedly calculate it.)
+*/
+int32_t numColGroups;
+/**
+* The actual compressed state table.  Each conceptual row represents
+* a state, and the cells in it contain the row numbers of the states
+* to transition to for each possible letter.  0 is used to indicate
+* an illegal combination of letters (i.e., the error state).  The
+* table is compressed by eliminating all the unpopulated (i.e., zero)
+* cells.  Multiple conceptual rows can then be doubled up in a single
+* physical row by sliding them up and possibly shifting them to one
+* side or the other so the populated cells don't collide.  Indexes
+* are used to identify unpopulated cells and to locate populated cells.
+*/
+int16_t* table;
+/**
+* This index maps logical row numbers to physical row numbers
+*/
+int16_t* rowIndex;
+/**
+* A bitmap is used to tell which cells in the comceptual table are
+* populated.  This array contains all the unique bit combinations
+* in that bitmap.  If the table is more than 32 columns wide,
+* successive entries in this array are used for a single row.
+*/
+int32_t* rowIndexFlags;
+/**
+* This index maps from a logical row number into the bitmap table above.
+* (This keeps us from storing duplicate bitmap combinations.)  Since there
+* are a lot of rows with only one populated cell, instead of wasting space
+* in the bitmap table, we just store a negative number in this index for
+* rows with one populated cell.  The absolute value of that number is
+* the column number of the populated cell.
+*/
+int16_t* rowIndexFlagsIndex;
+/**
+* For each logical row, this index contains a constant that is added to
+* the logical column number to get the physical column number
+*/
+int8_t* rowIndexShifts;
+//=================================================================================
+// deserialization
+//=================================================================================
+public:
+/**
+* Constructor.  Creates the BreakDictionary by using readDictionaryFile() to
+* load the dictionary tables from the disk.
+* @param dictionaryFilename The name of the dictionary file
+* @param status for errors if it occurs
+*/
+BreakDictionary(const char* dictionaryFilename, UErrorCode& status);
+/**
+* Destructor.
+*/
+~BreakDictionary();
+/**
+* Reads the dictionary file on the disk and constructs the appropriate in-memory
+* representation.
+* @param in The given memory stream
+*/
+void readDictionaryFile(const uint8_t * in);
+//=================================================================================
+// access to the words
+//=================================================================================
+/**
+* Uses the column map to map the character to a column number, then
+* passes the row and column number to the other version of at()
+* @param row The current state
+* @param ch The character whose column we're interested in
+* @return The new state to transition to
+*/
+int16_t at(int32_t row, UChar ch) const;
+/**
+* Returns the value in the cell with the specified (logical) row and
+* column numbers.  In DictionaryBasedBreakIterator, the row number is
+* a state number, the column number is an input, and the return value
+* is the row number of the new state to transition to.  (0 is the
+* "error" state, and -1 is the "end of word" state in a dictionary)
+* @param row The row number of the current state
+* @param col The column number of the input character (0 means "not a
+* dictionary character")
+* @return The row number of the new state to transition to
+*/
+int16_t at(int32_t row, int32_t col) const;
+private:
+/**
+* Given (logical) row and column numbers, returns true if the
+* cell in that position is populated
+* @param row The LOGICAL row number of the cell
+* @param col The PHYSICAL row number of the cell
+* @return true if the cell in that position is populated
+*/
+UBool cellIsPopulated(int32_t row, int32_t col) const;
+/**
+* Implementation of at() when we know the specified cell is populated.
+* @param row The PHYSICAL row number of the cell
+* @param col The PHYSICAL column number of the cell
+* @return The value stored in the cell
+*/
+int16_t internalAt(int32_t row, int32_t col) const;
+// the following methods are never meant to be called and so are not defined
+// (if you don't declare them, you get default implementations)
+BreakDictionary(const BreakDictionary& that);
+BreakDictionary& operator=(const BreakDictionary& that);
+};
+U_NAMESPACE_END
+#endif