webengine/osswebengine/JavaScriptCore/pcre/ucpinternal.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Mon, 18 Jan 2010 21:20:18 +0200
changeset 27 6297cdf66332
parent 26 cb62a4f66ebe
permissions -rw-r--r--
Revision: 201001 Kit: 201003
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     1
/*************************************************
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     2
*     libucp - Unicode Property Table handler    *
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     3
*************************************************/
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     4
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     5
/* Internal header file defining the layout of compact nodes in the tree. */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     6
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     7
typedef struct cnode {
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     8
  unsigned short int f0;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
     9
  unsigned short int f1;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    10
  unsigned short int f2;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    11
} cnode;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    12
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    13
/* Things for the f0 field */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    14
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    15
#define f0_leftexists   0x8000    /* Left child exists */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    16
#define f0_typemask     0x3f00    /* Type bits */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    17
#define f0_typeshift         8    /* Type shift */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    18
#define f0_chhmask      0x00ff    /* Character high bits */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    19
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    20
/* Things for the f2 field */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    21
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    22
#define f2_rightmask    0xf000    /* Mask for right offset bits */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    23
#define f2_rightshift       12    /* Shift for right offset */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    24
#define f2_casemask     0x0fff    /* Mask for case offset */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    25
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    26
/* The tree consists of a vector of structures of type cnode, with the root
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    27
node as the first element. The three short ints (16-bits) are used as follows:
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    28
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    29
(f0) (1) The 0x8000 bit of f0 is set if a left child exists. The child's node
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    30
         is the next node in the vector.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    31
     (2) The 0x4000 bits of f0 is spare.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    32
     (3) The 0x3f00 bits of f0 contain the character type; this is a number
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    33
         defined by the enumeration in ucp.h (e.g. ucp_Lu).
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    34
     (4) The bottom 8 bits of f0 contain the most significant byte of the
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    35
         character's 24-bit codepoint.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    36
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    37
(f1) (1) The f1 field contains the two least significant bytes of the
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    38
         codepoint.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    39
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    40
(f2) (1) The 0xf000 bits of f2 contain zero if there is no right child of this
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    41
         node. Otherwise, they contain one plus the exponent of the power of
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    42
         two of the offset to the right node (e.g. a value of 3 means 8). The
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    43
         units of the offset are node items.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    44
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    45
     (2) The 0x0fff bits of f2 contain the signed offset from this character to
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    46
         its alternate cased value. They are zero if there is no such
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    47
         character.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    48
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    49
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    50
-----------------------------------------------------------------------------
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    51
||.|.| type (6) | ms char (8) ||  ls char (16)  ||....|  case offset (12)  ||
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    52
-----------------------------------------------------------------------------
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    53
  | |                                              |
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    54
  | |-> spare                                      |
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    55
  |                                        exponent of right
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    56
  |-> left child exists                       child offset
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    57
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    58
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    59
The upper/lower casing information is set only for characters that come in
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    60
pairs. There are (at present) four non-one-to-one mappings in the Unicode data.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    61
These are ignored. They are:
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    62
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    63
  1FBE Greek Prosgegrammeni (lower, with upper -> capital iota)
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    64
  2126 Ohm
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    65
  212A Kelvin
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    66
  212B Angstrom
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    67
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    68
Certainly for the last three, having an alternate case would seem to be a
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    69
mistake. I don't know any Greek, so cannot comment on the first one.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    70
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    71
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    72
When searching the tree, proceed as follows:
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    73
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    74
(1) Start at the first node.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    75
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    76
(2) Extract the character value from f1 and the bottom 8 bits of f0;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    77
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    78
(3) Compare with the character being sought. If equal, we are done.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    79
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    80
(4) If the test character is smaller, inspect the f0_leftexists flag. If it is
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    81
    not set, the character is not in the tree. If it is set, move to the next
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    82
    node, and go to (2).
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    83
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    84
(5) If the test character is bigger, extract the f2_rightmask bits from f2, and
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    85
    shift them right by f2_rightshift. If the result is zero, the character is
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    86
    not in the tree. Otherwise, calculate the number of nodes to skip by
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    87
    shifting the value 1 left by this number minus one. Go to (2).
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    88
*/
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    89
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    90
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    91
/* End of internal.h */