webengine/osswebengine/JavaScriptCore/pcre/ucpinternal.h
author Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
Thu, 07 Jan 2010 13:31:38 +0200
changeset 26 cb62a4f66ebe
parent 25 0ed94ceaa377
child 27 6297cdf66332
permissions -rw-r--r--
Revision: 200951 Kit: 201001
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
25
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     1
/*
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     2
* Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     3
* All rights reserved.
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     4
* This component and the accompanying materials are made available
26
cb62a4f66ebe Revision: 200951
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 25
diff changeset
     5
* under the terms of the License "Eclipse Public License v1.0"
25
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     6
* which accompanies this distribution, and is available
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     7
* at the URL "http://www.eclipse.org/legal/epl-v10.html".
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     8
*
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
     9
* Initial Contributors:
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    10
* Nokia Corporation - initial contribution.
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    11
*
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    12
* Contributors:
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    13
*
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    14
* Description: 
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    15
*
0ed94ceaa377 Revision: 200948
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents: 0
diff changeset
    16
*/
0
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    17
/*************************************************
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    18
*     libucp - Unicode Property Table handler    *
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    19
*************************************************/
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    20
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    21
/* Internal header file defining the layout of compact nodes in the tree. */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    22
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    23
typedef struct cnode {
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    24
  unsigned short int f0;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    25
  unsigned short int f1;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    26
  unsigned short int f2;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    27
} cnode;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    28
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    29
/* Things for the f0 field */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    30
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    31
#define f0_leftexists   0x8000    /* Left child exists */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    32
#define f0_typemask     0x3f00    /* Type bits */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    33
#define f0_typeshift         8    /* Type shift */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    34
#define f0_chhmask      0x00ff    /* Character high bits */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    35
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    36
/* Things for the f2 field */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    37
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    38
#define f2_rightmask    0xf000    /* Mask for right offset bits */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    39
#define f2_rightshift       12    /* Shift for right offset */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    40
#define f2_casemask     0x0fff    /* Mask for case offset */
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    41
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    42
/* The tree consists of a vector of structures of type cnode, with the root
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    43
node as the first element. The three short ints (16-bits) are used as follows:
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    44
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    45
(f0) (1) The 0x8000 bit of f0 is set if a left child exists. The child's node
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    46
         is the next node in the vector.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    47
     (2) The 0x4000 bits of f0 is spare.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    48
     (3) The 0x3f00 bits of f0 contain the character type; this is a number
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    49
         defined by the enumeration in ucp.h (e.g. ucp_Lu).
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    50
     (4) The bottom 8 bits of f0 contain the most significant byte of the
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    51
         character's 24-bit codepoint.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    52
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    53
(f1) (1) The f1 field contains the two least significant bytes of the
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    54
         codepoint.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    55
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    56
(f2) (1) The 0xf000 bits of f2 contain zero if there is no right child of this
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    57
         node. Otherwise, they contain one plus the exponent of the power of
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    58
         two of the offset to the right node (e.g. a value of 3 means 8). The
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    59
         units of the offset are node items.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    60
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    61
     (2) The 0x0fff bits of f2 contain the signed offset from this character to
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    62
         its alternate cased value. They are zero if there is no such
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    63
         character.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    64
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    65
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    66
-----------------------------------------------------------------------------
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    67
||.|.| type (6) | ms char (8) ||  ls char (16)  ||....|  case offset (12)  ||
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    68
-----------------------------------------------------------------------------
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    69
  | |                                              |
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    70
  | |-> spare                                      |
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    71
  |                                        exponent of right
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    72
  |-> left child exists                       child offset
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    73
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    74
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    75
The upper/lower casing information is set only for characters that come in
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    76
pairs. There are (at present) four non-one-to-one mappings in the Unicode data.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    77
These are ignored. They are:
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    78
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    79
  1FBE Greek Prosgegrammeni (lower, with upper -> capital iota)
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    80
  2126 Ohm
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    81
  212A Kelvin
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    82
  212B Angstrom
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    83
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    84
Certainly for the last three, having an alternate case would seem to be a
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    85
mistake. I don't know any Greek, so cannot comment on the first one.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    86
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    87
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    88
When searching the tree, proceed as follows:
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    89
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    90
(1) Start at the first node.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    91
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    92
(2) Extract the character value from f1 and the bottom 8 bits of f0;
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    93
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    94
(3) Compare with the character being sought. If equal, we are done.
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    95
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    96
(4) If the test character is smaller, inspect the f0_leftexists flag. If it is
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    97
    not set, the character is not in the tree. If it is set, move to the next
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    98
    node, and go to (2).
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
    99
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   100
(5) If the test character is bigger, extract the f2_rightmask bits from f2, and
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   101
    shift them right by f2_rightshift. If the result is zero, the character is
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   102
    not in the tree. Otherwise, calculate the number of nodes to skip by
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   103
    shifting the value 1 left by this number minus one. Go to (2).
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   104
*/
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   105
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   106
dd21522fd290 Revision: 200911
Kiiskinen Klaus (Nokia-D-MSW/Tampere) <klaus.kiiskinen@nokia.com>
parents:
diff changeset
   107
/* End of internal.h */