|
1 /* |
|
2 ****************************************************************************** |
|
3 * |
|
4 * Copyright (C) 1999-2005, International Business Machines |
|
5 * Corporation and others. All Rights Reserved. |
|
6 * |
|
7 ****************************************************************************** |
|
8 * file name: ubidiimp.h |
|
9 * encoding: US-ASCII |
|
10 * tab size: 8 (not used) |
|
11 * indentation:4 |
|
12 * |
|
13 * created on: 1999aug06 |
|
14 * created by: Markus W. Scherer |
|
15 */ |
|
16 |
|
17 #ifndef UBIDIIMP_H |
|
18 #define UBIDIIMP_H |
|
19 |
|
20 /* set import/export definitions */ |
|
21 #ifdef U_COMMON_IMPLEMENTATION |
|
22 |
|
23 #include "unicode/utypes.h" |
|
24 #include "unicode/uchar.h" |
|
25 #include "ubidi_props.h" |
|
26 |
|
27 /* miscellaneous definitions ---------------------------------------------- */ |
|
28 |
|
/* a directional property value; the UBiDi structure keeps one per text character */
typedef uint8_t  DirProp;

/* a bit set of directional properties, one bit per property value */
typedef uint32_t Flags;
|
31 |
|
32 /* Comparing the description of the BiDi algorithm with this implementation |
|
33 is easier with the same names for the BiDi types in the code as there. |
|
34 See UCharDirection in uchar.h . |
|
35 */ |
|
36 enum { |
|
37 L= U_LEFT_TO_RIGHT, |
|
38 R= U_RIGHT_TO_LEFT, |
|
39 EN= U_EUROPEAN_NUMBER, |
|
40 ES= U_EUROPEAN_NUMBER_SEPARATOR, |
|
41 ET= U_EUROPEAN_NUMBER_TERMINATOR, |
|
42 AN= U_ARABIC_NUMBER, |
|
43 CS= U_COMMON_NUMBER_SEPARATOR, |
|
44 B= U_BLOCK_SEPARATOR, |
|
45 S= U_SEGMENT_SEPARATOR, |
|
46 WS= U_WHITE_SPACE_NEUTRAL, |
|
47 ON= U_OTHER_NEUTRAL, |
|
48 LRE=U_LEFT_TO_RIGHT_EMBEDDING, |
|
49 LRO=U_LEFT_TO_RIGHT_OVERRIDE, |
|
50 AL= U_RIGHT_TO_LEFT_ARABIC, |
|
51 RLE=U_RIGHT_TO_LEFT_EMBEDDING, |
|
52 RLO=U_RIGHT_TO_LEFT_OVERRIDE, |
|
53 PDF=U_POP_DIRECTIONAL_FORMAT, |
|
54 NSM=U_DIR_NON_SPACING_MARK, |
|
55 BN= U_BOUNDARY_NEUTRAL, |
|
56 dirPropCount |
|
57 }; |
|
58 |
|
/*
 * Directional properties are sometimes easier to handle as bit values:
 * the property with value n is represented by bit n.
 * Abbreviations in these macro names refer to names
 * used in the BiDi algorithm.
 */
#define DIRPROP_FLAG(dir) (1UL<<(dir))

/* special flag (bit 31) for multiple runs from explicit embedding codes */
#define DIRPROP_FLAG_MULTI_RUNS (1UL<<31)

/* are there any characters that are LTR or RTL? */
#define MASK_LTR \
    (DIRPROP_FLAG(L)|DIRPROP_FLAG(EN)|DIRPROP_FLAG(AN)|DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RTL \
    (DIRPROP_FLAG(R)|DIRPROP_FLAG(AL)|DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))

/* explicit embedding codes */
#define MASK_LRX        (DIRPROP_FLAG(LRE)|DIRPROP_FLAG(LRO))
#define MASK_RLX        (DIRPROP_FLAG(RLE)|DIRPROP_FLAG(RLO))
#define MASK_OVERRIDE   (DIRPROP_FLAG(LRO)|DIRPROP_FLAG(RLO))

#define MASK_EXPLICIT       (MASK_LRX|MASK_RLX|DIRPROP_FLAG(PDF))
#define MASK_BN_EXPLICIT    (DIRPROP_FLAG(BN)|MASK_EXPLICIT)

/* paragraph and segment separators */
#define MASK_B_S (DIRPROP_FLAG(B)|DIRPROP_FLAG(S))

/* all types that are counted as White Space or Neutral in some steps */
#define MASK_WS (MASK_B_S|DIRPROP_FLAG(WS)|MASK_BN_EXPLICIT)
#define MASK_N  (DIRPROP_FLAG(ON)|MASK_WS)

/* all types that are included in a sequence of European Terminators for (W5) */
#define MASK_ET_NSM_BN (DIRPROP_FLAG(ET)|DIRPROP_FLAG(NSM)|MASK_BN_EXPLICIT)

/* types that are neutrals or could become neutrals in (Wn) */
#define MASK_POSSIBLE_N \
    (DIRPROP_FLAG(CS)|DIRPROP_FLAG(ES)|DIRPROP_FLAG(ET)|MASK_N)

/*
 * These types may be changed to "e",
 * the embedding type (L or R) of the run,
 * in the BiDi algorithm (N2)
 */
#define MASK_EMBEDDING (DIRPROP_FLAG(NSM)|MASK_POSSIBLE_N)
|
101 |
|
/*
 * UCharDirection defines the dirProps L and R as the values 0 and 1,
 * so the lowest bit of a level directly yields L (even) or R (odd).
 */
#define GET_LR_FROM_LEVEL(level)    ((DirProp)((level)&1))

/* true for level values of 0xfe and above */
#define IS_DEFAULT_LEVEL(level)     ((level)>=0xfe)
|
106 |
|
/*
 * When paraLevel is contextual, this bit is ORed into the property of
 * every character that belongs to a paragraph with contextual RTL direction.
 */
#define CONTEXT_RTL             0x80

/* the property value with the CONTEXT_RTL bit cleared */
#define NO_CONTEXT_RTL(dir)     ((dir)&~CONTEXT_RTL)

/*
 * Same as DIRPROP_FLAG, except that the CONTEXT_RTL bit of dir is ignored.
 */
#define DIRPROP_FLAG_NC(dir)    (1UL<<(NO_CONTEXT_RTL(dir)))

/*
 * Paragraph level that applies at a text index:
 * if defaultParaLevel is non-zero, it is bit 7 (the CONTEXT_RTL bit) of the
 * character's dirProps entry, otherwise it is the object's fixed paraLevel.
 */
#define GET_PARALEVEL(ubidi, index) \
    (UBiDiLevel)((ubidi)->defaultParaLevel \
                    ? (ubidi)->dirProps[index]>>7 \
                    : (ubidi)->paraLevel)
|
121 |
|
/* Paragraph type for multiple paragraph support ---------------------------- */

/* one entry of the paras array (paragraph limits) in the UBiDi structure */
typedef int32_t Para;

/* code points of carriage return and line feed */
#define CR  0x000D
#define LF  0x000A
|
127 |
|
/* Run structure for reordering --------------------------------------------- */

typedef struct Run {
    /* first character of the run; b31 indicates even/odd level */
    int32_t logicalStart;
    /* last visual position of the run +1 */
    int32_t visualLimit;
} Run;
|
134 |
|
/*
 * In a Run, logicalStart carries the parity of the run's level in bit 31:
 * the bit is set if the run level is odd.
 *
 * All macro arguments are fully parenthesized so that expressions such as
 * GET_INDEX(a|b) or MAKE_INDEX_ODD_PAIR(i, level&1) expand as intended.
 * x is evaluated exactly once by each macro.
 */
#define INDEX_ODD_BIT (1UL<<31)

/*
 * Move the lowest bit of level into bit 31.
 * Only the parity of level survives; the shift is done on uint32_t because
 * shifting a signed value into the sign bit is undefined behavior.
 */
#define MAKE_INDEX_ODD_PAIR(index, level) ((index)|(int32_t)((uint32_t)(level)<<31))
/* x must be a modifiable int32_t lvalue */
#define ADD_ODD_BIT_FROM_LEVEL(x, level)  ((x)|=(int32_t)((uint32_t)(level)<<31))
/* x must be a modifiable int32_t lvalue; the mask 0x7fffffff fits into int32_t */
#define REMOVE_ODD_BIT(x)                 ((x)&=(int32_t)~(uint32_t)INDEX_ODD_BIT)

/*
 * The index without the odd bit, as a non-negative int32_t.
 * The masking is done in 32 bits: with a 64-bit unsigned long, masking a
 * negative int32_t with ~INDEX_ODD_BIT would leave the sign-extension bits set.
 */
#define GET_INDEX(x)   ((int32_t)((uint32_t)(x)&~(uint32_t)INDEX_ODD_BIT))
/* 1 if the odd bit is set, otherwise 0 */
#define GET_ODD_BIT(x) ((uint32_t)(x)>>31)
#define IS_ODD_RUN(x)  (((x)&INDEX_ODD_BIT)!=0)
#define IS_EVEN_RUN(x) (((x)&INDEX_ODD_BIT)==0)
|
146 |
|
147 U_CFUNC UBool |
|
148 ubidi_getRuns(UBiDi *pBiDi); |
|
149 |
|
150 /* UBiDi structure ----------------------------------------------------------- */ |
|
151 |
|
152 struct UBiDi { |
|
153 /* pointer to parent paragraph object (pointer to self if this object is |
|
154 * a paragraph object); set to NULL in a newly opened object; set to a |
|
155 * real value after a successful execution of ubidi_setPara or ubidi_setLine |
|
156 */ |
|
157 const UBiDi * pParaBiDi; |
|
158 |
|
159 const UBiDiProps *bdp; |
|
160 |
|
161 /* alias pointer to the current text */ |
|
162 const UChar *text; |
|
163 |
|
164 /* length of the current text */ |
|
165 int32_t length; |
|
166 |
|
167 /* memory sizes in bytes */ |
|
168 int32_t dirPropsSize, levelsSize, parasSize, runsSize; |
|
169 |
|
170 /* allocated memory */ |
|
171 DirProp *dirPropsMemory; |
|
172 UBiDiLevel *levelsMemory; |
|
173 Para *parasMemory; |
|
174 Run *runsMemory; |
|
175 |
|
176 /* indicators for whether memory may be allocated after ubidi_open() */ |
|
177 UBool mayAllocateText, mayAllocateRuns; |
|
178 |
|
179 /* arrays with one value per text-character */ |
|
180 const DirProp *dirProps; |
|
181 UBiDiLevel *levels; |
|
182 |
|
183 /* are we performing an approximation of the "inverse BiDi" algorithm? */ |
|
184 UBool isInverse; |
|
185 UBool isInverse2; |
|
186 |
|
187 /* must block separators receive level 0? */ |
|
188 UBool orderParagraphsLTR; |
|
189 |
|
190 /* the paragraph level */ |
|
191 UBiDiLevel paraLevel; |
|
192 /* original paraLevel when contextual */ |
|
193 /* must be one of UBIDI_DEFAULT_xxx or 0 if not contextual */ |
|
194 UBiDiLevel defaultParaLevel; |
|
195 |
|
196 /* the following is set in ubidi_setPara, used in processPropertySeq */ |
|
197 const struct ImpTabPair * pImpTabPair; /* pointer to levels state table pair */ |
|
198 |
|
199 /* the overall paragraph or line directionality - see UBiDiDirection */ |
|
200 UBiDiDirection direction; |
|
201 |
|
202 /* flags is a bit set for which directional properties are in the text */ |
|
203 Flags flags; |
|
204 |
|
205 /* characters after trailingWSStart are WS and are */ |
|
206 /* implicitly at the paraLevel (rule (L1)) - levels may not reflect that */ |
|
207 int32_t trailingWSStart; |
|
208 |
|
209 /* fields for paragraph handling */ |
|
210 int32_t paraCount; /* set in getDirProps() */ |
|
211 Para *paras; /* limits of paragraphs, filled in |
|
212 ResolveExplicitLevels() or CheckExplicitLevels() */ |
|
213 |
|
214 /* for single paragraph text, we only need a tiny array of paras (no malloc()) */ |
|
215 Para simpleParas[1]; |
|
216 |
|
217 /* fields for line reordering */ |
|
218 int32_t runCount; /* ==-1: runs not set up yet */ |
|
219 Run *runs; |
|
220 |
|
221 /* for non-mixed text, we only need a tiny array of runs (no malloc()) */ |
|
222 Run simpleRuns[1]; |
|
223 }; |
|
224 |
|
/*
 * Validity checks for an object pointer x, based on its pParaBiDi member.
 * Each macro evaluates x several times, so x must not have side effects.
 */

/* x is non-NULL and is its own parent paragraph object */
#define IS_VALID_PARA(x) \
    ((x) && ((x)->pParaBiDi==(x)))

/* x is non-NULL and its parent is non-NULL and is its own parent */
#define IS_VALID_LINE(x) \
    ((x) && ((x)->pParaBiDi) && ((x)->pParaBiDi->pParaBiDi==(x)->pParaBiDi))

/* either of the two conditions above holds */
#define IS_VALID_PARA_OR_LINE(x) \
    (IS_VALID_PARA(x) || IS_VALID_LINE(x))
|
228 |
|
229 /* helper function to (re)allocate memory if allowed */ |
|
230 U_CFUNC UBool |
|
231 ubidi_getMemory(void **pMemory, int32_t *pSize, UBool mayAllocate, int32_t sizeNeeded); |
|
232 |
|
/*
 * Helper macros for each allocated array in UBiDi.
 * Each one forwards the array's ...Memory pointer and ...Size member to
 * ubidi_getMemory(); allocation is governed by mayAllocateText or
 * mayAllocateRuns. The runs variant converts a Run count into bytes.
 */
#define getDirPropsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, \
                    &(pBiDi)->dirPropsSize, \
                    (pBiDi)->mayAllocateText, \
                    (length))

#define getLevelsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->levelsMemory, \
                    &(pBiDi)->levelsSize, \
                    (pBiDi)->mayAllocateText, \
                    (length))

#define getRunsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->runsMemory, \
                    &(pBiDi)->runsSize, \
                    (pBiDi)->mayAllocateRuns, \
                    (length)*sizeof(Run))

/*
 * Additional macros used by ubidi_open() - always allow allocation.
 * The paras and runs variants convert an element count into bytes.
 */
#define getInitialDirPropsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->dirPropsMemory, \
                    &(pBiDi)->dirPropsSize, \
                    TRUE, \
                    (length))

#define getInitialLevelsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->levelsMemory, \
                    &(pBiDi)->levelsSize, \
                    TRUE, \
                    (length))

#define getInitialParasMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->parasMemory, \
                    &(pBiDi)->parasSize, \
                    TRUE, \
                    (length)*sizeof(Para))

#define getInitialRunsMemory(pBiDi, length) \
    ubidi_getMemory((void **)&(pBiDi)->runsMemory, \
                    &(pBiDi)->runsSize, \
                    TRUE, \
                    (length)*sizeof(Run))
|
262 |
|
263 #endif |
|
264 |
|
265 #endif |