|
1 /* |
|
2 * Copyright (C) 1999-2005, International Business Machines Corporation and others. |
|
3 * All Rights Reserved. |
|
4 ********************************************************************** |
|
5 * Date Name Description |
|
6 * 11/17/99 aliu Creation. |
|
7 ********************************************************************** |
|
8 */ |
|
9 #ifndef UNIFILT_H |
|
10 #define UNIFILT_H |
|
11 |
|
12 #include "unicode/unifunct.h" |
|
13 #include "unicode/unimatch.h" |
|
14 |
|
15 /** |
|
16 * \file |
|
17 * \brief C++ API: Unicode Filter |
|
18 */ |
|
19 |
|
20 U_NAMESPACE_BEGIN |
|
21 |
|
22 /** |
|
23 * U_ETHER is a sentinel value, defined as the UChar 0xFFFF, that is used to |
|
24 * represent character values for positions outside a range. For example, |
|
25 * a transliterator uses it to represent characters outside the range |
|
26 * contextStart..contextLimit-1. This allows rules and UnicodeSets to |
|
27 * explicitly match text that lies outside a defined range. |
|
28 * @draft ICU 3.0 |
|
29 */ |
|
30 #define U_ETHER ((UChar)0xFFFF) |
|
31 |
|
32 /** |
|
33 * |
|
34 * <code>UnicodeFilter</code> defines a protocol for selecting a |
|
35 * subset of the full range (U+0000 to U+10FFFF) of Unicode characters. |
|
36 * Currently, filters are used in conjunction with classes like {@link |
|
37 * Transliterator} to only process selected characters through a |
|
38 * transformation. |
|
39 * |
|
40 * <p>Note: UnicodeFilter currently stubs out two pure virtual methods |
|
41 * of its base class, UnicodeMatcher. These methods are toPattern() |
|
42 * and matchesIndexValue(). This is done so that filter classes that |
|
43 * are not actually used as matchers -- specifically, those in the |
|
44 * UnicodeFilterLogic component, and those in tests -- can continue to |
|
45 * work without defining these methods. As long as a filter is not |
|
46 * used in an RBT during real transliteration, these methods will not |
|
47 * be called. However, this breaks the UnicodeMatcher base class |
|
48 * protocol, and it is not a correct solution. |
|
49 * |
|
50 * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter |
|
51 * hierarchy and either redesign it, or simply remove the stubs in |
|
52 * UnicodeFilter and force subclasses to implement the full |
|
53 * UnicodeMatcher protocol. |
|
54 * |
|
55 * @see UnicodeFilterLogic |
|
56 * @stable ICU 2.0 |
|
57 */ |
|
58 class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher { |
|
59 |
|
60 public: |
|
61 /** |
|
62 * Destructor. Declared virtual. |
|
63 * @stable ICU 2.0 |
|
64 */ |
|
65 virtual ~UnicodeFilter(); |
|
66 |
|
67 /** |
|
68 * Returns <tt>true</tt> for characters that are in the selected |
|
69 * subset. In other words, if character <tt>c</tt> is <b>to be |
|
70 * filtered</b>, then <tt>contains(c)</tt> returns |
|
71 * <b><tt>false</tt></b>. Pure virtual: every concrete filter must implement it. |
|
72 * @stable ICU 2.0 |
|
73 */ |
|
74 virtual UBool contains(UChar32 c) const = 0; |
|
75 |
|
76 /** |
|
77 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer |
|
78 * and return the pointer. Note: the method is const but the returned pointer is not. |
|
79 * @stable ICU 2.4 |
|
80 */ |
|
81 virtual UnicodeMatcher* toMatcher() const; |
|
82 |
|
83 /** |
|
84 * Implement the UnicodeMatcher matching API. Note that <tt>offset</tt> is taken by non-const reference. |
|
85 * @stable ICU 2.4 |
|
86 */ |
|
87 virtual UMatchDegree matches(const Replaceable& text, |
|
88 int32_t& offset, |
|
89 int32_t limit, |
|
90 UBool incremental); |
|
91 |
|
92 /** |
|
93 * UnicodeFunctor API. Nothing to do for a filter; the parameter is left unnamed. |
|
94 * @stable ICU 2.4 |
|
95 */ |
|
96 virtual void setData(const TransliterationRuleData*); |
|
97 |
|
98 /** |
|
99 * ICU "poor man's RTTI", returns a UClassID for the actual class. |
|
100 * Pure virtual here, so every concrete subclass must provide it. |
|
101 * @stable ICU 2.2 |
|
102 */ |
|
103 virtual UClassID getDynamicClassID() const = 0; |
|
104 |
|
105 /** |
|
106 * ICU "poor man's RTTI", returns a UClassID for this class. |
|
107 * Static, so it can be called without an instance. |
|
108 * @stable ICU 2.2 |
|
109 */ |
|
110 static UClassID U_EXPORT2 getStaticClassID(); |
|
111 |
|
112 protected: |
|
113 |
|
114 /* |
|
115 * No constructor is declared. Because this class has pure virtual |
|
116 * functions it cannot be instantiated directly; the declaration below is left disabled. |
|
117 * @stable ICU 2.0 |
|
118 */ |
|
119 /* UnicodeFilter();*/ |
|
120 }; |
|
121 |
|
122 /*inline UnicodeFilter::UnicodeFilter() {}*/ |
|
123 |
|
124 U_NAMESPACE_END |
|
125 |
|
126 #endif |