author | Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com> |
Fri, 11 Jun 2010 14:43:47 +0300 | |
changeset 7 | a5fbfefd615f |
permissions | -rw-r--r-- |
7
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
/** |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
* Copyright 2002-2004 The Apache Software Foundation |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
* |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
* Licensed under the Apache License, Version 2.0 (the "License"); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
* you may not use this file except in compliance with the License. |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
* You may obtain a copy of the License at |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
* |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
8 |
* http://www.apache.org/licenses/LICENSE-2.0 |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
9 |
* |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
* Unless required by applicable law or agreed to in writing, software |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
* distributed under the License is distributed on an "AS IS" BASIS, |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
* See the License for the specific language governing permissions and |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
14 |
* limitations under the License. |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
15 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
16 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
17 |
#ifndef _lucene_search_highlight_simplefragmenter_ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
18 |
#define _lucene_search_highlight_simplefragmenter_ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
19 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
20 |
#if defined(_LUCENE_PRAGMA_ONCE) |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
21 |
# pragma once |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
22 |
#endif |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
23 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
#include "CLucene/analysis/AnalysisHeader.h" |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
#include "CLucene/highlighter/Fragmenter.h" |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
26 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
27 |
CL_NS_DEF2(search,highlight) |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
/** |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
30 |
* {@link Fragmenter} implementation which breaks text up into same-size |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
31 |
* fragments with no concerns over spotting sentence boundaries. |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
32 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
33 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
34 |
class SimpleFragmenter:public Fragmenter |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
35 |
{ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
36 |
private: |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
37 |
LUCENE_STATIC_CONSTANT(int32_t, DEFAULT_FRAGMENT_SIZE =100 ); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
38 |
int32_t _currentNumFrags; |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
39 |
int32_t _fragmentSize; |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
40 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
41 |
public: |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
42 |
/** |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
43 |
* |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
44 |
* @param fragmentSize size in bytes of each fragment |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
45 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
46 |
SimpleFragmenter(int32_t fragmentSize = DEFAULT_FRAGMENT_SIZE); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
47 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
~SimpleFragmenter(); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
49 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
50 |
/* (non-Javadoc) |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
51 |
* @see org.apache.lucene.search.highlight.TextFragmenter#start(const TCHAR*) |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
52 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
53 |
void start(const TCHAR* originalText); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
54 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
55 |
/* (non-Javadoc) |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
56 |
* @see org.apache.lucene.search.highlight.TextFragmenter#isNewFragment(org.apache.lucene.analysis.Token) |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
57 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
58 |
bool isNewFragment(const CL_NS(analysis)::Token * token); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
59 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
60 |
/** |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
61 |
* @return size in bytes of each fragment |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
62 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
63 |
int32_t getFragmentSize() const; |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
64 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
65 |
/** |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
66 |
* @param size size in bytes of each fragment |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
67 |
*/ |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
68 |
void setFragmentSize(int32_t size); |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
69 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
70 |
}; |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
71 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
72 |
CL_NS_END2 |
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
73 |
|
a5fbfefd615f
Revision: 201021
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
74 |
#endif |