author | hgs |
Fri, 15 Oct 2010 12:09:28 +0530 | |
changeset 24 | 65456528cac2 |
parent 8 | 6547bf8ca13a |
permissions | -rw-r--r-- |
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
1 |
Analyzer "stdtokens": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
2 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
3 |
'I' 'am' 'happy' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
4 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
5 |
'Oh' 'happiness' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
6 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
7 |
'Nothing' 'important' 'in' 'here' 'So' 'don't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
8 | 8 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
9 |
'What' 'is' 'happening' 'here' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
10 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
11 |
'Juon' 'nyt' 'teetä' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
12 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
13 |
'Tee' 'näin' |
8 | 14 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
24 | 15 |
'ปรากฏการณ์ฝนดาวตก' '7' '-18พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17' '-18' 'พฤศจิกายน' '2552' |
8 | 16 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
17 |
'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552' |
|
18 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
19 |
Analyzer "whitespace": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
20 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
21 |
'I' 'am' 'happy.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
22 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
23 |
'Oh' 'happiness!' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
24 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
25 |
'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.' |
8 | 26 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
27 |
'What' 'is' 'happening' 'here?' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
28 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
29 |
'Juon' 'nyt' 'teetä.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
30 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
31 |
'Tee' 'näin!' |
8 | 32 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
33 |
'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17-18' 'พฤศจิกายน' '2552' |
|
34 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
35 |
'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552' |
|
36 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
37 |
Analyzer "whitespace>lowercase": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
38 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
39 |
'i' 'am' 'happy.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
40 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
41 |
'oh' 'happiness!' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
42 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
43 |
'nothing' 'important' 'in' 'here.' 'so' 'don't' 'even' 'look.' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever.' |
8 | 44 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
45 |
'what' 'is' 'happening' 'here?' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
46 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
47 |
'juon' 'nyt' 'teetä.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
48 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
49 |
'tee' 'näin!' |
8 | 50 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
51 |
'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17-18' 'พฤศจิกายน' '2552' |
|
52 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
53 |
'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552' |
|
54 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
55 |
Analyzer "whitespace>accent": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
56 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
57 |
'I' 'am' 'happy.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
58 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
59 |
'Oh' 'happiness!' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
60 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
61 |
'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.' |
8 | 62 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
63 |
'What' 'is' 'happening' 'here?' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
64 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
65 |
'Juon' 'nyt' 'teeta.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
66 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
67 |
'Tee' 'nain!' |
8 | 68 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
69 |
'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17-18' 'พฤศจิกายน' '2552' |
|
70 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
71 |
'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552' |
|
72 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
73 |
Analyzer "letter": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
74 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
75 |
'I' 'am' 'happy' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
76 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
77 |
'Oh' 'happiness' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
78 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
79 |
'Nothing' 'important' 'in' 'here' 'So' 'don' 't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
8 | 80 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
81 |
'What' 'is' 'happening' 'here' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
82 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
83 |
'Juon' 'nyt' 'teetä' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
84 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
85 |
'Tee' 'näin' |
8 | 86 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
87 |
'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน' |
|
88 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
89 |
'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน' |
|
90 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
91 |
Analyzer "letter>lowercase": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
92 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
93 |
'i' 'am' 'happy' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
94 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
95 |
'oh' 'happiness' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
96 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
97 |
'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
8 | 98 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
99 |
'what' 'is' 'happening' 'here' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
100 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
101 |
'juon' 'nyt' 'teetä' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
102 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
103 |
'tee' 'näin' |
8 | 104 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
105 |
'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน' |
|
106 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
107 |
'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน' |
|
108 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
109 |
Analyzer "keyword": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
110 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
111 |
'I am happy. |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
112 |
' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
113 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
114 |
'Oh happiness! |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
115 |
' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
116 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
117 |
' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
118 |
Nothing |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
119 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
120 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
121 |
important in |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
122 |
here. |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
123 |
So don't even look. Because |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
124 |
you shall find |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
125 |
nothing |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
126 |
whatsoever. |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
127 |
' |
8 | 128 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
129 |
'What is happening here? |
|
130 |
||
131 |
' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
132 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
133 |
'Juon nyt teetä.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
134 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
135 |
'Tee näin! ' |
8 | 136 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
137 |
'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า 17-18 พฤศจิกายน 2552 |
|
138 |
' |
|
139 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
140 |
'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิกายน 2552 |
|
141 |
' |
|
142 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
143 |
Analyzer "keyword>lowercase": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
144 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
145 |
'i am happy. |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
146 |
' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
147 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
148 |
'oh happiness! |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
149 |
' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
150 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
151 |
' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
152 |
nothing |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
153 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
154 |
|
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
155 |
important in |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
156 |
here. |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
157 |
so don't even look. because |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
158 |
you shall find |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
159 |
nothing |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
160 |
whatsoever. |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
161 |
' |
8 | 162 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
163 |
'what is happening here? |
|
164 |
||
165 |
' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
166 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
167 |
'juon nyt teetä.' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
168 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
169 |
'tee näin! ' |
8 | 170 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
171 |
'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า 17-18 พฤศจิกายน 2552 |
|
172 |
' |
|
173 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
174 |
'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิกายน 2552 |
|
175 |
' |
|
176 |
||
177 |
Analyzer "letter>lowercase>stop(en)": |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
178 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
179 |
'i' 'am' 'happy' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
180 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
181 |
'oh' 'happiness' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
182 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
183 |
'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
8 | 184 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
185 |
'what' 'happening' 'here' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
186 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
8 | 187 |
'juon' 'nyt' 'teetä' |
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
188 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
8 | 189 |
'tee' 'näin' |
190 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
191 |
'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน' |
|
192 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
193 |
'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน' |
|
194 |
||
195 |
Analyzer "letter>lowercase>stop('i', 'oh', 'nyt', 'n�in')": |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
196 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
197 |
'am' 'happy' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
198 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
199 |
'happiness' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
200 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
201 |
'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
8 | 202 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
203 |
'what' 'is' 'happening' 'here' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
204 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
205 |
'juon' 'teetä' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
206 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
8 | 207 |
'tee' 'näin' |
208 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
209 |
'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน' |
|
210 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
211 |
'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน' |
|
212 |
||
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
213 |
Analyzer "letter>length(2, 4)": |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
214 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
215 |
'am' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
216 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
217 |
'Oh' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
218 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
219 |
'in' 'here' 'So' 'don' 'even' 'look' 'you' 'find' |
8 | 220 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
221 |
'What' 'is' 'here' |
|
0
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
222 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
223 |
'Juon' 'nyt' |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
224 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
671dee74050a
Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff
changeset
|
225 |
'Tee' 'näin' |
8 | 226 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
227 |
'ยน' 'ทย' 'นว' 'ณฑ' 'ญญาท' 'องถ' 'นด' 'าวว' 'พฤศจ' 'กายน' |
|
228 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
229 |
'จะม' 'คร' 'งสำค' 'ญท' 'วเม' 'อป' 'อในค' 'นว' 'นท' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน' |
|
230 |
||
231 |
Analyzer "standard>prefixes(1)": |
|
232 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
233 |
'i' 'a' 'h' |
|
234 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
235 |
'o' 'h' |
|
236 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
237 |
'n' 'i' 'h' 's' 'd' 'e' 'l' 'b' 'y' 's' 'f' 'n' 'w' |
|
238 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
239 |
'w' 'h' 'h' |
|
240 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
241 |
'j' 'n' 't' |
|
242 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
243 |
't' 'n' |
|
244 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
24 | 245 |
'ป' '7' '-' 'ย' 'น' 'ต' 'ป' '1' '-' 'พ' '2' |
8 | 246 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
247 |
'จ' '2' '-' 'ค' '1' 'ต' '1' 'พ' '2' |
|
248 |
||
249 |
Analyzer "standard>prefixes(2)": |
|
250 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
251 |
'i' 'am'|'a' 'ha'|'h' |
|
252 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
253 |
'oh'|'o' 'ha'|'h' |
|
254 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
255 |
'no'|'n' 'im'|'i' 'he'|'h' 'so'|'s' 'do'|'d' 'ev'|'e' 'lo'|'l' 'be'|'b' 'yo'|'y' 'sh'|'s' 'fi'|'f' 'no'|'n' 'wh'|'w' |
|
256 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
257 |
'wh'|'w' 'ha'|'h' 'he'|'h' |
|
258 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
259 |
'ju'|'j' 'ny'|'n' 'te'|'t' |
|
260 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
261 |
'te'|'t' 'nä'|'n' |
|
262 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
24 | 263 |
'ปร'|'ป' '7' '-1'|'-' 'ยน'|'ย' 'นา'|'น' 'ตั'|'ต' 'ปร'|'ป' '17'|'1' '-1'|'-' 'พฤ'|'พ' '25'|'2' |
8 | 264 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
265 |
'จะ'|'จ' '25'|'2' '-2'|'-' 'คื'|'ค' '17'|'1' 'ต่'|'ต' '18'|'1' 'พฤ'|'พ' '25'|'2' |
|
266 |
||
267 |
Analyzer "standard>prefixes(3)": |
|
268 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
269 |
'i' 'am'|'a' 'hap'|'ha'|'h' |
|
270 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
271 |
'oh'|'o' 'hap'|'ha'|'h' |
|
272 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
273 |
'not'|'no'|'n' 'imp'|'im'|'i' 'her'|'he'|'h' 'so'|'s' 'don'|'do'|'d' 'eve'|'ev'|'e' 'loo'|'lo'|'l' 'bec'|'be'|'b' 'you'|'yo'|'y' 'sha'|'sh'|'s' 'fin'|'fi'|'f' 'not'|'no'|'n' 'wha'|'wh'|'w' |
|
274 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
275 |
'wha'|'wh'|'w' 'hap'|'ha'|'h' 'her'|'he'|'h' |
|
276 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
277 |
'juo'|'ju'|'j' 'nyt'|'ny'|'n' 'tee'|'te'|'t' |
|
278 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
279 |
'tee'|'te'|'t' 'näi'|'nä'|'n' |
|
280 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
24 | 281 |
'ปรา'|'ปร'|'ป' '7' '-18'|'-1'|'-' 'ยนี'|'ยน'|'ย' 'นาย'|'นา'|'น' 'ตัน'|'ตั'|'ต' 'ปรา'|'ปร'|'ป' '17'|'1' '-18'|'-1'|'-' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2' |
8 | 282 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
283 |
'จะม'|'จะ'|'จ' '254'|'25'|'2' '-25'|'-2'|'-' 'คือ'|'คื'|'ค' '17'|'1' 'ต่อ'|'ต่'|'ต' '18'|'1' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2' |
|
284 |
||
285 |
Analyzer "stdtokens>stdfilter>lowercase>thai>stop(en)": |
|
286 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
287 |
'i' 'am' 'happy' |
|
288 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
289 |
'oh' 'happiness' |
|
290 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
291 |
'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
|
292 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
293 |
'what' 'happening' 'here' |
|
294 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
295 |
'juon' 'nyt' 'teetä' |
|
296 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
297 |
'tee' 'näin' |
|
298 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
24 | 299 |
'ปรากฏการณ์' 'ฝน' 'ดาวตก' '7' '-18พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชญ์' 'ภูมิปัญญา' 'ท้อง' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'กล่าว' 'ว่า' '17' '-18' 'พฤศจิกายน' '2552' |
8 | 300 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
301 |
'จะ' 'มี' 'ปรากฏการณ์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัญ' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'แล้ว' 'เมื่อ' 'ปี' '2541' '-2544' 'คือ' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่อ' 'เนื่อง' 'วัน' 'ที่' '18' 'พฤศจิกายน' '2552' |
|
302 |
||
303 |
Analyzer "cjk>stop(en)": |
|
304 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
305 |
'i' 'am' 'happy' |
|
306 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
307 |
'oh' 'happiness' |
|
308 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
309 |
'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
|
310 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
311 |
'what' 'happening' 'here' |
|
312 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
313 |
'juon' 'nyt' 'teetä' |
|
314 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
315 |
'tee' 'näin' |
|
316 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
317 |
'ปร' 'รา' 'าก' 'กฏ' 'ฏก' 'กา' 'าร' 'รณ' 'ฝน' 'นด' 'ดา' 'าว' 'วต' 'ตก' '17' '18' 'พ' 'ยน' 'นา' 'าย' 'ยว' 'วร' 'รว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปร' 'รา' 'าช' 'ชญ' 'ภ' 'ม' 'ป' 'ญญ' 'ญา' 'าท' 'อง' 'งถ' 'นด' 'าน' 'นด' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'ไท' 'ทย' 'ยก' 'กล' 'าว' 'วว' 'า' '17' '18' 'พฤ' 'ฤศ' 'ศจ' 'กา' 'าย' 'ยน' '2552' |
|
318 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
319 |
'จะ' 'ะม' 'ปร' 'รา' 'าก' 'กฏ' 'ฏก' 'กา' 'าร' 'รณ' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'คร' 'งส' 'สำ' 'ำค' 'ญท' 'ชา' 'าว' 'วไ' 'ไท' 'ทย' 'ยเ' 'เค' 'คย' 'ยป' 'ปร' 'ระ' 'ะท' 'บใ' 'ใจ' 'จม' 'มา' 'าแ' 'แล' 'วเ' 'เม' 'อป' '2541' '2544' 'ค' 'อใ' 'ใน' 'นค' 'นว' 'นท' '17' 'ต' 'อเ' 'เน' 'อง' 'งว' 'นท' '18' 'พฤ' 'ฤศ' 'ศจ' 'กา' 'าย' 'ยน' '2552' |
|
320 |
||
321 |
Analyzer "ngram(1)>lowercase>stop(en)": |
|
322 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
323 |
'i' 'am' 'happy' |
|
324 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
325 |
'oh' 'happiness' |
|
326 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
327 |
'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
|
328 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
329 |
'what' 'happening' 'here' |
|
330 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
331 |
'juon' 'nyt' 'teetä' |
|
332 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
333 |
'tee' 'näin' |
|
334 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
335 |
'ปรากฏการณ' 'ฝนดาวตก17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' '17' '18' 'พฤศจ' 'กายน' '2552' |
|
336 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
337 |
'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' '2541' '2544' 'ค' 'อในค' 'นว' 'นท' '17' 'ต' 'อเน' 'องว' 'นท' '18' 'พฤศจ' 'กายน' '2552' |
|
338 |
||
339 |
Analyzer "ngram(2)>lowercase>stop(en)": |
|
340 |
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized: |
|
341 |
'i' 'am' 'happy' |
|
342 |
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized: |
|
343 |
'oh' 'happiness' |
|
344 |
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized: |
|
345 |
'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever' |
|
346 |
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized: |
|
347 |
'what' 'happening' 'here' |
|
348 |
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized: |
|
349 |
'juon' 'nyt' 'teetä' |
|
350 |
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized: |
|
351 |
'tee' 'näin' |
|
352 |
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized: |
|
353 |
'ปรากฏการณ' 'ฝนดาวตก17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' '17' '18' 'พฤศจ' 'กายน' '2552' |
|
354 |
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized: |
|
355 |
'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' '2541' '2544' 'ค' 'อในค' 'นว' 'นท' '17' 'ต' 'อเน' 'องว' 'นท' '18' 'พฤศจ' 'กายน' '2552' |
|
356 |