searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/analyzer_exp_out.txt
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
parent 8 6547bf8ca13a
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     1
Analyzer "stdtokens":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     2
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     3
 'I' 'am' 'happy'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     4
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     5
 'Oh' 'happiness'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     6
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
     7
 'Nothing' 'important' 'in' 'here' 'So' 'don't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
8
hgs
parents: 0
diff changeset
     8
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
     9
 'What' 'is' 'happening' 'here'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    10
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    11
 'Juon' 'nyt' 'teetä'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    12
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    13
 'Tee' 'näin'
8
hgs
parents: 0
diff changeset
    14
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
    15
 'ปรากฏการณ์ฝนดาวตก' '7' '-18พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17' '-18' 'พฤศจิกายน' '2552'
8
hgs
parents: 0
diff changeset
    16
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
    17
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    18
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    19
Analyzer "whitespace":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    20
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    21
 'I' 'am' 'happy.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    22
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    23
 'Oh' 'happiness!'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    24
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    25
 'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.'
8
hgs
parents: 0
diff changeset
    26
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
    27
 'What' 'is' 'happening' 'here?'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    28
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    29
 'Juon' 'nyt' 'teetä.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    30
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    31
 'Tee' 'näin!'
8
hgs
parents: 0
diff changeset
    32
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
    33
 'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17-18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    34
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
    35
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    36
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    37
Analyzer "whitespace>lowercase":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    38
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    39
 'i' 'am' 'happy.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    40
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    41
 'oh' 'happiness!'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    42
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    43
 'nothing' 'important' 'in' 'here.' 'so' 'don't' 'even' 'look.' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever.'
8
hgs
parents: 0
diff changeset
    44
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
    45
 'what' 'is' 'happening' 'here?'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    46
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    47
 'juon' 'nyt' 'teetä.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    48
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    49
 'tee' 'näin!'
8
hgs
parents: 0
diff changeset
    50
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
    51
 'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17-18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    52
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
    53
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    54
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    55
Analyzer "whitespace>accent":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    56
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    57
 'I' 'am' 'happy.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    58
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    59
 'Oh' 'happiness!'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    60
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    61
 'Nothing' 'important' 'in' 'here.' 'So' 'don't' 'even' 'look.' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever.'
8
hgs
parents: 0
diff changeset
    62
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
    63
 'What' 'is' 'happening' 'here?'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    64
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    65
 'Juon' 'nyt' 'teeta.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    66
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    67
 'Tee' 'nain!'
8
hgs
parents: 0
diff changeset
    68
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
    69
 'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17-18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    70
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
    71
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
    72
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    73
Analyzer "letter":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    74
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    75
 'I' 'am' 'happy'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    76
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    77
 'Oh' 'happiness'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    78
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    79
 'Nothing' 'important' 'in' 'here' 'So' 'don' 't' 'even' 'look' 'Because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
8
hgs
parents: 0
diff changeset
    80
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
    81
 'What' 'is' 'happening' 'here'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    82
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    83
 'Juon' 'nyt' 'teetä'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    84
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    85
 'Tee' 'näin'
8
hgs
parents: 0
diff changeset
    86
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
    87
 'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
    88
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
    89
 'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
    90
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    91
Analyzer "letter>lowercase":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    92
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    93
 'i' 'am' 'happy'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    94
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    95
 'oh' 'happiness'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    96
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
    97
 'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
8
hgs
parents: 0
diff changeset
    98
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
    99
 'what' 'is' 'happening' 'here'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   100
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   101
 'juon' 'nyt' 'teetä'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   102
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   103
 'tee' 'näin'
8
hgs
parents: 0
diff changeset
   104
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   105
 'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   106
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   107
 'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   108
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   109
Analyzer "keyword":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   110
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   111
 'I am happy.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   112
'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   113
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   114
 'Oh happiness!
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   115
'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   116
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   117
 '
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   118
Nothing 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   119
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   120
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   121
important    in
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   122
  here.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   123
So  don't     even look.  Because
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   124
              you shall         find
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   125
nothing
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   126
whatsoever.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   127
'
8
hgs
parents: 0
diff changeset
   128
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   129
 'What is happening here?
hgs
parents: 0
diff changeset
   130
hgs
parents: 0
diff changeset
   131
'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   132
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   133
 'Juon nyt teetä.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   134
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   135
 'Tee näin! '
8
hgs
parents: 0
diff changeset
   136
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   137
 'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า 17-18 พฤศจิกายน 2552
hgs
parents: 0
diff changeset
   138
'
hgs
parents: 0
diff changeset
   139
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   140
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิกายน 2552
hgs
parents: 0
diff changeset
   141
'
hgs
parents: 0
diff changeset
   142
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   143
Analyzer "keyword>lowercase":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   144
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   145
 'i am happy.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   146
'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   147
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   148
 'oh happiness!
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   149
'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   150
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   151
 '
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   152
nothing 
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   153
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   154
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   155
important    in
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   156
  here.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   157
so  don't     even look.  because
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   158
              you shall         find
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   159
nothing
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   160
whatsoever.
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   161
'
8
hgs
parents: 0
diff changeset
   162
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   163
 'what is happening here?
hgs
parents: 0
diff changeset
   164
hgs
parents: 0
diff changeset
   165
'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   166
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   167
 'juon nyt teetä.'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   168
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   169
 'tee näin! '
8
hgs
parents: 0
diff changeset
   170
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   171
 'ปรากฏการณ์ฝนดาวตก17-18พ.ยนี้ นายวรวิทย์ ตันวุฒิบัณฑิต ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า 17-18 พฤศจิกายน 2552
hgs
parents: 0
diff changeset
   172
'
hgs
parents: 0
diff changeset
   173
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   174
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี 2541-2544 คือในคืนวันที่ 17 ต่อเนื่องวันที่ 18 พฤศจิกายน 2552
hgs
parents: 0
diff changeset
   175
'
hgs
parents: 0
diff changeset
   176
hgs
parents: 0
diff changeset
   177
Analyzer "letter>lowercase>stop(en)":
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   178
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   179
 'i' 'am' 'happy'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   180
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   181
 'oh' 'happiness'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   182
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   183
 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
8
hgs
parents: 0
diff changeset
   184
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   185
 'what' 'happening' 'here'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   186
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
8
hgs
parents: 0
diff changeset
   187
 'juon' 'nyt' 'teetä'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   188
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
8
hgs
parents: 0
diff changeset
   189
 'tee' 'näin'
hgs
parents: 0
diff changeset
   190
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   191
 'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   192
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   193
 'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   194
hgs
parents: 0
diff changeset
   195
Analyzer "letter>lowercase>stop('i', 'oh', 'nyt', 'n�in')":
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   196
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   197
 'am' 'happy'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   198
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   199
 'happiness'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   200
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   201
 'nothing' 'important' 'in' 'here' 'so' 'don' 't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
8
hgs
parents: 0
diff changeset
   202
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   203
 'what' 'is' 'happening' 'here'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   204
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   205
 'juon' 'teetä'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   206
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
8
hgs
parents: 0
diff changeset
   207
 'tee' 'näin'
hgs
parents: 0
diff changeset
   208
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   209
 'ปรากฏการณ' 'ฝนดาวตก' 'พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   210
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   211
 'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' 'ค' 'อในค' 'นว' 'นท' 'ต' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   212
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   213
Analyzer "letter>length(2, 4)":
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   214
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   215
 'am'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   216
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   217
 'Oh'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   218
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   219
 'in' 'here' 'So' 'don' 'even' 'look' 'you' 'find'
8
hgs
parents: 0
diff changeset
   220
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   221
 'What' 'is' 'here'
0
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   222
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   223
 'Juon' 'nyt'
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   224
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
671dee74050a Revision: 201011
Dremov Kirill (Nokia-D-MSW/Tampere) <kirill.dremov@nokia.com>
parents:
diff changeset
   225
 'Tee' 'näin'
8
hgs
parents: 0
diff changeset
   226
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   227
 'ยน' 'ทย' 'นว' 'ณฑ' 'ญญาท' 'องถ' 'นด' 'าวว' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   228
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   229
 'จะม' 'คร' 'งสำค' 'ญท' 'วเม' 'อป' 'อในค' 'นว' 'นท' 'อเน' 'องว' 'นท' 'พฤศจ' 'กายน'
hgs
parents: 0
diff changeset
   230
hgs
parents: 0
diff changeset
   231
Analyzer "standard>prefixes(1)":
hgs
parents: 0
diff changeset
   232
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   233
 'i' 'a' 'h'
hgs
parents: 0
diff changeset
   234
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   235
 'o' 'h'
hgs
parents: 0
diff changeset
   236
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   237
 'n' 'i' 'h' 's' 'd' 'e' 'l' 'b' 'y' 's' 'f' 'n' 'w'
hgs
parents: 0
diff changeset
   238
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   239
 'w' 'h' 'h'
hgs
parents: 0
diff changeset
   240
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   241
 'j' 'n' 't'
hgs
parents: 0
diff changeset
   242
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   243
 't' 'n'
hgs
parents: 0
diff changeset
   244
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
   245
 'ป' '7' '-' 'ย' 'น' 'ต' 'ป' '1' '-' 'พ' '2'
8
hgs
parents: 0
diff changeset
   246
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   247
 'จ' '2' '-' 'ค' '1' 'ต' '1' 'พ' '2'
hgs
parents: 0
diff changeset
   248
hgs
parents: 0
diff changeset
   249
Analyzer "standard>prefixes(2)":
hgs
parents: 0
diff changeset
   250
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   251
 'i' 'am'|'a' 'ha'|'h'
hgs
parents: 0
diff changeset
   252
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   253
 'oh'|'o' 'ha'|'h'
hgs
parents: 0
diff changeset
   254
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   255
 'no'|'n' 'im'|'i' 'he'|'h' 'so'|'s' 'do'|'d' 'ev'|'e' 'lo'|'l' 'be'|'b' 'yo'|'y' 'sh'|'s' 'fi'|'f' 'no'|'n' 'wh'|'w'
hgs
parents: 0
diff changeset
   256
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   257
 'wh'|'w' 'ha'|'h' 'he'|'h'
hgs
parents: 0
diff changeset
   258
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   259
 'ju'|'j' 'ny'|'n' 'te'|'t'
hgs
parents: 0
diff changeset
   260
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   261
 'te'|'t' 'nä'|'n'
hgs
parents: 0
diff changeset
   262
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
   263
 'ปร'|'ป' '7' '-1'|'-' 'ยน'|'ย' 'นา'|'น' 'ตั'|'ต' 'ปร'|'ป' '17'|'1' '-1'|'-' 'พฤ'|'พ' '25'|'2'
8
hgs
parents: 0
diff changeset
   264
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   265
 'จะ'|'จ' '25'|'2' '-2'|'-' 'คื'|'ค' '17'|'1' 'ต่'|'ต' '18'|'1' 'พฤ'|'พ' '25'|'2'
hgs
parents: 0
diff changeset
   266
hgs
parents: 0
diff changeset
   267
Analyzer "standard>prefixes(3)":
hgs
parents: 0
diff changeset
   268
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   269
 'i' 'am'|'a' 'hap'|'ha'|'h'
hgs
parents: 0
diff changeset
   270
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   271
 'oh'|'o' 'hap'|'ha'|'h'
hgs
parents: 0
diff changeset
   272
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   273
 'not'|'no'|'n' 'imp'|'im'|'i' 'her'|'he'|'h' 'so'|'s' 'don'|'do'|'d' 'eve'|'ev'|'e' 'loo'|'lo'|'l' 'bec'|'be'|'b' 'you'|'yo'|'y' 'sha'|'sh'|'s' 'fin'|'fi'|'f' 'not'|'no'|'n' 'wha'|'wh'|'w'
hgs
parents: 0
diff changeset
   274
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   275
 'wha'|'wh'|'w' 'hap'|'ha'|'h' 'her'|'he'|'h'
hgs
parents: 0
diff changeset
   276
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   277
 'juo'|'ju'|'j' 'nyt'|'ny'|'n' 'tee'|'te'|'t'
hgs
parents: 0
diff changeset
   278
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   279
 'tee'|'te'|'t' 'näi'|'nä'|'n'
hgs
parents: 0
diff changeset
   280
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
   281
 'ปรา'|'ปร'|'ป' '7' '-18'|'-1'|'-' 'ยนี'|'ยน'|'ย' 'นาย'|'นา'|'น' 'ตัน'|'ตั'|'ต' 'ปรา'|'ปร'|'ป' '17'|'1' '-18'|'-1'|'-' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2'
8
hgs
parents: 0
diff changeset
   282
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   283
 'จะม'|'จะ'|'จ' '254'|'25'|'2' '-25'|'-2'|'-' 'คือ'|'คื'|'ค' '17'|'1' 'ต่อ'|'ต่'|'ต' '18'|'1' 'พฤศ'|'พฤ'|'พ' '255'|'25'|'2'
hgs
parents: 0
diff changeset
   284
hgs
parents: 0
diff changeset
   285
Analyzer "stdtokens>stdfilter>lowercase>thai>stop(en)":
hgs
parents: 0
diff changeset
   286
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   287
 'i' 'am' 'happy'
hgs
parents: 0
diff changeset
   288
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   289
 'oh' 'happiness'
hgs
parents: 0
diff changeset
   290
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   291
 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents: 0
diff changeset
   292
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   293
 'what' 'happening' 'here'
hgs
parents: 0
diff changeset
   294
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   295
 'juon' 'nyt' 'teetä'
hgs
parents: 0
diff changeset
   296
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   297
 'tee' 'näin'
hgs
parents: 0
diff changeset
   298
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
   299
 'ปรากฏการณ์' 'ฝน' 'ดาวตก' '7' '-18พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชญ์' 'ภูมิปัญญา' 'ท้อง' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'กล่าว' 'ว่า' '17' '-18' 'พฤศจิกายน' '2552'
8
hgs
parents: 0
diff changeset
   300
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   301
 'จะ' 'มี' 'ปรากฏการณ์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัญ' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'แล้ว' 'เมื่อ' 'ปี' '2541' '-2544' 'คือ' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่อ' 'เนื่อง' 'วัน' 'ที่' '18' 'พฤศจิกายน' '2552'
hgs
parents: 0
diff changeset
   302
hgs
parents: 0
diff changeset
   303
Analyzer "cjk>stop(en)":
hgs
parents: 0
diff changeset
   304
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   305
 'i' 'am' 'happy'
hgs
parents: 0
diff changeset
   306
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   307
 'oh' 'happiness'
hgs
parents: 0
diff changeset
   308
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   309
 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents: 0
diff changeset
   310
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   311
 'what' 'happening' 'here'
hgs
parents: 0
diff changeset
   312
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   313
 'juon' 'nyt' 'teetä'
hgs
parents: 0
diff changeset
   314
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   315
 'tee' 'näin'
hgs
parents: 0
diff changeset
   316
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   317
 'ปร' 'รา' 'าก' 'กฏ' 'ฏก' 'กา' 'าร' 'รณ' 'ฝน' 'นด' 'ดา' 'าว' 'วต' 'ตก' '17' '18' 'พ' 'ยน' 'นา' 'าย' 'ยว' 'วร' 'รว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปร' 'รา' 'าช' 'ชญ' 'ภ' 'ม' 'ป' 'ญญ' 'ญา' 'าท' 'อง' 'งถ' 'นด' 'าน' 'นด' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'ไท' 'ทย' 'ยก' 'กล' 'าว' 'วว' 'า' '17' '18' 'พฤ' 'ฤศ' 'ศจ' 'กา' 'าย' 'ยน' '2552'
hgs
parents: 0
diff changeset
   318
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   319
 'จะ' 'ะม' 'ปร' 'รา' 'าก' 'กฏ' 'ฏก' 'กา' 'าร' 'รณ' 'ดา' 'าร' 'รา' 'าศ' 'ศา' 'าส' 'สต' 'ตร' 'คร' 'งส' 'สำ' 'ำค' 'ญท' 'ชา' 'าว' 'วไ' 'ไท' 'ทย' 'ยเ' 'เค' 'คย' 'ยป' 'ปร' 'ระ' 'ะท' 'บใ' 'ใจ' 'จม' 'มา' 'าแ' 'แล' 'วเ' 'เม' 'อป' '2541' '2544' 'ค' 'อใ' 'ใน' 'นค' 'นว' 'นท' '17' 'ต' 'อเ' 'เน' 'อง' 'งว' 'นท' '18' 'พฤ' 'ฤศ' 'ศจ' 'กา' 'าย' 'ยน' '2552'
hgs
parents: 0
diff changeset
   320
hgs
parents: 0
diff changeset
   321
Analyzer "ngram(1)>lowercase>stop(en)":
hgs
parents: 0
diff changeset
   322
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   323
 'i' 'am' 'happy'
hgs
parents: 0
diff changeset
   324
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   325
 'oh' 'happiness'
hgs
parents: 0
diff changeset
   326
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   327
 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents: 0
diff changeset
   328
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   329
 'what' 'happening' 'here'
hgs
parents: 0
diff changeset
   330
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   331
 'juon' 'nyt' 'teetä'
hgs
parents: 0
diff changeset
   332
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   333
 'tee' 'näin'
hgs
parents: 0
diff changeset
   334
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   335
 'ปรากฏการณ' 'ฝนดาวตก17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' '17' '18' 'พฤศจ' 'กายน' '2552'
hgs
parents: 0
diff changeset
   336
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   337
 'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' '2541' '2544' 'ค' 'อในค' 'นว' 'นท' '17' 'ต' 'อเน' 'องว' 'นท' '18' 'พฤศจ' 'กายน' '2552'
hgs
parents: 0
diff changeset
   338
hgs
parents: 0
diff changeset
   339
Analyzer "ngram(2)>lowercase>stop(en)":
hgs
parents: 0
diff changeset
   340
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents: 0
diff changeset
   341
 'i' 'am' 'happy'
hgs
parents: 0
diff changeset
   342
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents: 0
diff changeset
   343
 'oh' 'happiness'
hgs
parents: 0
diff changeset
   344
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents: 0
diff changeset
   345
 'nothing' 'important' 'here' 'so' 'don' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents: 0
diff changeset
   346
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents: 0
diff changeset
   347
 'what' 'happening' 'here'
hgs
parents: 0
diff changeset
   348
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents: 0
diff changeset
   349
 'juon' 'nyt' 'teetä'
hgs
parents: 0
diff changeset
   350
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents: 0
diff changeset
   351
 'tee' 'näin'
hgs
parents: 0
diff changeset
   352
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
hgs
parents: 0
diff changeset
   353
 'ปรากฏการณ' 'ฝนดาวตก17' '18พ' 'ยน' 'นายวรว' 'ทย' 'ต' 'นว' 'ฒ' 'บ' 'ณฑ' 'ต' 'ปราชญ' 'ภ' 'ม' 'ป' 'ญญาท' 'องถ' 'นด' 'านดาราศาสตร' 'ไทยกล' 'าวว' 'า' '17' '18' 'พฤศจ' 'กายน' '2552'
hgs
parents: 0
diff changeset
   354
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents: 0
diff changeset
   355
 'จะม' 'ปรากฏการณ' 'ดาราศาสตร' 'คร' 'งสำค' 'ญท' 'ชาวไทยเคยประท' 'บใจมาแล' 'วเม' 'อป' '2541' '2544' 'ค' 'อในค' 'นว' 'นท' '17' 'ต' 'อเน' 'องว' 'นท' '18' 'พฤศจ' 'กายน' '2552'
hgs
parents: 0
diff changeset
   356