searchengine/cpix/tsrc/cpixunittest/data/cpixunittest/analysis/whitebox/localeSwitchAnalyzer_exp_out.txt
author hgs
Fri, 15 Oct 2010 12:09:28 +0530
changeset 24 65456528cac2
parent 8 6547bf8ca13a
permissions -rw-r--r--
201041
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
8
hgs
parents:
diff changeset
     1
locale=en:
hgs
parents:
diff changeset
     2
Analyzer "
hgs
parents:
diff changeset
     3
locale_switch {
hgs
parents:
diff changeset
     4
    case 'en':       stdtokens>stdfilter>lowercase>stop(en);
hgs
parents:
diff changeset
     5
    case 'th':       stdtokens>stdfilter>lowercase>thai>stop(en);
hgs
parents:
diff changeset
     6
    case 'ca':       stdtokens>stdfilter>lowercase>accent;
hgs
parents:
diff changeset
     7
    default:         stdtokens>stdfilter>lowercase;
hgs
parents:
diff changeset
     8
}":
hgs
parents:
diff changeset
     9
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents:
diff changeset
    10
 'i' 'am' 'happy'
hgs
parents:
diff changeset
    11
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents:
diff changeset
    12
 'oh' 'happiness'
hgs
parents:
diff changeset
    13
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents:
diff changeset
    14
 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents:
diff changeset
    15
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents:
diff changeset
    16
 'what' 'happening' 'here'
hgs
parents:
diff changeset
    17
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents:
diff changeset
    18
 'juon' 'nyt' 'teetä'
hgs
parents:
diff changeset
    19
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents:
diff changeset
    20
 'tee' 'näin'
hgs
parents:
diff changeset
    21
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
    22
 'ปรากฏการณ์ฝนดาวตก' '7' '-18พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17' '-18' 'พฤศจิกายน' '2552'
8
hgs
parents:
diff changeset
    23
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents:
diff changeset
    24
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents:
diff changeset
    25
hgs
parents:
diff changeset
    26
hgs
parents:
diff changeset
    27
locale=th:
hgs
parents:
diff changeset
    28
Analyzer "
hgs
parents:
diff changeset
    29
locale_switch {
hgs
parents:
diff changeset
    30
    case 'en':       stdtokens>stdfilter>lowercase>stop(en);
hgs
parents:
diff changeset
    31
    case 'th':       stdtokens>stdfilter>lowercase>thai>stop(en);
hgs
parents:
diff changeset
    32
    case 'ca':       stdtokens>stdfilter>lowercase>accent;
hgs
parents:
diff changeset
    33
    default:         stdtokens>stdfilter>lowercase;
hgs
parents:
diff changeset
    34
}":
hgs
parents:
diff changeset
    35
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents:
diff changeset
    36
 'i' 'am' 'happy'
hgs
parents:
diff changeset
    37
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents:
diff changeset
    38
 'oh' 'happiness'
hgs
parents:
diff changeset
    39
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents:
diff changeset
    40
 'nothing' 'important' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents:
diff changeset
    41
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents:
diff changeset
    42
 'what' 'happening' 'here'
hgs
parents:
diff changeset
    43
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents:
diff changeset
    44
 'juon' 'nyt' 'teetä'
hgs
parents:
diff changeset
    45
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents:
diff changeset
    46
 'tee' 'näin'
hgs
parents:
diff changeset
    47
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
    48
 'ปรากฏการณ์' 'ฝน' 'ดาวตก' '7' '-18พ' 'ยนี' '้' 'นาย' 'วรวิท' 'ย์' 'ตัน' 'วุฒิ' 'บัณฑิต' 'ปราชญ์' 'ภูมิปัญญา' 'ท้อง' 'ถิ่น' 'ด้าน' 'ดาราศาสตร์' 'ไทย' 'กล่าว' 'ว่า' '17' '-18' 'พฤศจิกายน' '2552'
8
hgs
parents:
diff changeset
    49
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents:
diff changeset
    50
 'จะ' 'มี' 'ปรากฏการณ์' 'ดาราศาสตร์' 'ครั้ง' 'สำคัญ' 'ที่' 'ชาว' 'ไทย' 'เคย' 'ประทับ' 'ใจมา' 'แล้ว' 'เมื่อ' 'ปี' '2541' '-2544' 'คือ' 'ใน' 'คืน' 'วัน' 'ที่' '17' 'ต่อ' 'เนื่อง' 'วัน' 'ที่' '18' 'พฤศจิกายน' '2552'
hgs
parents:
diff changeset
    51
hgs
parents:
diff changeset
    52
hgs
parents:
diff changeset
    53
locale=ca:
hgs
parents:
diff changeset
    54
Analyzer "
hgs
parents:
diff changeset
    55
locale_switch {
hgs
parents:
diff changeset
    56
    case 'en':       stdtokens>stdfilter>lowercase>stop(en);
hgs
parents:
diff changeset
    57
    case 'th':       stdtokens>stdfilter>lowercase>thai>stop(en);
hgs
parents:
diff changeset
    58
    case 'ca':       stdtokens>stdfilter>lowercase>accent;
hgs
parents:
diff changeset
    59
    default:         stdtokens>stdfilter>lowercase;
hgs
parents:
diff changeset
    60
}":
hgs
parents:
diff changeset
    61
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents:
diff changeset
    62
 'i' 'am' 'happy'
hgs
parents:
diff changeset
    63
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents:
diff changeset
    64
 'oh' 'happiness'
hgs
parents:
diff changeset
    65
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents:
diff changeset
    66
 'nothing' 'important' 'in' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents:
diff changeset
    67
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents:
diff changeset
    68
 'what' 'is' 'happening' 'here'
hgs
parents:
diff changeset
    69
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents:
diff changeset
    70
 'juon' 'nyt' 'teeta'
hgs
parents:
diff changeset
    71
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents:
diff changeset
    72
 'tee' 'nain'
hgs
parents:
diff changeset
    73
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
    74
 'ปรากฏการณ์ฝนดาวตก' '7' '-18พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17' '-18' 'พฤศจิกายน' '2552'
8
hgs
parents:
diff changeset
    75
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents:
diff changeset
    76
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents:
diff changeset
    77
hgs
parents:
diff changeset
    78
hgs
parents:
diff changeset
    79
default locale:
hgs
parents:
diff changeset
    80
Analyzer "
hgs
parents:
diff changeset
    81
locale_switch {
hgs
parents:
diff changeset
    82
    case 'en':       stdtokens>stdfilter>lowercase>stop(en);
hgs
parents:
diff changeset
    83
    case 'th':       stdtokens>stdfilter>lowercase>thai>stop(en);
hgs
parents:
diff changeset
    84
    case 'ca':       stdtokens>stdfilter>lowercase>accent;
hgs
parents:
diff changeset
    85
    default:         stdtokens>stdfilter>lowercase;
hgs
parents:
diff changeset
    86
}":
hgs
parents:
diff changeset
    87
File !:\data\cpixunittestcorpus\stem\en\1.txt tokenized:
hgs
parents:
diff changeset
    88
 'i' 'am' 'happy'
hgs
parents:
diff changeset
    89
File !:\data\cpixunittestcorpus\stem\en\2.txt tokenized:
hgs
parents:
diff changeset
    90
 'oh' 'happiness'
hgs
parents:
diff changeset
    91
File !:\data\cpixunittestcorpus\stem\en\3.txt tokenized:
hgs
parents:
diff changeset
    92
 'nothing' 'important' 'in' 'here' 'so' 'don't' 'even' 'look' 'because' 'you' 'shall' 'find' 'nothing' 'whatsoever'
hgs
parents:
diff changeset
    93
File !:\data\cpixunittestcorpus\stem\en\4.txt tokenized:
hgs
parents:
diff changeset
    94
 'what' 'is' 'happening' 'here'
hgs
parents:
diff changeset
    95
File !:\data\cpixunittestcorpus\stem\fi\1.txt tokenized:
hgs
parents:
diff changeset
    96
 'juon' 'nyt' 'teetä'
hgs
parents:
diff changeset
    97
File !:\data\cpixunittestcorpus\stem\fi\2.txt tokenized:
hgs
parents:
diff changeset
    98
 'tee' 'näin'
hgs
parents:
diff changeset
    99
File !:\data\cpixunittestcorpus\loc\th\1.txt tokenized:
24
hgs
parents: 8
diff changeset
   100
 'ปรากฏการณ์ฝนดาวตก' '7' '-18พ' 'ยนี้' 'นายวรวิทย์' 'ตันวุฒิบัณฑิต' 'ปราชญ์ภูมิปัญญาท้องถิ่นด้านดาราศาสตร์ไทยกล่าวว่า' '17' '-18' 'พฤศจิกายน' '2552'
8
hgs
parents:
diff changeset
   101
File !:\data\cpixunittestcorpus\loc\th\2.txt tokenized:
hgs
parents:
diff changeset
   102
 'จะมีปรากฏการณ์ดาราศาสตร์ครั้งสำคัญที่ชาวไทยเคยประทับใจมาแล้วเมื่อปี' '2541' '-2544' 'คือในคืนวันที่' '17' 'ต่อเนื่องวันที่' '18' 'พฤศจิกายน' '2552'
hgs
parents:
diff changeset
   103