diff -r 6ceef9a83b1a -r 6defe5d1bd39 textinput/ptihangulcore/src/hanja.c --- a/textinput/ptihangulcore/src/hanja.c Mon Mar 15 12:42:02 2010 +0200 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,603 +0,0 @@ -/* libhangul - * Copyright (c) 2005,2006 Choe Hwanjin - * All rights reserved. - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#ifdef HAVE_CONFIG_H -#include -#endif - -#include -#include -#include - -#ifdef HAVE_MMAP -#include -#endif - -#include -#include -#include -#include - -#include "hangul.h" -#include "hangulinternals.h" - -#ifndef TRUE -#define TRUE 1 -#endif - -#ifndef FALSE -#define FALSE 0 -#endif - -typedef struct _HanjaIndex HanjaIndex; - -typedef struct _HanjaPair HanjaPair; -typedef struct _HanjaPairArray HanjaPairArray; - -struct _Hanja { - uint32_t key_offset; - uint32_t value_offset; - uint32_t comment_offset; -}; - -struct _HanjaList { - char* key; - size_t len; - size_t alloc; - const Hanja** items; -}; - -struct _HanjaIndex { - unsigned offset; - char key[8]; -}; - -struct _HanjaTable { - HanjaIndex* keytable; - unsigned nkeys; - unsigned key_size; - FILE* file; -}; - -struct _HanjaPair { - ucschar first; - ucschar second; -}; - -struct _HanjaPairArray { - ucschar key; - const HanjaPair* pairs; -}; - -#include "hanjacompatible.h" - -static const char utf8_skip_table[256] = { - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, - 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, - 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1 -}; - -static -#ifndef __SYMBIAN32__ -inline -#endif -int utf8_char_len(const char *p) -{ - return utf8_skip_table[*(const unsigned char*)p]; -} - -static -#ifndef __SYMBIAN32__ -inline -#endif -const char* utf8_next(const char *str) -{ - int n = utf8_char_len(str); - - while (n > 0) { - str++; - if (*str == '\0') - return str; - n--; - } - - return str; -} - -static -#ifndef __SYMBIAN32__ -inline -#endif -char* utf8_prev(const char *str, const char *p) -{ - for (--p; p >= str; --p) { - if ((*p & 0xc0) != 0x80) - break; - } - return (char*)p; -} - -/* hanja searching functions */ -static Hanja * -hanja_new(const char *key, const char *value, const char *comment) -{ - Hanja* hanja; - size_t size; - size_t keylen; - size_t valuelen; - size_t commentlen; - char* p; - - keylen = strlen(key) + 1; - valuelen = strlen(value) + 1; - if (comment != NULL) - commentlen = strlen(comment) + 1; - else - commentlen = 1; - - size = sizeof(*hanja) + keylen + valuelen + commentlen; - hanja = malloc(size); - if (hanja == NULL) - return NULL; - - p = (char*)hanja + sizeof(*hanja); - strcpy(p, key); - p += keylen; - strcpy(p, value); - p += valuelen; - if (comment != NULL) - strcpy(p, comment); - else - *p = '\0'; - p += valuelen; - - hanja->key_offset = sizeof(*hanja); - hanja->value_offset = sizeof(*hanja) + keylen; - hanja->comment_offset = sizeof(*hanja) + keylen + valuelen; - - return hanja; -} - -static void -hanja_delete(Hanja* hanja) -{ - free(hanja); -} - -const char* -hanja_get_key(const Hanja* hanja) -{ - if (hanja != NULL) { - const char* p = (const char*)hanja; - return p + hanja->key_offset; - } - return NULL; -} - -const char* -hanja_get_value(const Hanja* hanja) -{ - if (hanja != NULL) { - const char* p = (const char*)hanja; - return p + hanja->value_offset; - } - return NULL; -} - -const char* -hanja_get_comment(const Hanja* hanja) -{ - if (hanja != NULL) { - const char* p = (const char*)hanja; - return p + hanja->comment_offset; - } - return NULL; -} - -static HanjaList * -hanja_list_new(const char *key) -{ - HanjaList *list; - - list = malloc(sizeof(*list)); - if (list != NULL) { - list->key = strdup(key); - list->len = 0; - list->alloc = 1; - list->items = malloc(list->alloc * sizeof(list->items[0])); - if (list->items == NULL) { - free(list); - list = NULL; - } - } - - return list; -} - -static void -hanja_list_reserve(HanjaList* list, size_t n) -{ - size_t size = list->alloc; - - if (n > SIZE_MAX / sizeof(list->items[0]) - list->len) - return; - - while (size < list->len + n) - size *= 2; - - if (size > SIZE_MAX / sizeof(list->items[0])) - return; - - if (list->alloc < list->len + n) { - const Hanja** data; - - data = realloc(list->items, size * sizeof(list->items[0])); - if (data != NULL) { - list->alloc = size; - list->items = data; - } - } -} - -static void -hanja_list_append_n(HanjaList* list, const Hanja* hanja, int n) -{ - hanja_list_reserve(list, n); - - if (list->alloc >= list->len + n) { - unsigned int i; - for (i = 0; i < n ; i++) - list->items[list->len + i] = hanja + i; - list->len += n; - } -} - -static void -hanja_table_match(const HanjaTable* table, - const char* key, HanjaList** list) -{ - int low, high, mid = 0; - int res = -1; - - low = 0; - high = table->nkeys - 1; - - while (low < high) { - mid = (low + high) / 2; - res = strncmp(table->keytable[mid].key, key, table->key_size); - if (res < 0) { - low = mid + 1; - } else if (res > 0) { - high = mid - 1; - } else { - break; - } - } - - if (res != 0) { - mid = low; - res = strncmp(table->keytable[mid].key, key, table->key_size); - } - - if (res == 0) { - unsigned offset; - char buf[512]; - - offset = table->keytable[mid].offset; - fseek(table->file, offset, SEEK_SET); - - while (fgets(buf, sizeof(buf), table->file) != NULL) { - char* save = NULL; - char* p = strtok_r(buf, ":", &save); - res = strcmp(p, key); - if (res == 0) { - char* value = strtok_r(NULL, ":", &save); - char* comment = strtok_r(NULL, "\r\n", &save); - - Hanja* hanja = hanja_new(p, value, comment); - - if (*list == NULL) { - *list = hanja_list_new(key); - } - - hanja_list_append_n(*list, hanja, 1); - } else if (res > 0) { - break; - } - } - } -} - -HanjaTable* -hanja_table_load(const char* filename) -{ - unsigned nkeys; - char buf[512]; - int key_size = 5; - char last_key[8] = { '\0', }; - char* save_ptr = NULL; - char* key; - long offset; - unsigned i; - FILE* file; - HanjaIndex* keytable; - HanjaTable* table; - - if (filename == NULL) - filename = LIBHANGUL_DEFAULT_HANJA_DIC; - - file = fopen(filename, "r"); - if (file == NULL) { - return NULL; - } - - nkeys = 0; - while (fgets(buf, sizeof(buf), file) != NULL) { - /* skip comments and empty lines */ - if (buf[0] == '#' || buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0') - continue; - - save_ptr = NULL; - key = strtok_r(buf, ":", &save_ptr); - - if (key == NULL || strlen(key) == 0) - continue; - - if (strncmp(last_key, key, key_size) != 0) { - nkeys++; - strncpy(last_key, key, key_size); - } - } - - rewind(file); - keytable = malloc(nkeys * sizeof(keytable[0])); - memset(keytable, 0, nkeys * sizeof(keytable[0])); - - i = 0; - offset = ftell(file); - while (fgets(buf, sizeof(buf), file) != NULL) { - /* skip comments and empty lines */ - if (buf[0] == '#' || buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0') - continue; - - save_ptr = NULL; - key = strtok_r(buf, ":", &save_ptr); - - if (key == NULL || strlen(key) == 0) - continue; - - if (strncmp(last_key, key, key_size) != 0) { - keytable[i].offset = offset; - strncpy(keytable[i].key, key, key_size); - strncpy(last_key, key, key_size); - i++; - } - offset = ftell(file); - } - - table = malloc(sizeof(*table)); - if (table == NULL) { - free(keytable); - fclose(file); - return NULL; - } - - table->keytable = keytable; - table->nkeys = nkeys; - table->key_size = key_size; - table->file = file; - - return table; -} - -void -hanja_table_delete(HanjaTable *table) -{ - if (table != NULL) { - free(table->keytable); - fclose(table->file); - free(table); - } -} - -HanjaList* -hanja_table_match_exact(const HanjaTable* table, const char *key) -{ - HanjaList* ret = NULL; - - if (key == NULL || key[0] == '\0' || table == NULL) - return NULL; - - hanja_table_match(table, key, &ret); - - return ret; -} - -HanjaList* -hanja_table_match_prefix(const HanjaTable* table, const char *key) -{ - char* p; - char* newkey; - HanjaList* ret = NULL; - - if (key == NULL || key[0] == '\0' || table == NULL) - return NULL; - - newkey = strdup(key); - if (newkey == NULL) - return NULL; - - p = strchr(newkey, '\0'); - while (newkey[0] != '\0') { - hanja_table_match(table, newkey, &ret); - p = utf8_prev(newkey, p); - p[0] = '\0'; - } - free(newkey); - - return ret; -} - -HanjaList* -hanja_table_match_suffix(const HanjaTable* table, const char *key) -{ - const char* p; - HanjaList* ret = NULL; - - if (key == NULL || key[0] == '\0' || table == NULL) - return NULL; - - p = key; - while (p[0] != '\0') { - hanja_table_match(table, p, &ret); - p = utf8_next(p); - } - - return ret; -} - -int -hanja_list_get_size(const HanjaList *list) -{ - if (list != NULL) - return list->len; - return 0; -} - -const char* -hanja_list_get_key(const HanjaList *list) -{ - if (list != NULL) - return list->key; - return NULL; -} - -const Hanja* -hanja_list_get_nth(const HanjaList *list, unsigned int n) -{ - if (list != NULL) { - if (n < list->len) - return list->items[n]; - } - return NULL; -} - -const char* -hanja_list_get_nth_key(const HanjaList *list, unsigned int n) -{ - const Hanja* hanja = hanja_list_get_nth(list, n); - return hanja_get_key(hanja); -} - -const char* -hanja_list_get_nth_value(const HanjaList *list, unsigned int n) -{ - const Hanja* hanja = hanja_list_get_nth(list, n); - return hanja_get_value(hanja); -} - -const char* -hanja_list_get_nth_comment(const HanjaList *list, unsigned int n) -{ - const Hanja* hanja = hanja_list_get_nth(list, n); - return hanja_get_comment(hanja); -} - -void -hanja_list_delete(HanjaList *list) -{ - if (list) { - size_t i; - for (i = 0; i < list->len; i++) { - hanja_delete((Hanja*)list->items[i]); - } - free(list->items); - free(list->key); - free(list); - } -} - -static int -compare_pair(const void* a, const void* b) -{ - const ucschar* c = a; - const HanjaPair* y = b; - - return *c - y->first; -} - -size_t -hanja_compatibility_form(ucschar* hanja, const ucschar* hangul, size_t n) -{ - size_t i; - size_t nconverted; - - if (hangul == NULL || hanja == NULL) - return 0; - - nconverted = 0; - for (i = 0; i < n && hangul[i] != 0 && hanja[i] != 0; i++) { - HanjaPairArray* p; - - p = bsearch(&hanja[i], - hanja_unified_to_compat_table, - N_ELEMENTS(hanja_unified_to_compat_table), - sizeof(hanja_unified_to_compat_table[0]), - compare_pair); - if (p != NULL) { - const HanjaPair* pair = p->pairs; - while (pair->first != 0) { - if (pair->first == hangul[i]) { - hanja[i] = pair->second; - nconverted++; - break; - } - pair++; - } - } - } - - return nconverted; -} - -size_t -hanja_unified_form(ucschar* str, size_t n) -{ - size_t i; - size_t nconverted; - - if (str == NULL) - return 0; - - nconverted = 0; - for (i = 0; i < n && str[i] != 0; i++) { - if (str[i] >= 0xF900 && str[i] <= 0xFA0B) { - str[i] = hanja_compat_to_unified_table[str[i] - 0xF900]; - nconverted++; - } - } - - return nconverted; -} -