--- a/textinput/ptihangulcore/src/hanja.c Mon Mar 15 12:42:02 2010 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,603 +0,0 @@
-/* libhangul
- * Copyright (c) 2005,2006 Choe Hwanjin
- * All rights reserved.
- *
- * This library is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with this library; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#ifdef HAVE_CONFIG_H
-#include <config.h>
-#endif
-
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <unistd.h>
-
-#ifdef HAVE_MMAP
-#include <sys/mman.h>
-#endif
-
-#include <limits.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "hangul.h"
-#include "hangulinternals.h"
-
-#ifndef TRUE
-#define TRUE 1
-#endif
-
-#ifndef FALSE
-#define FALSE 0
-#endif
-
-typedef struct _HanjaIndex HanjaIndex;
-
-typedef struct _HanjaPair HanjaPair;
-typedef struct _HanjaPairArray HanjaPairArray;
-
-struct _Hanja {
- uint32_t key_offset;
- uint32_t value_offset;
- uint32_t comment_offset;
-};
-
-struct _HanjaList {
- char* key;
- size_t len;
- size_t alloc;
- const Hanja** items;
-};
-
-struct _HanjaIndex {
- unsigned offset;
- char key[8];
-};
-
-struct _HanjaTable {
- HanjaIndex* keytable;
- unsigned nkeys;
- unsigned key_size;
- FILE* file;
-};
-
-struct _HanjaPair {
- ucschar first;
- ucschar second;
-};
-
-struct _HanjaPairArray {
- ucschar key;
- const HanjaPair* pairs;
-};
-
-#include "hanjacompatible.h"
-
-static const char utf8_skip_table[256] = {
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
- 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
-};
-
-static
-#ifndef __SYMBIAN32__
-inline
-#endif
-int utf8_char_len(const char *p)
-{
- return utf8_skip_table[*(const unsigned char*)p];
-}
-
-static
-#ifndef __SYMBIAN32__
-inline
-#endif
-const char* utf8_next(const char *str)
-{
- int n = utf8_char_len(str);
-
- while (n > 0) {
- str++;
- if (*str == '\0')
- return str;
- n--;
- }
-
- return str;
-}
-
-static
-#ifndef __SYMBIAN32__
-inline
-#endif
-char* utf8_prev(const char *str, const char *p)
-{
- for (--p; p >= str; --p) {
- if ((*p & 0xc0) != 0x80)
- break;
- }
- return (char*)p;
-}
-
-/* hanja searching functions */
-static Hanja *
-hanja_new(const char *key, const char *value, const char *comment)
-{
- Hanja* hanja;
- size_t size;
- size_t keylen;
- size_t valuelen;
- size_t commentlen;
- char* p;
-
- keylen = strlen(key) + 1;
- valuelen = strlen(value) + 1;
- if (comment != NULL)
- commentlen = strlen(comment) + 1;
- else
- commentlen = 1;
-
- size = sizeof(*hanja) + keylen + valuelen + commentlen;
- hanja = malloc(size);
- if (hanja == NULL)
- return NULL;
-
- p = (char*)hanja + sizeof(*hanja);
- strcpy(p, key);
- p += keylen;
- strcpy(p, value);
- p += valuelen;
- if (comment != NULL)
- strcpy(p, comment);
- else
- *p = '\0';
- p += valuelen;
-
- hanja->key_offset = sizeof(*hanja);
- hanja->value_offset = sizeof(*hanja) + keylen;
- hanja->comment_offset = sizeof(*hanja) + keylen + valuelen;
-
- return hanja;
-}
-
-static void
-hanja_delete(Hanja* hanja)
-{
- free(hanja);
-}
-
-const char*
-hanja_get_key(const Hanja* hanja)
-{
- if (hanja != NULL) {
- const char* p = (const char*)hanja;
- return p + hanja->key_offset;
- }
- return NULL;
-}
-
-const char*
-hanja_get_value(const Hanja* hanja)
-{
- if (hanja != NULL) {
- const char* p = (const char*)hanja;
- return p + hanja->value_offset;
- }
- return NULL;
-}
-
-const char*
-hanja_get_comment(const Hanja* hanja)
-{
- if (hanja != NULL) {
- const char* p = (const char*)hanja;
- return p + hanja->comment_offset;
- }
- return NULL;
-}
-
-static HanjaList *
-hanja_list_new(const char *key)
-{
- HanjaList *list;
-
- list = malloc(sizeof(*list));
- if (list != NULL) {
- list->key = strdup(key);
- list->len = 0;
- list->alloc = 1;
- list->items = malloc(list->alloc * sizeof(list->items[0]));
- if (list->items == NULL) {
- free(list);
- list = NULL;
- }
- }
-
- return list;
-}
-
-static void
-hanja_list_reserve(HanjaList* list, size_t n)
-{
- size_t size = list->alloc;
-
- if (n > SIZE_MAX / sizeof(list->items[0]) - list->len)
- return;
-
- while (size < list->len + n)
- size *= 2;
-
- if (size > SIZE_MAX / sizeof(list->items[0]))
- return;
-
- if (list->alloc < list->len + n) {
- const Hanja** data;
-
- data = realloc(list->items, size * sizeof(list->items[0]));
- if (data != NULL) {
- list->alloc = size;
- list->items = data;
- }
- }
-}
-
-static void
-hanja_list_append_n(HanjaList* list, const Hanja* hanja, int n)
-{
- hanja_list_reserve(list, n);
-
- if (list->alloc >= list->len + n) {
- unsigned int i;
- for (i = 0; i < n ; i++)
- list->items[list->len + i] = hanja + i;
- list->len += n;
- }
-}
-
-static void
-hanja_table_match(const HanjaTable* table,
- const char* key, HanjaList** list)
-{
- int low, high, mid = 0;
- int res = -1;
-
- low = 0;
- high = table->nkeys - 1;
-
- while (low < high) {
- mid = (low + high) / 2;
- res = strncmp(table->keytable[mid].key, key, table->key_size);
- if (res < 0) {
- low = mid + 1;
- } else if (res > 0) {
- high = mid - 1;
- } else {
- break;
- }
- }
-
- if (res != 0) {
- mid = low;
- res = strncmp(table->keytable[mid].key, key, table->key_size);
- }
-
- if (res == 0) {
- unsigned offset;
- char buf[512];
-
- offset = table->keytable[mid].offset;
- fseek(table->file, offset, SEEK_SET);
-
- while (fgets(buf, sizeof(buf), table->file) != NULL) {
- char* save = NULL;
- char* p = strtok_r(buf, ":", &save);
- res = strcmp(p, key);
- if (res == 0) {
- char* value = strtok_r(NULL, ":", &save);
- char* comment = strtok_r(NULL, "\r\n", &save);
-
- Hanja* hanja = hanja_new(p, value, comment);
-
- if (*list == NULL) {
- *list = hanja_list_new(key);
- }
-
- hanja_list_append_n(*list, hanja, 1);
- } else if (res > 0) {
- break;
- }
- }
- }
-}
-
-HanjaTable*
-hanja_table_load(const char* filename)
-{
- unsigned nkeys;
- char buf[512];
- int key_size = 5;
- char last_key[8] = { '\0', };
- char* save_ptr = NULL;
- char* key;
- long offset;
- unsigned i;
- FILE* file;
- HanjaIndex* keytable;
- HanjaTable* table;
-
- if (filename == NULL)
- filename = LIBHANGUL_DEFAULT_HANJA_DIC;
-
- file = fopen(filename, "r");
- if (file == NULL) {
- return NULL;
- }
-
- nkeys = 0;
- while (fgets(buf, sizeof(buf), file) != NULL) {
- /* skip comments and empty lines */
- if (buf[0] == '#' || buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0')
- continue;
-
- save_ptr = NULL;
- key = strtok_r(buf, ":", &save_ptr);
-
- if (key == NULL || strlen(key) == 0)
- continue;
-
- if (strncmp(last_key, key, key_size) != 0) {
- nkeys++;
- strncpy(last_key, key, key_size);
- }
- }
-
- rewind(file);
- keytable = malloc(nkeys * sizeof(keytable[0]));
- memset(keytable, 0, nkeys * sizeof(keytable[0]));
-
- i = 0;
- offset = ftell(file);
- while (fgets(buf, sizeof(buf), file) != NULL) {
- /* skip comments and empty lines */
- if (buf[0] == '#' || buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0')
- continue;
-
- save_ptr = NULL;
- key = strtok_r(buf, ":", &save_ptr);
-
- if (key == NULL || strlen(key) == 0)
- continue;
-
- if (strncmp(last_key, key, key_size) != 0) {
- keytable[i].offset = offset;
- strncpy(keytable[i].key, key, key_size);
- strncpy(last_key, key, key_size);
- i++;
- }
- offset = ftell(file);
- }
-
- table = malloc(sizeof(*table));
- if (table == NULL) {
- free(keytable);
- fclose(file);
- return NULL;
- }
-
- table->keytable = keytable;
- table->nkeys = nkeys;
- table->key_size = key_size;
- table->file = file;
-
- return table;
-}
-
-void
-hanja_table_delete(HanjaTable *table)
-{
- if (table != NULL) {
- free(table->keytable);
- fclose(table->file);
- free(table);
- }
-}
-
-HanjaList*
-hanja_table_match_exact(const HanjaTable* table, const char *key)
-{
- HanjaList* ret = NULL;
-
- if (key == NULL || key[0] == '\0' || table == NULL)
- return NULL;
-
- hanja_table_match(table, key, &ret);
-
- return ret;
-}
-
-HanjaList*
-hanja_table_match_prefix(const HanjaTable* table, const char *key)
-{
- char* p;
- char* newkey;
- HanjaList* ret = NULL;
-
- if (key == NULL || key[0] == '\0' || table == NULL)
- return NULL;
-
- newkey = strdup(key);
- if (newkey == NULL)
- return NULL;
-
- p = strchr(newkey, '\0');
- while (newkey[0] != '\0') {
- hanja_table_match(table, newkey, &ret);
- p = utf8_prev(newkey, p);
- p[0] = '\0';
- }
- free(newkey);
-
- return ret;
-}
-
-HanjaList*
-hanja_table_match_suffix(const HanjaTable* table, const char *key)
-{
- const char* p;
- HanjaList* ret = NULL;
-
- if (key == NULL || key[0] == '\0' || table == NULL)
- return NULL;
-
- p = key;
- while (p[0] != '\0') {
- hanja_table_match(table, p, &ret);
- p = utf8_next(p);
- }
-
- return ret;
-}
-
-int
-hanja_list_get_size(const HanjaList *list)
-{
- if (list != NULL)
- return list->len;
- return 0;
-}
-
-const char*
-hanja_list_get_key(const HanjaList *list)
-{
- if (list != NULL)
- return list->key;
- return NULL;
-}
-
-const Hanja*
-hanja_list_get_nth(const HanjaList *list, unsigned int n)
-{
- if (list != NULL) {
- if (n < list->len)
- return list->items[n];
- }
- return NULL;
-}
-
-const char*
-hanja_list_get_nth_key(const HanjaList *list, unsigned int n)
-{
- const Hanja* hanja = hanja_list_get_nth(list, n);
- return hanja_get_key(hanja);
-}
-
-const char*
-hanja_list_get_nth_value(const HanjaList *list, unsigned int n)
-{
- const Hanja* hanja = hanja_list_get_nth(list, n);
- return hanja_get_value(hanja);
-}
-
-const char*
-hanja_list_get_nth_comment(const HanjaList *list, unsigned int n)
-{
- const Hanja* hanja = hanja_list_get_nth(list, n);
- return hanja_get_comment(hanja);
-}
-
-void
-hanja_list_delete(HanjaList *list)
-{
- if (list) {
- size_t i;
- for (i = 0; i < list->len; i++) {
- hanja_delete((Hanja*)list->items[i]);
- }
- free(list->items);
- free(list->key);
- free(list);
- }
-}
-
-static int
-compare_pair(const void* a, const void* b)
-{
- const ucschar* c = a;
- const HanjaPair* y = b;
-
- return *c - y->first;
-}
-
-size_t
-hanja_compatibility_form(ucschar* hanja, const ucschar* hangul, size_t n)
-{
- size_t i;
- size_t nconverted;
-
- if (hangul == NULL || hanja == NULL)
- return 0;
-
- nconverted = 0;
- for (i = 0; i < n && hangul[i] != 0 && hanja[i] != 0; i++) {
- HanjaPairArray* p;
-
- p = bsearch(&hanja[i],
- hanja_unified_to_compat_table,
- N_ELEMENTS(hanja_unified_to_compat_table),
- sizeof(hanja_unified_to_compat_table[0]),
- compare_pair);
- if (p != NULL) {
- const HanjaPair* pair = p->pairs;
- while (pair->first != 0) {
- if (pair->first == hangul[i]) {
- hanja[i] = pair->second;
- nconverted++;
- break;
- }
- pair++;
- }
- }
- }
-
- return nconverted;
-}
-
-size_t
-hanja_unified_form(ucschar* str, size_t n)
-{
- size_t i;
- size_t nconverted;
-
- if (str == NULL)
- return 0;
-
- nconverted = 0;
- for (i = 0; i < n && str[i] != 0; i++) {
- if (str[i] >= 0xF900 && str[i] <= 0xFA0B) {
- str[i] = hanja_compat_to_unified_table[str[i] - 0xF900];
- nconverted++;
- }
- }
-
- return nconverted;
-}
-