textinput/ptihangulcore/src/hanja.c
branch RCL_3
changeset 3 f5a1e66df979
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/textinput/ptihangulcore/src/hanja.c	Fri Feb 19 23:09:27 2010 +0200
@@ -0,0 +1,603 @@
+/* libhangul
+ * Copyright (c) 2005,2006 Choe Hwanjin
+ * All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ */
+
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#ifdef HAVE_MMAP
+#include <sys/mman.h>
+#endif
+
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "hangul.h"
+#include "hangulinternals.h"
+
+/* Fallback boolean constants, defined only when no earlier header has
+ * already provided them. */
+#ifndef TRUE
+#define TRUE  1
+#endif
+
+#ifndef FALSE
+#define FALSE 0
+#endif
+
+/* Short type names for the helper structs defined further down in this
+ * file. */
+typedef struct _HanjaIndex     HanjaIndex;
+
+typedef struct _HanjaPair      HanjaPair;
+typedef struct _HanjaPairArray HanjaPairArray;
+
+/* Header of one dictionary entry.  hanja_new() stores the key, value and
+ * comment strings in the same allocation, right after this header; each
+ * field is the byte offset of one string from the start of the struct. */
+struct _Hanja {
+    uint32_t key_offset;     /* offset of the NUL-terminated key */
+    uint32_t value_offset;   /* offset of the NUL-terminated value */
+    uint32_t comment_offset; /* offset of the NUL-terminated comment */
+};
+
+/* Growable array of entries collected for one lookup. */
+struct _HanjaList {
+    char*         key;   /* strdup() copy of the key given to hanja_list_new() */
+    size_t        len;   /* number of slots of items currently in use */
+    size_t        alloc; /* number of slots allocated in items */
+    const Hanja** items; /* entry pointers; each is freed by hanja_list_delete() */
+};
+
+/* One index entry built by hanja_table_load() for a distinct key prefix. */
+struct _HanjaIndex {
+    unsigned offset; /* file position from which hanja_table_match() starts
+                      * reading lines for this prefix */
+    char     key[8]; /* leading key_size bytes of the key, zero padded */
+};
+
+/* A loaded dictionary: an in-memory prefix index plus the open file that
+ * the entries are read from on every lookup. */
+struct _HanjaTable {
+    HanjaIndex*    keytable; /* index entries, searched by hanja_table_match() */
+    unsigned       nkeys;    /* number of entries in keytable */
+    unsigned       key_size; /* number of key bytes compared with strncmp() */
+    FILE*          file;     /* dictionary file; closed by hanja_table_delete() */
+};
+
+/* One candidate mapping used by hanja_compatibility_form(). */
+struct _HanjaPair {
+    ucschar first;  /* compared against the hangul character */
+    ucschar second; /* written over the hanja character when first matches */
+};
+
+/* The set of candidate mappings for one hanja character. */
+struct _HanjaPairArray {
+    ucschar          key;   /* character looked up with bsearch() */
+    const HanjaPair* pairs; /* candidates, ended by an entry whose first is 0 */
+};
+
+#include "hanjacompatible.h"
+
+/* Number of bytes to skip for a UTF-8 sequence, indexed by its first byte:
+ * 0x00-0xBF -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3, 0xF0-0xF7 -> 4,
+ * 0xF8-0xFB -> 5, 0xFC-0xFD -> 6, 0xFE-0xFF -> 1. */
+static const char utf8_skip_table[256] = {
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,5,5,5,5,6,6,1,1
+};
+
+/* Return the utf8_skip_table entry for the byte that p points at, i.e.
+ * the number of bytes the sequence starting there is expected to span. */
+static
+#ifndef __SYMBIAN32__
+inline
+#endif
+int utf8_char_len(const char *p)
+{
+    const unsigned char lead = *(const unsigned char*)p;
+
+    return utf8_skip_table[lead];
+}
+
+/* Advance to the start of the next UTF-8 character.
+ *
+ * str: NUL-terminated string, positioned at the first byte of a character.
+ *
+ * Returns a pointer just past the current character, or a pointer to the
+ * terminating NUL if the string ends first.  When str already points at
+ * the terminator it is returned unchanged, so the result never leaves the
+ * string. */
+static
+#ifndef __SYMBIAN32__
+inline
+#endif
+const char* utf8_next(const char *str)
+{
+    int n;
+
+    /* Stepping forward from the terminator would read past the string. */
+    if (*str == '\0')
+        return str;
+
+    for (n = utf8_char_len(str); n > 0; n--) {
+        str++;
+        /* A truncated sequence must not carry us past the terminator. */
+        if (*str == '\0')
+            break;
+    }
+
+    return str;
+}
+
+/* Step back to the start of the UTF-8 character that precedes p.
+ *
+ * str: start of the buffer.
+ * p:   position inside the buffer (may be one past the last byte).
+ *
+ * Returns a pointer to the nearest byte before p that is not a
+ * continuation byte (10xxxxxx).  If no such byte exists, or p is already
+ * at str, str is returned: the result never points before the buffer, so
+ * callers may safely write through it. */
+static
+#ifndef __SYMBIAN32__
+inline
+#endif
+char* utf8_prev(const char *str, const char *p)
+{
+    while (p > str) {
+        --p;
+        if ((*p & 0xc0) != 0x80)
+            break;
+    }
+    return (char*)p;
+}
+
+/* hanja searching functions */
+/* Allocate a Hanja entry holding copies of the three strings.
+ *
+ * The header and the strings live in a single malloc() block laid out as
+ * [header][key\0][value\0][comment\0]; the header records the offset of
+ * each string.  A NULL comment is stored as an empty string.
+ *
+ * Returns the new entry (release it with hanja_delete()), or NULL when
+ * key or value is NULL or the allocation fails. */
+static Hanja *
+hanja_new(const char *key, const char *value, const char *comment)
+{
+    Hanja* hanja;
+    size_t keylen;
+    size_t valuelen;
+    size_t commentlen;
+    char*  p;
+
+    /* The caller feeds strtok_r() results, which are NULL on short lines. */
+    if (key == NULL || value == NULL)
+        return NULL;
+    if (comment == NULL)
+        comment = "";
+
+    /* Lengths include the terminating NUL. */
+    keylen = strlen(key) + 1;
+    valuelen = strlen(value) + 1;
+    commentlen = strlen(comment) + 1;
+
+    hanja = malloc(sizeof(*hanja) + keylen + valuelen + commentlen);
+    if (hanja == NULL)
+        return NULL;
+
+    p = (char*)hanja + sizeof(*hanja);
+    memcpy(p, key, keylen);
+    memcpy(p + keylen, value, valuelen);
+    memcpy(p + keylen + valuelen, comment, commentlen);
+
+    hanja->key_offset     = (uint32_t)sizeof(*hanja);
+    hanja->value_offset   = (uint32_t)(sizeof(*hanja) + keylen);
+    hanja->comment_offset = (uint32_t)(sizeof(*hanja) + keylen + valuelen);
+
+    return hanja;
+}
+
+/* Release an entry created by hanja_new().  The strings share the entry's
+ * allocation, so a single free() is enough. */
+static void
+hanja_delete(Hanja* hanja)
+{
+    free((void*)hanja);
+}
+
+/* Return the key string of an entry, or NULL when hanja is NULL.  The
+ * string is found key_offset bytes from the start of the entry. */
+const char*
+hanja_get_key(const Hanja* hanja)
+{
+    if (hanja == NULL)
+        return NULL;
+
+    return (const char*)hanja + hanja->key_offset;
+}
+
+/* Return the value string of an entry, or NULL when hanja is NULL.  The
+ * string is found value_offset bytes from the start of the entry. */
+const char*
+hanja_get_value(const Hanja* hanja)
+{
+    if (hanja == NULL)
+        return NULL;
+
+    return (const char*)hanja + hanja->value_offset;
+}
+
+/* Return the comment string of an entry, or NULL when hanja is NULL.  The
+ * string is found comment_offset bytes from the start of the entry. */
+const char*
+hanja_get_comment(const Hanja* hanja)
+{
+    if (hanja == NULL)
+        return NULL;
+
+    return (const char*)hanja + hanja->comment_offset;
+}
+
+/* Create an empty result list labelled with a copy of key.
+ *
+ * The list starts with room for one item.  Returns NULL when the list or
+ * its item array cannot be allocated.  The strdup() result is stored
+ * without being checked, so list->key is NULL if that copy failed. */
+static HanjaList *
+hanja_list_new(const char *key)
+{
+    HanjaList *list;
+
+    list = malloc(sizeof(*list));
+    if (list == NULL)
+        return NULL;
+
+    list->key = strdup(key);
+    list->len = 0;
+    list->alloc = 1;
+    list->items = malloc(list->alloc * sizeof(list->items[0]));
+    if (list->items == NULL) {
+        /* Release the key copy as well; it used to leak on this path. */
+        free(list->key);
+        free(list);
+        return NULL;
+    }
+
+    return list;
+}
+
+/* Make room for n more items by doubling the capacity as often as needed.
+ *
+ * The function reports nothing: when the request would overflow or
+ * realloc() fails, the list is left unchanged, so callers must compare
+ * list->alloc with the size they need afterwards. */
+static void
+hanja_list_reserve(HanjaList* list, size_t n)
+{
+    const size_t   max_items = SIZE_MAX / sizeof(list->items[0]);
+    size_t         wanted;
+    size_t         capacity;
+    const Hanja**  grown;
+
+    /* len + n must neither wrap nor exceed what can be allocated. */
+    if (n > max_items - list->len)
+        return;
+    wanted = list->len + n;
+
+    for (capacity = list->alloc; capacity < wanted; capacity *= 2)
+        ;
+
+    if (capacity > max_items)
+        return;
+
+    /* Nothing to do when the current array is already large enough. */
+    if (list->alloc >= wanted)
+        return;
+
+    grown = realloc(list->items, capacity * sizeof(list->items[0]));
+    if (grown == NULL)
+        return;
+
+    list->items = grown;
+    list->alloc = capacity;
+}
+
+/* Append pointers to n consecutive Hanja objects starting at hanja.
+ *
+ * Nothing is appended when list or hanja is NULL, when n is not positive,
+ * or when the list cannot grow; callers can detect that by comparing
+ * list->len before and after the call. */
+static void
+hanja_list_append_n(HanjaList* list, const Hanja* hanja, int n)
+{
+    size_t count;
+    size_t i;
+
+    /* A negative n would otherwise turn into a huge unsigned loop bound. */
+    if (list == NULL || hanja == NULL || n <= 0)
+        return;
+
+    count = (size_t)n;
+    hanja_list_reserve(list, count);
+
+    /* hanja_list_reserve() fails silently; written as a subtraction so the
+     * check itself cannot overflow. */
+    if (list->alloc < list->len || list->alloc - list->len < count)
+        return;
+
+    for (i = 0; i < count; i++)
+        list->items[list->len + i] = hanja + i;
+    list->len += count;
+}
+
+/* Collect every dictionary entry whose key equals key.
+ *
+ * table: loaded dictionary (must not be NULL).
+ * key:   NUL-terminated key to look up.
+ * list:  in/out; *list is created on the first match and reused after
+ *        that, so several lookups can accumulate into one list.
+ *
+ * The index is binary-searched on the first key_size bytes of key, then
+ * the file is read line by line ("key:value:comment") from the recorded
+ * offset.  Reading stops at the first line whose key compares greater
+ * than key.  Malformed lines are skipped; on an allocation or seek
+ * failure the function returns with whatever was collected so far. */
+static void
+hanja_table_match(const HanjaTable* table,
+                  const char* key, HanjaList** list)
+{
+    int low, high, mid = 0;
+    int res = -1;
+    char buf[512];
+
+    /* An empty index has no keytable[0] to compare against. */
+    if (table->nkeys == 0)
+        return;
+
+    low = 0;
+    high = (int)table->nkeys - 1;
+
+    while (low < high) {
+        mid = (low + high) / 2;
+        res = strncmp(table->keytable[mid].key, key, table->key_size);
+        if (res < 0) {
+            low = mid + 1;
+        } else if (res > 0) {
+            high = mid - 1;
+        } else {
+            break;
+        }
+    }
+
+    /* The loop stops before testing the last candidate; test it here. */
+    if (res != 0) {
+        mid = low;
+        res = strncmp(table->keytable[mid].key, key, table->key_size);
+    }
+
+    if (res != 0)
+        return;
+
+    if (fseek(table->file, (long)table->keytable[mid].offset, SEEK_SET) != 0)
+        return;
+
+    while (fgets(buf, sizeof(buf), table->file) != NULL) {
+        char*  save = NULL;
+        char*  p = strtok_r(buf, ":", &save);
+        char*  value;
+        char*  comment;
+        Hanja* hanja;
+        size_t before;
+
+        /* A line made only of ':' characters yields no token at all. */
+        if (p == NULL)
+            continue;
+
+        res = strcmp(p, key);
+        if (res > 0)
+            break;
+        if (res < 0)
+            continue;
+
+        value = strtok_r(NULL, ":", &save);
+        if (value == NULL)
+            continue;           /* key without a value: malformed line */
+        comment = strtok_r(NULL, "\r\n", &save);
+
+        if (*list == NULL) {
+            *list = hanja_list_new(key);
+            if (*list == NULL)
+                return;
+        }
+
+        hanja = hanja_new(p, value, comment);
+        if (hanja == NULL)
+            return;
+
+        /* The append reports failure only by leaving len untouched. */
+        before = (*list)->len;
+        hanja_list_append_n(*list, hanja, 1);
+        if ((*list)->len == before) {
+            hanja_delete(hanja);
+            return;
+        }
+    }
+}
+
+/* Open a dictionary file and build the prefix index used for lookups.
+ *
+ * filename: path of the dictionary; NULL selects
+ *           LIBHANGUL_DEFAULT_HANJA_DIC.
+ *
+ * The file is scanned twice: the first pass counts how often the leading
+ * key_size bytes of the key change from one entry line to the next, the
+ * second pass records, for each such change, the prefix and the file
+ * offset of the line that introduced it.  Lines starting with '#' and
+ * empty lines are ignored.  The file stays open and is owned by the
+ * returned table.
+ *
+ * Returns the table (release it with hanja_table_delete()), or NULL when
+ * the file cannot be opened, its position cannot be queried, or memory
+ * runs out. */
+HanjaTable*
+hanja_table_load(const char* filename)
+{
+    unsigned nkeys;
+    unsigned i;
+    char buf[512];
+    int key_size = 5;
+    char last_key[8];
+    char* save_ptr;
+    char* key;
+    long offset;
+    FILE* file;
+    HanjaIndex* keytable;
+    HanjaTable* table;
+
+    if (filename == NULL)
+        filename = LIBHANGUL_DEFAULT_HANJA_DIC;
+
+    file = fopen(filename, "r");
+    if (file == NULL)
+        return NULL;
+
+    /* Pass 1: count the distinct consecutive key prefixes. */
+    nkeys = 0;
+    memset(last_key, 0, sizeof(last_key));
+    while (fgets(buf, sizeof(buf), file) != NULL) {
+        /* skip comments and empty lines */
+        if (buf[0] == '#' || buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0')
+            continue;
+
+        save_ptr = NULL;
+        key = strtok_r(buf, ":", &save_ptr);
+
+        if (key == NULL || strlen(key) == 0)
+            continue;
+
+        if (strncmp(last_key, key, key_size) != 0) {
+            nkeys++;
+            /* key_size < sizeof(last_key), so the zero bytes beyond the
+             * copied prefix keep last_key terminated. */
+            strncpy(last_key, key, key_size);
+        }
+    }
+
+    rewind(file);
+
+    /* calloc() zero-fills and checks the multiplication; ask for at least
+     * one element so that NULL always means failure. */
+    keytable = calloc(nkeys > 0 ? nkeys : 1, sizeof(keytable[0]));
+    if (keytable == NULL) {
+        fclose(file);
+        return NULL;
+    }
+
+    /* Pass 2: record prefix and offset.  last_key must start out empty
+     * again, otherwise the first key is dropped whenever it shares its
+     * prefix with the last key seen in pass 1. */
+    i = 0;
+    memset(last_key, 0, sizeof(last_key));
+    for (;;) {
+        /* Position of the line about to be read, so that an index entry
+         * points at its own line and not at comments preceding it. */
+        offset = ftell(file);
+        if (offset < 0) {
+            free(keytable);
+            fclose(file);
+            return NULL;
+        }
+
+        if (fgets(buf, sizeof(buf), file) == NULL)
+            break;
+
+        /* skip comments and empty lines */
+        if (buf[0] == '#' || buf[0] == '\r' || buf[0] == '\n' || buf[0] == '\0')
+            continue;
+
+        save_ptr = NULL;
+        key = strtok_r(buf, ":", &save_ptr);
+
+        if (key == NULL || strlen(key) == 0)
+            continue;
+
+        if (strncmp(last_key, key, key_size) != 0) {
+            /* Never write past the array sized by pass 1, even if the
+             * file changed between the two passes. */
+            if (i >= nkeys)
+                break;
+            keytable[i].offset = (unsigned)offset;
+            strncpy(keytable[i].key, key, key_size);
+            strncpy(last_key, key, key_size);
+            i++;
+        }
+    }
+
+    table = malloc(sizeof(*table));
+    if (table == NULL) {
+        free(keytable);
+        fclose(file);
+        return NULL;
+    }
+
+    table->keytable = keytable;
+    table->nkeys = i;           /* number of entries actually filled in */
+    table->key_size = key_size;
+    table->file = file;
+
+    return table;
+}
+
+/* Destroy a table returned by hanja_table_load(): frees the index, closes
+ * the dictionary file and frees the table.  NULL is accepted and ignored. */
+void
+hanja_table_delete(HanjaTable *table)
+{
+    if (table == NULL)
+        return;
+
+    free(table->keytable);
+    fclose(table->file);
+    free(table);
+}
+
+/* Look up the entries whose key is exactly key.
+ *
+ * Returns a new list (release it with hanja_list_delete()), or NULL when
+ * table is NULL, key is NULL or empty, or nothing was collected. */
+HanjaList*
+hanja_table_match_exact(const HanjaTable* table, const char *key)
+{
+    HanjaList* matches = NULL;
+
+    if (table != NULL && key != NULL && key[0] != '\0')
+        hanja_table_match(table, key, &matches);
+
+    return matches;
+}
+
+/* Look up key and each of its shorter prefixes.
+ *
+ * The full key is looked up first; then one trailing UTF-8 character is
+ * removed at a time and the rest is looked up again, until nothing is
+ * left.  All matches accumulate in one list.
+ *
+ * Returns the list (release it with hanja_list_delete()), or NULL when
+ * table is NULL, key is NULL or empty, the working copy of key cannot be
+ * allocated, or nothing was collected. */
+HanjaList*
+hanja_table_match_prefix(const HanjaTable* table, const char *key)
+{
+    char* p;
+    char* newkey;
+    HanjaList* ret = NULL;
+
+    if (key == NULL || key[0] == '\0' || table == NULL)
+        return NULL;
+
+    /* Work on a private copy: it is truncated in place below. */
+    newkey = strdup(key);
+    if (newkey == NULL)
+        return NULL;
+
+    p = strchr(newkey, '\0');
+    while (newkey[0] != '\0') {
+        hanja_table_match(table, newkey, &ret);
+        p = utf8_prev(newkey, p);
+        /* If the remaining bytes hold no UTF-8 lead byte (malformed
+         * input), the search can run off the front of the buffer.  Clamp
+         * it so the write stays inside newkey and the loop ends. */
+        if (p < newkey)
+            p = newkey;
+        p[0] = '\0';
+    }
+    free(newkey);
+
+    return ret;
+}
+
+/* Look up key and each of its shorter suffixes.
+ *
+ * The full key is looked up first; then the start is moved forward one
+ * UTF-8 character at a time and the rest is looked up again, until the
+ * end of the string.  All matches accumulate in one list.
+ *
+ * Returns the list (release it with hanja_list_delete()), or NULL when
+ * table is NULL, key is NULL or empty, or nothing was collected. */
+HanjaList*
+hanja_table_match_suffix(const HanjaTable* table, const char *key)
+{
+    HanjaList*  matches = NULL;
+    const char* tail;
+
+    if (table == NULL || key == NULL || key[0] == '\0')
+        return NULL;
+
+    for (tail = key; tail[0] != '\0'; tail = utf8_next(tail))
+        hanja_table_match(table, tail, &matches);
+
+    return matches;
+}
+
+/* Return the number of entries in list, or 0 when list is NULL. */
+int
+hanja_list_get_size(const HanjaList *list)
+{
+    if (list == NULL)
+        return 0;
+
+    return (int)list->len;
+}
+
+/* Return the key the list was created with, or NULL when list is NULL.
+ * The string belongs to the list. */
+const char*
+hanja_list_get_key(const HanjaList *list)
+{
+    if (list == NULL)
+        return NULL;
+
+    return list->key;
+}
+
+/* Return the n-th entry of list (counting from 0), or NULL when list is
+ * NULL or n is not below the number of entries. */
+const Hanja*
+hanja_list_get_nth(const HanjaList *list, unsigned int n)
+{
+    if (list == NULL || n >= list->len)
+        return NULL;
+
+    return list->items[n];
+}
+
+/* Return the key of the n-th entry; NULL when there is no such entry. */
+const char*
+hanja_list_get_nth_key(const HanjaList *list, unsigned int n)
+{
+    return hanja_get_key(hanja_list_get_nth(list, n));
+}
+
+/* Return the value of the n-th entry; NULL when there is no such entry. */
+const char*
+hanja_list_get_nth_value(const HanjaList *list, unsigned int n)
+{
+    return hanja_get_value(hanja_list_get_nth(list, n));
+}
+
+/* Return the comment of the n-th entry; NULL when there is no such entry. */
+const char*
+hanja_list_get_nth_comment(const HanjaList *list, unsigned int n)
+{
+    return hanja_get_comment(hanja_list_get_nth(list, n));
+}
+
+/* Destroy a list together with every entry it holds, its item array and
+ * its key copy.  NULL is accepted and ignored. */
+void
+hanja_list_delete(HanjaList *list)
+{
+    size_t idx;
+
+    if (list == NULL)
+        return;
+
+    idx = 0;
+    while (idx < list->len) {
+        /* The list owns its entries; drop the const to free them. */
+        hanja_delete((Hanja*)list->items[idx]);
+        idx++;
+    }
+
+    free(list->items);
+    free(list->key);
+    free(list);
+}
+
+/* bsearch() comparator for hanja_unified_to_compat_table.
+ *
+ * a: pointer to the ucschar being searched for.
+ * b: pointer to a table element.  hanja_compatibility_form() treats the
+ *    bsearch() result as a HanjaPairArray, so the element is read as one
+ *    here instead of relying on HanjaPair happening to start with a
+ *    ucschar as well.
+ *
+ * Returns a negative, zero or positive value as the searched character is
+ * less than, equal to or greater than the element's key.  Explicit
+ * comparisons are used because the difference of two unsigned values
+ * converted to int does not reliably carry the right sign. */
+static int
+compare_pair(const void* a, const void* b)
+{
+    const ucschar         needle = *(const ucschar*)a;
+    const HanjaPairArray* entry  = b;
+
+    return (needle > entry->key) - (needle < entry->key);
+}
+
+/* Replace characters of hanja by the mapped form selected by the
+ * character at the same position in hangul.
+ *
+ * hanja:  string modified in place.
+ * hangul: string read in parallel with hanja.
+ * n:      maximum number of positions to examine.
+ *
+ * Processing stops at n or at the first 0 in either string.  For each
+ * position, hanja[i] is looked up in hanja_unified_to_compat_table; if
+ * one of its pairs has first equal to hangul[i], hanja[i] becomes that
+ * pair's second.
+ *
+ * Returns the number of characters replaced, 0 when either argument is
+ * NULL. */
+size_t
+hanja_compatibility_form(ucschar* hanja, const ucschar* hangul, size_t n)
+{
+    size_t pos;
+    size_t nconverted = 0;
+
+    if (hanja == NULL || hangul == NULL)
+        return 0;
+
+    pos = 0;
+    while (pos < n && hangul[pos] != 0 && hanja[pos] != 0) {
+        const HanjaPairArray* found;
+
+        found = bsearch(hanja + pos,
+                        hanja_unified_to_compat_table,
+                        N_ELEMENTS(hanja_unified_to_compat_table),
+                        sizeof(hanja_unified_to_compat_table[0]),
+                        compare_pair);
+        if (found != NULL) {
+            const HanjaPair* pair;
+
+            /* The pair list ends with an entry whose first is 0. */
+            for (pair = found->pairs; pair->first != 0; pair++) {
+                if (pair->first != hangul[pos])
+                    continue;
+                hanja[pos] = pair->second;
+                nconverted++;
+                break;
+            }
+        }
+        pos++;
+    }
+
+    return nconverted;
+}
+
+/* Replace, in place, every character of str in the range 0xF900..0xFA0B
+ * by its entry in hanja_compat_to_unified_table.
+ *
+ * str: string modified in place.
+ * n:   maximum number of characters to examine.
+ *
+ * Processing stops at n or at the first 0.  Returns the number of
+ * characters replaced, 0 when str is NULL. */
+size_t
+hanja_unified_form(ucschar* str, size_t n)
+{
+    size_t pos;
+    size_t nconverted = 0;
+
+    if (str == NULL)
+        return 0;
+
+    for (pos = 0; pos < n; pos++) {
+        const ucschar ch = str[pos];
+
+        if (ch == 0)
+            break;
+        if (ch < 0xF900 || ch > 0xFA0B)
+            continue;
+
+        /* The table is indexed relative to the first code point, 0xF900. */
+        str[pos] = hanja_compat_to_unified_table[ch - 0xF900];
+        nconverted++;
+    }
+
+    return nconverted;
+}
+