diff -r e4d67989cc36 -r 47c74d1534e1 genericopenlibs/liboil/src/utf8_s.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/genericopenlibs/liboil/src/utf8_s.c Fri Apr 16 16:46:38 2010 +0300 @@ -0,0 +1,151 @@ +/* + * LIBOIL - Library of Optimized Inner Loops + * Copyright (c) 2004 David A. Schleef + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + * IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include +#include +#include +#include "liboil/utf8/utf8.h" + + +/* + * Little explanation: + * 0x00-0x7f ASCII, one byte character + * 0x80-0xbf continuation byte, not a valid start byte + * 0xc0-0xdf 2-byte character + * 0xe0-0xef 3-byte character + * 0xf0-0xf7 4-byte character + * 0xf8-0xff reserved (illegal at the present time) + */ +static void +utf8_validate_test (OilTest *test) +{ + int i; + int n = test->n; + uint8_t *ptr = oil_test_get_source_data (test, OIL_ARG_SRC1); + int x; + int extra_chars = 0; + + for (i=0;i= n-16) { + /* if it's close to the end, we'll randomly drop in a bad + * byte from either the 0x80-0xbf or 0xf8-0xff segments */ + x = oil_rand_u8(); + if (x < 16) { + x = oil_rand_u8(); + if (extra_chars>0) { + /* this might not actually be a bad char */ + ptr[i] = x; + extra_chars--; + } else { + if (x & 0x80) { + ptr[i] = 0x80 | (x&0x3f); + } else { + ptr[i] = 0xf8 | (x&0x07); + } + } + continue; + } + } + if (extra_chars > 0) { + ptr[i] = 0x80 | (oil_rand_u8() & 0x3f); + extra_chars--; + } else { + /* otherwise, we'll do a low probability of a multibyte char */ + x = oil_rand_u8() & 0xf; + if (x == 0) { + ptr[i] = 0xc0 | (oil_rand_u8() & 0x1f); + extra_chars = 1; + } else if (x == 1) { + ptr[i] = 0xe0 | (oil_rand_u8() & 0x0f); + extra_chars = 2; + } else if (x == 2) { + ptr[i] = 0xf0 | (oil_rand_u8() & 0x07); + extra_chars = 3; + } else { + ptr[i] = oil_rand_u8() & 0x7f; + } + } + } + +} + +/** + * oil_utf8_validate: + * @d_1: + * @s: + * @n: + * + * Checks @s for valid UTF-8 characters. If the entire @s array + * represents valid UTF-8 characters, @n is written to @d_1. + * Otherwise, the index in the array of the beginning of the first + * invalid UTF-8 character is written to @d_1. 
+ */
OIL_DEFINE_CLASS_FULL (utf8_validate, "int32_t *d_1, uint8_t *s, int n",
    utf8_validate_test);


/*
 * Reference implementation of utf8_validate.
 *
 * Scans the @n bytes at @s and stores in d_1[0] the index at which scanning
 * stopped: @n when every byte was accepted, otherwise
 *   - the index of a byte that is not a legal start byte,
 *   - the index of a lead byte whose sequence would run past the end, or
 *   - the index of the first byte that should have been a continuation
 *     byte (10xxxxxx) but is not.
 *
 * NOTE(review): in the last case the stored index is that of the offending
 * continuation byte, not of the lead byte that started the character, which
 * differs from the wording of the oil_utf8_validate documentation.  The
 * behaviour is kept as-is because other implementations of this class are
 * compared against this one -- confirm which is intended.
 *
 * NOTE(review): the loop header and the lead-byte classification were
 * destroyed when this listing was extracted; they are reconstructed here
 * from the byte-range table earlier in this file (0xc0-0xdf two bytes,
 * 0xe0-0xef three, 0xf0-0xf7 four).  Only the structure is checked: overlong
 * forms, surrogates and code points above U+10FFFF are not rejected.
 */
static void
utf8_validate_ref (int32_t *d_1, uint8_t *s, int n)
{
  int i;
  int extra_bytes;

  for (i = 0; i < n; i++) {
    /* 0x00-0x7f: single-byte character */
    if (s[i] < 0x80) continue;

    if ((s[i] & 0xe0) == 0xc0) {
      extra_bytes = 1;
    } else if ((s[i] & 0xf0) == 0xe0) {
      extra_bytes = 2;
    } else if ((s[i] & 0xf8) == 0xf0) {
      extra_bytes = 3;
    } else {
      /* 0x80-0xbf (bare continuation) or 0xf8-0xff (reserved) */
      goto error;
    }

    /* the whole sequence must fit inside the buffer before s[i+k] is read */
    if (i + extra_bytes >= n) goto error;
    while (extra_bytes--) {
      i++;
      if ((s[i] & 0xc0) != 0x80) goto error;
    }
  }

  /* falls through with i == n when the whole buffer was accepted */
error:
  d_1[0] = i;
}

OIL_DEFINE_IMPL_REF (utf8_validate_ref, utf8_validate);