symbian-qemu-0.9.1-12/python-2.6.1/Python/pystrtod.c
changeset 1 2fb8b9db1c86
equal deleted inserted replaced
0:ffa851df0825 1:2fb8b9db1c86
       
     1 /* -*- Mode: C; c-file-style: "python" -*- */
       
     2 
       
     3 #include <Python.h>
       
     4 #include <locale.h>
       
     5 
       
     /* Pure-ASCII character classification.  Unlike the <ctype.h> functions
        these never consult the current locale.  Each macro may evaluate its
        argument more than once, so pass side-effect-free expressions only. */
     #define ISSPACE(c)  (' ' == (c) || '\t' == (c) || '\n' == (c) || \
                          '\v' == (c) || '\f' == (c) || '\r' == (c))
     #define ISDIGIT(c)  ('0' <= (c) && (c) <= '9')
       
    10 
       
    11 
       
    /**
     * PyOS_ascii_strtod:
     * @nptr:    the string to convert to a numeric value.
     * @endptr:  if non-NULL, receives a pointer to the character after
     *           the last character used in the conversion.
     *
     * Converts a string to a double value.
     * This function behaves like the standard strtod() function
     * does in the C locale. It does this without actually
     * changing the current locale, since that would not be
     * thread-safe.
     *
     * This function is typically used when reading configuration
     * files or other non-user input that should be locale independent.
     * To handle input from the user you should normally use the
     * locale-sensitive system strtod() function.
     *
     * Overflow and underflow are reported by the underlying system
     * strtod() (conventionally +/-HUGE_VAL or zero with errno set to
     * ERANGE).  errno is reset to zero before strtod() is called so that
     * such conditions can be detected reliably.
     *
     * Error paths handled here, all of which return -1.0 and, when
     * @endptr is non-NULL, store @nptr in *@endptr:
     *   - after optional whitespace and sign, the text does not start
     *     with a digit, '.', or one of i/I/n/N, or it starts with
     *     0x/0X: errno is set to EINVAL;
     *   - in a locale whose decimal point is not ".", the leading
     *     digits are followed by the locale's decimal point: errno is
     *     set to EINVAL (Python bug #1417699);
     *   - the temporary buffer cannot be allocated: errno is set to
     *     ENOMEM.
     *
     * Return value: the converted double value.
     **/
    double
    PyOS_ascii_strtod(const char *nptr, char **endptr)
    {
        char *fail_pos = NULL;
        double val = -1.0;      /* returned unchanged on every error path */
        struct lconv *locale_data;
        const char *decimal_point;
        size_t decimal_point_len;
        const char *p;
        const char *decimal_point_pos = NULL;
        const char *end = NULL; /* only meaningful if decimal_point_pos is set */
        const char *digits_pos = NULL;
        int negate = 0;

        assert(nptr != NULL);

        locale_data = localeconv();
        decimal_point = locale_data->decimal_point;
        decimal_point_len = strlen(decimal_point);

        assert(decimal_point_len != 0);

        /* We process any leading whitespace and the optional sign manually,
           then pass the remainder to the system strtod.  This ensures that
           the result of an underflow has the correct sign. (bug #1725)  */

        p = nptr;
        /* Skip leading space */
        while (ISSPACE(*p))
            p++;

        /* Process leading sign, if present */
        if (*p == '-') {
            negate = 1;
            p++;
        } else if (*p == '+') {
            p++;
        }

        /* What's left should begin with a digit, a decimal point, or one of
           the letters i, I, n, N. It should not begin with 0x or 0X */
        if ((!ISDIGIT(*p) &&
             *p != '.' && *p != 'i' && *p != 'I' && *p != 'n' && *p != 'N')
            ||
            (*p == '0' && (p[1] == 'x' || p[1] == 'X')))
        {
            if (endptr)
                *endptr = (char *)nptr;
            errno = EINVAL;
            return val;
        }
        digits_pos = p;

        /* Only when the locale's decimal point is something other than "."
           do we have to locate the '.' ourselves so it can be translated
           before the system strtod sees it. */
        if (decimal_point[0] != '.' ||
            decimal_point[1] != 0)
        {
            while (ISDIGIT(*p))
                p++;

            if (*p == '.')
            {
                decimal_point_pos = p++;

                while (ISDIGIT(*p))
                    p++;

                /* Optional exponent: marker, sign, digits. */
                if (*p == 'e' || *p == 'E')
                    p++;
                if (*p == '+' || *p == '-')
                    p++;
                while (ISDIGIT(*p))
                    p++;
                end = p;
            }
            else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            {
                /* Python bug #1417699 */
                if (endptr)
                    *endptr = (char *)nptr;
                errno = EINVAL;
                return val;
            }
            /* For the other cases, we need not convert the decimal
               point */
        }

        /* Set errno to zero, so that we can distinguish zero results
           and underflows */
        errno = 0;

        if (decimal_point_pos)
        {
            char *copy, *c;

            /* We need to convert the '.' to the locale specific decimal
               point */
            copy = (char *)PyMem_MALLOC(end - digits_pos +
                                        1 + decimal_point_len);
            if (copy == NULL) {
                if (endptr)
                    *endptr = (char *)nptr;
                errno = ENOMEM;
                return val;
            }

            /* copy = <integer digits> <locale decimal point> <rest> NUL */
            c = copy;
            memcpy(c, digits_pos, decimal_point_pos - digits_pos);
            c += decimal_point_pos - digits_pos;
            memcpy(c, decimal_point, decimal_point_len);
            c += decimal_point_len;
            memcpy(c, decimal_point_pos + 1,
                   end - (decimal_point_pos + 1));
            c += end - (decimal_point_pos + 1);
            *c = 0;

            val = strtod(copy, &fail_pos);

            if (fail_pos)
            {
                /* Translate the stop position from the copy back into the
                   caller's string.  fail_pos points into copy while
                   decimal_point_pos points into nptr, so the two must be
                   compared as offsets from their own bases, never as raw
                   pointers.  If strtod stopped beyond the start of the
                   decimal point, compensate for the decimal point being
                   decimal_point_len bytes in the copy but one byte in
                   the original. */
                if (fail_pos - copy > decimal_point_pos - digits_pos)
                    fail_pos = (char *)digits_pos +
                        (fail_pos - copy) -
                        (decimal_point_len - 1);
                else
                    fail_pos = (char *)digits_pos +
                        (fail_pos - copy);
            }

            PyMem_FREE(copy);

        }
        else {
            val = strtod(digits_pos, &fail_pos);
        }

        /* Nothing converted: report the very start of the input, so that
           skipped whitespace and sign are not counted as consumed. */
        if (fail_pos == digits_pos)
            fail_pos = (char *)nptr;

        if (negate && fail_pos != nptr)
            val = -val;

        if (endptr)
            *endptr = fail_pos;

        return val;
    }
       
   189 
       
   190 /* Given a string that may have a decimal point in the current
       
   191    locale, change it back to a dot.  Since the string cannot get
       
   192    longer, no need for a maximum buffer size parameter. */
       
   193 Py_LOCAL_INLINE(void)
       
   194 change_decimal_from_locale_to_dot(char* buffer)
       
   195 {
       
   196 	struct lconv *locale_data = localeconv();
       
   197 	const char *decimal_point = locale_data->decimal_point;
       
   198 
       
   199 	if (decimal_point[0] != '.' || decimal_point[1] != 0) {
       
   200 		size_t decimal_point_len = strlen(decimal_point);
       
   201 
       
   202 		if (*buffer == '+' || *buffer == '-')
       
   203 			buffer++;
       
   204 		while (isdigit(Py_CHARMASK(*buffer)))
       
   205 			buffer++;
       
   206 		if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
       
   207 			*buffer = '.';
       
   208 			buffer++;
       
   209 			if (decimal_point_len > 1) {
       
   210 				/* buffer needs to get smaller */
       
   211 				size_t rest_len = strlen(buffer +
       
   212 						     (decimal_point_len - 1));
       
   213 				memmove(buffer,
       
   214 					buffer + (decimal_point_len - 1),
       
   215 					rest_len);
       
   216 				buffer[rest_len] = 0;
       
   217 			}
       
   218 		}
       
   219 	}
       
   220 }
       
   221 
       
   222 
       
   223 /* From the C99 standard, section 7.19.6:
       
   224 The exponent always contains at least two digits, and only as many more digits
       
   225 as necessary to represent the exponent.
       
   226 */
       
   227 #define MIN_EXPONENT_DIGITS 2
       
   228 
       
   229 /* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
       
   230    in length. */
       
   231 Py_LOCAL_INLINE(void)
       
   232 ensure_minumim_exponent_length(char* buffer, size_t buf_size)
       
   233 {
       
   234 	char *p = strpbrk(buffer, "eE");
       
   235 	if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
       
   236 		char *start = p + 2;
       
   237 		int exponent_digit_cnt = 0;
       
   238 		int leading_zero_cnt = 0;
       
   239 		int in_leading_zeros = 1;
       
   240 		int significant_digit_cnt;
       
   241 
       
   242 		/* Skip over the exponent and the sign. */
       
   243 		p += 2;
       
   244 
       
   245 		/* Find the end of the exponent, keeping track of leading
       
   246 		   zeros. */
       
   247 		while (*p && isdigit(Py_CHARMASK(*p))) {
       
   248 			if (in_leading_zeros && *p == '0')
       
   249 				++leading_zero_cnt;
       
   250 			if (*p != '0')
       
   251 				in_leading_zeros = 0;
       
   252 			++p;
       
   253 			++exponent_digit_cnt;
       
   254 		}
       
   255 
       
   256 		significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
       
   257 		if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
       
   258 			/* If there are 2 exactly digits, we're done,
       
   259 			   regardless of what they contain */
       
   260 		}
       
   261 		else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
       
   262 			int extra_zeros_cnt;
       
   263 
       
   264 			/* There are more than 2 digits in the exponent.  See
       
   265 			   if we can delete some of the leading zeros */
       
   266 			if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
       
   267 				significant_digit_cnt = MIN_EXPONENT_DIGITS;
       
   268 			extra_zeros_cnt = exponent_digit_cnt -
       
   269 				significant_digit_cnt;
       
   270 
       
   271 			/* Delete extra_zeros_cnt worth of characters from the
       
   272 			   front of the exponent */
       
   273 			assert(extra_zeros_cnt >= 0);
       
   274 
       
   275 			/* Add one to significant_digit_cnt to copy the
       
   276 			   trailing 0 byte, thus setting the length */
       
   277 			memmove(start,
       
   278 				start + extra_zeros_cnt,
       
   279 				significant_digit_cnt + 1);
       
   280 		}
       
   281 		else {
       
   282 			/* If there are fewer than 2 digits, add zeros
       
   283 			   until there are 2, if there's enough room */
       
   284 			int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
       
   285 			if (start + zeros + exponent_digit_cnt + 1
       
   286 			      < buffer + buf_size) {
       
   287 				memmove(start + zeros, start,
       
   288 					exponent_digit_cnt + 1);
       
   289 				memset(start, '0', zeros);
       
   290 			}
       
   291 		}
       
   292 	}
       
   293 }
       
   294 
       
   295 /* Ensure that buffer has a decimal point in it.  The decimal point
       
   296    will not be in the current locale, it will always be '.' */
       
   297 Py_LOCAL_INLINE(void)
       
   298 ensure_decimal_point(char* buffer, size_t buf_size)
       
   299 {
       
   300 	int insert_count = 0;
       
   301 	char* chars_to_insert;
       
   302 
       
   303 	/* search for the first non-digit character */
       
   304 	char *p = buffer;
       
   305 	if (*p == '-' || *p == '+')
       
   306 		/* Skip leading sign, if present.  I think this could only
       
   307 		   ever be '-', but it can't hurt to check for both. */
       
   308 		++p;
       
   309 	while (*p && isdigit(Py_CHARMASK(*p)))
       
   310 		++p;
       
   311 
       
   312 	if (*p == '.') {
       
   313 		if (isdigit(Py_CHARMASK(*(p+1)))) {
       
   314 			/* Nothing to do, we already have a decimal
       
   315 			   point and a digit after it */
       
   316 		}
       
   317 		else {
       
   318 			/* We have a decimal point, but no following
       
   319 			   digit.  Insert a zero after the decimal. */
       
   320 			++p;
       
   321 			chars_to_insert = "0";
       
   322 			insert_count = 1;
       
   323 		}
       
   324 	}
       
   325 	else {
       
   326 		chars_to_insert = ".0";
       
   327 		insert_count = 2;
       
   328 	}
       
   329 	if (insert_count) {
       
   330 		size_t buf_len = strlen(buffer);
       
   331 		if (buf_len + insert_count + 1 >= buf_size) {
       
   332 			/* If there is not enough room in the buffer
       
   333 			   for the additional text, just skip it.  It's
       
   334 			   not worth generating an error over. */
       
   335 		}
       
   336 		else {
       
   337 			memmove(p + insert_count, p,
       
   338 				buffer + strlen(buffer) - p + 1);
       
   339 			memcpy(p, chars_to_insert, insert_count);
       
   340 		}
       
   341 	}
       
   342 }
       
   343 
       
   344 /* Add the locale specific grouping characters to buffer.  Note
       
   345    that any decimal point (if it's present) in buffer is already
       
   346    locale-specific.  Return 0 on error, else 1. */
       
   347 Py_LOCAL_INLINE(int)
       
   348 add_thousands_grouping(char* buffer, size_t buf_size)
       
   349 {
       
   350 	Py_ssize_t len = strlen(buffer);
       
   351 	struct lconv *locale_data = localeconv();
       
   352 	const char *decimal_point = locale_data->decimal_point;
       
   353 
       
   354 	/* Find the decimal point, if any.  We're only concerned
       
   355 	   about the characters to the left of the decimal when
       
   356 	   adding grouping. */
       
   357 	char *p = strstr(buffer, decimal_point);
       
   358 	if (!p) {
       
   359 		/* No decimal, use the entire string. */
       
   360 
       
   361 		/* If any exponent, adjust p. */
       
   362 		p = strpbrk(buffer, "eE");
       
   363 		if (!p)
       
   364 			/* No exponent and no decimal.  Use the entire
       
   365 			   string. */
       
   366 			p = buffer + len;
       
   367 	}
       
   368 	/* At this point, p points just past the right-most character we
       
   369 	   want to format.  We need to add the grouping string for the
       
   370 	   characters between buffer and p. */
       
   371 	return _PyString_InsertThousandsGrouping(buffer, len, p-buffer,
       
   372 						 buf_size, NULL, 1);
       
   373 }
       
   374 
       
   /* see FORMATBUFLEN in unicodeobject.c */
   #define FLOAT_FORMATBUFLEN 120

   /**
    * PyOS_ascii_formatd:
    * @buffer: A buffer to place the resulting string in
    * @buf_size: The length of the buffer.
    * @format: The printf()-style format to use for converting.  It must
    *          start with '%' and end with the conversion specifier.
    * @d: The double to convert
    *
    * Converts a double to a string, using the '.' as
    * decimal point. To format the number you pass in
    * a printf()-style format string. Allowed conversion
    * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', 'n' and 'Z'.
    *
    * 'n' is the same as 'g', except it uses the current locale.
    * 'Z' is the same as 'g', except it always has a decimal and
    *     at least one digit after the decimal.
    *
    * Return value: The pointer to the buffer with the converted string,
    * or NULL if the format string is rejected (including an empty format)
    * or, for 'n', if thousands grouping fails.
    **/
   char *
   PyOS_ascii_formatd(char       *buffer,
                      size_t      buf_size,
                      const char *format,
                      double      d)
   {
       char format_char;
       size_t format_len;

       /* For type 'n', we need to make a copy of the format string, because
          we're going to modify 'n' -> 'g', and format is const char*, so we
          can't modify it directly.  FLOAT_FORMATBUFLEN should be longer than
          we ever need this to be.  There's an upcoming check to ensure it's
          big enough. */
       /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
          also with at least one character past the decimal. */
       char tmp_format[FLOAT_FORMATBUFLEN];

       /* Validate the leading '%' before anything else.  This also
          guarantees the format is non-empty, so indexing its last
          character below cannot read before the start of the string. */
       if (format[0] != '%')
           return NULL;

       /* The last character in the format string must be the format char */
       format_len = strlen(format);
       format_char = format[format_len - 1];

       /* Reject formats that contain, after the first character, a single
          quote, a lowercase l, or a percent. */
       if (strpbrk(format + 1, "'l%"))
           return NULL;

       /* Also curious about this function is that it accepts format strings
          like "%xg", which are invalid for floats.  In general, the
          interface to this function is not very good, but changing it is
          difficult because it's a public API. */

       if (!(format_char == 'e' || format_char == 'E' ||
             format_char == 'f' || format_char == 'F' ||
             format_char == 'g' || format_char == 'G' ||
             format_char == 'n' || format_char == 'Z'))
           return NULL;

       /* Map 'n' or 'Z' format_char to 'g', by copying the format string and
          replacing the final char with a 'g' */
       if (format_char == 'n' || format_char == 'Z') {
           if (format_len + 1 >= sizeof(tmp_format)) {
               /* The format won't fit in our copy.  Error out.  In
                  practice, this will never happen and will be
                  detected by returning NULL */
               return NULL;
           }
           strcpy(tmp_format, format);
           tmp_format[format_len - 1] = 'g';
           format = tmp_format;
       }


       /* Have PyOS_snprintf do the hard work */
       PyOS_snprintf(buffer, buf_size, format, d);

       /* Do various fixups on the return string */

       /* Get the current locale, and find the decimal point string.
          Convert that string back to a dot.  Do not do this if using the
          'n' (number) format code, since we want to keep the localized
          decimal point in that case. */
       if (format_char != 'n')
           change_decimal_from_locale_to_dot(buffer);

       /* If an exponent exists, ensure that the exponent is at least
          MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
          for the extra zeros.  Also, if there are more than
          MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
          back to MIN_EXPONENT_DIGITS */
       ensure_minumim_exponent_length(buffer, buf_size);

       /* If format_char is 'Z', make sure we have at least one character
          after the decimal point (and make sure we have a decimal point). */
       if (format_char == 'Z')
           ensure_decimal_point(buffer, buf_size);

       /* If format_char is 'n', add the thousands grouping. */
       if (format_char == 'n') {
           if (!add_thousands_grouping(buffer, buf_size))
               return NULL;
       }

       return buffer;
   }
       
   485 
       
   486 double
       
   487 PyOS_ascii_atof(const char *nptr)
       
   488 {
       
   489 	return PyOS_ascii_strtod(nptr, NULL);
       
   490 }