/* Metrowerks Standard Library
 * Copyright  1995-2004 Metrowerks Corporation.  All rights reserved.
 *
 * $Date: 2004/01/26 21:32:33 $
 * $Revision: 1.11 $
 */

/*
 *	
 *  First written by:  Matthew Fassiotto on June 1, 1998
 *  
 *	Routines
 *	--------
 *		
 *		__num2dec
 *      __dec2num
 *
 * Purpose:  Library support routines for converting IEEE 32- and 64-bit floating point
 *           numbers to and from ANSI C strings.  These routines are called by atof, strtod,
 *           printf, scanf, et al.
 *
 * Theory:   These routines are Intel-specific and can be used on any Intel architecture with an
 *           FPU.  They are highly optimized in that any number will scale in two ops, of which at
 *           most one is a divide.  They also use the full precision of the FPU (64 significant
 *           bits) to achieve perfectly rounded double values.  This is centered around the fbstp
 *           and fbld instructions.  To keep the code size small, the tables in the header
 *           x80_powers_of_ten_fordoubles.h are only large enough for the range of a double.  The
 *           tables can be extended for 80-bit support.
 */

#include <ansi_fp.h>
#include <math.h>
#include <fenv.h>
#include <x80_powers_of_ten_fordoubles.h>

/*
 * Base addresses of the two power-of-ten tables, kept in pointer variables so
 * the inline assembly in __scalenum can load them into a register by name.
 * Entries are addressed there as 10-byte (tbyte) values.
 */
static const long *small_pow = (const long *)powers_of_ten;
static const long *big_pow   = (const long *)big_powers_of_ten;

/*
 * __scalenum -- scale the value on top of the FPU stack by a power of ten.
 *
 * Register/stack protocol (this is a naked function, so nothing is set up
 * by the compiler):
 *   st        in:  number to scale;  out: scaled number
 *   eax       in:  signed base-10 exponent
 *   [esp+4]   in:  "direction", pushed by the caller.  After sign handling,
 *                  0 means multiply by 10^|eax|, non-zero means divide.
 *   eax, ecx and edx are modified.
 *
 * A negative exponent is handled by flipping bit 0 of direction and negating
 * eax, so the tables only need non-negative powers.
 *
 * The exponent is split as (eax & ~31) + (eax & 31).  The low part indexes
 * the small table; the high part, shifted right by 5 and decremented,
 * indexes the big table.  Entries in both tables are 10 bytes wide.  When
 * the exponent is below 32 the big table is skipped and the dummy 1.0 loaded
 * by fld1 stands in for the big factor.  The two factors are multiplied
 * together first and then applied to the number with a single multiply or
 * divide.
 *
 * If the high part is >= 400 the number is replaced by the tbyte stored at
 * big_powers_of_ten + 110 and the routine returns at once; the original
 * comment says this is meant to return inf (the table contents are not
 * visible in this file).
 * NOTE(review): this early-out does not look at direction -- confirm that
 * is intended when the caller asked for a divide.
 *
 * NOTE(review): the result of "sub edx,eax" is overwritten by
 * "mov edx, big_pow" before it is read, so that instruction appears to have
 * no effect.
 *
 * The routine ends with "ret 4", i.e. it removes the direction argument
 * itself; the callers in this file push direction and do not adjust esp
 * afterwards, even though the prototype is marked __cdecl.
 */
static __declspec(naked)  void __cdecl __scalenum(int)
{
    asm
    {
	    cmp 	eax,0
	    jge 	positive_exp
	    xor 	dword ptr [esp+4],0x00000001
	    neg 	eax				; eax guaranteed > 0 
positive_exp: 
	    fld1					; dummy stack element for the case where
								; we skip load_big_pow
	    mov 	ecx,eax			; ecx guaranteed > 0  
	    mov 	edx,eax			; edx will be the sum of eax and ecx in load_big_pow                 
	    cmp 	ecx,32			; 32 is the number of elements in the
								; table containing the small powers
								; of ten. this number will vary according to
								; the optimal table size(varies according to precision
								; of number). for 80 bit numberes the small table has 64 elements
	    jl  	load_small_pow                       
	    and 	eax,0xffffffe0	; this value shifted right 5(divided by 32) = index -1 for big_pow 
	    and 	ecx,0x0000001f	;  0 <= ecx < 32  and note 10^(eax+ecx)= scale factor
load_big_pow:
	    fstp 	st				; we do not need 1 in st anymore
								; checking for really big exponents where we bail here and
								; return inf
	    cmp 	eax,400
	    jl 		not_humungous
	    fld 	tbyte ptr[big_powers_of_ten + 110] 
	    fstp 	st(1)
	    jmp 	finished
not_humungous:                                
	    sub  	edx,eax			; edx=remainder of exp/32(note edx >= eax)
	    shr  	eax,5    
	    dec  	eax				; we overestimate big pow and do the opposite
								; operation with little pow(i.e. div vs. mul)
	    imul 	eax,10
	    mov  	edx, big_pow
	    add 	edx,eax
	    fld 	tbyte ptr[edx]              
load_small_pow:
	    mov 	edx,small_pow
	    imul 	ecx,10
	    add 	edx,ecx
	    fld 	tbyte ptr[edx]	; order is important and must be the same at all times!!
	                           	; st=little, st(1)=bigpower of 10, st(2)=number itself                          
	    fmul                   	; multiply big and small power of 10 to get scale factor
	    cmp 	dword ptr [esp+4],0
	    jne 	divide_res
	    fmul
	    jmp 	finished   
divide_res:
	    fdivp  	st(1),st
finished:    
		ret		4				; remove direction
  }	
/* common code ends here */
}

/*
 * __num2dec -- convert a double to an 18-digit decimal record.
 *
 * Parameters:
 *   f  (unnamed) formatting request; it is not used by this implementation,
 *      which always produces 18 significant digits for finite non-zero input.
 *   x  value to convert.
 *   d  receives the result:
 *        d->sgn        set from signbit(x) >> 31
 *        d->exp        base-10 exponent (finite non-zero input only)
 *        d->sig.text   ASCII digits, most significant first
 *        d->sig.length number of valid characters in sig.text
 *
 * Special values are reported as a single character in sig.text[0] with
 * length 1 -- 'N' for NaN, 'I' for infinity, '0' for zero -- and return
 * before d->exp is written.
 */
_MSL_IMP_EXP_C void _MSL_CDECL __num2dec(const  decform * /* f */, double x, decimal *d)
{    
      /* x87 control word loaded for the conversion: all exceptions masked,
         64-bit precision, round to nearest */
      short NEAREST_FULLPREC=0X037F;
      float seventeen=17.0F;
      short TEMP;                       /* caller's control word, restored at the end */
      double TEN_TO_SEVENTEEN=1e17;	
      double TEN=10.0;	
      const register unsigned char* tmp=&(d->sig.text[0]);  /* the asm writes the digits through this */
      const int* exp_tmp=(int*)(&(d->exp));                 /* NOTE(review): not referenced below */
      int direction;
      
      d->sgn= (char)(signbit(x)>>31); 
	
	switch(fpclassify(x))
    {
        case FP_NAN:
         d->sig.text[0] = 'N';
         d->sig.length=(char)1;
         return;  
         break;
        
        case FP_INFINITE:
         d->sig.text[0] = 'I';
         d->sig.length=(char)1;
         return; 
         break;
        
        case FP_ZERO:
         d->sig.text[0] = '0';
         d->sig.length=(char)1;
         return; 
         break;
        
        case FP_NORMAL:	
        case FP_SUBNORMAL:
        /* direction 1 asks __scalenum to divide by 10^exp; __scalenum flips
           it to a multiply when the exponent is negative */
        direction=1;
 
        /*
         * Outline of the assembly below:
         *  1. Save the control word and switch to NEAREST_FULLPREC.
         *  2. Load |x| twice; fxtract splits the top copy.  fxtract leaves
         *     the significand in st and the binary exponent in st(1) (the
         *     inline comment lists them the other way round); the
         *     fxch / fstp st(1) pair then keeps only the exponent.
         *  3. exponent * log10(2) - 17, rounded by fistp, is stored in
         *     d->exp and reloaded into eax for __scalenum, which scales the
         *     remaining copy of |x| by that power of ten.
         *  4. If the scaled value is below 1e17 it is multiplied by 10 and
         *     d->exp is decremented.
         *  5. fbstp writes the value as 10 bytes of packed BCD at esp+4
         *     (14 bytes are reserved; the lowest 4 are not referenced here).
         *  6. The loop walks the 9 digit bytes from most to least
         *     significant, turning each into two ASCII characters stored
         *     through tmp, i.e. 18 characters in d->sig.text.
         *  7. The stack and the saved control word are restored.
         */
        asm
	    {
          fstcw 	TEMP			; backup control word and reload after calculation
	      fldcw 	NEAREST_FULLPREC
	      fld 		x
	      fabs					; the sign of x is in d->sgn already,so sign of number is irrelevant now
	      fld 		st				; need x later to multiply by 10^exp10
	      fxtract				; st=base 2 exponent st(1)=sig
	      fxch
	      fstp 		st(1)			; significand not needed
	      fldlg2				; log10(2)
          fmul					; st=base 10 exponent=exp10
	      fsub 		seventeen		; exp of 10 to  scale number to be btwn. [10e^17,10e18)
	      mov 		eax,d           ; 
	      fistp		decimal.exp[eax]; rounded to nearest exponent produces value in 
	      movsx		eax, decimal.exp[eax]
	      push		direction
          call 		__scalenum
          fld		TEN_TO_SEVENTEEN
          fcomp		st(1)  
          fstsw		ax
          sahf
          jbe 		start_for_real
          fmul 		TEN
          mov		eax,d                
	      dec		decimal.exp[eax]
start_for_real:          
          sub		esp,14			; 4 bytes for address of length
          mov		eax, tmp		; sig.text[0]                              
          fbstp		tbyte ptr[esp+4]; will round number to get all integer digits exact   	
          mov		ecx,9	  
store_bytes:
          dec   	ecx
          mov   	dl,byte ptr[esp+4+ecx]	; start at most significant byte
          mov   	dh,dl                
          shr   	dl,4
          and   	dx,0x0f0f
          or    	dx,0x3030				; the first 4 bits of an ASCII digit
          mov   	word ptr[eax],dx
          add   	eax,2					; eax is a byte pointer(tmp)    
          jcxz 		done
          jmp 		store_bytes
done:
          add 		esp,14
          fldcw 	TEMP
		}/* end of .asm routine */
	} /* end of switch */
    
    /* only the finite, non-zero path reaches this point */
    d->sig.length=18;
}

/*
 * __dec2num -- convert a decimal record to a double.
 *
 * Parameters:
 *   d  decimal record: d->sig.text holds ASCII digits (most significant
 *      first), d->sig.length the number of valid digits, d->exp the base-10
 *      exponent applied to those digits, and d->sgn is non-zero for a
 *      negative value.
 *
 * Returns the value of the record as a double.
 *
 * Method: the first 18 digits are packed, two per byte, into the 10-byte
 * packed-BCD operand expected by fbld (least significant byte first, sign in
 * bit 7 of the last byte).  The resulting 18-digit integer is then scaled by
 * 10^(exp - (18 - length)) through __scalenum with the FPU precision control
 * set to full precision; the caller's control word is restored afterwards.
 *
 * Digit positions at or beyond d->sig.length are packed as zero, which is
 * what the exponent adjustment above assumes, so stale bytes left in
 * sig.text past the valid digits cannot leak into the result.  Any non-zero
 * d->sgn is mapped to the BCD sign bit, so both 1 and -1 mean "negative".
 */
_MSL_IMP_EXP_C double _MSL_CDECL __dec2num(const decimal *d)
{
	unsigned char p[10];	/* 80 bit packed decimal array */
							/* bit 80=sign 79-73=reserved ; bits 1-72 are digits */
	unsigned char *p2 = p;	/* the asm needs the address in a plain variable */
	double ret_val = 0.0;
	short temp, oldcw;
	int direction = 0;		/* 0: __scalenum multiplies for a positive exponent */
	int ndigits = d->sig.length;
	int i;

	if (ndigits > 18)
		ndigits = 18;		/* fbld holds at most 18 digits; extra digits are dropped */

	for (i = 0; i < 9; i++)
	{
		/* text[2*i] becomes the high nibble, text[2*i+1] the low nibble;
		   the most significant pair lands in p[8] */
		unsigned char hi = 0;
		unsigned char lo = 0;

		if (2*i < ndigits)
			hi = (unsigned char)(d->sig.text[2*i] & 0x0F);
		if (2*i + 1 < ndigits)
			lo = (unsigned char)(d->sig.text[2*i + 1] & 0x0F);

		p[8 - i] = (unsigned char)((hi << 4) | lo);
	}

	/* only bit 7 of the last byte carries the sign for fbld */
	p[9] = (unsigned char)(d->sgn ? 0x80 : 0x00);

	/* the packed value is sig * 10^(18 - length); compensate in the exponent */
	i = d->exp - (18 - d->sig.length);

	asm							/*- pmk 010402 -*/
	{
		fstcw 	oldcw				; save FPU_register 				
		mov 	ax, oldcw			; tfr values to usable space
		or 		ax, 0x0300			; gives full precision
		mov 	temp, ax
		fldcw 	temp 
		mov  	edx,p2
		fbld 	tbyte ptr[edx]
		mov 	eax,i				; load base 10 exponent
		push	direction
		call 	__scalenum
		fstp 	ret_val
		fldcw 	oldcw				; return_original precision
	}

	return ret_val;
}  

/* Change record: 
 * pmk 010402 dec2num function did not save & restore current values of FPU register
 * ejs 010403 added "register" keyword for new compiler, fixed bug accessing var, formatting 
 * ejs 010726 turn off K6 calls for naked assembly function taking args on FP stack
 * cc  011203 Added _MSL_CDECL for new name mangling 
 * ejs 020125 Turn off K6 calls pragmas, add _MSL_IMP_EXP_C
 * cc  020521 Changed the double_t to double
 * ejs 030217 Move "direction" into a local for thread safety
 */