crypto/weakcryptospi/source/bigint/algorithms.cpp
changeset 19 cd501b96611d
equal deleted inserted replaced
15:da2ae96f639b 19:cd501b96611d
       
     1 /*
       
     2 * Copyright (c) 2003-2009 Nokia Corporation and/or its subsidiary(-ies).
       
     3 * All rights reserved.
       
     4 * This component and the accompanying materials are made available
       
     5 * under the terms of the License "Eclipse Public License v1.0"
       
     6 * which accompanies this distribution, and is available
       
     7 * at the URL "http://www.eclipse.org/legal/epl-v10.html".
       
     8 *
       
     9 * Initial Contributors:
       
    10 * Nokia Corporation - initial contribution.
       
    11 *
       
    12 * Contributors:
       
    13 *
       
    14 * Description: 
       
    15 *
       
    16 */
       
    17 
       
    18 
       
    19 #include "words.h"
       
    20 #include "algorithms.h"
       
    21 
       
    22 word Add(word *C, const word *A, const word *B, unsigned int N)
       
    23 {
       
    24 	assert (N%2 == 0);
       
    25 	word carry = 0;
       
    26 	for (unsigned int i = 0; i < N; i+=2)
       
    27 	{
       
    28 		dword u = (dword) carry + A[i] + B[i];
       
    29 		C[i] = LOW_WORD(u);
       
    30 		u = (dword) HIGH_WORD(u) + A[i+1] + B[i+1];
       
    31 		C[i+1] = LOW_WORD(u);
       
    32 		carry = HIGH_WORD(u);
       
    33 	}
       
    34 	return carry;
       
    35 }
       
    36 
       
    37 word Subtract(word *C, const word *A, const word *B, unsigned int N)
       
    38 {
       
    39 	assert (N%2 == 0);
       
    40 	word borrow=0;
       
    41 	for (unsigned i = 0; i < N; i+=2)
       
    42 	{
       
    43 		dword u = (dword) A[i] - B[i] - borrow;
       
    44 		C[i] = LOW_WORD(u);
       
    45 		u = (dword) A[i+1] - B[i+1] - (word)(0-HIGH_WORD(u));
       
    46 		C[i+1] = LOW_WORD(u);
       
    47 		borrow = 0-HIGH_WORD(u);
       
    48 	}
       
    49 	return borrow;
       
    50 }
       
    51 
       
    52 int Compare(const word *A, const word *B, unsigned int N)
       
    53 {
       
    54 	while (N--)
       
    55 		if (A[N] > B[N])
       
    56 			return 1;
       
    57 		else if (A[N] < B[N])
       
    58 			return -1;
       
    59 
       
    60 	return 0;
       
    61 }
       
    62 
       
    63 // It is the job of the calling code to ensure that this won't carry.
       
    64 // If you aren't sure, use the next version that will tell you if you need to
       
    65 // grow your integer.
       
    66 // Having two of these creates ever so slightly more code but avoids having
       
    67 // ifdefs all over the rest of the code checking the following type stuff which
       
    68 // causes warnings in certain compilers about unused parameters in release
       
    69 // builds.  We can't have that can we!
       
    70 /*
       
    71 Allows avoid this all over bigint.cpp and primes.cpp
       
    72 ifdef _DEBUG
       
    73 	TUint carry = Increment(Ptr(), Size());
       
    74 	assert(!carry);
       
    75 else
       
    76 	Increment(Ptr(), Size())
       
    77 endif
       
    78 */
       
    79 void IncrementNoCarry(word *A, unsigned int N, word B)
       
    80 {
       
    81 	assert(N);
       
    82 	word t = A[0];
       
    83 	A[0] = t+B;
       
    84 	if (A[0] >= t)
       
    85 		return;
       
    86 	for (unsigned i=1; i<N; i++)
       
    87 		if (++A[i])
       
    88 			return;
       
    89 	assert(0);
       
    90 }
       
    91 
       
    92 word Increment(word *A, unsigned int N, word B)
       
    93 {
       
    94 	assert(N);
       
    95 	word t = A[0];
       
    96 	A[0] = t+B;
       
    97 	if (A[0] >= t)
       
    98 		return 0;
       
    99 	for (unsigned i=1; i<N; i++)
       
   100 		if (++A[i])
       
   101 			return 0;
       
   102 	return 1;
       
   103 }
       
   104 
       
   105 //See commments above about IncrementNoCarry
       
   106 void DecrementNoCarry(word *A, unsigned int N, word B)
       
   107 {
       
   108 	assert(N);
       
   109 	word t = A[0];
       
   110 	A[0] = t-B;
       
   111 	if (A[0] <= t)
       
   112 		return;
       
   113 	for (unsigned i=1; i<N; i++)
       
   114 		if (A[i]--)
       
   115 			return;
       
   116 	assert(0);
       
   117 }
       
   118 
       
   119 word Decrement(word *A, unsigned int N, word B)
       
   120 {
       
   121 	assert(N);
       
   122 	word t = A[0];
       
   123 	A[0] = t-B;
       
   124 	if (A[0] <= t)
       
   125 		return 0;
       
   126 	for (unsigned i=1; i<N; i++)
       
   127 		if (A[i]--)
       
   128 			return 0;
       
   129 	return 1;
       
   130 }
       
   131 
       
   132 void TwosComplement(word *A, unsigned int N)
       
   133 {
       
   134 	Decrement(A, N);
       
   135 	for (unsigned i=0; i<N; i++)
       
   136 		A[i] = ~A[i];
       
   137 }
       
   138 
       
   139 static word LinearMultiply(word *C, const word *A, word B, unsigned int N)
       
   140 {
       
   141 	word carry=0;
       
   142 	for(unsigned i=0; i<N; i++)
       
   143 	{
       
   144 		dword p = (dword)A[i] * B + carry;
       
   145 		C[i] = LOW_WORD(p);
       
   146 		carry = HIGH_WORD(p);
       
   147 	}
       
   148 	return carry;
       
   149 }
       
   150 
       
   151 static void AtomicMultiply(word *C, const word *A, const word *B)
       
   152 {
       
   153 /*
       
   154 	word s;
       
   155 	dword d;
       
   156 
       
   157 	if (A1 >= A0)
       
   158 		if (B0 >= B1)
       
   159 		{
       
   160 			s = 0;
       
   161 			d = (dword)(A1-A0)*(B0-B1);
       
   162 		}
       
   163 		else
       
   164 		{
       
   165 			s = (A1-A0);
       
   166 			d = (dword)s*(word)(B0-B1);
       
   167 		}
       
   168 	else
       
   169 		if (B0 > B1)
       
   170 		{
       
   171 			s = (B0-B1);
       
   172 			d = (word)(A1-A0)*(dword)s;
       
   173 		}
       
   174 		else
       
   175 		{
       
   176 			s = 0;
       
   177 			d = (dword)(A0-A1)*(B1-B0);
       
   178 		}
       
   179 */
       
   180 	// this segment is the branchless equivalent of above
       
   181 	word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
       
   182 	unsigned int ai = A[1] < A[0];
       
   183 	unsigned int bi = B[0] < B[1];
       
   184 	unsigned int di = ai & bi;
       
   185 	dword d = (dword)D[di]*D[di+2];
       
   186 	D[1] = D[3] = 0;
       
   187 	unsigned int si = ai + !bi;
       
   188 	word s = D[si];
       
   189 
       
   190 	dword A0B0 = (dword)A[0]*B[0];
       
   191 	C[0] = LOW_WORD(A0B0);
       
   192 
       
   193 	dword A1B1 = (dword)A[1]*B[1];
       
   194 	dword t = (dword) HIGH_WORD(A0B0) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1);
       
   195 	C[1] = LOW_WORD(t);
       
   196 
       
   197 	t = A1B1 + HIGH_WORD(t) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s;
       
   198 	C[2] = LOW_WORD(t);
       
   199 	C[3] = HIGH_WORD(t);
       
   200 }
       
   201 
       
   202 static word AtomicMultiplyAdd(word *C, const word *A, const word *B)
       
   203 {
       
   204 	word D[4] = {A[1]-A[0], A[0]-A[1], B[0]-B[1], B[1]-B[0]};
       
   205 	unsigned int ai = A[1] < A[0];
       
   206 	unsigned int bi = B[0] < B[1];
       
   207 	unsigned int di = ai & bi;
       
   208 	dword d = (dword)D[di]*D[di+2];
       
   209 	D[1] = D[3] = 0;
       
   210 	unsigned int si = ai + !bi;
       
   211 	word s = D[si];
       
   212 
       
   213 	dword A0B0 = (dword)A[0]*B[0];
       
   214 	dword t = A0B0 + C[0];
       
   215 	C[0] = LOW_WORD(t);
       
   216 
       
   217 	dword A1B1 = (dword)A[1]*B[1];
       
   218 	t = (dword) HIGH_WORD(t) + LOW_WORD(A0B0) + LOW_WORD(d) + LOW_WORD(A1B1) + C[1];
       
   219 	C[1] = LOW_WORD(t);
       
   220 
       
   221 	t = (dword) HIGH_WORD(t) + LOW_WORD(A1B1) + HIGH_WORD(A0B0) + HIGH_WORD(d) + HIGH_WORD(A1B1) - s + C[2];
       
   222 	C[2] = LOW_WORD(t);
       
   223 
       
   224 	t = (dword) HIGH_WORD(t) + HIGH_WORD(A1B1) + C[3];
       
   225 	C[3] = LOW_WORD(t);
       
   226 	return HIGH_WORD(t);
       
   227 }
       
   228 
       
   229 static inline void AtomicMultiplyBottom(word *C, const word *A, const word *B)
       
   230 {
       
   231 	dword t = (dword)A[0]*B[0];
       
   232 	C[0] = LOW_WORD(t);
       
   233 	C[1] = HIGH_WORD(t) + A[0]*B[1] + A[1]*B[0];
       
   234 }
       
   235 
       
   236 #define MulAcc(x, y)								\
       
   237 	p = (dword)A[x] * B[y] + c; 					\
       
   238 	c = LOW_WORD(p);								\
       
   239 	p = (dword)d + HIGH_WORD(p);					\
       
   240 	d = LOW_WORD(p);								\
       
   241 	e += HIGH_WORD(p);
       
   242 
       
   243 #define SaveMulAcc(s, x, y) 						\
       
   244 	R[s] = c;										\
       
   245 	p = (dword)A[x] * B[y] + d; 					\
       
   246 	c = LOW_WORD(p);								\
       
   247 	p = (dword)e + HIGH_WORD(p);					\
       
   248 	d = LOW_WORD(p);								\
       
   249 	e = HIGH_WORD(p);
       
   250 
       
   251 #define MulAcc1(x, y)								\
       
   252 	p = (dword)A[x] * A[y] + c; 					\
       
   253 	c = LOW_WORD(p);								\
       
   254 	p = (dword)d + HIGH_WORD(p);					\
       
   255 	d = LOW_WORD(p);								\
       
   256 	e += HIGH_WORD(p);
       
   257 
       
   258 #define SaveMulAcc1(s, x, y) 						\
       
   259 	R[s] = c;										\
       
   260 	p = (dword)A[x] * A[y] + d; 					\
       
   261 	c = LOW_WORD(p);								\
       
   262 	p = (dword)e + HIGH_WORD(p);					\
       
   263 	d = LOW_WORD(p);								\
       
   264 	e = HIGH_WORD(p);
       
   265 
       
   266 #define SquAcc(x, y)								\
       
   267 	p = (dword)A[x] * A[y];	\
       
   268 	p = p + p + c; 					\
       
   269 	c = LOW_WORD(p);								\
       
   270 	p = (dword)d + HIGH_WORD(p);					\
       
   271 	d = LOW_WORD(p);								\
       
   272 	e += HIGH_WORD(p);
       
   273 
       
   274 #define SaveSquAcc(s, x, y) 						\
       
   275 	R[s] = c;										\
       
   276 	p = (dword)A[x] * A[y];	\
       
   277 	p = p + p + d; 					\
       
   278 	c = LOW_WORD(p);								\
       
   279 	p = (dword)e + HIGH_WORD(p);					\
       
   280 	d = LOW_WORD(p);								\
       
   281 	e = HIGH_WORD(p);
       
   282 
       
   283 // VC60 workaround: MSVC 6.0 has an optimization problem that makes
       
   284 // (dword)A*B where either A or B has been cast to a dword before
       
   285 // very expensive. Revisit a CombaSquare4() function when this
       
   286 // problem is fixed.               
       
   287 
       
   288 // WARNING: KeithR.  05/08/03 This routine doesn't work with gcc on hardware
       
   289 // either.  I've completely removed it.  It may be worth looking into sometime
       
   290 // in the future.
       
   291 /*#ifndef __WINS__
       
   292 static void CombaSquare4(word *R, const word *A)
       
   293 {
       
   294 	dword p;
       
   295 	word c, d, e;
       
   296 
       
   297 	p = (dword)A[0] * A[0];
       
   298 	R[0] = LOW_WORD(p);
       
   299 	c = HIGH_WORD(p);
       
   300 	d = e = 0;
       
   301 
       
   302 	SquAcc(0, 1);
       
   303 
       
   304 	SaveSquAcc(1, 2, 0);
       
   305 	MulAcc1(1, 1);
       
   306 
       
   307 	SaveSquAcc(2, 0, 3);
       
   308 	SquAcc(1, 2);
       
   309 
       
   310 	SaveSquAcc(3, 3, 1);
       
   311 	MulAcc1(2, 2);
       
   312 
       
   313 	SaveSquAcc(4, 2, 3);
       
   314 
       
   315 	R[5] = c;
       
   316 	p = (dword)A[3] * A[3] + d;
       
   317 	R[6] = LOW_WORD(p);
       
   318 	R[7] = e + HIGH_WORD(p);
       
   319 }
       
   320 #endif */
       
   321 
       
   322 static void CombaMultiply4(word *R, const word *A, const word *B)
       
   323 {
       
   324 	dword p;
       
   325 	word c, d, e;
       
   326 
       
   327 	p = (dword)A[0] * B[0];
       
   328 	R[0] = LOW_WORD(p);
       
   329 	c = HIGH_WORD(p);
       
   330 	d = e = 0;
       
   331 
       
   332 	MulAcc(0, 1);
       
   333 	MulAcc(1, 0);
       
   334 
       
   335 	SaveMulAcc(1, 2, 0);
       
   336 	MulAcc(1, 1);
       
   337 	MulAcc(0, 2);
       
   338 
       
   339 	SaveMulAcc(2, 0, 3);
       
   340 	MulAcc(1, 2);
       
   341 	MulAcc(2, 1);
       
   342 	MulAcc(3, 0);
       
   343 
       
   344 	SaveMulAcc(3, 3, 1);
       
   345 	MulAcc(2, 2);
       
   346 	MulAcc(1, 3);
       
   347 
       
   348 	SaveMulAcc(4, 2, 3);
       
   349 	MulAcc(3, 2);
       
   350 
       
   351 	R[5] = c;
       
   352 	p = (dword)A[3] * B[3] + d;
       
   353 	R[6] = LOW_WORD(p);
       
   354 	R[7] = e + HIGH_WORD(p);
       
   355 }
       
   356 
       
   357 static void CombaMultiply8(word *R, const word *A, const word *B)
       
   358 {
       
   359 	dword p;
       
   360 	word c, d, e;
       
   361 
       
   362 	p = (dword)A[0] * B[0];
       
   363 	R[0] = LOW_WORD(p);
       
   364 	c = HIGH_WORD(p);
       
   365 	d = e = 0;
       
   366 
       
   367 	MulAcc(0, 1);
       
   368 	MulAcc(1, 0);
       
   369 
       
   370 	SaveMulAcc(1, 2, 0);
       
   371 	MulAcc(1, 1);
       
   372 	MulAcc(0, 2);
       
   373 
       
   374 	SaveMulAcc(2, 0, 3);
       
   375 	MulAcc(1, 2);
       
   376 	MulAcc(2, 1);
       
   377 	MulAcc(3, 0);
       
   378 
       
   379 	SaveMulAcc(3, 0, 4);
       
   380 	MulAcc(1, 3);
       
   381 	MulAcc(2, 2);
       
   382 	MulAcc(3, 1);
       
   383 	MulAcc(4, 0);
       
   384 
       
   385 	SaveMulAcc(4, 0, 5);
       
   386 	MulAcc(1, 4);
       
   387 	MulAcc(2, 3);
       
   388 	MulAcc(3, 2);
       
   389 	MulAcc(4, 1);
       
   390 	MulAcc(5, 0);
       
   391 
       
   392 	SaveMulAcc(5, 0, 6);
       
   393 	MulAcc(1, 5);
       
   394 	MulAcc(2, 4);
       
   395 	MulAcc(3, 3);
       
   396 	MulAcc(4, 2);
       
   397 	MulAcc(5, 1);
       
   398 	MulAcc(6, 0);
       
   399 
       
   400 	SaveMulAcc(6, 0, 7);
       
   401 	MulAcc(1, 6);
       
   402 	MulAcc(2, 5);
       
   403 	MulAcc(3, 4);
       
   404 	MulAcc(4, 3);
       
   405 	MulAcc(5, 2);
       
   406 	MulAcc(6, 1);
       
   407 	MulAcc(7, 0);
       
   408 
       
   409 	SaveMulAcc(7, 1, 7);
       
   410 	MulAcc(2, 6);
       
   411 	MulAcc(3, 5);
       
   412 	MulAcc(4, 4);
       
   413 	MulAcc(5, 3);
       
   414 	MulAcc(6, 2);
       
   415 	MulAcc(7, 1);
       
   416 
       
   417 	SaveMulAcc(8, 2, 7);
       
   418 	MulAcc(3, 6);
       
   419 	MulAcc(4, 5);
       
   420 	MulAcc(5, 4);
       
   421 	MulAcc(6, 3);
       
   422 	MulAcc(7, 2);
       
   423 
       
   424 	SaveMulAcc(9, 3, 7);
       
   425 	MulAcc(4, 6);
       
   426 	MulAcc(5, 5);
       
   427 	MulAcc(6, 4);
       
   428 	MulAcc(7, 3);
       
   429 
       
   430 	SaveMulAcc(10, 4, 7);
       
   431 	MulAcc(5, 6);
       
   432 	MulAcc(6, 5);
       
   433 	MulAcc(7, 4);
       
   434 
       
   435 	SaveMulAcc(11, 5, 7);
       
   436 	MulAcc(6, 6);
       
   437 	MulAcc(7, 5);
       
   438 
       
   439 	SaveMulAcc(12, 6, 7);
       
   440 	MulAcc(7, 6);
       
   441 
       
   442 	R[13] = c;
       
   443 	p = (dword)A[7] * B[7] + d;
       
   444 	R[14] = LOW_WORD(p);
       
   445 	R[15] = e + HIGH_WORD(p);
       
   446 }
       
   447 
       
   448 static void CombaMultiplyBottom4(word *R, const word *A, const word *B)
       
   449 {
       
   450 	dword p;
       
   451 	word c, d, e;
       
   452 
       
   453 	p = (dword)A[0] * B[0];
       
   454 	R[0] = LOW_WORD(p);
       
   455 	c = HIGH_WORD(p);
       
   456 	d = e = 0;
       
   457 
       
   458 	MulAcc(0, 1);
       
   459 	MulAcc(1, 0);
       
   460 
       
   461 	SaveMulAcc(1, 2, 0);
       
   462 	MulAcc(1, 1);
       
   463 	MulAcc(0, 2);
       
   464 
       
   465 	R[2] = c;
       
   466 	R[3] = d + A[0] * B[3] + A[1] * B[2] + A[2] * B[1] + A[3] * B[0];
       
   467 }
       
   468 
       
   469 static void CombaMultiplyBottom8(word *R, const word *A, const word *B)
       
   470 {
       
   471 	dword p;
       
   472 	word c, d, e;
       
   473 
       
   474 	p = (dword)A[0] * B[0];
       
   475 	R[0] = LOW_WORD(p);
       
   476 	c = HIGH_WORD(p);
       
   477 	d = e = 0;
       
   478 
       
   479 	MulAcc(0, 1);
       
   480 	MulAcc(1, 0);
       
   481 
       
   482 	SaveMulAcc(1, 2, 0);
       
   483 	MulAcc(1, 1);
       
   484 	MulAcc(0, 2);
       
   485 
       
   486 	SaveMulAcc(2, 0, 3);
       
   487 	MulAcc(1, 2);
       
   488 	MulAcc(2, 1);
       
   489 	MulAcc(3, 0);
       
   490 
       
   491 	SaveMulAcc(3, 0, 4);
       
   492 	MulAcc(1, 3);
       
   493 	MulAcc(2, 2);
       
   494 	MulAcc(3, 1);
       
   495 	MulAcc(4, 0);
       
   496 
       
   497 	SaveMulAcc(4, 0, 5);
       
   498 	MulAcc(1, 4);
       
   499 	MulAcc(2, 3);
       
   500 	MulAcc(3, 2);
       
   501 	MulAcc(4, 1);
       
   502 	MulAcc(5, 0);
       
   503 
       
   504 	SaveMulAcc(5, 0, 6);
       
   505 	MulAcc(1, 5);
       
   506 	MulAcc(2, 4);
       
   507 	MulAcc(3, 3);
       
   508 	MulAcc(4, 2);
       
   509 	MulAcc(5, 1);
       
   510 	MulAcc(6, 0);
       
   511 
       
   512 	R[6] = c;
       
   513 	R[7] = d + A[0] * B[7] + A[1] * B[6] + A[2] * B[5] + A[3] * B[4] +
       
   514 				A[4] * B[3] + A[5] * B[2] + A[6] * B[1] + A[7] * B[0];
       
   515 }
       
   516 
       
   517 #undef MulAcc
       
   518 #undef SaveMulAcc
       
   519 static void AtomicInverseModPower2(word *C, word A0, word A1)
       
   520 {
       
   521 	assert(A0%2==1);
       
   522 
       
   523 	dword A=MAKE_DWORD(A0, A1), R=A0%8;
       
   524 
       
   525 	for (unsigned i=3; i<2*WORD_BITS; i*=2)
       
   526 		R = R*(2-R*A);
       
   527 
       
   528 	assert(R*A==1);
       
   529 
       
   530 	C[0] = LOW_WORD(R);
       
   531 	C[1] = HIGH_WORD(R);
       
   532 }
       
   533 // ********************************************************
       
   534 
       
   535 #define A0		A
       
   536 #define A1		(A+N2)
       
   537 #define B0		B
       
   538 #define B1		(B+N2)
       
   539 
       
   540 #define T0		T
       
   541 #define T1		(T+N2)
       
   542 #define T2		(T+N)
       
   543 #define T3		(T+N+N2)
       
   544 
       
   545 #define R0		R
       
   546 #define R1		(R+N2)
       
   547 #define R2		(R+N)
       
   548 #define R3		(R+N+N2)
       
   549 
       
   550 // R[2*N] - result = A*B
       
   551 // T[2*N] - temporary work space
       
   552 // A[N] --- multiplier
       
   553 // B[N] --- multiplicant
       
   554 
       
   555 void RecursiveMultiply(word *R, word *T, const word *A, const word *B, unsigned int N)
       
   556 {
       
   557 	assert(N>=2 && N%2==0);
       
   558 
       
   559 	if (N==2)
       
   560 		AtomicMultiply(R, A, B);
       
   561 	else if (N==4)
       
   562 		CombaMultiply4(R, A, B);
       
   563 	else if (N==8)
       
   564 		CombaMultiply8(R, A, B);
       
   565 	else
       
   566 	{
       
   567 		const unsigned int N2 = N/2;
       
   568 		int carry;
       
   569 
       
   570 		int aComp = Compare(A0, A1, N2);
       
   571 		int bComp = Compare(B0, B1, N2);
       
   572 
       
   573 		switch (2*aComp + aComp + bComp)
       
   574 		{
       
   575 		case -4:
       
   576 			Subtract(R0, A1, A0, N2);
       
   577 			Subtract(R1, B0, B1, N2);
       
   578 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   579 			Subtract(T1, T1, R0, N2);
       
   580 			carry = -1;
       
   581 			break;
       
   582 		case -2:
       
   583 			Subtract(R0, A1, A0, N2);
       
   584 			Subtract(R1, B0, B1, N2);
       
   585 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   586 			carry = 0;
       
   587 			break;
       
   588 		case 2:
       
   589 			Subtract(R0, A0, A1, N2);
       
   590 			Subtract(R1, B1, B0, N2);
       
   591 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   592 			carry = 0;
       
   593 			break;
       
   594 		case 4:
       
   595 			Subtract(R0, A1, A0, N2);
       
   596 			Subtract(R1, B0, B1, N2);
       
   597 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   598 			Subtract(T1, T1, R1, N2);
       
   599 			carry = -1;
       
   600 			break;
       
   601 		default:
       
   602 			SetWords(T0, 0, N);
       
   603 			carry = 0;
       
   604 		}
       
   605 
       
   606 		RecursiveMultiply(R0, T2, A0, B0, N2);
       
   607 		RecursiveMultiply(R2, T2, A1, B1, N2);
       
   608 
       
   609 		// now T[01] holds (A1-A0)*(B0-B1), R[01] holds A0*B0, R[23] holds A1*B1
       
   610 
       
   611 		carry += Add(T0, T0, R0, N);
       
   612 		carry += Add(T0, T0, R2, N);
       
   613 		carry += Add(R1, R1, T0, N);
       
   614 
       
   615 		assert (carry >= 0 && carry <= 2);
       
   616 		Increment(R3, N2, carry);
       
   617 	}
       
   618 }
       
   619 
       
   620 // R[2*N] - result = A*A
       
   621 // T[2*N] - temporary work space
       
   622 // A[N] --- number to be squared
       
   623 
       
   624 void RecursiveSquare(word *R, word *T, const word *A, unsigned int N)
       
   625 {
       
   626 	assert(N && N%2==0);
       
   627 
       
   628 	if (N==2)
       
   629 		AtomicMultiply(R, A, A);
       
   630 	else if (N==4)
       
   631 	{
       
   632 		// VC60 workaround: MSVC 6.0 has an optimization problem that makes
       
   633 		// (dword)A*B where either A or B has been cast to a dword before
       
   634 		// very expensive. Revisit a CombaSquare4() function when this
       
   635 		// problem is fixed.               
       
   636 
       
   637 // WARNING: KeithR. 05/08/03 This routine doesn't work with gcc on hardware
       
   638 // either.  I've completely removed it.  It may be worth looking into sometime
       
   639 // in the future.  Therefore, we use the CombaMultiply4 on all targets.
       
   640 //#ifdef __WINS__
       
   641 		CombaMultiply4(R, A, A);
       
   642 /*#else
       
   643 		CombaSquare4(R, A);
       
   644 #endif*/
       
   645 	}
       
   646 	else
       
   647 	{
       
   648 		const unsigned int N2 = N/2;
       
   649 
       
   650 		RecursiveSquare(R0, T2, A0, N2);
       
   651 		RecursiveSquare(R2, T2, A1, N2);
       
   652 		RecursiveMultiply(T0, T2, A0, A1, N2);
       
   653 
       
   654 		word carry = Add(R1, R1, T0, N);
       
   655 		carry += Add(R1, R1, T0, N);
       
   656 		Increment(R3, N2, carry);
       
   657 	}
       
   658 }
       
   659 // R[N] - bottom half of A*B
       
   660 // T[N] - temporary work space
       
   661 // A[N] - multiplier
       
   662 // B[N] - multiplicant
       
   663 
       
   664 void RecursiveMultiplyBottom(word *R, word *T, const word *A, const word *B, unsigned int N)
       
   665 {
       
   666 	assert(N>=2 && N%2==0);
       
   667 
       
   668 	if (N==2)
       
   669 		AtomicMultiplyBottom(R, A, B);
       
   670 	else if (N==4)
       
   671 		CombaMultiplyBottom4(R, A, B);
       
   672 	else if (N==8)
       
   673 		CombaMultiplyBottom8(R, A, B);
       
   674 	else
       
   675 	{
       
   676 		const unsigned int N2 = N/2;
       
   677 
       
   678 		RecursiveMultiply(R, T, A0, B0, N2);
       
   679 		RecursiveMultiplyBottom(T0, T1, A1, B0, N2);
       
   680 		Add(R1, R1, T0, N2);
       
   681 		RecursiveMultiplyBottom(T0, T1, A0, B1, N2);
       
   682 		Add(R1, R1, T0, N2);
       
   683 	}
       
   684 }
       
   685 
       
   686 // R[N] --- upper half of A*B
       
   687 // T[2*N] - temporary work space
       
   688 // L[N] --- lower half of A*B
       
   689 // A[N] --- multiplier
       
   690 // B[N] --- multiplicant
       
   691 
       
   692 void RecursiveMultiplyTop(word *R, word *T, const word *L, const word *A, const word *B, unsigned int N)
       
   693 {
       
   694 	assert(N>=2 && N%2==0);
       
   695 
       
   696 	if (N==2)
       
   697 	{
       
   698 		AtomicMultiply(T, A, B);
       
   699 		((dword *)R)[0] = ((dword *)T)[1];
       
   700 	}
       
   701 	else if (N==4)
       
   702 	{
       
   703 		CombaMultiply4(T, A, B);
       
   704 		((dword *)R)[0] = ((dword *)T)[2];
       
   705 		((dword *)R)[1] = ((dword *)T)[3];
       
   706 	}
       
   707 	else
       
   708 	{
       
   709 		const unsigned int N2 = N/2;
       
   710 		int carry;
       
   711 
       
   712 		int aComp = Compare(A0, A1, N2);
       
   713 		int bComp = Compare(B0, B1, N2);
       
   714 
       
   715 		switch (2*aComp + aComp + bComp)
       
   716 		{
       
   717 		case -4:
       
   718 			Subtract(R0, A1, A0, N2);
       
   719 			Subtract(R1, B0, B1, N2);
       
   720 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   721 			Subtract(T1, T1, R0, N2);
       
   722 			carry = -1;
       
   723 			break;
       
   724 		case -2:
       
   725 			Subtract(R0, A1, A0, N2);
       
   726 			Subtract(R1, B0, B1, N2);
       
   727 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   728 			carry = 0;
       
   729 			break;
       
   730 		case 2:
       
   731 			Subtract(R0, A0, A1, N2);
       
   732 			Subtract(R1, B1, B0, N2);
       
   733 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   734 			carry = 0;
       
   735 			break;
       
   736 		case 4:
       
   737 			Subtract(R0, A1, A0, N2);
       
   738 			Subtract(R1, B0, B1, N2);
       
   739 			RecursiveMultiply(T0, T2, R0, R1, N2);
       
   740 			Subtract(T1, T1, R1, N2);
       
   741 			carry = -1;
       
   742 			break;
       
   743 		default:
       
   744 			SetWords(T0, 0, N);
       
   745 			carry = 0;
       
   746 		}
       
   747 
       
   748 		RecursiveMultiply(T2, R0, A1, B1, N2);
       
   749 
       
   750 		// now T[01] holds (A1-A0)*(B0-B1), T[23] holds A1*B1
       
   751 
       
   752 		CopyWords(R0, L+N2, N2);
       
   753 		word c2 = Subtract(R0, R0, L, N2);
       
   754 		c2 += Subtract(R0, R0, T0, N2);
       
   755 		word t = (Compare(R0, T2, N2) == -1);
       
   756 
       
   757 		carry += t;
       
   758 		carry += Increment(R0, N2, c2+t);
       
   759 		carry += Add(R0, R0, T1, N2);
       
   760 		carry += Add(R0, R0, T3, N2);
       
   761 
       
   762 		CopyWords(R1, T3, N2);
       
   763 		assert (carry >= 0 && carry <= 2);
       
   764 		Increment(R1, N2, carry);
       
   765 	}
       
   766 }
       
   767 
       
   768 // R[NA+NB] - result = A*B
       
   769 // T[NA+NB] - temporary work space
       
   770 // A[NA] ---- multiplier
       
   771 // B[NB] ---- multiplicant
       
   772 
       
   773 void AsymmetricMultiply(word *R, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
       
   774 {
       
   775 	if (NA == NB)
       
   776 	{
       
   777 		if (A == B)
       
   778 			RecursiveSquare(R, T, A, NA);
       
   779 		else
       
   780 			RecursiveMultiply(R, T, A, B, NA);
       
   781 
       
   782 		return;
       
   783 	}
       
   784 
       
   785 	if (NA > NB)
       
   786 	{
       
   787 		TClassSwap(A, B);
       
   788 		TClassSwap(NA, NB);
       
   789 		//std::swap(A, B);
       
   790 		//std::swap(NA, NB);
       
   791 	}
       
   792 
       
   793 	assert(NB % NA == 0);
       
   794 	assert((NB/NA)%2 == 0); 	// NB is an even multiple of NA
       
   795 
       
   796 	if (NA==2 && !A[1])
       
   797 	{
       
   798 		switch (A[0])
       
   799 		{
       
   800 		case 0:
       
   801 			SetWords(R, 0, NB+2);
       
   802 			return;
       
   803 		case 1:
       
   804 			CopyWords(R, B, NB);
       
   805 			R[NB] = R[NB+1] = 0;
       
   806 			return;
       
   807 		default:
       
   808 			R[NB] = LinearMultiply(R, B, A[0], NB);
       
   809 			R[NB+1] = 0;
       
   810 			return;
       
   811 		}
       
   812 	}
       
   813 
       
   814 	RecursiveMultiply(R, T, A, B, NA);
       
   815 	CopyWords(T+2*NA, R+NA, NA);
       
   816 
       
   817 	unsigned i;
       
   818 
       
   819 	for (i=2*NA; i<NB; i+=2*NA)
       
   820 		RecursiveMultiply(T+NA+i, T, A, B+i, NA);
       
   821 	for (i=NA; i<NB; i+=2*NA)
       
   822 		RecursiveMultiply(R+i, T, A, B+i, NA);
       
   823 
       
   824 	if (Add(R+NA, R+NA, T+2*NA, NB-NA))
       
   825 		Increment(R+NB, NA);
       
   826 }
       
   827 // R[N] ----- result = A inverse mod 2**(WORD_BITS*N)
       
   828 // T[3*N/2] - temporary work space
       
   829 // A[N] ----- an odd number as input
       
   830 
       
   831 void RecursiveInverseModPower2(word *R, word *T, const word *A, unsigned int N)
       
   832 {
       
   833 	if (N==2)
       
   834 		AtomicInverseModPower2(R, A[0], A[1]);
       
   835 	else
       
   836 	{
       
   837 		const unsigned int N2 = N/2;
       
   838 		RecursiveInverseModPower2(R0, T0, A0, N2);
       
   839 		T0[0] = 1;
       
   840 		SetWords(T0+1, 0, N2-1);
       
   841 		RecursiveMultiplyTop(R1, T1, T0, R0, A0, N2);
       
   842 		RecursiveMultiplyBottom(T0, T1, R0, A1, N2);
       
   843 		Add(T0, R1, T0, N2);
       
   844 		TwosComplement(T0, N2);
       
   845 		RecursiveMultiplyBottom(R1, T1, R0, T0, N2);
       
   846 	}
       
   847 }
       
   848 #undef A0
       
   849 #undef A1
       
   850 #undef B0
       
   851 #undef B1
       
   852 
       
   853 #undef T0
       
   854 #undef T1
       
   855 #undef T2
       
   856 #undef T3
       
   857 
       
   858 #undef R0
       
   859 #undef R1
       
   860 #undef R2
       
   861 #undef R3
       
   862 
       
   863 // R[N] --- result = X/(2**(WORD_BITS*N)) mod M
       
   864 // T[3*N] - temporary work space
       
   865 // X[2*N] - number to be reduced
       
   866 // M[N] --- modulus
       
   867 // U[N] --- multiplicative inverse of M mod 2**(WORD_BITS*N)
       
   868 
       
   869 void MontgomeryReduce(word *R, word *T, const word *X, const word *M, const word *U, unsigned int N)
       
   870 {
       
   871 	RecursiveMultiplyBottom(R, T, X, U, N);
       
   872 	RecursiveMultiplyTop(T, T+N, X, R, M, N);
       
   873 	if (Subtract(R, X+N, T, N))
       
   874 	{
       
   875 #ifdef _DEBUG
       
   876 		word carry = Add(R, R, M, N);
       
   877 		assert(carry);
       
   878 #else
       
   879 		Add(R, R, M, N);
       
   880 #endif
       
   881 	}
       
   882 }
       
   883 
       
   884 // do a 3 word by 2 word divide, returns quotient and leaves remainder in A
       
   885 static word SubatomicDivide(word *A, word B0, word B1)
       
   886 {
       
   887 	// assert {A[2],A[1]} < {B1,B0}, so quotient can fit in a word
       
   888 	assert(A[2] < B1 || (A[2]==B1 && A[1] < B0));
       
   889 
       
   890 	dword p, u;
       
   891 	word Q;
       
   892 
       
   893 	// estimate the quotient: do a 2 word by 1 word divide
       
   894 	if (B1+1 == 0)
       
   895 		Q = A[2];
       
   896 	else
       
   897 		Q = word(MAKE_DWORD(A[1], A[2]) / (B1+1));
       
   898 
       
   899 	// now subtract Q*B from A
       
   900 	p = (dword) B0*Q;
       
   901 	u = (dword) A[0] - LOW_WORD(p);
       
   902 	A[0] = LOW_WORD(u);
       
   903 	u = (dword) A[1] - HIGH_WORD(p) - (word)(0-HIGH_WORD(u)) - (dword)B1*Q;
       
   904 	A[1] = LOW_WORD(u);
       
   905 	A[2] += HIGH_WORD(u);
       
   906 
       
   907 	// Q <= actual quotient, so fix it
       
   908 	while (A[2] || A[1] > B1 || (A[1]==B1 && A[0]>=B0))
       
   909 	{
       
   910 		u = (dword) A[0] - B0;
       
   911 		A[0] = LOW_WORD(u);
       
   912 		u = (dword) A[1] - B1 - (word)(0-HIGH_WORD(u));
       
   913 		A[1] = LOW_WORD(u);
       
   914 		A[2] += HIGH_WORD(u);
       
   915 		Q++;
       
   916 		assert(Q);	// shouldn't overflow
       
   917 	}
       
   918 
       
   919 	return Q;
       
   920 }
       
   921 
       
   922 // do a 4 word by 2 word divide, returns 2 word quotient in Q0 and Q1
       
   923 static inline void AtomicDivide(word *Q, const word *A, const word *B)
       
   924 {
       
   925 	if (!B[0] && !B[1]) // if divisor is 0, we assume divisor==2**(2*WORD_BITS)
       
   926 	{
       
   927 		Q[0] = A[2];
       
   928 		Q[1] = A[3];
       
   929 	}
       
   930 	else
       
   931 	{
       
   932 		word T[4];
       
   933 		T[0] = A[0]; T[1] = A[1]; T[2] = A[2]; T[3] = A[3];
       
   934 		Q[1] = SubatomicDivide(T+1, B[0], B[1]);
       
   935 		Q[0] = SubatomicDivide(T, B[0], B[1]);
       
   936 
       
   937 #ifdef _DEBUG
       
   938 		// multiply quotient and divisor and add remainder, make sure it equals dividend
       
   939 		assert(!T[2] && !T[3] && (T[1] < B[1] || (T[1]==B[1] && T[0]<B[0])));
       
   940 		word P[4];
       
   941 		AtomicMultiply(P, Q, B);
       
   942 		Add(P, P, T, 4);
       
   943 		assert(Mem::Compare((TUint8*)P, 4*WORD_SIZE, (TUint8*)A, 4*WORD_SIZE)==0);
       
   944 #endif
       
   945 	}
       
   946 }
       
   947 
       
   948 // for use by Divide(), corrects the underestimated quotient {Q1,Q0}
       
   949 static void CorrectQuotientEstimate(word *R, word *T, word *Q, const word *B, unsigned int N)
       
   950 {
       
   951 	assert(N && N%2==0);
       
   952 
       
   953 	if (Q[1])
       
   954 	{
       
   955 		T[N] = T[N+1] = 0;
       
   956 		unsigned i;
       
   957 		for (i=0; i<N; i+=4)
       
   958 			AtomicMultiply(T+i, Q, B+i);
       
   959 		for (i=2; i<N; i+=4)
       
   960 			if (AtomicMultiplyAdd(T+i, Q, B+i))
       
   961 				T[i+5] += (++T[i+4]==0);
       
   962 	}
       
   963 	else
       
   964 	{
       
   965 		T[N] = LinearMultiply(T, B, Q[0], N);
       
   966 		T[N+1] = 0;
       
   967 	}
       
   968 
       
   969 #ifdef _DEBUG
       
   970 	word borrow = Subtract(R, R, T, N+2);
       
   971 	assert(!borrow && !R[N+1]);
       
   972 #else
       
   973 	Subtract(R, R, T, N+2);
       
   974 #endif
       
   975 
       
   976 	while (R[N] || Compare(R, B, N) >= 0)
       
   977 	{
       
   978 		R[N] -= Subtract(R, R, B, N);
       
   979 		Q[1] += (++Q[0]==0);
       
   980 		assert(Q[0] || Q[1]); // no overflow
       
   981 	}
       
   982 }
       
   983 
       
   984 // R[NB] -------- remainder = A%B
       
   985 // Q[NA-NB+2] --- quotient	= A/B
       
   986 // T[NA+2*NB+4] - temp work space
       
   987 // A[NA] -------- dividend
       
   988 // B[NB] -------- divisor
       
   989 
       
   990 void Divide(word *R, word *Q, word *T, const word *A, unsigned int NA, const word *B, unsigned int NB)
       
   991 {
       
   992 	assert(NA && NB && NA%2==0 && NB%2==0);
       
   993 	assert(B[NB-1] || B[NB-2]);
       
   994 	assert(NB <= NA);
       
   995 
       
   996 	// set up temporary work space
       
   997 	word *const TA=T;
       
   998 	word *const TB=T+NA+2;
       
   999 	word *const TP=T+NA+2+NB;
       
  1000 
       
  1001 	// copy B into TB and normalize it so that TB has highest bit set to 1
       
  1002 	unsigned shiftWords = (B[NB-1]==0);
       
  1003 	TB[0] = TB[NB-1] = 0;
       
  1004 	CopyWords(TB+shiftWords, B, NB-shiftWords);
       
  1005 	unsigned shiftBits = WORD_BITS - BitPrecision(TB[NB-1]);
       
  1006 	assert(shiftBits < WORD_BITS);
       
  1007 	ShiftWordsLeftByBits(TB, NB, shiftBits);
       
  1008 
       
  1009 	// copy A into TA and normalize it
       
  1010 	TA[0] = TA[NA] = TA[NA+1] = 0;
       
  1011 	CopyWords(TA+shiftWords, A, NA);
       
  1012 	ShiftWordsLeftByBits(TA, NA+2, shiftBits);
       
  1013 
       
  1014 	if (TA[NA+1]==0 && TA[NA] <= 1)
       
  1015 	{
       
  1016 		Q[NA-NB+1] = Q[NA-NB] = 0;
       
  1017 		while (TA[NA] || Compare(TA+NA-NB, TB, NB) >= 0)
       
  1018 		{
       
  1019 			TA[NA] -= Subtract(TA+NA-NB, TA+NA-NB, TB, NB);
       
  1020 			++Q[NA-NB];
       
  1021 		}
       
  1022 	}
       
  1023 	else
       
  1024 	{
       
  1025 		NA+=2;
       
  1026 		assert(Compare(TA+NA-NB, TB, NB) < 0);
       
  1027 	}
       
  1028 
       
  1029 	word BT[2];
       
  1030 	BT[0] = TB[NB-2] + 1;
       
  1031 	BT[1] = TB[NB-1] + (BT[0]==0);
       
  1032 
       
  1033 	// start reducing TA mod TB, 2 words at a time
       
  1034 	for (unsigned i=NA-2; i>=NB; i-=2)
       
  1035 	{
       
  1036 		AtomicDivide(Q+i-NB, TA+i-2, BT);
       
  1037 		CorrectQuotientEstimate(TA+i-NB, TP, Q+i-NB, TB, NB);
       
  1038 	}
       
  1039 
       
  1040 	// copy TA into R, and denormalize it
       
  1041 	CopyWords(R, TA+shiftWords, NB);
       
  1042 	ShiftWordsRightByBits(R, NB, shiftBits);
       
  1043 }
       
  1044 
       
  1045 static inline unsigned int EvenWordCount(const word *X, unsigned int N)
       
  1046 {
       
  1047 	while (N && X[N-2]==0 && X[N-1]==0)
       
  1048 		N-=2;
       
  1049 	return N;
       
  1050 }
       
  1051 
       
  1052 // return k
       
  1053 // R[N] --- result = A^(-1) * 2^k mod M
       
  1054 // T[4*N] - temporary work space
       
  1055 // A[NA] -- number to take inverse of
       
  1056 // M[N] --- modulus
       
  1057 
       
  1058 unsigned int AlmostInverse(word *R, word *T, const word *A, unsigned int NA, const word *M, unsigned int N)
       
  1059 {
       
  1060 	assert(NA<=N && N && N%2==0);
       
  1061 
       
  1062 	word *b = T;
       
  1063 	word *c = T+N;
       
  1064 	word *f = T+2*N;
       
  1065 	word *g = T+3*N;
       
  1066 	unsigned int bcLen=2, fgLen=EvenWordCount(M, N);
       
  1067 	unsigned int k=0, s=0;
       
  1068 
       
  1069 	SetWords(T, 0, 3*N);
       
  1070 	b[0]=1;
       
  1071 	CopyWords(f, A, NA);
       
  1072 	CopyWords(g, M, N);
       
  1073 
       
  1074 	FOREVER
       
  1075 	{
       
  1076 		word t=f[0];
       
  1077 		while (!t)
       
  1078 		{
       
  1079 			if (EvenWordCount(f, fgLen)==0)
       
  1080 			{
       
  1081 				SetWords(R, 0, N);
       
  1082 				return 0;
       
  1083 			}
       
  1084 
       
  1085 			ShiftWordsRightByWords(f, fgLen, 1);
       
  1086 			if (c[bcLen-1]) bcLen+=2;
       
  1087 			assert(bcLen <= N);
       
  1088 			ShiftWordsLeftByWords(c, bcLen, 1);
       
  1089 			k+=WORD_BITS;
       
  1090 			t=f[0];
       
  1091 		}
       
  1092 
       
  1093 		unsigned int i=0;
       
  1094 		while (t%2 == 0)
       
  1095 		{
       
  1096 			t>>=1;
       
  1097 			i++;
       
  1098 		}
       
  1099 		k+=i;
       
  1100 
       
  1101 		if (t==1 && f[1]==0 && EvenWordCount(f, fgLen)==2)
       
  1102 		{
       
  1103 			if (s%2==0)
       
  1104 				CopyWords(R, b, N);
       
  1105 			else
       
  1106 				Subtract(R, M, b, N);
       
  1107 			return k;
       
  1108 		}
       
  1109 
       
  1110 		ShiftWordsRightByBits(f, fgLen, i);
       
  1111 		t=ShiftWordsLeftByBits(c, bcLen, i);
       
  1112 		if (t)
       
  1113 		{
       
  1114 			c[bcLen] = t;
       
  1115 			bcLen+=2;
       
  1116 			assert(bcLen <= N);
       
  1117 		}
       
  1118 
       
  1119 		if (f[fgLen-2]==0 && g[fgLen-2]==0 && f[fgLen-1]==0 && g[fgLen-1]==0)
       
  1120 			fgLen-=2;
       
  1121 
       
  1122 		if (Compare(f, g, fgLen)==-1)
       
  1123 		{
       
  1124 			TClassSwap<word*>(f,g);
       
  1125 			TClassSwap<word*>(b,c);
       
  1126 			s++;
       
  1127 		}
       
  1128 
       
  1129 		Subtract(f, f, g, fgLen);
       
  1130 
       
  1131 		if (Add(b, b, c, bcLen))
       
  1132 		{
       
  1133 			b[bcLen] = 1;
       
  1134 			bcLen+=2;
       
  1135 			assert(bcLen <= N);
       
  1136 		}
       
  1137 	}
       
  1138 }
       
  1139 
       
  1140 // R[N] - result = A/(2^k) mod M
       
  1141 // A[N] - input
       
  1142 // M[N] - modulus
       
  1143 
       
  1144 void DivideByPower2Mod(word *R, const word *A, unsigned int k, const word *M, unsigned int N)
       
  1145 {
       
  1146 	CopyWords(R, A, N);
       
  1147 
       
  1148 	while (k--)
       
  1149 	{
       
  1150 		if (R[0]%2==0)
       
  1151 			ShiftWordsRightByBits(R, N, 1);
       
  1152 		else
       
  1153 		{
       
  1154 			word carry = Add(R, R, M, N);
       
  1155 			ShiftWordsRightByBits(R, N, 1);
       
  1156 			R[N-1] += carry<<(WORD_BITS-1);
       
  1157 		}
       
  1158 	}
       
  1159 }
       
  1160