tcpiputils/punycodeconv/src/punycodeconverter.cpp
author Dario Sestito <darios@symbian.org>
Thu, 28 Oct 2010 13:54:27 +0100
branchRCL_3
changeset 77 8c9903e4e6b4
parent 0 af10295192d8
permissions -rw-r--r--
Workaround for bug 3738

// Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies).
// All rights reserved.
// This component and the accompanying materials are made available
// under the terms of "Eclipse Public License v1.0"
// which accompanies this distribution, and is available
// at the URL "http://www.eclipse.org/legal/epl-v10.html".
//
// Initial Contributors:
// Nokia Corporation - initial contribution.
//
// Contributors:
//
// Description:
// Contains the implementation punycode conversion algorithm
//



/**
 @file
*/

#include "punycodeconverter.h"
#include <e32std.h>

// *******************************************************
// This implementation is adapted from the Punycode sample
// implementation in appendix C of the RFC-3492.
// *******************************************************
//
// Bootstring parameters for Punycode
//
#define PUNYCODE_BASE 36
#define PUNYCODE_TMIN 1
#define PUNYCODE_TMAX 26
#define PUNYCODE_SKEW 38
#define PUNYCODE_DAMP 700
#define PUNYCODE_BIAS 72
#define PUNYCODE_INIT 0x80
#define PUNYCODE_DELI 0x2D

_LIT8(KAcePrefix, "xn--");


/* basic(cp) tests whether cp is a basic code point: */
#define basic(cp) ((TUint)(cp) < 0x80)

/* delim(cp) tests whether cp is a delimiter: */
#define delim(cp) ((cp) == PUNYCODE_DELI)


/**
Function to decode each digit and return the character.
	decode_digit(cp) returns the numeric value of a basic code 
	point (for use in representing integers) in the range 0 to 
	base-1, or base if cp is does not represent a value.       
@return - returns the non ASCII character for the input
@param cp the codepoint
*/

static TUint decode_digit(TUint cp)
	{
	return  cp - 48 < 10 ? cp - 22 :
			cp - 65 < 26 ? cp - 65 :
			cp - 97 < 26 ? cp - 97 :
			PUNYCODE_BASE;
	}


/**
Function to encode each digit and return the character.
	encode_digit(d) returns the basic code point whose value      
	(when used for representing integers) is d, which needs to be in   
	the range 0 to base-1.                                           
@return - returns the ASCII character for the input
@param cp the codepoint
*/
static char encode_digit(TUint d)
	{
	return (d + 22 + 75 * (d < 26));
	  /*  0..25 map to ASCII a..z */
	  /* 26..35 map to ASCII 0..9 */
	}



/**
Function to adapt the bias.
Bias adaptation function
@return - returns the ASCII character for the input
@param delta, difference delta
@param numPoints, the number of points
@param firsttime , whether the first bias
*/
static TUint adapt(TUint delta, TUint numpoints, int firsttime)
	{
  	TUint k;

	delta = firsttime ? delta / PUNYCODE_DAMP : delta >> 1;
	delta += delta / numpoints;

	for (k = 0;  delta > ((PUNYCODE_BASE - PUNYCODE_TMIN) * PUNYCODE_TMAX) / 2;  k += PUNYCODE_BASE)
		{
		delta /= PUNYCODE_BASE - PUNYCODE_TMIN;
		}
	return k + (PUNYCODE_BASE - PUNYCODE_TMIN + 1) * delta / (delta + PUNYCODE_SKEW);
	}


/**
Function to convert the IDN to Punycode
@return KErrNone, if conversion is successful
	KErrDndNameTooBig, if the IDN conversion exceeds the limit for a domain Name
	or any other system wide errors
@param aName, the input name in UCS2.0 encoding
*/
EXPORT_C TInt TPunyCodeDndName::IdnToPunycode(const THostName &aName)
	{
	SetLength(0);
			
	for (TInt i = 0; i < aName.Length(); )
		{
		i = Encode(aName, i);
		if (i < 0)
			return i;
		}
	return KErrNone;
	}

/**
Function to encode each label
@return KErrNone, if conversion is successful
	KErrDndNameTooBig, if the IDN conversion exceeds the limit for a domain Name
	or any other system wide errors
@param aName, the input name in UCS2.0 encoding
*/
TInt TPunyCodeDndName::Encode(const THostName &aName, TInt aIndex)
	{
	const TInt output_start = Length();

	TInt j;
	TUint n = PUNYCODE_INIT;
	TUint delta = 0;
	TUint bias = PUNYCODE_BIAS;
	TInt first_time = 1;

	// Copy the basic code points as is, and
	// compute the length of the current label
	// into input_length
	TUint input_length = 0;
	for (j = aIndex;  j < aName.Length();  ++j)
		{
		const TUint c = aName[j];
		if (c == '.')
			break;
	    if (basic(c))
	    	{
	    	if (Length() == MaxLength())
				return KErrDndNameTooBig;
	    	Append(c);
			}
		input_length += 1;
		}

	// h is the number of code points that have been handled
	TUint h = Length() - output_start;

	if (h == input_length)
		// Only basic code points, all done.
		goto done;

	// IDN is required, add prefix!
	if (Length() + KAcePrefix().Length() > MaxLength())
   		return KErrDndNameTooBig;
	Insert(output_start, KAcePrefix);

	if (h > 0)
		{
		// Both basic and non-basic points, need to add a delimiter.
		if (Length() == MaxLength())
			return KErrDndNameTooBig;
		Append(PUNYCODE_DELI);
		}
	
	// Main encoding loop

	while (h < input_length)
		{
		// All non-basic code points < n have been
		// handled already.  Find the next larger one:
		TUint m = KMaxTUint;
	    for (j = aIndex;  j < aIndex + input_length;  ++j)
	    	{
	    	const TUint c = aName[j];
			if (c >= n && c < m)
				m = c;
	    	}

		// Increase delta enough to advance the decoder's
		// <n,i> state to <m,0>, but guard against overflow:
		if (m - n > (KMaxTUint - delta) / (h + 1))
			return KErrOverflow;
		delta += (m - n) * (h + 1);
		n = m;
		
		for (j = aIndex;  j < aIndex + input_length;  ++j)
			{
			const TUint c = aName[j];
			if (c < n)
				{
				if (++delta == 0)
					return KErrOverflow;
				}
			else if (c == n)
				{
				// Represent delta as a generalized variable-length integer:
				TUint q = delta;
		        for (TUint k = PUNYCODE_BASE;  ;  k += PUNYCODE_BASE)
		        	{
		        	if (Length() >= MaxLength())
						return KErrDndNameTooBig;

					const TUint t = k <= bias /* + tmin */ ? PUNYCODE_TMIN :     /* +tmin not needed */
              			k >= bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : k - bias;
              		if (q < t)
              			break;
              		Append(encode_digit(t + (q - t) % (PUNYCODE_BASE - t)));
					q = (q - t) / (PUNYCODE_BASE - t);
		        	}
		        Append(encode_digit(q));
				++h;
        		bias = adapt(delta, h, first_time);
        		delta = 0;
        		first_time = 0;
				}
			}
	    ++delta, ++n;
		}
done:
	aIndex += input_length;
	if (aIndex < aName.Length())
		{
		// Input terminated with '.', copy it to ouput.
		if (Length() == MaxLength())
			return KErrDndNameTooBig;
		Append('.');
		aIndex += 1;
		}
	return aIndex;
	}
	

/**
Function to decode the punycode to IDN
@return KErrNone, if conversion is successful
	KErrDndBadName, if the punycode provided cannot be decoded
	or any other system wide errors
@param aName, the input punycode name in ASCII format
@param aStart, where to start the conversion, defaulted to 0.
*/
EXPORT_C TInt TPunyCodeDndName::PunycodeToIdn(TDes& aBuf, const TInt aStart)
	{
	aBuf.SetLength(0);
	for (TInt i = aStart; i < Length();  )
		{
		i = Decode(i, aBuf);
		if (i < 0)
			{
			// If Punycode fails for any reason, just return
			// the raw name (it probably was not punycode).
			return KErrDndBadName;
			}
		}
	return KErrNone;
	}

/**
Function to decode each label
@return KErrNone, if conversion is successful
	KErrDndBadName, if the punycode provided cannot be decoded
	or any other system wide errors
@param aBuf, the input punycode name in ASCII format for each label
@param aIndex, where to start the conversion, defaulted to 0.
*/
TInt TPunyCodeDndName::Decode(TInt aIndex, TDes &aBuf) const
	{
	if (aIndex + KAcePrefix().Length() > Length() ||
			Mid(aIndex, KAcePrefix().Length()).Compare(KAcePrefix()) != 0)
		{
		// cannot be punycode.
		// copy label as is, while updating aIndex
		while (aIndex < Length())
			{
			const TUint c = (*this)[aIndex++];
			if (aBuf.Length() == aBuf.MaxLength())
				return KErrDndNameTooBig;
			aBuf.Append(c);
			if (c == '.')
				break;
			}
		return aIndex;
		}
		
	aIndex += KAcePrefix().Length();	// Skip KAcePrefix.


	// Handle the basic code points.
	TInt puny_end = aIndex;
	TInt inp = aIndex;
	for ( ; puny_end < Length();  ++puny_end)
		{
		const TUint c = (*this)[puny_end];
		if (c == '.')
			break;
		if (delim(c))
			inp = puny_end;
		}

	if (aBuf.Length() + inp - aIndex > aBuf.MaxLength())
		return KErrDndNameTooBig;

	const TUint out_base = aBuf.Length();
	// Copy the basic code points as is.
	for (TInt j = aIndex;  j < inp;  ++j)
		{
		const TUint c = (*this)[j];
		if (!basic(c))
			return KErrGeneral;
		aBuf.Append(c);
		}
	// Skip inp over delimiter, if present
	if (inp > aIndex)
		inp += 1;

	// Initialize the state:

	TUint n = PUNYCODE_INIT;
	TUint outp = aBuf.Length() - out_base;
	TUint i = 0;
	TUint bias = PUNYCODE_BIAS;

	// Main decoding loop:  Start just after the last delimiter if any 
	// basic code points were copied; start at the beginning otherwise.

	while (inp < puny_end)
		{
		// inp is the index of the next character to be consumed, and
		// outp is the number of code points processed (includes the
		// initial basic points).

		// Decode a generalized variable-length integer into delta,
		// which gets added to i.  The overflow checking is easier
		// if we increase i as we go, then subtract off its starting
		// value at the end to obtain delta.
		const TUint oldi = i;
		TUint w = 1;
		for (TUint k = PUNYCODE_BASE;  ; k += PUNYCODE_BASE)
			{
			if (inp >= puny_end)
				return KErrGeneral;
			
			const TUint digit = decode_digit((*this)[inp++]);
			if (digit >= PUNYCODE_BASE)
				return KErrGeneral;
			if (digit > (KMaxTUint - i) / w)
				return KErrOverflow;
			i += digit * w;
			const TUint t = k <= bias /* + tmin */ ? PUNYCODE_TMIN :     /* +tmin not needed */
				k >= bias + PUNYCODE_TMAX ? PUNYCODE_TMAX : k - bias;
			if (digit < t)
				break;
			if (w > KMaxTUint / (PUNYCODE_BASE - t))
				return KErrOverflow;
			w *= (PUNYCODE_BASE - t);
			}

		outp++;		// Going to add new code, increment count.
		bias = adapt(i - oldi, outp, oldi == 0);
		
		// i was supposed to wrap around from out+1 to 0,
		// incrementing n each time, so we'll fix that now:

	    if (i / outp > KMaxTUint - n)
			return KErrOverflow;
	    n += i / outp;
	    i %= outp;

		// Insert n at position i of the output:

		if (aBuf.Length() == aBuf.MaxLength())
			return KErrDndNameTooBig;
		TBuf<1> tmp;
		tmp.Append(n);
		aBuf.Insert(out_base + i, tmp);
		i++;
		}
	if (puny_end < Length())
		{
		// Input terminated with '.', copy it to ouput.
		if (aBuf.Length() == aBuf.MaxLength())
			return KErrDndNameTooBig;
		aBuf.Append('.');
		puny_end++;
		}
	return puny_end;
	}