src/messaging/win32wce/qmailcodec.cpp
changeset 0 876b1a06bc25
equal deleted inserted replaced
-1:000000000000 0:876b1a06bc25
       
     1 /****************************************************************************
       
     2 **
       
     3 ** Copyright (C) 2010 Nokia Corporation and/or its subsidiary(-ies).
       
     4 ** All rights reserved.
       
     5 ** Contact: Nokia Corporation (qt-info@nokia.com)
       
     6 **
       
     7 ** This file is part of the Qt Mobility Components.
       
     8 **
       
     9 ** $QT_BEGIN_LICENSE:LGPL$
       
    10 ** No Commercial Usage
       
    11 ** This file contains pre-release code and may not be distributed.
       
    12 ** You may use this file in accordance with the terms and conditions
       
    13 ** contained in the Technology Preview License Agreement accompanying
       
    14 ** this package.
       
    15 **
       
    16 ** GNU Lesser General Public License Usage
       
    17 ** Alternatively, this file may be used under the terms of the GNU Lesser
       
    18 ** General Public License version 2.1 as published by the Free Software
       
    19 ** Foundation and appearing in the file LICENSE.LGPL included in the
       
    20 ** packaging of this file.  Please review the following information to
       
    21 ** ensure the GNU Lesser General Public License version 2.1 requirements
       
    22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
       
    23 **
       
    24 ** In addition, as a special exception, Nokia gives you certain additional
       
    25 ** rights.  These rights are described in the Nokia Qt LGPL Exception
       
    26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
       
    27 **
       
    28 ** If you have questions regarding the use of this file, please contact
       
    29 ** Nokia at qt-info@nokia.com.
       
    30 **
       
    31 **
       
    32 **
       
    33 **
       
    34 **
       
    35 **
       
    36 **
       
    37 **
       
    38 ** $QT_END_LICENSE$
       
    39 **
       
    40 ****************************************************************************/
       
    41 
       
    42 #include "qmailcodec.h"
       
    43 #include "qmaillog.h"
       
    44 #include <QIODevice>
       
    45 #include <QTextCodec>
       
    46 #include <QtDebug>
       
    47 #include <ctype.h>
       
    48 
       
    49 // Allow these values to be reduced from test harness code:
       
    50 int QTOPIAMAIL_EXPORT MaxCharacters = QMailCodec::ChunkCharacters;
       
    51 // Must be an even multiple of 4:
       
    52 int QTOPIAMAIL_EXPORT Base64MaxLineLength = 76;
       
    53 // Can be any number:
       
    54 int QTOPIAMAIL_EXPORT QuotedPrintableMaxLineLength = 74;
       
    55 
       
    56 
       
    57 /*!
       
    58   \class QMailCodec
       
    59 
       
    60   \brief The QMailCodec class provides mechanisms for encoding and decoding between 7-bit ASCII strings
       
    61   and arbitrary octet sequences.
       
    62 
       
    63   \ingroup messaginglibrary
       
    64 
       
    65   Messages transferred via the SMTP protocol must be encoded in 7-bit ASCII characters, even though
       
    66   their contents are typically composed in sequences of 8-bit octets.  The QMailCodec class provides
       
    67   an interface through which data can be easily converted between an 8-bit octet sequence and
       
    68   a 7-bit ASCII character sequence.
       
    69 
       
    70   QMailCodec is an abstract class; in order to perform a coding operation, a derived class
       
    71   must be used that provides a policy for mapping 8-bit data to and from 7-bit ASCII characters.
       
    72   This policy is implemented by overriding the encodeChunk() and decodeChunk() virtual functions.
       
    73 
       
    74   Using the QMailCodec interface, data can be encoded or decoded from an input QDataStream to an 
       
    75   output QDataStream, or for convenience, from an input QByteArray to an output QByteArray.
       
    76 
       
    77   If the data to be encoded is in unicode form, then the QMailCodec interface can be used to
       
    78   convert the data to ASCII via an intermediate QTextCodec, which converts the incoming text
       
    79   to a sequence of octets.  The QTextCodec used is specified by the name of the encoding
       
    80   produced, or that decoded when decoding an ASCII input sequence.  QMailCodec provides functions 
       
    81   to encode from a QTextStream to a QDataStream, and to decode from a QDataStream to a QTextStream.
       
    82   For convenience, it is also possible to encode a QString to a QByteArray, and to decode a 
       
    83   QByteArray to a QString.
       
    84 
       
    85   \sa QDataStream, QTextStream, QTextCodec
       
    86 */
       
    87 
       
    88 /*!
       
    89     \fn void QMailCodec::encodeChunk(QDataStream& out, const unsigned char* input, int length, bool finalChunk)
       
    90 
       
    91     Overridden by derived classes to perform an encoding operation.  The implementation function
       
    92     must encode \a length 8-bit octets at the location \a input, writing the resulting ASCII characters 
       
    93     to the stream \a out.  If \a finalChunk is false, further calls will be made to encodeChunk()
       
    94     with continued input data.  Otherwise, the encoding operation is complete.
       
    95 */
       
    96 
       
    97 /*!
       
    98     \fn void QMailCodec::decodeChunk(QDataStream& out, const char* input, int length, bool finalChunk)
       
    99 
       
   100     Overridden by derived classes to perform a decoding operation.  The implementation function
       
   101     must decode \a length ASCII characters at the location \a input, writing the resulting octets
       
   102     to the stream \a out.  If \a finalChunk is false, further calls will be made to decodeChunk()
       
   103     with continued input data.  Otherwise, the decoding operation is complete.
       
   104 */
       
   105 
       
   106 /*!
       
   107     Destroys a QMailCodec instance.
       
   108 */
       
   109 QMailCodec::~QMailCodec()
       
   110 {
       
   111 }
       
   112 
       
   113 /*!
       
   114     \fn QMailCodec::name() const
       
   115 
       
   116     Returns a string that identifies the subclass of QMailCodec that this instance belongs to.
       
   117 */
       
   118 
       
   119 static void enumerateCodecs()
       
   120 {
       
   121     static bool enumerated = false;
       
   122 
       
   123     if (!enumerated)
       
   124     {
       
   125         qWarning() << "Available codecs:";
       
   126         foreach (const QByteArray& codec, QTextCodec::availableCodecs())
       
   127             qWarning() << "  " << codec;
       
   128 
       
   129         enumerated = true;
       
   130     }
       
   131 }
       
   132 
       
   133 static QTextCodec* codecForName(const QByteArray& charset, bool translateAscii = true)
       
   134 {
       
   135     QByteArray encoding(charset.toLower());
       
   136 
       
   137     if (!encoding.isEmpty())
       
   138     {
       
   139         int index;
       
   140 
       
   141         if (translateAscii && encoding.contains("ascii")) 
       
   142         {
       
   143             // We'll assume the text is plain ASCII, to be extracted to Latin-1
       
   144             encoding = "ISO-8859-1";
       
   145         }
       
   146         else if ((index = encoding.indexOf('*')) != -1)
       
   147         {
       
   148             // This charset specification includes a trailing language specifier
       
   149             encoding = encoding.left(index);
       
   150         }
       
   151 
       
   152         QTextCodec* codec = QTextCodec::codecForName(encoding);
       
   153         if (!codec)
       
   154         {
       
   155             qWarning() << "QMailCodec::codecForName - Unable to find codec for charset" << encoding;
       
   156             enumerateCodecs();
       
   157         }
       
   158 
       
   159         return codec;
       
   160     }
       
   161 
       
   162     return 0;
       
   163 }
       
   164 
       
   165 /*!
       
   166     Writes the data read from the stream \a in to the stream \a out, as a sequence 
       
   167     of 7-bit ASCII characters.  The unicode characters read from \a in are first 
       
   168     encoded to the text encoding \a charset.
       
   169 
       
   170     \sa QTextCodec::codecForName()
       
   171 */
       
   172 void QMailCodec::encode(QDataStream& out, QTextStream& in, const QString& charset)
       
   173 {
       
   174     if (QTextCodec* codec = codecForName(charset.toLatin1()))
       
   175     {
       
   176         while (!in.atEnd())
       
   177         {
       
   178             QString chunk = in.read(MaxCharacters);
       
   179             QByteArray charsetEncoded = codec->fromUnicode(chunk);
       
   180 
       
   181             encodeChunk(out, 
       
   182                         reinterpret_cast<const unsigned char*>(charsetEncoded.constData()), 
       
   183                         charsetEncoded.length(),
       
   184                         in.atEnd());
       
   185         }
       
   186     }
       
   187 }
       
   188 
       
   189 /*!
       
   190     Writes the data read from the stream \a in to the stream \a out, converting from 
       
   191     a sequence of 7-bit ASCII characters.  The characters read from \a in are 
       
   192     decoded from the text encoding \a charset to unicode.
       
   193 
       
   194     \sa QTextCodec::codecForName()
       
   195 */
       
   196 void QMailCodec::decode(QTextStream& out, QDataStream& in, const QString& charset)
       
   197 {
       
   198     if (QTextCodec* codec = codecForName(charset.toLatin1()))
       
   199     {
       
   200         QByteArray decoded;
       
   201         {
       
   202             QDataStream decodedStream(&decoded, QIODevice::WriteOnly);
       
   203             
       
   204             char* buffer = new char[MaxCharacters];
       
   205             while (!in.atEnd())
       
   206             {
       
   207                 int length = in.readRawData(buffer, MaxCharacters);
       
   208 
       
   209                 // Allow for decoded data to be twice the size without reallocation
       
   210                 decoded.reserve(decoded.size() + (MaxCharacters * 2));
       
   211 
       
   212                 decodeChunk(decodedStream, buffer, length, in.atEnd());
       
   213             }
       
   214             delete [] buffer;
       
   215         }
       
   216 
       
   217         // This is an unfortunately-necessary copy operation; we should investigate
       
   218         // modifying QTextCodec to support a stream interface
       
   219         QString unicode = codec->toUnicode(decoded);
       
   220         out << unicode;
       
   221         out.flush();
       
   222     }
       
   223 }
       
   224 
       
   225 /*!
       
   226     Writes the data read from the stream \a in to the stream \a out, as a sequence 
       
   227     of 7-bit ASCII characters.
       
   228 */
       
   229 void QMailCodec::encode(QDataStream& out, QDataStream& in)
       
   230 {
       
   231     char* buffer = new char[MaxCharacters];
       
   232     while (!in.atEnd())
       
   233     {
       
   234         int length = in.readRawData(buffer, MaxCharacters);
       
   235 
       
   236         encodeChunk(out, reinterpret_cast<unsigned char*>(buffer), length, in.atEnd());
       
   237     }
       
   238     delete [] buffer;
       
   239 }
       
   240 
       
   241 /*!
       
   242     Writes the data read from the stream \a in to the stream \a out, converting from 
       
   243     a sequence of 7-bit ASCII characters.
       
   244 */
       
   245 void QMailCodec::decode(QDataStream& out, QDataStream& in)
       
   246 {
       
   247     char* buffer = new char[MaxCharacters];
       
   248     while (!in.atEnd())
       
   249     {
       
   250         int length = in.readRawData(buffer, MaxCharacters);
       
   251 
       
   252         decodeChunk(out, buffer, length, in.atEnd());
       
   253     }
       
   254     delete [] buffer;
       
   255 }
       
   256 
       
   257 /*!
       
   258     Writes the data read from the stream \a in to the stream \a out, without conversion.
       
   259 */
       
   260 void QMailCodec::copy(QDataStream& out, QDataStream& in)
       
   261 {
       
   262     char* buffer = new char[MaxCharacters];
       
   263     while (!in.atEnd())
       
   264     {
       
   265         int length = in.readRawData(buffer, MaxCharacters);
       
   266         out.writeRawData(buffer, length);
       
   267     }
       
   268     delete [] buffer;
       
   269 }
       
   270 
       
   271 /*!
       
   272     Writes the data read from the stream \a in to the stream \a out, without conversion.
       
   273 */
       
   274 void QMailCodec::copy(QTextStream& out, QTextStream& in)
       
   275 {
       
   276     while (!in.atEnd())
       
   277     {
       
   278         QString input = in.read(MaxCharacters);
       
   279         out << input;
       
   280     }
       
   281 }
       
   282 
       
   283 /*!
       
   284     Returns a QByteArray containing the string \a input, encoded to the text encoding \a charset 
       
   285     and then to a sequence of 7-bit ASCII characters.
       
   286 
       
   287     \sa QTextCodec::codecForName()
       
   288 */
       
   289 QByteArray QMailCodec::encode(const QString& input, const QString& charset)
       
   290 {
       
   291     QByteArray result;
       
   292     {
       
   293         QDataStream out(&result, QIODevice::WriteOnly);
       
   294 
       
   295         // We can't currently guarantee that this is safe - we should investigate modifying
       
   296         // QTextStream to support a read-only interface...
       
   297         QTextStream in(const_cast<QString*>(&input), QIODevice::ReadOnly);
       
   298 
       
   299         encode(out, in, charset);
       
   300     }
       
   301 
       
   302     return result;
       
   303 }
       
   304 
       
   305 /*!
       
   306     Returns a QString containing characters decoded from the text encoding \a charset, which 
       
   307     are decoded from the sequence of 7-bit ASCII characters read from \a input. 
       
   308 
       
   309     \sa QTextCodec::codecForName()
       
   310 */
       
   311 QString QMailCodec::decode(const QByteArray& input, const QString& charset)
       
   312 {
       
   313     QString result;
       
   314     {
       
   315         QTextStream out(&result, QIODevice::WriteOnly);
       
   316         QDataStream in(input);
       
   317         decode(out, in, charset);
       
   318     }
       
   319 
       
   320     return result;
       
   321 }
       
   322 
       
   323 /*!
       
   324     Returns a QByteArray containing the octets from \a input, encoded to a sequence of 
       
   325     7-bit ASCII characters.
       
   326 */
       
   327 QByteArray QMailCodec::encode(const QByteArray& input)
       
   328 {
       
   329     QByteArray result;
       
   330     {
       
   331         QDataStream out(&result, QIODevice::WriteOnly);
       
   332         QDataStream in(input);
       
   333 
       
   334         encode(out, in);
       
   335     }
       
   336 
       
   337     return result;
       
   338 }
       
   339 
       
   340 /*!
       
   341     Returns a QByteArray containing the octets decoded from the sequence of 7-bit ASCII
       
   342     characters in \a input.
       
   343 */
       
   344 QByteArray QMailCodec::decode(const QByteArray& input)
       
   345 {
       
   346     QByteArray result;
       
   347     {
       
   348         QDataStream out(&result, QIODevice::WriteOnly);
       
   349         QDataStream in(input);
       
   350 
       
   351         decode(out, in);
       
   352     }
       
   353 
       
   354     return result;
       
   355 }
       
   356 
       
   357 
       
   358 // ASCII character values used throughout
       
   359 const unsigned char MinPrintableRange = 0x20;
       
   360 const unsigned char MaxPrintableRange = 0x7e;
       
   361 const unsigned char HorizontalTab = 0x09;
       
   362 const unsigned char LineFeed = 0x0a;
       
   363 const unsigned char FormFeed = 0x0c;
       
   364 const unsigned char CarriageReturn = 0x0d;
       
   365 const unsigned char Space = 0x20;
       
   366 const unsigned char Equals = 0x3d;
       
   367 const unsigned char ExclamationMark = 0x21;
       
   368 const unsigned char Asterisk = 0x2a;
       
   369 const unsigned char Plus = 0x2b;
       
   370 const unsigned char Minus = 0x2d;
       
   371 const unsigned char Slash = 0x2f;
       
   372 const unsigned char Underscore = 0x5f;
       
   373 
       
   374 // Static data and functions for Base 64 codec
       
   375 static const char Base64Characters[64 + 1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
       
   376 static const unsigned char* Base64Values = reinterpret_cast<const unsigned char*>(Base64Characters);
       
   377 static const unsigned char Base64PaddingByte = 0x3d;
       
   378 
       
   379 static inline unsigned char base64Index(const char ascii)
       
   380 {
       
   381     if (ascii >= 'A' && ascii <= 'Z')
       
   382         return (ascii - 'A');
       
   383     if (ascii >= 'a' && ascii <= 'z')
       
   384         return (ascii - 'a') + 26;
       
   385     if (ascii >= '0' && ascii <= '9')
       
   386         return (ascii - '0') + 52;
       
   387     if (ascii == '+')
       
   388         return 62;
       
   389     if (ascii == '/')
       
   390         return 63;
       
   391     if (ascii == Base64PaddingByte)
       
   392         return 64;
       
   393     return 65;
       
   394 }
       
   395 
       
   396 
       
   397 /*!
       
   398   \class QMailBase64Codec
       
   399 
       
   400   \brief The QMailBase64Codec class encodes or decodes between 8-bit data and 7-bit ASCII, using the Base64
       
   401   character mapping scheme.
       
   402 
       
   403   \ingroup messaginglibrary
       
   404 
       
   405   The Base64 character mapping scheme maps arbitrary 8-bit values into a range of 64 printable 
       
   406   characters from the 7-bit ASCII set.  The mapping scheme used is defined in 
       
   407   \l{http://www.ietf.org/rfc/rfc2045.txt} {RFC 2045} (Multipurpose Internet Mail Extensions Part One). 
       
   408   This encoding is also defined as the '"B" encoding' for 'encoded words' in
       
   409   \l{http://www.ietf.org/rfc/rfc2047.txt} {RFC 2047} (Multipurpose Internet Mail Extensions Part Three). 
       
   410 
       
   411   The Base64 scheme encodes every incoming octet regardless of its original value, and thus 
       
   412   produces the same ratio of output length to input length for any input data sequence.  Since
       
   413   Base64 encodes four output characters for every three input octets, it produces a 33% 
       
   414   increase in stream size when encoding.
       
   415 
       
   416   An instance of QMailBase64Codec contains state information about the encoding or decoding
       
   417   operation it performs, so an instance should be used for a single coding operation only:
       
   418 
       
   419   \code
       
   420   QString textData = acquireInput();
       
   421 
       
   422   // Encode the string data to a UTF-8 byte sequence, and then encode to Base64
       
   423   QMailBase64Codec encoder;
       
   424   QByteArray base64Data = encoder.encode(textData, "UTF-8");
       
   425   \endcode
       
   426 
       
   427   \sa QMailCodec
       
   428 */
       
   429 
       
   430 /*!
       
   431     \enum QMailBase64Codec::ContentType
       
   432 
       
   433     This enumerated type is used to specify whether content is textual data or binary data. 
       
   434 
       
   435     \value Text     The data is textual data; newline sequences within the data will be converted during coding.
       
   436     \value Binary   The data is not textual, and does not contain newline sequences.
       
   437 */
       
   438 
       
   439 /*!
       
   440     Constructs a codec object for coding to or from Base64 encoding.
       
   441 
       
   442     If \a content is QMailBase64Codec::Text, then newline sequences will be converted
       
   443     between the local representation (for example, 0x0A on Unix) and the transmission standard
       
   444     representation (0x0D 0x0A). Otherwise, the data will be coded without modification.
       
   445 
       
   446     The maximum number of encoded characters per output line can be specified as \a maximumLineLength.  
       
   447     If not specified, or specified to a non-positive value, a default value will be used.
       
   448 */
       
   449 QMailBase64Codec::QMailBase64Codec(ContentType content, int maximumLineLength)
       
   450     : _content(content),
       
   451       _maximumLineLength(maximumLineLength),
       
   452       _lastChar(0)
       
   453 {
       
   454     // Our buffer output iterators - initially at start of buffer
       
   455     _encodeBufferOut = _encodeBuffer;
       
   456     _decodeBufferOut = _decodeBuffer;
       
   457 
       
   458     // Each encoded line will contain 76 output chars - 57 input chars
       
   459     if (_maximumLineLength <= 0)
       
   460         _maximumLineLength = Base64MaxLineLength;
       
   461     _encodeLineCharsRemaining = (_maximumLineLength / 4 * 3);
       
   462 
       
   463     // Count the number of padding characters encountered during decode
       
   464     _decodePaddingCount = 0;
       
   465 }
       
   466 
       
   467 /*! \reimp */
       
   468 QString QMailBase64Codec::name() const
       
   469 {
       
   470     return "QMailBase64Codec";
       
   471 }
       
   472 
       
   473 /*! \internal */
       
   474 void QMailBase64Codec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk)
       
   475 {
       
   476     unsigned char* bufferEnd = _encodeBuffer + 3;
       
   477 
       
   478     // Set the input pointers relative to this input
       
   479     const unsigned char* lineEnd = it + _encodeLineCharsRemaining;
       
   480     const unsigned char* const end = it + length;
       
   481 
       
   482     while (it != end)
       
   483     {
       
   484         bool trailingLF = false;
       
   485 
       
   486         const unsigned char input = *it++;
       
   487         if ((input == CarriageReturn || input == LineFeed) && (_content == Text))
       
   488         {
       
   489             if (_lastChar == CarriageReturn && input == LineFeed)
       
   490             {
       
   491                 // We have already encoded this character-sequence
       
   492 
       
   493                 // We can accept one more input character than accounted for
       
   494                 lineEnd += 1;
       
   495             }
       
   496             else 
       
   497             {
       
   498                 // We must replace this character with ASCII CRLF
       
   499                 *_encodeBufferOut++ = CarriageReturn;
       
   500                 if (_encodeBufferOut != bufferEnd)
       
   501                 {
       
   502                     *_encodeBufferOut++ = LineFeed;
       
   503                 }
       
   504                 else 
       
   505                 {
       
   506                     trailingLF = true;
       
   507                 }
       
   508             
       
   509                 // We can accept one fewer input character than expected, now
       
   510                 lineEnd -= 1;
       
   511             }
       
   512 
       
   513             _lastChar = input;
       
   514         }
       
   515         else
       
   516             *_encodeBufferOut++ = input;
       
   517 
       
   518         if (_encodeBufferOut == bufferEnd)
       
   519         {
       
   520             // We have buffered 3 input bytes - write them out as four output bytes
       
   521             out << Base64Values[(_encodeBuffer[0] >> 2) & 0x3f];
       
   522             out << Base64Values[(((_encodeBuffer[0] & 0x03) << 4) | (_encodeBuffer[1] >> 4)) & 0x3f];
       
   523             out << Base64Values[(((_encodeBuffer[1] & 0x0f) << 2) | (_encodeBuffer[2] >> 6)) & 0x3f];
       
   524             out << Base64Values[_encodeBuffer[2] & 0x3f];
       
   525 
       
   526             _encodeBufferOut = _encodeBuffer;
       
   527             if ((it >= lineEnd) && ((it != end) || !finalChunk))
       
   528             {
       
   529                 // Insert an ASCII CRLF sequence
       
   530                 out << static_cast<unsigned char>(CarriageReturn) << static_cast<unsigned char>(LineFeed);
       
   531                 lineEnd += (_maximumLineLength / 4 * 3);
       
   532             }
       
   533         }
       
   534 
       
   535         if (trailingLF)
       
   536         {
       
   537             *_encodeBufferOut++ = LineFeed;
       
   538         }
       
   539     }
       
   540 
       
   541     if (finalChunk)
       
   542     {
       
   543         int bufferedBytesRemaining = _encodeBufferOut - _encodeBuffer;
       
   544         if (bufferedBytesRemaining > 0)
       
   545         {
       
   546             // We have some data still buffered - pad buffer with zero bits
       
   547             *_encodeBufferOut = 0;
       
   548 
       
   549             out << Base64Values[(_encodeBuffer[0] >> 2) & 0x3f];
       
   550             out << Base64Values[(((_encodeBuffer[0] & 0x03) << 4) | (_encodeBuffer[1] >> 4)) & 0x3f];
       
   551 
       
   552             // Indicate unused bytes with the padding character
       
   553             if (bufferedBytesRemaining == 1)
       
   554             {
       
   555                 out << Base64PaddingByte;
       
   556                 out << Base64PaddingByte;
       
   557             }
       
   558             else // must be two
       
   559             {
       
   560                 out << Base64Values[(((_encodeBuffer[1] & 0x0f) << 2) | (_encodeBuffer[2] >> 6)) & 0x3f];
       
   561                 out << Base64PaddingByte;
       
   562             }
       
   563         }
       
   564     }
       
   565     else
       
   566     {
       
   567         // Leave the buffer intact, and adjust the line char count
       
   568         _encodeLineCharsRemaining = (lineEnd - it);
       
   569     }
       
   570 }
       
   571 
       
   572 /*! \internal */
       
   573 void QMailBase64Codec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk)
       
   574 {
       
   575     unsigned char* bufferEnd = _decodeBuffer + 4;
       
   576 
       
   577     const char* const end = it + length;
       
   578     while (it != end)
       
   579     {
       
   580         // Convert each character to the index value
       
   581         *_decodeBufferOut = base64Index(*it++);
       
   582         if (*_decodeBufferOut == 64)
       
   583             ++_decodePaddingCount;
       
   584         if (*_decodeBufferOut <= 64)
       
   585             ++_decodeBufferOut;
       
   586 
       
   587         if (_decodeBufferOut == bufferEnd)
       
   588         {
       
   589             // We have buffered 4 input characters - write them out as three output bytes
       
   590             // unless some of them are padding
       
   591 
       
   592             unsigned char decoded[3] = { 0 };
       
   593             decoded[0] = static_cast<unsigned char>((_decodeBuffer[0] << 2) | ((_decodeBuffer[1] >> 4) & 0x03));
       
   594             decoded[1] = static_cast<unsigned char>((_decodeBuffer[1] << 4) | ((_decodeBuffer[2] >> 2) & 0x0f));
       
   595             decoded[2] = static_cast<unsigned char>(((_decodeBuffer[2] & 0x03) << 6) | (_decodeBuffer[3] & 0x3f));
       
   596 
       
   597             int remainingChars = (3 - _decodePaddingCount);
       
   598             for (int i = 0; i < remainingChars; ++i)
       
   599             {
       
   600                 if ((decoded[i] == CarriageReturn || decoded[i] == LineFeed) && (_content == Text))
       
   601                 {
       
   602                     if (_lastChar == CarriageReturn && decoded[i] == LineFeed)
       
   603                     {
       
   604                         // We have already processed this sequence
       
   605                     }
       
   606                     else
       
   607                     {
       
   608                         // We should output the local newline sequence, but we can't
       
   609                         // because we don't know what it is, and C++ translation-from-\n will
       
   610                         // only work if the stream is a file...
       
   611                         out << static_cast<unsigned char>('\n');
       
   612                     }
       
   613 
       
   614                     _lastChar = decoded[i];
       
   615                 }
       
   616                 else
       
   617                     out << decoded[i];
       
   618             }
       
   619 
       
   620             _decodeBufferOut = _decodeBuffer;
       
   621         }
       
   622     }
       
   623 
       
   624     if (finalChunk)
       
   625     {
       
   626         // There should always be an even multiple of 4 input bytes
       
   627         int bufferedBytesRemaining = _decodeBufferOut - _decodeBuffer;
       
   628         if (bufferedBytesRemaining > 0)
       
   629         {
       
   630             qWarning() << "Huh? bytes remaining:" << bufferedBytesRemaining;
       
   631         }
       
   632     }
       
   633 }
       
   634 
       
   635 
       
    636 // Static data and functions for Quoted-Printable codec
       
   637 static const unsigned char NilPreceding = 0x7f;
       
   638 static const char QuotedPrintableCharacters[16 + 1] = "0123456789ABCDEF";
       
   639 static const unsigned char* QuotedPrintableValues = reinterpret_cast<const unsigned char*>(QuotedPrintableCharacters);
       
   640 
       
   641 static bool requiresEscape(unsigned char input, QMailQuotedPrintableCodec::ConformanceType conformance, int charsRemaining)
       
   642 {
       
   643     // For both, we need to escape '=' and anything unprintable
       
   644     bool escape = ((input > MaxPrintableRange) || 
       
   645                    ((input < MinPrintableRange) && (input != HorizontalTab) && (input != FormFeed)) ||
       
   646                    (input == Equals));
       
   647 
       
   648     // For RFC 2047, we need to escape '?', '_', ' ' & '\t'
       
   649     // In fact, since the output may be used in a header field 'word', then the only characters
       
   650     // that can be used un-escaped are: alphanumerics, '!', '*', '+' '-', '/' and '_'
       
   651     if (!escape && (conformance == QMailQuotedPrintableCodec::Rfc2047))
       
   652     {
       
   653         // We can also ignore space, since it will become an underscore
       
   654         if ((input != ExclamationMark) && (input != Asterisk) && (input != Plus) && 
       
   655             (input != Minus) && (input != Slash) && (input != Underscore) && (input != Space))
       
   656         {
       
   657             escape = !isalnum(input);
       
   658         }
       
   659     }
       
   660 
       
   661     if (!escape && (input == HorizontalTab || input == Space))
       
   662     {
       
   663         // The (potentially) last whitespace character on a line must be escaped
       
   664         if (charsRemaining <= 3)
       
   665             escape = true;
       
   666     }
       
   667 
       
   668     return escape;
       
   669 }
       
   670 
       
   671 static inline void encodeCharacter(QDataStream& out, unsigned char value)
       
   672 {
       
   673     out << static_cast<unsigned char>(Equals);
       
   674     out << QuotedPrintableValues[value >> 4];
       
   675     out << QuotedPrintableValues[value & 0x0f];
       
   676 }
       
   677 
       
   678 static inline void lineBreak(QDataStream& out, int* _encodeLineCharsRemaining, int maximumLineLength)
       
   679 {
       
   680     out << static_cast<unsigned char>(Equals);
       
   681     out << static_cast<unsigned char>(LineFeed);
       
   682 
       
   683     *_encodeLineCharsRemaining = maximumLineLength;
       
   684 }
       
   685 
       
   // Converts one hexadecimal digit character ('0'-'9', 'A'-'F' or 'a'-'f', given
   // as its ASCII code) to its numeric value; any other input yields 0.
   static inline unsigned char decodeCharacter(unsigned char value)
   {
       // Unsigned subtraction wraps for codes below the range start, so a single
       // comparison covers both ends of each range
       const unsigned int decimal = static_cast<unsigned int>(value) - 0x30u;
       if (decimal < 10u)
           return static_cast<unsigned char>(decimal);

       // Setting bit 0x20 maps 'A'-'F' onto 'a'-'f'
       const unsigned int letter = (static_cast<unsigned int>(value) | 0x20u) - 0x61u;
       if (letter < 6u)
           return static_cast<unsigned char>(letter + 10u);

       return 0;
   }
       
   699 
       
   700 
       
   701 /*!
       
   702   \class QMailQuotedPrintableCodec
       
   703 
       
   704   \brief The QMailQuotedPrintableCodec class encodes or decodes between 8-bit data and 7-bit ASCII, 
       
   705   using the 'quoted printable' character mapping scheme.
       
   706 
       
   707   \ingroup messaginglibrary
       
   708 
       
   709   The 'quoted printable' character mapping scheme maps arbitrary 8-bit values into 7-bit ASCII
       
   710   characters, by replacing values that cannot be directly represented with an escape sequence.
       
   711   The mapping scheme used is defined in 
       
   712   \l{http://www.ietf.org/rfc/rfc2045.txt} {RFC 2045} (Multipurpose Internet Mail Extensions Part One). 
       
   713   A minor variation on the scheme is defined as the '"Q" encoding' for 'encoded words' in
       
   714   \l{http://www.ietf.org/rfc/rfc2047.txt} {RFC 2047} (Multipurpose Internet Mail Extensions Part Three). 
       
   715 
       
   716   The 'quoted printable' scheme encodes only those incoming octet values that cannot be directly
       
   717   represented in ASCII, by replacing the input octet with a three-character sequence that encodes 
       
   718   the numeric value of the original octet.  Therefore, the ratio of input length to output length 
       
   719   for any input data sequence depends on the percentage of the input that corresponds to ASCII 
       
   720   values, with ASCII-like encodings producing only small increases.  With an input data encoding 
       
   721   such as Latin-1 (ISO-8859-1), the output maintains a reasonable degree of human-readability.
       
   722 
       
   723   An instance of QMailQuotedPrintableCodec contains state information about the encoding or decoding
       
   724   operation it performs, so an instance should be used for a single coding operation only:
       
   725 
       
   726   \code
       
   727   QByteArray asciiData = acquireInput();
       
   728 
       
   729   // We know the data is text in Latin-1 encoding, so decode the data from 
       
   730   // quoted printable ASCII encoding, and then decode from Latin-1 to unicode
       
   731   QMailQuotedPrintableCodec decoder(QMailQuotedPrintableCodec::Text, QMailQuotedPrintableCodec::Rfc2045);
       
   732   QString textData = decoder.decode(asciiData, "ISO-8859-1");
       
   733   \endcode
       
   734 
       
   735   \sa QMailCodec
       
   736 */
       
   737 
       
   738 /*!
       
   739     \enum QMailQuotedPrintableCodec::ContentType
       
   740 
       
   741     This enumerated type is used to specify whether content is textual data or binary data. 
       
   742 
       
   743     \value Text     The data is textual data; newline sequences within the data will be converted during coding.
       
   744     \value Binary   The data is not textual, and does not contain newline sequences.
       
   745 */
       
   746 
       
   747 /*!
       
   748     \enum QMailQuotedPrintableCodec::ConformanceType
       
   749 
       
   750     This enumerated type is used to specify which RFC the coding operation should conform to.
       
   751 
       
   752     \value Rfc2045  The coding should be performed according to the requirements of RFC 2045.
       
   753     \value Rfc2047  The coding should be performed according to the requirements of RFC 2047's '"Q" encoding'.
       
   754 */
       
   755 
       
   756 /*!
       
   757     Constructs a codec object for coding data of type \a content, using the mapping scheme
       
   758     specified by the requirements of \a conformance.
       
   759 
       
   760     If \a content is QMailQuotedPrintableCodec::Text, then newline sequences will be converted
       
   761     between the local representation (for example, 0x0A on Unix) and the transmission standard
       
   762     representation (0x0D 0x0A). Otherwise, the data will be coded without modification.
       
   763 
       
   764     If \a conformance is QMailQuotedPrintableCodec::Rfc2047, then coding will use the mapping
       
   765     scheme of the 
       
   766     \l{http://www.ietf.org/rfc/rfc2047.txt} {RFC 2047} '"Q" encoding'; otherwise the scheme defined in 
       
   767     \l{http://www.ietf.org/rfc/rfc2045.txt} {RFC 2045} will be used.
       
   768 
       
   769     The maximum number of encoded output characters per line can be specified as \a maximumLineLength.  
       
   770     If not specified, or specified to a non-positive value, a default value will be used.
       
   771 */
       
   772 QMailQuotedPrintableCodec::QMailQuotedPrintableCodec(ContentType content, ConformanceType conformance, int maximumLineLength)
       
   773     : _content(content),
       
   774       _conformance(conformance),
       
   775       _maximumLineLength(maximumLineLength)
       
   776 {
       
   777     // We're allowed up to 76 chars per output line, but the RFC isn't really clear on 
       
   778     // whether this includes the '=' and '\n' of a soft line break, so we'll assume they're counted
       
   779     if (_maximumLineLength <= 0)
       
   780         _maximumLineLength = QuotedPrintableMaxLineLength;
       
   781 
       
   782     _encodeLineCharsRemaining = _maximumLineLength;
       
   783     _encodeLastChar = '\0';
       
   784 
       
   785     _decodePrecedingInput = NilPreceding;
       
   786     _decodeLastChar = '\0';
       
   787 }
       
   788 
       
   789 /*! \reimp */
       
   790 QString QMailQuotedPrintableCodec::name() const
       
   791 {
       
   792     return "QMailQuotedPrintableCodec";
       
   793 }
       
   794 
       
   795 /*! \internal */
       
   796 void QMailQuotedPrintableCodec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk)
       
   797 {
       
   798     // Set the input pointers relative to this input
       
   799     const unsigned char* const end = it + length;
       
   800 
       
   801     while (it != end)
       
   802     {
       
   803         unsigned char input = *it++;
       
   804 
       
   805         if ((input == CarriageReturn || input == LineFeed) && (_content == Text))
       
   806         {
       
   807             if (_encodeLastChar == CarriageReturn && input == LineFeed)
       
   808             {
       
   809                 // We have already encoded this character-sequence
       
   810             }
       
   811             else 
       
   812             {
       
   813                 // We must replace this character with ascii CRLF
       
   814                 out << CarriageReturn << LineFeed;
       
   815             }
       
   816 
       
   817             _encodeLastChar = input;
       
   818             _encodeLineCharsRemaining = _maximumLineLength;
       
   819             continue;
       
   820         }
       
   821 
       
   822         bool escape = requiresEscape(input, _conformance, _encodeLineCharsRemaining);
       
   823         int charsRequired = (escape ? 3 : 1);
       
   824 
       
   825         // If we can't fit this character on the line, insert a line break
       
   826         if (charsRequired > _encodeLineCharsRemaining)
       
   827         {
       
   828             lineBreak(out, &_encodeLineCharsRemaining, _maximumLineLength); 
       
   829 
       
   830             // We may no longer need the encoding after the line break
       
   831             if (input == Space || (input == HorizontalTab && _conformance != Rfc2047))
       
   832                 charsRequired = 1;
       
   833         }
       
   834 
       
   835         if (charsRequired == 1)
       
   836         {
       
   837             if (input == Space && _conformance == Rfc2047) // output space as '_'
       
   838                 out << static_cast<unsigned char>(Underscore);
       
   839             else
       
   840                 out << input;
       
   841         }
       
   842         else
       
   843             encodeCharacter(out, input);
       
   844 
       
   845         _encodeLineCharsRemaining -= charsRequired;
       
   846 
       
   847         if ((_encodeLineCharsRemaining == 0) && !(finalChunk && (it == end)))
       
   848             lineBreak(out, &_encodeLineCharsRemaining, _maximumLineLength); 
       
   849 
       
   850         _encodeLastChar = input;
       
   851     }
       
   852 
       
   853     Q_UNUSED(finalChunk)
       
   854 }
       
   855 
       
   856 /*! \internal */
       
   857 void QMailQuotedPrintableCodec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk)
       
   858 {
       
   859     const char* const end = it + length;
       
   860 
       
   861     // The variable _decodePrecedingInput holds any unprocessed input from a previous call:
       
   862     // If '=', we've parsed only that char, otherwise, it is the hex value of the first parsed character
       
   863     if ((_decodePrecedingInput != NilPreceding) && (it != end))
       
   864     {
       
   865         unsigned char value = 0;
       
   866         if (_decodePrecedingInput == Equals)
       
   867         {
       
   868             // Get the first escaped char
       
   869             unsigned char input = *it++;
       
   870             if (input == LineFeed || input == CarriageReturn)
       
   871             {
       
   872                 // This is only a soft-line break
       
   873                 _decodePrecedingInput = NilPreceding;
       
   874             }
       
   875             else
       
   876             {
       
   877                 value = decodeCharacter(input);
       
   878                 _decodePrecedingInput = value;
       
   879             }
       
   880         }
       
   881         else
       
   882         {
       
   883             // We already have partial escaped input
       
   884             value = _decodePrecedingInput;
       
   885         }
       
   886 
       
   887         if (it != end && _decodePrecedingInput != NilPreceding)
       
   888         {
       
   889             out << static_cast<unsigned char>((value << 4) | decodeCharacter(*it++));
       
   890             _decodePrecedingInput = NilPreceding;
       
   891         }
       
   892     }
       
   893 
       
   894     while (it != end)
       
   895     {
       
   896         unsigned char input = *it++;
       
   897         if (input == Equals)
       
   898         {
       
   899             // We are in an escape sequence
       
   900             if (it == end)
       
   901             {
       
   902                 _decodePrecedingInput = Equals;
       
   903             }
       
   904             else
       
   905             {
       
   906                 input = *it++;
       
   907                 if (input == LineFeed || input == CarriageReturn)
       
   908                 {
       
   909                     // This is a soft-line break - move on
       
   910                 }
       
   911                 else
       
   912                 {
       
   913                     // This is an encoded character
       
   914                     unsigned char value = decodeCharacter(input);
       
   915 
       
   916                     if (it == end)
       
   917                     {
       
   918                         _decodePrecedingInput = value;
       
   919                     }
       
   920                     else
       
   921                     {
       
   922                         out << static_cast<unsigned char>((value << 4) | decodeCharacter(*it++));
       
   923                     }
       
   924                 }
       
   925             }
       
   926         }
       
   927         else 
       
   928         {
       
   929             if ((input == CarriageReturn || input == LineFeed) && (_content == Text))
       
   930             {
       
   931                 if (_decodeLastChar == CarriageReturn && input == LineFeed)
       
   932                 {
       
   933                     // We have already processed this sequence
       
   934                 }
       
   935                 else
       
   936                 {
       
   937                     // We should output the local newline sequence, but we can't
       
   938                     // because we don't know what it is, and C++ translation-from-\n will
       
   939                     // only work if the stream is a file...
       
   940                     out << static_cast<unsigned char>('\n');
       
   941                 }
       
   942             }
       
   943             else if (input == Underscore && _conformance == Rfc2047)
       
   944                 out << static_cast<unsigned char>(Space);
       
   945             else
       
   946                 out << input;
       
   947         }
       
   948 
       
   949         _decodeLastChar = input;
       
   950     }
       
   951 
       
   952     if (finalChunk && _decodePrecedingInput != NilPreceding)
       
   953     {
       
   954         qWarning() << "Huh? unfinished escape sequence...";
       
   955     }
       
   956 }
       
   957 
       
   958 static void writeStream(QDataStream& out, const char* it, int length)
       
   959 {
       
   960     int totalWritten = 0;
       
   961     while (totalWritten < length)
       
   962     {
       
   963         int bytesWritten = out.writeRawData(it + totalWritten, length - totalWritten);
       
   964         if (bytesWritten == -1)
       
   965             return;
       
   966 
       
   967         totalWritten += bytesWritten;
       
   968     }
       
   969 }
       
   970 
       
   971 /*!
       
   972   \class QMailPassThroughCodec
       
   973 
       
   974   \brief The QMailPassThroughCodec class uses the QMailCodec interface to move data between streams
       
   975   without coding or decoding.
       
   976 
       
   977   \ingroup messaginglibrary
       
   978 
       
   979   The QMailPassThroughCodec allows client code to use the same QMailCodec interface to convert data between
       
   980   different ASCII encodings, or no encoding at all, without having to be aware of the details involved.
       
   981 
       
   982   The pass-through codec is primarily useful when communicating with SMTP servers supporting the
       
   983   \l{http://www.ietf.org/rfc/rfc1652.txt} {RFC 1652} (8BITMIME) extension, which permits the exchange
       
   984   of data without coding via 7-bit ASCII.  
       
   985 
       
   986   A QMailPassThroughCodec can be instantiated directly, but is more likely to be used polymorphically:
       
   987 
       
   988   \code
       
   989   // Get an object to perform the encoding required for the current server
       
   990   QMailCodec* encoder = getCodecForServer(currentServer());
       
   991 
       
   992   // If the codec returned is a QMailPassThroughCodec, the input data will 
       
   993   // be written to the output stream without encoding to 7-bit ASCII
       
   994   encoder->encode(outputStream, inputStream);
       
   995   \endcode
       
   996 
       
   997   \sa QMailCodec
       
   998 */
       
   999 
       
  1000 /*! \reimp */
       
  1001 QString QMailPassThroughCodec::name() const
       
  1002 {
       
  1003     return "QMailPassThroughCodec";
       
  1004 }
       
  1005 
       
  1006 /*! \internal */
       
  1007 void QMailPassThroughCodec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk)
       
  1008 {
       
  1009     writeStream(out, reinterpret_cast<const char*>(it), length);
       
  1010 
       
  1011     Q_UNUSED(finalChunk)
       
  1012 }
       
  1013 
       
  1014 /*! \internal */
       
  1015 void QMailPassThroughCodec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk)
       
  1016 {
       
  1017     writeStream(out, it, length);
       
  1018 
       
  1019     Q_UNUSED(finalChunk)
       
  1020 }
       
  1021 
       
  1022 
       
  1023 /*!
       
  1024   \class QMailLineEndingCodec
       
  1025 
       
  1026   \brief The QMailLineEndingCodec class encodes textual data to use CR/LF line endings required for SMTP transmission.
       
  1027 
       
  1028   \ingroup messaginglibrary
       
  1029 
       
  1030   The QMailLineEndingCodec allows client code to use the QMailCodec interface to encode textual data
       
  1031   from the local line-ending convention to the CR/LF convention required for SMTP transmission.  The 
       
  1032   codec will convert from single carriage return or single line feed line-endings to CR/LF pairs, or 
       
  1033   will preserve data already using the correct encoding.
       
  1034 
       
  1035   Decoded data will have CR/LF pairs converted to \c \n.
       
  1036 
       
  1037   An instance of QMailLineEndingCodec contains state information about the encoding or decoding
       
  1038   operation it performs, so an instance should be used for a single coding operation only.
       
  1039 
       
  1040   \sa QMailCodec
       
  1041 */
       
  1042 
       
  1043 /*!
       
  1044     Constructs a codec object for coding text data, converting between the local line-ending
       
  1045     convention and the CR/LF line-ending sequence required for SMTP transmission.
       
  1046 */
       
  1047 QMailLineEndingCodec::QMailLineEndingCodec()
       
  1048     : _lastChar(0)
       
  1049 {
       
  1050 }
       
  1051 
       
  1052 /*! \reimp */
       
  1053 QString QMailLineEndingCodec::name() const
       
  1054 {
       
  1055     return "QMailLineEndingCodec";
       
  1056 }
       
  1057 
       
  1058 /*! \internal */
       
  1059 void QMailLineEndingCodec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk)
       
  1060 {
       
  1061     const unsigned char* const end = it + length;
       
  1062 
       
  1063     const unsigned char* begin = it;
       
  1064     while (it != end)
       
  1065     {
       
  1066         const unsigned char input = *it;
       
  1067         if (input == CarriageReturn || input == LineFeed)
       
  1068         {
       
  1069             if (_lastChar == CarriageReturn && input == LineFeed)
       
  1070             {
       
  1071                 // We have already encoded this character-sequence; skip the input
       
  1072                 begin = (it + 1);
       
  1073             }
       
  1074             else 
       
  1075             {
       
  1076                 // Write the preceding characters
       
  1077                 if (it > begin)
       
  1078                     writeStream(out, reinterpret_cast<const char*>(begin), (it - begin));
       
  1079 
       
  1080                 // We must replace this character with ascii CRLF
       
  1081                 out << CarriageReturn << LineFeed;
       
  1082                 begin = (it + 1);
       
  1083             }
       
  1084         }
       
  1085 
       
  1086         _lastChar = input;
       
  1087         ++it;
       
  1088     }
       
  1089 
       
  1090     if (it > begin)
       
  1091     {
       
  1092         // Write the remaining characters
       
  1093         writeStream(out, reinterpret_cast<const char*>(begin), (it - begin));
       
  1094     }
       
  1095 
       
  1096     Q_UNUSED(finalChunk)
       
  1097 }
       
  1098 
       
  1099 /*! \internal */
       
  1100 void QMailLineEndingCodec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk)
       
  1101 {
       
  1102     const char* const end = it + length;
       
  1103 
       
  1104     const char* begin = it;
       
  1105     while (it != end)
       
  1106     {
       
  1107         const char input = *it;
       
  1108         if (input == CarriageReturn || input == LineFeed)
       
  1109         {
       
  1110             if (_lastChar == CarriageReturn && input == LineFeed)
       
  1111             {
       
  1112                 // We have already processed this sequence
       
  1113                 begin = (it + 1);
       
  1114             }
       
  1115             else
       
  1116             {
       
  1117                 // Write the preceding characters
       
  1118                 if (it > begin)
       
  1119                     writeStream(out, begin, (it - begin));
       
  1120 
       
  1121                 // We should output the local newline sequence, but we can't
       
  1122                 // because we don't know what it is, and C++ translation-from-\n will
       
  1123                 // only work if the stream is a file...
       
  1124                 out << static_cast<unsigned char>('\n');
       
  1125                 begin = (it + 1);
       
  1126             }
       
  1127         }
       
  1128 
       
  1129         _lastChar = input;
       
  1130         ++it;
       
  1131     }
       
  1132 
       
  1133     if (it > begin)
       
  1134     {
       
  1135         // Write the remaining characters
       
  1136         writeStream(out, begin, (it - begin));
       
  1137     }
       
  1138 
       
  1139     Q_UNUSED(finalChunk)
       
  1140 }
       
  1141