|
1 /**************************************************************************** |
|
2 ** |
|
3 ** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies). |
|
4 ** All rights reserved. |
|
5 ** Contact: Nokia Corporation (qt-info@nokia.com) |
|
6 ** |
|
7 ** This file is part of the Qt Mobility Components. |
|
8 ** |
|
9 ** $QT_BEGIN_LICENSE:LGPL$ |
|
10 ** No Commercial Usage |
|
11 ** This file contains pre-release code and may not be distributed. |
|
12 ** You may use this file in accordance with the terms and conditions |
|
13 ** contained in the Technology Preview License Agreement accompanying |
|
14 ** this package. |
|
15 ** |
|
16 ** GNU Lesser General Public License Usage |
|
17 ** Alternatively, this file may be used under the terms of the GNU Lesser |
|
18 ** General Public License version 2.1 as published by the Free Software |
|
19 ** Foundation and appearing in the file LICENSE.LGPL included in the |
|
20 ** packaging of this file. Please review the following information to |
|
21 ** ensure the GNU Lesser General Public License version 2.1 requirements |
|
22 ** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html. |
|
23 ** |
|
24 ** In addition, as a special exception, Nokia gives you certain additional |
|
25 ** rights. These rights are described in the Nokia Qt LGPL Exception |
|
26 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package. |
|
27 ** |
|
28 ** If you have questions regarding the use of this file, please contact |
|
29 ** Nokia at qt-info@nokia.com. |
|
30 ** |
|
31 ** |
|
32 ** |
|
33 ** |
|
34 ** |
|
35 ** |
|
36 ** |
|
37 ** |
|
38 ** $QT_END_LICENSE$ |
|
39 ** |
|
40 ****************************************************************************/ |
|
41 |
|
42 #include "qmailcodec.h" |
|
43 #include "qmaillog.h" |
|
44 #include <QIODevice> |
|
45 #include <QTextCodec> |
|
46 #include <QtDebug> |
|
47 #include <ctype.h> |
|
48 |
|
// Tunable limits. They are exported, non-const globals so that test harness code
// can reduce them (for example, to exercise chunk and line boundaries with small inputs).

// Maximum number of characters/octets processed per chunk by the QMailCodec stream functions:
int QTOPIAMAIL_EXPORT MaxCharacters = QMailCodec::ChunkCharacters;
// Default Base64 output line length, used when the QMailBase64Codec constructor is given a
// non-positive length. Must be a multiple of 4, because each encoded group is 4 characters:
int QTOPIAMAIL_EXPORT Base64MaxLineLength = 76;
// Default quoted-printable output line length, used when the QMailQuotedPrintableCodec
// constructor is given a non-positive length. Can be any number:
int QTOPIAMAIL_EXPORT QuotedPrintableMaxLineLength = 74;
|
55 |
|
56 |
|
57 /*! |
|
58 \class QMailCodec |
|
59 |
|
60 \preliminary |
|
61 \brief The QMailCodec class provides mechanisms for encoding and decoding between 7-bit ASCII strings |
|
62 and arbitrary octet sequences. |
|
63 |
|
64 \ingroup messaginglibrary |
|
65 |
|
66 Messages transferred via the SMTP protocol must be encoded in 7-bit ASCII characters, even though |
|
67 their contents are typically composed in sequences of 8-bit octets. The QMailCodec class provides |
|
68 an interface through which data can be easily converted between an 8-bit octet sequence and |
|
69 a 7-bit ASCII character sequence. |
|
70 |
|
71 QMailCodec is an abstract class; in order to perform a coding operation, a derived class |
|
72 must be used that provides a policy for mapping 8-bit data to and from 7-bit ASCII characters. |
|
73 This policy is implemented by overriding the encodeChunk() and decodeChunk() virtual functions. |
|
74 |
|
75 Using the QMailCodec interface, data can be encoded or decoded from an input QDataStream to an |
|
76 output QDataStream, or for convenience, from an input QByteArray to an output QByteArray. |
|
77 |
|
78 If the data to be encoded is in unicode form, then the QMailCodec interface can be used to |
|
79 convert the data to ASCII via an intermediate QTextCodec, which converts the incoming text |
|
80 to a sequence of octets. The QTextCodec used is specified by the name of the encoding |
|
81 produced, or that decoded when decoding an ASCII input sequence. QMailCodec provides functions |
|
82 to encode from a QTextStream to a QDataStream, and to decode from a QDataStream to a QTextStream. |
|
83 For convenience, it is also possible to encode a QString to a QByteArray, and to decode a |
|
84 QByteArray to a QString. |
|
85 |
|
86 \sa QDataStream, QTextStream, QTextCodec |
|
87 */ |
|
88 |
|
89 /*! |
|
90 \fn void QMailCodec::encodeChunk(QDataStream& out, const unsigned char* input, int length, bool finalChunk) |
|
91 |
|
92 Overridden by derived classes to perform an encoding operation. The implementation function |
|
93 must encode \a length 8-bit octets at the location \a input, writing the resulting ASCII characters |
|
94 to the stream \a out. If \a finalChunk is false, further calls will be made to encodeChunk() |
|
95 with continued input data. Otherwise, the encoding operation is complete. |
|
96 */ |
|
97 |
|
98 /*! |
|
99 \fn void QMailCodec::decodeChunk(QDataStream& out, const char* input, int length, bool finalChunk) |
|
100 |
|
101 Overridden by derived classes to perform a decoding operation. The implementation function |
|
102 must decode \a length ASCII characters at the location \a input, writing the resulting octets |
|
103 to the stream \a out. If \a finalChunk is false, further calls will be made to decodeChunk() |
|
104 with continued input data. Otherwise, the decoding operation is complete. |
|
105 */ |
|
106 |
|
107 /*! |
|
108 Destroys a QMailCodec instance. |
|
109 */ |
|
110 QMailCodec::~QMailCodec() |
|
111 { |
|
112 } |
|
113 |
|
114 /*! |
|
115 \fn QMailCodec::name() const |
|
116 |
|
117 Returns a string that identifies the subclass of QMailCodec that this instance belongs to. |
|
118 */ |
|
119 |
|
120 static void enumerateCodecs() |
|
121 { |
|
122 static bool enumerated = false; |
|
123 |
|
124 if (!enumerated) |
|
125 { |
|
126 qWarning() << "Available codecs:"; |
|
127 foreach (const QByteArray& codec, QTextCodec::availableCodecs()) |
|
128 qWarning() << " " << codec; |
|
129 |
|
130 enumerated = true; |
|
131 } |
|
132 } |
|
133 |
|
134 static QTextCodec* codecForName(const QByteArray& charset, bool translateAscii = true) |
|
135 { |
|
136 QByteArray encoding(charset.toLower()); |
|
137 |
|
138 if (!encoding.isEmpty()) |
|
139 { |
|
140 int index; |
|
141 |
|
142 if (translateAscii && encoding.contains("ascii")) |
|
143 { |
|
144 // We'll assume the text is plain ASCII, to be extracted to Latin-1 |
|
145 encoding = "ISO-8859-1"; |
|
146 } |
|
147 else if ((index = encoding.indexOf('*')) != -1) |
|
148 { |
|
149 // This charset specification includes a trailing language specifier |
|
150 encoding = encoding.left(index); |
|
151 } |
|
152 |
|
153 QTextCodec* codec = QTextCodec::codecForName(encoding); |
|
154 if (!codec) |
|
155 { |
|
156 qWarning() << "QMailCodec::codecForName - Unable to find codec for charset" << encoding; |
|
157 enumerateCodecs(); |
|
158 } |
|
159 |
|
160 return codec; |
|
161 } |
|
162 |
|
163 return 0; |
|
164 } |
|
165 |
|
166 /*! |
|
167 Writes the data read from the stream \a in to the stream \a out, as a sequence |
|
168 of 7-bit ASCII characters. The unicode characters read from \a in are first |
|
169 encoded to the text encoding \a charset. |
|
170 |
|
171 \sa QTextCodec::codecForName() |
|
172 */ |
|
173 void QMailCodec::encode(QDataStream& out, QTextStream& in, const QString& charset) |
|
174 { |
|
175 if (QTextCodec* codec = codecForName(charset.toLatin1())) |
|
176 { |
|
177 while (!in.atEnd()) |
|
178 { |
|
179 QString chunk = in.read(MaxCharacters); |
|
180 QByteArray charsetEncoded = codec->fromUnicode(chunk); |
|
181 |
|
182 encodeChunk(out, |
|
183 reinterpret_cast<const unsigned char*>(charsetEncoded.constData()), |
|
184 charsetEncoded.length(), |
|
185 in.atEnd()); |
|
186 } |
|
187 } |
|
188 } |
|
189 |
|
190 /*! |
|
191 Writes the data read from the stream \a in to the stream \a out, converting from |
|
192 a sequence of 7-bit ASCII characters. The characters read from \a in are |
|
193 decoded from the text encoding \a charset to unicode. |
|
194 |
|
195 \sa QTextCodec::codecForName() |
|
196 */ |
|
197 void QMailCodec::decode(QTextStream& out, QDataStream& in, const QString& charset) |
|
198 { |
|
199 if (QTextCodec* codec = codecForName(charset.toLatin1())) |
|
200 { |
|
201 QByteArray decoded; |
|
202 { |
|
203 QDataStream decodedStream(&decoded, QIODevice::WriteOnly); |
|
204 |
|
205 char* buffer = new char[MaxCharacters]; |
|
206 while (!in.atEnd()) |
|
207 { |
|
208 int length = in.readRawData(buffer, MaxCharacters); |
|
209 |
|
210 // Allow for decoded data to be twice the size without reallocation |
|
211 decoded.reserve(decoded.size() + (MaxCharacters * 2)); |
|
212 |
|
213 decodeChunk(decodedStream, buffer, length, in.atEnd()); |
|
214 } |
|
215 delete [] buffer; |
|
216 } |
|
217 |
|
218 // This is an unfortunately-necessary copy operation; we should investigate |
|
219 // modifying QTextCodec to support a stream interface |
|
220 QString unicode = codec->toUnicode(decoded); |
|
221 out << unicode; |
|
222 out.flush(); |
|
223 } |
|
224 } |
|
225 |
|
226 /*! |
|
227 Writes the data read from the stream \a in to the stream \a out, as a sequence |
|
228 of 7-bit ASCII characters. |
|
229 */ |
|
230 void QMailCodec::encode(QDataStream& out, QDataStream& in) |
|
231 { |
|
232 char* buffer = new char[MaxCharacters]; |
|
233 while (!in.atEnd()) |
|
234 { |
|
235 int length = in.readRawData(buffer, MaxCharacters); |
|
236 |
|
237 encodeChunk(out, reinterpret_cast<unsigned char*>(buffer), length, in.atEnd()); |
|
238 } |
|
239 delete [] buffer; |
|
240 } |
|
241 |
|
242 /*! |
|
243 Writes the data read from the stream \a in to the stream \a out, converting from |
|
244 a sequence of 7-bit ASCII characters. |
|
245 */ |
|
246 void QMailCodec::decode(QDataStream& out, QDataStream& in) |
|
247 { |
|
248 char* buffer = new char[MaxCharacters]; |
|
249 while (!in.atEnd()) |
|
250 { |
|
251 int length = in.readRawData(buffer, MaxCharacters); |
|
252 |
|
253 decodeChunk(out, buffer, length, in.atEnd()); |
|
254 } |
|
255 delete [] buffer; |
|
256 } |
|
257 |
|
258 /*! |
|
259 Writes the data read from the stream \a in to the stream \a out, without conversion. |
|
260 */ |
|
261 void QMailCodec::copy(QDataStream& out, QDataStream& in) |
|
262 { |
|
263 char* buffer = new char[MaxCharacters]; |
|
264 while (!in.atEnd()) |
|
265 { |
|
266 int length = in.readRawData(buffer, MaxCharacters); |
|
267 out.writeRawData(buffer, length); |
|
268 } |
|
269 delete [] buffer; |
|
270 } |
|
271 |
|
272 /*! |
|
273 Writes the data read from the stream \a in to the stream \a out, without conversion. |
|
274 */ |
|
275 void QMailCodec::copy(QTextStream& out, QTextStream& in) |
|
276 { |
|
277 while (!in.atEnd()) |
|
278 { |
|
279 QString input = in.read(MaxCharacters); |
|
280 out << input; |
|
281 } |
|
282 } |
|
283 |
|
284 /*! |
|
285 Returns a QByteArray containing the string \a input, encoded to the text encoding \a charset |
|
286 and then to a sequence of 7-bit ASCII characters. |
|
287 |
|
288 \sa QTextCodec::codecForName() |
|
289 */ |
|
290 QByteArray QMailCodec::encode(const QString& input, const QString& charset) |
|
291 { |
|
292 QByteArray result; |
|
293 { |
|
294 QDataStream out(&result, QIODevice::WriteOnly); |
|
295 |
|
296 // We can't currently guarantee that this is safe - we should investigate modifying |
|
297 // QTextStream to support a read-only interface... |
|
298 QTextStream in(const_cast<QString*>(&input), QIODevice::ReadOnly); |
|
299 |
|
300 encode(out, in, charset); |
|
301 } |
|
302 |
|
303 return result; |
|
304 } |
|
305 |
|
306 /*! |
|
307 Returns a QString containing characters decoded from the text encoding \a charset, which |
|
308 are decoded from the sequence of 7-bit ASCII characters read from \a input. |
|
309 |
|
310 \sa QTextCodec::codecForName() |
|
311 */ |
|
312 QString QMailCodec::decode(const QByteArray& input, const QString& charset) |
|
313 { |
|
314 QString result; |
|
315 { |
|
316 QTextStream out(&result, QIODevice::WriteOnly); |
|
317 QDataStream in(input); |
|
318 decode(out, in, charset); |
|
319 } |
|
320 |
|
321 return result; |
|
322 } |
|
323 |
|
324 /*! |
|
325 Returns a QByteArray containing the octets from \a input, encoded to a sequence of |
|
326 7-bit ASCII characters. |
|
327 */ |
|
328 QByteArray QMailCodec::encode(const QByteArray& input) |
|
329 { |
|
330 QByteArray result; |
|
331 { |
|
332 QDataStream out(&result, QIODevice::WriteOnly); |
|
333 QDataStream in(input); |
|
334 |
|
335 encode(out, in); |
|
336 } |
|
337 |
|
338 return result; |
|
339 } |
|
340 |
|
341 /*! |
|
342 Returns a QByteArray containing the octets decoded from the sequence of 7-bit ASCII |
|
343 characters in \a input. |
|
344 */ |
|
345 QByteArray QMailCodec::decode(const QByteArray& input) |
|
346 { |
|
347 QByteArray result; |
|
348 { |
|
349 QDataStream out(&result, QIODevice::WriteOnly); |
|
350 QDataStream in(input); |
|
351 |
|
352 decode(out, in); |
|
353 } |
|
354 |
|
355 return result; |
|
356 } |
|
357 |
|
358 |
|
// ASCII character values used throughout
const unsigned char MinPrintableRange = 0x20;   // ' ' - lowest value treated as printable
const unsigned char MaxPrintableRange = 0x7e;   // '~' - highest value treated as printable
const unsigned char HorizontalTab = 0x09;       // '\t'
const unsigned char LineFeed = 0x0a;            // '\n'
const unsigned char FormFeed = 0x0c;            // '\f'
const unsigned char CarriageReturn = 0x0d;      // '\r'
const unsigned char Space = 0x20;               // ' '
const unsigned char Equals = 0x3d;              // '=' - introduces a quoted-printable escape
const unsigned char ExclamationMark = 0x21;     // '!'
const unsigned char Asterisk = 0x2a;            // '*'
const unsigned char Plus = 0x2b;                // '+'
const unsigned char Minus = 0x2d;               // '-'
const unsigned char Slash = 0x2f;               // '/'
const unsigned char Underscore = 0x5f;          // '_' - written in place of space for RFC 2047
|
374 |
|
// Static data and functions for Base 64 codec

// The Base64 alphabet; the position of a character is the 6-bit value it represents
static const char Base64Characters[64 + 1] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
// Unsigned view of the alphabet, used when writing encoded characters to the output stream
static const unsigned char* Base64Values = reinterpret_cast<const unsigned char*>(Base64Characters);
// The '=' character, which fills out an incomplete final group
static const unsigned char Base64PaddingByte = 0x3d;

// Maps the character \a ascii to its position in the Base64 alphabet (0-63).
// Returns 64 for the padding character, and 65 for any character that is not
// part of the encoding.
static inline unsigned char base64Index(const char ascii)
{
    unsigned char index = 65;

    if (ascii >= 'A' && ascii <= 'Z')
        index = static_cast<unsigned char>(ascii - 'A');
    else if (ascii >= 'a' && ascii <= 'z')
        index = static_cast<unsigned char>((ascii - 'a') + 26);
    else if (ascii >= '0' && ascii <= '9')
        index = static_cast<unsigned char>((ascii - '0') + 52);
    else if (ascii == '+')
        index = 62;
    else if (ascii == '/')
        index = 63;
    else if (ascii == Base64PaddingByte)
        index = 64;

    return index;
}
|
396 |
|
397 |
|
398 /*! |
|
399 \class QMailBase64Codec |
|
400 |
|
401 \preliminary |
|
402 \brief The QMailBase64Codec class encodes or decodes between 8-bit data and 7-bit ASCII, using the Base64 |
|
403 character mapping scheme. |
|
404 |
|
405 \ingroup messaginglibrary |
|
406 |
|
407 The Base64 character mapping scheme maps arbitrary 8-bit values into a range of 64 printable |
|
408 characters from the 7-bit ASCII set. The mapping scheme used is defined in |
|
409 \l{http://www.ietf.org/rfc/rfc2045.txt} {RFC 2045} (Multipurpose Internet Mail Extensions Part One). |
|
410 This encoding is also defined as the '"B" encoding' for 'encoded words' in |
|
411 \l{http://www.ietf.org/rfc/rfc2047.txt} {RFC 2047} (Multipurpose Internet Mail Extensions Part Three). |
|
412 |
|
413 The Base64 scheme encodes every incoming octet regardless of its original value, and thus |
|
414 produces the same ratio of output length to input length for any input data sequence. Since |
|
415 Base64 encodes four output characters for every three input octets, it produces a 33% |
|
416 increase in stream size when encoding. |
|
417 |
|
418 An instance of QMailBase64Codec contains state information about the encoding or decoding |
|
419 operation it performs, so an instance should be used for a single coding operation only: |
|
420 |
|
421 \code |
|
422 QString textData = acquireInput(); |
|
423 |
|
424 // Encode the string data to a UTF-8 byte sequence, and then encode to Base64 |
|
425 QMailBase64Codec encoder; |
|
426 QByteArray base64Data = encoder.encode(textData, "UTF-8"); |
|
427 \endcode |
|
428 |
|
429 \sa QMailCodec |
|
430 */ |
|
431 |
|
432 /*! |
|
433 \enum QMailBase64Codec::ContentType |
|
434 |
|
435 This enumerated type is used to specify whether content is textual data or binary data. |
|
436 |
|
437 \value Text The data is textual data; newline sequences within the data will be converted during coding. |
|
438 \value Binary The data is not textual, and does not contain newline sequences. |
|
439 */ |
|
440 |
|
441 /*! |
|
442 Constructs a codec object for coding to or from Base64 encoding. |
|
443 |
|
444 If \a content is QMailBase64Codec::Text, then newline sequences will be converted |
|
445 between the local representation (for example, 0x0A on Unix) and the transmission standard |
|
446 representation (0x0D 0x0A). Otherwise, the data will be coded without modification. |
|
447 |
|
448 The maximum number of encoded characters per output line can be specified as \a maximumLineLength. |
|
449 If not specified, or specified to a non-positive value, a default value will be used. |
|
450 */ |
|
451 QMailBase64Codec::QMailBase64Codec(ContentType content, int maximumLineLength) |
|
452 : _content(content), |
|
453 _maximumLineLength(maximumLineLength), |
|
454 _lastChar(0) |
|
455 { |
|
456 // Our buffer output iterators - initially at start of buffer |
|
457 _encodeBufferOut = _encodeBuffer; |
|
458 _decodeBufferOut = _decodeBuffer; |
|
459 |
|
460 // Each encoded line will contain 76 output chars - 57 input chars |
|
461 if (_maximumLineLength <= 0) |
|
462 _maximumLineLength = Base64MaxLineLength; |
|
463 _encodeLineCharsRemaining = (_maximumLineLength / 4 * 3); |
|
464 |
|
465 // Count the number of padding characters encountered during decode |
|
466 _decodePaddingCount = 0; |
|
467 } |
|
468 |
|
469 /*! \reimp */ |
|
470 QString QMailBase64Codec::name() const |
|
471 { |
|
472 return "QMailBase64Codec"; |
|
473 } |
|
474 |
|
475 /*! \internal */ |
|
476 void QMailBase64Codec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk) |
|
477 { |
|
478 unsigned char* bufferEnd = _encodeBuffer + 3; |
|
479 |
|
480 // Set the input pointers relative to this input |
|
481 const unsigned char* lineEnd = it + _encodeLineCharsRemaining; |
|
482 const unsigned char* const end = it + length; |
|
483 |
|
484 while (it != end) |
|
485 { |
|
486 bool trailingLF = false; |
|
487 |
|
488 const unsigned char input = *it++; |
|
489 if ((input == CarriageReturn || input == LineFeed) && (_content == Text)) |
|
490 { |
|
491 if (_lastChar == CarriageReturn && input == LineFeed) |
|
492 { |
|
493 // We have already encoded this character-sequence |
|
494 |
|
495 // We can accept one more input character than accounted for |
|
496 lineEnd += 1; |
|
497 } |
|
498 else |
|
499 { |
|
500 // We must replace this character with ASCII CRLF |
|
501 *_encodeBufferOut++ = CarriageReturn; |
|
502 if (_encodeBufferOut != bufferEnd) |
|
503 { |
|
504 *_encodeBufferOut++ = LineFeed; |
|
505 } |
|
506 else |
|
507 { |
|
508 trailingLF = true; |
|
509 } |
|
510 |
|
511 // We can accept one fewer input character than expected, now |
|
512 lineEnd -= 1; |
|
513 } |
|
514 |
|
515 _lastChar = input; |
|
516 } |
|
517 else |
|
518 *_encodeBufferOut++ = input; |
|
519 |
|
520 if (_encodeBufferOut == bufferEnd) |
|
521 { |
|
522 // We have buffered 3 input bytes - write them out as four output bytes |
|
523 out << Base64Values[(_encodeBuffer[0] >> 2) & 0x3f]; |
|
524 out << Base64Values[(((_encodeBuffer[0] & 0x03) << 4) | (_encodeBuffer[1] >> 4)) & 0x3f]; |
|
525 out << Base64Values[(((_encodeBuffer[1] & 0x0f) << 2) | (_encodeBuffer[2] >> 6)) & 0x3f]; |
|
526 out << Base64Values[_encodeBuffer[2] & 0x3f]; |
|
527 |
|
528 _encodeBufferOut = _encodeBuffer; |
|
529 if ((it >= lineEnd) && ((it != end) || !finalChunk)) |
|
530 { |
|
531 // Insert an ASCII CRLF sequence |
|
532 out << static_cast<unsigned char>(CarriageReturn) << static_cast<unsigned char>(LineFeed); |
|
533 lineEnd += (_maximumLineLength / 4 * 3); |
|
534 } |
|
535 } |
|
536 |
|
537 if (trailingLF) |
|
538 { |
|
539 *_encodeBufferOut++ = LineFeed; |
|
540 } |
|
541 } |
|
542 |
|
543 if (finalChunk) |
|
544 { |
|
545 int bufferedBytesRemaining = _encodeBufferOut - _encodeBuffer; |
|
546 if (bufferedBytesRemaining > 0) |
|
547 { |
|
548 // We have some data still buffered - pad buffer with zero bits |
|
549 *_encodeBufferOut = 0; |
|
550 |
|
551 out << Base64Values[(_encodeBuffer[0] >> 2) & 0x3f]; |
|
552 out << Base64Values[(((_encodeBuffer[0] & 0x03) << 4) | (_encodeBuffer[1] >> 4)) & 0x3f]; |
|
553 |
|
554 // Indicate unused bytes with the padding character |
|
555 if (bufferedBytesRemaining == 1) |
|
556 { |
|
557 out << Base64PaddingByte; |
|
558 out << Base64PaddingByte; |
|
559 } |
|
560 else // must be two |
|
561 { |
|
562 out << Base64Values[(((_encodeBuffer[1] & 0x0f) << 2) | (_encodeBuffer[2] >> 6)) & 0x3f]; |
|
563 out << Base64PaddingByte; |
|
564 } |
|
565 } |
|
566 } |
|
567 else |
|
568 { |
|
569 // Leave the buffer intact, and adjust the line char count |
|
570 _encodeLineCharsRemaining = (lineEnd - it); |
|
571 } |
|
572 } |
|
573 |
|
574 /*! \internal */ |
|
575 void QMailBase64Codec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk) |
|
576 { |
|
577 unsigned char* bufferEnd = _decodeBuffer + 4; |
|
578 |
|
579 const char* const end = it + length; |
|
580 while (it != end) |
|
581 { |
|
582 // Convert each character to the index value |
|
583 *_decodeBufferOut = base64Index(*it++); |
|
584 if (*_decodeBufferOut == 64) |
|
585 ++_decodePaddingCount; |
|
586 if (*_decodeBufferOut <= 64) |
|
587 ++_decodeBufferOut; |
|
588 |
|
589 if (_decodeBufferOut == bufferEnd) |
|
590 { |
|
591 // We have buffered 4 input characters - write them out as three output bytes |
|
592 // unless some of them are padding |
|
593 |
|
594 unsigned char decoded[3] = { 0 }; |
|
595 decoded[0] = static_cast<unsigned char>((_decodeBuffer[0] << 2) | ((_decodeBuffer[1] >> 4) & 0x03)); |
|
596 decoded[1] = static_cast<unsigned char>((_decodeBuffer[1] << 4) | ((_decodeBuffer[2] >> 2) & 0x0f)); |
|
597 decoded[2] = static_cast<unsigned char>(((_decodeBuffer[2] & 0x03) << 6) | (_decodeBuffer[3] & 0x3f)); |
|
598 |
|
599 int remainingChars = (3 - _decodePaddingCount); |
|
600 for (int i = 0; i < remainingChars; ++i) |
|
601 { |
|
602 if ((decoded[i] == CarriageReturn || decoded[i] == LineFeed) && (_content == Text)) |
|
603 { |
|
604 if (_lastChar == CarriageReturn && decoded[i] == LineFeed) |
|
605 { |
|
606 // We have already processed this sequence |
|
607 } |
|
608 else |
|
609 { |
|
610 // We should output the local newline sequence, but we can't |
|
611 // because we don't know what it is, and C++ translation-from-\n will |
|
612 // only work if the stream is a file... |
|
613 out << static_cast<unsigned char>('\n'); |
|
614 } |
|
615 |
|
616 _lastChar = decoded[i]; |
|
617 } |
|
618 else |
|
619 out << decoded[i]; |
|
620 } |
|
621 |
|
622 _decodeBufferOut = _decodeBuffer; |
|
623 } |
|
624 } |
|
625 |
|
626 if (finalChunk) |
|
627 { |
|
628 // There should always be an even multiple of 4 input bytes |
|
629 int bufferedBytesRemaining = _decodeBufferOut - _decodeBuffer; |
|
630 if (bufferedBytesRemaining > 0) |
|
631 { |
|
632 qWarning() << "Huh? bytes remaining:" << bufferedBytesRemaining; |
|
633 } |
|
634 } |
|
635 } |
|
636 |
|
637 |
|
// Static data and functions for Quoted-Printable codec

// Marker stored in the decoder's pending-input state when no partially-parsed
// escape sequence is left over from a previous chunk
static const unsigned char NilPreceding = 0x7f;
// The hexadecimal digits used to write escaped octets; indexed by a 4-bit value
static const char QuotedPrintableCharacters[16 + 1] = "0123456789ABCDEF";
// Unsigned view of the digits, used when writing them to the output stream
static const unsigned char* QuotedPrintableValues = reinterpret_cast<const unsigned char*>(QuotedPrintableCharacters);
|
642 |
|
643 static bool requiresEscape(unsigned char input, QMailQuotedPrintableCodec::ConformanceType conformance, int charsRemaining) |
|
644 { |
|
645 // For both, we need to escape '=' and anything unprintable |
|
646 bool escape = ((input > MaxPrintableRange) || |
|
647 ((input < MinPrintableRange) && (input != HorizontalTab) && (input != FormFeed)) || |
|
648 (input == Equals)); |
|
649 |
|
650 // For RFC 2047, we need to escape '?', '_', ' ' & '\t' |
|
651 // In fact, since the output may be used in a header field 'word', then the only characters |
|
652 // that can be used un-escaped are: alphanumerics, '!', '*', '+' '-', '/' and '_' |
|
653 if (!escape && (conformance == QMailQuotedPrintableCodec::Rfc2047)) |
|
654 { |
|
655 // We can also ignore space, since it will become an underscore |
|
656 if ((input != ExclamationMark) && (input != Asterisk) && (input != Plus) && |
|
657 (input != Minus) && (input != Slash) && (input != Underscore) && (input != Space)) |
|
658 { |
|
659 escape = !isalnum(input); |
|
660 } |
|
661 } |
|
662 |
|
663 if (!escape && (input == HorizontalTab || input == Space)) |
|
664 { |
|
665 // The (potentially) last whitespace character on a line must be escaped |
|
666 if (charsRemaining <= 3) |
|
667 escape = true; |
|
668 } |
|
669 |
|
670 return escape; |
|
671 } |
|
672 |
|
673 static inline void encodeCharacter(QDataStream& out, unsigned char value) |
|
674 { |
|
675 out << static_cast<unsigned char>(Equals); |
|
676 out << QuotedPrintableValues[value >> 4]; |
|
677 out << QuotedPrintableValues[value & 0x0f]; |
|
678 } |
|
679 |
|
680 static inline void lineBreak(QDataStream& out, int* _encodeLineCharsRemaining, int maximumLineLength) |
|
681 { |
|
682 out << static_cast<unsigned char>(Equals); |
|
683 out << static_cast<unsigned char>(LineFeed); |
|
684 |
|
685 *_encodeLineCharsRemaining = maximumLineLength; |
|
686 } |
|
687 |
|
// Returns the numeric value (0-15) of the hexadecimal digit character \a value.
// Both upper-case and lower-case letters are accepted; any other character yields 0.
static inline unsigned char decodeCharacter(unsigned char value)
{
    unsigned char digit = 0;

    if ((value >= 0x30) && (value <= 0x39))         // '0' - '9'
        digit = static_cast<unsigned char>(value - 0x30);
    else if ((value >= 0x41) && (value <= 0x46))    // 'A' - 'F'
        digit = static_cast<unsigned char>((value - 0x41) + 10);
    else if ((value >= 0x61) && (value <= 0x66))    // 'a' - 'f'
        digit = static_cast<unsigned char>((value - 0x61) + 10);

    return digit;
}
|
701 |
|
702 |
|
703 /*! |
|
704 \class QMailQuotedPrintableCodec |
|
705 |
|
706 \preliminary |
|
707 \brief The QMailQuotedPrintableCodec class encodes or decodes between 8-bit data and 7-bit ASCII, |
|
708 using the 'quoted printable' character mapping scheme. |
|
709 |
|
710 \ingroup messaginglibrary |
|
711 |
|
712 The 'quoted printable' character mapping scheme maps arbitrary 8-bit values into 7-bit ASCII |
|
713 characters, by replacing values that cannot be directly represented with an escape sequence. |
|
714 The mapping scheme used is defined in |
|
715 \l{http://www.ietf.org/rfc/rfc2045.txt} {RFC 2045} (Multipurpose Internet Mail Extensions Part One). |
|
716 A minor variation on the scheme is defined as the '"Q" encoding' for 'encoded words' in |
|
717 \l{http://www.ietf.org/rfc/rfc2047.txt} {RFC 2047} (Multipurpose Internet Mail Extensions Part Three). |
|
718 |
|
719 The 'quoted printable' scheme encodes only those incoming octet values that cannot be directly |
|
720 represented in ASCII, by replacing the input octet with a three-character sequence that encodes |
|
721 the numeric value of the original octet. Therefore, the ratio of input length to output length |
|
722 for any input data sequence depends on the percentage of the input that corresponds to ASCII |
|
723 values, with ASCII-like encodings producing only small increases. With an input data encoding |
|
724 such as Latin-1 (ISO-8859-1), the output maintains a reasonable degree of human-readability. |
|
725 |
|
726 An instance of QMailQuotedPrintableCodec contains state information about the encoding or decoding |
|
727 operation it performs, so an instance should be used for a single coding operation only: |
|
728 |
|
729 \code |
|
730 QByteArray asciiData = acquireInput(); |
|
731 |
|
732 // We know the data is text in Latin-1 encoding, so decode the data from |
|
733 // quoted printable ASCII encoding, and then decode from Latin-1 to unicode |
|
734 QMailQuotedPrintableCodec decoder(QMailQuotedPrintableCodec::Text, QMailQuotedPrintableCodec::Rfc2045); |
|
735 QString textData = decoder.decode(asciiData, "ISO-8859-1"); |
|
736 \endcode |
|
737 |
|
738 \sa QMailCodec |
|
739 */ |
|
740 |
|
741 /*! |
|
742 \enum QMailQuotedPrintableCodec::ContentType |
|
743 |
|
744 This enumerated type is used to specify whether content is textual data or binary data. |
|
745 |
|
746 \value Text The data is textual data; newline sequences within the data will be converted during coding. |
|
747 \value Binary The data is not textual, and does not contain newline sequences. |
|
748 */ |
|
749 |
|
750 /*! |
|
751 \enum QMailQuotedPrintableCodec::ConformanceType |
|
752 |
|
753 This enumerated type is used to specify which RFC the coding operation should conform to. |
|
754 |
|
755 \value Rfc2045 The coding should be performed according to the requirements of RFC 2045. |
|
756 \value Rfc2047 The coding should be performed according to the requirements of RFC 2047's '"Q" encoding'. |
|
757 */ |
|
758 |
|
759 /*! |
|
760 Constructs a codec object for coding data of type \a content, using the mapping scheme |
|
761 specified by the requirements of \a conformance. |
|
762 |
|
763 If \a content is QMailQuotedPrintableCodec::Text, then newline sequences will be converted |
|
764 between the local representation (for example, 0x0A on Unix) and the transmission standard |
|
765 representation (0x0D 0x0A). Otherwise, the data will be coded without modification. |
|
766 |
|
767 If \a conformance is QMailQuotedPrintableCodec::Rfc2047, then coding will use the mapping |
|
768 scheme of the |
|
769 \l{http://www.ietf.org/rfc/rfc2047.txt} {RFC 2047} '"Q" encoding'; otherwise the scheme defined in |
|
770 \l{http://www.ietf.org/rfc/rfc2045.txt} {RFC 2045} will be used. |
|
771 |
|
772 The maximum number of encoded output characters per line can be specified as \a maximumLineLength. |
|
773 If not specified, or specified to a non-positive value, a default value will be used. |
|
774 */ |
|
775 QMailQuotedPrintableCodec::QMailQuotedPrintableCodec(ContentType content, ConformanceType conformance, int maximumLineLength) |
|
776 : _content(content), |
|
777 _conformance(conformance), |
|
778 _maximumLineLength(maximumLineLength) |
|
779 { |
|
780 // We're allowed up to 76 chars per output line, but the RFC isn't really clear on |
|
781 // whether this includes the '=' and '\n' of a soft line break, so we'll assume they're counted |
|
782 if (_maximumLineLength <= 0) |
|
783 _maximumLineLength = QuotedPrintableMaxLineLength; |
|
784 |
|
785 _encodeLineCharsRemaining = _maximumLineLength; |
|
786 _encodeLastChar = '\0'; |
|
787 |
|
788 _decodePrecedingInput = NilPreceding; |
|
789 _decodeLastChar = '\0'; |
|
790 } |
|
791 |
|
792 /*! \reimp */ |
|
793 QString QMailQuotedPrintableCodec::name() const |
|
794 { |
|
795 return "QMailQuotedPrintableCodec"; |
|
796 } |
|
797 |
|
798 /*! \internal */ |
|
799 void QMailQuotedPrintableCodec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk) |
|
800 { |
|
801 // Set the input pointers relative to this input |
|
802 const unsigned char* const end = it + length; |
|
803 |
|
804 while (it != end) |
|
805 { |
|
806 unsigned char input = *it++; |
|
807 |
|
808 if ((input == CarriageReturn || input == LineFeed) && (_content == Text)) |
|
809 { |
|
810 if (_encodeLastChar == CarriageReturn && input == LineFeed) |
|
811 { |
|
812 // We have already encoded this character-sequence |
|
813 } |
|
814 else |
|
815 { |
|
816 // We must replace this character with ascii CRLF |
|
817 out << CarriageReturn << LineFeed; |
|
818 } |
|
819 |
|
820 _encodeLastChar = input; |
|
821 _encodeLineCharsRemaining = _maximumLineLength; |
|
822 continue; |
|
823 } |
|
824 |
|
825 bool escape = requiresEscape(input, _conformance, _encodeLineCharsRemaining); |
|
826 int charsRequired = (escape ? 3 : 1); |
|
827 |
|
828 // If we can't fit this character on the line, insert a line break |
|
829 if (charsRequired > _encodeLineCharsRemaining) |
|
830 { |
|
831 lineBreak(out, &_encodeLineCharsRemaining, _maximumLineLength); |
|
832 |
|
833 // We may no longer need the encoding after the line break |
|
834 if (input == Space || (input == HorizontalTab && _conformance != Rfc2047)) |
|
835 charsRequired = 1; |
|
836 } |
|
837 |
|
838 if (charsRequired == 1) |
|
839 { |
|
840 if (input == Space && _conformance == Rfc2047) // output space as '_' |
|
841 out << static_cast<unsigned char>(Underscore); |
|
842 else |
|
843 out << input; |
|
844 } |
|
845 else |
|
846 encodeCharacter(out, input); |
|
847 |
|
848 _encodeLineCharsRemaining -= charsRequired; |
|
849 |
|
850 if ((_encodeLineCharsRemaining == 0) && !(finalChunk && (it == end))) |
|
851 lineBreak(out, &_encodeLineCharsRemaining, _maximumLineLength); |
|
852 |
|
853 _encodeLastChar = input; |
|
854 } |
|
855 |
|
856 Q_UNUSED(finalChunk) |
|
857 } |
|
858 |
|
859 /*! \internal */ |
|
860 void QMailQuotedPrintableCodec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk) |
|
861 { |
|
862 const char* const end = it + length; |
|
863 |
|
864 // The variable _decodePrecedingInput holds any unprocessed input from a previous call: |
|
865 // If '=', we've parsed only that char, otherwise, it is the hex value of the first parsed character |
|
866 if ((_decodePrecedingInput != NilPreceding) && (it != end)) |
|
867 { |
|
868 unsigned char value = 0; |
|
869 if (_decodePrecedingInput == Equals) |
|
870 { |
|
871 // Get the first escaped char |
|
872 unsigned char input = *it++; |
|
873 if (input == LineFeed || input == CarriageReturn) |
|
874 { |
|
875 // This is only a soft-line break |
|
876 _decodePrecedingInput = NilPreceding; |
|
877 } |
|
878 else |
|
879 { |
|
880 value = decodeCharacter(input); |
|
881 _decodePrecedingInput = value; |
|
882 } |
|
883 } |
|
884 else |
|
885 { |
|
886 // We already have partial escaped input |
|
887 value = _decodePrecedingInput; |
|
888 } |
|
889 |
|
890 if (it != end && _decodePrecedingInput != NilPreceding) |
|
891 { |
|
892 out << static_cast<unsigned char>((value << 4) | decodeCharacter(*it++)); |
|
893 _decodePrecedingInput = NilPreceding; |
|
894 } |
|
895 } |
|
896 |
|
897 while (it != end) |
|
898 { |
|
899 unsigned char input = *it++; |
|
900 if (input == Equals) |
|
901 { |
|
902 // We are in an escape sequence |
|
903 if (it == end) |
|
904 { |
|
905 _decodePrecedingInput = Equals; |
|
906 } |
|
907 else |
|
908 { |
|
909 input = *it++; |
|
910 if (input == LineFeed || input == CarriageReturn) |
|
911 { |
|
912 // This is a soft-line break - move on |
|
913 } |
|
914 else |
|
915 { |
|
916 // This is an encoded character |
|
917 unsigned char value = decodeCharacter(input); |
|
918 |
|
919 if (it == end) |
|
920 { |
|
921 _decodePrecedingInput = value; |
|
922 } |
|
923 else |
|
924 { |
|
925 out << static_cast<unsigned char>((value << 4) | decodeCharacter(*it++)); |
|
926 } |
|
927 } |
|
928 } |
|
929 } |
|
930 else |
|
931 { |
|
932 if ((input == CarriageReturn || input == LineFeed) && (_content == Text)) |
|
933 { |
|
934 if (_decodeLastChar == CarriageReturn && input == LineFeed) |
|
935 { |
|
936 // We have already processed this sequence |
|
937 } |
|
938 else |
|
939 { |
|
940 // We should output the local newline sequence, but we can't |
|
941 // because we don't know what it is, and C++ translation-from-\n will |
|
942 // only work if the stream is a file... |
|
943 out << static_cast<unsigned char>('\n'); |
|
944 } |
|
945 } |
|
946 else if (input == Underscore && _conformance == Rfc2047) |
|
947 out << static_cast<unsigned char>(Space); |
|
948 else |
|
949 out << input; |
|
950 } |
|
951 |
|
952 _decodeLastChar = input; |
|
953 } |
|
954 |
|
955 if (finalChunk && _decodePrecedingInput != NilPreceding) |
|
956 { |
|
957 qWarning() << "Huh? unfinished escape sequence..."; |
|
958 } |
|
959 } |
|
960 |
|
961 static void writeStream(QDataStream& out, const char* it, int length) |
|
962 { |
|
963 int totalWritten = 0; |
|
964 while (totalWritten < length) |
|
965 { |
|
966 int bytesWritten = out.writeRawData(it + totalWritten, length - totalWritten); |
|
967 if (bytesWritten == -1) |
|
968 return; |
|
969 |
|
970 totalWritten += bytesWritten; |
|
971 } |
|
972 } |
|
973 |
|
974 /*! |
|
975 \class QMailPassThroughCodec |
|
976 |
|
977 \preliminary |
|
978 \brief The QMailPassThroughCodec class uses the QMailCodec interface to move data between streams |
|
979 without coding or decoding. |
|
980 |
|
981 \ingroup messaginglibrary |
|
982 |
|
983 The QMailPassThroughCodec allows client code to use the same QMailCodec interface to convert data between |
|
984 different ASCII encodings, or no encoding at all, without having to be aware of the details involved. |
|
985 |
|
986 The pass-through codec is primarily useful when communicating with SMTP servers supporting the |
|
987 \l{http://www.ietf.org/rfc/rfc1652.txt} {RFC 1652} (8BITMIME) extension, which permits the exchange |
|
988 of data without coding via 7-bit ASCII. |
|
989 |
|
990 A QMailPassThroughCodec can be instantiated directly, but is more likely to be used polymorphically: |
|
991 |
|
992 \code |
|
993 // Get an object to perform the encoding required for the current server |
|
994 QMailCodec* encoder = getCodecForServer(currentServer()); |
|
995 |
|
996 // If the codec returned is a QMailPassThroughCodec, the input data will |
|
997 // be written to the output stream without encoding to 7-bit ASCII |
|
998 encoder->encode(outputStream, inputStream); |
|
999 \endcode |
|
1000 |
|
1001 \sa QMailCodec |
|
1002 */ |
|
1003 |
|
1004 /*! \reimp */ |
|
1005 QString QMailPassThroughCodec::name() const |
|
1006 { |
|
1007 return "QMailPassThroughCodec"; |
|
1008 } |
|
1009 |
|
1010 /*! \internal */ |
|
1011 void QMailPassThroughCodec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk) |
|
1012 { |
|
1013 writeStream(out, reinterpret_cast<const char*>(it), length); |
|
1014 |
|
1015 Q_UNUSED(finalChunk) |
|
1016 } |
|
1017 |
|
1018 /*! \internal */ |
|
1019 void QMailPassThroughCodec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk) |
|
1020 { |
|
1021 writeStream(out, it, length); |
|
1022 |
|
1023 Q_UNUSED(finalChunk) |
|
1024 } |
|
1025 |
|
1026 |
|
1027 /*! |
|
1028 \class QMailLineEndingCodec |
|
1029 |
|
1030 \preliminary |
|
1031 \brief The QMailLineEndingCodec class encodes textual data to use CR/LF line endings required for SMTP transmission. |
|
1032 |
|
1033 \ingroup messaginglibrary |
|
1034 |
|
1035 The QMailLineEndingCodec allows client code to use the QMailCodec interface to encode textual data |
|
1036 from the local line-ending convention to the CR/LF convention required for SMTP transmission. The |
|
1037 codec will convert from single carriage return or single line feed line-endings to CR/LF pairs, or |
|
1038 will preserve data already using the correct encoding. |
|
1039 |
|
1040 Decoded data will have CR/LF pairs, as well as single carriage returns or single line feeds, converted to \c \n. |
|
1041 |
|
1042 An instance of QMailLineEndingCodec contains state information about the encoding or decoding |
|
1043 operation it performs, so an instance should be used for a single coding operation only. |
|
1044 |
|
1045 \sa QMailCodec |
|
1046 */ |
|
1047 |
|
1048 /*! |
|
1049 Constructs a codec object for coding text data, converting between the local line-ending |
|
1050 convention and the CR/LF line-ending sequence required for SMTP transmission. |
|
1051 */ |
|
1052 QMailLineEndingCodec::QMailLineEndingCodec() |
|
1053 : _lastChar(0) |
|
1054 { |
|
1055 } |
|
1056 |
|
1057 /*! \reimp */ |
|
1058 QString QMailLineEndingCodec::name() const |
|
1059 { |
|
1060 return "QMailLineEndingCodec"; |
|
1061 } |
|
1062 |
|
1063 /*! \internal */ |
|
1064 void QMailLineEndingCodec::encodeChunk(QDataStream& out, const unsigned char* it, int length, bool finalChunk) |
|
1065 { |
|
1066 const unsigned char* const end = it + length; |
|
1067 |
|
1068 const unsigned char* begin = it; |
|
1069 while (it != end) |
|
1070 { |
|
1071 const unsigned char input = *it; |
|
1072 if (input == CarriageReturn || input == LineFeed) |
|
1073 { |
|
1074 if (_lastChar == CarriageReturn && input == LineFeed) |
|
1075 { |
|
1076 // We have already encoded this character-sequence; skip the input |
|
1077 begin = (it + 1); |
|
1078 } |
|
1079 else |
|
1080 { |
|
1081 // Write the preceding characters |
|
1082 if (it > begin) |
|
1083 writeStream(out, reinterpret_cast<const char*>(begin), (it - begin)); |
|
1084 |
|
1085 // We must replace this character with ascii CRLF |
|
1086 out << CarriageReturn << LineFeed; |
|
1087 begin = (it + 1); |
|
1088 } |
|
1089 } |
|
1090 |
|
1091 _lastChar = input; |
|
1092 ++it; |
|
1093 } |
|
1094 |
|
1095 if (it > begin) |
|
1096 { |
|
1097 // Write the remaining characters |
|
1098 writeStream(out, reinterpret_cast<const char*>(begin), (it - begin)); |
|
1099 } |
|
1100 |
|
1101 Q_UNUSED(finalChunk) |
|
1102 } |
|
1103 |
|
1104 /*! \internal */ |
|
1105 void QMailLineEndingCodec::decodeChunk(QDataStream& out, const char* it, int length, bool finalChunk) |
|
1106 { |
|
1107 const char* const end = it + length; |
|
1108 |
|
1109 const char* begin = it; |
|
1110 while (it != end) |
|
1111 { |
|
1112 const char input = *it; |
|
1113 if (input == CarriageReturn || input == LineFeed) |
|
1114 { |
|
1115 if (_lastChar == CarriageReturn && input == LineFeed) |
|
1116 { |
|
1117 // We have already processed this sequence |
|
1118 begin = (it + 1); |
|
1119 } |
|
1120 else |
|
1121 { |
|
1122 // Write the preceding characters |
|
1123 if (it > begin) |
|
1124 writeStream(out, begin, (it - begin)); |
|
1125 |
|
1126 // We should output the local newline sequence, but we can't |
|
1127 // because we don't know what it is, and C++ translation-from-\n will |
|
1128 // only work if the stream is a file... |
|
1129 out << static_cast<unsigned char>('\n'); |
|
1130 begin = (it + 1); |
|
1131 } |
|
1132 } |
|
1133 |
|
1134 _lastChar = input; |
|
1135 ++it; |
|
1136 } |
|
1137 |
|
1138 if (it > begin) |
|
1139 { |
|
1140 // Write the remaining characters |
|
1141 writeStream(out, begin, (it - begin)); |
|
1142 } |
|
1143 |
|
1144 Q_UNUSED(finalChunk) |
|
1145 } |
|
1146 |