qtmobility/src/versit/qversitreader_p.cpp
changeset 4 90517678cc4f
parent 1 2b40d63a9c3d
child 5 453da2cfceef
equal deleted inserted replaced
1:2b40d63a9c3d 4:90517678cc4f
    54 #define MAX_VERSIT_DOCUMENT_NESTING_DEPTH 20
    54 #define MAX_VERSIT_DOCUMENT_NESTING_DEPTH 20
    55 
    55 
    56 /*!
    56 /*!
    57   \class LineReader
    57   \class LineReader
    58   \brief The LineReader class is a wrapper around a QIODevice that allows line-by-line reading.
    58   \brief The LineReader class is a wrapper around a QIODevice that allows line-by-line reading.
       
    59   \internal
    59 
    60 
    60   This class keeps an internal buffer which it uses to temporarily store data which it has read from
    61   This class keeps an internal buffer which it uses to temporarily store data which it has read from
    61   the device but not returned to the user.
    62   the device but not returned to the user.
    62  */
    63  */
    63 
    64 
   148  *
   149  *
   149  * Returns a VersitCursor containing and selecting the line.
   150  * Returns a VersitCursor containing and selecting the line.
   150  */
   151  */
   151 bool LineReader::tryReadLine(VersitCursor &cursor, bool atEnd)
   152 bool LineReader::tryReadLine(VersitCursor &cursor, bool atEnd)
   152 {
   153 {
   153     int crlfPos;
   154     int crlfPos = -1;
   154 
   155 
   155     QByteArray space = VersitUtils::encode(' ', mCodec);
   156     QByteArray space = VersitUtils::encode(' ', mCodec);
   156     QByteArray tab = VersitUtils::encode('\t', mCodec);
   157     QByteArray tab = VersitUtils::encode('\t', mCodec);
   157     int spaceLength = space.length();
   158     int spaceLength = space.length();
   158 
   159 
   192             return false;
   193             return false;
   193         }
   194         }
   194     }
   195     }
   195 }
   196 }
   196 
   197 
       
   198 /*! Links the signals from this to the signals of \a reader. */
       
   199 void QVersitReaderPrivate::init(QVersitReader* reader)
       
   200 {
       
   201     qRegisterMetaType<QVersitReader::State>("QVersitReader::State");
       
   202     connect(this, SIGNAL(stateChanged(QVersitReader::State)),
       
   203             reader, SIGNAL(stateChanged(QVersitReader::State)),Qt::DirectConnection);
       
   204     connect(this, SIGNAL(resultsAvailable()),
       
   205             reader, SIGNAL(resultsAvailable()), Qt::DirectConnection);
       
   206 }
       
   207 
   197 /*! Construct a reader. */
   208 /*! Construct a reader. */
   198 QVersitReaderPrivate::QVersitReaderPrivate()
   209 QVersitReaderPrivate::QVersitReaderPrivate()
   199     : mIoDevice(0),
   210     : mIoDevice(0),
   200     mDocumentNestingLevel(0),
   211     mDocumentNestingLevel(0),
   201     mDefaultCodec(QTextCodec::codecForName("UTF-8")),
   212     mDefaultCodec(QTextCodec::codecForName("UTF-8")),
   202     mState(QVersitReader::InactiveState),
   213     mState(QVersitReader::InactiveState),
   203     mError(QVersitReader::NoError),
   214     mError(QVersitReader::NoError),
   204     mIsCanceling(false)
   215     mIsCanceling(false)
   205 {
   216 {
       
   217     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("AGENT")),
       
   218                          QVersitProperty::VersitDocumentType);
       
   219     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("AGENT")),
       
   220                          QVersitProperty::VersitDocumentType);
       
   221     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("N")),
       
   222                          QVersitProperty::CompoundType);
       
   223     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("N")),
       
   224                          QVersitProperty::CompoundType);
       
   225     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("ADR")),
       
   226                          QVersitProperty::CompoundType);
       
   227     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("ADR")),
       
   228                          QVersitProperty::CompoundType);
       
   229     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("GEO")),
       
   230                          QVersitProperty::CompoundType);
       
   231     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("GEO")),
       
   232                          QVersitProperty::CompoundType);
       
   233     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("ORG")),
       
   234                          QVersitProperty::CompoundType);
       
   235     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("ORG")),
       
   236                          QVersitProperty::CompoundType);
       
   237     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("NICKNAMES")),
       
   238                          QVersitProperty::ListType);
       
   239     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("NICKNAMES")),
       
   240                          QVersitProperty::ListType);
       
   241     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard21Type, QString::fromAscii("CATEGORIES")),
       
   242                          QVersitProperty::ListType);
       
   243     mValueTypeMap.insert(qMakePair(QVersitDocument::VCard30Type, QString::fromAscii("CATEGORIES")),
       
   244                          QVersitProperty::ListType);
   206 }
   245 }
   207 
   246 
   208 /*! Destroy a reader. */
   247 /*! Destroy a reader. */
   209 QVersitReaderPrivate::~QVersitReaderPrivate()
   248 QVersitReaderPrivate::~QVersitReaderPrivate()
   210 {
   249 {
   243             if (document.isEmpty())
   282             if (document.isEmpty())
   244                 break;
   283                 break;
   245             else {
   284             else {
   246                 QMutexLocker locker(&mMutex);
   285                 QMutexLocker locker(&mMutex);
   247                 mVersitDocuments.append(document);
   286                 mVersitDocuments.append(document);
   248                 emit resultsAvailable(mVersitDocuments);
   287                 emit resultsAvailable();
   249             }
   288             }
   250         } else {
   289         } else {
   251             setError(QVersitReader::ParseError);
   290             setError(QVersitReader::ParseError);
   252             if (lineReader.odometer() == oldPos)
   291             if (lineReader.odometer() == oldPos)
   253                 break;
   292                 break;
   399                                                 LineReader& lineReader)
   438                                                 LineReader& lineReader)
   400 {
   439 {
   401     property.setParameters(extractVCard21PropertyParams(cursor, lineReader.codec()));
   440     property.setParameters(extractVCard21PropertyParams(cursor, lineReader.codec()));
   402 
   441 
   403     QByteArray value = extractPropertyValue(cursor);
   442     QByteArray value = extractPropertyValue(cursor);
   404     if (property.name() == QLatin1String("AGENT")) {
   443     if (mValueTypeMap.value(qMakePair(QVersitDocument::VCard21Type, property.name()))
       
   444             == QVersitProperty::VersitDocumentType) {
   405         // Hack to handle cases where start of document is on the same or next line as "AGENT:"
   445         // Hack to handle cases where start of document is on the same or next line as "AGENT:"
   406         // XXX: Handle non-ASCII charsets in nested AGENT documents.
       
   407         bool foundBegin = false;
   446         bool foundBegin = false;
   408         if (value == "BEGIN:VCARD") {
   447         if (value == "BEGIN:VCARD") {
   409             foundBegin = true;
   448             foundBegin = true;
   410         } else if (value.isEmpty()) {
   449         } else if (value.isEmpty()) {
   411         } else {
   450         } else {
   412             property = QVersitProperty();
   451             property = QVersitProperty();
   413             return;
   452             return;
   414         }
   453         }
   415         QVersitDocument agentDocument;
   454         QVersitDocument subDocument;
   416         if (!parseVersitDocument(lineReader, agentDocument, foundBegin)) {
   455         if (!parseVersitDocument(lineReader, subDocument, foundBegin)) {
   417             property = QVersitProperty();
   456             property = QVersitProperty();
   418         } else {
   457         } else {
   419             property.setValue(QVariant::fromValue(agentDocument));
   458             property.setValue(QVariant::fromValue(subDocument));
   420         }
   459         }
   421     } else {
   460     } else {
   422         QTextCodec* codec;
   461         QTextCodec* codec;
   423         QVariant valueVariant(decodeCharset(value, property, lineReader.codec(), &codec));
   462         QVariant valueVariant(decodeCharset(value, property, lineReader.codec(), &codec));
   424         unencode(valueVariant, cursor, property, codec, lineReader);
   463         bool isBinary = unencode(valueVariant, cursor, property, codec, lineReader);
   425         property.setValue(valueVariant);
   464         property.setValue(valueVariant);
       
   465         if (!isBinary) {
       
   466             splitStructuredValue(QVersitDocument::VCard21Type, property, false);
       
   467         }
   426     }
   468     }
   427 }
   469 }
   428 
   470 
   429 /*!
   471 /*!
   430  * Parses the property according to vCard 3.0 syntax.
   472  * Parses the property according to vCard 3.0 syntax.
   436 
   478 
   437     QByteArray value = extractPropertyValue(cursor);
   479     QByteArray value = extractPropertyValue(cursor);
   438 
   480 
   439     QTextCodec* codec;
   481     QTextCodec* codec;
   440     QString valueString(decodeCharset(value, property, lineReader.codec(), &codec));
   482     QString valueString(decodeCharset(value, property, lineReader.codec(), &codec));
   441     VersitUtils::removeBackSlashEscaping(valueString);
   483 
   442 
   484     if (mValueTypeMap.value(qMakePair(QVersitDocument::VCard30Type, property.name()))
   443     if (property.name() == QLatin1String("AGENT")) {
   485             == QVersitProperty::VersitDocumentType) {
       
   486         removeBackSlashEscaping(valueString);
   444         // Make a line reader from the value of the property.
   487         // Make a line reader from the value of the property.
   445         QByteArray agentValue(codec->fromUnicode(valueString));
   488         QByteArray subDocumentValue(codec->fromUnicode(valueString));
   446         QBuffer agentData(&agentValue);
   489         QBuffer subDocumentData(&subDocumentValue);
   447         agentData.open(QIODevice::ReadOnly);
   490         subDocumentData.open(QIODevice::ReadOnly);
   448         agentData.seek(0);
   491         subDocumentData.seek(0);
   449         LineReader agentLineReader(&agentData, codec);
   492         LineReader subDocumentLineReader(&subDocumentData, codec);
   450 
   493 
   451         QVersitDocument agentDocument;
   494         QVersitDocument subDocument;
   452         if (!parseVersitDocument(agentLineReader, agentDocument)) {
   495         if (!parseVersitDocument(subDocumentLineReader, subDocument)) {
   453             property = QVersitProperty();
   496             property = QVersitProperty();
   454         } else {
   497         } else {
   455             property.setValue(QVariant::fromValue(agentDocument));
   498             property.setValue(QVariant::fromValue(subDocument));
   456         }
   499         }
   457     } else {
   500     } else {
   458         QVariant valueVariant(valueString);
   501         QVariant valueVariant(valueString);
   459         unencode(valueVariant, cursor, property, codec, lineReader);
   502         bool isBinary = unencode(valueVariant, cursor, property, codec, lineReader);
   460         if (valueVariant.type() == QVariant::ByteArray) {
       
   461             // hack: add the charset parameter back in (even if there wasn't one to start with and
       
   462             // the default codec was used).  This will help later on if someone calls valueString()
       
   463             // on the property.
       
   464             property.insertParameter(QLatin1String("CHARSET"), QLatin1String(codec->name()));
       
   465         }
       
   466         property.setValue(valueVariant);
   503         property.setValue(valueVariant);
       
   504         if (!isBinary) {
       
   505             bool isList = splitStructuredValue(QVersitDocument::VCard30Type, property, true);
       
   506             // Do backslash unescaping
       
   507             if (isList) {
       
   508                 QStringList list = property.value<QStringList>();
       
   509                 for (int i = 0; i < list.length(); i++) {
       
   510                     removeBackSlashEscaping(list[i]);
       
   511                 }
       
   512                 property.setValue(list);
       
   513             } else {
       
   514                 QString value = property.value();
       
   515                 removeBackSlashEscaping(value);
       
   516                 property.setValue(value);
       
   517             }
       
   518         }
   467     }
   519     }
   468 }
   520 }
   469 
   521 
   470 /*!
   522 /*!
   471  * Sets version to \a document if \a property contains a supported version.
   523  * Sets version to \a document if \a property contains a supported version.
   489     return valid;
   541     return valid;
   490 }
   542 }
   491 
   543 
   492 /*!
   544 /*!
   493  * On entry, \a value should hold a QString.  On exit, it may be either a QString or a QByteArray.
   545  * On entry, \a value should hold a QString.  On exit, it may be either a QString or a QByteArray.
   494  */
   546  * Returns true if and only if the property value is turned into a QByteArray.
   495 void QVersitReaderPrivate::unencode(QVariant& value, VersitCursor& cursor,
   547  */
       
   548 bool QVersitReaderPrivate::unencode(QVariant& value, VersitCursor& cursor,
   496                                     QVersitProperty& property, QTextCodec* codec,
   549                                     QVersitProperty& property, QTextCodec* codec,
   497                                     LineReader& lineReader) const
   550                                     LineReader& lineReader) const
   498 {
   551 {
   499     Q_ASSERT(value.type() == QVariant::String);
   552     Q_ASSERT(value.type() == QVariant::String);
   500 
   553 
   513         }
   566         }
   514         decodeQuotedPrintable(valueString);
   567         decodeQuotedPrintable(valueString);
   515         // Remove the encoding parameter as the value is now decoded
   568         // Remove the encoding parameter as the value is now decoded
   516         property.removeParameters(QLatin1String("ENCODING"));
   569         property.removeParameters(QLatin1String("ENCODING"));
   517         value.setValue(valueString);
   570         value.setValue(valueString);
       
   571         return false;
   518     } else if (property.parameters().contains(QLatin1String("ENCODING"), QLatin1String("BASE64"))
   572     } else if (property.parameters().contains(QLatin1String("ENCODING"), QLatin1String("BASE64"))
   519         || property.parameters().contains(QLatin1String("ENCODING"), QLatin1String("B"))
   573         || property.parameters().contains(QLatin1String("ENCODING"), QLatin1String("B"))
   520         || property.parameters().contains(QLatin1String("TYPE"), QLatin1String("BASE64"))
   574         || property.parameters().contains(QLatin1String("TYPE"), QLatin1String("BASE64"))
   521         || property.parameters().contains(QLatin1String("TYPE"), QLatin1String("B"))) {
   575         || property.parameters().contains(QLatin1String("TYPE"), QLatin1String("B"))) {
   522         value.setValue(QByteArray::fromBase64(valueString.toAscii()));
   576         value.setValue(QByteArray::fromBase64(valueString.toAscii()));
   524         property.removeParameters(QLatin1String("ENCODING"));
   578         property.removeParameters(QLatin1String("ENCODING"));
   525         // Hack: add the charset parameter back in (even if there wasn't one to start with and
   579         // Hack: add the charset parameter back in (even if there wasn't one to start with and
   526         // the default codec was used).  This will help later on if someone calls valueString()
   580         // the default codec was used).  This will help later on if someone calls valueString()
   527         // on the property.
   581         // on the property.
   528         property.insertParameter(QLatin1String("CHARSET"), QLatin1String(codec->name()));
   582         property.insertParameter(QLatin1String("CHARSET"), QLatin1String(codec->name()));
   529     }
   583         return true;
       
   584     }
       
   585     return false;
   530 }
   586 }
   531 
   587 
   532 /*!
   588 /*!
   533  * Decodes \a value, after working out what charset it is in using the context of \a property and
   589  * Decodes \a value, after working out what charset it is in using the context of \a property and
   534  * returns it.  The codec used to decode is returned in \a codec.
   590  * returns it.  The codec used to decode is returned in \a codec.
   580             }
   636             }
   581         }
   637         }
   582     }
   638     }
   583 }
   639 }
   584 
   640 
   585 
       
   586 /*!
   641 /*!
   587  * Extracts the groups and the name of the property using \a codec to determine the delimiters
   642  * Extracts the groups and the name of the property using \a codec to determine the delimiters
   588  *
   643  *
   589  * On entry, \a line should select a whole line.
   644  * On entry, \a line should select a whole line.
   590  * On exit, \a line will be updated to point after the groups and name.
   645  * On exit, \a line will be updated to point after the groups and name.
   672     QMultiHash<QString,QString> result;
   727     QMultiHash<QString,QString> result;
   673     QList<QByteArray> paramList = extractParams(line, codec);
   728     QList<QByteArray> paramList = extractParams(line, codec);
   674     while (!paramList.isEmpty()) {
   729     while (!paramList.isEmpty()) {
   675         QByteArray param = paramList.takeLast();
   730         QByteArray param = paramList.takeLast();
   676         QString name(paramName(param, codec));
   731         QString name(paramName(param, codec));
   677         VersitUtils::removeBackSlashEscaping(name);
   732         removeBackSlashEscaping(name);
   678         QString values = paramValue(param, codec);
   733         QString values = paramValue(param, codec);
   679         QList<QString> valueList = values.split(QLatin1Char(','), QString::SkipEmptyParts);
   734         QStringList valueList = splitValue(values, QLatin1Char(','), QString::SkipEmptyParts, true);
   680         QString buffer; // for any part ending in a backslash, join it to the next.
       
   681         foreach (QString value, valueList) {
   735         foreach (QString value, valueList) {
   682             if (value.endsWith(QLatin1Char('\\')) && !value.endsWith(QLatin1String("\\\\"))) {
   736             removeBackSlashEscaping(value);
   683                 value.chop(1);
   737             result.insert(name, value);
   684                 buffer.append(value);
   738         }
   685                 buffer.append(QLatin1Char(',')); // because the comma got nuked by split()
   739     }
   686             }
       
   687             else {
       
   688                 buffer.append(value);
       
   689                 VersitUtils::removeBackSlashEscaping(buffer);
       
   690                 result.insert(name, buffer);
       
   691                 buffer.clear();
       
   692             }
       
   693         }
       
   694     }
       
   695 
       
   696     return result;
   740     return result;
   697 }
   741 }
   698 
   742 
   699 
   743 
   700 /*!
   744 /*!
   815     const char* textData = text.constData();
   859     const char* textData = text.constData();
   816     const char* matchData = match.constData();
   860     const char* matchData = match.constData();
   817     return memcmp(textData+index, matchData, n) == 0;
   861     return memcmp(textData+index, matchData, n) == 0;
   818 }
   862 }
   819 
   863 
       
   864 /*!
       
   865  * If the \a type and the \a property's name is known to contain a structured value, \a property's
       
   866  * value is split according to the type of structuring (compound vs. list) it is known to have.
       
   867  * Returns true if and only if such a split happened (ie. the property value holds a QStringList on
       
   868  * exit).
       
   869  */
       
   870 bool QVersitReaderPrivate::splitStructuredValue(
       
   871         QVersitDocument::VersitType type, QVersitProperty& property,
       
   872         bool hasEscapedBackslashes) const
       
   873 {
       
   874     QVariant variant = property.variantValue();
       
   875     QPair<QVersitDocument::VersitType,QString> key = qMakePair(type, property.name());
       
   876     if (mValueTypeMap.contains(key)) {
       
   877         if (mValueTypeMap.value(key) == QVersitProperty::CompoundType) {
       
   878             variant.setValue(splitValue(variant.toString(), QLatin1Char(';'),
       
   879                                         QString::KeepEmptyParts, hasEscapedBackslashes));
       
   880             property.setValue(variant);
       
   881             property.setValueType(QVersitProperty::CompoundType);
       
   882         } else if (mValueTypeMap.value(key) == QVersitProperty::ListType) {
       
   883             variant.setValue(splitValue(variant.toString(), QLatin1Char(','),
       
   884                                         QString::SkipEmptyParts, hasEscapedBackslashes));
       
   885             property.setValue(variant);
       
   886             property.setValueType(QVersitProperty::ListType);
       
   887         }
       
   888         return true;
       
   889     }
       
   890     return false;
       
   891 }
       
   892 
       
   893 /*!
       
   894  * Splits the \a string into substrings wherever \a sep occurs.
       
   895  * If \a hasEscapedBackslashes is false, then a \a sep preceded by a backslash is not considered
       
   896  * a split point (but the backslash is removed).
       
   897  * If \a hasEscapedBackslashes is true, then a \a sep preceded by an odd number of backslashes is
       
   898  * not considered a split point (but one backslash is removed).
       
   899  */
       
   900 QStringList QVersitReaderPrivate::splitValue(const QString& string,
       
   901                                              const QChar& sep,
       
   902                                              QString::SplitBehavior behaviour,
       
   903                                              bool hasEscapedBackslashes)
       
   904 {
       
   905     QStringList list;
       
   906     bool isEscaped = false; // is the current character escaped
       
   907     int segmentStartIndex = 0;
       
   908     QString segment;
       
   909     for (int i = 0; i < string.length(); i++) {
       
   910         if (string.at(i) == QLatin1Char('\\')) {
       
   911             if (hasEscapedBackslashes)
       
   912                 isEscaped = !isEscaped; // two consecutive backslashes make isEscaped false
       
   913             else
       
   914                 isEscaped = true;
       
   915         } else if (string.at(i) == sep) {
       
   916             if (isEscaped) {
       
   917                 // we see an escaped separator - remove the backslash
       
   918                 segment += string.midRef(segmentStartIndex, i-segmentStartIndex-1);
       
   919                 segment += sep;
       
   920             } else {
       
   921                 // we see a separator
       
   922                 segment += string.midRef(segmentStartIndex, i - segmentStartIndex);
       
   923                 if (behaviour == QString::KeepEmptyParts || !segment.isEmpty())
       
   924                     list.append(segment);
       
   925                 segment.clear();
       
   926             }
       
   927             segmentStartIndex = i+1;
       
   928             isEscaped = false;
       
   929         } else { // normal character - keep going
       
   930             isEscaped = false;
       
   931         }
       
   932     }
       
   933     // The rest of the string after the last sep.
       
   934     segment += string.midRef(segmentStartIndex);
       
   935     if (behaviour == QString::KeepEmptyParts || !segment.isEmpty())
       
   936         list.append(segment);
       
   937     return list;
       
   938 }
       
   939 
       
   940 /*!
       
   941  * Removes backslash escaping for line breaks (CRLFs), colons, semicolons, backslashes and commas
       
   942  * according to RFC 2426.  This is called on parameter names and values and property values.
       
   943  * Colons ARE unescaped because the text of RFC2426 suggests that they should be.
       
   944  */
       
   945 void QVersitReaderPrivate::removeBackSlashEscaping(QString& text)
       
   946 {
       
   947     if (!(text.startsWith(QLatin1Char('"')) && text.endsWith(QLatin1Char('"')))) {
       
   948         /* replaces \; with ;
       
   949                     \, with ,
       
   950                     \: with :
       
   951                     \\ with \
       
   952          */
       
   953         text.replace(QRegExp(QLatin1String("\\\\([;,:\\\\])")), QLatin1String("\\1"));
       
   954         // replaces \n with a CRLF
       
   955         text.replace(QLatin1String("\\n"), QLatin1String("\r\n"), Qt::CaseInsensitive);
       
   956     }
       
   957 }
       
   958 
       
   959 
   820 #include "moc_qversitreader_p.cpp"
   960 #include "moc_qversitreader_p.cpp"