• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_header_parsing.cpp

00001 /*  -*- c++ -*-
00002     kmime_header_parsing.cpp
00003 
00004     KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001-2002 Marc Mutz <mutz@kde.org>
00006 
00007     This library is free software; you can redistribute it and/or
00008     modify it under the terms of the GNU Library General Public
00009     License as published by the Free Software Foundation; either
00010     version 2 of the License, or (at your option) any later version.
00011 
00012     This library is distributed in the hope that it will be useful,
00013     but WITHOUT ANY WARRANTY; without even the implied warranty of
00014     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00015     Library General Public License for more details.
00016 
00017     You should have received a copy of the GNU Library General Public License
00018     along with this library; see the file COPYING.LIB.  If not, write to
00019     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00020     Boston, MA 02110-1301, USA.
00021 */
00022 
00023 #include "kmime_header_parsing.h"
00024 
00025 #include "kmime_codecs.h"
00026 #include "kmime_util.h"
00027 #include "kmime_dateformatter.h"
00028 #include "kmime_warning.h"
00029 
00030 #include <kglobal.h>
00031 #include <kcharsets.h>
00032 
00033 #include <QtCore/QTextCodec>
00034 #include <QtCore/QMap>
00035 #include <QtCore/QStringList>
00036 #include <QtCore/QUrl>
00037 
00038 #include <ctype.h> // for isdigit
00039 #include <cassert>
00040 
00041 using namespace KMime;
00042 using namespace KMime::Types;
00043 
00044 namespace KMime {
00045 
00046 namespace Types {
00047 
00048 // QUrl::fromAce is extremely expensive, so only use it when necessary.
00049 // Fortunately, the presence of IDNA is readily detected with a substring match...
00050 static inline QString QUrl_fromAce_wrapper( const QString & domain )
00051 {
00052     if ( domain.contains( QLatin1String( "xn--" ) ) )
00053         return QUrl::fromAce( domain.toLatin1() );
00054     else
00055         return domain;
00056 }
00057 
00058 static QString addr_spec_as_string( const AddrSpec & as, bool pretty )
00059 {
00060   if ( as.isEmpty() ) {
00061     return QString();
00062   }
00063 
00064   bool needsQuotes = false;
00065   QString result;
00066   result.reserve( as.localPart.length() + as.domain.length() + 1 );
00067   for ( int i = 0 ; i < as.localPart.length() ; ++i ) {
00068     const char ch = as.localPart[i].toLatin1();
00069     if ( ch == '.' || isAText( ch ) ) {
00070       result += ch;
00071     } else {
00072       needsQuotes = true;
00073       if ( ch == '\\' || ch == '"' ) {
00074         result += '\\';
00075       }
00076       result += ch;
00077     }
00078   }
00079   const QString dom = pretty ? QUrl_fromAce_wrapper( as.domain ) : as.domain ;
00080   if ( needsQuotes ) {
00081     return '"' + result + "\"@" + dom;
00082   } else {
00083     return result + '@' + dom;
00084   }
00085 }
00086 
00087 QString AddrSpec::asString() const
00088 {
00089     return addr_spec_as_string( *this, false );
00090 }
00091 
00092 QString AddrSpec::asPrettyString() const
00093 {
00094     return addr_spec_as_string( *this, true );
00095 }
00096 
00097 bool AddrSpec::isEmpty() const
00098 {
00099   return localPart.isEmpty() && domain.isEmpty();
00100 }
00101 
00102 QByteArray Mailbox::address() const
00103 {
00104   return mAddrSpec.asString().toLatin1();
00105 }
00106 
00107 AddrSpec Mailbox::addrSpec() const
00108 {
00109   return mAddrSpec;
00110 }
00111 
00112 QString Mailbox::name() const
00113 {
00114   return mDisplayName;
00115 }
00116 
00117 void Mailbox::setAddress( const AddrSpec &addr )
00118 {
00119   mAddrSpec = addr;
00120 }
00121 
00122 void Mailbox::setAddress( const QByteArray &addr )
00123 {
00124   const char *cursor = addr.constData();
00125   if ( !HeaderParsing::parseAngleAddr( cursor,
00126                                        cursor + addr.length(), mAddrSpec ) ) {
00127     if ( !HeaderParsing::parseAddrSpec( cursor, cursor + addr.length(),
00128                                         mAddrSpec ) ) {
00129       kWarning() << "Invalid address";
00130       return;
00131     }
00132   }
00133 }
00134 
00135 void Mailbox::setName( const QString &name )
00136 {
00137   mDisplayName = name;
00138 }
00139 
00140 void Mailbox::setNameFrom7Bit( const QByteArray &name,
00141                                const QByteArray &defaultCharset )
00142 {
00143   QByteArray cs;
00144   mDisplayName = decodeRFC2047String( name, cs, defaultCharset, false );
00145 }
00146 
00147 bool Mailbox::hasAddress() const
00148 {
00149   return !mAddrSpec.isEmpty();
00150 }
00151 
00152 bool Mailbox::hasName() const
00153 {
00154   return !mDisplayName.isEmpty();
00155 }
00156 
00157 QString Mailbox::prettyAddress() const
00158 {
00159   if ( !hasName() ) {
00160     return address();
00161   }
00162   QString s = name();
00163   if ( hasAddress() ) {
00164     s += QLatin1String(" <") + address() + QLatin1Char('>');
00165   }
00166   return s;
00167 }
00168 
00169 void Mailbox::fromUnicodeString( const QString &s )
00170 {
00171   from7BitString( encodeRFC2047String( s, "utf-8", false ) );
00172 }
00173 
00174 void Mailbox::from7BitString( const QByteArray &s )
00175 {
00176   const char *cursor = s.constData();
00177   HeaderParsing::parseMailbox( cursor, cursor + s.length(), *this );
00178 }
00179 
00180 QByteArray KMime::Types::Mailbox::as7BitString( const QByteArray &encCharset ) const
00181 {
00182   if ( !hasName() ) {
00183     return address();
00184   }
00185   QByteArray rv;
00186   if ( isUsAscii( name() ) ) {
00187     QByteArray tmp = name().toLatin1();
00188     addQuotes( tmp, false );
00189     rv += tmp;
00190   } else {
00191     rv += encodeRFC2047String( name(), encCharset, true );
00192   }
00193   if ( hasAddress() ) {
00194     rv += " <" + address() + '>';
00195   }
00196   return rv;
00197 }
00198 
00199 } // namespace Types
00200 
00201 namespace HeaderParsing {
00202 
00203 // parse the encoded-word (scursor points to after the initial '=')
00204 bool parseEncodedWord( const char* &scursor, const char * const send,
00205                        QString &result, QByteArray &language,
00206                        QByteArray &usedCS, const QByteArray &defaultCS,
00207                        bool forceCS )
00208 {
00209   // make sure the caller already did a bit of the work.
00210   assert( *(scursor-1) == '=' );
00211 
00212   //
00213   // STEP 1:
00214   // scan for the charset/language portion of the encoded-word
00215   //
00216 
00217   char ch = *scursor++;
00218 
00219   if ( ch != '?' ) {
00220     // kDebug(5320) << "first";
00221     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00222     return false;
00223   }
00224 
00225   // remember start of charset (ie. just after the initial "=?") and
00226   // language (just after the first '*') fields:
00227   const char * charsetStart = scursor;
00228   const char * languageStart = 0;
00229 
00230   // find delimiting '?' (and the '*' separating charset and language
00231   // tags, if any):
00232   for ( ; scursor != send ; scursor++ ) {
00233     if ( *scursor == '?') {
00234       break;
00235     } else if ( *scursor == '*' && languageStart == 0 ) {
00236       languageStart = scursor + 1;
00237     }
00238   }
00239 
00240   // not found? can't be an encoded-word!
00241   if ( scursor == send || *scursor != '?' ) {
00242     // kDebug(5320) << "second";
00243     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00244     return false;
00245   }
00246 
00247   // extract the language information, if any (if languageStart is 0,
00248   // language will be null, too):
00249   QByteArray maybeLanguage( languageStart, scursor - languageStart );
00250   // extract charset information (keep in mind: the size given to the
00251   // ctor is one off due to the \0 terminator):
00252   QByteArray maybeCharset( charsetStart,
00253                            ( languageStart ? languageStart - 1 : scursor ) - charsetStart );
00254 
00255   //
00256   // STEP 2:
00257   // scan for the encoding portion of the encoded-word
00258   //
00259 
00260   // remember start of encoding (just _after_ the second '?'):
00261   scursor++;
00262   const char * encodingStart = scursor;
00263 
00264   // find next '?' (ending the encoding tag):
00265   for ( ; scursor != send ; scursor++ ) {
00266     if ( *scursor == '?' ) {
00267       break;
00268     }
00269   }
00270 
00271   // not found? Can't be an encoded-word!
00272   if ( scursor == send || *scursor != '?' ) {
00273     // kDebug(5320) << "third";
00274     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00275     return false;
00276   }
00277 
00278   // extract the encoding information:
00279   QByteArray maybeEncoding( encodingStart, scursor - encodingStart );
00280 
00281   // kDebug(5320) << "parseEncodedWord: found charset == \"" << maybeCharset
00282   //         << "\"; language == \"" << maybeLanguage
00283   //         << "\"; encoding == \"" << maybeEncoding << "\"";
00284 
00285   //
00286   // STEP 3:
00287   // scan for encoded-text portion of encoded-word
00288   //
00289 
00290   // remember start of encoded-text (just after the third '?'):
00291   scursor++;
00292   const char * encodedTextStart = scursor;
00293 
00294   // find next '?' (ending the encoded-text):
00295   for ( ; scursor != send ; scursor++ ) {
00296     if ( *scursor == '?' ) {
00297       break;
00298     }
00299   }
00300 
00301   // not found? Can't be an encoded-word!
00302   // ### maybe evaluate it nonetheless if the rest is OK?
00303   if ( scursor == send || *scursor != '?' ) {
00304     // kDebug(5320) << "fourth";
00305     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00306     return false;
00307   }
00308   scursor++;
00309   // check for trailing '=':
00310   if ( scursor == send || *scursor != '=' ) {
00311     // kDebug(5320) << "fifth";
00312     KMIME_WARN_PREMATURE_END_OF( EncodedWord );
00313     return false;
00314   }
00315   scursor++;
00316 
00317   // set end sentinel for encoded-text:
00318   const char * const encodedTextEnd = scursor - 2;
00319 
00320   //
00321   // STEP 4:
00322   // setup decoders for the transfer encoding and the charset
00323   //
00324 
00325   // try if there's a codec for the encoding found:
00326   Codec * codec = Codec::codecForName( maybeEncoding );
00327   if ( !codec ) {
00328     KMIME_WARN_UNKNOWN( Encoding, maybeEncoding );
00329     return false;
00330   }
00331 
00332   // get an instance of a corresponding decoder:
00333   Decoder * dec = codec->makeDecoder();
00334   assert( dec );
00335 
00336   // try if there's a (text)codec for the charset found:
00337   bool matchOK = false;
00338   QTextCodec *textCodec = 0;
00339   if ( forceCS || maybeCharset.isEmpty() ) {
00340     textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00341     usedCS = cachedCharset( defaultCS );
00342   } else {
00343     textCodec = KGlobal::charsets()->codecForName( maybeCharset, matchOK );
00344     if ( !matchOK ) {  //no suitable codec found => use default charset
00345       textCodec = KGlobal::charsets()->codecForName( defaultCS, matchOK );
00346       usedCS = cachedCharset( defaultCS );
00347     } else {
00348       usedCS = cachedCharset( maybeCharset );
00349     }
00350   }
00351 
00352   if ( !matchOK || !textCodec ) {
00353     KMIME_WARN_UNKNOWN( Charset, maybeCharset );
00354     delete dec;
00355     return false;
00356   };
00357 
00358   // kDebug(5320) << "mimeName(): \"" << textCodec->name() << "\"";
00359 
00360   // allocate a temporary buffer to store the 8bit text:
00361   int encodedTextLength = encodedTextEnd - encodedTextStart;
00362   QByteArray buffer;
00363   buffer.resize( codec->maxDecodedSizeFor( encodedTextLength ) );
00364   char *bbegin = buffer.data();
00365   char *bend = bbegin + buffer.length();
00366 
00367   //
00368   // STEP 5:
00369   // do the actual decoding
00370   //
00371 
00372   if ( !dec->decode( encodedTextStart, encodedTextEnd, bbegin, bend ) ) {
00373     KMIME_WARN << codec->name() << "codec lies about its maxDecodedSizeFor("
00374                << encodedTextLength << ")\nresult may be truncated";
00375   }
00376 
00377   result = textCodec->toUnicode( buffer.data(), bbegin - buffer.data() );
00378 
00379   // kDebug(5320) << "result now: \"" << result << "\"";
00380   // cleanup:
00381   delete dec;
00382   language = maybeLanguage;
00383 
00384   return true;
00385 }
00386 
// Advances scursor past any run of SP, HTAB, CR and LF, stopping at the
// first other character or at send.
static inline void eatWhiteSpace( const char* &scursor, const char * const send )
{
  for ( ; scursor != send ; ++scursor ) {
    switch ( *scursor ) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      continue; // still whitespace: let the for-loop step over it
    default:
      return;
    }
  }
}
00394 
00395 bool parseAtom( const char * &scursor, const char * const send,
00396                 QString &result, bool allow8Bit )
00397 {
00398   QPair<const char*,int> maybeResult;
00399 
00400   if ( parseAtom( scursor, send, maybeResult, allow8Bit ) ) {
00401     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00402     return true;
00403   }
00404 
00405   return false;
00406 }
00407 
00408 bool parseAtom( const char * &scursor, const char * const send,
00409                 QPair<const char*,int> &result, bool allow8Bit )
00410 {
00411   bool success = false;
00412   const char *start = scursor;
00413 
00414   while ( scursor != send ) {
00415     signed char ch = *scursor++;
00416     if ( ch > 0 && isAText( ch ) ) {
00417       // AText: OK
00418       success = true;
00419     } else if ( allow8Bit && ch < 0 ) {
00420       // 8bit char: not OK, but be tolerant.
00421       KMIME_WARN_8BIT( ch );
00422       success = true;
00423     } else {
00424       // CTL or special - marking the end of the atom:
00425       // re-set sursor to point to the offending
00426       // char and return:
00427       scursor--;
00428       break;
00429     }
00430   }
00431   result.first = start;
00432   result.second = scursor - start;
00433   return success;
00434 }
00435 
00436 bool parseToken( const char * &scursor, const char * const send,
00437                  QString &result, bool allow8Bit )
00438 {
00439   QPair<const char*,int> maybeResult;
00440 
00441   if ( parseToken( scursor, send, maybeResult, allow8Bit ) ) {
00442     result += QString::fromLatin1( maybeResult.first, maybeResult.second );
00443     return true;
00444   }
00445 
00446   return false;
00447 }
00448 
00449 bool parseToken( const char * &scursor, const char * const send,
00450                  QPair<const char*,int> &result, bool allow8Bit )
00451 {
00452   bool success = false;
00453   const char * start = scursor;
00454 
00455   while ( scursor != send ) {
00456     signed char ch = *scursor++;
00457     if ( ch > 0 && isTText( ch ) ) {
00458       // TText: OK
00459       success = true;
00460     } else if ( allow8Bit && ch < 0 ) {
00461       // 8bit char: not OK, but be tolerant.
00462       KMIME_WARN_8BIT( ch );
00463       success = true;
00464     } else {
00465       // CTL or tspecial - marking the end of the atom:
00466       // re-set sursor to point to the offending
00467       // char and return:
00468       scursor--;
00469       break;
00470     }
00471   }
00472   result.first = start;
00473   result.second = scursor - start;
00474   return success;
00475 }
00476 
// Helper for parseGenericQuotedString(): reads the next input char into the
// local variable "ch" and advances "scursor". If the input is already
// exhausted (scursor == send), it emits a premature-end warning and makes
// the *enclosing function* return false. Requires "scursor", "send" and
// "ch" to be in scope where it is used.
00477 #define READ_ch_OR_FAIL if ( scursor == send ) {        \
00478     KMIME_WARN_PREMATURE_END_OF( GenericQuotedString ); \
00479     return false;                                       \
00480   } else {                                              \
00481     ch = *scursor++;                                    \
00482   }
00483 
00484 // known issues:
00485 //
00486 // - doesn't handle quoted CRLF
00487 
// Scans the inside of a construct delimited by openChar/closeChar
// (quoted-string, domain-literal or comment), appending the unfolded and
// unescaped text to result.
//
// scursor   in/out; must point just behind an openChar (or closeChar, see
//           the assert). On a true return it points just behind the
//           delimiter that stopped the scan.
// send      end of input.
// result    text is appended; existing content is kept.
// isCRLF    only consulted for a '\n' that is not preceded by '\r': when
//           false, such an LF followed by SP/HTAB is treated as folding.
//
// Returns true when an unescaped openChar or closeChar was hit; the caller
// inspects *(scursor-1) to find out which. Returns false when the input
// ended first (including in the middle of a quoted-pair or line break).
00488 bool parseGenericQuotedString( const char* &scursor, const char * const send,
00489                                QString &result, bool isCRLF,
00490                                const char openChar, const char closeChar )
00491 {
00492   char ch;
00493   // We are in a quoted-string or domain-literal or comment and the
00494   // cursor points to the first char after the openChar.
00495   // We will apply unfolding and quoted-pair removal.
00496   // We return when we either encounter the end or unescaped openChar
00497   // or closeChar.
00498 
00499   assert( *(scursor-1) == openChar || *(scursor-1) == closeChar );
00500 
00501   while ( scursor != send ) {
00502     ch = *scursor++;
00503 
00504     if ( ch == closeChar || ch == openChar ) {
00505       // end of quoted-string or another opening char:
00506       // let caller decide what to do.
00507       return true;
00508     }
00509 
00510     switch( ch ) {
00511     case '\\':      // quoted-pair
00512       // misses "\" CRLF LWSP-char handling, see rfc822, 3.4.5
00513       READ_ch_OR_FAIL;
00514       KMIME_WARN_IF_8BIT( ch );
00515       result += QChar( ch );
00516       break;
00517     case '\r':
00518       // ###
00519       // The case of lonely '\r' is easy to solve, as they're
00520       // not part of Unix Line-ending conventions.
00521       // But I see a problem if we are given Unix-native
00522       // line-ending-mails, where we cannot determine anymore
00523       // whether a given '\n' was part of a CRLF or was occurring
00524       // on its own.
00525       READ_ch_OR_FAIL;
00526       if ( ch != '\n' ) {
00527         // CR on its own...
00528         KMIME_WARN_LONE( CR );
00529         result += QChar('\r');
00530         scursor--; // points to after the '\r' again
00531       } else {
00532         // CRLF encountered.
00533         // lookahead: check for folding
00534         READ_ch_OR_FAIL;
00535         if ( ch == ' ' || ch == '\t' ) {
00536           // correct folding;
00537           // position cursor behind the CRLF WSP (unfolding)
00538           // and add the WSP to the result
00539           result += QChar( ch );
00540         } else {
00541           // this is the "shouldn't happen"-case. There is a CRLF
00542           // inside a quoted-string without it being part of FWS.
00543           // We take it verbatim.
00544           KMIME_WARN_NON_FOLDING( CRLF );
00545           result += "\r\n";
00546           // the cursor is decremented again, so we need not
00547           // duplicate the whole switch here. "ch" could've been
00548           // anything (incl. openChar or closeChar).
00549           scursor--;
00550         }
00551       }
00552       break;
00553     case '\n':
00554       // Note: CRLF has been handled above already!
00555       // ### LF needs special treatment, depending on whether isCRLF
00556       // is true (we can be sure a lonely '\n' was meant this way) or
00557       // false ('\n' alone could have meant LF or CRLF in the original
00558       // message. This parser assumes CRLF iff the LF is followed by
00559       // either WSP (folding) or NULL (premature end of quoted-string;
00560       // Should be fixed, since NULL is allowed as per rfc822).
00561       READ_ch_OR_FAIL;
00562       if ( !isCRLF && ( ch == ' ' || ch == '\t' ) ) {
00563         // folding
00564         // correct folding
00565         result += QChar( ch );
00566       } else {
00567         // non-folding
00568         KMIME_WARN_LONE( LF );
00569         result += QChar('\n');
00570         // pos is decremented, so we need not duplicate the whole
00571         // switch here. ch could've been anything (incl. <">, "\").
00572         scursor--;
00573       }
00574       break;
00575     default:
00576       KMIME_WARN_IF_8BIT( ch );
00577       result += QChar( ch );
00578     }
00579   }
00580 
    // input exhausted without meeting an unescaped delimiter
00581   return false;
00582 }
00583 
00584 // known issues:
00585 //
00586 // - doesn't handle encoded-word inside comments.
00587 
00588 bool parseComment( const char* &scursor, const char * const send,
00589                    QString &result, bool isCRLF, bool reallySave )
00590 {
00591   int commentNestingDepth = 1;
00592   const char *afterLastClosingParenPos = 0;
00593   QString maybeCmnt;
00594   const char *oldscursor = scursor;
00595 
00596   assert( *(scursor-1) == '(' );
00597 
00598   while ( commentNestingDepth ) {
00599     QString cmntPart;
00600     if ( parseGenericQuotedString( scursor, send, cmntPart, isCRLF, '(', ')' ) ) {
00601       assert( *(scursor-1) == ')' || *(scursor-1) == '(' );
00602       // see the kdoc for above function for the possible conditions
00603       // we have to check:
00604       switch ( *(scursor-1) ) {
00605       case ')':
00606         if ( reallySave ) {
00607           // add the chunk that's now surely inside the comment.
00608           result += maybeCmnt;
00609           result += cmntPart;
00610           if ( commentNestingDepth > 1 ) {
00611             // don't add the outermost ')'...
00612             result += QChar(')');
00613           }
00614           maybeCmnt.clear();
00615         }
00616         afterLastClosingParenPos = scursor;
00617         --commentNestingDepth;
00618         break;
00619       case '(':
00620         if ( reallySave ) {
00621           // don't add to "result" yet, because we might find that we
00622           // are already outside the (broken) comment...
00623           maybeCmnt += cmntPart;
00624           maybeCmnt += QChar('(');
00625         }
00626         ++commentNestingDepth;
00627         break;
00628       default: assert( 0 );
00629       } // switch
00630     } else {
00631       // !parseGenericQuotedString, ie. premature end
00632       if ( afterLastClosingParenPos ) {
00633         scursor = afterLastClosingParenPos;
00634       } else {
00635         scursor = oldscursor;
00636       }
00637       return false;
00638     }
00639   } // while
00640 
00641   return true;
00642 }
00643 
00644 // known issues: none.
00645 
// Parses a phrase: a sequence of atoms, quoted-strings and encoded-words.
// Comments between the words are parsed and dropped, whitespace between
// elements is skipped, and a '.' after the first word is tolerated.
//
// The words are appended to result, separated by a single space; no space
// is inserted between two directly adjacent encoded-words.
//
// Returns true if at least one word was found. If parsing stops on an
// unparsable element after something was found, scursor is reset to just
// behind the last successfully parsed element (the unterminated
// quoted-string branch is the exception: it returns without resetting).
// Returns false if nothing was found.
00646 bool parsePhrase( const char* &scursor, const char * const send,
00647                   QString &result, bool isCRLF )
00648 {
00649   enum {
00650     None, Phrase, Atom, EncodedWord, QuotedString
00651   } found = None;
00652 
00653   QString tmp;
00654   QByteArray lang, charset;
00655   const char *successfullyParsed = 0;
00656   // only used by the encoded-word branch
00657   const char *oldscursor;
00658   // used to suppress whitespace between adjacent encoded-words
00659   // (rfc2047, 6.2):
00660   bool lastWasEncodedWord = false;
00661 
00662   while ( scursor != send ) {
00663     char ch = *scursor++;
00664     switch ( ch ) {
00665     case '.': // broken, but allow for interop's sake
00666       if ( found == None ) {
00667         --scursor;
00668         return false;
00669       } else {
00670         if ( scursor != send && ( *scursor == ' ' || *scursor == '\t' ) ) {
00671           result += ". ";
00672         } else {
00673           result += '.';
00674         }
00675         successfullyParsed = scursor;
00676       }
00677       break;
00678     case '"': // quoted-string
00679       tmp.clear();
00680       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
00681         successfullyParsed = scursor;
00682         assert( *(scursor-1) == '"' );
00683         switch ( found ) {
00684         case None:
00685           found = QuotedString;
00686           break;
00687         case Phrase:
00688         case Atom:
00689         case EncodedWord:
00690         case QuotedString:
00691           found = Phrase;
00692           result += QChar(' '); // rfc822, 3.4.4
00693           break;
00694         default:
00695           assert( 0 );
00696         }
00697         lastWasEncodedWord = false;
00698         result += tmp;
00699       } else {
00700         // premature end of quoted string.
00701         // What to do? Return leading '"' as special? Return as quoted-string?
00702         // We do the latter if we already found something, else signal failure.
00703         if ( found == None ) {
00704           return false;
00705         } else {
00706           result += QChar(' '); // rfc822, 3.4.4
00707           result += tmp;
00708           return true;
00709         }
00710       }
00711       break;
00712     case '(': // comment
00713       // parse it, but ignore content:
00714       tmp.clear();
00715       if ( parseComment( scursor, send, tmp, isCRLF,
00716                          false /*don't bother with the content*/ ) ) {
00717         successfullyParsed = scursor;
00718         lastWasEncodedWord = false; // strictly interpreting rfc2047, 6.2
00719       } else {
00720         if ( found == None ) {
00721           return false;
00722         } else {
00723           scursor = successfullyParsed;
00724           return true;
00725         }
00726       }
00727       break;
00728     case '=': // encoded-word
00729       tmp.clear();
00730       oldscursor = scursor;
00731       lang.clear();
00732       charset.clear();
00733       if ( parseEncodedWord( scursor, send, tmp, lang, charset ) ) {
00734         successfullyParsed = scursor;
00735         switch ( found ) {
00736         case None:
00737           found = EncodedWord;
00738           break;
00739         case Phrase:
00740         case EncodedWord:
00741         case Atom:
00742         case QuotedString:
00743           if ( !lastWasEncodedWord ) {
00744             result += QChar(' '); // rfc822, 3.4.4
00745           }
00746           found = Phrase;
00747           break;
00748         default: assert( 0 );
00749         }
00750         lastWasEncodedWord = true;
00751         result += tmp;
00752         break;
00753       } else {
00754         // not an encoded-word after all: rewind to just behind the '='
00755         // and parse as atom:
00755         scursor = oldscursor;
00756       }
00757       // fall through...
00758 
00759     default: //atom
00760       tmp.clear();
      // step back onto the char consumed at the top of the loop
00761       scursor--;
00762       if ( parseAtom( scursor, send, tmp, true /* allow 8bit */ ) ) {
00763         successfullyParsed = scursor;
00764         switch ( found ) {
00765         case None:
00766           found = Atom;
00767           break;
00768         case Phrase:
00769         case Atom:
00770         case EncodedWord:
00771         case QuotedString:
00772           found = Phrase;
00773           result += QChar(' '); // rfc822, 3.4.4
00774           break;
00775         default:
00776           assert( 0 );
00777         }
00778         lastWasEncodedWord = false;
00779         result += tmp;
00780       } else {
00781         if ( found == None ) {
00782           return false;
00783         } else {
00784           scursor = successfullyParsed;
00785           return true;
00786         }
00787       }
00788     }
00789     eatWhiteSpace( scursor, send );
00790   }
00791 
00792   return found != None;
00793 }
00794 
00795 bool parseDotAtom( const char* &scursor, const char * const send,
00796                    QString &result, bool isCRLF )
00797 {
00798   eatCFWS( scursor, send, isCRLF );
00799 
00800   // always points to just after the last atom parsed:
00801   const char *successfullyParsed;
00802 
00803   QString tmp;
00804   if ( !parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
00805     return false;
00806   }
00807   result += tmp;
00808   successfullyParsed = scursor;
00809 
00810   while ( scursor != send ) {
00811 
00812     // end of header or no '.' -> return
00813     if ( scursor == send || *scursor != '.' ) {
00814       return true;
00815     }
00816     scursor++; // eat '.'
00817 
00818     if ( scursor == send || !isAText( *scursor ) ) {
00819       // end of header or no AText, but this time following a '.'!:
00820       // reset cursor to just after last successfully parsed char and
00821       // return:
00822       scursor = successfullyParsed;
00823       return true;
00824     }
00825 
00826     // try to parse the next atom:
00827     QString maybeAtom;
00828     if ( !parseAtom( scursor, send, maybeAtom, false /*no 8bit*/ ) ) {
00829       scursor = successfullyParsed;
00830       return true;
00831     }
00832 
00833     result += QChar('.');
00834     result += maybeAtom;
00835     successfullyParsed = scursor;
00836   }
00837 
00838   scursor = successfullyParsed;
00839   return true;
00840 }
00841 
00842 void eatCFWS( const char* &scursor, const char * const send, bool isCRLF )
00843 {
00844   QString dummy;
00845 
00846   while ( scursor != send ) {
00847     const char *oldscursor = scursor;
00848 
00849     char ch = *scursor++;
00850 
00851     switch( ch ) {
00852     case ' ':
00853     case '\t': // whitespace
00854     case '\r':
00855     case '\n': // folding
00856       continue;
00857 
00858     case '(': // comment
00859       if ( parseComment( scursor, send, dummy, isCRLF, false /*don't save*/ ) ) {
00860         continue;
00861       }
00862       scursor = oldscursor;
00863       return;
00864 
00865     default:
00866       scursor = oldscursor;
00867       return;
00868     }
00869   }
00870 }
00871 
00872 bool parseDomain( const char* &scursor, const char * const send,
00873                   QString &result, bool isCRLF )
00874 {
00875   eatCFWS( scursor, send, isCRLF );
00876   if ( scursor == send ) {
00877     return false;
00878   }
00879 
00880   // domain := dot-atom / domain-literal / atom *("." atom)
00881   //
00882   // equivalent to:
00883   // domain = dot-atom / domain-literal,
00884   // since parseDotAtom does allow CFWS between atoms and dots
00885 
00886   if ( *scursor == '[' ) {
00887     // domain-literal:
00888     QString maybeDomainLiteral;
00889     // eat '[':
00890     scursor++;
00891     while ( parseGenericQuotedString( scursor, send, maybeDomainLiteral,
00892                                       isCRLF, '[', ']' ) ) {
00893       if ( scursor == send ) {
00894         // end of header: check for closing ']':
00895         if ( *(scursor-1) == ']' ) {
00896           // OK, last char was ']':
00897           result = maybeDomainLiteral;
00898           return true;
00899         } else {
00900           // not OK, domain-literal wasn't closed:
00901           return false;
00902         }
00903       }
00904       // we hit openChar in parseGenericQuotedString.
00905       // include it in maybeDomainLiteral and keep on parsing:
00906       if ( *(scursor-1) == '[' ) {
00907         maybeDomainLiteral += QChar('[');
00908         continue;
00909       }
00910       // OK, real end of domain-literal:
00911       result = maybeDomainLiteral;
00912       return true;
00913     }
00914   } else {
00915     // dot-atom:
00916     QString maybeDotAtom;
00917     if ( parseDotAtom( scursor, send, maybeDotAtom, isCRLF ) ) {
00918       result = maybeDotAtom;
00919       return true;
00920     }
00921   }
00922   return false;
00923 }
00924 
00925 bool parseObsRoute( const char* &scursor, const char* const send,
00926                     QStringList &result, bool isCRLF, bool save )
00927 {
00928   while ( scursor != send ) {
00929     eatCFWS( scursor, send, isCRLF );
00930     if ( scursor == send ) {
00931       return false;
00932     }
00933 
00934     // empty entry:
00935     if ( *scursor == ',' ) {
00936       scursor++;
00937       if ( save ) {
00938         result.append( QString() );
00939       }
00940       continue;
00941     }
00942 
00943     // empty entry ending the list:
00944     if ( *scursor == ':' ) {
00945       scursor++;
00946       if ( save ) {
00947         result.append( QString() );
00948       }
00949       return true;
00950     }
00951 
00952     // each non-empty entry must begin with '@':
00953     if ( *scursor != '@' ) {
00954       return false;
00955     } else {
00956       scursor++;
00957     }
00958 
00959     QString maybeDomain;
00960     if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
00961       return false;
00962     }
00963     if ( save ) {
00964       result.append( maybeDomain );
00965     }
00966 
00967     // eat the following (optional) comma:
00968     eatCFWS( scursor, send, isCRLF );
00969     if ( scursor == send ) {
00970       return false;
00971     }
00972     if ( *scursor == ':' ) {
00973       scursor++;
00974       return true;
00975     }
00976     if ( *scursor == ',' ) {
00977       scursor++;
00978     }
00979   }
00980 
00981   return false;
00982 }
00983 
00984 bool parseAddrSpec( const char* &scursor, const char * const send,
00985                     AddrSpec &result, bool isCRLF )
00986 {
00987   //
00988   // STEP 1:
00989   // local-part := dot-atom / quoted-string / word *("." word)
00990   //
00991   // this is equivalent to:
00992   // local-part := word *("." word)
00993 
00994   QString maybeLocalPart;
00995   QString tmp;
00996 
00997   while ( scursor != send ) {
00998     // first, eat any whitespace
00999     eatCFWS( scursor, send, isCRLF );
01000 
01001     char ch = *scursor++;
01002     switch ( ch ) {
01003     case '.': // dot
01004       maybeLocalPart += QChar('.');
01005       break;
01006 
01007     case '@':
01008       goto SAW_AT_SIGN;
01009       break;
01010 
01011     case '"': // quoted-string
01012       tmp.clear();
01013       if ( parseGenericQuotedString( scursor, send, tmp, isCRLF, '"', '"' ) ) {
01014         maybeLocalPart += tmp;
01015       } else {
01016         return false;
01017       }
01018       break;
01019 
01020     default: // atom
01021       scursor--; // re-set scursor to point to ch again
01022       tmp.clear();
01023       if ( parseAtom( scursor, send, tmp, false /* no 8bit */ ) ) {
01024         maybeLocalPart += tmp;
01025       } else {
01026         return false; // parseAtom can only fail if the first char is non-atext.
01027       }
01028       break;
01029     }
01030   }
01031 
01032   return false;
01033 
01034   //
01035   // STEP 2:
01036   // domain
01037   //
01038 
01039 SAW_AT_SIGN:
01040 
01041   assert( *(scursor-1) == '@' );
01042 
01043   QString maybeDomain;
01044   if ( !parseDomain( scursor, send, maybeDomain, isCRLF ) ) {
01045     return false;
01046   }
01047 
01048   result.localPart = maybeLocalPart;
01049   result.domain = maybeDomain;
01050 
01051   return true;
01052 }
01053 
01054 bool parseAngleAddr( const char* &scursor, const char * const send,
01055                      AddrSpec &result, bool isCRLF )
01056 {
01057   // first, we need an opening angle bracket:
01058   eatCFWS( scursor, send, isCRLF );
01059   if ( scursor == send || *scursor != '<' ) {
01060     return false;
01061   }
01062   scursor++; // eat '<'
01063 
01064   eatCFWS( scursor, send, isCRLF );
01065   if ( scursor == send ) {
01066     return false;
01067   }
01068 
01069   if ( *scursor == '@' || *scursor == ',' ) {
01070     // obs-route: parse, but ignore:
01071     KMIME_WARN << "obsolete source route found! ignoring.";
01072     QStringList dummy;
01073     if ( !parseObsRoute( scursor, send, dummy,
01074                          isCRLF, false /* don't save */ ) ) {
01075       return false;
01076     }
01077     // angle-addr isn't complete until after the '>':
01078     if ( scursor == send ) {
01079       return false;
01080     }
01081   }
01082 
01083   // parse addr-spec:
01084   AddrSpec maybeAddrSpec;
01085   if ( !parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01086     return false;
01087   }
01088 
01089   eatCFWS( scursor, send, isCRLF );
01090   if ( scursor == send || *scursor != '>' ) {
01091     return false;
01092   }
01093   scursor++;
01094 
01095   result = maybeAddrSpec;
01096   return true;
01097 
01098 }
01099 
01100 bool parseMailbox( const char* &scursor, const char * const send,
01101                    Mailbox &result, bool isCRLF )
01102 {
01103   eatCFWS( scursor, send, isCRLF );
01104   if ( scursor == send ) {
01105     return false;
01106   }
01107 
01108   AddrSpec maybeAddrSpec;
01109   QString maybeDisplayName;
01110 
01111   // first, try if it's a vanilla addr-spec:
01112   const char * oldscursor = scursor;
01113   if ( parseAddrSpec( scursor, send, maybeAddrSpec, isCRLF ) ) {
01114     result.setAddress( maybeAddrSpec );
01115     // check for the obsolete form of display-name (as comment):
01116     eatWhiteSpace( scursor, send );
01117     if ( scursor != send && *scursor == '(' ) {
01118       scursor++;
01119       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01120         return false;
01121       }
01122     }
01123     result.setNameFrom7Bit( maybeDisplayName.toLatin1() );
01124     return true;
01125   }
01126   scursor = oldscursor;
01127 
01128   // second, see if there's a display-name:
01129   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01130     // failed: reset cursor, note absent display-name
01131     maybeDisplayName.clear();
01132     scursor = oldscursor;
01133   } else {
01134     // succeeded: eat CFWS
01135     eatCFWS( scursor, send, isCRLF );
01136     if ( scursor == send ) {
01137       return false;
01138     }
01139   }
01140 
01141   // third, parse the angle-addr:
01142   if ( !parseAngleAddr( scursor, send, maybeAddrSpec, isCRLF ) ) {
01143     return false;
01144   }
01145 
01146   if ( maybeDisplayName.isNull() ) {
01147     // check for the obsolete form of display-name (as comment):
01148     eatWhiteSpace( scursor, send );
01149     if ( scursor != send && *scursor == '(' ) {
01150       scursor++;
01151       if ( !parseComment( scursor, send, maybeDisplayName, isCRLF, true /*keep*/ ) ) {
01152         return false;
01153       }
01154     }
01155   }
01156 
01157   result.setName( maybeDisplayName );
01158   result.setAddress( maybeAddrSpec );
01159   return true;
01160 }
01161 
01162 bool parseGroup( const char* &scursor, const char * const send,
01163                  Address &result, bool isCRLF )
01164 {
01165   // group         := display-name ":" [ mailbox-list / CFWS ] ";" [CFWS]
01166   //
01167   // equivalent to:
01168   // group   := display-name ":" [ obs-mbox-list ] ";"
01169 
01170   eatCFWS( scursor, send, isCRLF );
01171   if ( scursor == send ) {
01172     return false;
01173   }
01174 
01175   // get display-name:
01176   QString maybeDisplayName;
01177   if ( !parsePhrase( scursor, send, maybeDisplayName, isCRLF ) ) {
01178     return false;
01179   }
01180 
01181   // get ":":
01182   eatCFWS( scursor, send, isCRLF );
01183   if ( scursor == send || *scursor != ':' ) {
01184     return false;
01185   }
01186 
01187   result.displayName = maybeDisplayName;
01188 
01189   // get obs-mbox-list (may contain empty entries):
01190   scursor++;
01191   while ( scursor != send ) {
01192     eatCFWS( scursor, send, isCRLF );
01193     if ( scursor == send ) {
01194       return false;
01195     }
01196 
01197     // empty entry:
01198     if ( *scursor == ',' ) {
01199       scursor++;
01200       continue;
01201     }
01202 
01203     // empty entry ending the list:
01204     if ( *scursor == ';' ) {
01205       scursor++;
01206       return true;
01207     }
01208 
01209     Mailbox maybeMailbox;
01210     if ( !parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01211       return false;
01212     }
01213     result.mailboxList.append( maybeMailbox );
01214 
01215     eatCFWS( scursor, send, isCRLF );
01216     // premature end:
01217     if ( scursor == send ) {
01218       return false;
01219     }
01220     // regular end of the list:
01221     if ( *scursor == ';' ) {
01222       scursor++;
01223       return true;
01224     }
01225     // eat regular list entry separator:
01226     if ( *scursor == ',' ) {
01227       scursor++;
01228     }
01229   }
01230   return false;
01231 }
01232 
01233 bool parseAddress( const char* &scursor, const char * const send,
01234                    Address &result, bool isCRLF )
01235 {
01236   // address       := mailbox / group
01237 
01238   eatCFWS( scursor, send, isCRLF );
01239   if ( scursor == send ) {
01240     return false;
01241   }
01242 
01243   // first try if it's a single mailbox:
01244   Mailbox maybeMailbox;
01245   const char * oldscursor = scursor;
01246   if ( parseMailbox( scursor, send, maybeMailbox, isCRLF ) ) {
01247     // yes, it is:
01248     result.displayName.clear();
01249     result.mailboxList.append( maybeMailbox );
01250     return true;
01251   }
01252   scursor = oldscursor;
01253 
01254   Address maybeAddress;
01255 
01256   // no, it's not a single mailbox. Try if it's a group:
01257   if ( !parseGroup( scursor, send, maybeAddress, isCRLF ) ) {
01258     return false;
01259   }
01260 
01261   result = maybeAddress;
01262   return true;
01263 }
01264 
01265 bool parseAddressList( const char* &scursor, const char * const send,
01266                        AddressList &result, bool isCRLF )
01267 {
01268   while ( scursor != send ) {
01269     eatCFWS( scursor, send, isCRLF );
01270     // end of header: this is OK.
01271     if ( scursor == send ) {
01272       return true;
01273     }
01274     // empty entry: ignore:
01275     if ( *scursor == ',' ) {
01276       scursor++;
01277       continue;
01278     }
01279     // broken clients might use ';' as list delimiter, accept that as well
01280     if ( *scursor == ';' ) {
01281       scursor++;
01282       continue;
01283     }
01284 
01285     // parse one entry
01286     Address maybeAddress;
01287     if ( !parseAddress( scursor, send, maybeAddress, isCRLF ) ) {
01288       return false;
01289     }
01290     result.append( maybeAddress );
01291 
01292     eatCFWS( scursor, send, isCRLF );
01293     // end of header: this is OK.
01294     if ( scursor == send ) {
01295       return true;
01296     }
01297     // comma separating entries: eat it.
01298     if ( *scursor == ',' ) {
01299       scursor++;
01300     }
01301   }
01302   return true;
01303 }
01304 
01305 static QString asterisk = QString::fromLatin1( "*0*", 1 );
01306 static QString asteriskZero = QString::fromLatin1( "*0*", 2 );
01307 //static QString asteriskZeroAsterisk = QString::fromLatin1( "*0*", 3 );
01308 
01309 bool parseParameter( const char* &scursor, const char * const send,
01310                      QPair<QString,QStringOrQPair> &result, bool isCRLF )
01311 {
01312   // parameter = regular-parameter / extended-parameter
01313   // regular-parameter = regular-parameter-name "=" value
01314   // extended-parameter =
01315   // value = token / quoted-string
01316   //
01317   // note that rfc2231 handling is out of the scope of this function.
01318   // Therefore we return the attribute as QString and the value as
01319   // (start,length) tupel if we see that the value is encoded
01320   // (trailing asterisk), for parseParameterList to decode...
01321 
01322   eatCFWS( scursor, send, isCRLF );
01323   if ( scursor == send ) {
01324     return false;
01325   }
01326 
01327   //
01328   // parse the parameter name:
01329   //
01330   QString maybeAttribute;
01331   if ( !parseToken( scursor, send, maybeAttribute, false /* no 8bit */ ) ) {
01332     return false;
01333   }
01334 
01335   eatCFWS( scursor, send, isCRLF );
01336   // premature end: not OK (haven't seen '=' yet).
01337   if ( scursor == send || *scursor != '=' ) {
01338     return false;
01339   }
01340   scursor++; // eat '='
01341 
01342   eatCFWS( scursor, send, isCRLF );
01343   if ( scursor == send ) {
01344     // don't choke on attribute=, meaning the value was omitted:
01345     if ( maybeAttribute.endsWith( asterisk ) ) {
01346       KMIME_WARN << "attribute ends with \"*\", but value is empty!"
01347         "Chopping away \"*\".";
01348       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01349     }
01350     result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01351     return true;
01352   }
01353 
01354   const char * oldscursor = scursor;
01355 
01356   //
01357   // parse the parameter value:
01358   //
01359   QStringOrQPair maybeValue;
01360   if ( *scursor == '"' ) {
01361     // value is a quoted-string:
01362     scursor++;
01363     if ( maybeAttribute.endsWith( asterisk ) ) {
01364       // attributes ending with "*" designate extended-parameters,
01365       // which cannot have quoted-strings as values. So we remove the
01366       // trailing "*" to not confuse upper layers.
01367       KMIME_WARN << "attribute ends with \"*\", but value is a quoted-string!"
01368         "Chopping away \"*\".";
01369       maybeAttribute.truncate( maybeAttribute.length() - 1 );
01370     }
01371 
01372     if ( !parseGenericQuotedString( scursor, send, maybeValue.qstring, isCRLF ) ) {
01373       scursor = oldscursor;
01374       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01375       return false; // this case needs further processing by upper layers!!
01376     }
01377   } else {
01378     // value is a token:
01379     if ( !parseToken( scursor, send, maybeValue.qpair, false /* no 8bit */ ) ) {
01380       scursor = oldscursor;
01381       result = qMakePair( maybeAttribute.toLower(), QStringOrQPair() );
01382       return false; // this case needs further processing by upper layers!!
01383     }
01384   }
01385 
01386   result = qMakePair( maybeAttribute.toLower(), maybeValue );
01387   return true;
01388 }
01389 
01390 bool parseRawParameterList( const char* &scursor, const char * const send,
01391                             QMap<QString,QStringOrQPair> &result,
01392                             bool isCRLF )
01393 {
01394   // we use parseParameter() consecutively to obtain a map of raw
01395   // attributes to raw values. "Raw" here means that we don't do
01396   // rfc2231 decoding and concatenation. This is left to
01397   // parseParameterList(), which will call this function.
01398   //
01399   // The main reason for making this chunk of code a separate
01400   // (private) method is that we can deal with broken parameters
01401   // _here_ and leave the rfc2231 handling solely to
01402   // parseParameterList(), which will still be enough work.
01403 
01404   while ( scursor != send ) {
01405     eatCFWS( scursor, send, isCRLF );
01406     // empty entry ending the list: OK.
01407     if ( scursor == send ) {
01408       return true;
01409     }
01410     // empty list entry: ignore.
01411     if ( *scursor == ';' ) {
01412       scursor++;
01413       continue;
01414     }
01415 
01416     QPair<QString,QStringOrQPair> maybeParameter;
01417     if ( !parseParameter( scursor, send, maybeParameter, isCRLF ) ) {
01418       // we need to do a bit of work if the attribute is not
01419       // NULL. These are the cases marked with "needs further
01420       // processing" in parseParameter(). Specifically, parsing of the
01421       // token or the quoted-string, which should represent the value,
01422       // failed. We take the easy way out and simply search for the
01423       // next ';' to start parsing again. (Another option would be to
01424       // take the text between '=' and ';' as value)
01425       if ( maybeParameter.first.isNull() ) {
01426         return false;
01427       }
01428       while ( scursor != send ) {
01429         if ( *scursor++ == ';' ) {
01430           goto IS_SEMICOLON;
01431         }
01432       }
01433       // scursor == send case: end of list.
01434       return true;
01435     IS_SEMICOLON:
01436       // *scursor == ';' case: parse next entry.
01437       continue;
01438     }
01439     // successful parsing brings us here:
01440     result.insert( maybeParameter.first, maybeParameter.second );
01441 
01442     eatCFWS( scursor, send, isCRLF );
01443     // end of header: ends list.
01444     if ( scursor == send ) {
01445       return true;
01446     }
01447     // regular separator: eat it.
01448     if ( *scursor == ';' ) {
01449       scursor++;
01450     }
01451   }
01452   return true;
01453 }
01454 
01455 static void decodeRFC2231Value( Codec* &rfc2231Codec,
01456                                 QTextCodec* &textcodec,
01457                                 bool isContinuation, QString &value,
01458                                 QPair<const char*,int> &source )
01459 {
01460   //
01461   // parse the raw value into (charset,language,text):
01462   //
01463 
01464   const char * decBegin = source.first;
01465   const char * decCursor = decBegin;
01466   const char * decEnd = decCursor + source.second;
01467 
01468   if ( !isContinuation ) {
01469     // find the first single quote
01470     while ( decCursor != decEnd ) {
01471       if ( *decCursor == '\'' ) {
01472         break;
01473       } else {
01474         decCursor++;
01475       }
01476     }
01477 
01478     if ( decCursor == decEnd ) {
01479       // there wasn't a single single quote at all!
01480       // take the whole value to be in latin-1:
01481       KMIME_WARN << "No charset in extended-initial-value."
01482         "Assuming \"iso-8859-1\".";
01483       value += QString::fromLatin1( decBegin, source.second );
01484       return;
01485     }
01486 
01487     QByteArray charset( decBegin, decCursor - decBegin );
01488 
01489     const char * oldDecCursor = ++decCursor;
01490     // find the second single quote (we ignore the language tag):
01491     while ( decCursor != decEnd ) {
01492       if ( *decCursor == '\'' ) {
01493         break;
01494       } else {
01495         decCursor++;
01496       }
01497     }
01498     if ( decCursor == decEnd ) {
01499       KMIME_WARN << "No language in extended-initial-value."
01500         "Trying to recover.";
01501       decCursor = oldDecCursor;
01502     } else {
01503       decCursor++;
01504     }
01505 
01506     // decCursor now points to the start of the
01507     // "extended-other-values":
01508 
01509     //
01510     // get the decoders:
01511     //
01512 
01513     bool matchOK = false;
01514     textcodec = KGlobal::charsets()->codecForName( charset, matchOK );
01515     if ( !matchOK ) {
01516       textcodec = 0;
01517       KMIME_WARN_UNKNOWN( Charset, charset );
01518     }
01519   }
01520 
01521   if ( !rfc2231Codec ) {
01522     rfc2231Codec = Codec::codecForName("x-kmime-rfc2231");
01523     assert( rfc2231Codec );
01524   }
01525 
01526   if ( !textcodec ) {
01527     value += QString::fromLatin1( decCursor, decEnd - decCursor );
01528     return;
01529   }
01530 
01531   Decoder * dec = rfc2231Codec->makeDecoder();
01532   assert( dec );
01533 
01534   //
01535   // do the decoding:
01536   //
01537 
01538   QByteArray buffer;
01539   buffer.resize( rfc2231Codec->maxDecodedSizeFor( decEnd - decCursor ) );
01540   QByteArray::Iterator bit = buffer.begin();
01541   QByteArray::ConstIterator bend = buffer.end();
01542 
01543   if ( !dec->decode( decCursor, decEnd, bit, bend ) ) {
01544     KMIME_WARN << rfc2231Codec->name()
01545                << "codec lies about its maxDecodedSizeFor()" << endl
01546                << "result may be truncated";
01547   }
01548 
01549   value += textcodec->toUnicode( buffer.begin(), bit - buffer.begin() );
01550 
01551   // kDebug(5320) << "value now: \"" << value << "\"";
01552   // cleanup:
01553   delete dec;
01554 }
01555 
01556 // known issues:
01557 //  - permutes rfc2231 continuations when the total number of parts
01558 //    exceeds 10 (other-sections then becomes *xy, ie. two digits)
01559 
01560 bool parseParameterList( const char* &scursor, const char * const send,
01561                          QMap<QString,QString> &result, bool isCRLF )
01562 {
01563   // parse the list into raw attribute-value pairs:
01564   QMap<QString,QStringOrQPair> rawParameterList;
01565   if (!parseRawParameterList( scursor, send, rawParameterList, isCRLF ) ) {
01566     return false;
01567   }
01568 
01569   if ( rawParameterList.isEmpty() ) {
01570     return true;
01571   }
01572 
01573   // decode rfc 2231 continuations and alternate charset encoding:
01574 
01575   // NOTE: this code assumes that what QMapIterator delivers is sorted
01576   // by the key!
01577 
01578   Codec * rfc2231Codec = 0;
01579   QTextCodec * textcodec = 0;
01580   QString attribute;
01581   QString value;
01582   enum Modes {
01583     NoMode = 0x0, Continued = 0x1, Encoded = 0x2
01584   } mode;
01585 
01586   QMap<QString,QStringOrQPair>::Iterator it, end = rawParameterList.end();
01587 
01588   for ( it = rawParameterList.begin() ; it != end ; ++it ) {
01589     if ( attribute.isNull() || !it.key().startsWith( attribute ) ) {
01590       //
01591       // new attribute:
01592       //
01593 
01594       // store the last attribute/value pair in the result map now:
01595       if ( !attribute.isNull() ) {
01596         result.insert( attribute, value );
01597       }
01598       // and extract the information from the new raw attribute:
01599       value.clear();
01600       attribute = it.key();
01601       mode = NoMode;
01602       // is the value encoded?
01603       if ( attribute.endsWith( asterisk ) ) {
01604         attribute.truncate( attribute.length() - 1 );
01605         mode = (Modes) ((int) mode | Encoded);
01606       }
01607       // is the value continued?
01608       if ( attribute.endsWith( asteriskZero ) ) {
01609         attribute.truncate( attribute.length() - 2 );
01610         mode = (Modes) ((int) mode | Continued);
01611       }
01612       //
01613       // decode if necessary:
01614       //
01615       if ( mode & Encoded ) {
01616         decodeRFC2231Value( rfc2231Codec, textcodec,
01617                             false, /* isn't continuation */
01618                             value, (*it).qpair );
01619       } else {
01620         // not encoded.
01621         if ( (*it).qpair.first ) {
01622           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01623         } else {
01624           value += (*it).qstring;
01625         }
01626       }
01627 
01628       //
01629       // shortcut-processing when the value isn't encoded:
01630       //
01631 
01632       if ( !(mode & Continued) ) {
01633         // save result already:
01634         result.insert( attribute, value );
01635         // force begin of a new attribute:
01636         attribute.clear();
01637       }
01638     } else { // it.key().startsWith( attribute )
01639       //
01640       // continuation
01641       //
01642 
01643       // ignore the section and trust QMap to have sorted the keys:
01644       if ( it.key().endsWith( asterisk ) ) {
01645         // encoded
01646         decodeRFC2231Value( rfc2231Codec, textcodec,
01647                             true, /* is continuation */
01648                             value, (*it).qpair );
01649       } else {
01650         // not encoded
01651         if ( (*it).qpair.first ) {
01652           value += QString::fromLatin1( (*it).qpair.first, (*it).qpair.second );
01653         } else {
01654           value += (*it).qstring;
01655         }
01656       }
01657     }
01658   }
01659 
01660   // write last attr/value pair:
01661   if ( !attribute.isNull() ) {
01662     result.insert( attribute, value );
01663   }
01664 
01665   return true;
01666 }
01667 
01668 static const char * const stdDayNames[] = {
01669   "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
01670 };
01671 static const int stdDayNamesLen = sizeof stdDayNames / sizeof *stdDayNames;
01672 
01673 static bool parseDayName( const char* &scursor, const char * const send )
01674 {
01675   // check bounds:
01676   if ( send - scursor < 3 ) {
01677     return false;
01678   }
01679 
01680   for ( int i = 0 ; i < stdDayNamesLen ; ++i ) {
01681     if ( qstrnicmp( scursor, stdDayNames[i], 3 ) == 0 ) {
01682       scursor += 3;
01683       // kDebug(5320) << "found" << stdDayNames[i];
01684       return true;
01685     }
01686   }
01687 
01688   return false;
01689 }
01690 
01691 static const char * const stdMonthNames[] = {
01692   "Jan", "Feb", "Mar", "Apr", "May", "Jun",
01693   "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
01694 };
01695 static const int stdMonthNamesLen =
01696                               sizeof stdMonthNames / sizeof *stdMonthNames;
01697 
01698 static bool parseMonthName( const char* &scursor, const char * const send,
01699                             int &result )
01700 {
01701   // check bounds:
01702   if ( send - scursor < 3 ) {
01703     return false;
01704   }
01705 
01706   for ( result = 0 ; result < stdMonthNamesLen ; ++result ) {
01707     if ( qstrnicmp( scursor, stdMonthNames[result], 3 ) == 0 ) {
01708       scursor += 3;
01709       return true;
01710     }
01711   }
01712 
01713   // not found:
01714   return false;
01715 }
01716 
// Table of the alphanumeric time zones known to
// parseAlphaNumericTimeZone(), with their offsets from GMT in seconds
// (positive values are east of GMT).
//
// NOTE(review): the military zones below carry the offsets as given by
// rfc 822. rfc 2822, section 4.3 says these were defined in a
// non-standard way and should be treated like "-0000"; confirm whether
// the offsets should be kept.
static const struct {
  const char * tzName;
  long int secsEastOfGMT;
} timeZones[] = {
  // rfc 822 timezones:
  { "GMT", 0 },
  { "UT", 0 },
  { "EDT", -4*3600 },
  { "EST", -5*3600 },
  { "CDT", -5*3600 }, // used to be a second, wrong, "MST" entry
  { "CST", -6*3600 },
  { "MDT", -6*3600 },
  { "MST", -7*3600 },
  { "PDT", -7*3600 },
  { "PST", -8*3600 },
  // common, non-rfc-822 zones:
  { "CET", 1*3600 },
  { "MET", 1*3600 },
  { "UTC", 0 },
  { "CEST", 2*3600 },
  { "BST", 1*3600 },
  // rfc 822 military timezones:
  { "Z", 0 },
  { "A", -1*3600 },
  { "B", -2*3600 },
  { "C", -3*3600 },
  { "D", -4*3600 },
  { "E", -5*3600 },
  { "F", -6*3600 },
  { "G", -7*3600 },
  { "H", -8*3600 },
  { "I", -9*3600 },
  // J is not used!
  { "K", -10*3600 },
  { "L", -11*3600 },
  { "M", -12*3600 },
  { "N", 1*3600 },
  { "O", 2*3600 },
  { "P", 3*3600 },
  { "Q", 4*3600 },
  { "R", 5*3600 },
  { "S", 6*3600 },
  { "T", 7*3600 },
  { "U", 8*3600 },
  { "V", 9*3600 },
  { "W", 10*3600 },
  { "X", 11*3600 },
  { "Y", 12*3600 },
};
static const int timeZonesLen = sizeof timeZones / sizeof *timeZones;
01767 
01768 static bool parseAlphaNumericTimeZone( const char* &scursor,
01769                                        const char * const send,
01770                                        long int &secsEastOfGMT,
01771                                        bool &timeZoneKnown )
01772 {
01773   QPair<const char*,int> maybeTimeZone( 0, 0 );
01774   if ( !parseToken( scursor, send, maybeTimeZone, false /*no 8bit*/ ) ) {
01775     return false;
01776   }
01777   for ( int i = 0 ; i < timeZonesLen ; ++i ) {
01778     if ( qstrnicmp( timeZones[i].tzName,
01779                     maybeTimeZone.first, maybeTimeZone.second ) == 0 ) {
01780       scursor += maybeTimeZone.second;
01781       secsEastOfGMT = timeZones[i].secsEastOfGMT;
01782       timeZoneKnown = true;
01783       return true;
01784     }
01785   }
01786 
01787   // don't choke just because we don't happen to know the time zone
01788   KMIME_WARN_UNKNOWN( time zone,
01789                       QByteArray( maybeTimeZone.first, maybeTimeZone.second ) );
01790   secsEastOfGMT = 0;
01791   timeZoneKnown = false;
01792   return true;
01793 }
01794 
// Parses a run of decimal digits.
//
// scursor  in/out: current read position; left at the first character
//          that is not a digit (or at send)
// send     end of the input
// result   receives the value of the number; 0 if there are no digits.
//          Values too large for an int are clamped to the largest int
//          instead of overflowing.
//
// Returns the number of digits parsed.
int parseDigits( const char* &scursor, const char * const send, int &result )
{
  // largest value an int can hold (computed here, so that no additional
  // header is needed):
  const int maxResult = static_cast<int>( ~0U >> 1 );

  result = 0;
  int digits = 0;
  // isdigit() must not be handed a negative value, which a plain char
  // holding an 8bit character may well be. Hence the cast.
  for ( ; scursor != send && isdigit( static_cast<unsigned char>( *scursor ) ) ;
        ++scursor, ++digits ) {
    const int digit = *scursor - '0';
    if ( result > ( maxResult - digit ) / 10 ) {
      // result * 10 + digit would overflow: saturate
      result = maxResult;
    } else {
      result = result * 10 + digit;
    }
  }
  return digits;
}
01806 
01807 static bool parseTimeOfDay( const char* &scursor, const char * const send,
01808                             int &hour, int &min, int &sec, bool isCRLF=false )
01809 {
01810   // time-of-day := 2DIGIT [CFWS] ":" [CFWS] 2DIGIT [ [CFWS] ":" 2DIGIT ]
01811 
01812   //
01813   // 2DIGIT representing "hour":
01814   //
01815   if ( !parseDigits( scursor, send, hour ) ) {
01816     return false;
01817   }
01818 
01819   eatCFWS( scursor, send, isCRLF );
01820   if ( scursor == send || *scursor != ':' ) {
01821     return false;
01822   }
01823   scursor++; // eat ':'
01824 
01825   eatCFWS( scursor, send, isCRLF );
01826   if ( scursor == send ) {
01827     return false;
01828   }
01829 
01830   //
01831   // 2DIGIT representing "minute":
01832   //
01833   if ( !parseDigits( scursor, send, min ) ) {
01834     return false;
01835   }
01836 
01837   eatCFWS( scursor, send, isCRLF );
01838   if ( scursor == send ) {
01839     return true; // seconds are optional
01840   }
01841 
01842   //
01843   // let's see if we have a 2DIGIT representing "second":
01844   //
01845   if ( *scursor == ':' ) {
01846     // yepp, there are seconds:
01847     scursor++; // eat ':'
01848     eatCFWS( scursor, send, isCRLF );
01849     if ( scursor == send ) {
01850       return false;
01851     }
01852 
01853     if ( !parseDigits( scursor, send, sec ) ) {
01854       return false;
01855     }
01856   } else {
01857     sec = 0;
01858   }
01859 
01860   return true;
01861 }
01862 
01863 bool parseTime( const char* &scursor, const char * send,
01864                 int &hour, int &min, int &sec, long int &secsEastOfGMT,
01865                 bool &timeZoneKnown, bool isCRLF )
01866 {
01867   // time := time-of-day CFWS ( zone / obs-zone )
01868   //
01869   // obs-zone    := "UT" / "GMT" /
01870   //                "EST" / "EDT" / ; -0500 / -0400
01871   //                "CST" / "CDT" / ; -0600 / -0500
01872   //                "MST" / "MDT" / ; -0700 / -0600
01873   //                "PST" / "PDT" / ; -0800 / -0700
01874   //                "A"-"I" / "a"-"i" /
01875   //                "K"-"Z" / "k"-"z"
01876 
01877   eatCFWS( scursor, send, isCRLF );
01878   if ( scursor == send ) {
01879     return false;
01880   }
01881 
01882   if ( !parseTimeOfDay( scursor, send, hour, min, sec, isCRLF ) ) {
01883     return false;
01884   }
01885 
01886   eatCFWS( scursor, send, isCRLF );
01887   if ( scursor == send ) {
01888     timeZoneKnown = false;
01889     secsEastOfGMT = 0;
01890     return true; // allow missing timezone
01891   }
01892 
01893   timeZoneKnown = true;
01894   if ( *scursor == '+' || *scursor == '-' ) {
01895     // remember and eat '-'/'+':
01896     const char sign = *scursor++;
01897     // numerical timezone:
01898     int maybeTimeZone;
01899     if ( parseDigits( scursor, send, maybeTimeZone ) != 4 ) {
01900       return false;
01901     }
01902     secsEastOfGMT = 60 * ( maybeTimeZone / 100 * 60 + maybeTimeZone % 100 );
01903     if ( sign == '-' ) {
01904       secsEastOfGMT *= -1;
01905       if ( secsEastOfGMT == 0 ) {
01906         timeZoneKnown = false; // -0000 means indetermined tz
01907       }
01908     }
01909   } else {
01910     // maybe alphanumeric timezone:
01911     if ( !parseAlphaNumericTimeZone( scursor, send, secsEastOfGMT, timeZoneKnown ) ) {
01912       return false;
01913     }
01914   }
01915   return true;
01916 }
01917 
01918 bool parseDateTime( const char* &scursor, const char * const send,
01919                     KDateTime &result, bool isCRLF )
01920 {
01921   // Parsing date-time; strict mode:
01922   //
01923   // date-time   := [ [CFWS] day-name [CFWS] "," ]                      ; wday
01924   // (expanded)     [CFWS] 1*2DIGIT CFWS month-name CFWS 2*DIGIT [CFWS] ; date
01925   //                time
01926   //
01927   // day-name    := "Mon" / "Tue" / "Wed" / "Thu" / "Fri" / "Sat" / "Sun"
01928   // month-name  := "Jan" / "Feb" / "Mar" / "Apr" / "May" / "Jun" /
01929   //                "Jul" / "Aug" / "Sep" / "Oct" / "Nov" / "Dec"
01930 
01931   result = KDateTime();
01932   QDateTime maybeDateTime;
01933 
01934   eatCFWS( scursor, send, isCRLF );
01935   if ( scursor == send ) {
01936     return false;
01937   }
01938 
01939   //
01940   // let's see if there's a day-of-week:
01941   //
01942   if ( parseDayName( scursor, send ) ) {
01943     eatCFWS( scursor, send, isCRLF );
01944     if ( scursor == send ) {
01945       return false;
01946     }
01947     // day-name should be followed by ',' but we treat it as optional:
01948     if ( *scursor == ',' ) {
01949       scursor++; // eat ','
01950       eatCFWS( scursor, send, isCRLF );
01951     }
01952   }
01953 
01954   //
01955   // 1*2DIGIT representing "day" (of month):
01956   //
01957   int maybeDay;
01958   if ( !parseDigits( scursor, send, maybeDay ) ) {
01959     return false;
01960   }
01961 
01962   eatCFWS( scursor, send, isCRLF );
01963   if ( scursor == send ) {
01964     return false;
01965   }
01966 
01967   //
01968   // month-name:
01969   //
01970   int maybeMonth = 0;
01971   if ( !parseMonthName( scursor, send, maybeMonth ) ) {
01972     return false;
01973   }
01974   if ( scursor == send ) {
01975     return false;
01976   }
01977   assert( maybeMonth >= 0 ); assert( maybeMonth <= 11 );
01978   ++maybeMonth; // 0-11 -> 1-12
01979 
01980   eatCFWS( scursor, send, isCRLF );
01981   if ( scursor == send ) {
01982     return false;
01983   }
01984 
01985   //
01986   // 2*DIGIT representing "year":
01987   //
01988   int maybeYear;
01989   if ( !parseDigits( scursor, send, maybeYear ) ) {
01990     return false;
01991   }
01992   // RFC 2822 4.3 processing:
01993   if ( maybeYear < 50 ) {
01994     maybeYear += 2000;
01995   } else if ( maybeYear < 1000 ) {
01996     maybeYear += 1900;
01997   }
01998   // else keep as is
01999   if ( maybeYear < 1900 ) {
02000     return false; // rfc2822, 3.3
02001   }
02002 
02003   eatCFWS( scursor, send, isCRLF );
02004   if ( scursor == send ) {
02005     return false;
02006   }
02007 
02008   maybeDateTime.setDate( QDate( maybeYear, maybeMonth, maybeDay ) );
02009 
02010   //
02011   // time
02012   //
02013   int maybeHour, maybeMinute, maybeSecond;
02014   long int secsEastOfGMT;
02015   bool timeZoneKnown = true;
02016 
02017   if ( !parseTime( scursor, send,
02018                    maybeHour, maybeMinute, maybeSecond,
02019                    secsEastOfGMT, timeZoneKnown, isCRLF ) ) {
02020     return false;
02021   }
02022 
02023   maybeDateTime.setTime( QTime( maybeHour, maybeMinute, maybeSecond ) );
02024   if ( !maybeDateTime.isValid() )
02025     return false;
02026 
02027   result = KDateTime( maybeDateTime, KDateTime::Spec( KDateTime::OffsetFromUTC, secsEastOfGMT ) );
02028   if ( !result.isValid() )
02029     return false;
02030   return true;
02031 }
02032 
02033 } // namespace HeaderParsing
02034 
02035 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal