• Skip to content
  • Skip to link menu
KDE 4.3 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KIMAP Library

rfccodecs.cpp

Go to the documentation of this file.
00001 /**********************************************************************
00002  *
00003  *   rfccodecs.cpp - handler for various rfc/mime encodings
00004  *   Copyright (C) 2000 s.carstens@gmx.de
00005  *
00006  *   This library is free software; you can redistribute it and/or
00007  *   modify it under the terms of the GNU Library General Public
00008  *   License as published by the Free Software Foundation; either
00009  *   version 2 of the License, or (at your option) any later version.
00010  *
00011  *   This library is distributed in the hope that it will be useful,
00012  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014  *   Library General Public License for more details.
00015  *
00016  *   You should have received a copy of the GNU Library General Public License
00017  *   along with this library; see the file COPYING.LIB.  If not, write to
00018  *   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019  *   Boston, MA 02110-1301, USA.
00020  *
00021  *********************************************************************/
00033 #include "rfccodecs.h"
00034 
00035 #include <ctype.h>
00036 #include <sys/types.h>
00037 
00038 #include <stdio.h>
00039 #include <stdlib.h>
00040 
00041 #include <QtCore/QTextCodec>
00042 #include <QtCore/QBuffer>
00043 #include <QtCore/QRegExp>
00044 #include <QtCore/QByteArray>
00045 #include <QtCore/QLatin1Char>
00046 #include <kcodecs.h>
00047 
00048 using namespace KIMAP;
00049 
00050 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
00051 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
00052 
00053 //@cond PRIVATE
00054 static const unsigned char base64chars[] =
00055   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";  /* 64-digit alphabet used by the IMAP folder-name codec; note the last two digits are '+' and ',' */
00056 #define UNDEFINED 64  /* decode-table value for bytes that are not one of the 64 digits above */
00057 #define MAXLINE  76  /* not referenced by any code visible in this listing */
00058 static const char especials[17] = "()<>@,;:\"/[]?.= ";  /* 16 characters (plus NUL) that the RFC 2047 / RFC 2231 encoders below always hex-escape */
00059 
00060 /* UTF16 definitions: surrogate-pair arithmetic shared by the folder-name encoder and decoder */
00061 #define UTF16MASK       0x03FFUL  /* low 10 bits carried by the second surrogate */
00062 #define UTF16SHIFT      10  /* shift that moves the upper 10 bits into/out of the first surrogate */
00063 #define UTF16BASE       0x10000UL  /* offset subtracted/added when splitting/joining a surrogate pair */
00064 #define UTF16HIGHSTART  0xD800UL  /* first value treated as a high (leading) surrogate */
00065 #define UTF16HIGHEND    0xDBFFUL  /* last value treated as a high (leading) surrogate */
00066 #define UTF16LOSTART    0xDC00UL  /* first value treated as a low (trailing) surrogate */
00067 #define UTF16LOEND      0xDFFFUL  /* last value treated as a low (trailing) surrogate */
00068 //@endcond
00069 
00070 //-----------------------------------------------------------------------------
00071 QByteArray KIMAP::decodeImapFolderName( const QByteArray &inSrc )
00072 {
00073   unsigned char c, i, bitcount;
00074   unsigned long ucs4, utf16, bitbuf;
00075   unsigned char base64[256], utf8[6];
00076   unsigned int srcPtr = 0;
00077   QByteArray dst;
00078   QByteArray src = inSrc;
00079   uint srcLen = inSrc.length();
00080 
00081   /* initialize modified base64 decoding table */
00082   memset( base64, UNDEFINED, sizeof( base64 ) );
00083   for ( i = 0; i < sizeof( base64chars ); ++i ) {
00084     base64[(int)base64chars[i]] = i;
00085   }
00086 
00087   /* loop until end of string */
00088   while ( srcPtr < srcLen ) {
00089     c = src[srcPtr++];
00090     /* deal with literal characters and &- */
00091     if ( c != '&' || src[srcPtr] == '-' ) {
00092       /* encode literally */
00093       dst += c;
00094       /* skip over the '-' if this is an &- sequence */
00095       if ( c == '&' ) {
00096         srcPtr++;
00097       }
00098     } else {
00099       /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
00100       bitbuf = 0;
00101       bitcount = 0;
00102       ucs4 = 0;
00103       while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) {
00104         ++srcPtr;
00105         bitbuf = ( bitbuf << 6 ) | c;
00106         bitcount += 6;
00107         /* enough bits for a UTF-16 character? */
00108         if ( bitcount >= 16 ) {
00109           bitcount -= 16;
00110           utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;
00111           /* convert UTF16 to UCS4 */
00112           if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
00113             ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
00114             continue;
00115           } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
00116             ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
00117           } else {
00118             ucs4 = utf16;
00119           }
00120           /* convert UTF-16 range of UCS4 to UTF-8 */
00121           if ( ucs4 <= 0x7fUL ) {
00122             utf8[0] = ucs4;
00123             i = 1;
00124           } else if ( ucs4 <= 0x7ffUL ) {
00125             utf8[0] = 0xc0 | ( ucs4 >> 6 );
00126             utf8[1] = 0x80 | ( ucs4 & 0x3f );
00127             i = 2;
00128           } else if ( ucs4 <= 0xffffUL ) {
00129             utf8[0] = 0xe0 | ( ucs4 >> 12 );
00130             utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
00131             utf8[2] = 0x80 | ( ucs4 & 0x3f );
00132             i = 3;
00133           } else {
00134             utf8[0] = 0xf0 | ( ucs4 >> 18 );
00135             utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
00136             utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
00137             utf8[3] = 0x80 | ( ucs4 & 0x3f );
00138             i = 4;
00139           }
00140           /* copy it */
00141           for ( c = 0; c < i; ++c ) {
00142             dst += utf8[c];
00143           }
00144         }
00145       }
00146       /* skip over trailing '-' in modified UTF-7 encoding */
00147       if ( src[srcPtr] == '-' ) {
00148         ++srcPtr;
00149       }
00150     }
00151   }
00152   return dst;
00153 }
00154 
00155 QString KIMAP::decodeImapFolderName( const QString &inSrc )
00156 {
00157   return QString::fromUtf8( decodeImapFolderName( inSrc.toUtf8() ).data() );
00158 }
00159 
00160 //-----------------------------------------------------------------------------
00161 
00162 QByteArray KIMAP::quoteIMAP( const QByteArray &src )
00163 {
00164   uint len = src.length();
00165   QByteArray result;
00166   result.reserve( 2 * len );
00167   for ( unsigned int i = 0; i < len; i++ ) {
00168     if ( src[i] == '"' || src[i] == '\\' ) {
00169       result += '\\';
00170     }
00171     result += src[i];
00172   }
00173   result.squeeze();
00174   return result;
00175 }
00176 
00177 QString KIMAP::quoteIMAP( const QString &src )
00178 {
00179   uint len = src.length();
00180   QString result;
00181   result.reserve( 2 * len );
00182   for ( unsigned int i = 0; i < len; i++ ) {
00183     if ( src[i] == '"' || src[i] == '\\' ) {
00184       result += '\\';
00185     }
00186     result += src[i];
00187   }
00188   //result.squeeze(); - unnecessary and slow
00189   return result;
00190 }
00191 
00192 //-----------------------------------------------------------------------------
00193 QString KIMAP::encodeImapFolderName( const QString &inSrc )
00194 {
00195   return QString::fromUtf8( encodeImapFolderName( inSrc.toUtf8() ).data() );
00196 }
00197 
00198 QByteArray KIMAP::encodeImapFolderName( const QByteArray &inSrc )
00199 {
00200   unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
00201   unsigned int ucs4, bitbuf;
00202   QByteArray src = inSrc;
00203   QByteArray dst;
00204 
00205   int srcPtr = 0;
00206   utf7mode = 0;
00207   utf8total = 0;
00208   bitstogo = 0;
00209   utf8pos = 0;
00210   bitbuf = 0;
00211   ucs4 = 0;
00212   while ( srcPtr < src.length () ) {
00213     c = (unsigned char)src[srcPtr++];
00214     /* normal character? */
00215     if ( c >= ' ' && c <= '~' ) {
00216       /* switch out of UTF-7 mode */
00217       if ( utf7mode ) {
00218         if ( bitstogo ) {
00219           dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
00220           bitstogo = 0;
00221         }
00222         dst += '-';
00223         utf7mode = 0;
00224       }
00225       dst += c;
00226       /* encode '&' as '&-' */
00227       if ( c == '&' ) {
00228         dst += '-';
00229       }
00230       continue;
00231     }
00232     /* switch to UTF-7 mode */
00233     if ( !utf7mode ) {
00234       dst += '&';
00235       utf7mode = 1;
00236     }
00237     /* Encode US-ASCII characters as themselves */
00238     if ( c < 0x80 ) {
00239       ucs4 = c;
00240       utf8total = 1;
00241     } else if ( utf8total ) {
00242       /* save UTF8 bits into UCS4 */
00243       ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
00244       if ( ++utf8pos < utf8total ) {
00245         continue;
00246       }
00247     } else {
00248       utf8pos = 1;
00249       if ( c < 0xE0 ) {
00250         utf8total = 2;
00251         ucs4 = c & 0x1F;
00252       } else if ( c < 0xF0 ) {
00253         utf8total = 3;
00254         ucs4 = c & 0x0F;
00255       } else {
00256         /* NOTE: can't convert UTF8 sequences longer than 4 */
00257         utf8total = 4;
00258         ucs4 = c & 0x03;
00259       }
00260       continue;
00261     }
00262     /* loop to split ucs4 into two utf16 chars if necessary */
00263     utf8total = 0;
00264     do
00265     {
00266       if ( ucs4 >= UTF16BASE ) {
00267         ucs4 -= UTF16BASE;
00268         bitbuf =
00269           ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
00270         ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
00271         utf16flag = 1;
00272       } else {
00273         bitbuf = ( bitbuf << 16 ) | ucs4;
00274         utf16flag = 0;
00275       }
00276       bitstogo += 16;
00277       /* spew out base64 */
00278       while ( bitstogo >= 6 ) {
00279         bitstogo -= 6;
00280         dst +=
00281           base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
00282       }
00283     }
00284     while ( utf16flag );
00285   }
00286   /* if in UTF-7 mode, finish in ASCII */
00287   if ( utf7mode ) {
00288     if ( bitstogo ) {
00289       dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
00290     }
00291     dst += '-';
00292   }
00293   return quoteIMAP( dst );
00294 }
00295 
00296 //-----------------------------------------------------------------------------
00297 QTextCodec *KIMAP::codecForName( const QString &str )
00298 {
00299   if ( str.isEmpty () ) {
00300     return 0;
00301   }
00302   return QTextCodec::codecForName ( str.toLower ().
00303                                     replace ( "windows", "cp" ).toLatin1 () );
00304 }
00305 
00306 //-----------------------------------------------------------------------------
00307 const QString KIMAP::decodeRFC2047String( const QString &str )
00308 {
00309   QString throw_away;
00310 
00311   return decodeRFC2047String( str, throw_away );
00312 }
00313 
00314 //-----------------------------------------------------------------------------
00315 const QString KIMAP::decodeRFC2047String( const QString &str,
00316                                           QString &charset )
00317 {
00318   QString throw_away;
00319 
00320   return decodeRFC2047String( str, charset, throw_away );
00321 }
00322 
00323 //-----------------------------------------------------------------------------
00324 const QString KIMAP::decodeRFC2047String( const QString &str,
00325                                           QString &charset,
00326                                           QString &language )
00327 {
00328   //do we have a rfc string
00329   if ( !str.contains( "=?" ) ) {
00330     return str;
00331   }
00332 
00333   // FIXME get rid of the conversion?
00334   QByteArray aStr = str.toAscii ();  // QString.length() means Unicode chars
00335   QByteArray result;
00336   char *pos, *beg, *end, *mid = 0;
00337   QByteArray cstr;
00338   char encoding = 0, ch;
00339   bool valid;
00340   const int maxLen = 200;
00341   int i;
00342 
00343 //  result.truncate(aStr.length());
00344   for ( pos = aStr.data (); *pos; pos++ ) {
00345     if ( pos[0] != '=' || pos[1] != '?' ) {
00346       result += *pos;
00347       continue;
00348     }
00349     beg = pos + 2;
00350     end = beg;
00351     valid = true;
00352     // parse charset name
00353     for ( i = 2, pos += 2;
00354           i < maxLen &&
00355               ( *pos != '?' && ( ispunct( *pos ) || isalnum ( *pos ) ) );
00356           i++ )
00357       pos++;
00358     if ( *pos != '?' || i < 4 || i >= maxLen ) {
00359       valid = false;
00360     } else {
00361       charset = QByteArray( beg, i - 1 );  // -2 + 1 for the zero
00362       int pt = charset.lastIndexOf( '*' );
00363       if ( pt != -1 ) {
00364         // save language for later usage
00365         language = charset.right( charset.length () - pt - 1 );
00366 
00367         // tie off language as defined in rfc2047
00368         charset.truncate( pt );
00369       }
00370       // get encoding and check delimiting question marks
00371       encoding = toupper( pos[1] );
00372       if ( pos[2] != '?' ||
00373            ( encoding != 'Q' && encoding != 'B' &&
00374              encoding != 'q' && encoding != 'b' ) ) {
00375         valid = false;
00376       }
00377       pos += 3;
00378       i += 3;
00379 //  kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
00380     }
00381     if ( valid ) {
00382       mid = pos;
00383       // search for end of encoded part
00384       while ( i < maxLen && *pos && !( *pos == '?' && *( pos + 1 ) == '=' ) ) {
00385         i++;
00386         pos++;
00387       }
00388       end = pos + 2;//end now points to the first char after the encoded string
00389       if ( i >= maxLen || !*pos ) {
00390         valid = false;
00391       }
00392     }
00393     if ( valid ) {
00394       ch = *pos;
00395       *pos = '\0';
00396       cstr = QByteArray (mid).left( (int)( mid - pos - 1 ) );
00397       if ( encoding == 'Q' ) {
00398         // decode quoted printable text
00399         for ( i = cstr.length () - 1; i >= 0; i-- ) {
00400           if ( cstr[i] == '_' ) {
00401             cstr[i] = ' ';
00402           }
00403         }
00404 //    kDebug() << "before QP '"
00405 //    << cstr << "'";
00406         cstr = KCodecs::quotedPrintableDecode( cstr );
00407 //    kDebug() << "after QP '"
00408 //    << cstr << "'";
00409       } else {
00410         // decode base64 text
00411         cstr = QByteArray::fromBase64( cstr );
00412       }
00413       *pos = ch;
00414       int len = cstr.length();
00415       for ( i = 0; i < len; i++ ) {
00416         result += cstr[i];
00417       }
00418 
00419       pos = end - 1;
00420     } else {
00421 //    kDebug() << "invalid";
00422       //result += "=?";
00423       //pos = beg -1; // because pos gets increased shortly afterwards
00424       pos = beg - 2;
00425       result += *pos++;
00426       result += *pos;
00427     }
00428   }
00429   if ( !charset.isEmpty () ) {
00430     QTextCodec *aCodec = codecForName( charset.toAscii () );
00431     if ( aCodec ) {
00432 //    kDebug() << "Codec is" << aCodec->name();
00433       return aCodec->toUnicode( result );
00434     }
00435   }
00436   return result;
00437 }
00438 
00439 //-----------------------------------------------------------------------------
00440 const QString KIMAP::encodeRFC2047String( const QString &str )
00441 {
00442   return encodeRFC2047String( str.toLatin1() );
00443 }
00444 
00445 //-----------------------------------------------------------------------------
00446 const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str )
00447 {
00448   if ( str.isEmpty () ) {
00449     return str;
00450   }
00451 
00452   const signed char *latin =
00453     reinterpret_cast<const signed char *>
00454     ( str.data() ), *l, *start, *stop;
00455   char hexcode;
00456   int numQuotes, i;
00457   int rptr = 0;
00458   // My stats show this number results in 12 resize() out of 73,000
00459   int resultLen = 3 * str.length() / 2;
00460   QByteArray result( resultLen, '\0' );
00461 
00462   while ( *latin ) {
00463     l = latin;
00464     start = latin;
00465     while ( *l ) {
00466       if ( *l == 32 ) {
00467         start = l + 1;
00468       }
00469       if ( *l < 0 ) {
00470         break;
00471       }
00472       l++;
00473     }
00474     if ( *l ) {
00475       numQuotes = 1;
00476       while ( *l ) {
00477         /* The encoded word must be limited to 75 character */
00478         for ( i = 0; i < 16; i++ ) {
00479           if ( *l == especials[i] ) {
00480             numQuotes++;
00481           }
00482         }
00483         if ( *l < 0 ) {
00484           numQuotes++;
00485         }
00486         /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
00487         if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) {
00488           break;
00489         }
00490         l++;
00491       }
00492       if ( *l ) {
00493         stop = l - 1;
00494         while ( stop >= start && *stop != 32 ) {
00495           stop--;
00496         }
00497         if ( stop <= start ) {
00498           stop = l;
00499         }
00500       } else {
00501         stop = l;
00502       }
00503       if ( resultLen - rptr - 1 <= start -  latin + 1 + 16 ) {
00504         // =?iso-88...
00505         resultLen += ( start - latin + 1 ) * 2 + 20; // more space
00506         result.resize( resultLen );
00507       }
00508       while ( latin < start ) {
00509         result[rptr++] = *latin;
00510         latin++;
00511       }
00512       result.replace( rptr, 15, "=?iso-8859-1?q?" );
00513       rptr += 15;
00514       if ( resultLen - rptr - 1 <= 3 * ( stop - latin + 1 ) ) {
00515         resultLen += ( stop - latin + 1 ) * 4 + 20; // more space
00516         result.resize( resultLen );
00517       }
00518       while ( latin < stop ) {
00519         // can add up to 3 chars/iteration
00520         numQuotes = 0;
00521         for ( i = 0; i < 16; i++ ) {
00522           if ( *latin == especials[i] ) {
00523             numQuotes = 1;
00524           }
00525         }
00526         if ( *latin < 0 ) {
00527           numQuotes = 1;
00528         }
00529         if ( numQuotes ) {
00530           result[rptr++] = '=';
00531           hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48;
00532           if ( hexcode >= 58 ) {
00533             hexcode += 7;
00534           }
00535           result[rptr++] = hexcode;
00536           hexcode = ( *latin & 0x0F ) + 48;
00537           if ( hexcode >= 58 ) {
00538             hexcode += 7;
00539           }
00540           result[rptr++] = hexcode;
00541         } else {
00542           result[rptr++] = *latin;
00543         }
00544         latin++;
00545       }
00546       result[rptr++] = '?';
00547       result[rptr++] = '=';
00548     } else {
00549       while ( *latin ) {
00550         if ( rptr == resultLen - 1 ) {
00551           resultLen += 30;
00552           result.resize( resultLen );
00553         }
00554         result[rptr++] = *latin;
00555         latin++;
00556       }
00557     }
00558   }
00559   result[rptr] = 0;
00560   return result;
00561 }
00562 
00563 //-----------------------------------------------------------------------------
00564 const QString KIMAP::encodeRFC2231String( const QString &str )
00565 {
00566   if ( str.isEmpty () ) {
00567     return str;
00568   }
00569 
00570   signed char *latin = (signed char *)calloc( 1, str.length () + 1 );
00571   char *latin_us = (char *)latin;
00572   strcpy( latin_us, str.toLatin1 () );
00573   signed char *l = latin;
00574   char hexcode;
00575   int i;
00576   bool quote;
00577   while ( *l ) {
00578     if ( *l < 0 ) {
00579       break;
00580     }
00581     l++;
00582   }
00583   if ( !*l ) {
00584     free( latin );
00585     return str;
00586   }
00587   QByteArray result;
00588   l = latin;
00589   while ( *l ) {
00590     quote = *l < 0;
00591     for ( i = 0; i < 16; i++ ) {
00592       if ( *l == especials[i] ) {
00593         quote = true;
00594       }
00595     }
00596     if ( quote ) {
00597       result += '%';
00598       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00599       if ( hexcode >= 58 ) {
00600         hexcode += 7;
00601       }
00602       result += hexcode;
00603       hexcode = ( *l & 0x0F ) + 48;
00604       if ( hexcode >= 58 ) {
00605         hexcode += 7;
00606       }
00607       result += hexcode;
00608     } else {
00609       result += *l;
00610     }
00611     l++;
00612   }
00613   free( latin );
00614   return result;
00615 }
00616 
00617 //-----------------------------------------------------------------------------
00618 const QString KIMAP::decodeRFC2231String( const QString &str )
00619 {
00620   int p = str.indexOf ( '\'' );
00621 
00622   //see if it is an rfc string
00623   if ( p < 0 ) {
00624     return str;
00625   }
00626 
00627   int l = str.lastIndexOf( '\'' );
00628 
00629   //second is language
00630   if ( p >= l ) {
00631     return str;
00632   }
00633 
00634   //first is charset or empty
00635   QString charset = str.left ( p );
00636   QString st = str.mid ( l + 1 );
00637   QString language = str.mid ( p + 1, l - p - 1 );
00638 
00639   //kDebug() << "Charset:" << charset << "Language:" << language;
00640 
00641   char ch, ch2;
00642   p = 0;
00643   while ( p < (int) st.length () ) {
00644     if ( st.at( p ) == 37 ) {
00645       ch = st.at( p + 1 ).toLatin1 () - 48;
00646       if ( ch > 16 ) {
00647         ch -= 7;
00648       }
00649       ch2 = st.at( p + 2 ).toLatin1 () - 48;
00650       if ( ch2 > 16 ) {
00651         ch2 -= 7;
00652       }
00653       st.replace( p, 1, ch * 16 + ch2 );
00654       st.remove ( p + 1, 2 );
00655     }
00656     p++;
00657   }
00658   return st;
00659 }

KIMAP Library

Skip menu "KIMAP Library"
  • Main Page
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal