• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdepimlibs-4.8.5 API Reference
  • KDE Home
  • Contact Us
 

KMIME Library

kmime_util.cpp
00001 /*
00002   kmime_util.cpp
00003 
00004   KMime, the KDE Internet mail/usenet news message library.
00005   Copyright (c) 2001 the KMime authors.
00006   See file AUTHORS for details
00007 
00008   This library is free software; you can redistribute it and/or
00009   modify it under the terms of the GNU Library General Public
00010   License as published by the Free Software Foundation; either
00011   version 2 of the License, or (at your option) any later version.
00012 
00013   This library is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016   Library General Public License for more details.
00017 
00018   You should have received a copy of the GNU Library General Public License
00019   along with this library; see the file COPYING.LIB.  If not, write to
00020   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021   Boston, MA 02110-1301, USA.
00022 */
00023 
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 
00027 #include "kmime_charfreq.h"
00028 #include "kmime_codecs.h"
00029 #include "kmime_header_parsing.h"
00030 #include "kmime_message.h"
00031 #include "kmime_warning.h"
00032 
00033 #include <config-kmime.h>
00034 #include <kdefakes.h> // for strcasestr
00035 #include <kglobal.h>
00036 #include <klocale.h>
00037 #include <kcharsets.h>
00038 #include <kcodecs.h>
00039 #include <kdebug.h>
00040 
00041 #include <QtCore/QList>
00042 #include <QtCore/QString>
00043 #include <QtCore/QTextCodec>
00044 
00045 #include <ctype.h>
00046 #include <time.h>
00047 #include <stdlib.h>
00048 #include <unistd.h>
00049 #include <boost/concept_check.hpp>
00050 
00051 using namespace KMime;
00052 
00053 namespace KMime {
00054 
// File-level state shared by the helper functions below.

// Charset names handed out so far by cachedCharset().
QList<QByteArray> c_harsetCache;
// Language names handed out so far by cachedLanguage().
QList<QByteArray> l_anguageCache;
// Charset consulted by decodeRFC2047String() when its UTF-8 attempt produces
// U+FFFD; an empty string disables the fallback.
QString f_allbackCharEnc;
// Flag stored by setUseOutlookAttachmentEncoding() and returned by
// useOutlookAttachmentEncoding().
bool u_seOutlookEncoding = false;
00059 
00060 QByteArray cachedCharset( const QByteArray &name )
00061 {
00062   foreach ( const QByteArray& charset, c_harsetCache ) {
00063     if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00064       return charset;
00065     }
00066   }
00067 
00068   c_harsetCache.append( name.toUpper() );
00069   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00070   return c_harsetCache.last();
00071 }
00072 
00073 QByteArray cachedLanguage( const QByteArray &name )
00074 {
00075   foreach ( const QByteArray& language, l_anguageCache ) {
00076     if ( qstricmp( name.data(), language.data() ) == 0 ) {
00077       return language;
00078     }
00079   }
00080 
00081   l_anguageCache.append( name.toUpper() );
00082   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00083   return l_anguageCache.last();
00084 }
00085 
00086 bool isUsAscii( const QString &s )
00087 {
00088   uint sLength = s.length();
00089   for ( uint i=0; i<sLength; i++ ) {
00090     if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
00091       return false;
00092     }
00093   }
00094   return true;
00095 }
00096 
00097 QString nameForEncoding( Headers::contentEncoding enc )
00098 {
00099   switch( enc ) {
00100     case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00101     case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00102     case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00103     case Headers::CEbase64: return QString::fromLatin1( "base64" );
00104     case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00105     case Headers::CEbinary: return QString::fromLatin1( "binary" );
00106     default: return QString::fromLatin1( "unknown" );
00107   }
00108 }
00109 
00110 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00111 {
00112   QList<Headers::contentEncoding> allowed;
00113   CharFreq cf( data );
00114 
00115   switch ( cf.type() ) {
00116     case CharFreq::SevenBitText:
00117       allowed << Headers::CE7Bit;
00118     case CharFreq::EightBitText:
00119       allowed << Headers::CE8Bit;
00120     case CharFreq::SevenBitData:
00121       if ( cf.printableRatio() > 5.0/6.0 ) {
00122         // let n the length of data and p the number of printable chars.
00123         // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
00124         // => qp < base64 iff p > 5n/6.
00125         allowed << Headers::CEquPr;
00126         allowed << Headers::CEbase64;
00127       } else {
00128         allowed << Headers::CEbase64;
00129         allowed << Headers::CEquPr;
00130       }
00131       break;
00132     case CharFreq::EightBitData:
00133       allowed << Headers::CEbase64;
00134       break;
00135     case CharFreq::None:
00136     default:
00137       Q_ASSERT( false );
00138   }
00139 
00140   return allowed;
00141 }
00142 
// Character-class bitmaps for the 128 US-ASCII characters.
// Each table has 16 bytes; byte c/8 covers characters 8*(c/8) .. 8*(c/8)+7,
// and checking the set bits against the characters listed in each comment
// shows that the most significant bit stands for the lowest character of
// the group.

// Set members: "(),.:;<>@[\]
const uchar specialsMap[16] = {
  0x00, 0x00, 0x00, 0x00, // CTLs
  0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
  0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
  0x00, 0x00, 0x00, 0x00  // '`' ... DEL
};

// Set members: "(),:;<>@[\]/=?
const uchar tSpecialsMap[16] = {
  0x00, 0x00, 0x00, 0x00, // CTLs
  0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
  0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
  0x00, 0x00, 0x00, 0x00  // '`' ... DEL
};

// all except specials, CTLs, SPACE.
const uchar aTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x5F, 0x35, 0xFF, 0xC5,
  0x7F, 0xFF, 0xFF, 0xE3,
  0xFF, 0xFF, 0xFF, 0xFE
};

// all except tspecials, CTLs, SPACE.
const uchar tTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x5F, 0x36, 0xFF, 0xC0,
  0x7F, 0xFF, 0xFF, 0xE3,
  0xFF, 0xFF, 0xFF, 0xFE
};

// none except a-zA-Z0-9!*+-/
const uchar eTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x40, 0x35, 0xFF, 0xC0,
  0x7F, 0xFF, 0xFF, 0xE0,
  0x7F, 0xFF, 0xFF, 0xE0
};
00182 
/**
 * Stores @p fallbackCharEnc as the charset name that decodeRFC2047String()
 * tries when its UTF-8 decoding attempt yields replacement characters.
 */
void setFallbackCharEncoding(const QString& fallbackCharEnc)
{
  f_allbackCharEnc = fallbackCharEnc;
}

/**
 * Returns the charset name last stored with setFallbackCharEncoding()
 * (an empty string if none was stored).
 */
QString fallbackCharEncoding()
{
  return f_allbackCharEnc;
}
00192 
/**
 * Stores the "use Outlook attachment encoding" flag.
 * @param violateStandard the new value of the flag.
 */
void setUseOutlookAttachmentEncoding( bool violateStandard )
{
  u_seOutlookEncoding = violateStandard;
}

/**
 * Returns the flag last stored with setUseOutlookAttachmentEncoding()
 * (false by default).
 */
bool useOutlookAttachmentEncoding()
{
  return u_seOutlookEncoding;
}
00202 
00203 
00204 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00205                              const QByteArray &defaultCS, bool forceCS )
00206 {
00207   QByteArray result;
00208   QByteArray spaceBuffer;
00209   const char *scursor = src.constData();
00210   const char *send = scursor + src.length();
00211   bool onlySpacesSinceLastWord = false;
00212 
00213   while ( scursor != send ) {
00214      // space
00215     if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00216       spaceBuffer += *scursor++;
00217       continue;
00218     }
00219 
00220     // possible start of an encoded word
00221     if ( *scursor == '=' ) {
00222       QByteArray language;
00223       QString decoded;
00224       ++scursor;
00225       const char *start = scursor;
00226       if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00227         result += decoded.toUtf8();
00228         onlySpacesSinceLastWord = true;
00229         spaceBuffer.clear();
00230       } else {
00231         if ( onlySpacesSinceLastWord ) {
00232           result += spaceBuffer;
00233           onlySpacesSinceLastWord = false;
00234         }
00235         result += '=';
00236         scursor = start; // reset cursor after parsing failure
00237       }
00238       continue;
00239     } else {
00240       // unencoded data
00241       if ( onlySpacesSinceLastWord ) {
00242         result += spaceBuffer;
00243         onlySpacesSinceLastWord = false;
00244       }
00245       result += *scursor;
00246       ++scursor;
00247     }
00248   }
00249   // If there are any chars that couldn't be decoded in UTF-8,
00250   //  use the fallback charset if it exists
00251   const QString tryUtf8 = QString::fromUtf8( result );
00252   if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
00253     QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
00254     return codec->toUnicode( result );
00255   } else {
00256     return tryUtf8;
00257   }
00258 }
00259 
00260 QString decodeRFC2047String( const QByteArray &src )
00261 {
00262   QByteArray usedCS;
00263   return decodeRFC2047String( src, usedCS, "utf-8", false );
00264 }
00265 
00266 static const char *reservedCharacters = "\"()<>@,.;:\\[]=";
00267 
00268 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00269                                 bool addressHeader, bool allow8BitHeaders )
00270 {
00271   QByteArray result;
00272   int start=0, end=0;
00273   bool nonAscii=false, ok=true, useQEncoding=false;
00274 
00275   // fromLatin1() is safe here, codecForName() uses toLatin1() internally
00276   const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
00277 
00278   QByteArray usedCS;
00279   if ( !ok ) {
00280     //no codec available => try local8Bit and hope the best ;-)
00281     usedCS = KGlobal::locale()->encoding();
00282     codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
00283   }
00284   else {
00285     Q_ASSERT( codec );
00286     if ( charset.isEmpty() )
00287       usedCS = codec->name();
00288     else
00289       usedCS = charset;
00290   }
00291 
00292   QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
00293   QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
00294   if ( converterState.invalidChars > 0 ) {
00295     usedCS = "utf-8";
00296     codec = QTextCodec::codecForName( usedCS );
00297     encoded8Bit = codec->fromUnicode( src );
00298   }
00299 
00300   if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
00301     useQEncoding = true;
00302   }
00303 
00304   if ( allow8BitHeaders ) {
00305     return encoded8Bit;
00306   }
00307 
00308   uint encoded8BitLength = encoded8Bit.length();
00309   for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00310     if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
00311       start = i + 1;
00312     }
00313 
00314     // encode escape character, for japanese encodings...
00315     if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00316          ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00317       end = start;   // non us-ascii char found, now we determine where to stop encoding
00318       nonAscii = true;
00319       break;
00320     }
00321   }
00322 
00323   if ( nonAscii ) {
00324     while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00325       // we encode complete words
00326       end++;
00327     }
00328 
00329     for ( int x=end; x<encoded8Bit.length(); x++ ) {
00330       if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00331            ( addressHeader && ( strchr(reservedCharacters, encoded8Bit[x]) != 0 ) ) ) {
00332         end = x;     // we found another non-ascii word
00333 
00334         while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00335           // we encode complete words
00336           end++;
00337         }
00338       }
00339     }
00340 
00341     result = encoded8Bit.left( start ) + "=?" + usedCS;
00342 
00343     if ( useQEncoding ) {
00344       result += "?Q?";
00345 
00346       char c, hexcode;// "Q"-encoding implementation described in RFC 2047
00347       for ( int i=start; i<end; i++ ) {
00348         c = encoded8Bit[i];
00349         if ( c == ' ' ) { // make the result readable with not MIME-capable readers
00350           result += '_';
00351         } else {
00352           if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
00353               ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||  // with "From" & "To" headers
00354               ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00355             result += c;
00356           } else {
00357             result += '=';                 // "stolen" from KMail ;-)
00358             hexcode = ((c & 0xF0) >> 4) + 48;
00359             if ( hexcode >= 58 ) {
00360               hexcode += 7;
00361             }
00362             result += hexcode;
00363             hexcode = (c & 0x0F) + 48;
00364             if ( hexcode >= 58 ) {
00365               hexcode += 7;
00366             }
00367             result += hexcode;
00368           }
00369         }
00370       }
00371     } else {
00372       result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00373     }
00374 
00375     result +="?=";
00376     result += encoded8Bit.right( encoded8Bit.length() - end );
00377   } else {
00378     result = encoded8Bit;
00379   }
00380 
00381   return result;
00382 }
00383 
00384 QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset )
00385 {
00386   QByteArray result;
00387   QList<QChar> splitChars;
00388   splitChars << QLatin1Char(',') << QLatin1Char('\"') << QLatin1Char(';') << QLatin1Char('\\');
00389   const QChar *ch = src.constData();
00390   const int length = src.length();
00391   int pos = 0;
00392   int wordStart = 0;
00393 
00394   //qDebug() << "Input:" << src;
00395   // Loop over all characters of the string.
00396   // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
00397   while (pos < length) {
00398     //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toAscii();
00399     const bool isAscii = ch->unicode() < 127;
00400     const bool isReserved = (strchr( reservedCharacters, ch->toAscii() ) != 0);
00401     if ( isAscii && isReserved ) {
00402       const int wordSize = pos - wordStart;
00403       if (wordSize > 0) {
00404         const QString word = src.mid( wordStart, wordSize );
00405         result += encodeRFC2047String( word, charset );
00406       }
00407 
00408       result += ch->toAscii();
00409       wordStart = pos + 1;
00410     }
00411     ch++;
00412     pos++;
00413   }
00414 
00415   // Encode the last word
00416   const int wordSize = pos - wordStart;
00417   if (wordSize > 0) {
00418     const QString word = src.mid( wordStart, pos - wordStart );
00419     result += encodeRFC2047String( word, charset );
00420   }
00421 
00422   return result;
00423 }
00424 
00425 
00426 
00427 //-----------------------------------------------------------------------------
00428 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
00429 {
00430   if ( str.isEmpty() )
00431     return QByteArray();
00432 
00433 
00434   const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
00435   QByteArray latin;
00436   if ( charset == "us-ascii" )
00437     latin = str.toAscii();
00438   else if ( codec )
00439     latin = codec->fromUnicode( str );
00440   else
00441     latin = str.toLocal8Bit();
00442 
00443   char *l;
00444   for ( l = latin.data(); *l; ++l ) {
00445     if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) )
00446       // *l is control character or 8-bit char
00447       break;
00448   }
00449   if ( !*l )
00450     return latin;
00451 
00452   QByteArray result = charset + "''";
00453   for ( l = latin.data(); *l; ++l ) {
00454     bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
00455     if( !needsQuoting ) {
00456       const QByteArray especials = "()<>@,;:\"/[]?.= \033";
00457       int len = especials.length();
00458       for ( int i = 0; i < len; i++ )
00459         if ( *l == especials[i] ) {
00460           needsQuoting = true;
00461           break;
00462         }
00463     }
00464     if ( needsQuoting ) {
00465       result += '%';
00466       unsigned char hexcode;
00467       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00468       if ( hexcode >= 58 )
00469         hexcode += 7;
00470       result += hexcode;
00471       hexcode = ( *l & 0x0F ) + 48;
00472       if ( hexcode >= 58 )
00473         hexcode += 7;
00474       result += hexcode;
00475     } else {
00476       result += *l;
00477     }
00478   }
00479   return result;
00480 }
00481 
00482 
00483 //-----------------------------------------------------------------------------
00484 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
00485   bool forceCS )
00486 {
00487   int p = str.indexOf('\'');
00488   if (p < 0) return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS  ))->toUnicode( str );
00489 
00490 
00491   QByteArray charset = str.left(p);
00492 
00493   QByteArray st = str.mid( str.lastIndexOf('\'') + 1 );
00494 
00495   char ch, ch2;
00496   p = 0;
00497   while (p < (int)st.length())
00498   {
00499     if (st.at(p) == 37)
00500     {
00501       // Only try to decode the percent-encoded character if the percent sign
00502       // is really followed by two other characters, see testcase at bug 163024
00503       if ( p + 2 < st.length() ) {
00504         ch = st.at(p+1) - 48;
00505         if (ch > 16)
00506           ch -= 7;
00507         ch2 = st.at(p+2) - 48;
00508         if (ch2 > 16)
00509           ch2 -= 7;
00510         st[p] = ch * 16 + ch2;
00511         st.remove( p+1, 2 );
00512       }
00513     }
00514     p++;
00515   }
00516   kDebug() << "Got pre-decoded:" << st;
00517   QString result;
00518   const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
00519   if ( !charsetcodec || forceCS )
00520     charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
00521 
00522   usedCS = charsetcodec->name();
00523   return charsetcodec->toUnicode( st );
00524 }
00525 
00526 QString decodeRFC2231String( const QByteArray &src )
00527 {
00528   QByteArray usedCS;
00529   return decodeRFC2231String( src, usedCS, "utf-8", false );
00530 }
00531 
00532 QByteArray uniqueString()
00533 {
00534   static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00535   time_t now;
00536   char p[11];
00537   int pos, ran;
00538   unsigned int timeval;
00539 
00540   p[10] = '\0';
00541   now = time( 0 );
00542   ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00543   timeval = (now / ran) + getpid();
00544 
00545   for ( int i=0; i<10; i++ ) {
00546     pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00547     //kDebug() << pos;
00548     p[i] = chars[pos];
00549   }
00550 
00551   QByteArray ret;
00552   ret.setNum( timeval );
00553   ret += '.';
00554   ret += p;
00555 
00556   return ret;
00557 }
00558 
00559 QByteArray multiPartBoundary()
00560 {
00561   return "nextPart" + uniqueString();
00562 }
00563 
/**
 * Unfolds a folded header value.
 *
 * Every line break, together with the white space directly before and
 * after it, is replaced by a single space (no space is added when the fold
 * reaches the end of the value). A literal "=09" or "=20" at the start of a
 * continuation line is skipped as well (bug #86302).
 *
 * @param header the folded header value.
 * @return the unfolded value; empty if @p header is empty.
 */
QByteArray unfoldHeader( const QByteArray &header )
{
  QByteArray result;
  if ( header.isEmpty() ) {
    return result;
  }

  // pos:       start of the text not yet copied to result
  // foldMid:   position of the current '\n'
  // foldBegin: start of the white space run that ends at the '\n'
  // foldEnd:   first position after the fold that is kept
  int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
  while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
    foldBegin = foldEnd = foldMid;
    // find the first space before the line-break
    while ( foldBegin > 0 ) {
      if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
        break;
      }
      --foldBegin;
    }
    // find the first non-space after the line-break
    while ( foldEnd <= header.length() - 1 ) {
      if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
        ++foldEnd;
      }
      else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
                header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
                ( ( header[foldEnd + 1] == '0' &&
                    header[foldEnd + 2] == '9' ) ||
                  ( header[foldEnd + 1] == '2' &&
                    header[foldEnd + 2] == '0' ) ) ) {
        // bug #86302: malformed header continuation starting with =09/=20
        foldEnd += 3;
      }
      else {
        break;
      }
    }

    // copy the text before the fold, then one space in place of the fold
    result += header.mid( pos, foldBegin - pos );
    if ( foldEnd < header.length() -1 )
      result += ' ';
    pos = foldEnd;
  }
  // copy whatever follows the last fold
  const int len = header.length();
  if ( len > pos ) {
    result += header.mid( pos, len - pos );
  }
  return result;
}
00611 
/**
 * Finds the end of the header value that starts at @p dataBegin in @p src,
 * taking folded continuation lines into account.
 *
 * @param src       the header block.
 * @param dataBegin start of the header value; advanced by two if the first
 *                  line is empty and the value continues on the next line.
 * @param folded    if non-null, set to true when the value spans several
 *                  lines and to false otherwise.
 * @return -1 if @p dataBegin is negative; otherwise the index of the '\n'
 *         that ends the value, or src.length() if the value runs to the end
 *         of @p src.
 */
int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
{
  int end = dataBegin;
  int len = src.length() - 1; // index of the last byte of src

  if ( folded )
    *folded = false;

  if ( dataBegin < 0 ) {
    // Not found
    return -1;
  }

  if ( dataBegin > len ) {
    // No data available
    return len + 1;
  }

  // If the first line contains nothing, but the next line starts with a space
  // or a tab, that means a stupid mail client has made the first header field line
  // entirely empty, and has folded the rest to the next line(s).
  if ( src.at(end) == '\n' && end + 1 < len &&
       ( src[end+1] == ' ' || src[end+1] == '\t' ) ) {

    // Skip \n and first whitespace
    dataBegin += 2;
    end += 2;
  }

  if ( src.at(end) != '\n' ) {  // check if the header is not empty
    // Walk from line break to line break until one is found that is not
    // followed by a continuation line.
    while ( true ) {
      end = src.indexOf( '\n', end + 1 );
      if ( end == -1 || end == len ) {
        // end of string
        break;
      }
      else if ( src[end+1] == ' ' || src[end+1] == '\t' ||
                ( src[end+1] == '=' && end+3 <= len &&
                  ( ( src[end+2] == '0' && src[end+3] == '9' ) ||
                    ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) {
        // next line is header continuation or starts with =09/=20 (bug #86302)
        if ( folded )
          *folded = true;
      } else {
        // end of header (no header continuation)
        break;
      }
    }
  }

  if ( end < 0 ) {
    end = len + 1; //take the rest of the string
  }
  return end;
}
00667 
00668 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00669 {
00670   QByteArray n = name;
00671   n.append( ':' );
00672   int begin = -1;
00673 
00674   if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00675     begin = 0;
00676   } else {
00677     n.prepend('\n');
00678     const char *p = strcasestr( src.constData(), n.constData() );
00679     if ( !p ) {
00680       begin = -1;
00681     } else {
00682       begin = p - src.constData();
00683       ++begin;
00684     }
00685   }
00686 
00687   if ( begin > -1) {     //there is a header with the given name
00688     dataBegin = begin + name.length() + 1; //skip the name
00689     // skip the usual space after the colon
00690     if ( src.at( dataBegin ) == ' ' ) {
00691       ++dataBegin;
00692     }
00693     end = findHeaderLineEnd( src, dataBegin, folded );
00694     return begin;
00695 
00696   } else {
00697     end = -1;
00698     dataBegin = -1;
00699     return -1; //header not found
00700   }
00701 }
00702 
00703 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00704 {
00705   int begin, end;
00706   bool folded;
00707   QByteArray result;
00708 
00709   if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
00710     return result;
00711   }
00712 
00713   if ( begin >= 0 ) {
00714     if ( !folded ) {
00715       result = src.mid( begin, end - begin );
00716     } else {
00717       if ( end > begin ) {
00718         QByteArray hdrValue = src.mid( begin, end - begin );
00719         result = unfoldHeader( hdrValue );
00720       }
00721     }
00722   }
00723   return result;
00724 }
00725 
00726 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00727 {
00728   int begin, end;
00729   bool folded;
00730   QList<QByteArray> result;
00731   QByteArray copySrc( src );
00732 
00733   if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
00734     return result;
00735   }
00736 
00737   while ( begin >= 0 ) {
00738     if ( !folded ) {
00739       result.append( copySrc.mid( begin, end - begin ) );
00740     } else {
00741       QByteArray hdrValue = copySrc.mid( begin, end - begin );
00742       result.append( unfoldHeader( hdrValue ) );
00743     }
00744 
00745     // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
00746     copySrc = copySrc.mid( end );
00747     if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
00748       break;
00749     }
00750   }
00751 
00752   return result;
00753 }
00754 
00755 void removeHeader( QByteArray &header, const QByteArray &name )
00756 {
00757   int begin, end, dummy;
00758   begin = indexOfHeader( header, name, end, dummy );
00759   if ( begin >= 0 ) {
00760     header.remove( begin, end - begin + 1 );
00761   }
00762 }
00763 
00764 QByteArray CRLFtoLF( const QByteArray &s )
00765 {
00766   QByteArray ret = s;
00767   ret.replace( "\r\n", "\n" );
00768   return ret;
00769 }
00770 
00771 QByteArray CRLFtoLF( const char *s )
00772 {
00773   QByteArray ret = s;
00774   return CRLFtoLF( ret );
00775 }
00776 
00777 QByteArray LFtoCRLF( const QByteArray &s )
00778 {
00779   QByteArray ret = s;
00780   ret.replace( '\n', "\r\n" );
00781   return ret;
00782 }
00783 
00784 QByteArray LFtoCRLF( const char *s )
00785 {
00786   QByteArray ret = s;
00787   return LFtoCRLF( ret );
00788 }
00789 
namespace {
/**
 * Strips quoting from @p str in place: every double quote is removed, and
 * inside a quoted section every backslash is removed while the character it
 * escapes is kept.
 *
 * StringType must provide length(), operator[] and remove( pos, count );
 * CharType must be constructible from a char and comparable with the
 * elements of StringType.
 */
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  bool quoted = false;
  int pos = 0;
  while ( pos < str.length() ) {
    if ( str[pos] == CharType( '"' ) ) {
      // drop the quote and look at the same position again
      str.remove( pos, 1 );
      quoted = !quoted;
      continue;
    }
    if ( quoted && ( str[pos] == CharType( '\\' ) ) ) {
      // drop the backslash; the increment below then steps over the
      // character it escaped
      str.remove( pos, 1 );
    }
    ++pos;
  }
}
}
00807 
/**
 * Removes double quotes and, inside quoted sections, escaping backslashes
 * from @p str in place (byte array variant of removeQuotesGeneric()).
 */
void removeQuots( QByteArray &str )
{
  removeQuotesGeneric<QByteArray,char>( str );
}

/**
 * Removes double quotes and, inside quoted sections, escaping backslashes
 * from @p str in place (QString variant of removeQuotesGeneric()).
 */
void removeQuots( QString &str )
{
  removeQuotesGeneric<QString,QLatin1Char>( str );
}
00817 
00818 template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString>
00819 void addQuotes_impl( StringType &str, bool forceQuotes )
00820 {
00821   bool needsQuotes=false;
00822   for ( int i=0; i < str.length(); i++ ) {
00823     const CharType cur = str.at( i );
00824     if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
00825       needsQuotes = true;
00826     }
00827     if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) {
00828       str.insert( i, CharConverterType( '\\' ) );
00829       i++;
00830     }
00831   }
00832 
00833   if ( needsQuotes || forceQuotes ) {
00834     str.insert( 0, CharConverterType( '\"' ) );
00835     str.append( StringConverterType( "\"" ) );
00836   }
00837 }
00838 
/**
 * Escapes backslashes and double quotes in @p str and encloses it in double
 * quotes if it contains special characters or if @p forceQuotes is set
 * (byte array variant of addQuotes_impl()).
 */
void addQuotes( QByteArray &str, bool forceQuotes )
{
  addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes );
}

/**
 * Escapes backslashes and double quotes in @p str and encloses it in double
 * quotes if it contains special characters or if @p forceQuotes is set
 * (QString variant of addQuotes_impl()).
 */
void addQuotes( QString &str, bool forceQuotes )
{
  addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes );
}
00848 
00849 KMIME_EXPORT QString balanceBidiState( const QString &input )
00850 {
00851   const int LRO = 0x202D;
00852   const int RLO = 0x202E;
00853   const int LRE = 0x202A;
00854   const int RLE = 0x202B;
00855   const int PDF = 0x202C;
00856 
00857   QString result = input;
00858 
00859   int openDirChangers = 0;
00860   int numPDFsRemoved = 0;
00861   for ( int i = 0; i < input.length(); i++ ) {
00862     const ushort &code = input.at( i ).unicode();
00863     if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00864       openDirChangers++;
00865     }
00866     else if ( code == PDF ) {
00867       if ( openDirChangers > 0 ) {
00868         openDirChangers--;
00869       }
00870       else {
00871         // One PDF too much, remove it
00872         kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00873         result.remove( i - numPDFsRemoved, 1 );
00874         numPDFsRemoved++;
00875       }
00876     }
00877   }
00878 
00879   if ( openDirChangers > 0 ) {
00880     kWarning() << "Possible Unicode spoofing detected in" << input;
00881 
00882     // At PDF chars to the end until the correct state is restored.
00883     // As a special exception, when encountering quoted strings, place the PDF before
00884     // the last quote.
00885     for ( int i = openDirChangers; i > 0; i-- ) {
00886       if ( result.endsWith( QLatin1Char( '"' ) ) )
00887         result.insert( result.length() - 1, QChar( PDF ) );
00888       else
00889         result += QChar( PDF );
00890     }
00891   }
00892 
00893   return result;
00894 }
00895 
00896 QString removeBidiControlChars( const QString &input )
00897 {
00898   const int LRO = 0x202D;
00899   const int RLO = 0x202E;
00900   const int LRE = 0x202A;
00901   const int RLE = 0x202B;
00902   QString result = input;
00903   result.remove( LRO );
00904   result.remove( RLO );
00905   result.remove( LRE );
00906   result.remove( RLE );
00907   return result;
00908 }
00909 
00910 static bool isCryptoPart( Content* content )
00911 {
00912   if( !content->contentType( false ) )
00913     return false;
00914 
00915   if( content->contentType()->subType().toLower() == "octet-stream" &&
00916       !content->contentDisposition( false ) )
00917     return false;
00918 
00919   const Headers::ContentType *contentType = content->contentType();
00920   const QByteArray lowerSubType = contentType->subType().toLower();
00921   return ( contentType->mediaType().toLower() == "application" &&
00922          ( lowerSubType == "pgp-encrypted" ||
00923            lowerSubType == "pgp-signature" ||
00924            lowerSubType == "pkcs7-mime" ||
00925            lowerSubType == "pkcs7-signature" ||
00926            lowerSubType == "x-pkcs7-signature" ||
00927            ( lowerSubType == "octet-stream" &&
00928              content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) );
00929 }
00930 
00931 bool hasAttachment( Content* content )
00932 {
00933   if( !content )
00934     return false;
00935 
00936   bool emptyFilename = true;
00937   if( content->contentDisposition( false ) && !content->contentDisposition()->filename().isEmpty() )
00938     emptyFilename = false;
00939 
00940   if( emptyFilename && content->contentType( false ) && !content->contentType()->name().isEmpty() )
00941     emptyFilename = false;
00942 
00943   // ignore crypto parts
00944   if( !emptyFilename && !isCryptoPart( content ) )
00945     return true;
00946 
00947   // Ok, content itself is not an attachment. now we deal with multiparts
00948   if( content->contentType()->isMultipart() ) {
00949     Q_FOREACH( Content* child, content->contents() ) {
00950       if( hasAttachment( child ) )
00951         return true;
00952     }
00953   }
00954 
00955   return false;
00956 }
00957 
00958 bool isSigned( Message *message )
00959 {
00960   if ( !message )
00961     return false;
00962 
00963   const KMime::Headers::ContentType* const contentType = message->contentType();
00964   if ( contentType->isSubtype( "signed" ) ||
00965        contentType->isSubtype( "pgp-signature" ) ||
00966        contentType->isSubtype( "pkcs7-signature" ) ||
00967        contentType->isSubtype( "x-pkcs7-signature" ) ||
00968        message->mainBodyPart( "multipart/signed" ) ||
00969        message->mainBodyPart( "application/pgp-signature" ) ||
00970        message->mainBodyPart( "application/pkcs7-signature" ) ||
00971        message->mainBodyPart( "application/x-pkcs7-signature" ) ) {
00972     return true;
00973   }
00974 
00975   return false;
00976 }
00977 
00978 bool isEncrypted( Message *message )
00979 {
00980   if ( !message )
00981     return false;
00982 
00983   const KMime::Headers::ContentType* const contentType = message->contentType();
00984   if ( contentType->isSubtype( "encrypted" ) ||
00985        contentType->isSubtype( "pgp-encrypted" ) ||
00986        contentType->isSubtype( "pkcs7-mime" ) ||
00987        message->mainBodyPart( "multipart/encrypted" ) ||
00988        message->mainBodyPart( "application/pgp-encrypted" ) ||
00989        message->mainBodyPart( "application/pkcs7-mime" ) ) {
00990     return true;
00991   }
00992 
00993   return false;
00994 }
00995 
00996 bool isInvitation( Content *content )
00997 {
00998   if ( !content )
00999     return false;
01000 
01001   const KMime::Headers::ContentType* const contentType = content->contentType( false );
01002 
01003   if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) )
01004     return true;
01005 
01006   return false;
01007 }
01008 
01009 } // namespace KMime
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon Aug 27 2012 22:08:31 by doxygen 1.7.5 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Related Pages

kdepimlibs-4.8.5 API Reference

Skip menu "kdepimlibs-4.8.5 API Reference"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal