• Skip to content
  • Skip to link menu
KDE 4.7 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • KDE Home
  • Contact Us
 

KMIME Library

kmime_util.cpp
00001 /*
00002   kmime_util.cpp
00003 
00004   KMime, the KDE Internet mail/usenet news message library.
00005   Copyright (c) 2001 the KMime authors.
00006   See file AUTHORS for details
00007 
00008   This library is free software; you can redistribute it and/or
00009   modify it under the terms of the GNU Library General Public
00010   License as published by the Free Software Foundation; either
00011   version 2 of the License, or (at your option) any later version.
00012 
00013   This library is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016   Library General Public License for more details.
00017 
00018   You should have received a copy of the GNU Library General Public License
00019   along with this library; see the file COPYING.LIB.  If not, write to
00020   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021   Boston, MA 02110-1301, USA.
00022 */
00023 
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 
00027 #include "kmime_charfreq.h"
00028 #include "kmime_codecs.h"
00029 #include "kmime_header_parsing.h"
00030 #include "kmime_message.h"
00031 #include "kmime_warning.h"
00032 
00033 #include <config-kmime.h>
00034 #include <kdefakes.h> // for strcasestr
00035 #include <kglobal.h>
00036 #include <klocale.h>
00037 #include <kcharsets.h>
00038 #include <kcodecs.h>
00039 #include <kdebug.h>
00040 
00041 #include <QtCore/QList>
00042 #include <QtCore/QString>
00043 #include <QtCore/QTextCodec>
00044 
00045 #include <ctype.h>
00046 #include <time.h>
00047 #include <stdlib.h>
00048 #include <unistd.h>
00049 #include <boost/concept_check.hpp>
00050 
00051 using namespace KMime;
00052 
00053 namespace KMime {
00054 
// Charset names handed out by cachedCharset(); new entries are stored upper-cased.
QList<QByteArray> c_harsetCache;
// Language names handed out by cachedLanguage(); new entries are stored upper-cased.
QList<QByteArray> l_anguageCache;
// Charset name set by setFallbackCharEncoding(); decodeRFC2047String() uses it
// when its UTF-8 interpretation contains replacement characters.
QString f_allbackCharEnc;
// Flag set by setUseOutlookAttachmentEncoding(); off by default.
bool u_seOutlookEncoding = false;
00059 
00060 QByteArray cachedCharset( const QByteArray &name )
00061 {
00062   foreach ( const QByteArray& charset, c_harsetCache ) {
00063     if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00064       return charset;
00065     }
00066   }
00067 
00068   c_harsetCache.append( name.toUpper() );
00069   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00070   return c_harsetCache.last();
00071 }
00072 
00073 QByteArray cachedLanguage( const QByteArray &name )
00074 {
00075   foreach ( const QByteArray& language, l_anguageCache ) {
00076     if ( qstricmp( name.data(), language.data() ) == 0 ) {
00077       return language;
00078     }
00079   }
00080 
00081   l_anguageCache.append( name.toUpper() );
00082   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00083   return l_anguageCache.last();
00084 }
00085 
00086 bool isUsAscii( const QString &s )
00087 {
00088   uint sLength = s.length();
00089   for ( uint i=0; i<sLength; i++ ) {
00090     if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
00091       return false;
00092     }
00093   }
00094   return true;
00095 }
00096 
00097 QString nameForEncoding( Headers::contentEncoding enc )
00098 {
00099   switch( enc ) {
00100     case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00101     case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00102     case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00103     case Headers::CEbase64: return QString::fromLatin1( "base64" );
00104     case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00105     case Headers::CEbinary: return QString::fromLatin1( "binary" );
00106     default: return QString::fromLatin1( "unknown" );
00107   }
00108 }
00109 
00110 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00111 {
00112   QList<Headers::contentEncoding> allowed;
00113   CharFreq cf( data );
00114 
00115   switch ( cf.type() ) {
00116     case CharFreq::SevenBitText:
00117       allowed << Headers::CE7Bit;
00118     case CharFreq::EightBitText:
00119       allowed << Headers::CE8Bit;
00120     case CharFreq::SevenBitData:
00121       if ( cf.printableRatio() > 5.0/6.0 ) {
00122         // let n the length of data and p the number of printable chars.
00123         // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
00124         // => qp < base64 iff p > 5n/6.
00125         allowed << Headers::CEquPr;
00126         allowed << Headers::CEbase64;
00127       } else {
00128         allowed << Headers::CEbase64;
00129         allowed << Headers::CEquPr;
00130       }
00131       break;
00132     case CharFreq::EightBitData:
00133       allowed << Headers::CEbase64;
00134       break;
00135     case CharFreq::None:
00136     default:
00137       Q_ASSERT( false );
00138   }
00139 
00140   return allowed;
00141 }
00142 
// Character-class bitmaps for the 128 seven-bit characters: 16 bytes of 8 bits.
// Reading the values against the character lists given for each table, the bit
// ( 0x80 >> ( c % 8 ) ) of byte ( c / 8 ) is set when character c is in the class.
// NOTE(review): the code consulting these tables is not part of this file.

// "(),.:;<>@[\]
const uchar specialsMap[16] = {
  0x00, 0x00, 0x00, 0x00, // CTLs
  0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
  0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
  0x00, 0x00, 0x00, 0x00  // '`' ... DEL
};

// "(),:;<>@[\]/=?
const uchar tSpecialsMap[16] = {
  0x00, 0x00, 0x00, 0x00, // CTLs
  0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
  0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
  0x00, 0x00, 0x00, 0x00  // '`' ... DEL
};

// all except specials, CTLs, SPACE.
const uchar aTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x5F, 0x35, 0xFF, 0xC5,
  0x7F, 0xFF, 0xFF, 0xE3,
  0xFF, 0xFF, 0xFF, 0xFE
};

// all except tspecials, CTLs, SPACE.
const uchar tTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x5F, 0x36, 0xFF, 0xC0,
  0x7F, 0xFF, 0xFF, 0xE3,
  0xFF, 0xFF, 0xFF, 0xFE
};

// none except a-zA-Z0-9!*+-/
const uchar eTextMap[16] = {
  0x00, 0x00, 0x00, 0x00,
  0x40, 0x35, 0xFF, 0xC0,
  0x7F, 0xFF, 0xFF, 0xE0,
  0x7F, 0xFF, 0xFF, 0xE0
};
00182 
00183 void setFallbackCharEncoding(const QString& fallbackCharEnc)
00184 {
00185   f_allbackCharEnc = fallbackCharEnc;
00186 }
00187 
00188 QString fallbackCharEncoding()
00189 {
00190   return f_allbackCharEnc;
00191 }
00192 
00193 void setUseOutlookAttachmentEncoding( bool violateStandard )
00194 {
00195   u_seOutlookEncoding = violateStandard;
00196 }
00197 
00198 bool useOutlookAttachmentEncoding()
00199 {
00200   return u_seOutlookEncoding;
00201 }
00202 
00203 
00204 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00205                              const QByteArray &defaultCS, bool forceCS )
00206 {
00207   QByteArray result;
00208   QByteArray spaceBuffer;
00209   const char *scursor = src.constData();
00210   const char *send = scursor + src.length();
00211   bool onlySpacesSinceLastWord = false;
00212 
00213   while ( scursor != send ) {
00214      // space
00215     if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00216       spaceBuffer += *scursor++;
00217       continue;
00218     }
00219 
00220     // possible start of an encoded word
00221     if ( *scursor == '=' ) {
00222       QByteArray language;
00223       QString decoded;
00224       ++scursor;
00225       const char *start = scursor;
00226       if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00227         result += decoded.toUtf8();
00228         onlySpacesSinceLastWord = true;
00229         spaceBuffer.clear();
00230       } else {
00231         if ( onlySpacesSinceLastWord ) {
00232           result += spaceBuffer;
00233           onlySpacesSinceLastWord = false;
00234         }
00235         result += '=';
00236         scursor = start; // reset cursor after parsing failure
00237       }
00238       continue;
00239     } else {
00240       // unencoded data
00241       if ( onlySpacesSinceLastWord ) {
00242         result += spaceBuffer;
00243         onlySpacesSinceLastWord = false;
00244       }
00245       result += *scursor;
00246       ++scursor;
00247     }
00248   }
00249   // If there are any chars that couldn't be decoded in UTF-8,
00250   //  use the fallback charset if it exists
00251   const QString tryUtf8 = QString::fromUtf8( result );
00252   if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
00253     QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
00254     return codec->toUnicode( result );
00255   } else {
00256     return tryUtf8;
00257   }
00258 }
00259 
00260 QString decodeRFC2047String( const QByteArray &src )
00261 {
00262   QByteArray usedCS;
00263   return decodeRFC2047String( src, usedCS, "utf-8", false );
00264 }
00265 
00266 static const char *reservedCharacters = "\"()<>@,.;:\\[]=";
00267 
00268 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00269                                 bool addressHeader, bool allow8BitHeaders )
00270 {
00271   QByteArray result;
00272   int start=0, end=0;
00273   bool nonAscii=false, ok=true, useQEncoding=false;
00274 
00275   // fromLatin1() is safe here, codecForName() uses toLatin1() internally
00276   const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
00277 
00278   QByteArray usedCS;
00279   if ( !ok ) {
00280     //no codec available => try local8Bit and hope the best ;-)
00281     usedCS = KGlobal::locale()->encoding();
00282     codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
00283   }
00284   else {
00285     Q_ASSERT( codec );
00286     if ( charset.isEmpty() )
00287       usedCS = codec->name();
00288     else
00289       usedCS = charset;
00290   }
00291 
00292   QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
00293   QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
00294   if ( converterState.invalidChars > 0 ) {
00295     usedCS = "utf-8";
00296     codec = QTextCodec::codecForName( usedCS );
00297     encoded8Bit = codec->fromUnicode( src );
00298   }
00299 
00300   if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
00301     useQEncoding = true;
00302   }
00303 
00304   if ( allow8BitHeaders ) {
00305     return encoded8Bit;
00306   }
00307 
00308   uint encoded8BitLength = encoded8Bit.length();
00309   for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00310     if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
00311       start = i + 1;
00312     }
00313 
00314     // encode escape character, for japanese encodings...
00315     if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00316          ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00317       end = start;   // non us-ascii char found, now we determine where to stop encoding
00318       nonAscii = true;
00319       break;
00320     }
00321   }
00322 
00323   if ( nonAscii ) {
00324     while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00325       // we encode complete words
00326       end++;
00327     }
00328 
00329     for ( int x=end; x<encoded8Bit.length(); x++ ) {
00330       if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00331            ( addressHeader && ( strchr(reservedCharacters, encoded8Bit[x]) != 0 ) ) ) {
00332         end = x;     // we found another non-ascii word
00333 
00334         while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00335           // we encode complete words
00336           end++;
00337         }
00338       }
00339     }
00340 
00341     result = encoded8Bit.left( start ) + "=?" + usedCS;
00342 
00343     if ( useQEncoding ) {
00344       result += "?Q?";
00345 
00346       char c, hexcode;// "Q"-encoding implementation described in RFC 2047
00347       for ( int i=start; i<end; i++ ) {
00348         c = encoded8Bit[i];
00349         if ( c == ' ' ) { // make the result readable with not MIME-capable readers
00350           result += '_';
00351         } else {
00352           if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
00353               ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||  // with "From" & "To" headers
00354               ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00355             result += c;
00356           } else {
00357             result += '=';                 // "stolen" from KMail ;-)
00358             hexcode = ((c & 0xF0) >> 4) + 48;
00359             if ( hexcode >= 58 ) {
00360               hexcode += 7;
00361             }
00362             result += hexcode;
00363             hexcode = (c & 0x0F) + 48;
00364             if ( hexcode >= 58 ) {
00365               hexcode += 7;
00366             }
00367             result += hexcode;
00368           }
00369         }
00370       }
00371     } else {
00372       result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00373     }
00374 
00375     result +="?=";
00376     result += encoded8Bit.right( encoded8Bit.length() - end );
00377   } else {
00378     result = encoded8Bit;
00379   }
00380 
00381   return result;
00382 }
00383 
00384 QByteArray encodeRFC2047Sentence(const QString& src, const QByteArray& charset )
00385 {
00386   QByteArray result;
00387   QList<QChar> splitChars;
00388   splitChars << QLatin1Char(',') << QLatin1Char('\"') << QLatin1Char(';') << QLatin1Char('\\');
00389   const QChar *ch = src.constData();
00390   const int length = src.length();
00391   int pos = 0;
00392   int wordStart = 0;
00393 
00394   //qDebug() << "Input:" << src;
00395   // Loop over all characters of the string.
00396   // When encountering a split character, RFC-2047-encode the word before it, and add it to the result.
00397   while (pos < length) {
00398     //qDebug() << "Pos:" << pos << "Result:" << result << "Char:" << ch->toAscii();
00399     const bool isAscii = ch->unicode() < 127;
00400     const bool isReserved = (strchr( reservedCharacters, ch->toAscii() ) != 0);
00401     if ( isAscii && isReserved ) {
00402       const int wordSize = pos - wordStart;
00403       if (wordSize > 0) {
00404         const QString word = src.mid( wordStart, wordSize );
00405         result += encodeRFC2047String( word, charset );
00406       }
00407 
00408       result += ch->toAscii();
00409       wordStart = pos + 1;
00410     }
00411     ch++;
00412     pos++;
00413   }
00414 
00415   // Encode the last word
00416   const int wordSize = pos - wordStart;
00417   if (wordSize > 0) {
00418     const QString word = src.mid( wordStart, pos - wordStart );
00419     result += encodeRFC2047String( word, charset );
00420   }
00421 
00422   return result;
00423 }
00424 
00425 
00426 
00427 //-----------------------------------------------------------------------------
00428 QByteArray encodeRFC2231String( const QString& str, const QByteArray& charset )
00429 {
00430   if ( str.isEmpty() )
00431     return QByteArray();
00432 
00433   
00434   const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
00435   QByteArray latin;
00436   if ( charset == "us-ascii" )
00437     latin = str.toAscii();
00438   else if ( codec )
00439     latin = codec->fromUnicode( str );
00440   else
00441     latin = str.toLocal8Bit();
00442 
00443   char *l;
00444   for ( l = latin.data(); *l; ++l ) {
00445     if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) )
00446       // *l is control character or 8-bit char
00447       break;
00448   }
00449   if ( !*l )
00450     return latin;
00451 
00452   QByteArray result = charset + "''";
00453   for ( l = latin.data(); *l; ++l ) {
00454     bool needsQuoting = ( *l & 0x80 ) || ( *l == '%' );
00455     if( !needsQuoting ) {
00456       const QByteArray especials = "()<>@,;:\"/[]?.= \033";
00457       int len = especials.length();
00458       for ( int i = 0; i < len; i++ )
00459         if ( *l == especials[i] ) {
00460           needsQuoting = true;
00461           break;
00462         }
00463     }
00464     if ( needsQuoting ) {
00465       result += '%';
00466       unsigned char hexcode;
00467       hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
00468       if ( hexcode >= 58 )
00469         hexcode += 7;
00470       result += hexcode;
00471       hexcode = ( *l & 0x0F ) + 48;
00472       if ( hexcode >= 58 )
00473         hexcode += 7;
00474       result += hexcode;
00475     } else {
00476       result += *l;
00477     }
00478   }
00479   return result;
00480 }
00481 
00482 
00483 //-----------------------------------------------------------------------------
00484 QString decodeRFC2231String( const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS,
00485   bool forceCS )
00486 {
00487   int p = str.indexOf('\'');
00488   if (p < 0) return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS  ))->toUnicode( str );
00489 
00490   
00491   QByteArray charset = str.left(p);
00492 
00493   QByteArray st = str.mid( str.lastIndexOf('\'') + 1 );
00494   
00495   char ch, ch2;
00496   p = 0;
00497   while (p < (int)st.length())
00498   {
00499     if (st.at(p) == 37)
00500     {
00501       // Only try to decode the percent-encoded character if the percent sign
00502       // is really followed by two other characters, see testcase at bug 163024
00503       if ( p + 2 < st.length() ) {
00504         ch = st.at(p+1) - 48;
00505         if (ch > 16)
00506           ch -= 7;
00507         ch2 = st.at(p+2) - 48;
00508         if (ch2 > 16)
00509           ch2 -= 7;
00510         st[p] = ch * 16 + ch2;
00511         st.remove( p+1, 2 );
00512       }
00513     }
00514     p++;
00515   }
00516   kDebug() << "Got pre-decoded:" << st;
00517   QString result;
00518   const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
00519   if ( !charsetcodec || forceCS )
00520     charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
00521 
00522   usedCS = charsetcodec->name();
00523   return charsetcodec->toUnicode( st );
00524 }
00525 
00526 QString decodeRFC2231String( const QByteArray &src )
00527 {
00528   QByteArray usedCS;
00529   return decodeRFC2231String( src, usedCS, "utf-8", false );
00530 }
00531 
00532 QByteArray uniqueString()
00533 {
00534   static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00535   time_t now;
00536   char p[11];
00537   int pos, ran;
00538   unsigned int timeval;
00539 
00540   p[10] = '\0';
00541   now = time( 0 );
00542   ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00543   timeval = (now / ran) + getpid();
00544 
00545   for ( int i=0; i<10; i++ ) {
00546     pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00547     //kDebug() << pos;
00548     p[i] = chars[pos];
00549   }
00550 
00551   QByteArray ret;
00552   ret.setNum( timeval );
00553   ret += '.';
00554   ret += p;
00555 
00556   return ret;
00557 }
00558 
00559 QByteArray multiPartBoundary()
00560 {
00561   return "nextPart" + uniqueString();
00562 }
00563 
00564 QByteArray unfoldHeader( const QByteArray &header )
00565 {
00566   QByteArray result;
00567   int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00568   while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00569     foldBegin = foldEnd = foldMid;
00570     // find the first space before the line-break
00571     while ( foldBegin > 0 ) {
00572       if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
00573         break;
00574       }
00575       --foldBegin;
00576     }
00577     // find the first non-space after the line-break
00578     while ( foldEnd <= header.length() - 1 ) {
00579       if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
00580         ++foldEnd;
00581       }
00582       else if ( foldEnd > 0 && header[foldEnd - 1] == '\n' &&
00583                 header[foldEnd] == '=' && foldEnd + 2 < header.length() &&
00584                 ( ( header[foldEnd + 1] == '0' &&
00585                     header[foldEnd + 2] == '9' ) ||
00586                   ( header[foldEnd + 1] == '2' &&
00587                     header[foldEnd + 2] == '0' ) ) ) {
00588         // bug #86302: malformed header continuation starting with =09/=20
00589         foldEnd += 3;
00590       }
00591       else {
00592         break;
00593       }
00594     }
00595 
00596     result += header.mid( pos, foldBegin - pos );
00597     if ( foldEnd < header.length() -1 )
00598       result += ' ';
00599     pos = foldEnd;
00600   }
00601   result += header.mid( pos, header.length() - pos );
00602   return result;
00603 }
00604 
00605 int findHeaderLineEnd( const QByteArray &src, int &dataBegin, bool *folded )
00606 {
00607   int end = dataBegin;
00608   int len = src.length() - 1;
00609 
00610   if ( folded )
00611     *folded = false;
00612 
00613   if ( dataBegin < 0 ) {
00614     // Not found
00615     return -1;
00616   }
00617 
00618   if ( dataBegin > len ) {
00619     // No data available
00620     return len + 1;
00621   }
00622 
00623   // If the first line contains nothing, but the next line starts with a space
00624   // or a tab, that means a stupid mail client has made the first header field line
00625   // entirely empty, and has folded the rest to the next line(s).
00626   if ( src.at(end) == '\n' && end + 1 < len &&
00627        ( src[end+1] == ' ' || src[end+1] == '\t' ) ) {
00628 
00629     // Skip \n and first whitespace
00630     dataBegin += 2;
00631     end += 2;
00632   }
00633 
00634   if ( src.at(end) != '\n' ) {  // check if the header is not empty
00635     while ( true ) {
00636       end = src.indexOf( '\n', end + 1 );
00637       if ( end == -1 || end == len ) {
00638         // end of string
00639         break;
00640       }
00641       else if ( src[end+1] == ' ' || src[end+1] == '\t' ||
00642                 ( src[end+1] == '=' && end+3 <= len &&
00643                   ( ( src[end+2] == '0' && src[end+3] == '9' ) ||
00644                     ( src[end+2] == '2' && src[end+3] == '0' ) ) ) ) {
00645         // next line is header continuation or starts with =09/=20 (bug #86302)
00646         if ( folded )
00647           *folded = true;
00648       } else {
00649         // end of header (no header continuation)
00650         break;
00651       }
00652     }
00653   }
00654 
00655   if ( end < 0 ) {
00656     end = len + 1; //take the rest of the string
00657   }
00658   return end;
00659 }
00660 
00661 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00662 {
00663   QByteArray n = name;
00664   n.append( ':' );
00665   int begin = -1;
00666 
00667   if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00668     begin = 0;
00669   } else {
00670     n.prepend('\n');
00671     const char *p = strcasestr( src.constData(), n.constData() );
00672     if ( !p ) {
00673       begin = -1;
00674     } else {
00675       begin = p - src.constData();
00676       ++begin;
00677     }
00678   }
00679 
00680   if ( begin > -1) {     //there is a header with the given name
00681     dataBegin = begin + name.length() + 1; //skip the name
00682     // skip the usual space after the colon
00683     if ( src.at( dataBegin ) == ' ' ) {
00684       ++dataBegin;
00685     }
00686     end = findHeaderLineEnd( src, dataBegin, folded );
00687     return begin;
00688 
00689   } else {
00690     dataBegin = -1;
00691     return -1; //header not found
00692   }
00693 }
00694 
00695 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00696 {
00697   int begin, end;
00698   bool folded;
00699   indexOfHeader( src, name, end, begin, &folded );
00700 
00701   if ( begin >= 0 ) {
00702     if ( !folded ) {
00703       return src.mid( begin, end - begin );
00704     } else {
00705       QByteArray hdrValue = src.mid( begin, end - begin );
00706       return unfoldHeader( hdrValue );
00707     }
00708   } else {
00709     return QByteArray(); //header not found
00710   }
00711 }
00712 
00713 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00714 {
00715   int begin, end;
00716   bool folded;
00717   QList<QByteArray> result;
00718   QByteArray copySrc( src );
00719 
00720   indexOfHeader( copySrc, name, end, begin, &folded );
00721   while ( begin >= 0 ) {
00722     if ( !folded ) {
00723       result.append( copySrc.mid( begin, end - begin ) );
00724     } else {
00725       QByteArray hdrValue = copySrc.mid( begin, end - begin );
00726       result.append( unfoldHeader( hdrValue ) );
00727     }
00728 
00729     // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
00730     copySrc = copySrc.mid( end );
00731     indexOfHeader( copySrc, name, end, begin, &folded );
00732   }
00733 
00734   return result;
00735 }
00736 
00737 void removeHeader( QByteArray &header, const QByteArray &name )
00738 {
00739   int begin, end, dummy;
00740   begin = indexOfHeader( header, name, end, dummy );
00741   if ( begin >= 0 ) {
00742     header.remove( begin, end - begin + 1 );
00743   }
00744 }
00745 
00746 QByteArray CRLFtoLF( const QByteArray &s )
00747 {
00748   QByteArray ret = s;
00749   ret.replace( "\r\n", "\n" );
00750   return ret;
00751 }
00752 
00753 QByteArray CRLFtoLF( const char *s )
00754 {
00755   QByteArray ret = s;
00756   return CRLFtoLF( ret );
00757 }
00758 
00759 QByteArray LFtoCRLF( const QByteArray &s )
00760 {
00761   QByteArray ret = s;
00762   ret.replace( '\n', "\r\n" );
00763   return ret;
00764 }
00765 
00766 QByteArray LFtoCRLF( const char *s )
00767 {
00768   QByteArray ret = s;
00769   return LFtoCRLF( ret );
00770 }
00771 
namespace {
/**
 * Removes all double quotes from @p str. Inside a quoted section a backslash
 * is removed as well and the character following it is kept as it is.
 *
 * StringType must provide length(), operator[] and remove( pos, count );
 * CharType must be constructible from a char and comparable to an element.
 */
template < typename StringType, typename CharType > void removeQuotesGeneric( StringType & str )
{
  const CharType quote( '"' );
  const CharType backslash( '\\' );

  bool insideQuotes = false;
  int pos = 0;
  while ( pos < str.length() ) {
    if ( str[pos] == quote ) {
      str.remove( pos, 1 );
      insideQuotes = !insideQuotes;
      continue; // look at the character that moved to this position
    }
    if ( insideQuotes && ( str[pos] == backslash ) ) {
      // drop the backslash; the escaped character is skipped below
      str.remove( pos, 1 );
    }
    ++pos;
  }
}
}
00789 
00790 void removeQuots( QByteArray &str )
00791 {
00792   removeQuotesGeneric<QByteArray,char>( str );
00793 }
00794 
00795 void removeQuots( QString &str )
00796 {
00797   removeQuotesGeneric<QString,QLatin1Char>( str );
00798 }
00799 
00800 template<class StringType,class CharType,class CharConverterType,class StringConverterType,class ToString>
00801 void addQuotes_impl( StringType &str, bool forceQuotes )
00802 {
00803   bool needsQuotes=false;
00804   for ( int i=0; i < str.length(); i++ ) {
00805     const CharType cur = str.at( i );
00806     if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String( "\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
00807       needsQuotes = true;
00808     }
00809     if ( cur == CharConverterType( '\\' ) || cur == CharConverterType( '\"' ) ) {
00810       str.insert( i, CharConverterType( '\\' ) );
00811       i++;
00812     }
00813   }
00814 
00815   if ( needsQuotes || forceQuotes ) {
00816     str.insert( 0, CharConverterType( '\"' ) );
00817     str.append( StringConverterType( "\"" ) );
00818   }
00819 }
00820 
00821 void addQuotes( QByteArray &str, bool forceQuotes )
00822 {
00823   addQuotes_impl<QByteArray,char,char,char*,QLatin1String>( str, forceQuotes );
00824 }
00825 
00826 void addQuotes( QString &str, bool forceQuotes )
00827 {
00828   addQuotes_impl<QString,QChar,QLatin1Char,QLatin1String,QString>( str, forceQuotes );
00829 }
00830 
00831 KMIME_EXPORT QString balanceBidiState( const QString &input )
00832 {
00833   const int LRO = 0x202D;
00834   const int RLO = 0x202E;
00835   const int LRE = 0x202A;
00836   const int RLE = 0x202B;
00837   const int PDF = 0x202C;
00838 
00839   QString result = input;
00840 
00841   int openDirChangers = 0;
00842   int numPDFsRemoved = 0;
00843   for ( int i = 0; i < input.length(); i++ ) {
00844     const ushort &code = input.at( i ).unicode();
00845     if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00846       openDirChangers++;
00847     }
00848     else if ( code == PDF ) {
00849       if ( openDirChangers > 0 ) {
00850         openDirChangers--;
00851       }
00852       else {
00853         // One PDF too much, remove it
00854         kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00855         result.remove( i - numPDFsRemoved, 1 );
00856         numPDFsRemoved++;
00857       }
00858     }
00859   }
00860 
00861   if ( openDirChangers > 0 ) {
00862     kWarning() << "Possible Unicode spoofing detected in" << input;
00863 
00864     // At PDF chars to the end until the correct state is restored.
00865     // As a special exception, when encountering quoted strings, place the PDF before
00866     // the last quote.
00867     for ( int i = openDirChangers; i > 0; i-- ) {
00868       if ( result.endsWith( QLatin1Char( '"' ) ) )
00869         result.insert( result.length() - 1, QChar( PDF ) );
00870       else
00871         result += QChar( PDF );
00872     }
00873   }
00874 
00875   return result;
00876 }
00877 
00878 QString removeBidiControlChars( const QString &input )
00879 {
00880   const int LRO = 0x202D;
00881   const int RLO = 0x202E;
00882   const int LRE = 0x202A;
00883   const int RLE = 0x202B;
00884   QString result = input;
00885   result.remove( LRO );
00886   result.remove( RLO );
00887   result.remove( LRE );
00888   result.remove( RLE );
00889   return result;
00890 }
00891 
00892 static bool isCryptoPart( Content* content )
00893 {
00894   if( !content->contentType( false ) )
00895     return false;
00896 
00897   if( content->contentType()->subType().toLower() == "octet-stream" &&
00898       !content->contentDisposition( false ) )
00899     return false;
00900 
00901   const Headers::ContentType *contentType = content->contentType();
00902   const QByteArray lowerSubType = contentType->subType().toLower();
00903   return ( contentType->mediaType().toLower() == "application" &&
00904          ( lowerSubType == "pgp-encrypted" ||
00905            lowerSubType == "pgp-signature" ||
00906            lowerSubType == "pkcs7-mime" ||
00907            lowerSubType == "pkcs7-signature" ||
00908            lowerSubType == "x-pkcs7-signature" ||
00909            ( lowerSubType == "octet-stream" &&
00910              content->contentDisposition()->filename().toLower() == QLatin1String( "msg.asc" ) ) ) );
00911 }
00912 
00913 bool hasAttachment( Content* content )
00914 {
00915   if( !content )
00916     return false;
00917 
00918   bool emptyFilename = true;
00919   if( content->contentDisposition( false ) && !content->contentDisposition()->filename().isEmpty() )
00920     emptyFilename = false;
00921 
00922   if( emptyFilename && content->contentType( false ) && !content->contentType()->name().isEmpty() )
00923     emptyFilename = false;
00924 
00925   // ignore crypto parts
00926   if( !emptyFilename && !isCryptoPart( content ) )
00927     return true;
00928 
00929   // Ok, content itself is not an attachment. now we deal with multiparts
00930   if( content->contentType()->isMultipart() ) {
00931     Q_FOREACH( Content* child, content->contents() ) {
00932       if( hasAttachment( child ) )
00933         return true;
00934     }
00935   }
00936   
00937   return false;
00938 }
00939 
00940 bool isSigned( Message *message )
00941 {
00942   if ( !message )
00943     return false;
00944 
00945   const KMime::Headers::ContentType* const contentType = message->contentType();
00946   if ( contentType->isSubtype( "signed" ) ||
00947        contentType->isSubtype( "pgp-signature" ) ||
00948        contentType->isSubtype( "pkcs7-signature" ) ||
00949        contentType->isSubtype( "x-pkcs7-signature" ) ||
00950        message->mainBodyPart( "multipart/signed" ) ||
00951        message->mainBodyPart( "application/pgp-signature" ) ||
00952        message->mainBodyPart( "application/pkcs7-signature" ) ||
00953        message->mainBodyPart( "application/x-pkcs7-signature" ) ) {
00954     return true;
00955   }
00956 
00957   return false;
00958 }
00959 
00960 bool isEncrypted( Message *message )
00961 {
00962   if ( !message )
00963     return false;
00964 
00965   const KMime::Headers::ContentType* const contentType = message->contentType();
00966   if ( contentType->isSubtype( "encrypted" ) ||
00967        contentType->isSubtype( "pgp-encrypted" ) ||
00968        contentType->isSubtype( "pkcs7-mime" ) || 
00969        message->mainBodyPart( "multipart/encrypted" ) ||
00970        message->mainBodyPart( "application/pgp-encrypted" ) ||
00971        message->mainBodyPart( "application/pkcs7-mime" ) ) {
00972     return true;
00973   }
00974 
00975   return false;
00976 }
00977 
00978 bool isInvitation( Content *content )
00979 {
00980   if ( !content )
00981     return false;
00982 
00983   const KMime::Headers::ContentType* const contentType = content->contentType( false );
00984 
00985   if ( contentType && contentType->isMediatype( "text" ) && contentType->isSubtype( "calendar" ) )
00986     return true;
00987 
00988   return false;
00989 }
00990 
00991 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.7.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal