• Skip to content
  • Skip to link menu
KDE 4.4 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_util.cpp

00001 /*
00002   kmime_util.cpp
00003 
00004   KMime, the KDE Internet mail/usenet news message library.
00005   Copyright (c) 2001 the KMime authors.
00006   See file AUTHORS for details
00007 
00008   This library is free software; you can redistribute it and/or
00009   modify it under the terms of the GNU Library General Public
00010   License as published by the Free Software Foundation; either
00011   version 2 of the License, or (at your option) any later version.
00012 
00013   This library is distributed in the hope that it will be useful,
00014   but WITHOUT ANY WARRANTY; without even the implied warranty of
00015   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016   Library General Public License for more details.
00017 
00018   You should have received a copy of the GNU Library General Public License
00019   along with this library; see the file COPYING.LIB.  If not, write to
00020   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021   Boston, MA 02110-1301, USA.
00022 */
00023 
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 #include "kmime_header_parsing.h"
00027 #include "kmime_charfreq.h"
00028 
00029 #include <config-kmime.h>
00030 #include <kdefakes.h> // for strcasestr
00031 #include <kglobal.h>
00032 #include <klocale.h>
00033 #include <kcharsets.h>
00034 #include <kcodecs.h>
00035 #include <kdebug.h>
00036 
00037 #include <QtCore/QList>
00038 #include <QtCore/QString>
00039 #include <QtCore/QTextCodec>
00040 
00041 #include <ctype.h>
00042 #include <time.h>
00043 #include <stdlib.h>
00044 #include <unistd.h>
00045 
00046 using namespace KMime;
00047 
00048 namespace KMime {
00049 
00050 QList<QByteArray> c_harsetCache;
00051 QList<QByteArray> l_anguageCache;
00052 
00053 QByteArray cachedCharset( const QByteArray &name )
00054 {
00055   foreach ( const QByteArray& charset, c_harsetCache ) {
00056     if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00057       return charset;
00058     }
00059   }
00060 
00061   c_harsetCache.append( name.toUpper() );
00062   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00063   return c_harsetCache.last();
00064 }
00065 
00066 QByteArray cachedLanguage( const QByteArray &name )
00067 {
00068   foreach ( const QByteArray& language, l_anguageCache ) {
00069     if ( qstricmp( name.data(), language.data() ) == 0 ) {
00070       return language;
00071     }
00072   }
00073 
00074   l_anguageCache.append( name.toUpper() );
00075   //kDebug() << "KNMimeBase::cachedCharset() number of cs" << c_harsetCache.count();
00076   return l_anguageCache.last();
00077 }
00078 
00079 bool isUsAscii( const QString &s )
00080 {
00081   uint sLength = s.length();
00082   for ( uint i=0; i<sLength; i++ ) {
00083     if ( s.at( i ).toLatin1() <= 0 ) { // c==0: non-latin1, c<0: non-us-ascii
00084       return false;
00085     }
00086   }
00087   return true;
00088 }
00089 
00090 QString nameForEncoding( Headers::contentEncoding enc )
00091 {
00092   switch( enc ) {
00093     case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00094     case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00095     case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00096     case Headers::CEbase64: return QString::fromLatin1( "base64" );
00097     case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00098     case Headers::CEbinary: return QString::fromLatin1( "binary" );
00099     default: return QString::fromLatin1( "unknown" );
00100   }
00101 }
00102 
00103 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00104 {
00105   QList<Headers::contentEncoding> allowed;
00106   CharFreq cf( data );
00107 
00108   switch ( cf.type() ) {
00109     case CharFreq::SevenBitText:
00110       allowed << Headers::CE7Bit;
00111     case CharFreq::EightBitText:
00112       allowed << Headers::CE8Bit;
00113     case CharFreq::SevenBitData:
00114       if ( cf.printableRatio() > 5.0/6.0 ) {
00115         // let n the length of data and p the number of printable chars.
00116         // Then base64 \approx 4n/3; qp \approx p + 3(n-p)
00117         // => qp < base64 iff p > 5n/6.
00118         allowed << Headers::CEquPr;
00119         allowed << Headers::CEbase64;
00120       } else {
00121         allowed << Headers::CEbase64;
00122         allowed << Headers::CEquPr;
00123       }
00124       break;
00125     case CharFreq::EightBitData:
00126       allowed << Headers::CEbase64;
00127       break;
00128     case CharFreq::None:
00129     default:
00130       Q_ASSERT( false );
00131   }
00132 
00133   return allowed;
00134 }
00135 
00136 // "(),.:;<>@[\]
00137 const uchar specialsMap[16] = {
00138   0x00, 0x00, 0x00, 0x00, // CTLs
00139   0x20, 0xCA, 0x00, 0x3A, // SPACE ... '?'
00140   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00141   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00142 };
00143 
00144 // "(),:;<>@[\]/=?
00145 const uchar tSpecialsMap[16] = {
00146   0x00, 0x00, 0x00, 0x00, // CTLs
00147   0x20, 0xC9, 0x00, 0x3F, // SPACE ... '?'
00148   0x80, 0x00, 0x00, 0x1C, // '@' ... '_'
00149   0x00, 0x00, 0x00, 0x00  // '`' ... DEL
00150 };
00151 
00152 // all except specials, CTLs, SPACE.
00153 const uchar aTextMap[16] = {
00154   0x00, 0x00, 0x00, 0x00,
00155   0x5F, 0x35, 0xFF, 0xC5,
00156   0x7F, 0xFF, 0xFF, 0xE3,
00157   0xFF, 0xFF, 0xFF, 0xFE
00158 };
00159 
00160 // all except tspecials, CTLs, SPACE.
00161 const uchar tTextMap[16] = {
00162   0x00, 0x00, 0x00, 0x00,
00163   0x5F, 0x36, 0xFF, 0xC0,
00164   0x7F, 0xFF, 0xFF, 0xE3,
00165   0xFF, 0xFF, 0xFF, 0xFE
00166 };
00167 
00168 // none except a-zA-Z0-9!*+-/
00169 const uchar eTextMap[16] = {
00170   0x00, 0x00, 0x00, 0x00,
00171   0x40, 0x35, 0xFF, 0xC0,
00172   0x7F, 0xFF, 0xFF, 0xE0,
00173   0x7F, 0xFF, 0xFF, 0xE0
00174 };
00175 
00176 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00177                              const QByteArray &defaultCS, bool forceCS )
00178 {
00179   QByteArray result;
00180   QByteArray spaceBuffer;
00181   const char *scursor = src.constData();
00182   const char *send = scursor + src.length();
00183   bool onlySpacesSinceLastWord = false;
00184 
00185   while ( scursor != send ) {
00186      // space
00187     if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00188       spaceBuffer += *scursor++;
00189       continue;
00190     }
00191 
00192     // possible start of an encoded word
00193     if ( *scursor == '=' ) {
00194       QByteArray language;
00195       QString decoded;
00196       ++scursor;
00197       const char *start = scursor;
00198       if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00199         result += decoded.toUtf8();
00200         onlySpacesSinceLastWord = true;
00201         spaceBuffer.clear();
00202       } else {
00203         if ( onlySpacesSinceLastWord ) {
00204           result += spaceBuffer;
00205           onlySpacesSinceLastWord = false;
00206         }
00207         result += '=';
00208         scursor = start; // reset cursor after parsing failure
00209       }
00210       continue;
00211     } else {
00212       // unencoded data
00213       if ( onlySpacesSinceLastWord ) {
00214         result += spaceBuffer;
00215         onlySpacesSinceLastWord = false;
00216       }
00217       result += *scursor;
00218       ++scursor;
00219     }
00220   }
00221 
00222   return QString::fromUtf8(result);
00223 }
00224 
00225 QString decodeRFC2047String( const QByteArray &src )
00226 {
00227   QByteArray usedCS;
00228   return decodeRFC2047String( src, usedCS, "utf-8", false );
00229 }
00230 
00231 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00232                                 bool addressHeader, bool allow8BitHeaders )
00233 {
00234   QByteArray encoded8Bit, result;
00235   int start=0, end=0;
00236   bool nonAscii=false, ok=true, useQEncoding=false;
00237 
00238   const QTextCodec *codec = KGlobal::charsets()->codecForName( charset, ok );
00239 
00240   QByteArray usedCS;
00241   if ( !ok ) {
00242     //no codec available => try local8Bit and hope the best ;-)
00243     usedCS = KGlobal::locale()->encoding();
00244     codec = KGlobal::charsets()->codecForName( usedCS, ok );
00245   }
00246   else {
00247     Q_ASSERT( codec );
00248     if ( charset.isEmpty() )
00249       usedCS = codec->name();
00250     else
00251       usedCS = charset;
00252   }
00253 
00254   if ( usedCS.contains( "8859-" ) ) { // use "B"-Encoding for non iso-8859-x charsets
00255     useQEncoding = true;
00256   }
00257 
00258   encoded8Bit = codec->fromUnicode( src );
00259 
00260   if ( allow8BitHeaders ) {
00261     return encoded8Bit;
00262   }
00263 
00264   uint encoded8BitLength = encoded8Bit.length();
00265   for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00266     if ( encoded8Bit[i] == ' ' ) { // encoding starts at word boundaries
00267       start = i + 1;
00268     }
00269 
00270     // encode escape character, for japanese encodings...
00271     if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00272          ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00273       end = start;   // non us-ascii char found, now we determine where to stop encoding
00274       nonAscii = true;
00275       break;
00276     }
00277   }
00278 
00279   if ( nonAscii ) {
00280     while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00281       // we encode complete words
00282       end++;
00283     }
00284 
00285     for ( int x=end; x<encoded8Bit.length(); x++ ) {
00286       if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00287            ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) {
00288         end = encoded8Bit.length();     // we found another non-ascii word
00289 
00290         while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00291           // we encode complete words
00292           end++;
00293         }
00294       }
00295     }
00296 
00297     result = encoded8Bit.left( start ) + "=?" + usedCS;
00298 
00299     if ( useQEncoding ) {
00300       result += "?Q?";
00301 
00302       char c, hexcode;// "Q"-encoding implementation described in RFC 2047
00303       for ( int i=start; i<end; i++ ) {
00304         c = encoded8Bit[i];
00305         if ( c == ' ' ) { // make the result readable with not MIME-capable readers
00306           result += '_';
00307         } else {
00308           if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) || // paranoid mode, encode *all* special chars to avoid problems
00309               ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||  // with "From" & "To" headers
00310               ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00311             result += c;
00312           } else {
00313             result += '=';                 // "stolen" from KMail ;-)
00314             hexcode = ((c & 0xF0) >> 4) + 48;
00315             if ( hexcode >= 58 ) {
00316               hexcode += 7;
00317             }
00318             result += hexcode;
00319             hexcode = (c & 0x0F) + 48;
00320             if ( hexcode >= 58 ) {
00321               hexcode += 7;
00322             }
00323             result += hexcode;
00324           }
00325         }
00326       }
00327     } else {
00328       result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00329     }
00330 
00331     result +="?=";
00332     result += encoded8Bit.right( encoded8Bit.length() - end );
00333   } else {
00334     result = encoded8Bit;
00335   }
00336 
00337   return result;
00338 }
00339 
00340 QByteArray uniqueString()
00341 {
00342   static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00343   time_t now;
00344   char p[11];
00345   int pos, ran;
00346   unsigned int timeval;
00347 
00348   p[10] = '\0';
00349   now = time( 0 );
00350   ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00351   timeval = (now / ran) + getpid();
00352 
00353   for ( int i=0; i<10; i++ ) {
00354     pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00355     //kDebug() << pos;
00356     p[i] = chars[pos];
00357   }
00358 
00359   QByteArray ret;
00360   ret.setNum( timeval );
00361   ret += '.';
00362   ret += p;
00363 
00364   return ret;
00365 }
00366 
00367 QByteArray multiPartBoundary()
00368 {
00369   return "nextPart" + uniqueString();
00370 }
00371 
00372 QByteArray unfoldHeader( const QByteArray &header )
00373 {
00374   QByteArray result;
00375   int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00376   while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00377     foldBegin = foldEnd = foldMid;
00378     // find the first space before the line-break
00379     while ( foldBegin > 0 ) {
00380       if ( !QChar( header[foldBegin - 1] ).isSpace() ) {
00381         break;
00382       }
00383       --foldBegin;
00384     }
00385     // find the first non-space after the line-break
00386     while ( foldEnd <= header.length() - 1 ) {
00387       if ( !QChar( header[foldEnd] ).isSpace() ) {
00388         break;
00389       }
00390       ++foldEnd;
00391     }
00392     result += header.mid( pos, foldBegin - pos );
00393     if ( foldEnd < header.length() -1 )
00394       result += ' ';
00395     pos = foldEnd;
00396   }
00397   result += header.mid( pos, header.length() - pos );
00398   return result;
00399 }
00400 
00401 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00402 {
00403   QByteArray n = name;
00404   n.append( ':' );
00405   int begin = -1;
00406 
00407   if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00408     begin = 0;
00409   } else {
00410     n.prepend('\n');
00411     const char *p = strcasestr( src.constData(), n.constData() );
00412     if ( !p ) {
00413       begin = -1;
00414     } else {
00415       begin = p - src.constData();
00416       ++begin;
00417     }
00418   }
00419 
00420   if ( begin > -1) {     //there is a header with the given name
00421     dataBegin = begin + name.length() + 1; //skip the name
00422     // skip the usual space after the colon
00423     if ( src.at( dataBegin ) == ' ' ) {
00424       ++dataBegin;
00425     }
00426     end = dataBegin;
00427     int len = src.length() - 1;
00428     if ( folded )
00429       *folded = false;
00430 
00431     if ( src.at(end) != '\n' ) {  // check if the header is not empty
00432       while ( true ) {
00433         end = src.indexOf( '\n', end + 1 );
00434         if ( end == -1 || end == len ||
00435              ( src[end+1] != ' ' && src[end+1] != '\t' ) ) {
00436           //break if we reach the end of the string, honor folded lines
00437           break;
00438         } else {
00439           if ( folded )
00440             *folded = true;
00441         }
00442       }
00443     }
00444 
00445     if ( end < 0 ) {
00446       end = len + 1; //take the rest of the string
00447     }
00448     return begin;
00449 
00450   } else {
00451     dataBegin = -1;
00452     return -1; //header not found
00453   }
00454 }
00455 
00456 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00457 {
00458   int begin, end;
00459   bool folded;
00460   indexOfHeader( src, name, end, begin, &folded );
00461 
00462   if ( begin >= 0 ) {
00463     if ( !folded ) {
00464       return src.mid( begin, end - begin );
00465     } else {
00466       QByteArray hdrValue = src.mid( begin, end - begin );
00467       return unfoldHeader( hdrValue );
00468     }
00469   } else {
00470     return QByteArray(); //header not found
00471   }
00472 }
00473 
00474 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00475 {
00476   int begin, end;
00477   bool folded;
00478   QList<QByteArray> result;
00479   QByteArray copySrc( src );
00480 
00481   indexOfHeader( copySrc, name, end, begin, &folded );
00482   while ( begin >= 0 ) {
00483     if ( !folded ) {
00484       result.append( copySrc.mid( begin, end - begin ) );
00485     } else {
00486       QByteArray hdrValue = copySrc.mid( begin, end - begin );
00487       result.append( unfoldHeader( hdrValue ) );
00488     }
00489 
00490     // get the next one, a tiny bit ugly, but we don't want the previous to be found again...
00491     copySrc = copySrc.mid( end );
00492     indexOfHeader( copySrc, name, end, begin, &folded );
00493   }
00494 
00495   return result;
00496 }
00497 
00498 void removeHeader( QByteArray &header, const QByteArray &name )
00499 {
00500   int begin, end, dummy;
00501   begin = indexOfHeader( header, name, end, dummy );
00502   if ( begin >= 0 ) {
00503     header.remove( begin, end - begin + 1 );
00504   }
00505 }
00506 
00507 QByteArray CRLFtoLF( const QByteArray &s )
00508 {
00509   QByteArray ret = s;
00510   ret.replace( "\r\n", "\n" );
00511   return ret;
00512 }
00513 
00514 QByteArray LFtoCRLF( const QByteArray &s )
00515 {
00516   QByteArray ret = s;
00517   ret.replace( '\n', "\r\n" );
00518   return ret;
00519 }
00520 
namespace {
// Strips all double quotes from @p str in place. Inside of a quoted
// section a backslash is removed as well and the character following it
// is kept without further inspection.
template < typename T > void removeQuotesGeneric( T & str )
{
  bool quoted = false;
  int idx = 0;
  while ( idx < str.length() ) {
    if ( str[idx] == '"' ) {
      // drop the quote; the next character moves to the current index
      str.remove( idx, 1 );
      quoted = !quoted;
      continue;
    }
    if ( quoted && ( str[idx] == '\\' ) ) {
      // drop the backslash, then step over the escaped character
      str.remove( idx, 1 );
    }
    ++idx;
  }
}
}
00538 
00539 void removeQuots( QByteArray &str )
00540 {
00541   removeQuotesGeneric( str );
00542 }
00543 
00544 void removeQuots( QString &str )
00545 {
00546   removeQuotesGeneric( str );
00547 }
00548 
00549 void addQuotes( QByteArray &str, bool forceQuotes )
00550 {
00551   bool needsQuotes=false;
00552   for ( int i=0; i < str.length(); i++ ) {
00553     if ( strchr("()<>@,.;:[]=\\\"", str[i] ) != 0 ) {
00554       needsQuotes = true;
00555     }
00556     if ( str[i] == '\\' || str[i] == '\"' ) {
00557       str.insert( i, '\\' );
00558       i++;
00559     }
00560   }
00561 
00562   if ( needsQuotes || forceQuotes ) {
00563     str.insert( 0, '\"' );
00564     str.append( "\"" );
00565   }
00566 }
00567 
00568 KMIME_EXPORT QString balanceBidiState( const QString &input )
00569 {
00570   const int LRO = 0x202D;
00571   const int RLO = 0x202E;
00572   const int LRE = 0x202A;
00573   const int RLE = 0x202B;
00574   const int PDF = 0x202C;
00575 
00576   QString result = input;
00577 
00578   int openDirChangers = 0;
00579   int numPDFsRemoved = 0;
00580   for ( int i = 0; i < input.length(); i++ ) {
00581     const ushort &code = input.at( i ).unicode();
00582     if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00583       openDirChangers++;
00584     }
00585     else if ( code == PDF ) {
00586       if ( openDirChangers > 0 ) {
00587         openDirChangers--;
00588       }
00589       else {
00590         // One PDF too much, remove it
00591         kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00592         result.remove( i - numPDFsRemoved, 1 );
00593         numPDFsRemoved++;
00594       }
00595     }
00596   }
00597 
00598   if ( openDirChangers > 0 ) {
00599     kWarning() << "Possible Unicode spoofing detected in" << input;
00600 
00601     // At PDF chars to the end until the correct state is restored.
00602     // As a special exception, when encountering quoted strings, place the PDF before
00603     // the last quote.
00604     for ( int i = openDirChangers; i > 0; i-- ) {
00605       if ( result.endsWith( '"' ) )
00606         result.insert( result.length() - 1, QChar( PDF ) );
00607       else
00608         result += QChar( PDF );
00609     }
00610   }
00611 
00612   return result;
00613 }
00614 
00615 QString removeBidiControlChars( const QString &input )
00616 {
00617   const int LRO = 0x202D;
00618   const int RLO = 0x202E;
00619   const int LRE = 0x202A;
00620   const int RLE = 0x202B;
00621   QString result = input;
00622   result.remove( LRO );
00623   result.remove( RLO );
00624   result.remove( LRE );
00625   result.remove( RLE );
00626   return result;
00627 }
00628 
00629 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kblog
  • kcal
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.6.1
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal