00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "kmime_util.h"
00025 #include "kmime_util_p.h"
00026 #include "kmime_header_parsing.h"
00027 #include "kmime_charfreq.h"
00028
00029 #include <config-kmime.h>
00030 #include <kdefakes.h>
00031 #include <kglobal.h>
00032 #include <klocale.h>
00033 #include <kcharsets.h>
00034 #include <kcodecs.h>
00035 #include <kdebug.h>
00036
00037 #include <QtCore/QList>
00038 #include <QtCore/QString>
00039 #include <QtCore/QTextCodec>
00040
00041 #include <ctype.h>
00042 #include <time.h>
00043 #include <stdlib.h>
00044 #include <unistd.h>
00045
00046 using namespace KMime;
00047
00048 namespace KMime {
00049
00050 QList<QByteArray> c_harsetCache;
00051 QList<QByteArray> l_anguageCache;
00052
00053 QByteArray cachedCharset( const QByteArray &name )
00054 {
00055 foreach ( const QByteArray& charset, c_harsetCache ) {
00056 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
00057 return charset;
00058 }
00059 }
00060
00061 c_harsetCache.append( name.toUpper() );
00062
00063 return c_harsetCache.last();
00064 }
00065
00066 QByteArray cachedLanguage( const QByteArray &name )
00067 {
00068 foreach ( const QByteArray& language, l_anguageCache ) {
00069 if ( qstricmp( name.data(), language.data() ) == 0 ) {
00070 return language;
00071 }
00072 }
00073
00074 l_anguageCache.append( name.toUpper() );
00075
00076 return l_anguageCache.last();
00077 }
00078
00079 bool isUsAscii( const QString &s )
00080 {
00081 uint sLength = s.length();
00082 for ( uint i=0; i<sLength; i++ ) {
00083 if ( s.at( i ).toLatin1() <= 0 ) {
00084 return false;
00085 }
00086 }
00087 return true;
00088 }
00089
00090 QString nameForEncoding( Headers::contentEncoding enc )
00091 {
00092 switch( enc ) {
00093 case Headers::CE7Bit: return QString::fromLatin1( "7bit" );
00094 case Headers::CE8Bit: return QString::fromLatin1( "8bit" );
00095 case Headers::CEquPr: return QString::fromLatin1( "quoted-printable" );
00096 case Headers::CEbase64: return QString::fromLatin1( "base64" );
00097 case Headers::CEuuenc: return QString::fromLatin1( "uuencode" );
00098 case Headers::CEbinary: return QString::fromLatin1( "binary" );
00099 default: return QString::fromLatin1( "unknown" );
00100 }
00101 }
00102
00103 QList<Headers::contentEncoding> encodingsForData( const QByteArray &data )
00104 {
00105 QList<Headers::contentEncoding> allowed;
00106 CharFreq cf( data );
00107
00108 switch ( cf.type() ) {
00109 case CharFreq::SevenBitText:
00110 allowed << Headers::CE7Bit;
00111 case CharFreq::EightBitText:
00112 allowed << Headers::CE8Bit;
00113 case CharFreq::SevenBitData:
00114 if ( cf.printableRatio() > 5.0/6.0 ) {
00115
00116
00117
00118 allowed << Headers::CEquPr;
00119 allowed << Headers::CEbase64;
00120 } else {
00121 allowed << Headers::CEbase64;
00122 allowed << Headers::CEquPr;
00123 }
00124 break;
00125 case CharFreq::EightBitData:
00126 allowed << Headers::CEbase64;
00127 break;
00128 case CharFreq::None:
00129 default:
00130 Q_ASSERT( false );
00131 }
00132
00133 return allowed;
00134 }
00135
00136
00137 const uchar specialsMap[16] = {
00138 0x00, 0x00, 0x00, 0x00,
00139 0x20, 0xCA, 0x00, 0x3A,
00140 0x80, 0x00, 0x00, 0x1C,
00141 0x00, 0x00, 0x00, 0x00
00142 };
00143
00144
00145 const uchar tSpecialsMap[16] = {
00146 0x00, 0x00, 0x00, 0x00,
00147 0x20, 0xC9, 0x00, 0x3F,
00148 0x80, 0x00, 0x00, 0x1C,
00149 0x00, 0x00, 0x00, 0x00
00150 };
00151
00152
00153 const uchar aTextMap[16] = {
00154 0x00, 0x00, 0x00, 0x00,
00155 0x5F, 0x35, 0xFF, 0xC5,
00156 0x7F, 0xFF, 0xFF, 0xE3,
00157 0xFF, 0xFF, 0xFF, 0xFE
00158 };
00159
00160
00161 const uchar tTextMap[16] = {
00162 0x00, 0x00, 0x00, 0x00,
00163 0x5F, 0x36, 0xFF, 0xC0,
00164 0x7F, 0xFF, 0xFF, 0xE3,
00165 0xFF, 0xFF, 0xFF, 0xFE
00166 };
00167
00168
00169 const uchar eTextMap[16] = {
00170 0x00, 0x00, 0x00, 0x00,
00171 0x40, 0x35, 0xFF, 0xC0,
00172 0x7F, 0xFF, 0xFF, 0xE0,
00173 0x7F, 0xFF, 0xFF, 0xE0
00174 };
00175
00176 QString decodeRFC2047String( const QByteArray &src, QByteArray &usedCS,
00177 const QByteArray &defaultCS, bool forceCS )
00178 {
00179 QByteArray result;
00180 QByteArray spaceBuffer;
00181 const char *scursor = src.constData();
00182 const char *send = scursor + src.length();
00183 bool onlySpacesSinceLastWord = false;
00184
00185 while ( scursor != send ) {
00186
00187 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
00188 spaceBuffer += *scursor++;
00189 continue;
00190 }
00191
00192
00193 if ( *scursor == '=' ) {
00194 QByteArray language;
00195 QString decoded;
00196 ++scursor;
00197 const char *start = scursor;
00198 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
00199 result += decoded.toUtf8();
00200 onlySpacesSinceLastWord = true;
00201 spaceBuffer.clear();
00202 } else {
00203 if ( onlySpacesSinceLastWord ) {
00204 result += spaceBuffer;
00205 onlySpacesSinceLastWord = false;
00206 }
00207 result += '=';
00208 scursor = start;
00209 }
00210 continue;
00211 } else {
00212
00213 if ( onlySpacesSinceLastWord ) {
00214 result += spaceBuffer;
00215 onlySpacesSinceLastWord = false;
00216 }
00217 result += *scursor;
00218 ++scursor;
00219 }
00220 }
00221
00222 return QString::fromUtf8(result);
00223 }
00224
00225 QString decodeRFC2047String( const QByteArray &src )
00226 {
00227 QByteArray usedCS;
00228 return decodeRFC2047String( src, usedCS, "utf-8", false );
00229 }
00230
00231 QByteArray encodeRFC2047String( const QString &src, const QByteArray &charset,
00232 bool addressHeader, bool allow8BitHeaders )
00233 {
00234 QByteArray encoded8Bit, result;
00235 int start=0, end=0;
00236 bool nonAscii=false, ok=true, useQEncoding=false;
00237
00238 const QTextCodec *codec = KGlobal::charsets()->codecForName( charset, ok );
00239
00240 QByteArray usedCS;
00241 if ( !ok ) {
00242
00243 usedCS = KGlobal::locale()->encoding();
00244 codec = KGlobal::charsets()->codecForName( usedCS, ok );
00245 }
00246 else {
00247 Q_ASSERT( codec );
00248 if ( charset.isEmpty() )
00249 usedCS = codec->name();
00250 else
00251 usedCS = charset;
00252 }
00253
00254 if ( usedCS.contains( "8859-" ) ) {
00255 useQEncoding = true;
00256 }
00257
00258 encoded8Bit = codec->fromUnicode( src );
00259
00260 if ( allow8BitHeaders ) {
00261 return encoded8Bit;
00262 }
00263
00264 uint encoded8BitLength = encoded8Bit.length();
00265 for ( unsigned int i=0; i<encoded8BitLength; i++ ) {
00266 if ( encoded8Bit[i] == ' ' ) {
00267 start = i + 1;
00268 }
00269
00270
00271 if ( ( (signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] == '\033' ) ||
00272 ( addressHeader && ( strchr( "\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
00273 end = start;
00274 nonAscii = true;
00275 break;
00276 }
00277 }
00278
00279 if ( nonAscii ) {
00280 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00281
00282 end++;
00283 }
00284
00285 for ( int x=end; x<encoded8Bit.length(); x++ ) {
00286 if ( ( (signed char)encoded8Bit[x]<0) || ( encoded8Bit[x] == '\033' ) ||
00287 ( addressHeader && ( strchr("\"()<>@,.;:\\[]=",encoded8Bit[x]) != 0 ) ) ) {
00288 end = encoded8Bit.length();
00289
00290 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] != ' ' ) ) {
00291
00292 end++;
00293 }
00294 }
00295 }
00296
00297 result = encoded8Bit.left( start ) + "=?" + usedCS;
00298
00299 if ( useQEncoding ) {
00300 result += "?Q?";
00301
00302 char c, hexcode;
00303 for ( int i=start; i<end; i++ ) {
00304 c = encoded8Bit[i];
00305 if ( c == ' ' ) {
00306 result += '_';
00307 } else {
00308 if ( ( ( c >= 'a' ) && ( c <= 'z' ) ) ||
00309 ( ( c >= 'A' ) && ( c <= 'Z' ) ) ||
00310 ( ( c >= '0' ) && ( c <= '9' ) ) ) {
00311 result += c;
00312 } else {
00313 result += '=';
00314 hexcode = ((c & 0xF0) >> 4) + 48;
00315 if ( hexcode >= 58 ) {
00316 hexcode += 7;
00317 }
00318 result += hexcode;
00319 hexcode = (c & 0x0F) + 48;
00320 if ( hexcode >= 58 ) {
00321 hexcode += 7;
00322 }
00323 result += hexcode;
00324 }
00325 }
00326 }
00327 } else {
00328 result += "?B?" + encoded8Bit.mid( start, end - start ).toBase64();
00329 }
00330
00331 result +="?=";
00332 result += encoded8Bit.right( encoded8Bit.length() - end );
00333 } else {
00334 result = encoded8Bit;
00335 }
00336
00337 return result;
00338 }
00339
00340 QByteArray uniqueString()
00341 {
00342 static char chars[] = "0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
00343 time_t now;
00344 char p[11];
00345 int pos, ran;
00346 unsigned int timeval;
00347
00348 p[10] = '\0';
00349 now = time( 0 );
00350 ran = 1 + (int)(1000.0*rand() / (RAND_MAX + 1.0));
00351 timeval = (now / ran) + getpid();
00352
00353 for ( int i=0; i<10; i++ ) {
00354 pos = (int) (61.0*rand() / (RAND_MAX + 1.0));
00355
00356 p[i] = chars[pos];
00357 }
00358
00359 QByteArray ret;
00360 ret.setNum( timeval );
00361 ret += '.';
00362 ret += p;
00363
00364 return ret;
00365 }
00366
00367 QByteArray multiPartBoundary()
00368 {
00369 return "nextPart" + uniqueString();
00370 }
00371
00372 QByteArray unfoldHeader( const QByteArray &header )
00373 {
00374 QByteArray result;
00375 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
00376 while ( ( foldMid = header.indexOf( '\n', pos ) ) >= 0 ) {
00377 foldBegin = foldEnd = foldMid;
00378
00379 while ( foldBegin > 0 ) {
00380 if ( !QChar( header[foldBegin - 1] ).isSpace() ) {
00381 break;
00382 }
00383 --foldBegin;
00384 }
00385
00386 while ( foldEnd <= header.length() - 1 ) {
00387 if ( !QChar( header[foldEnd] ).isSpace() ) {
00388 break;
00389 }
00390 ++foldEnd;
00391 }
00392 result += header.mid( pos, foldBegin - pos );
00393 if ( foldEnd < header.length() -1 )
00394 result += ' ';
00395 pos = foldEnd;
00396 }
00397 result += header.mid( pos, header.length() - pos );
00398 return result;
00399 }
00400
00401 int indexOfHeader( const QByteArray &src, const QByteArray &name, int &end, int &dataBegin, bool *folded )
00402 {
00403 QByteArray n = name;
00404 n.append( ':' );
00405 int begin = -1;
00406
00407 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
00408 begin = 0;
00409 } else {
00410 n.prepend('\n');
00411 const char *p = strcasestr( src.constData(), n.constData() );
00412 if ( !p ) {
00413 begin = -1;
00414 } else {
00415 begin = p - src.constData();
00416 ++begin;
00417 }
00418 }
00419
00420 if ( begin > -1) {
00421 dataBegin = begin + name.length() + 1;
00422
00423 if ( src.at( dataBegin ) == ' ' ) {
00424 ++dataBegin;
00425 }
00426 end = dataBegin;
00427 int len = src.length() - 1;
00428 if ( folded )
00429 *folded = false;
00430
00431 if ( src.at(end) != '\n' ) {
00432 while ( true ) {
00433 end = src.indexOf( '\n', end + 1 );
00434 if ( end == -1 || end == len ||
00435 ( src[end+1] != ' ' && src[end+1] != '\t' ) ) {
00436
00437 break;
00438 } else {
00439 if ( folded )
00440 *folded = true;
00441 }
00442 }
00443 }
00444
00445 if ( end < 0 ) {
00446 end = len + 1;
00447 }
00448 return begin;
00449
00450 } else {
00451 dataBegin = -1;
00452 return -1;
00453 }
00454 }
00455
00456 QByteArray extractHeader( const QByteArray &src, const QByteArray &name )
00457 {
00458 int begin, end;
00459 bool folded;
00460 indexOfHeader( src, name, end, begin, &folded );
00461
00462 if ( begin >= 0 ) {
00463 if ( !folded ) {
00464 return src.mid( begin, end - begin );
00465 } else {
00466 QByteArray hdrValue = src.mid( begin, end - begin );
00467 return unfoldHeader( hdrValue );
00468 }
00469 } else {
00470 return QByteArray();
00471 }
00472 }
00473
00474 QList<QByteArray> extractHeaders( const QByteArray &src, const QByteArray &name )
00475 {
00476 int begin, end;
00477 bool folded;
00478 QList<QByteArray> result;
00479 QByteArray copySrc( src );
00480
00481 indexOfHeader( copySrc, name, end, begin, &folded );
00482 while ( begin >= 0 ) {
00483 if ( !folded ) {
00484 result.append( copySrc.mid( begin, end - begin ) );
00485 } else {
00486 QByteArray hdrValue = copySrc.mid( begin, end - begin );
00487 result.append( unfoldHeader( hdrValue ) );
00488 }
00489
00490
00491 copySrc = copySrc.mid( end );
00492 indexOfHeader( copySrc, name, end, begin, &folded );
00493 }
00494
00495 return result;
00496 }
00497
00498 void removeHeader( QByteArray &header, const QByteArray &name )
00499 {
00500 int begin, end, dummy;
00501 begin = indexOfHeader( header, name, end, dummy );
00502 if ( begin >= 0 ) {
00503 header.remove( begin, end - begin + 1 );
00504 }
00505 }
00506
00507 QByteArray CRLFtoLF( const QByteArray &s )
00508 {
00509 QByteArray ret = s;
00510 ret.replace( "\r\n", "\n" );
00511 return ret;
00512 }
00513
00514 QByteArray LFtoCRLF( const QByteArray &s )
00515 {
00516 QByteArray ret = s;
00517 ret.replace( '\n', "\r\n" );
00518 return ret;
00519 }
00520
namespace {
  // Strips quoting from @p str in place: every double quote is removed, and
  // inside a quoted section each backslash is removed while the character it
  // escapes is kept.  Backslashes outside quoted sections are left untouched.
  // T has to provide length(), operator[] and remove( position, count ).
  template < typename T > void removeQuotesGeneric( T & str )
  {
    bool insideQuotes = false;
    int i = 0;
    while ( i < str.length() ) {
      if ( str[i] == '"' ) {
        str.remove( i, 1 );
        insideQuotes = !insideQuotes;
        continue; // look at the character that moved into position i
      }
      if ( insideQuotes && ( str[i] == '\\' ) ) {
        // drop the backslash; the escaped character is now at position i and
        // is skipped by the increment below
        str.remove( i, 1 );
      }
      ++i;
    }
  }
}
00538
00539 void removeQuots( QByteArray &str )
00540 {
00541 removeQuotesGeneric( str );
00542 }
00543
00544 void removeQuots( QString &str )
00545 {
00546 removeQuotesGeneric( str );
00547 }
00548
00549 void addQuotes( QByteArray &str, bool forceQuotes )
00550 {
00551 bool needsQuotes=false;
00552 for ( int i=0; i < str.length(); i++ ) {
00553 if ( strchr("()<>@,.;:[]=\\\"", str[i] ) != 0 ) {
00554 needsQuotes = true;
00555 }
00556 if ( str[i] == '\\' || str[i] == '\"' ) {
00557 str.insert( i, '\\' );
00558 i++;
00559 }
00560 }
00561
00562 if ( needsQuotes || forceQuotes ) {
00563 str.insert( 0, '\"' );
00564 str.append( "\"" );
00565 }
00566 }
00567
00568 KMIME_EXPORT QString balanceBidiState( const QString &input )
00569 {
00570 const int LRO = 0x202D;
00571 const int RLO = 0x202E;
00572 const int LRE = 0x202A;
00573 const int RLE = 0x202B;
00574 const int PDF = 0x202C;
00575
00576 QString result = input;
00577
00578 int openDirChangers = 0;
00579 int numPDFsRemoved = 0;
00580 for ( int i = 0; i < input.length(); i++ ) {
00581 const ushort &code = input.at( i ).unicode();
00582 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
00583 openDirChangers++;
00584 }
00585 else if ( code == PDF ) {
00586 if ( openDirChangers > 0 ) {
00587 openDirChangers--;
00588 }
00589 else {
00590
00591 kWarning() << "Possible Unicode spoofing (unexpected PDF) detected in" << input;
00592 result.remove( i - numPDFsRemoved, 1 );
00593 numPDFsRemoved++;
00594 }
00595 }
00596 }
00597
00598 if ( openDirChangers > 0 ) {
00599 kWarning() << "Possible Unicode spoofing detected in" << input;
00600
00601
00602
00603
00604 for ( int i = openDirChangers; i > 0; i-- ) {
00605 if ( result.endsWith( '"' ) )
00606 result.insert( result.length() - 1, QChar( PDF ) );
00607 else
00608 result += QChar( PDF );
00609 }
00610 }
00611
00612 return result;
00613 }
00614
00615 QString removeBidiControlChars( const QString &input )
00616 {
00617 const int LRO = 0x202D;
00618 const int RLO = 0x202E;
00619 const int LRE = 0x202A;
00620 const int RLE = 0x202B;
00621 QString result = input;
00622 result.remove( LRO );
00623 result.remove( RLO );
00624 result.remove( LRE );
00625 result.remove( RLE );
00626 return result;
00627 }
00628
00629 }