24 #include "kmime_util.h"
25 #include "kmime_util_p.h"
29 #include "kmime_header_parsing.h"
30 #include "kmime_message.h"
31 #include "kmime_warning.h"
33 #include <config-kmime.h>
37 #include <klocalizedstring.h>
38 #include <kcharsets.h>
42 #include <QtCore/QList>
43 #include <QtCore/QString>
44 #include <QtCore/QTextCodec>
50 #include <boost/concept_check.hpp>
52 using namespace KMime;
56 QList<QByteArray> c_harsetCache;
57 QList<QByteArray> l_anguageCache;
58 QString f_allbackCharEnc;
59 bool u_seOutlookEncoding =
false;
63 foreach (
const QByteArray& charset, c_harsetCache ) {
64 if ( qstricmp( name.data(), charset.data() ) == 0 ) {
69 c_harsetCache.append( name.toUpper() );
71 return c_harsetCache.last();
76 foreach (
const QByteArray& language, l_anguageCache ) {
77 if ( qstricmp( name.data(), language.data() ) == 0 ) {
82 l_anguageCache.append( name.toUpper() );
84 return l_anguageCache.last();
89 uint sLength = s.length();
90 for ( uint i=0; i<sLength; i++ ) {
91 if ( s.at( i ).toLatin1() <= 0 ) {
101 case Headers::CE7Bit:
return QString::fromLatin1(
"7bit" );
102 case Headers::CE8Bit:
return QString::fromLatin1(
"8bit" );
103 case Headers::CEquPr:
return QString::fromLatin1(
"quoted-printable" );
104 case Headers::CEbase64:
return QString::fromLatin1(
"base64" );
105 case Headers::CEuuenc:
return QString::fromLatin1(
"uuencode" );
106 case Headers::CEbinary:
return QString::fromLatin1(
"binary" );
107 default:
return QString::fromLatin1(
"unknown" );
113 QList<Headers::contentEncoding> allowed;
116 switch ( cf.
type() ) {
118 allowed << Headers::CE7Bit;
120 allowed << Headers::CE8Bit;
126 allowed << Headers::CEquPr;
127 allowed << Headers::CEbase64;
129 allowed << Headers::CEbase64;
130 allowed << Headers::CEquPr;
134 allowed << Headers::CEbase64;
145 const uchar specialsMap[16] = {
146 0x00, 0x00, 0x00, 0x00,
147 0x20, 0xCA, 0x00, 0x3A,
148 0x80, 0x00, 0x00, 0x1C,
149 0x00, 0x00, 0x00, 0x00
153 const uchar tSpecialsMap[16] = {
154 0x00, 0x00, 0x00, 0x00,
155 0x20, 0xC9, 0x00, 0x3F,
156 0x80, 0x00, 0x00, 0x1C,
157 0x00, 0x00, 0x00, 0x00
161 const uchar aTextMap[16] = {
162 0x00, 0x00, 0x00, 0x00,
163 0x5F, 0x35, 0xFF, 0xC5,
164 0x7F, 0xFF, 0xFF, 0xE3,
165 0xFF, 0xFF, 0xFF, 0xFE
169 const uchar tTextMap[16] = {
170 0x00, 0x00, 0x00, 0x00,
171 0x5F, 0x36, 0xFF, 0xC0,
172 0x7F, 0xFF, 0xFF, 0xE3,
173 0xFF, 0xFF, 0xFF, 0xFE
177 const uchar eTextMap[16] = {
178 0x00, 0x00, 0x00, 0x00,
179 0x40, 0x35, 0xFF, 0xC0,
180 0x7F, 0xFF, 0xFF, 0xE0,
181 0x7F, 0xFF, 0xFF, 0xE0
186 f_allbackCharEnc = fallbackCharEnc;
191 return f_allbackCharEnc;
196 u_seOutlookEncoding = violateStandard;
201 return u_seOutlookEncoding;
206 const QByteArray &defaultCS,
bool forceCS )
209 QByteArray spaceBuffer;
210 const char *scursor = src.constData();
211 const char *send = scursor + src.length();
212 bool onlySpacesSinceLastWord =
false;
214 while ( scursor != send ) {
216 if ( isspace( *scursor ) && onlySpacesSinceLastWord ) {
217 spaceBuffer += *scursor++;
222 if ( *scursor ==
'=' ) {
226 const char *start = scursor;
227 if ( HeaderParsing::parseEncodedWord( scursor, send, decoded, language, usedCS, defaultCS, forceCS ) ) {
228 result += decoded.toUtf8();
229 onlySpacesSinceLastWord =
true;
232 if ( onlySpacesSinceLastWord ) {
233 result += spaceBuffer;
234 onlySpacesSinceLastWord =
false;
242 if ( onlySpacesSinceLastWord ) {
243 result += spaceBuffer;
244 onlySpacesSinceLastWord =
false;
252 const QString tryUtf8 = QString::fromUtf8( result );
253 if ( tryUtf8.contains( 0xFFFD ) && !f_allbackCharEnc.isEmpty() ) {
254 QTextCodec* codec = KGlobal::charsets()->codecForName( f_allbackCharEnc );
255 return codec->toUnicode( result );
267 static const char *reservedCharacters =
"\"()<>@,.;:\\[]=";
270 bool addressHeader,
bool allow8BitHeaders )
274 bool nonAscii=
false, ok=
true, useQEncoding=
false;
277 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ), ok );
282 usedCS = KGlobal::locale()->encoding();
283 codec = KGlobal::charsets()->codecForName( QString::fromLatin1( usedCS ), ok );
286 if ( charset.isEmpty() ) {
287 usedCS = codec->name();
293 QTextCodec::ConverterState converterState( QTextCodec::IgnoreHeader );
294 QByteArray encoded8Bit = codec->fromUnicode( src.constData(), src.length(), &converterState );
295 if ( converterState.invalidChars > 0 ) {
297 codec = QTextCodec::codecForName( usedCS );
298 encoded8Bit = codec->fromUnicode( src );
301 if ( usedCS.contains(
"8859-" ) ) {
305 if ( allow8BitHeaders ) {
309 uint encoded8BitLength = encoded8Bit.length();
310 for (
unsigned int i=0; i<encoded8BitLength; i++ ) {
311 if ( encoded8Bit[i] ==
' ' ) {
316 if ( ( (
signed char)encoded8Bit[i] < 0 ) || ( encoded8Bit[i] ==
'\033' ) ||
317 ( addressHeader && ( strchr(
"\"()<>@,.;:\\[]=", encoded8Bit[i] ) != 0 ) ) ) {
325 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
330 for (
int x=end; x<encoded8Bit.length(); x++ ) {
331 if ( ( (
signed char)encoded8Bit[x] < 0 ) || ( encoded8Bit[x] ==
'\033' ) ||
332 ( addressHeader && ( strchr( reservedCharacters, encoded8Bit[x] ) != 0 ) ) ) {
335 while ( ( end < encoded8Bit.length() ) && ( encoded8Bit[end] !=
' ' ) ) {
342 result = encoded8Bit.left( start ) +
"=?" + usedCS;
344 if ( useQEncoding ) {
348 for (
int i=start; i<end; i++ ) {
353 if ( ( ( c >=
'a' ) && ( c <=
'z' ) ) ||
354 ( ( c >=
'A' ) && ( c <=
'Z' ) ) ||
355 ( ( c >=
'0' ) && ( c <=
'9' ) ) ) {
359 hexcode = ( ( c & 0xF0 ) >> 4 ) + 48;
360 if ( hexcode >= 58 ) {
364 hexcode = ( c & 0x0F ) + 48;
365 if ( hexcode >= 58 ) {
373 result +=
"?B?" + encoded8Bit.mid( start, end - start ).toBase64();
377 result += encoded8Bit.right( encoded8Bit.length() - end );
379 result = encoded8Bit;
385 QByteArray encodeRFC2047Sentence(
const QString& src,
const QByteArray& charset )
388 QList<QChar> splitChars;
389 splitChars << QLatin1Char(
',' ) << QLatin1Char(
'\"' ) << QLatin1Char(
';' ) << QLatin1Char(
'\\' );
390 const QChar *ch = src.constData();
391 const int length = src.length();
398 while ( pos < length ) {
400 const bool isAscii = ch->unicode() < 127;
401 const bool isReserved = ( strchr( reservedCharacters, ch->toLatin1() ) != 0 );
402 if ( isAscii && isReserved ) {
403 const int wordSize = pos - wordStart;
404 if ( wordSize > 0 ) {
405 const QString word = src.mid( wordStart, wordSize );
409 result += ch->toLatin1();
417 const int wordSize = pos - wordStart;
418 if ( wordSize > 0 ) {
419 const QString word = src.mid( wordStart, pos - wordStart );
431 if ( str.isEmpty() ) {
435 const QTextCodec *codec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
437 if ( charset ==
"us-ascii" ) {
438 latin = str.toLatin1();
439 }
else if ( codec ) {
440 latin = codec->fromUnicode( str );
442 latin = str.toLocal8Bit();
446 for ( l = latin.data(); *l; ++l ) {
447 if ( ( ( *l & 0xE0 ) == 0 ) || ( *l & 0x80 ) ) {
456 QByteArray result = charset +
"''";
457 for ( l = latin.data(); *l; ++l ) {
458 bool needsQuoting = ( *l & 0x80 ) || ( *l ==
'%' );
459 if ( !needsQuoting ) {
460 const QByteArray especials =
"()<>@,;:\"/[]?.= \033";
461 int len = especials.length();
462 for (
int i = 0; i < len; i++ ) {
463 if ( *l == especials[i] ) {
469 if ( needsQuoting ) {
471 unsigned char hexcode;
472 hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
473 if ( hexcode >= 58 ) {
477 hexcode = ( *l & 0x0F ) + 48;
478 if ( hexcode >= 58 ) {
494 int p = str.indexOf(
'\'' );
496 return KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) )->toUnicode( str );
500 QByteArray charset = str.left( p );
502 QByteArray st = str.mid( str.lastIndexOf(
'\'' ) + 1 );
506 while ( p < (
int)st.length() ) {
507 if ( st.at( p ) == 37 ) {
510 if ( p + 2 < st.length() ) {
511 ch = st.at( p + 1 ) - 48;
515 ch2 = st.at( p + 2 ) - 48;
519 st[p] = ch * 16 + ch2;
520 st.remove( p + 1, 2 );
525 kDebug() <<
"Got pre-decoded:" << st;
527 const QTextCodec * charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( charset ) );
528 if ( !charsetcodec || forceCS ) {
529 charsetcodec = KGlobal::charsets()->codecForName( QString::fromLatin1( defaultCS ) );
532 usedCS = charsetcodec->name();
533 return charsetcodec->toUnicode( st );
544 static char chars[] =
"0123456789abcdefghijklmnopqrstuvxyzABCDEFGHIJKLMNOPQRSTUVWXYZ";
548 unsigned int timeval;
552 ran = 1 + (int)( 1000.0 * rand() / ( RAND_MAX + 1.0 ) );
553 timeval = ( now / ran ) + getpid();
555 for (
int i = 0; i < 10; i++ ) {
556 pos = (int) ( 61.0 * rand() / ( RAND_MAX + 1.0 ) );
562 ret.setNum( timeval );
577 if ( header.isEmpty() ) {
581 int pos = 0, foldBegin = 0, foldMid = 0, foldEnd = 0;
582 while ( ( foldMid = header.indexOf(
'\n', pos ) ) >= 0 ) {
583 foldBegin = foldEnd = foldMid;
585 while ( foldBegin > 0 ) {
586 if ( !QChar::fromLatin1( header[foldBegin - 1] ).isSpace() ) {
592 while ( foldEnd <= header.length() - 1 ) {
593 if ( QChar::fromLatin1( header[foldEnd] ).isSpace() ) {
595 }
else if ( foldEnd > 0 && header[foldEnd - 1] ==
'\n' &&
596 header[foldEnd] ==
'=' && foldEnd + 2 < header.length() &&
597 ( ( header[foldEnd + 1] ==
'0' &&
598 header[foldEnd + 2] ==
'9' ) ||
599 ( header[foldEnd + 1] ==
'2' &&
600 header[foldEnd + 2] ==
'0' ) ) ) {
609 result += header.mid( pos, foldBegin - pos );
610 if ( foldEnd < header.length() - 1 ) {
615 const int len = header.length();
617 result += header.mid( pos, len - pos );
622 int findHeaderLineEnd(
const QByteArray &src,
int &dataBegin,
bool *folded )
625 int len = src.length() - 1;
631 if ( dataBegin < 0 ) {
636 if ( dataBegin > len ) {
644 if ( src.at( end ) ==
'\n' && end + 1 < len &&
645 ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ) ) {
652 if ( src.at( end ) !=
'\n' ) {
654 end = src.indexOf(
'\n', end + 1 );
655 if ( end == -1 || end == len ) {
658 }
else if ( src[end + 1] ==
' ' || src[end + 1] ==
'\t' ||
659 ( src[end + 1] ==
'=' && end + 3 <= len &&
660 ( ( src[end + 2] ==
'0' && src[end + 3] ==
'9' ) ||
661 ( src[end + 2] ==
'2' && src[end + 3] ==
'0' ) ) ) ) {
679 int indexOfHeader(
const QByteArray &src,
const QByteArray &name,
int &end,
int &dataBegin,
bool *folded )
685 if ( qstrnicmp( n.constData(), src.constData(), n.length() ) == 0 ) {
689 const char *p = strcasestr( src.constData(), n.constData() );
693 begin = p - src.constData();
699 dataBegin = begin + name.length() + 1;
701 if ( src.at( dataBegin ) ==
' ' ) {
704 end = findHeaderLineEnd( src, dataBegin, folded );
720 if ( src.isEmpty() || indexOfHeader( src, name, end, begin, &folded ) < 0 ) {
726 result = src.mid( begin, end - begin );
729 QByteArray hdrValue = src.mid( begin, end - begin );
737 QList<QByteArray>
extractHeaders(
const QByteArray &src,
const QByteArray &name )
741 QList<QByteArray> result;
742 QByteArray copySrc( src );
744 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
748 while ( begin >= 0 ) {
750 result.append( copySrc.mid( begin, end - begin ) );
752 QByteArray hdrValue = copySrc.mid( begin, end - begin );
757 copySrc = copySrc.mid( end );
758 if ( indexOfHeader( copySrc, name, end, begin, &folded ) < 0 ) {
765 void removeHeader( QByteArray &header,
const QByteArray &name )
767 int begin, end, dummy;
768 begin = indexOfHeader( header, name, end, dummy );
770 header.remove( begin, end - begin + 1 );
777 ret.replace(
"\r\n",
"\n" );
790 ret.replace(
'\n',
"\r\n" );
794 QByteArray
LFtoCRLF(
const char *s )
801 template <
typename StringType,
typename CharType >
void removeQuotesGeneric( StringType & str )
803 bool inQuote =
false;
804 for (
int i = 0; i < str.length(); ++i ) {
805 if ( str[i] == CharType(
'"' ) ) {
810 if ( inQuote && ( str[i] == CharType(
'\\' ) ) ) {
820 removeQuotesGeneric<QByteArray, char>( str );
825 removeQuotesGeneric<QString, QLatin1Char>( str );
828 template<
class StringType,
class CharType,
class CharConverterType,
class StringConverterType,
class ToString>
829 void addQuotes_impl( StringType &str,
bool forceQuotes )
831 bool needsQuotes=
false;
832 for (
int i=0; i < str.length(); i++ ) {
833 const CharType cur = str.at( i );
834 if ( QString( ToString( str ) ).contains( QRegExp( QLatin1String(
"\"|\\\\|=|\\]|\\[|:|;|,|\\.|,|@|<|>|\\)|\\(" ) ) ) ) {
837 if ( cur == CharConverterType(
'\\' ) || cur == CharConverterType(
'\"' ) ) {
838 str.insert( i, CharConverterType(
'\\' ) );
843 if ( needsQuotes || forceQuotes ) {
844 str.insert( 0, CharConverterType(
'\"' ) );
845 str.append( StringConverterType(
"\"" ) );
851 addQuotes_impl<QByteArray, char, char, char*, QLatin1String>( str, forceQuotes );
856 addQuotes_impl<QString, QChar, QLatin1Char, QLatin1String, QString>( str, forceQuotes );
861 const int LRO = 0x202D;
862 const int RLO = 0x202E;
863 const int LRE = 0x202A;
864 const int RLE = 0x202B;
865 const int PDF = 0x202C;
867 QString result = input;
869 int openDirChangers = 0;
870 int numPDFsRemoved = 0;
871 for (
int i = 0; i < input.length(); i++ ) {
872 const ushort &code = input.at( i ).unicode();
873 if ( code == LRO || code == RLO || code == LRE || code == RLE ) {
875 }
else if ( code == PDF ) {
876 if ( openDirChangers > 0 ) {
880 kWarning() <<
"Possible Unicode spoofing (unexpected PDF) detected in" << input;
881 result.remove( i - numPDFsRemoved, 1 );
887 if ( openDirChangers > 0 ) {
888 kWarning() <<
"Possible Unicode spoofing detected in" << input;
893 for (
int i = openDirChangers; i > 0; i-- ) {
894 if ( result.endsWith( QLatin1Char(
'"' ) ) ) {
895 result.insert( result.length() - 1, QChar( PDF ) );
897 result += QChar( PDF );
907 const int LRO = 0x202D;
908 const int RLO = 0x202E;
909 const int LRE = 0x202A;
910 const int RLE = 0x202B;
911 QString result = input;
912 result.remove( LRO );
913 result.remove( RLO );
914 result.remove( LRE );
915 result.remove( RLE );
919 static bool isCryptoPart(
Content* content )
931 const QByteArray lowerSubType = contentType->
subType().toLower();
932 return ( contentType->
mediaType().toLower() ==
"application" &&
933 ( lowerSubType ==
"pgp-encrypted" ||
934 lowerSubType ==
"pgp-signature" ||
935 lowerSubType ==
"pkcs7-mime" ||
936 lowerSubType ==
"pkcs7-signature" ||
937 lowerSubType ==
"x-pkcs7-signature" ||
938 ( lowerSubType ==
"octet-stream" &&
948 bool emptyFilename =
true;
951 emptyFilename =
false;
954 if ( emptyFilename &&
957 emptyFilename =
false;
961 if ( !emptyFilename && !isCryptoPart( content ) ) {
983 if ( contentType->
isSubtype(
"signed" ) ||
984 contentType->
isSubtype(
"pgp-signature" ) ||
985 contentType->
isSubtype(
"pkcs7-signature" ) ||
986 contentType->
isSubtype(
"x-pkcs7-signature" ) ||
989 message->
mainBodyPart(
"application/pkcs7-signature" ) ||
990 message->
mainBodyPart(
"application/x-pkcs7-signature" ) ) {
1003 if ( contentType->
isSubtype(
"encrypted" ) ||
1004 contentType->
isSubtype(
"pgp-encrypted" ) ||
1005 contentType->
isSubtype(
"pkcs7-mime" ) ||
1007 message->
mainBodyPart(
"application/pgp-encrypted" ) ||
1023 if ( contentType && contentType->
isMediatype(
"text" ) && contentType->
isSubtype(
"calendar" ) ) {
This file is part of the API for handling MIME data and defines the Codec class.
QByteArray unfoldHeader(const QByteArray &header)
Unfolds the given header if necessary.
List contents() const
For multipart contents, this will return a list of all multipart child contents.
bool hasAttachment(Content *content)
Returns whether or not the given MIME node contains an attachment part.
void addQuotes(QByteArray &str, bool forceQuotes)
Converts the given string into a quoted-string if the string contains any special characters (ie...
bool isSigned(Message *message)
Returns whether or not the given message is partly or fully signed.
QByteArray cachedLanguage(const QByteArray &name)
Consult the language cache.
QByteArray cachedCharset(const QByteArray &name)
Consult the charset cache.
QString decodeRFC2231String(const QByteArray &str, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS)
Decodes string str according to RFC2231.
void setUseOutlookAttachmentEncoding(bool violateStandard)
Set whether or not to use Outlook-compatible attachment filename encoding.
QByteArray encodeRFC2231String(const QString &str, const QByteArray &charset)
Encodes string str according to RFC2231 using charset charset.
QByteArray LFtoCRLF(const QByteArray &s)
Converts all occurrences of "\n" (LF) in s to "\r\n" (CRLF).
Content * mainBodyPart(const QByteArray &type=QByteArray())
Returns the first main body part of a given type, taking multipart/mixed and multipart/alternative no...
Headers::ContentDisposition * contentDisposition(bool create=true)
Returns the Content-Disposition header.
void removeQuots(QByteArray &str)
Removes quote (DQUOTE) characters and decodes "quoted-pairs" (ie.
QString nameForEncoding(Headers::contentEncoding enc)
Returns a user-visible string for a contentEncoding, for example "quoted-printable" for CEquPr...
QList< QByteArray > extractHeaders(const QByteArray &src, const QByteArray &name)
Tries to extract the headers with name name from the string src, unfolding it if necessary.
This file is part of the API for handling MIME data and defines the CharFreq class.
bool isEncrypted(Message *message)
Returns whether or not the given message is partly or fully encrypted.
QByteArray multiPartBoundary()
Constructs a random string (sans leading/trailing "--") that can be used as a multipart delimiter (ie...
float printableRatio() const
Returns the percentage of printable characters in the data.
Headers::ContentType * contentType(bool create=true)
Returns the Content-Type header.
void setFallbackCharEncoding(const QString &fallbackCharEnc)
Set the fallback charset to use when decoding RFC2047-encoded headers.
QList< Headers::contentEncoding > encodingsForData(const QByteArray &data)
Returns a list of encodings that can correctly encode the data.
Represents an (email) message.
QString decodeRFC2047String(const QByteArray &src, QByteArray &usedCS, const QByteArray &defaultCS, bool forceCS)
Decodes string src according to RFC2047, i.e., the construct =?charset?[qb]?encoded?=.
QByteArray uniqueString()
Uses current time, pid and random numbers to construct a string that aims to be unique on a per-host ...
A class that encapsulates MIME encoded Content.
bool isUsAscii(const QString &s)
Checks whether s contains any non-us-ascii characters.
A class for performing basic data typing using frequency count heuristics.
QString balanceBidiState(const QString &input)
Makes sure that the bidirectional state at the end of the string is the same as at the beginning of t...
Type type() const
Returns the data Type as derived from the class heuristics.
bool isInvitation(Content *content)
Returns whether or not the given MIME content is an invitation message of the iTIP protocol...
QString fallbackCharEncoding()
Retrieves the fallback charset, if one has been set.
bool useOutlookAttachmentEncoding()
Retrieve whether or not to use Outlook-compatible encodings for attachments.
QString removeBidiControlChars(const QString &input)
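Similar to balanceBidiState(), but instead of balancing the bidirectional control characters it removes the direction-changing ones (LRO, RLO, LRE, RLE) from the string.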
QByteArray extractHeader(const QByteArray &src, const QByteArray &name)
Tries to extract the header with name name from the string src, unfolding it if necessary.
QByteArray encodeRFC2047String(const QString &src, const QByteArray &charset, bool addressHeader, bool allow8BitHeaders)
Encodes string src according to RFC2047 using charset charset.
QByteArray CRLFtoLF(const QByteArray &s)
Converts all occurrences of "\r\n" (CRLF) in s to "\n" (LF).