00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00035 #include "kmime_codec_qp.h"
00036 #include "kmime_util.h"
00037
00038 #include <kdebug.h>
00039
00040 #include <cassert>
00041
00042 using namespace KMime;
00043
00044 namespace KMime {
00045
00046
00047
00054 static inline char binToHex( uchar value )
00055 {
00056 if ( value > 9 ) {
00057 return value + 'A' - 10;
00058 } else {
00059 return value + '0';
00060 }
00061 }
00062
00067 static inline uchar highNibble( uchar ch )
00068 {
00069 return ch >> 4;
00070 }
00071
00076 static inline uchar lowNibble( uchar ch )
00077 {
00078 return ch & 0xF;
00079 }
00080
00086 static inline bool keep( uchar ch )
00087 {
00088
00089 return !( ( ch < ' ' && ch != '\t' ) || ch == '?' );
00090 }
00091
00092
00093
00094
00095
00096 class QuotedPrintableEncoder : public Encoder
00097 {
00098 char mInputBuffer[16];
00099 uchar mCurrentLineLength;
00100 uchar mAccu;
00101 uint mInputBufferReadCursor : 4;
00102 uint mInputBufferWriteCursor : 4;
00103 enum {
00104 Never, AtBOL, Definitely
00105 } mAccuNeedsEncoding : 2;
00106 bool mSawLineEnd : 1;
00107 bool mSawCR : 1;
00108 bool mFinishing : 1;
00109 bool mFinished : 1;
00110 protected:
00111 friend class QuotedPrintableCodec;
00112 QuotedPrintableEncoder( bool withCRLF=false )
00113 : Encoder( withCRLF ), mCurrentLineLength( 0 ), mAccu( 0 ),
00114 mInputBufferReadCursor( 0 ), mInputBufferWriteCursor( 0 ),
00115 mAccuNeedsEncoding( Never ),
00116 mSawLineEnd( false ), mSawCR( false ), mFinishing( false ),
00117 mFinished( false ) {}
00118
00119 bool needsEncoding( uchar ch )
00120 { return ch > '~' || ( ch < ' ' && ch != '\t' ) || ch == '='; }
00121 bool needsEncodingAtEOL( uchar ch )
00122 { return ch == ' ' || ch == '\t'; }
00123 bool needsEncodingAtBOL( uchar ch )
00124 { return ch == 'F' || ch == '.' || ch == '-'; }
00125 bool fillInputBuffer( const char* &scursor, const char * const send );
00126 bool processNextChar();
00127 void createOutputBuffer( char* &dcursor, const char * const dend );
00128 public:
00129 virtual ~QuotedPrintableEncoder() {}
00130
00131 bool encode( const char* &scursor, const char * const send,
00132 char* &dcursor, const char * const dend );
00133
00134 bool finish( char* &dcursor, const char * const dend );
00135 };
00136
00137 class QuotedPrintableDecoder : public Decoder
00138 {
00139 const char mEscapeChar;
00140 char mBadChar;
00142 uchar mAccu;
00152 const bool mQEncoding;
00153 bool mInsideHexChar;
00154 bool mFlushing;
00155 bool mExpectLF;
00156 bool mHaveAccu;
00159 char mLastChar;
00160 protected:
00161 friend class QuotedPrintableCodec;
00162 friend class Rfc2047QEncodingCodec;
00163 friend class Rfc2231EncodingCodec;
00164 QuotedPrintableDecoder( bool withCRLF=false,
00165 bool aQEncoding=false, char aEscapeChar='=' )
00166 : Decoder( withCRLF ),
00167 mEscapeChar( aEscapeChar ),
00168 mBadChar( 0 ),
00169 mAccu( 0 ),
00170 mQEncoding( aQEncoding ),
00171 mInsideHexChar( false ),
00172 mFlushing( false ),
00173 mExpectLF( false ),
00174 mHaveAccu( false ),
00175 mLastChar( 0 ) {}
00176 public:
00177 virtual ~QuotedPrintableDecoder() {}
00178
00179 bool decode( const char* &scursor, const char * const send,
00180 char* &dcursor, const char * const dend );
00181 bool finish( char* & dcursor, const char * const dend );
00182 };
00183
00184 class Rfc2047QEncodingEncoder : public Encoder
00185 {
00186 uchar mAccu;
00187 uchar mStepNo;
00188 const char mEscapeChar;
00189 bool mInsideFinishing : 1;
00190 protected:
00191 friend class Rfc2047QEncodingCodec;
00192 friend class Rfc2231EncodingCodec;
00193 Rfc2047QEncodingEncoder( bool withCRLF=false, char aEscapeChar='=' )
00194 : Encoder( withCRLF ),
00195 mAccu( 0 ), mStepNo( 0 ), mEscapeChar( aEscapeChar ),
00196 mInsideFinishing( false )
00197 {
00198
00199 assert( aEscapeChar == '=' || aEscapeChar == '%' );
00200 }
00201
00202
00203 bool needsEncoding( uchar ch )
00204 {
00205 if ( ch > 'z' ) {
00206 return true;
00207 }
00208 if ( !isEText( ch ) ) {
00209 return true;
00210 }
00211 if ( mEscapeChar == '%' && ( ch == '*' || ch == '/' ) ) {
00212 return true;
00213 }
00214 return false;
00215 }
00216
00217 public:
00218 virtual ~Rfc2047QEncodingEncoder() {}
00219
00220 bool encode( const char* & scursor, const char * const send,
00221 char* & dcursor, const char * const dend );
00222 bool finish( char* & dcursor, const char * const dend );
00223 };
00224
00225
00226
/**
 * Upper bound for the number of bytes produced when decoding @p insize
 * bytes of input.
 *
 * @param insize   size of the encoded input in bytes.
 * @param withCRLF if true the bound is doubled, which leaves room for every
 *                 input character growing into two output characters.
 * @return @p insize (doubled if @p withCRLF) plus two extra bytes;
 *         presumably the two bytes cover a pending escape character and hex
 *         digit that the decoder's finish() may still write out.
 */
static int QuotedPrintableDecoder_maxDecodedSizeFor( int insize, bool withCRLF )
{
  const int lineEndAllowance = withCRLF ? insize : 0;
  const int pendingEscapeAllowance = 2;

  return insize + lineEndAllowance + pendingEscapeAllowance;
}
00240
00241 Encoder *QuotedPrintableCodec::makeEncoder( bool withCRLF ) const
00242 {
00243 return new QuotedPrintableEncoder( withCRLF );
00244 }
00245
00246 Decoder *QuotedPrintableCodec::makeDecoder( bool withCRLF ) const
00247 {
00248 return new QuotedPrintableDecoder( withCRLF );
00249 }
00250
00251 int QuotedPrintableCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const
00252 {
00253 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00254 }
00255
00256 Encoder *Rfc2047QEncodingCodec::makeEncoder( bool withCRLF ) const
00257 {
00258 return new Rfc2047QEncodingEncoder( withCRLF );
00259 }
00260
00261 Decoder *Rfc2047QEncodingCodec::makeDecoder( bool withCRLF ) const
00262 {
00263 return new QuotedPrintableDecoder( withCRLF, true );
00264 }
00265
00266 int Rfc2047QEncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const
00267 {
00268 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00269 }
00270
00271 Encoder *Rfc2231EncodingCodec::makeEncoder( bool withCRLF ) const
00272 {
00273 return new Rfc2047QEncodingEncoder( withCRLF, '%' );
00274 }
00275
00276 Decoder *Rfc2231EncodingCodec::makeDecoder( bool withCRLF ) const
00277 {
00278 return new QuotedPrintableDecoder( withCRLF, true, '%' );
00279 }
00280
00281 int Rfc2231EncodingCodec::maxDecodedSizeFor( int insize, bool withCRLF ) const
00282 {
00283 return QuotedPrintableDecoder_maxDecodedSizeFor(insize, withCRLF);
00284 }
00285
00286
00287
00288
00289
00290 bool QuotedPrintableDecoder::decode( const char* &scursor,
00291 const char * const send,
00292 char* &dcursor, const char * const dend )
00293 {
00294 if ( mWithCRLF ) {
00295 kWarning() << "CRLF output for decoders isn't yet supported!";
00296 }
00297
00298 while ( scursor != send && dcursor != dend ) {
00299 if ( mFlushing ) {
00300
00301
00302
00303
00304
00305
00306
00307
00308
00309 if ( mInsideHexChar ) {
00310
00311 *dcursor++ = mEscapeChar;
00312 mInsideHexChar = false;
00313 } else if ( mHaveAccu ) {
00314
00315 *dcursor++ = mLastChar;
00316 mHaveAccu = false;
00317 mAccu = 0;
00318 } else {
00319
00320 assert( mAccu == 0 );
00321 if ( mBadChar ) {
00322 if ( mBadChar == '=' ) {
00323 mInsideHexChar = true;
00324 } else {
00325 *dcursor++ = mBadChar;
00326 }
00327 mBadChar = 0;
00328 }
00329 mFlushing = false;
00330 }
00331 continue;
00332 }
00333 assert( mBadChar == 0 );
00334
00335 uchar ch = *scursor++;
00336 uchar value = 255;
00337
00338 if ( mExpectLF && ch != '\n' ) {
00339 kWarning() << "QuotedPrintableDecoder:"
00340 "illegally formed soft linebreak or lonely CR!";
00341 mInsideHexChar = false;
00342 mExpectLF = false;
00343 assert( mAccu == 0 );
00344 }
00345
00346 if ( mInsideHexChar ) {
00347
00348 if ( ch <= '9' ) {
00349 if ( ch >= '0' ) {
00350 value = ch - '0';
00351 } else {
00352 switch ( ch ) {
00353 case '\r':
00354 mExpectLF = true;
00355 break;
00356 case '\n':
00357
00358 if ( !mHaveAccu ) {
00359 mExpectLF = false;
00360 mInsideHexChar = false;
00361 break;
00362 }
00363
00364 default:
00365 kWarning() << "QuotedPrintableDecoder:"
00366 "illegally formed hex char! Outputting verbatim.";
00367 mBadChar = ch;
00368 mFlushing = true;
00369 }
00370 continue;
00371 }
00372 } else {
00373 if ( ch <= 'F' ) {
00374 if ( ch >= 'A' ) {
00375 value = 10 + ch - 'A';
00376 } else {
00377 mBadChar = ch;
00378 mFlushing = true;
00379 continue;
00380 }
00381 } else {
00382 if ( ch <= 'f' && ch >= 'a' ) {
00383 value = 10 + ch - 'a';
00384 } else {
00385 mBadChar = ch;
00386 mFlushing = true;
00387 continue;
00388 }
00389 }
00390 }
00391
00392 assert( value < 16 );
00393 assert( mBadChar == 0 );
00394 assert( !mExpectLF );
00395
00396 if ( mHaveAccu ) {
00397 *dcursor++ = char( mAccu | value );
00398 mAccu = 0;
00399 mHaveAccu = false;
00400 mInsideHexChar = false;
00401 } else {
00402 mHaveAccu = true;
00403 mAccu = value << 4;
00404 mLastChar = ch;
00405 }
00406 } else {
00407 if ( ( ch <= '~' && ch >= ' ' ) || ch == '\t' ) {
00408 if ( ch == mEscapeChar ) {
00409 mInsideHexChar = true;
00410 } else if ( mQEncoding && ch == '_' ) {
00411 *dcursor++ = char( 0x20 );
00412 } else {
00413 *dcursor++ = char( ch );
00414 }
00415 } else if ( ch == '\n' ) {
00416 *dcursor++ = '\n';
00417 mExpectLF = false;
00418 } else if ( ch == '\r' ) {
00419 mExpectLF = true;
00420 } else {
00421 kWarning() << "QuotedPrintableDecoder:" << ch <<
00422 "illegal character in input stream!";
00423 *dcursor++ = char( ch );
00424 }
00425 }
00426 }
00427
00428 return scursor == send;
00429 }
00430
00431 bool QuotedPrintableDecoder::finish( char* &dcursor, const char * const dend )
00432 {
00433 while ( ( mInsideHexChar || mHaveAccu || mFlushing ) && dcursor != dend ) {
00434
00435 if ( mInsideHexChar ) {
00436
00437 *dcursor++ = mEscapeChar;
00438 mInsideHexChar = false;
00439 }
00440 else if ( mHaveAccu ) {
00441
00442 *dcursor++ = mLastChar;
00443 mHaveAccu = false;
00444 mAccu = 0;
00445 } else {
00446
00447 assert( mAccu == 0 );
00448 if ( mBadChar ) {
00449 *dcursor++ = mBadChar;
00450 mBadChar = 0;
00451 }
00452 mFlushing = false;
00453 }
00454 }
00455
00456
00457 return !( mHaveAccu || mFlushing );
00458 }
00459
00460 bool QuotedPrintableEncoder::fillInputBuffer( const char* &scursor,
00461 const char * const send ) {
00462
00463 if ( mSawLineEnd ) {
00464 return true;
00465 }
00466
00467
00468
00469 for ( ; ( mInputBufferWriteCursor + 1 ) % 16 != mInputBufferReadCursor
00470 && scursor != send ; mInputBufferWriteCursor++ ) {
00471 char ch = *scursor++;
00472 if ( ch == '\r' ) {
00473 mSawCR = true;
00474 } else if ( ch == '\n' ) {
00475
00476
00477 if ( mSawCR ) {
00478 mSawCR = false;
00479 assert( mInputBufferWriteCursor != mInputBufferReadCursor );
00480 mInputBufferWriteCursor--;
00481 }
00482 mSawLineEnd = true;
00483 return true;
00484 } else {
00485 mSawCR = false;
00486 }
00487 mInputBuffer[ mInputBufferWriteCursor ] = ch;
00488 }
00489 mSawLineEnd = false;
00490 return false;
00491 }
00492
00493 bool QuotedPrintableEncoder::processNextChar()
00494 {
00495
00496
00497
00498
00499
00500
00501 const int minBufferFillWithoutLineEnd = 4;
00502
00503 assert( mOutputBufferCursor == 0 );
00504
00505 int bufferFill =
00506 int( mInputBufferWriteCursor ) - int( mInputBufferReadCursor ) ;
00507 if ( bufferFill < 0 ) {
00508 bufferFill += 16;
00509 }
00510
00511 assert( bufferFill >=0 && bufferFill <= 15 );
00512
00513 if ( !mFinishing && !mSawLineEnd &&
00514 bufferFill < minBufferFillWithoutLineEnd ) {
00515 return false;
00516 }
00517
00518
00519 if ( mInputBufferReadCursor == mInputBufferWriteCursor ) {
00520 return false;
00521 }
00522
00523
00524 mAccu = mInputBuffer[ mInputBufferReadCursor++ ];
00525 if ( needsEncoding( mAccu ) ) {
00526 mAccuNeedsEncoding = Definitely;
00527 } else if ( ( mSawLineEnd || mFinishing )
00528 && bufferFill == 1
00529 && needsEncodingAtEOL( mAccu ) ) {
00530 mAccuNeedsEncoding = Definitely;
00531 } else if ( needsEncodingAtBOL( mAccu ) ) {
00532 mAccuNeedsEncoding = AtBOL;
00533 } else {
00534
00535 mAccuNeedsEncoding = Never;
00536 }
00537
00538 return true;
00539 }
00540
00541
00542
00543
00544
00545 void QuotedPrintableEncoder::createOutputBuffer( char* &dcursor,
00546 const char * const dend )
00547 {
00548 const int maxLineLength = 76;
00549
00550 assert( mOutputBufferCursor == 0 );
00551
00552 bool lastOneOnThisLine = mSawLineEnd
00553 && mInputBufferReadCursor == mInputBufferWriteCursor;
00554
00555 int neededSpace = 1;
00556 if ( mAccuNeedsEncoding == Definitely ) {
00557 neededSpace = 3;
00558 }
00559
00560
00561 if ( !lastOneOnThisLine ) {
00562 neededSpace++;
00563 }
00564
00565 if ( mCurrentLineLength > maxLineLength - neededSpace ) {
00566
00567 write( '=', dcursor, dend );
00568 writeCRLF( dcursor, dend );
00569 mCurrentLineLength = 0;
00570 }
00571
00572 if ( Never == mAccuNeedsEncoding ||
00573 ( AtBOL == mAccuNeedsEncoding && mCurrentLineLength != 0 ) ) {
00574 write( mAccu, dcursor, dend );
00575 mCurrentLineLength++;
00576 } else {
00577 write( '=', dcursor, dend );
00578 write( binToHex( highNibble( mAccu ) ), dcursor, dend );
00579 write( binToHex( lowNibble( mAccu ) ), dcursor, dend );
00580 mCurrentLineLength += 3;
00581 }
00582 }
00583
00584 bool QuotedPrintableEncoder::encode( const char* &scursor,
00585 const char * const send,
00586 char* &dcursor, const char * const dend )
00587 {
00588
00589 if ( mFinishing ) {
00590 return true;
00591 }
00592
00593 while ( scursor != send && dcursor != dend ) {
00594 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
00595 return scursor == send;
00596 }
00597
00598 assert( mOutputBufferCursor == 0 );
00599
00600
00601
00602 fillInputBuffer( scursor, send );
00603
00604 if ( processNextChar() ) {
00605
00606 createOutputBuffer( dcursor, dend );
00607 } else if ( mSawLineEnd &&
00608 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00609
00610 writeCRLF( dcursor, dend );
00611
00612 mSawLineEnd = false;
00613 mCurrentLineLength = 0;
00614 } else {
00615
00616 break;
00617 }
00618 }
00619
00620
00621
00622 if ( mOutputBufferCursor ) {
00623 flushOutputBuffer( dcursor, dend );
00624 }
00625
00626 return scursor == send;
00627
00628 }
00629
00630 bool QuotedPrintableEncoder::finish( char* &dcursor, const char * const dend )
00631 {
00632 mFinishing = true;
00633
00634 if ( mFinished ) {
00635 return flushOutputBuffer( dcursor, dend );
00636 }
00637
00638 while ( dcursor != dend ) {
00639 if ( mOutputBufferCursor && !flushOutputBuffer( dcursor, dend ) ) {
00640 return false;
00641 }
00642
00643 assert( mOutputBufferCursor == 0 );
00644
00645 if ( processNextChar() ) {
00646
00647 createOutputBuffer( dcursor, dend );
00648 } else if ( mSawLineEnd &&
00649 mInputBufferWriteCursor == mInputBufferReadCursor ) {
00650
00651 writeCRLF( dcursor, dend );
00652 mSawLineEnd = false;
00653 mCurrentLineLength = 0;
00654 } else {
00655 mFinished = true;
00656 return flushOutputBuffer( dcursor, dend );
00657 }
00658 }
00659
00660 return mFinished && !mOutputBufferCursor;
00661
00662 }
00663
00664 bool Rfc2047QEncodingEncoder::encode( const char* &scursor,
00665 const char * const send,
00666 char* &dcursor, const char * const dend )
00667 {
00668 if ( mInsideFinishing ) {
00669 return true;
00670 }
00671
00672 while ( scursor != send && dcursor != dend ) {
00673 uchar value;
00674 switch ( mStepNo ) {
00675 case 0:
00676
00677 mAccu = *scursor++;
00678 if ( !needsEncoding( mAccu ) ) {
00679 *dcursor++ = char( mAccu );
00680 } else if ( mEscapeChar == '=' && mAccu == 0x20 ) {
00681
00682
00683 *dcursor++ = '_';
00684 } else {
00685
00686 *dcursor++ = mEscapeChar;
00687 mStepNo = 1;
00688 }
00689 continue;
00690 case 1:
00691
00692 value = highNibble( mAccu );
00693 mStepNo = 2;
00694 break;
00695 case 2:
00696
00697 value = lowNibble( mAccu );
00698 mStepNo = 0;
00699 break;
00700 default: assert( 0 );
00701 }
00702
00703
00704 *dcursor++ = binToHex( value );
00705 }
00706
00707 return scursor == send;
00708 }
00709
00710 #include <QtCore/QString>
00711
00712 bool Rfc2047QEncodingEncoder::finish( char* &dcursor, const char * const dend )
00713 {
00714 mInsideFinishing = true;
00715
00716
00717 while ( mStepNo != 0 && dcursor != dend ) {
00718 uchar value;
00719 switch ( mStepNo ) {
00720 case 1:
00721
00722 value = highNibble( mAccu );
00723 mStepNo = 2;
00724 break;
00725 case 2:
00726
00727 value = lowNibble( mAccu );
00728 mStepNo = 0;
00729 break;
00730 default: assert( 0 );
00731 }
00732
00733
00734 *dcursor++ = binToHex( value );
00735 }
00736
00737 return mStepNo == 0;
00738 }
00739
00740 }