• Skip to content
  • Skip to link menu
  • KDE API Reference
  • kdepimlibs-4.9.5 API Reference
  • KDE Home
  • Contact Us
 

KIMAP Library

  • kimap
rfccodecs.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  *
3  * rfccodecs.cpp - handler for various rfc/mime encodings
4  * Copyright (C) 2000 s.carstens@gmx.de
5  *
6  * This library is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU Library General Public
8  * License as published by the Free Software Foundation; either
9  * version 2 of the License, or (at your option) any later version.
10  *
11  * This library is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  * Library General Public License for more details.
15  *
16  * You should have received a copy of the GNU Library General Public License
17  * along with this library; see the file COPYING.LIB. If not, write to
18  * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  *
21  *********************************************************************/
33 #include "rfccodecs.h"
34 
35 #include <ctype.h>
36 #include <sys/types.h>
37 
38 #include <stdio.h>
39 #include <stdlib.h>
40 
41 #include <QtCore/QTextCodec>
42 #include <QtCore/QBuffer>
43 #include <QtCore/QRegExp>
44 #include <QtCore/QByteArray>
45 #include <QtCore/QLatin1Char>
46 #include <kcodecs.h>
47 
48 using namespace KIMAP;
49 
50 // This part taken from rfc 2192 IMAP URL Scheme. C. Newman. September 1997.
51 // adapted to QT-Toolkit by Sven Carstens <s.carstens@gmx.de> 2000
52 
//@cond PRIVATE
/* Alphabet of the modified base64 used for IMAP mailbox names; it ends in
   '+' and ','. The array also holds the literal's terminating NUL, so
   sizeof( base64chars ) is one larger than the alphabet. */
static const unsigned char base64chars[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+,";

/* Value stored in the decoding table for bytes outside the alphabet. */
#define UNDEFINED 64
#define MAXLINE 76

/* The 16 characters (plus terminating NUL) that the RFC 2047 / RFC 2231
   encoders in this file always escape. */
static const char especials[17] = "()<>@,;:\"/[]?.= ";

/* UTF16 definitions: surrogate ranges and the constants used to combine
   or split a surrogate pair. */
#define UTF16MASK 0x03FFUL
#define UTF16SHIFT 10
#define UTF16BASE 0x10000UL
#define UTF16HIGHSTART 0xD800UL
#define UTF16HIGHEND 0xDBFFUL
#define UTF16LOSTART 0xDC00UL
#define UTF16LOEND 0xDFFFUL
//@endcond
69 
70 //-----------------------------------------------------------------------------
71 QByteArray KIMAP::decodeImapFolderName( const QByteArray &inSrc )
72 {
73  unsigned char c, i, bitcount;
74  unsigned long ucs4, utf16, bitbuf;
75  unsigned char base64[256], utf8[6];
76  unsigned int srcPtr = 0;
77  QByteArray dst;
78  QByteArray src = inSrc;
79  uint srcLen = inSrc.length();
80 
81  /* initialize modified base64 decoding table */
82  memset( base64, UNDEFINED, sizeof( base64 ) );
83  for ( i = 0; i < sizeof( base64chars ); ++i ) {
84  base64[(int)base64chars[i]] = i;
85  }
86 
87  /* loop until end of string */
88  while ( srcPtr < srcLen ) {
89  c = src[srcPtr++];
90  /* deal with literal characters and &- */
91  if ( c != '&' || src[srcPtr] == '-' ) {
92  /* encode literally */
93  dst += c;
94  /* skip over the '-' if this is an &- sequence */
95  if ( c == '&' ) {
96  srcPtr++;
97  }
98  } else {
99  /* convert modified UTF-7 -> UTF-16 -> UCS-4 -> UTF-8 -> HEX */
100  bitbuf = 0;
101  bitcount = 0;
102  ucs4 = 0;
103  while ( ( c = base64[(unsigned char)src[srcPtr]] ) != UNDEFINED ) {
104  ++srcPtr;
105  bitbuf = ( bitbuf << 6 ) | c;
106  bitcount += 6;
107  /* enough bits for a UTF-16 character? */
108  if ( bitcount >= 16 ) {
109  bitcount -= 16;
110  utf16 = ( bitcount ? bitbuf >> bitcount : bitbuf ) & 0xffff;
111  /* convert UTF16 to UCS4 */
112  if ( utf16 >= UTF16HIGHSTART && utf16 <= UTF16HIGHEND ) {
113  ucs4 = ( utf16 - UTF16HIGHSTART ) << UTF16SHIFT;
114  continue;
115  } else if ( utf16 >= UTF16LOSTART && utf16 <= UTF16LOEND ) {
116  ucs4 += utf16 - UTF16LOSTART + UTF16BASE;
117  } else {
118  ucs4 = utf16;
119  }
120  /* convert UTF-16 range of UCS4 to UTF-8 */
121  if ( ucs4 <= 0x7fUL ) {
122  utf8[0] = ucs4;
123  i = 1;
124  } else if ( ucs4 <= 0x7ffUL ) {
125  utf8[0] = 0xc0 | ( ucs4 >> 6 );
126  utf8[1] = 0x80 | ( ucs4 & 0x3f );
127  i = 2;
128  } else if ( ucs4 <= 0xffffUL ) {
129  utf8[0] = 0xe0 | ( ucs4 >> 12 );
130  utf8[1] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
131  utf8[2] = 0x80 | ( ucs4 & 0x3f );
132  i = 3;
133  } else {
134  utf8[0] = 0xf0 | ( ucs4 >> 18 );
135  utf8[1] = 0x80 | ( ( ucs4 >> 12 ) & 0x3f );
136  utf8[2] = 0x80 | ( ( ucs4 >> 6 ) & 0x3f );
137  utf8[3] = 0x80 | ( ucs4 & 0x3f );
138  i = 4;
139  }
140  /* copy it */
141  for ( c = 0; c < i; ++c ) {
142  dst += utf8[c];
143  }
144  }
145  }
146  /* skip over trailing '-' in modified UTF-7 encoding */
147  if ( src[srcPtr] == '-' ) {
148  ++srcPtr;
149  }
150  }
151  }
152  return dst;
153 }
154 
155 QString KIMAP::decodeImapFolderName( const QString &inSrc )
156 {
157  return QString::fromUtf8( decodeImapFolderName( inSrc.toUtf8() ).data() );
158 }
159 
160 //-----------------------------------------------------------------------------
161 
162 QByteArray KIMAP::quoteIMAP( const QByteArray &src )
163 {
164  uint len = src.length();
165  QByteArray result;
166  result.reserve( 2 * len );
167  for ( unsigned int i = 0; i < len; i++ ) {
168  if ( src[i] == '"' || src[i] == '\\' ) {
169  result += '\\';
170  }
171  result += src[i];
172  }
173  result.squeeze();
174  return result;
175 }
176 
177 QString KIMAP::quoteIMAP( const QString &src )
178 {
179  uint len = src.length();
180  QString result;
181  result.reserve( 2 * len );
182  for ( unsigned int i = 0; i < len; i++ ) {
183  if ( src[i] == '"' || src[i] == '\\' ) {
184  result += '\\';
185  }
186  result += src[i];
187  }
188  //result.squeeze(); - unnecessary and slow
189  return result;
190 }
191 
192 //-----------------------------------------------------------------------------
193 QString KIMAP::encodeImapFolderName( const QString &inSrc )
194 {
195  return QString::fromUtf8( encodeImapFolderName( inSrc.toUtf8() ).data() );
196 }
197 
198 QByteArray KIMAP::encodeImapFolderName( const QByteArray &inSrc )
199 {
200  unsigned int utf8pos, utf8total, c, utf7mode, bitstogo, utf16flag;
201  unsigned int ucs4, bitbuf;
202  QByteArray src = inSrc;
203  QByteArray dst;
204 
205  int srcPtr = 0;
206  utf7mode = 0;
207  utf8total = 0;
208  bitstogo = 0;
209  utf8pos = 0;
210  bitbuf = 0;
211  ucs4 = 0;
212  while ( srcPtr < src.length () ) {
213  c = (unsigned char)src[srcPtr++];
214  /* normal character? */
215  if ( c >= ' ' && c <= '~' ) {
216  /* switch out of UTF-7 mode */
217  if ( utf7mode ) {
218  if ( bitstogo ) {
219  dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
220  bitstogo = 0;
221  }
222  dst += '-';
223  utf7mode = 0;
224  }
225  dst += c;
226  /* encode '&' as '&-' */
227  if ( c == '&' ) {
228  dst += '-';
229  }
230  continue;
231  }
232  /* switch to UTF-7 mode */
233  if ( !utf7mode ) {
234  dst += '&';
235  utf7mode = 1;
236  }
237  /* Encode US-ASCII characters as themselves */
238  if ( c < 0x80 ) {
239  ucs4 = c;
240  utf8total = 1;
241  } else if ( utf8total ) {
242  /* save UTF8 bits into UCS4 */
243  ucs4 = ( ucs4 << 6 ) | ( c & 0x3FUL );
244  if ( ++utf8pos < utf8total ) {
245  continue;
246  }
247  } else {
248  utf8pos = 1;
249  if ( c < 0xE0 ) {
250  utf8total = 2;
251  ucs4 = c & 0x1F;
252  } else if ( c < 0xF0 ) {
253  utf8total = 3;
254  ucs4 = c & 0x0F;
255  } else {
256  /* NOTE: can't convert UTF8 sequences longer than 4 */
257  utf8total = 4;
258  ucs4 = c & 0x03;
259  }
260  continue;
261  }
262  /* loop to split ucs4 into two utf16 chars if necessary */
263  utf8total = 0;
264  do
265  {
266  if ( ucs4 >= UTF16BASE ) {
267  ucs4 -= UTF16BASE;
268  bitbuf =
269  ( bitbuf << 16 ) | ( ( ucs4 >> UTF16SHIFT ) + UTF16HIGHSTART );
270  ucs4 = ( ucs4 & UTF16MASK ) + UTF16LOSTART;
271  utf16flag = 1;
272  } else {
273  bitbuf = ( bitbuf << 16 ) | ucs4;
274  utf16flag = 0;
275  }
276  bitstogo += 16;
277  /* spew out base64 */
278  while ( bitstogo >= 6 ) {
279  bitstogo -= 6;
280  dst +=
281  base64chars[( bitstogo ? ( bitbuf >> bitstogo ) : bitbuf ) & 0x3F];
282  }
283  }
284  while ( utf16flag );
285  }
286  /* if in UTF-7 mode, finish in ASCII */
287  if ( utf7mode ) {
288  if ( bitstogo ) {
289  dst += base64chars[( bitbuf << ( 6 - bitstogo ) ) & 0x3F];
290  }
291  dst += '-';
292  }
293  return quoteIMAP( dst );
294 }
295 
296 //-----------------------------------------------------------------------------
297 QTextCodec *KIMAP::codecForName( const QString &str )
298 {
299  if ( str.isEmpty () ) {
300  return 0;
301  }
302  return QTextCodec::codecForName ( str.toLower ().
303  replace ( "windows", "cp" ).toLatin1 () );
304 }
305 
306 //-----------------------------------------------------------------------------
307 const QString KIMAP::decodeRFC2047String( const QString &str )
308 {
309  QString throw_away;
310 
311  return decodeRFC2047String( str, throw_away );
312 }
313 
314 //-----------------------------------------------------------------------------
315 const QString KIMAP::decodeRFC2047String( const QString &str,
316  QString &charset )
317 {
318  QString throw_away;
319 
320  return decodeRFC2047String( str, charset, throw_away );
321 }
322 
323 //-----------------------------------------------------------------------------
324 const QString KIMAP::decodeRFC2047String( const QString &str,
325  QString &charset,
326  QString &language )
327 {
328  //do we have a rfc string
329  if ( !str.contains( "=?" ) ) {
330  return str;
331  }
332 
333  // FIXME get rid of the conversion?
334  QByteArray aStr = str.toAscii (); // QString.length() means Unicode chars
335  QByteArray result;
336  char *pos, *beg, *end, *mid = 0;
337  QByteArray cstr;
338  char encoding = 0, ch;
339  bool valid;
340  const int maxLen = 200;
341  int i;
342 
343 // result.truncate(aStr.length());
344  for ( pos = aStr.data (); *pos; pos++ ) {
345  if ( pos[0] != '=' || pos[1] != '?' ) {
346  result += *pos;
347  continue;
348  }
349  beg = pos + 2;
350  end = beg;
351  valid = true;
352  // parse charset name
353  for ( i = 2, pos += 2;
354  i < maxLen &&
355  ( *pos != '?' && ( ispunct( *pos ) || isalnum ( *pos ) ) );
356  i++ )
357  pos++;
358  if ( *pos != '?' || i < 4 || i >= maxLen ) {
359  valid = false;
360  } else {
361  charset = QByteArray( beg, i - 1 ); // -2 + 1 for the zero
362  int pt = charset.lastIndexOf( '*' );
363  if ( pt != -1 ) {
364  // save language for later usage
365  language = charset.right( charset.length () - pt - 1 );
366 
367  // tie off language as defined in rfc2047
368  charset.truncate( pt );
369  }
370  // get encoding and check delimiting question marks
371  encoding = toupper( pos[1] );
372  if ( pos[2] != '?' ||
373  ( encoding != 'Q' && encoding != 'B' &&
374  encoding != 'q' && encoding != 'b' ) ) {
375  valid = false;
376  }
377  pos += 3;
378  i += 3;
379 // kDebug() << "Charset:" << charset << "- Language:" << language << "-'" << pos << "'";
380  }
381  if ( valid ) {
382  mid = pos;
383  // search for end of encoded part
384  while ( i < maxLen && *pos && !( *pos == '?' && *( pos + 1 ) == '=' ) ) {
385  i++;
386  pos++;
387  }
388  end = pos + 2;//end now points to the first char after the encoded string
389  if ( i >= maxLen || !*pos ) {
390  valid = false;
391  }
392  }
393  if ( valid ) {
394  ch = *pos;
395  *pos = '\0';
396  cstr = QByteArray (mid).left( (int)( mid - pos - 1 ) );
397  if ( encoding == 'Q' ) {
398  // decode quoted printable text
399  for ( i = cstr.length () - 1; i >= 0; --i ) {
400  if ( cstr[i] == '_' ) {
401  cstr[i] = ' ';
402  }
403  }
404 // kDebug() << "before QP '"
405 // << cstr << "'";
406  cstr = KCodecs::quotedPrintableDecode( cstr );
407 // kDebug() << "after QP '"
408 // << cstr << "'";
409  } else {
410  // decode base64 text
411  cstr = QByteArray::fromBase64( cstr );
412  }
413  *pos = ch;
414  int len = cstr.length();
415  for ( i = 0; i < len; ++i ) {
416  result += cstr[i];
417  }
418 
419  pos = end - 1;
420  } else {
421 // kDebug() << "invalid";
422  //result += "=?";
423  //pos = beg -1; // because pos gets increased shortly afterwards
424  pos = beg - 2;
425  result += *pos++;
426  result += *pos;
427  }
428  }
429  if ( !charset.isEmpty () ) {
430  QTextCodec *aCodec = codecForName( charset.toAscii () );
431  if ( aCodec ) {
432 // kDebug() << "Codec is" << aCodec->name();
433  return aCodec->toUnicode( result );
434  }
435  }
436  return result;
437 }
438 
439 //-----------------------------------------------------------------------------
440 const QString KIMAP::encodeRFC2047String( const QString &str )
441 {
442  return encodeRFC2047String( str.toLatin1() );
443 }
444 
445 //-----------------------------------------------------------------------------
446 const QByteArray KIMAP::encodeRFC2047String( const QByteArray &str )
447 {
448  if ( str.isEmpty () ) {
449  return str;
450  }
451 
452  const signed char *latin =
453  reinterpret_cast<const signed char *>
454  ( str.data() ), *l, *start, *stop;
455  char hexcode;
456  int numQuotes, i;
457  int rptr = 0;
458  // My stats show this number results in 12 resize() out of 73,000
459  int resultLen = 3 * str.length() / 2;
460  QByteArray result( resultLen, '\0' );
461 
462  while ( *latin ) {
463  l = latin;
464  start = latin;
465  while ( *l ) {
466  if ( *l == 32 ) {
467  start = l + 1;
468  }
469  if ( *l < 0 ) {
470  break;
471  }
472  l++;
473  }
474  if ( *l ) {
475  numQuotes = 1;
476  while ( *l ) {
477  /* The encoded word must be limited to 75 character */
478  for ( i = 0; i < 16; ++i ) {
479  if ( *l == especials[i] ) {
480  numQuotes++;
481  }
482  }
483  if ( *l < 0 ) {
484  numQuotes++;
485  }
486  /* Stop after 58 = 75 - 17 characters or at "<user@host..." */
487  if ( l - start + 2 * numQuotes >= 58 || *l == 60 ) {
488  break;
489  }
490  l++;
491  }
492  if ( *l ) {
493  stop = l - 1;
494  while ( stop >= start && *stop != 32 ) {
495  stop--;
496  }
497  if ( stop <= start ) {
498  stop = l;
499  }
500  } else {
501  stop = l;
502  }
503  if ( resultLen - rptr - 1 <= start - latin + 1 + 16 ) {
504  // =?iso-88...
505  resultLen += ( start - latin + 1 ) * 2 + 20; // more space
506  result.resize( resultLen );
507  }
508  while ( latin < start ) {
509  result[rptr++] = *latin;
510  latin++;
511  }
512  result.replace( rptr, 15, "=?iso-8859-1?q?" );
513  rptr += 15;
514  if ( resultLen - rptr - 1 <= 3 * ( stop - latin + 1 ) ) {
515  resultLen += ( stop - latin + 1 ) * 4 + 20; // more space
516  result.resize( resultLen );
517  }
518  while ( latin < stop ) {
519  // can add up to 3 chars/iteration
520  numQuotes = 0;
521  for ( i = 0; i < 16; ++i ) {
522  if ( *latin == especials[i] ) {
523  numQuotes = 1;
524  }
525  }
526  if ( *latin < 0 ) {
527  numQuotes = 1;
528  }
529  if ( numQuotes ) {
530  result[rptr++] = '=';
531  hexcode = ( ( *latin & 0xF0 ) >> 4 ) + 48;
532  if ( hexcode >= 58 ) {
533  hexcode += 7;
534  }
535  result[rptr++] = hexcode;
536  hexcode = ( *latin & 0x0F ) + 48;
537  if ( hexcode >= 58 ) {
538  hexcode += 7;
539  }
540  result[rptr++] = hexcode;
541  } else {
542  result[rptr++] = *latin;
543  }
544  latin++;
545  }
546  result[rptr++] = '?';
547  result[rptr++] = '=';
548  } else {
549  while ( *latin ) {
550  if ( rptr == resultLen - 1 ) {
551  resultLen += 30;
552  result.resize( resultLen );
553  }
554  result[rptr++] = *latin;
555  latin++;
556  }
557  }
558  }
559  result[rptr] = 0;
560  return result;
561 }
562 
563 //-----------------------------------------------------------------------------
564 const QString KIMAP::encodeRFC2231String( const QString &str )
565 {
566  if ( str.isEmpty () ) {
567  return str;
568  }
569 
570  signed char *latin = (signed char *)calloc( 1, str.length () + 1 );
571  char *latin_us = (char *)latin;
572  strcpy( latin_us, str.toLatin1 () );
573  signed char *l = latin;
574  char hexcode;
575  int i;
576  bool quote;
577  while ( *l ) {
578  if ( *l < 0 ) {
579  break;
580  }
581  l++;
582  }
583  if ( !*l ) {
584  free( latin );
585  return str;
586  }
587  QByteArray result;
588  l = latin;
589  while ( *l ) {
590  quote = *l < 0;
591  for ( i = 0; i < 16; ++i ) {
592  if ( *l == especials[i] ) {
593  quote = true;
594  }
595  }
596  if ( quote ) {
597  result += '%';
598  hexcode = ( ( *l & 0xF0 ) >> 4 ) + 48;
599  if ( hexcode >= 58 ) {
600  hexcode += 7;
601  }
602  result += hexcode;
603  hexcode = ( *l & 0x0F ) + 48;
604  if ( hexcode >= 58 ) {
605  hexcode += 7;
606  }
607  result += hexcode;
608  } else {
609  result += *l;
610  }
611  l++;
612  }
613  free( latin );
614  return result;
615 }
616 
617 //-----------------------------------------------------------------------------
618 const QString KIMAP::decodeRFC2231String( const QString &str )
619 {
620  int p = str.indexOf ( '\'' );
621 
622  //see if it is an rfc string
623  if ( p < 0 ) {
624  return str;
625  }
626 
627  int l = str.lastIndexOf( '\'' );
628 
629  //second is language
630  if ( p >= l ) {
631  return str;
632  }
633 
634  //first is charset or empty
635  QString charset = str.left ( p );
636  QString st = str.mid ( l + 1 );
637  QString language = str.mid ( p + 1, l - p - 1 );
638 
639  //kDebug() << "Charset:" << charset << "Language:" << language;
640 
641  char ch, ch2;
642  p = 0;
643  while ( p < (int) st.length () ) {
644  if ( st.at( p ) == 37 ) {
645  ch = st.at( p + 1 ).toLatin1 () - 48;
646  if ( ch > 16 ) {
647  ch -= 7;
648  }
649  ch2 = st.at( p + 2 ).toLatin1 () - 48;
650  if ( ch2 > 16 ) {
651  ch2 -= 7;
652  }
653  st.replace( p, 1, ch * 16 + ch2 );
654  st.remove ( p + 1, 2 );
655  }
656  p++;
657  }
658  return st;
659 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2013 The KDE developers.
Generated on Sat Jan 5 2013 19:44:10 by doxygen 1.8.1.2 written by Dimitri van Heesch, © 1997-2006

KDE's Doxygen guidelines are available online.

KIMAP Library

Skip menu "KIMAP Library"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • Related Pages

kdepimlibs-4.9.5 API Reference

Skip menu "kdepimlibs-4.9.5 API Reference"
  • akonadi
  •   contact
  •   kmime
  • kabc
  • kalarmcal
  • kblog
  • kcal
  • kcalcore
  • kcalutils
  • kholidays
  • kimap
  • kioslave
  •   imap4
  •   mbox
  •   nntp
  • kldap
  • kmbox
  • kmime
  • kontactinterface
  • kpimidentities
  • kpimtextedit
  •   richtextbuilders
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • microblog
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Report problems with this website to our bug tracking system.
Contact the specific authors with questions and comments about the page contents.

KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal