• Skip to content
  • Skip to link menu
KDE 4.0 API Reference
  • KDE API Reference
  • KDE-PIM Libraries
  • Sitemap
  • Contact Us
 

KMIME Library

kmime_parsers.cpp

00001 /*
00002     kmime_parsers.cpp
00003 
00004     KMime, the KDE internet mail/usenet news message library.
00005     Copyright (c) 2001 the KMime authors.
00006     See file AUTHORS for details
00007 
00008     This library is free software; you can redistribute it and/or
00009     modify it under the terms of the GNU Library General Public
00010     License as published by the Free Software Foundation; either
00011     version 2 of the License, or (at your option) any later version.
00012 
00013     This library is distributed in the hope that it will be useful,
00014     but WITHOUT ANY WARRANTY; without even the implied warranty of
00015     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00016     Library General Public License for more details.
00017 
00018     You should have received a copy of the GNU Library General Public License
00019     along with this library; see the file COPYING.LIB.  If not, write to
00020     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00021     Boston, MA 02110-1301, USA.
00022 */
00023 #include "kmime_parsers.h"
00024 
00025 #include <QtCore/QRegExp>
00026 #include <QtCore/QByteArray>
00027 
00028 using namespace KMime::Parser;
00029 
00030 namespace KMime {
00031 namespace Parser {
00032 
00033 MultiPart::MultiPart( const QByteArray &src, const QByteArray &boundary )
00034 {
00035   s_rc=src;
00036   b_oundary=boundary;
00037 }
00038 
00039 bool MultiPart::parse()
00040 {
00041   QByteArray b = "--" + b_oundary, part;
00042   int pos1=0, pos2=0, blen=b.length();
00043 
00044   p_arts.clear();
00045 
00046   //find the first valid boundary
00047   while ( 1 ) {
00048     if ( ( pos1 = s_rc.indexOf( b, pos1 ) ) == -1 || pos1 == 0 ||
00049          s_rc[pos1-1] == '\n' ) { //valid boundary found or no boundary at all
00050       break;
00051     }
00052     pos1 += blen; //boundary found but not valid => skip it;
00053   }
00054 
00055   if ( pos1 > -1 ) {
00056     pos1 += blen;
00057     if ( s_rc[pos1] == '-' && s_rc[pos1+1] == '-' ) {
00058       // the only valid boundary is the end-boundary
00059       // this message is *really* broken
00060       pos1 = -1; //we give up
00061     } else if ( ( pos1 - blen ) > 1 ) { //preamble present
00062       p_reamble = s_rc.left( pos1 - blen );
00063     }
00064   }
00065 
00066   while ( pos1 > -1 && pos2 > -1 ) {
00067 
00068     //skip the rest of the line for the first boundary - the message-part starts here
00069     if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) {
00070       //now search the next linebreak
00071       //now find the next valid boundary
00072       pos2=++pos1; //pos1 and pos2 point now to the beginning of the next line after the boundary
00073       while ( 1 ) {
00074         if ( ( pos2 = s_rc.indexOf( b, pos2 ) ) == -1 ||
00075              s_rc[pos2-1] == '\n' ) { //valid boundary or no more boundaries found
00076           break;
00077         }
00078         pos2 += blen; //boundary is invalid => skip it;
00079       }
00080 
00081       if ( pos2 == -1 ) { // no more boundaries found
00082         part = s_rc.mid( pos1, s_rc.length() - pos1 ); //take the rest of the string
00083         p_arts.append( part );
00084         pos1 = -1;
00085         pos2 = -1; //break;
00086       } else {
00087         part = s_rc.mid( pos1, pos2 - pos1 - 1 ); // pos2 - 1 (\n) is part of the boundary (see RFC 2046, section 5.1.1)
00088         p_arts.append( part );
00089         pos2 += blen; //pos2 points now to the first charakter after the boundary
00090         if ( s_rc[pos2] == '-' && s_rc[pos2+1] == '-' ) { //end-boundary
00091           pos1 = pos2 + 2; //pos1 points now to the character directly after the end-boundary
00092 
00093           if ( ( pos1 = s_rc.indexOf( '\n', pos1 ) ) > -1 ) { //skip the rest of this line
00094             //everything after the end-boundary is considered as the epilouge
00095             e_pilouge = s_rc.mid( pos1 + 1, s_rc.length() - pos1 - 1 );
00096           }
00097           pos1 = -1;
00098           pos2 = -1; //break
00099         } else {
00100           pos1 = pos2; //the search continues ...
00101         }
00102       }
00103     }
00104   }
00105 
00106   return !p_arts.isEmpty();
00107 }
00108 
00109 //=============================================================================
00110 
00111 NonMimeParser::NonMimeParser( const QByteArray &src ) :
00112   s_rc( src ), p_artNr( -1 ), t_otalNr( -1 )
00113 {
00114 }
00115 
00119 QByteArray NonMimeParser::guessMimeType( const QByteArray &fileName )
00120 {
00121   QByteArray tmp, mimeType;
00122   int pos;
00123 
00124   if ( !fileName.isEmpty() ) {
00125     pos = fileName.lastIndexOf( '.' );
00126     if ( pos++ != -1 ) {
00127       tmp = fileName.mid( pos, fileName.length() - pos).toUpper();
00128       if ( tmp == "JPG" || tmp=="JPEG" ) {
00129         mimeType = "image/jpeg";
00130       } else if ( tmp == "GIF") {
00131         mimeType = "image/gif";
00132       } else if ( tmp == "PNG") {
00133         mimeType = "image/png";
00134       } else if ( tmp == "TIFF" || tmp == "TIF") {
00135         mimeType = "image/tiff";
00136       } else if ( tmp == "XPM") {
00137         mimeType = "image/x-xpixmap";
00138       } else if ( tmp == "XBM") {
00139         mimeType = "image/x-xbitmap";
00140       } else if ( tmp == "BMP") {
00141         mimeType = "image/bmp";
00142       } else if ( tmp == "TXT" ||
00143                   tmp == "ASC" ||
00144                   tmp == "H" ||
00145                   tmp == "C" ||
00146                   tmp == "CC" ||
00147                   tmp == "CPP") {
00148         mimeType = "text/plain";
00149       } else if ( tmp == "HTML" || tmp == "HTM" ) {
00150         mimeType = "text/html";
00151       } else {
00152         mimeType = "application/octet-stream";
00153       }
00154     } else {
00155       mimeType = "application/octet-stream";
00156     }
00157   } else {
00158     mimeType = "application/octet-stream";
00159   }
00160 
00161   return mimeType;
00162 }
00163 
00164 //==============================================================================
00165 
00166 UUEncoded::UUEncoded( const QByteArray &src, const QByteArray &subject ) :
00167   NonMimeParser( src ), s_ubject( subject )
00168 {}
00169 
00170 bool UUEncoded::parse()
00171 {
00172   int currentPos=0;
00173   bool success=true, firstIteration=true;
00174 
00175   while ( success ) {
00176     int beginPos=currentPos, uuStart=currentPos, endPos=0, lineCount=0, MCount=0, pos=0, len=0;
00177     bool containsBegin=false, containsEnd=false;
00178     QByteArray tmp, fileName;
00179 
00180     if ( ( beginPos = QString( s_rc ).
00181            indexOf( QRegExp( "begin [0-9][0-9][0-9]" ), currentPos ) ) > -1 &&
00182          ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n') ) {
00183       containsBegin = true;
00184       uuStart = s_rc.indexOf( '\n', beginPos );
00185       if ( uuStart == -1 ) {//no more line breaks found, we give up
00186         success = false;
00187         break;
00188       } else {
00189         uuStart++; //points now at the beginning of the next line
00190       }
00191     } else {
00192       beginPos=currentPos;
00193     }
00194 
00195     if ( ( endPos = s_rc.
00196            indexOf( "\nend", ( uuStart > 0 ) ? uuStart-1:0 ) ) == -1 ) {
00197       endPos = s_rc.length(); //no end found
00198     } else {
00199       containsEnd = true;
00200     }
00201 
00202     if ( ( containsBegin && containsEnd ) || firstIteration ) {
00203 
00204       //printf("beginPos=%d , uuStart=%d , endPos=%d\n", beginPos, uuStart, endPos);
00205       //all lines in a uuencoded text start with 'M'
00206       for ( int idx=uuStart; idx<endPos; idx++ ) {
00207         if ( s_rc[idx] == '\n' ) {
00208           lineCount++;
00209           if ( idx+1 < endPos && s_rc[idx+1] == 'M') {
00210             idx++;
00211             MCount++;
00212           }
00213         }
00214       }
00215 
00216       //printf("lineCount=%d , MCount=%d\n", lineCount, MCount);
00217       if ( MCount == 0 || ( lineCount - MCount ) > 10 ||
00218            ( ( !containsBegin || !containsEnd ) && ( MCount < 15 ) ) ) {
00219         // harder check for splitted-articles
00220         success = false;
00221         break; //too many "non-M-Lines" found, we give up
00222       }
00223 
00224       if ( ( !containsBegin || !containsEnd ) && !s_ubject.isNull() ) {
00225         // message may be split up => parse subject
00226         QRegExp rx("[0-9]+/[0-9]+");
00227         pos = rx.indexIn( QString( s_ubject ), 0 );
00228         len = rx.matchedLength();
00229         if ( pos != -1 ) {
00230           tmp = s_ubject.mid( pos, len );
00231           pos = tmp.indexOf( '/' );
00232           p_artNr = tmp.left( pos ).toInt();
00233           t_otalNr = tmp.right( tmp.length() - pos - 1).toInt();
00234         } else {
00235           success = false;
00236           break; //no "part-numbers" found in the subject, we give up
00237         }
00238       }
00239 
00240       //everything before "begin" is text
00241       if ( beginPos > 0 ) {
00242         t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
00243       }
00244 
00245       if ( containsBegin ) {
00246         //everything between "begin ### " and the next LF is considered as the filename
00247         fileName = s_rc.mid( beginPos + 10, uuStart - beginPos - 11 );
00248       } else {
00249         fileName = "";
00250       }
00251       f_ilenames.append( fileName );
00252       //everything beetween "begin" and "end" is uuencoded
00253       b_ins.append( s_rc.mid( uuStart, endPos - uuStart + 1 ) );
00254       m_imeTypes.append( guessMimeType( fileName ) );
00255       firstIteration = false;
00256 
00257       int next = s_rc.indexOf( '\n', endPos + 1 );
00258       if ( next == -1 ) { //no more line breaks found, we give up
00259         success = false;
00260         break;
00261       } else {
00262         next++; //points now at the beginning of the next line
00263       }
00264       currentPos = next;
00265 
00266     } else {
00267       success = false;
00268     }
00269   }
00270 
00271   // append trailing text part of the article
00272   t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
00273 
00274   return ( ( b_ins.count() > 0 ) || isPartial() );
00275 }
00276 
00277 //==============================================================================
00278 
00279 YENCEncoded::YENCEncoded( const QByteArray &src ) :
00280   NonMimeParser( src )
00281 {
00282 }
00283 
00284 bool YENCEncoded::yencMeta( QByteArray &src, const QByteArray &name, int *value )
00285 {
00286   bool found = false;
00287   QByteArray sought=name + '=';
00288 
00289   int iPos = src.indexOf( sought );
00290   if ( iPos > -1 ) {
00291     int pos1 = src.indexOf( ' ', iPos );
00292     int pos2 = src.indexOf( '\r', iPos );
00293     int pos3 = src.indexOf( '\t', iPos );
00294     int pos4 = src.indexOf( '\n', iPos );
00295     if ( pos2 >= 0 && ( pos1 < 0 || pos1 > pos2 ) ) {
00296       pos1 = pos2;
00297     }
00298     if ( pos3 >= 0 && ( pos1 < 0 || pos1 > pos3 ) ) {
00299       pos1 = pos3;
00300     }
00301     if ( pos4 >= 0 && ( pos1 < 0 || pos1 > pos4 ) ) {
00302       pos1 = pos4;
00303     }
00304     iPos=src.lastIndexOf( '=', pos1 ) + 1;
00305     if ( iPos < pos1 ) {
00306       char c = src.at( iPos );
00307       if ( c>='0' && c<='9' ) {
00308         found = true;
00309         *value = src.mid( iPos, pos1 - iPos ).toInt();
00310       }
00311     }
00312   }
00313   return found;
00314 }
00315 
00316 bool YENCEncoded::parse()
00317 {
00318   int currentPos=0;
00319   bool success=true;
00320 
00321   while ( success ) {
00322     int beginPos=currentPos, yencStart=currentPos;
00323     bool containsPart=false;
00324     QByteArray fileName, mimeType;
00325 
00326     if ( ( beginPos = s_rc.
00327            indexOf( "=ybegin ", currentPos ) ) > -1 &&
00328          ( beginPos == 0 || s_rc.at( beginPos - 1 ) == '\n' ) ) {
00329       yencStart = s_rc.indexOf( '\n', beginPos );
00330       if ( yencStart == -1 ) { // no more line breaks found, give up
00331         success = false;
00332         break;
00333       } else {
00334         yencStart++;
00335         if ( s_rc.indexOf( "=ypart", yencStart ) == yencStart ) {
00336           containsPart = true;
00337           yencStart = s_rc.indexOf( '\n', yencStart );
00338           if ( yencStart == -1 ) {
00339             success = false;
00340             break;
00341           }
00342           yencStart++;
00343         }
00344       }
00345       // Try to identify yenc meta data
00346 
00347       // Filenames can contain any embedded chars until end of line
00348       QByteArray meta = s_rc.mid( beginPos, yencStart - beginPos );
00349       int namePos = meta.indexOf( "name=" );
00350       if ( namePos == -1 ) {
00351         success = false;
00352         break;
00353       }
00354       int eolPos = meta.indexOf( '\r', namePos );
00355       if ( eolPos == -1 ) {
00356         eolPos = meta.indexOf( '\n', namePos );
00357       }
00358       if ( eolPos == -1 ) {
00359         success = false;
00360         break;
00361       }
00362       fileName = meta.mid( namePos + 5, eolPos - ( namePos + 5 ) );
00363 
00364       // Other metadata is integer
00365       int yencLine;
00366       if ( !yencMeta( meta, "line", &yencLine ) ) {
00367         success = false;
00368         break;
00369       }
00370       int yencSize;
00371       if ( !yencMeta( meta, "size", &yencSize ) ) {
00372         success = false;
00373         break;
00374       }
00375 
00376       int partBegin, partEnd;
00377       if ( containsPart ) {
00378         if ( !yencMeta( meta, "part", &p_artNr ) ) {
00379           success = false;
00380           break;
00381         }
00382         if ( !yencMeta( meta, "begin", &partBegin ) ||
00383              !yencMeta( meta, "end", &partEnd ) ) {
00384           success = false;
00385           break;
00386         }
00387         if ( !yencMeta( meta, "total", &t_otalNr ) ) {
00388           t_otalNr = p_artNr + 1;
00389         }
00390         if ( yencSize == partEnd - partBegin + 1 ) {
00391           t_otalNr = 1;
00392         } else {
00393           yencSize = partEnd - partBegin + 1;
00394         }
00395       }
00396 
00397       // We have a valid yenc header; now we extract the binary data
00398       int totalSize = 0;
00399       int pos = yencStart;
00400       int len = s_rc.length();
00401       bool lineStart = true;
00402       int lineLength = 0;
00403       bool containsEnd = false;
00404       QByteArray binary;
00405       binary.resize( yencSize );
00406       while ( pos < len ) {
00407         int ch = s_rc.at( pos );
00408         if ( ch < 0 ) {
00409           ch += 256;
00410         }
00411         if ( ch == '\r' ) {
00412           if ( lineLength != yencLine && totalSize != yencSize ) {
00413             break;
00414           }
00415           pos++;
00416         }
00417         else if ( ch == '\n' ) {
00418           lineStart = true;
00419           lineLength = 0;
00420           pos++;
00421         } else {
00422           if ( ch == '=' ) {
00423             if ( pos + 1 < len ) {
00424               ch = s_rc.at( pos + 1 );
00425               if ( lineStart && ch == 'y' ) {
00426                 containsEnd = true;
00427                 break;
00428               }
00429               pos += 2;
00430               ch -= 64+42;
00431               if ( ch < 0 ) {
00432                 ch += 256;
00433               }
00434               if ( totalSize >= yencSize ) {
00435                 break;
00436               }
00437               binary[totalSize++] = ch;
00438               lineLength++;
00439             } else {
00440               break;
00441             }
00442           } else {
00443             ch -= 42;
00444             if ( ch < 0 ) {
00445               ch += 256;
00446             }
00447             if ( totalSize >= yencSize ) {
00448               break;
00449             }
00450             binary[totalSize++] = ch;
00451             lineLength++;
00452             pos++;
00453           }
00454           lineStart = false;
00455         }
00456       }
00457 
00458       if ( !containsEnd ) {
00459         success = false;
00460         break;
00461       }
00462       if ( totalSize != yencSize ) {
00463         success = false;
00464         break;
00465       }
00466 
00467       // pos now points to =yend; get end data
00468       eolPos = s_rc.indexOf( '\n', pos );
00469       if ( eolPos == -1 ) {
00470         success = false;
00471         break;
00472       }
00473       meta = s_rc.mid( pos, eolPos - pos );
00474       if ( !yencMeta( meta, "size", &totalSize ) ) {
00475         success = false;
00476         break;
00477       }
00478       if ( totalSize != yencSize ) {
00479         success = false;
00480         break;
00481       }
00482 
00483       f_ilenames.append( fileName );
00484       m_imeTypes.append( guessMimeType( fileName ) );
00485       b_ins.append( binary );
00486 
00487       //everything before "begin" is text
00488       if ( beginPos > 0 ) {
00489         t_ext.append( s_rc.mid( currentPos, beginPos - currentPos ) );
00490       }
00491       currentPos = eolPos + 1;
00492 
00493     } else {
00494       success = false;
00495     }
00496   }
00497 
00498   // append trailing text part of the article
00499   t_ext.append( s_rc.right( s_rc.length() - currentPos ) );
00500 
00501   return b_ins.count()>0;
00502 }
00503 
00504 } // namespace Parser
00505 
00506 } // namespace KMime

KMIME Library

Skip menu "KMIME Library"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

KDE-PIM Libraries

Skip menu "KDE-PIM Libraries"
  • kabc
  • kblog
  • kcal
  • kimap
  • kioslave
  •   imap4
  •   mbox
  • kldap
  • kmime
  • kpimidentities
  • kpimutils
  • kresources
  • ktnef
  • kxmlrpcclient
  • mailtransport
  • qgpgme
  • syndication
  •   atom
  •   rdf
  •   rss2
Generated for KDE-PIM Libraries by doxygen 1.5.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal