kpimutils
linklocator.cpp
Go to the documentation of this file.
00001 /* 00002 Copyright (c) 2002 Dave Corrie <kde@davecorrie.com> 00003 00004 This library is free software; you can redistribute it and/or 00005 modify it under the terms of the GNU Library General Public 00006 License as published by the Free Software Foundation; either 00007 version 2 of the License, or (at your option) any later version. 00008 00009 This library is distributed in the hope that it will be useful, 00010 but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 Library General Public License for more details. 00013 00014 You should have received a copy of the GNU Library General Public License 00015 along with this library; see the file COPYING.LIB. If not, write to 00016 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00017 Boston, MA 02110-1301, USA. 00018 */ 00029 #include "linklocator.h" 00030 00031 #include <KEmoticons> 00032 00033 #include <QtCore/QCoreApplication> 00034 #include <QtCore/QFile> 00035 #include <QtCore/QRegExp> 00036 #include <QtGui/QTextDocument> 00037 00038 #include <climits> 00039 00040 using namespace KPIMUtils; 00041 00046 //@cond PRIVATE 00047 class KPIMUtils::LinkLocator::Private 00048 { 00049 public: 00050 int mMaxUrlLen; 00051 int mMaxAddressLen; 00052 }; 00053 //@endcond 00054 00055 // Use a static for this as calls to the KEmoticons constructor are expensive. 00056 K_GLOBAL_STATIC( KEmoticons, sEmoticons ) 00057 00058 LinkLocator::LinkLocator( const QString &text, int pos ) 00059 : mText( text ), mPos( pos ), d( new KPIMUtils::LinkLocator::Private ) 00060 { 00061 d->mMaxUrlLen = 4096; 00062 d->mMaxAddressLen = 255; 00063 00064 // If you change either of the above values for maxUrlLen or 00065 // maxAddressLen, then please also update the documentation for 00066 // setMaxUrlLen()/setMaxAddressLen() in the header file AND the 00067 // default values used for the maxUrlLen/maxAddressLen parameters 00068 // of convertToHtml(). 00069 } 00070 00071 LinkLocator::~LinkLocator() 00072 { 00073 delete d; 00074 } 00075 00076 void LinkLocator::setMaxUrlLen( int length ) 00077 { 00078 d->mMaxUrlLen = length; 00079 } 00080 00081 int LinkLocator::maxUrlLen() const 00082 { 00083 return d->mMaxUrlLen; 00084 } 00085 00086 void LinkLocator::setMaxAddressLen( int length ) 00087 { 00088 d->mMaxAddressLen = length; 00089 } 00090 00091 int LinkLocator::maxAddressLen() const 00092 { 00093 return d->mMaxAddressLen; 00094 } 00095 00096 QString LinkLocator::getUrl() 00097 { 00098 QString url; 00099 if ( atUrl() ) { 00100 // NOTE: see http://tools.ietf.org/html/rfc3986#appendix-A and especially appendix-C 00101 // Appendix-C mainly says, that when extracting URLs from plain text, line breaks shall 00102 // be allowed and should be ignored when the URI is extracted. 00103 00104 // This implementation follows this recommendation and 00105 // allows the URL to be enclosed within different kind of brackets/quotes 00106 // If an URL is enclosed, whitespace characters are allowed and removed, otherwise 00107 // the URL ends with the first whitespace 00108 // Also, if the URL is enclosed in brackets, the URL itself is not allowed 00109 // to contain the closing bracket, as this would be detected as the end of the URL 00110 00111 QChar beforeUrl, afterUrl; 00112 00113 // detect if the url has been surrounded by brackets or quotes 00114 if ( mPos > 0 ) { 00115 beforeUrl = mText[mPos - 1]; 00116 00117 if ( beforeUrl == '(' ) { 00118 afterUrl = ')'; 00119 } else if ( beforeUrl == '[' ) { 00120 afterUrl = ']'; 00121 } else if ( beforeUrl == '<' ) { 00122 afterUrl = '>'; 00123 } else if ( beforeUrl == '>' ) { // for e.g. <link>http://.....</link> 00124 afterUrl = '<'; 00125 } else if ( beforeUrl == '"' ) { 00126 afterUrl = '"'; 00127 } 00128 } 00129 00130 url.reserve( maxUrlLen() ); // avoid allocs 00131 int start = mPos; 00132 while ( ( mPos < (int)mText.length() ) && 00133 ( mText[mPos].isPrint() || mText[mPos].isSpace() ) && 00134 ( ( afterUrl.isNull() && !mText[mPos].isSpace() ) || 00135 ( !afterUrl.isNull() && mText[mPos] != afterUrl ) ) ) { 00136 if ( !mText[mPos].isSpace() ) { // skip whitespace 00137 url.append( mText[mPos] ); 00138 if ( url.length() > maxUrlLen() ) { 00139 break; 00140 } 00141 } 00142 00143 mPos++; 00144 } 00145 00146 if ( isEmptyUrl(url) || ( url.length() > maxUrlLen() ) ) { 00147 mPos = start; 00148 url = ""; 00149 } else { 00150 --mPos; 00151 } 00152 } 00153 00154 // HACK: This is actually against the RFC. However, most people don't properly escape the URL in 00155 // their text with "" or <>. That leads to people writing an url, followed immediatley by 00156 // a dot to finish the sentence. That would lead the parser to include the dot in the url, 00157 // even though that is not wanted. So work around that here. 00158 // Most real-life URLs hopefully don't end with dots or commas. 00159 if ( url.length() > 1 ) { 00160 QList<QChar> wordBoundaries; 00161 wordBoundaries << '.' << ',' << ':' << '!' << '?'; 00162 if ( wordBoundaries.contains( url.at( url.length() - 1 ) ) ) { 00163 url.chop( 1 ); 00164 --mPos; 00165 } 00166 } 00167 00168 return url; 00169 } 00170 00171 // keep this in sync with KMMainWin::slotUrlClicked() 00172 bool LinkLocator::atUrl() const 00173 { 00174 // the following characters are allowed in a dot-atom (RFC 2822): 00175 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ 00176 const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" ); 00177 00178 // the character directly before the URL must not be a letter, a number or 00179 // any other character allowed in a dot-atom (RFC 2822). 00180 if ( ( mPos > 0 ) && 00181 ( mText[mPos-1].isLetterOrNumber() || 00182 ( allowedSpecialChars.indexOf( mText[mPos-1] ) != -1 ) ) ) { 00183 return false; 00184 } 00185 00186 QChar ch = mText[mPos]; 00187 return 00188 ( ch == 'h' && ( mText.mid( mPos, 7 ) == "http://" || 00189 mText.mid( mPos, 8 ) == "https://" ) ) || 00190 ( ch == 'v' && mText.mid( mPos, 6 ) == "vnc://" ) || 00191 ( ch == 'f' && ( mText.mid( mPos, 7 ) == "fish://" || 00192 mText.mid( mPos, 6 ) == "ftp://" || 00193 mText.mid( mPos, 7 ) == "ftps://" ) ) || 00194 ( ch == 's' && ( mText.mid( mPos, 7 ) == "sftp://" || 00195 mText.mid( mPos, 6 ) == "smb://" ) ) || 00196 ( ch == 'm' && mText.mid( mPos, 7 ) == "mailto:" ) || 00197 ( ch == 'w' && mText.mid( mPos, 4 ) == "www." ) || 00198 ( ch == 'f' && ( mText.mid( mPos, 4 ) == "ftp." || 00199 mText.mid( mPos, 7 ) == "file://" ) ) || 00200 ( ch == 'n' && mText.mid( mPos, 5 ) == "news:" ); 00201 } 00202 00203 bool LinkLocator::isEmptyUrl( const QString &url ) const 00204 { 00205 return url.isEmpty() || 00206 url == "http://" || 00207 url == "https://" || 00208 url == "fish://" || 00209 url == "ftp://" || 00210 url == "ftps://" || 00211 url == "sftp://" || 00212 url == "smb://" || 00213 url == "vnc://" || 00214 url == "mailto" || 00215 url == "www" || 00216 url == "ftp" || 00217 url == "news" || 00218 url == "news://"; 00219 } 00220 00221 QString LinkLocator::getEmailAddress() 00222 { 00223 QString address; 00224 00225 if ( mText[mPos] == '@' ) { 00226 // the following characters are allowed in a dot-atom (RFC 2822): 00227 // a-z A-Z 0-9 . ! # $ % & ' * + - / = ? ^ _ ` { | } ~ 00228 const QString allowedSpecialChars = QString( ".!#$%&'*+-/=?^_`{|}~" ); 00229 00230 // determine the local part of the email address 00231 int start = mPos - 1; 00232 while ( start >= 0 && mText[start].unicode() < 128 && 00233 ( mText[start].isLetterOrNumber() || 00234 mText[start] == '@' || // allow @ to find invalid email addresses 00235 allowedSpecialChars.indexOf( mText[start] ) != -1 ) ) { 00236 if ( mText[start] == '@' ) { 00237 return QString(); // local part contains '@' -> no email address 00238 } 00239 --start; 00240 } 00241 ++start; 00242 // we assume that an email address starts with a letter or a digit 00243 while ( ( start < mPos ) && !mText[start].isLetterOrNumber() ) { 00244 ++start; 00245 } 00246 if ( start == mPos ) { 00247 return QString(); // local part is empty -> no email address 00248 } 00249 00250 // determine the domain part of the email address 00251 int dotPos = INT_MAX; 00252 int end = mPos + 1; 00253 while ( end < (int)mText.length() && 00254 ( mText[end].isLetterOrNumber() || 00255 mText[end] == '@' || // allow @ to find invalid email addresses 00256 mText[end] == '.' || 00257 mText[end] == '-' ) ) { 00258 if ( mText[end] == '@' ) { 00259 return QString(); // domain part contains '@' -> no email address 00260 } 00261 if ( mText[end] == '.' ) { 00262 dotPos = qMin( dotPos, end ); // remember index of first dot in domain 00263 } 00264 ++end; 00265 } 00266 // we assume that an email address ends with a letter or a digit 00267 while ( ( end > mPos ) && !mText[end - 1].isLetterOrNumber() ) { 00268 --end; 00269 } 00270 if ( end == mPos ) { 00271 return QString(); // domain part is empty -> no email address 00272 } 00273 if ( dotPos >= end ) { 00274 return QString(); // domain part doesn't contain a dot 00275 } 00276 00277 if ( end - start > maxAddressLen() ) { 00278 return QString(); // too long -> most likely no email address 00279 } 00280 address = mText.mid( start, end - start ); 00281 00282 mPos = end - 1; 00283 } 00284 return address; 00285 } 00286 00287 QString LinkLocator::convertToHtml( const QString &plainText, int flags, 00288 int maxUrlLen, int maxAddressLen ) 00289 { 00290 LinkLocator locator( plainText ); 00291 locator.setMaxUrlLen( maxUrlLen ); 00292 locator.setMaxAddressLen( maxAddressLen ); 00293 00294 QString str; 00295 QString result( (QChar*)0, (int)locator.mText.length() * 2 ); 00296 QChar ch; 00297 int x; 00298 bool startOfLine = true; 00299 QString emoticon; 00300 00301 for ( locator.mPos = 0, x = 0; locator.mPos < (int)locator.mText.length(); 00302 locator.mPos++, x++ ) { 00303 ch = locator.mText[locator.mPos]; 00304 if ( flags & PreserveSpaces ) { 00305 if ( ch == ' ' ) { 00306 if ( locator.mPos + 1 < locator.mText.length() ) { 00307 if ( locator.mText[locator.mPos + 1] != ' ' ) { 00308 00309 // A single space, make it breaking if not at the start or end of the line 00310 const bool endOfLine = locator.mText[locator.mPos + 1] == '\n'; 00311 if ( !startOfLine && !endOfLine ) { 00312 result += ' '; 00313 } else { 00314 result += " "; 00315 } 00316 } else { 00317 00318 // Whitespace of more than one space, make it all non-breaking 00319 while ( locator.mPos < locator.mText.length() && locator.mText[locator.mPos] == ' ' ) { 00320 result += " "; 00321 locator.mPos++; 00322 x++; 00323 } 00324 00325 // We incremented once to often, undo that 00326 locator.mPos--; 00327 x--; 00328 } 00329 } else { 00330 // Last space in the text, it is non-breaking 00331 result += " "; 00332 } 00333 00334 if ( startOfLine ) { 00335 startOfLine = false; 00336 } 00337 continue; 00338 } else if ( ch == '\t' ) { 00339 do 00340 { 00341 result += " "; 00342 x++; 00343 } 00344 while ( ( x & 7 ) != 0 ); 00345 x--; 00346 startOfLine = false; 00347 continue; 00348 } 00349 } 00350 if ( ch == '\n' ) { 00351 result += "<br />\n"; // Keep the \n, so apps can figure out the quoting levels correctly. 00352 startOfLine = true; 00353 x = -1; 00354 continue; 00355 } 00356 00357 startOfLine = false; 00358 if ( ch == '&' ) { 00359 result += "&"; 00360 } else if ( ch == '"' ) { 00361 result += """; 00362 } else if ( ch == '<' ) { 00363 result += "<"; 00364 } else if ( ch == '>' ) { 00365 result += ">"; 00366 } else { 00367 const int start = locator.mPos; 00368 if ( !( flags & IgnoreUrls ) ) { 00369 str = locator.getUrl(); 00370 if ( !str.isEmpty() ) { 00371 QString hyperlink; 00372 if ( str.left( 4 ) == "www." ) { 00373 hyperlink = "http://" + str; 00374 } else if ( str.left( 4 ) == "ftp." ) { 00375 hyperlink = "ftp://" + str; 00376 } else { 00377 hyperlink = str; 00378 } 00379 00380 result += "<a href=\"" + hyperlink + "\">" + Qt::escape( str ) + "</a>"; 00381 x += locator.mPos - start; 00382 continue; 00383 } 00384 str = locator.getEmailAddress(); 00385 if ( !str.isEmpty() ) { 00386 // len is the length of the local part 00387 int len = str.indexOf( '@' ); 00388 QString localPart = str.left( len ); 00389 00390 // remove the local part from the result (as '&'s have been expanded to 00391 // & we have to take care of the 4 additional characters per '&') 00392 result.truncate( result.length() - 00393 len - ( localPart.count( '&' ) * 4 ) ); 00394 x -= len; 00395 00396 result += "<a href=\"mailto:" + str + "\">" + str + "</a>"; 00397 x += str.length() - 1; 00398 continue; 00399 } 00400 } 00401 if ( flags & HighlightText ) { 00402 str = locator.highlightedText(); 00403 if ( !str.isEmpty() ) { 00404 result += str; 00405 x += locator.mPos - start; 00406 continue; 00407 } 00408 } 00409 result += ch; 00410 } 00411 } 00412 00413 if ( flags & ReplaceSmileys ) { 00414 QStringList exclude; 00415 exclude << "(c)" << "(C)" << ">:-(" << ">:(" << "(B)" << "(b)" << "(P)" << "(p)"; 00416 exclude << "(O)" << "(o)" << "(D)" << "(d)" << "(E)" << "(e)" << "(K)" << "(k)"; 00417 exclude << "(I)" << "(i)" << "(L)" << "(l)" << "(8)" << "(T)" << "(t)" << "(G)"; 00418 exclude << "(g)" << "(F)" << "(f)" << "(H)"; 00419 exclude << "8)" << "(N)" << "(n)" << "(Y)" << "(y)" << "(U)" << "(u)" << "(W)" << "(w)"; 00420 static QString cachedEmoticonsThemeName; 00421 if ( cachedEmoticonsThemeName.isEmpty() ) { 00422 cachedEmoticonsThemeName = KEmoticons::currentThemeName(); 00423 } 00424 result = 00425 sEmoticons->theme( cachedEmoticonsThemeName ).parseEmoticons( 00426 result, KEmoticonsTheme::StrictParse | KEmoticonsTheme::SkipHTML, exclude ); 00427 } 00428 00429 return result; 00430 } 00431 00432 QString LinkLocator::pngToDataUrl( const QString &iconPath ) 00433 { 00434 if ( iconPath.isEmpty() ) { 00435 return QString(); 00436 } 00437 00438 QFile pngFile( iconPath ); 00439 if ( !pngFile.open( QIODevice::ReadOnly | QIODevice::Unbuffered ) ) { 00440 return QString(); 00441 } 00442 00443 QByteArray ba = pngFile.readAll(); 00444 pngFile.close(); 00445 return QString::fromLatin1( "data:image/png;base64,%1" ).arg( ba.toBase64().constData() ); 00446 } 00447 00448 QString LinkLocator::highlightedText() 00449 { 00450 // formating symbols must be prepended with a whitespace 00451 if ( ( mPos > 0 ) && !mText[mPos-1].isSpace() ) { 00452 return QString(); 00453 } 00454 00455 const QChar ch = mText[mPos]; 00456 if ( ch != '/' && ch != '*' && ch != '_' ) { 00457 return QString(); 00458 } 00459 00460 QRegExp re = 00461 QRegExp( QString( "\\%1((\\w+)([\\s-']\\w+)*( ?[,.:\\?!;])?)\\%2" ).arg( ch ).arg( ch ) ); 00462 re.setMinimal(true); 00463 if ( re.indexIn( mText, mPos ) == mPos ) { 00464 int length = re.matchedLength(); 00465 // there must be a whitespace after the closing formating symbol 00466 if ( mPos + length < mText.length() && !mText[mPos + length].isSpace() ) { 00467 return QString(); 00468 } 00469 mPos += length - 1; 00470 switch ( ch.toLatin1() ) { 00471 case '*': 00472 return "<b>" + re.cap( 1 ) + "</b>"; 00473 case '_': 00474 return "<u>" + re.cap( 1 ) + "</u>"; 00475 case '/': 00476 return "<i>" + re.cap( 1 ) + "</i>"; 00477 } 00478 } 00479 return QString(); 00480 }
This file is part of the KDE documentation.
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon Apr 30 2012 21:48:48 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006
Documentation copyright © 1996-2012 The KDE developers.
Generated on Mon Apr 30 2012 21:48:48 by doxygen 1.8.0 written by Dimitri van Heesch, © 1997-2006
KDE's Doxygen guidelines are available online.