// Include guard for this header.
27 #ifndef __MYGUI_U_STRING_H__
28 #define __MYGUI_U_STRING_H__
// When building with GCC, flag this file as a system header.
41 #if MYGUI_COMPILER == MYGUI_COMPILER_GNUC
42 #pragma GCC system_header
// Only for MSVC with MYGUI_COMP_VER in the inclusive range 1300..1310.
52 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
// Only when _DLL_CPPLIB is defined: explicitly instantiate basic_string for the
// element types `unsigned short` and `__wchar_t`, each decorated with _CRTIMP2.
54 # if defined(_DLL_CPPLIB)
58 template class _CRTIMP2 basic_string<unsigned short, char_traits<unsigned short>,
59 allocator<unsigned short> >;
61 template class _CRTIMP2 basic_string<__wchar_t, char_traits<__wchar_t>,
62 allocator<__wchar_t> >;
65 # endif // defined(_DLL_CPPLIB)
67 #endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC && (1300 <= MYGUI_COMP_VER && MYGUI_COMP_VER <= 1310)
// Decide whether wchar_t is treated as holding UTF-16 code units (WCHAR_UTF16).
// The tests in the #else branch only run when __STDC_ISO_10646__ is NOT defined:
//   1. Windows targets (__WIN32__ or _WIN32) always get WCHAR_UTF16.
//   2. Anywhere else, WCHAR_UTF16 is defined only if WCHAR_MAX fits in 16 bits.
106 #ifdef __STDC_ISO_10646__
109 #else // #ifdef __STDC_ISO_10646__
110 #if defined( __WIN32__ ) || defined( _WIN32 )
111 #define WCHAR_UTF16 // All currently known Windows platforms utilize UTF-16 encoding in wchar_t
112 #else // #if defined( __WIN32__ ) || defined( _WIN32 )
113 #if WCHAR_MAX <= 0xFFFF // this is a last resort fall back test; WCHAR_MAX is defined in <wchar.h>
114 #define WCHAR_UTF16 // best we can tell, wchar_t is not larger than 16-bit
115 #endif // #if WCHAR_MAX <= 0xFFFF
116 #endif // #if defined( __WIN32__ ) || defined( _WIN32 )
117 #endif // #ifdef __STDC_ISO_10646__
// MYGUI_IS_NATIVE_WCHAR_T is the switch tested by the `#if MYGUI_IS_NATIVE_WCHAR_T`
// sections further down in this header.
// Under MSVC it follows _NATIVE_WCHAR_T_DEFINED: 1 when that macro is defined,
// 0 otherwise (presumably the alternative branch of the inner conditional).
122 #if MYGUI_COMPILER == MYGUI_COMPILER_MSVC
128 # if defined(_NATIVE_WCHAR_T_DEFINED)
129 # define MYGUI_IS_NATIVE_WCHAR_T 1
131 # define MYGUI_IS_NATIVE_WCHAR_T 0
// Every compiler other than MSVC always gets 1.
134 #else // MYGUI_COMPILER != MYGUI_COMPILER_MSVC
137 # define MYGUI_IS_NATIVE_WCHAR_T 1
139 #endif // MYGUI_COMPILER == MYGUI_COMPILER_MSVC
// UTF-8 byte patterns used by the encode/decode helpers in this header.
// Each _leadN is the fixed high-bit pattern of a lead byte that is followed by N
// continuation bytes; _leadN_mask selects the payload bits below that pattern.
// A byte b matches a pattern when ( b & ~_leadN_mask ) == _leadN.
170 static const unsigned char _lead1 = 0xC0; // 110xxxxx - lead byte of a 2-byte sequence
171 static const unsigned char _lead1_mask = 0x1F; // 00011111 - 5 payload bits
172 static const unsigned char _lead2 = 0xE0; // 1110xxxx - lead byte of a 3-byte sequence
173 static const unsigned char _lead2_mask = 0x0F; // 00001111 - 4 payload bits
174 static const unsigned char _lead3 = 0xF0; // 11110xxx - lead byte of a 4-byte sequence
175 static const unsigned char _lead3_mask = 0x07; // 00000111 - 3 payload bits
176 static const unsigned char _lead4 = 0xF8; // 111110xx - lead byte of a 5-byte sequence
177 static const unsigned char _lead4_mask = 0x03; // 00000011 - 2 payload bits
178 static const unsigned char _lead5 = 0xFC; // 1111110x - lead byte of a 6-byte sequence
179 static const unsigned char _lead5_mask = 0x01; // 00000001 - 1 payload bit
180 static const unsigned char _cont = 0x80; // 10xxxxxx - continuation byte
181 static const unsigned char _cont_mask = 0x3F; // 00111111 - 6 payload bits
198 typedef std::basic_string<code_point>
dstring;
208 explicit invalid_data(
const std::string& _Message ): std::runtime_error( _Message )
216 class _base_iterator:
public std::iterator<std::random_access_iterator_tag, value_type>
278 lead_half =
mIter[-1];
294 lead_half =
mIter[-1];
630 class _const_rev_iterator;
944 assign( str, index, length );
946 #if MYGUI_IS_NATIVE_WCHAR_T
1021 return mData.max_size();
1026 mData.reserve( size );
1031 mData.resize( num, val );
1036 mData.swap( from.mData );
1041 return mData.empty();
1046 return mData.c_str();
1056 return mData.capacity();
1070 tmp.mData.swap( data );
1081 #if MYGUI_IS_NATIVE_WCHAR_T
1086 mData.push_back( static_cast<unicode_char>( val ) );
1094 mData.push_back( val );
1100 mData.push_back( static_cast<code_point>( val ) );
// Conversion accessors: each pair of statements first rebuilds the matching
// conversion buffer and then exposes it, either by reference or as a C string.
// The returned data is stored in m_buffer, not in mData.
// NOTE(review): m_buffer appears to hold a single buffer at a time (see
// m_bufferType), so a result is presumably replaced by the next conversion to a
// different type - confirm before holding on to returned references/pointers.

// UTF-8 as std::string.
1122 _load_buffer_UTF8();
1123 return *m_buffer.mStrBuffer;
// UTF-8 as a C string.
1128 _load_buffer_UTF8();
1129 return m_buffer.mStrBuffer->c_str();
// UTF-32 as utf32string.
1134 _load_buffer_UTF32();
1135 return *m_buffer.mUTF32StrBuffer;
// UTF-32 as a C-style array.
1140 _load_buffer_UTF32();
1141 return m_buffer.mUTF32StrBuffer->c_str();
// Wide string as std::wstring.
1146 _load_buffer_WStr();
1147 return *m_buffer.mWStrBuffer;
// Wide string as a C string.
1152 _load_buffer_WStr();
1153 return m_buffer.mWStrBuffer->c_str();
1164 return mData.at( loc );
1169 return mData.at( loc );
1184 if ( len == 2 && ( loc + 1 ) < mData.length() )
1208 if ( newSize > existingSize )
1211 insert( loc + 1, 1, cp[1] );
1214 if ( newSize < existingSize )
1223 if ( lc == 2 )
at( loc + 1 ) = cp[1];
1236 i.
mIter = mData.begin();
1252 i.
mIter = mData.end();
1268 i.
mIter = mData.end();
1284 i.
mIter = mData.begin();
1311 mData.assign( str.mData );
1317 mData.assign( str );
1323 mData.assign( str, num );
1329 mData.assign( str.mData, index, len );
1335 mData.assign( num, ch );
1342 mData.reserve( wstr.length() );
1343 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
1345 std::wstring::const_iterator i, ie = wstr.end();
1346 for ( i = wstr.begin(); i != ie; ++i )
1349 mData.push_back( tmp );
1351 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
1354 std::wstring::const_iterator i, ie = wstr.end();
1355 for ( i = wstr.begin(); i != ie; i++ )
1359 if ( lc > 0 ) mData.push_back( cp[0] );
1360 if ( lc > 1 ) mData.push_back( cp[1] );
1365 #if MYGUI_IS_NATIVE_WCHAR_T
1377 tmp.
assign( w_str, num );
1392 unsigned char utf8buf[7];
1399 std::string::const_iterator i, ie = str.end();
1400 for ( i = str.begin(); i != ie; ++i )
1403 for (
size_t j = 0; j < utf8len; j++ )
1405 utf8buf[j] = (
static_cast<unsigned char>( *( i + j ) ) );
1407 utf8buf[utf8len] = 0;
1412 append( utf16buff, utf16len );
1419 std::string tmp( c_str );
1426 tmp.
assign( c_str, num );
1438 mData.append( str.mData );
1444 mData.append( str );
1450 mData.append( str.mData, index, len );
1456 mData.append( str, num );
1462 mData.append( num, ch );
1471 #if MYGUI_IS_NATIVE_WCHAR_T
1475 std::wstring tmp( w_str, num );
1481 return append( num, static_cast<unicode_char>( ch ) );
1494 append( num, static_cast<code_point>( ch ) );
1535 mData.insert( index, str.mData );
1541 mData.insert( index, str );
1547 mData.insert( index1, str.mData, index2, num );
1558 mData.insert( index, str, num );
1561 #if MYGUI_IS_NATIVE_WCHAR_T
1580 mData.insert( index, num, ch );
1583 #if MYGUI_IS_NATIVE_WCHAR_T
1587 insert( index, num, static_cast<unicode_char>( ch ) );
1594 insert( index, num, static_cast<code_point>( ch ) );
1604 return insert( index, num, cp[0] );
1609 insert( index, 1, cp[1] );
1610 insert( index, 1, cp[0] );
1617 mData.insert( i.
mIter, num, ch );
1619 #if MYGUI_IS_NATIVE_WCHAR_T
1623 insert( i, num, static_cast<unicode_char>( ch ) );
1629 insert( i, num, static_cast<code_point>( ch ) );
1676 mData.erase( index );
1678 mData.erase( index, num );
1690 mData.replace( index1, num1, str.mData, 0,
npos );
1696 mData.replace( index1, num1, str.mData, 0, num2 );
1702 mData.replace( index1, num1, str.mData, index2, num2 );
1712 return replace( index1, num1, str, 0, num );
1717 mData.replace( index, num1, num2, ch );
1727 return replace( index1, num1, num, ch );
1738 return mData.compare( str.mData );
1743 return mData.compare( str );
1748 return mData.compare( index, length, str.mData );
1753 return mData.compare( index, length, str.mData, index2, length2 );
1758 return mData.compare( index, length, str, length2 );
1760 #if MYGUI_IS_NATIVE_WCHAR_T
1764 UString tmp( w_str, length2 );
1765 return compare( index, length, tmp );
1771 UString tmp( c_str, length2 );
1772 return compare( index, length, tmp );
1784 return mData.find( str.
c_str(), index );
1800 #if MYGUI_IS_NATIVE_WCHAR_T
1813 return find( static_cast<code_point>( ch ), index );
1819 return mData.find( ch, index );
1821 #if MYGUI_IS_NATIVE_WCHAR_T
1826 return find( static_cast<unicode_char>( ch ), index );
1841 return mData.rfind( str.
c_str(), index );
1847 return mData.rfind( tmp.
c_str(), index, num );
1853 return mData.rfind( tmp.
c_str(), index, num );
1855 #if MYGUI_IS_NATIVE_WCHAR_T
1860 return mData.rfind( tmp.
c_str(), index, num );
1866 return rfind( static_cast<code_point>( ch ), index );
1871 return mData.rfind( ch, index );
1873 #if MYGUI_IS_NATIVE_WCHAR_T
1877 return rfind( static_cast<unicode_char>( ch ), index );
1898 while ( i < num && ( index + i ) < len )
1917 return find_first_of( static_cast<code_point>( ch ), index );
1919 #if MYGUI_IS_NATIVE_WCHAR_T
1923 return find_first_of( static_cast<unicode_char>( ch ), index );
1939 while ( i < num && ( index + i ) < len )
1960 #if MYGUI_IS_NATIVE_WCHAR_T
1980 if ( index > len ) index = len - 1;
1982 while ( i < num && ( index - i ) !=
npos )
2008 return find_last_of( static_cast<code_point>( ch ), index );
2010 #if MYGUI_IS_NATIVE_WCHAR_T
2014 return find_last_of( static_cast<unicode_char>( ch ), index );
2030 if ( index > len ) index = len - 1;
2032 while ( i < num && ( index - i ) !=
npos )
2060 #if MYGUI_IS_NATIVE_WCHAR_T
2127 #if MYGUI_IS_NATIVE_WCHAR_T
// Implicit conversion to std::string: returns a copy of the asUTF8() result.
2158 operator std::string()
const
2160 return std::string(
asUTF8() );
// Implicit conversion to std::wstring: returns a copy of the asWStr() result.
2163 operator std::wstring()
const
2165 return std::wstring(
asWStr() );
// UTF-16 surrogate classification. The ranges tested below are:
//   0xD800..0xDFFF - any surrogate code unit
//   0xD800..0xDBFF - lead (high) surrogate, first unit of a pair
//   0xDC00..0xDFFF - trail (low) surrogate, second unit of a pair
// Any surrogate.
2176 if ( 0xD800 <= cp && cp <= 0xDFFF )
// Lead surrogate.
2183 if ( 0xD800 <= cp && cp <= 0xDBFF )
// Trail surrogate.
2190 if ( 0xDC00 <= cp && cp <= 0xDFFF )
// Lead surrogate.
2197 if ( 0xD800 <= cp && cp <= 0xDBFF )
// UTF-16 -> UTF-32: wordPair starts false; the tests below look for a lead
// surrogate in cp1 followed by a trail surrogate in cp2.
2217 bool wordPair =
false;
2220 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF )
2223 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
2233 unsigned short cU = cp1, cL = cp2;
// Each surrogate carries 10 payload bits: the lead unit supplies the upper 10
// and the trail unit the lower 10 of the combined value.
2237 out_uc = ( cU & 0x03FF ) << 10;
2238 out_uc |= ( cL & 0x03FF );
// UTF-32 -> UTF-16: values up to 0xFFFF take this branch (presumably the
// single-code-unit case - TODO confirm).
2251 if ( in_uc <= 0xFFFF )
// Extract bits 10..19 of uc, a 10-bit field (presumably the lead-surrogate payload).
2261 tmp = ( uc >> 10 ) & 0x03FF;
// True when cp does NOT have the continuation-byte pattern 10xxxxxx.
2281 return ( cp & ~_cont_mask ) != _cont;
// Sequence length implied by a UTF-8 first byte cp:
// high bit clear -> 1 byte; otherwise the lead pattern matched selects 2..6 bytes.
2286 if ( !( cp & 0x80 ) )
return 1;
2287 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
2288 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
2289 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
2290 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
2291 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
// No pattern matched: cp cannot start a sequence.
2292 throw invalid_data(
"invalid UTF-8 sequence header value" );
// Number of UTF-8 bytes needed to encode the value uc, chosen by the smallest
// bit width that holds it: 7, 11, 16, 21, 26 or 31 bits -> 1..6 bytes.
2305 if ( !( uc & ~0x0000007F ) )
return 1;
2306 if ( !( uc & ~0x000007FF ) )
return 2;
2307 if ( !( uc & ~0x0000FFFF ) )
return 3;
2308 if ( !( uc & ~0x001FFFFF ) )
return 4;
2309 if ( !( uc & ~0x03FFFFFF ) )
return 5;
2310 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
// Decoding: start the accumulator c with the payload bits of the lead byte.
// Exactly one of these masks applies (presumably selected by sequence length,
// longest form first - TODO confirm against the surrounding switch).
2329 c = in_cp[i] & _lead5_mask;
2332 c = in_cp[i] & _lead4_mask;
2335 c = in_cp[i] & _lead3_mask;
2338 c = in_cp[i] & _lead2_mask;
2341 c = in_cp[i] & _lead1_mask;
// Visit the bytes that follow the lead byte.
2345 for ( ++i; i <
len; i++ )
// Every following byte must carry the continuation pattern 10xxxxxx.
2347 if (( in_cp[i] & ~_cont_mask ) != _cont )
// Merge the 6 payload bits of this continuation byte into c.
2350 c |= ( in_cp[i] & _cont_mask );
// Encoding: fill bytes from the last one down to index 1 with continuation bytes.
2363 for (
size_t i = len - 1; i > 0; i-- )
// Low 6 bits of c tagged with the continuation pattern.
2365 out_cp[i] = (( c ) & _cont_mask ) | _cont;
// Then write the lead byte: remaining payload bits tagged with the lead pattern
// that matches the sequence length (one of these applies).
2373 out_cp[0] = (( c ) & _lead5_mask ) | _lead5;
2376 out_cp[0] = (( c ) & _lead4_mask ) | _lead4;
2379 out_cp[0] = (( c ) & _lead3_mask ) | _lead3;
2382 out_cp[0] = (( c ) & _lead2_mask ) | _lead2;
2385 out_cp[0] = (( c ) & _lead1_mask ) | _lead1;
// Single-byte form: just the low 7 bits, high bit clear.
2389 out_cp[0] = ( c ) & 0x7F;
2400 std::string tmp( reinterpret_cast<const char*>( c_str ) );
2406 std::string::const_iterator i, ie = str.end();
2415 unsigned char c = ( *i );
2416 size_t contBytes = 0;
2419 if (( c & ~_lead1_mask ) == _lead1 )
2421 if ( c == _lead1 )
throw invalid_data(
"overlong UTF-8 sequence" );
2425 else if (( c & ~_lead2_mask ) == _lead2 )
2431 if (( c & _lead2 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
2435 else if (( c & ~_lead3_mask ) == _lead3 )
2441 if (( c & _lead3 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
2445 else if (( c & ~_lead4_mask ) == _lead4 )
2451 if (( c & _lead4 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
2455 else if (( c & ~_lead5_mask ) == _lead5 )
2461 if (( c & _lead5 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
2466 while ( contBytes-- )
2469 if (( c & ~_cont_mask ) != _cont )
// Initial buffer state: no buffer allocated and no buffer type recorded.
2496 m_buffer.mVoidBuffer = 0;
2497 m_bufferType = bt_none;
// Releases the current conversion buffer, if any.
// m_bufferType tells which typed pointer in m_buffer owns the allocation, so the
// delete goes through the matching type. Declared const (the buffer members are
// mutable), so it can run from const accessors.
2504 void _cleanBuffer()
const
// Nothing to free when no buffer is allocated.
2506 if ( m_buffer.mVoidBuffer != 0 )
2508 switch ( m_bufferType )
2511 delete m_buffer.mStrBuffer;
2514 delete m_buffer.mWStrBuffer;
2516 case bt_utf32string:
2517 delete m_buffer.mUTF32StrBuffer;
// A non-null buffer with an unrecognised type is a programming error.
2523 MYGUI_ASSERT(
false,
"This should never happen - mVoidBuffer should never contain something if we "
2524 "don't know the type");
// Mark the buffer as released.
2527 m_buffer.mVoidBuffer = 0;
// Ensures m_buffer holds a std::string: allocates one when the current buffer
// type is something else, then empties it. The result is always an empty string.
2533 void _getBufferStr()
const
2535 if ( m_bufferType != bt_string )
2538 m_buffer.mStrBuffer =
new std::string();
2539 m_bufferType = bt_string;
2541 m_buffer.mStrBuffer->clear();
// Same as _getBufferStr(), for a std::wstring buffer.
2544 void _getBufferWStr()
const
2546 if ( m_bufferType != bt_wstring )
2549 m_buffer.mWStrBuffer =
new std::wstring();
2550 m_bufferType = bt_wstring;
2552 m_buffer.mWStrBuffer->clear();
// Same as _getBufferStr(), for the UTF-32 string buffer.
2555 void _getBufferUTF32Str()
const
2557 if ( m_bufferType != bt_utf32string )
2561 m_bufferType = bt_utf32string;
2563 m_buffer.mUTF32StrBuffer->clear();
// Rebuilds the std::string conversion buffer with the UTF-8 form of this string.
2566 void _load_buffer_UTF8()
const
2569 std::string& buffer = ( *m_buffer.mStrBuffer );
// Pre-size using length(); the UTF-8 form may need more bytes than this.
2570 buffer.reserve(
length() );
// Scratch space for one encoded character (at most 6 bytes), also viewed as
// plain char so the bytes can be appended to the std::string.
2572 unsigned char utf8buf[6];
2573 char* charbuf = (
char* )utf8buf;
// Fetch the full character at the iterator.
2580 c = i.getCharacter();
// Append the encoded bytes one at a time.
2584 buffer.push_back( charbuf[j++] );
// Rebuilds the std::wstring conversion buffer from this string.
2587 void _load_buffer_WStr()
const
2590 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
2591 buffer.reserve(
length() );
2592 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
// Copy each element unchanged, converting only the type.
2594 for ( i =
begin(); i != ie; ++i )
2596 buffer.push_back((
wchar_t )( *i ) );
2598 #else // wchar_t fits UTF-32
// Append one wchar_t per full character returned by getCharacter().
2603 c = i.getCharacter();
2604 buffer.push_back((
wchar_t )c );
// Rebuilds the UTF-32 conversion buffer: one element per full character.
2608 void _load_buffer_UTF32()
const
// Make sure m_buffer currently holds an empty UTF-32 string.
2610 _getBufferUTF32Str();
2611 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2612 buffer.reserve(
length() );
2619 c = i.getCharacter();
2620 buffer.push_back( c );
// Conversion-buffer bookkeeping. All of it is mutable so that const member
// functions can rebuild the buffer on demand.
// Which kind of buffer m_buffer currently holds (bt_none when empty).
2624 mutable BufferType m_bufferType;
// NOTE(review): no use of m_bufferSize is visible in this part of the file.
2625 mutable size_t m_bufferSize;
// Buffer pointers. mVoidBuffer is the one tested and reset when checking for or
// releasing a buffer, the typed pointers the ones dereferenced and deleted;
// presumably they alias each other as members of one union - TODO confirm.
2630 mutable void* mVoidBuffer;
2631 mutable std::string* mStrBuffer;
2632 mutable std::wstring* mWStrBuffer;
2658 #if MYGUI_IS_NATIVE_WCHAR_T
2680 #if MYGUI_IS_NATIVE_WCHAR_T
2750 inline std::ostream& operator << ( std::ostream& os,
const UString& s )
2765 #endif // __MYGUI_U_STRING_H__