52 return mIter == mString->mData.begin();
57 return mIter == mString->mData.end();
62 return mIter - mString->mData.begin();
67 mIter = mString->mData.begin() + index;
73 return mString->getChar( current_index );
79 int change = mString->setChar( current_index, uc );
80 _jump_to( current_index );
87 if ( _test_end() )
return;
93 lead_half = mIter[-1];
103 if ( _test_begin() )
return;
108 lead_half = mIter[-1];
218 return _getCharacter();
223 return _setCharacter( uc );
334 return _getCharacter();
555 assign( str, index, length );
558 #if MYGUI_IS_NATIVE_WCHAR_T
624 return mData.max_size();
629 mData.reserve( size );
634 mData.resize( num, val );
639 mData.swap( from.mData );
644 return mData.empty();
649 return mData.c_str();
659 return mData.capacity();
672 tmp.mData.swap( data );
684 #if MYGUI_IS_NATIVE_WCHAR_T
688 mData.push_back( static_cast<code_point>( val ) );
694 mData.push_back( val );
699 mData.push_back( static_cast<code_point>( val ) );
715 return *m_buffer.mStrBuffer;
721 return m_buffer.mStrBuffer->c_str();
726 _load_buffer_UTF32();
727 return *m_buffer.mUTF32StrBuffer;
732 _load_buffer_UTF32();
733 return m_buffer.mUTF32StrBuffer->c_str();
739 return *m_buffer.mWStrBuffer;
745 return m_buffer.mWStrBuffer->c_str();
750 return mData.at( loc );
755 return mData.at( loc );
768 if ( l == 2 && ( loc + 1 ) < mData.length() ) {
785 if ( newSize > existingSize ) {
787 insert( loc + 1, 1, cp[1] );
790 if ( newSize < existingSize ) {
798 if ( l == 2 )
at( loc + 1 ) = cp[1];
805 i.
mIter = mData.begin();
821 i.
mIter = mData.end();
837 i.
mIter = mData.end();
853 i.
mIter = mData.begin();
874 mData.assign( str.mData );
886 mData.assign( str, num );
892 mData.assign( str.mData, index, len );
898 mData.assign( num, ch );
905 mData.reserve( wstr.length() );
906 #ifdef WCHAR_UTF16 // if we're already working in UTF-16, this is easy
908 std::wstring::const_iterator i, ie = wstr.end();
909 for ( i = wstr.begin(); i != ie; i++ ) {
911 mData.push_back( tmp );
913 #else // otherwise we do it the safe way (which is still 100% safe to pass UTF-16 through, just slower)
916 std::wstring::const_iterator i, ie = wstr.end();
917 for ( i = wstr.begin(); i != ie; i++ ) {
920 if ( l > 0 ) mData.push_back( cp[0] );
921 if ( l > 1 ) mData.push_back( cp[1] );
927 #if MYGUI_IS_NATIVE_WCHAR_T
953 unsigned char utf8buf[7];
960 std::string::const_iterator i, ie = str.end();
961 for ( i = str.begin(); i != ie; i++ ) {
963 for (
size_t j = 0; j < utf8len; j++ ) {
964 utf8buf[j] = (
static_cast<unsigned char>( *( i + j ) ) );
966 utf8buf[utf8len] = 0;
971 append( utf16buff, utf16len );
978 std::string tmp( c_str );
991 mData.append( str.mData );
1003 mData.append( str.mData, index, len );
1009 mData.append( str, num );
1015 mData.append( num, ch );
1025 #if MYGUI_IS_NATIVE_WCHAR_T
1028 std::wstring tmp( w_str, num );
1034 return append( num, static_cast<unicode_char>( ch ) );
1046 append( num, static_cast<code_point>( ch ) );
1076 mData.insert( index, str.mData );
1082 mData.insert( index1, str.mData, index2, num );
1093 mData.insert( index, str, num );
1097 #if MYGUI_IS_NATIVE_WCHAR_T
1115 mData.insert( index, num, ch );
1119 #if MYGUI_IS_NATIVE_WCHAR_T
1122 insert( index, num, static_cast<unicode_char>( ch ) );
1129 insert( index, num, static_cast<code_point>( ch ) );
1138 return insert( index, num, cp[0] );
1142 insert( index, 1, cp[1] );
1143 insert( index, 1, cp[0] );
1150 mData.insert( i.
mIter, num, ch );
1152 #if MYGUI_IS_NATIVE_WCHAR_T
1155 insert( i, num, static_cast<unicode_char>( ch ) );
1161 insert( i, num, static_cast<code_point>( ch ) );
1198 mData.erase( index );
1200 mData.erase( index, num );
1206 mData.replace( index1, num1, str.mData, 0,
npos );
1212 mData.replace( index1, num1, str.mData, 0, num2 );
1218 mData.replace( index1, num1, str.mData, index2, num2 );
1228 return replace( index1, num1, str, 0, num );
1233 mData.replace( index, num1, num2, ch );
1243 return replace( index1, num1, num, ch );
1248 return mData.compare( str.mData );
1253 return mData.compare( str );
1258 return mData.compare( index, length, str.mData );
1263 return mData.compare( index, length, str.mData, index2, length2 );
1268 return mData.compare( index, length, str, length2 );
1271 #if MYGUI_IS_NATIVE_WCHAR_T
1274 UString tmp( w_str, length2 );
1275 return compare( index, length, tmp );
1281 UString tmp( c_str, length2 );
1282 return compare( index, length, tmp );
1287 return mData.find( str.
c_str(), index );
1302 #if MYGUI_IS_NATIVE_WCHAR_T
1306 return mData.find( tmp.c_str(), index,
length );
1312 return find( static_cast<code_point>( ch ), index );
1317 return mData.find( ch, index );
1320 #if MYGUI_IS_NATIVE_WCHAR_T
1323 return find( static_cast<unicode_char>( ch ), index );
1336 return mData.rfind( str.
c_str(), index );
1342 return mData.rfind( tmp.
c_str(), index, num );
1348 return mData.rfind( tmp.
c_str(), index, num );
1351 #if MYGUI_IS_NATIVE_WCHAR_T
1355 return mData.rfind( tmp.c_str(), index, num );
1361 return rfind( static_cast<code_point>( ch ), index );
1366 return mData.rfind( ch, index );
1369 #if MYGUI_IS_NATIVE_WCHAR_T
1372 return rfind( static_cast<unicode_char>( ch ), index );
1387 while ( i < num && ( index + i ) < len ) {
1405 return find_first_of( static_cast<code_point>( ch ), index );
1408 #if MYGUI_IS_NATIVE_WCHAR_T
1411 return find_first_of( static_cast<unicode_char>( ch ), index );
1426 while ( i < num && ( index + i ) < len ) {
1447 #if MYGUI_IS_NATIVE_WCHAR_T
1465 if ( index > len ) index = len - 1;
1467 while ( i < num && ( index - i ) !=
npos ) {
1489 #if MYGUI_IS_NATIVE_WCHAR_T
1492 return find_last_of( static_cast<unicode_char>( ch ), index );
1507 if ( index > len ) index = len - 1;
1509 while ( i < num && ( index - i ) !=
npos ) {
1536 #if MYGUI_IS_NATIVE_WCHAR_T
1577 #if MYGUI_IS_NATIVE_WCHAR_T
1621 UString::operator std::string()
const
1623 return std::string(
asUTF8() );
1627 UString::operator std::wstring()
const
1629 return std::wstring(
asWStr() );
1635 if ( 0xD800 <= cp && cp <= 0xDFFF )
1642 if ( 0xD800 <= cp && cp <= 0xDBFF )
1649 if ( 0xDC00 <= cp && cp <= 0xDFFF )
1656 if ( 0xD800 <= cp && cp <= 0xDBFF )
1672 bool wordPair =
false;
1675 if ( 0xD800 <= cp1 && cp1 <= 0xDBFF ) {
1677 if ( 0xDC00 <= cp2 && cp2 <= 0xDFFF )
1686 unsigned short cU = cp1, cL = cp2;
1690 out_uc = ( cU & 0x03FF ) << 10;
1691 out_uc |= ( cL & 0x03FF );
1699 if ( in_uc <= 0xFFFF ) {
1708 tmp =
static_cast<unsigned short>(( uc >> 10 ) & 0x03FF);
1713 tmp =
static_cast<unsigned short>(uc & 0x03FF);
1722 return ( cp & ~_cont_mask ) != _cont;
1727 if ( !( cp & 0x80 ) )
return 1;
1728 if (( cp & ~_lead1_mask ) == _lead1 )
return 2;
1729 if (( cp & ~_lead2_mask ) == _lead2 )
return 3;
1730 if (( cp & ~_lead3_mask ) == _lead3 )
return 4;
1731 if (( cp & ~_lead4_mask ) == _lead4 )
return 5;
1732 if (( cp & ~_lead5_mask ) == _lead5 )
return 6;
1733 throw invalid_data(
"invalid UTF-8 sequence header value" );
1746 if ( !( uc & ~0x0000007F ) )
return 1;
1747 if ( !( uc & ~0x000007FF ) )
return 2;
1748 if ( !( uc & ~0x0000FFFF ) )
return 3;
1749 if ( !( uc & ~0x001FFFFF ) )
return 4;
1750 if ( !( uc & ~0x03FFFFFF ) )
return 5;
1751 if ( !( uc & ~0x7FFFFFFF ) )
return 6;
1767 c = in_cp[i] & _lead5_mask;
1770 c = in_cp[i] & _lead4_mask;
1773 c = in_cp[i] & _lead3_mask;
1776 c = in_cp[i] & _lead2_mask;
1779 c = in_cp[i] & _lead1_mask;
1783 for ( ++i; i <
len; i++ ) {
1784 if (( in_cp[i] & ~_cont_mask ) != _cont )
1787 c |= ( in_cp[i] & _cont_mask );
1800 for (
size_t i = len - 1; i > 0; i-- ) {
1801 out_cp[i] =
static_cast<unsigned char>((( c ) & _cont_mask ) | _cont);
1808 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead5_mask ) | _lead5);
1811 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead4_mask ) | _lead4);
1814 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead3_mask ) | _lead3);
1817 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead2_mask ) | _lead2);
1820 out_cp[0] =
static_cast<unsigned char>((( c ) & _lead1_mask ) | _lead1);
1824 out_cp[0] =
static_cast<unsigned char>(( c ) & 0x7F);
1834 std::string tmp( reinterpret_cast<const char*>( c_str ) );
1840 std::string::const_iterator i, ie = str.end();
1846 if (( *i ) & 0x80 ) {
1847 unsigned char c = ( *i );
1848 size_t contBytes = 0;
1851 if (( c & ~_lead1_mask ) == _lead1 ) {
1852 if ( c == _lead1 )
throw invalid_data(
"overlong UTF-8 sequence" );
1855 }
else if (( c & ~_lead2_mask ) == _lead2 ) {
1857 if ( c == _lead2 ) {
1859 if (( c & _lead2 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1862 }
else if (( c & ~_lead3_mask ) == _lead3 ) {
1864 if ( c == _lead3 ) {
1866 if (( c & _lead3 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1869 }
else if (( c & ~_lead4_mask ) == _lead4 ) {
1871 if ( c == _lead4 ) {
1873 if (( c & _lead4 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1876 }
else if (( c & ~_lead5_mask ) == _lead5 ) {
1878 if ( c == _lead5 ) {
1880 if (( c & _lead5 ) == _cont )
throw invalid_data(
"overlong UTF-8 sequence" );
1885 while ( contBytes-- ) {
1887 if (( c & ~_cont_mask ) != _cont )
1897 void UString::_init()
1899 m_buffer.mVoidBuffer = 0;
1900 m_bufferType = bt_none;
1904 void UString::_cleanBuffer()
const
1906 if ( m_buffer.mVoidBuffer != 0 ) {
1907 switch ( m_bufferType ) {
1909 delete m_buffer.mStrBuffer;
1912 delete m_buffer.mWStrBuffer;
1914 case bt_utf32string:
1915 delete m_buffer.mUTF32StrBuffer;
1921 assert(
"This should never happen - mVoidBuffer should never contain something if we "
1922 "don't know the type");
1925 m_buffer.mVoidBuffer = 0;
1927 m_bufferType = bt_none;
1931 void UString::_getBufferStr()
const
1933 if ( m_bufferType != bt_string ) {
1935 m_buffer.mStrBuffer =
new std::string();
1936 m_bufferType = bt_string;
1938 m_buffer.mStrBuffer->clear();
1941 void UString::_getBufferWStr()
const
1943 if ( m_bufferType != bt_wstring ) {
1945 m_buffer.mWStrBuffer =
new std::wstring();
1946 m_bufferType = bt_wstring;
1948 m_buffer.mWStrBuffer->clear();
1951 void UString::_getBufferUTF32Str()
const
1953 if ( m_bufferType != bt_utf32string ) {
1956 m_bufferType = bt_utf32string;
1958 m_buffer.mUTF32StrBuffer->clear();
1961 void UString::_load_buffer_UTF8()
const
1964 std::string& buffer = ( *m_buffer.mStrBuffer );
1965 buffer.reserve(
length() );
1967 unsigned char utf8buf[6];
1968 char* charbuf = (
char* )utf8buf;
1974 c = i.getCharacter();
1978 buffer.push_back( charbuf[j++] );
1982 void UString::_load_buffer_WStr()
const
1985 std::wstring& buffer = ( *m_buffer.mWStrBuffer );
1986 buffer.reserve(
length() );
1987 #ifdef WCHAR_UTF16 // wchar_t matches UTF-16
1989 for ( i =
begin(); i != ie; ++i ) {
1990 buffer.push_back((
wchar_t )( *i ) );
1992 #else // wchar_t fits UTF-32
1996 c = i.getCharacter();
1997 buffer.push_back((
wchar_t )c );
2002 void UString::_load_buffer_UTF32()
const
2004 _getBufferUTF32Str();
2005 utf32string& buffer = ( *m_buffer.mUTF32StrBuffer );
2006 buffer.reserve(
length() );
2012 c = i.getCharacter();
2013 buffer.push_back( c );