ICU 49.1.1
unistr.h
Go to the documentation of this file.
1 /*
2 **********************************************************************
3 * Copyright (C) 1998-2011, International Business Machines
4 * Corporation and others. All Rights Reserved.
5 **********************************************************************
6 *
7 * File unistr.h
8 *
9 * Modification History:
10 *
11 * Date Name Description
12 * 09/25/98 stephen Creation.
13 * 11/11/98 stephen Changed per 11/9 code review.
14 * 04/20/99 stephen Overhauled per 4/16 code review.
15 * 11/18/99 aliu Made to inherit from Replaceable. Added method
16 * handleReplaceBetween(); other methods unchanged.
17 * 06/25/01 grhoten Remove dependency on iostream.
18 ******************************************************************************
19 */
20 
21 #ifndef UNISTR_H
22 #define UNISTR_H
23 
29 #include "unicode/utypes.h"
30 #include "unicode/rep.h"
31 #include "unicode/std_string.h"
32 #include "unicode/stringpiece.h"
33 #include "unicode/bytestream.h"
34 #include "unicode/ucasemap.h"
35 
36 struct UConverter; // unicode/ucnv.h
37 class StringThreadTest;
38 
39 #ifndef U_COMPARE_CODE_POINT_ORDER
40 /* see also ustring.h and unorm.h */
46 #define U_COMPARE_CODE_POINT_ORDER 0x8000
47 #endif
48 
49 #ifndef USTRING_H
50 
53 U_STABLE int32_t U_EXPORT2
54 u_strlen(const UChar *s);
55 #endif
56 
57 #ifndef U_STRING_CASE_MAPPER_DEFINED
58 #define U_STRING_CASE_MAPPER_DEFINED
59 
64 typedef int32_t U_CALLCONV
66  UChar *dest, int32_t destCapacity,
67  const UChar *src, int32_t srcLength,
68  UErrorCode *pErrorCode);
69 
70 #endif
71 
73 
74 class BreakIterator; // unicode/brkiter.h
75 class Locale; // unicode/locid.h
77 class UnicodeStringAppendable; // unicode/appendable.h
78 
79 /* The <iostream> include has been moved to unicode/ustream.h */
80 
91 #define US_INV icu::UnicodeString::kInvariant
92 
110 #if defined(U_DECLARE_UTF16)
111 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)U_DECLARE_UTF16(cs), _length)
112 #elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || (U_SIZEOF_UCHAR == 2 && defined(U_WCHAR_IS_UTF16)))
113 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)L ## cs, _length)
114 #elif U_SIZEOF_UCHAR==1 && U_CHARSET_FAMILY==U_ASCII_FAMILY
115 # define UNICODE_STRING(cs, _length) icu::UnicodeString(TRUE, (const UChar *)cs, _length)
116 #else
117 # define UNICODE_STRING(cs, _length) icu::UnicodeString(cs, _length, US_INV)
118 #endif
119 
133 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
134 
142 #ifndef UNISTR_FROM_CHAR_EXPLICIT
143 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
144  // Auto-"explicit" in ICU library code.
145 # define UNISTR_FROM_CHAR_EXPLICIT explicit
146 # else
147  // Empty by default for source code compatibility.
148 # define UNISTR_FROM_CHAR_EXPLICIT
149 # endif
150 #endif
151 
162 #ifndef UNISTR_FROM_STRING_EXPLICIT
163 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
164  // Auto-"explicit" in ICU library code.
165 # define UNISTR_FROM_STRING_EXPLICIT explicit
166 # else
167  // Empty by default for source code compatibility.
168 # define UNISTR_FROM_STRING_EXPLICIT
169 # endif
170 #endif
171 
242 {
243 public:
244 
253  enum EInvariant {
258  kInvariant
259  };
260 
261  //========================================
262  // Read-only operations
263  //========================================
264 
265  /* Comparison - bitwise only - for international comparison use collation */
266 
274  inline UBool operator== (const UnicodeString& text) const;
275 
283  inline UBool operator!= (const UnicodeString& text) const;
284 
292  inline UBool operator> (const UnicodeString& text) const;
293 
301  inline UBool operator< (const UnicodeString& text) const;
302 
310  inline UBool operator>= (const UnicodeString& text) const;
311 
319  inline UBool operator<= (const UnicodeString& text) const;
320 
332  inline int8_t compare(const UnicodeString& text) const;
333 
348  inline int8_t compare(int32_t start,
349  int32_t length,
350  const UnicodeString& text) const;
351 
369  inline int8_t compare(int32_t start,
370  int32_t length,
371  const UnicodeString& srcText,
372  int32_t srcStart,
373  int32_t srcLength) const;
374 
387  inline int8_t compare(const UChar *srcChars,
388  int32_t srcLength) const;
389 
404  inline int8_t compare(int32_t start,
405  int32_t length,
406  const UChar *srcChars) const;
407 
425  inline int8_t compare(int32_t start,
426  int32_t length,
427  const UChar *srcChars,
428  int32_t srcStart,
429  int32_t srcLength) const;
430 
448  inline int8_t compareBetween(int32_t start,
449  int32_t limit,
450  const UnicodeString& srcText,
451  int32_t srcStart,
452  int32_t srcLimit) const;
453 
471  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
472 
492  inline int8_t compareCodePointOrder(int32_t start,
493  int32_t length,
494  const UnicodeString& srcText) const;
495 
517  inline int8_t compareCodePointOrder(int32_t start,
518  int32_t length,
519  const UnicodeString& srcText,
520  int32_t srcStart,
521  int32_t srcLength) const;
522 
541  inline int8_t compareCodePointOrder(const UChar *srcChars,
542  int32_t srcLength) const;
543 
563  inline int8_t compareCodePointOrder(int32_t start,
564  int32_t length,
565  const UChar *srcChars) const;
566 
588  inline int8_t compareCodePointOrder(int32_t start,
589  int32_t length,
590  const UChar *srcChars,
591  int32_t srcStart,
592  int32_t srcLength) const;
593 
615  inline int8_t compareCodePointOrderBetween(int32_t start,
616  int32_t limit,
617  const UnicodeString& srcText,
618  int32_t srcStart,
619  int32_t srcLimit) const;
620 
639  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
640 
661  inline int8_t caseCompare(int32_t start,
662  int32_t length,
663  const UnicodeString& srcText,
664  uint32_t options) const;
665 
688  inline int8_t caseCompare(int32_t start,
689  int32_t length,
690  const UnicodeString& srcText,
691  int32_t srcStart,
692  int32_t srcLength,
693  uint32_t options) const;
694 
714  inline int8_t caseCompare(const UChar *srcChars,
715  int32_t srcLength,
716  uint32_t options) const;
717 
738  inline int8_t caseCompare(int32_t start,
739  int32_t length,
740  const UChar *srcChars,
741  uint32_t options) const;
742 
765  inline int8_t caseCompare(int32_t start,
766  int32_t length,
767  const UChar *srcChars,
768  int32_t srcStart,
769  int32_t srcLength,
770  uint32_t options) const;
771 
794  inline int8_t caseCompareBetween(int32_t start,
795  int32_t limit,
796  const UnicodeString& srcText,
797  int32_t srcStart,
798  int32_t srcLimit,
799  uint32_t options) const;
800 
808  inline UBool startsWith(const UnicodeString& text) const;
809 
820  inline UBool startsWith(const UnicodeString& srcText,
821  int32_t srcStart,
822  int32_t srcLength) const;
823 
832  inline UBool startsWith(const UChar *srcChars,
833  int32_t srcLength) const;
834 
844  inline UBool startsWith(const UChar *srcChars,
845  int32_t srcStart,
846  int32_t srcLength) const;
847 
855  inline UBool endsWith(const UnicodeString& text) const;
856 
867  inline UBool endsWith(const UnicodeString& srcText,
868  int32_t srcStart,
869  int32_t srcLength) const;
870 
879  inline UBool endsWith(const UChar *srcChars,
880  int32_t srcLength) const;
881 
892  inline UBool endsWith(const UChar *srcChars,
893  int32_t srcStart,
894  int32_t srcLength) const;
895 
896 
897  /* Searching - bitwise only */
898 
907  inline int32_t indexOf(const UnicodeString& text) const;
908 
918  inline int32_t indexOf(const UnicodeString& text,
919  int32_t start) const;
920 
932  inline int32_t indexOf(const UnicodeString& text,
933  int32_t start,
934  int32_t length) const;
935 
952  inline int32_t indexOf(const UnicodeString& srcText,
953  int32_t srcStart,
954  int32_t srcLength,
955  int32_t start,
956  int32_t length) const;
957 
969  inline int32_t indexOf(const UChar *srcChars,
970  int32_t srcLength,
971  int32_t start) const;
972 
985  inline int32_t indexOf(const UChar *srcChars,
986  int32_t srcLength,
987  int32_t start,
988  int32_t length) const;
989 
1006  int32_t indexOf(const UChar *srcChars,
1007  int32_t srcStart,
1008  int32_t srcLength,
1009  int32_t start,
1010  int32_t length) const;
1011 
1019  inline int32_t indexOf(UChar c) const;
1020 
1029  inline int32_t indexOf(UChar32 c) const;
1030 
1039  inline int32_t indexOf(UChar c,
1040  int32_t start) const;
1041 
1051  inline int32_t indexOf(UChar32 c,
1052  int32_t start) const;
1053 
1064  inline int32_t indexOf(UChar c,
1065  int32_t start,
1066  int32_t length) const;
1067 
1079  inline int32_t indexOf(UChar32 c,
1080  int32_t start,
1081  int32_t length) const;
1082 
1091  inline int32_t lastIndexOf(const UnicodeString& text) const;
1092 
1102  inline int32_t lastIndexOf(const UnicodeString& text,
1103  int32_t start) const;
1104 
1116  inline int32_t lastIndexOf(const UnicodeString& text,
1117  int32_t start,
1118  int32_t length) const;
1119 
1136  inline int32_t lastIndexOf(const UnicodeString& srcText,
1137  int32_t srcStart,
1138  int32_t srcLength,
1139  int32_t start,
1140  int32_t length) const;
1141 
1152  inline int32_t lastIndexOf(const UChar *srcChars,
1153  int32_t srcLength,
1154  int32_t start) const;
1155 
1168  inline int32_t lastIndexOf(const UChar *srcChars,
1169  int32_t srcLength,
1170  int32_t start,
1171  int32_t length) const;
1172 
1189  int32_t lastIndexOf(const UChar *srcChars,
1190  int32_t srcStart,
1191  int32_t srcLength,
1192  int32_t start,
1193  int32_t length) const;
1194 
1202  inline int32_t lastIndexOf(UChar c) const;
1203 
1212  inline int32_t lastIndexOf(UChar32 c) const;
1213 
1222  inline int32_t lastIndexOf(UChar c,
1223  int32_t start) const;
1224 
1234  inline int32_t lastIndexOf(UChar32 c,
1235  int32_t start) const;
1236 
1247  inline int32_t lastIndexOf(UChar c,
1248  int32_t start,
1249  int32_t length) const;
1250 
1262  inline int32_t lastIndexOf(UChar32 c,
1263  int32_t start,
1264  int32_t length) const;
1265 
1266 
1267  /* Character access */
1268 
1277  inline UChar charAt(int32_t offset) const;
1278 
1286  inline UChar operator[] (int32_t offset) const;
1287 
1299  UChar32 char32At(int32_t offset) const;
1300 
1316  int32_t getChar32Start(int32_t offset) const;
1317 
1334  int32_t getChar32Limit(int32_t offset) const;
1335 
1386  int32_t moveIndex32(int32_t index, int32_t delta) const;
1387 
1388  /* Substring extraction */
1389 
1405  inline void extract(int32_t start,
1406  int32_t length,
1407  UChar *dst,
1408  int32_t dstStart = 0) const;
1409 
1431  int32_t
1432  extract(UChar *dest, int32_t destCapacity,
1433  UErrorCode &errorCode) const;
1434 
1445  inline void extract(int32_t start,
1446  int32_t length,
1447  UnicodeString& target) const;
1448 
1460  inline void extractBetween(int32_t start,
1461  int32_t limit,
1462  UChar *dst,
1463  int32_t dstStart = 0) const;
1464 
1474  virtual void extractBetween(int32_t start,
1475  int32_t limit,
1476  UnicodeString& target) const;
1477 
1499  int32_t extract(int32_t start,
1500  int32_t startLength,
1501  char *target,
1502  int32_t targetCapacity,
1503  enum EInvariant inv) const;
1504 
1505 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1506 
1526  int32_t extract(int32_t start,
1527  int32_t startLength,
1528  char *target,
1529  uint32_t targetLength) const;
1530 
1531 #endif
1532 
1533 #if !UCONFIG_NO_CONVERSION
1534 
1560  inline int32_t extract(int32_t start,
1561  int32_t startLength,
1562  char *target,
1563  const char *codepage = 0) const;
1564 
1594  int32_t extract(int32_t start,
1595  int32_t startLength,
1596  char *target,
1597  uint32_t targetLength,
1598  const char *codepage) const;
1599 
1617  int32_t extract(char *dest, int32_t destCapacity,
1618  UConverter *cnv,
1619  UErrorCode &errorCode) const;
1620 
1621 #endif
1622 
1636  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1637 
1648  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1649 
1661  void toUTF8(ByteSink &sink) const;
1662 
1663 #if U_HAVE_STD_STRING
1664 
1677  template<typename StringClass>
1678  StringClass &toUTF8String(StringClass &result) const {
1679  StringByteSink<StringClass> sbs(&result);
1680  toUTF8(sbs);
1681  return result;
1682  }
1683 
1684 #endif
1685 
1701  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1702 
1703  /* Length operations */
1704 
1713  inline int32_t length(void) const;
1714 
1728  int32_t
1729  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1730 
1754  UBool
1755  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1756 
1762  inline UBool isEmpty(void) const;
1763 
1773  inline int32_t getCapacity(void) const;
1774 
1775  /* Other operations */
1776 
1782  inline int32_t hashCode(void) const;
1783 
1796  inline UBool isBogus(void) const;
1797 
1798 
1799  //========================================
1800  // Write operations
1801  //========================================
1802 
1803  /* Assignment operations */
1804 
1812  UnicodeString &operator=(const UnicodeString &srcText);
1813 
1834  UnicodeString &fastCopyFrom(const UnicodeString &src);
1835 
1843  inline UnicodeString& operator= (UChar ch);
1844 
1852  inline UnicodeString& operator= (UChar32 ch);
1853 
1865  inline UnicodeString& setTo(const UnicodeString& srcText,
1866  int32_t srcStart);
1867 
1881  inline UnicodeString& setTo(const UnicodeString& srcText,
1882  int32_t srcStart,
1883  int32_t srcLength);
1884 
1893  inline UnicodeString& setTo(const UnicodeString& srcText);
1894 
1903  inline UnicodeString& setTo(const UChar *srcChars,
1904  int32_t srcLength);
1905 
1914  UnicodeString& setTo(UChar srcChar);
1915 
1924  UnicodeString& setTo(UChar32 srcChar);
1925 
1946  UnicodeString &setTo(UBool isTerminated,
1947  const UChar *text,
1948  int32_t textLength);
1949 
1969  UnicodeString &setTo(UChar *buffer,
1970  int32_t buffLength,
1971  int32_t buffCapacity);
1972 
2013  void setToBogus();
2014 
2022  UnicodeString& setCharAt(int32_t offset,
2023  UChar ch);
2024 
2025 
2026  /* Append operations */
2027 
2035  inline UnicodeString& operator+= (UChar ch);
2036 
2044  inline UnicodeString& operator+= (UChar32 ch);
2045 
2053  inline UnicodeString& operator+= (const UnicodeString& srcText);
2054 
2069  inline UnicodeString& append(const UnicodeString& srcText,
2070  int32_t srcStart,
2071  int32_t srcLength);
2072 
2080  inline UnicodeString& append(const UnicodeString& srcText);
2081 
2095  inline UnicodeString& append(const UChar *srcChars,
2096  int32_t srcStart,
2097  int32_t srcLength);
2098 
2108  inline UnicodeString& append(const UChar *srcChars,
2109  int32_t srcLength);
2110 
2117  inline UnicodeString& append(UChar srcChar);
2118 
2125  UnicodeString& append(UChar32 srcChar);
2126 
2127 
2128  /* Insert operations */
2129 
2143  inline UnicodeString& insert(int32_t start,
2144  const UnicodeString& srcText,
2145  int32_t srcStart,
2146  int32_t srcLength);
2147 
2156  inline UnicodeString& insert(int32_t start,
2157  const UnicodeString& srcText);
2158 
2172  inline UnicodeString& insert(int32_t start,
2173  const UChar *srcChars,
2174  int32_t srcStart,
2175  int32_t srcLength);
2176 
2186  inline UnicodeString& insert(int32_t start,
2187  const UChar *srcChars,
2188  int32_t srcLength);
2189 
2198  inline UnicodeString& insert(int32_t start,
2199  UChar srcChar);
2200 
2209  inline UnicodeString& insert(int32_t start,
2210  UChar32 srcChar);
2211 
2212 
2213  /* Replace operations */
2214 
2232  UnicodeString& replace(int32_t start,
2233  int32_t length,
2234  const UnicodeString& srcText,
2235  int32_t srcStart,
2236  int32_t srcLength);
2237 
2250  UnicodeString& replace(int32_t start,
2251  int32_t length,
2252  const UnicodeString& srcText);
2253 
2271  UnicodeString& replace(int32_t start,
2272  int32_t length,
2273  const UChar *srcChars,
2274  int32_t srcStart,
2275  int32_t srcLength);
2276 
2289  inline UnicodeString& replace(int32_t start,
2290  int32_t length,
2291  const UChar *srcChars,
2292  int32_t srcLength);
2293 
2305  inline UnicodeString& replace(int32_t start,
2306  int32_t length,
2307  UChar srcChar);
2308 
2320  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2321 
2331  inline UnicodeString& replaceBetween(int32_t start,
2332  int32_t limit,
2333  const UnicodeString& srcText);
2334 
2349  inline UnicodeString& replaceBetween(int32_t start,
2350  int32_t limit,
2351  const UnicodeString& srcText,
2352  int32_t srcStart,
2353  int32_t srcLimit);
2354 
2365  virtual void handleReplaceBetween(int32_t start,
2366  int32_t limit,
2367  const UnicodeString& text);
2368 
2374  virtual UBool hasMetaData() const;
2375 
2391  virtual void copy(int32_t start, int32_t limit, int32_t dest);
2392 
2393  /* Search and replace operations */
2394 
2403  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2404  const UnicodeString& newText);
2405 
2417  inline UnicodeString& findAndReplace(int32_t start,
2418  int32_t length,
2419  const UnicodeString& oldText,
2420  const UnicodeString& newText);
2421 
2439  UnicodeString& findAndReplace(int32_t start,
2440  int32_t length,
2441  const UnicodeString& oldText,
2442  int32_t oldStart,
2443  int32_t oldLength,
2444  const UnicodeString& newText,
2445  int32_t newStart,
2446  int32_t newLength);
2447 
2448 
2449  /* Remove operations */
2450 
2456  inline UnicodeString& remove(void);
2457 
2466  inline UnicodeString& remove(int32_t start,
2467  int32_t length = (int32_t)INT32_MAX);
2468 
2477  inline UnicodeString& removeBetween(int32_t start,
2478  int32_t limit = (int32_t)INT32_MAX);
2479 
2489  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2490 
2491  /* Length operations */
2492 
2504  UBool padLeading(int32_t targetLength,
2505  UChar padChar = 0x0020);
2506 
2518  UBool padTrailing(int32_t targetLength,
2519  UChar padChar = 0x0020);
2520 
2527  inline UBool truncate(int32_t targetLength);
2528 
2534  UnicodeString& trim(void);
2535 
2536 
2537  /* Miscellaneous operations */
2538 
2544  inline UnicodeString& reverse(void);
2545 
2554  inline UnicodeString& reverse(int32_t start,
2555  int32_t length);
2556 
2563  UnicodeString& toUpper(void);
2564 
2572  UnicodeString& toUpper(const Locale& locale);
2573 
2580  UnicodeString& toLower(void);
2581 
2589  UnicodeString& toLower(const Locale& locale);
2590 
2591 #if !UCONFIG_NO_BREAK_ITERATION
2592 
2619  UnicodeString &toTitle(BreakIterator *titleIter);
2620 
2648  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2649 
2681  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2682 
2683 #endif
2684 
2696  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2697 
2698  //========================================
2699  // Access to the internal buffer
2700  //========================================
2701 
2745  UChar *getBuffer(int32_t minCapacity);
2746 
2767  void releaseBuffer(int32_t newLength=-1);
2768 
2799  inline const UChar *getBuffer() const;
2800 
2834  inline const UChar *getTerminatedBuffer();
2835 
2836  //========================================
2837  // Constructors
2838  //========================================
2839 
2843  UnicodeString();
2844 
2856  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2857 
2868 
2879 
2891 
2899  UnicodeString(const UChar *text,
2900  int32_t textLength);
2901 
2921  UnicodeString(UBool isTerminated,
2922  const UChar *text,
2923  int32_t textLength);
2924 
2943  UnicodeString(UChar *buffer, int32_t buffLength, int32_t buffCapacity);
2944 
2945 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
2946 
2966  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
2967 
2976  UnicodeString(const char *codepageData, int32_t dataLength);
2977 
2978 #endif
2979 
2980 #if !UCONFIG_NO_CONVERSION
2981 
2999  UnicodeString(const char *codepageData, const char *codepage);
3000 
3018  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3019 
3041  UnicodeString(
3042  const char *src, int32_t srcLength,
3043  UConverter *cnv,
3044  UErrorCode &errorCode);
3045 
3046 #endif
3047 
3072  UnicodeString(const char *src, int32_t length, enum EInvariant inv);
3073 
3074 
3080  UnicodeString(const UnicodeString& that);
3081 
3088  UnicodeString(const UnicodeString& src, int32_t srcStart);
3089 
3097  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3098 
3115  virtual Replaceable *clone() const;
3116 
3120  virtual ~UnicodeString();
3121 
3135  static UnicodeString fromUTF8(const StringPiece &utf8);
3136 
3148  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3149 
3150  /* Miscellaneous operations */
3151 
3186  UnicodeString unescape() const;
3187 
3207  UChar32 unescapeAt(int32_t &offset) const;
3208 
3214  static UClassID U_EXPORT2 getStaticClassID();
3215 
3221  virtual UClassID getDynamicClassID() const;
3222 
3223  //========================================
3224  // Implementation methods
3225  //========================================
3226 
3227 protected:
3232  virtual int32_t getLength() const;
3233 
3239  virtual UChar getCharAt(int32_t offset) const;
3240 
3246  virtual UChar32 getChar32At(int32_t offset) const;
3247 
3248 private:
3249  // For char* constructors. Could be made public.
3250  UnicodeString &setToUTF8(const StringPiece &utf8);
3251  // For extract(char*).
3252  // We could make a toUTF8(target, capacity, errorCode) public but not
3253  // this version: New API will be cleaner if we make callers create substrings
3254  // rather than having start+length on every method,
3255  // and it should take a UErrorCode&.
3256  int32_t
3257  toUTF8(int32_t start, int32_t len,
3258  char *target, int32_t capacity) const;
3259 
3260 
3261  inline int8_t
3262  doCompare(int32_t start,
3263  int32_t length,
3264  const UnicodeString& srcText,
3265  int32_t srcStart,
3266  int32_t srcLength) const;
3267 
3268  int8_t doCompare(int32_t start,
3269  int32_t length,
3270  const UChar *srcChars,
3271  int32_t srcStart,
3272  int32_t srcLength) const;
3273 
3274  inline int8_t
3275  doCompareCodePointOrder(int32_t start,
3276  int32_t length,
3277  const UnicodeString& srcText,
3278  int32_t srcStart,
3279  int32_t srcLength) const;
3280 
3281  int8_t doCompareCodePointOrder(int32_t start,
3282  int32_t length,
3283  const UChar *srcChars,
3284  int32_t srcStart,
3285  int32_t srcLength) const;
3286 
3287  inline int8_t
3288  doCaseCompare(int32_t start,
3289  int32_t length,
3290  const UnicodeString &srcText,
3291  int32_t srcStart,
3292  int32_t srcLength,
3293  uint32_t options) const;
3294 
3295  int8_t
3296  doCaseCompare(int32_t start,
3297  int32_t length,
3298  const UChar *srcChars,
3299  int32_t srcStart,
3300  int32_t srcLength,
3301  uint32_t options) const;
3302 
3303  int32_t doIndexOf(UChar c,
3304  int32_t start,
3305  int32_t length) const;
3306 
3307  int32_t doIndexOf(UChar32 c,
3308  int32_t start,
3309  int32_t length) const;
3310 
3311  int32_t doLastIndexOf(UChar c,
3312  int32_t start,
3313  int32_t length) const;
3314 
3315  int32_t doLastIndexOf(UChar32 c,
3316  int32_t start,
3317  int32_t length) const;
3318 
3319  void doExtract(int32_t start,
3320  int32_t length,
3321  UChar *dst,
3322  int32_t dstStart) const;
3323 
3324  inline void doExtract(int32_t start,
3325  int32_t length,
3326  UnicodeString& target) const;
3327 
3328  inline UChar doCharAt(int32_t offset) const;
3329 
3330  UnicodeString& doReplace(int32_t start,
3331  int32_t length,
3332  const UnicodeString& srcText,
3333  int32_t srcStart,
3334  int32_t srcLength);
3335 
3336  UnicodeString& doReplace(int32_t start,
3337  int32_t length,
3338  const UChar *srcChars,
3339  int32_t srcStart,
3340  int32_t srcLength);
3341 
3342  UnicodeString& doReverse(int32_t start,
3343  int32_t length);
3344 
3345  // calculate hash code
3346  int32_t doHashCode(void) const;
3347 
3348  // get pointer to start of array
3349  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3350  inline UChar* getArrayStart(void);
3351  inline const UChar* getArrayStart(void) const;
3352 
3353  // A UnicodeString object (not necessarily its current buffer)
3354  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3355  inline UBool isWritable() const;
3356 
3357  // Is the current buffer writable?
3358  inline UBool isBufferWritable() const;
3359 
3360  // None of the following does releaseArray().
3361  inline void setLength(int32_t len); // sets only fShortLength and fLength
3362  inline void setToEmpty(); // sets fFlags=kShortString
3363  inline void setArray(UChar *array, int32_t len, int32_t capacity); // does not set fFlags
3364 
3365  // allocate the array; result may be fStackBuffer
3366  // sets refCount to 1 if appropriate
3367  // sets fArray, fCapacity, and fFlags
3368  // returns boolean for success or failure
3369  UBool allocate(int32_t capacity);
3370 
3371  // release the array if owned
3372  void releaseArray(void);
3373 
3374  // turn a bogus string into an empty one
3375  void unBogus();
3376 
3377  // implements assignment operator, copy constructor, and fastCopyFrom()
3378  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=FALSE);
3379 
3380  // Pin start and limit to acceptable values.
3381  inline void pinIndex(int32_t& start) const;
3382  inline void pinIndices(int32_t& start,
3383  int32_t& length) const;
3384 
3385 #if !UCONFIG_NO_CONVERSION
3386 
3387  /* Internal extract() using UConverter. */
3388  int32_t doExtract(int32_t start, int32_t length,
3389  char *dest, int32_t destCapacity,
3390  UConverter *cnv,
3391  UErrorCode &errorCode) const;
3392 
3393  /*
3394  * Real constructor for converting from codepage data.
3395  * It assumes that it is called with !fRefCounted.
3396  *
3397  * If <code>codepage==0</code>, then the default converter
3398  * is used for the platform encoding.
3399  * If <code>codepage</code> is an empty string (<code>""</code>),
3400  * then a simple conversion is performed on the codepage-invariant
3401  * subset ("invariant characters") of the platform encoding. See utypes.h.
3402  */
3403  void doCodepageCreate(const char *codepageData,
3404  int32_t dataLength,
3405  const char *codepage);
3406 
3407  /*
3408  * Worker function for creating a UnicodeString from
3409  * a codepage string using a UConverter.
3410  */
3411  void
3412  doCodepageCreate(const char *codepageData,
3413  int32_t dataLength,
3414  UConverter *converter,
3415  UErrorCode &status);
3416 
3417 #endif
3418 
3419  /*
3420  * This function is called when write access to the array
3421  * is necessary.
3422  *
3423  * We need to make a copy of the array if
3424  * the buffer is read-only, or
3425  * the buffer is refCounted (shared), and refCount>1, or
3426  * the buffer is too small.
3427  *
3428  * Return FALSE if memory could not be allocated.
3429  */
3430  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3431  int32_t growCapacity = -1,
3432  UBool doCopyArray = TRUE,
3433  int32_t **pBufferToDelete = 0,
3434  UBool forceClone = FALSE);
3435 
3441  UnicodeString &
3442  caseMap(const UCaseMap *csm, UStringCaseMapper *stringCaseMapper);
3443 
3444  // ref counting
3445  void addRef(void);
3446  int32_t removeRef(void);
3447  int32_t refCount(void) const;
3448 
3449  // constants
3450  enum {
3451  // Set the stack buffer size so that sizeof(UnicodeString) is,
3452  // naturally (without padding), a multiple of sizeof(pointer).
3453  US_STACKBUF_SIZE= sizeof(void *)==4 ? 13 : 15, // Size of stack buffer for short strings
3454  kInvalidUChar=0xffff, // invalid UChar index
3455  kGrowSize=128, // grow size for this buffer
3456  kInvalidHashCode=0, // invalid hash code
3457  kEmptyHashCode=1, // hash code for empty string
3458 
3459  // bit flag values for fFlags
3460  kIsBogus=1, // this string is bogus, i.e., not valid or NULL
3461  kUsingStackBuffer=2,// using fUnion.fStackBuffer instead of fUnion.fFields
3462  kRefCounted=4, // there is a refCount field before the characters in fArray
3463  kBufferIsReadonly=8,// do not write to this buffer
3464  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3465  // and releaseBuffer(newLength) must be called
3466 
3467  // combined values for convenience
3468  kShortString=kUsingStackBuffer,
3469  kLongString=kRefCounted,
3470  kReadonlyAlias=kBufferIsReadonly,
3471  kWritableAlias=0
3472  };
3473 
3474  friend class StringThreadTest;
3475  friend class UnicodeStringAppendable;
3476 
3477  union StackBufferOrFields; // forward declaration necessary before friend declaration
3478  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3479 
3480  /*
3481  * The following are all the class fields that are stored
3482  * in each UnicodeString object.
3483  * Note that UnicodeString has virtual functions,
3484  * therefore there is an implicit vtable pointer
3485  * as the first real field.
3486  * The fields should be aligned such that no padding is necessary.
3487  * On 32-bit machines, the size should be 32 bytes,
3488  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
3489  *
3490  * We use a hack to achieve this.
3491  *
3492  * With at least some compilers, each of the following is forced to
3493  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3494  * rounded up with additional padding if the fields do not already fit that requirement:
3495  * - sizeof(class UnicodeString)
3496  * - offsetof(UnicodeString, fUnion)
3497  * - sizeof(fUnion)
3498  * - sizeof(fFields)
3499  *
3500  * In order to avoid padding, we make sizeof(fStackBuffer)=16 (=8 UChars)
3501  * which is at least as large as sizeof(fFields) on 32-bit and 64-bit machines.
3502  * (Padding at the end of fFields is ok:
3503  * As long as there is no padding after fStackBuffer, it is not wasted space.)
3504  *
3505  * We further assume that the compiler does not reorder the fields,
3506  * so that fRestOfStackBuffer (which holds a few more UChars) immediately follows after fUnion,
3507  * with at most some padding (but no other field) in between.
3508  * (Padding there would be wasted space, but functionally harmless.)
3509  *
3510  * We use a few more sizeof(pointer)'s chunks of space with
3511  * fRestOfStackBuffer, fShortLength and fFlags,
3512  * to get up exactly to the intended sizeof(UnicodeString).
3513  */
3514  // (implicit) *vtable;
3515  union StackBufferOrFields {
3516  // fStackBuffer is used iff (fFlags&kUsingStackBuffer)
3517  // else fFields is used
3518  UChar fStackBuffer[8]; // buffer for short strings, together with fRestOfStackBuffer
3519  struct {
3520  UChar *fArray; // the Unicode data
3521  int32_t fCapacity; // capacity of fArray (in UChars)
3522  int32_t fLength; // number of characters in fArray if >127; else undefined
3523  } fFields;
3524  } fUnion;
3525  UChar fRestOfStackBuffer[US_STACKBUF_SIZE-8];
3526  int8_t fShortLength; // 0..127: length <0: real length is in fUnion.fFields.fLength
3527  uint8_t fFlags; // bit flags: see constants above
3528 };
3529 
3538 U_COMMON_API UnicodeString U_EXPORT2
3539 operator+ (const UnicodeString &s1, const UnicodeString &s2);
3540 
3541 //========================================
3542 // Inline members
3543 //========================================
3544 
3545 //========================================
3546 // Privates
3547 //========================================
3548 
3549 inline void
3550 UnicodeString::pinIndex(int32_t& start) const
3551 {
3552  // pin index
3553  if(start < 0) {
3554  start = 0;
3555  } else if(start > length()) {
3556  start = length();
3557  }
3558 }
3559 
3560 inline void
3561 UnicodeString::pinIndices(int32_t& start,
3562  int32_t& _length) const
3563 {
3564  // pin indices
3565  int32_t len = length();
3566  if(start < 0) {
3567  start = 0;
3568  } else if(start > len) {
3569  start = len;
3570  }
3571  if(_length < 0) {
3572  _length = 0;
3573  } else if(_length > (len - start)) {
3574  _length = (len - start);
3575  }
3576 }
3577 
3578 inline UChar*
3579 UnicodeString::getArrayStart()
3580 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3581 
3582 inline const UChar*
3583 UnicodeString::getArrayStart() const
3584 { return (fFlags&kUsingStackBuffer) ? fUnion.fStackBuffer : fUnion.fFields.fArray; }
3585 
3586 //========================================
3587 // Read-only implementation methods
3588 //========================================
3589 inline int32_t
3591 { return fShortLength>=0 ? fShortLength : fUnion.fFields.fLength; }
3592 
3593 inline int32_t
3595 { return (fFlags&kUsingStackBuffer) ? US_STACKBUF_SIZE : fUnion.fFields.fCapacity; }
3596 
3597 inline int32_t
3599 { return doHashCode(); }
3600 
3601 inline UBool
3603 { return (UBool)(fFlags & kIsBogus); }
3604 
3605 inline UBool
3606 UnicodeString::isWritable() const
3607 { return (UBool)!(fFlags&(kOpenGetBuffer|kIsBogus)); }
3608 
3609 inline UBool
3610 UnicodeString::isBufferWritable() const
3611 {
3612  return (UBool)(
3613  !(fFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3614  (!(fFlags&kRefCounted) || refCount()==1));
3615 }
3616 
3617 inline const UChar *
3619  if(fFlags&(kIsBogus|kOpenGetBuffer)) {
3620  return 0;
3621  } else if(fFlags&kUsingStackBuffer) {
3622  return fUnion.fStackBuffer;
3623  } else {
3624  return fUnion.fFields.fArray;
3625  }
3626 }
3627 
3628 //========================================
3629 // Read-only alias methods
3630 //========================================
3631 inline int8_t
3632 UnicodeString::doCompare(int32_t start,
3633  int32_t thisLength,
3634  const UnicodeString& srcText,
3635  int32_t srcStart,
3636  int32_t srcLength) const
3637 {
3638  if(srcText.isBogus()) {
3639  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3640  } else {
3641  srcText.pinIndices(srcStart, srcLength);
3642  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3643  }
3644 }
3645 
3646 inline UBool
3648 {
3649  if(isBogus()) {
3650  return text.isBogus();
3651  } else {
3652  int32_t len = length(), textLength = text.length();
3653  return
3654  !text.isBogus() &&
3655  len == textLength &&
3656  doCompare(0, len, text, 0, textLength) == 0;
3657  }
3658 }
3659 
3660 inline UBool
3662 { return (! operator==(text)); }
3663 
3664 inline UBool
3666 { return doCompare(0, length(), text, 0, text.length()) == 1; }
3667 
3668 inline UBool
3670 { return doCompare(0, length(), text, 0, text.length()) == -1; }
3671 
3672 inline UBool
3674 { return doCompare(0, length(), text, 0, text.length()) != -1; }
3675 
3676 inline UBool
3678 { return doCompare(0, length(), text, 0, text.length()) != 1; }
3679 
3680 inline int8_t
3682 { return doCompare(0, length(), text, 0, text.length()); }
3683 
3684 inline int8_t
3686  int32_t _length,
3687  const UnicodeString& srcText) const
3688 { return doCompare(start, _length, srcText, 0, srcText.length()); }
3689 
3690 inline int8_t
3692  int32_t srcLength) const
3693 { return doCompare(0, length(), srcChars, 0, srcLength); }
3694 
3695 inline int8_t
3697  int32_t _length,
3698  const UnicodeString& srcText,
3699  int32_t srcStart,
3700  int32_t srcLength) const
3701 { return doCompare(start, _length, srcText, srcStart, srcLength); }
3702 
3703 inline int8_t
3705  int32_t _length,
3706  const UChar *srcChars) const
3707 { return doCompare(start, _length, srcChars, 0, _length); }
3708 
3709 inline int8_t
3711  int32_t _length,
3712  const UChar *srcChars,
3713  int32_t srcStart,
3714  int32_t srcLength) const
3715 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
3716 
3717 inline int8_t
3719  int32_t limit,
3720  const UnicodeString& srcText,
3721  int32_t srcStart,
3722  int32_t srcLimit) const
3723 { return doCompare(start, limit - start,
3724  srcText, srcStart, srcLimit - srcStart); }
3725 
3726 inline int8_t
3727 UnicodeString::doCompareCodePointOrder(int32_t start,
3728  int32_t thisLength,
3729  const UnicodeString& srcText,
3730  int32_t srcStart,
3731  int32_t srcLength) const
3732 {
3733  if(srcText.isBogus()) {
3734  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3735  } else {
3736  srcText.pinIndices(srcStart, srcLength);
3737  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3738  }
3739 }
3740 
3741 inline int8_t
3743 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
3744 
3745 inline int8_t
3747  int32_t _length,
3748  const UnicodeString& srcText) const
3749 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
3750 
3751 inline int8_t
3753  int32_t srcLength) const
3754 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
3755 
3756 inline int8_t
3758  int32_t _length,
3759  const UnicodeString& srcText,
3760  int32_t srcStart,
3761  int32_t srcLength) const
3762 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
3763 
3764 inline int8_t
3766  int32_t _length,
3767  const UChar *srcChars) const
3768 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
3769 
3770 inline int8_t
3772  int32_t _length,
3773  const UChar *srcChars,
3774  int32_t srcStart,
3775  int32_t srcLength) const
3776 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
3777 
3778 inline int8_t
3780  int32_t limit,
3781  const UnicodeString& srcText,
3782  int32_t srcStart,
3783  int32_t srcLimit) const
3784 { return doCompareCodePointOrder(start, limit - start,
3785  srcText, srcStart, srcLimit - srcStart); }
3786 
3787 inline int8_t
3788 UnicodeString::doCaseCompare(int32_t start,
3789  int32_t thisLength,
3790  const UnicodeString &srcText,
3791  int32_t srcStart,
3792  int32_t srcLength,
3793  uint32_t options) const
3794 {
3795  if(srcText.isBogus()) {
3796  return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3797  } else {
3798  srcText.pinIndices(srcStart, srcLength);
3799  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
3800  }
3801 }
3802 
3803 inline int8_t
3804 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
3805  return doCaseCompare(0, length(), text, 0, text.length(), options);
3806 }
3807 
3808 inline int8_t
3810  int32_t _length,
3811  const UnicodeString &srcText,
3812  uint32_t options) const {
3813  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
3814 }
3815 
3816 inline int8_t
3818  int32_t srcLength,
3819  uint32_t options) const {
3820  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
3821 }
3822 
3823 inline int8_t
3825  int32_t _length,
3826  const UnicodeString &srcText,
3827  int32_t srcStart,
3828  int32_t srcLength,
3829  uint32_t options) const {
3830  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
3831 }
3832 
3833 inline int8_t
3835  int32_t _length,
3836  const UChar *srcChars,
3837  uint32_t options) const {
3838  return doCaseCompare(start, _length, srcChars, 0, _length, options);
3839 }
3840 
3841 inline int8_t
3843  int32_t _length,
3844  const UChar *srcChars,
3845  int32_t srcStart,
3846  int32_t srcLength,
3847  uint32_t options) const {
3848  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
3849 }
3850 
3851 inline int8_t
3853  int32_t limit,
3854  const UnicodeString &srcText,
3855  int32_t srcStart,
3856  int32_t srcLimit,
3857  uint32_t options) const {
3858  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
3859 }
3860 
3861 inline int32_t
3863  int32_t srcStart,
3864  int32_t srcLength,
3865  int32_t start,
3866  int32_t _length) const
3867 {
3868  if(!srcText.isBogus()) {
3869  srcText.pinIndices(srcStart, srcLength);
3870  if(srcLength > 0) {
3871  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3872  }
3873  }
3874  return -1;
3875 }
3876 
3877 inline int32_t
3879 { return indexOf(text, 0, text.length(), 0, length()); }
3880 
3881 inline int32_t
3883  int32_t start) const {
3884  pinIndex(start);
3885  return indexOf(text, 0, text.length(), start, length() - start);
3886 }
3887 
3888 inline int32_t
3890  int32_t start,
3891  int32_t _length) const
3892 { return indexOf(text, 0, text.length(), start, _length); }
3893 
3894 inline int32_t
3896  int32_t srcLength,
3897  int32_t start) const {
3898  pinIndex(start);
3899  return indexOf(srcChars, 0, srcLength, start, length() - start);
3900 }
3901 
3902 inline int32_t
3904  int32_t srcLength,
3905  int32_t start,
3906  int32_t _length) const
3907 { return indexOf(srcChars, 0, srcLength, start, _length); }
3908 
3909 inline int32_t
3911  int32_t start,
3912  int32_t _length) const
3913 { return doIndexOf(c, start, _length); }
3914 
3915 inline int32_t
3917  int32_t start,
3918  int32_t _length) const
3919 { return doIndexOf(c, start, _length); }
3920 
3921 inline int32_t
3923 { return doIndexOf(c, 0, length()); }
3924 
3925 inline int32_t
3927 { return indexOf(c, 0, length()); }
3928 
3929 inline int32_t
3931  int32_t start) const {
3932  pinIndex(start);
3933  return doIndexOf(c, start, length() - start);
3934 }
3935 
3936 inline int32_t
3938  int32_t start) const {
3939  pinIndex(start);
3940  return indexOf(c, start, length() - start);
3941 }
3942 
3943 inline int32_t
3945  int32_t srcLength,
3946  int32_t start,
3947  int32_t _length) const
3948 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
3949 
3950 inline int32_t
3952  int32_t srcLength,
3953  int32_t start) const {
3954  pinIndex(start);
3955  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
3956 }
3957 
3958 inline int32_t
3960  int32_t srcStart,
3961  int32_t srcLength,
3962  int32_t start,
3963  int32_t _length) const
3964 {
3965  if(!srcText.isBogus()) {
3966  srcText.pinIndices(srcStart, srcLength);
3967  if(srcLength > 0) {
3968  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
3969  }
3970  }
3971  return -1;
3972 }
3973 
3974 inline int32_t
3976  int32_t start,
3977  int32_t _length) const
3978 { return lastIndexOf(text, 0, text.length(), start, _length); }
3979 
3980 inline int32_t
3982  int32_t start) const {
3983  pinIndex(start);
3984  return lastIndexOf(text, 0, text.length(), start, length() - start);
3985 }
3986 
3987 inline int32_t
3989 { return lastIndexOf(text, 0, text.length(), 0, length()); }
3990 
3991 inline int32_t
3993  int32_t start,
3994  int32_t _length) const
3995 { return doLastIndexOf(c, start, _length); }
3996 
3997 inline int32_t
3999  int32_t start,
4000  int32_t _length) const {
4001  return doLastIndexOf(c, start, _length);
4002 }
4003 
4004 inline int32_t
4006 { return doLastIndexOf(c, 0, length()); }
4007 
4008 inline int32_t
4010  return lastIndexOf(c, 0, length());
4011 }
4012 
4013 inline int32_t
4015  int32_t start) const {
4016  pinIndex(start);
4017  return doLastIndexOf(c, start, length() - start);
4018 }
4019 
4020 inline int32_t
4022  int32_t start) const {
4023  pinIndex(start);
4024  return lastIndexOf(c, start, length() - start);
4025 }
4026 
4027 inline UBool
4029 { return compare(0, text.length(), text, 0, text.length()) == 0; }
4030 
4031 inline UBool
4033  int32_t srcStart,
4034  int32_t srcLength) const
4035 { return doCompare(0, srcLength, srcText, srcStart, srcLength) == 0; }
4036 
4037 inline UBool
4038 UnicodeString::startsWith(const UChar *srcChars, int32_t srcLength) const {
4039  if(srcLength < 0) {
4040  srcLength = u_strlen(srcChars);
4041  }
4042  return doCompare(0, srcLength, srcChars, 0, srcLength) == 0;
4043 }
4044 
4045 inline UBool
4046 UnicodeString::startsWith(const UChar *srcChars, int32_t srcStart, int32_t srcLength) const {
4047  if(srcLength < 0) {
4048  srcLength = u_strlen(srcChars);
4049  }
4050  return doCompare(0, srcLength, srcChars, srcStart, srcLength) == 0;
4051 }
4052 
4053 inline UBool
4055 { return doCompare(length() - text.length(), text.length(),
4056  text, 0, text.length()) == 0; }
4057 
4058 inline UBool
4060  int32_t srcStart,
4061  int32_t srcLength) const {
4062  srcText.pinIndices(srcStart, srcLength);
4063  return doCompare(length() - srcLength, srcLength,
4064  srcText, srcStart, srcLength) == 0;
4065 }
4066 
4067 inline UBool
4069  int32_t srcLength) const {
4070  if(srcLength < 0) {
4071  srcLength = u_strlen(srcChars);
4072  }
4073  return doCompare(length() - srcLength, srcLength,
4074  srcChars, 0, srcLength) == 0;
4075 }
4076 
4077 inline UBool
4079  int32_t srcStart,
4080  int32_t srcLength) const {
4081  if(srcLength < 0) {
4082  srcLength = u_strlen(srcChars + srcStart);
4083  }
4084  return doCompare(length() - srcLength, srcLength,
4085  srcChars, srcStart, srcLength) == 0;
4086 }
4087 
4088 //========================================
4089 // replace
4090 //========================================
4091 inline UnicodeString&
4093  int32_t _length,
4094  const UnicodeString& srcText)
4095 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4096 
4097 inline UnicodeString&
4099  int32_t _length,
4100  const UnicodeString& srcText,
4101  int32_t srcStart,
4102  int32_t srcLength)
4103 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4104 
4105 inline UnicodeString&
4107  int32_t _length,
4108  const UChar *srcChars,
4109  int32_t srcLength)
4110 { return doReplace(start, _length, srcChars, 0, srcLength); }
4111 
4112 inline UnicodeString&
4114  int32_t _length,
4115  const UChar *srcChars,
4116  int32_t srcStart,
4117  int32_t srcLength)
4118 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4119 
4120 inline UnicodeString&
4122  int32_t _length,
4123  UChar srcChar)
4124 { return doReplace(start, _length, &srcChar, 0, 1); }
4125 
4126 inline UnicodeString&
4128  int32_t limit,
4129  const UnicodeString& srcText)
4130 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4131 
4132 inline UnicodeString&
4134  int32_t limit,
4135  const UnicodeString& srcText,
4136  int32_t srcStart,
4137  int32_t srcLimit)
4138 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4139 
4140 inline UnicodeString&
4142  const UnicodeString& newText)
4143 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4144  newText, 0, newText.length()); }
4145 
4146 inline UnicodeString&
4148  int32_t _length,
4149  const UnicodeString& oldText,
4150  const UnicodeString& newText)
4151 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4152  newText, 0, newText.length()); }
4153 
4154 // ============================
4155 // extract
4156 // ============================
4157 inline void
4158 UnicodeString::doExtract(int32_t start,
4159  int32_t _length,
4160  UnicodeString& target) const
4161 { target.replace(0, target.length(), *this, start, _length); }
4162 
4163 inline void
4165  int32_t _length,
4166  UChar *target,
4167  int32_t targetStart) const
4168 { doExtract(start, _length, target, targetStart); }
4169 
4170 inline void
4172  int32_t _length,
4173  UnicodeString& target) const
4174 { doExtract(start, _length, target); }
4175 
4176 #if !UCONFIG_NO_CONVERSION
4177 
4178 inline int32_t
4180  int32_t _length,
4181  char *dst,
4182  const char *codepage) const
4183 
4184 {
4185  // This dstSize value will be checked explicitly
4186  return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4187 }
4188 
4189 #endif
4190 
4191 inline void
4193  int32_t limit,
4194  UChar *dst,
4195  int32_t dstStart) const {
4196  pinIndex(start);
4197  pinIndex(limit);
4198  doExtract(start, limit - start, dst, dstStart);
4199 }
4200 
4201 inline UnicodeString
4202 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4203  return tempSubString(start, limit - start);
4204 }
4205 
4206 inline UChar
4207 UnicodeString::doCharAt(int32_t offset) const
4208 {
4209  if((uint32_t)offset < (uint32_t)length()) {
4210  return getArrayStart()[offset];
4211  } else {
4212  return kInvalidUChar;
4213  }
4214 }
4215 
4216 inline UChar
4217 UnicodeString::charAt(int32_t offset) const
4218 { return doCharAt(offset); }
4219 
4220 inline UChar
4221 UnicodeString::operator[] (int32_t offset) const
4222 { return doCharAt(offset); }
4223 
4224 inline UBool
4226  return fShortLength == 0;
4227 }
4228 
4229 //========================================
4230 // Write implementation methods
4231 //========================================
4232 inline void
4233 UnicodeString::setLength(int32_t len) {
4234  if(len <= 127) {
4235  fShortLength = (int8_t)len;
4236  } else {
4237  fShortLength = (int8_t)-1;
4238  fUnion.fFields.fLength = len;
4239  }
4240 }
4241 
4242 inline void
4243 UnicodeString::setToEmpty() {
4244  fShortLength = 0;
4245  fFlags = kShortString;
4246 }
4247 
4248 inline void
4249 UnicodeString::setArray(UChar *array, int32_t len, int32_t capacity) {
4250  setLength(len);
4251  fUnion.fFields.fArray = array;
4252  fUnion.fFields.fCapacity = capacity;
4253 }
4254 
4255 inline const UChar *
4257  if(!isWritable()) {
4258  return 0;
4259  } else {
4260  UChar *array = getArrayStart();
4261  int32_t len = length();
4262  if(len < getCapacity() && ((fFlags&kRefCounted) == 0 || refCount() == 1)) {
4263  /*
4264  * kRefCounted: Do not write the NUL if the buffer is shared.
4265  * That is mostly safe, except when the length of one copy was modified
4266  * without copy-on-write, e.g., via truncate(newLength) or remove(void).
4267  * Then the NUL would be written into the middle of another copy's string.
4268  */
4269  if(!(fFlags&kBufferIsReadonly)) {
4270  /*
4271  * We must not write to a readonly buffer, but it is known to be
4272  * NUL-terminated if len<capacity.
4273  * A shared, allocated buffer (refCount()>1) must not have its contents
4274  * modified, but the NUL at [len] is beyond the string contents,
4275  * and multiple string objects and threads writing the same NUL into the
4276  * same location is harmless.
4277  * In all other cases, the buffer is fully writable and it is anyway safe
4278  * to write the NUL.
4279  *
4280  * Note: An earlier version of this code tested whether there is a NUL
4281  * at [len] already, but, while safe, it generated lots of warnings from
4282  * tools like valgrind and Purify.
4283  */
4284  array[len] = 0;
4285  }
4286  return array;
4287  } else if(cloneArrayIfNeeded(len+1)) {
4288  array = getArrayStart();
4289  array[len] = 0;
4290  return array;
4291  } else {
4292  return 0;
4293  }
4294  }
4295 }
4296 
4297 inline UnicodeString&
4299 { return doReplace(0, length(), &ch, 0, 1); }
4300 
4301 inline UnicodeString&
4303 { return replace(0, length(), ch); }
4304 
4305 inline UnicodeString&
4307  int32_t srcStart,
4308  int32_t srcLength)
4309 {
4310  unBogus();
4311  return doReplace(0, length(), srcText, srcStart, srcLength);
4312 }
4313 
4314 inline UnicodeString&
4316  int32_t srcStart)
4317 {
4318  unBogus();
4319  srcText.pinIndex(srcStart);
4320  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4321 }
4322 
4323 inline UnicodeString&
4325 {
4326  return copyFrom(srcText);
4327 }
4328 
4329 inline UnicodeString&
4330 UnicodeString::setTo(const UChar *srcChars,
4331  int32_t srcLength)
4332 {
4333  unBogus();
4334  return doReplace(0, length(), srcChars, 0, srcLength);
4335 }
4336 
4337 inline UnicodeString&
4339 {
4340  unBogus();
4341  return doReplace(0, length(), &srcChar, 0, 1);
4342 }
4343 
4344 inline UnicodeString&
4346 {
4347  unBogus();
4348  return replace(0, length(), srcChar);
4349 }
4350 
4351 inline UnicodeString&
4353  int32_t srcStart,
4354  int32_t srcLength)
4355 { return doReplace(length(), 0, srcText, srcStart, srcLength); }
4356 
4357 inline UnicodeString&
4359 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4360 
4361 inline UnicodeString&
4363  int32_t srcStart,
4364  int32_t srcLength)
4365 { return doReplace(length(), 0, srcChars, srcStart, srcLength); }
4366 
4367 inline UnicodeString&
4369  int32_t srcLength)
4370 { return doReplace(length(), 0, srcChars, 0, srcLength); }
4371 
4372 inline UnicodeString&
4374 { return doReplace(length(), 0, &srcChar, 0, 1); }
4375 
4376 inline UnicodeString&
4378 { return doReplace(length(), 0, &ch, 0, 1); }
4379 
4380 inline UnicodeString&
4382  return append(ch);
4383 }
4384 
4385 inline UnicodeString&
4387 { return doReplace(length(), 0, srcText, 0, srcText.length()); }
4388 
4389 inline UnicodeString&
4391  const UnicodeString& srcText,
4392  int32_t srcStart,
4393  int32_t srcLength)
4394 { return doReplace(start, 0, srcText, srcStart, srcLength); }
4395 
4396 inline UnicodeString&
4398  const UnicodeString& srcText)
4399 { return doReplace(start, 0, srcText, 0, srcText.length()); }
4400 
4401 inline UnicodeString&
4403  const UChar *srcChars,
4404  int32_t srcStart,
4405  int32_t srcLength)
4406 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
4407 
4408 inline UnicodeString&
4410  const UChar *srcChars,
4411  int32_t srcLength)
4412 { return doReplace(start, 0, srcChars, 0, srcLength); }
4413 
4414 inline UnicodeString&
4416  UChar srcChar)
4417 { return doReplace(start, 0, &srcChar, 0, 1); }
4418 
4419 inline UnicodeString&
4421  UChar32 srcChar)
4422 { return replace(start, 0, srcChar); }
4423 
4424 
4425 inline UnicodeString&
4427 {
4428  // remove() of a bogus string makes the string empty and non-bogus
4429  // we also un-alias a read-only alias to deal with NUL-termination
4430  // issues with getTerminatedBuffer()
4431  if(fFlags & (kIsBogus|kBufferIsReadonly)) {
4432  setToEmpty();
4433  } else {
4434  fShortLength = 0;
4435  }
4436  return *this;
4437 }
4438 
4439 inline UnicodeString&
4441  int32_t _length)
4442 {
4443  if(start <= 0 && _length == INT32_MAX) {
4444  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4445  return remove();
4446  }
4447  return doReplace(start, _length, NULL, 0, 0);
4448 }
4449 
4450 inline UnicodeString&
4452  int32_t limit)
4453 { return doReplace(start, limit - start, NULL, 0, 0); }
4454 
4455 inline UnicodeString &
4456 UnicodeString::retainBetween(int32_t start, int32_t limit) {
4457  truncate(limit);
4458  return doReplace(0, start, NULL, 0, 0);
4459 }
4460 
4461 inline UBool
4462 UnicodeString::truncate(int32_t targetLength)
4463 {
4464  if(isBogus() && targetLength == 0) {
4465  // truncate(0) of a bogus string makes the string empty and non-bogus
4466  unBogus();
4467  return FALSE;
4468  } else if((uint32_t)targetLength < (uint32_t)length()) {
4469  setLength(targetLength);
4470  if(fFlags&kBufferIsReadonly) {
4471  fUnion.fFields.fCapacity = targetLength; // not NUL-terminated any more
4472  }
4473  return TRUE;
4474  } else {
4475  return FALSE;
4476  }
4477 }
4478 
4479 inline UnicodeString&
4481 { return doReverse(0, length()); }
4482 
4483 inline UnicodeString&
4485  int32_t _length)
4486 { return doReverse(start, _length); }
4487 
4489 
4490 #endif