00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011 #ifndef UNICODESET_H
00012 #define UNICODESET_H
00013
00014 #include "unicode/unifilt.h"
00015 #include "unicode/unistr.h"
00016 #include "unicode/uset.h"
00017
00023 U_NAMESPACE_BEGIN
00024
// Forward declarations. Within this header these types appear only behind
// pointers or references (data members and function parameters of
// UnicodeSet), so their full definitions are not required here.
00025 class BMPSet;
00026 class ParsePosition;
00027 class SymbolTable;
00028 class UnicodeSetStringSpan;
00029 class UVector;
00030 class RuleCharacterIterator;
00031
// UnicodeSet: an exported (U_COMMON_API) class deriving publicly from
// UnicodeFilter. Judging by its interface it models a mutable collection of
// code points (UChar32) and strings (UnicodeString) that can also be built
// from / rendered to a textual pattern.
//
// NOTE(review): this header only declares the interface. Remarks below that
// say "presumably" are inferred from names and signatures and must be
// confirmed against the implementation before being relied upon.
00263 class U_COMMON_API UnicodeSet : public UnicodeFilter {
00264
// --- Private state (default access of a `class` is private) ---------------
// len / capacity / list: presumably the used length, allocated size and
// storage of the code point list — TODO confirm.
00265 int32_t len;
00266 int32_t capacity;
00267 UChar32* list;
// bmpSet: when non-NULL the set reports itself as frozen (see isFrozen()).
00268 BMPSet *bmpSet;
// buffer / bufferCapacity: presumably scratch storage exchanged with `list`
// by swapBuffers() — TODO confirm.
00269 UChar32* buffer;
00270 int32_t bufferCapacity;
// patLen / pat: presumably the length and text of a cached pattern string,
// maintained through setPattern()/releasePattern() — TODO confirm.
00271 int32_t patLen;
00272
00282 UChar *pat;
// strings: presumably the multi-character string members of the set;
// allocateStrings() looks like its lazy allocator — TODO confirm.
00283 UVector* strings;
// stringSpan: when non-NULL the set reports itself as frozen (see isFrozen()).
00284 UnicodeSetStringSpan *stringSpan;
00285
00286 public:
00287
// Numeric bounds exposed as class-scope constants: 0 and 0x10ffff.
// Presumably the smallest and largest code point values the set handles.
00288 enum {
00293 MIN_VALUE = 0,
00294
00299 MAX_VALUE = 0x10ffff
00300 };
00301
00302
00303
00304
00305
00306 public:
00307
// --- Construction, copy, destruction ---------------------------------------
// Default constructor.
00312 UnicodeSet();
00313
// Constructs from a code point range given by `start` and `end`.
// NOTE(review): inclusiveness of `end` is not visible here — confirm.
00322 UnicodeSet(UChar32 start, UChar32 end);
00323
// Constructs from a textual pattern; `status` is an in/out ICU error code.
00332 UnicodeSet(const UnicodeString& pattern,
00333 UErrorCode& status);
00334
// As above, with parsing `options` (bit mask, meaning defined elsewhere) and
// an optional `symbols` table passed by pointer.
00347 UnicodeSet(const UnicodeString& pattern,
00348 uint32_t options,
00349 const SymbolTable* symbols,
00350 UErrorCode& status);
00351
// As above, additionally taking a ParsePosition by non-const reference;
// presumably parsing starts at `pos` and `pos` is advanced — TODO confirm.
00365 UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
00366 uint32_t options,
00367 const SymbolTable* symbols,
00368 UErrorCode& status);
00369
// Copy constructor.
00374 UnicodeSet(const UnicodeSet& o);
00375
// Virtual destructor.
00380 virtual ~UnicodeSet();
00381
// Copy assignment; returns a reference to a UnicodeSet (presumably *this).
00387 UnicodeSet& operator=(const UnicodeSet& o);
00388
// Equality comparison against another UnicodeSet (virtual).
00400 virtual UBool operator==(const UnicodeSet& o) const;
00401
// Inequality; defined inline after the class as the negation of operator==.
00407 UBool operator!=(const UnicodeSet& o) const;
00408
// Returns a copy typed as the base UnicodeFunctor*.
// NOTE(review): ownership of the returned object is presumably the caller's.
00418 virtual UnicodeFunctor* clone() const;
00419
// Hash code for this set.
00427 virtual int32_t hashCode(void) const;
00428
00429
00430
00431
00432
// --- Freezing --------------------------------------------------------------
// Defined inline after the class: TRUE when bmpSet or stringSpan is non-NULL.
00441 inline UBool isFrozen() const;
00442
// Non-const: presumably switches this object into the frozen state and
// returns it as a UnicodeFunctor* — TODO confirm.
00456 UnicodeFunctor *freeze();
00457
// Const: presumably returns a new, modifiable copy — TODO confirm ownership.
00466 UnicodeFunctor *cloneAsThawed() const;
00467
00468
00469
00470
00471
// --- Assignment from ranges / patterns / properties ------------------------
// Presumably replaces the contents with the range start..end; returns a
// UnicodeSet& to allow call chaining.
00482 UnicodeSet& set(UChar32 start, UChar32 end);
00483
// Static check on `pattern` at offset `pos`; presumably tells whether the
// text there looks like the start of a set pattern — TODO confirm.
00489 static UBool resemblesPattern(const UnicodeString& pattern,
00490 int32_t pos);
00491
// applyPattern overloads mirror the pattern-taking constructors above but
// operate on an existing object and return UnicodeSet&.
00504 UnicodeSet& applyPattern(const UnicodeString& pattern,
00505 UErrorCode& status);
00506
00523 UnicodeSet& applyPattern(const UnicodeString& pattern,
00524 uint32_t options,
00525 const SymbolTable* symbols,
00526 UErrorCode& status);
00527
00559 UnicodeSet& applyPattern(const UnicodeString& pattern,
00560 ParsePosition& pos,
00561 uint32_t options,
00562 const SymbolTable* symbols,
00563 UErrorCode& status);
00564
// Writes a pattern representation into `result` and returns a
// UnicodeString& (presumably `result`). `escapeUnprintable` defaults to FALSE.
00578 virtual UnicodeString& toPattern(UnicodeString& result,
00579 UBool escapeUnprintable = FALSE) const;
00580
// Presumably sets the contents to the code points whose property `prop`
// has the integer `value`; `ec` is an in/out ICU error code.
00603 UnicodeSet& applyIntPropertyValue(UProperty prop,
00604 int32_t value,
00605 UErrorCode& ec);
00606
// String-based variant taking a property name/alias and a value name.
00636 UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
00637 const UnicodeString& value,
00638 UErrorCode& ec);
00639
// --- Queries ----------------------------------------------------------------
// Number of elements. NOTE(review): whether strings are counted is not
// visible here.
00648 virtual int32_t size(void) const;
00649
// Whether the set has no elements.
00656 virtual UBool isEmpty(void) const;
00657
// Membership test for a single code point.
00665 virtual UBool contains(UChar32 c) const;
00666
// Membership test for a range; presumably TRUE only if every code point in
// start..end is present.
00675 virtual UBool contains(UChar32 start, UChar32 end) const;
00676
// Membership test for a string element.
00684 UBool contains(const UnicodeString& s) const;
00685
// containsAll / containsNone / containsSome come in parallel overloads for
// a range, another set, and a string. The containsSome overloads are defined
// inline after the class as the negation of the matching containsNone.
00693 virtual UBool containsAll(const UnicodeSet& c) const;
00694
00702 UBool containsAll(const UnicodeString& s) const;
00703
00712 UBool containsNone(UChar32 start, UChar32 end) const;
00713
00721 UBool containsNone(const UnicodeSet& c) const;
00722
00730 UBool containsNone(const UnicodeString& s) const;
00731
00740 inline UBool containsSome(UChar32 start, UChar32 end) const;
00741
00749 inline UBool containsSome(const UnicodeSet& s) const;
00750
00758 inline UBool containsSome(const UnicodeString& s) const;
00759
// --- Span operations ---------------------------------------------------------
// Four variants: forward/backward over UTF-16 (const UChar*) or UTF-8
// (const char*) text of `length` units, steered by a USetSpanCondition.
// Each returns an int32_t; presumably the length/offset of the span.
// NOTE(review): handling of negative `length` is not visible here.
00778 int32_t span(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00779
00797 int32_t spanBack(const UChar *s, int32_t length, USetSpanCondition spanCondition) const;
00798
00817 int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00818
00836 int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
00837
// Matching against a Replaceable text. `offset` is passed by non-const
// reference, so it may be updated by the call; `limit` and `incremental`
// are inputs. Presumably implements the matcher interface inherited via
// UnicodeFilter — TODO confirm.
00842 virtual UMatchDegree matches(const Replaceable& text,
00843 int32_t& offset,
00844 int32_t limit,
00845 UBool incremental);
00846
00847 private:
// Private static helper used with `matches`-style arguments; presumably
// compares `s` against `text` between start and limit — TODO confirm.
00869 static int32_t matchRest(const Replaceable& text,
00870 int32_t start, int32_t limit,
00871 const UnicodeString& s);
00872
// Private lookup helper; presumably returns the index in `list` associated
// with code point `c` — TODO confirm.
00882 int32_t findCodePoint(UChar32 c) const;
00883
00884 public:
00885
// Presumably unions the characters this set can match into `toUnionTo`.
00893 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
00894
// indexOf / charAt: presumably inverse mappings between a code point and
// its ordinal position within the set — TODO confirm the not-found result.
00903 int32_t indexOf(UChar32 c) const;
00904
00914 UChar32 charAt(int32_t index) const;
00915
// --- Modification ------------------------------------------------------------
// All public modifiers return UnicodeSet&, which permits call chaining.
// Add a range, a single code point, or a string.
00930 virtual UnicodeSet& add(UChar32 start, UChar32 end);
00931
00939 UnicodeSet& add(UChar32 c);
00940
00952 UnicodeSet& add(const UnicodeString& s);
00953
00954 private:
// Private static helper; presumably yields the code point when `s` holds
// exactly one, and a sentinel otherwise — TODO confirm.
00960 static int32_t getSingleCP(const UnicodeString& s);
00961
// Private string-adding helper (returns void, unlike the public add()).
00962 void _add(const UnicodeString& s);
00963
00964 public:
// *All(const UnicodeString&) overloads: presumably treat `s` as a bag of
// individual code points rather than as one string element — TODO confirm.
00973 UnicodeSet& addAll(const UnicodeString& s);
00974
00983 UnicodeSet& retainAll(const UnicodeString& s);
00984
00993 UnicodeSet& complementAll(const UnicodeString& s);
00994
01003 UnicodeSet& removeAll(const UnicodeString& s);
01004
// Static factories returning a raw UnicodeSet*.
// NOTE(review): the caller presumably owns the returned object.
01013 static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
01014
01015
01023 static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
01024
// retain: presumably keeps only the given range / code point.
01038 virtual UnicodeSet& retain(UChar32 start, UChar32 end);
01039
01040
01046 UnicodeSet& retain(UChar32 c);
01047
// remove: range, single code point, or string.
01061 virtual UnicodeSet& remove(UChar32 start, UChar32 end);
01062
01070 UnicodeSet& remove(UChar32 c);
01071
01081 UnicodeSet& remove(const UnicodeString& s);
01082
// complement: whole set (no arguments), a range, a single code point, or a
// string; presumably toggles membership of the given elements.
01090 virtual UnicodeSet& complement(void);
01091
01106 virtual UnicodeSet& complement(UChar32 start, UChar32 end);
01107
01115 UnicodeSet& complement(UChar32 c);
01116
01127 UnicodeSet& complement(const UnicodeString& s);
01128
// Set-with-set operations (all virtual).
01141 virtual UnicodeSet& addAll(const UnicodeSet& c);
01142
01154 virtual UnicodeSet& retainAll(const UnicodeSet& c);
01155
01167 virtual UnicodeSet& removeAll(const UnicodeSet& c);
01168
01179 virtual UnicodeSet& complementAll(const UnicodeSet& c);
01180
// Presumably removes every element.
01187 virtual UnicodeSet& clear(void);
01188
// `attribute` is an int32_t selector whose values are defined elsewhere
// (presumably USET_* constants from uset.h) — TODO confirm.
01214 UnicodeSet& closeOver(int32_t attribute);
01215
// --- Range iteration ---------------------------------------------------------
// Number of ranges, then start/end of the range at `index`.
// NOTE(review): behaviour for an out-of-range `index` is not visible here.
01223 virtual int32_t getRangeCount(void) const;
01224
01232 virtual UChar32 getRangeStart(int32_t index) const;
01233
01241 virtual UChar32 getRangeEnd(int32_t index) const;
01242
// Writes a serialized form into `dest` (capacity `destCapacity`, in uint16_t
// units) and returns an int32_t, presumably the length written or required;
// `ec` is an in/out ICU error code.
01291 int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
01292
// Presumably trims internal storage to the minimum needed.
01299 virtual UnicodeSet& compact();
01300
// ICU run-time type identification pair (static and per-object class ID).
01312 static UClassID U_EXPORT2 getStaticClassID(void);
01313
01322 virtual UClassID getDynamicClassID(void) const;
01323
01324 private:
01325
01326
01327
// USetAccess is granted access to the private members below.
01328 friend class USetAccess;
01329
// Private accessors for the string elements: count, and element by index.
01330 int32_t getStringCount() const;
01331
01332 const UnicodeString* getString(int32_t index) const;
01333
01334
01335
01336
01337
01338 private:
01339
// Private virtual hook; presumably overrides a UnicodeFilter/UnicodeMatcher
// method used for index-byte prefiltering — TODO confirm.
01345 virtual UBool matchesIndexValue(uint8_t v) const;
01346
01347 private:
01348
01349
01350
01351
01352
// Private copy-like constructor. The UBool parameter is unnamed in this
// declaration; presumably it only distinguishes this overload from the
// public copy constructor — confirm in the implementation.
01353 UnicodeSet(const UnicodeSet& o, UBool );
01354
01355
01356
01357
01358
// Private parsing workhorse operating on a RuleCharacterIterator; receives
// `rebuiltPat` by non-const reference, so it can append the text it parsed.
01359 void applyPattern(RuleCharacterIterator& chars,
01360 const SymbolTable* symbols,
01361 UnicodeString& rebuiltPat,
01362 uint32_t options,
01363 UErrorCode& ec);
01364
01365
01366
01367
01368
// Storage management helpers for `list` and `buffer`.
// NOTE(review): these return void, so allocation-failure reporting is not
// visible from the signatures.
01369 void ensureCapacity(int32_t newLen);
01370
01371 void ensureBufferCapacity(int32_t newLen);
01372
01373 void swapBuffers(void);
01374
// Returns a UBool and takes an in/out error code; presumably creates the
// `strings` vector on demand.
01375 UBool allocateStrings(UErrorCode &status);
01376
// Pattern-generation helpers behind toPattern().
01377 UnicodeString& _toPattern(UnicodeString& result,
01378 UBool escapeUnprintable) const;
01379
01380 UnicodeString& _generatePattern(UnicodeString& result,
01381 UBool escapeUnprintable) const;
01382
// Static helpers appending a string or a single code point to `buf`.
01383 static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
01384
01385 static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
01386
01387
01388
01389
01390
// Low-level list operations taking a raw UChar32 array, its length and an
// int8_t `polarity`. NOTE(review): the encoding of `polarity` is not visible
// in this header.
01391 void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
01392
01393 void add(const UChar32* other, int32_t otherLen, int8_t polarity);
01394
01395 void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
01396
// Property-pattern detection, for a string at `pos` or for an iterator.
01402 static UBool resemblesPropertyPattern(const UnicodeString& pattern,
01403 int32_t pos);
01404
01405 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
01406 int32_t iterOpts);
01407
// Property-pattern application, string-based and iterator-based.
01446 UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
01447 ParsePosition& ppos,
01448 UErrorCode &ec);
01449
01450 void applyPropertyPattern(RuleCharacterIterator& chars,
01451 UnicodeString& rebuiltPat,
01452 UErrorCode& ec);
01453
// Predicate callback type: receives a code point plus an opaque context
// pointer and returns a UBool.
01458 typedef UBool (*Filter)(UChar32 codePoint, void* context);
01459
// Applies a Filter callback with its `context`. NOTE(review): the meaning of
// `src` is not visible here (presumably identifies a property data source).
01469 void applyFilter(Filter filter,
01470 void* context,
01471 int32_t src,
01472 UErrorCode &status);
01473
// Presumably store / discard the cached pattern held in pat/patLen.
01477 void setPattern(const UnicodeString& newPat);
01481 void releasePattern();
01482
// UnicodeSetIterator is granted access to private members.
01483 friend class UnicodeSetIterator;
01484 };
01485
01486 inline UBool UnicodeSet::operator!=(const UnicodeSet& o) const {
01487 return !operator==(o);
01488 }
01489
01490 inline UBool UnicodeSet::isFrozen() const {
01491 return (UBool)(bmpSet!=NULL || stringSpan!=NULL);
01492 }
01493
01494 inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
01495 return !containsNone(start, end);
01496 }
01497
01498 inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
01499 return !containsNone(s);
01500 }
01501
01502 inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
01503 return !containsNone(s);
01504 }
01505
01506 U_NAMESPACE_END
01507
01508 #endif