ICU 56.1  56.1
uset.h
Go to the documentation of this file.
1 /*
2 *******************************************************************************
3 *
4 * Copyright (C) 2002-2014, International Business Machines
5 * Corporation and others. All Rights Reserved.
6 *
7 *******************************************************************************
8 * file name: uset.h
9 * encoding: US-ASCII
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2002mar07
14 * created by: Markus W. Scherer
15 *
16 * C version of UnicodeSet.
17 */
18 
19 
27 #ifndef __USET_H__
28 #define __USET_H__
29 
30 #include "unicode/utypes.h"
31 #include "unicode/uchar.h"
32 #include "unicode/localpointer.h"
33 
34 #ifndef UCNV_H
35 struct USet;
41 typedef struct USet USet;
42 #endif
43 
49 enum {
55 
83 
93 };
94 
150 typedef enum USetSpanCondition {
205 
206 enum {
214 };
215 
221 typedef struct USerializedSet {
226  const uint16_t *array;
231  int32_t bmpLength;
236  int32_t length;
241  uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
242 } USerializedSet;
243 
244 /*********************************************************************
245  * USet API
246  *********************************************************************/
247 
255 U_STABLE USet* U_EXPORT2
256 uset_openEmpty(void);
257 
268 U_STABLE USet* U_EXPORT2
269 uset_open(UChar32 start, UChar32 end);
270 
280 U_STABLE USet* U_EXPORT2
281 uset_openPattern(const UChar* pattern, int32_t patternLength,
282  UErrorCode* ec);
283 
295 U_STABLE USet* U_EXPORT2
296 uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
297  uint32_t options,
298  UErrorCode* ec);
299 
306 U_STABLE void U_EXPORT2
307 uset_close(USet* set);
308 
309 #if U_SHOW_CPLUSPLUS_API
310 
312 
323 
325 
326 #endif
327 
337 U_STABLE USet * U_EXPORT2
338 uset_clone(const USet *set);
339 
349 U_STABLE UBool U_EXPORT2
350 uset_isFrozen(const USet *set);
351 
366 U_STABLE void U_EXPORT2
367 uset_freeze(USet *set);
368 
379 U_STABLE USet * U_EXPORT2
380 uset_cloneAsThawed(const USet *set);
381 
391 U_STABLE void U_EXPORT2
392 uset_set(USet* set,
393  UChar32 start, UChar32 end);
394 
416 U_STABLE int32_t U_EXPORT2
417 uset_applyPattern(USet *set,
418  const UChar *pattern, int32_t patternLength,
419  uint32_t options,
420  UErrorCode *status);
421 
444 U_STABLE void U_EXPORT2
445 uset_applyIntPropertyValue(USet* set,
446  UProperty prop, int32_t value, UErrorCode* ec);
447 
483 U_STABLE void U_EXPORT2
484 uset_applyPropertyAlias(USet* set,
485  const UChar *prop, int32_t propLength,
486  const UChar *value, int32_t valueLength,
487  UErrorCode* ec);
488 
498 U_STABLE UBool U_EXPORT2
499 uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
500  int32_t pos);
501 
517 U_STABLE int32_t U_EXPORT2
518 uset_toPattern(const USet* set,
519  UChar* result, int32_t resultCapacity,
520  UBool escapeUnprintable,
521  UErrorCode* ec);
522 
531 U_STABLE void U_EXPORT2
532 uset_add(USet* set, UChar32 c);
533 
546 U_STABLE void U_EXPORT2
547 uset_addAll(USet* set, const USet *additionalSet);
548 
558 U_STABLE void U_EXPORT2
559 uset_addRange(USet* set, UChar32 start, UChar32 end);
560 
570 U_STABLE void U_EXPORT2
571 uset_addString(USet* set, const UChar* str, int32_t strLen);
572 
582 U_STABLE void U_EXPORT2
583 uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
584 
593 U_STABLE void U_EXPORT2
594 uset_remove(USet* set, UChar32 c);
595 
605 U_STABLE void U_EXPORT2
606 uset_removeRange(USet* set, UChar32 start, UChar32 end);
607 
617 U_STABLE void U_EXPORT2
618 uset_removeString(USet* set, const UChar* str, int32_t strLen);
619 
631 U_STABLE void U_EXPORT2
632 uset_removeAll(USet* set, const USet* removeSet);
633 
648 U_STABLE void U_EXPORT2
649 uset_retain(USet* set, UChar32 start, UChar32 end);
650 
663 U_STABLE void U_EXPORT2
664 uset_retainAll(USet* set, const USet* retain);
665 
674 U_STABLE void U_EXPORT2
675 uset_compact(USet* set);
676 
685 U_STABLE void U_EXPORT2
686 uset_complement(USet* set);
687 
699 U_STABLE void U_EXPORT2
700 uset_complementAll(USet* set, const USet* complement);
701 
709 U_STABLE void U_EXPORT2
710 uset_clear(USet* set);
711 
738 U_STABLE void U_EXPORT2
739 uset_closeOver(USet* set, int32_t attributes);
740 
747 U_STABLE void U_EXPORT2
748 uset_removeAllStrings(USet* set);
749 
757 U_STABLE UBool U_EXPORT2
758 uset_isEmpty(const USet* set);
759 
768 U_STABLE UBool U_EXPORT2
769 uset_contains(const USet* set, UChar32 c);
770 
780 U_STABLE UBool U_EXPORT2
781 uset_containsRange(const USet* set, UChar32 start, UChar32 end);
782 
791 U_STABLE UBool U_EXPORT2
792 uset_containsString(const USet* set, const UChar* str, int32_t strLen);
793 
804 U_STABLE int32_t U_EXPORT2
805 uset_indexOf(const USet* set, UChar32 c);
806 
817 U_STABLE UChar32 U_EXPORT2
818 uset_charAt(const USet* set, int32_t charIndex);
819 
828 U_STABLE int32_t U_EXPORT2
829 uset_size(const USet* set);
830 
839 U_STABLE int32_t U_EXPORT2
840 uset_getItemCount(const USet* set);
841 
860 U_STABLE int32_t U_EXPORT2
861 uset_getItem(const USet* set, int32_t itemIndex,
862  UChar32* start, UChar32* end,
863  UChar* str, int32_t strCapacity,
864  UErrorCode* ec);
865 
874 U_STABLE UBool U_EXPORT2
875 uset_containsAll(const USet* set1, const USet* set2);
876 
887 U_STABLE UBool U_EXPORT2
888 uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
889 
898 U_STABLE UBool U_EXPORT2
899 uset_containsNone(const USet* set1, const USet* set2);
900 
909 U_STABLE UBool U_EXPORT2
910 uset_containsSome(const USet* set1, const USet* set2);
911 
931 U_STABLE int32_t U_EXPORT2
932 uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
933 
952 U_STABLE int32_t U_EXPORT2
953 uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
954 
974 U_STABLE int32_t U_EXPORT2
975 uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
976 
995 U_STABLE int32_t U_EXPORT2
996 uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
997 
1006 U_STABLE UBool U_EXPORT2
1007 uset_equals(const USet* set1, const USet* set2);
1008 
1009 /*********************************************************************
1010  * Serialized set API
1011  *********************************************************************/
1012 
1062 U_STABLE int32_t U_EXPORT2
1063 uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1064 
1073 U_STABLE UBool U_EXPORT2
1074 uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1075 
1083 U_STABLE void U_EXPORT2
1084 uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
1085 
1094 U_STABLE UBool U_EXPORT2
1095 uset_serializedContains(const USerializedSet* set, UChar32 c);
1096 
1106 U_STABLE int32_t U_EXPORT2
1107 uset_getSerializedRangeCount(const USerializedSet* set);
1108 
1122 U_STABLE UBool U_EXPORT2
1123 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1124  UChar32* pStart, UChar32* pEnd);
1125 
1126 #endif
UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns TRUE if the given USet contains all characters c where start <= c && c <= end...
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition: uset.h:241
void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
void uset_clear(USet *set)
Removes all of the elements from this set.
UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
USet * uset_clone(const USet *set)
Returns a copy of this object.
int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
Spans the longest substring that is a concatenation of set elements (characters or strings)...
Definition: uset.h:178
UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
One more than the last span condition.
Definition: uset.h:203
void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
const uint16_t * array
The serialized Unicode Set.
Definition: uset.h:226
int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in the given USet.
USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
"Smart pointer" class, closes a USet via uset_close().
void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
UBool uset_contains(const USet *set, UChar32 c)
Returns TRUE if the given USet contains the given character.
UBool uset_isEmpty(const USet *set)
Returns TRUE if the given USet contains no characters and no strings.
int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
void uset_removeAllStrings(USet *set)
Remove all strings from this set.
void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property...
void uset_freeze(USet *set)
Freeze the set (make it immutable).
Ignore white space within patterns unless quoted or escaped.
Definition: uset.h:54
USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
#define U_NAMESPACE_BEGIN
This is used to begin a declaration of a public ICU C++ API.
Definition: uversion.h:129
USet * uset_openEmpty(void)
Create an empty USet object.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:535
UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:332
void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns TRUE if the given USet contains the given string.
int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set...
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition: uset.h:150
C API: Unicode Properties.
int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
uint16_t UChar
Define UChar to be UCHAR_TYPE, if that is #defined (for example, to char16_t), or wchar_t if that is ...
Definition: umachine.h:312
void uset_complement(USet *set)
Inverts this set.
#define U_NAMESPACE_END
This is used to end a declaration of a public ICU C++ API.
Definition: uversion.h:130
UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
struct USet USet
Definition: ucnv.h:67
int32_t length
The total length of the array.
Definition: uset.h:236
void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
UProperty
Selection constants for Unicode properties.
Definition: uchar.h:161
UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns TRUE if the given USerializedSet contains the given character.
int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
UErrorCode
Error code to replace exception handling, so that the code is compatible with all C++ compilers...
Definition: utypes.h:476
UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
Enable case insensitive matching.
Definition: uset.h:82
void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
Enable case insensitive matching.
Definition: uset.h:92
int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
Basic definitions for ICU, for both C and C++ APIs.
int32_t bmpLength
The length of the array that contains BMP characters.
Definition: uset.h:231
A serialized form of a Unicode set.
Definition: uset.h:221
Continues a span() while there is a set element at the current position.
Definition: uset.h:198
void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
Capacity of USerializedSet::staticArray.
Definition: uset.h:213
void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
Continues a span() while there is no set element at the current position.
Definition: uset.h:163
#define U_STABLE
This is used to declare a function as a stable public ICU C API.
Definition: umachine.h:109
int8_t UBool
The ICU boolean type.
Definition: umachine.h:234
void uset_close(USet *set)
Disposes of the storage used by a USet object.