ICU 76.1 76.1
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
34
35#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
36#include <string>
37#include <string_view>
38#include "unicode/char16ptr.h"
40#include "unicode/utf16.h"
41#endif
42
43#ifndef USET_DEFINED
44
45#ifndef U_IN_DOXYGEN
46#define USET_DEFINED
47#endif
54typedef struct USet USet;
55#endif
56
68enum {
74
102
115
130
186typedef enum USetSpanCondition {
235#ifndef U_HIDE_DEPRECATED_API
241#endif // U_HIDE_DEPRECATED_API
243
244enum {
253
281
282/*********************************************************************
283 * USet API
284 *********************************************************************/
285
293U_CAPI USet* U_EXPORT2
295
306U_CAPI USet* U_EXPORT2
308
318U_CAPI USet* U_EXPORT2
319uset_openPattern(const UChar* pattern, int32_t patternLength,
320 UErrorCode* ec);
321
335U_CAPI USet* U_EXPORT2
336uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
337 uint32_t options,
338 UErrorCode* ec);
339
346U_CAPI void U_EXPORT2
348
349#if U_SHOW_CPLUSPLUS_API
350
351U_NAMESPACE_BEGIN
352
363
364U_NAMESPACE_END
365
366#endif
367
377U_CAPI USet * U_EXPORT2
378uset_clone(const USet *set);
379
389U_CAPI UBool U_EXPORT2
390uset_isFrozen(const USet *set);
391
406U_CAPI void U_EXPORT2
408
419U_CAPI USet * U_EXPORT2
421
431U_CAPI void U_EXPORT2
433 UChar32 start, UChar32 end);
434
459U_CAPI int32_t U_EXPORT2
461 const UChar *pattern, int32_t patternLength,
462 uint32_t options,
463 UErrorCode *status);
464
487U_CAPI void U_EXPORT2
489 UProperty prop, int32_t value, UErrorCode* ec);
490
526U_CAPI void U_EXPORT2
528 const UChar *prop, int32_t propLength,
529 const UChar *value, int32_t valueLength,
530 UErrorCode* ec);
531
541U_CAPI UBool U_EXPORT2
542uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
543 int32_t pos);
544
560U_CAPI int32_t U_EXPORT2
562 UChar* result, int32_t resultCapacity,
563 UBool escapeUnprintable,
564 UErrorCode* ec);
565
574U_CAPI void U_EXPORT2
576
589U_CAPI void U_EXPORT2
590uset_addAll(USet* set, const USet *additionalSet);
591
601U_CAPI void U_EXPORT2
603
613U_CAPI void U_EXPORT2
614uset_addString(USet* set, const UChar* str, int32_t strLen);
615
625U_CAPI void U_EXPORT2
626uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
627
636U_CAPI void U_EXPORT2
638
648U_CAPI void U_EXPORT2
650
660U_CAPI void U_EXPORT2
661uset_removeString(USet* set, const UChar* str, int32_t strLen);
662
672U_CAPI void U_EXPORT2
673uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
674
686U_CAPI void U_EXPORT2
687uset_removeAll(USet* set, const USet* removeSet);
688
701U_CAPI void U_EXPORT2
702uset_retain(USet* set, UChar32 start, UChar32 end);
703
715U_CAPI void U_EXPORT2
716uset_retainString(USet *set, const UChar *str, int32_t length);
717
727U_CAPI void U_EXPORT2
728uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
729
742U_CAPI void U_EXPORT2
743uset_retainAll(USet* set, const USet* retain);
744
753U_CAPI void U_EXPORT2
755
769U_CAPI void U_EXPORT2
771
785U_CAPI void U_EXPORT2
787
798U_CAPI void U_EXPORT2
799uset_complementString(USet *set, const UChar *str, int32_t length);
800
810U_CAPI void U_EXPORT2
811uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
812
824U_CAPI void U_EXPORT2
825uset_complementAll(USet* set, const USet* complement);
826
834U_CAPI void U_EXPORT2
836
865U_CAPI void U_EXPORT2
866uset_closeOver(USet* set, int32_t attributes);
867
874U_CAPI void U_EXPORT2
876
884U_CAPI UBool U_EXPORT2
885uset_isEmpty(const USet* set);
886
892U_CAPI UBool U_EXPORT2
894
903U_CAPI UBool U_EXPORT2
905
915U_CAPI UBool U_EXPORT2
916uset_containsRange(const USet* set, UChar32 start, UChar32 end);
917
926U_CAPI UBool U_EXPORT2
927uset_containsString(const USet* set, const UChar* str, int32_t strLen);
928
939U_CAPI int32_t U_EXPORT2
940uset_indexOf(const USet* set, UChar32 c);
941
957U_CAPI UChar32 U_EXPORT2
958uset_charAt(const USet* set, int32_t charIndex);
959
975U_CAPI int32_t U_EXPORT2
976uset_size(const USet* set);
977
987U_CAPI int32_t U_EXPORT2
989
990#ifndef U_HIDE_DRAFT_API
991
1000U_CAPI int32_t U_EXPORT2
1002
1015U_CAPI const UChar* U_EXPORT2
1016uset_getString(const USet *set, int32_t index, int32_t *pLength);
1017
1018#endif // U_HIDE_DRAFT_API
1019
1030U_CAPI int32_t U_EXPORT2
1032
1063U_CAPI int32_t U_EXPORT2
1064uset_getItem(const USet* set, int32_t itemIndex,
1065 UChar32* start, UChar32* end,
1066 UChar* str, int32_t strCapacity,
1067 UErrorCode* ec);
1068
1077U_CAPI UBool U_EXPORT2
1078uset_containsAll(const USet* set1, const USet* set2);
1079
1090U_CAPI UBool U_EXPORT2
1091uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1092
1101U_CAPI UBool U_EXPORT2
1102uset_containsNone(const USet* set1, const USet* set2);
1103
1112U_CAPI UBool U_EXPORT2
1113uset_containsSome(const USet* set1, const USet* set2);
1114
1134U_CAPI int32_t U_EXPORT2
1135uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1136
1155U_CAPI int32_t U_EXPORT2
1156uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1157
1177U_CAPI int32_t U_EXPORT2
1178uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1179
1198U_CAPI int32_t U_EXPORT2
1199uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1200
1209U_CAPI UBool U_EXPORT2
1210uset_equals(const USet* set1, const USet* set2);
1211
1212/*********************************************************************
1213 * Serialized set API
1214 *********************************************************************/
1215
1265U_CAPI int32_t U_EXPORT2
1266uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1267
1276U_CAPI UBool U_EXPORT2
1277uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1278
1286U_CAPI void U_EXPORT2
1288
1297U_CAPI UBool U_EXPORT2
1299
1309U_CAPI int32_t U_EXPORT2
1311
1325U_CAPI UBool U_EXPORT2
1326uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1327 UChar32* pStart, UChar32* pEnd);
1328
1329#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1330#ifndef U_HIDE_DRAFT_API
1331
1332namespace U_HEADER_ONLY_NAMESPACE {
1333
1334// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1335// not intended to be used via export from the ICU DLL.
1336
1342public:
1345
1347 bool operator==(const USetCodePointIterator &other) const {
1348 // No need to compare rangeCount & end given private constructor
1349 // and assuming we don't compare iterators across the set being modified.
1350 // And comparing rangeIndex is redundant with comparing c.
1351 // We might even skip comparing uset.
1352 // Unless we want operator==() to be "correct" for more than iteration.
1353 return uset == other.uset && c == other.c;
1354 }
1355
1357 bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1358
1360 UChar32 operator*() const { return c; }
1361
1367 if (c < end) {
1368 ++c;
1369 } else if (rangeIndex < rangeCount) {
1370 UErrorCode errorCode = U_ZERO_ERROR;
1371 int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1372 if (U_SUCCESS(errorCode) && result == 0) {
1373 ++rangeIndex;
1374 } else {
1375 c = end = U_SENTINEL;
1376 }
1377 } else {
1378 c = end = U_SENTINEL;
1379 }
1380 return *this;
1381 }
1382
1388 USetCodePointIterator result(*this);
1389 operator++();
1390 return result;
1391 }
1392
1393private:
1394 friend class USetCodePoints;
1395
1396 USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
1397 : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount),
1398 c(U_SENTINEL), end(U_SENTINEL) {
1399 // Fetch the first range.
1400 operator++();
1401 }
1402
1403 const USet *uset;
1404 int32_t rangeIndex;
1405 int32_t rangeCount;
1406 UChar32 c, end;
1407};
1408
1428public:
1433 USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
1434
1436 USetCodePoints(const USetCodePoints &other) = default;
1437
1440 return USetCodePointIterator(uset, 0, rangeCount);
1441 }
1442
1445 return USetCodePointIterator(uset, rangeCount, rangeCount);
1446 }
1447
1448private:
1449 const USet *uset;
1450 int32_t rangeCount;
1451};
1452
1462 struct iterator {
1465
1467 bool operator==(const iterator &other) const { return c == other.c; }
1469 bool operator!=(const iterator &other) const { return !operator==(other); }
1470
1472 UChar32 operator*() const { return c; }
1473
1479 ++c;
1480 return *this;
1481 }
1482
1488 return c++;
1489 }
1490
1496 };
1497
1501 CodePointRange(const CodePointRange &other) = default;
1503 size_t size() const { return (rangeEnd + 1) - rangeStart; }
1505 iterator begin() const { return rangeStart; }
1507 iterator end() const { return rangeEnd + 1; }
1508
1519};
1520
1526public:
1528 USetRangeIterator(const USetRangeIterator &other) = default;
1529
1531 bool operator==(const USetRangeIterator &other) const {
1532 // No need to compare rangeCount given private constructor
1533 // and assuming we don't compare iterators across the set being modified.
1534 // We might even skip comparing uset.
1535 // Unless we want operator==() to be "correct" for more than iteration.
1536 return uset == other.uset && rangeIndex == other.rangeIndex;
1537 }
1538
1540 bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1541
1544 if (rangeIndex < rangeCount) {
1545 UChar32 start, end;
1546 UErrorCode errorCode = U_ZERO_ERROR;
1547 int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1548 if (U_SUCCESS(errorCode) && result == 0) {
1549 return CodePointRange(start, end);
1550 }
1551 }
1553 }
1554
1560 ++rangeIndex;
1561 return *this;
1562 }
1563
1569 USetRangeIterator result(*this);
1570 ++rangeIndex;
1571 return result;
1572 }
1573
1574private:
1575 friend class USetRanges;
1576
1577 USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
1578 : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {}
1579
1580 const USet *uset;
1581 int32_t rangeIndex;
1582 int32_t rangeCount;
1583};
1584
1609public:
1614 USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
1615
1617 USetRanges(const USetRanges &other) = default;
1618
1621 return USetRangeIterator(uset, 0, rangeCount);
1622 }
1623
1626 return USetRangeIterator(uset, rangeCount, rangeCount);
1627 }
1628
1629private:
1630 const USet *uset;
1631 int32_t rangeCount;
1632};
1633
1639public:
1642
1644 bool operator==(const USetStringIterator &other) const {
1645 // No need to compare count given private constructor
1646 // and assuming we don't compare iterators across the set being modified.
1647 // We might even skip comparing uset.
1648 // Unless we want operator==() to be "correct" for more than iteration.
1649 return uset == other.uset && index == other.index;
1650 }
1651
1653 bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1654
1656 std::u16string_view operator*() const {
1657 if (index < count) {
1658 int32_t length;
1659 const UChar *uchars = uset_getString(uset, index, &length);
1660 // assert uchars != nullptr;
1661 return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1662 }
1663 return {};
1664 }
1665
1671 ++index;
1672 return *this;
1673 }
1674
1680 USetStringIterator result(*this);
1681 ++index;
1682 return result;
1683 }
1684
1685private:
1686 friend class USetStrings;
1687
1688 USetStringIterator(const USet *uset, int32_t index, int32_t count)
1689 : uset(uset), index(index), count(count) {}
1690
1691 const USet *uset;
1692 int32_t index;
1693 int32_t count;
1694};
1695
1717public:
1722 USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {}
1723
1725 USetStrings(const USetStrings &other) = default;
1726
1729 return USetStringIterator(uset, 0, count);
1730 }
1731
1734 return USetStringIterator(uset, count, count);
1735 }
1736
1737private:
1738 const USet *uset;
1739 int32_t count;
1740};
1741#endif // U_HIDE_DRAFT_API
1742
1743#ifndef U_HIDE_DRAFT_API
1749public:
1752
1754 bool operator==(const USetElementIterator &other) const {
1755 // No need to compare rangeCount & end given private constructor
1756 // and assuming we don't compare iterators across the set being modified.
1757 // We might even skip comparing uset.
1758 // Unless we want operator==() to be "correct" for more than iteration.
1759 return uset == other.uset && c == other.c && index == other.index;
1760 }
1761
1763 bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1764
1766 std::u16string operator*() const {
1767 if (c >= 0) {
1768 return c <= 0xffff ?
1769 std::u16string({static_cast<char16_t>(c)}) :
1770 std::u16string({U16_LEAD(c), U16_TRAIL(c)});
1771 } else if (index < totalCount) {
1772 int32_t length;
1773 const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1774 // assert uchars != nullptr;
1775 return {uprv_char16PtrFromUChar(uchars), static_cast<size_t>(length)};
1776 } else {
1777 return {};
1778 }
1779 }
1780
1786 if (c < end) {
1787 ++c;
1788 } else if (index < rangeCount) {
1789 UErrorCode errorCode = U_ZERO_ERROR;
1790 int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1791 if (U_SUCCESS(errorCode) && result == 0) {
1792 ++index;
1793 } else {
1794 c = end = U_SENTINEL;
1795 }
1796 } else if (c >= 0) {
1797 // assert index == rangeCount;
1798 // Switch from the last range to the first string.
1799 c = end = U_SENTINEL;
1800 } else {
1801 ++index;
1802 }
1803 return *this;
1804 }
1805
1811 USetElementIterator result(*this);
1812 operator++();
1813 return result;
1814 }
1815
1816private:
1817 friend class USetElements;
1818
1819 USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount)
1820 : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount),
1821 c(U_SENTINEL), end(U_SENTINEL) {
1822 if (index < rangeCount) {
1823 // Fetch the first range.
1824 operator++();
1825 }
1826 // Otherwise don't move beyond the (index - rangeCount)-th string.
1827 }
1828
1829 const USet *uset;
1830 int32_t index;
1832 int32_t rangeCount;
1842 int32_t totalCount;
1843 UChar32 c, end;
1844};
1845
1872public:
1877 USetElements(const USet *uset)
1878 : uset(uset), rangeCount(uset_getRangeCount(uset)),
1879 stringCount(uset_getStringCount(uset)) {}
1880
1882 USetElements(const USetElements &other) = default;
1883
1886 return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1887 }
1888
1891 return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1892 }
1893
1894private:
1895 const USet *uset;
1896 int32_t rangeCount, stringCount;
1897};
1898
1899} // namespace U_HEADER_ONLY_NAMESPACE
1900
1901#endif // U_HIDE_DRAFT_API
1902#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1903
1904#endif // __USET_H__
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
"Smart pointer" class, closes a USet via uset_close().
Iterator returned by USetCodePoints.
Definition uset.h:1341
bool operator==(const USetCodePointIterator &other) const
Definition uset.h:1347
USetCodePointIterator & operator++()
Pre-increment.
Definition uset.h:1366
bool operator!=(const USetCodePointIterator &other) const
Definition uset.h:1357
USetCodePointIterator operator++(int)
Post-increment.
Definition uset.h:1387
USetCodePointIterator(const USetCodePointIterator &other)=default
C++ "range" for iterating over the code points of a USet.
Definition uset.h:1427
USetCodePoints(const USetCodePoints &other)=default
USetCodePointIterator end() const
Definition uset.h:1444
USetCodePoints(const USet *uset)
Constructs a C++ "range" object over the code points of the USet.
Definition uset.h:1433
USetCodePointIterator begin() const
Definition uset.h:1439
Iterator returned by USetElements.
Definition uset.h:1748
USetElementIterator operator++(int)
Post-increment.
Definition uset.h:1810
bool operator==(const USetElementIterator &other) const
Definition uset.h:1754
bool operator!=(const USetElementIterator &other) const
Definition uset.h:1763
USetElementIterator & operator++()
Pre-increment.
Definition uset.h:1785
USetElementIterator(const USetElementIterator &other)=default
A C++ "range" for iterating over all of the elements of a USet.
Definition uset.h:1871
USetElements(const USetElements &other)=default
USetElementIterator end() const
Definition uset.h:1890
USetElementIterator begin() const
Definition uset.h:1885
USetElements(const USet *uset)
Constructs a C++ "range" object over all of the elements of the USet.
Definition uset.h:1877
Iterator returned by USetRanges.
Definition uset.h:1525
USetRangeIterator & operator++()
Pre-increment.
Definition uset.h:1559
CodePointRange operator*() const
Definition uset.h:1543
bool operator==(const USetRangeIterator &other) const
Definition uset.h:1531
USetRangeIterator operator++(int)
Post-increment.
Definition uset.h:1568
bool operator!=(const USetRangeIterator &other) const
Definition uset.h:1540
USetRangeIterator(const USetRangeIterator &other)=default
C++ "range" for iterating over the code point ranges of a USet.
Definition uset.h:1608
USetRangeIterator end() const
Definition uset.h:1625
USetRangeIterator begin() const
Definition uset.h:1620
USetRanges(const USet *uset)
Constructs a C++ "range" object over the code point ranges of the USet.
Definition uset.h:1614
USetRanges(const USetRanges &other)=default
Iterator returned by USetStrings.
Definition uset.h:1638
USetStringIterator & operator++()
Pre-increment.
Definition uset.h:1670
USetStringIterator(const USetStringIterator &other)=default
bool operator!=(const USetStringIterator &other) const
Definition uset.h:1653
std::u16string_view operator*() const
Definition uset.h:1656
USetStringIterator operator++(int)
Post-increment.
Definition uset.h:1679
bool operator==(const USetStringIterator &other) const
Definition uset.h:1644
C++ "range" for iterating over the empty and multi-character strings of a USet.
Definition uset.h:1716
USetStrings(const USetStrings &other)=default
USetStringIterator begin() const
Definition uset.h:1728
USetStringIterator end() const
Definition uset.h:1733
USetStrings(const USet *uset)
Constructs a C++ "range" object over the strings of the USet.
Definition uset.h:1722
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:259
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:279
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:269
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:264
int32_t length
The total length of the array.
Definition uset.h:274
bool operator!=(const iterator &other) const
Definition uset.h:1469
UChar32 c
The current code point in the range.
Definition uset.h:1495
iterator & operator++()
Pre-increment.
Definition uset.h:1478
iterator operator++(int)
Post-increment.
Definition uset.h:1487
bool operator==(const iterator &other) const
Definition uset.h:1467
A contiguous range of code points in a USet/UnicodeSet.
Definition uset.h:1460
CodePointRange(UChar32 start, UChar32 end)
Definition uset.h:1499
UChar32 rangeEnd
Inclusive end of a USet/UnicodeSet range of code points.
Definition uset.h:1518
CodePointRange(const CodePointRange &other)=default
UChar32 rangeStart
Start of a USet/UnicodeSet range of code points.
Definition uset.h:1513
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:196
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:378
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32).
Definition umachine.h:447
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI int32_t uset_getStringCount(const USet *set)
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:73
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition uset.h:114
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:101
@ USET_SIMPLE_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:128
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code point.
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enumerated property.
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:186
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:199
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:214
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:240
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:234
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code point.
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
struct USerializedSet USerializedSet
A serialized form of a Unicode set.
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:251
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition uset.h:54
U_CAPI const UChar * uset_getString(const USet *set, int32_t index, int32_t *pLength)
Returns the index-th string (empty or multi-character) in the set.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing this object's value.
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
C API: 16-bit Unicode handling macros.
#define U16_TRAIL(supplementary)
Get the trail surrogate (0xdc00..0xdfff) for a supplementary code point (0x10000..0x10ffff).
Definition utf16.h:132
#define U16_LEAD(supplementary)
Get the lead surrogate (0xd800..0xdbff) for a supplementary code point (0x10000..0x10ffff).
Definition utf16.h:123
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
@ U_ZERO_ERROR
No error, no warning.
Definition utypes.h:465
#define U_SUCCESS(x)
Does the error code indicate success?
Definition utypes.h:742