uchar.h

Go to the documentation of this file.
00001 /*
00002 **********************************************************************
00003 *   Copyright (C) 1997-2005, International Business Machines
00004 *   Corporation and others.  All Rights Reserved.
00005 **********************************************************************
00006 *
00007 * File UCHAR.H
00008 *
00009 * Modification History:
00010 *
00011 *   Date        Name        Description
00012 *   04/02/97    aliu        Creation.
00013 *   03/29/99    helena      Updated for C APIs.
00014 *   4/15/99     Madhu       Updated for C Implementation and Javadoc
00015 *   5/20/99     Madhu       Added the function u_getVersion()
00016 *   8/19/1999   srl         Upgraded scripts to Unicode 3.0
00017 *   8/27/1999   schererm    UCharDirection constants: U_...
00018 *   11/11/1999  weiv        added u_isalnum(), cleaned comments
00019 *   01/11/2000  helena      Renamed u_getVersion to u_getUnicodeVersion().
00020 ******************************************************************************
00021 */
00022 
00023 #ifndef UCHAR_H
00024 #define UCHAR_H
00025 
00026 #include "unicode/utypes.h"
00027 
00028 U_CDECL_BEGIN
00029 
00030 /*==========================================================================*/
00031 /* Unicode version number                                                   */
00032 /*==========================================================================*/
/* Unicode version number (see the section heading above) as a string literal. */
00042 #define U_UNICODE_VERSION "4.1"
00043 
/*
 * Minimum and maximum values named by this header: 0 and 0x10ffff.
 * NOTE(review): presumably the inclusive code point range accepted by the
 * UChar32 parameters used throughout this file - confirm against the ICU docs.
 */
00124 #define UCHAR_MIN_VALUE 0
00125 
00134 #define UCHAR_MAX_VALUE 0x10ffff
00135 
/*
 * U_MASK(x): single-bit mask with bit number x set, computed as a uint32_t.
 * The argument is parenthesized in the expansion and evaluated once.
 * x must be in the range 0..31: shifting a 32-bit value by 32 or more is
 * undefined behavior in C.
 */
00140 #define U_MASK(x) ((uint32_t)1<<(x))
00141 
00142 /*
00143  * !! Note: Several comments in this file are machine-read by the
00144  * genpname tool.  These comments describe the correspondence between
00145  * icu enum constants and UCD entities.  Do not delete them.  Update
00146  * these comments as needed.
00147  *
00148  * Any comment of the form "/ *[name]* /" (spaces added) is such
00149  * a comment.
00150  *
00151  * The U_JG_* and U_GC_*_MASK constants are matched by their symbolic
00152  * name, which must match PropertyValueAliases.txt.
00153  */
00154 
/*
 * UProperty: selector constants for character properties; values of this
 * type are passed as the `which` / `property` argument of the lookup
 * functions declared later in this header.
 *
 * The constants are grouped into numeric ranges. Each range is bracketed by
 * a *_START alias (equal to the first constant of the range) and a *_LIMIT
 * enumerator (one past the last constant of the range):
 *
 *   from 0       UCHAR_BINARY_START .. UCHAR_BINARY_LIMIT
 *   from 0x1000  UCHAR_INT_START    .. UCHAR_INT_LIMIT
 *   from 0x2000  UCHAR_MASK_START   .. UCHAR_MASK_LIMIT
 *   from 0x3000  UCHAR_DOUBLE_START .. UCHAR_DOUBLE_LIMIT
 *   from 0x4000  UCHAR_STRING_START .. UCHAR_STRING_LIMIT
 *
 * UCHAR_INVALID_CODE (-1) lies outside every range.
 *
 * Enumerators without an explicit value take the previous value plus one, so
 * inserting or reordering entries changes the numeric value of every later
 * entry in the same range.
 */
00174 typedef enum UProperty {
00175     /*  See note !!.  Comments of the form "Binary property Dash",
00176         "Enumerated property Script", "Double property Numeric_Value",
00177         and "String property Age" are read by genpname. */
00178 
00179     /*  Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
00180     debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
00181     rather than UCHAR_BINARY_START.  Likewise for other *_START
00182     identifiers. */
00183 
    /* Range beginning at 0; ends at UCHAR_BINARY_LIMIT. */
00186     UCHAR_ALPHABETIC=0,
00188     UCHAR_BINARY_START=UCHAR_ALPHABETIC,
00190     UCHAR_ASCII_HEX_DIGIT,
00194     UCHAR_BIDI_CONTROL,
00199     UCHAR_BIDI_MIRRORED,
00201     UCHAR_DASH,
00205     UCHAR_DEFAULT_IGNORABLE_CODE_POINT,
00208     UCHAR_DEPRECATED,
00211     UCHAR_DIACRITIC,
00215     UCHAR_EXTENDER,
00219     UCHAR_FULL_COMPOSITION_EXCLUSION,
00223     UCHAR_GRAPHEME_BASE,
00227     UCHAR_GRAPHEME_EXTEND,
00230     UCHAR_GRAPHEME_LINK,
00233     UCHAR_HEX_DIGIT,
00236     UCHAR_HYPHEN,
00241     UCHAR_ID_CONTINUE,
00245     UCHAR_ID_START,
00248     UCHAR_IDEOGRAPHIC,
00252     UCHAR_IDS_BINARY_OPERATOR,
00256     UCHAR_IDS_TRINARY_OPERATOR,
00259     UCHAR_JOIN_CONTROL,
00263     UCHAR_LOGICAL_ORDER_EXCEPTION,
00266     UCHAR_LOWERCASE,
00268     UCHAR_MATH,
00272     UCHAR_NONCHARACTER_CODE_POINT,
00274     UCHAR_QUOTATION_MARK,
00278     UCHAR_RADICAL,
00283     UCHAR_SOFT_DOTTED,
00287     UCHAR_TERMINAL_PUNCTUATION,
00291     UCHAR_UNIFIED_IDEOGRAPH,
00294     UCHAR_UPPERCASE,
00298     UCHAR_WHITE_SPACE,
00302     UCHAR_XID_CONTINUE,
00305     UCHAR_XID_START,
00309     UCHAR_CASE_SENSITIVE,
00314     UCHAR_S_TERM,
00320     UCHAR_VARIATION_SELECTOR,
00343     UCHAR_NFD_INERT,
00352     UCHAR_NFKD_INERT,
00361     UCHAR_NFC_INERT,
00370     UCHAR_NFKC_INERT,
00381     UCHAR_SEGMENT_STARTER,
00386     UCHAR_PATTERN_SYNTAX,
00391     UCHAR_PATTERN_WHITE_SPACE,
00396     UCHAR_POSIX_ALNUM,
00401     UCHAR_POSIX_BLANK,
00406     UCHAR_POSIX_GRAPH,
00411     UCHAR_POSIX_PRINT,
00416     UCHAR_POSIX_XDIGIT,
00418     UCHAR_BINARY_LIMIT,
00419 
    /* Range beginning at 0x1000; ends at UCHAR_INT_LIMIT. */
00422     UCHAR_BIDI_CLASS=0x1000,
00424     UCHAR_INT_START=UCHAR_BIDI_CLASS,
00427     UCHAR_BLOCK,
00430     UCHAR_CANONICAL_COMBINING_CLASS,
00433     UCHAR_DECOMPOSITION_TYPE,
00437     UCHAR_EAST_ASIAN_WIDTH,
00440     UCHAR_GENERAL_CATEGORY,
00443     UCHAR_JOINING_GROUP,
00446     UCHAR_JOINING_TYPE,
00449     UCHAR_LINE_BREAK,
00452     UCHAR_NUMERIC_TYPE,
00455     UCHAR_SCRIPT,
00458     UCHAR_HANGUL_SYLLABLE_TYPE,
00461     UCHAR_NFD_QUICK_CHECK,
00464     UCHAR_NFKD_QUICK_CHECK,
00467     UCHAR_NFC_QUICK_CHECK,
00470     UCHAR_NFKC_QUICK_CHECK,
00477     UCHAR_LEAD_CANONICAL_COMBINING_CLASS,
00484     UCHAR_TRAIL_CANONICAL_COMBINING_CLASS,
00489     UCHAR_GRAPHEME_CLUSTER_BREAK,
00494     UCHAR_SENTENCE_BREAK,
00499     UCHAR_WORD_BREAK,
00501     UCHAR_INT_LIMIT,
00502 
    /* Range beginning at 0x2000; it currently holds a single constant. */
00511     UCHAR_GENERAL_CATEGORY_MASK=0x2000,
00513     UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
00515     UCHAR_MASK_LIMIT,
00516 
    /* Range beginning at 0x3000; it currently holds a single constant. */
00519     UCHAR_NUMERIC_VALUE=0x3000,
00521     UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
00523     UCHAR_DOUBLE_LIMIT,
00524 
    /* Range beginning at 0x4000; ends at UCHAR_STRING_LIMIT. */
00527     UCHAR_AGE=0x4000,
00529     UCHAR_STRING_START=UCHAR_AGE,
00532     UCHAR_BIDI_MIRRORING_GLYPH,
00535     UCHAR_CASE_FOLDING,
00538     UCHAR_ISO_COMMENT,
00541     UCHAR_LOWERCASE_MAPPING,
00544     UCHAR_NAME,
00547     UCHAR_SIMPLE_CASE_FOLDING,
00550     UCHAR_SIMPLE_LOWERCASE_MAPPING,
00553     UCHAR_SIMPLE_TITLECASE_MAPPING,
00556     UCHAR_SIMPLE_UPPERCASE_MAPPING,
00559     UCHAR_TITLECASE_MAPPING,
00562     UCHAR_UNICODE_1_NAME,
00565     UCHAR_UPPERCASE_MAPPING,
00567     UCHAR_STRING_LIMIT,
00568 
    /* Out-of-band value; not a member of any range above. */
00570     UCHAR_INVALID_CODE = -1
00571 } UProperty;
00572 
/*
 * UCharCategory: numeric codes for the general category of a character.
 *
 * U_UNASSIGNED and U_GENERAL_OTHER_TYPES are two names for the value 0.
 * Values 1..29 are assigned explicitly; U_CHAR_CATEGORY_COUNT follows
 * U_FINAL_PUNCTUATION (29) and therefore evaluates to 30.
 *
 * These values double as bit numbers: the U_GC_*_MASK macros in this file
 * pass them to U_MASK(), so every value must stay below 32.
 */
00578 typedef enum UCharCategory
00579 {
00583     U_UNASSIGNED              = 0,
00585     U_GENERAL_OTHER_TYPES     = 0,
00587     U_UPPERCASE_LETTER        = 1,
00589     U_LOWERCASE_LETTER        = 2,
00591     U_TITLECASE_LETTER        = 3,
00593     U_MODIFIER_LETTER         = 4,
00595     U_OTHER_LETTER            = 5,
00597     U_NON_SPACING_MARK        = 6,
00599     U_ENCLOSING_MARK          = 7,
00601     U_COMBINING_SPACING_MARK  = 8,
00603     U_DECIMAL_DIGIT_NUMBER    = 9,
00605     U_LETTER_NUMBER           = 10,
00607     U_OTHER_NUMBER            = 11,
00609     U_SPACE_SEPARATOR         = 12,
00611     U_LINE_SEPARATOR          = 13,
00613     U_PARAGRAPH_SEPARATOR     = 14,
00615     U_CONTROL_CHAR            = 15,
00617     U_FORMAT_CHAR             = 16,
00619     U_PRIVATE_USE_CHAR        = 17,
00621     U_SURROGATE               = 18,
00623     U_DASH_PUNCTUATION        = 19,
00625     U_START_PUNCTUATION       = 20,
00627     U_END_PUNCTUATION         = 21,
00629     U_CONNECTOR_PUNCTUATION   = 22,
00631     U_OTHER_PUNCTUATION       = 23,
00633     U_MATH_SYMBOL             = 24,
00635     U_CURRENCY_SYMBOL         = 25,
00637     U_MODIFIER_SYMBOL         = 26,
00639     U_OTHER_SYMBOL            = 27,
00641     U_INITIAL_PUNCTUATION     = 28,
00643     U_FINAL_PUNCTUATION       = 29,
00645     U_CHAR_CATEGORY_COUNT
00646 } UCharCategory;
00647 
/*
 * Single-category masks: each U_GC_xx_MASK expands to U_MASK(<category>),
 * i.e. a uint32_t with only bit number <category> (a UCharCategory value)
 * set. U_GC_CN_MASK is built from U_GENERAL_OTHER_TYPES, whose value is 0.
 *
 * Per the genpname note near the top of this file, these constants are
 * matched by their symbolic name, which must match PropertyValueAliases.txt;
 * do not rename them.
 */
00662 #define U_GC_CN_MASK    U_MASK(U_GENERAL_OTHER_TYPES)
00663 
00665 #define U_GC_LU_MASK    U_MASK(U_UPPERCASE_LETTER)
00666 
00667 #define U_GC_LL_MASK    U_MASK(U_LOWERCASE_LETTER)
00668 
00669 #define U_GC_LT_MASK    U_MASK(U_TITLECASE_LETTER)
00670 
00671 #define U_GC_LM_MASK    U_MASK(U_MODIFIER_LETTER)
00672 
00673 #define U_GC_LO_MASK    U_MASK(U_OTHER_LETTER)
00674 
00676 #define U_GC_MN_MASK    U_MASK(U_NON_SPACING_MARK)
00677 
00678 #define U_GC_ME_MASK    U_MASK(U_ENCLOSING_MARK)
00679 
00680 #define U_GC_MC_MASK    U_MASK(U_COMBINING_SPACING_MARK)
00681 
00683 #define U_GC_ND_MASK    U_MASK(U_DECIMAL_DIGIT_NUMBER)
00684 
00685 #define U_GC_NL_MASK    U_MASK(U_LETTER_NUMBER)
00686 
00687 #define U_GC_NO_MASK    U_MASK(U_OTHER_NUMBER)
00688 
00690 #define U_GC_ZS_MASK    U_MASK(U_SPACE_SEPARATOR)
00691 
00692 #define U_GC_ZL_MASK    U_MASK(U_LINE_SEPARATOR)
00693 
00694 #define U_GC_ZP_MASK    U_MASK(U_PARAGRAPH_SEPARATOR)
00695 
00697 #define U_GC_CC_MASK    U_MASK(U_CONTROL_CHAR)
00698 
00699 #define U_GC_CF_MASK    U_MASK(U_FORMAT_CHAR)
00700 
00701 #define U_GC_CO_MASK    U_MASK(U_PRIVATE_USE_CHAR)
00702 
00703 #define U_GC_CS_MASK    U_MASK(U_SURROGATE)
00704 
00706 #define U_GC_PD_MASK    U_MASK(U_DASH_PUNCTUATION)
00707 
00708 #define U_GC_PS_MASK    U_MASK(U_START_PUNCTUATION)
00709 
00710 #define U_GC_PE_MASK    U_MASK(U_END_PUNCTUATION)
00711 
00712 #define U_GC_PC_MASK    U_MASK(U_CONNECTOR_PUNCTUATION)
00713 
00714 #define U_GC_PO_MASK    U_MASK(U_OTHER_PUNCTUATION)
00715 
00717 #define U_GC_SM_MASK    U_MASK(U_MATH_SYMBOL)
00718 
00719 #define U_GC_SC_MASK    U_MASK(U_CURRENCY_SYMBOL)
00720 
00721 #define U_GC_SK_MASK    U_MASK(U_MODIFIER_SYMBOL)
00722 
00723 #define U_GC_SO_MASK    U_MASK(U_OTHER_SYMBOL)
00724 
00726 #define U_GC_PI_MASK    U_MASK(U_INITIAL_PUNCTUATION)
00727 
00728 #define U_GC_PF_MASK    U_MASK(U_FINAL_PUNCTUATION)
00729 
00730 
/*
 * Combined masks: each one is the bitwise OR of several single-category
 * U_GC_xx_MASK values, and every expansion is fully parenthesized.
 *
 *   U_GC_L_MASK   LU | LL | LT | LM | LO
 *   U_GC_LC_MASK  LU | LL | LT           (subset of U_GC_L_MASK)
 *   U_GC_M_MASK   MN | ME | MC
 *   U_GC_N_MASK   ND | NL | NO
 *   U_GC_Z_MASK   ZS | ZL | ZP
 *   U_GC_C_MASK   CN | CC | CF | CO | CS
 *   U_GC_P_MASK   PD | PS | PE | PC | PO | PI | PF
 *   U_GC_S_MASK   SM | SC | SK | SO
 */
00732 #define U_GC_L_MASK \
00733             (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
00734 
00736 #define U_GC_LC_MASK \
00737             (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
00738 
00740 #define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
00741 
00743 #define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
00744 
00746 #define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
00747 
00749 #define U_GC_C_MASK \
00750             (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
00751 
00753 #define U_GC_P_MASK \
00754             (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
00755              U_GC_PI_MASK|U_GC_PF_MASK)
00756 
00758 #define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
00759 
/*
 * UCharDirection: direction codes; this is the return type of
 * u_charDirection() declared later in this header.
 *
 * Values 0..18 are assigned explicitly. U_CHAR_DIRECTION_COUNT follows
 * U_BOUNDARY_NEUTRAL (18) and therefore evaluates to 19.
 */
00764 typedef enum UCharDirection {
00768     U_LEFT_TO_RIGHT               = 0,
00770     U_RIGHT_TO_LEFT               = 1,
00772     U_EUROPEAN_NUMBER             = 2,
00774     U_EUROPEAN_NUMBER_SEPARATOR   = 3,
00776     U_EUROPEAN_NUMBER_TERMINATOR  = 4,
00778     U_ARABIC_NUMBER               = 5,
00780     U_COMMON_NUMBER_SEPARATOR     = 6,
00782     U_BLOCK_SEPARATOR             = 7,
00784     U_SEGMENT_SEPARATOR           = 8,
00786     U_WHITE_SPACE_NEUTRAL         = 9,
00788     U_OTHER_NEUTRAL               = 10,
00790     U_LEFT_TO_RIGHT_EMBEDDING     = 11,
00792     U_LEFT_TO_RIGHT_OVERRIDE      = 12,
00794     U_RIGHT_TO_LEFT_ARABIC        = 13,
00796     U_RIGHT_TO_LEFT_EMBEDDING     = 14,
00798     U_RIGHT_TO_LEFT_OVERRIDE      = 15,
00800     U_POP_DIRECTIONAL_FORMAT      = 16,
00802     U_DIR_NON_SPACING_MARK        = 17,
00804     U_BOUNDARY_NEUTRAL            = 18,
00806     U_CHAR_DIRECTION_COUNT
00807 } UCharDirection;
00808 
/*
 * UBlockCode: identifiers for Unicode blocks; this is the return type of
 * ublock_getCode() declared later in this header.
 *
 * The bracketed tag in the comment after each constant is machine-read by
 * genpname (see the note near the top of this file) and must not be removed.
 * NOTE(review): judging from tags such as 0000 for Basic Latin and 0080 for
 * Latin-1 Supplement, the tag is presumably the first code point of the
 * block in hexadecimal - confirm.
 *
 * Facts visible in the declaration:
 *  - Every block constant has an explicit value (0..145).
 *  - Two pairs share a value: UBLOCK_PRIVATE_USE / UBLOCK_PRIVATE_USE_AREA
 *    (78) and UBLOCK_CYRILLIC_SUPPLEMENTARY / UBLOCK_CYRILLIC_SUPPLEMENT (97).
 *    In each pair only the second name carries a genpname tag.
 *  - Numeric order does not always follow the tags: UBLOCK_SPECIALS (86,
 *    tag FFF0) comes before UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS (87, tag
 *    FF00), and blocks added for later Unicode versions are appended at the
 *    end regardless of their tag.
 *  - UBLOCK_COUNT follows UBLOCK_VERTICAL_FORMS (145) and so evaluates to 146.
 *  - UBLOCK_INVALID_CODE is -1.
 */
00813 enum UBlockCode {
00814 
00816     UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
00817 
00819     UBLOCK_BASIC_LATIN = 1, /*[0000]*/ /*See note !!*/
00820 
00822     UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
00823 
00825     UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
00826 
00828     UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
00829 
00831     UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
00832 
00834     UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
00835 
00837     UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
00838 
00843     UBLOCK_GREEK =8, /*[0370]*/
00844 
00846     UBLOCK_CYRILLIC =9, /*[0400]*/
00847 
00849     UBLOCK_ARMENIAN =10, /*[0530]*/
00850 
00852     UBLOCK_HEBREW =11, /*[0590]*/
00853 
00855     UBLOCK_ARABIC =12, /*[0600]*/
00856 
00858     UBLOCK_SYRIAC =13, /*[0700]*/
00859 
00861     UBLOCK_THAANA =14, /*[0780]*/
00862 
00864     UBLOCK_DEVANAGARI =15, /*[0900]*/
00865 
00867     UBLOCK_BENGALI =16, /*[0980]*/
00868 
00870     UBLOCK_GURMUKHI =17, /*[0A00]*/
00871 
00873     UBLOCK_GUJARATI =18, /*[0A80]*/
00874 
00876     UBLOCK_ORIYA =19, /*[0B00]*/
00877 
00879     UBLOCK_TAMIL =20, /*[0B80]*/
00880 
00882     UBLOCK_TELUGU =21, /*[0C00]*/
00883 
00885     UBLOCK_KANNADA =22, /*[0C80]*/
00886 
00888     UBLOCK_MALAYALAM =23, /*[0D00]*/
00889 
00891     UBLOCK_SINHALA =24, /*[0D80]*/
00892 
00894     UBLOCK_THAI =25, /*[0E00]*/
00895 
00897     UBLOCK_LAO =26, /*[0E80]*/
00898 
00900     UBLOCK_TIBETAN =27, /*[0F00]*/
00901 
00903     UBLOCK_MYANMAR =28, /*[1000]*/
00904 
00906     UBLOCK_GEORGIAN =29, /*[10A0]*/
00907 
00909     UBLOCK_HANGUL_JAMO =30, /*[1100]*/
00910 
00912     UBLOCK_ETHIOPIC =31, /*[1200]*/
00913 
00915     UBLOCK_CHEROKEE =32, /*[13A0]*/
00916 
00918     UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
00919 
00921     UBLOCK_OGHAM =34, /*[1680]*/
00922 
00924     UBLOCK_RUNIC =35, /*[16A0]*/
00925 
00927     UBLOCK_KHMER =36, /*[1780]*/
00928 
00930     UBLOCK_MONGOLIAN =37, /*[1800]*/
00931 
00933     UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
00934 
00936     UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
00937 
00939     UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
00940 
00942     UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
00943 
00945     UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
00946 
00951     UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
00952 
00954     UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
00955 
00957     UBLOCK_NUMBER_FORMS =45, /*[2150]*/
00958 
00960     UBLOCK_ARROWS =46, /*[2190]*/
00961 
00963     UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
00964 
00966     UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
00967 
00969     UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
00970 
00972     UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
00973 
00975     UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
00976 
00978     UBLOCK_BOX_DRAWING =52, /*[2500]*/
00979 
00981     UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
00982 
00984     UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
00985 
00987     UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
00988 
00990     UBLOCK_DINGBATS =56, /*[2700]*/
00991 
00993     UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
00994 
00996     UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
00997 
00999     UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
01000 
01002     UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
01003 
01005     UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
01006 
01008     UBLOCK_HIRAGANA =62, /*[3040]*/
01009 
01011     UBLOCK_KATAKANA =63, /*[30A0]*/
01012 
01014     UBLOCK_BOPOMOFO =64, /*[3100]*/
01015 
01017     UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
01018 
01020     UBLOCK_KANBUN =66, /*[3190]*/
01021 
01023     UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
01024 
01026     UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
01027 
01029     UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
01030 
01032     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
01033 
01035     UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
01036 
01038     UBLOCK_YI_SYLLABLES =72, /*[A000]*/
01039 
01041     UBLOCK_YI_RADICALS =73, /*[A490]*/
01042 
01044     UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
01045 
01047     UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
01048 
01050     UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
01051 
01053     UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
01054 
    /* Two names for the value 78; only the second carries the genpname tag. */
01064     UBLOCK_PRIVATE_USE = 78,
01074     UBLOCK_PRIVATE_USE_AREA =UBLOCK_PRIVATE_USE, /*[E000]*/
01075 
01077     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
01078 
01080     UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
01081 
01083     UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
01084 
01086     UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
01087 
01089     UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
01090 
01092     UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
01093 
01095     UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
01096 
    /* Value 86 is tagged FFF0 while value 87 is tagged FF00: the numeric
       order and the tag order differ for these two entries. */
01098     UBLOCK_SPECIALS =86, /*[FFF0]*/
01099 
01101     UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
01102 
01103     /* New blocks in Unicode 3.1 */
01104 
01106     UBLOCK_OLD_ITALIC = 88  , /*[10300]*/
01108     UBLOCK_GOTHIC = 89 , /*[10330]*/
01110     UBLOCK_DESERET = 90 , /*[10400]*/
01112     UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91 , /*[1D000]*/
01114     UBLOCK_MUSICAL_SYMBOLS = 92 , /*[1D100]*/
01116     UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93  , /*[1D400]*/
01118     UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B  = 94 , /*[20000]*/
01120     UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95 , /*[2F800]*/
01122     UBLOCK_TAGS = 96, /*[E0000]*/
01123 
01124     /* New blocks in Unicode 3.2 */
01125 
    /* Two names for the value 97; only the second carries the genpname tag. */
01130     UBLOCK_CYRILLIC_SUPPLEMENTARY = 97, 
01132     UBLOCK_CYRILLIC_SUPPLEMENT = UBLOCK_CYRILLIC_SUPPLEMENTARY, /*[0500]*/
01134     UBLOCK_TAGALOG = 98, /*[1700]*/
01136     UBLOCK_HANUNOO = 99, /*[1720]*/
01138     UBLOCK_BUHID = 100, /*[1740]*/
01140     UBLOCK_TAGBANWA = 101, /*[1760]*/
01142     UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
01144     UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
01146     UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
01148     UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
01150     UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
01152     UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
01154     UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
01156     UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
01158     UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
01159 
01160     /* New blocks in Unicode 4 */
01161 
01163     UBLOCK_LIMBU = 111, /*[1900]*/
01165     UBLOCK_TAI_LE = 112, /*[1950]*/
01167     UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
01169     UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
01171     UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
01173     UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
01175     UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
01177     UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
01179     UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
01181     UBLOCK_UGARITIC = 120, /*[10380]*/
01183     UBLOCK_SHAVIAN = 121, /*[10450]*/
01185     UBLOCK_OSMANYA = 122, /*[10480]*/
01187     UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
01189     UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
01191     UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
01192 
01193     /* New blocks in Unicode 4.1 */
01194 
01196     UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
01198     UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
01200     UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
01202     UBLOCK_BUGINESE = 129, /*[1A00]*/
01204     UBLOCK_CJK_STROKES = 130, /*[31C0]*/
01206     UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
01208     UBLOCK_COPTIC = 132, /*[2C80]*/
01210     UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
01212     UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
01214     UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
01216     UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
01218     UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
01220     UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
01222     UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
01224     UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
01226     UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
01228     UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
01230     UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
01232     UBLOCK_TIFINAGH = 144, /*[2D30]*/
01234     UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
01235 
    /* No explicit value: one more than UBLOCK_VERTICAL_FORMS. New blocks
       must be added above this line so that it stays last. */
01237     UBLOCK_COUNT,
01238 
01240     UBLOCK_INVALID_CODE=-1
01241 };
01242 
/* Lets C code write `UBlockCode` without the `enum` keyword. */
01244 typedef enum UBlockCode UBlockCode;
01245 
/*
 * UEastAsianWidth: East Asian Width values.
 * No enumerator has an explicit value, so they number 0..5 in declaration
 * order and U_EA_COUNT evaluates to 6. The bracketed comments are genpname
 * tags (see the note near the top of this file); keep them intact.
 */
01253 typedef enum UEastAsianWidth {
01254     U_EA_NEUTRAL,   /*[N]*/ /*See note !!*/
01255     U_EA_AMBIGUOUS, /*[A]*/
01256     U_EA_HALFWIDTH, /*[H]*/
01257     U_EA_FULLWIDTH, /*[F]*/
01258     U_EA_NARROW,    /*[Na]*/
01259     U_EA_WIDE,      /*[W]*/
01260     U_EA_COUNT
01261 } UEastAsianWidth;
01262 /*
01263  * Implementation note:
01264  * Keep UEastAsianWidth constant values in sync with names list in genprops/props2.c.
01265  */
01266 
/*
 * UCharNameChoice: selects which kind of character name is meant; passed as
 * the `nameChoice` argument of u_charName(), u_charFromName() and
 * u_enumCharNames(), and handed to UEnumCharNamesFn callbacks.
 * Values are implicit (0..2); U_CHAR_NAME_CHOICE_COUNT evaluates to 3.
 */
01278 typedef enum UCharNameChoice {
01279     U_UNICODE_CHAR_NAME,
01280     U_UNICODE_10_CHAR_NAME,
01281     U_EXTENDED_CHAR_NAME,
01282     U_CHAR_NAME_CHOICE_COUNT
01283 } UCharNameChoice;
01284 
/*
 * UPropertyNameChoice: selects the short or the long form of a name; passed
 * as the `nameChoice` argument of u_getPropertyName() and
 * u_getPropertyValueName().
 * Values are implicit (0..1); U_PROPERTY_NAME_CHOICE_COUNT evaluates to 2.
 */
01298 typedef enum UPropertyNameChoice {
01299     U_SHORT_PROPERTY_NAME,
01300     U_LONG_PROPERTY_NAME,
01301     U_PROPERTY_NAME_CHOICE_COUNT
01302 } UPropertyNameChoice;
01303 
/*
 * UDecompositionType: decomposition type values.
 * Values are implicit, starting at 0 in declaration order; U_DT_COUNT is 18,
 * as its trailing comment states. The bracketed comments are genpname tags
 * (see the note near the top of this file); keep them intact.
 */
01310 typedef enum UDecompositionType {
01311     U_DT_NONE,              /*[none]*/ /*See note !!*/
01312     U_DT_CANONICAL,         /*[can]*/
01313     U_DT_COMPAT,            /*[com]*/
01314     U_DT_CIRCLE,            /*[enc]*/
01315     U_DT_FINAL,             /*[fin]*/
01316     U_DT_FONT,              /*[font]*/
01317     U_DT_FRACTION,          /*[fra]*/
01318     U_DT_INITIAL,           /*[init]*/
01319     U_DT_ISOLATED,          /*[iso]*/
01320     U_DT_MEDIAL,            /*[med]*/
01321     U_DT_NARROW,            /*[nar]*/
01322     U_DT_NOBREAK,           /*[nb]*/
01323     U_DT_SMALL,             /*[sml]*/
01324     U_DT_SQUARE,            /*[sqr]*/
01325     U_DT_SUB,               /*[sub]*/
01326     U_DT_SUPER,             /*[sup]*/
01327     U_DT_VERTICAL,          /*[vert]*/
01328     U_DT_WIDE,              /*[wide]*/
01329     U_DT_COUNT /* 18 */
01330 } UDecompositionType;
01331 
/*
 * UJoiningType: joining type values.
 * Values are implicit, starting at 0 in declaration order; U_JT_COUNT is 6,
 * as its trailing comment states. The bracketed comments are genpname tags
 * (see the note near the top of this file); keep them intact.
 */
01338 typedef enum UJoiningType {
01339     U_JT_NON_JOINING,       /*[U]*/ /*See note !!*/
01340     U_JT_JOIN_CAUSING,      /*[C]*/
01341     U_JT_DUAL_JOINING,      /*[D]*/
01342     U_JT_LEFT_JOINING,      /*[L]*/
01343     U_JT_RIGHT_JOINING,     /*[R]*/
01344     U_JT_TRANSPARENT,       /*[T]*/
01345     U_JT_COUNT /* 6 */
01346 } UJoiningType;
01347 
/*
 * UJoiningGroup: joining group values.
 *
 * Unlike most enums in this file, the entries carry no bracketed genpname
 * tags: per the note near the top of this file, U_JG_* constants are matched
 * by their symbolic name, which must match PropertyValueAliases.txt, so
 * they must not be renamed.
 *
 * Values are implicit, starting at 0 in declaration order (54 entries);
 * U_JG_COUNT therefore evaluates to 54. The list is alphabetical up to
 * U_JG_ZAIN; U_JG_FE, U_JG_KHAPH and U_JG_ZHAIN are appended after it.
 */
01354 typedef enum UJoiningGroup {
01355     U_JG_NO_JOINING_GROUP,
01356     U_JG_AIN,
01357     U_JG_ALAPH,
01358     U_JG_ALEF,
01359     U_JG_BEH,
01360     U_JG_BETH,
01361     U_JG_DAL,
01362     U_JG_DALATH_RISH,
01363     U_JG_E,
01364     U_JG_FEH,
01365     U_JG_FINAL_SEMKATH,
01366     U_JG_GAF,
01367     U_JG_GAMAL,
01368     U_JG_HAH,
01369     U_JG_HAMZA_ON_HEH_GOAL,
01370     U_JG_HE,
01371     U_JG_HEH,
01372     U_JG_HEH_GOAL,
01373     U_JG_HETH,
01374     U_JG_KAF,
01375     U_JG_KAPH,
01376     U_JG_KNOTTED_HEH,
01377     U_JG_LAM,
01378     U_JG_LAMADH,
01379     U_JG_MEEM,
01380     U_JG_MIM,
01381     U_JG_NOON,
01382     U_JG_NUN,
01383     U_JG_PE,
01384     U_JG_QAF,
01385     U_JG_QAPH,
01386     U_JG_REH,
01387     U_JG_REVERSED_PE,
01388     U_JG_SAD,
01389     U_JG_SADHE,
01390     U_JG_SEEN,
01391     U_JG_SEMKATH,
01392     U_JG_SHIN,
01393     U_JG_SWASH_KAF,
01394     U_JG_SYRIAC_WAW,
01395     U_JG_TAH,
01396     U_JG_TAW,
01397     U_JG_TEH_MARBUTA,
01398     U_JG_TETH,
01399     U_JG_WAW,
01400     U_JG_YEH,
01401     U_JG_YEH_BARREE,
01402     U_JG_YEH_WITH_TAIL,
01403     U_JG_YUDH,
01404     U_JG_YUDH_HE,
01405     U_JG_ZAIN,
01406     U_JG_FE,        
01407     U_JG_KHAPH,     
01408     U_JG_ZHAIN,     
01409     U_JG_COUNT
01410 } UJoiningGroup;
01411 
/*
 * UGraphemeClusterBreak: grapheme cluster break values.
 * Values are implicit, starting at 0 in declaration order; U_GCB_COUNT
 * evaluates to 10. The bracketed comments are genpname tags (see the note
 * near the top of this file); keep them intact.
 */
01418 typedef enum UGraphemeClusterBreak {
01419     U_GCB_OTHER,            /*[XX]*/ /*See note !!*/
01420     U_GCB_CONTROL,          /*[CN]*/
01421     U_GCB_CR,               /*[CR]*/
01422     U_GCB_EXTEND,           /*[EX]*/
01423     U_GCB_L,                /*[L]*/
01424     U_GCB_LF,               /*[LF]*/
01425     U_GCB_LV,               /*[LV]*/
01426     U_GCB_LVT,              /*[LVT]*/
01427     U_GCB_T,                /*[T]*/
01428     U_GCB_V,                /*[V]*/
01429     U_GCB_COUNT
01430 } UGraphemeClusterBreak;
01431 
/*
 * UWordBreakValues: word break values.
 * Values are implicit, starting at 0 in declaration order; U_WB_COUNT
 * evaluates to 8. The bracketed comments are genpname tags (see the note
 * near the top of this file); keep them intact.
 */
01439 typedef enum UWordBreakValues {
01440     U_WB_OTHER,             /*[XX]*/ /*See note !!*/
01441     U_WB_ALETTER,           /*[LE]*/
01442     U_WB_FORMAT,            /*[FO]*/
01443     U_WB_KATAKANA,          /*[KA]*/
01444     U_WB_MIDLETTER,         /*[ML]*/
01445     U_WB_MIDNUM,            /*[MN]*/
01446     U_WB_NUMERIC,           /*[NU]*/
01447     U_WB_EXTENDNUMLET,      /*[EX]*/
01448     U_WB_COUNT
01449 } UWordBreakValues;
01450 
/*
 * USentenceBreak: sentence break values.
 * Values are implicit, starting at 0 in declaration order; U_SB_COUNT
 * evaluates to 11. The bracketed comments are genpname tags (see the note
 * near the top of this file); keep them intact.
 */
01457 typedef enum USentenceBreak {
01458     U_SB_OTHER,             /*[XX]*/ /*See note !!*/
01459     U_SB_ATERM,             /*[AT]*/
01460     U_SB_CLOSE,             /*[CL]*/
01461     U_SB_FORMAT,            /*[FO]*/
01462     U_SB_LOWER,             /*[LO]*/
01463     U_SB_NUMERIC,           /*[NU]*/
01464     U_SB_OLETTER,           /*[LE]*/
01465     U_SB_SEP,               /*[SE]*/
01466     U_SB_SP,                /*[SP]*/
01467     U_SB_STERM,             /*[ST]*/
01468     U_SB_UPPER,             /*[UP]*/
01469     U_SB_COUNT
01470 } USentenceBreak;
01471 
/*
 * ULineBreak: line break values.
 *
 * Values are implicit, starting at 0 in declaration order. One value has
 * two names (see below), so the 37 enumerators before U_LB_COUNT cover the
 * values 0..35 and U_LB_COUNT evaluates to 36.
 * The bracketed comments are genpname tags (see the note near the top of
 * this file); keep them intact.
 */
01478 typedef enum ULineBreak {
01479     U_LB_UNKNOWN,           /*[XX]*/ /*See note !!*/
01480     U_LB_AMBIGUOUS,         /*[AI]*/
01481     U_LB_ALPHABETIC,        /*[AL]*/
01482     U_LB_BREAK_BOTH,        /*[B2]*/
01483     U_LB_BREAK_AFTER,       /*[BA]*/
01484     U_LB_BREAK_BEFORE,      /*[BB]*/
01485     U_LB_MANDATORY_BREAK,   /*[BK]*/
01486     U_LB_CONTINGENT_BREAK,  /*[CB]*/
01487     U_LB_CLOSE_PUNCTUATION, /*[CL]*/
01488     U_LB_COMBINING_MARK,    /*[CM]*/
01489     U_LB_CARRIAGE_RETURN,   /*[CR]*/
01490     U_LB_EXCLAMATION,       /*[EX]*/
01491     U_LB_GLUE,              /*[GL]*/
01492     U_LB_HYPHEN,            /*[HY]*/
01493     U_LB_IDEOGRAPHIC,       /*[ID]*/
    /* Two spellings of one value (15): U_LB_INSEPARABLE is defined as an
       alias of U_LB_INSEPERABLE, and only the alias carries the genpname
       tag. Both names must remain for source compatibility. */
01494     U_LB_INSEPERABLE,
01496     U_LB_INSEPARABLE=U_LB_INSEPERABLE,/*[IN]*/
01497     U_LB_INFIX_NUMERIC,     /*[IS]*/
01498     U_LB_LINE_FEED,         /*[LF]*/
01499     U_LB_NONSTARTER,        /*[NS]*/
01500     U_LB_NUMERIC,           /*[NU]*/
01501     U_LB_OPEN_PUNCTUATION,  /*[OP]*/
01502     U_LB_POSTFIX_NUMERIC,   /*[PO]*/
01503     U_LB_PREFIX_NUMERIC,    /*[PR]*/
01504     U_LB_QUOTATION,         /*[QU]*/
01505     U_LB_COMPLEX_CONTEXT,   /*[SA]*/
01506     U_LB_SURROGATE,         /*[SG]*/
01507     U_LB_SPACE,             /*[SP]*/
01508     U_LB_BREAK_SYMBOLS,     /*[SY]*/
01509     U_LB_ZWSPACE,           /*[ZW]*/
01510     U_LB_NEXT_LINE,         /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
01511     U_LB_WORD_JOINER,       /*[WJ]*/
01512     U_LB_H2,                /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
01513     U_LB_H3,                /*[H3]*/
01514     U_LB_JL,                /*[JL]*/
01515     U_LB_JT,                /*[JT]*/
01516     U_LB_JV,                /*[JV]*/
01517     U_LB_COUNT
01518 } ULineBreak;
01519 
/*
 * UNumericType: numeric type values.
 * Values are implicit, starting at 0 in declaration order; U_NT_COUNT
 * evaluates to 4. The bracketed comments are genpname tags (see the note
 * near the top of this file); keep them intact.
 */
01526 typedef enum UNumericType {
01527     U_NT_NONE,              /*[None]*/ /*See note !!*/
01528     U_NT_DECIMAL,           /*[de]*/
01529     U_NT_DIGIT,             /*[di]*/
01530     U_NT_NUMERIC,           /*[nu]*/
01531     U_NT_COUNT
01532 } UNumericType;
01533 
/*
 * UHangulSyllableType: Hangul syllable type values.
 * Values are implicit, starting at 0 in declaration order; U_HST_COUNT
 * evaluates to 6. The bracketed comments are genpname tags (see the note
 * near the top of this file); keep them intact.
 */
01540 typedef enum UHangulSyllableType {
01541     U_HST_NOT_APPLICABLE,   /*[NA]*/ /*See note !!*/
01542     U_HST_LEADING_JAMO,     /*[L]*/
01543     U_HST_VOWEL_JAMO,       /*[V]*/
01544     U_HST_TRAILING_JAMO,    /*[T]*/
01545     U_HST_LV_SYLLABLE,      /*[LV]*/
01546     U_HST_LVT_SYLLABLE,     /*[LVT]*/
01547     U_HST_COUNT
01548 } UHangulSyllableType;
01549 
/*
 * Property lookup entry points. Only the prototypes are visible in this
 * header; the behavior notes below that go beyond the signatures are
 * assumptions to be confirmed against the ICU API reference.
 */

/*
 * Takes a code point `c` and a property selector `which`; returns a UBool.
 * NOTE(review): presumably `which` is expected to lie in the range
 * UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1 - confirm.
 */
01576 U_STABLE UBool U_EXPORT2
01577 u_hasBinaryProperty(UChar32 c, UProperty which);
01578 
/*
 * The four u_isU* functions each take one code point and return a UBool.
 * NOTE(review): by name they look like shorthands for the UCHAR_ALPHABETIC,
 * UCHAR_LOWERCASE, UCHAR_UPPERCASE and UCHAR_WHITE_SPACE selectors - confirm.
 */
01591 U_STABLE UBool U_EXPORT2
01592 u_isUAlphabetic(UChar32 c);
01593 
01606 U_STABLE UBool U_EXPORT2
01607 u_isULowercase(UChar32 c);
01608 
01621 U_STABLE UBool U_EXPORT2
01622 u_isUUppercase(UChar32 c);
01623 
01642 U_STABLE UBool U_EXPORT2
01643 u_isUWhiteSpace(UChar32 c);
01644 
/*
 * Takes a code point `c` and a property selector `which`; returns an
 * int32_t value for that property.
 */
01682 U_STABLE int32_t U_EXPORT2
01683 u_getIntPropertyValue(UChar32 c, UProperty which);
01684 
/*
 * The Min/Max variants take only the selector and return an int32_t;
 * presumably the bounds of what u_getIntPropertyValue() can return for that
 * selector - confirm.
 */
01703 U_STABLE int32_t U_EXPORT2
01704 u_getIntPropertyMinValue(UProperty which);
01705 
01732 U_STABLE int32_t U_EXPORT2
01733 u_getIntPropertyMaxValue(UProperty which);
01734 
/* Takes a code point and returns a double. */
01755 U_STABLE double U_EXPORT2
01756 u_getNumericValue(UChar32 c);
01757 
/*
 * Sentinel double constant, -123456789.0.
 * NOTE(review): presumably what u_getNumericValue() returns when a code
 * point has no numeric value - confirm.
 */
01765 #define U_NO_NUMERIC_VALUE ((double)-123456789.)
01766 
/*
 * Character classification predicates. Every function in this group has the
 * same shape: it takes one code point (UChar32) and returns a UBool.
 *
 * Only the prototypes are visible in this header, so the exact set of
 * characters each predicate accepts cannot be derived from it; consult the
 * ICU API reference before relying on edge cases.
 * NOTE(review): several names are close to each other (u_isspace,
 * u_isJavaSpaceChar, u_isWhitespace, and u_isUWhiteSpace declared earlier;
 * u_iscntrl and u_isISOControl). Presumably they differ in which
 * characters they accept - confirm before substituting one for another.
 */
01790 U_STABLE UBool U_EXPORT2
01791 u_islower(UChar32 c);
01792 
01817 U_STABLE UBool U_EXPORT2
01818 u_isupper(UChar32 c);
01819 
01834 U_STABLE UBool U_EXPORT2
01835 u_istitle(UChar32 c);
01836 
01855 U_STABLE UBool U_EXPORT2
01856 u_isdigit(UChar32 c);
01857 
01876 U_STABLE UBool U_EXPORT2
01877 u_isalpha(UChar32 c);
01878 
01897 U_STABLE UBool U_EXPORT2
01898 u_isalnum(UChar32 c);
01899 
01920 U_STABLE UBool U_EXPORT2
01921 u_isxdigit(UChar32 c);
01922 
01936 U_STABLE UBool U_EXPORT2
01937 u_ispunct(UChar32 c);
01938 
01955 U_STABLE UBool U_EXPORT2
01956 u_isgraph(UChar32 c);
01957 
01984 U_STABLE UBool U_EXPORT2
01985 u_isblank(UChar32 c);
01986 
02009 U_STABLE UBool U_EXPORT2
02010 u_isdefined(UChar32 c);
02011 
02030 U_STABLE UBool U_EXPORT2
02031 u_isspace(UChar32 c);
02032 
02051 U_STABLE UBool U_EXPORT2
02052 u_isJavaSpaceChar(UChar32 c);
02053 
02085 U_STABLE UBool U_EXPORT2
02086 u_isWhitespace(UChar32 c);
02087 
02109 U_STABLE UBool U_EXPORT2
02110 u_iscntrl(UChar32 c);
02111 
02124 U_STABLE UBool U_EXPORT2
02125 u_isISOControl(UChar32 c);
02126 
02142 U_STABLE UBool U_EXPORT2
02143 u_isprint(UChar32 c);
02144 
02163 U_STABLE UBool U_EXPORT2
02164 u_isbase(UChar32 c);
02165 
/* Takes a code point and returns one of the UCharDirection values. */
02182 U_STABLE UCharDirection U_EXPORT2
02183 u_charDirection(UChar32 c);
02184 
/* Takes a code point and returns a UBool. */
02200 U_STABLE UBool U_EXPORT2
02201 u_isMirrored(UChar32 c);
02202 
/*
 * Takes a code point and returns a code point.
 * NOTE(review): what is returned for a character without a mirrored
 * counterpart is not visible in this header - confirm.
 */
02222 U_STABLE UChar32 U_EXPORT2
02223 u_charMirror(UChar32 c);
02224 
/*
 * Takes a code point and returns an int8_t.
 * U_GET_GC_MASK below feeds this result to U_MASK(), so the result is used
 * as a bit number; presumably it is always a UCharCategory value - confirm.
 */
02236 U_STABLE int8_t U_EXPORT2
02237 u_charType(UChar32 c);
02238 
/*
 * U_GET_GC_MASK(c): expands to U_MASK(u_charType(c)), i.e. the single-bit
 * mask for the value u_charType() returns for `c`. The result can be tested
 * against the U_GC_*_MASK constants with a bitwise AND. The argument is
 * evaluated once, but the expansion involves a function call, so it is not
 * a constant expression.
 */
02252 #define U_GET_GC_MASK(c) U_MASK(u_charType(c))
02253 
/*
 * UCharEnumTypeRange: callback function type, declared with the U_CALLCONV
 * calling convention. It receives the caller's `context` pointer, a
 * `start` and `limit` code point, and a UCharCategory `type`, and returns
 * a UBool.
 * NOTE(review): the parameter name `limit` suggests the range is
 * [start, limit) with `limit` exclusive, and the UBool result presumably
 * tells the enumerator whether to continue - confirm both.
 */
02271 typedef UBool U_CALLCONV
02272 UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
02273 
/*
 * Takes a pointer to a UCharEnumTypeRange callback and an opaque `context`
 * pointer; returns nothing. The `context` type matches the callback's first
 * parameter, so it is presumably passed through unchanged - confirm.
 */
02293 U_STABLE void U_EXPORT2
02294 u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
02295 
/*
 * u_getCombiningClass() is declared only when UCONFIG_NO_NORMALIZATION
 * evaluates to 0; callers compiled with normalization disabled must not
 * reference it.
 */
02296 #if !UCONFIG_NO_NORMALIZATION
02297 
/* Takes a code point and returns a uint8_t. */
02305 U_STABLE uint8_t U_EXPORT2
02306 u_getCombiningClass(UChar32 c);
02307 
02308 #endif
02309 
/*
 * Takes a code point and returns an int32_t.
 * NOTE(review): the value returned for a character that is not a digit is
 * not visible in this header - confirm.
 */
02333 U_STABLE int32_t U_EXPORT2
02334 u_charDigitValue(UChar32 c);
02335 
/* Takes a code point and returns one of the UBlockCode values. */
02345 U_STABLE UBlockCode U_EXPORT2
02346 ublock_getCode(UChar32 c);
02347 
/*
 * Character-name API. Names are exchanged as `char` strings and the kind
 * of name is selected with a UCharNameChoice value. Functions that can fail
 * report through a caller-supplied UErrorCode pointer.
 */

/*
 * Writes into the caller-supplied `buffer` of capacity `bufferLength` and
 * returns an int32_t.
 * NOTE(review): presumably the return value is the length of the name and
 * may exceed `bufferLength` when the buffer is too small - confirm.
 */
02380 U_STABLE int32_t U_EXPORT2
02381 u_charName(UChar32 code, UCharNameChoice nameChoice,
02382            char *buffer, int32_t bufferLength,
02383            UErrorCode *pErrorCode);
02384 
/*
 * Same buffer-and-capacity shape as u_charName(): output goes to `dest`
 * (capacity `destCapacity`), the result is an int32_t.
 */
02407 U_STABLE int32_t U_EXPORT2
02408 u_getISOComment(UChar32 c,
02409                 char *dest, int32_t destCapacity,
02410                 UErrorCode *pErrorCode);
02411 
/*
 * Takes a name choice and a `const char *` name; returns a code point.
 * NOTE(review): the result for an unknown name is not visible in this
 * header - check *pErrorCode and confirm against the ICU docs.
 */
02432 U_STABLE UChar32 U_EXPORT2
02433 u_charFromName(UCharNameChoice nameChoice,
02434                const char *name,
02435                UErrorCode *pErrorCode);
02436 
/*
 * UEnumCharNamesFn: callback function type used by u_enumCharNames().
 * It receives the caller's `context` pointer, a code point, the name
 * choice, the name string and its `length`, and returns a UBool.
 * NOTE(review): unlike UCharEnumTypeRange earlier in this header, this
 * callback type is declared without U_CALLCONV - confirm whether that is
 * intentional before adding callbacks on platforms where it matters.
 */
02454 typedef UBool UEnumCharNamesFn(void *context,
02455                                UChar32 code,
02456                                UCharNameChoice nameChoice,
02457                                const char *name,
02458                                int32_t length);
02459 
/*
 * Takes a `start` and `limit` code point, a callback, an opaque `context`
 * pointer, a name choice and an error-code pointer; returns nothing.
 * NOTE(review): the parameter name `limit` suggests the range is
 * [start, limit) with `limit` exclusive - confirm.
 */
02481 U_STABLE void U_EXPORT2
02482 u_enumCharNames(UChar32 start, UChar32 limit,
02483                 UEnumCharNamesFn *fn,
02484                 void *context,
02485                 UCharNameChoice nameChoice,
02486                 UErrorCode *pErrorCode);
02487 
/*
 * Property-name API: conversions between UProperty / property-value numbers
 * and their textual names. None of these functions takes a UErrorCode, so
 * failure must be signalled through the return value.
 * NOTE(review): presumably a NULL pointer, UCHAR_INVALID_CODE or a similar
 * sentinel is returned on failure - confirm the exact sentinels, and the
 * ownership of the returned strings, against the ICU docs.
 */

/* Selector plus name choice in, `const char *` name out. */
02519 U_STABLE const char* U_EXPORT2
02520 u_getPropertyName(UProperty property,
02521                   UPropertyNameChoice nameChoice);
02522 
/* Name (`alias`) in, UProperty selector out. */
02542 U_STABLE UProperty U_EXPORT2
02543 u_getPropertyEnum(const char* alias);
02544 
/* Selector, numeric property value and name choice in; `const char *` out. */
02592 U_STABLE const char* U_EXPORT2
02593 u_getPropertyValueName(UProperty property,
02594                        int32_t value,
02595                        UPropertyNameChoice nameChoice);
02596 
/* Selector and value name (`alias`) in; numeric property value out. */
02628 U_STABLE int32_t U_EXPORT2
02629 u_getPropertyValueEnum(UProperty property,
02630                        const char* alias);
02631 
/*
 * Identifier-related predicates. Each takes one code point and returns a
 * UBool.
 * NOTE(review): the u_isJava* variants presumably follow the Java
 * identifier rules while the others follow ICU's own definition; the exact
 * character sets are not visible in this header - confirm.
 */
02649 U_STABLE UBool U_EXPORT2
02650 u_isIDStart(UChar32 c);
02651 
02673 U_STABLE UBool U_EXPORT2
02674 u_isIDPart(UChar32 c);
02675 
02698 U_STABLE UBool U_EXPORT2
02699 u_isIDIgnorable(UChar32 c);
02700 
02717 U_STABLE UBool U_EXPORT2
02718 u_isJavaIDStart(UChar32 c);
02719 
02738 U_STABLE UBool U_EXPORT2
02739 u_isJavaIDPart(UChar32 c);
02740 
/*
 * Single-code-point case mapping. Each function takes one code point and
 * returns one code point, so a mapping whose result is longer than one
 * code point cannot be expressed through these signatures.
 * NOTE(review): presumably the input is returned unchanged when no mapping
 * exists - confirm.
 */
02763 U_STABLE UChar32 U_EXPORT2
02764 u_tolower(UChar32 c);
02765 
02788 U_STABLE UChar32 U_EXPORT2
02789 u_toupper(UChar32 c);
02790 
02813 U_STABLE UChar32 U_EXPORT2
02814 u_totitle(UChar32 c);
02815 
/*
 * Option constants 0 and 1.
 * NOTE(review): by name these are values for the `options` argument of
 * u_foldCase() below; whether further option bits may be combined with
 * them is not visible in this header - confirm.
 */
02817 #define U_FOLD_CASE_DEFAULT 0
02818 
02835 #define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
02836 
/* Takes a code point and a uint32_t `options` word; returns a code point. */
02859 U_STABLE UChar32 U_EXPORT2
02860 u_foldCase(UChar32 c, uint32_t options);
02861 
/*
 * Radix-based digit conversion. `radix` is an int8_t in both functions.
 * NOTE(review): the accepted radix range and the values returned for
 * out-of-range input are not visible in this header - confirm.
 */

/* Code point and radix in; int32_t digit value out. */
02900 U_STABLE int32_t U_EXPORT2
02901 u_digit(UChar32 ch, int8_t radix);
02902 
/* Digit value and radix in; code point out. */
02931 U_STABLE UChar32 U_EXPORT2
02932 u_forDigit(int32_t digit, int8_t radix);
02933 
/*
 * Both functions return nothing and deliver their result through the
 * caller-supplied UVersionInfo `versionArray` (an output parameter).
 */
02948 U_STABLE void U_EXPORT2
02949 u_charAge(UChar32 c, UVersionInfo versionArray);
02950 
/*
 * NOTE(review): presumably reports the same version that U_UNICODE_VERSION
 * spells as a string - confirm.
 */
02962 U_STABLE void U_EXPORT2
02963 u_getUnicodeVersion(UVersionInfo versionArray);
02964 
/*
 * Writes UChar units into the caller-supplied `dest` of capacity
 * `destCapacity`, reports errors through `pErrorCode`, and returns an
 * int32_t.
 * NOTE(review): presumably the return value is the length of the result and
 * may exceed `destCapacity` when the buffer is too small - confirm.
 */
02986 U_STABLE int32_t U_EXPORT2
02987 u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
02988 
02989 U_CDECL_END
02990 
02991 #endif /* UCHAR_H */
02992 /*eof*/

Generated on Tue Sep 13 11:03:25 2005 for ICU 3.4 by  doxygen 1.4.4