Main Page | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Class Members | File Members | Related Pages

UChar.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file UChar.d
00004         
00005         Copyright (c) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026         4. Derivative works are permitted, but they must carry this notice
00027            in full and credit the original source.
00028 
00029 
00030                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00031 
00032 
00033         @version        Initial version, October 2004      
00034         @author         Kris
00035         
00036 
00037         Note that this package and documentation is built around the ICU 
00038         project (http://oss.software.ibm.com/icu/). Below is the license 
00039         statement as specified by that software:
00040 
00041 
00042                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00043 
00044 
00045         ICU License - ICU 1.8.1 and later
00046 
00047         COPYRIGHT AND PERMISSION NOTICE
00048 
00049         Copyright (c) 1995-2003 International Business Machines Corporation and 
00050         others.
00051 
00052         All rights reserved.
00053 
00054         Permission is hereby granted, free of charge, to any person obtaining a
00055         copy of this software and associated documentation files (the
00056         "Software"), to deal in the Software without restriction, including
00057         without limitation the rights to use, copy, modify, merge, publish,
00058         distribute, and/or sell copies of the Software, and to permit persons
00059         to whom the Software is furnished to do so, provided that the above
00060         copyright notice(s) and this permission notice appear in all copies of
00061         the Software and that both the above copyright notice(s) and this
00062         permission notice appear in supporting documentation.
00063 
00064         THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00065         OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00066         MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
00067         OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
00068         HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
00069         INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
00070         FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
00071         NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
00072         WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00073 
00074         Except as contained in this notice, the name of a copyright holder
00075         shall not be used in advertising or otherwise to promote the sale, use
00076         or other dealings in this Software without prior written authorization
00077         of the copyright holder.
00078 
00079         ----------------------------------------------------------------------
00080 
00081         All trademarks and registered trademarks mentioned herein are the 
00082         property of their respective owners.
00083 
00084 *******************************************************************************/
00085 
00086 module mango.icu.UChar;
00087 
00088 private import  mango.icu.ICU;
00089 
00090 /*******************************************************************************
00091 
00092         This API provides low-level access to the Unicode Character 
00093         Database. In addition to raw property values, some convenience 
00094         functions calculate derived properties, for example for Java-style 
00095         programming.
00096 
00097         Unicode assigns each code point (not just assigned character) 
00098         values for many properties. Most of them are simple boolean 
00099         flags, or constants from a small enumerated list. For some 
00100         properties, values are strings or other relatively more complex 
00101         types.
00102 
00103         For more information see "About the Unicode Character Database" 
00104         (http://www.unicode.org/ucd/) and the ICU User Guide chapter on 
00105         Properties (http://oss.software.ibm.com/icu/userguide/properties.html).
00106 
00107         Many functions are designed to match java.lang.Character functions. 
00108         See the individual function documentation, and see the JDK 1.4.1 
00109         java.lang.Character documentation at 
00110         http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
00111 
00112         There are also functions that provide easy migration from C/POSIX 
00113         functions like isblank(). Their use is generally discouraged because 
00114         the C/POSIX standards do not define their semantics beyond the ASCII 
00115         range, which means that different implementations exhibit very different 
00116         behavior. Instead, Unicode properties should be used directly.
00117 
00118         There are also only a few, broad C/POSIX character classes, and they 
00119         tend to be used for conflicting purposes. For example, the "isalpha()" 
00120         class is sometimes used to determine word boundaries, while a more 
00121         sophisticated approach would at least distinguish initial letters from 
00122         continuation characters (the latter including combining marks). (In 
00123         ICU, BreakIterator is the most sophisticated API for word boundaries.) 
00124         Another example: There is no "istitle()" class for titlecase characters.
00125 
00126         A summary of the behavior of some C/POSIX character classification 
00127         implementations for Unicode is available at 
00128         http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
00129 
00130         See <A HREF="http://oss.software.ibm.com/icu/apiref/uchar_8h.html">
00131         this page</A> for full details.
00132 
00133 *******************************************************************************/
00134 
00135 class UChar : ICU
00136 {
00137         public enum     Property // Property selectors; the methods of this class hand these to u_getIntPropertyValue, u_getIntPropertyMinValue/MaxValue and u_getPropertyName
00138                         {
00139                         Alphabetic = 0, 
00140                         BinaryStart = Alphabetic, // alias for the first value of this range (0)
00141                         AsciiHexDigit, 
00142                         BidiControl,
00143                         BidiMirrored, 
00144                         Dash, 
00145                         DefaultIgnorableCodePoint, 
00146                         Deprecated,
00147                         Diacritic, 
00148                         Extender, 
00149                         FullCompositionExclusion, 
00150                         GraphemeBase,
00151                         GraphemeExtend, 
00152                         GraphemeLink, 
00153                         HexDigit, 
00154                         Hyphen,
00155                         IdContinue, 
00156                         IdStart, 
00157                         Ideographic, 
00158                         IdsBinaryOperator,
00159                         IdsTrinaryOperator, 
00160                         JoinControl, 
00161                         LogicalOrderException, 
00162                         Lowercase,
00163                         Math, 
00164                         NoncharacterCodePoint, 
00165                         QuotationMark, 
00166                         Radical,
00167                         SoftDotted, 
00168                         TerminalPunctuation, 
00169                         UnifiedIdeograph, 
00170                         Uppercase,
00171                         WhiteSpace, 
00172                         XidContinue, 
00173                         XidStart, 
00174                         CaseSensitive,
00175                         STerm, 
00176                         VariationSelector, 
00177                         NfdInert, 
00178                         NfkdInert,
00179                         NfcInert, 
00180                         NfkcInert, 
00181                         SegmentStarter, 
00182                         BinaryLimit, // end marker: implicitly one past SegmentStarter
00183                         BidiClass = 0x1000, // second range restarts the numbering at 0x1000
00184                         IntStart = BidiClass, // alias for the first value of this range
00185                         Block, CanonicalCombiningClass, // two members on one line; numbering continues 0x1001, 0x1002
00186                         DecompositionType, 
00187                         EastAsianWidth, 
00188                         GeneralCategory, 
00189                         JoiningGroup,
00190                         JoiningType, 
00191                         LineBreak, 
00192                         NumericType, 
00193                         Script,
00194                         HangulSyllableType, 
00195                         NfdQuickCheck, 
00196                         NfkdQuickCheck, 
00197                         NfcQuickCheck,
00198                         NfkcQuickCheck, 
00199                         LeadCanonicalCombiningClass,
00200                         TrailCanonicalCombiningClass, 
00201                         IntLimit, // end marker: implicitly one past TrailCanonicalCombiningClass
00202                         GeneralCategoryMask = 0x2000, // third range restarts at 0x2000
00203                         MaskStart = GeneralCategoryMask, // alias for the first value of this range
00204                         MaskLimit, // end marker: 0x2001
00205                         NumericValue = 0x3000, // fourth range restarts at 0x3000
00206                         DoubleStart = NumericValue, // alias for the first value of this range
00207                         DoubleLimit, // end marker: 0x3001
00208                         Age = 0x4000, // fifth range restarts at 0x4000
00209                         StringStart = Age, // alias for the first value of this range
00210                         BidiMirroringGlyph, 
00211                         CaseFolding, 
00212                         IsoComment, 
00213                         LowercaseMapping,
00214                         Name, 
00215                         SimpleCaseFolding, 
00216                         SimpleLowercaseMapping, 
00217                         SimpleTitlecaseMapping,
00218                         SimpleUppercaseMapping, 
00219                         TitlecaseMapping, 
00220                         Unicode1Name, 
00221                         UppercaseMapping,
00222                         StringLimit, // end marker: implicitly one past UppercaseMapping
00223                         InvalidCode = -1 // explicit -1, outside every range above; NOTE(review): presumably the whole table mirrors ICU's UProperty numbering - confirm against uchar.h
00224                         }
00225         
00226         public enum     Category // Character category codes with explicit values 0..29; NOTE(review): presumably mirrors ICU's UCharCategory - confirm against uchar.h
00227                         {
00228                         Unassigned = 0, 
00229                         GeneralOtherTypes = 0, // deliberate second name for the same value 0 as Unassigned
00230                         UppercaseLetter = 1, 
00231                         LowercaseLetter = 2,
00232                         TitlecaseLetter = 3, 
00233                         ModifierLetter = 4, 
00234                         OtherLetter = 5, 
00235                         NonSpacingMark = 6,
00236                         EnclosingMark = 7, 
00237                         CombiningSpacingMark = 8, 
00238                         DecimalDigitNumber = 9, 
00239                         LetterNumber = 10,
00240                         OtherNumber = 11, 
00241                         SpaceSeparator = 12, 
00242                         LineSeparator = 13, 
00243                         ParagraphSeparator = 14,
00244                         ControlChar = 15, 
00245                         FormatChar = 16, 
00246                         PrivateUseChar = 17, 
00247                         Surrogate = 18,
00248                         DashPunctuation = 19, 
00249                         StartPunctuation = 20, 
00250                         EndPunctuation = 21, 
00251                         ConnectorPunctuation = 22,
00252                         OtherPunctuation = 23,
00253                         MathSymbol = 24, 
00254                         CurrencySymbol = 25, 
00255                         ModifierSymbol = 26,
00256                         OtherSymbol = 27, 
00257                         InitialPunctuation = 28, 
00258                         FinalPunctuation = 29, 
00259                         Count // implicit value 30 (follows FinalPunctuation = 29)
00260                         }
00261 
00262         public enum     Direction // Result type of charDirection(), which casts the u_charDirection return value to this enum
00263                         {
00264                         LeftToRight = 0, 
00265                         RightToLeft = 1, 
00266                         EuropeanNumber = 2, 
00267                         EuropeanNumberSeparator = 3,
00268                         EuropeanNumberTerminator = 4, 
00269                         ArabicNumber = 5, 
00270                         CommonNumberSeparator = 6, 
00271                         BlockSeparator = 7,
00272                         SegmentSeparator = 8, 
00273                         WhiteSpaceNeutral = 9, 
00274                         OtherNeutral = 10, 
00275                         LeftToRightEmbedding = 11,
00276                         LeftToRightOverride = 12, 
00277                         RightToLeftArabic = 13, 
00278                         RightToLeftEmbedding = 14, 
00279                         RightToLeftOverride = 15,
00280                         PopDirectionalFormat = 16, 
00281                         DirNonSpacingMark = 17, 
00282                         BoundaryNeutral = 18, 
00283                         Count // implicit value 19 (follows BoundaryNeutral = 18)
00284                         }
00285 
00286         public enum     BlockCode // Result type of getBlockCode(), which casts the ublock_getCode return value to this enum
00287                         {
00288                         NoBlock = 0, 
00289                         BasicLatin = 1, 
00290                         Latin1Supplement = 2, 
00291                         LatinExtendedA = 3,
00292                         LatinExtendedB = 4, 
00293                         IpaExtensions = 5, 
00294                         SpacingModifierLetters = 6, 
00295                         CombiningDiacriticalMarks = 7,
00296                         Greek = 8, 
00297                         Cyrillic = 9, 
00298                         Armenian = 10, 
00299                         Hebrew = 11,
00300                         Arabic = 12, 
00301                         Syriac = 13, 
00302                         Thaana = 14, 
00303                         Devanagari = 15,
00304                         Bengali = 16, 
00305                         Gurmukhi = 17, 
00306                         Gujarati = 18, 
00307                         Oriya = 19,
00308                         Tamil = 20, 
00309                         Telugu = 21, 
00310                         Kannada = 22, 
00311                         Malayalam = 23,
00312                         Sinhala = 24, 
00313                         Thai = 25, 
00314                         Lao = 26, 
00315                         Tibetan = 27,
00316                         Myanmar = 28, 
00317                         Georgian = 29, 
00318                         HangulJamo = 30, 
00319                         Ethiopic = 31,
00320                         Cherokee = 32, 
00321                         UnifiedCanadianAboriginalSyllabics = 33, 
00322                         Ogham = 34, 
00323                         Runic = 35,
00324                         Khmer = 36, 
00325                         Mongolian = 37, 
00326                         LatinExtendedAdditional = 38, 
00327                         GreekExtended = 39,
00328                         GeneralPunctuation = 40, 
00329                         SuperscriptsAndSubscripts = 41, 
00330                         CurrencySymbols = 42, 
00331                         CombiningMarksForSymbols = 43,
00332                         LetterlikeSymbols = 44, 
00333                         NumberForms = 45, 
00334                         Arrows = 46, 
00335                         MathematicalOperators = 47,
00336                         MiscellaneousTechnical = 48, 
00337                         ControlPictures = 49, 
00338                         OpticalCharacterRecognition = 50, 
00339                         EnclosedAlphanumerics = 51,
00340                         BoxDrawing = 52, 
00341                         BlockElements = 53, 
00342                         GeometricShapes = 54, 
00343                         MiscellaneousSymbols = 55,
00344                         Dingbats = 56, 
00345                         BraillePatterns = 57, 
00346                         CjkRadicalsSupplement = 58, 
00347                         KangxiRadicals = 59,
00348                         IdeographicDescriptionCharacters = 60, 
00349                         CjkSymbolsAndPunctuation = 61, 
00350                         Hiragana = 62, 
00351                         Katakana = 63,
00352                         Bopomofo = 64, 
00353                         HangulCompatibilityJamo = 65, 
00354                         Kanbun = 66, 
00355                         BopomofoExtended = 67,
00356                         EnclosedCjkLettersAndMonths = 68, 
00357                         CjkCompatibility = 69, 
00358                         CjkUnifiedIdeographsExtensionA = 70, 
00359                         CjkUnifiedIdeographs = 71,
00360                         YiSyllables = 72, 
00361                         YiRadicals = 73, 
00362                         HangulSyllables = 74, 
00363                         HighSurrogates = 75,
00364                         HighPrivateUseSurrogates = 76, 
00365                         LowSurrogates = 77, 
00366                         PrivateUse = 78, 
00367                         PrivateUseArea = PrivateUse, // second name for the same value 78
00368                         CjkCompatibilityIdeographs = 79, 
00369                         AlphabeticPresentationForms = 80, 
00370                         ArabicPresentationFormsA = 81, 
00371                         CombiningHalfMarks = 82,
00372                         CjkCompatibilityForms = 83, 
00373                         SmallFormVariants = 84, 
00374                         ArabicPresentationFormsB = 85, 
00375                         Specials = 86,
00376                         HalfwidthAndFullwidthForms = 87, 
00377                         OldItalic = 88, 
00378                         Gothic = 89, 
00379                         Deseret = 90,
00380                         ByzantineMusicalSymbols = 91, 
00381                         MusicalSymbols = 92, 
00382                         MathematicalAlphanumericSymbols = 93, 
00383                         CjkUnifiedIdeographsExtensionB = 94,
00384                         CjkCompatibilityIdeographsSupplement = 95, 
00385                         Tags = 96, 
00386                         CyrillicSupplementary = 97, 
00387                         CyrillicSupplement = CyrillicSupplementary, // second name for the same value 97
00388                         Tagalog = 98, 
00389                         Hanunoo = 99, 
00390                         Buhid = 100, 
00391                         Tagbanwa = 101,
00392                         MiscellaneousMathematicalSymbolsA = 102, 
00393                         SupplementalArrowsA = 103, 
00394                         SupplementalArrowsB = 104, 
00395                         MiscellaneousMathematicalSymbolsB = 105,
00396                         SupplementalMathematicalOperators = 106, 
00397                         KatakanaPhoneticExtensions = 107, 
00398                         VariationSelectors = 108, 
00399                         SupplementaryPrivateUseAreaA = 109,
00400                         SupplementaryPrivateUseAreaB = 110, 
00401                         Limbu = 111, 
00402                         TaiLe = 112, 
00403                         KhmerSymbols = 113,
00404                         PhoneticExtensions = 114, 
00405                         MiscellaneousSymbolsAndArrows = 115, 
00406                         YijingHexagramSymbols = 116, 
00407                         LinearBSyllabary = 117,
00408                         LinearBIdeograms = 118, 
00409                         AegeanNumbers = 119, 
00410                         Ugaritic = 120, 
00411                         Shavian = 121,
00412                         Osmanya = 122, 
00413                         CypriotSyllabary = 123, 
00414                         TaiXuanJingSymbols = 124, 
00415                         VariationSelectorsSupplement = 125,
00416                         Count, // implicit value 126 (follows VariationSelectorsSupplement = 125)
00417                         InvalidCode = -1 // explicit -1, distinct from every block value above
00418                         }
00419 
00420         public enum     EastAsianWidth // Implicitly numbered from 0 in declaration order; NOTE(review): not referenced by any method in the visible part of this file - presumably the value set of Property.EastAsianWidth, confirm
00421                         {
00422                         Neutral, 
00423                         Ambiguous, 
00424                         Halfwidth, 
00425                         Fullwidth,
00426                         Narrow, 
00427                         Wide, 
00428                         Count // implicit value 6: the number of members declared before it
00429                         }
00430 
00431         public enum     CharNameChoice // Name-kind selector passed through by getCharName() to u_charName and by charFromName() to u_charFromName
00432                         {
00433                         Unicode, // implicit value 0
00434                         Unicode10, // implicit value 1
00435                         Extended, // implicit value 2
00436                         Count // implicit value 3: the number of choices declared before it
00437                         }
00438                      
00439         public enum     NameChoice // Short/long selector passed through by getPropertyName() to u_getPropertyName
00440                         {
00441                         Short, // implicit value 0
00442                         Long, // implicit value 1
00443                         Count // implicit value 2: the number of choices declared before it
00444                         }
00445 
00446         public enum     DecompositionType // Implicitly numbered from 0 in declaration order, so member order is significant; NOTE(review): presumably the value set of Property.DecompositionType - confirm
00447                         {
00448                         None, 
00449                         Canonical, 
00450                         Compat, 
00451                         Circle,
00452                         Final, 
00453                         Font, 
00454                         Fraction, 
00455                         Initial,
00456                         Isolated, 
00457                         Medial, 
00458                         Narrow, 
00459                         Nobreak,
00460                         Small, 
00461                         Square, 
00462                         Sub, 
00463                         Super,
00464                         Vertical, 
00465                         Wide, 
00466                         Count // implicit value 18: the number of members declared before it
00467                         }
00468 
00469         public enum     JoiningType // Implicitly numbered from 0 in declaration order; NOTE(review): presumably the value set of Property.JoiningType - confirm
00470                         {
00471                         NonJoining, 
00472                         JoinCausing, 
00473                         DualJoining, 
00474                         LeftJoining,
00475                         RightJoining, 
00476                         Transparent, 
00477                         Count // implicit value 6: the number of members declared before it
00478                         }
00479 
00480         public enum     JoiningGroup // Implicitly numbered from 0 in declaration order (not alphabetical: Fe, Khaph, Zhain come last), so member order is significant; NOTE(review): presumably the value set of Property.JoiningGroup - confirm
00481                         {
00482                         NoJoiningGroup, 
00483                         Ain, 
00484                         Alaph, 
00485                         Alef,
00486                         Beh, 
00487                         Beth, 
00488                         Dal, 
00489                         DalathRish,
00490                         E, 
00491                         Feh, 
00492                         FinalSemkath, 
00493                         Gaf,
00494                         Gamal, 
00495                         Hah, 
00496                         HamzaOnHehGoal, 
00497                         He,
00498                         Heh, 
00499                         HehGoal, 
00500                         Heth, 
00501                         Kaf,
00502                         Kaph, 
00503                         KnottedHeh, 
00504                         Lam, 
00505                         Lamadh,
00506                         Meem, 
00507                         Mim, 
00508                         Noon, 
00509                         Nun,
00510                         Pe, 
00511                         Qaf, 
00512                         Qaph, 
00513                         Reh,
00514                         Reversed_Pe, 
00515                         Sad, 
00516                         Sadhe, 
00517                         Seen,
00518                         Semkath, 
00519                         Shin, 
00520                         Swash_Kaf, 
00521                         Syriac_Waw,
00522                         Tah, 
00523                         Taw, 
00524                         Teh_Marbuta, 
00525                         Teth,
00526                         Waw, 
00527                         Yeh, 
00528                         Yeh_Barree, 
00529                         Yeh_With_Tail,
00530                         Yudh, 
00531                         Yudh_He, 
00532                         Zain, 
00533                         Fe,
00534                         Khaph, 
00535                         Zhain, 
00536                         Count // end marker: implicitly one past Zhain
00537                         }
00538 
00539         public enum     LineBreak // Implicitly numbered from 0 in declaration order, so member order is significant; NOTE(review): presumably the value set of Property.LineBreak - confirm
00540                         {
00541                         Unknown, 
00542                         Ambiguous, 
00543                         Alphabetic, 
00544                         BreakBoth,
00545                         BreakAfter, 
00546                         BreakBefore, 
00547                         MandatoryBreak, 
00548                         ContingentBreak,
00549                         ClosePunctuation, 
00550                         CombiningMark, 
00551                         CarriageReturn, 
00552                         Exclamation,
00553                         Glue, 
00554                         Hyphen, 
00555                         Ideographic, 
00556                         Inseperable, // misspelled name kept as a member; the correctly spelled alias follows
00557                         Inseparable = Inseperable, // same value as Inseperable; does not advance the numbering
00558                         InfixNumeric, // continues at Inseperable + 1
00559                         LineFeed, 
00560                         Nonstarter,
00561                         Numeric, 
00562                         OpenPunctuation, 
00563                         PostfixNumeric, 
00564                         PrefixNumeric,
00565                         Quotation, 
00566                         ComplexContext, 
00567                         Surrogate, 
00568                         Space,
00569                         BreakSymbols, 
00570                         Zwspace, 
00571                         NextLine, 
00572                         WordJoiner,
00573                         Count // end marker: implicitly one past WordJoiner (counts distinct values, not names)
00574                         }
00575 
00576         public enum     NumericType // Implicitly numbered from 0 in declaration order; NOTE(review): presumably the value set of Property.NumericType - confirm
00577                         {
00578                         None, 
00579                         Decimal, 
00580                         Digit, 
00581                         Numeric,
00582                         Count // implicit value 4: the number of members declared before it
00583                         }
00584 
00585         public enum     HangulSyllableType // Implicitly numbered from 0 in declaration order; NOTE(review): presumably the value set of Property.HangulSyllableType - confirm
00586                         {
00587                         NotApplicable, 
00588                         LeadingJamo, 
00589                         VowelJamo, 
00590                         TrailingJamo,
00591                         LvSyllable, 
00592                         LvtSyllable, 
00593                         Count // implicit value 6: the number of members declared before it
00594                         }
00595 
00596         /***********************************************************************
00597         
00598                 Get the property value for an enumerated or integer 
00599                 Unicode property for a code point. Also returns binary 
00600                 and mask property values.
00601 
00602                 Unicode, especially in version 3.2, defines many more 
00603                 properties than the original set in UnicodeData.txt.
00604 
00605                 The properties APIs are intended to reflect Unicode 
00606                 properties as defined in the Unicode Character Database 
00607                 (UCD) and Unicode Technical Reports (UTR). For details 
00608                 about the properties see http://www.unicode.org/ . For 
00609                 names of Unicode properties see the file PropertyAliases.txt
00610 
00611         ***********************************************************************/
00612 
00613         uint getProperty (dchar c, Property p) // c: code point to query; p: property selector; returns the callee's result as uint
00614         {
00615                 return u_getIntPropertyValue (cast(uint) c, cast(uint) p); // both arguments are explicitly cast to uint; u_getIntPropertyValue is declared outside this view
00616         }
00617 
00618         /***********************************************************************
00619         
00620                 Get the minimum value for an enumerated/integer/binary 
00621                 Unicode property
00622 
00623         ***********************************************************************/
00624 
00625         uint getPropertyMinimum (Property p) // p: property selector; returns the callee's result as uint
00626         {
00627                 return u_getIntPropertyMinValue (p); // p is forwarded without an explicit cast (unlike getProperty)
00628         }
00629 
00630         /***********************************************************************
00631         
00632                 Get the maximum value for an enumerated/integer/binary 
00633                 Unicode property
00634 
00635         ***********************************************************************/
00636 
00637         uint getPropertyMaximum (Property p) // p: property selector; returns the callee's result as uint
00638         {
00639                 return u_getIntPropertyMaxValue (p); // p is forwarded without an explicit cast (unlike getProperty)
00640         }
00641        
00642         /***********************************************************************
00643         
00644                 Returns the bidirectional category value for the code 
00645                 point, which is used in the Unicode bidirectional algorithm 
00646                 (UAX #9 http://www.unicode.org/reports/tr9/).
00647 
00648         ***********************************************************************/
00649 
00650         Direction charDirection (dchar c) // c: code point to query; returns the callee's result reinterpreted as a Direction member
00651         {
00652                 return cast(Direction) u_charDirection (c); // unchecked cast: a value outside the declared Direction members would pass through as-is
00653         }
00654 
00655         /***********************************************************************
00656         
00657                 Returns the Unicode allocation block that contains the 
00658                 character
00659 
00660         ***********************************************************************/
00661 
00662         BlockCode getBlockCode (dchar c) // c: code point to query; returns the callee's result reinterpreted as a BlockCode member
00663         {
00664                 return cast(BlockCode) ublock_getCode (c); // unchecked cast: a value outside the declared BlockCode members would pass through as-is
00665         }
00666         
00667         /***********************************************************************
00668         
00669                 Retrieve the name of a Unicode character.
00670 
00671         ***********************************************************************/
00672 
00673         char[] getCharName (dchar c, CharNameChoice choice, inout char[] dst) // c: code point; choice: which name to fetch; dst: caller-supplied output buffer; returns the filled prefix of dst
00674         {
00675                 Error e; // status value handed to u_charName and then checked below; the Error type is declared outside this view
00676 
00677                 uint len = u_charName (c, choice, dst, dst.length, e); // dst.length is passed as the buffer capacity; len receives the callee's return value
00678                 testError (e, "failed to extract char name (buffer too small?)"); // NOTE(review): testError is defined outside this view; presumably raises when e signals failure - confirm
00679                 return dst [0..len]; // a slice of the caller's buffer, not a copy; NOTE(review): assumes len <= dst.length whenever testError returns - confirm
00680         }
00681         
00682         /***********************************************************************
00683 
00684                 Copy c's ISO 10646 comment into dst; returns the slice used.
00685 
00686         ***********************************************************************/
00687 
00688         char[] getComment (dchar c, inout char[] dst)
00689         {
00690                 Error status;
00691                 uint  used;
00692                 used = u_getISOComment (c, dst, dst.length, status);
00693                 testError (status, "failed to extract comment (buffer too small?)");
00694                 return dst [0 .. used];
00695         }
00696         
00697         /***********************************************************************
00698 
00699                 Look up the code point whose Unicode character name is
00700                 'name'; the ICU status is checked through testError().
00701 
00702         ***********************************************************************/
00703 
00704         dchar charFromName (CharNameChoice choice, char[] name)
00705         {
00706                 Error status;
00707 
00708                 dchar found = u_charFromName (choice, toString(name), status);
00709                 testError (status, "failed to locate char name");
00710                 return found;
00711         }
00712         
00713         /***********************************************************************
00714 
00715                 Name of property p as given in Unicode's PropertyAliases.txt.
00716 
00717         ***********************************************************************/
00718 
00719         char[] getPropertyName (Property p, NameChoice choice)
00720         {
00721                 char* cname = u_getPropertyName (p, choice);
00722                 return toArray (cname);
00723         }
00724         
00725         /***********************************************************************
00726 
00727                 Name of a value of property p, per PropertyValueAliases.txt.
00728                 ICU takes (property, value, choice), hence the reordering.
00729         ***********************************************************************/
00730 
00731         char[] getPropertyValueName (Property p, NameChoice choice, uint value)
00732         {
00733                 char* cname = u_getPropertyValueName (p, value, choice);
00734                 return toArray (cname);
00735         }
00736         
00737         /***********************************************************************
00738 
00739                 Gets the Unicode version information: v is handed to ICU's
00740                 u_getUnicodeVersion, which is expected to fill it in.
00741         ***********************************************************************/
00742 
00743         void getUnicodeVersion (inout Version v)
00744         {
00745                 u_getUnicodeVersion (v);
00746         }
00747         
00748         /***********************************************************************
00749 
00750                 Get the "age" of code point c: c and v are handed to ICU's
00751                 u_charAge, which is expected to store the age in v.
00752         ***********************************************************************/
00753 
00754         void getCharAge (dchar c, inout Version v)
00755         {
00756                 u_charAge (c, v);
00757         }
00758         
00759 
00760         /***********************************************************************
00761 
00762                 These are externalised directly to the client (sans wrapper),
00763                 but this may have to change for Linux, depending upon the
00764                 ICU function-naming conventions within the POSIX libraries.
00765 
00766         ***********************************************************************/
00767 
00768         final static extern (C) 
00769         {
00770                 /***************************************************************
00771 
00772                         Check if a code point has the Alphabetic Unicode
00773                         property.
00774 
00775                 ***************************************************************/
00776 
00777                 bool function (dchar c) isUAlphabetic;  // ICU symbol: u_isUAlphabetic
00778 
00779                 /***************************************************************
00780 
00781                         Check if a code point has the Lowercase Unicode
00782                         property.
00783 
00784                 ***************************************************************/
00785 
00786                 bool function (dchar c) isULowercase;  // ICU symbol: u_isULowercase
00787 
00788                 /***************************************************************
00789 
00790                         Check if a code point has the Uppercase Unicode
00791                         property.
00792 
00793                 ***************************************************************/
00794 
00795                 bool function (dchar c) isUUppercase;  // ICU symbol: u_isUUppercase
00796 
00797                 /***************************************************************
00798 
00799                         Check if a code point has the White_Space Unicode
00800                         property.
00801 
00802                 ***************************************************************/
00803 
00804                 bool function (dchar c) isUWhiteSpace;  // ICU symbol: u_isUWhiteSpace
00805 
00806                 /***************************************************************
00807 
00808                         Determines whether the specified code point has the
00809                         general category "Ll" (lowercase letter).
00810 
00811                 ***************************************************************/
00812 
00813                 bool function (dchar c) isLower;  // ICU symbol: u_islower
00814 
00815                 /***************************************************************
00816 
00817                         Determines whether the specified code point has the
00818                         general category "Lu" (uppercase letter).
00819 
00820                 ***************************************************************/
00821 
00822                 bool function (dchar c) isUpper;  // ICU symbol: u_isupper
00823 
00824                 /***************************************************************
00825 
00826                         Determines whether the specified code point is a
00827                         titlecase letter.
00828 
00829                 ***************************************************************/
00830 
00831                 bool function (dchar c) isTitle;  // ICU symbol: u_istitle
00832 
00833                 /***************************************************************
00834 
00835                         Determines whether the specified code point is a
00836                         digit character according to Java.
00837 
00838                 ***************************************************************/
00839 
00840                 bool function (dchar c) isDigit;  // ICU symbol: u_isdigit
00841 
00842                 /***************************************************************
00843 
00844                         Determines whether the specified code point is a
00845                         letter character.
00846 
00847                 ***************************************************************/
00848 
00849                 bool function (dchar c) isAlpha;  // ICU symbol: u_isalpha
00850 
00851                 /***************************************************************
00852 
00853                         Determines whether the specified code point is an
00854                         alphanumeric character (letter or digit) according
00855                         to Java.
00856 
00857                 ***************************************************************/
00858 
00859                 bool function (dchar c) isAlphaNumeric;  // ICU symbol: u_isalnum
00860 
00861                 /***************************************************************
00862 
00863                         Determines whether the specified code point is a
00864                         hexadecimal digit.
00865 
00866                 ***************************************************************/
00867 
00868                 bool function (dchar c) isHexDigit;  // ICU symbol: u_isxdigit
00869 
00870                 /***************************************************************
00871 
00872                         Determines whether the specified code point is a
00873                         punctuation character.
00874 
00875                 ***************************************************************/
00876 
00877                 bool function (dchar c) isPunct;  // ICU symbol: u_ispunct
00878 
00879                 /***************************************************************
00880 
00881                         Determines whether the specified code point is a
00882                         "graphic" character (printable, excluding spaces).
00883 
00884                 ***************************************************************/
00885 
00886                 bool function (dchar c) isGraph;  // ICU symbol: u_isgraph
00887 
00888                 /***************************************************************
00889 
00890                         Determines whether the specified code point is a
00891                         "blank" or "horizontal space", a character that
00892                         visibly separates words on a line.
00893 
00894                 ***************************************************************/
00895 
00896                 bool function (dchar c) isBlank;  // ICU symbol: u_isblank
00897 
00898                 /***************************************************************
00899 
00900                         Determines whether the specified code point is
00901                         "defined", which usually means that it is assigned
00902                         a character.
00903 
00904                 ***************************************************************/
00905 
00906                 bool function (dchar c) isDefined;  // ICU symbol: u_isdefined
00907 
00908                 /***************************************************************
00909 
00910                         Determines if the specified character is a space
00911                         character or not.
00912 
00913                 ***************************************************************/
00914 
00915                 bool function (dchar c) isSpace;  // ICU symbol: u_isspace
00916 
00917                 /***************************************************************
00918 
00919                         Determine if the specified code point is a space
00920                         character according to Java.
00921 
00922                 ***************************************************************/
00923 
00924                 bool function (dchar c) isJavaSpaceChar;  // ICU symbol: u_isJavaSpaceChar
00925 
00926                 /***************************************************************
00927 
00928                         Determines if the specified code point is a whitespace
00929                         character according to Java/ICU.
00930 
00931                 ***************************************************************/
00932 
00933                 bool function (dchar c) isWhiteSpace;  // ICU symbol: u_isWhitespace
00934 
00935                 /***************************************************************
00936 
00937                         Determines whether the specified code point is a
00938                         control character (as defined by this function).
00939 
00940                 ***************************************************************/
00941 
00942                 bool function (dchar c) isCtrl;  // ICU symbol: u_iscntrl
00943 
00944                 /***************************************************************
00945 
00946                         Determines whether the specified code point is an ISO
00947                         control code.
00948 
00949                 ***************************************************************/
00950 
00951                 bool function (dchar c) isISOControl;  // ICU symbol: u_isISOControl
00952 
00953                 /***************************************************************
00954 
00955                         Determines whether the specified code point is a
00956                         printable character.
00957 
00958                 ***************************************************************/
00959 
00960                 bool function (dchar c) isPrint;  // ICU symbol: u_isprint
00961 
00962                 /***************************************************************
00963 
00964                         Determines whether the specified code point is a
00965                         base character.
00966 
00967                 ***************************************************************/
00968 
00969                 bool function (dchar c) isBase;  // ICU symbol: u_isbase
00970 
00971                 /***************************************************************
00972 
00973                         Determines if the specified character is permissible
00974                         as the first character in an identifier according to
00975                         Unicode (The Unicode Standard, Version 3.0, chapter
00976                         5.16 Identifiers).
00977 
00978                 ***************************************************************/
00979 
00980                 bool function (dchar c) isIDStart;  // ICU symbol: u_isIDStart
00981 
00982                 /***************************************************************
00983 
00984                         Determines if the specified character is permissible
00985                         in an identifier according to Java.
00986 
00987                 ***************************************************************/
00988 
00989                 bool function (dchar c) isIDPart;  // ICU symbol: u_isIDPart
00990 
00991                 /***************************************************************
00992 
00993                         Determines if the specified character should be
00994                         regarded as an ignorable character in an identifier,
00995                         according to Java.
00996 
00997                 ***************************************************************/
00998 
00999                 bool function (dchar c) isIDIgnorable;  // ICU symbol: u_isIDIgnorable
01000 
01001                 /***************************************************************
01002 
01003                         Determines if the specified character is permissible
01004                         as the first character in a Java identifier.
01005 
01006                 ***************************************************************/
01007 
01008                 bool function (dchar c) isJavaIDStart;  // ICU symbol: u_isJavaIDStart
01009 
01010                 /***************************************************************
01011 
01012                         Determines if the specified character is permissible
01013                         in a Java identifier.
01014 
01015                 ***************************************************************/
01016 
01017                 bool function (dchar c) isJavaIDPart;  // ICU symbol: u_isJavaIDPart
01018 
01019                 /***************************************************************
01020 
01021                         Determines whether the code point has the
01022                         Bidi_Mirrored property.
01023 
01024                 ***************************************************************/
01025 
01026                 bool function (dchar c) isMirrored;  // ICU symbol: u_isMirrored
01027 
01028                 /***************************************************************
01029                         Returns the decimal digit value of a decimal digit
01030                         character.
01031                         NOTE(review): ICU documents an int32_t result with -1 for
01032                         non-digits; read as ubyte that would be 255 - confirm.
01033                 ***************************************************************/
01034 
01035                 ubyte function (dchar c) charDigitValue;  // ICU symbol: u_charDigitValue
01036 
01037                 /***************************************************************
01038 
01039                         Maps the specified character to a "mirror-image"
01040                         character.
01041 
01042                 ***************************************************************/
01043 
01044                 dchar function (dchar c) charMirror;  // ICU symbol: u_charMirror
01045 
01046                 /***************************************************************
01047 
01048                         Returns the general category value for the code point.
01049 
01050                 ***************************************************************/
01051 
01052                 ubyte function (dchar c) charType;  // ICU symbol: u_charType
01053 
01054                 /***************************************************************
01055 
01056                         Returns the combining class of the code point as
01057                         specified in UnicodeData.txt.
01058 
01059                 ***************************************************************/
01060 
01061                 ubyte function (dchar c) getCombiningClass;  // ICU symbol: u_getCombiningClass
01062 
01063                 /***************************************************************
01064 
01065                         The given character is mapped to its lowercase
01066                         equivalent according to UnicodeData.txt; if the
01067                         character has no lowercase equivalent, the
01068                         character itself is returned.
01069 
01070                 ***************************************************************/
01071 
01072                 dchar function (dchar c) toLower;  // ICU symbol: u_tolower
01073 
01074                 /***************************************************************
01075 
01076                         The given character is mapped to its uppercase equivalent
01077                         according to UnicodeData.txt; if the character has no
01078                         uppercase equivalent, the character itself is returned.
01079 
01080                 ***************************************************************/
01081 
01082                 dchar function (dchar c) toUpper;  // ICU symbol: u_toupper
01083 
01084                 /***************************************************************
01085 
01086                         The given character is mapped to its titlecase
01087                         equivalent according to UnicodeData.txt; if none
01088                         is defined, the character itself is returned.
01089 
01090                 ***************************************************************/
01091 
01092                 dchar function (dchar c) toTitle;  // ICU symbol: u_totitle
01093 
01094                 /***************************************************************
01095 
01096                         The given character is mapped to its case folding
01097                         equivalent according to UnicodeData.txt and
01098                         CaseFolding.txt; if the character has no case folding
01099                         equivalent, the character itself is returned.
01100 
01101                 ***************************************************************/
01102 
01103                 dchar function (dchar c, uint options) foldCase;  // ICU symbol: u_foldCase
01104 
01105                 /***************************************************************
01106                         Returns the decimal digit value of the code point in
01107                         the specified radix.
01108                         NOTE(review): ICU documents int32_t with -1 for invalid
01109                         input; as uint that would be uint.max - confirm.
01110                 ***************************************************************/
01111 
01112                 uint function (dchar ch, ubyte radix) digit;  // ICU symbol: u_digit
01113 
01114                 /***************************************************************
01115 
01116                         Determines the character representation for a specific
01117                         digit in the specified radix.
01118 
01119                 ***************************************************************/
01120 
01121                 dchar function (uint digit, ubyte radix) forDigit;  // ICU symbol: u_forDigit
01122 
01123                 /***************************************************************
01124 
01125                         Get the numeric value for a Unicode code point as
01126                         defined in the Unicode Character Database.
01127 
01128                 ***************************************************************/
01129 
01130                 double function (dchar c) getNumericValue;  // ICU symbol: u_getNumericValue
01131         }
01132 
01133             
01134         /***********************************************************************
01135 
01136                 Bind the ICU functions from a shared library. This is
01137                 complicated by the issues regarding D and DLLs on the
01138                 Windows platform. 'library' keeps the value returned by
01139                 FunctionLoader.bind() so it can be given back to unbind().
01140         ***********************************************************************/
01141 
01142         private static void* library;
01143 
01144         /***********************************************************************
01145                 Private ICU entry points called by the wrapper methods above.
01146         ***********************************************************************/
01147 
01148         private static extern (C) 
01149         {
01150                 uint   function (uint, uint) u_getIntPropertyValue;
01151                 uint   function (uint) u_getIntPropertyMinValue;
01152                 uint   function (uint) u_getIntPropertyMaxValue;
01153                 uint   function (dchar) u_charDirection;
01154                 uint   function (dchar) ublock_getCode;
01155                 uint   function (dchar, uint, char*, uint, inout Error) u_charName;
01156                 uint   function (dchar, char*, uint, inout Error) u_getISOComment;
01157                 uint   function (uint, char*, inout Error) u_charFromName;
01158                 char*  function (uint, uint) u_getPropertyName;
01159                 char*  function (uint, uint, uint) u_getPropertyValueName;
01160                 void   function (inout Version) u_getUnicodeVersion;
01161                 void   function (dchar, inout Version) u_charAge;
01162         }
01163 
01164         /***********************************************************************
01165                 Pointer-address / ICU symbol pairs passed to FunctionLoader.bind()
01166         ***********************************************************************/
01167 
01168         static  FunctionLoader.Bind[] targets = 
01169                 [
01170                 {cast(void**) &forDigit,                "u_forDigit"},
01171                 {cast(void**) &digit,                   "u_digit"},
01172                 {cast(void**) &foldCase,                "u_foldCase"},
01173                 {cast(void**) &toTitle,                 "u_totitle"},
01174                 {cast(void**) &toUpper,                 "u_toupper"},
01175                 {cast(void**) &toLower,                 "u_tolower"},
01176                 {cast(void**) &charType,                "u_charType"},
01177                 {cast(void**) &charMirror,              "u_charMirror"},
01178                 {cast(void**) &charDigitValue,          "u_charDigitValue"},
01179                 {cast(void**) &isJavaIDPart,            "u_isJavaIDPart"},
01180                 {cast(void**) &isJavaIDStart,           "u_isJavaIDStart"},
01181                 {cast(void**) &isIDIgnorable,           "u_isIDIgnorable"},
01182                 {cast(void**) &isIDPart,                "u_isIDPart"},
01183                 {cast(void**) &isIDStart,               "u_isIDStart"},
01184                 {cast(void**) &isMirrored,              "u_isMirrored"},
01185                 {cast(void**) &isBase,                  "u_isbase"},
01186                 {cast(void**) &isPrint,                 "u_isprint"},
01187                 {cast(void**) &isISOControl,            "u_isISOControl"},
01188                 {cast(void**) &isCtrl,                  "u_iscntrl"},
01189                 {cast(void**) &isWhiteSpace,            "u_isWhitespace"},
01190                 {cast(void**) &isJavaSpaceChar,         "u_isJavaSpaceChar"},
01191                 {cast(void**) &isSpace,                 "u_isspace"},
01192                 {cast(void**) &isDefined,               "u_isdefined"},
01193                 {cast(void**) &isBlank,                 "u_isblank"},
01194                 {cast(void**) &isGraph,                 "u_isgraph"},
01195                 {cast(void**) &isPunct,                 "u_ispunct"},
01196                 {cast(void**) &isHexDigit,              "u_isxdigit"},
01197                 {cast(void**) &isAlpha,                 "u_isalpha"},
01198                 {cast(void**) &isAlphaNumeric,          "u_isalnum"},
01199                 {cast(void**) &isDigit,                 "u_isdigit"},
01200                 {cast(void**) &isTitle,                 "u_istitle"},
01201                 {cast(void**) &isUpper,                 "u_isupper"},
01202                 {cast(void**) &isLower,                 "u_islower"},
01203                 {cast(void**) &isUAlphabetic,           "u_isUAlphabetic"},
01204                 {cast(void**) &isUWhiteSpace,           "u_isUWhiteSpace"},
01205                 {cast(void**) &isUUppercase,            "u_isUUppercase"},
01206                 {cast(void**) &isULowercase,            "u_isULowercase"},
01207                 {cast(void**) &getNumericValue,         "u_getNumericValue"},
01208                 {cast(void**) &getCombiningClass,       "u_getCombiningClass"},
01209                 {cast(void**) &u_getIntPropertyValue,   "u_getIntPropertyValue"},
01210                 {cast(void**) &u_getIntPropertyMinValue,"u_getIntPropertyMinValue"},
01211                 {cast(void**) &u_getIntPropertyMaxValue,"u_getIntPropertyMaxValue"},
01212                 {cast(void**) &u_charDirection,         "u_charDirection"},
01213                 {cast(void**) &ublock_getCode,          "ublock_getCode"},
01214                 {cast(void**) &u_charName,              "u_charName"},
01215                 {cast(void**) &u_getISOComment,         "u_getISOComment"},
01216                 {cast(void**) &u_charFromName,          "u_charFromName"},
01217                 {cast(void**) &u_getPropertyName,       "u_getPropertyName"},
01218                 {cast(void**) &u_getPropertyValueName,  "u_getPropertyValueName"},
01219                 {cast(void**) &u_getUnicodeVersion,     "u_getUnicodeVersion"},
01220                 {cast(void**) &u_charAge,               "u_charAge"},
01221                 ];
01222 
01223         /***********************************************************************
01224                 Static constructor: bind 'targets' and keep the returned handle
01225         ***********************************************************************/
01226 
01227         static this ()
01228         {
01229                 library = FunctionLoader.bind (icuuc, targets);
01230         }
01231 
01232         /***********************************************************************
01233                 Static destructor: hand the stored handle back to FunctionLoader
01234         ***********************************************************************/
01235 
01236         static ~this ()
01237         {
01238                 FunctionLoader.unbind (library);
01239         }
01240 }

Generated on Fri May 27 18:11:57 2005 for Mango by  doxygen 1.4.0