Main Page | Class Hierarchy | Alphabetical List | Class List | File List | Class Members | File Members | Related Pages

UChar.d

Go to the documentation of this file.
00001 /*******************************************************************************
00002 
00003         @file UChar.d
00004         
00005         Copyright (C) 2004 Kris Bell
00006         
00007         This software is provided 'as-is', without any express or implied
00008         warranty. In no event will the authors be held liable for damages
00009         of any kind arising from the use of this software.
00010         
00011         Permission is hereby granted to anyone to use this software for any 
00012         purpose, including commercial applications, and to alter it and/or 
00013         redistribute it freely, subject to the following restrictions:
00014         
00015         1. The origin of this software must not be misrepresented; you must 
00016            not claim that you wrote the original software. If you use this 
00017            software in a product, an acknowledgment within documentation of 
00018            said product would be appreciated but is not required.
00019 
00020         2. Altered source versions must be plainly marked as such, and must 
00021            not be misrepresented as being the original software.
00022 
00023         3. This notice may not be removed or altered from any distribution
00024            of the source.
00025 
00026 
00027                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00028 
00029 
00030         @version        Initial version, October 2004      
00031         @author         Kris
00032         
00033 
00034         Note that this package and its documentation are built around the ICU 
00035         project (http://oss.software.ibm.com/icu/). Below is the license 
00036         statement as specified by that software:
00037 
00038 
00039                         ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
00040 
00041 
00042         ICU License - ICU 1.8.1 and later
00043 
00044         COPYRIGHT AND PERMISSION NOTICE
00045 
00046         Copyright (c) 1995-2003 International Business Machines Corporation and 
00047         others.
00048 
00049         All rights reserved.
00050 
00051         Permission is hereby granted, free of charge, to any person obtaining a
00052         copy of this software and associated documentation files (the
00053         "Software"), to deal in the Software without restriction, including
00054         without limitation the rights to use, copy, modify, merge, publish,
00055         distribute, and/or sell copies of the Software, and to permit persons
00056         to whom the Software is furnished to do so, provided that the above
00057         copyright notice(s) and this permission notice appear in all copies of
00058         the Software and that both the above copyright notice(s) and this
00059         permission notice appear in supporting documentation.
00060 
00061         THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
00062         OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
00063         MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
00064         OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
00065         HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
00066         INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
00067         FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
00068         NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
00069         WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
00070 
00071         Except as contained in this notice, the name of a copyright holder
00072         shall not be used in advertising or otherwise to promote the sale, use
00073         or other dealings in this Software without prior written authorization
00074         of the copyright holder.
00075 
00076         ----------------------------------------------------------------------
00077 
00078         All trademarks and registered trademarks mentioned herein are the 
00079         property of their respective owners.
00080 
00081 *******************************************************************************/
00082 
00083 module mango.icu.UChar;
00084 
00085 private import  mango.icu.ICU;
00086 
00087 /*******************************************************************************
00088 
00089         This API provides low-level access to the Unicode Character 
00090         Database. In addition to raw property values, some convenience 
00091         functions calculate derived properties, for example for Java-style 
00092         programming.
00093 
00094         Unicode assigns each code point (not just each assigned character) 
00095         values for many properties. Most of them are simple boolean 
00096         flags, or constants from a small enumerated list. For some 
00097         properties, values are strings or other relatively more complex 
00098         types.
00099 
00100         For more information see "About the Unicode Character Database" 
00101         (http://www.unicode.org/ucd/) and the ICU User Guide chapter on 
00102         Properties (http://oss.software.ibm.com/icu/userguide/properties.html).
00103 
00104         Many functions are designed to match java.lang.Character functions. 
00105         See the individual function documentation, and see the JDK 1.4.1 
00106         java.lang.Character documentation at 
00107         http://java.sun.com/j2se/1.4.1/docs/api/java/lang/Character.html
00108 
00109         There are also functions that provide easy migration from C/POSIX 
00110         functions like isblank(). Their use is generally discouraged because 
00111         the C/POSIX standards do not define their semantics beyond the ASCII 
00112         range, which means that different implementations exhibit very different 
00113         behavior. Instead, Unicode properties should be used directly.
00114 
00115         There are also only a few, broad C/POSIX character classes, and they 
00116         tend to be used for conflicting purposes. For example, the "isalpha()" 
00117         class is sometimes used to determine word boundaries, while a more 
00118         sophisticated approach would at least distinguish initial letters from 
00119         continuation characters (the latter including combining marks). (In 
00120         ICU, BreakIterator is the most sophisticated API for word boundaries.) 
00121         Another example: There is no "istitle()" class for titlecase characters.
00122 
00123         A summary of the behavior of some C/POSIX character classification 
00124         implementations for Unicode is available at 
00125         http://oss.software.ibm.com/cvs/icu/~checkout~/icuhtml/design/posix_classes.html
00126 
00127         See <A HREF="http://oss.software.ibm.com/icu/apiref/uchar_8h.html">
00128         this page</A> for full details.
00129 
00130 *******************************************************************************/
00131 
00132 class UChar : ICU
00133 {
00134         /***********************************************************************
00135         
00136                 C-linkage function pointers, one for each ICU character
00137                 routine named in the binding table at the end of this
00138                 class. Nothing is assigned to them here; the Win32 section
00139                 below passes their addresses to FunctionLoader.bind().
00140 
00141         ***********************************************************************/
00142 
00143         public static extern (C) 
00144         {
00145         /***********************************************************************
00146         
00147                 Check if a code point has the Alphabetic Unicode property.
00148 
00149         ***********************************************************************/
00150 
00151         bool function (dchar c) isUAlphabetic;
00152 
00153         /***********************************************************************
00154         
00155                 Check if a code point has the Lowercase Unicode property.
00156 
00157         ***********************************************************************/
00158 
00159         bool function (dchar c) isULowercase;
00160 
00161         /***********************************************************************
00162         
00163                 Check if a code point has the Uppercase Unicode property.
00164 
00165         ***********************************************************************/
00166 
00167         bool function (dchar c) isUUppercase;
00168 
00169         /***********************************************************************
00170         
00171                 Check if a code point has the White_Space Unicode property.
00172 
00173         ***********************************************************************/
00174 
00175         bool function (dchar c) isUWhiteSpace;
00176 
00177         /***********************************************************************
00178         
00179                 Determines whether the specified code point has the general 
00180                 category "Ll" (lowercase letter).
00181 
00182         ***********************************************************************/
00183 
00184         bool function (dchar c) isLower;
00185 
00186         /***********************************************************************
00187         
00188                 Determines whether the specified code point has the general 
00189                 category "Lu" (uppercase letter).
00190 
00191         ***********************************************************************/
00192 
00193         bool function (dchar c) isUpper;
00194 
00195         /***********************************************************************
00196         
00197                 Determines whether the specified code point is a titlecase 
00198                 letter.
00199 
00200         ***********************************************************************/
00201 
00202         bool function (dchar c) isTitle;
00203 
00204         /***********************************************************************
00205         
00206                 Determines whether the specified code point is a digit 
00207                 character according to Java.
00208 
00209         ***********************************************************************/
00210 
00211         bool function (dchar c) isDigit;
00212 
00213         /***********************************************************************
00214         
00215                 Determines whether the specified code point is a letter 
00216                 character.
00217 
00218         ***********************************************************************/
00219 
00220         bool function (dchar c) isAlpha;
00221 
00222         /***********************************************************************
00223         
00224                 Determines whether the specified code point is an 
00225                 alphanumeric character (letter or digit) according 
00226                 to Java.
00227 
00228         ***********************************************************************/
00229 
00230         bool function (dchar c) isAlphaNumeric;
00231 
00232         /***********************************************************************
00233         
00234                 Determines whether the specified code point is a 
00235                 hexadecimal digit.
00236 
00237         ***********************************************************************/
00238 
00239         bool function (dchar c) isHexDigit;
00240 
00241         /***********************************************************************
00242         
00243                 Determines whether the specified code point is a 
00244                 punctuation character.
00245 
00246         ***********************************************************************/
00247 
00248         bool function (dchar c) isPunct;
00249 
00250         /***********************************************************************
00251         
00252                 Determines whether the specified code point is a "graphic" 
00253                 character (printable, excluding spaces).
00254 
00255         ***********************************************************************/
00256 
00257         bool function (dchar c) isGraph;
00258 
00259         /***********************************************************************
00260         
00261                 Determines whether the specified code point is a "blank" 
00262                 or "horizontal space", a character that visibly separates 
00263                 words on a line.
00264 
00265         ***********************************************************************/
00266 
00267         bool function (dchar c) isBlank;
00268 
00269         /***********************************************************************
00270         
00271                 Determines whether the specified code point is "defined", 
00272                 which usually means that it is assigned a character.
00273 
00274         ***********************************************************************/
00275 
00276         bool function (dchar c) isDefined;
00277 
00278         /***********************************************************************
00279         
00280                 Determines if the specified character is a space character
00281                  or not.
00282 
00283         ***********************************************************************/
00284 
00285         bool function (dchar c) isSpace;
00286 
00287         /***********************************************************************
00288         
00289                 Determine if the specified code point is a space character 
00290                 according to Java.
00291 
00292         ***********************************************************************/
00293 
00294         bool function (dchar c) isJavaSpaceChar;
00295 
00296         /***********************************************************************
00297         
00298                 Determines if the specified code point is a whitespace 
00299                 character according to Java/ICU.
00300 
00301         ***********************************************************************/
00302 
00303         bool function (dchar c) isWhiteSpace;
00304 
00305         /***********************************************************************
00306         
00307                 Determines whether the specified code point is a control 
00308                 character (as defined by this function).
00309 
00310         ***********************************************************************/
00311 
00312         bool function (dchar c) isCtrl;
00313 
00314         /***********************************************************************
00315         
00316                 Determines whether the specified code point is an ISO 
00317                 control code.
00318 
00319         ***********************************************************************/
00320 
00321         bool function (dchar c) isISOControl;
00322 
00323         /***********************************************************************
00324         
00325                 Determines whether the specified code point is a 
00326                 printable character.
00327 
00328         ***********************************************************************/
00329 
00330         bool function (dchar c) isPrint;
00331 
00332         /***********************************************************************
00333         
00334                 Determines whether the specified code point is a 
00335                 base character.
00336 
00337         ***********************************************************************/
00338 
00339         bool function (dchar c) isBase;
00340 
00341         /***********************************************************************
00342         
00343                 Determines if the specified character is permissible 
00344                 as the first character in an identifier according to 
00345                 Unicode (The Unicode Standard, Version 3.0, chapter 
00346                 5.16 Identifiers).
00347 
00348         ***********************************************************************/
00349 
00350         bool function (dchar c) isIDStart;
00351 
00352         /***********************************************************************
00353         
00354                 Determines if the specified character is permissible 
00355                 in an identifier according to Java.
00356 
00357         ***********************************************************************/
00358 
00359         bool function (dchar c) isIDPart;
00360 
00361         /***********************************************************************
00362         
00363                 Determines if the specified character should be regarded
00364                 as an ignorable character in an identifier, according 
00365                 to Java.
00366 
00367         ***********************************************************************/
00368 
00369         bool function (dchar c) isIDIgnorable;
00370 
00371         /***********************************************************************
00372         
00373                 Determines if the specified character is permissible 
00374                 as the first character in a Java identifier.
00375 
00376         ***********************************************************************/
00377 
00378         bool function (dchar c) isJavaIDStart;
00379 
00380         /***********************************************************************
00381         
00382                 Determines if the specified character is permissible 
00383                 in a Java identifier.
00384 
00385         ***********************************************************************/
00386 
00387         bool function (dchar c) isJavaIDPart;
00388 
00389         /***********************************************************************
00390         
00391                 Determines whether the code point has the Bidi_Mirrored 
00392                 property.
00393 
00394         ***********************************************************************/
00395 
00396         bool function (dchar c) isMirrored;
00397 
00398         /***********************************************************************
00399         
00400                 Returns the decimal digit value of a decimal digit character.
00401 
00402                 ICU declares u_charDigitValue() with a signed 32-bit result
00403                 and returns -1 for a code point that is not a decimal digit,
00404                 hence the int return type (a ubyte would turn -1 into 255).
00405 
00406         ***********************************************************************/
00407 
00408         int function (dchar c) charDigitValue;
00409 
00410         /***********************************************************************
00411         
00412                 Maps the specified character to a "mirror-image" character.
00413 
00414         ***********************************************************************/
00415 
00416         dchar function (dchar c) charMirror;
00417 
00418         /***********************************************************************
00419         
00420                 Returns the general category value for the code point.
00421 
00422         ***********************************************************************/
00423 
00424         ubyte function (dchar c) charType;
00425 
00426         /***********************************************************************
00427         
00428                 Returns the combining class of the code point as specified 
00429                 in UnicodeData.txt.
00430 
00431         ***********************************************************************/
00432 
00433         ubyte function (dchar c) getCombiningClass;
00434 
00435         /***********************************************************************
00436         
00437                 The given character is mapped to its lowercase equivalent 
00438                 according to UnicodeData.txt; if the character has no 
00439                 lowercase equivalent, the character itself is returned.
00440 
00441         ***********************************************************************/
00442 
00443         dchar function (dchar c) toLower;
00444 
00445         /***********************************************************************
00446         
00447                 The given character is mapped to its uppercase equivalent 
00448                 according to UnicodeData.txt; if the character has no 
00449                 uppercase equivalent, the character itself is returned.
00450 
00451         ***********************************************************************/
00452 
00453         dchar function (dchar c) toUpper;
00454 
00455         /***********************************************************************
00456         
00457                 The given character is mapped to its titlecase equivalent 
00458                 according to UnicodeData.txt; if none is defined, the 
00459                 character itself is returned.
00460 
00461         ***********************************************************************/
00462 
00463         dchar function (dchar c) toTitle;
00464 
00465         /***********************************************************************
00466         
00467                 The given character is mapped to its case folding equivalent 
00468                 according to UnicodeData.txt and CaseFolding.txt; if the 
00469                 character has no case folding equivalent, the character 
00470                 itself is returned.
00471 
00472         ***********************************************************************/
00473 
00474         dchar function (dchar c, uint options) foldCase;
00475 
00476         /***********************************************************************
00477         
00478                 Returns the decimal digit value of the code point in the 
00479                 specified radix.
00480 
00481                 ICU declares u_digit() with a signed 32-bit result and 
00482                 returns -1 when the code point has no value in the given 
00483                 radix, hence the int return type (so -1 stays negative).
00484 
00485         ***********************************************************************/
00486 
00487         int function (dchar ch, ubyte radix) digit;
00488 
00489         /***********************************************************************
00490         
00491                 Determines the character representation for a specific 
00492                 digit in the specified radix.
00493 
00494         ***********************************************************************/
00495 
00496         dchar function (uint digit, ubyte radix) forDigit;
00497 
00498         /***********************************************************************
00499         
00500                 Get the numeric value for a Unicode code point as defined 
00501                 in the Unicode Character Database.
00502 
00503                 ICU documents the constant U_NO_NUMERIC_VALUE as the result
00504                 for code points that have no numeric value.
00505 
00506         ***********************************************************************/
00507 
00508         double function (dchar c) getNumericValue;
00509         }
00510         
00511 
00512         /***********************************************************************
00513         
00514                 Bind the ICU functions from a shared library. This is
00515                 complicated by the issues regarding D and DLLs on the
00516                 Windows platform.
00517 
00518                 Only version (Win32) is handled in this class; for any
00519                 other version nothing here assigns the function pointers
00520                 declared above.
00521 
00522         ***********************************************************************/
00523 
00524         version (Win32)
00525         {
00526                 // value returned by FunctionLoader.bind(); handed back to
00527                 // FunctionLoader.unbind() by the static destructor
00528                 private static void*    library;
00529 
00530                 // name of the DLL passed to FunctionLoader.bind()
00531                 private static char[]   libraryName = "icuuc30.dll";     
00532 
00533                 /***************************************************************
00534 
00535                         Binding table handed to FunctionLoader.bind(): each
00536                         entry pairs the address of one function pointer
00537                         declared above with an ICU function name.
00538 
00539                 ***************************************************************/
00540 
00541                 static  FunctionLoader.Bind[] targets = 
00542                         [
00543                         {cast(void**) &forDigit,          "u_forDigit"},
00544                         {cast(void**) &digit,             "u_digit"},
00545                         {cast(void**) &foldCase,          "u_foldCase"},
00546                         {cast(void**) &toTitle,           "u_totitle"},
00547                         {cast(void**) &toUpper,           "u_toupper"},
00548                         {cast(void**) &toLower,           "u_tolower"},
00549                         {cast(void**) &charDigitValue,    "u_charDigitValue"},
00550                         {cast(void**) &getCombiningClass, "u_getCombiningClass"},
00551                         {cast(void**) &charType,          "u_charType"},
00552                         {cast(void**) &charMirror,        "u_charMirror"},
00553                         {cast(void**) &isJavaIDPart,      "u_isJavaIDPart"},
00554                         {cast(void**) &isJavaIDStart,     "u_isJavaIDStart"},
00555                         {cast(void**) &isIDIgnorable,     "u_isIDIgnorable"},
00556                         {cast(void**) &isIDPart,          "u_isIDPart"},
00557                         {cast(void**) &isIDStart,         "u_isIDStart"},
00558                         {cast(void**) &isMirrored,        "u_isMirrored"},
00559                         {cast(void**) &isBase,            "u_isbase"},
00560                         {cast(void**) &isPrint,           "u_isprint"},
00561                         {cast(void**) &isISOControl,      "u_isISOControl"},
00562                         {cast(void**) &isCtrl,            "u_iscntrl"},
00563                         {cast(void**) &isWhiteSpace,      "u_isWhitespace"},
00564                         {cast(void**) &isJavaSpaceChar,   "u_isJavaSpaceChar"},
00565                         {cast(void**) &isSpace,           "u_isspace"},
00566                         {cast(void**) &isDefined,         "u_isdefined"},
00567                         {cast(void**) &isBlank,           "u_isblank"},
00568                         {cast(void**) &isGraph,           "u_isgraph"},
00569                         {cast(void**) &isPunct,           "u_ispunct"},
00570                         {cast(void**) &isHexDigit,        "u_isxdigit"},
00571                         {cast(void**) &isAlpha,           "u_isalpha"},
00572                         {cast(void**) &isAlphaNumeric,    "u_isalnum"},
00573                         {cast(void**) &isDigit,           "u_isdigit"},
00574                         {cast(void**) &isTitle,           "u_istitle"},
00575                         {cast(void**) &isUpper,           "u_isupper"},
00576                         {cast(void**) &isLower,           "u_islower"},
00577                         {cast(void**) &isUAlphabetic,     "u_isUAlphabetic"},
00578                         {cast(void**) &isUWhiteSpace,     "u_isUWhiteSpace"},
00579                         {cast(void**) &isUUppercase,      "u_isUUppercase"},
00580                         {cast(void**) &isULowercase,      "u_isULowercase"},
00581                         {cast(void**) &getNumericValue,   "u_getNumericValue"},
00582                         ];
00583 
00584                 /***************************************************************
00585 
00586                         Static constructor: pass libraryName and the targets
00587                         table to FunctionLoader.bind() and keep its result.
00588 
00589                 ***************************************************************/
00590 
00591                 static this ()
00592                 {
00593                         library = FunctionLoader.bind (libraryName, targets);
00594                 }
00595 
00596                 /***************************************************************
00597 
00598                         Static destructor: hand the value obtained from 
00599                         bind() back to FunctionLoader.unbind().
00600 
00601                 ***************************************************************/
00602 
00603                 static ~this ()
00604                 {
00605                         FunctionLoader.unbind (library);
00606                 }
00607         }
00608 }

Generated on Sun Nov 7 19:06:53 2004 for Mango by doxygen 1.3.6